C++ STL map与unordered_map详解：有序与无序的抉择

概述

map 和 unordered_map 是 C++ STL 中最常用的关联容器，都提供键值对的存储和快速查找。但它们的底层实现完全不同，适用的场景也各有侧重。

简单来说：

map：基于红黑树，按键有序
unordered_map：基于哈希表，按键无序，查找更快

#include <iostream>
#include <map>
#include <unordered_map>

int main() {
    // map：按键升序排列
    std::map<std::string, int> m;
    m["banana"] = 3;
    m["apple"] = 5;
    m["cherry"] = 2;
    
    std::cout << "map (有序):" << std::endl;
    for (auto& [k, v] : m) {
        std::cout << "  " << k << ": " << v << std::endl;
    }
    // apple: 5
    // banana: 3
    // cherry: 2
    
    // unordered_map：遍历顺序不确定
    std::unordered_map<std::string, int> um;
    um["banana"] = 3;
    um["apple"] = 5;
    um["cherry"] = 2;
    
    std::cout << "\nunordered_map (无序):" << std::endl;
    for (auto& [k, v] : um) {
        std::cout << "  " << k << ": " << v << std::endl;
    }
    // 输出顺序不确定
    
    return 0;
}

底层实现原理

map —— 红黑树

std::map 的底层是一棵红黑树（Red-Black Tree），它是一种自平衡二叉搜索树。

核心特性：

每个节点是红色或黑色
根节点是黑色
叶子节点（NIL）是黑色
红色节点的子节点必须是黑色
从任一节点到其所有叶子节点的路径上，黑色节点数相同

这些约束保证了树的高度始终为 O(log n)，从而确保：

查找、插入、删除都是 O(log n)

#include <iostream>
#include <map>

int main() {
    std::map<int, std::string> m;
    
    // 插入会自动按 key 排序
    m[5] = "five";
    m[3] = "three";
    m[8] = "eight";
    m[1] = "one";
    m[4] = "four";
    
    // 中序遍历就是有序的
    for (auto& [k, v] : m) {
        std::cout << k << ": " << v << std::endl;
    }
    // 1: one
    // 3: three
    // 4: four
    // 5: five
    // 8: eight
    
    return 0;
}

unordered_map —— 哈希表

std::unordered_map 的底层是一个哈希表，核心组件：

哈希函数：将 key 映射到 bucket 索引
桶数组（buckets）：存储元素
冲突解决：链地址法（每个桶挂一个链表）

核心特性：

平均查找、插入、删除是 O(1)
**最坏情况 O(n)**（所有 key 哈希冲突时）
元素无序

#include <iostream>
#include <unordered_map>

int main() {
    std::unordered_map<int, std::string> um;
    
    um[5] = "five";
    um[3] = "three";
    um[8] = "eight";
    um[1] = "one";
    um[4] = "four";
    
    std::cout << "bucket count: " << um.bucket_count() << std::endl;
    std::cout << "max load factor: " << um.max_load_factor() << std::endl;
    std::cout << "load factor: " << um.load_factor() << std::endl;
    
    // 查看某个 key 在哪个桶
    std::cout << "key 3 in bucket: " << um.bucket(3) << std::endl;
    std::cout << "bucket 3 has " << um.bucket_size(3) << " elements" << std::endl;
    
    return 0;
}

增删改查操作

插入

#include <iostream>
#include <map>
#include <unordered_map>

int main() {
    std::map<std::string, int> m;
    
    // 方式1：[] 运算符（如果key不存在，自动插入默认值）
    m["apple"] = 5;  // 插入或更新
    
    // 方式2：insert
    m.insert({"banana", 3});
    m.insert(std::make_pair("cherry", 2));
    m.insert(std::pair<std::string, int>("date", 7));
    
    // 方式3：insert 返回 pair<iterator, bool>
    auto [it, success] = m.insert({"apple", 10});
    if (!success) {
        std::cout << "apple already exists, value: " << it->second << std::endl;
        // 输出：apple already exists, value: 5（不会覆盖）
    }
    
    // 方式4：emplace（原地构造，可能更高效）
    m.emplace("elderberry", 4);
    
    // 方式5：try_emplace (C++17)
    m.try_emplace("fig", 6);  // key 不存在才插入
    
    // 方式6：insert_or_assign (C++17) —— 存在则更新，不存在则插入
    m.insert_or_assign("apple", 10);  // apple -> 10
    m.insert_or_assign("grape", 8);   // 新增 grape -> 8
    
    for (auto& [k, v] : m) {
        std::cout << k << ": " << v << std::endl;
    }
    
    return 0;
}

查找

#include <iostream>
#include <map>

int main() {
    std::map<std::string, int> m = {{"apple", 5}, {"banana", 3}};
    
    // 方式1：find —— 找不到返回 end()
    auto it = m.find("apple");
    if (it != m.end()) {
        std::cout << "Found: " << it->first << " = " << it->second << std::endl;
    }
    
    // 方式2：count —— 判断是否存在（map 中 count 只返回 0 或 1）
    if (m.count("banana")) {
        std::cout << "banana exists" << std::endl;
    }
    
    // 方式3：[] —— 找不到会自动插入默认值！（注意副作用）
    int val = m["cherry"];  // cherry 不存在，自动插入 cherry -> 0
    std::cout << "cherry (auto-inserted): " << val << std::endl;
    
    // 方式4：at() —— 找不到抛出 std::out_of_range（推荐）
    try {
        int v = m.at("apple");  // 安全访问
        std::cout << "apple: " << v << std::endl;
        m.at("nonexistent");    // 抛异常
    } catch (const std::out_of_range& e) {
        std::cout << "Key not found: " << e.what() << std::endl;
    }
    
    // 方式5：contains (C++20)
    if (m.contains("banana")) {
        std::cout << "banana exists (C++20)" << std::endl;
    }
    
    return 0;
}

删除

#include <iostream>
#include <map>

int main() {
    std::map<std::string, int> m = {
        {"apple", 5}, {"banana", 3}, {"cherry", 2}
    };
    
    // 按key删除
    m.erase("banana");
    
    // 按迭代器删除
    auto it = m.find("cherry");
    if (it != m.end()) {
        m.erase(it);
    }
    
    // 范围删除
    std::map<int, int> nums = {{1, 10}, {2, 20}, {3, 30}, {4, 40}, {5, 50}};
    auto first = nums.begin();
    auto last = nums.find(4);
    nums.erase(first, last);  // 删除 key 1, 2, 3
    
    // 返回删除数量
    size_t count = m.erase("apple");  // 返回 1（成功删除）或 0（不存在）
    std::cout << "Erased " << count << " elements" << std::endl;
    
    return 0;
}

遍历技巧

#include <iostream>
#include <map>

int main() {
    std::map<std::string, int> scores = {
        {"Alice", 95}, {"Bob", 87}, {"Charlie", 92}
    };
    
    // C++17 结构化绑定
    for (const auto& [name, score] : scores) {
        std::cout << name << ": " << score << std::endl;
    }
    
    // 反向遍历
    std::cout << "\nReverse order:" << std::endl;
    for (auto it = scores.rbegin(); it != scores.rend(); ++it) {
        std::cout << it->first << ": " << it->second << std::endl;
    }
    
    // 只遍历 values
    std::cout << "\nAll scores: ";
    for (const auto& [_, score] : scores) {
        std::cout << score << " ";  // 87 92 95
    }
    std::cout << std::endl;
    
    // 只遍历 keys
    std::cout << "All names: ";
    for (const auto& [name, _] : scores) {
        std::cout << name << " ";  // Alice Bob Charlie
    }
    std::cout << std::endl;
    
    return 0;
}

自定义 Key 类型

map —— 自定义比较器

map 要求 key 支持比较（默认 < 运算符）。自定义类型需要提供比较器：

#include <iostream>
#include <map>
#include <string>

struct Person {
    std::string name;
    int age;
    
    // 方式1：重载 < 运算符
    bool operator<(const Person& other) const {
        if (age != other.age) return age < other.age;
        return name < other.name;
    }
};

int main() {
    // 使用重载的 < 运算符
    std::map<Person, std::string> m1;
    m1[{"Alice", 25}] = "Engineer";
    m1[{"Bob", 30}] = "Manager";
    m1[{"Charlie", 25}] = "Designer";
    
    for (const auto& [person, role] : m1) {
        std::cout << person.name << " (" << person.age << "): " << role << std::endl;
    }
    // Charlie (25): Designer
    // Alice (25): Engineer
    // Bob (30): Manager
    
    // 方式2：自定义比较器
    auto cmp = [](const Person& a, const Person& b) {
        if (a.name != b.name) return a.name < b.name;
        return a.age < b.age;
    };
    std::map<Person, std::string, decltype(cmp)> m2(cmp);
    m2[{"Alice", 25}] = "Engineer";
    
    return 0;
}

unordered_map —— 自定义哈希函数

unordered_map 要求 key 支持 == 比较和哈希函数：

#include <iostream>
#include <unordered_map>
#include <string>

struct Person {
    std::string name;
    int age;
    
    bool operator==(const Person& other) const {
        return name == other.name && age == other.age;
    }
};

// 自定义哈希函数
struct PersonHash {
    size_t operator()(const Person& p) const {
        size_t h1 = std::hash<std::string>{}(p.name);
        size_t h2 = std::hash<int>{}(p.age);
        return h1 ^ (h2 << 1);  // 简单的组合方式
    }
};

int main() {
    std::unordered_map<Person, std::string, PersonHash> um;
    um[{"Alice", 25}] = "Engineer";
    um[{"Bob", 30}] = "Manager";
    
    for (const auto& [person, role] : um) {
        std::cout << person.name << ": " << role << std::endl;
    }
    
    return 0;
}

性能对比

#include <iostream>
#include <map>
#include <unordered_map>
#include <chrono>
#include <string>

const int N = 1000000;

void benchmark() {
    // 准备数据
    std::vector<std::string> keys;
    keys.reserve(N);
    for (int i = 0; i < N; i++) {
        keys.push_back("key_" + std::to_string(i));
    }
    
    // benchmark map
    {
        auto start = std::chrono::high_resolution_clock::now();
        std::map<std::string, int> m;
        for (int i = 0; i < N; i++) {
            m[keys[i]] = i;
        }
        for (int i = 0; i < N; i++) {
            auto it = m.find(keys[i]);
        }
        auto end = std::chrono::high_resolution_clock::now();
        std::cout << "map: " 
                  << std::chrono::duration<double, std::milli>(end - start).count() 
                  << " ms" << std::endl;
    }
    
    // benchmark unordered_map
    {
        auto start = std::chrono::high_resolution_clock::now();
        std::unordered_map<std::string, int> um;
        um.reserve(N);  // 预分配，避免 rehash
        for (int i = 0; i < N; i++) {
            um[keys[i]] = i;
        }
        for (int i = 0; i < N; i++) {
            auto it = um.find(keys[i]);
        }
        auto end = std::chrono::high_resolution_clock::now();
        std::cout << "unordered_map: " 
                  << std::chrono::duration<double, std::milli>(end - start).count() 
                  << " ms" << std::endl;
    }
}

int main() {
    benchmark();
    // 典型结果：
    // map: ~800 ms
    // unordered_map: ~300 ms
    return 0;
}

常见陷阱

1. `[]` 的副作用——会插入默认值

std::map<std::string, int> m = {{"apple", 5}};

// ❌ 本意是查找，结果插入了新元素
if (m["banana"] > 0) {  // banana 被插入，值为 0
    std::cout << "found" << std::endl;
}
// 现在 m 包含 {"apple", 5} 和 {"banana", 0}

// ✅ 用 find 或 at 或 contains
if (m.find("banana") != m.end()) { ... }
if (m.contains("banana")) { ... }  // C++20

2. `unordered_map` 的 rehash 导致性能抖动

// ❌ 不知道最终大小，频繁 rehash
std::unordered_map<int, int> um;
for (int i = 0; i < 1000000; i++) {
    um[i] = i * 2;  // 可能 rehash 好几次
}

// ✅ 预先 reserve
std::unordered_map<int, int> um;
um.reserve(1000000);  // 一次性分配足够空间
for (int i = 0; i < 1000000; i++) {
    um[i] = i * 2;
}

3. 迭代器稳定性

std::map<int, int> m = {{1, 10}, {2, 20}, {3, 30}};

// map：插入和删除不影响其他元素的迭代器（除了被删除的）
auto it1 = m.find(3);
m.insert({0, 0});
// it1 仍然有效

std::unordered_map<int, int> um = {{1, 10}, {2, 20}, {3, 30}};

// unordered_map：任何插入都可能导致 rehash，使所有迭代器失效！
auto it2 = um.find(3);
um.insert({0, 0});  // 可能触发 rehash
// it2 可能已失效！❌

如何选择？

考量因素	选 map	选 unordered_map
需要有序遍历	✅	❌
需要 lower_bound/upper_bound	✅	❌
查找性能最重要	❌ (O(log n))	✅ (O(1))
Key 不支持哈希	✅	❌
内存占用敏感	✅（3个指针/节点）	❌（哈希表开销大）
迭代器稳定性要求高	✅	❌

经验法则：大多数场景下 unordered_map 更快，选它。需要有序性或范围查询时才选 map。