C++ STL set与unordered_set详解：集合的优雅运用

概述

set 和 unordered_set 是 C++ STL 中的集合容器，与 map 的区别在于它们只存储 key，不存储 value。它们的核心功能是：

自动去重：相同的元素只会存在一份
快速查找：判断元素是否存在

#include <iostream>
#include <set>
#include <unordered_set>

int main() {
    std::set<int> s = {3, 1, 4, 1, 5, 9, 2, 6, 5};  // 自动去重+排序
    
    std::cout << "set: ";
    for (int x : s) {
        std::cout << x << " ";  // 1 2 3 4 5 6 9
    }
    std::cout << std::endl;
    std::cout << "size: " << s.size() << std::endl;  // 7（原始9个元素，去重后7个）
    
    std::unordered_set<int> us = {3, 1, 4, 1, 5, 9, 2, 6, 5};  // 自动去重
    
    std::cout << "unordered_set size: " << us.size() << std::endl;  // 7
    
    return 0;
}

set vs unordered_set vs map 的关系

可以简单理解为：

容器	底层实现	元素类型	有序	查找复杂度
`set`	红黑树	key only	✅	O(log n)
`unordered_set`	哈希表	key only	❌	O(1) 平均
`map`	红黑树	key-value	✅	O(log n)
`unordered_map`	哈希表	key-value	❌	O(1) 平均

本质上 set<T> 就是 map<T, void> 的简化版本，底层结构相同，只是去掉了 value 部分。

常用操作

插入与删除

#include <iostream>
#include <set>

int main() {
    std::set<int> s;
    
    // 插入
    s.insert(3);
    s.insert(1);
    s.insert(5);
    s.insert(1);  // 重复插入，不会生效
    
    // insert 返回 pair<iterator, bool>
    auto [it, ok] = s.insert(2);
    if (ok) {
        std::cout << "Inserted 2" << std::endl;
    }
    
    auto [it2, ok2] = s.insert(1);  // 已存在
    if (!ok2) {
        std::cout << "1 already exists, value: " << *it2 << std::endl;
    }
    
    // emplace（原地构造）
    s.emplace(4);
    
    // emplace_hint（C++11，带位置提示）
    s.emplace_hint(s.end(), 6);
    
    // 删除
    s.erase(1);     // 按值删除，返回删除数量
    auto it3 = s.find(3);
    s.erase(it3);   // 按迭代器删除
    
    // 删除不存在的元素不会报错
    size_t cnt = s.erase(999);  // 返回 0
    
    // 清空
    s.clear();
    
    return 0;
}

查找操作

#include <iostream>
#include <set>

int main() {
    std::set<int> s = {1, 3, 5, 7, 9};
    
    // find：找到返回迭代器，没找到返回 end()
    auto it = s.find(5);
    if (it != s.end()) {
        std::cout << "Found: " << *it << std::endl;
    }
    
    // count：set 中只返回 0 或 1
    std::cout << "count(5): " << s.count(5) << std::endl;   // 1
    std::cout << "count(6): " << s.count(6) << std::endl;   // 0
    
    // contains (C++20)
    if (s.contains(7)) {
        std::cout << "7 exists" << std::endl;
    }
    
    // lower_bound：第一个 >= key 的元素
    auto lb = s.lower_bound(5);
    std::cout << "lower_bound(5): " << *lb << std::endl;  // 5
    
    // upper_bound：第一个 > key 的元素
    auto ub = s.upper_bound(5);
    std::cout << "upper_bound(5): " << *ub << std::endl;  // 7
    
    // equal_range：返回 [lower_bound, upper_bound)
    auto [first, last] = s.equal_range(5);
    std::cout << "equal_range(5): ";
    for (auto it = first; it != last; ++it) {
        std::cout << *it << " ";  // 5
    }
    std::cout << std::endl;
    
    return 0;
}

multiset —— 允许重复的集合

标准库还提供了 multiset，允许存储重复元素：

#include <iostream>
#include <set>

int main() {
    std::multiset<int> ms = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3};
    
    std::cout << "multiset: ";
    for (int x : ms) {
        std::cout << x << " ";  // 1 1 2 3 3 4 5 5 6 9
    }
    std::cout << std::endl;
    
    // count 返回重复次数
    std::cout << "count(1): " << ms.count(1) << std::endl;  // 2
    std::cout << "count(3): " << ms.count(3) << std::endl;  // 2
    
    // erase 删除所有值为 x 的元素
    ms.erase(1);  // 删除所有 1
    
    // 只删除一个：先 find 再 erase
    auto it = ms.find(3);
    if (it != ms.end()) {
        ms.erase(it);  // 只删除一个 3
    }
    
    // equal_range 获取某个值的所有出现位置
    auto [first, last] = ms.equal_range(5);
    std::cout << "All 5s: ";
    for (auto i = first; i != last; ++i) {
        std::cout << *i << " ";  // 5 5
    }
    std::cout << std::endl;
    
    return 0;
}

自定义类型的 set

set —— 需要定义 `<` 运算符

#include <iostream>
#include <set>
#include <string>

struct Student {
    std::string name;
    int score;
    
    bool operator<(const Student& other) const {
        // 按分数降序排列（注意：返回 this > other 时用 less 排序是降序）
        if (score != other.score) return score > other.score;
        return name < other.name;
    }
};

int main() {
    std::set<Student> students;
    students.insert({"Alice", 95});
    students.insert({"Bob", 87});
    students.insert({"Charlie", 95});
    students.insert({"Alice", 95});  // 重复，不会插入
    
    for (const auto& s : students) {
        std::cout << s.name << ": " << s.score << std::endl;
    }
    // Alice: 95
    // Charlie: 95
    // Bob: 87
    
    return 0;
}

unordered_set —— 需要定义 `==` 和哈希函数

#include <iostream>
#include <unordered_set>
#include <string>

struct Student {
    std::string name;
    int score;
    
    bool operator==(const Student& other) const {
        return name == other.name && score == other.score;
    }
};

struct StudentHash {
    size_t operator()(const Student& s) const {
        size_t h1 = std::hash<std::string>{}(s.name);
        size_t h2 = std::hash<int>{}(s.score);
        return h1 ^ (h2 << 1);
    }
};

int main() {
    std::unordered_set<Student, StudentHash> students;
    students.insert({"Alice", 95});
    students.insert({"Bob", 87});
    
    std::cout << "size: " << students.size() << std::endl;  // 2
    
    return 0;
}

实际应用场景

1. 数组去重

#include <iostream>
#include <vector>
#include <set>

int main() {
    std::vector<int> arr = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5};
    
    // 方式1：转 set 去重（会排序）
    std::set<int> unique_sorted(arr.begin(), arr.end());
    std::vector<int> result(unique_sorted.begin(), unique_sorted.end());
    
    std::cout << "Sorted unique: ";
    for (int x : result) {
        std::cout << x << " ";  // 1 2 3 4 5 6 9
    }
    std::cout << std::endl;
    
    // 方式2：保持原始顺序的去重（用 unordered_set 辅助）
    std::vector<int> arr2 = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5};
    std::unordered_set<int> seen;
    std::vector<int> ordered_unique;
    for (int x : arr2) {
        if (seen.insert(x).second) {  // insert 成功才加入
            ordered_unique.push_back(x);
        }
    }
    
    std::cout << "Original order unique: ";
    for (int x : ordered_unique) {
        std::cout << x << " ";  // 3 1 4 5 9 2 6
    }
    std::cout << std::endl;
    
    return 0;
}

2. 两个数组的交集、并集、差集

#include <iostream>
#include <vector>
#include <set>
#include <algorithm>

int main() {
    std::vector<int> a = {1, 2, 3, 4, 5};
    std::vector<int> b = {3, 4, 5, 6, 7};
    
    // 转为有序集合
    std::set<int> sa(a.begin(), a.end());
    std::set<int> sb(b.begin(), b.end());
    
    // 交集
    std::vector<int> intersection;
    std::set_intersection(sa.begin(), sa.end(), sb.begin(), sb.end(),
                          std::back_inserter(intersection));
    std::cout << "Intersection: ";
    for (int x : intersection) std::cout << x << " ";  // 3 4 5
    std::cout << std::endl;
    
    // 并集
    std::vector<int> union_set;
    std::set_union(sa.begin(), sa.end(), sb.begin(), sb.end(),
                   std::back_inserter(union_set));
    std::cout << "Union: ";
    for (int x : union_set) std::cout << x << " ";  // 1 2 3 4 5 6 7
    std::cout << std::endl;
    
    // 差集（a - b）
    std::vector<int> difference;
    std::set_difference(sa.begin(), sa.end(), sb.begin(), sb.end(),
                        std::back_inserter(difference));
    std::cout << "Difference (a-b): ";
    for (int x : difference) std::cout << x << " ";  // 1 2
    std::cout << std::endl;
    
    return 0;
}

3. 判断元素是否出现过（出现检测）

#include <iostream>
#include <unordered_set>
#include <vector>
#include <string>

// 找到第一个重复的元素
int findFirstDuplicate(const std::vector<int>& nums) {
    std::unordered_set<int> seen;
    for (int x : nums) {
        if (seen.count(x)) return x;  // 之前出现过
        seen.insert(x);
    }
    return -1;  // 没有重复
}

int main() {
    std::vector<int> nums = {2, 5, 1, 3, 5, 2, 4};
    std::cout << "First duplicate: " << findFirstDuplicate(nums) << std::endl;  // 5
    
    return 0;
}

4. 统计不同元素的数量

#include <iostream>
#include <unordered_set>
#include <vector>

int countDistinct(const std::vector<int>& nums) {
    return std::unordered_set<int>(nums.begin(), nums.end()).size();
}

int main() {
    std::vector<int> nums = {1, 2, 3, 2, 1, 4, 5, 4, 3, 2, 1};
    std::cout << "Distinct count: " << countDistinct(nums) << std::endl;  // 5
    return 0;
}

set 的特殊用法

有序集合的区间查询

set 基于 lower_bound 和 upper_bound 可以做高效的区间查询：

#include <iostream>
#include <set>

int main() {
    std::set<int> s = {1, 3, 5, 7, 9, 11, 13};
    
    // 查询 [lower, upper] 闭区间内的元素
    int lower = 4, upper = 10;
    auto start = s.lower_bound(lower);  // 第一个 >= 4
    auto end = s.upper_bound(upper);    // 第一个 > 10
    
    std::cout << "Elements in [" << lower << ", " << upper << "]: ";
    for (auto it = start; it != end; ++it) {
        std::cout << *it << " ";  // 5 7 9
    }
    std::cout << std::endl;
    
    return 0;
}

用 set 维护有序状态

#include <iostream>
#include <set>

int main() {
    // 实时维护中位数（简化版）
    std::multiset<int> data;
    
    auto add = [&](int x) { data.insert(x); };
    
    // 查询中位数
    auto median = [&]() -> double {
        auto mid = data.begin();
        std::advance(mid, data.size() / 2);
        if (data.size() % 2 == 1) return *mid;
        auto prev = std::prev(mid);
        return (*prev + *mid) / 2.0;
    };
    
    add(5);
    add(2);
    add(8);
    add(1);
    add(9);
    
    std::cout << "Median: " << median() << std::endl;  // 5
    
    add(3);
    std::cout << "Median: " << median() << std::endl;  // 4.0
    
    return 0;
}

常见陷阱

1. set 中修改元素需要先删后插

std::set<int> s = {1, 3, 5, 7};

// ❌ 不能直接修改元素
// *s.begin() = 10;  // 编译错误！set 的元素是 const

// ✅ 先删后插
s.erase(1);
s.insert(10);

2. set 的 iterator 是只读的

std::set<int> s = {1, 2, 3};

// set<int>::iterator 指向 const int
// 不能通过迭代器修改值
for (auto it = s.begin(); it != s.end(); ++it) {
    // *it = 100;  // ❌ 编译错误
}

3. unordered_set 遍历顺序不固定

std::unordered_set<int> s = {1, 2, 3, 4, 5};

// ⚠️ 不要依赖遍历顺序！
// 不同编译器、不同运行，顺序可能不同
// 同一个程序多次运行也可能不同（C++11 之后标准不再保证）
for (int x : s) {
    std::cout << x << " ";
}

4. 使用 pair 的 set

#include <iostream>
#include <set>

int main() {
    std::set<std::pair<int, int>> s;
    s.insert({1, 2});
    s.insert({1, 3});
    s.insert({2, 1});
    
    // pair 默认先比 first，再比 second
    for (auto& [a, b] : s) {
        std::cout << "(" << a << ", " << b << ") ";
    }
    // (1, 2) (1, 3) (2, 1)
    std::cout << std::endl;
    
    // 查找某个 key 对应的所有 value
    auto start = s.lower_bound({1, INT_MIN});
    auto end = s.upper_bound({1, INT_MAX});
    std::cout << "Key=1: ";
    for (auto it = start; it != end; ++it) {
        std::cout << "(" << it->first << "," << it->second << ") ";
    }
    std::cout << std::endl;
    
    return 0;
}

总结

场景	推荐
需要自动去重	`set` 或 `unordered_set`
需要有序遍历	`set`
只关心存在性，不在乎顺序	`unordered_set`
需要区间查询	`set`（lower_bound/upper_bound）
允许重复元素	`multiset`
自定义类型	`set` 重载 `<`，`unordered_set` 需要哈希

set 系列容器是去重和存在性检测的首选工具，unordered_set 在不需要有序性时性能更优。理解两者的底层差异和各自陷阱，能帮你写出更高效、更正确的代码。