概述
set 和 unordered_set 是 C++ STL 中的集合容器,与 map 的区别在于它们只存储 key,不存储 value。它们的核心功能是:
- 自动去重:相同的元素只会存在一份
- 快速查找:判断元素是否存在
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
| #include <iostream> #include <set> #include <unordered_set>
int main() { std::set<int> s = {3, 1, 4, 1, 5, 9, 2, 6, 5}; std::cout << "set: "; for (int x : s) { std::cout << x << " "; } std::cout << std::endl; std::cout << "size: " << s.size() << std::endl; std::unordered_set<int> us = {3, 1, 4, 1, 5, 9, 2, 6, 5}; std::cout << "unordered_set size: " << us.size() << std::endl; return 0; }
|
set vs unordered_set vs map 的关系
可以简单理解为:
| 容器 |
底层实现 |
元素类型 |
有序 |
查找复杂度 |
set |
红黑树 |
key only |
✅ |
O(log n) |
unordered_set |
哈希表 |
key only |
❌ |
O(1) 平均 |
map |
红黑树 |
key-value |
✅ |
O(log n) |
unordered_map |
哈希表 |
key-value |
❌ |
O(1) 平均 |
本质上 set<T> 就是 map<T, void> 的简化版本,底层结构相同,只是去掉了 value 部分。
常用操作
插入与删除
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
| #include <iostream> #include <set>
int main() { std::set<int> s; s.insert(3); s.insert(1); s.insert(5); s.insert(1); auto [it, ok] = s.insert(2); if (ok) { std::cout << "Inserted 2" << std::endl; } auto [it2, ok2] = s.insert(1); if (!ok2) { std::cout << "1 already exists, value: " << *it2 << std::endl; } s.emplace(4); s.emplace_hint(s.end(), 6); s.erase(1); auto it3 = s.find(3); s.erase(it3); size_t cnt = s.erase(999); s.clear(); return 0; }
|
查找操作
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| #include <iostream> #include <set>
int main() { std::set<int> s = {1, 3, 5, 7, 9}; auto it = s.find(5); if (it != s.end()) { std::cout << "Found: " << *it << std::endl; } std::cout << "count(5): " << s.count(5) << std::endl; std::cout << "count(6): " << s.count(6) << std::endl; if (s.contains(7)) { std::cout << "7 exists" << std::endl; } auto lb = s.lower_bound(5); std::cout << "lower_bound(5): " << *lb << std::endl; auto ub = s.upper_bound(5); std::cout << "upper_bound(5): " << *ub << std::endl; auto [first, last] = s.equal_range(5); std::cout << "equal_range(5): "; for (auto it = first; it != last; ++it) { std::cout << *it << " "; } std::cout << std::endl; return 0; }
|
multiset —— 允许重复的集合
标准库还提供了 multiset,允许存储重复元素:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
| #include <iostream> #include <set>
int main() { std::multiset<int> ms = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3}; std::cout << "multiset: "; for (int x : ms) { std::cout << x << " "; } std::cout << std::endl; std::cout << "count(1): " << ms.count(1) << std::endl; std::cout << "count(3): " << ms.count(3) << std::endl; ms.erase(1); auto it = ms.find(3); if (it != ms.end()) { ms.erase(it); } auto [first, last] = ms.equal_range(5); std::cout << "All 5s: "; for (auto i = first; i != last; ++i) { std::cout << *i << " "; } std::cout << std::endl; return 0; }
|
自定义类型的 set
set —— 需要定义 < 运算符
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| #include <iostream> #include <set> #include <string>
struct Student { std::string name; int score; bool operator<(const Student& other) const { if (score != other.score) return score > other.score; return name < other.name; } };
int main() { std::set<Student> students; students.insert({"Alice", 95}); students.insert({"Bob", 87}); students.insert({"Charlie", 95}); students.insert({"Alice", 95}); for (const auto& s : students) { std::cout << s.name << ": " << s.score << std::endl; } return 0; }
|
unordered_set —— 需要定义 == 和哈希函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| #include <iostream> #include <unordered_set> #include <string>
struct Student { std::string name; int score; bool operator==(const Student& other) const { return name == other.name && score == other.score; } };
struct StudentHash { size_t operator()(const Student& s) const { size_t h1 = std::hash<std::string>{}(s.name); size_t h2 = std::hash<int>{}(s.score); return h1 ^ (h2 << 1); } };
int main() { std::unordered_set<Student, StudentHash> students; students.insert({"Alice", 95}); students.insert({"Bob", 87}); std::cout << "size: " << students.size() << std::endl; return 0; }
|
实际应用场景
1. 数组去重
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
| #include <iostream> #include <vector> #include <set>
int main() { std::vector<int> arr = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5}; std::set<int> unique_sorted(arr.begin(), arr.end()); std::vector<int> result(unique_sorted.begin(), unique_sorted.end()); std::cout << "Sorted unique: "; for (int x : result) { std::cout << x << " "; } std::cout << std::endl; std::vector<int> arr2 = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5}; std::unordered_set<int> seen; std::vector<int> ordered_unique; for (int x : arr2) { if (seen.insert(x).second) { ordered_unique.push_back(x); } } std::cout << "Original order unique: "; for (int x : ordered_unique) { std::cout << x << " "; } std::cout << std::endl; return 0; }
|
2. 两个数组的交集、并集、差集
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| #include <iostream> #include <vector> #include <set> #include <algorithm>
int main() { std::vector<int> a = {1, 2, 3, 4, 5}; std::vector<int> b = {3, 4, 5, 6, 7}; std::set<int> sa(a.begin(), a.end()); std::set<int> sb(b.begin(), b.end()); std::vector<int> intersection; std::set_intersection(sa.begin(), sa.end(), sb.begin(), sb.end(), std::back_inserter(intersection)); std::cout << "Intersection: "; for (int x : intersection) std::cout << x << " "; std::cout << std::endl; std::vector<int> union_set; std::set_union(sa.begin(), sa.end(), sb.begin(), sb.end(), std::back_inserter(union_set)); std::cout << "Union: "; for (int x : union_set) std::cout << x << " "; std::cout << std::endl; std::vector<int> difference; std::set_difference(sa.begin(), sa.end(), sb.begin(), sb.end(), std::back_inserter(difference)); std::cout << "Difference (a-b): "; for (int x : difference) std::cout << x << " "; std::cout << std::endl; return 0; }
|
3. 判断元素是否出现过(出现检测)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
| #include <iostream> #include <unordered_set> #include <vector> #include <string>
int findFirstDuplicate(const std::vector<int>& nums) { std::unordered_set<int> seen; for (int x : nums) { if (seen.count(x)) return x; seen.insert(x); } return -1; }
int main() { std::vector<int> nums = {2, 5, 1, 3, 5, 2, 4}; std::cout << "First duplicate: " << findFirstDuplicate(nums) << std::endl; return 0; }
|
4. 统计不同元素的数量
1 2 3 4 5 6 7 8 9 10 11 12 13
| #include <iostream> #include <unordered_set> #include <vector>
int countDistinct(const std::vector<int>& nums) { return std::unordered_set<int>(nums.begin(), nums.end()).size(); }
int main() { std::vector<int> nums = {1, 2, 3, 2, 1, 4, 5, 4, 3, 2, 1}; std::cout << "Distinct count: " << countDistinct(nums) << std::endl; return 0; }
|
set 的特殊用法
有序集合的区间查询
set 基于 lower_bound 和 upper_bound 可以做高效的区间查询:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| #include <iostream> #include <set>
int main() { std::set<int> s = {1, 3, 5, 7, 9, 11, 13}; int lower = 4, upper = 10; auto start = s.lower_bound(lower); auto end = s.upper_bound(upper); std::cout << "Elements in [" << lower << ", " << upper << "]: "; for (auto it = start; it != end; ++it) { std::cout << *it << " "; } std::cout << std::endl; return 0; }
|
用 set 维护有序状态
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| #include <iostream> #include <set>
int main() { std::multiset<int> data; auto add = [&](int x) { data.insert(x); }; auto median = [&]() -> double { auto mid = data.begin(); std::advance(mid, data.size() / 2); if (data.size() % 2 == 1) return *mid; auto prev = std::prev(mid); return (*prev + *mid) / 2.0; }; add(5); add(2); add(8); add(1); add(9); std::cout << "Median: " << median() << std::endl; add(3); std::cout << "Median: " << median() << std::endl; return 0; }
|
常见陷阱
1. set 中修改元素需要先删后插
1 2 3 4 5 6 7 8
| std::set<int> s = {1, 3, 5, 7};
s.erase(1); s.insert(10);
|
2. set 的 iterator 是只读的
1 2 3 4 5 6 7
| std::set<int> s = {1, 2, 3};
for (auto it = s.begin(); it != s.end(); ++it) { }
|
3. unordered_set 遍历顺序不固定
1 2 3 4 5 6 7 8
| std::unordered_set<int> s = {1, 2, 3, 4, 5};
for (int x : s) { std::cout << x << " "; }
|
4. 使用 pair 的 set
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
| #include <iostream> #include <set>
int main() { std::set<std::pair<int, int>> s; s.insert({1, 2}); s.insert({1, 3}); s.insert({2, 1}); for (auto& [a, b] : s) { std::cout << "(" << a << ", " << b << ") "; } std::cout << std::endl; auto start = s.lower_bound({1, INT_MIN}); auto end = s.upper_bound({1, INT_MAX}); std::cout << "Key=1: "; for (auto it = start; it != end; ++it) { std::cout << "(" << it->first << "," << it->second << ") "; } std::cout << std::endl; return 0; }
|
总结
| 场景 |
推荐 |
| 需要自动去重 |
set 或 unordered_set |
| 需要有序遍历 |
set |
| 只关心存在性,不在乎顺序 |
unordered_set |
| 需要区间查询 |
set(lower_bound/upper_bound) |
| 允许重复元素 |
multiset |
| 自定义类型 |
set 重载 <,unordered_set 需要哈希 |
set 系列容器是去重和存在性检测的首选工具,unordered_set 在不需要有序性时性能更优。理解两者的底层差异和各自陷阱,能帮你写出更高效、更正确的代码。