关于哈希表

1.哈希表的查找、插入及删除

1-1.存在重复元素：217

法一：排序

sort()的时间复杂度是$O(NlogN)$,空间复杂度为$O(logN)$，因为排序过程中需要递归调用栈空间。

class Solution {
public:
    bool containsDuplicate(vector<int>& nums) {
        sort(nums.begin(), nums.end());
        for (int i = 0; i < nums.size() - 1; i++) {
            if (nums[i] == nums[i + 1]) {
                return true;
            }
        }
        return false;
    }
};

法二：哈希表

时间和空间复杂度均为$O(N)$，set的find()时间复杂度是$O(1)$

class Solution {
public:
    bool containsDuplicate(vector<int>& nums) {
        unordered_set<int> s;
        for (int x : nums) {
            if (s.find(x) != s.end()) {
                return true;
            }
            s.insert(x);
        }
        return false;
    }
};

但该题实际上排序比哈希表的执行用时和消耗内存更小，所以时间复杂度不是唯一。哈希表要计算哈希值等，可以理解哈希表时间复杂度常数项比较大。常数不一样，hashset的常数开销大，所以对于非大量的数据来说，常数效应覆盖了时间复杂度的差距。如果把数据量放大，那么随着数据量的增加，hashset方式的时间是线性增加，sort是非线性增加，到一个阈值，则hashset的低时间复杂度的优势才能发挥出来。

1-2.两个数组的交集：349

法一：哈希表

如果使用哈希集合存储元素，则可以在 $O(1)$ 的时间内判断一个元素是否在集合中，从而降低时间复杂度。

class Solution {
public:
    vector<int> getSets(unordered_set<int>& set1, unordered_set<int>& set2) {
        if (set1.size() < set2.size()) {
            return getSets(set2, set1);
        }
        vector<int> ans;
        for (auto& num : set1) {
            if (set2.count(num)) { // set1中的num，set2也有
                ans.push_back(num);
            }
        }
        return ans;
    }
    vector<int> intersection(vector<int>& nums1, vector<int>& nums2) {
        unordered_set<int> set1, set2;
        for (int& x : nums1) {
            set1.insert(x);
        }
        for (int& x : nums2) {
            set2.insert(x);
        }
        return getSets(set1, set2);
    }
};

法二：排序+双指针

class Solution {
public:
    vector<int> intersection(vector<int>& nums1, vector<int>& nums2) {
        sort(nums1.begin(), nums1.end());
        sort(nums2.begin(), nums2.end());
        int index1 = 0, index2 = 0;
        vector<int> ans;
        while (index1 < nums1.size() && index2 < nums2.size()) {
            int num1 = nums1[index1], num2 = nums2[index2];
            if (num1 == num2) {
                // 确保加入的元素是唯一的
                if (!ans.size() || num1 != ans.back()) {
                    ans.emplace_back(num1);
                }
                ++index1;
                ++index2;
            } else if (num1 < num2) {
                ++index1;
            } else {
                ++index2;
            }
        }
        return ans;
    }
};

法三：直接使用set,省去排序的步骤。效率和法一差不多

class Solution {
public:
    vector<int> intersection(vector<int>& nums1, vector<int>& nums2) {
        set<int> s;
        for (auto& num : nums1) {
            s.insert(num);
        }
        set<int> intersectionSet;
        for (auto& num : nums2) {
            if (s.find(num) != s.end()) {
                if (intersectionSet.find(num) == intersectionSet.end()) { 
                    intersectionSet.insert(num);
                }
            }
        }
        vector<int> ans;
        for (set<int>::iterator it = intersectionSet.begin();
             it != intersectionSet.end(); it++) {
            ans.push_back(*it);
        }
        return ans;
    }
};

1-3.最长连续序列：128

法一：哈希表(该题也可以直接用set)

class Solution {
public:
    int longestConsecutive(vector<int>& nums) {
        unordered_set<int> s;
        for (int& num : nums) {
            s.insert(num);
        }
        int count = 0;
        for (const auto& num : s) {
            if (!s.count(num - 1)) { // 不存在num的前一个数字，重置
                int currentNum = num, currentCount = 1;
                while (s.count(currentNum + 1)) {
                    currentCount++;
                    currentNum++;
                }
                count = max(count, currentCount);
            }
        }
        return count;
    }
};

注意第9行，如果写作`int& num:s`则会报错，而使用`const int& num:s`/`auto& num:s`/`const auto& num:s`/`auto&& num:s`则不会报错。因为前者将尝试以非const引用的方式访问容器中的元素，而unordered_set中的元素是<mark>只读</mark>的。

Q:但是为什么`int& num:s`不正确，而`auto& num:s`就可以呢？

A:可参见[C++中的auto关键字](https://zhuanlan.zhihu.com/p/390608866)，这里auto就相当于T，而这里T相当于是const int.

1-4.快乐数：202

法一：哈希表

需要注意一点：如何解决无限循环这一问题。记录下每次的平方和，当出现重复则说明进入无限循环。

class Solution {
public:
    bool isHappy(int n) {
        unordered_set<int> numbers;
        while (!numbers.count(n)) { // n没有出现在平方和记录中过
            int num = n;
            numbers.insert(num);
            n = 0;
            while (num) {
                int i = num % 10;
                num /= 10;
                n += i * i;
            }
        }
        return n == 1;
    }
};

法二：快慢指针（空间复杂度更低，时间复杂度相同，但是实际速度更慢一点）

每次获取下一个平方和的操作，可以认为是在构造一个隐式链表的下一个节点。检测是否进入循环，相当于检测一个链表是否有环，因此可以使用Floyd循环查找算法。

slow每次前进一个节点，fast每次前进两次，这样它们最终将在循环中相遇，while循环中止条件其一即为slow==fast。

class Solution {
public:
    int getNext(int n) {
        int sum = 0;
        while (n) {
            int i = n % 10;
            n /= 10;
            sum += i * i;
        }
        return sum;
    }
    bool isHappy(int n) {
        int slow = n, fast = getNext(n);
        while (fast != 1 && slow != fast) {
            slow = getNext(slow);
            fast = getNext(getNext(fast));
        }
        return fast == 1;
    }
};

1-5.键盘行：500

法一：遍历

键盘字母转换为行号。2ms,7.82MB。

class Solution {
public:
    vector<string> findWords(vector<string>& words) {
        vector<string> ans;
        string rowIdx = "12210111011122000010020202";
        for (auto & word : words) {
            bool isValid = true;
            char idx = rowIdx[tolower(word[0]) - 'a'];
            for (int i = 1; i < word.size(); ++i) {
                if(rowIdx[tolower(word[i]) - 'a'] != idx) {
                    isValid = false;
                    break;
                }
            }
            if (isValid) {
                ans.emplace_back(word);
            }
        }
        return ans;
    }
};

法二：哈希表

4ms,8MB

class Solution {
public:
    vector<string> findWords(vector<string>& words) {
        unordered_set<char> s{'a', 's', 'd', 'f', 'g', 'h', 'j', 'k', 'l'};
        unordered_set<char> s2{'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', 'o', 'p'};
        unordered_set<char> s3{'z', 'x', 'c', 'v', 'b', 'n', 'm'};
        int n = words.size();
        vector<string> ans;
        for (auto& word : words) {
            bool inS1 = true, inS2 = true, inS3 = true;
            for (auto& c : word) {
                char ch = tolower(c); // 记得转换成小写,注意tolower()返回的是int值
                // 或者写作auto ch = char(tolower(c));
                if (inS1 && (s.find(ch) == s.end())) {
                    inS1 = false;
                }
                if (inS2 && (s2.find(ch) == s2.end())) {
                    inS2 = false;
                }
                if (inS3 && (s3.find(ch) == s3.end())) {
                    inS3 = false;
                }
            }
            if (inS1 || inS2 || inS3) {
                ans.push_back(word);
            }
        }
        return ans;
    }
};

法三：正则

41ms,20.8MB

class Solution {
public:
    bool isValid(string& word) {
        std::regex pattern("^[QqWwEeRrTtYyUuIiOoPp]*$|^[AaSsDdFfGgHhJjKkLl]*$|^[ZzXxCcVvBbNnMm]*$");
        if (std::regex_match(word, pattern)) {
            return true;
        } else {
            return false;
        }
    }
    vector<string> findWords(vector<string>& words) {
        vector<string> ans;
        for (auto& word : words) {
            if (isValid(word)) {
                ans.emplace_back(word);
            }
        }
        return ans;
    }
};

chrome_oAb79KjUWh

1-6.单词规律：290

由于要记录string和char，所以需要使用map而不是set.

注意：pattern=”abba”,s=”dog dog dog dog”时，应该输出false.所以不止要记录char->string,还要记录string->char.

pattern=”aaa”,s=”aa aa aa aa”时，应该输出false.所以最终不能单纯返回true,而是返回长度比较。

官方解答：

class Solution {
public:
    bool wordPattern(string pattern, string str) {
        unordered_map<string, char> str2ch;
        unordered_map<char, string> ch2str;
        int m = str.length();
        int i = 0;
        for (auto ch : pattern) {
            if (i >= m) {
                return false;
            }
            int j = i;
            while (j < m && str[j] != ' ') j++;
            const string &tmp = str.substr(i, j - i);
            if (str2ch.count(tmp) && str2ch[tmp] != ch) {
                return false;
            }
            if (ch2str.count(ch) && ch2str[ch] != tmp) {
                return false;
            }
            str2ch[tmp] = ch;
            ch2str[ch] = tmp;
            i = j + 1;
        }
        return i >= m;
    }
};

类似题：同构字符串205（更简单，因为都是一对一字符且无空格）

class Solution {
public:
    bool isIsomorphic(string s, string t) {
        if (s.length() != t.length()) {
            return false;
        }
        unordered_map<char, char> s2t;
        unordered_map<char, char> t2s;
        int n = s.length();
        for (int i = 0; i < n; i++) {
            char ch = s[i], ch2 = t[i];
            if (s2t.count(ch) && s2t[ch] != ch2) {
                return false;
            }
            if (t2s.count(ch2) && t2s[ch2] != ch) {
                return false;
            }
            s2t[ch] = ch2;
            t2s[ch2] = ch;
        }
        return true;
    }
};

1-7.数组中的k-diff数对：532

法一：哈希表

res放入的是每次数对中较小的那个数，unordered_set可以起到去重的一个作用，这样可以很容易地处理k=0时的情况。multiset记录nums/常数res记录最终答案，都在处理k=0时比较复杂。

class Solution {
public:
    int findPairs(vector<int>& nums, int k) {
        unordered_set<int> visited;
        unordered_set<int> res;
        for (int num : nums) {
            if (visited.count(num - k)) {
                res.emplace(num - k);
            }
            if (visited.count(num + k)) {
                res.emplace(num);
            }
            visited.emplace(num);
        }
        return res.size();
    }
};

法二：数组排序+双指针

class Solution {
public:
    int findPairs(vector<int>& nums, int k) {
        sort(nums.begin(), nums.end());
        int n = nums.size(), res = 0;
        for (int i = 0, j = 0; i < n && j < n; i++) {
            if (i == 0 || nums[i - 1] != nums[i]) {
                while (j < n && (nums[j] < nums[i] + k || j <= i)) {
                    j++;
                }
                if (j < n && nums[j] == nums[i] + k) {
                    res++;
                }
            }
        }
        return res;
    }
};

1-8.统计重复个数：466（困难）

法一：暴力法（超时）

class Solution {
public:
    int getMaxRepetitions(string s1, int n1, string s2, int n2) {
        int index = 0, repeat_count = 0;
        int s1_size = s1.size(), s2_size = s2.size();
        for (int i = 0; i < n1; i++) {
            for (int j = 0; j < s1_size; j++) {
                if (s1[j] == s2[index]) {
                    index++;
                }
                if (index == s2_size) {
                    index = 0;
                    repeat_count++;
                }
            }
        }
        return repeat_count / n2;
    }
};

法二：找出循环节

详见官方视频讲解。使用unordered_map<int,pair<int,int>>>

class Solution {
public:
    int getMaxRepetitions(string s1, int n1, string s2, int n2) {
        if (n1 == 0) {
            return 0;
        }
        int s1cnt = 0, index = 0, s2cnt = 0;
        // recall 是我们用来找循环节的变量，它是一个哈希映射
        // 我们如何找循环节？假设我们遍历了 s1cnt 个 s1，此时匹配到了第 s2cnt 个 s2 中的第 index 个字符
        // 如果我们之前遍历了 s1cnt' 个 s1 时，匹配到的是第 s2cnt' 个 s2 中同样的第 index 个字符，那么就有循环节了
        // 我们用 (s1cnt', s2cnt', index) 和 (s1cnt, s2cnt, index) 表示两次包含相同 index 的匹配结果
        // 那么哈希映射中的键就是 index，值就是 (s1cnt', s2cnt') 这个二元组
        // 循环节就是；
        //    - 前 s1cnt' 个 s1 包含了 s2cnt' 个 s2
        //    - 以后的每 (s1cnt - s1cnt') 个 s1 包含了 (s2cnt - s2cnt') 个 s2
        // 那么还会剩下 (n1 - s1cnt') % (s1cnt - s1cnt') 个 s1, 我们对这些与 s2 进行暴力匹配
        // 注意 s2 要从第 index 个字符开始匹配
        unordered_map<int, pair<int, int>> recall;
        pair<int, int> pre_loop, in_loop;
        while (true) {
            // 我们多遍历一个 s1，看看能不能找到循环节
            ++s1cnt;
            for (char ch: s1) {
                if (ch == s2[index]) {
                    index += 1;
                    if (index == s2.size()) {
                        ++s2cnt;
                        index = 0;
                    }
                }
            }
            // 还没有找到循环节，所有的 s1 就用完了
            if (s1cnt == n1) {
                return s2cnt / n2;
            }
            // 出现了之前的 index，表示找到了循环节
            if (recall.count(index)) {
                auto [s1cnt_prime, s2cnt_prime] = recall[index];
                // 前 s1cnt' 个 s1 包含了 s2cnt' 个 s2
                pre_loop = {s1cnt_prime, s2cnt_prime};
                // 以后的每 (s1cnt - s1cnt') 个 s1 包含了 (s2cnt - s2cnt') 个 s2
                in_loop = {s1cnt - s1cnt_prime, s2cnt - s2cnt_prime};
                break;
            } else {
                recall[index] = {s1cnt, s2cnt};
            }
        }
        // ans 存储的是 S1 包含的 s2 的数量，考虑的之前的 pre_loop 和 in_loop
        int ans = pre_loop.second + (n1 - pre_loop.first) / in_loop.first * in_loop.second;
        // S1 的末尾还剩下一些 s1，我们暴力进行匹配
        int rest = (n1 - pre_loop.first) % in_loop.first;
        for (int i = 0; i < rest; ++i) {
            for (char ch: s1) {
                if (ch == s2[index]) {
                    ++index;
                    if (index == s2.size()) {
                        ++ans;
                        index = 0;
                    }
                }
            }
        }
        // S1 包含 ans 个 s2，那么就包含 ans / n2 个 S2
        return ans / n2;
    }
};

1-9.随机链表的复制：138

注意题干要求，返回的是原链表的深拷贝，不能直接return head;

深拷贝与浅拷贝的区别：

当出现类的等号赋值时，会调用拷贝函数，在未定义显示拷贝构造函数的情况下，系统会调用**默认的拷贝函数－即浅拷贝**，它能够完成成员的一一复制。当数据成员中<u>没有指针</u>时，浅拷贝是可行的。

但当数据成员中有指针时，如果采用简单的浅拷贝，则两类中的两个指针指向同一个地址，当对象快要结束时，会调用两次析构函数，而导致**野指针**的问题。

所以，这时必需采用深拷贝。深拷贝与浅拷贝之间的区别就在于深拷贝会在堆内存中**另外申请空间来存储数据**，从而也就解决来野指针的问题。简而言之，当数据成员中**有指针**时，必须要用深拷贝更加安全。

法一：回溯+哈希表+递归

/*
// Definition for a Node.
class Node {
public:
    int val;
    Node* next;
    Node* random;

    Node(int _val) {
        val = _val;
        next = NULL;
        random = NULL;
    }
};
*/

class Solution {
public:
    unordered_map<Node*, Node*> cachedNode;
    Node* copyRandomList(Node* head) {
        if (head == nullptr) {
            return nullptr;
        }
        if (!cachedNode.count(head)) {
            Node* temp = new Node(head->val);
            cachedNode[head] = temp;
            temp->next = copyRandomList(head->next);
            temp->random = copyRandomList(head->random);
        }
        return cachedNode[head];
    }
};

法二：迭代+节点拆分

详见官方图解。简而言之是将A->B->C拆分为A->A’->B->B’->C->C’，再变成A’->B’->C’。

class Solution {
public:
    Node* copyRandomList(Node* head) {
        if (head == nullptr) {
            return nullptr;
        }
        for (Node* node = head; node != nullptr; node = node->next->next) {
            Node* temp = new Node(node->val);
            temp->next = node->next;
            node->next = temp;
        }
        for (Node* node = head; node != nullptr; node = node->next->next) {
            Node* temp = node->next;
            temp->random = node->random ? node->random->next : nullptr;
        }
        Node* headNew = head->next;
        for (Node* node = head; node; node = node->next) {
            Node* temp = node->next;
            node->next = node->next->next;
            temp->next = temp->next ? temp->next->next : nullptr;
        }
        return headNew;
    }
};

1-10.分数到小数：166

参考《2.1字符串篇(2)》。进阶：无需哈希 + 适用于任意进制的分数到小数—leetcode

2.哈希表与索引

2-1.两数之和：1

class Solution {
public:
    vector<int> twoSum(vector<int>& nums, int target) {
        unordered_map<int, int> hashtable;
        for (int i = 0; i < nums.size(); i++) {
            auto it = hashtable.find(target - nums[i]);
            if (it != hashtable.end()) {
                return {it->second, i};
            }
            // 没有找到，则插入该数字
            hashtable[nums[i]] = i;
        }
        return {}; // 必须要有一个返回内容
    }
};

哈希表的find()函数返回的是迭代器类型。

2-2.两数之和II—输入有序数组：167

该题要求空间复杂度为$O(1)$，所以不能使用哈希表。

法一：二分查找

class Solution {
public:
    vector<int> twoSum(vector<int>& numbers, int target) {
        for (int i = 0; i < numbers.size(); ++i) {
            int low = i + 1, high = numbers.size() - 1;
            while (low <= high) {
                int mid = (high - low) / 2 + low;
                if (numbers[mid] == target - numbers[i]) {
                    return {i + 1, mid + 1};
                } else if (numbers[mid] > target - numbers[i]) {
                    high = mid - 1;
                } else {
                    low = mid + 1;
                }
            }
        }
        return {-1, -1};
    }
};

法二：双指针（最优）

class Solution {
public:
    vector<int> twoSum(vector<int>& numbers, int target) {
        int low = 0, high = numbers.size() - 1;
        while (numbers[low] + numbers[high] != target && low < high) {
            if (numbers[low] + numbers[high] < target) {
                low++;
            } else {
                high--;
            }
        }
        return {low + 1, high + 1};
    }
};

2-3.存在重复元素II：219

法一：哈希表

class Solution {
public:
    bool containsNearbyDuplicate(vector<int>& nums, int k) {
        unordered_map<int, int> index; //<值num，索引i>
        for (int i = 0; i < nums.size(); i++) {
            if (index.count(nums[i]) && i - index[nums[i]] <= k) {
                return true;
            }
            index[nums[i]] = i;
        }
        return false;
    }
};

法二：滑动窗口

class Solution {
public:
    bool containsNearbyDuplicate(vector<int>& nums, int k) {
        unordered_set<int> s;
        for (int i = 0; i < nums.size(); i++) {
            if (i > k) {
                s.erase(nums[i - k - 1]);
            }
            if (s.count(nums[i])) {
                return true;
            }
            s.emplace(nums[i]);
        }
        return false;
    }
};

2-4.存在重复元素III：220（困难）

法一：滑动窗口+有序集合

class Solution {
public:
    bool containsNearbyAlmostDuplicate(vector<int>& nums, int indexDiff, int valueDiff) {
        set<int> s;
        for (int i = 0; i < nums.size(); i++) {
            int leftBoundary = max(nums[i], INT_MIN + valueDiff) - valueDiff;
            int rightBoundary = min(nums[i], INT_MAX - valueDiff) + valueDiff;
            auto it = s.lower_bound(leftBoundary);
            if (it != s.end() && *it <= rightBoundary) {
                return true;
            }
            s.insert(nums[i]);
            if (i >= indexDiff) {
                s.erase(nums[i - indexDiff]);
            }
        }
        return false;
    }
};

`s.lower_bound(leftBoundary)`将返回第一个>=leftBoundary的元素的迭代器，如果未找到，则返回一个指向末尾的迭代器。

法二：滑动窗口+哈希表+桶排序（最优解）

使用set时，插入或者删除一次元素的时间复杂度为$O(log(min(n,IndexDiff)))$，哈希表＋桶排序则为$O(1)$。

class Solution {
public:
    int getIndex(int x, long w) { 
        return x < 0 ? (x + 1ll) / w - 1 : x / w; 
    }

    bool containsNearbyAlmostDuplicate(vector<int>& nums, int indexDiff, int valueDiff) {
        unordered_map<int, int> mp;
        for (int i = 0; i < nums.size(); i++) {
            long x = nums[i]; // long存储避免溢出
            int index = getIndex(x, valueDiff + 1ll);
            if (mp.count(index) ||
                mp.count(index - 1) && abs(x - mp[index - 1]) <= valueDiff ||
                mp.count(index + 1) && abs(x - mp[index + 1]) <= valueDiff) {
                return true;
            }
            mp[index] = x;
            if (i >= indexDiff) {
                mp.erase(getIndex(nums[i - indexDiff], valueDiff + 1ll));
            }
        }
        return false;
    }
};