The firsrt hashing function is a standard string hashing together coupled with sorting of the string elements to get rid of ambiguity: we want our hash to return the same value independently on the order of the characters in the string. This approach essentially requires copying of the string because we cannot mutate the original strings since we need them later to construct the output.
The second approach is hasing based on prime numbers. Basically, we encode each unique char with a prime number. This approach does not require sorting and, hence, sorting of the string.
class Solution {
public:
// standard hashing approach
// 52ms
const size_t p = 27;
const size_t M = 1e9 + 7;
size_t getHash_1(string s) {
sort(s.begin(), s.end());
size_t h = 0;
size_t pp = 1;
for (int i = 0; i < s.size(); ++i) {
h = (h + (pp*(s[i] - 'a'))%M)%M;
pp = (pp * p)%M;
}
return h;
}
// encode by multiplication of primes
// 24ms
const array<int, 26> prime = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
31, 41, 43, 47, 53, 59, 61, 67, 71, 73,
79, 83, 89, 97, 101, 103};
size_t getHash_2(string& s) {
size_t h = 1;
for (int i = 0; i < s.size(); ++i) {
h = (h*prime[s[i] - 'a'])%M;
}
return h;
}
vector<vector<string>> groupAnagrams(vector<string>& strs) {
vector<vector<string>> res;
unordered_map<size_t, int> hash2bucket;
int maxBucket = 0;
for (int i = 0; i < strs.size(); ++i) {
size_t h = getHash_2(strs[i]);
if (!hash2bucket.count(h)) {
hash2bucket.emplace(h, maxBucket);
++maxBucket;
res.emplace_back(vector<string>());
}
res[ hash2bucket[h] ].emplace_back(strs[i]);
}
return res;
}
};
PS If in the line 41
I use move(strs[i]), the program takes 40ms. So copying of the string in this place works slower than moving? I don't get it.