Created
May 31, 2025 06:09
-
-
Save komakai/69a425520add36823412498f8c9989b6 to your computer and use it in GitHub Desktop.
Super Strings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <int N> class intModuloN { | |
public: | |
intModuloN(unsigned long i = 0) { | |
this->n = i % (unsigned long)N; | |
} | |
intModuloN<N>& operator+=(const intModuloN<N>& rhs) { | |
n = ((unsigned long)n + (unsigned long)rhs.n) % (unsigned long)N; | |
return *this; | |
} | |
intModuloN<N>& operator++() { | |
n = ((long)n + 1) % (unsigned long)N; | |
return *this; | |
} | |
intModuloN<N>& operator--() { | |
n = ((long)n + N - 1) % (unsigned long)N; | |
return *this; | |
} | |
explicit operator int() const { | |
return n; | |
} | |
private: | |
int n; | |
}; | |
template <int N> | |
intModuloN<N> operator+(const intModuloN<N>& lhs, const intModuloN<N>& rhs) { | |
return intModuloN<N>((unsigned long)(int)lhs + (unsigned long)(int)rhs); | |
} | |
template <int N> | |
intModuloN<N> operator*(const intModuloN<N>& lhs, const intModuloN<N>& rhs) { | |
return intModuloN<N>((unsigned long)(int)lhs * (unsigned long)(int)rhs); | |
} | |
typedef string::size_type strsize; | |
typedef pair<strsize, strsize> valLenPair; | |
const strsize MIN_RUN_CANDIDATE = 50; | |
class RedBlackTreeSet { | |
public: | |
RedBlackTreeSet(const string& s): s(s) { | |
len = s.length(); | |
root = NIL = new Node(); | |
NIL->parent = NIL->left = NIL->right = NIL; | |
ordered.resize(len); | |
for (strsize i = 0; i < len; ++i) { | |
Node* node = insert(i); | |
ordered[i] = node; | |
} | |
} | |
~RedBlackTreeSet() { | |
destroyTree(root); | |
delete NIL; | |
} | |
private: | |
enum class Color { RED, BLACK }; | |
struct Node { | |
valLenPair data; | |
Color color; | |
Node* parent; | |
Node* left; | |
Node* right; | |
Node*& getSubNode(bool isLeft) { return isLeft ? left : right; } | |
Node() : data(0, 0), color(Color::BLACK), parent(nullptr), left(nullptr), right(nullptr) {} | |
Node(const strsize& data) : data(data, 0), color(Color::RED), parent(nullptr), left(nullptr), right(nullptr) {} | |
}; | |
struct Run { | |
strsize start; | |
strsize end; | |
strsize offset; | |
string p; | |
}; | |
vector<Run> runCache; | |
public: | |
Node* insert(const strsize& data) { | |
Node *y = NIL, *x = root; | |
pair<int, strsize> cmpRes = make_pair(0, 0); | |
strsize upperLimitCmpLen = 0, lowerLimitCmpLen = 0; | |
Node *upperLimit = NIL; | |
bool upperLimitSet = false, lowerLimitSet = false; | |
strsize lastA = 0; | |
while (x != NIL) { | |
y = x; | |
cmpRes = compare(x->data.first, data); | |
lastA = x->data.first; | |
if (cmpRes.first > 0) { | |
upperLimitCmpLen = cmpRes.second; | |
upperLimit = x; | |
upperLimitSet = true; | |
} else if (cmpRes.first < 0) { | |
lowerLimitCmpLen = cmpRes.second; | |
lowerLimitSet = true; | |
} else { | |
return nullptr; | |
} | |
x = x->getSubNode(cmpRes.first > 0); | |
} | |
Node* z = new Node(data); | |
z->left = z->right = NIL; | |
if (upperLimitSet) { | |
upperLimit->data.second = upperLimitCmpLen; | |
} | |
if (lowerLimitSet) { | |
z->data.second = lowerLimitCmpLen; | |
} | |
z->parent = y; | |
if (y == NIL) { | |
root = z; | |
} else { | |
if (lastA != y->data.first || data != z->data.first) { | |
compare(y->data.first, z->data.first); | |
} | |
y->getSubNode(cmpRes.first > 0) = z; | |
} | |
insertFixup(z); | |
return z; | |
} | |
strsize operator[](int index) const { | |
return ordered[index] != nullptr ? ordered[index]->data.second : len; | |
} | |
private: | |
const string& s; | |
strsize len; | |
vector<Node*> ordered; | |
Node* root; | |
Node* NIL; | |
void rotate(Node* x, bool isLeft = true) { | |
Node* y = x->getSubNode(!isLeft); | |
x->getSubNode(!isLeft) = y->getSubNode(isLeft); | |
if (y->getSubNode(isLeft) != NIL) { | |
y->getSubNode(isLeft)->parent = x; | |
} | |
y->parent = x->parent; | |
if (x->parent == NIL) { | |
root = y; | |
} else { | |
x->parent->getSubNode(x == x->parent->getSubNode(true)) = y; | |
} | |
y->getSubNode(isLeft) = x; | |
x->parent = y; | |
} | |
void insertFixup(Node* z) { | |
while (z->parent->color == Color::RED) { | |
bool isLeft = (z->parent == z->parent->parent->left); | |
Node* y = z->parent->parent->getSubNode(!isLeft); | |
if (y->color == Color::RED) { | |
z->parent->color = Color::BLACK; | |
y->color = Color::BLACK; | |
z->parent->parent->color = Color::RED; | |
z = z->parent->parent; | |
} else { | |
if (z == z->parent->getSubNode(!isLeft)) { | |
z = z->parent; | |
rotate(z, isLeft); | |
} | |
z->parent->color = Color::BLACK; | |
z->parent->parent->color = Color::RED; | |
rotate(z->parent->parent, !isLeft); | |
} | |
} | |
root->color = Color::BLACK; | |
} | |
Run* getRun(strsize x) { | |
for (int index = 0; index < runCache.size(); ++index) { | |
if (runCache[index].start <=x && x<= runCache[index].end - 1) { | |
return &runCache[index]; | |
} | |
} | |
return nullptr; | |
} | |
strsize extendRunUpper(string& p, strsize offset, strsize x) { | |
strsize ret = x + 1; | |
strsize pIndex = (offset + 1) % p.length(); | |
while (ret < len && s[ret] == p[pIndex] ) { | |
ret++; | |
pIndex = (pIndex + 1) % p.length(); | |
} | |
return ret; | |
} | |
pair<int, strsize> compare(strsize a, strsize b) { | |
strsize cmp_len = s.length() - max(a, b); | |
auto lhs = s.data() + a; | |
auto rhs = s.data() + b; | |
strsize count = 0; | |
Run* aRun = getRun(a); | |
Run* bRun = (aRun != nullptr) ? getRun(b) : nullptr; | |
bool useCache = false; | |
if (aRun != nullptr && bRun != nullptr) { | |
if (aRun->p == bRun->p && (a - aRun->start + aRun->offset) % aRun->p.length() == (b - bRun->start + bRun->offset) % bRun->p.length()) { | |
useCache = true; | |
count = min(aRun->end - a, bRun->end - b); | |
lhs += count; | |
rhs += count; | |
} | |
} | |
while (count < cmp_len && *lhs == *rhs) { | |
++count; | |
++lhs; | |
++rhs; | |
} | |
if (!useCache && count >= MIN_RUN_CANDIDATE) { | |
if (abs((int)a - (int)b) < count / 2) { | |
Run run { | |
min(a, b), | |
min(a, b) + count, | |
0, | |
s.substr(min(a, b), abs((int)a - (int)b)) | |
}; | |
runCache.push_back(run); | |
} else { | |
strsize lower = min(a, b); | |
Run* run = getRun(lower); | |
if (run != nullptr) { | |
strsize start = max(a, b); | |
strsize offsetLower = (lower - run->start) % run->p.length(); | |
strsize offsetUpper = (lower + count - run->start) % run->p.length(); | |
strsize end = extendRunUpper(run->p, offsetUpper, start + count); | |
Run newRun { | |
start, | |
end, | |
offsetLower, | |
run->p | |
}; | |
runCache.push_back(newRun); | |
} | |
} | |
} | |
return make_pair((count < cmp_len) ? (*lhs - *rhs) : 0, count); | |
}; | |
void destroyTree(Node* node) { | |
if (node != NIL) { | |
destroyTree(node->left); | |
destroyTree(node->right); | |
delete node; | |
} | |
} | |
}; | |
const int D = 1000000007; | |
typedef intModuloN<D> modInt; | |
const strsize MAX_DISTINCT_ELEMENTS = 26; | |
class PowerCache { | |
public: | |
modInt const& operator[](strsize index) const { | |
return cache[index]; | |
} | |
void reset(strsize l, strsize max) { | |
fill(cache.begin(), cache.end(), 0); | |
cache[0] = l; | |
for (int i = 1; i < min(max, MAX_DISTINCT_ELEMENTS); ++i) { | |
cache[i] = cache[i - 1] * cache[0]; | |
} | |
} | |
private: | |
array<modInt, MAX_DISTINCT_ELEMENTS> cache; | |
}; | |
int distinct(string s) { | |
array<char, 26> counts = {}; | |
for (auto ch: s) { | |
++counts[ch - 'a']; | |
} | |
int ret = 0; | |
for (auto c: counts) { | |
if (c > 0) { | |
ret++; | |
} | |
} | |
return ret; | |
} | |
int superFunctionalStrings(const string s) { | |
strsize len = s.length(); | |
vector<strsize> initCounts(len); | |
vector<strsize> uniqueCounts(len); | |
map<char, strsize> charPosMap; | |
vector<vector<strsize>> stepUps(len); | |
vector<vector<strsize>> stepIns(len); | |
RedBlackTreeSet tree(s); | |
strsize maxDistinct = 0; | |
for (strsize i = 0; i < len; i++) { | |
char ch = s[i]; | |
if (charPosMap.find(ch) == charPosMap.end()) { | |
maxDistinct++; | |
} | |
for (strsize j = (charPosMap.find(ch) != charPosMap.end()) ? (charPosMap[ch] + 1) : 0; j <= i; j++) { | |
if ((i - j) > tree[(int)j]) { | |
stepUps[i - j].push_back(j); | |
} else { | |
++initCounts[j]; | |
} | |
} | |
charPosMap[ch] = i; | |
if (tree[(int)i] < len) { | |
stepIns[tree[(int)i]].push_back(i); | |
} | |
} | |
modInt ret = 0; | |
PowerCache cache; | |
array<modInt, MAX_DISTINCT_ELEMENTS> histogram; | |
for (strsize i = 1; i <= len; ++i) { | |
cache.reset(i, maxDistinct); | |
for (auto stepUp: stepUps[i - 1]) { | |
if (uniqueCounts[stepUp] > 0) { | |
--histogram[uniqueCounts[stepUp] - 1]; | |
} | |
++uniqueCounts[stepUp]; | |
++histogram[uniqueCounts[stepUp] - 1]; | |
} | |
for (auto stepIn: stepIns[i - 1]) { | |
uniqueCounts[stepIn] = initCounts[stepIn]; | |
++histogram[uniqueCounts[stepIn] - 1]; | |
} | |
for (int i = 0; i < MAX_DISTINCT_ELEMENTS; ++i) { | |
ret += histogram[i] * cache[i]; | |
} | |
if (uniqueCounts[len - i] > 0) { | |
--histogram[uniqueCounts[len - i] - 1]; | |
} | |
} | |
return (int)ret; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment