Last active
May 1, 2020 13:22
-
-
Save jakab922/e7e132e80b0cad2d9d057063c5a6ec0f to your computer and use it in GitHub Desktop.
Finding the smallest lower/upper balanced substring in a string consisting of a-z A-Z in linear time
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def check(substr): | |
lower = [False for i in range(26)] | |
upper = [False for i in range(26)] | |
for c in substr: | |
o = ord(c) | |
if o > 90: | |
lower[o - 97] = True | |
else: | |
upper[o - 65] = True | |
for i in range(26): | |
if lower[i] != upper[i]: | |
return False | |
return True | |
s = input().strip() | |
n = len(s) | |
best = n + 1 | |
for i in range(n): | |
for j in range(i, n): | |
if check(s[i:(j + 1)]): | |
if j - i + 1 < best: | |
best = j - i + 1 | |
if best == n + 1: | |
print(-1) | |
else: | |
print(best) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from random import choice, randint | |
asize = 10 | |
assert asize <= 26, "There are only 26 letters in the English alphabet" | |
lower = [chr(i) for i in range(97, 97 + asize)] | |
upper = [chr(i) for i in range(65, 65 + asize)] | |
def check(substr): | |
lower = [False for i in range(26)] | |
upper = [False for i in range(26)] | |
for c in substr: | |
o = ord(c) | |
if o > 90: | |
lower[o - 97] = True | |
else: | |
upper[o - 65] = True | |
for i in range(26): | |
if lower[i] != upper[i]: | |
return False | |
return True | |
n = 5000 | |
min_length = 100 | |
assert min_length < n, f"The minimal length({min_length}) should be smaller than n({n})" | |
letters = [] | |
for _ in range(n): | |
if randint(0, 1) == 0: | |
letters.append(choice(lower)) | |
else: | |
letters.append(choice(upper)) | |
good = False | |
for l in range(1, min(min_length, len(letters)) + 1): | |
if check(letters[-l:]): | |
good = True | |
break | |
while good: | |
if randint(0, 1) == 0: | |
letters[-1] = choice(lower) | |
else: | |
letters[-1] = choice(upper) | |
good = False | |
for l in range(1, min(min_length, len(letters)) + 1): | |
if check(letters[-l:]): | |
good = True | |
break | |
print("".join(letters[1:])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <bits/stdc++.h> | |
using namespace std; | |
using ll = long long; | |
using ull = unsigned long long; | |
#define FOR(i, j, k, in) for (int i = j; i < k; i += in) | |
#define RFOR(i, j, k, in) for (int i = j; i >= k; i -= in) | |
#define REP(i, j) FOR(i, 0, j, 1) | |
#define RREP(i, j) RFOR(i, j, 0, 1) | |
constexpr int ASIZE = 26; | |
int get_pair(int x) { | |
if (x < ASIZE) | |
return x + ASIZE; | |
else | |
return x - ASIZE; | |
} | |
void update(vector<vector<int>> &last, int index, int value) { | |
REP(i, 2 * ASIZE) { | |
if (value == i) { | |
last[index][i] = index; | |
} else { | |
if (index == 0) | |
last[index][i] = -1; | |
else { | |
last[index][i] = last[index - 1][i]; | |
} | |
} | |
} | |
} | |
void diff_update(const vector<vector<int>> &last, vector<bool> &need, vector<bool> &have, int low, int high) { | |
REP(i, 2 * ASIZE) { | |
if (last[high][i] != last[low][i]) { | |
need[i] = true; | |
have[i] = true; | |
need[get_pair(i)] = true; | |
} | |
} | |
} | |
int rec_solve(const vector<vector<int>> &last, vector<bool> &need, vector<bool> &have, int index) { | |
bool good = true; | |
REP(i, 2 * ASIZE) { | |
if (need[i] != have[i]) { | |
good = false; | |
break; | |
} | |
} | |
if (good) return index; | |
REP(i, 2 * ASIZE) { | |
if (have[i] != need[i]) { | |
if (last[index][i] == -1) return INT_MAX; | |
int new_index = last[index][i]; | |
have[i] = true; | |
diff_update(last, need, have, new_index, index); | |
return rec_solve(last, need, have, new_index); | |
} | |
} | |
return INT_MAX; // For the compiler warning. We should never get here | |
} | |
int main() { | |
string s; | |
cin >> s; | |
int n = s.size(); | |
vector<int> vs(n); | |
REP(i, n) { | |
if (s[i] > 'Z') | |
vs[i] = (int)(s[i] - 97 + ASIZE); | |
else | |
vs[i] = vs[i] = (int)(s[i] - 65); | |
} | |
vector<vector<int>> last(n, vector<int>(2 * ASIZE)); | |
update(last, 0, vs[0]); | |
int best = INT_MAX; | |
FOR(i, 1, n, 1) { | |
vector<bool> have(2 * ASIZE, false), need(2 * ASIZE, false); | |
have[vs[i]] = true; | |
need[vs[i]] = true; | |
need[get_pair(vs[i])] = true; | |
update(last, i, vs[i]); | |
int res = rec_solve(last, need, have, i); | |
if (res != INT_MAX) best = min(best, i - res + 1); | |
} | |
if (best == INT_MAX) | |
cout << -1 << endl; | |
else | |
cout << best << endl; | |
return 0; | |
} |
The generator is garbage. I need a better one.
I've updated the test generator. Now it has better outputs. Before we had an ((asize - 1)/asize) ^ (n - 1)
chance to have a length 2 good set. The new generator produces interesting inputs for short(say <100) inputs. It's an interesting problem in itself to write a test generator where the shortest good substring of the generated string is k
.
Now one can force a minimal good substring length with the generator by changing the min_length
parameter. It works sometimes produces strings where there is no solution. Also it's really slow.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
One can test that this works with the following command: