Created
August 18, 2012 10:08
-
-
Save bicepjai/3385821 to your computer and use it in GitHub Desktop.
SUBLEX SPOJ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.lang.String; | |
import java.util.Scanner; | |
public class Main{ | |
public static String str; | |
public static int len; | |
public static int[] SA,lcp,subsAcc; | |
// length of longest common prefix of s and t | |
private static int lcp(int s, int t) { | |
int N = len - Math.max(s, t); | |
for (int i = 0; i < N; i++) | |
if (str.charAt(s+i) != str.charAt(t+i)) return i; | |
return N; | |
} | |
public static void main(String[] args) { | |
int i,l,lo,m,n; | |
try{ | |
Scanner sc = new Scanner(System.in); | |
str = sc.nextLine(); | |
len = str.length(); | |
SA = new int[len]; | |
lcp = new int[len]; | |
subsAcc = new int[len]; | |
new sa().suffixsort(str, SA, len); | |
lcp[0] = 0; | |
subsAcc[0] = len - SA[0]; | |
for (i = 1; i < len; i++) { | |
lcp[i] = lcp(SA[i],SA[i-1]); | |
subsAcc[i] = subsAcc[i-1] + len - SA[i] - lcp[i]; | |
} | |
/*System.out.println("\tidx \tSA \tLCP \tACC \tstr"); | |
for (i = 0; i < len; i++) { | |
System.out.println("\t"+i+" \t"+SA[i]+" \t"+lcp[i]+" \t"+subsAcc[i]+" \t"+str.substring(SA[i])); | |
}*/ | |
n = sc.nextInt(); | |
sc.nextLine(); | |
while(n-- > 0){ | |
//binary search | |
l = sc.nextInt(); | |
if(sc.hasNextLine())sc.nextLine(); | |
//System.out.println(l+"=>"+l); | |
m=0; lo = 0; i = len; | |
while(lo <= i){ | |
m = lo + (i - lo)/2; | |
if (subsAcc[m] > l){ | |
i = m - 1; | |
} else if (subsAcc[m] < l){ | |
lo = m + 1; | |
} else { | |
lo = m; | |
break; | |
} | |
} | |
//System.out.println("lo=>"+lo); | |
if(lo == 0){ | |
//System.out.println(SA[lo]+" "+(SA[lo]+lcp[lo]+l)); | |
System.out.println(str.substring(SA[lo], | |
SA[lo]+lcp[lo]+l)); | |
}else{ | |
//System.out.println(SA[lo]+" "+(SA[lo]+lcp[lo]+l-subsAcc[lo-1])); | |
System.out.println(str.substring(SA[lo], | |
SA[lo]+lcp[lo]+l-subsAcc[lo-1])); | |
} | |
} | |
}catch(Exception e){ | |
return; | |
} | |
} | |
} | |
class sa { | |
private static interface BaseArray { | |
public int get(int i); | |
public void set(int i, int val); | |
public int update(int i, int val); | |
} | |
private static class CharArray implements BaseArray { | |
private char[] m_A = null; | |
private int m_pos = 0; | |
CharArray(char[] A, int pos) { m_A = A; m_pos = pos; } | |
public int get(int i) { return m_A[m_pos + i] & 0xffff; } | |
public void set(int i, int val) { m_A[m_pos + i] = (char)(val & 0xffff); } | |
public int update(int i, int val) { return m_A[m_pos + i] += val & 0xffff; } | |
} | |
private static class IntArray implements BaseArray { | |
private int[] m_A = null; | |
private int m_pos = 0; | |
IntArray(int[] A, int pos) { m_A = A; m_pos = pos; } | |
public int get(int i) { return m_A[m_pos + i]; } | |
public void set(int i, int val) { m_A[m_pos + i] = val; } | |
public int update(int i, int val) { return m_A[m_pos + i] += val; } | |
} | |
private static class StringArray implements BaseArray { | |
private String m_A = null; | |
private int m_pos = 0; | |
StringArray(String A, int pos) { m_A = A; m_pos = pos; } | |
public int get(int i) { return (int)(m_A.charAt(m_pos + i) & 0xffff); } | |
public void set(int i, int val) { } | |
public int update(int i, int val) { return 0; } | |
} | |
/* find the start or end of each bucket */ | |
private static | |
void | |
getCounts(BaseArray T, BaseArray C, int n, int k) { | |
int i; | |
for(i = 0; i < k; ++i) { C.set(i, 0); } | |
for(i = 0; i < n; ++i) { C.update(T.get(i), 1); } | |
} | |
private static | |
void | |
getBuckets(BaseArray C, BaseArray B, int k, boolean end) { | |
int i, sum = 0; | |
if(end != false) { for(i = 0; i < k; ++i) { sum += C.get(i); B.set(i, sum); } } | |
else { for(i = 0; i < k; ++i) { sum += C.get(i); B.set(i, sum - C.get(i)); } } | |
} | |
/* sort all type LMS suffixes */ | |
private static | |
void | |
LMSsort(BaseArray T, int[] SA, BaseArray C, BaseArray B, int n, int k) { | |
int b, i, j; | |
int c0, c1; | |
/* compute SAl */ | |
if(C == B) { getCounts(T, C, n, k); } | |
getBuckets(C, B, k, false); /* find starts of buckets */ | |
j = n - 1; | |
b = B.get(c1 = T.get(j)); | |
--j; | |
SA[b++] = (T.get(j) < c1) ? ~j : j; | |
for(i = 0; i < n; ++i) { | |
if(0 < (j = SA[i])) { | |
if((c0 = T.get(j)) != c1) { B.set(c1, b); b = B.get(c1 = c0); } | |
--j; | |
SA[b++] = (T.get(j) < c1) ? ~j : j; | |
SA[i] = 0; | |
} else if(j < 0) { | |
SA[i] = ~j; | |
} | |
} | |
/* compute SAs */ | |
if(C == B) { getCounts(T, C, n, k); } | |
getBuckets(C, B, k, true); /* find ends of buckets */ | |
for(i = n - 1, b = B.get(c1 = 0); 0 <= i; --i) { | |
if(0 < (j = SA[i])) { | |
if((c0 = T.get(j)) != c1) { B.set(c1, b); b = B.get(c1 = c0); } | |
--j; | |
SA[--b] = (T.get(j) > c1) ? ~(j + 1) : j; | |
SA[i] = 0; | |
} | |
} | |
} | |
private static | |
int | |
LMSpostproc(BaseArray T, int[] SA, int n, int m) { | |
int i, j, p, q, plen, qlen, name; | |
int c0, c1; | |
boolean diff; | |
/* compact all the sorted substrings into the first m items of SA | |
2*m must be not larger than n (proveable) */ | |
for(i = 0; (p = SA[i]) < 0; ++i) { SA[i] = ~p; } | |
if(i < m) { | |
for(j = i, ++i;; ++i) { | |
if((p = SA[i]) < 0) { | |
SA[j++] = ~p; SA[i] = 0; | |
if(j == m) { break; } | |
} | |
} | |
} | |
/* store the length of all substrings */ | |
i = n - 1; j = n - 1; c0 = T.get(n - 1); | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) >= c1)); | |
for(; 0 <= i;) { | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) <= c1)); | |
if(0 <= i) { | |
SA[m + ((i + 1) >> 1)] = j - i; j = i + 1; | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) >= c1)); | |
} | |
} | |
/* find the lexicographic names of all substrings */ | |
for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) { | |
p = SA[i]; plen = SA[m + (p >> 1)]; diff = true; | |
if((plen == qlen) && ((q + plen) < n)) { | |
for(j = 0; (j < plen) && (T.get(p + j) == T.get(q + j)); ++j) { } | |
if(j == plen) { diff = false; } | |
} | |
if(diff != false) { ++name; q = p; qlen = plen; } | |
SA[m + (p >> 1)] = name; | |
} | |
return name; | |
} | |
/* compute SA and BWT */ | |
private static | |
void | |
induceSA(BaseArray T, int[] SA, BaseArray C, BaseArray B, int n, int k) { | |
int b, i, j; | |
int c0, c1; | |
/* compute SAl */ | |
if(C == B) { getCounts(T, C, n, k); } | |
getBuckets(C, B, k, false); /* find starts of buckets */ | |
j = n - 1; | |
b = B.get(c1 = T.get(j)); | |
SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; | |
for(i = 0; i < n; ++i) { | |
j = SA[i]; SA[i] = ~j; | |
if(0 < j) { | |
if((c0 = T.get(--j)) != c1) { B.set(c1, b); b = B.get(c1 = c0); } | |
SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; | |
} | |
} | |
/* compute SAs */ | |
if(C == B) { getCounts(T, C, n, k); } | |
getBuckets(C, B, k, true); /* find ends of buckets */ | |
for(i = n - 1, b = B.get(c1 = 0); 0 <= i; --i) { | |
if(0 < (j = SA[i])) { | |
if((c0 = T.get(--j)) != c1) { B.set(c1, b); b = B.get(c1 = c0); } | |
SA[--b] = ((j == 0) || (T.get(j - 1) > c1)) ? ~j : j; | |
} else { | |
SA[i] = ~j; | |
} | |
} | |
} | |
private static | |
int | |
computeBWT(BaseArray T, int[] SA, BaseArray C, BaseArray B, int n, int k) { | |
int b, i, j, pidx = -1; | |
int c0, c1; | |
/* compute SAl */ | |
if(C == B) { getCounts(T, C, n, k); } | |
getBuckets(C, B, k, false); /* find starts of buckets */ | |
j = n - 1; | |
b = B.get(c1 = T.get(j)); | |
SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; | |
for(i = 0; i < n; ++i) { | |
if(0 < (j = SA[i])) { | |
SA[i] = ~(c0 = T.get(--j)); | |
if(c0 != c1) { B.set(c1, b); b = B.get(c1 = c0); } | |
SA[b++] = ((0 < j) && (T.get(j - 1) < c1)) ? ~j : j; | |
} else if(j != 0) { | |
SA[i] = ~j; | |
} | |
} | |
/* compute SAs */ | |
if(C == B) { getCounts(T, C, n, k); } | |
getBuckets(C, B, k, true); /* find ends of buckets */ | |
for(i = n - 1, b = B.get(c1 = 0); 0 <= i; --i) { | |
if(0 < (j = SA[i])) { | |
SA[i] = (c0 = T.get(--j)); | |
if(c0 != c1) { B.set(c1, b); b = B.get(c1 = c0); } | |
SA[--b] = ((0 < j) && (T.get(j - 1) > c1)) ? ~((int)T.get(j - 1)) : j; | |
} else if(j != 0) { | |
SA[i] = ~j; | |
} else { | |
pidx = i; | |
} | |
} | |
return pidx; | |
} | |
/* find the suffix array SA of T[0..n-1] in {0..k-1}^n | |
use a working space (excluding T and SA) of at most 2n+O(1) for a constant alphabet */ | |
private static | |
int | |
SA_IS(BaseArray T, int[] SA, int fs, int n, int k, boolean isbwt) { | |
BaseArray C, B, RA; | |
int i, j, b, c, m, p, q, name, pidx = 0, newfs; | |
int c0, c1; | |
int flags = 0; | |
if(k <= 256) { | |
C = new IntArray(new int[k], 0); | |
if(k <= fs) { B = new IntArray(SA, n + fs - k); flags = 1; } | |
else { B = new IntArray(new int[k], 0); flags = 3; } | |
} else if(k <= fs) { | |
C = new IntArray(SA, n + fs - k); | |
if(k <= (fs - k)) { B = new IntArray(SA, n + fs - k * 2); flags = 0; } | |
else if(k <= 1024) { B = new IntArray(new int[k], 0); flags = 2; } | |
else { B = C; flags = 8; } | |
} else { | |
C = B = new IntArray(new int[k], 0); | |
flags = 4 | 8; | |
} | |
/* stage 1: reduce the problem by at least 1/2 | |
sort all the LMS-substrings */ | |
getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ | |
for(i = 0; i < n; ++i) { SA[i] = 0; } | |
b = -1; i = n - 1; j = n; m = 0; c0 = T.get(n - 1); | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) >= c1)); | |
for(; 0 <= i;) { | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) <= c1)); | |
if(0 <= i) { | |
if(0 <= b) { SA[b] = j; } b = B.update(c1, -1); j = i; ++m; | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) >= c1)); | |
} | |
} | |
if(1 < m) { | |
LMSsort(T, SA, C, B, n, k); | |
name = LMSpostproc(T, SA, n, m); | |
} else if(m == 1) { | |
SA[b] = j + 1; | |
name = 1; | |
} else { | |
name = 0; | |
} | |
/* stage 2: solve the reduced problem | |
recurse if names are not yet unique */ | |
if(name < m) { | |
if((flags & 4) != 0) { C = null; B = null; } | |
if((flags & 2) != 0) { B = null; } | |
newfs = (n + fs) - (m * 2); | |
if((flags & (1 | 4 | 8)) == 0) { | |
if((k + name) <= newfs) { newfs -= k; } | |
else { flags |= 8; } | |
} | |
for(i = m + (n >> 1) - 1, j = m * 2 + newfs - 1; m <= i; --i) { | |
if(SA[i] != 0) { SA[j--] = SA[i] - 1; } | |
} | |
RA = new IntArray(SA, m + newfs); | |
SA_IS(RA, SA, newfs, m, name, false); | |
RA = null; | |
i = n - 1; j = m * 2 - 1; c0 = T.get(n - 1); | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) >= c1)); | |
for(; 0 <= i;) { | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) <= c1)); | |
if(0 <= i) { | |
SA[j--] = i + 1; | |
do { c1 = c0; } while((0 <= --i) && ((c0 = T.get(i)) >= c1)); | |
} | |
} | |
for(i = 0; i < m; ++i) { SA[i] = SA[m + SA[i]]; } | |
if((flags & 4) != 0) { C = B = new IntArray(new int[k], 0); } | |
if((flags & 2) != 0) { B = new IntArray(new int[k], 0); } | |
} | |
/* stage 3: induce the result for the original problem */ | |
if((flags & 8) != 0) { getCounts(T, C, n, k); } | |
/* put all left-most S characters into their buckets */ | |
if(1 < m) { | |
getBuckets(C, B, k, true); /* find ends of buckets */ | |
i = m - 1; j = n; p = SA[m - 1]; c1 = T.get(p); | |
do { | |
q = B.get(c0 = c1); | |
while(q < j) { SA[--j] = 0; } | |
do { | |
SA[--j] = p; | |
if(--i < 0) { break; } | |
p = SA[i]; | |
} while((c1 = T.get(p)) == c0); | |
} while(0 <= i); | |
while(0 < j) { SA[--j] = 0; } | |
} | |
if(isbwt == false) { induceSA(T, SA, C, B, n, k); } | |
else { pidx = computeBWT(T, SA, C, B, n, k); } | |
C = null; B = null; | |
return pidx; | |
} | |
/** Suffixsorting **/ | |
/* char */ | |
public static | |
int | |
suffixsort(char[] T, int[] SA, int n) { | |
if((T == null) || (SA == null) || (T.length < n) || (SA.length < n)) { return -1; } | |
if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } | |
return SA_IS(new CharArray(T, 0), SA, 0, n, 65536, false); | |
} | |
/* String */ | |
public static | |
int | |
suffixsort(String T, int[] SA, int n) { | |
if((T == null) || (SA == null) || | |
(T.length() < n) || (SA.length < n)) { return -1; } | |
if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } | |
return SA_IS(new StringArray(T, 0), SA, 0, n, 65536, false); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
for understanding on suffix array, follow up this naive implementation
https://gist.github.com/3385853