Created
December 27, 2012 21:44
-
-
Save msg555/4392298 to your computer and use it in GitHub Desktop.
Solution to SPOJ/GF2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdlib.h> | |
#include<string.h> | |
#include<stddef.h> | |
#include<stdint.h> | |
#include<stdio.h> | |
#define ui uint32_t | |
#define uj uint64_t | |
#define uk size_t | |
#define BS 100000000 | |
#define BBS ((uj)BS*BS) | |
typedef struct cb { | |
uk N; | |
uk cap; | |
ui* A; | |
}cb; | |
ui* get_value(cb* X,uk i){ | |
while(X->cap<i+1){ | |
X->cap=2+(X->cap*3)>> 1; | |
} | |
X->A=(ui*)realloc(X->A,sizeof(ui)*X->cap); | |
while(X->N<=i){ | |
X->A[X->N++]=0; | |
} | |
return X->A+i; | |
} | |
void purge(cb* X){ | |
while(X->N && !X->A[X->N-1]){ | |
X->N--; | |
} | |
while(X->N<(X->cap >> 2)){ | |
X->cap=X->cap >> 1; | |
X->A=realloc(X->A,sizeof(ui)*X->cap); | |
} | |
} | |
cb cb_zero(){ | |
cb X={0,0,NULL}; | |
return X; | |
} | |
cb cb_copy(cb X){ | |
cb Y; | |
Y.N=Y.cap=X.N; | |
Y.A=(ui*)malloc(sizeof(ui)*X.N); | |
memcpy(Y.A,X.A,sizeof(ui)*X.N); | |
return Y; | |
} | |
void wF(cb X){ | |
free(X.A); | |
} | |
void cb_scalar_add(cb* X,uj y){ | |
uk i; | |
for(i=0;y;i++){ | |
ui* pv=get_value(X,i); | |
*pv +=y%BS; | |
y=(y/BS)+(*pv/BS); | |
*pv %=BS; | |
} | |
} | |
cb cb_scalar(uj y){ | |
cb X=cb_zero(); | |
cb_scalar_add(&X,y); | |
return X; | |
} | |
void cb_scalar_mul(cb* X,ui y){ | |
uk i; | |
uj c=0; | |
for(i=0;c || i<X->N;i++){ | |
ui* pv=get_value(X,i); | |
uj m=(uj)*pv*y; | |
*pv=(m%BS)+(c%BS); | |
c=(m/BS)+(c/BS)+(*pv/BS); | |
*pv %=BS; | |
} | |
purge(X); | |
} | |
ui cb_scalar_div(cb* X,ui y){ | |
uk i; | |
ui c=0; | |
for(i=X->N;i--;){ | |
ui* pv=get_value(X,i); | |
uj v=((uj)c*BS)+*pv; | |
c=v%y; | |
*pv=v/y; | |
} | |
purge(X); | |
return c; | |
} | |
int cb_compare(cb X,cb Y){ | |
int r=0; | |
uk i; | |
for(i=(X.N<Y.N?Y.N:X.N);!r && i--;){ | |
r=(i<X.N?X.A[i]:0)-(i<Y.N?Y.A[i]:0); | |
} | |
return r; | |
} | |
void wAS(cb* X,cb Y,uk shft){ | |
uk i,j; | |
ui c=0; | |
for(i=shft,j=0;c || j<Y.N;i++,j++){ | |
ui* pv=get_value(X,i); | |
*pv +=(j<Y.N?Y.A[j]:0)+c; | |
c=*pv/BS; | |
*pv %=BS; | |
} | |
} | |
void wSS(cb* X,cb Y,uk shft){ | |
uk i,j; | |
ui c=0; | |
for(i=shft,j=0;c || j<Y.N;i++,j++){ | |
ui* pv=X->A+i; | |
*pv -=(j<Y.N?Y.A[j]:0)+c; | |
for(c=0;(int32_t)*pv<0;c++)*pv +=BS; | |
} | |
purge(X); | |
} | |
void multiply_basic(cb* Z,cb X,cb Y){ | |
Z->N=Z->cap=X.N+Y.N; | |
Z->A=(ui*)calloc(Z->cap,sizeof(ui)); | |
uk i; | |
uj c=0; | |
for(i=0;c||i+1<X.N+Y.N;i++){ | |
uj v=c; | |
c=0; | |
uk js=i<Y.N-1?0:i-Y.N+1; | |
ui*xe=X.A+(i<X.N?i+1:X.N); | |
ui*x=X.A+js; | |
ui*y=Y.A+i-js; | |
for(;x+16<xe;x+=16,y-=16){ | |
v+= | |
(uj)x[0]*y[0]+ | |
(uj)x[1]*y[-1]+ | |
(uj)x[2]*y[-2]+ | |
(uj)x[3]*y[-3]+ | |
(uj)x[4]*y[-4]+ | |
(uj)x[5]*y[-5]+ | |
(uj)x[6]*y[-6]+ | |
(uj)x[7]*y[-7]+ | |
(uj)x[8]*y[-8]+ | |
(uj)x[9]*y[-9]+ | |
(uj)x[10]*y[-10]+ | |
(uj)x[11]*y[-11]+ | |
(uj)x[12]*y[-12]+ | |
(uj)x[13]*y[-13]+ | |
(uj)x[14]*y[-14]+ | |
(uj)x[15]*y[-15]; | |
} | |
for(;x<xe;){ | |
v+=(uj)*x++**y--; | |
} | |
Z->A[i]=v%BS; | |
c+=v/BS; | |
} | |
purge(Z); | |
} | |
cb wX(cb X,uk i,uk M){ | |
cb Y; | |
if(i<X.N){ | |
Y.N=Y.cap=X.N-i<M?X.N-i:M; | |
Y.A=(ui*)malloc(sizeof(ui)*Y.N); | |
memcpy(Y.A,X.A+i,sizeof(ui)*Y.N); | |
purge(&Y); | |
}else { | |
Y=cb_zero(); | |
} | |
return Y; | |
} | |
cb cb_mul(cb X,cb Y); | |
void multiply_karatsuba(cb* Z,cb X,cb Y){ | |
if(X.N<2 || Y.N<2){ | |
multiply_basic(Z,X,Y); | |
return; | |
} | |
uk M=(1+(X.N<Y.N?Y.N:X.N))/ 2; | |
cb x0=wX(X,0,M); | |
cb x1=wX(X,M,M); | |
cb y0=wX(Y,0,M); | |
cb y1=wX(Y,M,M); | |
cb z0=cb_mul(x0,y0); | |
cb z2=cb_mul(x1,y1); | |
wAS(&x0,x1,0); | |
wAS(&y0,y1,0); | |
cb z1=cb_mul(x0,y0); | |
*Z=cb_zero(); | |
wAS(Z,z0,0); | |
wAS(Z,z1,M); | |
wAS(Z,z2,2*M); | |
wSS(Z,z0,M); | |
wSS(Z,z2,M); | |
wF(x0);wF(x1); | |
wF(y0);wF(y1); | |
wF(z0);wF(z1);wF(z2); | |
} | |
cb cb_mul(cb X,cb Y){ | |
if(X.N ==0 || Y.N ==0)return cb_zero(); | |
cb Z=cb_zero(); | |
if(X.N+Y.N<1800){ | |
multiply_basic(&Z,X,Y); | |
}else { | |
multiply_karatsuba(&Z,X,Y); | |
} | |
purge(&Z); | |
return Z; | |
} | |
cb cb_exp(cb X,ui e){ | |
int F=1; | |
uk i; | |
cb R=cb_scalar(1); | |
for(i=32-__builtin_clz(e);i--;){ | |
cb T; | |
if(!F){ | |
T=cb_mul(R,R); | |
wF(R); | |
R=T; | |
} | |
if(e&1U<< i){ | |
if(F){ | |
R=cb_copy(X); | |
}else{ | |
T=cb_mul(R,X); | |
wF(R); | |
R=T; | |
} | |
} | |
F=0; | |
} | |
return R; | |
} | |
int main() { | |
uk i, j, M; | |
ui mul = 1, N, ON; scanf("%u", &N); | |
ui P[20]; | |
for(i=2,M=0, ON = N; i * i <= N; i++) { | |
if(N % i) continue; | |
for(; N % i == 0; N /= i); | |
P[M++] = i; | |
mul *= i; | |
} | |
if(N>1)P[M++]=N; | |
mul *=N; | |
cb* F = (cb*)malloc(sizeof(cb) << M); | |
cb two = cb_scalar(2); | |
F[0] = cb_exp(two, ON / mul); | |
cb r1 = cb_zero(); | |
cb r2 = cb_zero(); | |
wAS(M % 2?&r1:&r2, F[0], 0); | |
for(i = 1; i<1<<M; i++) { | |
j = __builtin_ctz(i); | |
F[i] = cb_exp(F[i ^ (1 << j)], P[j]); | |
wAS((M+__builtin_popcount(i))%2?&r1:&r2,F[i],0); | |
} | |
wSS(&r2, r1, 0); | |
cb_scalar_div(&r2, ON); | |
char b[200000]; | |
for(i = r2.N, j = 0; i--; ) { | |
j+=sprintf(b+j,i+1==r2.N?"%u":"%08u",r2.A[i]); | |
} | |
puts(b); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment