michalskop · December 16, 2015 21:48
diff --git a/readme.rd b/readme.rd
 | Principal factor analysis with "natural" weights - Model of parliamentary voting.
 | (PCA equals to Multidimensional scaling)
 | Cutting lines: perpendicular to normal vectors of divisions, minimizing the sum of "error distances" in given hyperplane
diff --git a/a-weighted PCA b/a-weighted PCA
 # Weighted PCA
diff --git a/cutting_lines.r b/cutting_lines.r
 # CUTTING LINES
 # based on overall model
 #minimizing function
 f_cutting_lines=function(beta0) -1*sum(apply(cbind(y*(x%*%beta+beta0),zeros),1,min))

 #how many dimensions?
 n_first_dimensions = 2

 #preparing variables
 normals = Hy[,1:n_first_dimensions]
 loss_f = data.frame(matrix(0,nrow=dim(H)[1],ncol=4))
 colnames(loss_f)=c("Parameter1","Loss1","Parameter_1","Loss_1")
 parameters = data.frame(matrix(0,nrow=dim(H)[1],ncol=3))
 colnames(parameters)=c("Parameter","Loss","Direction")

 xfull = t(t(He$vectors[,1:n_first_dimensions]) * sqrt(He$values[1:n_first_dimensions]))

 #calculating all cutting lines
 i=1
 for (i in as.numeric(1:dim(H)[1])) {
    beta = Hy[i,1:n_first_dimensions]
    y = t(as.matrix(H[i,]))[,pI]
    x = xfull[which(!is.na(y)),]
    y = y[which(!is.na(y))]
    zeros = as.matrix(rep(0,length(y)))
    #** place for improvement: "1000"
    res1 = optim(c(1),f_cutting_lines,method="Brent",lower=-1000,upper=1000)	#1000? arbitrary
    #the sign is arbitrary, the real result may be -1*; we need to minimize the other way as well!!
    y=-y
    res2 = optim(c(1),f_cutting_lines,method="Brent",lower=-1000,upper=1000) #1000? arbitrary
    #the real parameter is the one with lower loss function
    #theoretically should be the same (either +1 or -1) for all divisions(rows), however, due to missing data, the calculation may lead to a few divisions with the other direction
    loss_f[i,] = c(res1$par,res1$value,res2$par,res2$value)
    if (res1$value<=res2$value) {
      parameters[i,] = c(res1$par,res1$value,1)
    } else {
      parameters[i,] = c(res2$par,res2$value,-1)
    }
 }
 CuttingLines = list(normals=normals,parameters=parameters,loss_function=loss_f,weights=cbind(w1,w2))


 ##
 #cutting lines:
 plot(Hproj[,1],Hproj[,2])
 I = w1*w2 > .5
 for (i in as.numeric(1:dim(H)[1])) {
  if (I[i] && ((i %% 10) == 0)) {
    beta = CuttingLines$normals[i,]
    beta0 = CuttingLines$parameters$Parameter[i]
 	abline(-beta0/beta[2],-beta[1]/beta[2])
    break
  }
 }

 # only second half
 plot(Hproj[,1],Hproj[,2])
 for (j in as.numeric(1:round(dim(H)[1]/2))) {
     i = round(dim(H)[1]/2) + j
     if (I[i] && ((i %% 10) == 0)) {
         beta = CuttingLines$normals[i,]
         beta0 = CuttingLines$parameters$Parameter[i]
         abline(-beta0/beta[2],-beta[1]/beta[2])
         #break
     }
 }

 # when y=0, x is ?
 plot(CuttingLines$parameters$Parameter/CuttingLines$normals[,1],ylim=c(-100,100))
 # slopes
 plot(CuttingLines$parameters$Parameter/CuttingLines$normals[,2],ylim=c(-10,10))

diff --git a/partial_projection.r b/partial_projection.r
 # NEW MP (or partial)
 #New persons / partial (like a projection of divisions from a smaller time interval into all divisions)


 analdb = dbConnect(SQLite(), dbname=paste(path,dataname,"/",analysisname,"/",analysisname,".sqlite3",sep=""))
 datadb = dbConnect(SQLite(), dbname=paste(path,dataname,"/",dataname,".sqlite3",sep=""))

 rotation = strsplit(dbGetQuery(analdb,"SELECT orientation FROM analysis_info")[1,1],",")[[1]]
 rot_mp_rank = which(dimnames(Hcov)[[1]] == rotation[1])
 for (j in 2:length(rotation)) {
  if (Hproj[rot_mp_rank,j-1] * as.double(rotation[j]) < 0) {
    U[,j-1] = -1*U[,j-1]
  }
 }

 aU = abs(U)

 mp_names = dbGetQuery(datadb,"SELECT name FROM mp ORDER BY CAST(code as INTEGER)")
 attr(G,'dimnames')[2][[1]] = mp_names$name
 #attr(Hw0c,'dimnames')[2][[1]] = mp_names$name[pI]

 interval_names = dbGetQuery(analdb,"SELECT distinct(interval_name) FROM analysis_division_in_interval ORDER BY interval_name")

 #lower limit to eliminate from projections
 lo_limitT = lo_limit

    


 for (i in 1:dim(interval_names)[1]) {

    TIf = dbGetQuery(analdb,paste("SELECT division_code, CASE interval_name='",interval_names[i,],"' WHEN 1 THEN 'TRUE' ELSE 'FALSE' END as in_interval FROM analysis_division_in_interval ORDER BY division_code",sep=""))
    
    TIf$in_interval = as.logical(TIf$in_interval)
    
    max_division_code = TIf$division_code[max(which(as.logical(TIf$in_interval)))]
    min_division_code = TIf$division_code[min(which(as.logical(TIf$in_interval)))]
    
    TI = TIf$in_interval
    
    GTc = G[,pI]
    GTc[!TI,] = NA
    HTc = (GTc - attr(H,"scaled:center"))/attr(H,"scaled:scale")
    
    HTIc = HTc
 	HTIc[!is.na(HTIc)] = 1
 	HTIc[is.na(HTIc)] = 0
    
    HTw0c = HTc * w1 * w2
    HTw0c[is.na(HTw0c)] = 0
    

 	#weights for non missing data division x persons
 	HTIcw = HTIc*w1*w2
 	#sum of weights of divisions for each persons
 	tmp = apply(HTIcw,2,sum)
 	pTw = tmp/max(tmp)
 	#index of persons in calculation
 	pTI = pTw > lo_limitT
 	
 	#weighted scaled with NA->0 and cutted persons with too few votes division x persons
 	HTw0cc = HTw0c[,pTI]
 	#index of missing data cutted persons with too few votes divisions x persons
 	HTIcc = HTIc[,pTI]
    
    
 	dweights = t(t(aU)%*%HTIcc / apply(aU,2,sum))  	#person x division
    dweights[is.na(dweights)] = 0
    
    HTw0ccproj = t(HTw0cc)%*%U / dweights


    parties = as.matrix(dbGetQuery(datadb,paste('SELECT COALESCE(NULLIF(tmax.code,""),tmin.code) as code, COALESCE(NULLIF(tmax.color,""),tmin.color) as color FROM (SELECT  mvf.mp_code, g.code, g.color FROM mp_vote_full as mvf  LEFT JOIN "group" as g  ON mvf.group_code=g.code  WHERE division_code="',max_division_code,'") as tmax LEFT JOIN  (SELECT  mvf.mp_code, g.code, g.color FROM mp_vote_full as mvf  LEFT JOIN "group" as g  ON mvf.group_code=g.code  WHERE division_code="',min_division_code,'") as tmin ON tmax.mp_code=tmin.mp_code  ORDER BY CAST(tmax.mp_code as INTEGER)',sep="")))
    partiescc = (parties[pI,])[pTI,]
    
    #mp rank id
    mp_rank_id_cc = seq(1,dim(HTw0c)[2],1)[,pTI]
    
    write.csv(format(cbind(HTw0ccproj[,1:2],partiescc,mp_rank_id_cc),digits=3),file=paste(path,dataname,"/",analysisname,"/",interval_names[i,],"_result.csv",sep=""))
     #*************************
 }

diff --git a/sources b/sources
 #matrix (operations):
 en.wikipedia.org/wiki/Matrix_(mathematics)

 #rotation matrix:
 http://en.wikipedia.org/wiki/Rotation_matrix

 #SVD:
 http://en.wikipedia.org/wiki/Singular_value_decomposition

 #Eigendecomposition
 http://en.wikipedia.org/wiki/Eigendecomposition

 #SVD vs. eigendec.
 http://stats.stackexchange.com/questions/14002/whats-the-difference-between-principal-components-analysis-and-multidimensional

 #SVD example:
 http://polisci2.ucsd.edu/aruizeuler/SVD.html

 #normal and distance point,plane
 http://en.wikipedia.org/wiki/Normal_%28geometry%29
 http://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
 http://www.vitutor.com/geometry/distance/point_plane.html

 # multiply rows of matrix by vector in R (speed)
 http://stackoverflow.com/questions/3643555/multiply-rows-of-matrix-by-vector

 #ideal points
 http://voteview.com/ideal_point_history.htm

 #wnominate in R
 http://cran.r-project.org/web/packages/wnominate/vignettes/wnominate.pdf
 http://cran.r-project.org/web/packages/wnominate/wnominate.pdf
 http://rss.acs.unt.edu/Rdoc/library/pscl/html/rollcall.html
 http://voteview.com/senate109.htm

 #US roll-call downloads
 http://voteview.com/downloads.asp#ROLLCALLDWNL

 #linear discriminant analysis (not used finally)
 http://en.wikipedia.org/wiki/Linear_discriminant_analysis

 #logistic regression (not used finally)
 http://en.wikipedia.org/wiki/Linear_discriminant_analysis

 #SVM and kernels (not used finally)
 http://en.wikipedia.org/wiki/Support_vector_machine
 http://en.wikibooks.org/wiki/Data_Mining_Algorithms_In_R/Classification/SVM
 http://stats.stackexchange.com/questions/26293/how-to-obtain-decision-boundaries-from-linear-svm-in-r
 http://stats.stackexchange.com/questions/5056/computing-the-decision-boundary-of-a-linear-svm-model/5080#5080
 http://stats.stackexchange.com/questions/25387/problem-with-e1071-libsvm
 http://stats.stackexchange.com/questions/17955/svm-with-unequal-group-sizes-in-training-data?rq=1
 http://cran.r-project.org/web/packages/kernlab/kernlab.pdf
 http://cbio.ensmp.fr/~jvert/svn/tutorials/practical/svmbasic/svmbasic_notes.pdf
 http://stackoverflow.com/questions/3282916/class-weight-syntax-in-kernlab
 http://www.robots.ox.ac.uk/~az/lectures/ml/lect2.pdf
 http://stat.ethz.ch/R-manual/R-patched/library/stats/html/density.html


diff --git a/thumbnail.png b/thumbnail.png
diff --git a/votes.csv b/votes.csv
diff --git a/wpca.r b/wpca.r
 #install.packages("reshape2")
 library("reshape2")
 #install.packages("sqldf")
 library("sqldf")

 path = "~/project/mpv_motion/dev/"
 #dataname = "cz_psp_2006-2010_a1_1"
 #analysisname = 'cz_psp_2006-2010_a1_1q'
 dataname = "cz_kromeriz_2012"
 analysisname = 'cz_kromeriz_2012_1h'

 #raw data in 3 columns (values in [-1,1])
 Graw = read.table(paste(path,dataname,"/",dataname,".csv",sep=""), header=FALSE, sep=",")
 #data divisions x persons
 G = acast(Graw,V2~V1,value.var='V3')
 #scaled divisions x persons (mean=0 and sd=1 for each division)
 H=t(scale(t(G),scale=TRUE))

 #weights
 #weights 1, based on number of persons in division
 w1 = apply(abs(G)==1,1,sum,na.rm=TRUE)/max(apply(abs(G)==1,1,sum,na.rm=TRUE))
 w1[is.na(w1)] = 0
 #weights 2, "100:100" vs. "195:5"
 w2 = 1 - abs(apply(G==1,1,sum,na.rm=TRUE) - apply(G==-1,1,sum,na.rm=TRUE))/apply(!is.na(G),1,sum) 
 w2[is.na(w2)] = 0

 	#analytics
 	plot(w1)
 	plot(w2)
 	plot(w1*w2)

 #weighted scaled divisions x persons
 Hw = H * w1 * w2

 #index of missing data
 #index of missing data divisions x persons
 HI = H
 HI[!is.na(H)]=1
 HI[is.na(H)] = 0

 #weighted scaled with NA->0 division x persons
 Hw0 = Hw
 Hw0[is.na(Hw)]=0

 #eliminate persons with too few votes (weighted)
 #lower limit to eliminate from calculations
 lo_limit = .1
 #weights for non missing data division x persons
 HIw = HI*w1*w2
 #sum of weights of divisions for each persons
 tmp = apply(HIw,2,sum)
 pw = tmp/max(tmp)
 #index of persons in calculation
 pI = pw > lo_limit
 #weighted scaled with NA->0 and cutted persons with too few votes division x persons
 Hw0c = Hw0[,pI]
 #index of missing data cutted persons with too few votes divisions x persons
 HIc = HI[,pI]

 #"covariance" matrix adjusted according to missing data
 Hcov=t(Hw0c)%*%Hw0c * 1/(t(HIc)%*%HIc) * (dim(Hw0c)[1])
 #Hcov=t(Hw0)%*%Hw0 * 1/(t(HI)%*%HI-1) * (dim(H)[1]-1)

 #substitution of missing data in "covariance" matrix
 Hcov0 = Hcov
 Hcov0[is.na(Hcov)] = 0			#********* 

 #eigendecomposition
 He=eigen(Hcov0)
 # V (rotation values of persons)
 V = He$vectors
 #projected divisions into dimensions
 Hy=Hw0c%*%V

 	#analytics
 	plot(Hy[,1],Hy[,2])
 	plot(sqrt(He$values[1:10]))

 #sigma matrix 
 sigma = diag(sqrt(He$values))
 sigma[is.na(sigma)] = 0

 #projection of persons into dimensions
 Hproj = t(sigma%*%t(V))
 	#analytics
 	plot(Hproj[,1],Hproj[,2])

 #sigma^-1 matrix
 sigma_1 = diag(sqrt(1/He$values))
 sigma_1[is.na(sigma_1)] = 0

 # U (rotation values of divisions)
 U = Hw0c%*%V%*%sigma_1

 #U%*%sigma%*%t(V) != Hw0c ;because of adjusting of "covariance" matrix
 #THE PROBLEM (if T - overall otherwise see Hproj2)
 plot(He$vectors[,1]*sqrt(He$value[1]),He$vectors[,2]*sqrt(He$value[2]))
 #second projection
    Hproj2 = t(t(U) %*% Hw0c)
    	#without missing values should be equal
    #analytics
    plot(Hproj[,1],Hproj2[,1])
    plot(Hproj[,2],Hproj2[,2])
 #plot(HTw0ccproj[,1],HTw0ccproj[,2])
 #plot(Hproj[,1],HTw0ccproj[,1])
 #plot(Hproj[,2],HTw0ccproj[,2])
	\| Principal factor analysis with "natural" weights - Model of parliamentary voting.
	\| (PCA equals to Multidimensional scaling)
	\| Cutting lines: perpendicular to normal vectors of divisions, minimizing the sum of "error distances" in given hyperplane
	# CUTTING LINES
	# based on overall model
	#minimizing function
	f_cutting_lines=function(beta0) -1sum(apply(cbind(y(x%*%beta+beta0),zeros),1,min))

	#how many dimensions?
	n_first_dimensions = 2

	#preparing variables
	normals = Hy[,1:n_first_dimensions]
	loss_f = data.frame(matrix(0,nrow=dim(H)[1],ncol=4))
	colnames(loss_f)=c("Parameter1","Loss1","Parameter_1","Loss_1")
	parameters = data.frame(matrix(0,nrow=dim(H)[1],ncol=3))
	colnames(parameters)=c("Parameter","Loss","Direction")

	xfull = t(t(He$vectors[,1:n_first_dimensions]) * sqrt(He$values[1:n_first_dimensions]))

	#calculating all cutting lines
	i=1
	for (i in as.numeric(1:dim(H)[1])) {
	beta = Hy[i,1:n_first_dimensions]
	y = t(as.matrix(H[i,]))[,pI]
	x = xfull[which(!is.na(y)),]
	y = y[which(!is.na(y))]
	zeros = as.matrix(rep(0,length(y)))
	#** place for improvement: "1000"
	res1 = optim(c(1),f_cutting_lines,method="Brent",lower=-1000,upper=1000) #1000? arbitrary
	#the sign is arbitrary, the real result may be -1*; we need to minimize the other way as well!!
	y=-y
	res2 = optim(c(1),f_cutting_lines,method="Brent",lower=-1000,upper=1000) #1000? arbitrary
	#the real parameter is the one with lower loss function
	#theoretically should be the same (either +1 or -1) for all divisions(rows), however, due to missing data, the calculation may lead to a few divisions with the other direction
	loss_f[i,] = c(res1$par,res1$value,res2$par,res2$value)
	if (res1$value<=res2$value) {
	parameters[i,] = c(res1$par,res1$value,1)
	} else {
	parameters[i,] = c(res2$par,res2$value,-1)
	}
	}
	CuttingLines = list(normals=normals,parameters=parameters,loss_function=loss_f,weights=cbind(w1,w2))


	##
	#cutting lines:
	plot(Hproj[,1],Hproj[,2])
	I = w1*w2 > .5
	for (i in as.numeric(1:dim(H)[1])) {
	if (I[i] && ((i %% 10) == 0)) {
	beta = CuttingLines$normals[i,]
	beta0 = CuttingLines$parameters$Parameter[i]
	abline(-beta0/beta[2],-beta[1]/beta[2])
	break
	}
	}

	# only second half
	plot(Hproj[,1],Hproj[,2])
	for (j in as.numeric(1:round(dim(H)[1]/2))) {
	i = round(dim(H)[1]/2) + j
	if (I[i] && ((i %% 10) == 0)) {
	beta = CuttingLines$normals[i,]
	beta0 = CuttingLines$parameters$Parameter[i]
	abline(-beta0/beta[2],-beta[1]/beta[2])
	#break
	}
	}

	# when y=0, x is ?
	plot(CuttingLines$parameters$Parameter/CuttingLines$normals[,1],ylim=c(-100,100))
	# slopes
	plot(CuttingLines$parameters$Parameter/CuttingLines$normals[,2],ylim=c(-10,10))
	# NEW MP (or partial)
	#New persons / partial (like a projection of divisions from a smaller time interval into all divisions)


	analdb = dbConnect(SQLite(), dbname=paste(path,dataname,"/",analysisname,"/",analysisname,".sqlite3",sep=""))
	datadb = dbConnect(SQLite(), dbname=paste(path,dataname,"/",dataname,".sqlite3",sep=""))

	rotation = strsplit(dbGetQuery(analdb,"SELECT orientation FROM analysis_info")[1,1],",")[[1]]
	rot_mp_rank = which(dimnames(Hcov)[[1]] == rotation[1])
	for (j in 2:length(rotation)) {
	if (Hproj[rot_mp_rank,j-1] * as.double(rotation[j]) < 0) {
	U[,j-1] = -1*U[,j-1]
	}
	}

	aU = abs(U)

	mp_names = dbGetQuery(datadb,"SELECT name FROM mp ORDER BY CAST(code as INTEGER)")
	attr(G,'dimnames')[2][[1]] = mp_names$name
	#attr(Hw0c,'dimnames')[2][[1]] = mp_names$name[pI]

	interval_names = dbGetQuery(analdb,"SELECT distinct(interval_name) FROM analysis_division_in_interval ORDER BY interval_name")

	#lower limit to eliminate from projections
	lo_limitT = lo_limit




	for (i in 1:dim(interval_names)[1]) {

	TIf = dbGetQuery(analdb,paste("SELECT division_code, CASE interval_name='",interval_names[i,],"' WHEN 1 THEN 'TRUE' ELSE 'FALSE' END as in_interval FROM analysis_division_in_interval ORDER BY division_code",sep=""))

	TIf$in_interval = as.logical(TIf$in_interval)

	max_division_code = TIf$division_code[max(which(as.logical(TIf$in_interval)))]
	min_division_code = TIf$division_code[min(which(as.logical(TIf$in_interval)))]

	TI = TIf$in_interval

	GTc = G[,pI]
	GTc[!TI,] = NA
	HTc = (GTc - attr(H,"scaled:center"))/attr(H,"scaled:scale")

	HTIc = HTc
	HTIc[!is.na(HTIc)] = 1
	HTIc[is.na(HTIc)] = 0

	HTw0c = HTc * w1 * w2
	HTw0c[is.na(HTw0c)] = 0


	#weights for non missing data division x persons
	HTIcw = HTIcw1w2
	#sum of weights of divisions for each persons
	tmp = apply(HTIcw,2,sum)
	pTw = tmp/max(tmp)
	#index of persons in calculation
	pTI = pTw > lo_limitT

	#weighted scaled with NA->0 and cutted persons with too few votes division x persons
	HTw0cc = HTw0c[,pTI]
	#index of missing data cutted persons with too few votes divisions x persons
	HTIcc = HTIc[,pTI]


	dweights = t(t(aU)%*%HTIcc / apply(aU,2,sum)) #person x division
	dweights[is.na(dweights)] = 0

	HTw0ccproj = t(HTw0cc)%*%U / dweights


	parties = as.matrix(dbGetQuery(datadb,paste('SELECT COALESCE(NULLIF(tmax.code,""),tmin.code) as code, COALESCE(NULLIF(tmax.color,""),tmin.color) as color FROM (SELECT mvf.mp_code, g.code, g.color FROM mp_vote_full as mvf LEFT JOIN "group" as g ON mvf.group_code=g.code WHERE division_code="',max_division_code,'") as tmax LEFT JOIN (SELECT mvf.mp_code, g.code, g.color FROM mp_vote_full as mvf LEFT JOIN "group" as g ON mvf.group_code=g.code WHERE division_code="',min_division_code,'") as tmin ON tmax.mp_code=tmin.mp_code ORDER BY CAST(tmax.mp_code as INTEGER)',sep="")))
	partiescc = (parties[pI,])[pTI,]

	#mp rank id
	mp_rank_id_cc = seq(1,dim(HTw0c)[2],1)[,pTI]

	write.csv(format(cbind(HTw0ccproj[,1:2],partiescc,mp_rank_id_cc),digits=3),file=paste(path,dataname,"/",analysisname,"/",interval_names[i,],"_result.csv",sep=""))
	#*************************
	}
	#matrix (operations):
	en.wikipedia.org/wiki/Matrix_(mathematics)

	#rotation matrix:
	http://en.wikipedia.org/wiki/Rotation_matrix

	#SVD:
	http://en.wikipedia.org/wiki/Singular_value_decomposition

	#Eigendecomposition
	http://en.wikipedia.org/wiki/Eigendecomposition

	#SVD vs. eigendec.
	http://stats.stackexchange.com/questions/14002/whats-the-difference-between-principal-components-analysis-and-multidimensional

	#SVD example:
	http://polisci2.ucsd.edu/aruizeuler/SVD.html

	#normal and distance point,plane
	http://en.wikipedia.org/wiki/Normal_%28geometry%29
	http://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
	http://www.vitutor.com/geometry/distance/point_plane.html

	# multiply rows of matrix by vector in R (speed)
	http://stackoverflow.com/questions/3643555/multiply-rows-of-matrix-by-vector

	#ideal points
	http://voteview.com/ideal_point_history.htm

	#wnominate in R
	http://cran.r-project.org/web/packages/wnominate/vignettes/wnominate.pdf
	http://cran.r-project.org/web/packages/wnominate/wnominate.pdf
	http://rss.acs.unt.edu/Rdoc/library/pscl/html/rollcall.html
	http://voteview.com/senate109.htm

	#US roll-call downloads
	http://voteview.com/downloads.asp#ROLLCALLDWNL

	#linear discriminant analysis (not used finally)
	http://en.wikipedia.org/wiki/Linear_discriminant_analysis

	#logistic regression (not used finally)
	http://en.wikipedia.org/wiki/Linear_discriminant_analysis

	#SVM and kernels (not used finally)
	http://en.wikipedia.org/wiki/Support_vector_machine
	http://en.wikibooks.org/wiki/Data_Mining_Algorithms_In_R/Classification/SVM
	http://stats.stackexchange.com/questions/26293/how-to-obtain-decision-boundaries-from-linear-svm-in-r
	http://stats.stackexchange.com/questions/5056/computing-the-decision-boundary-of-a-linear-svm-model/5080#5080
	http://stats.stackexchange.com/questions/25387/problem-with-e1071-libsvm
	http://stats.stackexchange.com/questions/17955/svm-with-unequal-group-sizes-in-training-data?rq=1
	http://cran.r-project.org/web/packages/kernlab/kernlab.pdf
	http://cbio.ensmp.fr/~jvert/svn/tutorials/practical/svmbasic/svmbasic_notes.pdf
	http://stackoverflow.com/questions/3282916/class-weight-syntax-in-kernlab
	http://www.robots.ox.ac.uk/~az/lectures/ml/lect2.pdf
	http://stat.ethz.ch/R-manual/R-patched/library/stats/html/density.html
p1	p2	p3	p4
1	1	1	-1
1	1	-1	-1
1	1	-1	-1
1	-1	1	-1
1	-1	1	-1
1	-1	-1	-1
-1	-1	-1	1
-1	-1	-1	1
1	1	-1	-1
1	-1	1	-1
	#install.packages("reshape2")
	library("reshape2")
	#install.packages("sqldf")
	library("sqldf")

	path = "~/project/mpv_motion/dev/"
	#dataname = "cz_psp_2006-2010_a1_1"
	#analysisname = 'cz_psp_2006-2010_a1_1q'
	dataname = "cz_kromeriz_2012"
	analysisname = 'cz_kromeriz_2012_1h'

	#raw data in 3 columns (values in [-1,1])
	Graw = read.table(paste(path,dataname,"/",dataname,".csv",sep=""), header=FALSE, sep=",")
	#data divisions x persons
	G = acast(Graw,V2~V1,value.var='V3')
	#scaled divisions x persons (mean=0 and sd=1 for each division)
	H=t(scale(t(G),scale=TRUE))

	#weights
	#weights 1, based on number of persons in division
	w1 = apply(abs(G)==1,1,sum,na.rm=TRUE)/max(apply(abs(G)==1,1,sum,na.rm=TRUE))
	w1[is.na(w1)] = 0
	#weights 2, "100:100" vs. "195:5"
	w2 = 1 - abs(apply(G==1,1,sum,na.rm=TRUE) - apply(G==-1,1,sum,na.rm=TRUE))/apply(!is.na(G),1,sum)
	w2[is.na(w2)] = 0

	#analytics
	plot(w1)
	plot(w2)
	plot(w1*w2)

	#weighted scaled divisions x persons
	Hw = H * w1 * w2

	#index of missing data
	#index of missing data divisions x persons
	HI = H
	HI[!is.na(H)]=1
	HI[is.na(H)] = 0

	#weighted scaled with NA->0 division x persons
	Hw0 = Hw
	Hw0[is.na(Hw)]=0

	#eliminate persons with too few votes (weighted)
	#lower limit to eliminate from calculations
	lo_limit = .1
	#weights for non missing data division x persons
	HIw = HIw1w2
	#sum of weights of divisions for each persons
	tmp = apply(HIw,2,sum)
	pw = tmp/max(tmp)
	#index of persons in calculation
	pI = pw > lo_limit
	#weighted scaled with NA->0 and cutted persons with too few votes division x persons
	Hw0c = Hw0[,pI]
	#index of missing data cutted persons with too few votes divisions x persons
	HIc = HI[,pI]

	#"covariance" matrix adjusted according to missing data
	Hcov=t(Hw0c)%%Hw0c 1/(t(HIc)%%HIc) (dim(Hw0c)[1])
	#Hcov=t(Hw0)%%Hw0 1/(t(HI)%%HI-1) (dim(H)[1]-1)

	#substitution of missing data in "covariance" matrix
	Hcov0 = Hcov
	Hcov0[is.na(Hcov)] = 0 #*********

	#eigendecomposition
	He=eigen(Hcov0)
	# V (rotation values of persons)
	V = He$vectors
	#projected divisions into dimensions
	Hy=Hw0c%*%V

	#analytics
	plot(Hy[,1],Hy[,2])
	plot(sqrt(He$values[1:10]))

	#sigma matrix
	sigma = diag(sqrt(He$values))
	sigma[is.na(sigma)] = 0

	#projection of persons into dimensions
	Hproj = t(sigma%*%t(V))
	#analytics
	plot(Hproj[,1],Hproj[,2])

	#sigma^-1 matrix
	sigma_1 = diag(sqrt(1/He$values))
	sigma_1[is.na(sigma_1)] = 0

	# U (rotation values of divisions)
	U = Hw0c%%V%%sigma_1

	#U%%sigma%%t(V) != Hw0c ;because of adjusting of "covariance" matrix
	#THE PROBLEM (if T - overall otherwise see Hproj2)
	plot(He$vectors[,1]sqrt(He$value[1]),He$vectors[,2]sqrt(He$value[2]))
	#second projection
	Hproj2 = t(t(U) %*% Hw0c)
	#without missing values should be equal
	#analytics
	plot(Hproj[,1],Hproj2[,1])
	plot(Hproj[,2],Hproj2[,2])
	#plot(HTw0ccproj[,1],HTw0ccproj[,2])
	#plot(Hproj[,1],HTw0ccproj[,1])
	#plot(Hproj[,2],HTw0ccproj[,2])