natbusa · December 27, 2015 09:19
diff --git a/dutch.elections.2010.R b/dutch.elections.2010.R
 # Load the Dutch 2010 election results published at
 # http://www.engagedata.eu/dataset/14399
 # (https://engagefp7.s3.amazonaws.com/resources/dataset_14399/TK2010.csv).
 # The first 31 lines of the file are skipped and no header row is read,
 # so the columns carry default names until they are renamed.

 dat <- read.csv2(
   file   = "http://engagefp7.s3.amazonaws.com/resources/dataset_14399/TK2010.csv",
   header = FALSE,
   skip   = 31
 )

 # Column names for the election table, in file order: the municipality
 # ('Gemeente'), seven ballot / turnout columns ending with
 # 'Opkomstpercentage', then one column per party.
 #
 # The names are written as a character vector instead of being parsed out
 # of a multi-line string with read.csv2(): parsing keeps whatever
 # indentation the string literal has in the source file as leading
 # whitespace in each name, which breaks exact lookups such as
 # dat$Gemeente or names(dat) == 'Opkomstpercentage'.
 column.names <- c(
   "Gemeente",
   "geldige stemmen",
   "ongeldige stemmen",
   "blanco stemmen",
   "Percentage blanco stemmen",
   "kiesgerechtigden",
   "Opkomst",
   "Opkomstpercentage",
   "VVD",
   "Partij van de Arbeid (P.v.d.A.)",
   "PVV (Partij voor de Vrijheid)",
   "Christen Democratisch Appel (CDA)",
   "SP (Socialistische Partij)",
   "Democraten 66 (D66)",
   "GROENLINKS",
   "ChristenUnie",
   "Staatkundig Gereformeerde Partij (SGP)",
   "Partij voor de Dieren",
   "TROTS OP NEDERLAND LIJST RITA VERDONK",
   "Partij voor Mens en Spirit (MenS)",
   "Piratenpartij",
   "Lijst 17",
   "Partij een",
   "Nieuw Nederland",
   "Heel NL",
   "Evangelische Partij Nederland"
 )

 # rename the columns of the data frame
 colnames(dat) <- column.names

 # Drop the last four rows of the table.
 # head() with a negative n is used instead of 1:(nrow(dat) - 4): the colon
 # form yields c(1, 0) (keeping row 1) when exactly four rows are present
 # and mixed-sign indices when there are fewer.
 dat <- head(dat, -4)

 # how many municipalities? (row count, name count, distinct name count)
 nrow(dat)
 length(dat$Gemeente)
 length(unique(dat$Gemeente))

 # Party columns are all columns to the right of 'Opkomstpercentage'.
 n    <- names(dat)
 from <- which(n == "Opkomstpercentage") + 1
 to   <- length(n)

 # Fail with a clear message if the anchor column is missing, duplicated,
 # or is the last column; from:to would otherwise error obscurely or count
 # downwards.
 stopifnot(length(from) == 1L, from <= to)

 # select the column names that belong to parties
 parties <- n[from:to]
 parties

 # how many parties?
 length(parties)

 # Vote count per party: sum each party column over all rows of dat.
 total <- colSums(dat[parties])
 total

 # bar chart of the raw counts
 barplot(total)

 # the same data on a log10 scale, two alternatives:
 # transform the values, or ask barplot() for a logarithmic y axis
 barplot(log10(total))
 barplot(total, log = "y")

 # extreme values of the per-party totals
 total.max.value <- max(total)
 total.min.value <- min(total)

 # party (or parties, on a tie) with the most votes
 total.max <- total[total == total.max.value]
 total.max

 # party (or parties, on a tie) with the fewest votes
 total.min <- total[total == total.min.value]
 total.min

 # Coverage of the least-voted party across municipalities.
 # names(total.min) is the column of the party with the fewest votes.

 # municipalities where the losing party got at least one vote
 I <- dat[names(total.min)] > 0
 party.loosing <- dat[I, c("Gemeente", names(total.min))]
 nrow(party.loosing)

 # municipalities where the losing party got exactly 12 votes
 J <- dat[names(total.min)] == 12
 party.loosing <- dat[J, c("Gemeente", names(total.min))]
 nrow(party.loosing)

 # Municipalities where CDA < VVD < PVV:
 # I marks rows with VVD below PVV, J marks rows with VVD above CDA.
 I <- dat["VVD"] < dat["PVV (Partij voor de Vrijheid)"]
 J <- dat["VVD"] > dat["Christen Democratisch Appel (CDA)"]

 d <- dat[
   I & J,
   c(
     "Gemeente",
     "VVD",
     "PVV (Partij voor de Vrijheid)",
     "Christen Democratisch Appel (CDA)"
   )
 ]

 # which municipalities satisfy CDA < VVD < PVV, and how many are there?
 d[["Gemeente"]]
 nrow(d)

 # time for pies!
 pie(total)

 # Collapse a vote vector that is already sorted in decreasing order to its
 # first `keep` entries plus one "others" entry holding the sum of the rest.
 # Position masks replace 1:keep and (keep + 1):length(x), which produce NA
 # entries or backwards ranges when the vector has `keep` or fewer entries.
 lump.tail <- function(sorted.counts, keep) {
   in.top <- seq_along(sorted.counts) <= keep
   c(sorted.counts[in.top], others = sum(sorted.counts[!in.top]))
 }

 # create an "others" category for the small parties
 n <- 7
 sorted.total <- sort(total, decreasing = TRUE)
 votes <- lump.tail(sorted.total, n)

 # a better pie
 pie(votes, main = "dutch elections 2010")

 # as above but for a specific municipality
 municipality <- "Almere"

 # %in% never yields NA, so rows with a missing name are simply not selected
 municipality.selection <- dat$Gemeente %in% municipality

 # as.numeric() below needs exactly one row; stop early with a clear
 # message if the name is absent or matches more than one row
 stopifnot(sum(municipality.selection) == 1)
 municipalities.votes <- dat[municipality.selection, parties]

 # note: from here on `total` holds this municipality's votes
 total <- as.numeric(municipalities.votes)
 names(total) <- parties

 n <- 7
 sorted.total <- sort(total, decreasing = TRUE)
 votes <- lump.tail(sorted.total, n)

 # a better pie
 pie(votes, main = municipality)

 # Express every party's votes relative to the valid votes
 # ('geldige stemmen') of the same row.
 relative <- dat[parties] / dat[, "geldige stemmen"]

 # correlation between the relative results of each pair of parties
 cross.corr <- cor(relative)

 # blank out the diagonal so a party is never compared with itself
 diag(cross.corr) <- NA

 # correlations of VVD with every other party
 vvd.corr <- cross.corr["VVD", ]
 vvd.corr

 # largest and smallest correlation for VVD (NA entries ignored)
 vvd.corr.max <- max(vvd.corr, na.rm = TRUE)
 vvd.corr.max

 vvd.corr.min <- min(vvd.corr, na.rm = TRUE)
 vvd.corr.min

 # which party has the most positive correlation with VVD?
 vvd.corr[which.max(vvd.corr)]

 # which party has the most negative correlation with VVD?
 vvd.corr[which.min(vvd.corr)]

 # names of the parties (row and column names of cross.corr are the same)
 cross.corr.parties <- rownames(cross.corr)

 # for every column of cross.corr: the largest value, NA entries ignored
 corr.max.value <- apply(cross.corr, 2, FUN = function(x) max(x, na.rm = TRUE))
 corr.max.value

 # ... and the name of the row in which that largest value occurs
 corr.max.party <- apply(cross.corr, 2, FUN = function(x) cross.corr.parties[which.max(x)])
 corr.max.party

 # Combine into a data frame.
 # The vectors are passed to data.frame() directly: going through cbind()
 # first builds a character matrix, which turns the correlations into
 # strings and makes order() below sort them as text instead of as numbers.
 cross.corr.dataframe <- data.frame(
   corr.max.value,
   corr.max.party,
   stringsAsFactors = FALSE
 )
 cross.corr.dataframe

 # sort by highest correlation (numeric, descending)
 I <- order(cross.corr.dataframe$corr.max.value, decreasing = TRUE)
 cross.corr.dataframe[I, ]

 # highest correlation
 cross.corr.dataframe[I[1], ]


 # for every column of cross.corr: the smallest value, NA entries ignored
 corr.min.value <- apply(cross.corr, 2, FUN = function(x) min(x, na.rm = TRUE))
 corr.min.value

 # ... and the name of the row in which that smallest value occurs
 corr.min.party <- apply(cross.corr, 2, FUN = function(x) cross.corr.parties[which.min(x)])
 corr.min.party

 # Combine into a data frame.
 # The vectors are passed to data.frame() directly: going through cbind()
 # first builds a character matrix, which turns the correlations into
 # strings and makes order() below sort them as text instead of as numbers.
 cross.corr.dataframe <- data.frame(
   corr.min.value,
   corr.min.party,
   stringsAsFactors = FALSE
 )
 cross.corr.dataframe

 # sort by strongest negative correlation: ascending numeric order puts
 # the most negative value first
 I <- order(cross.corr.dataframe$corr.min.value)
 cross.corr.dataframe[I, ]

 # strongest negative correlation
 cross.corr.dataframe[I[1], ]
	# Load the Dutch 2010 election results published at
	# http://www.engagedata.eu/dataset/14399
	# (https://engagefp7.s3.amazonaws.com/resources/dataset_14399/TK2010.csv).
	# The first 31 lines of the file are skipped and no header row is read,
	# so the columns carry default names until they are renamed.

	dat <- read.csv2(
		file   = "http://engagefp7.s3.amazonaws.com/resources/dataset_14399/TK2010.csv",
		header = FALSE,
		skip   = 31
	)

	# Column names for the election table, in file order: the municipality
	# ('Gemeente'), seven ballot / turnout columns ending with
	# 'Opkomstpercentage', then one column per party.
	#
	# The names are written as a character vector instead of being parsed out
	# of a multi-line string with read.csv2(): parsing keeps whatever
	# indentation the string literal has in the source file as leading
	# whitespace in each name, which breaks exact lookups such as
	# dat$Gemeente or names(dat) == 'Opkomstpercentage'.
	column.names <- c(
		"Gemeente",
		"geldige stemmen",
		"ongeldige stemmen",
		"blanco stemmen",
		"Percentage blanco stemmen",
		"kiesgerechtigden",
		"Opkomst",
		"Opkomstpercentage",
		"VVD",
		"Partij van de Arbeid (P.v.d.A.)",
		"PVV (Partij voor de Vrijheid)",
		"Christen Democratisch Appel (CDA)",
		"SP (Socialistische Partij)",
		"Democraten 66 (D66)",
		"GROENLINKS",
		"ChristenUnie",
		"Staatkundig Gereformeerde Partij (SGP)",
		"Partij voor de Dieren",
		"TROTS OP NEDERLAND LIJST RITA VERDONK",
		"Partij voor Mens en Spirit (MenS)",
		"Piratenpartij",
		"Lijst 17",
		"Partij een",
		"Nieuw Nederland",
		"Heel NL",
		"Evangelische Partij Nederland"
	)

	# rename the columns of the data frame
	colnames(dat) <- column.names

	# Drop the last four rows of the table.
	# head() with a negative n is used instead of 1:(nrow(dat) - 4): the colon
	# form yields c(1, 0) (keeping row 1) when exactly four rows are present
	# and mixed-sign indices when there are fewer.
	dat <- head(dat, -4)

	# how many municipalities? (row count, name count, distinct name count)
	nrow(dat)
	length(dat$Gemeente)
	length(unique(dat$Gemeente))

	# Party columns are all columns to the right of 'Opkomstpercentage'.
	n    <- names(dat)
	from <- which(n == "Opkomstpercentage") + 1
	to   <- length(n)

	# Fail with a clear message if the anchor column is missing, duplicated,
	# or is the last column; from:to would otherwise error obscurely or count
	# downwards.
	stopifnot(length(from) == 1L, from <= to)

	# select the column names that belong to parties
	parties <- n[from:to]
	parties

	# how many parties?
	length(parties)

	# Vote count per party: sum each party column over all rows of dat.
	total <- colSums(dat[parties])
	total

	# bar chart of the raw counts
	barplot(total)

	# the same data on a log10 scale, two alternatives:
	# transform the values, or ask barplot() for a logarithmic y axis
	barplot(log10(total))
	barplot(total, log = "y")

	# extreme values of the per-party totals
	total.max.value <- max(total)
	total.min.value <- min(total)

	# party (or parties, on a tie) with the most votes
	total.max <- total[total == total.max.value]
	total.max

	# party (or parties, on a tie) with the fewest votes
	total.min <- total[total == total.min.value]
	total.min

	# Coverage of the least-voted party across municipalities.
	# names(total.min) is the column of the party with the fewest votes.

	# municipalities where the losing party got at least one vote
	I <- dat[names(total.min)] > 0
	party.loosing <- dat[I, c("Gemeente", names(total.min))]
	nrow(party.loosing)

	# municipalities where the losing party got exactly 12 votes
	J <- dat[names(total.min)] == 12
	party.loosing <- dat[J, c("Gemeente", names(total.min))]
	nrow(party.loosing)

	# Municipalities where CDA < VVD < PVV:
	# I marks rows with VVD below PVV, J marks rows with VVD above CDA.
	I <- dat["VVD"] < dat["PVV (Partij voor de Vrijheid)"]
	J <- dat["VVD"] > dat["Christen Democratisch Appel (CDA)"]

	d <- dat[
		I & J,
		c(
			"Gemeente",
			"VVD",
			"PVV (Partij voor de Vrijheid)",
			"Christen Democratisch Appel (CDA)"
		)
	]

	# which municipalities satisfy CDA < VVD < PVV, and how many are there?
	d[["Gemeente"]]
	nrow(d)

	# time for pies!
	pie(total)

	# Collapse a vote vector that is already sorted in decreasing order to its
	# first `keep` entries plus one "others" entry holding the sum of the rest.
	# Position masks replace 1:keep and (keep + 1):length(x), which produce NA
	# entries or backwards ranges when the vector has `keep` or fewer entries.
	lump.tail <- function(sorted.counts, keep) {
		in.top <- seq_along(sorted.counts) <= keep
		c(sorted.counts[in.top], others = sum(sorted.counts[!in.top]))
	}

	# create an "others" category for the small parties
	n <- 7
	sorted.total <- sort(total, decreasing = TRUE)
	votes <- lump.tail(sorted.total, n)

	# a better pie
	pie(votes, main = "dutch elections 2010")

	# as above but for a specific municipality
	municipality <- "Almere"

	# %in% never yields NA, so rows with a missing name are simply not selected
	municipality.selection <- dat$Gemeente %in% municipality

	# as.numeric() below needs exactly one row; stop early with a clear
	# message if the name is absent or matches more than one row
	stopifnot(sum(municipality.selection) == 1)
	municipalities.votes <- dat[municipality.selection, parties]

	# note: from here on `total` holds this municipality's votes
	total <- as.numeric(municipalities.votes)
	names(total) <- parties

	n <- 7
	sorted.total <- sort(total, decreasing = TRUE)
	votes <- lump.tail(sorted.total, n)

	# a better pie
	pie(votes, main = municipality)

	# Express every party's votes relative to the valid votes
	# ('geldige stemmen') of the same row.
	relative <- dat[parties] / dat[, "geldige stemmen"]

	# correlation between the relative results of each pair of parties
	cross.corr <- cor(relative)

	# blank out the diagonal so a party is never compared with itself
	diag(cross.corr) <- NA

	# correlations of VVD with every other party
	vvd.corr <- cross.corr["VVD", ]
	vvd.corr

	# largest and smallest correlation for VVD (NA entries ignored)
	vvd.corr.max <- max(vvd.corr, na.rm = TRUE)
	vvd.corr.max

	vvd.corr.min <- min(vvd.corr, na.rm = TRUE)
	vvd.corr.min

	# which party has the most positive correlation with VVD?
	vvd.corr[which.max(vvd.corr)]

	# which party has the most negative correlation with VVD?
	vvd.corr[which.min(vvd.corr)]

	# names of the parties (row and column names of cross.corr are the same)
	cross.corr.parties <- rownames(cross.corr)

	# for every column of cross.corr: the largest value, NA entries ignored
	corr.max.value <- apply(cross.corr, 2, FUN = function(x) max(x, na.rm = TRUE))
	corr.max.value

	# ... and the name of the row in which that largest value occurs
	corr.max.party <- apply(cross.corr, 2, FUN = function(x) cross.corr.parties[which.max(x)])
	corr.max.party

	# Combine into a data frame.
	# The vectors are passed to data.frame() directly: going through cbind()
	# first builds a character matrix, which turns the correlations into
	# strings and makes order() below sort them as text instead of as numbers.
	cross.corr.dataframe <- data.frame(
		corr.max.value,
		corr.max.party,
		stringsAsFactors = FALSE
	)
	cross.corr.dataframe

	# sort by highest correlation (numeric, descending)
	I <- order(cross.corr.dataframe$corr.max.value, decreasing = TRUE)
	cross.corr.dataframe[I, ]

	# highest correlation
	cross.corr.dataframe[I[1], ]


	# for every column of cross.corr: the smallest value, NA entries ignored
	corr.min.value <- apply(cross.corr, 2, FUN = function(x) min(x, na.rm = TRUE))
	corr.min.value

	# ... and the name of the row in which that smallest value occurs
	corr.min.party <- apply(cross.corr, 2, FUN = function(x) cross.corr.parties[which.min(x)])
	corr.min.party

	# Combine into a data frame.
	# The vectors are passed to data.frame() directly: going through cbind()
	# first builds a character matrix, which turns the correlations into
	# strings and makes order() below sort them as text instead of as numbers.
	cross.corr.dataframe <- data.frame(
		corr.min.value,
		corr.min.party,
		stringsAsFactors = FALSE
	)
	cross.corr.dataframe

	# sort by strongest negative correlation: ascending numeric order puts
	# the most negative value first
	I <- order(cross.corr.dataframe$corr.min.value)
	cross.corr.dataframe[I, ]

	# strongest negative correlation
	cross.corr.dataframe[I[1], ]
No results found