Last active
December 27, 2015 09:19
-
-
Save natbusa/7303557 to your computer and use it in GitHub Desktop.
Basic statistics on the Dutch election of 2010
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the Dutch election file provided by
# http://www.engagedata.eu/dataset/14399
# (https://engagefp7.s3.amazonaws.com/resources/dataset_14399/TK2010.csv).
# The first 31 lines of the file are skipped and no header row is read, so the
# columns initially carry R's default names.
dat <- read.csv2(
  file = 'http://engagefp7.s3.amazonaws.com/resources/dataset_14399/TK2010.csv',
  header = FALSE,
  skip = 31
)
# Column names for the election table, in the order of the file's columns.
# Everything after 'Opkomstpercentage' is a party column.
column.labels <- c(
  'Gemeente',
  'geldige stemmen',
  'ongeldige stemmen',
  'blanco stemmen',
  'Percentage blanco stemmen',
  'kiesgerechtigden',
  'Opkomst',
  'Opkomstpercentage',
  'VVD',
  'Partij van de Arbeid (P.v.d.A.)',
  'PVV (Partij voor de Vrijheid)',
  'Christen Democratisch Appel (CDA)',
  'SP (Socialistische Partij)',
  'Democraten 66 (D66)',
  'GROENLINKS',
  'ChristenUnie',
  'Staatkundig Gereformeerde Partij (SGP)',
  'Partij voor de Dieren',
  'TROTS OP NEDERLAND LIJST RITA VERDONK',
  'Partij voor Mens en Spirit (MenS)',
  'Piratenpartij',
  'Lijst 17',
  'Partij een',
  'Nieuw Nederland',
  'Heel NL',
  'Evangelische Partij Nederland'
)

# Semicolon-separated listing with one name per line; every line except the
# last ends in ';', and the text starts and ends with a newline.
columns <- paste0('\n', paste(column.labels, collapse = ';\n'), '\n')

# Parse the listing as a data frame; the names end up in its first column.
column.names <- read.csv2(text = columns, header = FALSE)
# Rename the columns of the data frame using the first column of the parsed
# name listing. as.character() makes the assignment independent of whether
# that column was read as a factor or as character strings.
colnames(dat) <- as.character(column.names[, 1])

# Remove the last four lines of the table. head() with a negative count is
# used instead of 1:(nrow(dat) - 4): that sequence counts backwards when the
# table has four rows or fewer, which selects the wrong rows or fails.
dat <- head(dat, -4)
# How many municipalities? Row count, length of the municipality column, and
# the number of distinct municipality names.
nrow(dat)
length(dat$Gemeente)
length(unique(dat$Gemeente))

# The party columns are all columns to the right of 'Opkomstpercentage'.
n <- names(dat)
from <- which(n == 'Opkomstpercentage') + 1
to <- length(n)
parties <- n[from:to]
parties

# How many parties?
length(parties)
# Votes per party, summed over all rows of the table.
total <- colSums(dat[parties])
total

# Bar chart of the totals, followed by two logarithmic alternatives:
# log10-transformed values, and the raw values on a logarithmic y axis.
barplot(total)
barplot(log10(total))
barplot(total, log = 'y')

# Most votes: the entry (or entries, on a tie) equal to the maximum.
total.max.value <- max(total)
total.max <- total[total == total.max.value]
total.max

# Fewest votes: the entry (or entries, on a tie) equal to the minimum.
total.min.value <- min(total)
total.min <- total[total == total.min.value]
total.min
# Coverage of the party with the fewest votes, per municipality.
loser.columns <- c('Gemeente', names(total.min))
# Rows with at least one vote for the losing party
I <- dat[names(total.min)] > 0
# Rows with exactly 12 votes for the losing party
J <- dat[names(total.min)] == 12

# Municipalities where the losing party got at least one vote
party.loosing <- dat[I, loser.columns]
nrow(party.loosing)
# Municipalities where the losing party got exactly 12 votes
party.loosing <- dat[J, loser.columns]
nrow(party.loosing)

# CDA < VVD < PVV
I <- dat['VVD'] < dat['PVV (Partij voor de Vrijheid)']
J <- dat['VVD'] > dat['Christen Democratisch Appel (CDA)']
d <- dat[
  I & J,
  c('Gemeente',
    'VVD',
    'PVV (Partij voor de Vrijheid)',
    'Christen Democratisch Appel (CDA)')
]
# Municipalities where CDA < VVD < PVV, and how many there are
d$Gemeente
nrow(d)
# Collapse a vote vector that is already sorted in decreasing order into its
# first n entries plus one 'others' entry holding the sum of the remaining
# entries. When nothing remains beyond the first n entries, the vector is
# returned unchanged (no 'others' entry), so n may be as large as, or larger
# than, the number of parties.
group.small.parties <- function(sorted.total, n = 7) {
  is.top <- seq_along(sorted.total) <= n
  if (all(is.top)) {
    return(sorted.total)
  }
  c(sorted.total[is.top], others = sum(sorted.total[!is.top]))
}

# time for pies!
pie(total)

# create an 'others' category for the small parties
n <- 7
sorted.total <- sort(total, decreasing = TRUE)
votes <- group.small.parties(sorted.total, n)
# a better pie
pie(votes, main = 'dutch elections 2010')

# as above but for a specific municipality
municipality <- 'Almere'
# %in% never yields NA, so rows with a missing name are simply not selected
municipality.selection <- dat$Gemeente %in% municipality
if (!any(municipality.selection)) {
  stop("no rows found for municipality '", municipality, "'", call. = FALSE)
}
# drop = FALSE keeps a data frame even if there is only one party column
municipalities.votes <- dat[municipality.selection, parties, drop = FALSE]

# Votes per party for the selection, named by party. colSums() returns the
# row itself when exactly one row matches and adds the rows up otherwise.
total <- colSums(municipalities.votes)
n <- 7
sorted.total <- sort(total, decreasing = TRUE)
votes <- group.small.parties(sorted.total, n)
# a better pie
pie(votes, main = municipality)
# Correct the party votes by the valid votes: each party column is divided by
# the 'geldige stemmen' column of the same row.
relative <- dat[parties] / dat[['geldige stemmen']]

# Correlation matrix of the relative votes
cross.corr <- cor(relative)
# Do not look at the diagonal
diag(cross.corr) <- NA

# VVD correlation with every other party
vvd.corr <- cross.corr['VVD', ]
vvd.corr

# Maximum and minimum correlation for VVD (the NA on the diagonal is ignored)
vvd.corr.max <- max(vvd.corr, na.rm = TRUE)
vvd.corr.max
vvd.corr.min <- min(vvd.corr, na.rm = TRUE)
vvd.corr.min

# Which party has the highest (most positive) correlation with VVD?
vvd.corr[which.max(vvd.corr)]
# Which party has the lowest (most negative) correlation with VVD?
vvd.corr[which.min(vvd.corr)]
# Names of the parties (rows and columns of the correlation matrix are the same)
cross.corr.parties <- rownames(cross.corr)

# Maximum correlation per column, and the party it is reached with
corr.max.value <- apply(cross.corr, 2, FUN = function(x) max(x, na.rm = TRUE))
corr.max.value
corr.max.party <- apply(
  cross.corr, 2,
  FUN = function(x) cross.corr.parties[which.max(x)]
)
corr.max.party

# Combine into a data frame. The vectors are passed to data.frame() directly:
# cbind() on a numeric and a character vector builds a character matrix, which
# would turn the correlations into text and make order() sort them as strings.
cross.corr.dataframe <- data.frame(
  corr.max.value,
  corr.max.party,
  row.names = names(corr.max.value),
  stringsAsFactors = FALSE
)
cross.corr.dataframe
# Sort by highest correlation
I <- order(cross.corr.dataframe[, 1], decreasing = TRUE)
cross.corr.dataframe[I, ]
# Highest correlation
cross.corr.dataframe[I[1], ]

# Minimum correlation per column, and the party it is reached with
corr.min.value <- apply(cross.corr, 2, FUN = function(x) min(x, na.rm = TRUE))
corr.min.value
corr.min.party <- apply(
  cross.corr, 2,
  FUN = function(x) cross.corr.parties[which.min(x)]
)
corr.min.party

# Combine into a data frame (numeric values kept numeric, as above)
cross.corr.dataframe <- data.frame(
  corr.min.value,
  corr.min.party,
  row.names = names(corr.min.value),
  stringsAsFactors = FALSE
)
cross.corr.dataframe
# Sort by strongest negative correlation: ascending, so the most negative
# value comes first
I <- order(cross.corr.dataframe[, 1], decreasing = FALSE)
cross.corr.dataframe[I, ]
# Strongest negative correlation
cross.corr.dataframe[I[1], ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment