Created
November 1, 2014 23:18
-
-
Save dmpe/b8c3f74d1e937d90b20f to your computer and use it in GitHub Desktop.
Analysis of Singapore's Education using three datasets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("ggplot2") | |
source("PupilsPerTeacherRatio.R") # load our data | |
source("ExpenditurePerStudent_Pupils.R") # load the data | |
data = matrix(c(cleanedData.pupils$Years, cleanedData.pupils$ExpednInSD, | |
cleanedData.ratio$PupilsPerTeacher), ncol=3) | |
df = data.frame(data) | |
colnames(df) <- c("Year", "MoneySpend", "Ratio") | |
#TODO ggplot | |
pl = plot(x=df$Year, y=log(df$MoneySpend), ylim=c(2,10), main ="Red points are the ratio, which should be declining. | |
Whereas dark points are money spend on each student.") | |
points(x=df$Year, y=log(df$Ratio), col=2) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# let's load some data from Excel :_ | |
suppressMessages(library("XLConnect")) | |
library("plyr") | |
# Number of Classes and Class Size by Level | |
if(!file.exists("NumberOfClassesSizeLevel.xlsx")) { | |
#download.file("http://data.gov.sg/Metadata/SGMatadata.aspx?id=0304010000000005778F", | |
# "NumberOfClassesSizeLevel.xlsx.xlsx") | |
} | |
loadedXLSofData.class = loadWorkbook("NumberOfClassesSizeLevel.xlsx") | |
rawTable.class = readWorksheet(loadedXLSofData.class, 1) | |
#Begin with just 2 columns | |
#select only primary "all" and secondary "all" | |
rawTable.class.twoCol <- rawTable.class[c("Year","Description", | |
"CLASSES.AND.CLASS.SIZE.BY.LEVEL...PUBLIC.SCHOOLS", | |
"CLASSES.AND.CLASS.SIZE.BY.LEVEL...PUBLIC.SCHOOLS.9")] | |
#rename column names to make it more clear for next steps | |
colnames(rawTable.class.twoCol) <- c("Year", "Description", | |
"AllNumbersForPrimarySchools", "AllNumbersForSecondarySchools") | |
#select row by its description and devide them into two tables | |
numberOfClases = subset(rawTable.class.twoCol,rawTable.class.twoCol$Description=="NO OF CLASSES") | |
avarageClassSize = subset(rawTable.class.twoCol,rawTable.class.twoCol$Description=="AVE CLASS SIZE") | |
#great; both are chars. We need to convert them | |
class(numberOfClases$AllNumbersForPrimarySchools) | |
class(numberOfClases$Year) | |
class(avarageClassSize$AllNumbersForSecondarySchools) | |
cleanedData.numberOfClases = suppressWarnings(data.frame(llply(numberOfClases, as.numeric))) | |
# because we apply as.numeric to chars we introduce NA (and warnings [sic!]) | |
cleanedData.numberOfClases$Description <- NULL | |
cleanedData.avarageClassSize = suppressWarnings(data.frame(llply(avarageClassSize, as.numeric))) | |
cleanedData.avarageClassSize$Description <- NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# let's load some data from Excel :_ | |
suppressMessages(library("XLConnect")) | |
library("plyr") | |
if(!file.exists("ExpenditurePerStudent_PS.xls")) { | |
download.file("http://data.gov.sg/Agency_Data/MOE/0303010000000005772Z.xls", | |
"ExpenditurePerStudent_PS.xls") | |
} | |
loadedXLSofData.pupils = loadWorkbook("ExpenditurePerStudent_PS.xls") | |
rawTable.pupils = readWorksheet(loadedXLSofData.pupils, 1) | |
rawData.pupils = rename(rawTable.pupils[3:30,0:2], c("X2.0" = "Years", "SGDATA" = "ExpednInSD")) | |
# summary(rawData) # DF which is a list | |
rawData.pupils$Years <- with(rawData.pupils, | |
replace(rawData.pupils$Years, | |
rawData.pupils$Years == "2013*", "2013")) | |
# NUMERIC all | |
cleanedData.pupils = data.frame(llply(rawData.pupils, as.numeric)) | |
cleanedData.pupils = cleanedData.pupils[cleanedData.pupils$Years<=2012,] | |
summary(lm(cleanedData.pupils$ExpednInSD ~ cleanedData.pupils$Years)) | |
cor(cleanedData.pupils$Years, cleanedData.pupils$ExpednInSD) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("XLConnect") | |
library("plyr") | |
loadedXLSofData.ratio = loadWorkbook("PupilsPerTeacherPrimarySchools.xlsx") | |
rawTable.ratio = readWorksheet(loadedXLSofData.ratio, 1) | |
rawData.ratio = rename(rawTable.ratio[2:nrow(rawTable.ratio),], c("X0301050000000005757K" = "Years", | |
"Col2" = "PupilsPerTeacher")) | |
cleanedData.ratio = data.frame(llply(rawData.ratio, as.numeric)) | |
cleanedData.ratio = cleanedData.ratio[cleanedData.ratio$Years>=1986,] | |
summary( lm(cleanedData.ratio$PupilsPerTeacher ~ cleanedData.ratio$Years)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
(as of 2017, cannot be used anymore)
https://ref.data.gov.sg/Metadata/SGMatadata.aspx?id=0304010000000005778F&mid=135758&t=TEXTUAL
https://ref.data.gov.sg/Metadata/SGMatadata.aspx?id=0301050000000005757K&mid=105030&t=TEXTUAL
https://ref.data.gov.sg/Metadata/SGMatadata.aspx?id=0303010000000005772Z&mid=104977&t=TEXTUAL