inkhorn · May 2, 2013 01:01
diff --git a/toronto_casino.r b/toronto_casino.r
 library(ff)
 library(ffbase)
 library(stringr)
 library(ggplot2)
 library(ggthemes)
 library(reshape2)
 library(RgoogleMaps)

 # Two independent reads of the same survey export: `casino` is recoded to
 # numbers further down, `casino.orig` keeps the original text answers.
 casino <- read.csv(file = "/home/inkhorn/Downloads/casino_survey_results20130325.csv")
 casino.orig <- read.csv(file = "/home/inkhorn/Downloads/casino_survey_results20130325.csv")

 # Canadian postal codes with latitude/longitude coordinates, read as an ffdf
 # (first.rows / next.rows of 50000); the file has no header row.
 pcodes <- read.csv.ffdf(
   file = "/home/inkhorn/Downloads/zipcodeset.txt",
   header = FALSE,
   first.rows = 50000,
   next.rows = 50000,
   colClasses = NA
 )

 # Numerical recoding of the text responses.
 #
 # recode.by.label() looks each response up in a named numeric vector
 # (label -> code). Anything that is not one of the labels (blank answers,
 # other text, NA) becomes NA. One match() lookup replaces the nested ifelse()
 # ladders and always returns a numeric vector.
 recode.by.label <- function(responses, scale) {
   unname(scale[match(responses, names(scale))])
 }

 favour.scale <- c("Strongly Opposed" = 1,
                   "Somewhat Opposed" = 2,
                   "Neutral or Mixed Feelings" = 3,
                   "Somewhat in Favour" = 4,
                   "Strongly in Favour" = 5)

 image.scale <- c("Does Not Fit My Image At All" = 1,
                  "Neutral / I am Not Sure" = 2,
                  "Fits Image Somewhat" = 3,
                  "Fits Image Perfectly" = 4)

 importance.scale <- c("Not Important At All" = 1,
                       "Somewhat Important" = 2,
                       "Very Important" = 3)

 suitability.scale <- c("Strongly Unsuitable" = 1,
                        "Somewhat Unsuitable" = 2,
                        "Neutral or Mixed Feelings" = 3,
                        "Somewhat Suitable" = 4,
                        "Highly Suitable" = 5)

 casino$Q1_A <- recode.by.label(casino.orig$Q1_A, favour.scale)
 casino$Q2_A <- recode.by.label(casino.orig$Q2_A, image.scale)

 # Columns 8:24 are recoded on the importance scale
 for (i in 8:24) {
   casino[, i] <- recode.by.label(casino.orig[, i], importance.scale)
 }

 # These six columns are recoded on the suitability scale
 for (i in c(31:32, 47, 48, 63, 64)) {
   casino[, i] <- recode.by.label(casino.orig[, i], suitability.scale)
 }

 # Blank responses occur in the original dataset. For the plots of the original
 # text options they are either dropped or (later on) coded as "Did not disclose".

 casino.orig$Q1_A[casino.orig$Q1_A == ""] <- NA
 casino.orig$Q1_A <- factor(
   casino.orig$Q1_A,
   levels = c("Strongly Opposed", "Somewhat Opposed", "Neutral or Mixed Feelings",
              "Somewhat in Favour", "Strongly in Favour")
 )

 # How people feel about a new casino (missing answers dropped)
 ggplot(subset(casino.orig, !is.na(Q1_A)), aes(x = Q1_A, y = ..count../sum(..count..))) +
   geom_bar(fill = "forest green") +
   coord_flip() +
   ggtitle("How do you feel about having a new casino in Toronto?") +
   scale_x_discrete(name = "") +
   theme_wsj() +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = "text") +
   scale_y_continuous(labels = percent)

 # How the casino fits the respondent's image of Toronto (blank answers dropped)
 ggplot(subset(casino.orig, Q2_A != ''), aes(x = Q2_A, y = ..count../sum(..count..))) +
   geom_bar(fill = "forest green") +
   coord_flip() +
   ggtitle("How does a new casino in Toronto fit your image of the City of Toronto?") +
   scale_x_discrete(name = "") +
   theme_wsj() +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = "text") +
   scale_y_continuous(labels = percent)

 # Preferred location (blank answers dropped)
 ggplot(subset(casino.orig, Q6 != ''), aes(x = Q6, y = ..count../sum(..count..))) +
   geom_bar(fill = "forest green") +
   coord_flip() +
   ggtitle("If a casino is built, where would you prefer to see it located?") +
   scale_x_discrete(name = "") +
   theme_wsj() +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = "text") +
   scale_y_continuous(labels = percent)

 # Reorder the text labels of the downtown suitability questions using the
 # recoded numeric columns in `casino`
 casino.orig$Q7_A_StandAlone <- reorder(casino.orig$Q7_A_StandAlone, casino$Q7_A_StandAlone)
 casino.orig$Q7_A_Integrated <- reorder(casino.orig$Q7_A_Integrated, casino$Q7_A_Integrated)

 # Proportions over the first five levels of each question, one column per casino type
 stand.and.integrated.ratings.downtown <- cbind(
   prop.table(as.matrix(table(casino.orig$Q7_A_StandAlone)[1:5])),
   prop.table(as.matrix(table(casino.orig$Q7_A_Integrated)[1:5]))
 )

 colnames(stand.and.integrated.ratings.downtown) <- c("Standalone Casino", "Integrated Entertainment Complex")

 # Long format: one row per (Rating, Casino Type) pair
 stand.and.integrated.ratings.downtown.long <- melt(
   stand.and.integrated.ratings.downtown,
   varnames = c("Rating", "Casino Type"),
   value.name = "Percentage"
 )

 # Ratings of casino suitability for the downtown location
 ggplot(stand.and.integrated.ratings.downtown.long,
        aes(x = stand.and.integrated.ratings.downtown.long$"Casino Type", fill = Rating,
            y = Percentage, label = sprintf("%.02f %%", Percentage*100))) +
   geom_bar(position = "dodge") +
   coord_flip() +
   ggtitle("Ratings of Casino Suitability \nin Downtown Toronto by Casino Type") +
   scale_x_discrete(name = "") +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   scale_y_continuous(labels = percent) +
   geom_text(aes(x = stand.and.integrated.ratings.downtown.long$"Casino Type", y = Percentage,
                 ymax = Percentage, label = sprintf("%.01f%%", Percentage*100), hjust = .75),
             position = position_dodge(width = 1), size = 4) +
   scale_fill_few(palette = "light") +
   theme_wsj()

 # Exhibition Place ratings: proportions over table entries 2:6 of each question,
 # one column per casino type
 stand.and.integrated.ratings.exhibition <- cbind(
   prop.table(as.matrix(table(casino.orig$Q7_B_StandAlone)[2:6])),
   prop.table(as.matrix(table(casino.orig$Q7_B_Integrated)[2:6]))
 )

 colnames(stand.and.integrated.ratings.exhibition) <- c("Standalone Casino", "Integrated Entertainment Complex")

 # Long format: one row per (Rating, Casino Type) pair
 stand.and.integrated.ratings.exhibition.long <- melt(
   stand.and.integrated.ratings.exhibition,
   varnames = c("Rating", "Casino Type"),
   value.name = "Percentage"
 )

 # Put the rating labels in the same order as the first five levels of Q7_A_StandAlone
 stand.and.integrated.ratings.exhibition.long$Rating <- factor(
   stand.and.integrated.ratings.exhibition.long$Rating,
   levels = levels(casino.orig$Q7_A_StandAlone)[1:5]
 )

 # Ratings of casino suitability for the Exhibition Place location
 ggplot(stand.and.integrated.ratings.exhibition.long,
        aes(x = stand.and.integrated.ratings.exhibition.long$"Casino Type", fill = Rating,
            y = Percentage, label = sprintf("%.02f %%", Percentage*100))) +
   geom_bar(position = "dodge") +
   coord_flip() +
   ggtitle("Ratings of Casino Suitability \nat Exhibition Place by Casino Type") +
   scale_x_discrete(name = "") +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   scale_y_continuous(labels = percent) +
   geom_text(aes(x = stand.and.integrated.ratings.exhibition.long$"Casino Type", y = Percentage,
                 ymax = Percentage, label = sprintf("%.01f%%", Percentage*100), hjust = .75),
             position = position_dodge(width = 1), size = 4) +
   scale_fill_few(palette = "light") +
   theme_wsj()

 # Port Lands ratings: proportions over table entries 2:6 of each question,
 # one column per casino type
 stand.and.integrated.ratings.portlands <- cbind(
   prop.table(as.matrix(table(casino.orig$Q7_C_StandAlone)[2:6])),
   prop.table(as.matrix(table(casino.orig$Q7_C_Integrated)[2:6]))
 )

 colnames(stand.and.integrated.ratings.portlands) <- c("Standalone Casino", "Integrated Entertainment Complex")

 # Long format: one row per (Rating, Casino Type) pair
 stand.and.integrated.ratings.portlands.long <- melt(
   stand.and.integrated.ratings.portlands,
   varnames = c("Rating", "Casino Type"),
   value.name = "Percentage"
 )

 # Put the rating labels in the same order as the first five levels of Q7_A_StandAlone
 stand.and.integrated.ratings.portlands.long$Rating <- factor(
   stand.and.integrated.ratings.portlands.long$Rating,
   levels = levels(casino.orig$Q7_A_StandAlone)[1:5]
 )

 # Ratings of casino suitability for the Port Lands location
 ggplot(stand.and.integrated.ratings.portlands.long,
        aes(x = stand.and.integrated.ratings.portlands.long$"Casino Type", fill = Rating,
            y = Percentage, label = sprintf("%.02f %%", Percentage*100))) +
   geom_bar(position = "dodge") +
   coord_flip() +
   ggtitle("Ratings of Casino Suitability \nat Port Lands by Casino Type") +
   scale_x_discrete(name = "") +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   scale_y_continuous(labels = percent) +
   geom_text(aes(x = stand.and.integrated.ratings.portlands.long$"Casino Type", y = Percentage,
                 ymax = Percentage, label = sprintf("%.01f%%", Percentage*100), hjust = .75),
             position = position_dodge(width = 1), size = 4) +
   scale_fill_few(palette = "light") +
   theme_wsj()

 # Postal codes (FSAs really) and their coordinates.
 # The geocode file is imported as an ffdf, which the author found faster to
 # merge with the survey data; casino.orig therefore has to be coerced to an
 # ffdf as well before the merge.

 casino.orig$PostalCode <- toupper(casino.orig$PostalCode)

 pcodes <- read.csv.ffdf(
   file = "/home/inkhorn/Downloads/zipcodeset.txt",
   header = FALSE,
   first.rows = 50000,
   next.rows = 50000,
   colClasses = NA
 )
 names(pcodes) <- c("Postal", "Lat", "Long", "City", "Prov")

 # FSA = first three characters of the postal code, upper-cased
 pcodes$FSA <- as.ff(as.factor(toupper(substr(pcodes[, "Postal"], 1, 3))))

 casino.orig <- as.ffdf(casino.orig)
 casino.orig$PostalCode <- as.ff(as.factor(toupper(casino.orig[, "PostalCode"])))

 # Left join: every survey row is kept, coordinates are attached where the FSA matches
 casino.orig <- merge(casino.orig, pcodes, by.x = "PostalCode", by.y = "FSA", all.x = TRUE)

 # Full map: keep only the records that were matched to coordinates
 casino.gc <- casino.orig[which(!is.na(casino.orig[, "Lat"])), ]
 mymap <- MapBackground(lat = casino.gc$Lat, lon = casino.gc$Long)
 PlotOnStaticMap(mymap, casino.gc$Lat, casino.gc$Long, cex = 1.5, pch = 21, bg = "orange")

 # Build the list of cities by number of responses, most frequent first,
 # dropping unused levels and the blank city name
 cities <- data.frame(table(casino.orig[, "City"]))
 cities <- cities[cities$Freq > 0, ]
 cities <- cities[order(cities$Freq, decreasing = TRUE), ]
 cities <- cities[cities$Var1 != '', ]

 # Top cities (28 is an arbitrary dividing line). head() is used instead of
 # [1:28,] so that fewer than 28 cities does not produce padding rows of NA.
 cities.filter <- head(cities, 28)
 names(cities.filter) <- c("City", "# Responses")

 # Restrict the survey ffdf to the top cities. The column is called "City"
 # after the rename above; the old name "Var1" no longer exists, and indexing
 # it would return NULL and match nothing.
 casino.top.so <- casino.orig[which(casino.orig[, "City"] %in% cities.filter$City), ]

 # here's a transparency function that I used for the southern ontario map

 addTrans <- function(color,trans)
 {
   # Adds transparency to a colour and returns "#RRGGBBAA" hex strings.
   #
   # color: colour specification(s) accepted by col2rgb()
   # trans: alpha value(s) between 0 and 255
   #        (0 = fully transparent, 255 = fully visible)
   #
   # color and trans must have the same length, or one of them must have
   # length 1 (it is then repeated to the length of the other).
   # Stops with an error on incompatible lengths or on trans outside 0..255;
   # out-of-range values would otherwise index past the hex digit table and
   # put "NA" into the result.

   # && rather than &: these are scalar conditions inside if()
   if (length(color) != length(trans) && !any(c(length(color), length(trans)) == 1)) stop("Vector lengths not correct")
   if (anyNA(trans) || any(trans < 0 | trans > 255)) stop("trans must be between 0 and 255")
   if (length(color) == 1 && length(trans) > 1) color <- rep(color, length(trans))
   if (length(trans) == 1 && length(color) > 1) trans <- rep(trans, length(color))

   # Two-digit upper-case hex representation of values in 0..255
   num2hex <- function(x)
   {
     hex <- unlist(strsplit("0123456789ABCDEF", split = ""))
     paste0(hex[(x - x %% 16) / 16 + 1], hex[x %% 16 + 1])
   }

   # One column per colour: red, green, blue rows plus the alpha row
   rgba <- rbind(col2rgb(color), trans)
   paste0("#", apply(apply(rgba, 2, num2hex), 2, paste, collapse = ""))
 }

 # Southern Ontario map: background fitted to the top-city records, markers
 # filled with addTrans("orange",10), i.e. orange with an alpha value of 10

 mymap <- MapBackground(lat = casino.top.so$Lat, lon = casino.top.so$Long)
 PlotOnStaticMap(
   mymap,
   casino.top.so$Lat,
   casino.top.so$Long,
   cex = 1.5,
   pch = 21,
   bg = addTrans("orange", 10)
 )

 # Question 3: issues of importance regarding the new casino.
 # For each of the 16 issues (columns 8:23 of the recoded data) compute the
 # share of non-missing answers coded 3, then plot the shares in ascending order.

 q3.summary <- matrix(
   NA, 16, 1,
   dimnames = list(
     c("Design of the facility",
       "Employment opportunities", "Entertainment and cultural activities",
       "Expanded convention facilities", "Integration with surrounding areas",
       "New hotel accommodations", "Problem gambling & health concerns",
       "Public safety and social concerns", "Public space",
       "Restaurants", "Retail", "Revenue for the City", "Support for local businesses",
       "Tourist attraction", "Traffic concerns", "Training and career development"),
     c("% Very Important")
   )
 )

 q3.summary[1:16] <- vapply(
   8:23,
   function(column) mean(casino[, column] == 3, na.rm = TRUE),
   numeric(1)
 )

 # Row-subsetting the one-column matrix drops it to a named vector, so the
 # resulting data frame carries the issue names as row names
 q3.summary <- as.data.frame(q3.summary[order(q3.summary[, 1], decreasing = FALSE), ])
 names(q3.summary)[1] <- "% Very Important"
 q3.summary$Concern <- rownames(q3.summary)
 q3.summary <- q3.summary[order(q3.summary$"% Very Important", decreasing = FALSE), ]

 # Fix the factor levels in sorted order so the plot keeps that order
 q3.summary$Concern <- factor(q3.summary$Concern, levels = q3.summary$Concern)

 ggplot(q3.summary, aes(x = Concern, y = q3.summary$"% Very Important")) +
   geom_point(size = 5, colour = "forest green") +
   coord_flip() +
   ggtitle("Issues of Importance Surrounding\nthe New Casino") +
   scale_x_discrete(name = "Issues of Importance") +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   scale_y_continuous(labels = percent) +
   theme_wsj()
  
 # Features people might want if a new Integrated Entertainment Complex is built.
 # Downtown: mean of each of columns 36:44, sorted ascending and plotted.

 q7a.summary <- matrix(
   NA, 9, 1,
   dimnames = list(
     c("No Casino", "Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
       "Hotel", "Nightclubs", "Restaurants", "Retail", "Theatre"),
     c("% Include")
   )
 )

 q7a.summary[1:9] <- vapply(
   36:44,
   function(column) mean(casino[, column], na.rm = TRUE),
   numeric(1)
 )

 # Row-subsetting the one-column matrix yields a named vector; the feature
 # names become the row names of the data frame
 q7a.summary <- as.data.frame(q7a.summary[order(q7a.summary[, 1], decreasing = FALSE), ])
 names(q7a.summary)[1] <- "% Include"
 q7a.summary$feature <- rownames(q7a.summary)
 q7a.summary$feature <- factor(q7a.summary$feature, levels = q7a.summary$feature)

 ggplot(q7a.summary, aes(x = feature, y = q7a.summary$"% Include")) +
   geom_point(size = 5, colour = "forest green") +
   coord_flip() +
   ggtitle("What People Would Want in an Integrated\nEntertainment Complex in Downtown Toronto") +
   scale_x_discrete(name = "Features") +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   scale_y_continuous(labels = percent, name = "% Wanting the Feature") +
   theme_wsj()

 # Exhibition Place: mean of each of columns 52:60, sorted ascending and plotted
 q7b.summary <- matrix(
   NA, 9, 1,
   dimnames = list(
     c("No Casino", "Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
       "Hotel", "Nightclubs", "Restaurants", "Retail", "Theatre"),
     c("% Include")
   )
 )

 q7b.summary[1:9] <- vapply(
   52:60,
   function(column) mean(casino[, column], na.rm = TRUE),
   numeric(1)
 )

 # Row-subsetting the one-column matrix yields a named vector; the feature
 # names become the row names of the data frame
 q7b.summary <- as.data.frame(q7b.summary[order(q7b.summary[, 1], decreasing = FALSE), ])
 names(q7b.summary)[1] <- "% Include"
 q7b.summary$feature <- rownames(q7b.summary)
 q7b.summary$feature <- factor(q7b.summary$feature, levels = q7b.summary$feature)

 ggplot(q7b.summary, aes(x = feature, y = q7b.summary$"% Include")) +
   geom_point(size = 5, colour = "forest green") +
   coord_flip() +
   ggtitle("What People Would Want in an Integrated\nEntertainment Complex at the Exhbition Place") +
   scale_x_discrete(name = "Features") +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   scale_y_continuous(labels = percent, name = "% Wanting the Feature") +
   theme_wsj()

 # Port Lands: mean of each of columns 68:76, sorted ascending and plotted
 q7c.summary <- matrix(
   NA, 9, 1,
   dimnames = list(
     c("No Casino", "Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
       "Hotel", "Nightclubs", "Restaurants", "Retail", "Theatre"),
     c("% Include")
   )
 )

 for (i in 68:76) {
   q7c.summary[i - 67] <- mean(casino[, i], na.rm = TRUE)
 }

 # Row-subsetting the one-column matrix yields a named vector; the feature
 # names become the row names of the data frame
 q7c.summary <- as.data.frame(q7c.summary[order(q7c.summary[, 1], decreasing = FALSE), ])
 names(q7c.summary)[1] <- "% Include"
 q7c.summary$feature <- rownames(q7c.summary)
 q7c.summary$feature <- factor(q7c.summary$feature, levels = q7c.summary$feature)

 # y must come from q7c.summary: plotting q7b.summary here would pair the
 # Port Lands feature order with the Exhibition Place values
 ggplot(q7c.summary, aes(x = feature, y = q7c.summary$"% Include")) +
   geom_point(size = 5, colour = "forest green") +
   coord_flip() +
   ggtitle("What People Would Want in an Integrated\nEntertainment Complex in Port Lands") +
   scale_x_discrete(name = "Features") +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   scale_y_continuous(labels = percent, name = "% Wanting the Feature") +
   theme_wsj()

 # A third, plain data.frame copy of the survey is read so that the demographic
 # columns can be used without the ffdf indexing notation (e.g. df[,"variable1"])

 casino.orig2 <- read.csv("/home/inkhorn/Downloads/casino_survey_results20130325.csv")

 # Gender and Age demographics.
 # Answers outside the known options (blank, NA, anything else) are coded as
 # "Did not disclose". Gender is taken from casino.orig2 only; the former
 # assignment from casino.orig was overwritten immediately and has been dropped.

 known.genders <- c("Female", "Male", "Transgendered")
 casino$Gender <- ifelse(casino.orig2$Gender %in% known.genders,
                         as.character(casino.orig2$Gender),
                         "Did not disclose")
 casino$Gender <- factor(casino$Gender, levels = c("Transgendered", "Did not disclose", "Female", "Male"))

 ggplot(casino, aes(x = Gender, y = ..count../sum(..count..))) +
   geom_bar(fill = "forest green") +
   coord_flip() +
   ggtitle("Gender Distribution of Respondents") +
   scale_x_discrete(name = "") +
   theme_wsj() +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = "text") +
   scale_y_continuous(labels = percent)

 # %in% never returns NA, so a missing age is also "Did not disclose"
 # (consistent with how Gender is handled)
 known.ages <- c("Under 15", "15-24", "25-34", "35-44", "45-54", "55-64", "65 or older")
 casino$Age <- ifelse(casino.orig2$Age %in% known.ages,
                      as.character(casino.orig2$Age),
                      "Did not disclose")
 casino$Age <- factor(casino$Age, levels = c("Did not disclose", known.ages))

 ggplot(casino, aes(x = Age, y = ..count../sum(..count..))) +
   geom_bar(fill = "forest green") +
   coord_flip() +
   ggtitle("Age Distribution of Respondents") +
   scale_x_discrete(name = "") +
   theme_wsj() +
   theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
   stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = "text") +
   scale_y_continuous(labels = percent)
	library(ff)
	library(ffbase)
	library(stringr)
	library(ggplot2)
	library(ggthemes)
	library(reshape2)
	library(RgoogleMaps)

	# Two independent reads of the same survey export: `casino` is recoded to
	# numbers further down, `casino.orig` keeps the original text answers.
	casino <- read.csv(file = "/home/inkhorn/Downloads/casino_survey_results20130325.csv")
	casino.orig <- read.csv(file = "/home/inkhorn/Downloads/casino_survey_results20130325.csv")

	# Canadian postal codes with latitude/longitude coordinates, read as an ffdf
	# (first.rows / next.rows of 50000); the file has no header row.
	pcodes <- read.csv.ffdf(
	  file = "/home/inkhorn/Downloads/zipcodeset.txt",
	  header = FALSE,
	  first.rows = 50000,
	  next.rows = 50000,
	  colClasses = NA
	)

	# Numerical recoding of the text responses.
	#
	# recode.by.label() looks each response up in a named numeric vector
	# (label -> code). Anything that is not one of the labels (blank answers,
	# other text, NA) becomes NA. One match() lookup replaces the nested ifelse()
	# ladders and always returns a numeric vector.
	recode.by.label <- function(responses, scale) {
	  unname(scale[match(responses, names(scale))])
	}

	favour.scale <- c("Strongly Opposed" = 1,
	                  "Somewhat Opposed" = 2,
	                  "Neutral or Mixed Feelings" = 3,
	                  "Somewhat in Favour" = 4,
	                  "Strongly in Favour" = 5)

	image.scale <- c("Does Not Fit My Image At All" = 1,
	                 "Neutral / I am Not Sure" = 2,
	                 "Fits Image Somewhat" = 3,
	                 "Fits Image Perfectly" = 4)

	importance.scale <- c("Not Important At All" = 1,
	                      "Somewhat Important" = 2,
	                      "Very Important" = 3)

	suitability.scale <- c("Strongly Unsuitable" = 1,
	                       "Somewhat Unsuitable" = 2,
	                       "Neutral or Mixed Feelings" = 3,
	                       "Somewhat Suitable" = 4,
	                       "Highly Suitable" = 5)

	casino$Q1_A <- recode.by.label(casino.orig$Q1_A, favour.scale)
	casino$Q2_A <- recode.by.label(casino.orig$Q2_A, image.scale)

	# Columns 8:24 are recoded on the importance scale
	for (i in 8:24) {
	  casino[, i] <- recode.by.label(casino.orig[, i], importance.scale)
	}

	# These six columns are recoded on the suitability scale
	for (i in c(31:32, 47, 48, 63, 64)) {
	  casino[, i] <- recode.by.label(casino.orig[, i], suitability.scale)
	}

	# Blank responses occur in the original dataset. For the plots of the original
	# text options they are either dropped or (later on) coded as "Did not disclose".

	casino.orig$Q1_A[casino.orig$Q1_A == ""] <- NA
	casino.orig$Q1_A <- factor(
	  casino.orig$Q1_A,
	  levels = c("Strongly Opposed", "Somewhat Opposed", "Neutral or Mixed Feelings",
	             "Somewhat in Favour", "Strongly in Favour")
	)

	# How people feel about a new casino (missing answers dropped)
	ggplot(subset(casino.orig, !is.na(Q1_A)), aes(x = Q1_A, y = ..count../sum(..count..))) +
	  geom_bar(fill = "forest green") +
	  coord_flip() +
	  ggtitle("How do you feel about having a new casino in Toronto?") +
	  scale_x_discrete(name = "") +
	  theme_wsj() +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = "text") +
	  scale_y_continuous(labels = percent)

	# How the casino fits the respondent's image of Toronto (blank answers dropped)
	ggplot(subset(casino.orig, Q2_A != ''), aes(x = Q2_A, y = ..count../sum(..count..))) +
	  geom_bar(fill = "forest green") +
	  coord_flip() +
	  ggtitle("How does a new casino in Toronto fit your image of the City of Toronto?") +
	  scale_x_discrete(name = "") +
	  theme_wsj() +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = "text") +
	  scale_y_continuous(labels = percent)

	# Preferred location (blank answers dropped)
	ggplot(subset(casino.orig, Q6 != ''), aes(x = Q6, y = ..count../sum(..count..))) +
	  geom_bar(fill = "forest green") +
	  coord_flip() +
	  ggtitle("If a casino is built, where would you prefer to see it located?") +
	  scale_x_discrete(name = "") +
	  theme_wsj() +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom = "text") +
	  scale_y_continuous(labels = percent)

	# Reorder the text labels of the downtown suitability questions using the
	# recoded numeric columns in `casino`
	casino.orig$Q7_A_StandAlone <- reorder(casino.orig$Q7_A_StandAlone, casino$Q7_A_StandAlone)
	casino.orig$Q7_A_Integrated <- reorder(casino.orig$Q7_A_Integrated, casino$Q7_A_Integrated)

	# Proportions over the first five levels of each question, one column per casino type
	stand.and.integrated.ratings.downtown <- cbind(
	  prop.table(as.matrix(table(casino.orig$Q7_A_StandAlone)[1:5])),
	  prop.table(as.matrix(table(casino.orig$Q7_A_Integrated)[1:5]))
	)

	colnames(stand.and.integrated.ratings.downtown) <- c("Standalone Casino", "Integrated Entertainment Complex")

	# Long format: one row per (Rating, Casino Type) pair
	stand.and.integrated.ratings.downtown.long <- melt(
	  stand.and.integrated.ratings.downtown,
	  varnames = c("Rating", "Casino Type"),
	  value.name = "Percentage"
	)

	# Ratings of casino suitability for the downtown location.
	# The labels use Percentage*100; the name `Percentage100` does not exist.
	ggplot(stand.and.integrated.ratings.downtown.long,
	       aes(x = stand.and.integrated.ratings.downtown.long$"Casino Type", fill = Rating,
	           y = Percentage, label = sprintf("%.02f %%", Percentage*100))) +
	  geom_bar(position = "dodge") +
	  coord_flip() +
	  ggtitle("Ratings of Casino Suitability \nin Downtown Toronto by Casino Type") +
	  scale_x_discrete(name = "") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent) +
	  geom_text(aes(x = stand.and.integrated.ratings.downtown.long$"Casino Type", y = Percentage,
	                ymax = Percentage, label = sprintf("%.01f%%", Percentage*100), hjust = .75),
	            position = position_dodge(width = 1), size = 4) +
	  scale_fill_few(palette = "light") +
	  theme_wsj()

	# Exhibition Place ratings: proportions over table entries 2:6 of each question,
	# one column per casino type
	stand.and.integrated.ratings.exhibition <- cbind(
	  prop.table(as.matrix(table(casino.orig$Q7_B_StandAlone)[2:6])),
	  prop.table(as.matrix(table(casino.orig$Q7_B_Integrated)[2:6]))
	)

	colnames(stand.and.integrated.ratings.exhibition) <- c("Standalone Casino", "Integrated Entertainment Complex")

	# Long format: one row per (Rating, Casino Type) pair
	stand.and.integrated.ratings.exhibition.long <- melt(
	  stand.and.integrated.ratings.exhibition,
	  varnames = c("Rating", "Casino Type"),
	  value.name = "Percentage"
	)

	# Put the rating labels in the same order as the first five levels of Q7_A_StandAlone
	stand.and.integrated.ratings.exhibition.long$Rating <- factor(
	  stand.and.integrated.ratings.exhibition.long$Rating,
	  levels = levels(casino.orig$Q7_A_StandAlone)[1:5]
	)

	# Ratings of casino suitability for the Exhibition Place location.
	# The labels use Percentage*100; the name `Percentage100` does not exist.
	ggplot(stand.and.integrated.ratings.exhibition.long,
	       aes(x = stand.and.integrated.ratings.exhibition.long$"Casino Type", fill = Rating,
	           y = Percentage, label = sprintf("%.02f %%", Percentage*100))) +
	  geom_bar(position = "dodge") +
	  coord_flip() +
	  ggtitle("Ratings of Casino Suitability \nat Exhibition Place by Casino Type") +
	  scale_x_discrete(name = "") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent) +
	  geom_text(aes(x = stand.and.integrated.ratings.exhibition.long$"Casino Type", y = Percentage,
	                ymax = Percentage, label = sprintf("%.01f%%", Percentage*100), hjust = .75),
	            position = position_dodge(width = 1), size = 4) +
	  scale_fill_few(palette = "light") +
	  theme_wsj()

	# Port Lands ratings: proportions over table entries 2:6 of each question,
	# one column per casino type
	stand.and.integrated.ratings.portlands <- cbind(
	  prop.table(as.matrix(table(casino.orig$Q7_C_StandAlone)[2:6])),
	  prop.table(as.matrix(table(casino.orig$Q7_C_Integrated)[2:6]))
	)

	colnames(stand.and.integrated.ratings.portlands) <- c("Standalone Casino", "Integrated Entertainment Complex")

	# Long format: one row per (Rating, Casino Type) pair
	stand.and.integrated.ratings.portlands.long <- melt(
	  stand.and.integrated.ratings.portlands,
	  varnames = c("Rating", "Casino Type"),
	  value.name = "Percentage"
	)

	# Put the rating labels in the same order as the first five levels of Q7_A_StandAlone
	stand.and.integrated.ratings.portlands.long$Rating <- factor(
	  stand.and.integrated.ratings.portlands.long$Rating,
	  levels = levels(casino.orig$Q7_A_StandAlone)[1:5]
	)

	# Ratings of casino suitability for the Port Lands location.
	# The labels use Percentage*100; the name `Percentage100` does not exist.
	ggplot(stand.and.integrated.ratings.portlands.long,
	       aes(x = stand.and.integrated.ratings.portlands.long$"Casino Type", fill = Rating,
	           y = Percentage, label = sprintf("%.02f %%", Percentage*100))) +
	  geom_bar(position = "dodge") +
	  coord_flip() +
	  ggtitle("Ratings of Casino Suitability \nat Port Lands by Casino Type") +
	  scale_x_discrete(name = "") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent) +
	  geom_text(aes(x = stand.and.integrated.ratings.portlands.long$"Casino Type", y = Percentage,
	                ymax = Percentage, label = sprintf("%.01f%%", Percentage*100), hjust = .75),
	            position = position_dodge(width = 1), size = 4) +
	  scale_fill_few(palette = "light") +
	  theme_wsj()

	# Postal codes (FSAs really) and their coordinates.
	# The geocode file is imported as an ffdf, which the author found faster to
	# merge with the survey data; casino.orig therefore has to be coerced to an
	# ffdf as well before the merge.

	casino.orig$PostalCode <- toupper(casino.orig$PostalCode)

	pcodes <- read.csv.ffdf(
	  file = "/home/inkhorn/Downloads/zipcodeset.txt",
	  header = FALSE,
	  first.rows = 50000,
	  next.rows = 50000,
	  colClasses = NA
	)
	names(pcodes) <- c("Postal", "Lat", "Long", "City", "Prov")

	# FSA = first three characters of the postal code, upper-cased
	pcodes$FSA <- as.ff(as.factor(toupper(substr(pcodes[, "Postal"], 1, 3))))

	casino.orig <- as.ffdf(casino.orig)
	casino.orig$PostalCode <- as.ff(as.factor(toupper(casino.orig[, "PostalCode"])))

	# Left join: every survey row is kept, coordinates are attached where the FSA matches
	casino.orig <- merge(casino.orig, pcodes, by.x = "PostalCode", by.y = "FSA", all.x = TRUE)

	# Full map: keep only the records that were matched to coordinates
	casino.gc <- casino.orig[which(!is.na(casino.orig[, "Lat"])), ]
	mymap <- MapBackground(lat = casino.gc$Lat, lon = casino.gc$Long)
	PlotOnStaticMap(mymap, casino.gc$Lat, casino.gc$Long, cex = 1.5, pch = 21, bg = "orange")

	# Build the list of cities by number of responses, most frequent first,
	# dropping unused levels and the blank city name
	cities <- data.frame(table(casino.orig[, "City"]))
	cities <- cities[cities$Freq > 0, ]
	cities <- cities[order(cities$Freq, decreasing = TRUE), ]
	cities <- cities[cities$Var1 != '', ]

	# Top cities (28 is an arbitrary dividing line). head() is used instead of
	# [1:28,] so that fewer than 28 cities does not produce padding rows of NA.
	cities.filter <- head(cities, 28)
	names(cities.filter) <- c("City", "# Responses")

	# Restrict the survey ffdf to the top cities. The column is called "City"
	# after the rename above; the old name "Var1" no longer exists, and indexing
	# it would return NULL and match nothing.
	casino.top.so <- casino.orig[which(casino.orig[, "City"] %in% cities.filter$City), ]

	# here's a transparency function that I used for the southern ontario map

	addTrans <- function(color,trans)
	{
	  # Adds transparency to a colour and returns "#RRGGBBAA" hex strings.
	  #
	  # color: colour specification(s) accepted by col2rgb()
	  # trans: alpha value(s) between 0 and 255
	  #        (0 = fully transparent, 255 = fully visible)
	  #
	  # color and trans must have the same length, or one of them must have
	  # length 1 (it is then repeated to the length of the other).
	  # Stops with an error on incompatible lengths or on trans outside 0..255;
	  # out-of-range values would otherwise index past the hex digit table and
	  # put "NA" into the result.

	  # && rather than &: these are scalar conditions inside if()
	  if (length(color) != length(trans) && !any(c(length(color), length(trans)) == 1)) stop("Vector lengths not correct")
	  if (anyNA(trans) || any(trans < 0 | trans > 255)) stop("trans must be between 0 and 255")
	  if (length(color) == 1 && length(trans) > 1) color <- rep(color, length(trans))
	  if (length(trans) == 1 && length(color) > 1) trans <- rep(trans, length(color))

	  # Two-digit upper-case hex representation of values in 0..255
	  num2hex <- function(x)
	  {
	    hex <- unlist(strsplit("0123456789ABCDEF", split = ""))
	    paste0(hex[(x - x %% 16) / 16 + 1], hex[x %% 16 + 1])
	  }

	  # One column per colour: red, green, blue rows plus the alpha row
	  rgba <- rbind(col2rgb(color), trans)
	  paste0("#", apply(apply(rgba, 2, num2hex), 2, paste, collapse = ""))
	}

	# Southern Ontario map: background fitted to the top-city records, markers
	# filled with addTrans("orange",10), i.e. orange with an alpha value of 10

	mymap <- MapBackground(lat = casino.top.so$Lat, lon = casino.top.so$Long)
	PlotOnStaticMap(
	  mymap,
	  casino.top.so$Lat,
	  casino.top.so$Long,
	  cex = 1.5,
	  pch = 21,
	  bg = addTrans("orange", 10)
	)

	# Question 3: issues of importance regarding the new casino.
	# For each of the 16 issues (columns 8:23 of the recoded data) compute the
	# share of non-missing answers coded 3, then plot the shares in ascending order.

	q3.summary <- matrix(
	  NA, 16, 1,
	  dimnames = list(
	    c("Design of the facility",
	      "Employment opportunities", "Entertainment and cultural activities",
	      "Expanded convention facilities", "Integration with surrounding areas",
	      "New hotel accommodations", "Problem gambling & health concerns",
	      "Public safety and social concerns", "Public space",
	      "Restaurants", "Retail", "Revenue for the City", "Support for local businesses",
	      "Tourist attraction", "Traffic concerns", "Training and career development"),
	    c("% Very Important")
	  )
	)

	q3.summary[1:16] <- vapply(
	  8:23,
	  function(column) mean(casino[, column] == 3, na.rm = TRUE),
	  numeric(1)
	)

	# Row-subsetting the one-column matrix drops it to a named vector, so the
	# resulting data frame carries the issue names as row names
	q3.summary <- as.data.frame(q3.summary[order(q3.summary[, 1], decreasing = FALSE), ])
	names(q3.summary)[1] <- "% Very Important"
	q3.summary$Concern <- rownames(q3.summary)
	q3.summary <- q3.summary[order(q3.summary$"% Very Important", decreasing = FALSE), ]

	# Fix the factor levels in sorted order so the plot keeps that order
	q3.summary$Concern <- factor(q3.summary$Concern, levels = q3.summary$Concern)

	ggplot(q3.summary, aes(x = Concern, y = q3.summary$"% Very Important")) +
	  geom_point(size = 5, colour = "forest green") +
	  coord_flip() +
	  ggtitle("Issues of Importance Surrounding\nthe New Casino") +
	  scale_x_discrete(name = "Issues of Importance") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent) +
	  theme_wsj()

	# Features people might want if a new Integrated Entertainment Complex is built.
	# Downtown: mean of each of columns 36:44, sorted ascending and plotted.

	q7a.summary <- matrix(
	  NA, 9, 1,
	  dimnames = list(
	    c("No Casino", "Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
	      "Hotel", "Nightclubs", "Restaurants", "Retail", "Theatre"),
	    c("% Include")
	  )
	)

	q7a.summary[1:9] <- vapply(
	  36:44,
	  function(column) mean(casino[, column], na.rm = TRUE),
	  numeric(1)
	)

	# Row-subsetting the one-column matrix yields a named vector; the feature
	# names become the row names of the data frame
	q7a.summary <- as.data.frame(q7a.summary[order(q7a.summary[, 1], decreasing = FALSE), ])
	names(q7a.summary)[1] <- "% Include"
	q7a.summary$feature <- rownames(q7a.summary)
	q7a.summary$feature <- factor(q7a.summary$feature, levels = q7a.summary$feature)

	ggplot(q7a.summary, aes(x = feature, y = q7a.summary$"% Include")) +
	  geom_point(size = 5, colour = "forest green") +
	  coord_flip() +
	  ggtitle("What People Would Want in an Integrated\nEntertainment Complex in Downtown Toronto") +
	  scale_x_discrete(name = "Features") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent, name = "% Wanting the Feature") +
	  theme_wsj()

	# Same feature summary for the Exhibition Place site: the mean of each of
	# columns 52 to 60 of casino (assumed to follow the label order below) is taken
	# as the share of respondents wanting that feature.

	q7b.summary = data.frame(
		"% Include" = vapply(52:60, function(i) mean(casino[,i], na.rm=TRUE), numeric(1)),
		feature = c("No Casino","Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
			"Hotel","Nightclubs","Restaurants","Retail","Theatre"),
		check.names=FALSE, stringsAsFactors=FALSE)
	rownames(q7b.summary) = q7b.summary$feature

	# Ascending sort + factor levels in that order => most wanted feature on top after coord_flip()
	q7b.summary = q7b.summary[order(q7b.summary[["% Include"]], decreasing=FALSE),]
	q7b.summary$feature = factor(q7b.summary$feature, levels=q7b.summary$feature)

	# y refers to the column by name, theme_wsj() precedes theme() so the complete
	# theme does not overwrite the title adjustments, and the title now spells
	# "Exhibition" correctly
	ggplot(q7b.summary, aes(x=feature, y=`% Include`)) +
		geom_point(size=5, colour="forest green") +
		coord_flip() +
		ggtitle("What People Would Want in an Integrated\nEntertainment Complex at the Exhibition Place") +
		scale_x_discrete(name="Features") +
		scale_y_continuous(labels=percent,name="% Wanting the Feature") +
		theme_wsj() +
		theme(title=element_text(size=22),plot.title=element_text(hjust=.8))

	# Same feature summary for the Port Lands site: the mean of each of
	# columns 68 to 76 of casino (assumed to follow the label order below) is taken
	# as the share of respondents wanting that feature.

	q7c.summary = data.frame(
		"% Include" = vapply(68:76, function(i) mean(casino[,i], na.rm=TRUE), numeric(1)),
		feature = c("No Casino","Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
			"Hotel","Nightclubs","Restaurants","Retail","Theatre"),
		check.names=FALSE, stringsAsFactors=FALSE)
	rownames(q7c.summary) = q7c.summary$feature

	# Ascending sort + factor levels in that order => most wanted feature on top after coord_flip()
	q7c.summary = q7c.summary[order(q7c.summary[["% Include"]], decreasing=FALSE),]
	q7c.summary$feature = factor(q7c.summary$feature, levels=q7c.summary$feature)

	# y must come from q7c.summary itself: the previous version plotted
	# q7b.summary's values (Exhibition Place) against the Port Lands labels.
	# theme_wsj() precedes theme() so the complete theme does not overwrite the
	# title adjustments.
	ggplot(q7c.summary, aes(x=feature, y=`% Include`)) +
		geom_point(size=5, colour="forest green") +
		coord_flip() +
		ggtitle("What People Would Want in an Integrated\nEntertainment Complex in Port Lands") +
		scale_x_discrete(name="Features") +
		scale_y_continuous(labels=percent,name="% Wanting the Feature") +
		theme_wsj() +
		theme(title=element_text(size=22),plot.title=element_text(hjust=.8))

	# A third copy of the survey file is read in here; the author's stated reason is to
	# avoid the ffdf indexing notation (e.g. df[,"variable1"]) when pulling text values

	casino.orig2 = read.csv("/home/inkhorn/Downloads/casino_survey_results20130325.csv")

	# Gender: keep the three recognised answers as they are and relabel everything
	# else (blank, missing or unexpected values) as "Did not disclose"

	casino$Gender = casino.orig$Gender
	casino$Gender = replace(
		as.character(casino.orig2$Gender),
		!(casino.orig2$Gender %in% c("Female","Male","Transgendered")),
		"Did not disclose")

	# The level order fixes the bar order; after coord_flip() the last level is on top
	casino$Gender = factor(casino$Gender, levels=c("Transgendered","Did not disclose","Female","Male"))

	# Bars show each category's share of all respondents, with the percentage printed as text
	ggplot(casino, aes(x=Gender,y=..count../sum(..count..))) +
		geom_bar(fill="forest green") +
		coord_flip() +
		ggtitle("Gender Distribution of Respondents") +
		scale_x_discrete(name="") +
		theme_wsj() +
		theme(title=element_text(size=22),plot.title=element_text(hjust=.8)) +
		stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom="text") +
		scale_y_continuous(labels=percent)

	# Age: the recognised age brackets are kept verbatim; anything else -- blank,
	# unexpected, or missing (NA) -- is labelled "Did not disclose", matching how
	# Gender is handled. (The earlier nested ifelse chain let NA fall through as NA.)

	age.groups = c("Under 15","15-24","25-34","35-44","45-54","55-64","65 or older")
	casino$Age = as.character(casino.orig2$Age)
	casino$Age[!(casino$Age %in% age.groups)] = "Did not disclose"

	# "Did not disclose" first, then the brackets from youngest to oldest
	casino$Age = factor(casino$Age, levels=c("Did not disclose", age.groups))

	# Bars show each bracket's share of all respondents, with the percentage printed as text
	ggplot(casino, aes(x=Age,y=..count../sum(..count..))) +
		geom_bar(fill="forest green") +
		coord_flip() +
		ggtitle("Age Distribution of Respondents") +
		scale_x_discrete(name="") +
		theme_wsj() +
		theme(title=element_text(size=22),plot.title=element_text(hjust=.8)) +
		stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom="text") +
		scale_y_continuous(labels=percent)