Last active
August 29, 2015 13:57
-
-
Save milesgrimshaw/9414109 to your computer and use it in GitHub Desktop.
Data prep for geocoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach the packages loaded by this script
library(lubridate)
library(stringr)
library(ggplot2)
library(scales)

# Print the current working directory, then switch to the project folder.
# All relative paths used further down are resolved against this folder.
getwd()
setwd("~/Desktop/Patreon/")
# Read in the project data (the file is read without a header row and
# strings are kept as character vectors)
d <- read.csv(
  "Kickstarter/kickstarter_projects.csv",
  header = FALSE,
  as.is = TRUE
)
# Rename the columns
names(d) <- c(
  "url", "backers", "goal", "pledged", "start",
  "end", "category", "location", "profile"
)
# Eliminate blanks: drop rows whose goal field is the literal string "[]"
d <- d[which(d$goal != "[]"), , drop = FALSE]
# Get all the unique locations
# locations <- unique(d$location)
# Write the unique locations to a CSV for geocoding
# locations <- data.frame(locations)
# write.csv(locations,file="Kickstarter_Locations.csv", row.names=FALSE)
# Read the geocoded locations back in (no header row, strings kept as
# character vectors)
l <- read.csv(
  "Kickstarter/kickstarter_locations_coded.csv",
  header = FALSE,
  as.is = TRUE
)
# Rename the columns
names(l) <- c("location", "lat", "lon", "geo")
# Subset because some projects haven't ended: keep only rows whose `end`
# field is non-empty.
d <- d[which(d$end != ""), ]
# Check no NAs (prints the indices of rows with a missing `end` value)
which(is.na(d$end))
# Create an end date variable.
# Only the first 10 characters of each value (the "YYYY-MM-DD" part) are
# parsed, so any time-of-day information is dropped and each result is
# midnight in the session time zone. Values that do not match the format
# become NA.
# as.POSIXct() is vectorised, so the whole column is converted in a single
# call instead of once per row; unlike a per-element sapply() this also
# yields an empty POSIXct vector (rather than an error) when no rows remain.
d$end <- as.POSIXct(substr(d$end, 1, 10), format = "%Y-%m-%d")
# Merge the two data sets, giving every project a lat/lon taken from the
# geocoded location table `l`.
# match() returns, for each project location, the index of the first row of
# `l` with the same location, or NA when the location is absent from `l`.
# Indexing with that vector always produces exactly one value per project,
# so `lat`/`lon` stay plain atomic columns: projects with an un-geocoded
# location get NA, and duplicated entries in `l` resolve to the first one.
# (A per-row which(t == l$location) lookup would return zero or several
# values in those cases and turn the columns into lists.)
loc_idx <- match(d$location, l$location)
d$lat <- l$lat[loc_idx]
d$lon <- l$lon[loc_idx]
# The total amount actually funded: sum of pledges over the projects whose
# pledged amount is at least their goal
funded_rows <- which(as.numeric(d$pledged) >= as.numeric(d$goal))
sum(as.numeric(d$pledged[funded_rows]))
# The total amount pledged across all remaining projects
sum(as.numeric(d$pledged))
# Subset the data to save as a new CSV.
# The columns are passed unnamed, so data.frame() derives the column names
# from the expressions themselves (d.pledged, d.end, ...).
df <- data.frame(
  d$pledged, d$end, d$category,
  d$location, d$lat, d$lon
)
# Save the CSV without a row-name column
write.csv(
  df,
  file = "kickstarter_pledged_locations_for_upload.csv",
  row.names = FALSE
)
# Keep only the projects whose pledged amount met or exceeded the goal
reached_goal <- which(as.numeric(d$pledged) >= as.numeric(d$goal))
d <- d[reached_goal, ]
# Subset the data to save as a new CSV.
# The columns are passed unnamed, so data.frame() derives the column names
# from the expressions themselves (d.pledged, d.end, ...).
df <- data.frame(
  d$pledged, d$end, d$category,
  d$location, d$lat, d$lon
)
# Save the CSV without a row-name column
write.csv(
  df,
  file = "kickstarter_successful_locations_for_upload.csv",
  row.names = FALSE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment