Created
December 28, 2013 17:45
-
-
Save milesgrimshaw/8162039 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Code for subsetting and formatting OpenPaths data for Mapbox
## Miles Grimshaw
## December 26th 2013
## Environment setup ----
## Show the directory the session starts in, then switch to the project
## folder. Every relative path used later in the script (the input CSV and
## the output CSV) is resolved against this folder.
getwd()
setwd("~/Dropbox/Personal/Mapbox/OpenPaths/")

## Attach the packages the script loads up front.
## lubridate supplies the date-part helpers used further down; stringr is
## attached but not referenced in the visible part of this script.
library(stringr)
library(lubridate)
# library(ggplot2)

## Load the raw data ----
## The first line of the file is the header row. `as.is = TRUE` keeps
## character columns as plain strings instead of converting them to factors.
d <- read.csv(
  "./openpaths_milesgrimshaw.csv",
  header = TRUE,
  as.is = TRUE
)

## Quick look at the first rows to check the import.
head(d)
## Convert $date to a time variable ----
## Parse the `date` strings into POSIXct timestamps interpreted as New York
## local time. Strings that do not match the format become NA.
d$t <- as.POSIXct(
  d$date,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "America/New_York"
)

## Keep the first three CSV columns plus the parsed timestamp.
## NOTE(review): the first three columns are presumed to be lat, lon, alt --
## confirm against the CSV header. `t` is selected by name rather than by a
## hard-coded position so the selection survives a change in the number of
## columns in the export.
d2 <- d[, c(names(d)[1:3], "t")]
head(d2)

## Only select unique rows
d3 <- unique(d2)

## Trim down to 5 significant figures
d3$lat <- signif(d3$lat, 5)
d3$lon <- signif(d3$lon, 5)

## Further subset, removing the altitude column. The column order
## (lat, lon, t) matters: later code renames the columns by position.
d3 <- d3[, c("lat", "lon", "t")]

## Only want data for 2013 ----
## Keep timestamps in [2013-01-01, 2014-01-01), New York time. The upper
## bound stops later years from leaking in when the script is rerun on a
## newer export. which() also drops rows whose timestamp failed to parse (NA).
year_2013 <- as.POSIXct(
  "2013-01-01",
  format = "%Y-%m-%d",
  tz = "America/New_York"
)
year_2014 <- as.POSIXct(
  "2014-01-01",
  format = "%Y-%m-%d",
  tz = "America/New_York"
)
d4 <- d3[which(d3$t >= year_2013 & d3$t < year_2014), ]
## Further trim the number of points ----
## Keep the first point, then every point whose timestamp is more than
## 10 minutes after the row directly before it in d4 (the comparison is with
## the previous row, not with the previously kept point).
##
## Done as one vectorized comparison instead of growing d5 with rbind() in a
## loop: same rows and row names, but no quadratic copying, and it also works
## when d4 has zero or one row (where `2:nrow(d4)` would count backwards).
## NOTE(review): assumes d4 is ordered by time -- confirm against the export.

## A missing timestamp would make the gap undefined; fail loudly instead of
## silently producing NA rows.
stopifnot(!anyNA(d4$t))

n_points <- nrow(d4)

## Gap, in minutes, between each row (from the second on) and its predecessor.
gap_mins <- as.numeric(
  difftime(d4$t[-1], d4$t[-n_points], units = "mins")
)

## The first row is always kept; truncating to n_points handles an empty d4.
keep <- c(TRUE, gap_mins > 10)[seq_len(n_points)]
d5 <- d4[keep, ]
## Create a second column to enable creation of line segments
## This is not necessary because the Ruby script creates the line segments
# for (i in 1:(nrow(d5)-1)) { | |
# d5$lat2[i] <- d5$lat[i+1] | |
# d5$lon2[i] <- d5$lon[i+1] | |
# } | |
## Rename columns
colnames(d5) <- c("latitude", "longitude", "time")
head(d5)

## Create separate columns for the day of the week, month, and day of the
## month from the time stamp ----

## Day of the week.
## POSIXlt's `wday` field runs 0-6 starting on Sunday, so the lookup table
## must start with Sunday (lubridate's wday() likewise numbers Sunday as 1 by
## default, which is why a table starting at Monday mislabels every day).
## Building the whole column with one lookup also avoids assigning piecewise
## into a column that does not exist yet.
day_names <- c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)
d5$week_day <- day_names[as.POSIXlt(d5$time)$wday + 1L]

## Month name: month() gives 1-12, which indexes R's built-in `month.name`
## ("January" ... "December").
d5$month <- month.name[month(d5$time)]

## Set the day (day of the month)
d5$day <- mday(d5$time)

## Check format
head(d5)
tail(d5)
## Build the export table ----
## The timestamp has already been expanded into week_day / month / day, so
## the `time` column is left out of the output.
export_cols <- c("latitude", "longitude", "week_day", "month", "day")
d6 <- d5[, export_cols]
head(d6)

## Write to CSV
## Relative path: the file is created in the current working directory.
write.csv(d6, file = "paths_final.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment