Skip to content

Instantly share code, notes, and snippets.

@andrewbtran
Created April 6, 2015 13:10
Show Gist options
  • Select an option

  • Save andrewbtran/78ec711692a5dfb517e7 to your computer and use it in GitHub Desktop.

Select an option

Save andrewbtran/78ec711692a5dfb517e7 to your computer and use it in GitHub Desktop.
library(dplyr)
# Load the Meetup groups export. Quote handling is switched off (quote = ""),
# automatic row names are disabled, and text columns stay as character.
ctmeet <- read.csv(
  "meetup_groups.csv",
  quote = "",
  row.names = NULL,
  stringsAsFactors = FALSE
)

# Give the eight columns readable names
names(ctmeet) <- c(
  "Area", "Group", "Created", "Neighborhood",
  "State", "Category", "Members", "Who"
)

# Turn the creation timestamp into a POSIXct date-time.
# NOTE(review): dividing by 1000 presumes `Created` holds epoch
# milliseconds -- confirm against the data source.
ctmeet[["Created_Date"]] <- as.POSIXct(
  ctmeet[["Created"]] / 1000,
  origin = "1970-01-01"
)

# Pull the four-digit year out of the creation date as a number
ctmeet[["Year"]] <- as.numeric(format(ctmeet[["Created_Date"]], "%Y"))
# Sum of group membership counts for each Area (printed, not stored)
ctmeet %>%
  group_by(Area) %>%
  summarise(Total.Members = sum(Members))
# Count the number of groups per Category within each Area (one summary
# table per city), then merge the five tables into a single wide data frame
# `cat` with one row per Category and one count column per city.
cat1 <- ctmeet %>%
  filter(Area == "Bridgeport") %>%
  group_by(Category) %>%
  summarise(Bridgeport = n())

cat2 <- ctmeet %>%
  filter(Area == "Hartford") %>%
  group_by(Category) %>%
  summarise(Hartford = n())

cat3 <- ctmeet %>%
  filter(Area == "New Haven") %>%
  group_by(Category) %>%
  summarise(New.Haven = n())

cat4 <- ctmeet %>%
  filter(Area == "Stamford") %>%
  group_by(Category) %>%
  summarise(Stamford = n())

cat5 <- ctmeet %>%
  filter(Area == "Waterbury") %>%
  group_by(Category) %>%
  summarise(Waterbury = n())

# Hartford is the left-hand (base) table of every join.
# dplyr's left_join() is used because only dplyr is attached by this script;
# a bare join() belongs to plyr and would fail with "could not find
# function". plyr::join() defaults to a left join, so left_join() yields the
# same rows and columns.
# NOTE(review): since these are left joins anchored on Hartford, a Category
# with no Hartford groups is dropped from `cat`, and a city lacking a
# Category gets NA -- confirm this is intended (full_join() would keep
# every Category).
cat <- cat2 %>%
  left_join(cat1, by = "Category") %>%
  left_join(cat3, by = "Category") %>%
  left_join(cat4, by = "Category") %>%
  left_join(cat5, by = "Category")
# For each city count column, append a "<city>.Perc" column holding that
# city's share of groups per category, as a percentage rounded to two
# decimals. NA counts are ignored when computing each column total.
for (city in c("Hartford", "Bridgeport", "New.Haven", "Stamford", "Waterbury")) {
  counts <- cat[[city]]
  cat[[paste0(city, ".Perc")]] <- round(
    (counts / sum(counts, na.rm = TRUE)) * 100,
    digits = 2
  )
}
# Save the category table (counts and percentages) as a CSV file;
# write.csv() also writes the row names as the first column by default.
write.csv(cat, file = "ct-categories.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment