Created
February 18, 2014 05:07
-
-
Save bbarrilleaux/9064953 to your computer and use it in GitHub Desktop.
UFO sightings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 | California | CA | 36756666 | 403931.96 | 91 | 155959 | 235.68 | |
---|---|---|---|---|---|---|---|---|
2 | Texas | TX | 24326974 | 678051.12 | 35.88 | 261797 | 92.92 | |
3 | New York | NY | 19490297 | 122283.7 | 159.39 | 47214 | 412.81 | |
4 | Florida | FL | 18328340 | 139760.29 | 131.14 | 53927 | 339.87 | |
5 | Illinois | IL | 12901563 | 143961.9 | 89.62 | 55584 | 232.11 | |
6 | Pennsylvania | PA | 12448279 | 116075.5 | 107.24 | 44817 | 277.76 | |
7 | Ohio | OH | 11485910 | 106054.83 | 108.3 | 40948 | 280.5 | |
8 | Michigan | MI | 10003422 | 147121.68 | 67.99 | 56804 | 176.1 | |
9 | Georgia | GA | 9685744 | 149975.85 | 64.58 | 57906 | 167.27 | |
10 | North Carolina | NC | 9222414 | 126160.91 | 73.1 | 48711 | 189.33 | |
11 | New Jersey | NJ | 8682661 | 19209.94 | 451.99 | 7417 | 1170.64 | |
12 | Virginia | VA | 7769089 | 102547.99 | 75.76 | 39594 | 196.22 | |
13 | Washington | WA | 6549224 | 172348.17 | 38 | 66544 | 98.42 | |
14 | Arizona | AZ | 6500180 | 294313.3 | 22.09 | 113635 | 57.2 | |
15 | Massachusetts | MA | 6497967 | 20305.51 | 320.01 | 7840 | 828.82 | |
16 | Indiana | IN | 6376792 | 92895.1 | 68.65 | 35867 | 177.79 | |
17 | Tennessee | TN | 6214888 | 106751.54 | 58.22 | 41217 | 150.78 | |
18 | Missouri | MO | 5911605 | 178413.92 | 33.13 | 68886 | 85.82 | |
19 | Maryland | MD | 5633597 | 25314.54 | 222.54 | 9774 | 576.39 | |
20 | Wisconsin | WI | 5627967 | 140662.25 | 40.01 | 54310 | 103.63 | |
21 | Minnesota | MN | 5220393 | 206188.95 | 25.32 | 79610 | 65.57 | |
22 | Colorado | CO | 4939456 | 268628.39 | 18.39 | 103718 | 47.62 | |
23 | Alabama | AL | 4661900 | 131426.36 | 35.47 | 50744 | 91.87 | |
24 | South Carolina | SC | 4479800 | 77981.95 | 57.45 | 30109 | 148.79 | |
25 | Louisiana | LA | 4410796 | 112825.06 | 39.09 | 43562 | 101.25 | |
26 | Kentucky | KY | 4269245 | 102895.05 | 41.49 | 39728 | 107.46 | |
27 | Oregon | OR | 3790060 | 268631.09 | 14.11 | 95997 | 39.48 | |
28 | Oklahoma | OK | 3642361 | 177846.71 | 20.48 | 68667 | 53.04 | |
29 | Connecticut | CT | 3501252 | 12548.49 | 279.02 | 4845 | 722.65 | |
30 | Iowa | IA | 3002555 | 144700.05 | 20.75 | 55869 | 53.74 | |
31 | Mississippi | MS | 2938618 | 121488.57 | 24.19 | 46907 | 62.65 | |
32 | Arkansas | AR | 2855390 | 134856 | 21.17 | 52068 | 54.84 | |
33 | Kansas | KS | 2802134 | 211899.88 | 13.22 | 81815 | 34.25 | |
34 | Utah | UT | 2736424 | 212751.98 | 12.86 | 82144 | 33.31 | |
35 | Nevada | NV | 2600167 | 284448.03 | 9.14 | 109826 | 23.68 | |
36 | New Mexico | NM | 1984356 | 314310.6 | 6.31 | 121356 | 16.35 | |
37 | West Virginia | WV | 1814468 | 62361.73 | 29.1 | 24078 | 75.36 | |
38 | Nebraska | NE | 1783432 | 199097.57 | 8.96 | 76872 | 23.2 | |
39 | Idaho | ID | 1523816 | 214313.75 | 7.11 | 82747 | 18.42 | |
40 | Maine | ME | 1316456 | 79932.21 | 16.47 | 30862 | 42.66 | |
41 | New Hampshire | NH | 1315809 | 23227.01 | 56.65 | 8968 | 146.72 | |
42 | Hawaii | HI | 1288198 | 16635.49 | 77.44 | 6423 | 200.56 | |
43 | Rhode Island | RI | 1050788 | 2706.54 | 388.24 | 1045 | 1005.54 | |
44 | Montana | MT | 967440 | 376977.95 | 2.57 | 145552 | 6.65 | |
45 | Delaware | DE | 873092 | 5060.84 | 172.52 | 1954 | 446.82 | |
46 | South Dakota | SD | 804194 | 196541.25 | 4.09 | 75885 | 10.6 | |
47 | Alaska | AK | 686293 | 1481346 | 0.46 | 571951 | 1.2 | |
48 | North Dakota | ND | 641481 | 178647.02 | 3.59 | 68976 | 9.3 | |
49 | Vermont | VT | 621270 | 23957.39 | 25.93 | 9250 | 67.16 | |
50 | Wyoming | WY | 532668 | 251487.85 | 2.12 | 97100 | 5.49 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("maps")
library("ggplot2")
library("RColorBrewer")
library("mapproj")

# Load the sightings ----
# data from infochimps, http://www.infochimps.com/datasets/60000-documented-ufo-sightings-with-text-descriptions-and-metada
tsvfile <- "chimps_16154-2010-10-20_14-33-35/ufo_awesome.tsv"
# Read without a header, so columns are named V1, V2, ...
# read.table() by default treats ' and " as quote characters and # as the
# start of a comment; both are switched off so the tab-separated fields are
# read literally. fill = TRUE pads short rows instead of failing on them.
# NOTE(review): assumes the TSV does not rely on quoted fields -- confirm.
ufo <- read.table(
  tsvfile,
  sep = "\t",
  quote = "",
  comment.char = "",
  fill = TRUE,
  stringsAsFactors = FALSE
)

# Derive the state from V3 ----
# V3 is split on ", " and the second piece is taken as the state
# abbreviation; values with no ", " yield NA.
location_parts <- strsplit(as.character(ufo$V3), ", ", fixed = TRUE)
ufo$state <- vapply(
  location_parts,
  function(parts) parts[2],
  character(1)
)
# Abbreviations that are not in state.abb map to NA.
ufo$statename <- tolower(state.name[match(ufo$state, state.abb)])

# Load state populations ----
# this file is given in the gist:
popfile <- "statepops.csv"
statePops <- read.csv(popfile, header = FALSE, stringsAsFactors = FALSE)
# Only column 3 (matched against state.abb below) and column 4 (renamed to
# "population" below) are kept.
statePops <- statePops[, 3:4]
statePops$region <- tolower(state.name[match(statePops$V3, state.abb)])
names(statePops)[2] <- "population"

# Sightings per state, joined with population ----
# table() ignores NA names, so unmatched sightings are not counted.
stateTable <- table(ufo$statename)
stateDF <- as.data.frame(stateTable, stringsAsFactors = FALSE)
names(stateDF) <- c("region", "UFO")
stateDF <- merge(stateDF, statePops, by = "region")
# Sightings per 10,000 residents.
stateDF$UFOpercapita <- stateDF$UFO * 10000 / stateDF$population
# set up a theme for the map
# Starts from theme_bw() and blanks every line and rect element plus the
# strip text, axis text and axis titles.
# The margin is built with grid::unit() rather than a hand-assembled
# "unit" structure, which depends on grid's internal representation.
# plot.margin order is top, right, bottom, left, so the bottom and left
# margins are pulled in by one line each.
new_theme_empty <- theme_bw() +
  theme(
    line = element_blank(),
    rect = element_blank(),
    strip.text = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    plot.margin = grid::unit(c(0, 0, -1, -1), "lines")
  )
# State outlines as a data frame (long, lat, group, region, ...).
# map_data() wraps the maps::map() + fortify() pair in a single call.
stateShapes <- map_data("state")
# Attach the per-capita rate to every polygon vertex; regions that are not
# in stateDF get NA.
stateShapes$UFO <- stateDF$UFOpercapita[
  match(stateShapes$region, stateDF$region)
]

# make the map!
myPalette <- brewer.pal(9, "PuRd")
mapPlot <- ggplot(
  stateShapes,
  aes(x = long, y = lat, group = group, fill = UFO)
) +
  geom_polygon(colour = "BLACK") +
  # `projection` is spelled out in full instead of relying on partial
  # argument matching; lat0 is passed through to the projection.
  coord_map(projection = "conic", lat0 = 30) +
  new_theme_empty +
  scale_fill_gradientn(
    "UFO sightings per 10,000 residents",
    colours = myPalette
  ) +
  ggtitle("UFO sightings, 1995-2010")
print(mapPlot)
# make the word cloud!
library(tm)
library(wordcloud)

# Text of the Washington sightings ----
# which() drops rows whose state is NA; a bare logical index would turn
# each of them into an all-NA row. Column 6 is used as the text.
# NOTE(review): column 6 is presumably the free-text description -- confirm.
WAtext <- as.character(ufo[which(ufo$state == "WA"), 6])
WAtext <- WAtext[!is.na(WAtext)]
stopifnot(length(WAtext) > 0)

# Build and clean the corpus ----
# VectorSource treats each element of the character vector as a document.
WA.corpus <- VCorpus(VectorSource(WAtext))
WA.corpus <- tm_map(WA.corpus, removePunctuation)
# Base functions such as tolower() must be wrapped in content_transformer()
# so the documents keep their tm classes.
WA.corpus <- tm_map(WA.corpus, content_transformer(tolower))
WA.corpus <- tm_map(WA.corpus, removeWords, stopwords("english"))

# Term frequencies, most frequent first ----
tdm <- TermDocumentMatrix(WA.corpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v, stringsAsFactors = FALSE)

# Draw the cloud ----
# The first two colours of the 9-colour BuGn palette are dropped.
pal <- brewer.pal(9, "BuGn")
pal <- pal[-(1:2)]
wordcloud(
  d$word,
  d$freq,
  scale = c(5, .5),
  min.freq = 3,
  max.words = 60,
  random.order = TRUE,
  rot.per = 0.15,
  colors = pal,
  vfont = c("sans serif", "plain")
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment