Last active
August 29, 2015 14:00
-
-
Save felixhaass/11282831 to your computer and use it in GitHub Desktop.
This code reproduces the data and plots from a blog post about each Game of Thrones character's screen time in seasons 1 to 3 of the show.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the raw screen-time listing, one character per line, e.g.
#   "1. Tyrion Lannister = 166:15 (1,2,3; 28)"
got <- readLines("GoT_screentime_full.txt", encoding = "UTF-8")

# Drop blank lines (such as a trailing empty line) so they do not turn into
# all-NA rows further down.
got <- got[nzchar(trimws(got))]

# Split every line on its field delimiters: ". " after the rank, " = " after
# the name, ":" inside the time, "(" and "; " around the season list, and the
# closing ")". The resulting pieces are, in order:
#   rank, name, minutes, seconds, seasons, episode count
pieces <- strsplit(got, split = " = |\\. |\\(|:|; |\\)")

# Keep pieces 2 to 6 (everything but the rank) and strip stray whitespace,
# e.g. the blank left behind between the seconds and the "(". Lines with fewer
# pieces are padded with NA by the out-of-range index.
fields <- t(vapply(pieces, function(p) trimws(p[2:6]), character(5)))

# Build the data frame in one go instead of growing it row by row. All columns
# are kept as character here; numeric conversion happens in a later step.
gotdf <- data.frame(
  name = fields[, 1],
  minutes = fields[, 2],
  seconds = fields[, 3],
  seasons = fields[, 4],
  episode_count = fields[, 5],
  stringsAsFactors = FALSE
)
# fix data types: the parsed fields are text, so convert the numeric ones.
# lapply() works column by column and keeps the data frame structure, whereas
# apply() would first coerce the selection to a matrix.
numeric_cols <- c("minutes", "seconds", "episode_count")
gotdf[numeric_cols] <- lapply(gotdf[numeric_cols], as.numeric)

# calculate screentime in seconds
gotdf$screentime_seconds <- gotdf$minutes * 60 + gotdf$seconds
# extract family name from 'name' variable: keep only the last word by
# removing everything up to and including the final space.
# Beware: gives first name, if no family name provided (e.g. "Varys")
gotdf$family <- sub("^.* ", "", gotdf$name)
# Lannister / Stark / Other grouping: members of the two houses keep their
# family name, everybody else is labelled "Other".
gotdf$sl_dummy <- with(
  gotdf,
  ifelse(family == "Stark" | family == "Lannister", family, "Other")
)
# Sex of each character, coded by hand ("m" / "f"). The values are positional:
# entry k belongs to row k of gotdf, so the order must match the input file.
gotdf$sex <- c(
  "m", "f", "m", "f", "m", "f", "f", "m", "f", "m",  # rows  1-10
  "m", "m", "m", "m", "m", "m", "m", "m", "m", "f",  # rows 11-20
  "f", "f", "m", "m", "m", "m", "f", "f", "f", "f",  # rows 21-30
  "m", "m", "m", "m", "m", "m", "m", "f", "m", "m",  # rows 31-40
  "f", "m", "m", "m", "f", "m", "m", "m", "m", "m",  # rows 41-50
  "m", "f", "f", "m", "m", "m", "m", "m", "m", "f",  # rows 51-60
  "m", "m", "m", "m", "m"                            # rows 61-65
)
# extract info in how many seasons character appeared: count the entries of
# the season list for every row at once.
# The list is split on "," and also on ";" so that an entry typed as "2;3"
# (the Balon Greyjoy line) is counted as two seasons without needing a
# hard-coded correction afterwards.
gotdf$season_count <- lengths(strsplit(gotdf$seasons, "[,;]"))
#########
# Plots #
#########

library(Cairo)

# Draw one plot into a PNG file.
#   file          output path of the PNG
#   plot_expr     plotting call; evaluated lazily, i.e. only once the device
#                 has been opened inside this function
#   width, height size passed on to CairoPNG()
# The device is closed via on.exit(), so it is released even when the plotting
# call fails. Returns `file` invisibly.
save_png <- function(file, plot_expr, width = 800, height = 400) {
  CairoPNG(file, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  force(plot_expr)
  invisible(file)
}

# plot screentime as a function of episode count
save_png(
  "st_epc.png",
  plot(
    screentime_seconds ~ episode_count,
    data = gotdf,
    xlab = "Episode Count",
    ylab = "Screen time (seconds)"
  )
)

# Lannister vs. Stark vs. Other screentime boxplot
save_png(
  "lan_stark_other.png",
  boxplot(
    screentime_seconds ~ sl_dummy,
    data = gotdf,
    xlab = "House",
    ylab = "Screen time (seconds)"
  )
)

# male vs. female character screentime
save_png(
  "got_sex.png",
  boxplot(
    screentime_seconds ~ sex,
    data = gotdf,
    xlab = "Sex",
    ylab = "Screen time (seconds)"
  )
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. Tyrion Lannister = 166:15 (1,2,3; 28) | |
2. Daenerys Targaryen = 127:35 (1,2,3; 25) | |
3. Jon Snow = 126:41 (1,2,3; 24) | |
4. Arya Stark = 100:11 (1,2,3; 27) | |
5. Eddard Stark = 94:35 (1; 9) | |
6. Catelyn Stark = 91:14 (1,2,3; 25) | |
7. Cersei Lannister = 90:02 (1,2,3; 27) | |
8. Robb Stark = 89:16 (1,2,3; 22) | |
9. Sansa Stark = 85:42 (1,2,3; 24) | |
10. Theon Greyjoy = 84:11 (1,2,3; 23) | |
11. Jorah Mormont = 72:18 (1,2,3; 24) | |
12. Jaime Lannister = 71:25 (1,2,3; 19) | |
13. Samwell Tarly = 61:48 (1,2,3; 19) | |
14. Petyr Baelish = 60:00 (1,2,3; 19) | |
15. Bran Stark = 57:56 (1,2,3; 21) | |
16. Varys = 57:17 (1,2,3; 18) | |
17. Joffrey Baratheon = 54:22 (1,2,3; 23) | |
18. Tywin Lannister = 49:39 (1,2,3; 19) | |
19. Davos Seaworth = 44:00 (2,3; 10) | |
20. Ygritte = 42:55 (2,3; 12) | |
21. Shae = 40:45 (1,2,3; 16) | |
22. Brienne of Tarth = 39:38 (2,3; 14) | |
23. Gendry = 39:14 (1,2,3; 17) | |
24. Sandor Clegane = 39:03 (1,2,3; 21) | |
25. Bronn = 36:54 (1,2,3; 16) | |
26. Stannis Baratheon = 36:45 (2,3; 12) | |
27. Margaery Tyrell = 32:51 (2,3; 10) | |
28. Talisa Maegyr = 32:25 (2,3; 12) | |
29. Melisandre = 32:22 (2,3; 10) | |
30. Osha = 30:53 (1,2,3; 14) | |
31. Robert Baratheon = 29:38 (1; 7) | |
32. Grand Maester Pycelle = 28:38 (1,2,3; 15) | |
33. Khal Drogo = 27:20 (1,2; 10) | |
34. Barristan Selmy = 25:43 (1,3; 13) | |
35. Renly Baratheon = 25:26 (1,2; 8) | |
36. Maester Luwin = 24:22 (1,2; 14) | |
37. Jeor Mormont = 23:42 (1,2,3; 12) | |
38. Ros = 22:12 (1,2,3; 15) | |
39. Loras Tyrell = 21:54 (1,2,3; 11) | |
40. Viserys Targaryen = 20:15 (1; 5) | |
41. Gilly = 18:08 (2,3; 9) | |
42. Rodrik Cassel = 16:46 (1,2; 12) | |
43. Hot Pie = 15:47 (1,2,3; 10) | |
44. Thoros of Myr = 15:35 (3; 6) | |
45. Olenna Tyrell = 14:54 (3; 5) | |
46. Yoren = 13:48 (1,2; 7) | |
47. Ramsay Snow = 13:43 (3; 6) | |
48. Roose Bolton = 13:41 (2,3; 11) | |
49. Edmure Tully = 13:08 (3; 5) | |
50. Walder Frey = 12:26 (1,3; 3) | |
51. Brynden Tully = 12:00 (3; 5) | |
52. Yara Greyjoy = 11:50 (2,3; 5) | |
53. Missandei = 11:39 (3; 8) | |
54. Beric Dondarion = 11:07 (1,3; 5) | |
55. Tormund Giantsbane = 10:27 (3; 7) | |
56. Daario Naharis = 10:12 (3; 3) | |
57. Jaqen H’qhar = 8:15 (1,2; 7) | |
58. Balon Greyjoy = 7:58 (2,3; 3)
59. Qyburn = 7:16 (3; 4) | |
60. Lysa Arryn = 7:03 (1; 4) | |
61. Benjen Stark = 7:00 (1; 3) | |
62. Maester Aemon = 6:49 (1,3; 4) | |
63. Mance Rayder = 6:22 (3; 3) | |
64. Syrio Forel = 6:15 (1; 3) | |
65. Robin Arryn = 4:37 (1; 4) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment