Created
August 4, 2013 22:23
-
-
Save ccagrawal/6152192 to your computer and use it in GitHub Desktop.
WordPress Views and MCMC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For each day, find the appropriate Poisson Parameter and use it to calculate the likelihood of the actual view count | |
# Likelihood = P(D|H) | |
Calc.Likelihood <- function(changes, poissons, stats, periods) { | |
likelihood <- 0 | |
for (day in 1:nrow(stats)) { | |
proceed <- FALSE | |
period <- 1 | |
while (!proceed) { | |
if (day < changes[period]) { | |
likelihood <- likelihood + dpois(stats[day, 2], poissons[period], log = TRUE) # Add log likelihoods | |
proceed <- TRUE | |
} | |
period <- period + 1 | |
} | |
} | |
return(likelihood) | |
} | |
api.key <- "blah blah" # Insert your own API Key | |
blog.id <- 696969 # Insert your own blog ID | |
table <- "views" # View counts | |
days <- -1 # Unlimited number of days | |
url <- paste("http://stats.wordpress.com/csv.php?api_key=", api.key, | |
"&blog_id=", blog.id, "&table=", table, "&days=", days, sep = "") | |
stats <- read.csv(url, header = TRUE) | |
stats$date <- as.Date(stats$date) | |
# Stats does not include days with 0 views, manually add those by creating a new data frame with all dates in between | |
adj.stats <- as.data.frame(seq(from = min(stats$date), to = max(stats$date), by = 1)) | |
colnames(adj.stats) <- "date" | |
stats <- merge(stats, adj.stats, by = "date", all = TRUE) | |
stats[is.na(stats)] <- 0 | |
thin <- 10 # To reduce autocorrelation, use thinning to only keep 1 of every 10 samples | |
burn <- 2000 # Use a "burn" period since we're starting with vague priors | |
sample <- 10000 | |
periods <- 3 # My view count seems to be divided in 3 general periods | |
posteriors <- matrix(data = 0, nrow = burn + thin * sample, ncol = periods * 2) | |
# Assume each "period" consists of at least 30 days | |
# Thus, randomly sample starting from day 30 to day (end - 30) | |
# Make last "change day" equal to last blog day | |
# Highest 10 day avg views was about 21, so randomly sample poisson parameters from 0 to 22 | |
changes <- c(sort(sample(30:nrow(stats) - 30, periods - 1, replace = TRUE)), nrow(stats) + 1) | |
poissons <- sample(0:22, periods, replace = TRUE) | |
posteriors[1, ] <- c(changes, poissons) | |
old.prob <- Calc.Likelihood(changes, poissons, stats, periods) | |
for (iter in 2:nrow(posteriors)) { | |
parameter <- iter %% (periods * 2) # Only change one parameter at a time | |
if (parameter < periods) { | |
changes <- posteriors[iter - 1, c(1:periods)] | |
if ((parameter + 1) != periods) { # Don't change the last day | |
changes[parameter + 1] <- changes[parameter + 1] + round(rnorm(1, 0, 15)) | |
} | |
changes[changes < 30] <- 30 # Minimum change day is 30 | |
changes <- sort(changes) | |
for (change in periods:2) { | |
if (changes[change] - changes[change - 1] < 30) # Make sure gaps are at least 30 days | |
changes[change - 1] = changes[change] - 30 | |
} | |
} | |
else { | |
poissons <- posteriors[iter - 1, c((periods + 1):(2 * periods))] | |
poissons[parameter - periods + 1] <- poissons[parameter - periods + 1] + round(rnorm(1, 0, 2)) | |
poissons[poissons < 0] <- 0 # Poisson distributions cannot have negative parameters | |
} | |
new.prob <- Calc.Likelihood(changes, poissons, stats, periods) | |
switch.prob <- min(1, exp(new.prob - old.prob)) | |
random <- runif(1, min = 0, max = 1) | |
if (random < switch.prob) { | |
posteriors[iter, ] <- c(changes, poissons) | |
old.prob <- new.prob | |
} | |
else { | |
posteriors[iter, ] <- posteriors[iter - 1, ] | |
} | |
} | |
posteriors <- posteriors[seq(from = burn + 1, to = burn + thin * sample, by = thin), ] | |
# For each day, average the number of views from each posterior | |
expected.views <- numeric(nrow(stats)) | |
for (day in 1:nrow(stats)) { | |
sum <- 0 | |
for (posterior in 1:nrow(posteriors)) { | |
proceed <- FALSE | |
period <- 1 | |
while (!proceed) { | |
if (day < posteriors[posterior, period]) { | |
sum <- sum + posteriors[posterior, period + periods] | |
proceed <- TRUE | |
} | |
period <- period + 1 | |
} | |
} | |
expected.views[day] <- sum/nrow(posteriors) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment