Last active
December 10, 2015 11:06
-
-
Save talegari/1b0c52fafedfb495de95 to your computer and use it in GitHub Desktop.
Channel Prediction: predict next possible channel using previously used channels (quick hack)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# nextChannel ----
#
# Predict the next likely channel from the previously used channels.
# This is more of a quick hack than a time-series algorithm.
# It does not account for seasonal or long-term changes;
# if you need more, use ARIMA or ETS time-series methods.
#
# idea        | predict channel preference based on a few
#             | previously used channels.
#             |
# length      | how far to look backwards to determine the
#             | most frequently occurring channel, default = 3
#             | set it to 0 to use the full length of the input
#             | vector
#             |
# direction   | if several channels tie for the maximum,
#             | 0 means go to the left to include one more
#             | previous entry and see if the decision
#             | can be made. This is a cascading process.
#             | If no decision can be made up to the
#             | specified left end point (edge), the
#             | function is called again with direction = 1,
#             | which shrinks the look-back window instead.
#             | default: 0
#             |
# edge        | if direction = 0, how far to go to the left
#             | default: 0, which sets it to the length of the
#             | input vector
#             |
# freqThres   | minimum proportion that the most frequently
#             | occurring item must reach to be selected
#             | default: 1/(number of distinct elements of previousVec)
#             |
# verbose     | if TRUE, returns a list with:
#             |   * value: predicted value,
#             |   * cascadeLength: look-back length at which the
#             |     decision was made,
#             |   * freqThres: threshold that was applied,
#             |   * freqAtCascadeLength: frequency ratios at the
#             |     cascaded length,
#             |   * freqFull: frequency ratios of the whole input,
#             |   * freqAtEdge: frequency ratios at the edge
#             | if FALSE, returns only the predicted value
#             | default: TRUE
#
# dependencies
library('magrittr',quietly = T) | |
library('dplyr',quietly = T) | |
# get utilities from gist | |
library('devtools',quietly = T) | |
source_url('https://gist.github.com/talegari/fa4e8e72b3a9e23ede2e/raw/b359536c197a955449af307d6bcfce3bd34d2e76/utilities') | |
# ---- | |
nextChannel <- function(previousVec
                        , length = 3
                        , direction = 0
                        , edge = 0
                        , freqThres = 1/n_distinct(previousVec)
                        , verbose = TRUE){
  # Predict the next channel from the most recent entries of previousVec.
  #
  # previousVec : atomic vector (integer, numeric, character or logical) of
  #               previously used channels, oldest first.
  # length      : size of the initial look-back window; 0 (or anything larger
  #               than the input) means the whole input.
  # direction   : 0 grows the window by one entry at a time up to `edge`;
  #               1 shrinks it by one entry at a time down to a single entry.
  # edge        : largest window tried when direction = 0; 0 means the whole
  #               input.
  # freqThres   : minimum frequency ratio the winning channel must reach.
  # verbose     : TRUE returns a descriptive list, FALSE only the prediction.
  #
  # Returns the predicted value, or (verbose) a list with value,
  # cascadeLength, freqThres, freqAtCascadeLength, freqFull and freqAtEdge.
  # Stops with an error on invalid input or when no window yields a unique
  # channel that reaches freqThres.
  #
  # Relies on the externally sourced helpers `reverse` and `freqRatio`.
  # NOTE(review): `freqRatio` is presumed to return a named vector/table with
  # one frequency ratio per distinct value - confirm against the utilities.

  # TRUE when x is a single value that coerces to a non-negative integer
  isCount <- function(x) {
    if (base::length(x) != 1)
      return(FALSE)
    asInt <- suppressWarnings(as.integer(x))
    !is.na(asInt) && asInt >= 0
  }

  # check sanity of the input data ----
  # any() keeps the condition scalar for objects with more than one class
  if (!any(class(previousVec) %in%
           c('integer', 'numeric', 'character', 'logical')))
    stop('previous vector is invalid')
  nPrev <- base::length(previousVec)
  if (nPrev == 0)
    stop('previous vector is empty')
  if (!isCount(length))
    stop('length is invalid')
  # isTRUE() guards against NA and non-scalar arguments
  if (!(isTRUE(direction == 0) || isTRUE(direction == 1)))
    stop('direction has be either 0 or 1')
  if (!(isTRUE(verbose == TRUE) || isTRUE(verbose == FALSE)))
    stop('verbose has be either TRUE or FALSE')
  if (!isCount(edge))
    stop('edge is invalid')
  if (base::length(freqThres) != 1 || !is.numeric(freqThres) ||
      is.na(freqThres))
    stop('freqThres is invalid')

  # control length ----
  if (length > nPrev || length == 0)
    length <- nPrev

  # set edge to length when edge is not specified and control it ----
  if (edge == 0 || edge > nPrev || edge < length)
    edge <- nPrev

  # fodder for prediction ----
  # presumably most recent entry first - `reverse` comes from the utilities
  recent <- reverse(previousVec)
  step <- if (direction == 0) 1 else -1
  decided <- FALSE
  ell <- length

  # The lower bound `ell >= 1` makes the shrinking search (direction = 1)
  # terminate; the upper bound stops the growing search at `edge`.
  while (ell >= 1 && ell <= edge) {
    recentSlice <- head(recent, ell)
    ratios <- freqRatio(recentSlice)
    top <- max(ratios)
    isTop <- ratios == top
    if (sum(isTop) == 1 && top >= freqThres) {
      # Identify the winner by its label in the frequency table and map it
      # back to an element of the input so the original type is preserved.
      winner <- names(ratios)[which(isTop)]
      hit <- match(winner, as.character(recentSlice))
      if (base::length(hit) == 1 && !is.na(hit)) {
        pred <- recentSlice[hit]
      } else {
        # NOTE(review): labels unavailable or unmatched; fall back to the
        # legacy positional lookup, which may pick the wrong element.
        pred <- recent[which(isTop)]
      }
      decided <- TRUE
      break
    }
    ell <- ell + step
  }

  # no decision ----
  if (!decided) {
    if (direction == 1)
      stop('no channel could be predicted; check freqThres')
    # growing the window failed up to the edge, so shrink it instead
    return(nextChannel(previousVec = previousVec
                       , length = length
                       , direction = 1
                       , edge = 0
                       , freqThres = freqThres
                       , verbose = verbose))
  }

  # output ----
  if (verbose) {
    list(
      value = pred,
      cascadeLength = ell,
      freqThres = freqThres,
      # frequency ratio at cascaded length
      freqAtCascadeLength = sort(freqRatio(head(recent, ell)),
                                 decreasing = TRUE),
      # frequency ratio of input vector
      freqFull = sort(freqRatio(recent), decreasing = TRUE),
      # frequency ratio at edge
      freqAtEdge = sort(freqRatio(recent[seq_len(edge)]),
                        decreasing = TRUE)
    )
  } else {
    pred
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment