Skip to content

Instantly share code, notes, and snippets.

@talegari
Last active December 10, 2015 11:06
Show Gist options
  • Save talegari/1b0c52fafedfb495de95 to your computer and use it in GitHub Desktop.
Save talegari/1b0c52fafedfb495de95 to your computer and use it in GitHub Desktop.
Channel Prediction: predict next possible channel using previously used channels (quick hack)
# nextChannel ----
#
# predict next possible channel using previously used channels.
# This is more of a quick hack than a timeseries algorithm.
# It does not take care of seasonal changes or long-term changes.
# If you are looking for more, use ARIMA or ETS time series methods.
# idea | to predict channel preference based on few
# | previously used channels.
#
# length | how far to look backwards to determine the
# | maximum occurring channel, default = 3
# | set it to 0, to use full length of input
# | vector
#
# direction | if multiple maximums occur,
# | 0 implies go to left to include one more
# | previous entry and see if the decision
# | can be made. This is a cascading process.
# | If no decision can be made
# | till the specified left end point, the
# | function is called again with direction = 1.
# | 1 implies shrink the look-back window by one
# | entry at a time until a decision can be made.
# | default: 0
#
# edge | if direction = 0, how far to go to the left
# | default: 0, sets to length of input vector
#
# freqThres | min value of the proportion of the item
# | that occurs maximum number of times to be selected
# | default: 1/(number of distinct elements of previousVec)
#
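# verbose | if TRUE, returns a list with:
# | * value: predicted value,
# | * cascadeLength: look-back length at which the
# |   decision was made,
# | * freqThres: threshold that was applied,
# | * freqAtCascadeLength: frequency ratio at cascaded level,
# | * freqFull: frequency ratio of the full input vector,
# | * freqAtEdge: frequency ratio at the edge
# | if FALSE, returns predicted value
# | default = T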
#
# dependencies
library('magrittr',quietly = T)
library('dplyr',quietly = T)
# get utilities from gist
library('devtools',quietly = T)
source_url('https://gist.github.com/talegari/fa4e8e72b3a9e23ede2e/raw/b359536c197a955449af307d6bcfce3bd34d2e76/utilities')
# ----
# nextChannel: predict the next value of `previousVec` as the single most
# frequent value among its most recent entries.
#
# Arguments
#   previousVec : non-empty numeric, character or logical vector, oldest
#                 entry first.
#   length      : size of the initial look-back window; 0 (or anything
#                 larger than the input) means the whole input.
#   direction   : 0 = on a tie / below-threshold result grow the window by
#                 one older entry at a time (up to `edge`), then fall back
#                 to direction 1; 1 = shrink the window one entry at a time.
#   edge        : largest window tried when direction = 0; 0 means the
#                 whole input.
#   freqThres   : minimum frequency ratio the winning value must reach.
#   verbose     : TRUE returns a descriptive list, FALSE only the value.
#
# Returns the predicted value, or (verbose) a list with elements value,
# cascadeLength, freqThres, freqAtCascadeLength, freqFull and freqAtEdge.
#
# Raises an error on invalid arguments, and when no window size yields a
# decision (e.g. freqThres > 1) instead of looping forever.
#
# Relies on `reverse` and `freqRatio` from the sourced utilities gist.
# NOTE(review): `freqRatio` is assumed to return one frequency ratio per
# distinct value (optionally named by that value) -- confirm against the
# utilities gist.
nextChannel <- function(previousVec
                        , length = 3
                        , direction = 0
                        , edge = 0
                        , freqThres = 1/n_distinct(previousVec)
                        , verbose = TRUE){

  # `length` is also an argument name here, so call the function explicitly.
  size <- base::length
  isScalar <- function(x) size(x) == 1 && !is.na(x)

  # check sanity of the input data ----
  # is.*() instead of comparing class(): class() may have several elements,
  # which makes the `if` condition invalid.
  validType <- is.numeric(previousVec) ||
    is.character(previousVec) ||
    is.logical(previousVec)
  if (!validType || size(previousVec) == 0)
    stop('previous vector is invalid')

  if (!isScalar(length) ||
      is.na(suppressWarnings(as.integer(length))) ||
      length < 0)
    stop('length is invalid')

  if (!isScalar(direction) || (direction != 0 && direction != 1))
    stop('direction has be either 0 or 1')

  if (!isScalar(verbose) || (verbose != TRUE && verbose != FALSE))
    stop('verbose has be either TRUE or FALSE')

  if (!isScalar(edge) || is.na(suppressWarnings(as.integer(edge))))
    stop('edge is invalid')

  # control length ----
  n <- size(previousVec)
  if (length > n || length == 0)
    length <- n

  # set edge to length when edge is not specified and control it ----
  if (edge == 0 || edge > n || edge < length)
    edge <- n

  # Value that wins inside `window`. Prefer the label reported by freqRatio
  # (when it names its output); otherwise count the window directly.
  # Indexing the reversed input with the position inside the frequency
  # table does not give the winning value in general.
  windowMode <- function(window, ratios, top) {
    label <- names(ratios)[which(ratios == top)]
    if (size(label) == 1 && !is.na(label)) {
      hit <- match(label, as.character(window))
      if (!is.na(hit))
        return(window[hit])
    }
    distinct <- unique(window)
    counts <- tabulate(match(window, distinct), nbins = size(distinct))
    distinct[which.max(counts)]
  }

  # fodder for prediction ----
  recent <- reverse(previousVec) # presumably most recent entry first
  found <- FALSE                 # whether a unique maximum was determined
  ell <- length

  # Leaves the loop with found = TRUE once a prediction is made, or with
  # found = FALSE when the window runs past `edge` (direction 0) or
  # becomes empty (direction 1).
  while (ell >= 1 && ell <= edge) {
    window <- head(recent, ell)
    local <- freqRatio(window)
    if (size(local) > 0) {
      top <- max(local)
      if (sum(local == top) == 1 && top >= freqThres) {
        pred <- windowMode(window, local, top)
        found <- TRUE
        break
      }
    }
    # plain if/else: ifelse() is meant for vectors, not for side effects
    if (direction == 0) {
      ell <- ell + 1
    } else {
      ell <- ell - 1
    }
  }

  # output depending on the outcome ----
  if (!found) {
    if (direction == 0) {
      # growing the window did not help, so retry by shrinking it
      return(nextChannel(previousVec = previousVec
                         , length = length
                         , direction = 1
                         , edge = 0
                         , freqThres = freqThres
                         , verbose = verbose))
    }
    # shrinking failed as well: nothing left to try
    stop('no prediction could be made, check previousVec and freqThres')
  }

  if (!verbose)
    return(pred)

  # descriptive list output
  list(
    value = pred,
    cascadeLength = ell,
    freqThres = freqThres,
    # frequency ratio at cascaded length
    freqAtCascadeLength = sort(freqRatio(head(recent, ell)),
                               decreasing = TRUE),
    # frequency ratio of input vector
    freqFull = sort(freqRatio(recent), decreasing = TRUE),
    # frequency ratio at edge
    freqAtEdge = sort(freqRatio(head(recent, edge)), decreasing = TRUE)
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment