Skip to content

Instantly share code, notes, and snippets.

@talegari
Last active October 4, 2016 06:58
Show Gist options
  • Save talegari/b16e0bce4e1475d33e8c409b42c83850 to your computer and use it in GitHub Desktop.
Save talegari/b16e0bce4e1475d33e8c409b42c83850 to your computer and use it in GitHub Desktop.
[R] Find the "right" point in a scree plot using a simple heuristic
###############################################################################
#
# optimal_scree ----
#
###############################################################################
#
# author : Srikanth KS (talegari)
# license : GNU AGPLv3 (http://choosealicense.com/licenses/agpl-3.0/)
#
###############################################################################
#
# Description ----
#
# To find the "right" point in a scree plot using a simple heuristic:
# Assume that the sequence is:
# - non-negative
# - non-decreasing
# - indexed from 1 to n
# - maximum value is not more than 1
# We consider two sets of indices and return the minimum among the
# common indices.
# Set 1: Pick all indices whose values are greater than `thres`.
# A value corresponding some index i is said to 'grow by'
# the amount of percentage change: (v[i+1] - v[i])/v[i]) * 100 where v[i] is the
# value corresponding to index i.
# Set 2: Pick all indices whose values grow by less than `maxGrowth`(percentage)
# If a set is empty, the index of the largest value is chosen.
#
# Arguments ----
#
# vec : numeric non-negative vector.
# A non-decreasing subsequence including the first element is used.
# maxVec : number not smaller than the maximum of 'vec'. 'vec' is normalized
# by dividing each element of vec by 'maxVec'. Defaults to NULL,
# indicating that maximum value of 'vec' is considered.
# thres : numeric indicating the minimum percentage value of normalized
# `vec`. Defaults to 90.
# minGrowth : numeric indicating the minimum 'grow by' value. If the next point
# grows greater than 'minGrowth', the point's index is not chosen.
# Defaults to 1.
#
# Value ----
#
# numeric value indicating the index position(in vec) of the chosen point.
#
###############################################################################
optimal_scree <- function(vec
, maxVec = NULL
, thres = 0.9
, minGrowth = 05){
# assertions
stopifnot(require("assertthat"
, quietly = TRUE
, warn.conflicts = FALSE
)
)
assert_that(all(vec >= 0))
assert_that(is.number(thres) && thres > 0 && thres < 1)
assert_that(is.number(minGrowth) && minGrowth > 0)
assert_that(is.null(maxVec) || (is.number(maxVec) && maxVec >= max(vec)))
# get an increasing subsequence
incss <- function(vec){
index <- rep(TRUE, length(vec))
maxVal <- vec[1]
for(i in 2:length(vec)){
if(vec[i] < maxVal){
index[i] <- FALSE
} else {
maxVal <- vec[i]
}
}
return(list(vec[index], index))
}
incvec <- incss(vec)
nvec <- incvec[[1]]
if(length(nvec) == 1){
message("optimal_scree: non-decreasing subsequence is a singleton.")
return(vec[[1]])
}
if(is.null(maxVec)){
maxVec <- max(nvec)
}
# normalize vec
vec2 <- (nvec/maxVec)
# growth of each index except the last
growth <- (diff(vec2)/head(vec2, -1))*100
gp <- which(growth < minGrowth)
# last index(of vec) of nvec
rightMostTrue <- Position(function(x){x == TRUE}
, incvec[[2]]
, right = TRUE
)
if(length(gp) == 0){
warning("minGrowth might be too small.")
return(rightMostTrue)
}
tp <- which(vec2 >= thres)
if(length(tp) == 0){
warning("thres might be too large.")
return(rightMostTrue)
}
# smallest index common to gp and tp
return(which(incvec[[2]])[min(intersect(gp, tp))])
}
# test
###############################################################################
#
# optimal_scree ----
#
###############################################################################
#
# author : Srikanth KS (talegari)
# license : GNU AGPLv3 (http://choosealicense.com/licenses/agpl-3.0/)
#
###############################################################################
#
# Description ----
#
# To find the "right" point in a scree plot using a simple heuristic:
# Assume that the sequence is:
# - non-negative
# - non-decreasing
# - indexed from 1 to n
# - maximum value is not more than 1
# We consider two sets of indices and return the minimum among the
# common indices.
# Set 1: Pick all indices whose values are greater than `thres`.
# A value corresponding some index i is said to 'grow by'
# the amount of percentage change: (v[i+1] - v[i])/v[i]) * 100 where v[i] is the
# value corresponding to index i.
# Set 2: Pick all indices whose values grow by less than `maxGrowth`(percentage)
# If a set is empty, the index of the largest value is chosen.
#
# Arguments ----
#
# vec : numeric non-negative vector.
# A non-decreasing subsequence including the first element is used.
# maxVec : number not smaller than the maximum of 'vec'. 'vec' is normalized
# by dividing each element of vec by 'maxVec'. Defaults to NULL,
# indicating that maximum value of 'vec' is considered.
# thres : numeric indicating the minimum percentage value of normalized
# `vec`. Defaults to 90.
# minGrowth : numeric indicating the minimum 'grow by' value. If the next point
# grows greater than 'minGrowth', the point's index is not chosen.
# Defaults to 1.
#
# Value ----
#
# numeric value indicating the index position(in vec) of the chosen point.
#
###############################################################################
optimal_scree <- function(vec
, maxVec = NULL
, thres = 0.9
, minGrowth = 05){
# assertions
stopifnot(require("assertthat"
, quietly = TRUE
, warn.conflicts = FALSE
)
)
assert_that(all(vec >= 0))
assert_that(is.number(thres) && thres > 0 && thres < 1)
assert_that(is.number(minGrowth) && minGrowth > 0)
assert_that(is.null(maxVec) || (is.number(maxVec) && maxVec >= max(vec)))
# get an increasing subsequence
incss <- function(vec){
index <- rep(TRUE, length(vec))
maxVal <- vec[1]
for(i in 2:length(vec)){
if(vec[i] < maxVal){
index[i] <- FALSE
} else {
maxVal <- vec[i]
}
}
return(list(vec[index], index))
}
incvec <- incss(vec)
nvec <- incvec[[1]]
if(length(nvec) == 1){
message("optimal_scree: non-decreasing subsequence is a singleton.")
return(vec[[1]])
}
if(is.null(maxVec)){
maxVec <- max(nvec)
}
# normalize vec
vec2 <- (nvec/maxVec)
# growth of each index except the last
growth <- (diff(vec2)/head(vec2, -1))*100
gp <- which(growth < minGrowth)
# last index(of vec) of nvec
rightMostTrue <- Position(function(x){x == TRUE}
, incvec[[2]]
, right = TRUE
)
if(length(gp) == 0){
warning("minGrowth might be too small.")
return(rightMostTrue)
}
tp <- which(vec2 >= thres)
if(length(tp) == 0){
warning("thres might be too large.")
return(rightMostTrue)
}
# smallest index common to gp and tp
return(which(incvec[[2]])[min(intersect(gp, tp))])
}
# # test
# y <- c(1, 2.5, 3, 3.1, 3.14, 3, 3.141, 3.1416)
# x <- 1:length(y)
# plot(x, y)
# lines(y)
# text(x, y, labels = y, pos = 1, cex = 0.5)
# a <- optimal_scree(y)
# abline(v = a, col = "red")
# b <- optimal_scree(y, minGrowth = 5)
# abline(v = b, col = "blue")
# c <- optimal_scree(y, minGrowth = 5, thres = 0.98)
# abline(v = c, col = "pink")
# d <- optimal_scree(y, minGrowth = 20, thres = 0.70)
# abline(v = d, col = "green")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment