Last active
October 4, 2016 06:58
-
-
Save talegari/b16e0bce4e1475d33e8c409b42c83850 to your computer and use it in GitHub Desktop.
[R] Find the "right" point in a scree plot using a simple heuristic
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################### | |
# | |
# optimal_scree ---- | |
# | |
############################################################################### | |
# | |
# author : Srikanth KS (talegari) | |
# license : GNU AGPLv3 (http://choosealicense.com/licenses/agpl-3.0/) | |
# | |
############################################################################### | |
# | |
# Description ---- | |
# | |
# To find the "right" point in a scree plot using a simple heuristic: | |
# Assume that the sequence is: | |
# - non-negative | |
# - non-decreasing | |
# - indexed from 1 to n | |
# - maximum value is not more than 1 | |
# We consider two sets of indices and return the minimum among the | |
# common indices. | |
# Set 1: Pick all indices whose values are greater than `thres`. | |
# A value corresponding some index i is said to 'grow by' | |
# the amount of percentage change: (v[i+1] - v[i])/v[i]) * 100 where v[i] is the | |
# value corresponding to index i. | |
# Set 2: Pick all indices whose values grow by less than `maxGrowth`(percentage) | |
# If a set is empty, the index of the largest value is chosen. | |
# | |
# Arguments ---- | |
# | |
# vec : numeric non-negative vector. | |
# A non-decreasing subsequence including the first element is used. | |
# maxVec : number not smaller than the maximum of 'vec'. 'vec' is normalized | |
# by dividing each element of vec by 'maxVec'. Defaults to NULL, | |
# indicating that maximum value of 'vec' is considered. | |
# thres : numeric indicating the minimum percentage value of normalized | |
# `vec`. Defaults to 90. | |
# minGrowth : numeric indicating the minimum 'grow by' value. If the next point | |
# grows greater than 'minGrowth', the point's index is not chosen. | |
# Defaults to 1. | |
# | |
# Value ---- | |
# | |
# numeric value indicating the index position(in vec) of the chosen point. | |
# | |
############################################################################### | |
optimal_scree <- function(vec | |
, maxVec = NULL | |
, thres = 0.9 | |
, minGrowth = 05){ | |
# assertions | |
stopifnot(require("assertthat" | |
, quietly = TRUE | |
, warn.conflicts = FALSE | |
) | |
) | |
assert_that(all(vec >= 0)) | |
assert_that(is.number(thres) && thres > 0 && thres < 1) | |
assert_that(is.number(minGrowth) && minGrowth > 0) | |
assert_that(is.null(maxVec) || (is.number(maxVec) && maxVec >= max(vec))) | |
# get an increasing subsequence | |
incss <- function(vec){ | |
index <- rep(TRUE, length(vec)) | |
maxVal <- vec[1] | |
for(i in 2:length(vec)){ | |
if(vec[i] < maxVal){ | |
index[i] <- FALSE | |
} else { | |
maxVal <- vec[i] | |
} | |
} | |
return(list(vec[index], index)) | |
} | |
incvec <- incss(vec) | |
nvec <- incvec[[1]] | |
if(length(nvec) == 1){ | |
message("optimal_scree: non-decreasing subsequence is a singleton.") | |
return(vec[[1]]) | |
} | |
if(is.null(maxVec)){ | |
maxVec <- max(nvec) | |
} | |
# normalize vec | |
vec2 <- (nvec/maxVec) | |
# growth of each index except the last | |
growth <- (diff(vec2)/head(vec2, -1))*100 | |
gp <- which(growth < minGrowth) | |
# last index(of vec) of nvec | |
rightMostTrue <- Position(function(x){x == TRUE} | |
, incvec[[2]] | |
, right = TRUE | |
) | |
if(length(gp) == 0){ | |
warning("minGrowth might be too small.") | |
return(rightMostTrue) | |
} | |
tp <- which(vec2 >= thres) | |
if(length(tp) == 0){ | |
warning("thres might be too large.") | |
return(rightMostTrue) | |
} | |
# smallest index common to gp and tp | |
return(which(incvec[[2]])[min(intersect(gp, tp))]) | |
} | |
# test | |
############################################################################### | |
# | |
# optimal_scree ---- | |
# | |
############################################################################### | |
# | |
# author : Srikanth KS (talegari) | |
# license : GNU AGPLv3 (http://choosealicense.com/licenses/agpl-3.0/) | |
# | |
############################################################################### | |
# | |
# Description ---- | |
# | |
# To find the "right" point in a scree plot using a simple heuristic: | |
# Assume that the sequence is: | |
# - non-negative | |
# - non-decreasing | |
# - indexed from 1 to n | |
# - maximum value is not more than 1 | |
# We consider two sets of indices and return the minimum among the | |
# common indices. | |
# Set 1: Pick all indices whose values are greater than `thres`. | |
# A value corresponding some index i is said to 'grow by' | |
# the amount of percentage change: (v[i+1] - v[i])/v[i]) * 100 where v[i] is the | |
# value corresponding to index i. | |
# Set 2: Pick all indices whose values grow by less than `maxGrowth`(percentage) | |
# If a set is empty, the index of the largest value is chosen. | |
# | |
# Arguments ---- | |
# | |
# vec : numeric non-negative vector. | |
# A non-decreasing subsequence including the first element is used. | |
# maxVec : number not smaller than the maximum of 'vec'. 'vec' is normalized | |
# by dividing each element of vec by 'maxVec'. Defaults to NULL, | |
# indicating that maximum value of 'vec' is considered. | |
# thres : numeric indicating the minimum percentage value of normalized | |
# `vec`. Defaults to 90. | |
# minGrowth : numeric indicating the minimum 'grow by' value. If the next point | |
# grows greater than 'minGrowth', the point's index is not chosen. | |
# Defaults to 1. | |
# | |
# Value ---- | |
# | |
# numeric value indicating the index position(in vec) of the chosen point. | |
# | |
############################################################################### | |
optimal_scree <- function(vec | |
, maxVec = NULL | |
, thres = 0.9 | |
, minGrowth = 05){ | |
# assertions | |
stopifnot(require("assertthat" | |
, quietly = TRUE | |
, warn.conflicts = FALSE | |
) | |
) | |
assert_that(all(vec >= 0)) | |
assert_that(is.number(thres) && thres > 0 && thres < 1) | |
assert_that(is.number(minGrowth) && minGrowth > 0) | |
assert_that(is.null(maxVec) || (is.number(maxVec) && maxVec >= max(vec))) | |
# get an increasing subsequence | |
incss <- function(vec){ | |
index <- rep(TRUE, length(vec)) | |
maxVal <- vec[1] | |
for(i in 2:length(vec)){ | |
if(vec[i] < maxVal){ | |
index[i] <- FALSE | |
} else { | |
maxVal <- vec[i] | |
} | |
} | |
return(list(vec[index], index)) | |
} | |
incvec <- incss(vec) | |
nvec <- incvec[[1]] | |
if(length(nvec) == 1){ | |
message("optimal_scree: non-decreasing subsequence is a singleton.") | |
return(vec[[1]]) | |
} | |
if(is.null(maxVec)){ | |
maxVec <- max(nvec) | |
} | |
# normalize vec | |
vec2 <- (nvec/maxVec) | |
# growth of each index except the last | |
growth <- (diff(vec2)/head(vec2, -1))*100 | |
gp <- which(growth < minGrowth) | |
# last index(of vec) of nvec | |
rightMostTrue <- Position(function(x){x == TRUE} | |
, incvec[[2]] | |
, right = TRUE | |
) | |
if(length(gp) == 0){ | |
warning("minGrowth might be too small.") | |
return(rightMostTrue) | |
} | |
tp <- which(vec2 >= thres) | |
if(length(tp) == 0){ | |
warning("thres might be too large.") | |
return(rightMostTrue) | |
} | |
# smallest index common to gp and tp | |
return(which(incvec[[2]])[min(intersect(gp, tp))]) | |
} | |
# # test | |
# y <- c(1, 2.5, 3, 3.1, 3.14, 3, 3.141, 3.1416) | |
# x <- 1:length(y) | |
# plot(x, y) | |
# lines(y) | |
# text(x, y, labels = y, pos = 1, cex = 0.5) | |
# a <- optimal_scree(y) | |
# abline(v = a, col = "red") | |
# b <- optimal_scree(y, minGrowth = 5) | |
# abline(v = b, col = "blue") | |
# c <- optimal_scree(y, minGrowth = 5, thres = 0.98) | |
# abline(v = c, col = "pink") | |
# d <- optimal_scree(y, minGrowth = 20, thres = 0.70) | |
# abline(v = d, col = "green") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment