Skip to content

Instantly share code, notes, and snippets.

@daroczig
Last active April 29, 2016 22:48
Show Gist options
  • Save daroczig/119809ba423294753f0602856771d20c to your computer and use it in GitHub Desktop.
Save daroczig/119809ba423294753f0602856771d20c to your computer and use it in GitHub Desktop.
Summarise the numeric columns of a data frame (row count, distinct count, quantiles)
#' Describe the distribution of every numeric column
#'
#' For each numeric column of `df`, reports the number of values, the number
#' of distinct values and the requested quantiles.
#'
#' @param df A data frame-like object (`data.frame`, `data.table`, tibble).
#' @param quantiles Numeric vector of probabilities in `[0, 1]` passed to
#'   [stats::quantile()].
#' @param na.rm Passed to [stats::quantile()]; when `FALSE`, `quantile()`
#'   errors on columns containing `NA`. `n` and `ndistinct` always include
#'   missing values (`NA` counts as one distinct value).
#' @return A numeric matrix with one column per numeric column of `df` and
#'   rows `n`, `ndistinct`, followed by one row per requested quantile.
ddist <- function(df, quantiles = c(0, .02, .25, .50, .75, .90, .98, .99, .999, 1), na.rm = TRUE) {
  numvars <- which(vapply(df, is.numeric, logical(1)))
  vapply(numvars, function(i) {
    # `[[` always yields the bare column vector. `df[, i]` keeps a one-column
    # table for data.table (with = FALSE) and tibble, which made length() and
    # unique() count columns instead of values (n = 1, ndistinct = 1).
    v <- df[[i]]
    c(n = length(v),
      ndistinct = length(unique(v)),
      quantile(v, quantiles, na.rm = na.rm))
  }, FUN.VALUE = numeric(length(quantiles) + 2L))
}
> ddist(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width
n 150.0000 150.0000 150.0000 150.000
ndistinct 35.0000 23.0000 43.0000 22.000
0% 4.3000 2.0000 1.0000 0.100
2% 4.4000 2.2000 1.2000 0.100
25% 5.1000 2.8000 1.6000 0.300
50% 5.8000 3.0000 4.3500 1.300
75% 6.4000 3.3000 5.1000 1.800
90% 6.9000 3.6100 5.8000 2.200
98% 7.7000 4.0020 6.6020 2.402
99% 7.7000 4.1510 6.7000 2.500
99.9% 7.8702 4.3702 6.8702 2.500
100% 7.9000 4.4000 6.9000 2.500
> ddist(subset(iris, Species == 'virginica'))
Sepal.Length Sepal.Width Petal.Length Petal.Width
n 50.0000 50.000 50.0000 50.000
ndistinct 21.0000 13.000 20.0000 12.000
0% 4.9000 2.200 4.5000 1.400
2% 5.5860 2.494 4.7940 1.498
25% 6.2250 2.800 5.1000 1.800
50% 6.5000 3.000 5.5500 2.000
75% 6.9000 3.175 5.8750 2.300
90% 7.6100 3.310 6.3100 2.400
98% 7.7040 3.800 6.7040 2.500
99% 7.8020 3.800 6.8020 2.500
99.9% 7.8902 3.800 6.8902 2.500
100% 7.9000 3.800 6.9000 2.500
> ddist(data.table(iris))
Sepal.Length Sepal.Width Petal.Length Petal.Width
n 1.0000 1.0000 1.0000 1.000
ndistinct 1.0000 1.0000 1.0000 1.000
0% 4.3000 2.0000 1.0000 0.100
2% 4.4000 2.2000 1.2000 0.100
25% 5.1000 2.8000 1.6000 0.300
50% 5.8000 3.0000 4.3500 1.300
75% 6.4000 3.3000 5.1000 1.800
90% 6.9000 3.6100 5.8000 2.200
98% 7.7000 4.0020 6.6020 2.402
99% 7.7000 4.1510 6.7000 2.500
99.9% 7.8702 4.3702 6.8702 2.500
100% 7.9000 4.4000 6.9000 2.500
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment