|
// Browserify UMD prelude (generated code — do not edit by hand). It exposes the
// bundle as CommonJS `module.exports`, an AMD module, or a global `ssck`, and
// defines the inlined `require`/module-registry loader (`function e(t,n,r)`)
// that wires together the numbered module factories that follow.
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.ssck = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
|
'use strict';

// # simple-statistics

// A simple, literate statistics system.

// Root namespace object: every statistic in the library is attached to
// `ssck`, which is also the module's CommonJS export.
var ssck = module.exports = {};

// Linear Regression
ssck.linearRegression = require('./src/linear_regression');
ssck.linearRegressionLine = require('./src/linear_regression_line');
ssck.standardDeviation = require('./src/standard_deviation');
ssck.rSquared = require('./src/r_squared');
ssck.mode = require('./src/mode');
ssck.min = require('./src/min');
ssck.max = require('./src/max');
ssck.sum = require('./src/sum');
ssck.quantile = require('./src/quantile');
ssck.quantileSorted = require('./src/quantile_sorted');
// `iqr` is a short alias for the interquartile range.
ssck.iqr = ssck.interquartileRange = require('./src/interquartile_range');
// `mad` is a short alias for the median absolute deviation.
ssck.medianAbsoluteDeviation = ssck.mad = require('./src/mad');
ssck.chunk = require('./src/chunk');
ssck.shuffle = require('./src/shuffle');
ssck.shuffleInPlace = require('./src/shuffle_in_place');
ssck.sample = require('./src/sample');
ssck.ckmeans = require('./src/ckmeans');
ssck.sortedUniqueCount = require('./src/sorted_unique_count');
ssck.sumNthPowerDeviations = require('./src/sum_nth_power_deviations');

// sample statistics
ssck.sampleCovariance = require('./src/sample_covariance');
ssck.sampleCorrelation = require('./src/sample_correlation');
ssck.sampleVariance = require('./src/sample_variance');
ssck.sampleStandardDeviation = require('./src/sample_standard_deviation');
ssck.sampleSkewness = require('./src/sample_skewness');

// measures of centrality
ssck.geometricMean = require('./src/geometric_mean');
ssck.harmonicMean = require('./src/harmonic_mean');
// `average` is an alias for the arithmetic mean.
ssck.mean = ssck.average = require('./src/mean');
ssck.median = require('./src/median');

// `rms` is a short alias for root mean square.
ssck.rootMeanSquare = ssck.rms = require('./src/root_mean_square');
ssck.variance = require('./src/variance');
ssck.tTest = require('./src/t_test');
ssck.tTestTwoSample = require('./src/t_test_two_sample');
// ssck.jenks = require('./src/jenks');

// Classifiers
ssck.bayesian = require('./src/bayesian_classifier');
ssck.perceptron = require('./src/perceptron');

// Distribution-related methods
ssck.epsilon = require('./src/epsilon'); // We make ε available to the test suite.
ssck.factorial = require('./src/factorial');
ssck.bernoulliDistribution = require('./src/bernoulli_distribution');
ssck.binomialDistribution = require('./src/binomial_distribution');
ssck.poissonDistribution = require('./src/poisson_distribution');
ssck.chiSquaredGoodnessOfFit = require('./src/chi_squared_goodness_of_fit');

// Normal distribution
ssck.zScore = require('./src/z_score');
ssck.cumulativeStdNormalProbability = require('./src/cumulative_std_normal_probability');
ssck.standardNormalTable = require('./src/standard_normal_table');
// `erf` is a short alias for the Gauss error function.
ssck.errorFunction = ssck.erf = require('./src/error_function');
ssck.inverseErrorFunction = require('./src/inverse_error_function');
ssck.probit = require('./src/probit');
ssck.mixin = require('./src/mixin');
|
},{"./src/bayesian_classifier":2,"./src/bernoulli_distribution":3,"./src/binomial_distribution":4,"./src/chi_squared_goodness_of_fit":6,"./src/chunk":7,"./src/ckmeans":8,"./src/cumulative_std_normal_probability":9,"./src/epsilon":10,"./src/error_function":11,"./src/factorial":12,"./src/geometric_mean":13,"./src/harmonic_mean":14,"./src/interquartile_range":15,"./src/inverse_error_function":16,"./src/linear_regression":17,"./src/linear_regression_line":18,"./src/mad":19,"./src/max":20,"./src/mean":21,"./src/median":22,"./src/min":23,"./src/mixin":24,"./src/mode":25,"./src/perceptron":27,"./src/poisson_distribution":28,"./src/probit":29,"./src/quantile":30,"./src/quantile_sorted":31,"./src/r_squared":32,"./src/root_mean_square":33,"./src/sample":34,"./src/sample_correlation":35,"./src/sample_covariance":36,"./src/sample_skewness":37,"./src/sample_standard_deviation":38,"./src/sample_variance":39,"./src/shuffle":40,"./src/shuffle_in_place":41,"./src/sorted_unique_count":42,"./src/standard_deviation":43,"./src/standard_normal_table":44,"./src/sum":45,"./src/sum_nth_power_deviations":46,"./src/t_test":47,"./src/t_test_two_sample":48,"./src/variance":49,"./src/z_score":50}],2:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * [Bayesian Classifier](http://en.wikipedia.org/wiki/Naive_Bayes_classifier)
 *
 * A naïve Bayesian classifier for items expressed as flat (singly-nested)
 * objects of literal keys and values.
 *
 * @class
 * @example
 * var bayes = new BayesianClassifier();
 * bayes.train({ species: 'Cat' }, 'animal');
 * var result = bayes.score({ species: 'Cat' });
 * // result is { animal: 1 }
 */
function BayesianClassifier() {
    // How many items have been absorbed via `train` so far.
    this.totalCount = 0;
    // Observation counts, nested as category -> key -> value -> count.
    this.data = {};
}

/**
 * Train the classifier with a new item, which has a single
 * dimension of Javascript literal keys and values.
 *
 * @param {Object} item an object with singly-deep properties
 * @param {string} category the category this item belongs to
 * @return {undefined} adds the item to the classifier
 */
BayesianClassifier.prototype.train = function(item, category) {
    // Lazily create the per-category bucket of counts.
    if (!this.data[category]) {
        this.data[category] = {};
    }
    var categoryCounts = this.data[category];

    // Record one observation of every key/value pair on the item,
    // creating the nested count objects on first sight.
    for (var key in item) {
        var value = item[key];
        if (categoryCounts[key] === undefined) {
            categoryCounts[key] = {};
        }
        if (categoryCounts[key][value] === undefined) {
            categoryCounts[key][value] = 0;
        }
        categoryCounts[key][value]++;
    }

    // One more item has been classified overall.
    this.totalCount++;
};

/**
 * Generate a score of how well this item matches all
 * possible categories based on its attributes
 *
 * @param {Object} item an item in the same format as with train
 * @returns {Object} of probabilities that this item belongs to a
 * given category.
 */
BayesianClassifier.prototype.score = function(item) {
    var odds = {};
    var category;

    // For every key/value pair on the item, and for every category seen
    // in previous calls to `.train()`, record how popular that pair is
    // relative to the whole trained population. A category that has never
    // seen the key at all contributes 0 for the pair.
    for (var key in item) {
        var value = item[key];
        for (category in this.data) {
            if (odds[category] === undefined) { odds[category] = {}; }
            var keyCounts = this.data[category][key];
            if (keyCounts) {
                odds[category][key + '_' + value] =
                    (keyCounts[value] || 0) / this.totalCount;
            } else {
                odds[category][key + '_' + value] = 0;
            }
        }
    }

    // Collapse the per-pair odds into a single sum per category; pairs a
    // category never observed simply add nothing.
    var oddsSums = {};
    for (category in odds) {
        for (var combination in odds[category]) {
            if (oddsSums[category] === undefined) {
                oddsSums[category] = 0;
            }
            oddsSums[category] += odds[category][combination];
        }
    }

    return oddsSums;
};
|
|
|
module.exports = BayesianClassifier; |
|
|
|
},{}],3:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var binomialDistribution = require('./binomial_distribution'); |
|
|
|
/**
 * The [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution)
 * is the discrete probability distribution of a random variable which takes
 * value 1 with success probability `p` and value 0 with failure probability
 * `q` = 1 - `p`. It can model, for example, a coin toss where "1" means
 * "heads" and "0" means "tails" (or vice versa). It is the special case of
 * a Binomial Distribution where `n` = 1.
 *
 * @param {number} p input value, between 0 and 1 inclusive
 * @returns {Object} the distribution (as produced by `binomialDistribution`
 * with a single trial), or null when `p` is not a valid probability
 */
function bernoulliDistribution(p) {
    // Reject values outside [0, 1] — not a valid probability.
    if (p < 0 || p > 1) { return null; }

    // A Bernoulli trial is exactly a binomial experiment with one trial.
    return binomialDistribution(1, p);
}
|
|
|
module.exports = bernoulliDistribution; |
|
|
|
},{"./binomial_distribution":4}],4:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var epsilon = require('./epsilon'); |
|
var factorial = require('./factorial'); |
|
|
|
/**
 * The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability
 * distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields
 * success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or
 * Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution.
 *
 * @param {number} trials number of trials to simulate; must be a strictly positive integer
 * @param {number} probability success probability of a single trial, between 0 and 1 inclusive
 * @returns {Object} the probability mass for each outcome, keyed by the
 * number of successes, or null when the input is invalid
 */
function binomialDistribution(trials, probability) {
    // Check that `probability` is a valid probability (0 ≤ p ≤ 1),
    // and that `trials` is an integer, strictly positive.
    if (probability < 0 || probability > 1 ||
        trials <= 0 || trials % 1 !== 0) {
        return null;
    }

    // `x` is the random variable (number of successes), `cells` maps each
    // value of `x` to its probability, and `cumulativeProbability`
    // accumulates the mass described so far. `trials!` does not change
    // inside the loop, so compute it once instead of on every iteration.
    var x = 0,
        cumulativeProbability = 0,
        cells = {},
        factorialTrials = factorial(trials);

    // This algorithm iterates through each potential outcome until the
    // `cumulativeProbability` is very close to 1, at which point we've
    // defined the vast majority of outcomes.
    do {
        // the [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function):
        // C(trials, x) * p^x * (1 - p)^(trials - x)
        cells[x] = factorialTrials /
            (factorial(x) * factorial(trials - x)) *
            (Math.pow(probability, x) * Math.pow(1 - probability, trials - x));
        cumulativeProbability += cells[x];
        x++;
        // when the cumulativeProbability is nearly 1, we've calculated
        // the useful range of this distribution
    } while (cumulativeProbability < 1 - epsilon);

    return cells;
}
|
|
|
module.exports = binomialDistribution; |
|
|
|
},{"./epsilon":10,"./factorial":12}],5:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * **Percentage Points of the χ2 (Chi-Squared) Distribution**
 *
 * The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common
 * chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two
 * criteria of classification of qualitative data, and in confidence interval estimation for a population standard
 * deviation of a normal distribution from a sample standard deviation.
 *
 * Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, "Probability and Statistics in
 * Engineering and Management Science", Wiley (1980).
 *
 * Shape: outer keys are degrees of freedom (1-30, then steps of 10 up to
 * 100); inner keys are upper-tail significance levels, each mapping to the
 * χ2 critical value at that level.
 */
var chiSquaredDistributionTable = {
    1: { 0.995: 0.00, 0.99: 0.00, 0.975: 0.00, 0.95: 0.00, 0.9: 0.02, 0.5: 0.45, 0.1: 2.71, 0.05: 3.84, 0.025: 5.02, 0.01: 6.63, 0.005: 7.88 },
    2: { 0.995: 0.01, 0.99: 0.02, 0.975: 0.05, 0.95: 0.10, 0.9: 0.21, 0.5: 1.39, 0.1: 4.61, 0.05: 5.99, 0.025: 7.38, 0.01: 9.21, 0.005: 10.60 },
    3: { 0.995: 0.07, 0.99: 0.11, 0.975: 0.22, 0.95: 0.35, 0.9: 0.58, 0.5: 2.37, 0.1: 6.25, 0.05: 7.81, 0.025: 9.35, 0.01: 11.34, 0.005: 12.84 },
    4: { 0.995: 0.21, 0.99: 0.30, 0.975: 0.48, 0.95: 0.71, 0.9: 1.06, 0.5: 3.36, 0.1: 7.78, 0.05: 9.49, 0.025: 11.14, 0.01: 13.28, 0.005: 14.86 },
    5: { 0.995: 0.41, 0.99: 0.55, 0.975: 0.83, 0.95: 1.15, 0.9: 1.61, 0.5: 4.35, 0.1: 9.24, 0.05: 11.07, 0.025: 12.83, 0.01: 15.09, 0.005: 16.75 },
    6: { 0.995: 0.68, 0.99: 0.87, 0.975: 1.24, 0.95: 1.64, 0.9: 2.20, 0.5: 5.35, 0.1: 10.65, 0.05: 12.59, 0.025: 14.45, 0.01: 16.81, 0.005: 18.55 },
    7: { 0.995: 0.99, 0.99: 1.25, 0.975: 1.69, 0.95: 2.17, 0.9: 2.83, 0.5: 6.35, 0.1: 12.02, 0.05: 14.07, 0.025: 16.01, 0.01: 18.48, 0.005: 20.28 },
    8: { 0.995: 1.34, 0.99: 1.65, 0.975: 2.18, 0.95: 2.73, 0.9: 3.49, 0.5: 7.34, 0.1: 13.36, 0.05: 15.51, 0.025: 17.53, 0.01: 20.09, 0.005: 21.96 },
    9: { 0.995: 1.73, 0.99: 2.09, 0.975: 2.70, 0.95: 3.33, 0.9: 4.17, 0.5: 8.34, 0.1: 14.68, 0.05: 16.92, 0.025: 19.02, 0.01: 21.67, 0.005: 23.59 },
    10: { 0.995: 2.16, 0.99: 2.56, 0.975: 3.25, 0.95: 3.94, 0.9: 4.87, 0.5: 9.34, 0.1: 15.99, 0.05: 18.31, 0.025: 20.48, 0.01: 23.21, 0.005: 25.19 },
    11: { 0.995: 2.60, 0.99: 3.05, 0.975: 3.82, 0.95: 4.57, 0.9: 5.58, 0.5: 10.34, 0.1: 17.28, 0.05: 19.68, 0.025: 21.92, 0.01: 24.72, 0.005: 26.76 },
    12: { 0.995: 3.07, 0.99: 3.57, 0.975: 4.40, 0.95: 5.23, 0.9: 6.30, 0.5: 11.34, 0.1: 18.55, 0.05: 21.03, 0.025: 23.34, 0.01: 26.22, 0.005: 28.30 },
    13: { 0.995: 3.57, 0.99: 4.11, 0.975: 5.01, 0.95: 5.89, 0.9: 7.04, 0.5: 12.34, 0.1: 19.81, 0.05: 22.36, 0.025: 24.74, 0.01: 27.69, 0.005: 29.82 },
    14: { 0.995: 4.07, 0.99: 4.66, 0.975: 5.63, 0.95: 6.57, 0.9: 7.79, 0.5: 13.34, 0.1: 21.06, 0.05: 23.68, 0.025: 26.12, 0.01: 29.14, 0.005: 31.32 },
    15: { 0.995: 4.60, 0.99: 5.23, 0.975: 6.27, 0.95: 7.26, 0.9: 8.55, 0.5: 14.34, 0.1: 22.31, 0.05: 25.00, 0.025: 27.49, 0.01: 30.58, 0.005: 32.80 },
    16: { 0.995: 5.14, 0.99: 5.81, 0.975: 6.91, 0.95: 7.96, 0.9: 9.31, 0.5: 15.34, 0.1: 23.54, 0.05: 26.30, 0.025: 28.85, 0.01: 32.00, 0.005: 34.27 },
    17: { 0.995: 5.70, 0.99: 6.41, 0.975: 7.56, 0.95: 8.67, 0.9: 10.09, 0.5: 16.34, 0.1: 24.77, 0.05: 27.59, 0.025: 30.19, 0.01: 33.41, 0.005: 35.72 },
    18: { 0.995: 6.26, 0.99: 7.01, 0.975: 8.23, 0.95: 9.39, 0.9: 10.87, 0.5: 17.34, 0.1: 25.99, 0.05: 28.87, 0.025: 31.53, 0.01: 34.81, 0.005: 37.16 },
    19: { 0.995: 6.84, 0.99: 7.63, 0.975: 8.91, 0.95: 10.12, 0.9: 11.65, 0.5: 18.34, 0.1: 27.20, 0.05: 30.14, 0.025: 32.85, 0.01: 36.19, 0.005: 38.58 },
    20: { 0.995: 7.43, 0.99: 8.26, 0.975: 9.59, 0.95: 10.85, 0.9: 12.44, 0.5: 19.34, 0.1: 28.41, 0.05: 31.41, 0.025: 34.17, 0.01: 37.57, 0.005: 40.00 },
    21: { 0.995: 8.03, 0.99: 8.90, 0.975: 10.28, 0.95: 11.59, 0.9: 13.24, 0.5: 20.34, 0.1: 29.62, 0.05: 32.67, 0.025: 35.48, 0.01: 38.93, 0.005: 41.40 },
    22: { 0.995: 8.64, 0.99: 9.54, 0.975: 10.98, 0.95: 12.34, 0.9: 14.04, 0.5: 21.34, 0.1: 30.81, 0.05: 33.92, 0.025: 36.78, 0.01: 40.29, 0.005: 42.80 },
    23: { 0.995: 9.26, 0.99: 10.20, 0.975: 11.69, 0.95: 13.09, 0.9: 14.85, 0.5: 22.34, 0.1: 32.01, 0.05: 35.17, 0.025: 38.08, 0.01: 41.64, 0.005: 44.18 },
    24: { 0.995: 9.89, 0.99: 10.86, 0.975: 12.40, 0.95: 13.85, 0.9: 15.66, 0.5: 23.34, 0.1: 33.20, 0.05: 36.42, 0.025: 39.36, 0.01: 42.98, 0.005: 45.56 },
    25: { 0.995: 10.52, 0.99: 11.52, 0.975: 13.12, 0.95: 14.61, 0.9: 16.47, 0.5: 24.34, 0.1: 34.28, 0.05: 37.65, 0.025: 40.65, 0.01: 44.31, 0.005: 46.93 },
    26: { 0.995: 11.16, 0.99: 12.20, 0.975: 13.84, 0.95: 15.38, 0.9: 17.29, 0.5: 25.34, 0.1: 35.56, 0.05: 38.89, 0.025: 41.92, 0.01: 45.64, 0.005: 48.29 },
    27: { 0.995: 11.81, 0.99: 12.88, 0.975: 14.57, 0.95: 16.15, 0.9: 18.11, 0.5: 26.34, 0.1: 36.74, 0.05: 40.11, 0.025: 43.19, 0.01: 46.96, 0.005: 49.65 },
    28: { 0.995: 12.46, 0.99: 13.57, 0.975: 15.31, 0.95: 16.93, 0.9: 18.94, 0.5: 27.34, 0.1: 37.92, 0.05: 41.34, 0.025: 44.46, 0.01: 48.28, 0.005: 50.99 },
    29: { 0.995: 13.12, 0.99: 14.26, 0.975: 16.05, 0.95: 17.71, 0.9: 19.77, 0.5: 28.34, 0.1: 39.09, 0.05: 42.56, 0.025: 45.72, 0.01: 49.59, 0.005: 52.34 },
    30: { 0.995: 13.79, 0.99: 14.95, 0.975: 16.79, 0.95: 18.49, 0.9: 20.60, 0.5: 29.34, 0.1: 40.26, 0.05: 43.77, 0.025: 46.98, 0.01: 50.89, 0.005: 53.67 },
    40: { 0.995: 20.71, 0.99: 22.16, 0.975: 24.43, 0.95: 26.51, 0.9: 29.05, 0.5: 39.34, 0.1: 51.81, 0.05: 55.76, 0.025: 59.34, 0.01: 63.69, 0.005: 66.77 },
    50: { 0.995: 27.99, 0.99: 29.71, 0.975: 32.36, 0.95: 34.76, 0.9: 37.69, 0.5: 49.33, 0.1: 63.17, 0.05: 67.50, 0.025: 71.42, 0.01: 76.15, 0.005: 79.49 },
    60: { 0.995: 35.53, 0.99: 37.48, 0.975: 40.48, 0.95: 43.19, 0.9: 46.46, 0.5: 59.33, 0.1: 74.40, 0.05: 79.08, 0.025: 83.30, 0.01: 88.38, 0.005: 91.95 },
    70: { 0.995: 43.28, 0.99: 45.44, 0.975: 48.76, 0.95: 51.74, 0.9: 55.33, 0.5: 69.33, 0.1: 85.53, 0.05: 90.53, 0.025: 95.02, 0.01: 100.42, 0.005: 104.22 },
    80: { 0.995: 51.17, 0.99: 53.54, 0.975: 57.15, 0.95: 60.39, 0.9: 64.28, 0.5: 79.33, 0.1: 96.58, 0.05: 101.88, 0.025: 106.63, 0.01: 112.33, 0.005: 116.32 },
    90: { 0.995: 59.20, 0.99: 61.75, 0.975: 65.65, 0.95: 69.13, 0.9: 73.29, 0.5: 89.33, 0.1: 107.57, 0.05: 113.14, 0.025: 118.14, 0.01: 124.12, 0.005: 128.30 },
    100: { 0.995: 67.33, 0.99: 70.06, 0.975: 74.22, 0.95: 77.93, 0.9: 82.36, 0.5: 99.33, 0.1: 118.50, 0.05: 124.34, 0.025: 129.56, 0.01: 135.81, 0.005: 140.17 }
};

module.exports = chiSquaredDistributionTable;
|
|
|
},{}],6:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var mean = require('./mean'); |
|
var chiSquaredDistributionTable = require('./chi_squared_distribution_table'); |
|
|
|
/**
 * The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test)
 * uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies
 * (that is, counts of observations), each squared and divided by the number of observations expected given the
 * hypothesized distribution. The resulting χ2 statistic, `chiSquared`, can be compared to the chi-squared distribution
 * to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one
 * takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic
 * follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty
 * cells and `c` is the number of estimated parameters for the distribution.
 *
 * @param {Array<number>} data
 * @param {Function} distributionType a function that returns a point in a distribution:
 * for instance, binomial, bernoulli, or poisson
 * @param {number} significance
 * @returns {boolean} true when the hypothesized distribution is rejected at
 * the given significance level (i.e. the χ2 statistic exceeds the critical
 * value from the table), false otherwise
 * @example
 * // Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. Montgomery,
 * // "Probability and Statistics in Engineering and Management Science", Wiley (1980).
 * var data1019 = [
 *     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 *     2, 2, 2, 2, 2, 2, 2, 2, 2,
 *     3, 3, 3, 3
 * ];
 * ss.chiSquaredGoodnessOfFit(data1019, ss.poissonDistribution, 0.05)); //= false
 */
function chiSquaredGoodnessOfFit(data, distributionType, significance) {
    // Estimate from the sample data, a weighted mean.
    var inputMean = mean(data),
        // Calculated value of the χ2 statistic.
        chiSquared = 0,
        // Degrees of freedom, calculated as (number of class intervals -
        // number of hypothesized distribution parameters estimated - 1)
        degreesOfFreedom,
        // Number of hypothesized distribution parameters estimated, expected to be supplied in the distribution test.
        // Lose one degree of freedom for estimating `lambda` from the sample data.
        c = 1,
        // Generate the hypothesized distribution from the sample mean.
        hypothesizedDistribution = distributionType(inputMean),
        observedFrequencies = [],
        expectedFrequencies = [],
        k;

    // Create an array holding a histogram from the sample data, of
    // the form `{ value: numberOfOcurrences }`. Note: data values are
    // used directly as array indexes, so they are expected to be small
    // non-negative integers (outcome counts).
    for (var i = 0; i < data.length; i++) {
        if (observedFrequencies[data[i]] === undefined) {
            observedFrequencies[data[i]] = 0;
        }
        observedFrequencies[data[i]]++;
    }

    // The histogram we created might be sparse - there might be gaps
    // between values. So we iterate through the histogram, making
    // sure that instead of undefined, gaps have 0 values.
    for (i = 0; i < observedFrequencies.length; i++) {
        if (observedFrequencies[i] === undefined) {
            observedFrequencies[i] = 0;
        }
    }

    // Create an array holding a histogram of expected data given the
    // sample size and hypothesized distribution. Outcomes the sample
    // never reached are not assigned an expected frequency.
    for (k in hypothesizedDistribution) {
        if (k in observedFrequencies) {
            expectedFrequencies[k] = hypothesizedDistribution[k] * data.length;
        }
    }

    // Working backward through the expected frequencies, collapse classes
    // if less than three observations are expected for a class.
    // This transformation is applied to the observed frequencies as well.
    // NOTE(review): if the first class (k === 0) itself expects fewer than
    // three observations, this writes to index -1 of both arrays — confirm
    // whether that case can occur for the supported distributions.
    for (k = expectedFrequencies.length - 1; k >= 0; k--) {
        if (expectedFrequencies[k] < 3) {
            expectedFrequencies[k - 1] += expectedFrequencies[k];
            expectedFrequencies.pop();

            observedFrequencies[k - 1] += observedFrequencies[k];
            observedFrequencies.pop();
        }
    }

    // Iterate through the squared differences between observed & expected
    // frequencies, accumulating the `chiSquared` statistic.
    for (k = 0; k < observedFrequencies.length; k++) {
        chiSquared += Math.pow(
            observedFrequencies[k] - expectedFrequencies[k], 2) /
            expectedFrequencies[k];
    }

    // Calculate degrees of freedom for this test and look it up in the
    // `chiSquaredDistributionTable` in order to
    // accept or reject the goodness-of-fit of the hypothesized distribution.
    degreesOfFreedom = observedFrequencies.length - c - 1;
    return chiSquaredDistributionTable[degreesOfFreedom][significance] < chiSquared;
}

module.exports = chiSquaredGoodnessOfFit;
|
|
|
},{"./chi_squared_distribution_table":5,"./mean":21}],7:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * Split an array into chunks of a specified size. This function
 * behaves like [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php):
 * when the input length is not evenly divisible by the chunk size, the
 * final chunk is simply smaller than the rest.
 *
 * `sample` is expected to be an array, and `chunkSize` a number.
 * The `sample` array can contain any kind of data.
 *
 * @param {Array} sample any array of values
 * @param {number} chunkSize size of each output array
 * @returns {Array<Array>} a chunked array
 * @example
 * console.log(chunk([1, 2, 3, 4], 2)); // [[1, 2], [3, 4]]
 */
function chunk(sample, chunkSize) {
    // A zero or negative chunk size would never advance the cursor in
    // the loop below, looping forever — signal invalid input with null
    // instead.
    if (chunkSize <= 0) {
        return null;
    }

    // Collect `chunkSize`-element slices; `.slice` copies, so the input
    // array is left untouched.
    var chunks = [];
    var cursor = 0;
    while (cursor < sample.length) {
        chunks.push(sample.slice(cursor, cursor + chunkSize));
        cursor += chunkSize;
    }
    return chunks;
}
|
|
|
module.exports = chunk; |
|
|
|
},{}],8:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var sortedUniqueCount = require('./sorted_unique_count'), |
|
numericSort = require('./numeric_sort'); |
|
|
|
/**
 * Create a new column x row matrix, initialized to all zeroes.
 *
 * @private
 * @param {number} columns
 * @param {number} rows
 * @return {Array<Array<number>>} matrix
 * @example
 * makeMatrix(10, 10);
 */
function makeMatrix(columns, rows) {
    var matrix = [];
    var columnIndex = 0;
    while (columnIndex < columns) {
        // Build each column as its own zero-filled array.
        var column = [];
        var rowIndex = 0;
        while (rowIndex < rows) {
            column.push(0);
            rowIndex++;
        }
        matrix.push(column);
        columnIndex++;
    }
    return matrix;
}
|
|
|
/** |
|
* Ckmeans clustering is an improvement on heuristic-based clustering |
|
* approaches like Jenks. The algorithm was developed in |
|
* [Haizhou Wang and Mingzhou Song](http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Wang+Song.pdf) |
|
* as a [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming) approach |
|
* to the problem of clustering numeric data into groups with the least |
|
* within-group sum-of-squared-deviations. |
|
* |
|
* Minimizing the difference within groups - what Wang & Song refer to as |
|
* `withinss`, or within sum-of-squares, means that groups are optimally |
|
* homogenous within and the data is split into representative groups. |
|
* This is very useful for visualization, where you may want to represent |
|
* a continuous variable in discrete color or style groups. This function |
|
* can provide groups that emphasize differences between data. |
|
* |
|
* Being a dynamic approach, this algorithm is based on two matrices that |
|
* store incrementally-computed values for squared deviations and backtracking |
|
* indexes. |
|
* |
|
* Unlike the [original implementation](https://cran.r-project.org/web/packages/Ckmeans.1d.dp/index.html), |
|
* this implementation does not include any code to automatically determine |
|
* the optimal number of clusters: this information needs to be explicitly |
|
* provided. |
|
* |
|
* ### References |
|
* _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic |
|
* Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859 |
|
* |
|
* from The R Journal Vol. 3/2, December 2011 |
|
* @param {Array<number>} data input data, as an array of number values |
|
* @param {number} nClusters number of desired classes. This cannot be |
|
* greater than the number of values in the data array. |
|
* @returns {Array<Array<number>>} clustered input |
|
* @example |
|
* ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3); |
|
* // The input, clustered into groups of similar numbers. |
|
* //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]); |
|
*/ |
|
function ckmeans(data, nClusters) {

    if (nClusters > data.length) {
        throw new Error('Cannot generate more classes than there are data values');
    }

    var sorted = numericSort(data),
        // we'll use this as the maximum number of clusters
        uniqueCount = sortedUniqueCount(sorted);

    // if all of the input values are identical, there's one cluster
    // with all of the input in it.
    if (uniqueCount === 1) {
        return [sorted];
    }

    // named 'D' originally: matrix[cluster][i] accumulates the minimal
    // within-cluster sum of squares for sorted[0..i] split into
    // (cluster + 1) groups.
    var matrix = makeMatrix(nClusters, sorted.length),
        // named 'B' originally: backtrackMatrix[cluster][i] records where
        // the last group of that optimal split begins.
        backtrackMatrix = makeMatrix(nClusters, sorted.length);

    // This is a dynamic programming way to solve the problem of minimizing
    // within-cluster sum of squares. It's similar to linear regression
    // in this way, and this calculation incrementally computes the
    // sum of squares that are later read.

    // The outer loop iterates through clusters, from 0 to nClusters.
    for (var cluster = 0; cluster < nClusters; cluster++) {

        // At the start of each loop, the mean starts as the first element
        var firstClusterMean = sorted[0];

        for (var sortedIdx = Math.max(cluster, 1);
            sortedIdx < sorted.length;
            sortedIdx++) {

            if (cluster === 0) {

                // Increase the running sum of squares calculation by this
                // new value
                var squaredDifference = Math.pow(
                    sorted[sortedIdx] - firstClusterMean, 2);
                matrix[cluster][sortedIdx] = matrix[cluster][sortedIdx - 1] +
                    ((sortedIdx - 1) / sortedIdx) * squaredDifference;

                // We're computing a running mean by taking the previous
                // mean value, multiplying it by the number of elements
                // seen so far, and then dividing it by the number of
                // elements total.
                // NOTE(review): after absorbing sorted[sortedIdx] the mean
                // covers sortedIdx + 1 elements, yet both the Welford
                // factor above and this division use sortedIdx — this
                // looks off by one; verify against Wang & Song's
                // reference implementation before changing.
                var newSum = sortedIdx * firstClusterMean + sorted[sortedIdx];
                firstClusterMean = newSum / sortedIdx;

            } else {

                // Welford-style backward accumulation: for every candidate
                // start index `j` of the final group, track the sum of
                // squared deviations of sorted[j..sortedIdx] incrementally.
                var sumSquaredDistances = 0,
                    meanXJ = 0;

                for (var j = sortedIdx; j >= cluster; j--) {

                    sumSquaredDistances += (sortedIdx - j) /
                        (sortedIdx - j + 1) *
                        Math.pow(sorted[j] - meanXJ, 2);

                    meanXJ = (sorted[j] + ((sortedIdx - j) * meanXJ)) /
                        (sortedIdx - j + 1);

                    // Keep the split whose total cost (this group plus the
                    // best arrangement of everything before it) is lowest.
                    if (j === sortedIdx) {
                        matrix[cluster][sortedIdx] = sumSquaredDistances;
                        backtrackMatrix[cluster][sortedIdx] = j;
                        if (j > 0) {
                            matrix[cluster][sortedIdx] += matrix[cluster - 1][j - 1];
                        }
                    } else {
                        if (j === 0) {
                            if (sumSquaredDistances <= matrix[cluster][sortedIdx]) {
                                matrix[cluster][sortedIdx] = sumSquaredDistances;
                                backtrackMatrix[cluster][sortedIdx] = j;
                            }
                        } else if (sumSquaredDistances + matrix[cluster - 1][j - 1] < matrix[cluster][sortedIdx]) {
                            matrix[cluster][sortedIdx] = sumSquaredDistances + matrix[cluster - 1][j - 1];
                            backtrackMatrix[cluster][sortedIdx] = j;
                        }
                    }
                }
            }
        }
    }

    // The real work of Ckmeans clustering happens in the matrix generation:
    // the generated matrices encode all possible clustering combinations, and
    // once they're generated we can solve for the best clustering groups
    // very quickly.
    var clusters = [],
        clusterRight = backtrackMatrix[0].length - 1;

    // Backtrack the clusters from the dynamic programming matrix. This
    // starts at the bottom-right corner of the matrix (if the top-left is 0, 0),
    // and moves the cluster target with the loop.
    for (cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) {

        var clusterLeft = backtrackMatrix[cluster][clusterRight];

        // fill the cluster from the sorted input by taking a slice of the
        // array. the backtrack matrix makes this easy - it stores the
        // indexes where the cluster should start and end.
        clusters[cluster] = sorted.slice(clusterLeft, clusterRight + 1);

        if (cluster > 0) {
            clusterRight = clusterLeft - 1;
        }
    }

    return clusters;
}
|
|
|
module.exports = ckmeans; |
|
|
|
},{"./numeric_sort":26,"./sorted_unique_count":42}],9:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var standardNormalTable = require('./standard_normal_table'); |
|
|
|
/**
 * **[Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)**
 *
 * Probability tables cannot be printed for every normal distribution —
 * there are infinitely many of them — so the common practice is to
 * convert a normal to a standard normal and look the probability up in
 * the standard normal table.
 *
 * You can use `.5 + .5 * errorFunction(x / Math.sqrt(2))` to calculate
 * the probability instead of looking it up in a table.
 *
 * @param {number} z
 * @returns {number} cumulative standard normal probability
 */
function cumulativeStdNormalProbability(z) {

    // The table is indexed by the absolute value of the input. Each
    // entry covers a step of 0.01, so the position is found by scaling
    // by 100; very large |z| values are clamped to the last entry.
    var distance = Math.abs(z);
    var index = Math.min(Math.round(distance * 100),
        standardNormalTable.length - 1);

    // The table only stores values for non-negative inputs, so a
    // negative input maps to one minus the looked-up probability.
    if (z >= 0) {
        return standardNormalTable[index];
    }

    // due to floating-point arithmetic, values in the table with
    // 4 significant figures can nevertheless end up as repeating
    // fractions when computed here, so round back to 4 places
    return +(1 - standardNormalTable[index]).toFixed(4);
}
|
|
|
module.exports = cumulativeStdNormalProbability; |
|
|
|
},{"./standard_normal_table":44}],10:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * We use `ε`, epsilon, as a stopping criterion when we want to iterate
 * until we're "close enough".
 *
 * This is used in calculations like the binomialDistribution, in which
 * the process of finding a value is [iterative](https://en.wikipedia.org/wiki/Iterative_method):
 * it progresses until it is close enough.
 *
 * Iteration stops once the remaining quantity being tracked (for the
 * distributions, the probability mass not yet accounted for) drops
 * below this threshold of `0.0001`.
 */
var epsilon = 0.0001;
|
|
|
module.exports = epsilon; |
|
|
|
},{}],11:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * **[Gaussian error function](http://en.wikipedia.org/wiki/Error_function)**
 *
 * `errorFunction(x / (sd * Math.sqrt(2)))` is the probability that a value
 * drawn from a normal distribution with standard deviation `sd` lies
 * within `x` of the mean.
 *
 * This returns a numerical approximation of the exact value.
 *
 * @param {number} x input
 * @return {number} error estimation
 * @example
 * errorFunction(1); //= 0.8427
 */
function errorFunction(x) {
    // Rational-polynomial approximation evaluated at |x|; erf is an
    // odd function, so the sign of the input is restored at the end.
    var s = 1 / (1 + 0.5 * Math.abs(x));
    var approximation = s * Math.exp(-Math.pow(x, 2) -
        1.26551223 +
        1.00002368 * s +
        0.37409196 * Math.pow(s, 2) +
        0.09678418 * Math.pow(s, 3) -
        0.18628806 * Math.pow(s, 4) +
        0.27886807 * Math.pow(s, 5) -
        1.13520398 * Math.pow(s, 6) +
        1.48851587 * Math.pow(s, 7) -
        0.82215223 * Math.pow(s, 8) +
        0.17087277 * Math.pow(s, 9));
    return x >= 0 ? 1 - approximation : approximation - 1;
}
|
|
|
module.exports = errorFunction; |
|
|
|
},{}],12:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * A [Factorial](https://en.wikipedia.org/wiki/Factorial), usually written n!, is the product of all positive
 * integers less than or equal to n. Factorial is often implemented
 * recursively, but this iterative form is significantly faster and
 * simpler.
 *
 * @param {number} n input
 * @returns {number} factorial: n!; `null` for negative input
 * @example
 * console.log(factorial(5)); // 120
 */
function factorial(n) {

    // factorial is mathematically undefined for negative numbers
    if (n < 0) { return null; }

    // Both 0! and 1! are 1, so the running product starts at 1 and the
    // loop can begin at 2 — multiplying by 1 would be a no-op. This
    // counts upward rather than the familiar 5 * 4 * 3 * 2 * 1 order.
    var product = 1;
    for (var factor = 2; factor <= n; factor++) {
        product *= factor;
    }
    return product;
}
|
|
|
module.exports = factorial; |
|
|
|
},{}],13:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * The [Geometric Mean](https://en.wikipedia.org/wiki/Geometric_mean) is
 * a mean function better suited to numbers spanning different ranges:
 * the nth root of the product of the input numbers.
 *
 * It is often useful for
 * **[proportional growth](https://en.wikipedia.org/wiki/Geometric_mean#Proportional_growth)**:
 * given yearly growth rates like _80%, 16.66% and 42.85%_, a simple
 * mean misestimates the average growth rate, while the geometric mean
 * yields a rate that, compounded over the same years, reproduces the
 * same final value.
 *
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input array
 * @returns {number} geometric mean; `null` for an empty array or any
 * non-positive value
 * @example
 * var growthRates = [1.80, 1.166666, 1.428571];
 * var averageGrowth = geometricMean(growthRates);
 * var averageGrowthRates = [averageGrowth, averageGrowth, averageGrowth];
 * var startingValue = 10;
 * var startingValueMean = 10;
 * growthRates.forEach(function(rate) {
 *   startingValue *= rate;
 * });
 * averageGrowthRates.forEach(function(rate) {
 *   startingValueMean *= rate;
 * });
 * startingValueMean === startingValue;
 */
function geometricMean(x) {
    // The mean of no numbers is null
    if (x.length === 0) { return null; }

    // accumulate the product of every element
    var product = 1;
    for (var i = 0; i < x.length; i++) {
        // the geometric mean is only defined for positive numbers
        if (x[i] <= 0) { return null; }
        product *= x[i];
    }

    // the nth root of the accumulated product
    return Math.pow(product, 1 / x.length);
}
|
|
|
module.exports = geometricMean; |
|
|
|
},{}],14:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * The [Harmonic Mean](https://en.wikipedia.org/wiki/Harmonic_mean) is
 * a mean typically used to average rates: the reciprocal of the
 * arithmetic mean of the reciprocals of the input numbers.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input
 * @returns {number} harmonic mean; `null` for an empty array or any
 * non-positive value
 * @example
 * ss.harmonicMean([2, 3]) //= 2.4
 */
function harmonicMean(x) {
    // The mean of no numbers is null
    if (x.length === 0) { return null; }

    var sumOfReciprocals = 0;
    for (var i = 0; i < x.length; i++) {
        // the harmonic mean is only defined for positive numbers
        if (x[i] <= 0) { return null; }
        sumOfReciprocals += 1 / x[i];
    }

    // n divided by the sum of reciprocals
    return x.length / sumOfReciprocals;
}
|
|
|
module.exports = harmonicMean; |
|
|
|
},{}],15:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var quantile = require('./quantile'); |
|
|
|
/**
 * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is
 * a measure of statistical dispersion — how scattered, spread, or
 * concentrated a distribution is — computed as the difference between
 * the third and first quartiles.
 *
 * @param {Array<number>} sample
 * @returns {number} interquartile range: the span between the lower and
 * upper quartiles (0.25 and 0.75); `null` for an empty sample
 * @example
 * interquartileRange([0, 1, 2, 3]); //= 2
 */
function interquartileRange(sample) {
    // We can't derive quantiles from an empty list
    if (sample.length === 0) { return null; }

    // the span between the upper quartile at 0.75 and the lower
    // quartile at 0.25
    var upperQuartile = quantile(sample, 0.75);
    var lowerQuartile = quantile(sample, 0.25);
    return upperQuartile - lowerQuartile;
}
|
|
|
module.exports = interquartileRange; |
|
|
|
},{"./quantile":30}],16:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * The Inverse [Gaussian error function](http://en.wikipedia.org/wiki/Error_function)
 * returns a numerical approximation of the value that would have caused
 * `errorFunction()` to return x.
 *
 * @param {number} x value of error function
 * @returns {number} estimated inverted value
 */
function inverseErrorFunction(x) {
    // constant used by Winitzki's approximation
    var a = (8 * (Math.PI - 3)) / (3 * Math.PI * (4 - Math.PI));

    // x only ever appears squared below, so the computation is
    // implicitly performed on |x|; the sign is restored at the end
    var logTerm = Math.log(1 - x * x);
    var piTerm = 2 / (Math.PI * a);

    var inv = Math.sqrt(Math.sqrt(
        Math.pow(piTerm + logTerm / 2, 2) -
        logTerm / a) -
        (piTerm + logTerm / 2));

    return x >= 0 ? inv : -inv;
}
|
|
|
module.exports = inverseErrorFunction; |
|
|
|
},{}],17:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)
 * fits a straight line through a set of coordinates using the
 * least-squares criterion, producing the slope and y-intercept of
 * that line.
 *
 * @param {Array<Array<number>>} data an array of two-element arrays,
 * like `[[0, 1], [2, 3]]`
 * @returns {Object} object with the slope `m` and intercept `b` of the
 * regression line
 * @example
 * linearRegression([[0, 0], [1, 1]]); // { m: 1, b: 0 }
 */
function linearRegression(data) {

    var m, b;

    // cache the length to avoid repeated property lookups
    var n = data.length;

    // a single point has no defined slope: report a horizontal line
    // through its y value
    if (n === 1) {
        m = 0;
        b = data[0][1];
    } else {
        // running sums: SS_x, SS_y, SS_xx, and SS_xy in math notation
        var sumX = 0;
        var sumY = 0;
        var sumXX = 0;
        var sumXY = 0;

        for (var i = 0; i < n; i++) {
            var x = data[i][0];
            var y = data[i][1];

            sumX += x;
            sumY += y;
            sumXX += x * x;
            sumXY += x * y;
        }

        // `m` is the slope of the regression line
        m = ((n * sumXY) - (sumX * sumY)) /
            ((n * sumXX) - (sumX * sumX));

        // `b` is the y-intercept of the line
        b = (sumY / n) - ((m * sumX) / n);
    }

    return {
        m: m,
        b: b
    };
}
|
|
|
|
|
module.exports = linearRegression; |
|
|
|
},{}],18:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * Given the output of `linearRegression` — an object with `m` (slope)
 * and `b` (intercept) — build a line function that maps x values to
 * y values.
 *
 * @param {Object} mb object with `m` and `b` members, representing
 * slope and intersect of desired line
 * @returns {Function} method that computes the y-value at any given
 * x-value on the line.
 * @example
 * var l = linearRegressionLine(linearRegression([[0, 0], [1, 1]]));
 * l(0) //= 0
 * l(2) //= 2
 */
function linearRegressionLine(mb) {
    // the returned closure evaluates y = b + m * x for any x
    return function(x) {
        return mb.b + (mb.m * x);
    };
}
|
|
|
module.exports = linearRegressionLine; |
|
|
|
},{}],19:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var median = require('./median'); |
|
|
|
/**
 * The [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation) is
 * a robust measure of statistical dispersion: it is more resilient to
 * outliers than the standard deviation.
 *
 * @param {Array<number>} x input array
 * @returns {number} median absolute deviation; `null` for a missing or
 * empty input
 * @example
 * mad([1, 1, 2, 2, 4, 6, 9]); //= 1
 */
function mad(x) {
    // The mad of nothing is null
    if (!x || x.length === 0) { return null; }

    var center = median(x);

    // collect each element's absolute deviation from the median
    var deviations = [];
    for (var i = 0; i < x.length; i++) {
        deviations.push(Math.abs(x[i] - center));
    }

    // the result is the median of those deviations
    return median(deviations);
}
|
|
|
module.exports = mad; |
|
|
|
},{"./median":22}],20:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * This computes the maximum number in an array.
 *
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input
 * @returns {number} maximum value; `undefined` for an empty array
 * @example
 * console.log(max([1, 2, 3, 4])); // 4
 */
function max(x) {
    var largest;
    for (var i = 0; i < x.length; i++) {
        // the running maximum starts out undefined, so the first
        // element always becomes the maximum seen so far
        if (largest === undefined || x[i] > largest) {
            largest = x[i];
        }
    }
    return largest;
}
|
|
|
module.exports = max; |
|
|
|
},{}],21:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var sum = require('./sum'); |
|
|
|
/**
 * The mean, _also known as average_, is the sum of all values divided
 * by the number of values.
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input values
 * @returns {number} mean; `null` for an empty array
 * @example
 * console.log(mean([0, 10])); // 5
 */
function mean(x) {
    // The mean of no numbers is null
    if (x.length === 0) { return null; }

    var total = sum(x);
    return total / x.length;
}
|
|
|
module.exports = mean; |
|
|
|
},{"./sum":45}],22:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var numericSort = require('./numeric_sort'); |
|
|
|
/**
 * The [median](http://en.wikipedia.org/wiki/Median) is
 * the middle number of a list — often a better indicator of 'the middle'
 * than `mean()` when outliers skew the data.
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * The median isn't necessarily one of the elements in the list: for an
 * even-length list with two distinct central values, it is their
 * average.
 *
 * @param {Array<number>} x input
 * @returns {number} median value; `null` for an empty array
 * @example
 * var incomes = [10, 2, 5, 100, 2, 1];
 * median(incomes); //= 3.5
 */
function median(x) {
    // The median of an empty list is null
    if (x.length === 0) { return null; }

    // numericSort returns a sorted copy, so the caller's array `x`
    // is left untouched
    var sorted = numericSort(x);
    var middle = Math.floor(sorted.length / 2);

    if (sorted.length % 2 === 1) {
        // odd length: the exact central element
        return sorted[middle];
    }

    // even length: the average of the two central elements
    return (sorted[middle - 1] + sorted[middle]) / 2;
}
|
|
|
module.exports = median; |
|
|
|
},{"./numeric_sort":26}],23:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * The min is the lowest number in the array.
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input
 * @returns {number} minimum value; `undefined` for an empty array
 * @example
 * min([1, 5, -10, 100, 2]); // -10
 */
function min(x) {
    var value;
    for (var i = 0; i < x.length; i++) {
        // On the first iteration of this loop, min is
        // undefined and is thus made the minimum element in the array
        if (x[i] < value || value === undefined) {
            value = x[i];
        }
    }
    return value;
}
|
|
|
module.exports = min; |
|
|
|
},{}],24:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * **Mixin** simple_statistics into a single Array instance if provided,
 * or into the native Array object if not. This optional feature lets
 * you treat simple_statistics as if it were a native part of
 * JavaScript.
 *
 * @param {Object} ss simple statistics
 * @param {Array} [array=] a single array instance which will be augmented
 * with the extra methods. If omitted, mixin will apply to all arrays
 * by changing the global `Array.prototype`.
 * @returns {*} the extended Array, or Array.prototype if no object
 * is given.
 *
 * @example
 * var myNumbers = [1, 2, 3];
 * mixin(ss, myNumbers);
 * console.log(myNumbers.sum()); // 6
 */
function mixin(ss, array) {
    var support = !!(Object.defineProperty && Object.defineProperties);
    // Coverage testing will never test this error.
    /* istanbul ignore next */
    if (!support) {
        throw new Error('without defineProperty, simple-statistics cannot be mixed in');
    }

    // only methods that operate on a plain array in a single step
    // are supported
    var arrayMethods = ['median', 'standardDeviation', 'sum',
        'sampleSkewness',
        'mean', 'min', 'max', 'quantile', 'geometricMean',
        'harmonicMean', 'root_mean_square'];

    // build one wrapper per method name; the closure captures the name
    // so a bare `arrayMethods[i]` reference can't track the loop index
    function wrap(method) {
        return function() {
            // collect any call arguments as a real array and prepend
            // the receiving array itself as the first argument
            var args = Array.prototype.slice.apply(arguments);
            args.unshift(this);
            // delegate to the corresponding ss method
            return ss[method].apply(ss, args);
        };
    }

    // extend either a shallow copy of the given array (so internal
    // operations can't modify the original by reference) or, absent
    // one, the global Array.prototype
    var extending = array ? array.slice() : Array.prototype;

    // Attach each wrapper with
    // [defineProperty](https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/Object/defineProperty)
    // so the methods stay non-enumerable: `for (var in x)` loops will
    // not trip over them.
    for (var i = 0; i < arrayMethods.length; i++) {
        Object.defineProperty(extending, arrayMethods[i], {
            value: wrap(arrayMethods[i]),
            configurable: true,
            enumerable: false,
            writable: true
        });
    }

    return extending;
}
|
|
|
module.exports = mixin; |
|
|
|
},{}],25:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var numericSort = require('./numeric_sort'); |
|
|
|
/**
 * The [mode](http://bit.ly/W5K4Yt) is the number that appears in a list the highest number of times.
 * There can be multiple modes in a list: in the event of a tie, this
 * algorithm keeps the mode it encountered first (the smallest value,
 * since the list is scanned in sorted order).
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input
 * @returns {number} mode
 * @example
 * mode([0, 0, 1]); //= 0
 */
function mode(x) {

    // Edge cases: an empty list has no mode, and a single value is
    // trivially its own mode.
    if (x.length === 0) { return null; }
    if (x.length === 1) { return x[0]; }

    // Sorting groups equal values into contiguous runs, so one pass
    // can measure the length of every run.
    var sorted = numericSort(x);

    var bestValue;
    var bestCount = 0;
    var runCount = 1;

    // Iterate one slot past the last index so the final run is also
    // compared: sorted[sorted.length] is undefined and never equals
    // a number, which forces the run to close.
    for (var i = 1; i <= sorted.length; i++) {
        if (sorted[i] === sorted[i - 1]) {
            // still inside the current run of equal values
            runCount++;
        } else {
            // the run just ended; promote it if it is strictly longer
            // than the best run seen so far
            if (runCount > bestCount) {
                bestCount = runCount;
                bestValue = sorted[i - 1];
            }
            runCount = 1;
        }
    }
    return bestValue;
}
|
|
|
module.exports = mode; |
|
|
|
},{"./numeric_sort":26}],26:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * Sort an array of numbers by their numeric value, without modifying
 * the input array.
 *
 * This is necessary because the default behavior of `.sort`
 * in JavaScript is to compare elements as string values:
 *
 *     [1, 10, 12, 102, 20].sort()
 *     // output
 *     [1, 10, 102, 12, 20]
 *
 * @param {Array<number>} array input array
 * @return {Array<number>} a new, numerically sorted array
 * @example
 * numericSort([3, 2, 1]) // [1, 2, 3]
 */
function numericSort(array) {
    return array
        // copy the input first: `.sort()` alone would mutate the
        // caller's array in place
        .slice()
        // comparator function that orders by numeric value rather
        // than the default string comparison
        .sort(function(a, b) {
            return a - b;
        });
}
|
|
|
module.exports = numericSort; |
|
|
|
},{}],27:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * This is a single-layer [Perceptron Classifier](http://en.wikipedia.org/wiki/Perceptron) that takes
 * arrays of numbers and predicts whether they should be classified
 * as either 0 or 1 (negative or positive examples).
 * @class
 * @example
 * // Create the model
 * var p = new PerceptronModel();
 * // Train the model with input with a diagonal boundary.
 * for (var i = 0; i < 5; i++) {
 *     p.train([1, 1], 1);
 *     p.train([0, 1], 0);
 *     p.train([1, 0], 0);
 *     p.train([0, 0], 0);
 * }
 * p.predict([0, 0]); // 0
 * p.predict([0, 1]); // 0
 * p.predict([1, 0]); // 0
 * p.predict([1, 1]); // 1
 */
function PerceptronModel() {
    // The weights, or coefficients of the model;
    // weights are only populated when training with data.
    this.weights = [];
    // The bias term, or intercept; it is also a weight but
    // it's stored separately for convenience as it is always
    // multiplied by one.
    this.bias = 0;
}

/**
 * **Predict**: Use an array of features with the weight array and bias
 * to predict whether an example is labeled 0 or 1.
 *
 * @param {Array<number>} features an array of features as numbers
 * @returns {number} 1 if the score is over 0, otherwise 0; `null` if
 * the model has not been trained on feature arrays of this length
 */
PerceptronModel.prototype.predict = function(features) {

    // Only predict if previously trained
    // on the same size feature array(s).
    if (features.length !== this.weights.length) { return null; }

    // Calculate the sum of features times weights,
    // with the bias added (implicitly times one).
    var score = 0;
    for (var i = 0; i < this.weights.length; i++) {
        score += this.weights[i] * features[i];
    }
    score += this.bias;

    // Classify as 1 if the score is over 0, otherwise 0.
    if (score > 0) {
        return 1;
    } else {
        return 0;
    }
};

/**
 * **Train** the classifier with a new example, which is
 * a numeric array of features and a 0 or 1 label.
 *
 * @param {Array<number>} features an array of features as numbers
 * @param {number} label either 0 or 1
 * @returns {PerceptronModel} this; `null` for an invalid label
 */
PerceptronModel.prototype.train = function(features, label) {
    // Require that only labels of 0 or 1 are considered.
    if (label !== 0 && label !== 1) { return null; }
    // The length of the feature array determines
    // the length of the weight array.
    // The perceptron will continue learning as long as
    // it keeps seeing feature arrays of the same length.
    // When it sees a new data shape, it initializes.
    if (features.length !== this.weights.length) {
        // Copy the features rather than aliasing them: the weight
        // updates below would otherwise write through to the caller's
        // array. The copied values (and therefore the learned model)
        // are identical to what aliasing would have produced.
        this.weights = features.slice();
        this.bias = 1;
    }
    // Make a prediction based on current weights.
    var prediction = this.predict(features);
    // Update the weights if the prediction is wrong.
    if (prediction !== label) {
        var gradient = label - prediction;
        for (var i = 0; i < this.weights.length; i++) {
            this.weights[i] += gradient * features[i];
        }
        this.bias += gradient;
    }
    return this;
};
|
|
|
module.exports = PerceptronModel; |
|
|
|
},{}],28:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var epsilon = require('./epsilon'); |
|
var factorial = require('./factorial'); |
|
|
|
/**
 * The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution)
 * is a discrete probability distribution that expresses the probability
 * of a given number of events occurring in a fixed interval of time
 * and/or space if these events occur with a known average rate and
 * independently of the time since the last event.
 *
 * The Poisson Distribution is characterized by the strictly positive
 * mean arrival or occurrence rate, `λ`.
 *
 * @param {number} lambda location poisson distribution
 * @returns {number} value of poisson distribution at that point;
 * `null` when lambda is not strictly positive
 */
function poissonDistribution(lambda) {
    // the rate must be strictly positive
    if (lambda <= 0) { return null; }

    // current outcome under consideration
    var x = 0;
    // running total of probability mass, used to decide when to stop
    var cumulativeProbability = 0;
    // the calculated cells to be returned
    var cells = {};

    // Walk the outcomes upward until nearly all of the probability
    // mass — all but `epsilon` of it — has been accounted for.
    do {
        // the [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
        // of the Poisson distribution
        cells[x] = (Math.pow(Math.E, -lambda) * Math.pow(lambda, x)) / factorial(x);
        cumulativeProbability += cells[x];
        x++;
    } while (cumulativeProbability < 1 - epsilon);

    return cells;
}
|
|
|
module.exports = poissonDistribution; |
|
|
|
},{"./epsilon":10,"./factorial":12}],29:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var epsilon = require('./epsilon'); |
|
var inverseErrorFunction = require('./inverse_error_function'); |
|
|
|
/**
 * The [Probit](http://en.wikipedia.org/wiki/Probit)
 * is the inverse of cumulativeStdNormalProbability(),
 * and is also known as the normal quantile function.
 *
 * It returns the number of standard deviations from the mean
 * where the p'th quantile of values can be found in a normal distribution.
 * So, for example, probit(0.5 + 0.6827/2) ≈ 1 because 68.27% of values are
 * normally found within 1 standard deviation above or below the mean.
 *
 * @param {number} p
 * @returns {number} probit
 */
function probit(p) {
    // nudge the input away from the endpoints 0 and 1, where the
    // probit function is undefined (±Infinity)
    var clamped = p;
    if (clamped === 0) {
        clamped = epsilon;
    } else if (clamped >= 1) {
        clamped = 1 - epsilon;
    }
    return Math.sqrt(2) * inverseErrorFunction(2 * clamped - 1);
}
|
|
|
module.exports = probit; |
|
|
|
},{"./epsilon":10,"./inverse_error_function":16}],30:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var quantileSorted = require('./quantile_sorted'); |
|
var numericSort = require('./numeric_sort'); |
|
|
|
/**
 * The [quantile](https://en.wikipedia.org/wiki/Quantile):
 * this is a population quantile, since we assume to know the entire
 * dataset in this library. This is an implementation of the
 * [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population)
 * algorithm from wikipedia.
 *
 * Sample is a one-dimensional array of numbers, and p is either a
 * decimal number from 0 to 1 or an array of such numbers.
 * In terms of a k/q quantile, p = k/q — the same request expressed as
 * a decimal value.
 * When p is an array, the result is an array with the corresponding
 * quantiles in input order.
 *
 * @param {Array<number>} sample a sample from the population
 * @param {number} p the desired quantile, as a number between 0 and 1
 * @returns {number} quantile
 * @example
 * var data = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20];
 * quantile(data, 1); //= max(data);
 * quantile(data, 0); //= min(data);
 * quantile(data, 0.5); //= 9
 */
function quantile(sample, p) {

    // We can't derive quantiles from an empty list
    if (sample.length === 0) { return null; }

    // Work on a numerically sorted copy so the caller's array is
    // left untouched.
    var sorted = numericSort(sample);

    // A non-array `p` has no `.length`, which is how a batch of
    // quantile requests is distinguished from a single one.
    if (p.length) {
        var results = [];
        for (var i = 0; i < p.length; i++) {
            results[i] = quantileSorted(sorted, p[i]);
        }
        return results;
    }

    return quantileSorted(sorted, p);
}
|
|
|
module.exports = quantile; |
|
|
|
},{"./numeric_sort":26,"./quantile_sorted":31}],31:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * This is the internal implementation of quantiles: when the input is
 * already known to be sorted, re-sorting can be skipped and the
 * computation is faster.
 *
 * @param {Array<number>} sample input data, sorted ascending
 * @param {number} p desired quantile: a number between 0 to 1, inclusive
 * @returns {number} quantile value; `null` when p is outside [0, 1]
 * @example
 * var data = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20];
 * quantileSorted(data, 1); //= max(data);
 * quantileSorted(data, 0); //= min(data);
 * quantileSorted(data, 0.5); //= 9
 */
function quantileSorted(sample, p) {
    var position = sample.length * p;
    if (p < 0 || p > 1) {
        // quantiles are only defined for p within [0, 1]
        return null;
    }
    if (p === 1) {
        // p of 1 is exactly the last element
        return sample[p * sample.length - 1];
    }
    if (p === 0) {
        // p of 0 is exactly the first element
        return sample[0];
    }
    if (position % 1 !== 0) {
        // a fractional position rounds up to the next element
        return sample[Math.ceil(position) - 1];
    }
    if (sample.length % 2 === 0) {
        // an integer position in an even-length list: average this
        // value with its right-hand neighbor
        return (sample[position - 1] + sample[position]) / 2;
    }
    // an integer position in an odd-length list: the element itself
    return sample[position];
}
|
|
|
module.exports = quantileSorted; |
|
|
|
},{}],32:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * The [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination)
 * value of data compared with a prediction function `func`: one minus the
 * ratio of the residual sum of squares to the total sum of squares. A value
 * of 1 means the function predicts the data perfectly.
 *
 * @param {Array<Array<number>>} data input data: this should be doubly-nested
 * @param {Function} func function called on `[i][0]` values within the dataset
 * @returns {number} r-squared value
 * @example
 * var samples = [[0, 0], [1, 1]];
 * var regressionLine = linearRegressionLine(linearRegression(samples));
 * rSquared(samples, regressionLine); //= 1 this line is a perfect fit
 */
function rSquared(data, func) {
    // With fewer than two points, any function is a perfect fit.
    if (data.length < 2) { return 1; }

    // Mean of the observed y values, needed to compute the
    // total sum of squares.
    var yTotal = 0;
    for (var i = 0; i < data.length; i++) {
        yTotal += data[i][1];
    }
    var observedMean = yTotal / data.length;

    // Total sum of squares: how far each observation sits from the mean.
    // Residual sum of squares: how far each observation sits from the
    // prediction `func` makes for it.
    var totalSumOfSquares = 0;
    var residualSumOfSquares = 0;
    for (var j = 0; j < data.length; j++) {
        totalSumOfSquares += Math.pow(observedMean - data[j][1], 2);
        residualSumOfSquares += Math.pow(data[j][1] - func(data[j][0]), 2);
    }

    // As the residual error grows relative to the total spread of the
    // data, the r-squared value falls toward (and below) zero.
    return 1 - (residualSumOfSquares / totalSumOfSquares);
}
|
|
|
module.exports = rSquared; |
|
|
|
},{}],33:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * The Root Mean Square (RMS) is a mean function used as a measure of the
 * magnitude of a set of numbers, regardless of their sign: the square root
 * of the mean of the squares of the input numbers.
 *
 * This runs in `O(n)`, linear time with respect to the array.
 *
 * @param {Array<number>} x input
 * @returns {number} root mean square, or null for an empty input
 * @example
 * rootMeanSquare([-1, 1, -1, 1]); //= 1
 */
function rootMeanSquare(x) {
    // The RMS of no numbers is undefined.
    if (x.length === 0) { return null; }

    // Accumulate the squares; squaring discards sign.
    var squaredTotal = 0;
    for (var i = 0; i < x.length; i++) {
        squaredTotal += Math.pow(x[i], 2);
    }

    // Square root of the mean of the squares.
    return Math.sqrt(squaredTotal / x.length);
}
|
|
|
module.exports = rootMeanSquare; |
|
|
|
},{}],34:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var shuffle = require('./shuffle'); |
|
|
|
/**
 * Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample)
 * of `n` elements from a given array.
 *
 * The sampled values will be in any order, not necessarily the order
 * they appear in the input.
 *
 * @param {Array} array input array. can contain any type
 * @param {number} n count of how many elements to take
 * @param {Function} [randomSource=Math.random] an optional source of entropy
 * instead of Math.random
 * @return {Array} subset of n elements in original array
 * @example
 * var values = [1, 2, 4, 5, 6, 7, 8, 9];
 * sample(values, 3); // returns 3 random values, like [2, 5, 8];
 */
function sample(array, n, randomSource) {
    // Fisher-Yates shuffle a copy of the input, then take its
    // first `n` elements as the sample.
    return shuffle(array, randomSource).slice(0, n);
}
|
|
|
module.exports = sample; |
|
|
|
},{"./shuffle":40}],35:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var sampleCovariance = require('./sample_covariance'); |
|
var sampleStandardDeviation = require('./sample_standard_deviation'); |
|
|
|
/**
 * The [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) is
 * a measure of how correlated two datasets are, between -1 and 1:
 * the sample covariance normalized by both standard deviations.
 *
 * @param {Array<number>} x first input
 * @param {Array<number>} y second input
 * @returns {number} sample correlation, or null if any component is undefined
 * @example
 * var a = [1, 2, 3, 4, 5, 6];
 * var b = [2, 2, 3, 4, 5, 60];
 * sampleCorrelation(a, b); //= 0.691
 */
function sampleCorrelation(x, y) {
    var covariance = sampleCovariance(x, y);
    var standardDeviationX = sampleStandardDeviation(x);
    var standardDeviationY = sampleStandardDeviation(y);

    // Any of the components is undefined (too few values or mismatched
    // lengths): the correlation is undefined too.
    if (covariance === null || standardDeviationX === null || standardDeviationY === null) {
        return null;
    }

    // Normalize the covariance by both standard deviations.
    return covariance / standardDeviationX / standardDeviationY;
}
|
|
|
module.exports = sampleCorrelation; |
|
|
|
},{"./sample_covariance":36,"./sample_standard_deviation":38}],36:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var mean = require('./mean'); |
|
|
|
/**
 * [Sample covariance](https://en.wikipedia.org/wiki/Sample_mean_and_sampleCovariance) of two datasets:
 * how much do the two datasets move together?
 * x and y are two datasets, represented as arrays of numbers.
 *
 * @param {Array<number>} x first input
 * @param {Array<number>} y second input
 * @returns {number} sample covariance, or null for invalid input
 * @example
 * var x = [1, 2, 3, 4, 5, 6];
 * var y = [6, 5, 4, 3, 2, 1];
 * sampleCovariance(x, y); //= -3.5
 */
function sampleCovariance(x, y) {
    // The two datasets must have the same length, and that length
    // must be more than 1.
    if (x.length <= 1 || x.length !== y.length) {
        return null;
    }

    // Judge each value by its deviation from its own dataset's mean,
    // so a difference in absolute scale between the two datasets does
    // not distort the covariance.
    var xmean = mean(x);
    var ymean = mean(y);

    // For each pair, the product of deviations is positive when both
    // values sit on the same side of their means, and negative when
    // they sit on opposite sides; accumulate these products.
    var deviationProduct = 0;
    for (var i = 0; i < x.length; i++) {
        deviationProduct += (x[i] - xmean) * (y[i] - ymean);
    }

    // Bessel's correction: divide by n - 1 rather than n to account for
    // the reduced degrees of freedom when estimating from a sample
    // rather than a complete population.
    var besselsCorrection = x.length - 1;

    return deviationProduct / besselsCorrection;
}
|
|
|
module.exports = sampleCovariance; |
|
|
|
},{"./mean":21}],37:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var sumNthPowerDeviations = require('./sum_nth_power_deviations'); |
|
var sampleStandardDeviation = require('./sample_standard_deviation'); |
|
|
|
/**
 * [Skewness](http://en.wikipedia.org/wiki/Skewness) is
 * a measure of the extent to which a probability distribution of a
 * real-valued random variable "leans" to one side of the mean.
 * The skewness value can be positive or negative, or even undefined.
 *
 * Implementation is based on the adjusted Fisher-Pearson standardized
 * moment coefficient, which is the version found in Excel and several
 * statistical packages including Minitab, SAS and SPSS.
 *
 * @param {Array<number>} x input
 * @returns {number} sample skewness, or null for fewer than three values
 * @example
 * var data = [2, 4, 6, 3, 1];
 * sampleSkewness(data); //= 0.5901286564
 */
function sampleSkewness(x) {
    // Skewness needs at least three observations to be defined.
    if (x.length < 3) { return null; }

    var n = x.length;
    var standardDeviationCubed = Math.pow(sampleStandardDeviation(x), 3);
    var cubedDeviationSum = sumNthPowerDeviations(x, 3);

    // Adjusted Fisher-Pearson standardized moment coefficient:
    // n / ((n-1)(n-2)) * (sum of cubed deviations) / s^3.
    return n * cubedDeviationSum / ((n - 1) * (n - 2) * standardDeviationCubed);
}
|
|
|
module.exports = sampleSkewness; |
|
|
|
},{"./sample_standard_deviation":38,"./sum_nth_power_deviations":46}],38:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var sampleVariance = require('./sample_variance'); |
|
|
|
/**
 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
 * is the square root of the variance; this is the sample flavor, built on
 * the Bessel-corrected sample variance.
 *
 * @param {Array<number>} x input array
 * @returns {number} sample standard deviation, or null for fewer than two values
 * @example
 * ss.sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]);
 * //= 2.138
 */
function sampleStandardDeviation(x) {
    // Sample variance (and thus its square root) needs at least two values.
    if (x.length <= 1) { return null; }

    var varianceValue = sampleVariance(x);
    return Math.sqrt(varianceValue);
}
|
|
|
module.exports = sampleStandardDeviation; |
|
|
|
},{"./sample_variance":39}],39:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var sumNthPowerDeviations = require('./sum_nth_power_deviations'); |
|
|
|
/**
 * The [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance)
 * is the sum of squared deviations from the mean. The sample variance
 * is distinguished from the variance by the usage of [Bessel's Correction](https://en.wikipedia.org/wiki/Bessel's_correction):
 * instead of dividing the sum of squared deviations by the length of the input,
 * it is divided by the length minus one. This corrects the bias in estimating
 * a value from a set that you don't know is full.
 *
 * References:
 * * [Wolfram MathWorld on Sample Variance](http://mathworld.wolfram.com/SampleVariance.html)
 *
 * @param {Array<number>} x input array
 * @return {number} sample variance, or null for fewer than two values
 * @example
 * sampleVariance([1, 2, 3, 4, 5]); //= 2.5
 */
function sampleVariance(x) {
    // At least two values are required; otherwise the variance is undefined.
    if (x.length <= 1) { return null; }

    // Sum of squared deviations from the mean.
    var squaredDeviationSum = sumNthPowerDeviations(x, 2);

    // Bessel's correction: divide by n - 1 rather than n to account for
    // the reduced degrees of freedom when working with a sample rather
    // than a complete population.
    var besselsCorrection = x.length - 1;

    return squaredDeviationSum / besselsCorrection;
}
|
|
|
module.exports = sampleVariance; |
|
|
|
},{"./sum_nth_power_deviations":46}],40:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var shuffleInPlace = require('./shuffle_in_place'); |
|
|
|
/**
 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
 * is a fast way to create a random permutation of a finite set. This is
 * a function around `shuffle_in_place` that adds the guarantee that
 * it will not modify its input.
 *
 * @param {Array} sample an array of any kind of element
 * @param {Function} [randomSource=Math.random] an optional entropy source
 * @return {Array} shuffled version of input
 * @example
 * var shuffled = shuffle([1, 2, 3, 4]);
 * shuffled; // = [2, 3, 1, 4] or any other random permutation
 */
function shuffle(sample, randomSource) {
    // Shuffle a shallow copy so the caller's array is never modified.
    // One `slice()` is sufficient: previously the array was copied twice,
    // leaving the first copy unused.
    return shuffleInPlace(sample.slice(), randomSource);
}
|
|
|
module.exports = shuffle; |
|
|
|
},{"./shuffle_in_place":41}],41:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
 * in-place - which means that it **will change the order of the original
 * array by reference**.
 *
 * This is an algorithm that generates a random [permutation](https://en.wikipedia.org/wiki/Permutation)
 * of a set.
 *
 * @param {Array} sample input array
 * @param {Function} [randomSource=Math.random] an optional source of entropy
 * @returns {Array} sample
 * @example
 * var sample = [1, 2, 3, 4];
 * shuffleInPlace(sample);
 * // sample is shuffled to a value like [2, 1, 4, 3]
 */
function shuffleInPlace(sample, randomSource) {
    // A custom random number source can be provided to use a fixed seed
    // or another generator, like
    // [random-js](https://www.npmjs.org/package/random-js)
    randomSource = randomSource || Math.random;

    // Walk the unshuffled region of the array from the back; `remaining`
    // counts how many elements have not yet been placed.
    for (var remaining = sample.length; remaining > 0; remaining--) {
        // Choose a random index within the not-yet-shuffled prefix.
        var pick = Math.floor(randomSource() * remaining);

        // Swap the last unshuffled element with the chosen one.
        var held = sample[remaining - 1];
        sample[remaining - 1] = sample[pick];
        sample[pick] = held;
    }

    return sample;
}
|
|
|
module.exports = shuffleInPlace; |
|
|
|
},{}],42:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * For a sorted input, counting the number of unique values
 * takes a single linear pass and constant extra memory, since equal
 * values are guaranteed to be adjacent.
 *
 * Values are compared with `===`, so objects and non-primitive objects
 * are not handled in any special way.
 *
 * @param {Array} input a sorted array of primitive values.
 * @returns {number} count of unique values
 * @example
 * sortedUniqueCount([1, 2, 3]); // 3
 * sortedUniqueCount([1, 1, 1]); // 1
 */
function sortedUniqueCount(input) {
    var count = 0;
    var previous;
    for (var i = 0; i < input.length; i++) {
        // The first element always starts a new run; after that, a new
        // run starts whenever the value differs from the previous one.
        if (i === 0 || input[i] !== previous) {
            previous = input[i];
            count++;
        }
    }
    return count;
}
|
|
|
module.exports = sortedUniqueCount; |
|
|
|
},{}],43:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var variance = require('./variance'); |
|
|
|
/**
 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
 * is the square root of the variance. It's useful for measuring the amount
 * of variation or dispersion in a set of values.
 *
 * Standard deviation is only appropriate for full-population knowledge: for
 * samples of a population, {@link sampleStandardDeviation} is
 * more appropriate.
 *
 * @param {Array<number>} x input
 * @returns {number} standard deviation, or null for an empty input
 * @example
 * var scores = [2, 4, 4, 4, 5, 5, 7, 9];
 * variance(scores); //= 4
 * standardDeviation(scores); //= 2
 */
function standardDeviation(x) {
    // An empty population has no standard deviation.
    if (x.length === 0) { return null; }

    var populationVariance = variance(x);
    return Math.sqrt(populationVariance);
}
|
|
|
module.exports = standardDeviation; |
|
|
|
},{"./variance":49}],44:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var SQRT_2PI = Math.sqrt(2 * Math.PI);

/**
 * Approximate Φ(z), the cumulative distribution function of the standard
 * normal distribution, with a truncated series expansion. The result is
 * rounded to four decimal places.
 *
 * @param {number} z standard score
 * @returns {number} P(Z <= z), rounded to four digits
 */
function cumulativeDistribution(z) {
    var runningSum = z;
    var term = z;

    // Fifteen terms of the series are enough for 4-digit precision.
    for (var i = 1; i < 15; i++) {
        term *= z * z / (2 * i + 1);
        runningSum += term;
    }
    return Math.round((0.5 + (runningSum / SQRT_2PI) * Math.exp(-z * z / 2)) * 1e4) / 1e4;
}

/**
 * A standard normal table, also called the unit normal table or Z table,
 * is a mathematical table for the values of Φ (phi), which are the values of
 * the cumulative distribution function of the normal distribution.
 * It is used to find the probability that a statistic is observed below,
 * above, or between values on the standard normal distribution, and by
 * extension, any normal distribution.
 *
 * The probabilities are calculated using the
 * [Cumulative distribution function](https://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function).
 * The table used is the cumulative, and not cumulative from 0 to mean
 * (even though the latter has 5 digits precision, instead of 4).
 */
var standardNormalTable = [];

// Tabulate Φ(z) for z = 0.00, 0.01, ... up to 3.09.
// NOTE(review): the loop accumulates 0.01 in floating point, so the exact
// number of entries depends on rounding of the repeated addition — kept
// as-is to preserve the published table values.
for (var z = 0; z <= 3.09; z += 0.01) {
    standardNormalTable.push(cumulativeDistribution(z));
}
|
|
|
module.exports = standardNormalTable; |
|
|
|
},{}],45:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * The [sum](https://en.wikipedia.org/wiki/Summation) of an array
 * is the result of adding all numbers together, starting from zero.
 *
 * This runs on `O(n)`, linear time in respect to the array
 *
 * @param {Array<number>} x input
 * @return {number} sum of all input numbers
 * @example
 * console.log(sum([1, 2, 3])); // 6
 */
function sum(x) {
    // Accumulate from zero so an empty input sums to 0.
    var total = 0;
    for (var i = 0; i < x.length; i++) {
        total += x[i];
    }
    return total;
}
|
|
|
module.exports = sum; |
|
|
|
},{}],46:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var mean = require('./mean'); |
|
|
|
/**
 * The sum of deviations from the mean, each raised to the Nth power.
 * When n=2 it's the sum of squared deviations.
 * When n=3 it's the sum of cubed deviations.
 *
 * @param {Array<number>} x input array
 * @param {number} n power
 * @returns {number} sum of nth power deviations
 * @example
 * var input = [1, 2, 3];
 * // since the variance of a set is the mean squared
 * // deviations, we can calculate that with sumNthPowerDeviations:
 * var variance = sumNthPowerDeviations(input, 2) / input.length;
 */
function sumNthPowerDeviations(x, n) {
    var meanValue = mean(x);
    var total = 0;

    // Raise each value's deviation from the mean to the nth power
    // and accumulate.
    for (var i = 0; i < x.length; i++) {
        total += Math.pow(x[i] - meanValue, n);
    }

    return total;
}
|
|
|
module.exports = sumNthPowerDeviations; |
|
|
|
},{"./mean":21}],47:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var standardDeviation = require('./standard_deviation'); |
|
var mean = require('./mean'); |
|
|
|
/**
 * This is to compute [a one-sample t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#One-sample_t-test), comparing the mean
 * of a sample to a known value, x.
 *
 * in this case, we're trying to determine whether the
 * population mean is equal to the value that we know, which is `x`
 * here. usually the results here are used to look up a
 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
 * a certain level of significance, will let you determine that the
 * null hypothesis can or cannot be rejected.
 *
 * @param {Array<number>} sample an array of numbers as input
 * @param {number} x expected value of the population mean
 * @returns {number} t statistic
 * @example
 * tTest([1, 2, 3, 4, 5, 6], 3.385); //= 0.16494154
 */
function tTest(sample, x) {
    // t = (sample mean - expected mean) / standard error,
    // where the standard error is s / sqrt(n).
    var sampleMean = mean(sample);
    var standardError = standardDeviation(sample) / Math.sqrt(sample.length);
    return (sampleMean - x) / standardError;
}
|
|
|
module.exports = tTest; |
|
|
|
},{"./mean":21,"./standard_deviation":43}],48:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var mean = require('./mean'); |
|
var sampleVariance = require('./sample_variance'); |
|
|
|
/**
 * This is to compute [two sample t-test](http://en.wikipedia.org/wiki/Student's_t-test).
 * Tests whether "mean(X)-mean(Y) = difference", (
 * in the most common case, we often have `difference == 0` to test if two samples
 * are likely to be taken from populations with the same mean value) with
 * no prior knowledge on standard deviations of both samples
 * other than the fact that they have the same standard deviation.
 *
 * Usually the results here are used to look up a
 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
 * a certain level of significance, will let you determine that the
 * null hypothesis can or cannot be rejected.
 *
 * `diff` can be omitted if it equals 0.
 *
 * [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx)
 * a null hypothesis that the two populations that have been sampled into
 * `sampleX` and `sampleY` are equal to each other.
 *
 * @param {Array<number>} sampleX a sample as an array of numbers
 * @param {Array<number>} sampleY a sample as an array of numbers
 * @param {number} [difference=0]
 * @returns {number} test result, or null when either sample is empty
 * @example
 * ss.tTestTwoSample([1, 2, 3, 4], [3, 4, 5, 6], 0); //= -2.1908902300206643
 */
function tTestTwoSample(sampleX, sampleY, difference) {
    var n = sampleX.length;
    var m = sampleY.length;

    // An empty sample on either side makes the test undefined.
    if (!n || !m) { return null; }

    // The hypothesized mean difference (mu) defaults to zero.
    difference = difference || 0;

    // Pooled variance: combine both sample variances, weighted by
    // each sample's degrees of freedom.
    var pooledVariance = ((n - 1) * sampleVariance(sampleX) +
        (m - 1) * sampleVariance(sampleY)) / (n + m - 2);

    // t = (difference of means - hypothesized difference) / standard error.
    return (mean(sampleX) - mean(sampleY) - difference) /
        Math.sqrt(pooledVariance * (1 / n + 1 / m));
}
|
|
|
module.exports = tTestTwoSample; |
|
|
|
},{"./mean":21,"./sample_variance":39}],49:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
var sumNthPowerDeviations = require('./sum_nth_power_deviations'); |
|
|
|
/**
 * The [variance](http://en.wikipedia.org/wiki/Variance)
 * is the sum of squared deviations from the mean.
 *
 * This is an implementation of variance, not sample variance:
 * see the `sampleVariance` method if you want a sample measure.
 *
 * @param {Array<number>} x a population
 * @returns {number} variance: a value greater than or equal to zero.
 * zero indicates that all values are identical.
 * @example
 * ss.variance([1, 2, 3, 4, 5, 6]); //= 2.917
 */
function variance(x) {
    // An empty population has no variance.
    if (x.length === 0) { return null; }

    // Population variance: the mean of the squared deviations
    // between each value and the mean.
    var squaredDeviationSum = sumNthPowerDeviations(x, 2);
    return squaredDeviationSum / x.length;
}
|
|
|
module.exports = variance; |
|
|
|
},{"./sum_nth_power_deviations":46}],50:[function(require,module,exports){ |
|
'use strict'; |
|
|
|
/**
 * The [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score).
 *
 * The standard score is the number of standard deviations an observation
 * or datum is above or below the mean. Thus, a positive standard score
 * represents a datum above the mean, while a negative standard score
 * represents a datum below the mean. It is a dimensionless quantity
 * obtained by subtracting the population mean from an individual raw
 * score and then dividing the difference by the population standard
 * deviation.
 *
 * The z-score is only defined if one knows the population parameters;
 * if one only has a sample set, then the analogous computation with
 * sample mean and sample standard deviation yields the
 * Student's t-statistic.
 *
 * @param {number} x raw score
 * @param {number} mean population mean
 * @param {number} standardDeviation population standard deviation
 * @return {number} z score
 * @example
 * ss.zScore(78, 80, 5); //= -0.4
 */
function zScore(x, mean, standardDeviation) {
    // Deviation from the mean, expressed in standard-deviation units.
    var deviation = x - mean;
    return deviation / standardDeviation;
}
|
|
|
module.exports = zScore; |
|
|
|
},{}]},{},[1])(1) |
|
}); |
// NOTE(review): the two lines below are stray chat-transcript text that was
// accidentally appended after the bundle's closing `});` — left as comments
// so the file stays syntactically valid. They should probably be removed.
// Nice work! Do you know why some states are showing up black?
// They don't get any class I think the scale is returning undefined.