The Juttle `predict` proc combines trend, seasonal, and level prediction. Although it is implemented natively in JavaScript, many of its components can also be written in Juttle; those examples are collected here.
Last active
October 7, 2015 19:54
-
-
Save welch/1117319b8232653b5245 to your computer and use it in GitHub Desktop.
ingredients for the predict proc, in juttle
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Exponentially weighted moving average: the "level" state-space model.
//
// Parameters:
//   field   - name of the point field to smooth (read as *field)
//   alpha   - smoothing weight; each non-null sample moves the estimate by
//             alpha * (sample - estimate)
//   initial - starting estimate. When null (the default), the first non-null
//             sample becomes the estimate.
//
// Result: the current estimate, or `initial` if no non-null sample has been
// seen yet.
export reducer nn(field, alpha, initial=null) {
    var value = initial;

    function update() {
        // points whose field is null leave the estimate untouched
        if (*field != null) {
            if (value == null) {
                // no estimate yet: seed it with the first observed sample
                value = *field;
            } else {
                value = value + alpha * (*field - value);
            }
        }
    }

    function result() {
        return value;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Split the signal in `field` into a predicted portion P and a prediction
// error E, using trend, seasonality, and level estimates.
//
// Parameters:
//   field - name of the field holding the signal
//   tover - window handed to put_trend as -over
//   sover - window handed to put_seasonal as -over
//   every - handed to both put_trend and put_seasonal as -every (default null)
//
// Fields written on each point by this sub:
//   detrend - *field minus the trend component *(field + "_T")
//   deseas  - currently identical to detrend (see note below)
//   holt    - level estimate; currently the constant 0 (see note below)
//   E       - prediction error: deseas - holt
//   P       - predicted portion of the signal: *field - E
// Any fields written by put_trend and put_seasonal are also left on the point.
sub put_predict(field, tover, sover, every=null) {
    put_trend -field field -every every -over tover
    | put detrend = *field - *(field+"_T")
    | put_seasonal -field 'detrend' -over sover -every every
    // NOTE(review): the seasonal component is computed above but not subtracted
    // here; the subtraction is commented out. Confirm whether that is intended.
    | put deseas = detrend // - detrend_S
    // NOTE(review): the level estimate is stubbed out as 0, so E == deseas.
    | put holt = 0//level('deseas', alpha, 0)
    | put E = deseas - holt
    | put P = *field - E
    //| put -over over E = stats.relMean(E)
    // | (@table;merge)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rebuild `duration` as a new Duration constructed from its length in seconds.
// Presumably this turns calendar-based durations (months, years) into
// fixed-length ones that can be used in arithmetic -- confirm against the
// Juttle Duration documentation.
function deCalendarize(duration) {
    return Duration.new(Duration.seconds(duration));
}
// Seasonal estimate: a per-bucket moving average across successive periods
// (e.g. year-over-year for each month).
// The original author notes that this expects an unbatched stream of points,
// one per month.
//
// Parameters:
//   field - name of the field to estimate seasonality for
//   over  - length of one seasonal period
//   every - width of one bucket within the period; defaults to 1/30th of the
//           de-calendarized period
//
// Fields written on each point:
//   __over, __every - de-calendarized copies of `over` and the bucket width
//   __bucket        - index of the point's bucket within its period
//   S               - level(...) of `field`, computed separately per bucket
//
// NOTE(review): `alpha` and `level` are not defined anywhere in view. They must
// be supplied by the surrounding module before this sub can run -- confirm.
sub put_seasonal(field, over, every=null) {
    put __over = deCalendarize(over), __every = deCalendarize(every ?? __over / 30)
    // The grouping key must exist before the `by __bucket` below can separate
    // the seasons; without it every point lands in one group.
    | put __bucket = Math.floor((time - Date.quantize(time, __over)) / __every)
    //| put -over 3 * over __Q = percentile(field,[0, 0.25, 0.5, 0.75]), __count = count() by __bucket
    // | put S = __Q[2]
    | put S = level(field,alpha,0) by __bucket
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// User-facing input: a duration labelled 'Show this duration:' that defaults
// to 30 days.
// NOTE(review): `last` is not referenced by any code visible in this file.
input last: duration -default :30 days: -label 'Show this duration:';
import 'stats.juttle' as stats;
// Count 'run-juttle' events from the 'prod' space and chart them.
//
// Parameters:
//   dur1 - how far back to read; the read starts at (:now: + :1d: - dur1).
//          Defaults to :3M:.
//
// The read keeps events whose page is the explorer URL and whose
// context_ip is not 207.141.12.50. Events are counted per hour into
// run_count, then sent both to a timechart and downstream via merge.
//
// NOTE(review): the chart title says "Weekly" but the reduce buckets by hour
// (-every :h:). Confirm which one is intended.
export sub live_juttle_runs(dur1=:3M:) {
    read -from (:now: + :1d: - dur1) -space 'prod'
        event = 'run-juttle' AND
        properties_page_currentPage ='https://app.jut.io/#explorer' AND
        context_ip != '207.141.12.50'
    | reduce -every :h: run_count = count()
    | (@timechart -title "Weekly App Juttle Program Runs" -valueField 'run_count' -display.dataDensity 0; merge)
}
// Time range of the synthetic series produced by series(): it starts at
// Date.new(0) and spans ten years.
const FROM = Date.new(0);
const TO = Date.new(0)+:10 year:;
// Generate a synthetic series with a given trend, seasonality, and noise.
//
// Parameters:
//   trend  - slope; contributes n * trend / 12 to the n-th point
//   season - amplitude of the seasonal sine wave
//   sigma  - noise scale; contributes sigma * (2 * Math.random() - 1)
//
// Fields written on each point:
//   trend_true, season_true, sigma_true - the parameters, for later comparison
//   n     - running point count
//   dy    - time since Date.new(0), expressed in years
//   cycle - sin(2 * pi * dy), i.e. one full cycle per year
//   value - the synthetic signal: trend + noise + season * cycle
//
// NOTE(review): the original comment described "monthly values" and the trend
// term divides by 12, but points are emitted weekly (-every :w:). Confirm the
// intended sampling interval.
export sub series(trend = 1, season = 10, sigma=0.25) {
    emit -from FROM -to TO -every :w: // do not use calendar intervals!
    | put trend_true= trend, season_true=season, sigma_true=sigma
    | put n = count(), dy = Duration.as(time - Date.new(0), "y"), cycle = Math.sin(dy * 2 * Math.PI)
    | put value = n * trend/12 + sigma * (2 * Math.random() - 1) + season * cycle
    //| put value = value + ((dy > 5) ? 10 : 0)
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rebuild `duration` as a new Duration constructed from its length in seconds.
// Presumably this turns calendar-based durations (months, years) into
// fixed-length ones that can be used in arithmetic -- confirm against the
// Juttle Duration documentation.
function deCalendarize(duration) {
    return Duration.new(Duration.seconds(duration));
}
// Estimate the trailing trend of `field` as the median duration-over-duration
// change of all samples in a trailing window (a variant of the Theil-Sen
// estimator). This can use up to [2 * over] of historic data per point, but
// falls back to point-to-point change so it can begin producing (noisy)
// estimates early in the stream.
//
// Parameters:
//   field - name of the field to estimate the trend of
//   over  - duration the change is normalized to (trend = change per `over`)
//   every - width of one bucket within `over`; defaults to 1/30th of the
//           de-calendarized `over`
//
// Fields written on each point:
//   __over, __every - de-calendarized copies of `over` and the bucket width
//   __bucket        - index of the point's bucket within its `over` period
//   __change_over   - change since the previous point in the same bucket,
//                     scaled to a change per `over`; null without history
//   __change        - __change_over, else the sample-to-sample change scaled
//                     the same way; forced to 0 while the window is too thin
//   __Q             - [min, Q1, median, Q3] of __change over the window
//   __t0, __y0      - time and value of the first point in the window
//   __count         - number of points in the window
//   __T             - estimated change per `over`
//   <field>_T       - trend portion of the field: __y0 + __T * (time - __t0) / __over
//
// [*field - *(field + "_T")] is the de-trended series. Keeping __t0 and __y0
// on the point allows the estimated __T to be joined with, and de-trend, a
// denser version of the input stream.
//
// The trend is rejected as 0 when the quartile range Q1...Q3 contains 0 (be
// pessimistic about trends, as we do not expect them in short-horizon
// operations data; but they will confound seasonality if not accounted for).
sub put_trend(field, over, every=null) {
    put __over = deCalendarize(over), __every = deCalendarize(every ?? __over / 30)
    | put __bucket = Math.floor((time - Date.quantize(time, __over)) / __every)
    // change since over ago, if we have historic data. Scaled by the
    // de-calendarized __over so it matches the startup estimate below and the
    // divisor used for <field>_T.
    | put __change_over = delta(field,null) == null ? null : delta(field) * (__over / delta(time,:forever:)) by __bucket
    // sample-to-sample, for startup
    | put __change = __change_over ?? delta(field, 0) * (__over / delta(time,:forever:))
    // moar data, please!! Report no change until the window holds more than 3
    // points spanning at least two bucket widths.
    | put -over 2 * over __change = (count() <= 3 || (last(time) - first(time) < 2 * __every)) ? 0 : __change,
        __Q = percentile(__change,[0, 0.25, 0.5, 0.75]),
        __t0=first(time), __y0=first(field)
        ,__count = count()
    // ignore trends around 0, else median
    | put __T = (__Q[1] < 0 && __Q[3] > 0) ? 0 : __Q[2]
    // trend portion of field's value
    | put *(field + "_T") = __y0 + __T * (time - __t0) / __over
    //| remove __Q, __t0, __y0, __bucket
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment