Created
October 14, 2024 12:37
-
-
Save jackrusher/66bcf70db8b327052d3092b34e40590f to your computer and use it in GitHub Desktop.
A note tacked onto https://strommarkt-at.apps.garden/notebooks/timeseries_demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; If you have to do any sophisticated time calculations, including | |
;; calendrical stuff (day of week, &c), you should obviously use a | |
;; library to handle that stuff. However, if you only need some simple | |
;; things it can be very useful to leave ISO 8601 formatted dates in | |
;; string representation... | |
;; gold/oz 1995-01-03 to 2016-11-10 | |
;; Sorted map of date string -> numeric price, built from the remote CSV.
;; Each line is split on "," and only its first two fields are used: the
;; first becomes the key, the second is parsed as the value. The first line
;; of the file is skipped (presumably the CSV header row -- confirm against
;; the file). Because the result is a sorted-map keyed by ISO 8601 strings,
;; entries iterate in chronological order.
(def data
  (->> (slurp "https://www.math.ttu.edu/~atrindad/tsdata/FTSdatasets/Gold.csv")
       clojure.string/split-lines
       rest
       (map (fn [line]
              (let [[date price] (clojure.string/split line #",")]
                ;; Double/parseDouble rather than read-string: the text comes
                ;; from a remote server, and read-string can evaluate code
                ;; embedded in its input (#=(...) while *read-eval* is true).
                ;; Parsing as a double also rejects anything that is not a
                ;; number and keeps every value the same numeric type.
                [date (Double/parseDouble price)])))
       (into (sorted-map))))
(count data) | |
;;=> 5526 | |
;; Because ISO 8601 (YYYY-MM-DD) dates sort lexicographically, we can | |
;; easily partition by year and month | |
;; Runs of consecutive [date price] entries that share the same first four
;; characters of the date key (the "YYYY" part).
(def data-yearly
  (partition-by (fn [[date _price]] (subs date 0 4))
                data))
(count data-yearly) | |
;; => 22 | |
;; average price per year, 1995-2016 | |
;; For each yearly group, pair the date of its first entry with the
;; arithmetic mean of that group's prices.
(for [year-entries data-yearly
      :let [prices (map second year-entries)]]
  [(ffirst year-entries)
   (/ (reduce + prices) (count prices))])
;; => | |
;; (["1995-01-03" 384.07063492063486] | |
;; ["1996-01-02" 387.7348425196852] | |
;; ["1997-01-02" 330.99683794466415] | |
;; ["1998-01-02" 294.1223320158102] | |
;; ["1999-01-04" 278.8626984126983] | |
;; ["2000-01-04" 279.2896825396827] | |
;; ["2001-01-02" 271.19011857707505] | |
;; ["2002-01-02" 310.07599206349204] | |
;; ["2003-01-02" 363.8316205533598] | |
;; ["2004-01-02" 409.5322834645669] | |
;; ["2005-01-04" 444.98789682539683] | |
;; ["2006-01-03" 604.3378968253966] | |
;; ["2007-01-02" 696.4312252964429] | |
;; ["2008-01-02" 872.3722440944882] | |
;; ["2009-01-02" 973.6581027667984] | |
;; ["2010-01-04" 1226.664624505929] | |
;; ["2011-01-04" 1573.1599601593628] | |
;; ["2012-01-03" 1668.857142857143] | |
;; ["2013-01-02" 1409.505928853755] | |
;; ["2014-01-02" 1266.0626482213438] | |
;; ["2015-01-02" 1159.8211462450597] | |
;; ["2016-01-04" 1261.2420091324207]) | |
;; just as easy to group monthly | |
;; Runs of consecutive [date price] entries that share the same first seven
;; characters of the date key (the "YYYY-MM" part).
(def data-monthly
  (partition-by (fn [[date _price]] (subs date 0 7))
                data))
(count data-monthly) | |
;;=> 263 | |
;; N.B. weekly is harder! :) | |
;; range queries are super easy and clear when using strings | |
;; Entries of `data` whose key sorts strictly after "1997" and strictly
;; before "1999". Full dates such as "1997-01-02" compare greater than the
;; bare prefix "1997", so this selects every entry dated 1997 or 1998.
(def subset-1997+1998
  (let [lower-bound "1997"
        upper-bound "1999"]
    (subseq data > lower-bound < upper-bound)))
(count subset-1997+1998) | |
;;=> 506 | |
;; and, of course, you can take windows easily with partition. here's
;; a trailing five-entry moving average (each day together with the
;; four entries preceding it) for each day in the selected year range.
;; Slide a window of five entries, one step at a time, over the subset.
;; Each window yields [date-of-its-last-entry mean-of-its-prices]; the
;; results are collected into a vector.
(into []
      (map (fn [window]
             (let [[end-date _price] (last window)
                   prices            (map second window)
                   total             (reduce + prices)]
               [end-date (/ total (count prices))])))
      (partition 5 1 subset-1997+1998))
;;=> [["1997-01-08" 360.96] | |
;; ["1997-01-09" 358.29999999999995] | |
;; ["1997-01-10" 357.15] | |
;; ["1997-01-13" 357.21999999999997] | |
;; ["1997-01-14" 357.15] ... |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment