Skip to content

Instantly share code, notes, and snippets.

@Kodiologist
Last active October 19, 2019 18:06
Show Gist options
  • Save Kodiologist/e2f1e6685e8fd865650f97bb6a67ad07 to your computer and use it in GitHub Desktop.
Save Kodiologist/e2f1e6685e8fd865650f97bb6a67ad07 to your computer and use it in GitHub Desktop.
Compare Cataclysm DDA's weather to real weather
; Compare a debug weather log generated by Cataclysm DDA to real
; data from the NOAA dataset Local Climatological Data (LCD).
; The data is downloaded automatically.
;
; Dependencies:
; - pandas
; - matplotlib
; - metpy
; - https://github.com/Kodiologist/Kodhy
(require [kodhy.macros [qw $ getl ss λ wc geta]])
(import
[glob [glob]]
re
tarfile
os
io
urllib.request
[numpy :as np]
[pandas :as pd]
metpy.calc [metpy.units [units :as U]]
[kodhy.util [seq T F]])
(setv inHg-to-hPa 33.863886)
(setv inch-to-mm 25.4)
(setv precipitation-cols (qw date 1 2 3 4 5 6 7 8 9 10 11 12 ignored1 13 14 15 16 17 18 19 20 21 22 23 24 ignored2 ignored3))
(setv light-rain-threshold-mm/hour 1)
(setv heavy-rain-threshold-mm/hour 2.5)
(setv years [2014 2015 2016])
(setv sites ["CONCORD, NH" "BOSTON, MA" "CARIBOU, ME"])
(setv data-dir "/tmp/Local Climatological Data")
(setv cata-weather-log-path "/tmp/weather.output")
(unless (glob (os.path.join data-dir "*"))
(for [year years :if (!= year 2014) month (seq 1 12)]
(setv fname (.format "{:04}{:02}.tar.gz" year month))
(print "Getting" fname)
(urllib.request.urlretrieve
(+ "https://www1.ncdc.noaa.gov/pub/data/lcd/" fname)
(os.path.join data-dir fname))))
(unless (in "main" (globals))
(setv obs (reduce + (lfor year years month (seq 1 12) (do
(setv ym (.format "{:04}{:02}" year month))
(with [o (tarfile.open (os.path.join data-dir (+ ym ".tar.gz")))
o2 (.extractfile o (+ ym "01.txt"))]
(setv text (.read (io.TextIOWrapper o2 :encoding "Windows-1252"))))
(lfor site sites (do
(setv [main precip] (.groups (re.search
(+ site r"\"\n.+?\n(01,.+?\n) (?:[^\n]*\n){8}(.+?)\n(?:[ ,]|[0-9]+\.)")
text re.DOTALL)))
(setv main (pd.read-csv (io.StringIO main)
:names (qw date maximum_temp minimum_temp average_temp depart_from_normal average_dew_point average_wet_bulb heating_degree_days cooling_degree_days weather snow/ice_depth snow/ice_water_equivalent snowfall_precipitation precipitation_water_equiv average_station_pressure average_sea_level_pressure resultant_wind_speed resultant_direction average_wind_speed max_peak_wind_speed max_peak_wind_direction max_2-min_wind_speed max_2-min_wind_direction date2)
:usecols (qw date maximum_temp minimum_temp average_dew_point average_wet_bulb average_sea_level_pressure precipitation_water_equiv)
:na-values " "))
(*= ($ main precipitation_water_equiv) inch-to-mm)
(setv precip (pd.read-csv
(io.StringIO (.replace precip ",\n" "\n"))
:names precipitation-cols
:usecols (+ ["date"] (lfor c precipitation-cols
:if (.isdigit c) c))))
(for [d [main precip]]
(setv ($ d site) (.index sites site))
(setv ($ d year) year)
(setv ($ d month) month))
(setv precip (pd.melt precip
:id_vars (qw site year month date)
:var-name "hour"
:value-name "precip"))
(*= ($ precip precip) inch-to-mm)
(setv ($ precip hour) (.astype ($ precip hour) int))
(setv ($ precip class)
(np.where (< ($ precip precip) light-rain-threshold-mm/hour)
0
(np.where (< ($ precip precip) heavy-rain-threshold-mm/hour)
1
2)))
(, main precip)))))))
(setv main (pd.concat (map first obs)))
(setv precip (pd.concat (map second obs)))
(*= ($ main average_sea_level_pressure) inHg-to-hPa)
(setv ($ main relative_humidity) (.
(wc main (metpy.calc.relative-humidity-from-dewpoint
(* (.to-numpy $average_wet_bulb) U.degC)
(* (.to-numpy $average_dew_point) U.degC))) magnitude)))
(setv cata (pd.read-csv cata-weather-log-path
:sep ";" :thousands ","))
(setv cata.columns (lfor c cata.columns
(.replace (.replace c ")" "") "(" "_")))
(setv cata (.copy (ss cata (|
(& (= $year 1) (= $season 3))
(& (= $year 2) (<= $season 2))))))
(/= ($ cata humidity_%) 100)
(setv ($ cata rain-mm) (* (/ 1 3)
(np.where (.isin ($ cata weatherdesc) (qw Clear Sunny Cloudy))
0.
(np.where (.isin ($ cata weatherdesc) (qw Flurries Drizzle))
1.5
3.))))
(setv ($ cata is-precip) (= ($ cata rain-mm) 0))
(defn r [x]
(round x 3))
(defmacro loop-real [d &rest body]
`(for [site (range (len sites)) year years]
(setv it (ss ~d (& (= $year year) (= $site site))))
(print (get sites site) year "-" (r (do ~@body)))))
(defn compare-precipitation []
(print "Proportion of days with ≥1 mm of precipitation")
(loop-real main (.mean (>= ($ it precipitation_water_equiv) 1)))
(print "Cata -" (r (.mean (.apply
(.groupby cata (qw year season day))
(λ (>= (.sum ($ it rain-mm)) 1))))))
(print "\nProportion of hours with light precip or more")
(loop-real precip (.mean (< 0 ($ it class))))
(print "Cata -" (r (.mean (>= (.apply
(.groupby cata (qw year season day hour))
(λ (.sum ($ it rain-mm)))) light-rain-threshold-mm/hour))))
(print "\nProportion of hours with heavy precip")
(loop-real precip (.mean (= ($ it class) 2)))
(print "Cata -" (r (.mean (>= (.apply
(.groupby cata (qw year season day hour))
(λ (.sum ($ it rain-mm)))) heavy-rain-threshold-mm/hour))))
(print "\nTotal precip per year (cm)")
(loop-real precip (/ (.sum ($ it precip)) 10))
(print "Cata -" (r (/ (.sum ($ cata rain-mm)) 10))))
(defn compare-temperature []
(for [highs [F T]]
(print "\nMean absolute running difference -" (if highs "highs" "lows"))
(loop-real main (.mean (.abs (.diff
(getl it : (if highs "maximum_temp" "minimum_temp"))))))
(print "Cata -" (r (.mean (.abs (.diff (.apply
(.groupby cata (qw year season day))
(λ (if highs
(.max ($ it temperature_F))
(.min ($ it temperature_F)))))))))))
(print "\nMean within-day range")
(loop-real main (.mean (wc it (- $maximum_temp $minimum_temp))))
(print "Cata -" (r (.mean (.apply
(.groupby cata (qw year season day))
(λ (- (.max ($ it temperature_F)) (.min ($ it temperature_F))))))))
(setv win-width 5)
(for [highs [F T]]
(defn extreme-idx [t]
(setv ms (.mean (.rolling
(.reset-index t :drop T)
win-width :win-type "boxcar")))
(+ (- (// win-width 2)) (if highs
(.idxmax ms :skipna T)
(.idxmin ms :skipna T))))
(print "\nExtreme window center -" (if highs "highs" "lows"))
(loop-real main (extreme-idx
(getl it : (if highs "maximum_temp" "minimum_temp"))))
(print "Cata -" (extreme-idx (.apply
(.groupby cata (qw year season day))
(λ (if highs
(.max ($ it temperature_F))
(.min ($ it temperature_F)))))))))
(defn plot [thing &optional site year [use-cata T] fake-cata]
(import
[matplotlib.pyplot :as plt]
matplotlib.dates
datetime)
(plt.clf)
(setv d (matplotlib.dates.date2num (datetime.date (or year 2013) 1 1)))
(defn f [c v]
(setv v (.to-numpy v))
(plt.plot :color c (+ d (np.arange (len v))) v))
(when (and site year) (f "blue" (ss main
(& (= $year year) (= $site (.index sites site)))
(get {
"temp-max" "maximum_temp"
"temp-min" "minimum_temp"
"pressure" "average_sea_level_pressure"
"humidity" "relative_humidity"
"rain" "precipitation_water_equiv" } thing))))
(when use-cata (f "orange" (if (not (none? fake-cata)) fake-cata (.apply
(.groupby cata (qw year season day))
(λ (cond
[(= thing "temp-max") (.max ($ it temperature_F))]
[(= thing "temp-min") (.min ($ it temperature_F))]
[(= thing "pressure") (.mean ($ it pressure_mB))]
[(= thing "humidity") (.mean ($ it humidity_%))]
[(= thing "rain") (.sum ($ it rain-mm))]))))))
(.xaxis.set-major-locator (plt.gca)
(matplotlib.dates.MonthLocator))
(.xaxis.set-major-formatter (plt.gca)
(matplotlib.dates.DateFormatter "%b")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment