Last active
February 12, 2020 23:55
-
-
Save alex-hhh/41b94005339724421b9832c74fe54d92 to your computer and use it in GitHub Desktop.
efficiency-factor-and-temperature
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
;; See https://alex-hhh.github.io/2017/12/running-and-outdoor-temperature.html | |
;; You will need to clone https://github.com/alex-hhh/ActivityLog2 and fix the | |
;; require below to point to the al-interactive.rkt file. You will also need | |
;; to update the session ids as they reference activities in my own database. | |
(require plot) | |
(require "../../ActivityLog2/etc/al-interactive.rkt") | |
(require math/statistics) | |
(require racket/draw) | |
(define img-width 800) | |
(define img-height 400) | |
;;........................................................... utilities .... | |
(define (efficiency-factor/run hr speed) | |
(* 100 (/ speed hr))) | |
;; https://en.wikipedia.org/wiki/Humidex | |
(define (humidex temperature dew-point) | |
(let* ((dewpk (+ dew-point 273.16)) | |
(e (* 6.11 (exp (* 5417.7530 (- (/ 1 273.16) (/ 1 dewpk)))))) | |
(h (* 0.5555 (- e 10.0)))) | |
(exact-round (+ temperature h)))) | |
;;............................................... construct the data set .... | |
;; The 'df' data-frame% contains an entry for each running session. The | |
;; following data series are present: | |
;; | |
;; "start_time" -- Unix timestamp for the start of the session | |
;; | |
;; "sid" -- the database session id | |
;; | |
;; "speed" -- average speed for the session in meters/second | |
;; | |
;; "ga-speed" -- grade adjusted average speed in meters/second | |
;; | |
;; "hr" -- average heart rate for the session | |
;; | |
;; "hr-stddev" -- STDDEV for the heart rate (higher values indicate interval | |
;; workouts) | |
;; | |
;; "body_weight" -- body weight at the time of the session, in kilograms | |
;; | |
;; "hill-pct" -- percentage of the route with a grade greater than 2% or less | |
;; than -2%, this is an indicator of how hilly the course is | |
;; | |
;; "temperature" -- temperature for the session, in degrees Celsius | |
;; | |
;; "humidity" -- relative humidity as a percentage | |
;; | |
;; "dew_point" -- dew point, in degrees Celsius, see https://en.wikipedia.org/wiki/Humidex | |
;; | |
;; "humidex" -- Humidex for the session (this is a "feels like" temperature), | |
;; in degrees Celsius, see https://en.wikipedia.org/wiki/Humidex | |
;; | |
;; "ef" -- efficiency factor calculated as 100 * speed / hr | |
;; | |
;; "ga-ef" -- grade adjusted efficiency factor, same as 'ef' but calculated | |
;; using the grade adjusted speed | |
(define df | |
(make-data-frame-from-query | |
(current-database) | |
(file->string "session-data.sql"))) | |
(define ga-speed-cache (make-hash)) | |
(define hill-pct-cache (make-hash)) | |
(define hr-stddev-cache (make-hash)) | |
;; Calculate derived data for SESSION-ID (Grade-Adjusted Speed, Hill Percent | |
;; and Heart Rate StdDev) and store it in GA-SPEED-CACHE, HILL-PCT-CACHE and | |
;; HR-STDDEV-CACHE with SESSION-ID as the key. This is done here to avoid | |
;; loading session data frames multiple time, as the DF cache is smaller than | |
;; the number of sessions we have to process. | |
(define (update-parameters-for session-id) | |
(let* ((sdf (sid->df session-id)) | |
(gaspd-stats (df-statistics sdf "gaspd")) | |
(hr-stats (df-statistics sdf "hr")) | |
(grade-histogram (df-histogram sdf "grade" #:as-percentage? #t))) | |
(hash-set! ga-speed-cache session-id (statistics-mean gaspd-stats)) | |
(hash-set! hr-stddev-cache session-id (statistics-stddev hr-stats)) | |
(when grade-histogram | |
(hash-set! hill-pct-cache | |
session-id | |
;; NOTE: histogram percentages are from 1 .. 100, we convert | |
;; them to 0 .. 1 range | |
(/ (for/sum ((item grade-histogram) | |
#:when (> (abs (vector-ref item 0)) 2)) | |
(vector-ref item 1)) | |
100.0))))) | |
;; Update derived data for all sessions in our data set, this is a slow | |
;; operation, so we print out a progress bar. | |
(printf "Fetching data for ~a sessions ~%" (send df get-row-count)) | |
(flush-output) | |
(let ((count 0)) | |
(send df for-each '("sid") | |
(lambda (sid) | |
(set! count (add1 count)) | |
(printf ".") | |
(when (zero? (remainder count 50)) | |
(printf " ~a sessions done~%" count)) | |
(define session-id (vector-ref sid 0)) | |
(update-parameters-for session-id) | |
(flush-output)))) | |
(printf "~%Fetching data for ~a sessions ... done.~%" (send df get-row-count)) | |
(printf "Adding extra series ...")(flush-output) | |
(df-generate-series df "humidex" '("temperature" "dew_point") humidex) | |
(df-generate-series df "ga-speed" '("sid") (lambda (sid) (hash-ref ga-speed-cache sid #f))) | |
(df-generate-series df "hill-pct" '("sid") (lambda (sid) (hash-ref hill-pct-cache sid #f))) | |
(df-generate-series df "hr-stddev" '("sid") (lambda (sid) (hash-ref hr-stddev-cache sid #f))) | |
(df-generate-series df "ef" '("hr" "speed") efficiency-factor/run) | |
(df-generate-series df "ga-ef" '("hr" "ga-speed") efficiency-factor/run) | |
(printf " done.~%")(flush-output) | |
;;......................................... temperature vs ef data set .... | |
;; The 'tdf' data frame contains an entry for each recorded temperature in | |
;; `df' along with average efficiency factor data at that temperature. It | |
;; contains the following series: | |
;; | |
;; "temperature" -- temperature in degrees Celsius | |
;; | |
;; "num-samples" -- number of runs at this temperature | |
;; | |
;; "avg-ef" -- the average efficiency factors of the runs at this temperature | |
;; | |
;; "stddev-ef" -- the standard deviation of the efficiency factors for the | |
;; runs at this temperature | |
;; | |
;; "avg-ga-ef" -- the average grade adjusted efficiency factors for the runs | |
;; at this temperature. | |
;; | |
;; "stddev-ga-ef" -- the standard deviation of the grade adjusted efficiency | |
;; factors for the runs at this temperature | |
(printf "Preparing temperature data frame ...")(flush-output) | |
(define tdf (new data-frame%)) | |
(let ((data-1 (make-hash)) | |
(data-2 (make-hash))) | |
(for ((item (send df select* "temperature" "ef" "ga-ef" #:filter valid-only))) | |
(match-define (vector temperature ef ga-ef) item) | |
(hash-update! data-1 (exact-round temperature) | |
(lambda (v) (cons ef v)) | |
(lambda () '())) | |
(hash-update! data-2 (exact-round temperature) | |
(lambda (v) (cons ga-ef v)) | |
(lambda () '()))) | |
(let ((t-list '()) | |
(nsamples-list '()) | |
(avg-ef-list '()) | |
(stddev-ef-list '()) | |
(avg-ga-ef-list '()) | |
(stddev-ga-ef-list '())) | |
(for ((key (in-list (sort (hash-keys data-1) >)))) | |
(define stats-1 (update-statistics* empty-statistics (hash-ref data-1 key))) | |
(define stats-2 (update-statistics* empty-statistics (hash-ref data-2 key))) | |
(set! t-list (cons key t-list)) | |
(set! nsamples-list (cons (length (hash-ref data-1 key)) nsamples-list)) | |
(set! avg-ef-list (cons (statistics-mean stats-1) avg-ef-list)) | |
(set! stddev-ef-list (cons (statistics-stddev stats-1) stddev-ef-list)) | |
(set! avg-ga-ef-list (cons (statistics-mean stats-2) avg-ga-ef-list)) | |
(set! stddev-ga-ef-list (cons (statistics-stddev stats-2) stddev-ga-ef-list))) | |
(df-add-series tdf "temperature" t-list) | |
(df-add-series tdf "num-samples" nsamples-list) | |
(df-add-series tdf "avg-ef" avg-ef-list) | |
(df-add-series tdf "stddev-ef" stddev-ef-list) | |
(df-add-series tdf "avg-ga-ef" avg-ga-ef-list) | |
(df-add-series tdf "stddev-ga-ef" stddev-ga-ef-list))) | |
(printf " done.~%")(flush-output) | |
;;............................................ humidex vs ef data set .... | |
;; The 'hxdf' data frame contains an entry for each recorded humidex value in | |
;; `df' along with average efficiency factors data at that temperature. It | |
;; contains the following series: | |
;; | |
;; "humidex" -- Humidex value in degrees Celsius | |
;; | |
;; "num-samples" -- number of runs at this temperature | |
;; | |
;; "avg-ef" -- the average efficiency factor of the runs at this temperature | |
;; | |
;; "stddev-ef" -- the standard deviation of the efficiency factor for the runs | |
;; at this temperature | |
;; | |
;; "avg-ga-ef" -- the average grade adjusted efficiency factor for the runs at | |
;; this temperature. | |
;; | |
;; "stddev-ga-ef" -- the standard deviation of the grade adjusted efficiency | |
;; factor for the runs at this temperature | |
(printf "Preparing humidex data frame ...")(flush-output) | |
(define hxdf (new data-frame%)) | |
(let ((data-1 (make-hash)) | |
(data-2 (make-hash))) | |
(for ((item (send df select* "humidex" "ef" "ga-ef" #:filter valid-only))) | |
(match-define (vector temperature ef ga-ef) item) | |
(hash-update! data-1 temperature | |
(lambda (v) (cons ef v)) | |
(lambda () '())) | |
(hash-update! data-2 temperature | |
(lambda (v) (cons ga-ef v)) | |
(lambda () '()))) | |
(let ((t-list '()) | |
(nsamples-list '()) | |
(avg-ef-list '()) | |
(stddev-ef-list '()) | |
(avg-ga-ef-list '()) | |
(stddev-ga-ef-list '()) | |
(count-ga-ef-list '())) | |
(for ((key (in-list (sort (hash-keys data-1) >)))) | |
(define stats-1 (update-statistics* empty-statistics (hash-ref data-1 key))) | |
(define stats-2 (update-statistics* empty-statistics (hash-ref data-2 key))) | |
(set! t-list (cons key t-list)) | |
(set! nsamples-list (cons (length (hash-ref data-1 key)) nsamples-list)) | |
(set! avg-ef-list (cons (statistics-mean stats-1) avg-ef-list)) | |
(set! stddev-ef-list (cons (statistics-stddev stats-1) stddev-ef-list)) | |
(set! avg-ga-ef-list (cons (statistics-mean stats-2) avg-ga-ef-list)) | |
(set! stddev-ga-ef-list (cons (statistics-stddev stats-2) stddev-ga-ef-list))) | |
(define (add-series df name data) | |
(send df add-series (new data-series% [name name] [data (list->vector data)]))) | |
(add-series hxdf "humidex" t-list) | |
(add-series hxdf "num-samples" nsamples-list) | |
(add-series hxdf "avg-ef" avg-ef-list) | |
(add-series hxdf "stddev-ef" stddev-ef-list) | |
(add-series hxdf "avg-ga-ef" avg-ga-ef-list) | |
(add-series hxdf "stddev-ga-ef" stddev-ga-ef-list))) | |
(printf " done.~%")(flush-output) | |
;;....................................................... exporting data .... | |
;; Export our data frames to CVS files to be loaded into other tools like | |
;; Excel or R for further analysis. Three files are created: | |
;; | |
;; * 'session-data.csv' -- contains an entry for each running session with the | |
;; average speed, heart rate, temperature data, plus other things (data is | |
;; from the 'df' data-frame%) | |
;; | |
;; * 'temperature-ef-data.csv' -- contains an entry for each recorded | |
;; temperature with the number of samples (running session) at that | |
;; temperature and the average efficiency factor, and average grade adjusted | |
;; efficiency factor. (data is from the 'tdf' data-frame%) | |
;; | |
;; * 'humidex-ef-data.csv' -- same as 'temperature-ef-data.csv' but for | |
;; the 'HUMIDEX' value instead of the temperature (data is from the 'hxdf' | |
;; data-frame%) | |
;; | |
(define (export-data) | |
(call-with-output-file | |
"session-data.csv" | |
(lambda (out) | |
(df-write/csv out df | |
"start_time" "sid" "speed" "ga-speed" "hr" "hr-stddev" | |
"body_weight" "hill-pct" | |
"temperature" "humidity" "dew_point" "humidex" | |
"ef" "ga-ef")) | |
#:exists 'replace) | |
(call-with-output-file | |
"temperature-ef-data.csv" | |
(lambda (out) | |
(df-write/csv out tdf | |
"temperature" "num-samples" | |
"avg-ef" "stddev-ef" | |
"avg-ga-ef" "stddev-ga-ef")) | |
#:exists 'replace) | |
(call-with-output-file | |
"humidex-ef-data.csv" | |
(lambda (out) | |
(df-write/csv out hxdf | |
"humidex" "num-samples" | |
"avg-ef" "stddev-ef" | |
"avg-ga-ef" "stddev-ga-ef")) | |
#:exists 'replace)) | |
;;..................................................... generating plots .... | |
(define (fitted-ga-ef humidex) | |
(+ 1.8347990118 (* 0.0114733678 humidex) (* -0.0002975829 humidex humidex))) | |
(define (generate-humidex-ga-ef-plot) | |
(define scatter-data (send df select* "humidex" "ga-ef" #:filter valid-only)) | |
(define avg-scatter-data (send hxdf select* "humidex" "avg-ga-ef" #:filter valid-only)) | |
(define c0 (make-object color% 165 165 165)) | |
(define c1 (make-object color% 237 125 49)) | |
(define c2 (make-object color% 68 114 196)) | |
(parameterize ((plot-legend-anchor 'top-right) | |
(plot-title #f) | |
(plot-x-label "Temperature (℃)") | |
(plot-y-label "Efficiency Factor")) | |
(plot-file | |
(list | |
(tick-grid) | |
(make-scatter-renderer scatter-data #:color c0 #:size 1.5 #:label "Efficiency Factor" #:alpha 0.5) | |
(make-scatter-renderer avg-scatter-data #:color c1 #:size 3 #:label "Average" #:alpha 0.8) | |
(function fitted-ga-ef #:color c2 #:width 4 #:style 'long-dash #:label "Trendline")) | |
"humidex-ga-ef-scatter.svg" | |
#:x-min 0 #:x-max 45 | |
#:y-min 1.5 #:y-max 2.5 | |
#:width img-width | |
#:height img-height))) | |
(define (generate-temperature-histogram-plot df temperature-series temperature-label out-file-name) | |
(define hx-hist (df-histogram df temperature-series)) | |
;;(define c (make-object color% 102 188 255)) | |
(define c (make-object color% 68 114 196)) | |
(parameterize ((plot-legend-anchor 'top-right) | |
(plot-title #f) | |
(plot-x-label temperature-label) | |
(plot-y-label "Number of Running Sessions")) | |
(plot-file | |
(make-histogram-renderer hx-hist #:color c) | |
out-file-name | |
#:width img-width | |
#:height img-height))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Fit a second degree polynomial in the format a * X ^ 2 + b * X + c on the | |
## efficiency factor versus temperature data. The input data was generated | |
## from running sessions in an ActivityLog2 database by the | |
## 'ef-temperature.rkt' Racket script and it is imported from CSV files. | |
## | |
## We fit four combinations: | |
## | |
## * efficiency factor as a function of temperature (t.ef) | |
## | |
## * grade adjusted efficiency factor as a function of temperature | |
## (t.ga.ef) | |
## | |
## * efficiency factor as a function of humidex, a "feels like" temperature | |
## estimate (hx.ef) | |
## | |
## * grade adjusted efficiency factor as a function of humidex (hx.ga.ef) | |
## | |
## get the current directory of the script, so we can load CSV files relative | |
## to the location of this script. | |
this.dir <- dirname(sys.frame(1)$ofile); | |
temperature.ef.data.file <- file.path(this.dir, "temperature-ef-data.csv"); | |
humidex.ef.data.file <- file.path(this.dir, "humidex-ef-data.csv"); | |
temperature.ef.data <- read.csv(temperature.ef.data.file); | |
humidex.ef.data <- read.csv(humidex.ef.data.file); | |
hx.ef <- lm(formula = avg.ef ~ poly(humidex, 2, raw=TRUE), data=humidex.ef.data); | |
names(hx.ef$coefficients) <- c('c', 'b', 'a'); | |
hx.ga.ef <- lm(formula = avg.ga.ef ~ poly(humidex, 2, raw=TRUE), data=humidex.ef.data); | |
names(hx.ga.ef$coefficients) <- c('c', 'b', 'a'); | |
t.ef <- lm(formula = avg.ef ~ poly(temperature, 2, raw=TRUE), data=temperature.ef.data); | |
names(t.ef$coefficients) <- c('c', 'b', 'a'); | |
t.ga.ef <- lm(formula = avg.ga.ef ~ poly(temperature, 2, raw=TRUE), data=temperature.ef.data); | |
names(t.ga.ef$coefficients) <- c('c', 'b', 'a'); | |
message("---------------------------------------------------------------------------"); | |
message("Coefficients for fitting temperature vs efficiency factor:"); | |
print(coefficients(t.ef)); | |
print(summary(t.ef)); | |
message("---------------------------------------------------------------------------"); | |
message("Coefficients for fitting temperature vs grade adjusted efficiency factor:"); | |
print(coefficients(t.ga.ef)); | |
print(summary(t.ga.ef)); | |
message("---------------------------------------------------------------------------"); | |
message("Coefficients for fitting humidex vs efficiency factor:"); | |
print(coefficients(hx.ef)); | |
print(summary(hx.ef)); | |
message("---------------------------------------------------------------------------"); | |
message("Coefficients for fitting humidex vs grade adjusted efficiency factor:"); | |
print(coefficients(hx.ga.ef)); | |
print(summary(hx.ga.ef)); | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> source('~/Projects/AlAnalysis/ef-vs-temperature/fit-ef-temperature.r') | |
--------------------------------------------------------------------------- | |
Coefficients for fitting temperature vs efficiency factor: | |
c b a | |
1.8489762675 0.0086945355 -0.0002560993 | |
Call: | |
lm(formula = avg.ef ~ poly(temperature, 2, raw = TRUE), data = temperature.ef.data) | |
Residuals: | |
Min 1Q Median 3Q Max | |
-0.07979 -0.02032 0.00451 0.01693 0.08988 | |
Coefficients: | |
Estimate Std. Error t value Pr(>|t|) | |
c 1.849e+00 3.573e-02 51.750 < 2e-16 *** | |
b 8.695e-03 3.416e-03 2.545 0.01629 * | |
a -2.561e-04 7.291e-05 -3.513 0.00143 ** | |
--- | |
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
Residual standard error: 0.03748 on 30 degrees of freedom | |
Multiple R-squared: 0.526, Adjusted R-squared: 0.4944 | |
F-statistic: 16.65 on 2 and 30 DF, p-value: 1.369e-05 | |
--------------------------------------------------------------------------- | |
Coefficients for fitting temperature vs grade adjusted efficiency factor: | |
c b a | |
1.8583853928 0.0088294731 -0.0002656736 | |
Call: | |
lm(formula = avg.ga.ef ~ poly(temperature, 2, raw = TRUE), data = temperature.ef.data) | |
Residuals: | |
Min 1Q Median 3Q Max | |
-0.081085 -0.016782 0.004513 0.018862 0.086066 | |
Coefficients: | |
Estimate Std. Error t value Pr(>|t|) | |
c 1.858e+00 3.401e-02 54.635 < 2e-16 *** | |
b 8.829e-03 3.252e-03 2.715 0.010877 * | |
a -2.657e-04 6.941e-05 -3.828 0.000612 *** | |
--- | |
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
Residual standard error: 0.03568 on 30 degrees of freedom | |
Multiple R-squared: 0.5867, Adjusted R-squared: 0.5592 | |
F-statistic: 21.3 on 2 and 30 DF, p-value: 1.752e-06 | |
--------------------------------------------------------------------------- | |
Coefficients for fitting humidex vs efficiency factor: | |
c b a | |
1.8173862364 0.0121595112 -0.0003088752 | |
Call: | |
lm(formula = avg.ef ~ poly(humidex, 2, raw = TRUE), data = humidex.ef.data) | |
Residuals: | |
Min 1Q Median 3Q Max | |
-0.059137 -0.019832 -0.002467 0.013722 0.079316 | |
Coefficients: | |
Estimate Std. Error t value Pr(>|t|) | |
c 1.817e+00 2.716e-02 66.924 < 2e-16 *** | |
b 1.216e-02 2.594e-03 4.687 4.62e-05 *** | |
a -3.089e-04 5.459e-05 -5.659 2.64e-06 *** | |
--- | |
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
Residual standard error: 0.03313 on 33 degrees of freedom | |
Multiple R-squared: 0.601, Adjusted R-squared: 0.5768 | |
F-statistic: 24.85 on 2 and 33 DF, p-value: 2.609e-07 | |
--------------------------------------------------------------------------- | |
Coefficients for fitting humidex vs grade adjusted efficiency factor: | |
c b a | |
1.8347990118 0.0114733678 -0.0002975829 | |
Call: | |
lm(formula = avg.ga.ef ~ poly(humidex, 2, raw = TRUE), data = humidex.ef.data) | |
Residuals: | |
Min 1Q Median 3Q Max | |
-0.051693 -0.024896 -0.000276 0.011807 0.082331 | |
Coefficients: | |
Estimate Std. Error t value Pr(>|t|) | |
c 1.835e+00 2.730e-02 67.200 < 2e-16 *** | |
b 1.147e-02 2.608e-03 4.399 0.000107 *** | |
a -2.976e-04 5.488e-05 -5.422 5.31e-06 *** | |
--- | |
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | |
Residual standard error: 0.03331 on 33 degrees of freedom | |
Multiple R-squared: 0.6001, Adjusted R-squared: 0.5759 | |
F-statistic: 24.76 on 2 and 33 DF, p-value: 2.704e-07 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
select VAL.session_id as sid, | |
VAL.start_time as start_time, | |
VAL.hr as hr, | |
VAL.speed as speed, | |
VAL.body_weight as body_weight, | |
VAL.temperature as temperature, | |
VAL.humidity as humidity, | |
VAL.dew_point as dew_point | |
from V_ACTIVITY_LIST VAL | |
where VAL.sport = 1 | |
and VAL.hr is not null | |
and VAL.speed is not null | |
and VAL.temperature is not null | |
and VAL.session_id not in ( | |
select SL.session_id | |
from SESSION_LABEL SL, LABEL L | |
where SL.label_id = L.id | |
and L.name = 'equipment-failure') | |
order by VAL.start_time | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment