Created
March 7, 2012 20:15
-
-
Save danhammer/1995731 to your computer and use it in GitHub Desktop.
cascalog queries for final clearing probabilities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns forma.hadoop.jobs.forma | |
(:use cascalog.api) | |
(:require [cascalog.ops :as c] | |
[forma.trends.analysis :as a] | |
[forma.classify.logistic :as log])) | |
;; Predicate macro taking one input var. The input is applied (through
;; `map`) to the keys :location and :value; the :location entry is then
;; expanded by `schema/unpack-pixel-location`.
;;
;; Emits, in order: ?s-res ?mod-h ?mod-v ?sample ?line, followed by the
;; entry that was stored under :value.
;;
;; NOTE(review): this presumes the input is map-like (callable on
;; keywords) -- confirm against the sources that feed it.
(def get-loc
  (<- [?record :> ?s-res ?mod-h ?mod-v ?sample ?line ?payload]
      (map ?record [:location :value] :> ?pixel-loc ?payload)
      (schema/unpack-pixel-location ?pixel-loc
                                    :> ?s-res ?mod-h ?mod-v ?sample ?line)))
(defn fire-tap
  "Builds a query over `fire-src`, which is read as a generator of
  single-field tuples (one chunk each). Every chunk is split by
  `get-loc` into its pixel location fields and its value, and that
  value is passed, together with `est-map`, through
  `schema/adjust-fires`.

  Emits ?s-res ?mod-h ?mod-v ?sample ?line ?fire-series.

  Original author's caveat: this won't work when pulling directly from
  the pail."
  [est-map fire-src]
  (<- [?s-res ?mod-h ?mod-v ?sample ?line ?fire-series]
      (fire-src ?fire-chunk)
      (get-loc ?fire-chunk
               :> ?s-res ?mod-h ?mod-v ?sample ?line ?raw-series)
      (schema/adjust-fires est-map ?raw-series :> ?fire-series)))
(defn filter-query
  "Joins the timeseries chunks from `chunk-src` with the chunks from
  `vcf-src` on the shared pixel location fields and keeps only pixels
  whose VCF value is greater than or equal to `vcf-limit`.

  Both sources are read as 2-tuples whose first field is ignored.
  Timeseries chunks are split with `get-loc`; VCF chunks are expanded
  with `p/blossom-chunk`.

  Emits ?s-res ?mod-h ?mod-v ?sample ?line ?start ?ts, where ?start and
  ?ts are the :start-idx and :series entries of each timeseries value.
  Duplicate tuples are not removed (`:distinct false`)."
  [vcf-src vcf-limit chunk-src]
  (<- [?s-res ?mod-h ?mod-v ?sample ?line ?start ?ts]
      ;; Timeseries side: locate the pixel, then pull the start index
      ;; and the series out of the chunk's value.
      (chunk-src _ ?series-chunk)
      (get-loc ?series-chunk
               :> ?s-res ?mod-h ?mod-v ?sample ?line ?ts-val)
      (map ?ts-val [:start-idx :series] :> ?start ?ts)

      ;; VCF side: expand the chunk to per-pixel values and apply the
      ;; threshold. The location vars are shared with the timeseries
      ;; side, which is what joins the two sources.
      (vcf-src _ ?static-chunk)
      (p/blossom-chunk ?static-chunk
                       :> ?s-res ?mod-h ?mod-v ?sample ?line ?vcf)
      (>= ?vcf vcf-limit)

      (:distinct false)))
(defn dynamic-filter
  "Joins `ndvi-src`, `reli-src` and `rain-src` on their shared location
  fields (?s-res ?mod-h ?mod-v ?sample ?line). Each source must also
  emit a start index and a series. The three start/series pairs are
  handed to `schema/adjust`, which yields one common ?start-idx plus
  one series per input.

  Emits ?s-res ?mod-h ?mod-v ?sample ?line ?start-idx ?ndvi-ts
  ?precl-ts ?reli-ts. Duplicate tuples are not removed.

  No VCF thresholding happens here; this function takes no VCF source
  or limit."
  [ndvi-src reli-src rain-src]
  (<- [?s-res ?mod-h ?mod-v ?sample ?line
       ?start-idx ?ndvi-ts ?precl-ts ?reli-ts]
      (ndvi-src ?s-res ?mod-h ?mod-v ?sample ?line ?ndvi-start ?ndvi-raw)
      (reli-src ?s-res ?mod-h ?mod-v ?sample ?line ?reli-start ?reli-raw)
      (rain-src ?s-res ?mod-h ?mod-v ?sample ?line ?rain-start ?rain-raw)
      ;; Argument order is rain, ndvi, reli; the outputs come back in
      ;; that same order after the shared start index.
      (schema/adjust ?rain-start ?rain-raw
                     ?ndvi-start ?ndvi-raw
                     ?reli-start ?reli-raw
                     :> ?start-idx ?precl-ts ?ndvi-ts ?reli-ts)
      (:distinct false)))
(defn dynamic-tap
  "Accepts an est-map and `dynamic-src`, a source emitting, per pixel,
  the location fields followed by a start index and the ndvi, precl and
  reli series (in that order).

  For every tuple, `short-trend-shell` is applied to the start index,
  ndvi and reli series, and `long-trend-shell` to the start index,
  ndvi, reli and precl series; both are parameterized by `est-map`.
  The first output of `long-trend-shell` is discarded.

  Emits ?s-res ?mod-h ?mod-v ?sample ?line ?new-start ?short ?break
  ?long ?t-stat. Duplicate tuples are not removed.

  NOTE(review): `short-trend-shell` and `long-trend-shell` are not
  defined in the visible part of this file -- confirm where they come
  from."
  [est-map dynamic-src]
  (<- [?s-res ?mod-h ?mod-v ?sample ?line
       ?new-start ?short ?break ?long ?t-stat]
      (dynamic-src ?s-res ?mod-h ?mod-v ?sample ?line
                   ?ts-start ?ndvi-ts ?precl-ts ?reli-ts)
      (short-trend-shell est-map ?ts-start ?ndvi-ts ?reli-ts
                         :> ?new-start ?short)
      (long-trend-shell est-map ?ts-start ?ndvi-ts ?reli-ts ?precl-ts
                        :> _ ?break ?long ?t-stat)
      (:distinct false)))
(defn forma-tap
  "Accepts `dynamic-src` (location fields, a start index and the
  ?short ?break ?long ?t-stat values per pixel) and `fire-src`
  (location fields plus a fire value per pixel), joined on the shared
  location fields.

  The fire value is bound to the ungrounding var !!fire, so the join
  against `fire-src` is an outer join: pixels with no fire tuple are
  kept and reach `schema/forma-seq` with a null fire value.

  `schema/forma-seq` combines the fire value and the four dynamic
  values into ?forma-seq, which `p/index` (called with :zero-index set
  to the start index) breaks out into ?period / ?forma-val pairs.

  Emits ?s-res ?period ?mh ?mv ?s ?l ?forma-val. Duplicate tuples are
  not removed."
  [dynamic-src fire-src]
  (<- [?s-res ?period ?mh ?mv ?s ?l ?forma-val]
      (fire-src ?s-res ?mh ?mv ?s ?l !!fire)
      (dynamic-src ?s-res ?mh ?mv ?s ?l
                   ?first-period ?short ?break ?long ?t-stat)
      (schema/forma-seq !!fire ?short ?break ?long ?t-stat
                        :> ?per-period-vals)
      (p/index ?per-period-vals :zero-index ?first-period
               :> ?period ?forma-val)
      (:distinct false)))
(defmapcatop [process-neighbors [num-neighbors]]
  "Walks `window` with `w/neighbor-scan`, parameterized by
  `num-neighbors`. For every scanned position whose value is truthy,
  emits one tuple of:

    - the position's index within the scan,
    - the value itself,
    - the result of `schema/combine-neighbors` applied to that
      position's neighbors, concatenated into one sequence with
      falsey entries removed.

  Positions with a falsey value produce no tuple."
  [window]
  (let [scanned (w/neighbor-scan num-neighbors window)]
    (for [[idx [val neighbors]] (map-indexed vector scanned)
          :when val]
      [idx
       val
       (schema/combine-neighbors
        (filter identity (apply concat neighbors)))])))
(defn forma-query
  "Final query: walks each pixel's neighbors and emits the pixel's
  value alongside the combined value of its neighbors.

  `est-map` supplies :neighbors (passed to `process-neighbors`) and
  :window-dims, a two-element sequence read here as [rows cols].
  `forma-val-src` is windowed by `p/sparse-windower` over its
  \"?sample\" and \"?line\" fields with \"?forma-val\" as the value
  field and nil as the last argument. Each window is then scanned by
  `process-neighbors`, and `r/tile-position` converts the window
  column/row and the index within the window back into ?sample and
  ?line.

  Emits ?s-res ?period ?mod-h ?mod-v ?sample ?line ?val ?neighbor-val.
  Duplicate tuples are not removed."
  [est-map forma-val-src]
  (let [{neighbors                       :neighbors
         [rows cols :as window-dims]     :window-dims} est-map
        windowed-src (p/sparse-windower forma-val-src
                                        ["?sample" "?line"]
                                        window-dims
                                        "?forma-val"
                                        nil)]
    (<- [?s-res ?period ?mod-h ?mod-v ?sample ?line ?val ?neighbor-val]
        (windowed-src ?s-res ?period ?mod-h ?mod-v
                      ?window-col ?window-row ?window)
        (process-neighbors [neighbors] ?window
                           :> ?idx-in-window ?val ?neighbor-val)
        ;; Note the argument order: cols first, then rows.
        (r/tile-position cols rows ?window-col ?window-row ?idx-in-window
                         :> ?sample ?line)
        (:distinct false))))
(defn beta-generator
  "Query returning the beta vector associated with each ecoregion.

  The first argument is a map providing :t-res, :est-start,
  :ridge-const, :convergence-thresh and :max-iterations. `dynamic-src`
  emits, per pixel and period, a value and a neighbor value;
  `static-src` emits, per pixel, nine fields of which the last two are
  read as ?eco and ?hansen (the two before them are ignored).

  Only tuples whose period equals
  `(date/datetime->period t-res est-start)` are used. For those,
  `log/logistic-beta-wrap` -- parameterized with
  [ridge-const convergence-thresh max-iterations] -- receives ?hansen,
  the value and the neighbor value, and yields ?beta.

  Emits ?s-res ?eco ?beta. Duplicate tuples are not removed."
  [{:keys [t-res est-start ridge-const convergence-thresh max-iterations]}
   dynamic-src static-src]
  (let [start-period (date/datetime->period t-res est-start)]
    (<- [?s-res ?eco ?beta]
        (dynamic-src ?s-res ?period ?mod-h ?mod-v ?s ?l
                     ?pixel-val ?neighbor-val)
        (static-src ?s-res ?mod-h ?mod-v ?s ?l _ _ ?eco ?hansen)
        ;; Keep only the period derived from est-start.
        (= ?period start-period)
        (log/logistic-beta-wrap
         [ridge-const convergence-thresh max-iterations]
         ?hansen ?pixel-val ?neighbor-val :> ?beta)
        (:distinct false))))
(defn forma-estimate
  "Query to end all queries: estimates a probability series for each
  pixel.

  `beta-src` emits ?s-res ?eco ?beta; `static-src` supplies each
  pixel's ?eco (the eighth of its nine fields); `dynamic-src` emits,
  per pixel and period, a value and a neighbor value. The three are
  joined on ?s-res, the pixel location fields and ?eco.

  `log/logistic-prob-wrap` turns ?beta, the value and the neighbor
  value into a probability, and `log/mk-timeseries` combines the period
  and probability into ?prob-series.

  Emits ?s-res ?mod-h ?mod-v ?s ?l ?prob-series. Duplicate tuples are
  not removed.

  NOTE(review): the query does not itself filter on period; whether
  only post-training periods reach it depends on `dynamic-src` --
  confirm with the caller."
  [beta-src dynamic-src static-src]
  (<- [?s-res ?mod-h ?mod-v ?s ?l ?prob-series]
      (beta-src ?s-res ?eco ?beta)
      (dynamic-src ?s-res ?period ?mod-h ?mod-v ?s ?l
                   ?pixel-val ?neighbor-val)
      (static-src ?s-res ?mod-h ?mod-v ?s ?l _ _ ?eco _)
      (log/logistic-prob-wrap ?beta ?pixel-val ?neighbor-val
                              :> ?probability)
      (log/mk-timeseries ?period ?probability :> ?prob-series)
      (:distinct false)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment