Skip to content

Instantly share code, notes, and snippets.

View atroche's full-sized avatar

Alistair Roche atroche

View GitHub Profile
;; Queries the history view of `db` for account names together with the
;; instant of each transaction in which :account/active was asserted as true
;; (the trailing `true` in that clause is the datom's "added" flag).
(let [history-db (d/history db)
      query      '{:find  [?account-name ?tx-time]
                   :where [[?account :account/name ?account-name]
                           [?account :account/active true ?tx true]
                           [?tx :db/txInstant ?tx-time]]}]
  (d/q query history-db))
(defn datoms-for-adding-employee
  "Returns a two-element vector of transaction data for adding an employee:

  1. a new entity (tempid in :db.part/user) carrying `employee-name` and
     `initial-services-offered`;
  2. a map on the transaction entity itself (tempid in :db.part/tx) tagging
     it with the :add-employee command id and the :db/id of `system-user`."
  [system-user employee-name initial-services-offered]
  (let [employee-entity {:db/id             (d/tempid :db.part/user)
                         :employee/name     employee-name
                         :employee/services initial-services-offered}
        tx-annotation   {:db/id           (d/tempid :db.part/tx)
                         :command/id      :add-employee
                         :command/done-by (:db/id system-user)}]
    [employee-entity tx-annotation]))
(let [datoms (datoms-for-adding-employee (:user session)
(defn response-or-error [conn data command-definition data auth-details]
(let [db (d/db conn)
{::commands/keys [data-spec
db-validator
handler
allowed?
pre-tx-side-effector
prepare-datoms
session-transformer
;; via http://blog.klipse.tech//clojure/2016/10/25/core-match.html
;; Prints the FizzBuzz line for each integer from 1 to 10 inclusive,
;; dispatching on the pair of remainders modulo 3 and modulo 5.
(doseq [n (range 1 11)
        :let [rem-3 (mod n 3)
              rem-5 (mod n 5)
              label (match [rem-3 rem-5]
                      [0 0] "FizzBuzz"
                      [0 _] "Fizz"
                      [_ 0] "Buzz"
                      :else n)]]
  (println label))
alistair@instance-1:~/data$ time cat "training/train-00001-of-00150.json" | jq -c '.| {document: (.string_sequence|join(" ")), property: (.question_string_sequence|join(" ")), values: .raw_answers}' > train.json
real 2m34.287s
alistair@instance-1:~/data$ time cat "training/train-00001-of-00150.json" | parallel --no-notice -q --pipe -P 16 jq -c '.| {document: (.string_sequence|join(" ")), property: (.question_string_sequence|join(" ")), values: .raw_answers}' > train.json
real 0m20.731s
(ns sunshine.less-bloat
(:require [datasplash.api :as ds]))
;; Declares LessBloatOptions with datasplash's `defoptions`; it is only used
;; for parsing command-line options.
(ds/defoptions LessBloatOptions
;; Option names are camelCase because these options don't support kebab-case.
;; dataDir allows the job to be run against local data as a quick way to test;
;; when it is not supplied, the gs:// path below is the default.
{:dataDir {:type String
:default "gs://wikireading-atroche/data"
:description "Path where files are stored"}})
lein run -m sunshine.less-bloat --runner=DataflowPipelineRunner --project=mindful-pillar-123205 --stagingLocation=gs://wikireading-atroche/staging/ --zone=asia-east1-b
;; Counts the newline bytes in the file by reading it in chunks into `buffer`
;; (a byte array defined elsewhere) and summing the per-chunk counts.
(with-open [rdr (FileInputStream. ten-gb-json-file)]
  ;; The buffer holds bytes, and a Character is never `=` to a Byte in
  ;; Clojure, so compare numerically against the newline's code point.
  (let [newline-code (int \newline)]
    (loop [line-count 0]
      (let [bytes-read (.read rdr buffer)]
        ;; .read returns -1 once the end of the stream is reached.
        (if (neg? bytes-read)
          line-count
          ;; Only the first `bytes-read` entries were filled by this read;
          ;; anything after that is left over from an earlier chunk and must
          ;; not be counted again.
          (let [lines (->> buffer
                           (take bytes-read)
                           (filter #(== newline-code %))
                           count)]
            (recur (+ line-count lines))))))))
;; Counts the lines of the file by reading it line by line through a reader
;; and tallying each line; the reader is closed when the form exits.
(with-open [rdr (io/reader ten-gb-filename)]
  (reduce (fn [line-count _line] (inc line-count))
          0
          (line-seq rdr)))