Skip to content

Instantly share code, notes, and snippets.

@manboubird
Last active August 29, 2015 14:10
Show Gist options
  • Save manboubird/9dd47faf10911d5f7259 to your computer and use it in GitHub Desktop.
Save manboubird/9dd47faf10911d5f7259 to your computer and use it in GitHub Desktop.
; http://nathanmarz.com/blog/introducing-cascalog-a-clojure-based-query-language-for-hado.html
;
; Run by generating the script file and executing it through a method:
; drake -w cascalog.drake %playground
;
; Run by evaluating the code inline:
; drake -w cascalog.drake %playground.eval
;
; Path to the standalone Cascalog jar passed to every `java -jar` call in this file.
; NOTE(review): `:=` is presumably Drake's conditional assignment (keeps a value
; already supplied from the environment or command line) - confirm in the Drake spec.
CASCALOG_JAR:=$[HOME]/local/bin/cascalog-standalone.jar
; file(): write the code held in $CODE to the step's output file and print it
; with line numbers.
; NOTE(review): under the eval protocol the first body line appears to be run as
; the shell command while the lines appended by the calling step are exposed as
; $CODE - confirm against the Drake protocol documentation.
; printf is used instead of echo so that backslashes in the code (e.g. regex
; literals) are written verbatim regardless of which shell runs the command.
file() [eval]
  printf '%s\n' "$CODE" | tee "$OUTPUT" | nl
; cascalog_file(): run the step's input file as a script with the standalone
; Cascalog jar. Both paths are quoted so directories containing spaces work.
cascalog_file() [eval]
  java -jar "$[CASCALOG_JAR]" "$INPUT"
; cascalog_code(): evaluate the code held in $CODE directly with the standalone
; Cascalog jar (`-e`), without writing a script file first.
; The jar path is quoted so a home directory containing spaces works.
cascalog_code() [eval]
  java -jar "$[CASCALOG_JAR]" -e "$CODE"
;
; load playground data
;
; Generate playground.clg through file(): the indented lines below are appended
; to the method's command and end up as the script's content.
; The script loads the playground datasets and prints every person aged 25.
playground.clg <- [method:file method-mode:append]
  (use 'cascalog.playground) (bootstrap)
  (?<- (stdout) [?person] (age ?person 25))
; Run the generated playground.clg script through cascalog_file().
%playground <- playground.clg [method:cascalog_file]
; Same query as playground.clg, but evaluated inline: the first body line is the
; shell command, the remaining lines are the code it receives through $CODE.
; The jar path is quoted so a home directory containing spaces works.
%playground.eval <- [eval]
  java -jar "$[CASCALOG_JAR]" -e "$CODE"
  (use 'cascalog.playground) (bootstrap)
  (?<- (stdout) [?person] (age ?person 25))
;
; cascalog tutorial:
; http://cascalog.org/articles/getting_started.html
;
; http://nathanmarz.com/blog/introducing-cascalog-a-clojure-based-query-language-for-hado.html
; http://nathanmarz.com/blog/new-cascalog-features-outer-joins-combiners-sorting-and-more.html
; https://www.youtube.com/watch?v=7qq_PmwplEc
;
;cascalog_tutorial.clg <- [method:file method-mode:append]
; Cascalog tutorial queries, evaluated inline through cascalog_code().
; NOTE(review): cascalog_code() does not write $OUTPUT, so cascalog_tutorial.clg
; is presumably never created by this step; the commented-out header above
; (method:file) is the variant that writes the file - confirm which is intended.
cascalog_tutorial.clg <- [method:cascalog_code method-mode:append]
  (use 'cascalog.playground) (bootstrap)
  ; print dataset
  (?- (stdout) sentence)
  (?- (stdout) follows)
  ; sentence by line
  (?- (stdout)
      (<- [?line] (sentence :> ?line)))
  ; take 2 tuples
  (take 2 person)
  (take 2 age)
  (take 2 gender)
  ; define tokenise cascalog operation (one input line -> many words)
  (require '[cascalog.logic.def :as def])
  (def/defmapcatfn tokenise [line]
    "reads in a line of string and splits it by a regular expression"
    (clojure.string/split line #"[\[\]\\\(\),.)\s]+"))
  ; tokenise sentence
  (?- (stdout)
      (<- [?word]
          (sentence :> ?line)
          (tokenise :< ?line :> ?word)))
  ; aggregation: occurrences of each word
  (require '[cascalog.logic.ops :as c])
  (?- (stdout)
      (<- [?word ?count]
          (sentence :> ?line)
          (tokenise :< ?line :> ?word)
          (c/count :> ?count)))
  ; inner join: only people present in both age and gender
  (?<- (stdout) [?person ?age ?gender]
       (age ?person ?age)
       (gender ?person ?gender))
  ; outer join: !! variables are left as nil when a source has no match
  (?<- (stdout) [?person !!age !!gender]
       (age ?person !!age)
       (gender ?person !!gender))
  ; outer join: every person paired with whom they follow (nil if nobody)
  (?<- (stdout) [?person1 !!person2]
       (person ?person1)
       (follows ?person1 !!person2))
  ; people who follow nobody
  (?<- (stdout) [?person]
       (person ?person)
       (follows ?person !!p2)
       (nil? !!p2))
;
; running on a cluster:
; http://nathanmarz.com/blog/news-feed-in-38-lines-of-code-using-cascalog.html
; with swank
; https://groups.google.com/forum/#!topic/cascalog-user/T3aoOTlBb6s
;
; Skeleton for a query over a text file on HDFS; "PATH" is a placeholder to be
; replaced with a real location.
; The code is routed through cascalog_code() so it is evaluated as Clojure
; rather than executed as shell commands by the default protocol.
; Namespaces follow the cascalog.logic.* layout used by the tutorial step.
%demo <- [method:cascalog_code method-mode:append]
  (use 'cascalog.api)
  (require '[cascalog.logic [vars :as v] [ops :as c]])
  (?<- (stdout) [?line] ((hfs-textline "PATH") ?line))
;
; Cascalog workshop material:
; https://github.com/nathanmarz/cascalog-workshop
; Placeholder step: no inputs or commands yet.
;
%cascalog_workshop <-
;
; a cascalog exercise: https://github.com/sritchie/climate-class
; Placeholder step: no inputs or commands yet.
;
%climate_class <-
;
; a cascalog exercise:
; https://github.com/sthuebner/cascalog-examples
; http://vimeo.com/48349618
; Placeholder step: no inputs or commands yet.
;
%cascalog_examples <-
;
; forma-clj for image processing in cascalog
; https://github.com/wri/forma-clj
; Placeholder step: no inputs or commands yet.
; NOTE(review): unlike the %-prefixed placeholders in this file (e.g.
; %climate_class), this target has no % prefix, so Drake presumably treats it
; as a file name rather than a tag - confirm that is intended.
;
forma_clj <-
;
; pagerank
; https://github.com/docteurZ/cascalog-contrib/blob/pagerank/cascalog.graph/src/cascalog/graph/pagerank.clj
; Placeholder step: no inputs or commands yet.
; NOTE(review): no % prefix, so this is presumably a file target rather than a
; tag like %climate_class - confirm that is intended.
;
pagerank <-
;
; cascalog for graph processing:
; http://www.rene-pickhardt.de/nils-grunwald-from-linkfluence-talks-at-fosdem-about-cascalog-for-graph-processing/
; Placeholder step: no inputs or commands yet.
; NOTE(review): no % prefix, so this is presumably a file target rather than a
; tag like %climate_class - confirm that is intended.
;
graph <-
;
; Clojure on Hadoop with Cascalog:
; http://blog.factual.com/clojure-on-hadoop-a-new-hope
; https://github.com/chunhk/cascalog-hope
; Placeholder step: no inputs or commands yet.
; NOTE(review): no % prefix, so this is presumably a file target rather than a
; tag like %climate_class - confirm that is intended.
;
cascalog_hope <-
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment