Skip to content

Instantly share code, notes, and snippets.

@mstump
Created February 3, 2012 23:32
Show Gist options
  • Save mstump/1733719 to your computer and use it in GitHub Desktop.
Save mstump/1733719 to your computer and use it in GitHub Desktop.
Unexpected third parameter from Cascalog aggregator
(ns test.core
(:use cascalog.api)
(:require [cascalog [vars :as v]])
(:gen-class))
;; Three example artifact records, in the map shape that `append-state`
;; reads (:groupid, :artifiactid, :version). All three share groupid "xfire"
;; and artifiactid "xfire-plexus" and differ in :version.
;; The key is spelled :artifiactid in the data; code reading these maps must
;; use the same spelling.
;; NOTE(review): nothing in the visible code references this var — it looks
;; like reference/REPL data only; confirm before removing.
(def sample_data
[{:path nil, :javadoc? :not_present, :bundle_docurl nil, :bundle_symname nil, :bundle_name nil, :bundle_exportsrv nil, :prefix nil, :lastmodified 1127001651000, :bundle_importpkg nil, :groupid "xfire", :bundle_description nil, :version "1.0-20050917.231235", :size 21774, :packaging "jar", :remoteurl nil, :classnames nil, :signature? :not_present, :bundle_license nil, :source? :not_present, :sha1 "eed564da25682c728f7ad3da557656f0766376de", :repository nil, :bundle_exportpkg nil, :bundle_version nil, :fname nil, :description nil, :artifiactid "xfire-plexus", :md5 nil, :fextension "jar"}
{:path nil, :javadoc? :not_present, :bundle_docurl nil, :bundle_symname nil, :bundle_name nil, :bundle_exportsrv nil, :prefix nil, :lastmodified 1162775407000, :bundle_importpkg nil, :groupid "xfire", :bundle_description nil, :version "1.0-20050917.231219", :size -1, :packaging nil, :remoteurl nil, :classnames nil, :signature? :not_present, :bundle_license nil, :source? :not_present, :sha1 nil, :repository nil, :bundle_exportpkg nil, :bundle_version nil, :fname "Plexus XFire Module", :description "Plexus binding for XFire.", :artifiactid "xfire-plexus", :md5 nil, :fextension "pom"}
{:path nil, :javadoc? :not_present, :bundle_docurl nil, :bundle_symname nil, :bundle_name nil, :bundle_exportsrv nil, :prefix nil, :lastmodified 1126584108000, :bundle_importpkg nil, :groupid "xfire", :bundle_description nil, :version "1.0-20050913.005153", :size 23093, :packaging "jar", :remoteurl nil, :classnames nil, :signature? :not_present, :bundle_license nil, :source? :not_present, :sha1 "e8d3775f5e363ed8c6aad70dd44a1aef0521d293", :repository nil, :bundle_exportpkg nil, :bundle_version nil, :fname nil, :description nil, :artifiactid "xfire-plexus", :md5 nil, :fextension "jar"}])
(defmapop [s-expression-parse]
  "Map operation: read one serialized s-expression from the input string
  and return the parsed Clojure value."
  [^String line]
  ;; NOTE(review): `read-string` is applied to text coming from the input
  ;; file. With `*read-eval*` enabled the reader can evaluate `#=(...)`
  ;; forms, so this should only be run on trusted input.
  (read-string line))
(defn textline-parsed
  "Return a Cascalog subquery over the text lines found at `dir`.

  Every line is read through `hfs-textline` and handed to
  `s-expression-parse`; the parsed value is bound to the single nullable
  output variable generated for this query. `(:distinct false)` keeps
  duplicate records instead of de-duplicating the output.

  The input is expected to hold one serialized map (s-expression) per line."
  [dir]
  ;; The docstring must precede the argument vector; placed after it, the
  ;; string is just an evaluated-and-discarded body expression.
  (let [outargs (v/gen-nullable-vars 1)
        source  (hfs-textline dir)]
    (<- outargs
        (source ?line)
        (s-expression-parse ?line :>> outargs)
        (:distinct false))))
(defn append-state
  "Add the artifact map `val` to the nested index `state` and return the
  updated index.

  The index groups Maven artifacts as nested maps:

    {groupid {artifactid {version artifact}}}

  The path is taken from `val`'s :groupid, :artifiactid and :version keys
  (:artifiactid is spelled as it appears in the input data). Missing
  intermediate levels are created; an existing entry for the same
  group/artifact/version is replaced. `state` may be nil or empty."
  [state val]
  ;; assoc-in creates the intermediate maps as needed, exactly like the
  ;; equivalent chain of nested `assoc` calls on possibly-nil sub-maps.
  (assoc-in state
            [(:groupid val) (:artifiactid val) (:version val)]
            val))
(defaggregateop group-by-group-id
  ;; No arguments: the starting state is an empty index.
  ([] {})
  ;; State plus one value: fold the value into the index.
  ([acc artifact] (append-state acc artifact))
  ;; State plus two values: debugging branch from the original investigation.
  ;; When this arity was hit, the first value was observed to be the artifact
  ;; to append and the second was observed to always be the number 1.
  ;; NOTE(review): why a second value arrives is not established in this
  ;; block — check how the calling query binds the aggregator's variables.
  ([acc artifact extra]
     (do
       (println (str "STATE " acc " VAL1 " artifact " VAL2 " extra))
       (append-state acc artifact)))
  ;; State only: return the accumulated index wrapped in a one-element vector.
  ([acc] [acc]))
(defn query
  "Run the grouping query and write its result to `output-tap`.

  Reads parsed artifact records from `input-path` via `textline-parsed`,
  aggregates them with `group-by-group-id`, and emits the single output
  variable ?group.

  output-tap - sink tap the query result is written to
  input-path - location of the text input (one serialized map per line)"
  [output-tap input-path]
  (let [artifacts (textline-parsed input-path)]
    (?<- output-tap
         [?group]
         (artifacts ?a)
         ;; Pass the record in explicitly and mark ?group as the output.
         ;; Without `?a :>` the aggregator is not fed the artifact as its
         ;; sole input, which is what made the extra-argument arity fire.
         (group-by-group-id ?a :> ?group))))
(defn -main
  "Entry point: run `query` over `input-path`, writing the result through a
  text-line tap created for `output-dir`."
  [input-path output-dir]
  (let [sink (hfs-textline output-dir)]
    (query sink input-path)))
;; THIS IS THE FINAL WORKING VERSION
(ns test.core
(:use cascalog.api)
(:require [cascalog [vars :as v]])
(:gen-class))
;; Three example artifact records, in the map shape that `append-state`
;; reads (:groupid, :artifiactid, :version). All three share groupid "xfire"
;; and artifiactid "xfire-plexus" and differ in :version.
;; The key is spelled :artifiactid in the data; code reading these maps must
;; use the same spelling.
;; NOTE(review): nothing in the visible code references this var — it looks
;; like reference/REPL data only; confirm before removing.
(def sample_data
[{:path nil, :javadoc? :not_present, :bundle_docurl nil, :bundle_symname nil, :bundle_name nil, :bundle_exportsrv nil, :prefix nil, :lastmodified 1127001651000, :bundle_importpkg nil, :groupid "xfire", :bundle_description nil, :version "1.0-20050917.231235", :size 21774, :packaging "jar", :remoteurl nil, :classnames nil, :signature? :not_present, :bundle_license nil, :source? :not_present, :sha1 "eed564da25682c728f7ad3da557656f0766376de", :repository nil, :bundle_exportpkg nil, :bundle_version nil, :fname nil, :description nil, :artifiactid "xfire-plexus", :md5 nil, :fextension "jar"}
{:path nil, :javadoc? :not_present, :bundle_docurl nil, :bundle_symname nil, :bundle_name nil, :bundle_exportsrv nil, :prefix nil, :lastmodified 1162775407000, :bundle_importpkg nil, :groupid "xfire", :bundle_description nil, :version "1.0-20050917.231219", :size -1, :packaging nil, :remoteurl nil, :classnames nil, :signature? :not_present, :bundle_license nil, :source? :not_present, :sha1 nil, :repository nil, :bundle_exportpkg nil, :bundle_version nil, :fname "Plexus XFire Module", :description "Plexus binding for XFire.", :artifiactid "xfire-plexus", :md5 nil, :fextension "pom"}
{:path nil, :javadoc? :not_present, :bundle_docurl nil, :bundle_symname nil, :bundle_name nil, :bundle_exportsrv nil, :prefix nil, :lastmodified 1126584108000, :bundle_importpkg nil, :groupid "xfire", :bundle_description nil, :version "1.0-20050913.005153", :size 23093, :packaging "jar", :remoteurl nil, :classnames nil, :signature? :not_present, :bundle_license nil, :source? :not_present, :sha1 "e8d3775f5e363ed8c6aad70dd44a1aef0521d293", :repository nil, :bundle_exportpkg nil, :bundle_version nil, :fname nil, :description nil, :artifiactid "xfire-plexus", :md5 nil, :fextension "jar"}])
(defn textline-parsed
  "Return a Cascalog subquery over the text lines found at `dir`.

  Every line is read through `hfs-textline` and parsed with `read-string`;
  the parsed value is bound to the single nullable output variable
  generated for this query. `(:distinct false)` keeps duplicate records
  instead of de-duplicating the output.

  The input is expected to hold one serialized map (s-expression) per line."
  [dir]
  ;; The docstring must precede the argument vector; placed after it, the
  ;; string is just an evaluated-and-discarded body expression.
  (let [outargs (v/gen-nullable-vars 1)
        source  (hfs-textline dir)]
    (<- outargs
        (source ?line)
        ;; NOTE(review): `read-string` runs on file contents; with
        ;; `*read-eval*` enabled the reader can evaluate `#=(...)` forms,
        ;; so only feed this trusted input.
        (read-string ?line :>> outargs)
        (:distinct false))))
(defn append-state
  "Add the artifact map `val` to the nested index `state` and return the
  updated index.

  The index groups Maven artifacts as nested maps:

    {groupid {artifactid {version artifact}}}

  The path is taken from `val`'s :groupid, :artifiactid and :version keys
  (:artifiactid is spelled as it appears in the input data). Missing
  intermediate levels are created; an existing entry for the same
  group/artifact/version is replaced. `state` may be nil or empty."
  [state val]
  ;; assoc-in creates the intermediate maps as needed, exactly like the
  ;; equivalent chain of nested `assoc` calls on possibly-nil sub-maps.
  (assoc-in state
            [(:groupid val) (:artifiactid val) (:version val)]
            val))
(defaggregateop group-by-group-id
  ;; No arguments: the starting state is an empty index.
  ([] {})
  ;; State plus one value: fold the artifact into the index.
  ([index artifact] (append-state index artifact))
  ;; State only: return the accumulated index wrapped in a one-element vector.
  ([index] [index]))
(defn query
  "Run the grouping query and write its result to `output-tap`.

  Parsed artifact records come from `textline-parsed` on `input-path`; each
  record ?a is fed to `group-by-group-id`, whose result is emitted as the
  single output variable ?group."
  [output-tap input-path]
  (let [parsed-records (textline-parsed input-path)]
    (?<- output-tap [?group]
         (parsed-records ?a)
         (group-by-group-id ?a :> ?group))))
(defn -main
  "Entry point: run `query` over `input-path`, writing the result through a
  text-line tap created for `output-dir`."
  [input-path output-dir]
  (let [sink (hfs-textline output-dir)]
    (query sink input-path)))
@mstump
Copy link
Author

mstump commented Feb 4, 2012

@sritchie
Copy link

sritchie commented Feb 4, 2012

Nice!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment