Skip to content

Instantly share code, notes, and snippets.

@technomancy
Created September 11, 2009 23:00
Show Gist options
  • Select an option

  • Save technomancy/185642 to your computer and use it in GitHub Desktop.

Select an option

Save technomancy/185642 to your computer and use it in GitHub Desktop.
(ns org.log
(:use [org.parsimonygroup.cascading]
[org.parsimonygroup.makemain-utils :only [parseArgs]]
[clojure.contrib.java-utils :only [file]])
(:gen-class))
(defn delete-file-recursively
  "Deletes the path `f` after coercing it with `file`. When the path is a
  directory, every entry returned by `.listFiles` is deleted first by a
  recursive call. Returns whatever the final `.delete` call returns."
  [f]
  (let [target (file f)
        dir?   (.isDirectory target)]
    (when dir?
      ;; Depth-first: empty the directory before removing the directory itself.
      (dorun (map delete-file-recursively (.listFiles target))))
    ;; NOTE(review): `target` is already the result of `file`; this second
    ;; coercion mirrors the original and looks redundant -- confirm before
    ;; removing it.
    (.delete (file target))))
(defn split-line
  "Splits `line` on every single tab or space character and returns a lazy
  seq of exactly three items: the first three tokens, with the string
  \"dummy\" filling any positions the line does not supply."
  [line]
  (let [tokens  (.split line "[\t ]")
        ;; Infinite padding guarantees `take` always finds three items.
        padding (repeat "dummy")
        padded  (concat tokens padding)]
    (take 3 padded)))
(defn wrap-vec
  "Gathers all arguments into one rest-seq and returns a single-element
  vector containing that seq. Called with no arguments, the element is nil."
  [& line]
  (vector line))
;; Returns the workflow definition as a nested map: a single :operations key
;; whose value describes two :each steps.
;;   1. :using split-line -- declares :outputFields "name", "id", "content".
;;   2. :using wrap-vec   -- declares :inputFields "name", "id" and
;;                           :outputFields "name", "id".
;; Both steps set :reader to identity and :writer to str.
;;
;; NOTE(review): the :operations map literal repeats the key :each. Current
;; Clojure readers reject a map literal with duplicate keys
;; ("Duplicate key: :each"). Whether the runtime this was written for keeps
;; both entries, and in what order the cascading library would see them,
;; cannot be determined from this file -- confirm how multiple :each steps are
;; meant to be expressed before relying on this definition.
(defn flow []
{:operations {:each
;; First step: tokenise each input line with split-line.
{:using split-line :reader identity :writer str
:outputFields ["name" "id" "content"]}
;; Second step (same :each key as above -- see NOTE(review)).
:each {:using wrap-vec :reader identity :writer str
:inputFields ["name" "id"]
:outputFields ["name" "id"]}}})
(defn -main
  "Command-line entry point. Parses `args` with `parseArgs`, adds the class
  of this function under :mainCls, recursively deletes the path stored under
  :out, and then passes the options map to `cascading`."
  [& args]
  (let [parsed   (parseArgs args)
        main-cls (class -main)
        opts     (assoc parsed :mainCls main-cls)
        out-path (:out opts)]
    ;; Remove the output path before the run starts.
    (delete-file-recursively out-path)
    (cascading opts)))
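;; Example REPL invocation:
;; (-main "-in" "/home/phil/bork" "-out" "/tmp/out" "-ns" "org.log" "-wf" "flow")
;; Exception recorded for a run like the one above. Note that the expected
;; field list below includes 'bleh', which the :outputFields of `flow` as shown
;; in this file do not declare, so the trace appears to come from a slightly
;; different revision of the flow:
;; cascading.tuple.TupleException: operation added the wrong number of fields, expected: ['name', 'id', 'content', 'bleh'], got result size: 7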
;; at cascading.tuple.TupleEntryCollector.add(TupleEntryCollector.java:69)
;; at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
;; at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
;; at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
;; at java.lang.reflect.Method.invoke(Method.java:616)
;; at clojure.lang.Reflector.invokeMatchingMethod(Reflector.java:88)
;; at clojure.lang.Reflector.invokeInstanceMethod(Reflector.java:28)
;; at org.parsimonygroup.function_bootstrap$process_data__12.invoke(function_bootstrap.clj:43)
;; at org.parsimonygroup.function_bootstrap$_operate__19.invoke(function_bootstrap.clj:48)
;; at org.parsimonygroup.FunctionBootstrap.operate(Unknown Source)
;; at cascading.pipe.Each.applyFunction(Each.java:312)
;; at cascading.pipe.Each.access$200(Each.java:50)
;; at cascading.pipe.Each$EachFunctionHandler.handle(Each.java:473)
;; at cascading.pipe.Each$EachHandler.operate(Each.java:408)
;; at cascading.flow.stack.EachMapperStackElement.operateEach(EachMapperStackElement.java:86)
;; at cascading.flow.stack.EachMapperStackElement.collect(EachMapperStackElement.java:79)
;; at cascading.flow.stack.FlowMapperStack.map(FlowMapperStack.java:170)
;; at cascading.flow.FlowMapper.map(FlowMapper.java:75)
;; at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
;; at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:356)
;; at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305)
;; at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:176)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment