Skip to content

Instantly share code, notes, and snippets.

@lotz84
Forked from shotarok/multi_vectorizer.py
Last active August 29, 2015 14:10
Show Gist options
  • Save lotz84/d0437573405ea58a481f to your computer and use it in GitHub Desktop.
Save lotz84/d0437573405ea58a481f to your computer and use it in GitHub Desktop.
(import concurrent.futures)
(import sys)
(import subprocess)
(import time)
(import [collections [defaultdict]])
(import [data [sentences]])
(defn sent2kgram [sentence &optional [k (int "2")] [delimiter " "]]
  "Split `sentence` into words with `mecab -O wakati` and return its k-grams.

  sentence  -- text to tokenize; it is passed to the shell via `echo`.
  k         -- number of consecutive words per k-gram (default 2).
  delimiter -- string used to join the words of one k-gram (default one space).

  Returns a list of strings, one per window of `k` consecutive words, in
  order.  The list is empty when the tokenized sentence has fewer than `k`
  words.  Whatever text the shell pipeline prints (including an error message
  when mecab is unavailable) is what gets split into words."
  ;; NOTE(review): the default is spelled (int "2") instead of a literal 2 --
  ;; presumably a hy2py workaround; confirm before simplifying.
  ;; Local import keeps this function self-contained.
  (import shlex)
  ;; Quote the sentence so quotes, parentheses, `;`, `|`, `$` etc. inside it
  ;; are passed to echo literally instead of being interpreted by the shell.
  (setv command (.format "echo {0} | mecab -O wakati" (.quote shlex sentence)))
  (setv words (.split (.strip (.getoutput subprocess command))))
  (setv kgrams [])
  ;; n words contain n - k + 1 windows of size k; the `+ 1` keeps the final
  ;; window.  A non-positive count makes range empty.
  (for [i (range (+ (- (len words) k) 1))]
    (.append kgrams (.join delimiter (slice words i (+ i k)))))
  kgrams)
(defn timef [f]
  "Decorator: time one call of the zero-argument function `f`.

  The returned wrapper calls `f`, prints `<f.__name__>: <elapsed>ms` using
  wall-clock time from `time.time`, and returns whatever `f` returned."
  (defn wrapper []
    (setv start (.time time))
    ;; Keep the result so decorated functions still hand back their value.
    (setv result (f))
    (setv end (.time time))
    ;; time.time is in seconds; scale to milliseconds for the report.
    (print (.format "{0}: {1:.2f}ms" f.__name__ (* (int "1000") (- end start))))
    result)
  wrapper)
(with-decorator timef
  (defn single_process []
    "Assign an integer id to every k-gram of every sentence, in one process.

    Iterates over the module-level `sentences`, tokenizes each one with
    `sent2kgram`, and registers each k-gram in a defaultdict whose factory
    yields the current dict size, so new k-grams get ids 0, 1, 2, ... in
    first-seen order.  The function body evaluates to that mapping."
    (setv kgram2id (defaultdict (fn [] (len kgram2id))))
    (for [sentence sentences]
      (for [kgram (sent2kgram sentence)]
        ;; The lookup itself is the point: a missing key triggers the default
        ;; factory and stores the next id.  An explicit loop is used because
        ;; `map` is lazy on Python 3 and would never perform the lookups.
        (get kgram2id kgram)))
    kgram2id))
(with-decorator timef
  (defn multi_process []
    "Assign an integer id to every k-gram, tokenizing in worker processes.

    `sent2kgram` is mapped over the module-level `sentences` with a
    `concurrent.futures.ProcessPoolExecutor`; the id assignment itself runs
    in this process.  A defaultdict whose factory yields the current dict
    size gives new k-grams ids 0, 1, 2, ... in first-seen order.  The
    function body evaluates to that mapping."
    (setv kgram2id (defaultdict (fn [] (len kgram2id))))
    (with [[executor (.ProcessPoolExecutor concurrent.futures)]]
      (for [res_kgram (.map executor sent2kgram sentences)]
        (for [kgram res_kgram]
          ;; The lookup itself is the point: a missing key triggers the
          ;; default factory and stores the next id.  An explicit loop is used
          ;; because `map` is lazy on Python 3 and would never run the lookups.
          (get kgram2id kgram))))
    kgram2id))
(defn main []
  "Run the single-process variant, then the multi-process variant.

  Evaluates to whatever the `multi_process` call returns."
  (single_process)
  (setv outcome (multi_process))
  outcome)
;; Run the comparison only when this module is executed as a script.
(when (= __name__ "__main__")
  (main))
@lotz84
Copy link
Author

lotz84 commented Dec 5, 2014

$ hy2py multi_vectorizer.hy > multi_vectorizer.py
$ python3 multi_vectorizer.py

You may need to delete the first line of the generated multi_vectorizer.py, which is the line that imports the hy module.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment