realgenekim · June 9, 2020 15:06
diff --git a/split-and-count.cljs b/split-and-count.cljs
 ;; using lumo for ClojureScript: https://github.com/anmonteiro/lumo
 ;;
 ;; https://www.wordclouds.com/
 ;;    - size: 1920x1080
 ;;    - size is typically around -45
 ;;    - shape: rectangle
 ;;    - theme: black on white
 ;;    - font: Arial
 ;;
 ;; PS: Over the last 30 years, I've probably written this program over
 ;;     20 times in awk, perl, python, ruby... this was the first time writing
 ;;     it in Clojure, my new favorite language.  This time around, I used Lumo,
 ;;     which feels much more like a scripting language than any other ClojureScript
 ;;     implementation.  Highly recommended!
 ;;
 ;;     There's another ClojureScript scripting alternative called Planck, which
 ;;     also looks fantastic: http://planck-repl.org/


 (ns split.core
  (:require [clojure.string :as str]
            [cljs.nodejs :as nodejs]
            [lumo.util :as util :refer [file-seq line-seq distinct-by]]
            [goog.string :as gstr])
  (:import [goog.string format StringBuffer]))

 ;; Point ClojureScript's print functions at Node.js so that the `println`
 ;; calls below write to the console.
 ;; NOTE(review): Lumo may already configure printing on its own -- confirm
 ;; whether this call is still required.
 (nodejs/enable-util-print!)

 ; helper function to check for presence of value in vector
 (defn in?
   "Returns true if some item of `coll` is equal (by `=`) to `elem`,
   false otherwise.

   coll - any seqable collection (nil or empty yields false); scanned linearly.
   elem - the value to look for."
   [coll elem]
   ;; `some` yields nil, not false, on a miss; coerce so this predicate always
   ;; returns a strict boolean, as the `?` suffix and docstring promise.
   (boolean (some #(= elem %) coll)))

 ; words to filter out of list
 (def common-words
   "Lines of commonwords.txt as returned by lumo.util/line-seq; these are
   the words excluded from the final word list."
   (util/line-seq "commonwords.txt"))

 ;; text to analyse
 (def lines
   "Lines of module6.txt as returned by lumo.util/line-seq."
   (util/line-seq "module6.txt"))

 ;; report how many input lines were read
 (println "# lines: " (count lines))

 ;; Each input line split into tokens on runs of whitespace, periods and commas.
 (def wlines (map #(str/split % #"[\s.,]+") lines))

 ;; Lower-cased stream of all tokens from all lines.
 ;; Splitting a blank line, or a line that begins with a delimiter, produces
 ;; an empty-string token; those are dropped so "" is never counted as a word.
 (def words
   (->> wlines
        (mapcat identity)
        (remove str/blank?)
        (map str/lower-case)))

 ;; [word count] pairs ordered by count, highest first.
 (def sorted-words (sort-by second > (frequencies words)))

 ;; Minimum occurrence count a word needs to be kept; 1 keeps every word.
 (def min-count 1)

 ;; Stop words as a set for constant-time membership checks. Entries are
 ;; trimmed and lower-cased so they compare equal to the lower-cased tokens
 ;; even if the stop-word file contains stray whitespace or capitals.
 (def common-word-set
   (set (map (comp str/lower-case str/trim) common-words)))

 ;; [word count] pairs that meet the minimum count. Kept under its own name
 ;; instead of being a first definition of `top-words`, so that `top-words`
 ;; is defined exactly once below.
 (def frequent-words
   (filter #(>= (second %) min-count) sorted-words))

 ;; Frequent words that are not stop words, in the same order as the input.
 (def top-new-words
   (remove #(contains? common-word-set (first %)) frequent-words))

 ;; The first 250 remaining [word count] pairs (fewer if there are fewer).
 (def top-words (take 250 top-new-words))

 ;; One "<count> <word>" string per entry of top-words.
 (def out
   (for [[word n] top-words]
     (gstr/format "%d %s" n word)))

 ;; Print the formatted lines, one per row.
 (run! println out)
	;; using lumo for ClojureScript: https://github.com/anmonteiro/lumo
	;;
	;; https://www.wordclouds.com/
	;; - size: 1920x1080
	;; - size is typically around -45
	;; - shape: rectangle
	;; - theme: black on white
	;; - font: Arial
	;;
	;; PS: Over the last 30 years, I've probably written this program over
	;; 20 times in awk, perl, python, ruby... this was the first time writing
	;; it in Clojure, my new favorite language. This time around, I used Lumo,
	;; which feels much more like a scripting language than any other ClojureScript
	;; implementation. Highly recommended!
	;;
	;; There's another ClojureScript scripting alternative called Planck, which
	;; also looks fantastic: http://planck-repl.org/


	(ns split.core
	(:require [clojure.string :as str]
	[cljs.nodejs :as nodejs]
	[lumo.util :as util :refer [file-seq line-seq distinct-by]]
	[goog.string :as gstr])
	(:import [goog.string format StringBuffer]))

	;; Point ClojureScript's print functions at Node.js so that the `println`
	;; calls below write to the console.
	;; NOTE(review): Lumo may already configure printing on its own -- confirm
	;; whether this call is still required.
	(nodejs/enable-util-print!)

	; helper function to check for presence of value in vector
	(defn in?
	  "Returns true if some item of `coll` is equal (by `=`) to `elem`,
	  false otherwise.

	  coll - any seqable collection (nil or empty yields false); scanned linearly.
	  elem - the value to look for."
	  [coll elem]
	  ;; `some` yields nil, not false, on a miss; coerce so this predicate always
	  ;; returns a strict boolean, as the `?` suffix and docstring promise.
	  (boolean (some #(= elem %) coll)))

	; words to filter out of list
	(def common-words
	  "Lines of commonwords.txt as returned by lumo.util/line-seq; these are
	  the words excluded from the final word list."
	  (util/line-seq "commonwords.txt"))

	;; text to analyse
	(def lines
	  "Lines of module6.txt as returned by lumo.util/line-seq."
	  (util/line-seq "module6.txt"))

	;; report how many input lines were read
	(println "# lines: " (count lines))

	;; Each input line split into tokens on runs of whitespace, periods and commas.
	(def wlines (map #(str/split % #"[\s.,]+") lines))

	;; Lower-cased stream of all tokens from all lines.
	;; Splitting a blank line, or a line that begins with a delimiter, produces
	;; an empty-string token; those are dropped so "" is never counted as a word.
	(def words
	  (->> wlines
	       (mapcat identity)
	       (remove str/blank?)
	       (map str/lower-case)))

	;; [word count] pairs ordered by count, highest first.
	(def sorted-words (sort-by second > (frequencies words)))

	;; Minimum occurrence count a word needs to be kept; 1 keeps every word.
	(def min-count 1)

	;; Stop words as a set for constant-time membership checks. Entries are
	;; trimmed and lower-cased so they compare equal to the lower-cased tokens
	;; even if the stop-word file contains stray whitespace or capitals.
	(def common-word-set
	  (set (map (comp str/lower-case str/trim) common-words)))

	;; [word count] pairs that meet the minimum count. Kept under its own name
	;; instead of being a first definition of `top-words`, so that `top-words`
	;; is defined exactly once below.
	(def frequent-words
	  (filter #(>= (second %) min-count) sorted-words))

	;; Frequent words that are not stop words, in the same order as the input.
	(def top-new-words
	  (remove #(contains? common-word-set (first %)) frequent-words))

	;; The first 250 remaining [word count] pairs (fewer if there are fewer).
	(def top-words (take 250 top-new-words))

	;; One "<count> <word>" string per entry of top-words.
	(def out
	  (for [[word n] top-words]
	    (gstr/format "%d %s" n word)))

	;; Print the formatted lines, one per row.
	(run! println out)