Created
August 18, 2011 16:17
-
-
Save kirankulkarni/1154424 to your computer and use it in GitHub Desktop.
Get the top 10 most frequent words from a file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns freq.core | |
(:import (java.io BufferedReader FileReader)) | |
(:require [clojure.string :as string])) | |
(defn read-file-lazy
  "Opens the file named by file-name and returns a sequence of its lines.

  The lines are fully realized (via doall) before the reader is closed.
  with-open closes the reader as soon as its body returns, so handing back an
  unrealized line-seq would fail with a \"Stream closed\" IOException once the
  caller walked past the first line. Returns nil for an empty file."
  [file-name]
  (with-open [reader (BufferedReader. (FileReader. file-name))]
    ;; Force every line while the reader is still open.
    (doall (line-seq reader))))
(defn read-file
  "Returns the entire contents of the file named by file-name as one string,
  read with slurp."
  [file-name]
  (-> file-name
      slurp))
(defn read-words
  "Returns a vector of the words in the file named by file-name.

  A word is any maximal run of non-whitespace characters, so consecutive
  spaces, tabs, carriage returns and blank lines never produce empty-string
  entries. An empty or whitespace-only file yields an empty vector."
  [file-name]
  ;; re-seq returns nil when nothing matches; vec turns that into [].
  (vec (re-seq #"\S+" (read-file file-name))))
(defn update-frequencies
  "Returns frequency-map with the count for word raised by one. A word that is
  not yet a key of the map is entered with a count of 1."
  [frequency-map word]
  (let [next-count (if (contains? frequency-map word)
                     (inc (get frequency-map word))
                     1)]
    (assoc frequency-map word next-count)))
(defn calculate-word-frequencies
  "Folds a sequence of words into a sorted map from each distinct word to the
  number of times it occurs in the sequence."
  [words]
  (let [bump (fn [counts word]
               ;; Unseen words start from 0, so their first count is 1.
               (assoc counts word (inc (get counts word 0))))]
    (reduce bump (sorted-map) words)))
(defn top-words
  "Returns the most frequent words in the file named by file-name as a
  sequence of [word count] map entries, highest count first.

  With one argument the top 10 entries are returned; the optional second
  argument n selects how many entries to take instead. Words with equal counts
  appear in descending word order, because the ascending stable sort over the
  sorted frequency map is reversed."
  ([file-name]
   (top-words file-name 10))
  ([file-name n]
   (->> (read-words file-name)
        calculate-word-frequencies
        (sort-by val)
        reverse
        (take n))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment