Created
March 14, 2010 10:45
-
-
Save mreid/331911 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; A simple demonstration of several aspects of Clojure: | |
;; Sequences, Java interop, Regular expressions | |
;; Presented at the March 10, 2010 Canberra Java User's Group meeting | |
;; (See http://mark.reid.name/sap/clojure-cjug-talk.html for more info) | |
;; Namespace for the word-tally demo.
(ns demo
  ;; clojure.core ships its own `update` since Clojure 1.7. Exclude it so the
  ;; `update` function defined in this namespace does not clash with it
  ;; (otherwise loading the file emits a "being replaced" warning).
  (:refer-clojure :exclude [update])
  ;; Java classes used to open the input file.
  (:import (java.io FileReader BufferedReader)))
(defn canonical
  "Returns a canonical (lower-case) version of word.
  Lower-casing uses the locale-neutral root locale so the result does not
  depend on the JVM's default locale."
  [word]
  (.toLowerCase word java.util.Locale/ROOT))
(defn word-seq
  "Given a string, returns a lazy sequence holding the canonical form of
  every run of word characters found in it, in order of appearance."
  [string]
  (->> string
       (re-seq #"\w+")
       (map canonical)))
(defn read-words
  "Given a reader, returns a lazy sequence of all words it contains,
  produced line by line and concatenated in reading order."
  [reader]
  (->> reader
       line-seq
       (mapcat word-seq)))
(defn update
  "Given a tally map and a word, returns the map with that word's count
  raised by one; a word not yet in the map is counted from zero."
  [tally word]
  (let [seen (get tally word 0)]
    (assoc tally word (inc seen))))
(defn tally-words
  "Given a reader, returns a map from each word read to the number of
  times that word occurred."
  [reader]
  (->> reader
       read-words
       (reduce update {})))
;; Tally the words of don_quixote.txt and print the ten most frequent ones,
;; most frequent first. with-open closes the reader when the body finishes.
(with-open [reader (BufferedReader. (FileReader. "don_quixote.txt"))]
  (let [by-count-desc (sort-by #(- (val %)) (tally-words reader))]
    (doseq [[word n] (take 10 by-count-desc)]
      (println word ": " n))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package demo; | |
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Simple demonstration of tallying word counts in Java.
 *
 * <p>Reads a text file line by line, splits each line into lower-case words,
 * counts how often each word occurs and prints the most frequent words.
 */
public class Tally {

    /** Matches one maximal run of word characters. */
    static final Pattern WORD = Pattern.compile("\\w+");

    /** Number of entries printed by {@link #main(String[])}. */
    private static final int TOP_COUNT = 10;

    /**
     * Splits a line of text into its words.
     *
     * @param line The input to parse words from; must not be null.
     * @return A list of lower-case words found in line, in order of appearance
     *         (empty if the line contains no word characters).
     */
    static List<String> words(String line) {
        List<String> result = new ArrayList<String>();
        Matcher matcher = WORD.matcher(line);
        while (matcher.find()) {
            // Locale.ROOT keeps the result independent of the JVM's default locale.
            result.add(matcher.group().toLowerCase(Locale.ROOT));
        }
        return result;
    }

    /**
     * Counts the words available from a reader. The reader is consumed up to
     * end of stream but is not closed; closing it is the caller's job.
     *
     * @param reader The reader to read lines from.
     * @return A map of word associated with the number of times they appeared.
     * @throws IOException If reading from the reader fails.
     */
    static Map<String, Integer> tallyWords(BufferedReader reader) throws IOException {
        Map<String, Integer> result = new HashMap<String, Integer>();
        String line;
        // readLine() returning null is the only reliable end-of-stream signal;
        // ready() merely says whether a read would block.
        while ((line = reader.readLine()) != null) {
            for (String word : words(line)) {
                Integer count = result.get(word);
                result.put(word, count == null ? 1 : count + 1);
            }
        }
        return result;
    }

    /**
     * Used to sort Map.Entry elements in decreasing order of value.
     */
    static final class EntryComparator<K, V extends Comparable<V>>
            implements Comparator<Map.Entry<K, V>> {
        @Override
        public int compare(Entry<K, V> o1, Entry<K, V> o2) {
            // Swap the operands instead of negating the result: negation is
            // wrong when compareTo returns Integer.MIN_VALUE.
            return o2.getValue().compareTo(o1.getValue());
        }
    }

    /**
     * Returns the entries of a tally with the highest counts.
     *
     * @param tally The word counts to rank.
     * @param limit The maximum number of entries to return.
     * @return At most limit entries, in decreasing order of count.
     */
    private static List<Map.Entry<String, Integer>> topEntries(
            Map<String, Integer> tally, int limit) {
        List<Map.Entry<String, Integer>> entries =
            new ArrayList<Map.Entry<String, Integer>>(tally.entrySet());
        Collections.sort(entries, new EntryComparator<String, Integer>());
        // Clamp so that inputs with fewer distinct words than limit still work.
        int end = Math.min(Math.max(limit, 0), entries.size());
        return entries.subList(0, end);
    }

    /**
     * Tallies the words of don_quixote.txt and prints the most frequent ones,
     * one "word: count" pair per line, most frequent first.
     *
     * @param args Ignored.
     * @throws Exception If the file cannot be opened or read.
     */
    public static void main(String[] args) throws Exception {
        Map<String, Integer> tally;
        BufferedReader reader = new BufferedReader(new FileReader("don_quixote.txt"));
        try {
            tally = tallyWords(reader);
        } finally {
            // Close the file even when reading fails part-way through.
            reader.close();
        }

        for (Map.Entry<String, Integer> entry : topEntries(tally, TOP_COUNT)) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment