Skip to content

Instantly share code, notes, and snippets.

@mreid
Created March 14, 2010 10:45
Show Gist options
  • Save mreid/331911 to your computer and use it in GitHub Desktop.
Save mreid/331911 to your computer and use it in GitHub Desktop.
;; A simple demonstration of several aspects of Clojure:
;; Sequences, Java interop, Regular expressions
;; Presented at the March 10, 2010 Canberra Java User's Group meeting
;; (See http://mark.reid.name/sap/clojure-cjug-talk.html for more info)
(ns demo
  ;; This namespace defines its own `update` (a word-tally helper). Clojure 1.7
  ;; added clojure.core/update, so exclude the core var here to avoid the
  ;; "update already refers to ... being replaced" warning when the file loads.
  ;; Excluding a name that clojure.core does not define is harmless.
  (:refer-clojure :exclude [update])
  (:import (java.io FileReader BufferedReader)))
(defn canonical
  "Returns word converted to lower case, the form under which it is tallied."
  [word]
  ;; Plain Java interop: invokes the no-argument toLowerCase method on word.
  (. word (toLowerCase)))
(defn word-seq
  "Returns a lazy sequence of the words in string, each passed through
  canonical. A word is a maximal run of \\w characters."
  [string]
  (->> string
       (re-seq #"\w+")
       (map canonical)))
(defn read-words
  "Returns a lazy sequence of every word read from reader, produced by
  applying word-seq to each line in turn and concatenating the results."
  [reader]
  (->> reader
       line-seq
       (mapcat word-seq)))
(defn update
  "Returns tally with the count stored under word increased by one.
  A word that is not yet in tally is treated as having a count of 0."
  [tally word]
  (let [seen (get tally word 0)]
    (assoc tally word (inc seen))))
(defn tally-words
  "Returns a map from each word read from reader to the number of times
  that word occurred."
  [reader]
  (let [words (read-words reader)]
    ;; Fold every word into an initially empty tally map.
    (reduce update {} words)))
;; Tally the words of don_quixote.txt and print the ten entries with the
;; highest counts, largest first. The reader is closed when with-open exits.
(with-open [reader (BufferedReader. (FileReader. "don_quixote.txt"))]
  (let [by-count-desc (sort-by #(- (val %)) (tally-words reader))]
    (doseq [[word n] (take 10 by-count-desc)]
      (println word ": " n))))
package demo;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Simple demonstration of tallying word counts in Java.
 *
 * <p>{@link #main(String[])} reads {@code don_quixote.txt}, counts how often each
 * lower-cased word occurs and prints the most frequent words.
 */
public class Tally {
    /** Matches one word: a maximal run of {@code \w} characters. */
    static final Pattern WORD = Pattern.compile("\\w+");

    /**
     * Splits a line into its words.
     *
     * @param line The input to parse words from.
     * @return A list of lower-case words found in line, in order of appearance.
     */
    static List<String> words(String line) {
        List<String> result = new ArrayList<String>();
        Matcher matcher = WORD.matcher(line);
        while (matcher.find()) {
            result.add(matcher.group().toLowerCase());
        }
        return result;
    }

    /**
     * Counts the words on every line supplied by the reader. The reader is read
     * to the end of its input but is not closed; closing is left to the caller.
     *
     * @param reader The source of the lines to parse and tally.
     * @return A map of words associated with the number of times they appeared.
     * @throws IOException If reading from the reader fails.
     */
    static Map<String,Integer> tallyWords(BufferedReader reader) throws IOException {
        Map<String,Integer> result = new HashMap<String,Integer>();
        String line;
        // readLine() returning null is the only reliable end-of-input signal.
        // ready() merely reports whether a read would block, so looping on it can
        // stop early or hand a null line to words().
        while ((line = reader.readLine()) != null) {
            for (String word : words(line)) {
                Integer count = result.get(word);
                result.put(word, count == null ? 1 : count + 1);
            }
        }
        return result;
    }

    /**
     * Used to sort Map.Entry elements in decreasing order of value.
     */
    static final class EntryComparator<K,V extends Comparable<V>> implements Comparator<Map.Entry<K,V>> {
        public int compare(Entry<K, V> o1, Entry<K, V> o2) {
            // Swap the operands instead of negating the result: negation is
            // wrong if compareTo ever returns Integer.MIN_VALUE.
            return o2.getValue().compareTo(o1.getValue());
        }
    }

    /**
     * Tallies the words in {@code don_quixote.txt} and prints up to ten of the
     * most frequent ones, one {@code word: count} pair per line.
     *
     * @param args Ignored.
     * @throws Exception If the file cannot be opened or read.
     */
    public static void main(String[] args) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader("don_quixote.txt"));
        Map<String,Integer> tally;
        try {
            tally = tallyWords(reader);
        } finally {
            // Close the file even when tallying fails part-way through.
            reader.close();
        }
        List<Map.Entry<String, Integer>> entries =
            new ArrayList<Map.Entry<String,Integer>>(tally.entrySet());
        Collections.sort(entries, new EntryComparator<String, Integer>());
        // A short input may contain fewer than ten distinct words.
        int limit = Math.min(10, entries.size());
        for (int index = 0; index < limit; index++) {
            Map.Entry<String, Integer> entry = entries.get(index);
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment