Skip to content

Instantly share code, notes, and snippets.

@pmrozik
Last active August 29, 2015 14:26
Show Gist options
  • Save pmrozik/4d7431c966d5386f4a04 to your computer and use it in GitHub Desktop.
Save pmrozik/4d7431c966d5386f4a04 to your computer and use it in GitHub Desktop.
// WordStatistics.java
import java.util.*;
import java.io.*;
import java.util.regex.*;
/**
 * Personal pronouns that the word counter leaves out of its statistics.
 *
 * Each constant carries the textual form of the pronoun, available through
 * {@link #getPronoun()}.
 */
enum Pronoun
{
    I ("I"),
    YOU ("you"),
    SHE ("she"),
    HE ("he"),
    THEY ("they"),
    WE ("we");

    // Enum constants are shared singletons, so the text is final: nothing may
    // reassign it after construction. Name and visibility are kept as they were.
    final String s;

    /**
     * @param s the textual form of this pronoun
     */
    Pronoun(String s)
    {
        this.s = s;
    }

    /**
     * @return the textual form of this pronoun, exactly as declared above
     */
    String getPronoun()
    {
        return s;
    }
}
public class WordStatistics
{
String fileName;
StringBuilder text = new StringBuilder();
Map<String, Integer> wordMap = new HashMap<String, Integer>();
List<Word> wordList = new ArrayList<Word>();
public static void main(String[] args)
{
if(args.length < 1)
{
System.out.println("Error: needs at least one argument. \n Usage: java WordStatistics textFileName\n")
}
WordStatistics ws = new WordStatistics();
ws.fileName = args[0];
try
{
ws.read();
ws.addWords();
ws.printSorted();
}
catch (IOException e) {e.printStackTrace(); }
}
void printSorted()
{
Collections.sort(wordList);
// Print only if word repeats more than five times
for(Word w: wordList)
{
if(w.getReps() >= 5)
System.out.println("[" + w.getWord() + "] : [" + w.getReps() + "]");
}
}
boolean checkIfPronoun(String word)
{
boolean foundPronoun = false;
for (Pronoun p : Pronoun.values())
{
if(p.getPronoun().equals(word))
return true;
}
return false;
}
// Retrieves and adds words to wordMap
void addWords()
{
Pattern p = Pattern.compile("[\\w&&\\D]{3,}");
Matcher m = p.matcher(text);
// Look for the next word that matches the pattern
while(m.find())
{
// Get the word from the last search
String word = m.group().toLowerCase();
// Check whether word is a pronoun
if(!checkIfPronoun(word))
{
// Check the key value (repetitions) for current word
Integer val = wordMap.get(word);
// Value is null, add new word to map
if(val == null)
{
wordMap.put(word, 1);
}
// Increase the number of repetitions
else
{
val++;
wordMap.put(word, val);
}
}
}
// Add words to list
for (Map.Entry<String, Integer> entry : wordMap.entrySet())
{
String w = entry.getKey();
Integer r = entry.getValue();
wordList.add(new Word(w, r));
}
}
void read() throws IOException
{
log("Reading from file.");
String NL = System.getProperty("line.separator");
Scanner scanner = new Scanner(new FileInputStream(fileName), "utf-8");
try {
while (scanner.hasNextLine()){
text.append(scanner.nextLine() + NL);
}
}
finally{
scanner.close();
}
}
private void log(String aMessage)
{
System.out.println(aMessage);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment