Last active
August 29, 2015 14:26
-
-
Save pmrozik/4d7431c966d5386f4a04 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// WordStatistics.java
import java.io.*;
import java.util.*;
import java.util.regex.*;
/**
 * Personal pronouns that the word counter excludes from its statistics.
 *
 * Each constant carries the textual form of the pronoun, available through
 * {@link #getPronoun()}.
 */
enum Pronoun
{
    I ("I"),
    YOU ("you"),
    SHE ("she"),
    HE ("he"),
    THEY ("they"),
    WE ("we");

    /** Textual form of this pronoun; assigned once in the constructor. */
    final String s;

    /**
     * @param s the textual form of the pronoun
     */
    Pronoun(String s)
    {
        this.s = s;
    }

    /**
     * @return the textual form of this pronoun, exactly as declared above
     */
    String getPronoun()
    {
        return s;
    }
}
public class WordStatistics | |
{ | |
String fileName; | |
StringBuilder text = new StringBuilder(); | |
Map<String, Integer> wordMap = new HashMap<String, Integer>(); | |
List<Word> wordList = new ArrayList<Word>(); | |
public static void main(String[] args) | |
{ | |
if(args.length < 1) | |
{ | |
System.out.println("Error: needs at least one argument. \n Usage: java WordStatistics textFileName\n") | |
} | |
WordStatistics ws = new WordStatistics(); | |
ws.fileName = args[0]; | |
try | |
{ | |
ws.read(); | |
ws.addWords(); | |
ws.printSorted(); | |
} | |
catch (IOException e) {e.printStackTrace(); } | |
} | |
void printSorted() | |
{ | |
Collections.sort(wordList); | |
// Print only if word repeats more than five times | |
for(Word w: wordList) | |
{ | |
if(w.getReps() >= 5) | |
System.out.println("[" + w.getWord() + "] : [" + w.getReps() + "]"); | |
} | |
} | |
boolean checkIfPronoun(String word) | |
{ | |
boolean foundPronoun = false; | |
for (Pronoun p : Pronoun.values()) | |
{ | |
if(p.getPronoun().equals(word)) | |
return true; | |
} | |
return false; | |
} | |
// Retrieves and adds words to wordMap | |
void addWords() | |
{ | |
Pattern p = Pattern.compile("[\\w&&\\D]{3,}"); | |
Matcher m = p.matcher(text); | |
// Look for the next word that matches the pattern | |
while(m.find()) | |
{ | |
// Get the word from the last search | |
String word = m.group().toLowerCase(); | |
// Check whether word is a pronoun | |
if(!checkIfPronoun(word)) | |
{ | |
// Check the key value (repetitions) for current word | |
Integer val = wordMap.get(word); | |
// Value is null, add new word to map | |
if(val == null) | |
{ | |
wordMap.put(word, 1); | |
} | |
// Increase the number of repetitions | |
else | |
{ | |
val++; | |
wordMap.put(word, val); | |
} | |
} | |
} | |
// Add words to list | |
for (Map.Entry<String, Integer> entry : wordMap.entrySet()) | |
{ | |
String w = entry.getKey(); | |
Integer r = entry.getValue(); | |
wordList.add(new Word(w, r)); | |
} | |
} | |
void read() throws IOException | |
{ | |
log("Reading from file."); | |
String NL = System.getProperty("line.separator"); | |
Scanner scanner = new Scanner(new FileInputStream(fileName), "utf-8"); | |
try { | |
while (scanner.hasNextLine()){ | |
text.append(scanner.nextLine() + NL); | |
} | |
} | |
finally{ | |
scanner.close(); | |
} | |
} | |
private void log(String aMessage) | |
{ | |
System.out.println(aMessage); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment