Created
September 26, 2018 18:09
-
-
Save bwenzel2/1f1aa7fbb7e6210df70c0cc613d51e09 to your computer and use it in GitHub Desktop.
Java function that counts the number of unique words in a string, not including punctuation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
class CountUniqueWords {

    /**
     * Matches every character that is NOT a lowercase ASCII letter, a digit,
     * a hyphen, an apostrophe, or whitespace. Applied after lowercasing, so
     * upper-case letters never reach it.
     */
    private static final Pattern PUNCTUATION = Pattern.compile("[^a-z0-9\\-'\\s]");

    /** Matches one or more consecutive whitespace characters (word separator). */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Matches a single letter or digit; a token without one is not a word. */
    private static final Pattern ALPHANUMERIC = Pattern.compile("[a-z0-9]");

    /**
     * Returns the number of unique (i.e., non-repeated) words in the given string.
     *
     * <p>Rules:
     * <ul>
     *   <li>Comparison is NOT case-sensitive: "Ball" and "ball" are the same word,
     *       so neither is unique if both are present.</li>
     *   <li>Punctuation is removed before counting, except hyphens and apostrophes,
     *       so "case-sensitive" and "didn't" each count as one word.</li>
     *   <li>Words are separated by any run of whitespace (spaces, tabs, newlines).</li>
     *   <li>Tokens that contain no letter or digit (e.g. a free-standing "-") are
     *       not words and are ignored.</li>
     *   <li>Only ASCII letters and digits are recognised; other characters are
     *       dropped like punctuation.</li>
     * </ul>
     *
     * <p>Runs in expected O(n) time, since hash map lookups and insertions are
     * expected O(1).
     *
     * @param s the text to analyse; may be {@code null} or empty
     * @return the number of words that occur exactly once; 0 for {@code null},
     *         empty, or word-free input
     */
    public static int countUniqueWords(String s) {
        if (s == null) {
            return 0;
        }

        // Locale.ROOT keeps lowercasing independent of the JVM's default locale
        // (e.g. Turkish would turn "I" into a dotless i that the filter then drops).
        String lowered = s.toLowerCase(Locale.ROOT);
        String cleaned = PUNCTUATION.matcher(lowered).replaceAll("");

        // First pass: tally how many times each word occurs.
        Map<String, Integer> occurrences = new HashMap<String, Integer>();
        for (String token : WHITESPACE_RUN.split(cleaned)) {
            // Skips empty tokens (leading whitespace, empty input) as well as
            // tokens made only of hyphens/apostrophes.
            if (!ALPHANUMERIC.matcher(token).find()) {
                continue;
            }
            Integer seen = occurrences.get(token);
            occurrences.put(token, seen == null ? 1 : seen + 1);
        }

        // Second pass: a word is unique when it was seen exactly once.
        int uniqueCount = 0;
        for (int timesSeen : occurrences.values()) {
            if (timesSeen == 1) {
                uniqueCount++;
            }
        }
        return uniqueCount;
    }

    /**
     * Small demonstration: prints the unique-word count of a sample sentence.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String s = "Someone kicked the ball ball Ball Ball-ball";
        System.out.println(CountUniqueWords.countUniqueWords(s));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment