Created
May 19, 2013 21:47
-
-
Save powerlim2/5609163 to your computer and use it in GitHub Desktop.
Class for sentiment analysis:
It receives a sentence and returns an integer value: 1 - positive, 2 - neutral, 3 - negative, null - error or empty input.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.*; | |
import java.util.Hashtable; | |
/**
 * Lexicon-based sentiment scorer for single sentences.
 *
 * <p>On construction it reads six files from the current working directory:
 * <ul>
 *   <li>{@code positive-words.txt} and {@code negative-words.txt}: one word per line, stored with
 *       a score of {@code +1} and {@code -1} respectively;</li>
 *   <li>{@code noun.txt}, {@code verb.txt}, {@code adj.txt} and {@code adv.txt}: each line holds a
 *       word form followed by its stem, separated by whitespace.</li>
 * </ul>
 *
 * <p>{@link #analysis(String)} sums the scores of the words in a sentence and maps the sign of
 * the sum to {@code 1} (positive), {@code 2} (neutral) or {@code 3} (negative).
 *
 * <p>Original author: joonhyunglim, 5/18/13.
 */
public class MapReduceSentimentScore {

    /** Sentiment lexicon: word -> +1 (positive) or -1 (negative). */
    Hashtable<String,Integer> hashtable;

    /** Stem dictionary: word form -> stem. */
    Hashtable<String,String> htstem;

    // Legacy scratch fields kept only so existing package-level references still compile;
    // this class no longer assigns them, so they stay null.
    String posline, negline, nounline = null;
    String verbline, adjline, advline = null;

    /** Number of entries loaded from each dictionary file. */
    int posnum = 0, negnum = 0, nounnum=0, verbnum=0,adjnum=0,advnum=0;

    /**
     * Loads the sentiment lexicon and the stem dictionaries from the working directory.
     *
     * <p>If any file cannot be read, the error is reported on {@code System.err} and the instance
     * keeps whatever was loaded up to that point; {@link #analysis(String)} stays usable.
     */
    public MapReduceSentimentScore() {
        // Create the tables before touching the file system so that analysis() never sees a
        // null table when a dictionary file is missing.
        hashtable = new Hashtable<String,Integer>();
        htstem = new Hashtable<String,String>();
        try {
            posnum = loadWordList("positive-words.txt", 1);
            negnum = loadWordList("negative-words.txt", -1);
            nounnum = loadStems("noun.txt");
            verbnum = loadStems("verb.txt");
            // adv.txt is deliberately read before adj.txt: that is the order the files were
            // always read in, so a word form present in both still resolves to the adj.txt stem.
            // Each count is now attributed to the file it actually came from.
            advnum = loadStems("adv.txt");
            adjnum = loadStems("adj.txt");
            // this code below will print out whether it is ready for the analysis.
            System.out.println(posnum+" of positive words and "+negnum+" of negative words, "+nounnum+" nouns, "+verbnum+" verbs, "+adjnum+" adjs, "+advnum+" advs are ready for use.");
        } catch (IOException e) {
            System.err.println("Could not load the sentiment dictionaries: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Reads a one-word-per-line file into the sentiment lexicon.
     *
     * @param fileName path of the word list
     * @param score    score stored for every word of the list
     * @return number of words stored (blank lines are skipped)
     * @throws IOException if the file cannot be opened or read
     */
    private int loadWordList(String fileName, int score) throws IOException {
        int loaded = 0;
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader in = new BufferedReader(new FileReader(new File(fileName)))) {
            String line;
            while ((line = in.readLine()) != null) {
                String word = line.trim();
                if (word.isEmpty()) {
                    // A blank entry would match every token that is pure punctuation.
                    continue;
                }
                hashtable.put(word, score);
                loaded++;
            }
        }
        return loaded;
    }

    /**
     * Reads a "form stem" file into the stem dictionary.
     *
     * @param fileName path of the stem dictionary
     * @return number of entries stored (lines with fewer than two fields are skipped)
     * @throws IOException if the file cannot be opened or read
     */
    private int loadStems(String fileName) throws IOException {
        int loaded = 0;
        try (BufferedReader in = new BufferedReader(new FileReader(new File(fileName)))) {
            String line;
            while ((line = in.readLine()) != null) {
                String[] tokens = line.trim().split("\\s+");
                if (tokens.length < 2 || tokens[0].isEmpty()) {
                    // Malformed or blank line: there is no stem to record.
                    continue;
                }
                htstem.put(tokens[0], tokens[1]);
                loaded++;
            }
        }
        return loaded;
    }

    /**
     * Scores the sentiment of a sentence.
     *
     * <p>The sentence is split on single spaces and every non-word character is removed from each
     * token. A token found in the lexicon contributes its score; otherwise its stem (if the stem
     * dictionary has one) is looked up instead. Matching is case-sensitive.
     *
     * @param sentence the text to score; may be {@code null}
     * @return {@code 1} if the total score is positive, {@code 2} if it is zero, {@code 3} if it
     *         is negative, or {@code null} when {@code sentence} is {@code null} or empty
     */
    public Integer analysis(String sentence) {
        if (sentence == null || sentence.isEmpty()) {
            return null; // this implies error or empty input.
        }
        int value = 0;
        for (String raw : sentence.split(" ")) {
            // ignore all non-word characters
            String word = raw.replaceAll("\\W", "");
            if (word.isEmpty()) {
                continue;
            }
            Integer score = hashtable.get(word);
            if (score == null) {
                // Not in the lexicon as written: try again with its stem, if one is known.
                String stemmed = htstem.get(word);
                if (stemmed != null) {
                    score = hashtable.get(stemmed);
                }
            }
            if (score != null) {
                value += score;
            }
        }
        if (value > 0) {
            return 1; // this implies positive.
        } else if (value == 0) {
            return 2; // this implies neutral.
        } else {
            return 3; // this implies negative.
        }
    }

    /**
     * Small smoke test: scores a few sample sentences and prints the results.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] sentences = new String[] {"Benedict is a good student?", "Do you know who Benedict is?", "Joon is a reliable person", "is Joon a bad student?", "Samsung is bad phone.", "fjwefjpoewjfopewjfopewjfopewjofjwep"};
        MapReduceSentimentScore good = new MapReduceSentimentScore();
        for (int i = 0; i < sentences.length; i++) {
            // Keep the boxed type: analysis() may return null.
            Integer value = good.analysis(sentences[i]);
            System.out.println(sentences[i]+" : "+value);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment