Created
January 22, 2016 19:29
-
-
Save sudipto80/6121c3eb47698c4f3b4c to your computer and use it in GitHub Desktop.
Sentiment Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System.Text.RegularExpressions | |
/// One tab-separated row of the SentiWordNet file; every column is kept as raw text.
type SentiWordNetEntry =
    {
        /// Column 0 of the row (presumably the part-of-speech tag).
        POS : string
        /// Column 1 of the row (presumably the synset identifier).
        ID : string
        /// Column 2 of the row, trimmed; later parsed with `float`.
        PositiveScore : string
        /// Column 3 of the row, trimmed; later parsed with `float`.
        NegativeScore : string
        /// Column 4 of the row: the terms, each followed by '#' and further text.
        Words : string
    }
/// The SentiWordNet lexicon loaded from "SentiWordNet_3.0.0_20130122.txt".
/// Each element is a three-item list:
///   [ terms column truncated just after its last '#'; positive score; negative score ]
/// with both scores still in textual form.
let sentiWordList =
    System.IO.File.ReadAllLines(@"SentiWordNet_3.0.0_20130122.txt")
    // Lines starting with '#' are comments/headers in the data file.
    |> Array.filter (fun line -> not (line.StartsWith("#")))
    |> Array.map (fun line -> line.Split '\t')
    // Blank or truncated rows have fewer than five fields and would raise
    // IndexOutOfRangeException in the projection below, so they are skipped.
    |> Array.filter (fun lineTokens -> lineTokens.Length >= 5)
    |> Array.map (fun lineTokens ->
        { POS = lineTokens.[0]
          ID = lineTokens.[1]
          PositiveScore = lineTokens.[2].Trim()
          NegativeScore = lineTokens.[3].Trim()
          Words = lineTokens.[4] })
    |> Array.map (fun item ->
        // Keep everything up to and including the last '#', dropping the text after it.
        [ item.Words.Substring(0, item.Words.LastIndexOf('#') + 1)
          item.PositiveScore
          item.NegativeScore ])
//let getPolarity (sentiWordNetList:string list[]) (word:string) = | |
// let matchedItem = sentiWordNetList | |
// |> Array.filter(fun item -> item.[0].Contains (word)) | |
// | |
// match matchedItem.Length with | |
// | 0 -> (0.0,0.0)//No value found | |
// // There can be multiple matches; the first one (i.e. matchedItem.[0]) is used.
// | _ -> (float matchedItem.[0].[1], float matchedItem.[0].[2]) | |
/// Classifies a sentence by summing the per-word (positive, negative) scores
/// returned by `getPolarity`.
///
/// sentence          - the text to classify.
/// sentiWordNetList  - lexicon entries of the form [terms; positiveScore; negativeScore].
/// Returns 1 when the positive total is larger, -1 when the negative total is
/// larger, and 0 when both totals are equal.
///
/// NOTE(review): the only active definition of `getPolarity` appears further
/// down this file; F# resolves names top-down, so it must be moved above this
/// function for the file to compile as a whole.
let getPolarityScore (sentence:string) (sentiWordNetList:string list[]) =
    // Split on whitespace and sentence punctuation. Splitting on ' ' alone left
    // tokens such as "product." or "product.I" intact, and those can never match
    // a lexicon term. Apostrophes and underscores are kept so that "don't" and
    // the marker tokens "Negative_detected" / "Ok_detected" survive unchanged.
    let separators = [| ' '; '\t'; '\r'; '\n'; '.'; ','; ';'; ':'; '!'; '?' |]
    let polarities =
        sentence.Split(separators, System.StringSplitOptions.RemoveEmptyEntries)
        |> Array.map (fun word -> getPolarity sentiWordNetList word)
    let totalPositivity = polarities |> Array.sumBy fst
    let totalNegativity = polarities |> Array.sumBy snd
    if totalPositivity > totalNegativity then 1 //Positive polarity
    elif totalNegativity = totalPositivity then 0 //Neutral polarity
    else -1 //Negative polarity
// Score two sample sentences against the SentiWordNet lexicon.
sentiWordList
|> getPolarityScore "I am loving this product.I thought that the camera will be much better"
sentiWordList
|> getPolarityScore "don't buy this drug . it gave me a bummer" // author's expected result: negative
/// Returns the terms string (element 0) of every lexicon entry whose positive
/// score (element 1) is strictly greater than its negative score (element 2).
let allPositiveWords (sentiWordNetList:string list[])=
    sentiWordNetList
    |> Array.choose (fun entry ->
        let positive = float entry.[1]
        let negative = float entry.[2]
        if positive > negative then Some entry.[0] else None)
/// Returns the terms string (element 0) of every lexicon entry whose positive
/// score (element 1) is strictly smaller than its negative score (element 2).
let allNegativeWords (sentiWordNetList:string list[])=
    sentiWordNetList
    |> Array.choose (fun entry ->
        let positive = float entry.[1]
        let negative = float entry.[2]
        if positive < negative then Some entry.[0] else None)
// Terms strings of the lexicon entries that lean positive / negative.
let positiveWords = allPositiveWords sentiWordList |> Array.toList
let negativeWords = allNegativeWords sentiWordList |> Array.toList

// A terms string is split on '#' and ' '; fragments that contain no ASCII
// letter (for example purely numeric ones, or empty strings) are discarded.
let delims = [|'#';' '|]
let pos =
    positiveWords
    |> List.map (fun t ->
        t.Split delims
        |> Array.filter (fun z -> Regex.Match(z, "[a-zA-Z]+").Success))
let neg =
    negativeWords
    |> List.map (fun t ->
        t.Split delims
        |> Array.filter (fun z -> Regex.Match(z, "[a-zA-Z]+").Success))

// Flat word lists, in lexicon order. Built with List.collect rather than by
// appending one word at a time with `@`, which was quadratic in the number of
// words. The former [""] seed and the trailing "length > 0" filter are no
// longer needed: the regex filter above already rejects empty fragments.
// Both bindings stay `mutable` so any code that reassigns them keeps working.
let mutable posList = pos |> List.collect Array.toList
let mutable negList = neg |> List.collect Array.toList
/// Looks up the (positive, negative) score pair for a single word.
///
/// sentiWordNetList - lexicon entries of the form [terms; positiveScore; negativeScore].
/// word             - the token to look up.
///
/// An entry matches when its terms string starts with "word#" or contains
/// " word#". When several entries match, the first one in the array is used.
/// When nothing matches, the marker tokens "Negative_detected" and
/// "Ok_detected" get fixed scores and every other word scores (0.0, 0.0).
let getPolarity (sentiWordNetList:string list[]) (word:string) =
    let wordWithHash = word + "#"
    let wordWithLeadingBlankAndHash = " " + wordWithHash
    let matchesWord (item:string list) =
        let terms = item.[0]
        // Ordinal comparison keeps StartsWith consistent with Contains (which is
        // always ordinal) and independent of the current culture.
        terms.StartsWith(wordWithHash, System.StringComparison.Ordinal)
        || terms.Contains(wordWithLeadingBlankAndHash)
    // tryFind stops at the first hit instead of filtering the whole lexicon.
    match sentiWordNetList |> Array.tryFind matchesWord with
    | Some item -> (float item.[1], float item.[2])
    | None ->
        match word with
        | "Negative_detected" -> (0.0,0.675)
        | "Ok_detected" -> (0.125,0.0)
        | _ -> (0.0,0.0) //No value found
// Negation words that flip the polarity of the word that follows them.
let negations = ["no";"not";"never";"seldom";"neither";"nor"]

// "<negation> <positive word>" phrases: these read as negative.
let badCombos =
    negations
    |> List.collect (fun x -> posList |> List.map (fun y -> x + " " + y))

// "<negation> <negative word>" phrases: these read as mildly positive.
let okCombos =
    negations
    |> List.collect (fun x -> negList |> List.map (fun y -> x + " " + y))

// Sample sentence; each negated phrase in it is replaced by a marker token
// that getPolarity scores specially.
let mutable sen = "the camera of the phone was not amazing"

// Each combo is matched literally (Regex.Escape) because lexicon words may
// contain regex metacharacters, and only as a whole phrase: the lookarounds
// stop "no able" from matching inside "piano able" or "not good" inside
// "not goodness".
for combo in badCombos do
    sen <- Regex.Replace(sen, @"(?<!\w)" + Regex.Escape(combo) + @"(?!\w)", "Negative_detected")
for combo in okCombos do
    sen <- Regex.Replace(sen, @"(?<!\w)" + Regex.Escape(combo) + @"(?!\w)", "Ok_detected")
/// Fraction of the inner lists in `list` that contain `word`
/// (number of containing lists divided by the total number of lists).
let prob list word =
    let hits =
        list |> List.sumBy (fun doc -> if List.contains word doc then 1 else 0)
    float hits / float (List.length list)
/// Fraction of the inner lists in `list` that contain both `w1` and `w2`.
let probBoth list w1 w2 =
    let containsBoth doc = List.contains w1 doc && List.contains w2 doc
    let hits =
        list |> List.sumBy (fun doc -> if containsBoth doc then 1 else 0)
    float hits / float (List.length list)
/// Pointwise mutual information of `w1` and `w2` over `docs`:
/// log (P(w1 and w2) / (P(w1) * P(w2))), using `probBoth` and `prob`.
/// Returns 0.0 unless both the joint probability and the product of the
/// individual probabilities are strictly positive.
let pmi docs w1 w2 =
    let joint = probBoth docs w1 w2
    let independent = prob docs w1 * prob docs w2
    match joint > 0.0 && independent > 0.0 with
    | true -> log (joint / independent)
    | false -> 0.0
// | |
//List of positive words
let pWords = ["good"; "nice"; "excellent"; "positive"; "fortunate"; "correct"; "superior"]
//List of negative words
let nWords = ["bad"; "nasty"; "poor"; "negative"; "unfortunate"; "wrong"; "inferior"]

let mutable posi = 0.0 //Total positive semantic orientation
let mutable negi = 0.0 //Total negative semantic orientation

// Sample data: one entry per bank, each bank being a list of two-word phrases.
let docs = [
    [["positive";"outlook"];["good";"service"];["nice";"people"];["bad";"location"]]; //Bank1
    [["nasty";"behaviour"];["unfortunate"; "outcome"];["poor";"quality"]] //Bank2
]

// `docs` is a list of banks, so PMI has to be computed per bank, between each
// word of each phrase and each seed word. The previous loops passed a whole
// phrase (a string list) together with a seed word (a string) to `pmi`, which
// requires both words to have the same type and therefore did not type-check.
for bank in docs do
    for phrase in bank do
        for word in phrase do
            for pw in pWords do
                posi <- posi + pmi bank word pw

for bank in docs do
    for phrase in bank do
        for word in phrase do
            for nw in nWords do
                negi <- negi + pmi bank word nw

let so_pmi = posi - negi //Calculating semantic orientation's value
/// Sums `pmi docs term pw` over every term of every document in `docs` and
/// every word `pw` in `words`, visiting documents, terms and words in order.
let calculateSO (docs:string list list)(words:string list)=
    // Adds the PMI of one term against every seed word to the running total.
    let addTerm total term =
        words |> List.fold (fun running pw -> running + pmi docs term pw) total
    docs
    |> List.concat
    |> List.fold addTerm 0.0
/// For each element of `reviews` (a list of documents), pairs it with
/// `calculateSO docs pWords - calculateSO docs nWords`.
/// Returns the (documents, score) pairs in the same order as `reviews`.
let soPMI ( reviews : string list list list )=
    [ for docs in reviews ->
        let positive = calculateSO docs pWords
        let negative = calculateSO docs nWords
        (docs, positive - negative) ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment