Skip to content

Instantly share code, notes, and snippets.

@isaacabraham
Last active October 4, 2016 01:20
Show Gist options
  • Save isaacabraham/636df8e6a691df1c20d1 to your computer and use it in GitHub Desktop.
Save isaacabraham/636df8e6a691df1c20d1 to your computer and use it in GitHub Desktop.
// Script setup: add ..\packages\ to the include search path so the relative
// #r / #load paths below resolve against it.
#I @"..\packages\"
#r @"FSharp.Data\lib\net40\FSharp.Data.dll"
// Loads the FsPlot bootstrap script (provides the charting API opened further down).
#load @"FSPlot\FsPlotBootstrap.fsx"
#r @"Deedle\lib\net40\Deedle.dll"
// Register an F# Interactive printer: any value implementing Deedle's
// IFsiFormattable is rendered with its own Format() output, preceded by a newline.
do fsi.AddPrinter(fun (printer:Deedle.Internal.IFsiFormattable) -> "\n" + (printer.Format()))
open Deedle
open FSharp.Data
open System
// CSV type provider whose schema is taken from the sample file below.
// NOTE(review): the path is absolute and machine-specific; the script only
// type-checks where this exact file exists.
type Twitter = CsvProvider< @"C:\Users\Isaac\Downloads\fsharp_2013-2014.csv">
/// The data set under analysis: the provider's sample file loaded via GetSample().
let data = Twitter.GetSample()
/// Returns `text` with every occurrence of the substring `removeIt` deleted.
let remove (removeIt:string) (text:string) : string =
    let replacement = ""
    text.Replace(removeIt, replacement)
/// Normalises a user-name token by deleting every ":" from it
/// (e.g. the "@user:" token that follows "RT" becomes "@user").
let getName (token:string) : string =
    token |> remove ":"
/// True when `word` begins with the "@" sign, i.e. the token is a user mention.
/// The prefix is compared ordinally: "@" is a symbol, not linguistic text, so a
/// culture-sensitive comparison (the default for StartsWith(string)) is both
/// unnecessary and able to ignore leading zero-width characters.
let isMention (word:string) = word.StartsWith("@", StringComparison.Ordinal)
/// Partial active pattern that recognises retweets.
/// Matches when the tweet text starts with "RT" and has a second
/// space-separated token; yields the tweet paired with that token after ":"
/// has been removed from it (the retweeted user's handle).
/// A text that starts with "RT" but contains no second token does not match
/// (previously this case raised IndexOutOfRangeException).
let (|Retweet|_|) (tweet:Twitter.Row) =
    if tweet.Text.StartsWith("RT", StringComparison.Ordinal) then
        match tweet.Text.Split ' ' with
        // Guard the index: "RT" on its own splits into a single token.
        | tokens when tokens.Length > 1 -> Some (tweet, getName tokens.[1])
        | _ -> None
    else
        None
/// Extracts the mentions of a tweet: every space-separated token that
/// satisfies `isMention`, cleaned with `getName`, as an array in text order.
let getMentions (tweet:Twitter.Row) =
    let tokens = tweet.Text.Split ' '
    // Tokens are already space-free, so no further splitting is needed.
    [| for token in tokens do
         if isMention token then
             yield getName token |]
module Seq =
    /// Groups `items` by the key computed with `counter`, counts each group and
    /// returns the (key, count) pairs as an array ordered by descending count.
    let summarise counter items =
        items
        |> Seq.countBy counter
        |> Seq.sortBy (fun (_, occurrences) -> -occurrences)
        |> Seq.toArray
/// Converts a sequence of two-field records (e.g. the (key, count) tuples
/// produced by `Seq.summarise`) into a Deedle frame whose columns are named
/// "Key" and "Count", indexed by the "Key" column and ordered by descending
/// "Count".
let toFrame (items:seq<_>) =
    items
    |> Frame.ofRecords
    |> Frame.indexColsWith [ "Key"; "Count" ]
    |> Frame.indexRowsString "Key"
    // sortRowsWith expects a two-argument comparison. The previous lambda
    // ignored its first argument and returned the negated second value, which
    // is not a consistent ordering; compare the operands in reverse to sort
    // from largest to smallest count.
    |> Frame.sortRowsWith "Count" (fun (left:int) (right:int) -> compare right left)
/// Mention ranking: every handle mentioned in any tweet, paired with the
/// number of times it is mentioned, most-mentioned first.
let mentions =
    let allMentions = Seq.collect getMentions data.Rows
    Seq.summarise id allMentions
/// Retweet ranking: for every tweet recognised by the Retweet pattern, counts
/// the retweeted handle; most-retweeted first.
let retweets =
    data.Rows
    // The partial active pattern is itself a function returning an option,
    // so it can drive Seq.choose directly.
    |> Seq.choose (|Retweet|_|)
    |> Seq.summarise snd
/// Author ranking: number of tweets per FromUserScreenName, most active first.
let tweeters =
    data.Rows
    |> Seq.summarise (fun tweet -> tweet.FromUserScreenName)
/// Lower-case tokens excluded from the popular-word ranking: the empty token,
/// Twitter/HTML noise ("rt", "-", "&amp;", "amp") and common English words.
let stopwords =
    set [ ""; "rt"; "-"; "&amp;"; "will"; "just"; "amp";
          "a"; "about"; "above"; "after"; "again"; "against"; "all"; "am"; "an"; "and"; "any"; "are"; "aren't"; "as"; "at";
          "be"; "because"; "been"; "before"; "being"; "below"; "between"; "both"; "but"; "by";
          "can't"; "cannot"; "could"; "couldn't";
          "did"; "didn't"; "do"; "does"; "doesn't"; "doing"; "don't"; "down"; "during";
          "each"; "few"; "for"; "from"; "further";
          "had"; "hadn't"; "has"; "hasn't"; "have"; "haven't"; "having";
          "he"; "he'd"; "he'll"; "he's"; "her"; "here"; "here's"; "hers"; "herself"; "him"; "himself"; "his"; "how"; "how's";
          "i"; "i'd"; "i'll"; "i'm"; "i've"; "if"; "in"; "into"; "is"; "isn't"; "it"; "it's"; "its"; "itself";
          "let's"; "me"; "more"; "most"; "mustn't"; "my"; "myself";
          "no"; "nor"; "not"; "of"; "off"; "on"; "once"; "only"; "or"; "other"; "ought"; "our"; "ours"; "ourselves"; "out"; "over"; "own";
          "same"; "shan't"; "she"; "she'd"; "she'll"; "she's"; "should"; "shouldn't"; "so"; "some"; "such";
          "than"; "that"; "that's"; "the"; "their"; "theirs"; "them"; "themselves"; "then"; "there"; "there's"; "these";
          "they"; "they'd"; "they'll"; "they're"; "they've"; "this"; "those"; "through"; "to"; "too";
          "under"; "until"; "up"; "very";
          "was"; "wasn't"; "we"; "we'd"; "we'll"; "we're"; "we've"; "were"; "weren't";
          "what"; "what's"; "when"; "when's"; "where"; "where's"; "which"; "while"; "who"; "who's"; "whom"; "why"; "why's";
          "with"; "won't"; "would"; "wouldn't";
          "you"; "you'd"; "you'll"; "you're"; "you've"; "your"; "yours"; "yourself"; "yourselves" ]
/// True when `word` begins with "#", i.e. the token is a hashtag.
/// Compared ordinally, matching `isMention`: "#" is a symbol, so the
/// culture-sensitive default of StartsWith(string) is not wanted.
let isHashtag (word:string) = word.StartsWith("#", StringComparison.Ordinal)
/// True when `word` has strictly more than `length` characters.
let isLongerThan length (word:string) : bool =
    length < word.Length
/// Every space-separated token of every tweet that is not a retweet,
/// lower-cased. Retweets are skipped so repeated text is not counted again.
/// Lower-casing uses the invariant culture: the stop-word list is plain ASCII,
/// and culture-sensitive ToLower() can produce non-ASCII letters (e.g. "I"
/// becomes a dotless i under Turkish culture), making tokens miss that list.
let words =
    data.Rows
    |> Seq.filter (function
        | Retweet _ -> false
        | _ -> true)
    |> Seq.collect (fun tweet -> tweet.Text.Split ' ')
    |> Seq.map (fun token -> token.ToLowerInvariant())
/// Combines predicates with logical AND: true when no rule in `rules` rejects
/// `word`. Rules are tried in order and evaluation stops at the first failure.
let composeRules rules word =
    let isBroken rule = not (rule word)
    not (Seq.exists isBroken rules)
/// Word ranking: tokens from `words` that are not stop words, mentions or
/// hashtags, consist only of letters and are longer than three characters,
/// counted and ordered most-frequent first.
let popularWords =
    let isAllLetters (word:string) =
        word.ToCharArray() |> Array.forall Char.IsLetter
    let rules : (string -> bool) list =
        [ (fun word -> not (stopwords.Contains word))
          (fun word -> not (isMention word))
          (fun word -> not (isHashtag word))
          isAllLetters
          isLongerThan 3 ]
    words
    |> Seq.filter (composeRules rules)
    |> Seq.summarise id
/// Hashtag ranking: each token from `words` is reduced to its letters and "#"
/// characters; results that start with "#" and are longer than one character
/// are counted and ordered most-frequent first.
let hashtags =
    let keepTagChars (word:string) =
        let kept =
            word.ToCharArray()
            |> Array.filter (fun c -> c = '#' || Char.IsLetter c)
        String(kept)
    words
    |> Seq.map keepTagChars
    |> Seq.filter (fun tag -> isHashtag tag && isLongerThan 1 tag)
    |> Seq.summarise id
open FsPlot.Highcharts.Charting
// Plot the top 15 entries of each ranking.
// Seq.truncate is used instead of Seq.take: Seq.take raises when the source
// holds fewer than the requested number of elements, which would abort the
// script on a small data set, whereas Seq.truncate yields what is available.
mentions |> Seq.truncate 15 |> Chart.Column |> Chart.WithTitle "Mentions"
retweets |> Seq.truncate 15 |> Chart.Pie |> Chart.WithTitle "Retweets"
popularWords |> Seq.truncate 15 |> Chart.Column |> Chart.WithTitle "Words"
// The top-ranked hashtag is left out of the chart.
// NOTE(review): presumably because it is the search term itself and would
// dwarf the rest - confirm. Seq.skip raises on an empty source, so guard it.
(if Array.isEmpty hashtags then Seq.empty else Seq.skip 1 hashtags)
|> Seq.truncate 15
|> Chart.Pie
|> Chart.WithTitle "Hashtags"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment