Created
December 6, 2016 03:03
-
-
Save jamessdixon/2e51e07170d5133b3a9e15385995d16b to your computer and use it in GitHub Desktop.
Age and Sex Analysis Of Microsoft USA MVPs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "WindowsBase.dll" | |
#r "System.Net.Http.dll" | |
#r "PresentationCore.dll" | |
#r "../packages/FSharp.Data.2.3.2/lib/net40/FSharp.Data.dll" | |
open System | |
open System.IO | |
open System.Web | |
open System.Net | |
open FSharp.Data | |
open System.Net.Http | |
open System.Threading | |
open System.Net.Http.Headers | |
open System.Windows.Media.Imaging | |
open System.Text.RegularExpressions | |
/// Downloads one page of the Microsoft MVP search results for the United States
/// and returns the raw response body as a string.
///
/// pageNumber: value appended to the `pn` query-string parameter of the search URL.
/// Returns: the full text of the HTTP response.
/// Raises: whatever `WebRequest.GetResponse` raises (e.g. `WebException`) when the
/// request fails; no errors are handled here.
let getPageContents(pageNumber:int) =
    let uri = new Uri("http://mvp.microsoft.com/en-us/search-mvp.aspx?lo=United+States&sl=0&browse=False&sc=s&ps=36&pn=" + pageNumber.ToString())
    let request = WebRequest.Create(uri)
    request.Method <- "GET"
    // WebResponse is IDisposable: bind it with `use` so the underlying connection
    // is released even if reading the body throws. Without this, repeated calls
    // can exhaust the connection pool.
    use response = request.GetResponse()
    use stream = response.GetResponseStream()
    use reader = new StreamReader(stream)
    reader.ReadToEnd()
/// Concatenated response bodies of search-result pages 1 through 19, in page order.
let contents =
    [| for pageNumber in 1 .. 19 -> getPageContents pageNumber |]
    |> String.concat ""
/// Finds every "/PublicProfile/Photo/<digits>" path in the given page text and
/// turns each one into an absolute photo URL on mvp.microsoft.com.
///
/// pageContents: text to scan.
/// Returns: an array of URL strings, one per regex match, in match order
/// (duplicates are not removed).
let getUrisFromPageContents(pageContents:string) =
    let photoPathPattern = "/PublicProfile/Photo/\d+"
    let photoMatches = Regex.Matches(pageContents, photoPathPattern)
    [| for photoMatch in Seq.cast<Match> photoMatches ->
        "https://mvp.microsoft.com/en-us" + photoMatch.Value + "?language=en-us" |]
/// Photo URLs extracted from the downloaded search-result pages.
let uris = contents |> getUrisFromPageContents
/// Downloads the resource at `uri` into the local photos folder, using a freshly
/// generated GUID as the file name with a ".jpg" extension.
///
/// uri: absolute URL (string) of the image to download.
let saveImage uri =
    let fileName = Guid.NewGuid().ToString() + ".jpg"
    let destination = @"F:\Git\ChickenSoftware.ParseMvpPages.Solution\ChickenSoftware.ParseMvpPages\photos\" + fileName
    use webClient = new WebClient()
    webClient.DownloadFile(Uri(uri), destination)
// Download every extracted photo URL to disk, one at a time.
for photoUri in uris do
    saveImage photoUri
/// Posts the image file at `path` to the Project Oxford face-detection endpoint,
/// requesting the face id plus the age and gender attributes.
///
/// path: file-system path of the image to upload (read fully into memory).
/// Returns: `Some body` with the response text when the service answers 200 OK,
/// otherwise `None`.
/// Raises: exceptions from reading the file or from the HTTP call are not handled.
let getOxfordResults path =
    let queryString = HttpUtility.ParseQueryString(String.Empty)
    queryString.Add("returnFaceId","true")
    queryString.Add("returnFaceLandmarks","false")
    queryString.Add("returnFaceAttributes","age,gender")
    let uri = "https://api.projectoxford.ai/face/v1.0/detect?" + queryString.ToString()
    let bytes = File.ReadAllBytes(path)
    // HttpClient, the request content and the response message are all IDisposable;
    // `use` releases them when the function returns instead of leaking one set per photo.
    use client = new HttpClient()
    client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key","xxxxxxxxxxx")
    use content = new ByteArrayContent(bytes)
    content.Headers.ContentType <- MediaTypeHeaderValue("application/octet-stream")
    // Blocking on .Result keeps the script synchronous, as in the original.
    use result = client.PostAsync(uri,content).Result
    // Pause after every request — presumably to stay under the API's rate limit (confirm).
    Thread.Sleep(TimeSpan.FromSeconds(5.0))
    // The body is read to a string here, before `result` is disposed on exit.
    match result.StatusCode with
    | HttpStatusCode.OK -> Some (result.Content.ReadAsStringAsync().Result)
    | _ -> None
/// Typed view over the face-detection JSON response, inferred by the FSharp.Data
/// JSON type provider from the inline sample below (an array of faces, each with
/// a faceId, a faceRectangle and faceAttributes containing gender and age).
type FaceInfo =
    JsonProvider<Sample="[{\"faceId\":\"83045097-daa1-4f1c-8669-ed012e9b5975\",\"faceRectangle\":{\"top\":187,\"left\":209,\"width\":214,\"height\":214},\"faceAttributes\":{\"gender\":\"male\",\"age\":42.8}}]">
/// Extracts the age and gender of the first detected face from an optional
/// face-detection JSON response.
///
/// results: `Some json` with the response text, or `None` when the call failed.
/// Returns: `Some (age, gender)` for the first face in the response, or `None`
/// when there is no response or the response contains no faces.
let parseOxfordResuls results =
    let firstFaceAttributes json =
        match FaceInfo.Parse(json) |> Seq.tryHead with
        | Some firstFace ->
            Some(firstFace.FaceAttributes.Age, firstFace.FaceAttributes.Gender)
        | None -> None
    results |> Option.bind firstFaceAttributes
#time | |
/// One entry per file in the photos folder: `Some (age, gender)` when a face was
/// detected, otherwise `None`. All files are sent to the service first, then all
/// responses are parsed.
let results =
    let photoDirectory = @"F:\Git\ChickenSoftware.ParseMvpPages.Solution\ChickenSoftware.ParseMvpPages\photos"
    let rawResponses =
        Directory.GetFiles(photoDirectory)
        |> Array.map getOxfordResults
    rawResponses |> Array.map parseOxfordResuls
#r "../packages/MathNet.Numerics.3.13.1/lib/net40/MathNet.Numerics.dll" | |
#r "../packages/MathNet.Numerics.FSharp.3.13.1/lib/net40/MathNet.Numerics.FSharp.dll" | |
open MathNet.Numerics.Statistics | |
// Total number of photos processed (including those with no detected face).
results |> Seq.length
/// Estimated ages, as floats, for every photo in which a face was detected.
/// This is a lazy sequence and is re-evaluated each time it is enumerated.
let ages =
    results
    |> Seq.choose (fun detection ->
        match detection with
        | Some (age, _) -> Some (float age)
        | None -> None)
// Descriptive statistics over the estimated ages.
let stats = DescriptiveStatistics(ages)

// Size and range of the sample.
let count = stats.Count
let largest = stats.Maximum
let smallest = stats.Minimum

// Central tendency.
let mean = stats.Mean
let median = Statistics.Median(ages)

// Spread and shape of the distribution.
let variance = stats.Variance
let standardDeviation = stats.StandardDeviation
let kurtosis = stats.Kurtosis
let skewness = stats.Skewness

// Quartiles. The spelling `uppserQuartile` is the binding's existing name and is kept as-is.
let lowerQuartile = Statistics.LowerQuartile(ages)
let uppserQuartile = Statistics.UpperQuartile(ages)

// Ages grouped into 10 buckets.
let histogram = Histogram(ages, 10)
// Print one line per histogram bucket in the form "<lower>-<upper> <count>".
// The loop is driven by the histogram's own bucket count, so it stays correct
// if the number of buckets used to build `histogram` changes.
for bucketIndex in 0 .. histogram.BucketCount - 1 do
    let bucket = histogram.[bucketIndex]
    printfn "%f-%f %f" bucket.LowerBound bucket.UpperBound bucket.Count
#r "../packages/FSharp.Charting.0.90.14/lib/net40/FSharp.Charting.dll" | |
open FSharp.Charting | |
// Histogram chart of the estimated ages, displayed in a window.
let chart = Chart.Histogram(ages, Intervals = 10.0)
chart |> Chart.Show
/// Detected gender labels for every photo in which a face was detected.
/// This is a lazy sequence and is re-evaluated each time it is enumerated.
let gender =
    results
    |> Seq.choose (fun detection ->
        match detection with
        | Some (_, label) -> Some label
        | None -> None)
// For each distinct gender label: (label, occurrences, share of `count`).
gender
|> Seq.countBy id
|> Seq.map (fun (label, occurrences) ->
    label, occurrences, float occurrences / float count)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment