Last active
October 30, 2015 18:31
-
-
Save MartinBodocky/b2f44d70f6f8d6a869a0 to your computer and use it in GitHub Desktop.
Prototyping for What's Cooking Kaggle competition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// link to competition: https://www.kaggle.com/c/whats-cooking/ | |
// Reference Deedle together with FSharp.Charting
#r "../packages/Deedle.1.2.4/lib/net40/Deedle.dll" | |
#r "../packages/FSharp.Charting.0.90.12/lib/net40/FSharp.Charting.dll" | |
#I "../packages/FSharp.Charting.0.90.12" | |
#load "FSharp.Charting.fsx" | |
#r "../packages/Newtonsoft.Json.7.0.1/lib/net45/Newtonsoft.Json.dll" | |
open System | |
open System.IO | |
open System.Collections.Generic | |
open Newtonsoft.Json | |
open FSharp.Charting | |
open Deedle | |
/// One recipe entry; this script deserializes arrays of these from
/// train.json and test.json with Json.NET.
type Receipt =
    { /// Numeric identifier of the recipe (written to the submission file).
      id : int
      /// Cuisine label of the recipe.
      /// NOTE(review): presumably absent (null) for test entries; confirm against the data.
      cuisine : string
      /// Names of the ingredients used by the recipe.
      ingredients : string [] }
// Read the training recipes from disk into memory.
// NOTE(review): the path is built by plain string concatenation, so no directory
// separator is inserted between __SOURCE_DIRECTORY__ and "../../data/...".
// Left unchanged because the intended data location cannot be confirmed from
// this script alone; verify the resolved path.
let path = __SOURCE_DIRECTORY__ + "../../data/What's Cooking/train.json"
let sr = new StreamReader(path)
// Parse the whole file as an array of recipes. The reader is disposed as soon as
// parsing finishes (or fails) so the file handle is not kept open for the rest
// of the script / FSI session.
let json =
    try
        JsonConvert.DeserializeObject<Receipt array>(sr.ReadToEnd())
    finally
        sr.Dispose()
// Occurrence table built from the training recipes:
// ingredient -> (cuisine -> number of times the ingredient appeared in a recipe of that cuisine).
let classificationCollection = new Dictionary<string, Dictionary<string, int>>()

// Walk every ingredient of every training recipe and bump the matching counter.
// TryGetValue fetches the existing entry in a single lookup instead of a
// ContainsKey check followed by repeated indexer accesses.
json |> Array.iter (fun receipt ->
    receipt.ingredients |> Array.iter (fun ing ->
        match classificationCollection.TryGetValue(ing) with
        | true, cuisineCounts ->
            // Ingredient already known: increment or start the counter for this cuisine.
            match cuisineCounts.TryGetValue(receipt.cuisine) with
            | true, count -> cuisineCounts.[receipt.cuisine] <- count + 1
            | _ -> cuisineCounts.Add(receipt.cuisine, 1)
        | _ ->
            // First time this ingredient is seen: create its per-cuisine table.
            let cuisineCounts = new Dictionary<string, int>()
            cuisineCounts.Add(receipt.cuisine, 1)
            classificationCollection.Add(ing, cuisineCounts)))
// Read the test recipes (the ones to be classified) from disk into memory.
// NOTE(review): the path is built by plain string concatenation, so no directory
// separator is inserted between __SOURCE_DIRECTORY__ and "../../data/...".
// Left unchanged because the intended data location cannot be confirmed from
// this script alone; verify the resolved path.
let pathTest = __SOURCE_DIRECTORY__ + "../../data/What's Cooking/test.json"
let srTest = new StreamReader(pathTest)
// Parse the whole file as an array of recipes. The reader is disposed as soon as
// parsing finishes (or fails) so the file handle is not kept open for the rest
// of the script / FSI session.
let jsonTest =
    try
        JsonConvert.DeserializeObject<Receipt array>(srTest.ReadToEnd())
    finally
        srTest.Dispose()
/// Predicts the cuisine of a recipe from its ingredients.
///
/// Every ingredient present in `collection` votes for each cuisine it was
/// counted under, weighted by that cuisine's share of the ingredient's total
/// count (count / sum of counts for the ingredient). Ingredients missing from
/// `collection` are ignored. The cuisine with the highest summed weight is
/// returned; on a tie, the tied cuisine that the score dictionary enumerates
/// first wins.
///
/// When no ingredient is known (including an empty `ingredients` array) the
/// function falls back to the cuisine with the largest total count across the
/// whole `collection`, instead of failing on an empty score table.
///
/// Parameters:
///   ingredients - ingredient names of the recipe to classify.
///   collection  - ingredient -> (cuisine -> occurrence count) table.
/// Returns: the name of the predicted cuisine.
/// Raises: System.ArgumentException when `collection` holds no counts at all,
///         so there is nothing to predict from.
let clasify (ingredients : string array) (collection : Dictionary<string, Dictionary<string, int>>) =
    let outcome = new Dictionary<string, double>()
    // Adds `weight` to the running score of `cuisine`, creating the entry on first use.
    let addScore (cuisine : string) (weight : double) =
        match outcome.TryGetValue(cuisine) with
        | true, current -> outcome.[cuisine] <- current + weight
        | _ -> outcome.Add(cuisine, weight)
    ingredients |> Array.iter (fun ing ->
        match collection.TryGetValue(ing) with
        | true, cuisineCounts ->
            // Normalise by the ingredient's total so very common ingredients
            // do not dominate the vote merely because they appear often.
            let sum = cuisineCounts.Values |> Seq.sum |> double
            for KeyValue(cuisine, count) in cuisineCounts do
                addScore cuisine (double count / sum)
        | _ -> ())
    if outcome.Count = 0 then
        // None of the ingredients is known: score every cuisine by its raw
        // total count over the whole table and pick the most frequent one.
        for KeyValue(_, cuisineCounts) in collection do
            for KeyValue(cuisine, count) in cuisineCounts do
                addScore cuisine (double count)
    if outcome.Count = 0 then
        invalidArg "collection" "cannot classify: the collection contains no cuisine counts"
    // Seq.maxBy keeps the first maximal element, matching the tie-breaking of
    // a stable descending sort followed by taking the head.
    (outcome |> Seq.maxBy (fun (KeyValue(_, score)) -> score)).Key
// Classify every test recipe and write the predictions as a CSV submission file.
// NOTE(review): the path is built by plain string concatenation, so no directory
// separator is inserted between __SOURCE_DIRECTORY__ and "../../data/...".
// Left unchanged because the intended output location cannot be confirmed from
// this script alone; verify the resolved path.
let outcomePath = __SOURCE_DIRECTORY__ + "../../data/What's Cooking/submission.csv"
// `false` = overwrite any existing submission file instead of appending to it.
let swSubmission = new StreamWriter(outcomePath, false)
try
    // CSV header line.
    swSubmission.WriteLine("id,cuisine")
    jsonTest |> Array.iter (fun rep ->
        let outcome = clasify rep.ingredients classificationCollection
        // Echo each prediction to the console as progress output.
        printfn "Id: %A" rep.id
        printfn "Outcome: %A" outcome
        printfn ""
        swSubmission.WriteLine(rep.id.ToString() + "," + outcome))
finally
    // Dispose flushes buffered rows and closes the file even when classification
    // or writing fails part-way through, so the handle is never left open.
    swSubmission.Dispose()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment