-
-
Save caindy/5cd5ee279861de6dd92d to your computer and use it in GitHub Desktop.
Prototyping for What's Cooking Kaggle competition
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Link to the competition: https://www.kaggle.com/c/whats-cooking/
// Reference Deedle together with FSharp.Charting
#r "../packages/Deedle.1.2.4/lib/net40/Deedle.dll" | |
#r "../packages/FSharp.Charting.0.90.12/lib/net40/FSharp.Charting.dll" | |
#I "../packages/FSharp.Charting.0.90.12" | |
#load "FSharp.Charting.fsx" | |
#r "../packages/Newtonsoft.Json.7.0.1/lib/net45/Newtonsoft.Json.dll" | |
open System | |
open System.IO | |
open System.Collections.Generic | |
open Newtonsoft.Json | |
open FSharp.Charting | |
open Deedle | |
/// One entry of the competition data, as produced by deserializing the
/// train/test JSON files into `Receipt array`.
type Receipt =
    {
        /// Integer identifier of the entry; written to the `id` column of the submission.
        id : int
        /// Cuisine label of the entry; used as the class when counting ingredients.
        cuisine : string
        /// Names of the ingredients listed for the entry.
        ingredients : string[]
    }
// Read the training set from disk into memory.
// Path.Combine supplies the directory separators. Plain concatenation glued
// "../../" straight onto the last folder name of __SOURCE_DIRECTORY__
// (e.g. ".../scripts../../data"), which only resolves on systems that
// normalize paths lexically; this form points at the same file everywhere.
let path = Path.Combine(__SOURCE_DIRECTORY__, "..", "data", "What's Cooking", "train.json")
let sr = new StreamReader(path)
// Deserialize the whole file as an array of receipts. The reader is disposed
// in `finally` so the file handle is released even if reading or parsing fails.
let json =
    try
        JsonConvert.DeserializeObject<Receipt array>(sr.ReadToEnd())
    finally
        sr.Dispose()
// Training counts: ingredient -> (cuisine -> number of times the ingredient
// was seen in a receipt labelled with that cuisine).
let classificationCollection = new Dictionary<string, Dictionary<string, int>>()
json
|> Array.iter (fun receipt ->
    let cuisine = receipt.cuisine
    receipt.ingredients
    |> Array.iter (fun ingredient ->
        match classificationCollection.TryGetValue(ingredient) with
        | true, perCuisine ->
            // Known ingredient: bump the counter for this cuisine, or start it at 1.
            match perCuisine.TryGetValue(cuisine) with
            | true, seen -> perCuisine.[cuisine] <- seen + 1
            | false, _ -> perCuisine.Add(cuisine, 1)
        | false, _ ->
            // First sighting of this ingredient: create its per-cuisine table.
            let perCuisine = new Dictionary<string, int>()
            perCuisine.Add(cuisine, 1)
            classificationCollection.Add(ingredient, perCuisine)))
// Read the test set from disk into memory.
// Path.Combine supplies the directory separators. Plain concatenation glued
// "../../" straight onto the last folder name of __SOURCE_DIRECTORY__
// (e.g. ".../scripts../../data"), which only resolves on systems that
// normalize paths lexically; this form points at the same file everywhere.
let pathTest = Path.Combine(__SOURCE_DIRECTORY__, "..", "data", "What's Cooking", "test.json")
let srTest = new StreamReader(pathTest)
// Deserialize the whole file as an array of receipts. The reader is disposed
// in `finally` so the file handle is released even if reading or parsing fails.
let jsonTest =
    try
        JsonConvert.DeserializeObject<Receipt array>(srTest.ReadToEnd())
    finally
        srTest.Dispose()
/// Picks the best-scoring cuisine for a list of ingredients.
///
/// Each ingredient present in `collection` adds, for every cuisine it was
/// counted with, that cuisine's share of the ingredient's total count
/// (count / sum of all counts for the ingredient). Ingredients missing from
/// `collection` contribute nothing. The cuisine with the highest accumulated
/// score is returned; on a tie the cuisine that entered the score table first
/// wins.
///
/// ingredients: ingredient names to classify.
/// collection:  ingredient -> (cuisine -> count) table.
/// Returns the name of the winning cuisine.
/// Raises System.ArgumentException when no ingredient is found in
/// `collection`, because there is then nothing to choose from.
let clasify (ingredients : string array) (collection : Dictionary<string, Dictionary<string, int>>) =
    let outcome = new Dictionary<string, double>()
    ingredients
    |> Array.iter (fun ing ->
        match collection.TryGetValue(ing) with
        | true, cuisineCounts ->
            // Total number of sightings of this ingredient across all cuisines.
            let sum = cuisineCounts.Values |> Seq.sum |> double
            for KeyValue(cuisine, count) in cuisineCounts do
                let share = double count / sum
                match outcome.TryGetValue(cuisine) with
                | true, score -> outcome.[cuisine] <- score + share
                | false, _ -> outcome.Add(cuisine, share)
        | false, _ -> ())
    if outcome.Count = 0 then
        // Same exception type the empty-sequence lookup used to raise, but
        // with a message that says what actually went wrong.
        invalidArg "ingredients" "None of the ingredients occur in the collection, so no cuisine can be chosen."
    // A single pass for the maximum; Seq.maxBy keeps the first of equal scores,
    // which matches taking the head of a stable descending sort.
    (outcome |> Seq.maxBy (fun (KeyValue(_, score)) -> score)).Key
// Classify every test receipt and write the results as "id,cuisine" rows.
// Path.Combine supplies the directory separators. Plain concatenation glued
// "../../" straight onto the last folder name of __SOURCE_DIRECTORY__
// (e.g. ".../scripts../../data"), which only resolves on systems that
// normalize paths lexically; this form points at the same file everywhere.
let outcomePath = Path.Combine(__SOURCE_DIRECTORY__, "..", "data", "What's Cooking", "submission.csv")
// `false` = overwrite any existing file instead of appending to it.
let swSubmission = new StreamWriter(outcomePath, false)
try
    swSubmission.WriteLine("id,cuisine")
    jsonTest
    |> Array.iter (fun rep ->
        let outcome = clasify rep.ingredients classificationCollection
        // Echo each prediction to the console as it is produced.
        printfn "Id: %A" rep.id
        printfn "Outcome: %A" outcome
        printfn ""
        swSubmission.WriteLine(rep.id.ToString() + "," + outcome))
    swSubmission.Flush()
finally
    // Close the writer even when classification or a write fails part-way,
    // so the file handle is not left open.
    swSubmission.Close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment