Skip to content

Instantly share code, notes, and snippets.

@MartinBodocky
Last active October 30, 2015 18:31
Show Gist options
  • Save MartinBodocky/b2f44d70f6f8d6a869a0 to your computer and use it in GitHub Desktop.
Save MartinBodocky/b2f44d70f6f8d6a869a0 to your computer and use it in GitHub Desktop.
Prototyping for What's Cooking Kaggle competition
// link to competition: https://www.kaggle.com/c/whats-cooking/
// reference Deedle together with FSharp.Charting
#r "../packages/Deedle.1.2.4/lib/net40/Deedle.dll"
#r "../packages/FSharp.Charting.0.90.12/lib/net40/FSharp.Charting.dll"
#I "../packages/FSharp.Charting.0.90.12"
#load "FSharp.Charting.fsx"
#r "../packages/Newtonsoft.Json.7.0.1/lib/net45/Newtonsoft.Json.dll"
open System
open System.IO
open System.Collections.Generic
open Newtonsoft.Json
open FSharp.Charting
open Deedle
/// One recipe entry, used as the element type when the JSON data files are
/// deserialized with JsonConvert.DeserializeObject<Receipt array>.
/// NOTE(review): the field names presumably mirror the JSON property names of
/// the competition files - confirm before renaming any of them.
type Receipt =
    { id : int; cuisine : string; ingredients : string[] }
// Read the whole training file into memory and parse it into Receipt records.
// NOTE(review): __SOURCE_DIRECTORY__ normally has no trailing directory
// separator, so this concatenation yields "<dir>../../data/..." - verify that
// the resulting path resolves to the intended folder on the target OS.
let path = __SOURCE_DIRECTORY__ + "../../data/What's Cooking/train.json"
let sr = new StreamReader(path)
// Release the file handle as soon as the payload is parsed (or parsing fails),
// instead of keeping the reader open for the rest of the session.
let json =
    try
        JsonConvert.DeserializeObject<Receipt array>(sr.ReadToEnd())
    finally
        sr.Dispose()
// Frequency table built from the training data:
//   ingredient -> (cuisine -> number of times the ingredient was listed in a
//   recipe of that cuisine).
let classificationCollection = new Dictionary<string, Dictionary<string, int>>()

json
|> Array.iter (fun receipt ->
    let cuisine = receipt.cuisine
    // visit every ingredient of the recipe and bump its counter for this cuisine
    receipt.ingredients
    |> Array.iter (fun ing ->
        match classificationCollection.TryGetValue ing with
        | true, perCuisine ->
            // ingredient already known: increment or start the cuisine counter
            match perCuisine.TryGetValue cuisine with
            | true, seen -> perCuisine.[cuisine] <- seen + 1
            | _ -> perCuisine.Add(cuisine, 1)
        | _ ->
            // first sighting of this ingredient: create its cuisine table
            let perCuisine = new Dictionary<string, int>()
            perCuisine.Add(cuisine, 1)
            classificationCollection.Add(ing, perCuisine)))
// Read the whole test file into memory and parse it into Receipt records.
// NOTE(review): __SOURCE_DIRECTORY__ normally has no trailing directory
// separator, so this concatenation yields "<dir>../../data/..." - verify that
// the resulting path resolves to the intended folder on the target OS.
let pathTest = __SOURCE_DIRECTORY__ + "../../data/What's Cooking/test.json"
let srTest = new StreamReader(pathTest)
// Release the file handle as soon as the payload is parsed (or parsing fails),
// instead of keeping the reader open for the rest of the session.
let jsonTest =
    try
        JsonConvert.DeserializeObject<Receipt array>(srTest.ReadToEnd())
    finally
        srTest.Dispose()
// Classify a recipe: pick the cuisine with the highest accumulated score.
//
// For every ingredient that is present in `collection`, each cuisine recorded
// for that ingredient receives (count for that cuisine) / (sum of all counts
// for the ingredient). Ingredients missing from `collection` are skipped.
// The cuisine with the largest total is returned; on a tie the cuisine that
// was scored first wins.
//
// ingredients : ingredient names of the recipe to classify
// collection  : ingredient -> (cuisine -> occurrence count) table
// returns     : the name of the best-scoring cuisine
// raises      : ArgumentException when no ingredient is found in `collection`
//               (including an empty `ingredients` array), because there is
//               then nothing to rank.
let clasify (ingredients : string array) (collection : Dictionary<string, Dictionary<string, int>>) =
    // cuisine -> score accumulated over all recognised ingredients
    let outcome = new Dictionary<string, double>()
    ingredients |> Array.iter (fun ing ->
        match collection.TryGetValue ing with
        | true, cuisineCounts ->
            // total occurrences of this ingredient, used to normalise its votes
            let sum = cuisineCounts.Values |> Seq.sum |> double
            for KeyValue(cuisine, count) in cuisineCounts do
                let share = double count / sum
                match outcome.TryGetValue cuisine with
                | true, score -> outcome.[cuisine] <- score + share
                | _ -> outcome.Add(cuisine, share)
        | _ -> ())
    // Fail with an explicit message instead of an opaque empty-sequence error.
    if outcome.Count = 0 then
        invalidArg "ingredients" "none of the given ingredients occur in the collection, so no cuisine can be predicted"
    // Single pass for the maximum; Seq.maxBy keeps the first of equal maxima.
    (outcome |> Seq.maxBy (fun (KeyValue(_, score)) -> score)).Key
// Write the submission file: a header line followed by one "id,cuisine" row
// per test recipe, echoing every prediction to the console as it is made.
// NOTE(review): __SOURCE_DIRECTORY__ normally has no trailing directory
// separator, so this concatenation yields "<dir>../../data/..." - verify that
// the resulting path resolves to the intended folder on the target OS.
let outcomePath = __SOURCE_DIRECTORY__ + "../../data/What's Cooking/submission.csv"
// second argument `false` = do not append, i.e. overwrite an existing file
let swSubmission = new StreamWriter(outcomePath, false)
try
    swSubmission.WriteLine("id,cuisine")
    jsonTest |> Array.iter (fun rep ->
        let outcome = clasify rep.ingredients classificationCollection
        printfn "Id: %A" rep.id
        printfn "Outcome: %A" outcome
        printfn ""
        swSubmission.WriteLine(rep.id.ToString() + "," + outcome))
    swSubmission.Flush()
finally
    // Always close the writer, even when classification or a write fails part
    // way through, so rows already written are flushed and the file handle is
    // released.
    swSubmission.Close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment