-
-
Save zgramana/1dd6dcb777d671adf8b64d7f6d0c38ad to your computer and use it in GitHub Desktop.
Word2Vec experiment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#I "../packages/" | |
#r @"FSharp.Data/lib/net40/FSharp.Data.dll" | |
#r @"StemmersNet/lib/net20/StemmersNet.dll" | |
#r @"FSharp.Collections.ParallelSeq/lib/net40/FSharp.Collections.ParallelSeq.dll" | |
#load "Utilities.fs" | |
open FSharp.Data | |
// Locations of the competition data files, relative to this script.
// Each value is a [<Literal>] so it can be passed as a static argument to a
// type provider (trainPath and testPath are used that way by CsvProvider).

/// Training set CSV.
[<Literal>]
let trainPath = @"../data/train.csv"

/// Test set CSV.
[<Literal>]
let testPath = @"../data/test.csv"

/// Product attributes CSV (not referenced by the visible part of the script).
[<Literal>]
let attributesPath = @"../data/attributes.csv"

/// Product descriptions CSV (not referenced by the visible part of the script).
/// Written with forward slashes like the sibling paths: the previous
/// backslash form only resolved on Windows, while '/' works on every platform.
[<Literal>]
let productsPath = @"../data/product_descriptions.csv"

/// Directory prefix rather than a file name.
/// NOTE(review): presumably the folder submissions are written to - confirm.
[<Literal>]
let submissionPath = @"../data/"
/// Typed view over the training CSV. The Schema override types the fifth
/// column as float and leaves the first four columns to inference.
type Train = CsvProvider<trainPath, Schema = ",,,,float">

/// Typed view over the test CSV, with every column inferred.
type Test = CsvProvider<testPath>

/// Product titles of the training sample followed by those of the test sample.
/// The sequence is lazy: the samples are only read when it is enumerated.
let sample =
    seq {
        for trainRow in Train.GetSample().Rows do
            yield trainRow.Product_title

        for testRow in Test.GetSample().Rows do
            yield testRow.Product_title
    }
#load "Utilities.fs" | |
open HomeDepot.Utilities | |
/// Distinct product titles, each run through `preprocess`, materialized as an
/// array. Duplicates are removed before preprocessing, so two different raw
/// titles that preprocess to the same text are both kept.
/// NOTE(review): `preprocess` presumably comes from HomeDepot.Utilities - confirm.
let titles =
    [| for rawTitle in Seq.distinct sample -> preprocess rawTitle |]
/// Text file that receives the preprocessed titles, one per line; the same
/// file is later handed to the Word2Vec builder as its training file.
/// NOTE(review): machine-specific absolute path - adjust before running elsewhere.
let path = @"C:\users\mathias brandewinder\desktop\titles.txt"

/// Writes every title to `path`. WriteAllLines returns unit, so `file` carries
/// no value; the binding exists only for its side effect.
let file: unit = System.IO.File.WriteAllLines(path, titles)
#r @"C:\Users\Mathias Brandewinder\Documents\GitHub\Word2Vec.Net\Word2Vec.Net\bin\Release\Word2Vec.Net.dll" | |
open Word2Vec | |
// NOTE(review): both locations below are machine-specific absolute paths.

/// File given to the builder as its output file; the Distance and WordAnalogy
/// queries further down are constructed from this same path.
let output = @"C:\users\mathias brandewinder\desktop\output.txt"

/// File given to the builder through `WithSaveVocubFile`.
let vocab = @"C:\users\mathias brandewinder\desktop\vocab.txt"

/// Word2Vec trainer configured step by step; the calls run in the same order
/// as a single fluent chain would.
let builder =
    // Input and output files.
    let withFiles =
        Word2Vec.Net.Word2VecBuilder
            .Create()
            .WithTrainFile(path)
            .WithOutputFile(output)

    // Model options. The CBOW switch is kept disabled, as found:
    // .WithCBow(1)
    let withModelOptions = withFiles.WithBinary(1).WithSize(50)

    withModelOptions.WithSaveVocubFile(vocab).WithWindow(5).Build()

// Run the training with the configuration above.
builder.TrainModel()
/// Distance query object constructed from the model file at `output`.
let distance = Word2Vec.Net.Distance output

// Query the model for the single word "shower".
distance.Search "shower"

/// Analogy query object constructed from the same model file.
let analogy = Word2Vec.Net.WordAnalogy output

// Query the model with the two-word phrase "metal stakes".
analogy.Search "metal stakes"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment