Created
April 3, 2012 15:14
-
-
Save ptmt/2292827 to your computer and use it in GitHub Desktop.
simple fsharp tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module totify.tests | |
open NUnit.Framework | |
open FsUnit | |
open TinyNLP | |
let shouldBeTrue a = | |
Assert.AreEqual(a, true) | |
[<Test>] | |
let ``tokenizer should be able to tokenize nothing`` () = | |
TinyNLP.Tokenizer.tokenize "" |> fun x -> x.Value.Length = 1 |> shouldBeTrue | |
[<Test>] | |
let ``tokenizer should be able to tokenize simple word`` () = | |
TinyNLP.Tokenizer.tokenize "слово" |> fun x -> x.Value.Length = 1 |> shouldBeTrue | |
[<Test>] | |
let ``tokenizer should be able to tokenize two simple words`` () = | |
TinyNLP.Tokenizer.tokenize "два слова" |> fun x -> x.Value.Length = 2 |> shouldBeTrue | |
[<Test>] | |
let ``tokenizer should be able to tokenize three simple words`` () = | |
TinyNLP.Tokenizer.tokenize "три простых слова" |> fun x -> x.Value.Length = 3 |> shouldBeTrue | |
[<Test>] | |
let ``tokenizer should be able to tokenize two simple words and one preposition`` () = | |
TinyNLP.Tokenizer.tokenize "слово с предлогом" |> fun x -> x.Value.Length = 3 |> shouldBeTrue | |
[<Test>] | |
let ``tokenizer should be able to tokenize three simple words with one comma`` () = | |
TinyNLP.Tokenizer.tokenize "слово, ещё одно" |> (fun x -> x.Value.Length = 4 && x.Value.Item(0) = "слово") |> shouldBeTrue | |
[<Test>] | |
let ``tokenizer should be able to tokenize New York`` () = | |
TinyNLP.Tokenizer.tokenize "Нью-Йорк" |> fun x -> x.Value.Length = 1 |> shouldBeTrue | |
[<Test>] | |
let ``tokenizer should be able to tokenize word with dash`` () = | |
TinyNLP.Tokenizer.tokenize "слово - не воробей" |> fun x -> x.Value.Length = 4 |> shouldBeTrue |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment