Created
May 3, 2017 22:44
-
-
Save gilles-leblanc/033895b5b935085e782eb4ea67a81cc9 to your computer and use it in GitHub Desktop.
Name Generator for blog post
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module NameLength | |
open System | |
open System.Configuration | |
open MathNet.Numerics.Distributions | |
// A record type that contains the mean and standard deviation of the names world length from an | |
// input file. | |
type NameLengthInfo = { mean:float; standardDeviation: float } | |
// Given an input string returns the NameLengthInfo record that can be later used to draw a random | |
// value from the normal distribution. | |
let internal getNameLengthInfo (input:string) : NameLengthInfo = | |
let names = input.Split [|' '|] | |
let numberOfNames = float names.Length | |
let namesLengths = names |> Array.map (fun name -> float name.Length) | |
let mean = namesLengths |> Array.average | |
let standardDeviation = sqrt (Array.averageBy (fun x -> (x - mean)**2.0) namesLengths) | |
{ mean = mean; standardDeviation = standardDeviation } | |
// Given a NameLengthInfo returns a random value drawn from a normal (gaussian) distribution | |
let internal getNameLength (nameLengthInfo:NameLengthInfo) = | |
let mean = nameLengthInfo.mean | |
let standardDeviation = nameLengthInfo.standardDeviation | |
let normalDistribution = Normal(mean, standardDeviation) | |
let length = normalDistribution.Sample() |> Math.Round |> int | |
let minimumLength = ConfigurationManager.AppSettings.Item("minimumNameLength") |> int | |
if length >= (minimumLength) then length else minimumLength | |
module ProbabilityTable | |
open System | |
open System.IO | |
open Newtonsoft.Json | |
open MapConverter | |
open NameLength | |
type ProbabilityTable = { probabilities:Map<string, Map<string, float>>; | |
nameLengthInfo:NameLengthInfo } | |
// Parses a string and count the total number of occurrences of substrings of size length | |
let rec countOccurrences input (occurrenceTable:Map<string, float>) length = | |
let adjLen = length - 1 | |
match input |> Seq.toList with | |
| head :: tail when tail.Length >= adjLen -> | |
let other = Seq.take adjLen tail |> Seq.toList | |
let occurrence = head :: other |> Array.ofList |> String | |
// add current occurrence to the occurrence table | |
let updatedMap = match occurrenceTable.ContainsKey (occurrence) with | |
| true -> occurrenceTable.Add(occurrence, occurrenceTable.[occurrence] + 1.0) | |
| false -> occurrenceTable.Add(occurrence, 1.0) | |
// call the function recursively with the rest of the string | |
countOccurrences (tail |> Array.ofList |> String) updatedMap length | |
| _ -> occurrenceTable | |
// Return a new probability table with the key value pair added. | |
// Given letter X, a probability table gives a percentage for letter Y to appear following letter X. | |
let private addProbability (key:string) value (probabilityTable:Map<string, Map<string, float>>) length = | |
let mainKey = Char.ToString key.[0] | |
let subKey = key.[1..] | |
match Seq.forall Char.IsLower subKey with | |
| false -> probabilityTable // do not add a subkey containing a white space | |
| _ -> match probabilityTable.ContainsKey(mainKey) with | |
| true -> let subMap = Map.find mainKey probabilityTable | |
match subMap.ContainsKey(subKey) with | |
| true -> failwithf "subkey %s already added in probabilityTable" subKey | |
| false -> let newSubMap = subMap.Add(subKey, value) | |
probabilityTable.Add(mainKey, newSubMap) | |
| false -> let subMap = Map.empty.Add(subKey, value) | |
probabilityTable.Add(mainKey, subMap) | |
// Cumulate the submap to transform to probabilities of the form 0.75 0.25 0.0. | |
// Notice that the order is decreasing. Instead of using the more tradational increasing order | |
// of 0.25 0.75 1.0, we are presenting the values in decreasing order starting from 1 to make | |
// picking the right value easier later on. When we will pick the letters we will draw a random | |
// number and check if it is greater than the value. | |
let private cumulate map = | |
let total = Map.fold (fun acc key value -> acc + value) 0.0 map | |
let _, cumulativeSubMap = | |
// map into probability | |
Map.map (fun key value -> value / total) map | |
// fold into a cumulative probability result | |
|> Map.fold (fun (t, (m:Map<string, float>)) key value -> | |
(t - value, m.Add(key, t - value)) | |
) (1.0, Map.empty) | |
Map.map (fun key (value:float) -> Math.Round(value, 6)) cumulativeSubMap | |
// Given an input string creates a probability table for the different letters in the string. | |
let buildProbabilityTable (input:string) length : ProbabilityTable = | |
let nameLengths = getNameLengthInfo input | |
let occurrencesTable = countOccurrences (input.ToLower()) Map.empty length | |
let adjLen = length - 1 | |
let table = Map.fold (fun acc key value -> addProbability key value acc adjLen) | |
Map.empty occurrencesTable | |
|> Map.map (fun key value -> cumulate value) | |
{ probabilities = table; nameLengthInfo = nameLengths } | |
// Given an input file path, creates a probability table calling buildProbabilityTable | |
let buildProbabilityTableFromMediaFile filePath length : ProbabilityTable = | |
let input = File.ReadAllText(filePath) | |
buildProbabilityTable input length | |
// Given an input file path for an already built serialized probabilityTable, return this table | |
let buildProbabilityTableFromSerializationFile filePath length : ProbabilityTable = | |
let json = File.ReadAllText(filePath) | |
JsonConvert.DeserializeObject<ProbabilityTable>(json, mapConverter) | |
// Serialize a ProbabilityTable to file | |
let serializeProbabilityTable filePath (table:ProbabilityTable) = | |
let json = JsonConvert.SerializeObject table | |
File.WriteAllText(filePath, json) | |
module NameGenerator | |
open System | |
open System.Collections.Generic | |
open NameLength | |
open ProbabilityTable | |
let rnd = System.Random() | |
// Randomly returns a string from values based on it's probability | |
let private pickString (values:Map<string, float>) = | |
let randomValue = rnd.NextDouble() | |
let pick = values | |
|> Map.tryPick (fun key value -> if randomValue >= value then Some(key) else None) | |
match pick with | |
| Some v -> v | |
| None -> failwith "Can't pick letter" | |
// Recursively creates a new name. | |
let rec private buildName (nameSoFar:string) (charLeft:int) (probabilityTable:ProbabilityTable) = | |
let lastChar = Char.ToString nameSoFar.[nameSoFar.Length - 1] | |
let addition = match Map.containsKey lastChar probabilityTable.probabilities with | |
// if our character exists pick one of it's subkeys | |
| true -> pickString probabilityTable.probabilities.[lastChar] | |
// otherwise start a new sequence of character with a name starting character | |
| false -> pickString probabilityTable.probabilities.[" "] | |
let newName = nameSoFar + addition | |
let newCharLeft = charLeft - addition.Length | |
match newCharLeft with | |
| ln when ln > 0 -> buildName newName newCharLeft probabilityTable // we need more | |
| ln when ln < 0 -> newName.[0..newName.Length - 1] // we went one char to long | |
| _ -> newName // we are exactly where we want to be | |
// Given a pre-built probability table generates a random name. | |
let generateRandomName (probabilityTable:ProbabilityTable) = | |
let nameLength = int (getNameLength probabilityTable.nameLengthInfo) | |
// We pass in the whitespace char to start the name as this will allow us to find letters after | |
// spaces in our probability table. These are the letters that start name. | |
// We must remember to take this whitespace into account in our nameLength and later when | |
// returning the name | |
let lowerCaseName = buildName " " nameLength probabilityTable | |
(Char.ToUpper lowerCaseName.[1] |> Char.ToString) + lowerCaseName.[2..] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment