Last active
August 5, 2019 20:21
-
-
Save jakehawken/04d90ac439cbd909ee8041798d2d4166 to your computer and use it in GitHub Desktop.
I've never actually researched how an actual Markov Chain works, but I'm attempting, by way of a lot of assumptions, to write something at least similar to one in Swift. It’s a fun mental exercise. My approach is to make something that takes in an array of strings, builds a predictive tree based off of that input, and then can generate sentences…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // ProbabilityTree.swift | |
| // Created by Jake Hawken on 8/4/19. | |
| // Copyright © 2019 Jake Hawken. All rights reserved. | |
| import Foundation | |
| // TODO: genericize so that this can be used for things other than strings | |
| class ProbabilityTree { | |
| private let rootNode = Node(word: "", parent: nil) | |
| func add(wordSequence words: [String]) { | |
| var currentNode = rootNode | |
| for word in words { | |
| currentNode = currentNode.insert(newWord: word) | |
| if currentNode.word.hasSentenceTerminator { | |
| //TODO: keep track of incidence of terminators | |
| currentNode = rootNode | |
| } | |
| } | |
| } | |
| func generateSentence() -> String { | |
| var words = [String]() | |
| rootNode.appendTo(sentence: &words) | |
| if let firstWord = words.first { | |
| let capitalized = firstWord.capitalizingFirstLetter() | |
| words[0] = capitalized | |
| } | |
| var last = words.removeLast() | |
| if !last.hasSentenceTerminator { | |
| last += sentenceTerminator() | |
| } | |
| words.append(last) | |
| return words.joined(separator: " ") | |
| } | |
| private func sentenceTerminator() -> String { | |
| // TODO: based on overall incidence of given sentence terminators, select a statistically-weighted random terminator | |
| return "." | |
| } | |
| } | |
| extension ProbabilityTree { | |
| class Node { | |
| let word: String | |
| let parent: Node? | |
| private(set) var incidence: UInt = 1 | |
| private(set) var children = Set<Node>() | |
| init(word: String, parent: Node?) { | |
| self.word = word | |
| self.parent = parent | |
| } | |
| func insert(newWord: String) -> Node { | |
| incidence += 1 | |
| if let existing = existingChild(forWord: newWord) { | |
| return existing | |
| } | |
| let new = newChild(forWord: newWord) | |
| children.insert(new) | |
| return new | |
| } | |
| func appendTo(sentence words: inout [String]) { | |
| guard let next = nextNode() else { | |
| return | |
| } | |
| words.append(next.word) | |
| if next.word.hasSentenceTerminator { | |
| return | |
| } | |
| next.appendTo(sentence: &words) | |
| } | |
| private func nextNode() -> Node? { | |
| guard !word.hasSentenceTerminator else { | |
| return nil | |
| } | |
| guard !children.isEmpty else { | |
| return nil | |
| } | |
| let randomValue = Float.random(in: 0...1) | |
| let totalIncidence = children.reduce(Float(0)) { $0 + Float($1.incidence) } | |
| let sortedByIncidence = children.sorted { $0.incidence < $1.incidence } | |
| var next: Node? | |
| for node in sortedByIncidence { | |
| let likelihood = Float(node.incidence) / totalIncidence | |
| if randomValue <= likelihood { | |
| next = node | |
| break | |
| } | |
| } | |
| return next ?? sortedByIncidence.last | |
| } | |
| private var root: Node { | |
| return parent ?? self | |
| } | |
| private func existingChild(forWord word: String) -> Node? { | |
| return children.first { $0.word == word } | |
| } | |
| private func newChild(forWord word: String) -> Node { | |
| return Node(word: word, parent: self) | |
| } | |
| } | |
| } | |
| extension ProbabilityTree.Node: Hashable { | |
| static func == (lhs: ProbabilityTree.Node, rhs: ProbabilityTree.Node) -> Bool { | |
| return lhs.word == rhs.word | |
| } | |
| func hash(into hasher: inout Hasher) { | |
| hasher.combine(word) | |
| } | |
| } | |
| private extension Set where Element == ProbabilityTree.Node { | |
| func contains(word: String) -> Bool { | |
| return map { $0.word }.contains(word) | |
| } | |
| } | |
| private extension String { | |
| func capitalizingFirstLetter() -> String { | |
| return prefix(1).capitalized + dropFirst() | |
| } | |
| mutating func capitalizeFirstLetter() { | |
| self = self.capitalizingFirstLetter() | |
| } | |
| var sentenceTerminator: String? { | |
| if hasSuffix("?") || hasSuffix("? ") || hasSuffix("? ") { | |
| return "?" | |
| } | |
| else if hasSuffix(".") || hasSuffix(". ") || hasSuffix(". ") { | |
| return "." | |
| } | |
| else if hasSuffix("!") || hasSuffix("! ") || hasSuffix("! ") { | |
| return "!" | |
| } | |
| else { | |
| return nil | |
| } | |
| } | |
| var hasSentenceTerminator: Bool { | |
| return sentenceTerminator != nil | |
| } | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There are a couple TODO's left on here, and I eventually want to genericize this so that it's not tied to strings.