Skip to content

Instantly share code, notes, and snippets.

@jakehawken
Last active August 5, 2019 20:21
Show Gist options
  • Select an option

  • Save jakehawken/04d90ac439cbd909ee8041798d2d4166 to your computer and use it in GitHub Desktop.

Select an option

Save jakehawken/04d90ac439cbd909ee8041798d2d4166 to your computer and use it in GitHub Desktop.
I've never actually researched how an actual Markov Chain works, but I'm attempting, by way of a lot of assumptions, to write something at least similar to one in Swift. It’s a fun mental exercise. My approach is to make something that takes in an array of strings, builds a predictive tree based off of that input, and then can generate sentences…
// ProbabilityTree.swift
// Created by Jake Hawken on 8/4/19.
// Copyright © 2019 Jake Hawken. All rights reserved.
import Foundation
// TODO: genericize so that this can be used for things other than strings
class ProbabilityTree {
private let rootNode = Node(word: "", parent: nil)
func add(wordSequence words: [String]) {
var currentNode = rootNode
for word in words {
currentNode = currentNode.insert(newWord: word)
if currentNode.word.hasSentenceTerminator {
//TODO: keep track of incidence of terminators
currentNode = rootNode
}
}
}
func generateSentence() -> String {
var words = [String]()
rootNode.appendTo(sentence: &words)
if let firstWord = words.first {
let capitalized = firstWord.capitalizingFirstLetter()
words[0] = capitalized
}
var last = words.removeLast()
if !last.hasSentenceTerminator {
last += sentenceTerminator()
}
words.append(last)
return words.joined(separator: " ")
}
private func sentenceTerminator() -> String {
// TODO: based on overall incidence of given sentence terminators, select a statistically-weighted random terminator
return "."
}
}
extension ProbabilityTree {
class Node {
let word: String
let parent: Node?
private(set) var incidence: UInt = 1
private(set) var children = Set<Node>()
init(word: String, parent: Node?) {
self.word = word
self.parent = parent
}
func insert(newWord: String) -> Node {
incidence += 1
if let existing = existingChild(forWord: newWord) {
return existing
}
let new = newChild(forWord: newWord)
children.insert(new)
return new
}
func appendTo(sentence words: inout [String]) {
guard let next = nextNode() else {
return
}
words.append(next.word)
if next.word.hasSentenceTerminator {
return
}
next.appendTo(sentence: &words)
}
private func nextNode() -> Node? {
guard !word.hasSentenceTerminator else {
return nil
}
guard !children.isEmpty else {
return nil
}
let randomValue = Float.random(in: 0...1)
let totalIncidence = children.reduce(Float(0)) { $0 + Float($1.incidence) }
let sortedByIncidence = children.sorted { $0.incidence < $1.incidence }
var next: Node?
for node in sortedByIncidence {
let likelihood = Float(node.incidence) / totalIncidence
if randomValue <= likelihood {
next = node
break
}
}
return next ?? sortedByIncidence.last
}
private var root: Node {
return parent ?? self
}
private func existingChild(forWord word: String) -> Node? {
return children.first { $0.word == word }
}
private func newChild(forWord word: String) -> Node {
return Node(word: word, parent: self)
}
}
}
extension ProbabilityTree.Node: Hashable {
static func == (lhs: ProbabilityTree.Node, rhs: ProbabilityTree.Node) -> Bool {
return lhs.word == rhs.word
}
func hash(into hasher: inout Hasher) {
hasher.combine(word)
}
}
private extension Set where Element == ProbabilityTree.Node {
func contains(word: String) -> Bool {
return map { $0.word }.contains(word)
}
}
private extension String {
func capitalizingFirstLetter() -> String {
return prefix(1).capitalized + dropFirst()
}
mutating func capitalizeFirstLetter() {
self = self.capitalizingFirstLetter()
}
var sentenceTerminator: String? {
if hasSuffix("?") || hasSuffix("? ") || hasSuffix("? ") {
return "?"
}
else if hasSuffix(".") || hasSuffix(". ") || hasSuffix(". ") {
return "."
}
else if hasSuffix("!") || hasSuffix("! ") || hasSuffix("! ") {
return "!"
}
else {
return nil
}
}
var hasSentenceTerminator: Bool {
return sentenceTerminator != nil
}
}
@jakehawken
Copy link
Author

There are a couple TODO's left on here, and I eventually want to genericize this so that it's not tied to strings.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment