Last active
January 14, 2018 16:19
-
-
Save lejafar/da2c95b671df6cb1597fc44a07f312be to your computer and use it in GitHub Desktop.
Determine the most likely value that is about to follow based on binary sequence using Naive Bayes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Example implementation of https://stats.stackexchange.com/q/322943/190938 | |
Determine the most likely value that is about to follow based on binary sequence using Naive Bayes | |
Test should output: | |
Predicted 1 CORRECTLY for [0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0] , pattern: [0, 1] | |
Predicted 1 CORRECTLY for [1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0] , pattern: [0, 1] | |
Predicted 0 CORRECTLY for [1, 1, 0, 1, 0, 0, 1] , pattern: [0, 1, 0] | |
Predicted 1 CORRECTLY for [1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0] , pattern: [0, 1] | |
""" | |
def parse(history, n): | |
""" | |
Generate list of (pattern, followed_by) tuples with patterns | |
of lenght n found in inside `history` for [0,1,1,0,1,0,1,1,0,1] this gives: | |
[([0, 1, 1], 0), | |
([1, 1, 0], 1), | |
([1, 0, 1], 0), | |
([0, 1, 0], 1), | |
([1, 0, 1], 1), | |
([0, 1, 1], 0), | |
([1, 1, 0], 1)] | |
""" | |
for i in range(len(history) - n): | |
yield (history[i:i + n], history[i + n]) | |
def hash(pattern): | |
""" | |
It's just convinient to store 1,0-pattern at their binary value in `counts` | |
""" | |
return sum([d * pow(2, i) for i, d in enumerate(reversed(pattern))]) | |
def learn(example, n): | |
""" | |
Count how many times a certain pattern occurs followed by a 1 or 0 | |
""" | |
occurences = parse(example, n) | |
# counts[followed_by][pattern] | |
counts = [[0] * pow(2, n), [0] * pow(2, n)] | |
# loop over all occurences | |
for occurence in occurences: | |
pattern, followed_by = occurence | |
pattern_hash = hash(pattern) | |
counts[followed_by][pattern_hash] += 1 | |
return counts | |
def probability(example, n): | |
""" | |
Determine `counts` array in order to infer `followed_by` using `example` | |
""" | |
counts = learn(example, n) | |
# determine probability that 1 will follow after `previous` | |
# (last n values), using what we know about past values | |
previous = example[-n:] | |
previous_hash = hash(previous) | |
# Determine probability of `followed_by`=1 using Naive Bayes | |
total_count = (counts[1][previous_hash] + counts[0][previous_hash]) | |
# if pattern was never (`total_count` == 0) seen return None | |
return counts[1][previous_hash] / total_count if total_count else None | |
def prediction(example): | |
""" | |
Determine probability of the next value being 1 | |
""" | |
max_n = len(example) - 1 | |
# Determine probability of `followed_by`=1 over different pattern lengths | |
probabilities = [(probability(example, n), n) for n in range(1, max_n) if probability(example, n) is not None] | |
# If min equals 0, there is a pattern that is inconsistent with next being 1 | |
p = min(probabilities)[0] and max(probabilities)[0] | |
n = [n for pp, n in probabilities if pp==p][0] | |
most_informative = example[-n:] + [int(p)] | |
return (p, most_informative) | |
if __name__ == "__main__": | |
tests = [([0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0], 1), | |
([1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0], 1), | |
([1, 1, 0, 1, 0, 0, 1], 0), | |
([1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0], 1)] | |
# run tests | |
for test in tests: | |
pattern, expectation = test | |
outcome, mi = prediction(pattern) | |
test_outcome = "CORRECTLY" if outcome == expectation else "INCORRECTLY" | |
print("Predicted", int(outcome), | |
test_outcome,"for", pattern, ", pattern:", mi) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment