Last active
June 29, 2022 01:44
-
-
Save fbparis/738c52e253639edb3cb41f90ea0b490c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Python Spintax parser

This module provides an efficient spintax parser in which every possible spun has the same probability.
Given a masterspin, you can get the total number of possible spuns or a randomly chosen spun.
"""
import re
import sys
from operator import truediv
from random import randint, shuffle
from time import time

from numpy.random import choice
from scipy.stats import norm
def spin(masterspin, replace_numbers=False, get_count=False):
    """Get spuns count or random spun from a masterspin.

    The masterspin is scanned once, character by character. Every alternative
    of every group carries a weight equal to the number of spuns it can expand
    to, and a closing bracket draws one alternative with probability
    weight / sum(weights), which makes every expansion of the whole masterspin
    equally likely.

    Parameters
    ----------
    masterspin : string
        A spintax string with brackets "{" and "}" and delimiter "|".
    replace_numbers : bool
        If true, intervals written as [a-b] (a and b being non-negative
        integers, in any order) in the generated spun will be replaced by
        randint(min(a, b), max(a, b)).
    get_count : bool
        If true, the return value will be the total number of possible spuns
        you can get from masterspin.

    Returns
    -------
    string, if get_count is False
        A randomly chosen spun from masterspin, each possible spun having
        probability (1 / Total Possible Spuns).
    int, if get_count is True
        The total number of possible spuns you can get from masterspin, as an
        exact integer.

    Notes
    -----
    Outside any group, "|" and "}" are kept as literal text. The content of a
    group that is never closed is neither emitted nor counted.
    """
    # texts[d] holds the alternatives of the group currently open at depth d;
    # texts[0] is the single top-level output string.
    texts = [['']]
    # weights[d][i] is the number of spuns alternative i of that group expands
    # to. Plain ints keep the count exact however large it grows.
    weights = [[1]]
    total = 1
    for c in masterspin:
        if c == '{':
            texts.append([''])
            weights.append([1])
        elif len(texts) == 1:
            # Top level: every character other than "{" is literal.
            texts[0][0] += c
        elif c == '|':
            texts[-1].append('')
            weights[-1].append(1)
        elif c == '}':
            options = texts.pop()
            option_weights = weights.pop()
            n = sum(option_weights)
            if len(texts) == 1:
                # Independent top-level groups multiply the overall count.
                total *= n
            else:
                # A nested group multiplies the weight of the alternative
                # that contains it.
                weights[-1][-1] *= n
            if not get_count:
                # Draw an index rather than the string itself so the result
                # stays a plain Python string.
                probabilities = [truediv(w, n) for w in option_weights]
                picked = int(choice(len(options), p=probabilities))
                texts[-1][-1] += options[picked]
        else:
            texts[-1][-1] += c
    if get_count:
        return total
    spun = texts[0][0]
    if replace_numbers:
        def repl_numbers(m):
            a, b = int(m.group(1)), int(m.group(2))
            return str(randint(min(a, b), max(a, b)))
        spun = re.sub(r'\[([0-9]+)-([0-9]+)\]', repl_numbers, spun)
    return spun
def test(masterspin='{A|{B|C|{D|E [6-9]|F}}}', replace_numbers=False, n=10000):
    """Run some test on a spintax string.

    Generates n spuns, tallies how often each distinct spun appears and fits a
    normal distribution to those tallies.

    Parameters
    ----------
    masterspin : string
        Some spintax string with brackets "{" and "}" and delimiter "|".
    replace_numbers : bool
        If true, intervals such as [a-b] in the masterspin will be replaced by
        a random integer of that interval in the generated spuns.
    n : int
        How many spuns you want to generate.

    Output
    ------
    Print a line with the masterspin, n, number of possible spuns, number of
    distinct spuns generated, expected number of occurrences of each spun
    (n / count), mean and scale of the normal distribution fitting the results
    and execution time.
    """
    count = spin(masterspin, get_count=True)
    avg = float(n) / count
    occurrences = {}
    started = time()
    for _ in range(n):
        s = spin(masterspin, replace_numbers, get_count=False)
        occurrences[s] = occurrences.get(s, 0) + 1
    elapsed = time() - started
    # norm.fit needs a real sequence; on Python 3 dict.values() is only a view.
    mu, std = norm.fit(list(occurrences.values()))
    # A single parenthesised argument prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('%s -> n=%d ; count=%d ; found=%d ; avg=%.2f ; mu=%.2f ; std=%.2f [%.2fs]'
          % (masterspin, n, count, len(occurrences), avg, mu, std, elapsed))
def info(masterspin, n=None, p=0.999):
    """Get some indications on how many distinct spuns you can generate with probability p according to a masterspin.

    With N the number of possible spuns, the probability that i uniformly
    drawn spuns are all distinct is the running product of (N - k) / N for
    k = 1 .. i - 1.

    Parameters
    ----------
    masterspin : string
        Some spintax string with brackets "{" and "}" and delimiter "|".
    n : int
        Get the probability of generating n distinct spuns from masterspin.
    p : float
        Get the number of spuns you can generate from masterspin so that
        they're all distinct with probability p.

    Output
    ------
    If n is not None, print the probability of getting n distinct spuns from
    masterspin, preceded by the first number of spuns (below n) at which that
    probability drops to p or less, if any. Otherwise print the number of
    spuns at which the probability that all spuns are distinct drops to p or
    less.
    """
    count = spin(masterspin, get_count=True)
    N = float(count)
    x = 1.
    if n is not None:
        n = int(n)
        if n < 0:
            print('no result for n < 0')
        elif n > count:
            # Compare and report against the exact integer count.
            print('p=0 for n > %d' % count)
        else:
            # Report the threshold crossing once only, even if x later
            # underflows to 0.0.
            pending = p is not None
            i = 1
            while i < n:
                if pending and x <= p:
                    print('n=%d for probability p=%f' % (i, x))
                    pending = False
                x *= (N - i) / N
                i += 1
            print('p=%f for n=%d' % (x, n))
    else:
        p = float(p)
        if (p <= 0) or (p > 1):
            print('p must be greater than 0 and less or equal to 1')
        else:
            i = 1
            try:
                while x > p:
                    x *= (N - i) / N
                    i += 1
                print('n=%d for probability p=%f' % (i, x))
            except KeyboardInterrupt:
                # It could take a very long time if the spintax string is
                # complex enough; report the progress made so far.
                print('p=%f for n=%d' % (x, i))
def pretty_print(masterspin):
    """Print an indented outline of a spintax string.

    Parameters
    ----------
    masterspin : string
        Some spintax string with brackets "{" and "}" and delimiter "|".

    Output
    ------
    Display count of opening and closing brackets and print an indented
    version of the spintax string which can help identifying errors. Each
    nesting level is marked by one "-" character; "|" and "}" found outside
    any group are printed as ordinary text.
    """
    out = sys.stdout.write
    out('Found %d "{" and %d "}"' % (masterspin.count('{'), masterspin.count('}')))
    tab = "-"
    indent = 0
    for c in masterspin:
        if c == '{':
            # The bracket goes on its own line at the current depth and its
            # content starts one level deeper.
            out('\n%s%c\n' % (tab * indent, c))
            indent += 1
            out(tab * indent)
        elif c == '|' and indent:
            out('\n%s%c' % (tab * indent, c))
        elif c == '}' and indent:
            indent -= 1
            out('\n%s%c\n%s' % (tab * indent, c, tab * indent))
        else:
            out(c)
    out('\n')
if __name__ == '__main__':
    # Command line utility.
    # Call the script with the path of a file containing one masterspin per
    # line; one spun per line is printed, in random line order.
    usage = 'Usage: python spinner.py PATH_TO_MASTERSPINS_FILE.TXT'
    if len(sys.argv) < 2:
        sys.exit(usage)
    try:
        with open(sys.argv[1], 'r') as src:
            lines = src.readlines()
    except (IOError, OSError) as err:
        # Only a file that cannot be read is reported with the usage text;
        # any other failure is left to surface with its own traceback.
        sys.exit('%s\n%s' % (err, usage))
    shuffle(lines)
    for line in lines:
        print(spin(line, replace_numbers=True).strip())
Can you port this spin function to JS or PHP?
I tried, but I got many errors.
Do you have a PHP version of this, with the same probability algorithm?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You only need to copy the spin function, so I guess importing re, random.randint and numpy.random.choice is enough.