Created
May 29, 2014 13:47
-
-
Save themiurgo/868268025a3e7e3b55a0 to your computer and use it in GitHub Desktop.
Word Generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
"""This script was used to generate random test files for the course | |
of Computer Networks 2014 at University of Birmingham, Computer Science. | |
Author: Antonio Lima | |
License: WTFPL | |
""" | |
from __future__ import print_function | |
import collections | |
import itertools | |
import random | |
import string | |
import sys | |
def default_lengths(min_length=3, max_length=10):
    """Yield an endless stream of random word lengths.

    Each value is drawn with ``random.randint``, so both ``min_length`` and
    ``max_length`` are inclusive bounds. The generator never terminates on
    its own; the consumer decides when to stop pulling values.
    """
    bounds = (min_length, max_length)
    while True:
        yield random.randint(*bounds)
def random_word_length(alphabet, length):
    """Return a random word of exactly ``length`` characters.

    Args:
        alphabet: Non-empty sequence of characters to draw from; each
            character is picked independently with ``random.choice``.
        length: Number of characters in the result. Zero (or a negative
            value) yields the empty string.

    Returns:
        The generated word as a string.
    """
    # ``range`` instead of the Python 2-only ``xrange``: the function now
    # works on both Python 2 and Python 3. Word lengths are tiny, so the
    # list that Python 2's ``range`` builds is irrelevant.
    return ''.join(random.choice(alphabet) for _ in range(length))
def random_words(alphabet=string.ascii_letters, lengths=default_lengths()):
    """Yield random words, one per value produced by ``lengths``.

    This is the single definition of ``random_words``; an earlier duplicate
    that was immediately shadowed has been folded into it.

    Args:
        alphabet: Characters to build words from. Defaults to
            ``string.ascii_letters``, which exists on Python 2 and 3 and,
            unlike the Python 2-only ``string.letters``, does not depend on
            the locale.
        lengths: Iterable of word lengths. The default generator is created
            once at definition time; sharing it between calls is harmless
            because it is endless and keeps no state besides its bounds.

    Yields:
        One random word per length, for as long as ``lengths`` produces
        values (forever with the default).

    Side effects:
        Re-seeds the global ``random`` generator with a fixed value when
        iteration starts (on the first ``next()``, not when this function is
        called).
    """
    # Fixed seed: the intent appears to be reproducible test files across
    # runs (for a given Python version) -- confirm before changing.
    random.seed("networks2014")
    for length in lengths:
        yield random_word_length(alphabet, length)
def random_words_repetition(repetition_probability, words):
    """Pass ``words`` through, occasionally re-emitting an earlier word.

    Every incoming word is remembered in a bounded window (at most 200000
    entries) and yielded. After each word, with probability
    ``repetition_probability``, the oldest word still in the window is
    removed from it and yielded as well.

    Args:
        repetition_probability: Chance, compared against ``random.random()``,
            of emitting a repeated word after each incoming word.
        words: Iterable of words to pass through.

    Yields:
        The incoming words in order, interleaved with repeated ones.
    """
    history = collections.deque(maxlen=200000)
    for current in words:
        # Remember the word first, so the window is never empty when a
        # repetition is drawn below.
        history.append(current)
        yield current
        repeat_now = random.random() < repetition_probability
        if repeat_now:
            oldest = history.popleft()
            yield oldest
def size_capper(total_size, words):
    """Yield words until their cumulative byte size exceeds ``total_size``.

    The size of a word is its UTF-8 encoded length plus one byte for the
    end-of-line character that follows it in the output.

    NOTE: the limit is checked *after* a word has been yielded, so the word
    that crosses the limit is still emitted. The first word is therefore
    always emitted, and the total can overshoot ``total_size`` by up to one
    word.

    Args:
        total_size: Byte budget after which no further words are pulled.
        words: Iterable of words; it may be infinite.

    Yields:
        Words from ``words``, stopping once the running size is greater than
        ``total_size``.
    """
    emitted = 0
    for item in words:
        yield item
        encoded = item.encode('utf-8')
        emitted = emitted + len(encoded) + 1  # one extra byte for the EOL
        if emitted > total_size:
            return
def main():
    """Command-line entry point: print a random word list to STDOUT.

    Reads the target size in bytes (``sys.argv[1]``) and the repetition
    probability (``sys.argv[2]``), then prints one word per line by chaining
    ``random_words`` -> ``random_words_repetition`` -> ``size_capper``.

    Returns:
        1 after printing the usage text to STDERR when an argument is
        missing, cannot be parsed, or the probability lies outside [0, 1];
        None (exit status 0 under ``sys.exit``) after a successful run.
    """
    usage = (
        "Usage:\n"
        "wordgenerator <bytes> <repetition_probability>\n"
        "The script outputs to STDOUT. Repetition probability must be between\n"
        "0 and 1.\n"
    )
    try:
        size = int(sys.argv[1])
        prob = float(sys.argv[2])
    except (IndexError, ValueError):
        print(usage, file=sys.stderr)
        return 1
    # The usage text promises a probability in [0, 1]; enforce it instead of
    # silently accepting out-of-range values. The chained comparison is also
    # False for NaN, so "nan" is rejected too.
    if not 0 <= prob <= 1:
        print(usage, file=sys.stderr)
        return 1
    wordlist = random_words()
    wordlist_with_repetition = random_words_repetition(prob, wordlist)
    capped_wordlist = size_capper(size, wordlist_with_repetition)
    for word in capped_wordlist:
        print(word, file=sys.stdout)
if __name__ == "__main__": | |
sys.exit(main()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment