Skip to content

Instantly share code, notes, and snippets.

@zoranzaric
Created December 14, 2010 09:42
Show Gist options
  • Save zoranzaric/740195 to your computer and use it in GitHub Desktop.
Save zoranzaric/740195 to your computer and use it in GitHub Desktop.
#!/bin/zsh
# Benchmark bup against a generated mail corpus.
#
# ./markov.py creates the corpus in ./texte; every round then indexes and
# saves it into a fresh bup repository (./bup), restores the latest save
# into ./restore, and appends one semicolon-separated row of measurements
# to $LOGFILE. Sizes are whatever `du -s` reports; times are whole seconds
# taken from `date +%s`.

BASE_DIR=$(pwd)
STARTFILES=200000
ENDFILES=200365
FILES_DIR="${BASE_DIR}/texte"
RESTORE_DIR="${BASE_DIR}/restore"
BUP_DIR="${BASE_DIR}/bup"
export BUP_DIR
LOGFILE=/root/test.log

# Run one index/save/restore cycle and append one result row to $LOGFILE.
# Any arguments are passed through to `bup save` (e.g. -q).
benchmark_round() {
    local start_bup_size end_bup_size data_size restore_size
    local save_start save_time restore_start restore_time

    start_bup_size=$(du -s "$BUP_DIR" | cut -f1)
    save_start=$(date +%s)
    echo "INDEX"
    bup index -ux "$FILES_DIR"
    echo "SAVE"
    bup save "$@" -n benchmark "$FILES_DIR"
    # Stop the clock before the du calls so they are not counted as save time.
    save_time=$(( $(date +%s) - save_start ))
    end_bup_size=$(du -s "$BUP_DIR" | cut -f1)
    data_size=$(du -s "$FILES_DIR" | cut -f1)

    # Always restore into an empty directory.
    rm -rf "$RESTORE_DIR"
    mkdir "$RESTORE_DIR"
    restore_start=$(date +%s)
    echo "RESTORE"
    # Same path form for every round so the restore numbers are comparable.
    bup restore -C "$RESTORE_DIR" "benchmark/latest/"
    restore_time=$(( $(date +%s) - restore_start ))
    restore_size=$(du -s "$RESTORE_DIR" | cut -f1)

    echo "${start_bup_size};${end_bup_size};${save_time};${data_size};${restore_size};${restore_time}" >> "$LOGFILE"
}

# Start from a clean slate: no corpus, no repository, no old log.
rm -rf "$FILES_DIR" "$BUP_DIR" "$LOGFILE"
mkdir "$FILES_DIR"
bup init

# Header has exactly one name per logged field (no trailing separator).
echo "bupdir size before;bupdir size after;save time;data size;restore size;restore time" > "$LOGFILE"

# Initial corpus and first (quiet) save.
./markov.py init "$STARTFILES"
benchmark_round -q

# Incremental rounds: let markov.py modify/add mails, then measure again.
for x in $(seq "$STARTFILES" "$ENDFILES"); do
    echo "---RUN ${x}---"
    ./markov.py update "$x"
    benchmark_round
done
#!/usr/bin/python
import random, os, sys
class Markov(object):
    """Second-order Markov chain text generator.

    The chain is built from the whitespace-separated words of a file: each
    pair of consecutive words maps to the list of words that followed that
    pair in the corpus (``self.cache``).
    """

    def __init__(self, open_file):
        """Read every word from *open_file* and build the transition table.

        *open_file* must support ``seek(0)`` and ``read()``. It is read
        completely here, so the caller may close it afterwards.
        """
        self.cache = {}
        self.open_file = open_file
        self.words = self.file_to_words()
        self.word_size = len(self.words)
        self.database()

    def file_to_words(self):
        """Return the file's content split on whitespace."""
        self.open_file.seek(0)
        return self.open_file.read().split()

    def triples(self):
        """ Generates triples from the given data string. So if our string were
        "What a lovely day", we'd generate (What, a, lovely) and then
        (a, lovely, day).

        Yields nothing when there are fewer than three words.
        """
        words = self.words
        # zip stops at the shortest slice, which also covers the short-corpus case.
        for triple in zip(words, words[1:], words[2:]):
            yield triple

    def database(self):
        """Fill ``self.cache`` with ``(w1, w2) -> [followers]`` entries."""
        for w1, w2, w3 in self.triples():
            self.cache.setdefault((w1, w2), []).append(w3)

        if len(self.words) >= 3:
            # Treat the corpus as circular. Without these two entries the
            # final word pair has no follower and a chain that reaches it
            # dies with a KeyError in generate_markov_text().
            first, second = self.words[0], self.words[1]
            before_last, last = self.words[-2], self.words[-1]
            self.cache.setdefault((before_last, last), []).append(first)
            self.cache.setdefault((last, first), []).append(second)

    def generate_markov_text(self, size=25):
        """Return ``size + 1`` randomly chained words joined by spaces.

        Raises:
            ValueError: if the corpus has fewer than three words.
        """
        if self.word_size < 3:
            raise ValueError("need at least three words to generate text")

        seed = random.randint(0, self.word_size - 3)
        w1, w2 = self.words[seed], self.words[seed + 1]
        gen_words = []
        for _ in range(size):
            gen_words.append(w1)
            w1, w2 = w2, random.choice(self.cache[(w1, w2)])
        # After the loop w1 is the next word of the chain; appending w2
        # instead would silently skip a word.
        gen_words.append(w1)
        return ' '.join(gen_words)
class Mail(object):
    """A generated test mail that can be written to ``<path>/mail_<id>``."""

    def __init__(self, id, path, content="", attachment=False, attachment_size=0):
        """Store the mail's properties.

        id: value used in the file name (``mail_<id>``).
        path: directory the mail file is written into.
        content: body text placed after the header.
        attachment: when true, random bytes are appended to the file.
        attachment_size: number of random bytes to append.
        """
        self.id = id
        self.path = path
        self.content = content
        self.attachment = attachment
        self.attachment_size = attachment_size

    def generate_text(self):
        """Return the fixed header followed by the mail content."""
        # The blank lines between the header fields are part of the
        # original template and are kept as they were.
        header = "Date: %s;\n\nFrom: %s;\n\nTo: %s\n-----------\n" % (
            "10.10.2010",
            "Foo Bar <[email protected]>",
            "Bar Foo <[email protected]>",
        )
        return header + self.content

    def write(self):
        """Write the mail (and optional random attachment) to disk.

        Prints the file path on success. I/O errors are reported on stdout
        instead of being raised, so one failed mail does not stop a run.
        """
        file_path = os.path.join(self.path, 'mail_' + str(self.id))
        payload = self.generate_text()
        if not isinstance(payload, bytes):
            # Text has to be encoded because the file is opened in binary mode.
            payload = payload.encode('utf-8')
        try:
            # Binary mode: the attachment is raw bytes from os.urandom.
            with open(file_path, 'wb') as f:
                f.write(payload)
                if self.attachment:
                    f.write(os.urandom(self.attachment_size))
        except (IOError, OSError) as e:
            print("An Error Occured: %r" % e)
        else:
            print(file_path)
# Corpus the Markov chain is trained on, and the directory mails go into.
_CORPUS_FILE = 'romeoandjuliet.txt'
_MAIL_DIR = 'texte'


def _load_markov():
    """Build the Markov generator from the corpus file."""
    # Markov reads the whole file in its constructor, so the handle can be
    # closed as soon as the object exists.
    with open(_CORPUS_FILE, 'r') as corpus:
        return Markov(corpus)


def _write_random_mail(markov, mail_id):
    """Write one mail with random text and, half of the time, an attachment."""
    text = markov.generate_markov_text(random.randint(100, 10000))
    attachment = random.choice([True, False])
    attachment_size = random.randint(10 * 1024, 1 * 1024 * 1024)
    Mail(mail_id, _MAIL_DIR, text, attachment, attachment_size).write()


def init(mails):
    """Create the initial corpus: mails numbered 0 .. mails-1."""
    markov = _load_markov()
    for mail_id in range(mails):
        _write_random_mail(markov, mail_id)


def update(total_mails):
    """Simulate mailbox churn for a corpus of *total_mails* mails.

    Rewrites total_mails // 10 randomly chosen existing mails and then adds
    the same number of new mails, numbered from total_mails upwards.
    """
    # Floor division keeps this an int on Python 3 as well.
    new_mails = total_mails // 10
    markov = _load_markov()

    for _ in range(new_mails):
        # randrange excludes total_mails itself, which is not an existing
        # mail (ids run from 0 to total_mails - 1).
        _write_random_mail(markov, random.randrange(total_mails))

    for mail_id in range(total_mails, total_mails + new_mails):
        _write_random_mail(markov, mail_id)
if __name__ == '__main__':
    # Usage: markov.py (init|update) COUNT
    commands = {'init': init, 'update': update}
    if len(sys.argv) != 3 or sys.argv[1] not in commands:
        # sys.exit with a message prints it to stderr and exits with
        # status 1, so calling scripts can detect the failure.
        sys.exit("FAIL: usage: %s (init|update) COUNT" % sys.argv[0])
    commands[sys.argv[1]](int(sys.argv[2]))
bupdir size before;bupdir size after;save time;data size;restore size;restore time;
100;13897748;4268;14549760;4;1
13897748;16564516;927;15952864;15952852;2333
16564516;19237496;913;15942720;15942708;2281
19237496;21958812;970;15971744;15971732;2294
21958812;24699012;993;15999792;15999780;2268
24699012;27469244;1047;15992200;15992192;2312
27469244;30113732;956;15946652;15946644;2364
30113732;32831060;1037;15915648;15915640;2330
32831060;35511228;1040;15865772;15865764;2280
35511228;38261148;1046;15878920;15878912;2298
38261148;40907504;999;15873344;15873336;2306
40907504;43638600;1097;15924900;15924892;2322
bupdir size before;bupdir size after;save time;data size;restore size;restore time;
100;2768192;769;2906608;4;0
2768192;3287264;165;3186200;3186236;384
3287264;3835332;174;3200080;3200116;400
3835332;4380616;181;3207376;3207412;398
4380616;4901748;161;3190880;3190916;399
4901748;5450480;172;3216152;3216188;293
5450480;5955936;165;3159748;3159784;400
5955936;6521872;189;3193328;3193360;410
6521872;7043720;164;3159240;3159272;300
7043720;7556556;167;3136124;3136156;398
7556556;8089256;176;3138088;3138120;398
8089256;8679808;203;3199196;3199224;410
bupdir size before;bupdir size after;save time;data size;restore size;restore time;
100;54983660;18445;57597108;4;0
54983660;65768060;4367;63333856;24604876;8009
65768060;65818116;1013;63283784;24604892;7832
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment