Skip to content

Instantly share code, notes, and snippets.

@dmarx
Created September 9, 2012 15:51
Show Gist options
  • Save dmarx/3685199 to your computer and use it in GitHub Desktop.
Save dmarx/3685199 to your computer and use it in GitHub Desktop.
A variation on /u/StonedEmu's text compression program, written to introduce him to a few slightly more advanced Python concepts.
# Path of the raw text file that gets tokenised and encoded.
_text = "raw.txt"
# Path of the persisted word index (one word per line).
_index = "index.txt"
def readFile(F):
    '''Read a text file into a list of whitespace-stripped lines.

    F is the path of the file to read. Every line of the file yields one
    list entry, in file order, with leading and trailing whitespace
    (including the newline) removed; blank lines therefore become ''.
    An empty file yields an empty list.

    Any error raised by open() (e.g. the file does not exist) propagates
    to the caller.
    '''
    with open(F, 'r') as f:
        return [line.strip() for line in f]
def update_index(ind, F, txt):
    '''Add the words of txt that are missing from the index.

    ind -- list of already-indexed words; it is extended in place.
    F   -- path of the index file; new words are appended to it, one
           per line (the file is created if it does not exist).
    txt -- iterable of text rows; each row is split on whitespace.

    New words are added in order of first appearance in txt, so the
    in-memory list and the file always grow in the same, reproducible
    order. Returns ind (the same list object that was passed in).
    '''
    # Compare against the index that was passed in, not a module global.
    known = set(ind)
    new_words = []
    for row in txt:
        for word in row.split():
            if word not in known:
                known.add(word)
                new_words.append(word)
    if new_words:
        # Text mode, to match the text-mode read used for the index file.
        with open(F, 'a') as f:
            f.writelines(word + '\n' for word in new_words)
        ind.extend(new_words)
    return ind
import errno

# Load the persisted word index. On a first run the index file does not
# exist yet (update_index creates it when it appends), so start empty
# instead of crashing; any other I/O error is still raised.
try:
    index = readFile(_index)
except IOError as err:
    if err.errno != errno.ENOENT:
        raise
    index = []
text = readFile(_text)
index = update_index(index, _index, text)

# Map every word to its position in the index once, so each token is a
# dict lookup and not a scan of the whole index. setdefault keeps the
# first position if the index file happens to contain a word twice.
position = {}
for i, w in enumerate(index):
    position.setdefault(w, i)

for row in text:
    tokens = row.split()
    # Each word is encoded as the character whose code is its index
    # position + 200.
    # NOTE: under Python 2, chr() only accepts 0-255, so an index with more
    # than 56 entries raises ValueError here.
    compressed = [chr(position[word] + 200) for word in tokens if word in position]
    print(compressed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment