Created
March 30, 2010 00:39
-
-
Save mattkatz/348616 to your computer and use it in GitHub Desktop.
Microbloggable.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# microbloggable.py
# breaks a story up into notices or tweets
# By Matt Katz: http://www.morelightmorelight.com
# this code is public domain but please
# say hi: http://identi.ca/mattkatz
# Input story path and output table path handed to process_story at the
# bottom of the script.
text_in = 'c:/last-beautiful.txt'
output = 'c:/twitterable.txt'
def process_story(file_in, file_out):
    """Split the story in *file_in* into notices and write them to *file_out*.

    The output begins with a ``Notices|Length`` header row. Every notice row
    after that is written by ``find_notice``. Input lines that start with
    ``#`` are skipped.

    file_in  -- path of the text file to read.
    file_out -- path of the pipe-separated file to (over)write.
    """
    # Context managers close both files even if processing raises part-way.
    with open(file_out, 'w') as fout:
        with open(file_in, 'r') as fin:
            fout.write("Notices|Length\n")
            # here is where the action begins
            for line in fin:
                if line.startswith("#"):
                    # let's skip those ### lines, even though they are nice
                    continue
                # Peel one notice at a time off the front of the line until
                # at most a single character (typically the newline) is left.
                while len(line) > 1:
                    end_of_notice = find_notice(line, fout)
                    if end_of_notice <= 0:
                        # Nothing usable was consumed; stop so we cannot
                        # loop forever on the same text.
                        break
                    line = line[end_of_notice + 1:]
def min_unless_neg(a, b):
    """Return the smaller of *a* and *b*, ignoring a negative operand.

    A negative value means "not found" (as returned by ``str.find``), so when
    one argument is negative the other one is returned as-is. If both are
    negative the result is *b*, which is still negative.
    """
    if a < 0:
        return b
    if b < 0:
        return a
    return min(a, b)
def indices(text, list, reverse=False):
    """Return a list with the position of each marker from *list* in *text*.

    text    -- string to search.
    list    -- iterable of substrings to look for (the name shadows the
               builtin; it is kept so existing keyword callers still work).
    reverse -- when true, report the last occurrence of each marker
               (``str.rfind``) instead of the first (``str.find``).

    A marker that does not occur yields -1, exactly as ``find``/``rfind`` do.
    """
    search = text.rfind if reverse else text.find
    # Build a real list: on Python 3 ``map`` would hand back a one-shot
    # iterator, which cannot be indexed, measured or iterated twice.
    return [search(marker) for marker in list]
def find_notice(line, fout, max_len=140):
    """Write the next notice at the start of *line* to *fout*.

    The notice is the longest run of whole sentences that fits in the first
    *max_len* characters, i.e. everything up to the last ``!``, ``?`` or ``.``
    inside that window. If no sentence ends inside the window, the notice
    extends to the first terminator anywhere in *line*, even though it is
    then longer than *max_len*.

    line    -- remaining text to split.
    fout    -- writable file-like object; receives one ``notice|length`` row.
    max_len -- size of the window searched first (default 140).

    Returns the index in *line* of the terminator that ends the notice, or -1
    when *line* contains no terminator at all (nothing is written then).
    """
    ends = ('!', '?', '.')
    # count forward till you hit max_len characters, then back to a ! ? or .
    window = line[:max_len]
    end_of_notice = max(window.rfind(mark) for mark in ends)
    if end_of_notice < 0:
        # No sentence ends inside the window, so grab the first sentence we
        # have. This has to look at the whole line: the window was just
        # shown to contain no terminator.
        hits = [pos for pos in (line.find(mark) for mark in ends) if pos >= 0]
        if not hits:
            # No terminator anywhere; do not emit an empty "|0" row.
            return -1
        end_of_notice = min(hits)
    notice = line[:end_of_notice + 1]
    fout.write("%s|%d\n" % (notice, len(notice)))
    return end_of_notice
# Script entry: split the configured story into notices when the file runs.
process_story(text_in, output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment