Created
July 5, 2012 04:52
-
-
Save ttscoff/3051453 to your computer and use it in GitHub Desktop.
Modified version of Dr. Drang's ThinkUp db -> text file script. Adds Markdown formatting and t.co expansion.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import contextlib
import csv
import os
import re
import sys
import urllib2
from datetime import datetime
# Set to True to resolve t.co short links through the network. Off by
# default because every matched link then costs one HTTP request.
expand_tco_links = False


# Utility function for expanding t.co links
def expand_tco(match):
    """Return the replacement text for one matched t.co link.

    Designed to be passed as the replacement callback to ``re.sub``.

    Args:
        match: Regex match object whose whole match (group 0) is the link.

    Returns:
        The matched text unchanged when ``expand_tco_links`` is false.
        Otherwise the URL reported by ``geturl()`` after opening the link
        (with a leading ``https`` rewritten to ``http``), or that rewritten
        link itself when the request fails.
    """
    if not expand_tco_links:
        return match.group(0)

    url = re.sub(r'^https', 'http', match.group(0))
    try:
        # closing() releases the connection as soon as the URL has been
        # read instead of leaving it to garbage collection.
        with contextlib.closing(urllib2.urlopen(url)) as response:
            return response.geturl()
    except Exception:
        # Best effort: a link that cannot be opened is kept as-is. Catching
        # Exception (not a bare except) lets Ctrl-C and SystemExit through,
        # so a long expansion run can still be interrupted.
        return url
# Put your Twitter username here.
me = "MarkedApp"

# Archive format: the tweet text, then a Markdown link whose label is the
# timestamp and whose target is the tweet's status URL.
single = "%s\n%shttp://twitter.com/" + me + "/status/%s"

# A t.co short link. The lazy body stops at the first whitespace or
# punctuation character, or at the end of the text, so that a link which is
# the last thing in a tweet is matched too.
tco_link = re.compile(
    r'https?://t\.co/\S+?(?=\s|\.|,|\)|:|;|\'|"|\?|!|>|&|$)')

# Open the CSV file specified on the command line and read the field names,
# then fill a list with the tweets, with each tweet a dictionary. The file
# is closed as soon as all rows have been read.
with open(sys.argv[1]) as tfile:
    treader = csv.reader(tfile)
    # An empty file has no header row; treat it as having no tweets.
    fields = next(treader, [])
    allInfo = [dict(zip(fields, row)) for row in treader]

# Collect only the info we need in a list of lists. Convert the date string
# into a datetime object.
# We put the date first so we can sort by date easily.
tweets = [
    [
        datetime.strptime(info['pub_date'], "%Y-%m-%d %H:%M:%S"),
        info['post_id'],
        info['post_text'],
    ]
    for info in allInfo
]
tweets.sort()

# Construct a new list of tweets formatted the way the IFTTT recipe does.
out = [
    single % (
        tco_link.sub(expand_tco, text) + '\n\n',
        '[' + when.strftime("%B %d, %Y at %I:%M%p") + '](',
        post_id + ')\n',
    )
    for when, post_id, text in tweets
]

# Entries are separated by a Markdown horizontal rule, and one more rule
# closes the archive. Each call prints a single parenthesized string, which
# behaves the same whether print is a statement or a function.
print('\n---\n\n'.join(out))
print('---')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
By the way, turning on t.co expansion will take a long time if you have a good number of tweets with links. But you only have to run it once, right? Also, a backlog of tweets to 2008-something will crash nvALT if it's in one file. I should follow @ianbeck's cue and break these up into multiple archives after all.