Skip to content

Instantly share code, notes, and snippets.

@uogbuji
Created November 20, 2013 06:37
Show Gist options
  • Save uogbuji/7558721 to your computer and use it in GitHub Desktop.
Save uogbuji/7558721 to your computer and use it in GitHub Desktop.
Process a Twitter stream in the form of Wayin "bites" JSON into useful Exhibit JSON
import sys
import re
from amara.thirdparty import json #https://pypi.python.org/pypi/Amara/
from datachef.exhibit import emitter #https://pypi.python.org/pypi/datachef
# re.UNICODE makes \w match non-ASCII word characters on Python 2 as well (it
# is already the default for text patterns on Python 3), so a hashtag with
# accented or non-Latin letters is captured whole instead of being cut short.
HASHTAG_PAT = re.compile(r"#(\w+)", re.UNICODE)


def run(source=None, em=None):
    """Flatten Wayin "bites" JSON results into items and send each to ``em``.

    source - file-like object readable by ``json.load``; the parsed document
        must contain a top-level ``results`` list
    em - object exposing ``send(item)``; it is called once per result, in
        input order, with a flat dict describing that result

    Every item carries the id, timestamp, language, author details, text,
    link and counters copied from the result, plus a ``hashtags`` list with
    the tags (without the leading ``#``) found in the text.

    Returns None. Raises KeyError if a result lacks one of the expected keys.
    """
    obj = json.load(source)
    for res in obj[u'results']:
        # Hoist the nested containers once instead of re-walking them per field
        entry = res[u'item']
        body = entry[u'body']
        user = body[u'user']
        item = {
            u'id': res[u'id'],
            u'timestamp': res[u'creation_time'],
            u'lang': body[u'lang'],
            # 'userhandle' and 'username' both carry the screen name; both
            # keys are kept because consumers of the output may use either
            u'userhandle': user[u'screen_name'],
            u'userrealname': user[u'name'],
            u'profile_image_url': user[u'profile_image_url'],
            u'username': user[u'screen_name'],
            u'userplace': user[u'location'],
            u'userurl': user[u'url'],
            u'statuses_count': user[u'statuses_count'],
            u'followers_count': user[u'followers_count'],
            u'text': entry[u'message'],
            u'link': entry[u'link'],
            u'favorites_count': body[u'favorite_count'],
            u'retweet_count': body[u'retweet_count'],
        }
        # A JSON null message would make findall() raise TypeError and abort
        # the whole run; treat it as text with no hashtags instead
        item[u'hashtags'] = HASHTAG_PAT.findall(item[u'text'] or u'')
        em.send(item)
# Handle command-line arguments
import sys
from akara.thirdparty import argparse # Sorry PEP 8 ;)
if __name__ == '__main__':
    # Usage:
    #   python wayin2ejson.py wayinbites-sample.json --out exhibit.json
    #   python wayin2ejson.py < wayinbites-sample.json
    #
    # The argument parsing used to sit after an unconditional sys.exit(0), so
    # it never ran, and it called run() with an `emitter=` keyword that run()
    # does not accept. The source is now optional and defaults to stdin, so
    # the piped form above keeps working.
    parser = argparse.ArgumentParser()
    parser.add_argument('source', type=argparse.FileType('r'), metavar='source',
        nargs='?', default=sys.stdin,
        help='Input Wayin "bites" JSON file (default: read from stdin)')
    parser.add_argument('--out', type=argparse.FileType('w'), default=sys.stdout,
        help='file where output should be written, JSON format for now '
        '(default: write to stdout)')
    args = parser.parse_args()

    report_emitter = emitter.emitter(args.out)
    run(source=args.source, em=report_emitter)
    report_emitter.send(emitter.ITEMS_DONE_SIGNAL)
    report_emitter.send(None)  # No data profile
    report_emitter.close()

    # Only close files this script opened; the standard streams belong to
    # the interpreter
    if args.source is not sys.stdin:
        args.source.close()
    if args.out is not sys.stdout:
        args.out.close()
    sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment