Created
December 6, 2012 04:59
-
-
Save harshavardhana/4221876 to your computer and use it in GitHub Desktop.
Get the Tweets, Following, and Followers counts from "https://twitter.com/<userid>/followers" and encode them as JSON.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import logging | |
import chardet | |
import json | |
from optparse import OptionParser | |
from boilerpipy import (Extractor, isValidhtml, | |
compat_urllib_request) | |
def _parse_twitter_counts(items):
    """Collect the tweet/following/follower counters from extracted text.

    Only the first three entries of ``items`` are inspected.  An entry is
    matched by its trailing label ('Tweets', 'Following' or 'Followers');
    thousands separators are dropped and the first run of digits is stored
    as a string under 'total_tweets', 'following' or 'followers'.

    Entries that carry a label but no digits are skipped instead of
    raising IndexError, so one malformed entry cannot discard the others.

    NOTE(review): assumes ``items`` is a sliceable sequence of strings, as
    returned by ``Extractor.query()`` -- confirm against boilerpipy.
    """
    # Function-scope import: the file's import header does not provide
    # ``re`` (the original imported it locally as well).
    import re

    label_to_key = (
        ('Tweets', 'total_tweets'),
        ('Following', 'following'),
        ('Followers', 'followers'),
    )
    counts = {}
    for text in items[0:3]:
        for label, key in label_to_key:
            if not text.endswith(label):
                continue
            digits = re.findall(r'\d+', text.replace(',', ''))
            if digits:
                counts[key] = digits[0]
    return counts


def main():
    """Fetch a page, extract the queried tag and print its counters as JSON.

    Command line options:
        -u/--url    URL to fetch (required)
        -q/--query  tag handed to the Extractor as ``tag`` (required)
        -d          pass logging.DEBUG instead of logging.INFO to the Extractor

    Exits with status 1 (after printing the help text) when the URL or the
    query is missing, and with status 255 when ``isValidhtml`` rejects the
    URL.  Any failure while reading, extracting or encoding is reported on
    stdout; the connection is always closed.
    """
    parser = OptionParser(usage="%prog: [options] [file]")
    parser.add_option('-u', '--url', help="use URL instead of a local file")
    parser.add_option('-q', '--query', help="query should be a string")
    parser.add_option('-d', help="enable debug", action="store_true",
                      default=False, dest="debug")
    options, _ = parser.parse_args()

    if not (options.url and options.query):
        parser.print_help()
        sys.exit(1)

    loglevel = logging.DEBUG if options.debug else logging.INFO

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    if not isValidhtml(options.url):
        print("Unrecognized URL, please provide a content-type of text/html")
        sys.exit(255)

    url = compat_urllib_request.urlopen(options.url)
    try:
        content = url.read()

        # Decoding is best effort: when the charset cannot be detected or
        # the bytes do not decode, the raw content is passed on unchanged.
        enc = chardet.detect(content)['encoding']
        if enc:
            try:
                content = content.decode(enc)
            except (LookupError, UnicodeError):
                pass

        out = Extractor(content, tag=options.query, loglevel=loglevel).query()
        if out is None:
            # The original used a bare ``raise`` here, which has no active
            # exception to re-raise; raise something meaningful instead.
            raise ValueError("no content matched query %r" % options.query)

        # Emit the JSON document exactly once.
        print(json.dumps(_parse_twitter_counts(out)))
    except Exception as err:
        print("Error in printing the extracted html () %s" % err)
    finally:
        url.close()
# Run the command line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment