Created
January 17, 2016 22:03
-
-
Save salty-horse/ddb8caaaa6775bdb801a to your computer and use it in GitHub Desktop.
Process Twitter followers fetched with get_followers.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import re | |
import json | |
import glob | |
def _single_line(text):
    """Return *text* with line breaks and tabs replaced by spaces.

    ``None`` is treated as an empty string so that a missing value still
    produces an (empty) cell instead of raising ``AttributeError``.
    """
    if text is None:
        return ''
    # '\r\n' is replaced first so a Windows line ending becomes one space,
    # exactly as before.  A lone '\r' would still split the row in most
    # spreadsheet importers, and a '\t' would shift every following column
    # of the tab-separated output, so both are replaced as well.
    return (text.replace('\r\n', ' ')
                .replace('\n', ' ')
                .replace('\r', ' ')
                .replace('\t', ' '))


def print_user(user):
    """Print one follower as a single tab-separated row on stdout.

    Columns, in order: id, screen name, name, tweet count, followers,
    following, description, default-profile-image flag.

    Args:
        user: mapping with the keys ``id``, ``screen_name``, ``name``,
            ``statuses_count``, ``followers_count``, ``friends_count``,
            ``description`` and ``default_profile_image``.  ``name`` and
            ``description`` may be ``None``; they are then printed as empty
            cells.

    Raises:
        KeyError: if one of the keys listed above is missing.
    """
    print(
        user['id'],
        user['screen_name'],
        _single_line(user['name']),
        user['statuses_count'],
        user['followers_count'],
        user['friends_count'],
        _single_line(user['description']),
        user['default_profile_image'],
        # Optional extra column: an embedded avatar for Google Spreadsheets.
        # Left disabled because it may be slow if the file is large.
        # '=image("{}")'.format(user['profile_image_url_https']),
        sep='\t')
def load_followers(pattern='followers*.txt'):
    """Load and concatenate the follower lists stored in JSON files.

    Args:
        pattern: glob pattern selecting the files to read.  Each file is
            expected to contain one JSON array; the arrays are concatenated.

    Returns:
        A list with the elements of every matching file, files taken in
        sorted name order.  Empty when no file matches.
    """
    loaded = []
    # glob.glob() returns matches in arbitrary order; sorting keeps the row
    # order of the generated table stable between runs and machines.
    for fname in sorted(glob.glob(pattern)):
        # JSON text is UTF-8.  Without an explicit encoding, open() uses the
        # locale's default, which breaks on non-ASCII names on some systems.
        with open(fname, encoding='utf-8') as f:
            loaded.extend(json.load(f))
    return loaded


followers = load_followers()
# Single-character matchers used to count or detect characters of a script.

# Unicode "Hebrew" block.
HEBREW = re.compile('[\u0590-\u05FF]')
# Unicode "Cyrillic" (U+0400-U+04FF) and "Cyrillic Supplement"
# (U+0500-U+052F) blocks.  The upper bound used to be U+0500, which is the
# first code point of the supplement rather than a block boundary; covering
# the whole supplement keeps every previous match and adds the rest of it.
CYRILLIC = re.compile('[\u0400-\u052F]')
# Despite the name this matches more than kanji: CJK symbols and punctuation
# (U+3000-U+303F), hiragana (U+3040-U+309F), katakana (U+30A0-U+30FF),
# half-/full-width forms (U+FF00-U+FFEF), CJK unified ideographs up to
# U+9FAF, and CJK extension A (U+3400-U+4DBF).
KANJI = re.compile('[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uffef\u4e00-\u9faf\u3400-\u4dbf]')
# Header row: one title per column, in the same order as the fields that
# print_user() writes for each follower, separated by tabs.
print('\t'.join((
    'User ID',
    'screen name',
    'name',
    'tweet count',
    'followers',
    'following',
    'description',
    'egg avatar',
)))
# Write one row per follower.
#
# The unconditional ``continue`` directly after the first print_user() call
# ends every iteration, so none of the filters below it ever runs: as written,
# the script simply prints every follower.
#
# NOTE(review): the indentation of the filter section was lost when this file
# was extracted.  Statement order is kept exactly; the nesting shown is a
# best-effort reconstruction -- confirm it before removing the early
# ``continue`` to enable the filters.
for user in followers:
    print_user(user)
    continue  # Stop here

    # Skip users you trust
    if HEBREW.search(user['name']) or HEBREW.search(user['description']):
        continue

    # Enable/disable these as needed

    # More than 3 Cyrillic characters in the name.
    if len(CYRILLIC.findall(user['name'])) > 3:
        continue
        print_user(user)

    # More than 3 Cyrillic characters in the description.
    if len(CYRILLIC.findall(user['description'])) > 3:
        continue
        print_user(user)

    # More than 5 characters matched by KANJI in the name.
    if len(KANJI.findall(user['name'])) > 5:
        continue
        print_user(user)

    # More than 5 characters matched by KANJI in the description.
    if len(KANJI.findall(user['description'])) > 5:
        continue
        print_user(user)

    # Accounts that have never tweeted.
    if user['statuses_count'] == 0:
        continue
        print_user(user)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment