Last active
October 4, 2016 13:57
-
-
Save magnusnissel/6276cd50ea2251534a6a70075cfcaebf to your computer and use it in GitHub Desktop.
Creating the NBA Big List
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import glob
import os
import sys
import time

import pandas as pd
import tweepy
# Folder that receives one "<screen_name>_twitter_lists.csv" per scraped
# account as well as the merged "nba-big-list.csv". Replace the placeholder
# with a real path before running the script.
TWITTER_LIST_DIR = "PATH/TO/THE/LIST/FOLDER"
def get_twitter_nba_lists():
    """Download NBA-related Twitter lists and merge them into the Big List.

    Steps:
      1. Authenticate against the Twitter API with the credentials below.
      2. Save the "PhD" lists of ``adam_mares`` and ``alecgwin`` and write
         their combination to ``nba_phd_twitter_lists.csv``.
      3. Save every list of a fixed set of other accounts.
      4. If ``nba-big-list.csv`` does not yet exist in ``TWITTER_LIST_DIR``,
         build it from all other CSVs in that folder: keep accounts with more
         than 199 followers, drop duplicate screen names and sort by tweet
         count (descending).

    Returns:
        None. All results are written to CSV files.
    """
    # tweepy auth -- fill in real credentials before running.
    CK = "YOUR TWITTER API CONSUMER KEY"
    CS = "YOUR TWITTER API CONSUMER SECRET"
    AT = "YOUR TWITTER API ACCESS TOKEN"
    AS = "YOUR TWITTER API ACCESS TOKEN SECRET"
    auth = tweepy.OAuthHandler(CK, CS)
    auth.set_access_token(AT, AS)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    # NBA PhD lists
    phd_owners = ["adam_mares", "alecgwin"]
    for owner in phd_owners:
        save_twitter_lists(api, owner, req_str="PhD")
    # save_twitter_lists writes nothing when it finds no members, so only
    # read the files that actually exist instead of crashing in read_csv.
    phd_paths = [
        os.path.join(TWITTER_LIST_DIR, "{}_twitter_lists.csv".format(owner))
        for owner in phd_owners
    ]
    phd_frames = [
        pd.read_csv(path, index_col=0)
        for path in phd_paths
        if os.path.exists(path)
    ]
    if phd_frames:
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        comb_df = pd.concat(phd_frames, ignore_index=True)
        # NOTE(review): this path is relative to the current working
        # directory, not TWITTER_LIST_DIR -- confirm that is intended.
        comb_df.to_csv("nba_phd_twitter_lists.csv")

    # Other accounts with lists
    screen_names = [
        "thenbpa", "nba", "nbatv", "nbaontnt", "nbaallstar", "rotowirenba",
        "bballinsiders", "sbnationnba", "wnba", "usabasketball",
        "marchmadness", "nba_trades",
    ]
    for sn in screen_names:
        save_twitter_lists(api, sn)

    # populate Big List
    big_list_name = "nba-big-list.csv"
    all_path = os.path.join(TWITTER_LIST_DIR, big_list_name)
    if os.path.exists(all_path):
        add_df = pd.read_csv(all_path, index_col=0)
    else:
        glob_str = os.path.join(TWITTER_LIST_DIR, "*.csv")
        frames = []
        # Sorted so that the row kept by drop_duplicates does not depend on
        # the arbitrary order in which the file system lists the CSVs.
        for f in sorted(glob.glob(glob_str)):
            if os.path.basename(f) == big_list_name:
                continue
            print(f)
            df = pd.read_csv(f, index_col=0)
            # Keep only accounts with more than 199 followers.
            frames.append(df[df["FOLLOWERS"] > 199])
        if not frames:
            # Nothing was downloaded; an empty frame has no SCREEN_NAME
            # column, so the de-duplication below would raise KeyError.
            print("No list CSVs found in", TWITTER_LIST_DIR)
            return
        # One concat instead of a per-file append (quadratic copying).
        add_df = pd.concat(frames, ignore_index=True)
        add_df = add_df.drop_duplicates(["SCREEN_NAME"])
        add_df = add_df.sort_values(by=["TWEETS"], ascending=False)
    add_df.to_csv(all_path)
def save_twitter_lists(api, screen_name, req_str=None):
    """Download the members of a user's Twitter lists into one CSV file.

    The file is written to ``TWITTER_LIST_DIR`` (created if missing) as
    ``<screen_name>_twitter_lists.csv`` with one row per (list, member) pair.
    Nothing is written when no members were collected.

    Args:
        api: Authenticated ``tweepy.API`` instance.
        screen_name: Screen name of the account whose lists are downloaded;
            also used verbatim in the output file name.
        req_str: Optional case-insensitive substring a list's name must
            contain to be included. ``None`` or empty means every list.

    Returns:
        None.
    """
    os.makedirs(TWITTER_LIST_DIR, exist_ok=True)
    now = datetime.datetime.now()
    u = api.get_user(screen_name)
    wanted = req_str.lower() if req_str else None
    list_members = []
    for tw_list in api.lists_all(screen_name=u.screen_name):
        # Skip lists whose name does not contain the requested substring.
        if wanted and wanted not in tw_list.name.lower():
            continue
        l_url = "https://twitter.com/{}/lists/{}".format(u.screen_name, tw_list.slug)
        try:
            # list_members is a cursored endpoint: a direct call yields only
            # the first page, so walk every page through tweepy.Cursor.
            members = tweepy.Cursor(
                api.list_members,
                owner_screen_name=u.screen_name,
                slug=tw_list.slug,
            ).items()
            for m in members:
                list_members.append({
                    "LIST_URL": l_url,
                    "LIST_NAME": tw_list.name,
                    "LIST_DESCRIPTION": tw_list.description,
                    "NAME": m.name,
                    "SCREEN_NAME": m.screen_name,
                    "FOLLOWERS": m.followers_count,
                    "VERIFIED": m.verified,
                    "DESCRIPTION": m.description,
                    "TWEETS": m.statuses_count,
                    "LIST_MEMBERSHIPS": m.listed_count,
                    "LIST_ACCESSED": now,
                })
        except Exception as e:
            # Best effort: report the failure and continue with the next
            # list; rows gathered before the error are kept.
            print(e)

    if not list_members:
        print("Could not find lists for", screen_name)
        return

    list_members_df = pd.DataFrame(list_members)
    list_members_df = list_members_df.sort_values(
        by=["VERIFIED", "FOLLOWERS", "LIST_MEMBERSHIPS", "TWEETS", "SCREEN_NAME"],
        ascending=False,
    )
    # Fix the column order of the CSV.
    cols = [
        "LIST_NAME", "SCREEN_NAME", "NAME", "VERIFIED", "FOLLOWERS",
        "LIST_MEMBERSHIPS", "TWEETS", "DESCRIPTION", "LIST_DESCRIPTION",
        "LIST_URL", "LIST_ACCESSED",
    ]
    list_members_df = list_members_df[cols]
    list_members_df.reset_index(inplace=True, drop=True)
    fn = "{}_twitter_lists.csv".format(screen_name)
    list_members_df.to_csv(os.path.join(TWITTER_LIST_DIR, fn))
    print(list_members_df.head())
# Run the full download-and-merge pipeline only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    get_twitter_nba_lists()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment