Skip to content

Instantly share code, notes, and snippets.

@magnusnissel
Last active October 4, 2016 13:57
Show Gist options
  • Save magnusnissel/6276cd50ea2251534a6a70075cfcaebf to your computer and use it in GitHub Desktop.
Save magnusnissel/6276cd50ea2251534a6a70075cfcaebf to your computer and use it in GitHub Desktop.
Creating the NBA Big List
import tweepy
import os
import sys
import datetime
import pandas as pd
import glob
import time
# Folder that holds the per-account "<screen_name>_twitter_lists.csv" files and
# the merged "nba-big-list.csv". Placeholder value: replace it with a real path.
TWITTER_LIST_DIR ="PATH/TO/THE/LIST/FOLDER"
def _build_big_list(list_dir, big_list_name, min_followers=200):
    """Merge the per-account list CSVs found in *list_dir* into one DataFrame.

    Every ``*.csv`` in *list_dir* except *big_list_name* is read, rows with
    fewer than *min_followers* followers are dropped, duplicate
    ``SCREEN_NAME`` rows are removed (first occurrence wins) and the result is
    sorted by ``TWEETS`` in descending order.

    Returns an empty DataFrame when there is no CSV to merge.
    """
    frames = []
    # Sorted so that the row kept for a duplicated SCREEN_NAME does not depend
    # on the file system's directory ordering.
    for csv_path in sorted(glob.glob(os.path.join(list_dir, "*.csv"))):
        if os.path.basename(csv_path) == big_list_name:
            continue
        print(csv_path)
        members = pd.read_csv(csv_path, index_col=0)
        frames.append(members[members["FOLLOWERS"] >= min_followers])
    if not frames:
        return pd.DataFrame()
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    merged = pd.concat(frames, ignore_index=True)
    merged = merged.drop_duplicates(subset=["SCREEN_NAME"])
    return merged.sort_values(by=["TWEETS"], ascending=False)


def get_twitter_nba_lists():
    """Download the NBA-related Twitter lists and build the "NBA Big List".

    Steps:
      1. Authenticate against the Twitter API with tweepy.
      2. Save the "PhD" lists of two accounts and write their combination to
         ``nba_phd_twitter_lists.csv``.
      3. Save all lists of a fixed set of other accounts.
      4. If ``nba-big-list.csv`` does not exist in ``TWITTER_LIST_DIR`` yet,
         build it from every other CSV in that folder; then (re)write it.
    """
    # tweepy auth -- placeholders, fill in your own credentials
    CK = "YOUR TWITTER API CONSUMER KEY"
    CS = "YOUR TWITTER API CONSUMER SECRET"
    AT = "YOUR TWITTER API ACCESS TOKEN"
    AS = "YOUR TWITTER API ACCESS TOKEN SECRET"
    auth = tweepy.OAuthHandler(CK, CS)
    auth.set_access_token(AT, AS)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    # NBA PhD Lists
    phd_frames = []
    for owner in ("adam_mares", "alecgwin"):
        save_twitter_lists(api, owner, req_str="PhD")
        owner_csv = os.path.join(
            TWITTER_LIST_DIR, "{}_twitter_lists.csv".format(owner))
        # save_twitter_lists writes no file when it collected no members.
        if os.path.exists(owner_csv):
            phd_frames.append(pd.read_csv(owner_csv, index_col=0))
        else:
            print("No saved lists to combine for", owner)
    if phd_frames:
        comb_df = pd.concat(phd_frames, ignore_index=True)
        # NOTE(review): relative path, so this lands in the current working
        # directory rather than TWITTER_LIST_DIR -- confirm this is intended.
        comb_df.to_csv("nba_phd_twitter_lists.csv")

    # Other accounts with lists
    screen_names = ["thenbpa", "nba", "nbatv", "nbaontnt", "nbaallstar",
                    "rotowirenba", "bballinsiders", "sbnationnba", "wnba",
                    "usabasketball", "marchmadness", "nba_trades"]
    for sn in screen_names:
        save_twitter_lists(api, sn)

    # populate Big List
    big_list_name = "nba-big-list.csv"
    all_path = os.path.join(TWITTER_LIST_DIR, big_list_name)
    if os.path.exists(all_path):
        add_df = pd.read_csv(all_path, index_col=0)
    else:
        add_df = _build_big_list(TWITTER_LIST_DIR, big_list_name)
        if add_df.empty:
            # Do not write an empty big list: a later run would then reuse it
            # instead of rebuilding from freshly downloaded CSVs.
            print("No list CSVs found in", TWITTER_LIST_DIR)
            return
    add_df.to_csv(all_path)
def save_twitter_lists(api, screen_name, req_str = None):
    """Download the members of a user's Twitter lists and save them to CSV.

    Args:
        api: tweepy API object used for all requests.
        screen_name: Account whose lists are fetched; also used to name the
            output file ``<screen_name>_twitter_lists.csv``.
        req_str: Optional substring (matched case-insensitively) that a list
            name must contain to be included. When falsy, every list is used.

    The CSV is written into ``TWITTER_LIST_DIR`` (created if missing) with one
    row per (list, member) pair. If no member could be collected, nothing is
    written and a notice is printed instead.
    """
    os.makedirs(TWITTER_LIST_DIR, exist_ok=True)
    now = datetime.datetime.now()
    u = api.get_user(screen_name=screen_name)
    wanted = req_str.lower() if req_str else None

    list_members = []
    for twitter_list in api.lists_all(screen_name=u.screen_name):
        if wanted and wanted not in twitter_list.name.lower():
            continue
        l_url = "https://twitter.com/{}/lists/{}".format(
            u.screen_name, twitter_list.slug)
        try:
            # list_members is paginated: a bare call only returns the first
            # page, so walk all pages with a Cursor.
            members = tweepy.Cursor(
                api.list_members,
                owner_screen_name=u.screen_name,
                slug=twitter_list.slug,
            ).items()
            for m in members:
                list_members.append({
                    "LIST_URL": l_url,
                    "LIST_NAME": twitter_list.name,
                    "LIST_DESCRIPTION": twitter_list.description,
                    "NAME": m.name,
                    "SCREEN_NAME": m.screen_name,
                    "FOLLOWERS": m.followers_count,
                    "VERIFIED": m.verified,
                    "DESCRIPTION": m.description,
                    "TWEETS": m.statuses_count,
                    "LIST_MEMBERSHIPS": m.listed_count,
                    "LIST_ACCESSED": now,
                })
        except Exception as e:
            # Best effort: a failing list must not abort the remaining ones.
            # Members fetched before the failure are kept.
            print("Could not fetch members of {}: {}".format(l_url, e))

    if not list_members:
        print("Could not find lists for", screen_name)
        return

    cols = ["LIST_NAME", "SCREEN_NAME", "NAME", "VERIFIED", "FOLLOWERS",
            "LIST_MEMBERSHIPS", "TWEETS", "DESCRIPTION", "LIST_DESCRIPTION",
            "LIST_URL", "LIST_ACCESSED"]
    list_members_df = pd.DataFrame(list_members, columns=cols)
    list_members_df = list_members_df.sort_values(
        by=["VERIFIED", "FOLLOWERS", "LIST_MEMBERSHIPS", "TWEETS", "SCREEN_NAME"],
        ascending=False,
    )
    list_members_df.reset_index(inplace=True, drop=True)
    fn = "{}_twitter_lists.csv".format(screen_name)
    list_members_df.to_csv(os.path.join(TWITTER_LIST_DIR, fn))
    print(list_members_df.head())
# Script entry point: run the full download/merge only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    get_twitter_nba_lists()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment