Created
February 5, 2021 13:54
-
-
Save Kudusch/ad34687ab5af0aea70542e0b774e7f42 to your computer and use it in GitHub Desktop.
Python functions for the Twitter API v2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import requests | |
import datetime | |
import math | |
import csv | |
import json | |
import traceback | |
import time | |
# Module-level caches: parse_tweets() fills them from a response's "includes"
# section and parse_tweet() reads them back while flattening each tweet.
user_dict = {}  # user id -> flattened user fields
included_tweets_dict = {}  # referenced tweet id -> flattened tweet and author fields
places_dict = {}  # place id -> flattened place fields
# Unix timestamp (whole seconds) taken once when the module is loaded; it is
# copied into the "queried_at" field of every parsed tweet.
queried_at = int(datetime.datetime.now().timestamp())
# (parsed-tweet suffix, included_tweets_dict key) for the author of a
# referenced tweet.
_REFERENCE_USER_FIELDS = (
    ("user_id", "user_id"),
    ("user_screen_name", "screen_name"),
    ("user_name", "name"),
    ("user_followers_count", "followers_count"),
    ("user_following_count", "following_count"),
    ("user_tweet_count", "tweet_count"),
    ("user_listed_count", "listed_count"),
    ("user_protected", "protected"),
    ("user_verified", "verified"),
    ("user_description", "description"),
)

# (parsed-tweet suffix, included_tweets_dict key) for the referenced tweet
# itself. "tweet_status_id" is handled separately because it comes from the
# reference, not from the cache.
_REFERENCE_TWEET_FIELDS = (
    ("tweet_conversation_id", "conversation_id"),
    ("tweet_created_at", "created_at"),
    ("tweet_lang", "lang"),
    ("tweet_source", "source"),
    ("tweet_text", "text"),
    ("tweet_retweet_count", "retweet_count"),
    ("tweet_reply_count", "reply_count"),
    ("tweet_like_count", "like_count"),
    ("tweet_quote_count", "quote_count"),
)

# Maps a referenced_tweets "type" value to the column prefix used for it.
_REFERENCE_PREFIXES = {
    "quoted": "quoted",
    "retweeted": "retweeted",
    "replied_to": "replied",
}

# Every key of a parsed tweet, in output order. All of them default to "".
_PARSED_TWEET_FIELDS = (
    (
        "status_id",
        "created_at",
        "text",
        "conversation_id",
        "hashtags",
        "mentions",
        "url_location",
        "url_unwound",
        "url_title",
        "url_description",
        "url_sensitive",
        "geo",
        "lang",
        "reply_settings",
        "retweet_count",
        "reply_count",
        "like_count",
        "quote_count",
        "is_retweet",
        "is_reply",
        "is_quote",
    )
    + tuple(
        "{}_{}".format(prefix, suffix)
        for prefix in ("retweeted", "replied", "quoted")
        for suffix in (
            tuple(s for s, _ in _REFERENCE_USER_FIELDS)
            + ("tweet_status_id",)
            + tuple(s for s, _ in _REFERENCE_TWEET_FIELDS)
        )
    )
    + (
        "geo_id",
        "geo_full_name",
        "geo_name",
        "geo_country",
        "geo_country_code",
        "geo_place_type",
        "geo_json",
        "user_id",
        "screen_name",
        "name",
        "account_created_at",
        "description",
        "url",
        "location",
        "followers_count",
        "following_count",
        "tweet_count",
        "listed_count",
        "protected",
        "verified",
        "queried_at",
    )
)


def _dump_url_field(urls, key):
    """Return a JSON list of ``key`` taken from every URL entity.

    The result is all-or-nothing: if any entity lacks ``key`` the whole field
    is left empty ("") so the lists of the different URL columns never get out
    of step with each other.
    """
    try:
        return json.dumps([url[key] for url in urls])
    except (KeyError, TypeError):
        return ""


def parse_tweet(raw_tweet):
    """Flatten one raw API v2 tweet object into a dict of scalar columns.

    Author, referenced-tweet and place details are looked up in the
    module-level caches ``user_dict``, ``included_tweets_dict`` and
    ``places_dict``, which ``parse_tweets`` fills from the response's
    "includes" section.

    Args:
        raw_tweet: one element of the response's "data" list. The keys "id",
            "created_at", "text", "conversation_id", "reply_settings",
            "source", "public_metrics" and "author_id" are required.

    Returns:
        A dict holding every column of the CSV output (missing values are "")
        plus a "source" key.

    Raises:
        KeyError: if a required key is missing from ``raw_tweet`` or the
            author is not present in ``user_dict``.
    """
    parsed_tweet = dict.fromkeys(_PARSED_TWEET_FIELDS, "")

    parsed_tweet["status_id"] = raw_tweet["id"]
    parsed_tweet["created_at"] = raw_tweet["created_at"]
    parsed_tweet["text"] = raw_tweet["text"]
    parsed_tweet["conversation_id"] = raw_tweet["conversation_id"]

    # entities
    entities = raw_tweet.get("entities", {})
    if "hashtags" in entities:
        parsed_tweet["hashtags"] = json.dumps([i["tag"] for i in entities["hashtags"]])
    if "mentions" in entities:
        parsed_tweet["mentions"] = json.dumps([i["username"] for i in entities["mentions"]])
    if "urls" in entities:
        urls = entities["urls"]
        parsed_tweet["url_location"] = _dump_url_field(urls, "expanded_url")
        # experimental
        parsed_tweet["url_unwound"] = _dump_url_field(urls, "unwound_url")
        parsed_tweet["url_title"] = _dump_url_field(urls, "title")
        parsed_tweet["url_description"] = _dump_url_field(urls, "description")
        # The key used to be misspelled ("possiby_sensitive"), so this column
        # was always empty.
        parsed_tweet["url_sensitive"] = raw_tweet.get("possibly_sensitive", "")

    # geo
    # Check: https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/place
    geo = raw_tweet.get("geo")
    place_id = geo.get("place_id") if isinstance(geo, dict) else None
    if place_id is not None:
        parsed_tweet["geo_id"] = place_id
        # Each field is read on its own so one missing key no longer blanks
        # out all the fields that follow it.
        place = places_dict.get(place_id, {})
        parsed_tweet["geo_full_name"] = place.get("full_name", "")
        parsed_tweet["geo_name"] = place.get("name", "")
        parsed_tweet["geo_country"] = place.get("country", "")
        parsed_tweet["geo_country_code"] = place.get("country_code", "")
        parsed_tweet["geo_place_type"] = place.get("place_type", "")
        parsed_tweet["geo_json"] = place.get("geo_json", "")

    # BCP47 language tag
    parsed_tweet["lang"] = raw_tweet.get("lang", "")

    parsed_tweet["reply_settings"] = raw_tweet["reply_settings"]
    parsed_tweet["source"] = raw_tweet["source"]

    metrics = raw_tweet["public_metrics"]
    parsed_tweet["retweet_count"] = metrics["retweet_count"]
    parsed_tweet["reply_count"] = metrics["reply_count"]
    parsed_tweet["like_count"] = metrics["like_count"]
    parsed_tweet["quote_count"] = metrics["quote_count"]

    for reference in raw_tweet.get("referenced_tweets", []):
        prefix = _REFERENCE_PREFIXES.get(reference["type"])
        if prefix is None:
            continue
        # Record the id first so the is_* flags stay correct even when the
        # referenced tweet itself is unavailable.
        parsed_tweet[prefix + "_tweet_status_id"] = reference["id"]
        included = included_tweets_dict.get(reference["id"])
        if included is None:
            # The referenced tweet was not delivered in "includes". For
            # replies the API still tells us whom the reply addressed.
            if prefix == "replied":
                parsed_tweet["replied_user_id"] = raw_tweet.get("in_reply_to_user_id", "")
            continue
        for suffix, key in _REFERENCE_USER_FIELDS + _REFERENCE_TWEET_FIELDS:
            parsed_tweet["{}_{}".format(prefix, suffix)] = included.get(key, "")

    # user fields
    author = user_dict[raw_tweet["author_id"]]
    parsed_tweet["user_id"] = raw_tweet["author_id"]
    parsed_tweet["screen_name"] = author["username"]
    parsed_tweet["name"] = author["name"]
    parsed_tweet["account_created_at"] = author["created_at"]
    parsed_tweet["description"] = author["description"]
    parsed_tweet["url"] = author["url"]
    parsed_tweet["location"] = author["location"]
    parsed_tweet["followers_count"] = author["followers_count"]
    parsed_tweet["following_count"] = author["following_count"]
    parsed_tweet["tweet_count"] = author["tweet_count"]
    parsed_tweet["listed_count"] = author["listed_count"]
    parsed_tweet["protected"] = author["protected"]
    parsed_tweet["verified"] = author["verified"]

    # The flags are strings because they are written to CSV verbatim.
    parsed_tweet["is_retweet"] = "False" if parsed_tweet["retweeted_tweet_status_id"] == "" else "True"
    parsed_tweet["is_reply"] = "False" if parsed_tweet["replied_tweet_status_id"] == "" else "True"
    parsed_tweet["is_quote"] = "False" if parsed_tweet["quoted_tweet_status_id"] == "" else "True"

    parsed_tweet["queried_at"] = queried_at
    return parsed_tweet
def _profile_url(user):
    """Return the expanded profile URL of ``user``, else its raw URL, else ""."""
    try:
        return user["entities"]["url"]["urls"][0]["expanded_url"]
    except (KeyError, IndexError, TypeError):
        return user.get("url", "")


def parse_tweets(r):
    """Parse every tweet of one API v2 response.

    The "includes" section (users, referenced tweets, places) is first merged
    into the module-level caches ``user_dict``, ``included_tweets_dict`` and
    ``places_dict``; entries that are already cached are left untouched.
    Afterwards each element of "data" is flattened with ``parse_tweet``.

    Args:
        r: response object whose ``json()`` method returns the decoded body.

    Returns:
        A list with one parsed-tweet dict per element of "data".

    Raises:
        KeyError: if the body has no "data" key.
    """
    # Decode the body once instead of re-parsing it on every access.
    payload = r.json()
    includes = payload.get("includes", {})

    # Users go first: the included tweets below copy their author's fields.
    for user in includes.get("users", []):
        if user["id"] in user_dict:
            continue
        metrics = user.get("public_metrics") or {}
        user_dict[user["id"]] = {
            "name": user.get("name", ""),
            "username": user.get("username", ""),
            "created_at": user.get("created_at", ""),
            "description": user.get("description", ""),
            "url": _profile_url(user),
            "location": user.get("location", ""),
            "followers_count": metrics.get("followers_count", ""),
            "following_count": metrics.get("following_count", ""),
            "tweet_count": metrics.get("tweet_count", ""),
            "listed_count": metrics.get("listed_count", ""),
            "protected": user.get("protected", ""),
            "verified": user.get("verified", ""),
        }

    for tweet in includes.get("tweets", []):
        if tweet["id"] in included_tweets_dict:
            continue
        metrics = tweet.get("public_metrics") or {}
        # An author that was not delivered in "includes" yields empty author
        # fields instead of aborting the whole response.
        author = user_dict.get(tweet.get("author_id"), {})
        included_tweets_dict[tweet["id"]] = {
            "conversation_id": tweet.get("conversation_id", ""),
            "created_at": tweet.get("created_at", ""),
            "lang": tweet.get("lang", ""),
            "source": tweet.get("source", ""),
            "text": tweet.get("text", ""),
            "retweet_count": metrics.get("retweet_count", ""),
            "reply_count": metrics.get("reply_count", ""),
            "like_count": metrics.get("like_count", ""),
            "quote_count": metrics.get("quote_count", ""),
            "user_id": tweet.get("author_id", ""),
            "screen_name": author.get("username", ""),
            "name": author.get("name", ""),
            "followers_count": author.get("followers_count", ""),
            "following_count": author.get("following_count", ""),
            "tweet_count": author.get("tweet_count", ""),
            "listed_count": author.get("listed_count", ""),
            "protected": author.get("protected", ""),
            "verified": author.get("verified", ""),
            "description": author.get("description", ""),
        }

    for place in includes.get("places", []):
        if place["id"] in places_dict:
            continue
        places_dict[place["id"]] = {
            "full_name": place.get("full_name", ""),
            "name": place.get("name", ""),
            # "country" used to receive the country *code* and the code itself
            # was never stored; keep the two apart.
            "country": place.get("country", ""),
            "country_code": place.get("country_code", ""),
            "place_type": place.get("place_type", ""),
            "geo_json": json.dumps(place["geo"]) if "geo" in place else "",
        }

    return [parse_tweet(tweet) for tweet in payload["data"]]
def lookup_tweets(tweet_ids, bearer_token, verbose=True):
    """Look up tweets by id via the ``/2/tweets`` endpoint.

    Args:
        tweet_ids: iterable of at most 100 tweet ids (strings).
        bearer_token: token sent in the "Authorization: Bearer" header.
        verbose: print the query parameters, rate-limit waits and the number
            of remaining calls.

    Returns:
        A list of parsed-tweet dicts, or ``None`` when the request could not
        be sent or the response contains no tweets.

    Raises:
        Exception: if more than 100 ids are passed.
        SystemExit: if the API answers with a status code other than 200.
    """
    query_ids = list(tweet_ids)
    if len(query_ids) > 100:
        raise Exception("Query too long, halting")

    url = "https://api.twitter.com/2/tweets"
    headers = {
        "Authorization": "Bearer {}".format(bearer_token),
    }
    params = (
        ("ids", ",".join(query_ids)),
        ("tweet.fields", "author_id,created_at,conversation_id,text,lang,geo,entities,reply_settings,public_metrics,source,referenced_tweets"),
        ("user.fields", "id,name,username,created_at,description,url,location,protected,verified,public_metrics,entities"),
        ("expansions", "referenced_tweets.id,referenced_tweets.id.author_id,in_reply_to_user_id,author_id,attachments.media_keys,entities.mentions.username,geo.place_id"),
    )

    if verbose:
        print("Searching for tweets with the following parameters:")
        print("\tids:", ",".join(query_ids))

    try:
        r = requests.get(url, headers=headers, params=params)
    except requests.exceptions.RequestException:
        # Without a response there is nothing to inspect; previously the code
        # fell through and crashed on the unbound response variable.
        print("Error getting tweets (Error: {})".format(traceback.format_exc()))
        return None

    if r.status_code == 429:
        # Wait until the rate-limit window resets (plus a safety margin), then
        # retry once. Epoch arithmetic avoids subtracting naive local datetimes.
        try:
            sleep_time = math.ceil(int(r.headers["x-rate-limit-reset"]) - time.time()) + 15
        except (KeyError, ValueError):
            sleep_time = 900
        if sleep_time < 15:
            # The reset time is already in the past: fall back to 900 seconds.
            sleep_time = 900
        if verbose:
            print("\t"*5, "Rate limit exceeded, resuming in {} seconds".format(str(sleep_time)))
        time.sleep(sleep_time)
        r = requests.get(url, headers=headers, params=params)

    if r.status_code != 200:
        print("Error getting tweets (status code: {}), halting".format(r.status_code))
        print(r.text)
        # Same exception type as the former exit(), but independent of the
        # site module and with a non-zero exit status.
        raise SystemExit(1)

    payload = r.json()
    # A response may carry "errors" for some ids next to "data" for the
    # others; only give up when no tweet at all came back.
    if not payload.get("data"):
        print("No tweets found")
        return None
    if verbose and "errors" in payload:
        print("\t{} of {} ids could not be retrieved".format(len(payload["errors"]), len(query_ids)))

    queried_tweets = parse_tweets(r)
    print("Retrieved {} tweets".format(len(queried_tweets)))
    if verbose:
        print("\t{} of {} calls remaining.\n".format(r.headers.get("x-rate-limit-remaining", "?"), r.headers.get("x-rate-limit-limit", "?")))
    return queried_tweets
def search_tweets(query, bearer_token, since_id=None, until_id=None, start_time=None, end_time=None, mode="recent", verbose=False):
    """Search tweets via ``/2/tweets/search/<mode>`` and follow all result pages.

    Args:
        query: search query, at most 1024 characters.
        bearer_token: token sent in the "Authorization: Bearer" header.
        since_id, until_id, start_time, end_time: optional request parameters,
            forwarded unchanged when truthy.
        mode: last path segment of the endpoint (the file's usage example
            passes "recent" or "all").
        verbose: print the query parameters and progress information.

    Returns:
        A list of parsed-tweet dicts, or ``None`` when the first request could
        not be sent or matched nothing. If a later page fails, the tweets
        collected so far are returned.

    Raises:
        Exception: if ``query`` is longer than 1024 characters.
        SystemExit: if the first request is answered with a status code other
            than 200.
    """
    if len(query) > 1024:
        raise Exception("Query too long, halting")

    url = "https://api.twitter.com/2/tweets/search/{}".format(mode)
    headers = {
        "Authorization": "Bearer {}".format(bearer_token),
    }
    params = (
        ("query", query),
        ("max_results", 100),
        ("tweet.fields", "author_id,created_at,conversation_id,text,lang,geo,entities,reply_settings,public_metrics,source,referenced_tweets"),
        ("user.fields", "id,name,username,created_at,description,url,location,protected,verified,public_metrics,entities"),
        ("expansions", "referenced_tweets.id,referenced_tweets.id.author_id,in_reply_to_user_id,author_id,attachments.media_keys,entities.mentions.username,geo.place_id"),
    )

    if verbose:
        print("Searching for tweets with the following parameters:")
        print("\tquery:", query)
    optional_params = (
        ("since_id", since_id),
        ("until_id", until_id),
        ("start_time", start_time),
        ("end_time", end_time),
    )
    for key, value in optional_params:
        if value:
            params = params + ((key, value),)
            if verbose:
                # since_id used to echo the query instead of its own value.
                print("\t{}:".format(key), value)

    def fetch(extra_params=()):
        """GET one page; on HTTP 429 wait for the rate-limit reset and retry once."""
        response = requests.get(url, headers=headers, params=params + extra_params)
        if response.status_code == 429:
            try:
                sleep_time = math.ceil(int(response.headers["x-rate-limit-reset"]) - time.time()) + 15
            except (KeyError, ValueError):
                sleep_time = 900
            if sleep_time < 15:
                # The reset time is already in the past: fall back to 900 seconds.
                sleep_time = 900
            if verbose:
                print("\t"*5, "Rate limit exceeded, resuming in {} seconds".format(str(sleep_time)), end="\r")
            time.sleep(sleep_time)
            response = requests.get(url, headers=headers, params=params + extra_params)
        return response

    try:
        r = fetch()
    except requests.exceptions.RequestException:
        # Without a response there is nothing to inspect; previously the code
        # fell through and crashed on the unbound response variable.
        print("Error getting tweets (Error: {})".format(traceback.format_exc()))
        return None

    if r.status_code != 200:
        print("Error getting tweets (status code: {}), halting".format(r.status_code))
        print(r.text)
        # Same exception type as the former exit(), but independent of the
        # site module and with a non-zero exit status.
        raise SystemExit(1)

    payload = r.json()
    if payload["meta"]["result_count"] == 0:
        print("No tweets found")
        return None

    searched_tweets = parse_tweets(r)
    try:
        while "next_token" in payload["meta"]:
            if verbose:
                print("Retrieved {} tweets".format(len(searched_tweets)), end="\r")
            next_token = payload["meta"]["next_token"]
            # Short pause between page requests.
            time.sleep(1.2)
            r = fetch((("next_token", next_token),))
            if r.status_code != 200:
                print("Error getting tweets (status code: {}), returning partial results".format(r.status_code))
                break
            payload = r.json()
            if payload["meta"]["result_count"] > 0:
                searched_tweets.extend(parse_tweets(r))
    except Exception:
        # Best effort: keep the pages collected so far. Ctrl-C is no longer
        # swallowed here.
        print("Error in while loop results, continuing (Traceback: {})".format(traceback.format_exc()))

    print("Retrieved {} tweets".format(len(searched_tweets)))
    if verbose:
        print("\t{} of {} calls remaining.\n".format(r.headers.get("x-rate-limit-remaining", "?"), r.headers.get("x-rate-limit-limit", "?")))
    return searched_tweets
# Column suffixes shared by the retweeted_/replied_/quoted_ column groups.
_CSV_REFERENCE_SUFFIXES = (
    "user_id",
    "user_screen_name",
    "user_name",
    "user_followers_count",
    "user_following_count",
    "user_tweet_count",
    "user_listed_count",
    "user_protected",
    "user_verified",
    "user_description",
    "tweet_status_id",
    "tweet_conversation_id",
    "tweet_created_at",
    "tweet_lang",
    "tweet_source",
    "tweet_text",
    "tweet_retweet_count",
    "tweet_reply_count",
    "tweet_like_count",
    "tweet_quote_count",
)

# All CSV columns in output order; used for the header and for every row.
_CSV_COLUMNS = (
    (
        "status_id",
        "created_at",
        "text",
        "conversation_id",
        "hashtags",
        "mentions",
        "url_location",
        "url_unwound",
        "url_title",
        "url_description",
        "url_sensitive",
        "geo",
        "lang",
        "reply_settings",
        "retweet_count",
        "reply_count",
        "like_count",
        "quote_count",
        "is_retweet",
        "is_reply",
        "is_quote",
    )
    + tuple(
        "{}_{}".format(prefix, suffix)
        for prefix in ("retweeted", "replied", "quoted")
        for suffix in _CSV_REFERENCE_SUFFIXES
    )
    + (
        "geo_id",
        "geo_full_name",
        "geo_name",
        "geo_country",
        "geo_country_code",
        "geo_place_type",
        "geo_json",
        "user_id",
        "screen_name",
        "name",
        "account_created_at",
        "description",
        "url",
        "location",
        "followers_count",
        "following_count",
        "tweet_count",
        "listed_count",
        "protected",
        "verified",
        "queried_at",
    )
)


def tweets_to_csv(queried_tweets, file_name, append=False, verbose=False):
    """Write parsed tweets to a CSV file.

    Args:
        queried_tweets: list of parsed-tweet dicts; ``None`` or an empty list
            leaves the file untouched.
        file_name: path of the CSV file.
        append: append rows to an existing file (no header row is written)
            instead of overwriting it (header row first).
        verbose: print what is being done.

    Raises:
        KeyError: if a tweet lacks one of the CSV columns.
    """
    if verbose:
        print("Appending" if append else "Writing", "to file", file_name)

    if not queried_tweets:
        if verbose:
            print("No tweets to write to file")
        return

    # newline="" lets the csv module control line endings itself (otherwise
    # every row is followed by a blank line on Windows). UTF-8 keeps emoji and
    # other non-ASCII tweet text writable regardless of the locale encoding.
    with open(file_name, "a+" if append else "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        if not append:
            writer.writerow(_CSV_COLUMNS)
        for parsed_tweet in queried_tweets:
            writer.writerow([parsed_tweet[column] for column in _CSV_COLUMNS])
# Usage -----
# Guarded so that importing this module does not fire live API requests or
# write files; running the file as a script behaves as before.
if __name__ == "__main__":
    # Setup API
    BEARER_TOKEN = "XXX"  # placeholder; replace with a real bearer token

    ## Look up tweets by id
    tweet_ids = ["1356767914632896513", "1355669150933385216", "1351970364700692482", "1351202079843479556"]
    tweets = lookup_tweets(tweet_ids, BEARER_TOKEN)
    tweets_to_csv(tweets, "out_1.csv")

    ## Search tweets
    query = "from:kudusch -is:retweet"
    tweets = search_tweets(query, BEARER_TOKEN, mode="all", start_time="2010-01-01T00:00:00+00:00", end_time="2011-01-01T00:00:00+00:00", verbose=True)
    tweets_to_csv(tweets, "out_2.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment