marknca · September 4, 2021 20:15 · marknca · Sep 4, 2021
diff --git a/get_twitter_thread.py b/get_twitter_thread.py
 #! /usr/bin/env python3

 # Standard library
 import datetime
 import re
 import sys
 import time

 # 3rd party library
 import requests

 # Twitter API bearer token; replace the placeholder with a real token.
 bearer_token = 'YOUR_BEARER_TOKEN_HERE'

 # Authorization header passed to the api.twitter.com requests in this file.
 headers = {'Authorization': 'Bearer ' + bearer_token}

 def get_html_for_thread(tweet_thread, tweets, includes, users):
 	"""
 	Render every tweet of a thread as one HTML string.

 	tweet_thread -- dict of the thread's tweets; rendered in iteration order
 	tweets, includes, users -- lookup dicts handed through to get_html_for_tweet

 	The result starts with the "start_of_thread" anchor and each rendered
 	tweet is followed by a blank line.
 	"""
 	total = len(tweet_thread)
 	pieces = ['<a name="start_of_thread"></a>\n\n']

 	# positions are 1-based so they can be shown as "Tweet 1/N"
 	for position, entry in enumerate(tweet_thread.values(), start=1):
 		rendered = get_html_for_tweet(entry, tweets, includes, users, position, total)
 		pieces.append("{}\n\n".format(rendered))

 	return "".join(pieces)

 def get_html_for_tweet(tweet, tweets, includes, users, thread_index=None, thread_len=None):
 	"""
 	Render a single tweet as an HTML fragment.

 	tweet        -- tweet object, or a response dict wrapping it under 'data'
 	tweets       -- unused here; kept so existing callers keep working
 	includes     -- media objects keyed by media_key
 	users        -- user objects keyed by user id
 	thread_index -- 1-based position of the tweet in its thread (optional)
 	thread_len   -- number of tweets in the thread (optional)

 	Returns the HTML as a string. Every https://t.co/ link found in the text
 	is requested to learn where it redirects, so this function uses the network.
 	"""
 	if 'data' in tweet: tweet = tweet['data']

 	# expects created_at such as 2021-09-04T20:15:00.000Z; the suffix is dropped before parsing
 	created_at_obj = datetime.datetime.strptime(tweet['created_at'].replace('.000Z', ''), '%Y-%m-%dT%H:%M:%S')
 	created_at_str = created_at_obj.strftime("%d-%b-%Y, %H:%M")

 	html = tweet['text'].replace('\n\n', "<br /><br />")

 	# Resolve shortened URLs: drop the ones that point at the tweet's own media, link the rest.
 	# dict.fromkeys() de-duplicates while keeping order, so a URL that appears twice is
 	# fetched once and its replacement is not applied on top of itself.
 	for short_url in dict.fromkeys(re.findall(r'https://t\.co/\w+', tweet['text'])):
 		r = None
 		try:
 			# the timeout keeps one unresponsive host from hanging the whole run
 			r = requests.get(short_url, timeout=10)
 		except Exception as err:
 			# best effort: an unresolvable link is left in the text as it was
 			print("Unable to resolve URL included in tweet. Threw exception:\n\t{}".format(err))

 		if r and r.status_code == 200:
 			# the alternation is grouped so that both .../photo/N and .../video/N are recognised
 			mi = re.match(r'https://twitter.com/.+/status/\d+/(?:photo|video)/\d+', r.url)
 			if mi:
 				# This URL is for an included attachment
 				html = html.replace(" {}".format(short_url), '')
 			else:
 				html = html.replace(short_url, '<a href="{}">{}</a>'.format(short_url, r.url))

 	# add images
 	if 'attachments' in tweet and 'media_keys' in tweet['attachments']:
 		for attachment in tweet['attachments']['media_keys']:
 			include = includes[attachment]
 			if include['type'] == 'photo':
 				html += '<br /><img src="{}" />'.format(include['url'])

 	# tweet link
 	user = users[tweet['author_id']]
 	html += '<p class="tweet-link"><img src="{}" alt="{}" /><b>@{}</b> tweeted at <a href="https://twitter.com/{}/status/{}">{}</a></p>'.format(user['profile_image_url'], user['username'], user['username'], tweet['author_id'], tweet['id'], created_at_str)

 	position = ""
 	nav = ""
 	if thread_index and thread_len:
 		position = '<p class="tweet-position"><a name="{}">Tweet {}/{}</a>'.format(thread_index, thread_index, thread_len)
 		if thread_index < thread_len:
 			# only link forward when there is a following tweet to jump to
 			position += '<span>&nbsp;</span><a href="#{}">👇 Next tweet</a>'.format(thread_index + 1)
 		if thread_index > 1:
 			position += '<span>&nbsp;</span><a href="#start_of_thread">👆 Start</a>'
 		position += "</p>"

 		if thread_index == thread_len:
 			# the last tweet gets a closing link back to the top of the thread
 			nav = '<p class="tweet-link"><a href="#start_of_thread">👆 Start</a></p>'

 	return '{}<div class="tweet" data-tweet-id="{}">{}{}</div>'.format(position, tweet['id'], html, nav)

 def organize_tweets(tweets):
 	"""
 	Index a list of tweet objects by their 'id'.

 	When two entries share an id the later one replaces the earlier one.
 	"""
 	return {entry['id']: entry for entry in tweets}

 def organize_includes(includes):
 	"""
 	Index a list of media objects by their 'media_key'.

 	When two entries share a key the later one replaces the earlier one.
 	"""
 	return {media['media_key']: media for media in includes}

 def organize_users(users):
 	"""
 	Index a list of user objects by their 'id'.

 	When two entries share an id the later one replaces the earlier one.
 	"""
 	return {account['id']: account for account in users}

 def get_tweet(tweet_id):
 	"""
 	Fetch a single tweet, with its media, author and referenced tweets expanded.

 	Returns the decoded JSON response. When the API answers with anything
 	other than HTTP 200 the response body is printed and None is returned.
 	"""
 	url = 'https://api.twitter.com/2/tweets/{}?expansions=attachments.media_keys,author_id,referenced_tweets.id&media.fields=url&tweet.fields=created_at,public_metrics,source,text,author_id,in_reply_to_user_id&user.fields=profile_image_url,url,username,verified,name,created_at'.format(tweet_id)

 	response = requests.get(url, headers=headers)
 	if response.status_code != 200:
 		print("Could not get tweet {}. Threw err:\n\t{}".format(tweet_id, response.text))
 		return None

 	return response.json()

 def get_tweets_for_user(user_id, max_results=100, token=None):
 	"""
 	Fetch one page of a user's tweets, with media, authors and referenced tweets expanded.

 	user_id     -- id of the user whose tweets are requested
 	max_results -- page size; kept within the 5..100 range the endpoint accepts
 	token       -- pagination token taken from a previous page (optional)

 	Returns the decoded JSON response. When the API answers with anything
 	other than HTTP 200 the response body is printed and None is returned.
 	"""
 	results = None

 	# honour the caller's page size instead of always asking for 100
 	max_results = max(5, min(max_results, 100))
 	url = 'https://api.twitter.com/2/users/{}/tweets?expansions=attachments.media_keys,author_id,referenced_tweets.id&media.fields=url&max_results={}&tweet.fields=created_at,public_metrics,source,author_id,text,in_reply_to_user_id&user.fields=profile_image_url,url,username,verified,name,created_at'.format(user_id, max_results)
 	if token:
 		url += '&pagination_token={}'.format(token)

 	r = requests.get(url, headers=headers)
 	if r.status_code == 200:
 		results = r.json()
 	else:
 		print("Could not get tweets for user {}. Threw err:\n\t{}".format(user_id, r.text))

 	return results

 def get_last_tweets_for_user(user_id, max_pages=5):
 	"""
 	Collect a user's most recent tweets by walking up to max_pages result pages.

 	user_id   -- id of the user whose tweets are collected
 	max_pages -- upper bound on the number of pages requested (default 5)

 	Returns a dict with three lists: 'tweets', 'includes' (media objects) and
 	'users'. Paging stops early when a request fails or no further page exists;
 	whatever was collected up to that point is returned.
 	"""
 	tweets = []
 	includes = []
 	users = []
 	token = None
 	for _ in range(max_pages):
 		response = get_tweets_for_user(user_id, token=token)
 		if not response:
 			# the request failed (get_tweets_for_user already printed why)
 			break

 		# treat missing keys as empty instead of raising KeyError,
 		# e.g. a page whose tweets carry no media
 		tweets += response.get('data', [])
 		expansions = response.get('includes', {})
 		includes += expansions.get('media', [])
 		users += expansions.get('users', [])

 		token = response.get('meta', {}).get('next_token')
 		if not token:
 			# no further page; asking again would only fetch the same tweets twice
 			break

 	return { 'tweets': tweets, 'includes': includes, 'users': users, }

 def get_tweet_in_reply_to(replied_to_tweet_id, tweets):
 	"""
 	Find the tweet in `tweets` that replies to the given tweet id.

 	replied_to_tweet_id -- id of the tweet being replied to (compared as a string)
 	tweets              -- dict of tweet objects keyed by id

 	Returns the matching tweet or None. When several tweets reply to the
 	same id, the one that comes last in the dict's iteration order is returned.
 	"""
 	wanted = str(replied_to_tweet_id)
 	match = None

 	for candidate in tweets.values():
 		references = candidate.get('referenced_tweets', ())
 		# no early exit from this loop: a later matching tweet replaces an earlier one
 		if any(ref['type'] == 'replied_to' and ref['id'] == wanted for ref in references):
 			match = candidate

 	return match

 def get_thread(root_tweet, tweets):
 	"""
 	Build the reply chain that starts at root_tweet.

 	root_tweet -- response dict for the first tweet (the tweet sits under 'data')
 	tweets     -- dict of candidate tweets keyed by id

 	Returns a dict keyed by tweet id: the root response first, then each
 	reply found by get_tweet_in_reply_to, in chain order.
 	"""
 	root_id = root_tweet['data']['id']
 	thread = {root_id: root_tweet}

 	# follow the chain one reply at a time until no tweet answers the current one
 	current = get_tweet_in_reply_to(root_id, tweets)
 	while current:
 		thread[current['id']] = current
 		current = get_tweet_in_reply_to(current['id'], tweets)

 	return thread

 def main(root_tweet_id=None):
 	"""
 	Call as:
 	get_twitter_thread.py TWEET_ID

 	Looks up the tweet, collects its author's recent tweets, rebuilds the
 	thread that starts at the tweet and prints it as HTML.

 	root_tweet_id -- id of the first tweet; read from the command line when omitted

 	Returns the tuple (tweets, includes, users, tweet_thread). Exits with a
 	message when no tweet id is available or the tweet's author cannot be found.
 	"""
 	if not root_tweet_id:
 		if len(sys.argv) < 2:
 			# without this check the script's own name would be used as the tweet id
 			sys.exit("Usage: get_twitter_thread.py TWEET_ID")
 		root_tweet_id = sys.argv[-1]
 	print("Finding thread for tweet: {}".format(root_tweet_id))

 	# Get the root tweet
 	root_tweet = get_tweet(root_tweet_id)
 	root_user_id = None
 	if root_tweet and 'data' in root_tweet and 'author_id' in root_tweet['data']:
 		root_user_id = root_tweet['data']['author_id']

 	if not root_user_id:
 		# nothing to build a thread from; stop before querying with a missing user id
 		sys.exit("Could not find the author of tweet {}".format(root_tweet_id))

 	print("Re-building thread by user id #{}...".format(root_user_id))

 	# Get the latest tweets (up to 5 pages of 100) from that user to build the thread from
 	tweets_and_includes_and_users = get_last_tweets_for_user(root_user_id)
 	includes = organize_includes(tweets_and_includes_and_users['includes'])
 	tweets = organize_tweets(tweets_and_includes_and_users['tweets'])
 	users = organize_users(tweets_and_includes_and_users['users'])
 	print("Retrieved {} tweets to sort through...".format(len(tweets_and_includes_and_users['tweets'])))

 	# dicts keep insertion order, so the thread iterates from the root tweet onwards
 	tweet_thread = get_thread(root_tweet, tweets)

 	print(get_html_for_thread(tweet_thread, tweets, includes, users))

 	return tweets, includes, users, tweet_thread

 # Script entry point: main() falls back to the command line for the tweet id.
 if __name__ == '__main__': main()
	#! /usr/bin/env python3

	# Standard library
	import datetime
	import re
	import sys
	import time

	# 3rd party library
	import requests

	# Twitter API bearer token; replace the placeholder with a real token.
	bearer_token = 'YOUR_BEARER_TOKEN_HERE'

	# Authorization header passed to the api.twitter.com requests in this file.
	headers = {'Authorization': 'Bearer ' + bearer_token}

	def get_html_for_thread(tweet_thread, tweets, includes, users):
		"""
		Render every tweet of a thread as one HTML string.

		tweet_thread -- dict of the thread's tweets; rendered in iteration order
		tweets, includes, users -- lookup dicts handed through to get_html_for_tweet

		The result starts with the "start_of_thread" anchor and each rendered
		tweet is followed by a blank line.
		"""
		total = len(tweet_thread)
		pieces = ['<a name="start_of_thread"></a>\n\n']

		# positions are 1-based so they can be shown as "Tweet 1/N"
		for position, entry in enumerate(tweet_thread.values(), start=1):
			rendered = get_html_for_tweet(entry, tweets, includes, users, position, total)
			pieces.append("{}\n\n".format(rendered))

		return "".join(pieces)

	def get_html_for_tweet(tweet, tweets, includes, users, thread_index=None, thread_len=None):
		"""
		Render a single tweet as an HTML fragment.

		tweet        -- tweet object, or a response dict wrapping it under 'data'
		tweets       -- unused here; kept so existing callers keep working
		includes     -- media objects keyed by media_key
		users        -- user objects keyed by user id
		thread_index -- 1-based position of the tweet in its thread (optional)
		thread_len   -- number of tweets in the thread (optional)

		Returns the HTML as a string. Every https://t.co/ link found in the text
		is requested to learn where it redirects, so this function uses the network.
		"""
		if 'data' in tweet: tweet = tweet['data']

		# expects created_at such as 2021-09-04T20:15:00.000Z; the suffix is dropped before parsing
		created_at_obj = datetime.datetime.strptime(tweet['created_at'].replace('.000Z', ''), '%Y-%m-%dT%H:%M:%S')
		created_at_str = created_at_obj.strftime("%d-%b-%Y, %H:%M")

		html = tweet['text'].replace('\n\n', "<br /><br />")

		# Resolve shortened URLs: drop the ones that point at the tweet's own media, link the rest.
		# dict.fromkeys() de-duplicates while keeping order, so a URL that appears twice is
		# fetched once and its replacement is not applied on top of itself.
		for short_url in dict.fromkeys(re.findall(r'https://t\.co/\w+', tweet['text'])):
			r = None
			try:
				# the timeout keeps one unresponsive host from hanging the whole run
				r = requests.get(short_url, timeout=10)
			except Exception as err:
				# best effort: an unresolvable link is left in the text as it was
				print("Unable to resolve URL included in tweet. Threw exception:\n\t{}".format(err))

			if r and r.status_code == 200:
				# the alternation is grouped so that both .../photo/N and .../video/N are recognised
				mi = re.match(r'https://twitter.com/.+/status/\d+/(?:photo|video)/\d+', r.url)
				if mi:
					# This URL is for an included attachment
					html = html.replace(" {}".format(short_url), '')
				else:
					html = html.replace(short_url, '<a href="{}">{}</a>'.format(short_url, r.url))

		# add images
		if 'attachments' in tweet and 'media_keys' in tweet['attachments']:
			for attachment in tweet['attachments']['media_keys']:
				include = includes[attachment]
				if include['type'] == 'photo':
					html += '<br /><img src="{}" />'.format(include['url'])

		# tweet link
		user = users[tweet['author_id']]
		html += '<p class="tweet-link"><img src="{}" alt="{}" /><b>@{}</b> tweeted at <a href="https://twitter.com/{}/status/{}">{}</a></p>'.format(user['profile_image_url'], user['username'], user['username'], tweet['author_id'], tweet['id'], created_at_str)

		position = ""
		nav = ""
		if thread_index and thread_len:
			position = '<p class="tweet-position"><a name="{}">Tweet {}/{}</a>'.format(thread_index, thread_index, thread_len)
			if thread_index < thread_len:
				# only link forward when there is a following tweet to jump to
				position += '<span>&nbsp;</span><a href="#{}">👇 Next tweet</a>'.format(thread_index + 1)
			if thread_index > 1:
				position += '<span>&nbsp;</span><a href="#start_of_thread">👆 Start</a>'
			position += "</p>"

			if thread_index == thread_len:
				# the last tweet gets a closing link back to the top of the thread
				nav = '<p class="tweet-link"><a href="#start_of_thread">👆 Start</a></p>'

		return '{}<div class="tweet" data-tweet-id="{}">{}{}</div>'.format(position, tweet['id'], html, nav)

	def organize_tweets(tweets):
		"""
		Index a list of tweet objects by their 'id'.

		When two entries share an id the later one replaces the earlier one.
		"""
		return {entry['id']: entry for entry in tweets}

	def organize_includes(includes):
		"""
		Index a list of media objects by their 'media_key'.

		When two entries share a key the later one replaces the earlier one.
		"""
		return {media['media_key']: media for media in includes}

	def organize_users(users):
		"""
		Index a list of user objects by their 'id'.

		When two entries share an id the later one replaces the earlier one.
		"""
		return {account['id']: account for account in users}

	def get_tweet(tweet_id):
		"""
		Fetch a single tweet, with its media, author and referenced tweets expanded.

		Returns the decoded JSON response. When the API answers with anything
		other than HTTP 200 the response body is printed and None is returned.
		"""
		url = 'https://api.twitter.com/2/tweets/{}?expansions=attachments.media_keys,author_id,referenced_tweets.id&media.fields=url&tweet.fields=created_at,public_metrics,source,text,author_id,in_reply_to_user_id&user.fields=profile_image_url,url,username,verified,name,created_at'.format(tweet_id)

		response = requests.get(url, headers=headers)
		if response.status_code != 200:
			print("Could not get tweet {}. Threw err:\n\t{}".format(tweet_id, response.text))
			return None

		return response.json()

	def get_tweets_for_user(user_id, max_results=100, token=None):
		"""
		Fetch one page of a user's tweets, with media, authors and referenced tweets expanded.

		user_id     -- id of the user whose tweets are requested
		max_results -- page size; kept within the 5..100 range the endpoint accepts
		token       -- pagination token taken from a previous page (optional)

		Returns the decoded JSON response. When the API answers with anything
		other than HTTP 200 the response body is printed and None is returned.
		"""
		results = None

		# honour the caller's page size instead of always asking for 100
		max_results = max(5, min(max_results, 100))
		url = 'https://api.twitter.com/2/users/{}/tweets?expansions=attachments.media_keys,author_id,referenced_tweets.id&media.fields=url&max_results={}&tweet.fields=created_at,public_metrics,source,author_id,text,in_reply_to_user_id&user.fields=profile_image_url,url,username,verified,name,created_at'.format(user_id, max_results)
		if token:
			url += '&pagination_token={}'.format(token)

		r = requests.get(url, headers=headers)
		if r.status_code == 200:
			results = r.json()
		else:
			print("Could not get tweets for user {}. Threw err:\n\t{}".format(user_id, r.text))

		return results

	def get_last_tweets_for_user(user_id, max_pages=5):
		"""
		Collect a user's most recent tweets by walking up to max_pages result pages.

		user_id   -- id of the user whose tweets are collected
		max_pages -- upper bound on the number of pages requested (default 5)

		Returns a dict with three lists: 'tweets', 'includes' (media objects) and
		'users'. Paging stops early when a request fails or no further page exists;
		whatever was collected up to that point is returned.
		"""
		tweets = []
		includes = []
		users = []
		token = None
		for _ in range(max_pages):
			response = get_tweets_for_user(user_id, token=token)
			if not response:
				# the request failed (get_tweets_for_user already printed why)
				break

			# treat missing keys as empty instead of raising KeyError,
			# e.g. a page whose tweets carry no media
			tweets += response.get('data', [])
			expansions = response.get('includes', {})
			includes += expansions.get('media', [])
			users += expansions.get('users', [])

			token = response.get('meta', {}).get('next_token')
			if not token:
				# no further page; asking again would only fetch the same tweets twice
				break

		return { 'tweets': tweets, 'includes': includes, 'users': users, }

	def get_tweet_in_reply_to(replied_to_tweet_id, tweets):
		"""
		Find the tweet in `tweets` that replies to the given tweet id.

		replied_to_tweet_id -- id of the tweet being replied to (compared as a string)
		tweets              -- dict of tweet objects keyed by id

		Returns the matching tweet or None. When several tweets reply to the
		same id, the one that comes last in the dict's iteration order is returned.
		"""
		wanted = str(replied_to_tweet_id)
		match = None

		for candidate in tweets.values():
			references = candidate.get('referenced_tweets', ())
			# no early exit from this loop: a later matching tweet replaces an earlier one
			if any(ref['type'] == 'replied_to' and ref['id'] == wanted for ref in references):
				match = candidate

		return match

	def get_thread(root_tweet, tweets):
		"""
		Build the reply chain that starts at root_tweet.

		root_tweet -- response dict for the first tweet (the tweet sits under 'data')
		tweets     -- dict of candidate tweets keyed by id

		Returns a dict keyed by tweet id: the root response first, then each
		reply found by get_tweet_in_reply_to, in chain order.
		"""
		root_id = root_tweet['data']['id']
		thread = {root_id: root_tweet}

		# follow the chain one reply at a time until no tweet answers the current one
		current = get_tweet_in_reply_to(root_id, tweets)
		while current:
			thread[current['id']] = current
			current = get_tweet_in_reply_to(current['id'], tweets)

		return thread

	def main(root_tweet_id=None):
		"""
		Call as:
		get_twitter_thread.py TWEET_ID

		Looks up the tweet, collects its author's recent tweets, rebuilds the
		thread that starts at the tweet and prints it as HTML.

		root_tweet_id -- id of the first tweet; read from the command line when omitted

		Returns the tuple (tweets, includes, users, tweet_thread). Exits with a
		message when no tweet id is available or the tweet's author cannot be found.
		"""
		if not root_tweet_id:
			if len(sys.argv) < 2:
				# without this check the script's own name would be used as the tweet id
				sys.exit("Usage: get_twitter_thread.py TWEET_ID")
			root_tweet_id = sys.argv[-1]
		print("Finding thread for tweet: {}".format(root_tweet_id))

		# Get the root tweet
		root_tweet = get_tweet(root_tweet_id)
		root_user_id = None
		if root_tweet and 'data' in root_tweet and 'author_id' in root_tweet['data']:
			root_user_id = root_tweet['data']['author_id']

		if not root_user_id:
			# nothing to build a thread from; stop before querying with a missing user id
			sys.exit("Could not find the author of tweet {}".format(root_tweet_id))

		print("Re-building thread by user id #{}...".format(root_user_id))

		# Get the latest tweets (up to 5 pages of 100) from that user to build the thread from
		tweets_and_includes_and_users = get_last_tweets_for_user(root_user_id)
		includes = organize_includes(tweets_and_includes_and_users['includes'])
		tweets = organize_tweets(tweets_and_includes_and_users['tweets'])
		users = organize_users(tweets_and_includes_and_users['users'])
		print("Retrieved {} tweets to sort through...".format(len(tweets_and_includes_and_users['tweets'])))

		# dicts keep insertion order, so the thread iterates from the root tweet onwards
		tweet_thread = get_thread(root_tweet, tweets)

		print(get_html_for_thread(tweet_thread, tweets, includes, users))

		return tweets, includes, users, tweet_thread

	# Script entry point: main() falls back to the command line for the tweet id.
	if __name__ == '__main__': main()