Skip to content

Instantly share code, notes, and snippets.

@vindard
Last active January 10, 2019 08:13
Show Gist options
  • Save vindard/3eae3cb5ef7b879510677e5c1eb01368 to your computer and use it in GitHub Desktop.
Save vindard/3eae3cb5ef7b879510677e5c1eb01368 to your computer and use it in GitHub Desktop.
A script to parse a quote tweet chain and reveal all tweets/users.
# Can try it out with this command:
# $ python nestedTweets.py https://twitter.com/aantonop/status/1082420365291388928
import re, requests, sys
def currentTweet(url):
    """Download a tweet page and scrape its text and author from the HTML.

    The tweet text is taken from the curly-quoted portion following
    ``og:description`` and the user from the text captured between a
    ``/`` and ``/status`` following ``og:url`` in the response body.

    Args:
        url: Address of the tweet page to fetch.

    Returns:
        A ``(tweet, user)`` tuple of strings.

    Exits:
        Prints an error message and calls ``sys.exit(1)`` when either
        pattern cannot be found in the downloaded page.
    """
    data = requests.get(url)
    # Raw strings keep regex escapes out of Python's own string-escape
    # processing; the previous non-raw "\/" was an invalid escape sequence.
    tweets = re.findall(r"og:description.*“(.*)”", data.text)
    users = re.findall(r"og.url.*\/(.*)\/status", data.text)
    if not tweets or not users:
        print("Sorry, invalid tweet url entered. Exiting...")
        sys.exit(1)
    return tweets[0], users[0]
def getNextTweet(url):
    """Fetch a tweet and find the first t.co link embedded in its text.

    Args:
        url: Address of the tweet page to fetch.

    Returns:
        A ``(link, tweet, user)`` tuple. ``link`` is the first t.co URL
        found in the tweet text, or ``""`` when the text contains none.
    """
    tweet, user = currentTweet(url)
    # Raw string avoids the invalid "\S" string escape, and the dot in
    # "t.co" is escaped so look-alike hosts (e.g. "bit.com") do not match.
    found = re.search(r"https?\S+t\.co\S+", tweet)
    link = found.group(0) if found else ""
    return link, tweet, user
def getChain(url):
    """Follow embedded links from tweet to tweet and record each step.

    Starting at ``url``, each tweet is fetched with ``getNextTweet`` and
    the link it returns becomes the next tweet to fetch, until a tweet
    yields no link. Every tweet is printed as it is added to the chain.

    Args:
        url: Address of the tweet at the tip of the chain.

    Returns:
        A list of ``(link, user, tweet)`` tuples, in the order visited.
    """
    # A single getNextTweet call yields everything needed for the first
    # entry; the starting page used to be downloaded twice.
    link, tweet, user = getNextTweet(url)
    chain = [(url, user, tweet)]
    print(f"{len(chain)}) @{user}: {tweet}")

    while link:
        visited = link
        link, tweet, user = getNextTweet(visited)
        chain.append((visited, user, tweet))
        print(f"{len(chain)}) @{user}: {tweet}")

    print(f"\nEnd of chain, {len(chain)} tweets deep.")
    return chain
def getChainParticipants(chain):
    """Return the user (second field) of every 3-tuple in ``chain``, in order."""
    participants = []
    for _link, user, _tweet in chain:
        participants.append(user)
    return participants
def getFirstTweet(user_input):
    """Extract the first twitter.com URL from ``user_input``.

    When no URL is found the user is asked to type one in, up to five
    times.

    Args:
        user_input: Text expected to contain a tweet URL.

    Returns:
        The first ``http(s)://twitter.com/...`` URL found.

    Exits:
        Prints an error message and calls ``sys.exit(1)`` once the
        re-prompt limit is exceeded.
    """
    # Raw string avoids the invalid "\S" string escape; the dot is escaped
    # so that only the literal host "twitter.com" is accepted.
    url_regex = r'(https?://twitter\.com/\S*)'
    attempts = 0
    tweet_link = re.findall(url_regex, user_input)
    while not tweet_link:
        attempts += 1
        if attempts > 5:
            print("Sorry too many invalid attempts. Exiting...")
            sys.exit(1)
        user_input = input("Invalid tweet link, please try again:\n")
        tweet_link = re.findall(url_regex, user_input)
    return tweet_link[0]
if __name__ == "__main__":
    # Without a command-line argument, hand the validator an empty string
    # so it prompts for a link instead of crashing with an IndexError.
    user_input = sys.argv[1] if len(sys.argv) > 1 else ""
    tweet_link = getFirstTweet(user_input)
    print("\n----\n\nProcessing:\n")
    chain = getChain(tweet_link)
    # The last entry visited is the deepest tweet, i.e. where the chain began.
    print(f"\n----\n\nChain starts at: {chain[-1][0]}\n")
    participants = getChainParticipants(chain)
    print(f"Participants:\n{participants}")
'''
BUILDS A CHAIN FROM THE ROOT INSTEAD OF THE TIP
'''
# Can try it out with this command:
# $ python reverseNestedTweets.py https://twitter.com/matt_odell/status/1083205056407764992
import re, requests
import sys, json
def parseTweet(url):
    """Download a tweet page and scrape its text, author and first t.co link.

    The tweet text is taken from the curly-quoted portion following
    ``og:description`` and the user from the text captured between a
    ``/`` and ``/status`` following ``og:url`` in the response body.

    Args:
        url: Address of the tweet page to fetch.

    Returns:
        A ``(link, tweet, user)`` tuple. ``link`` is the first t.co URL
        found in the tweet text, or ``""`` when the text contains none.

    Exits:
        Prints an error message and calls ``sys.exit(1)`` when the tweet
        text or the user cannot be found in the downloaded page.
    """
    data = requests.get(url)

    # If valid tweet, pulls tweet info. Raw strings keep regex escapes out
    # of Python's string-escape processing ("\/" is an invalid escape).
    tweets = re.findall(r"og:description.*“(.*)”", data.text)
    users = re.findall(r"og.url.*\/(.*)\/status", data.text)
    if not tweets or not users:
        print("Sorry, invalid tweet url entered. Exiting...")
        sys.exit(1)
    tweet, user = tweets[0], users[0]

    # Searches tweet for links present; the dot in "t.co" is escaped so
    # look-alike hosts (e.g. "bit.com") do not match.
    found = re.search(r"https?\S+t\.co\S+", tweet)
    link = found.group(0) if found else ""
    return link, tweet, user
# Returns a searchable url of the tweet
def linkToSearchable(url):
    """Percent-encode ``url`` so it can be used as a search query value.

    Every reserved character is escaped, not just ``:`` and ``/``, so
    URLs carrying ``?``, ``&``, ``=``, ``#`` or spaces no longer corrupt
    the query string they are appended to.

    Args:
        url: The tweet URL to encode.

    Returns:
        The encoded string, e.g. ``https%3A%2F%2Ftwitter.com%2F...``.
    """
    # Imported locally so the block does not depend on file-level imports.
    from urllib.parse import quote

    # safe='' makes quote() escape '/' as well (it is left alone by default).
    return quote(url, safe='')
# Returns a list of tweets from search for `searchUrl`
def searchTwitter(searchUrl):
    """Fetch a search results page and list the tweet URLs found in it.

    Args:
        searchUrl: Full address of the search page to download.

    Returns:
        A list of strings, each ``https://twitter.com`` followed by a
        path scraped from the page; empty when nothing matches.
    """
    data = requests.get(searchUrl)
    # Captures the quoted, whitespace-free text that precedes "?p=p".
    # Raw string avoids the invalid "\S" and "\?" string escapes.
    regex_for_results = r'"(\S*)\?p=p'
    tweetPrefix = 'https://twitter.com'
    # "path" instead of "id" so the builtin id() is not shadowed.
    return [tweetPrefix + path
            for path in re.findall(regex_for_results, data.text)]
# Propagates one chain by returning only the first tweet found.
# Stretch goal could be to explore multiple branches for longest chain.
def buildChain(url):
    """Build one chain of tweets by repeatedly searching for the latest URL.

    Starting from ``url``, each tweet URL is searched for and the first
    result not already in the chain becomes the next element. The walk
    stops when a search yields no new tweet.

    Args:
        url: Address of the tweet at the root of the chain.

    Returns:
        A list of tweet URLs, beginning with ``url``, in the order found.
    """
    allResults = []
    seen = set()
    current = url
    while current is not None:
        allResults.append(current)
        seen.add(current)
        searchUrl = ('https://mobile.twitter.com/search?q='
                     + linkToSearchable(current))
        # Skip tweets already visited: a result pointing back into the
        # chain would otherwise make this loop run forever.
        current = next(
            (found for found in searchTwitter(searchUrl) if found not in seen),
            None,
        )
    return allResults
# Validates user input
def validateTweetUrl(user_input):
    """Extract the first twitter.com URL from ``user_input``.

    When no URL is found the user is asked to type one in, up to five
    times.

    Args:
        user_input: Text expected to contain a tweet URL.

    Returns:
        The first ``http(s)://twitter.com/...`` URL found.

    Exits:
        Prints an error message and calls ``sys.exit(1)`` once the
        re-prompt limit is exceeded.
    """
    # Raw string avoids the invalid "\S" string escape; the dot is escaped
    # so that only the literal host "twitter.com" is accepted.
    url_regex = r'(https?://twitter\.com/\S*)'
    attempts = 0
    tweet_link = re.findall(url_regex, user_input)
    while not tweet_link:
        attempts += 1
        if attempts > 5:
            print("Sorry too many invalid attempts. Exiting...")
            sys.exit(1)
        user_input = input("Invalid tweet link, please try again:\n")
        tweet_link = re.findall(url_regex, user_input)
    return tweet_link[0]
if __name__ == "__main__":
    # Without a command-line argument, hand the validator an empty string
    # so it prompts for a link instead of crashing with an IndexError.
    user_input = sys.argv[1] if len(sys.argv) > 1 else ""
    tweet_link = validateTweetUrl(user_input)
    print("\n----\n\nProcessing:\n")
    chain = buildChain(tweet_link)
    # Adjacent f-strings concatenate inside the parentheses, so no
    # backslash continuations are needed.
    print(f"{json.dumps(chain, indent=2)}\n"
          f"\nChain is {len(chain)} tweets deep. Build from:\n"
          f"{chain[-1]}\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment