Last active
January 10, 2019 08:13
-
-
Save vindard/3eae3cb5ef7b879510677e5c1eb01368 to your computer and use it in GitHub Desktop.
A script to parse a quote tweet chain and reveal all tweets/users.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Try it out with this command:
# $ python nestedTweets.py https://twitter.com/aantonop/status/1082420365291388928
import re, requests, sys | |
def currentTweet(url):
    """Download a tweet page and extract the tweet text and author handle.

    The response body is searched for two patterns: the text enclosed in
    curly quotes following ``og:description`` (the tweet), and the path
    segment immediately before ``/status`` following ``og:url`` (the user).

    Args:
        url: Address of the page to download.

    Returns:
        A ``(tweet, user)`` tuple of strings.

    Raises:
        SystemExit: with status 1, after printing a message, when either
            pattern cannot be found in the downloaded page.
    """
    data = requests.get(url)
    # Raw string literals: the original plain literals contained "\/", which
    # is an invalid string escape and triggers a warning on modern Python.
    tweet_match = re.search(r"og:description.*“(.*)”", data.text)
    user_match = re.search(r"og.url.*/(.*)/status", data.text)
    if tweet_match is None or user_match is None:
        print("Sorry, invalid tweet url entered. Exiting...")
        sys.exit(1)
    return tweet_match.group(1), user_match.group(1)
def getNextTweet(url):
    """Parse the tweet at *url* and find the first t.co link in its text.

    Args:
        url: Address of the tweet page to parse.

    Returns:
        A ``(link, tweet, user)`` tuple. ``link`` is the first t.co URL found
        in the tweet text, or ``""`` when the text contains none.
    """
    tweet, user = currentTweet(url)
    # The dot in "t.co" is escaped so that only a literal "t.co" matches;
    # unescaped it would also accept e.g. "https://example.com/tacos".
    link_match = re.search(r"https?\S+t\.co\S+", tweet)
    link = link_match.group(0) if link_match else ""
    return link, tweet, user
def getChain(url):
    """Follow a chain of linked tweets starting at *url*, printing each one.

    Each tweet is fetched exactly once. Its first t.co link (if any) becomes
    the next page to fetch; the walk stops at the first tweet whose text
    contains no such link.

    Note that the underlying parser exits the process when a page cannot be
    parsed as a tweet, so a link that leads to a non-tweet page terminates
    the program rather than ending the chain.

    Args:
        url: Address of the tweet at the tip of the chain.

    Returns:
        A list of ``(link, user, tweet)`` tuples, one per tweet, in the order
        visited. ``link`` is the address the tweet was fetched from.
    """
    chain = []
    next_link = url
    while next_link:
        fetched_from = next_link
        next_link, tweet, user = getNextTweet(fetched_from)
        chain.append((fetched_from, user, tweet))
        print(f"{len(chain)}) @{user}: {tweet}")
    print(f"\nEnd of chain, {len(chain)} tweets deep.")
    return chain
def getChainParticipants(chain):
    """Return the user of every chain entry, in chain order.

    Args:
        chain: Iterable of ``(link, user, tweet)`` 3-tuples.

    Returns:
        A list holding the second element of each tuple. Duplicates are kept.
    """
    # Underscore-prefixed names mark the fields this function ignores and
    # avoid the ambiguous single-letter name "l".
    return [user for _link, user, _tweet in chain]
def getFirstTweet(user_input):
    """Extract the first twitter.com URL from *user_input*, re-prompting if absent.

    When no URL is found the user is asked again on stdin, up to five times.

    Args:
        user_input: Text expected to contain a ``http(s)://twitter.com/...``
            URL.

    Returns:
        The first matching URL as a string.

    Raises:
        SystemExit: with status 1 once the initial input and five re-prompts
            have all failed to yield a URL.
    """
    # Raw string with the dot escaped, so only the literal host
    # "twitter.com" is accepted.
    url_pattern = re.compile(r"(https?://twitter\.com/\S*)")
    max_retries = 5

    retries = 0
    found = url_pattern.search(user_input)
    while found is None:
        retries += 1
        if retries > max_retries:
            print("Sorry too many invalid attempts. Exiting...")
            sys.exit(1)
        user_input = input("Invalid tweet link, please try again:\n")
        found = url_pattern.search(user_input)
    return found.group(1)
if __name__ == "__main__":
    # Script entry point: walk the quote-tweet chain from the given tweet and
    # report where it ends and who took part.
    # With no command-line argument, pass an empty string so that
    # getFirstTweet prompts for a link instead of the script dying with an
    # IndexError.
    user_input = sys.argv[1] if len(sys.argv) > 1 else ""
    tweet_link = getFirstTweet(user_input)
    print("\n----\n\nProcessing:\n")
    chain = getChain(tweet_link)
    # The last chain entry is the deepest tweet reached.
    print(f"\n----\n\nChain starts at: {chain[-1][0]}\n")
    participants = getChainParticipants(chain)
    print(f"Participants:\n{participants}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
BUILDS A CHAIN FROM THE ROOT INSTEAD OF THE TIP | |
''' | |
# Try it out with this command:
# $ python reverseNestedTweets.py https://twitter.com/matt_odell/status/1083205056407764992
import re, requests | |
import sys, json | |
def parseTweet(url):
    """Download a tweet page and extract its text, author and first t.co link.

    The response body is searched for the text enclosed in curly quotes
    following ``og:description`` (the tweet) and for the path segment just
    before ``/status`` following ``og:url`` (the user). The tweet text is
    then searched for a t.co URL.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(link, tweet, user)`` tuple of strings. ``link`` is ``""`` when
        the tweet text contains no t.co URL.

    Raises:
        SystemExit: with status 1, after printing a message, when the tweet
            or user pattern cannot be found in the downloaded page.
    """
    data = requests.get(url)

    # If valid tweet, pulls tweet info. Raw string literals avoid the invalid
    # "\/" string escape the original plain literals contained.
    tweet_match = re.search(r"og:description.*“(.*)”", data.text)
    user_match = re.search(r"og.url.*/(.*)/status", data.text)
    if tweet_match is None or user_match is None:
        print("Sorry, invalid tweet url entered. Exiting...")
        sys.exit(1)
    tweet = tweet_match.group(1)
    user = user_match.group(1)

    # Searches tweet for links present; the escaped dot restricts the match
    # to a literal "t.co".
    link_match = re.search(r"https?\S+t\.co\S+", tweet)
    link = link_match.group(0) if link_match else ""
    return link, tweet, user
# Returns a searchable url of the tweet
def linkToSearchable(url):
    """Percent-encode *url* so it can be used as a search query value.

    Every reserved character is escaped, not only ``:`` and ``/``, so a link
    carrying its own query string (``?``, ``&``, ``=``) or fragment (``#``)
    cannot be mistaken for part of the surrounding search URL.

    Args:
        url: The link to encode.

    Returns:
        The encoded string, e.g. ``https%3A%2F%2Ftwitter.com%2F...``.
    """
    # Imported locally so this function does not depend on a change to the
    # file's top-level imports.
    from urllib.parse import quote

    # safe="" overrides the default of leaving "/" unescaped.
    return quote(url, safe="")
# Returns a list of tweets from search for `searchUrl`
def searchTwitter(searchUrl):
    """Download a search results page and collect the tweet links it contains.

    Every double-quoted, whitespace-free string in the response that is
    immediately followed by ``?p=p`` is taken as a tweet path and prefixed
    with ``https://twitter.com``.

    Args:
        searchUrl: Full address of the search page to download.

    Returns:
        A list of URL strings in page order; empty when nothing matched.
    """
    data = requests.get(searchUrl)
    tweet_prefix = 'https://twitter.com'
    # Raw string: "\S" and "\?" are regex escapes, not string escapes.
    tweet_paths = re.findall(r'"(\S*)\?p=p', data.text)
    return [tweet_prefix + path for path in tweet_paths]
# Propagates one chain by returning only the first tweet found.
# Stretch goal could be to explore multiple branches for longest chain.
def buildChain(url):
    """Build a chain of tweets upward from *url* by repeated searching.

    Starting from *url*, each step searches mobile.twitter.com for the
    current tweet's link and continues with the first result. The walk ends
    when a search returns nothing, or when the first result has already been
    visited (which would otherwise loop forever).

    Args:
        url: Address of the tweet to start from.

    Returns:
        A list of tweet URLs in the order visited, always beginning with
        *url*.
    """
    search_prefix = 'https://mobile.twitter.com/search?q='
    chain = []
    visited = set()
    candidates = [url]
    # Only the first candidate is ever followed; the rest are discarded.
    while candidates and candidates[0] not in visited:
        current = candidates[0]
        visited.add(current)
        chain.append(current)
        candidates = searchTwitter(search_prefix + linkToSearchable(current))
    return chain
# Validates user input
def validateTweetUrl(user_input):
    """Extract the first twitter.com URL from *user_input*, re-prompting if absent.

    When no URL is found the user is asked again on stdin, up to five times.

    Args:
        user_input: Text expected to contain a ``http(s)://twitter.com/...``
            URL.

    Returns:
        The first matching URL as a string.

    Raises:
        SystemExit: with status 1 once the initial input and five re-prompts
            have all failed to yield a URL.
    """
    # Raw string with the dot escaped, so only the literal host
    # "twitter.com" is accepted.
    url_pattern = re.compile(r"(https?://twitter\.com/\S*)")
    max_retries = 5

    retries = 0
    found = url_pattern.search(user_input)
    while found is None:
        retries += 1
        if retries > max_retries:
            print("Sorry too many invalid attempts. Exiting...")
            sys.exit(1)
        user_input = input("Invalid tweet link, please try again:\n")
        found = url_pattern.search(user_input)
    return found.group(1)
if __name__ == "__main__":
    # Script entry point: build the chain from the given tweet and print it
    # as JSON along with its length and final entry.
    # With no command-line argument, pass an empty string so that
    # validateTweetUrl prompts for a link instead of the script dying with an
    # IndexError.
    user_input = sys.argv[1] if len(sys.argv) > 1 else ""
    tweet_link = validateTweetUrl(user_input)
    print("\n----\n\nProcessing:\n")
    chain = buildChain(tweet_link)
    print(
        f"{json.dumps(chain, indent=2)}\n"
        f"\nChain is {len(chain)} tweets deep. Build from:\n"
        f"{chain[-1]}\n"
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment