Last active
March 11, 2020 03:17
-
-
Save stas00/97930de8858bb4c045087b9669d38072 to your computer and use it in GitHub Desktop.
url shortener unshortener (resolves url)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code handles redirects, failed requests, etc. It can be tweaked to return a non-final URL as well.
from urllib.parse import urlsplit | |
import requests | |
# Request headers sent with every lookup. The browser-like string must be
# stored under the 'User-Agent' key to have any effect; under any other key
# it is sent as an unrelated, meaningless header.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0'}
def resolve_url_req(url):
    """Return the URL that `url` redirects to, or None if it does not redirect.

    A single HEAD request is sent without following redirects, so exactly one
    hop of a redirect chain is examined per call.

    Args:
        url: the URL to probe.

    Returns:
        The absolute redirect target when the response is an HTTP redirect
        carrying a non-empty ``Location`` header; otherwise None. None is
        also returned when the request fails (best-effort lookup).
    """
    # Function-scope import so this block does not depend on changes to the
    # file-level import list.
    from urllib.parse import urljoin, urlsplit

    # All HTTP status codes that redirect via the Location header.
    redirect_codes = {301, 302, 303, 307, 308}

    try:
        r = requests.head(url, headers=headers, allow_redirects=False, timeout=10)
        if r.status_code not in redirect_codes:
            return None

        location = r.headers.get('Location')
        if not location:
            # A redirect status without a usable target cannot be followed.
            return None

        # Absolute targets are returned untouched. Anything without a scheme
        # ('/path', 'path', '?query', '//host/path') is resolved against the
        # URL that produced the response.
        if not urlsplit(location).scheme:
            location = urljoin(r.url, location)
        return location
    except (requests.RequestException, ValueError):
        # Connection errors, timeouts and invalid URLs are RequestException.
        # ValueError covers URL-parsing failures raised outside that
        # hierarchy. Either way the lookup is best-effort: report "no
        # redirect" instead of crashing the caller.
        return None
def resolve_url(url, max_redirects=30):
    """Follow the chain of redirects starting at `url` and return the last URL.

    The last URL is returned even if it responds with an error. This is
    useful for discovering where URL shorteners point to, even if the
    destination no longer exists.

    Args:
        url: the URL to resolve.
        max_redirects: upper bound on the number of hops to follow, so that
            very long or endless redirect chains terminate.

    Returns:
        The last resolved URL (which could be the original `url`). If a
        redirect cycle is detected or `max_redirects` is exhausted, the last
        URL reached before stopping is returned.
    """
    # Track visited URLs so a redirect cycle (A -> B -> A) stops the walk
    # instead of looping forever.
    seen = {url}

    for _ in range(max_redirects):
        redirect_url = resolve_url_req(url)
        if redirect_url is None:
            # No further redirect: `url` is the final destination.
            break

        # Add other special cases here:
        # * for the t.co shortener we want the link before the suspension url
        if redirect_url == 'https://twitter.com/account/suspended':
            break

        if redirect_url in seen:
            break

        seen.add(redirect_url)
        url = redirect_url

    return url
# Test run: sample shortened URLs to resolve.
urls = [
    'https://t.co/RbbnjkoqUD',
    'https://t.co/VgzGOK5k3S',
    'http://t.co/cIrTVml9Vp',
    'http://t.co/y7sIPKB1kd',
]

# Only hit the network when executed as a script, and show each result;
# resolving without printing would make the test run produce no output.
if __name__ == "__main__":
    for u in urls:
        print(u, '->', resolve_url(u))
#'https://vine.co/v/eHzipDIO5x7' | |
#'https://twitter.com/OfficialWith1D/status/629090990486237184' | |
#'https://www.youtube.com/watch?v=NnnSS0_-xoA' | |
#'https://twitter.com/JoshPainter2154/status/629274422952620033/photo/1' | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment