Skip to content

Instantly share code, notes, and snippets.

@stas00
Last active March 11, 2020 03:17
Show Gist options
  • Save stas00/97930de8858bb4c045087b9669d38072 to your computer and use it in GitHub Desktop.
Save stas00/97930de8858bb4c045087b9669d38072 to your computer and use it in GitHub Desktop.
url shortener unshortener (resolves url)
# This code handles redirects, failed requests, etc. It can be tweaked to return a non-final URL as well.
from urllib.parse import urlsplit
import requests
# Request headers passed to every `requests.head` call below.
# The browser string must be keyed by 'User-Agent'; the previous key 'headers'
# sent it as an HTTP header literally named "headers", so the real User-Agent
# was still the library default.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0'}
def resolve_url_req(url):
    """Return the URL that `url` redirects to, or None if it does not redirect.

    Issues a single HEAD request (10 second timeout) without following
    redirects and inspects the response.

    Args:
        url: the URL to probe.

    Returns:
        The absolute redirect target when the response is a 301, 302, 303,
        307 or 308 carrying a non-empty ``Location`` header. None when the
        response is not a redirect, has no usable ``Location``, or the
        request itself fails.
    """
    # Function-scope import so this block does not depend on a change to the
    # file-level import list.
    from urllib.parse import urljoin

    try:
        r = requests.head(url, headers=headers, allow_redirects=False, timeout=10)
    except requests.RequestException:
        # Best effort: a failed request (bad URL, DNS error, timeout, ...) is
        # reported as "no redirect". Unlike a bare `except`, this lets
        # KeyboardInterrupt and genuine programming errors propagate.
        return None

    if r.status_code not in (301, 302, 303, 307, 308):
        return None

    location = r.headers.get('Location')
    if not location:
        # A missing or empty Location gives the caller nothing to follow.
        return None

    # Resolve the target against the requested URL. This covers host-relative
    # ("/path"), path-relative ("path") and protocol-relative ("//host/path")
    # targets; an absolute target is returned as-is.
    return urljoin(r.url, location)
def resolve_url(url, max_redirects=30):
    """Follow the chain of redirects from `url` and return the last URL reached.

    The last URL is returned even if it responds with an error. This is
    useful for discovering where URL shorteners point to, even if the
    destination no longer exists.

    Args:
        url: the URL to resolve.
        max_redirects: upper bound on the number of redirects followed, so a
            very long chain cannot keep the function running indefinitely.

    Returns:
        The last resolved URL, which may be the original `url`. The walk
        stops at the current URL when there is no further redirect, when the
        next hop is the Twitter "account suspended" page, when the next hop
        was already visited (redirect cycle), or when `max_redirects` is
        reached.
    """
    # Iterative with a visited set: the recursive form had no bound and
    # raised RecursionError on a redirect cycle such as A -> B -> A.
    seen = {url}
    for _ in range(max_redirects):
        redirect_url = resolve_url_req(url)
        if redirect_url is None:
            break
        # add other special cases here:
        # * for the t.co shortener we want the link before the suspension url
        if redirect_url == 'https://twitter.com/account/suspended':
            break
        if redirect_url in seen:
            break
        seen.add(redirect_url)
        url = redirect_url
    return url
# test run:
urls = [
    'https://t.co/RbbnjkoqUD',
    'https://t.co/VgzGOK5k3S',
    'http://t.co/cIrTVml9Vp',
    'http://t.co/y7sIPKB1kd',
]

# Run the demo only when executed as a script, so importing this module does
# not trigger network requests. The results are printed; previously they were
# computed and discarded.
if __name__ == "__main__":
    for u in urls:
        print(u, '->', resolve_url(u))

# NOTE(review): the URLs below look like the destinations the links above
# resolved to when this was written, in the same order - TODO confirm.
#'https://vine.co/v/eHzipDIO5x7'
#'https://twitter.com/OfficialWith1D/status/629090990486237184'
#'https://www.youtube.com/watch?v=NnnSS0_-xoA'
#'https://twitter.com/JoshPainter2154/status/629274422952620033/photo/1'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment