Skip to content

Instantly share code, notes, and snippets.

@Demeter
Forked from zhasm/unshortenurl.py
Created September 20, 2011 00:14
Show Gist options
  • Save Demeter/1227980 to your computer and use it in GitHub Desktop.
Save Demeter/1227980 to your computer and use it in GitHub Desktop.
Restore shortened URLs
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#author: rex
#blog: http://iregex.org
#filename unshortenurl.py
#created: 2011-05-23 15:35
import pycurl
import StringIO
import re
class UnShortenUrl(object):
    """Resolve a shortened URL by reading the ``Location`` response header.

    The lookup is lazy: constructing the object only configures a curl
    handle, and the HTTP request is issued each time the instance is
    converted with ``str()``.

    Attributes:
        url:  the (short) URL to look up.
        curl: the configured ``pycurl.Curl`` handle.
        f:    in-memory buffer that receives the raw response headers.
    """

    # One "Location" header line.  Whitespace after the colon is optional,
    # and [ \t] (rather than \s) keeps the match from spilling over the
    # line break into the next header.
    _LOCATION_RE = re.compile(r"(?mi)^Location:[ \t]*(.+)$")

    def __init__(self, url):
        """Prepare a header-only request for *url* (nothing is sent yet)."""
        self.url = url
        c = pycurl.Curl()
        # NOBODY skips the response body; HEADER routes the response
        # headers through WRITEFUNCTION so they land in the buffer.
        c.setopt(c.NOBODY, True)
        self.curl = c
        f = StringIO.StringIO()
        c.setopt(c.HEADER, True)
        c.setopt(c.WRITEFUNCTION, f.write)
        self.f = f

    @staticmethod
    def _location_from_headers(headers):
        """Return the first ``Location`` value in *headers*, or ``""``."""
        match = UnShortenUrl._LOCATION_RE.search(headers)
        if match is None:
            return ""
        # strip() also removes the "\r" left over from the CRLF line ending.
        return match.group(1).strip()

    def __str__(self):
        """Perform the request and return the redirect target.

        Returns:
            The value of the first ``Location`` header, or ``""`` when the
            response carries none.

        Raises:
            Whatever ``self.curl.perform()`` raises on a transfer failure;
            it is deliberately not caught here.
        """
        c = self.curl
        c.setopt(c.URL, self.url)
        f = self.f
        # Discard headers from any previous call.  Rewind explicitly so the
        # reset does not depend on truncate() also moving the position.
        f.seek(0)
        f.truncate(0)
        c.perform()
        return self._location_from_headers(f.getvalue())
def UnShortenAll(text):
    """Replace every recognised short link in *text* with its target URL.

    Links on j.mp, is.gd and goo.gl (http or https, matched
    case-insensitively) are resolved through ``UnShortenUrl``.  A link that
    cannot be resolved (the lookup yields an empty string) is left in place
    rather than being removed from the text.

    Args:
        text: arbitrary text that may contain short links.

    Returns:
        *text* with each resolvable short link substituted.
    """
    def _unshorten(match):
        short_url = match.group(1)
        resolved = str(UnShortenUrl(short_url))
        # An empty result means no Location header came back; keep the link.
        return resolved or short_url

    # The "/" after the host stops look-alike domains such as
    # "goo.glass" from being treated as short links.
    return re.sub(
        r"""(?i)(https?://(?:j\.mp|is\.gd|goo\.gl)/\S*)""",
        _unshorten,
        text,
    )
def main():
    """Command-line entry point: expand the short links in ``sys.argv[1]``.

    Prints the usage line when no argument is given.  Any error raised
    while resolving links is printed as a message instead of a traceback.
    """
    import sys

    # Check for the argument explicitly, so that a genuine failure further
    # down is not misreported as a usage error.
    if len(sys.argv) < 2:
        print('''Usage: python %s <URL>''' % sys.argv[0])
        return

    try:
        print(UnShortenAll(sys.argv[1]))
    except Exception as e:
        # Top-level boundary of the script: report the error and return.
        print(str(e))


if __name__ == '__main__':
    main()
@Demeter
Copy link
Author

Demeter commented Sep 20, 2011

Checks whether a URL was shortened with one of these three services:
http://j.mp/ (formerly owned by BackType, since acquired by Twitter in June 2011)
http://is.gd/*****
http://goo.gl/ (Google's product for shortening and tracking URLs, for non-employee users; Google will soon transition to http://goo.co for official Google purposes)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment