anderser · January 26, 2012 12:01
diff --git a/gettag.py b/gettag.py
 #!/usr/bin/python
 # -*- coding: utf-8 -*-

 import re
 import requests

 def get_page_tag(url, title_re=re.compile(r'<title>(.*?)</title>', re.UNICODE)):
     """Fetch a page and return the text captured by ``title_re``.

     Downloads ``url`` with ``requests.get`` and searches the response text
     with ``title_re``. With the default pattern this yields the contents of
     the page's ``<title>`` tag, but any compiled pattern with at least one
     capturing group can be supplied to extract something else.

     Requirements:
         requests (pip install requests)

     Args:
         url: Address of the page to fetch.
         title_re: Compiled regular expression; the text of its first
             capturing group is what gets returned.

     Returns:
         The text of group 1 of the first match found in the response text.

     Raises:
         Exception: If the response status code is not 200 (the status code
             is passed as the exception argument), or if ``title_re`` does
             not match anywhere in the response text.

     Example usage:

     Simple

         >>> title = get_page_tag("http://www.tv2.no/nyheter/innenriks/her-ligger-24-tonn-kylling-i-groefta-3692748.html")

     Supply your own regular expression to filter out what you want to be returned

         >>> myre = re.compile(r'<title>(.*?) - TV 2 Nyhetene</title>', re.UNICODE)
         >>> title = get_page_tag("http://www.tv2.no/nyheter/innenriks/her-ligger-24-tonn-kylling-i-groefta-3692748.html", title_re=myre)

     """
     r = requests.get(url)
     if r.status_code != 200:
         raise Exception(r.status_code)

     match = title_re.search(r.text)
     if match is None:
         # Raise instead of returning the exception object: a returned
         # Exception instance would be mistaken for a title by the caller.
         raise Exception("No match for title in page")
     return match.group(1)
	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	import re
	import requests

	def get_page_tag(url, title_re=re.compile(r'<title>(.*?)</title>', re.UNICODE)):
	    """Fetch a page and return the text captured by ``title_re``.

	    Downloads ``url`` with ``requests.get`` and searches the response text
	    with ``title_re``. With the default pattern this yields the contents of
	    the page's ``<title>`` tag, but any compiled pattern with at least one
	    capturing group can be supplied to extract something else.

	    Requirements:
	        requests (pip install requests)

	    Args:
	        url: Address of the page to fetch.
	        title_re: Compiled regular expression; the text of its first
	            capturing group is what gets returned.

	    Returns:
	        The text of group 1 of the first match found in the response text.

	    Raises:
	        Exception: If the response status code is not 200 (the status code
	            is passed as the exception argument), or if ``title_re`` does
	            not match anywhere in the response text.

	    Example usage:

	    Simple

	        >>> title = get_page_tag("http://www.tv2.no/nyheter/innenriks/her-ligger-24-tonn-kylling-i-groefta-3692748.html")

	    Supply your own regular expression to filter out what you want to be returned

	        >>> myre = re.compile(r'<title>(.*?) - TV 2 Nyhetene</title>', re.UNICODE)
	        >>> title = get_page_tag("http://www.tv2.no/nyheter/innenriks/her-ligger-24-tonn-kylling-i-groefta-3692748.html", title_re=myre)

	    """
	    r = requests.get(url)
	    if r.status_code != 200:
	        raise Exception(r.status_code)

	    match = title_re.search(r.text)
	    if match is None:
	        # Raise instead of returning the exception object: a returned
	        # Exception instance would be mistaken for a title by the caller.
	        raise Exception("No match for title in page")
	    return match.group(1)
No results found