Last active
December 10, 2015 13:58
-
-
Save tzengerink/4443900 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
BROWSER | |
------- | |
Get internet pages, send post requests, inspect HTML using BeautifulSoup | |
or simply check response codes of an URL. | |
Example usage: | |
# Create new Browser instance | |
browser = Browser() | |
# Login (or simply post data) to a website | |
browser.post('http://url.to/login', {'username':'john', | |
'password':'pass123'}) | |
# Get page contents | |
html = browser.get('http://url.to/password-protected-page') | |
# Get page contents as BeautifulSoup | |
soup = browser.soup('http://url.to/password-protected-page') | |
# Get the response code for a given URL | |
code = browser.code('http://url.to/password-protected-page') | |
Copyright (c) 2013 T. Zengerink | |
Licensed under MIT License. | |
See: https://gist.github.com/raw/3151357/6806e68cb9cc0042b265f25be9bc25dd39f75267/LICENSE.md | |
""" | |
import cookielib, sys, urllib, urllib2 | |
from BeautifulSoup import BeautifulSoup | |
class Browser(object):
    """
    Minimal cookie-aware HTTP client built on urllib2.

    Every request goes through one opener that shares a single cookie jar,
    so cookies received from an earlier response (e.g. after posting a login
    form) are sent along with later requests made through the same instance.
    """

    def __init__(self):
        """
        Create the cookie jar and the opener that all requests go through.
        """
        self.cookie_jar = cookielib.CookieJar()
        self.cookie_proc = urllib2.HTTPCookieProcessor(self.cookie_jar)
        self.opener = urllib2.build_opener(self.cookie_proc)

    def code(self, url):
        """
        Get the HTTP response code for a given URL.

        The request is made through the shared opener, so cookies stored by
        earlier requests are sent along.

        url -- URL to fetch and get code from.

        Returns the status code of the response. HTTP error responses
        (4xx/5xx) are reported through their code instead of raising.
        Responses that carry no status code (non-HTTP schemes) count as 200.

        Raises urllib2.URLError when no response is received at all
        (unknown host, refused connection, ...).
        """
        try:
            response = self.opener.open(url)
        except urllib2.HTTPError as e:
            # Only HTTPError carries a status code; a plain URLError means
            # there was no response and is deliberately left to propagate.
            status = e.code
            e.close()
            return status
        try:
            status = response.getcode()
        finally:
            response.close()
        # getcode() is None when the handler has no notion of a status code.
        if status is None:
            return 200
        return status

    def get(self, url):
        """
        Get the given URL and return the contents.

        url -- URL to fetch.

        Returns the raw response body. The response is closed before
        returning.
        """
        response = self.opener.open(url)
        try:
            return response.read()
        finally:
            response.close()

    def post(self, url, data):
        """
        Post data to a given URL.

        url  -- URL to post to.
        data -- Mapping (or sequence of two-item tuples) of form fields; it
                is url-encoded and sent as the request body.

        Returns the open response object; the caller can read() from it and
        should close() it when done.
        """
        return self.opener.open(url, urllib.urlencode(data))

    def soup(self, url):
        """
        Get BeautifulSoup from the page contents.

        url -- URL to fetch.

        Returns a BeautifulSoup object built from the fetched page.
        """
        return BeautifulSoup(self.get(url))
def main(args):
    """
    Fetch every URL given in args and print the page contents.

    args -- Iterable of URLs to fetch, in order.
    """
    fetcher = Browser()
    for url in args:
        page = fetcher.get(url)
        print(page)
if __name__ == "__main__":
    # Skip the script name; every remaining argument is a URL to fetch.
    urls = sys.argv[1:]
    main(urls)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment