Created
September 13, 2014 17:40
-
-
Save nyov/ed79aecea88373ac0535 to your computer and use it in GitHub Desktop.
basic scrapy login
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging

from scrapy import Spider
from scrapy.exceptions import CloseSpider
from scrapy.http import FormRequest, Request
from scrapy.selector import Selector
class MySpider(Spider):
    """Spider template that logs in through an HTML form before crawling.

    The crawl starts at ``login_url`` instead of ``start_urls``: the login
    form found there is filled in with ``login_user`` / ``login_pass`` and
    submitted, the resulting page is checked for ``login_success_marker``,
    and only then are the ``start_urls`` requested.
    """

    name = ''
    allowed_domains = [
    ]
    start_urls = [
    ]

    # Login
    login_user = ''
    login_pass = ''
    login_url = ''

    # Text that must appear in the page returned after submitting the login
    # form for the login to count as successful. Override per site.
    login_success_marker = 'You are logged in // Welcome, Scraper // Whatever'

    # ... the usual (Rules or something)

    def start_requests(self):
        """Return the initial request, which fetches the login page."""
        self.log('Starting up with login...', level=logging.INFO)
        return [Request(url=self.login_url, callback=self.login)]

    def login(self, response):
        """Fill in and submit the login form found in ``response``.

        The names of the username and password inputs are read from the
        form itself, so only the form's ``name`` attribute is hard-coded.

        Raises:
            CloseSpider: if the form, or its text/password input, is missing.
        """
        self.log('Attempting to login', level=logging.DEBUG)
        loginform = Selector(response).xpath('//form[@name="loginForm"]')  # your form name or whatever

        formname = self._first_or_none(loginform.xpath('@name'))
        # The leading "." keeps the search inside the login form; a bare
        # "//input" would match inputs anywhere in the document.
        username = self._first_or_none(
            loginform.xpath('.//input[@type="text"]/@name'))
        password = self._first_or_none(
            loginform.xpath('.//input[@type="password"]/@name'))

        if formname is None or username is None or password is None:
            self.log('Login form or its credential fields not found',
                     level=logging.ERROR)
            raise CloseSpider('Login form not found')

        return [FormRequest.from_response(
            response,
            callback=self.check_login,
            formname=formname,
            formdata={
                username: self.login_user,
                password: self.login_pass,
            },
        )]

    def check_login(self, response):
        """Verify the login succeeded, then request every start URL.

        Raises:
            CloseSpider: if ``login_success_marker`` is not in the response.
        """
        body = self._response_text(response)
        if self.login_success_marker not in body:
            self.log("Login failed", level=logging.ERROR)
            # Keep the page around in the log to help diagnose the failure.
            self.log(body, level=logging.DEBUG)
            raise CloseSpider('Login failed')

        self.log('Logged in', level=logging.INFO)
        # don't care for this response, switch URL now
        for url in self.start_urls:
            # No callback given, so the response is handled by self.parse.
            # dont_filter=True lets a start URL through even if it was
            # already requested (e.g. when it equals login_url).
            yield Request(url, dont_filter=True)

    def parse(self, response):
        """Handle a page fetched with the logged-in session.

        Placeholder: do something with the logged in session here, and
        possibly re-check at some times that we are still logged in.
        """
        pass

    @staticmethod
    def _first_or_none(selection):
        """Return the first extracted value of ``selection``, or None if empty."""
        values = selection.extract()
        return values[0] if values else None

    @staticmethod
    def _response_text(response):
        """Return the response body as ``str`` on both Python 2 and Python 3."""
        body = response.body
        if isinstance(body, str):
            # Python 2: the body is already a (byte) str.
            return body
        # Python 3: the body is bytes and must be decoded before comparing
        # it with a str marker.
        encoding = getattr(response, 'encoding', None) or 'utf-8'
        return body.decode(encoding, 'replace')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment