-
-
Save hugo53/1247487 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.spider import BaseSpider
from scrapy.http import FormRequest, Request
from scrapy.selector import HtmlXPathSelector
class DjangoSpider(BaseSpider):
    """Spider that logs in to a local Django admin site.

    The crawl starts at the admin URL listed in ``start_urls``. ``parse``
    submits the login form found on that page and ``after_login`` inspects
    the page that comes back to decide whether the login succeeded.
    """

    domain_name = "django.local"
    start_urls = ["http://localhost:8000/admin/"]
    extra_domain_names = ["localhost"]

    def parse(self, response):
        """Submit the login form contained in *response*.

        Returns a ``FormRequest`` built from the form in *response* with the
        credentials filled in; its callback is ``after_login``.
        """
        # log in into django's admin interface (hard-coded example credentials)
        data = {'username': 'admin', 'password': 'admin'}
        return FormRequest.from_response(
            response,
            formdata=data,
            callback=self.after_login,
            # After logging in we are redirected to the same URL, which the
            # duplicates filter would otherwise drop.
            dont_filter=True,
        )

    def after_login(self, response):
        """Check whether the login succeeded and continue the crawl.

        If the login form is present in the returned page the credentials
        were rejected: a message is logged and nothing is returned.
        Otherwise the response is treated as Django's dashboard.
        """
        # response.body is raw bytes on Python 3, so test against a bytes
        # literal; on Python 2 b'' is plain str, so behavior is unchanged.
        if b'id="form_login"' in response.body:
            # login form displayed again, could not be able to login
            self.log("Username or password incorrect")
        else:
            # response is django's dashboard
            hxs = HtmlXPathSelector(response)
            # do stuff and/or return new requests
# Module-level spider instance.
# NOTE(review): presumably picked up by Scrapy's legacy spider loader - confirm.
SPIDER = DjangoSpider()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment