Created
November 28, 2012 18:05
-
-
Save dangra/4162924 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tempfile | |
from scrapy import log | |
from scrapy.http import Request | |
from scrapy.spider import BaseSpider | |
from testspiders.items import Page | |
# Placeholder text shared by LoremipsumSpider: it is written to the temporary
# file the spider requests, sliced into the log messages emitted by parse(),
# and used for the title/body fields of the yielded Page item.
LOREMIPSUM = '''\ | |
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed | |
diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat | |
volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper | |
suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum | |
iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum | |
dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio | |
dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te | |
feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend option | |
congue nihil imperdiet doming id quod mazim placerat facer possim assum. Typi | |
non habent claritatem insitam; est usus legentis in iis qui facit eorum | |
claritatem. Investigationes demonstraverunt lectores legere me lius quod ii | |
legunt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem | |
consuetudium lectorum. Mirum est notare quam littera gothica, quam nunc putamus | |
parum claram, anteposuerit litterarum formas humanitatis per seacula quarta | |
decima et quinta decima. Eodem modo typi, qui nunc nobis videntur parum clari, | |
fiant sollemnes in futurum.''' | |
class LoremipsumSpider(BaseSpider):
    """Test spider that crawls a local temporary file filled with LOREMIPSUM.

    ``start_requests`` writes the placeholder text to a named temporary file
    and requests it through a ``file://`` URL. ``parse`` then logs one message
    at each of the DEBUG, INFO, WARNING and ERROR levels and yields a single
    ``Page`` item.
    """

    name = "loremipsum"

    def start_requests(self):
        """Create the temporary lorem-ipsum file and yield a request for it."""
        # Keep the handle on the spider instance: NamedTemporaryFile removes
        # the file as soon as the handle is closed or garbage-collected.
        self.loremfile = tempfile.NamedTemporaryFile()
        self.loremfile.write(LOREMIPSUM)
        # The write above is buffered; flush so the content is actually on
        # disk before the file is opened by name to serve the request.
        self.loremfile.flush()
        yield Request('file://{0}'.format(self.loremfile.name))

    def parse(self, response):
        """Log four slices of LOREMIPSUM at rising levels and yield a Page.

        The log messages and the item's title/body are taken from the
        LOREMIPSUM constant, not from ``response``; only ``response.url`` is
        read from the response.
        """
        self.log(LOREMIPSUM[:30], level=log.DEBUG)
        self.log(LOREMIPSUM[30:60], level=log.INFO)
        self.log(LOREMIPSUM[60:90], level=log.WARNING)
        self.log(LOREMIPSUM[90:120], level=log.ERROR)
        yield Page(url=response.url, title=LOREMIPSUM[:20], body=LOREMIPSUM)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment