This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
scrapy/contrib/pipeline/images.py:112: if self.IMAGES_RESULT_FIELD in item.fields:
scrapy/contrib/pipeline/files.py:270: if self.FILES_RESULT_FIELD in item.fields:
scrapy/contrib/loader/__init__.py:122: value = self.item.fields[field_name].get(key, default)
scrapy/commands/parse.py:110: if isinstance(x, BaseItem):
scrapy/contracts/default.py:86: if isinstance(x, BaseItem):
scrapy/contrib/spiders/feed.py:129: if isinstance(ret, (BaseItem, Request)):
scrapy/contrib/exporter/__init__.py:243: if isinstance(value, BaseItem):
scrapy/contrib/loader/__init__.py:121: if isinstance(self.item, Item):
scrapy/core/scraper.py:177: elif isinstance(output, BaseItem):
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dictpath(dct, path): | |
"""Resolve dictpath | |
>>> r = {'also_viewed': ['url1', 'url2']} | |
>>> list(dictpath(r, 'also_viewed')) | |
['url1', 'url2'] | |
>>> r = {'related': [{'url': 'url1'}, {'url': 'url2'}]} | |
>>> list(dictpath(r, 'related:url')) | |
['url1', 'url2'] | |
>>> r = {'related': [{'urls': ['url1', 'url2']}, {'urls': ['url3', 'url4']}]} | |
>>> list(dictpath(r, 'related:urls')) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.spider import Spider | |
from scrapy import log | |
class DummySpider(Spider):
    """Minimal test spider: crawls a single example start URL."""

    name = "dummy"
    # Requests to hosts outside these domains are dropped by the
    # offsite middleware.
    allowed_domains = ["example.com", "iana.org"]
    # Kept as a tuple, exactly as before; Scrapy accepts any iterable.
    start_urls = ('http://www.example.com/',)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections, json | |
from urllib import urlretrieve | |
from urlparse import urljoin | |
from csv import DictReader, reader as csv_reader | |
import scrapinghub | |
from project.settings import SH_APIKEY | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
LOG 1
=====
scrapy crawl state -s JOBDIR=test
/home/scrapinghub/Devel/testspiders/testspiders/spiders/dummy.py:3: ScrapyDeprecationWarning: testspiders.spiders.dummy.DummySpider inherits from deprecated class scrapy.spider.BaseSpider, please inherit from scrapy.spider.Spider. (warning only on first subclass, there may be others)
class DummySpider(BaseSpider):
/home/scrapinghub/Devel/scrapy/scrapy/contrib/linkextractors/sgml.py:106: ScrapyDeprecationWarning: SgmlLinkExtractor is deprecated and will be removed in future releases. Please use scrapy.contrib.linkextractors.LinkExtractor
ScrapyDeprecationWarning
2014-08-21 14:30:41-0300 [scrapy] INFO: Scrapy 0.25.1 started (bot: testspiders)
2014-08-21 14:30:41-0300 [scrapy] INFO: Optional features available: ssl, http11, boto
2014-08-21 14:30:41-0300 [scrapy] INFO: Overridden settings: {'CLOSESPIDER_TIMEOUT': 3600, 'CLOSESPIDER_PAGECOUNT': 1000, 'SPIDER_MODULES': ['testspiders.spiders'], 'NEWSPIDER_MODULE': 'testspiders.spiders', 'BOT_NAME': 'testspiders'}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.spider import Spider | |
from scrapy.http import Request | |
class StateSpider(Spider):
    """Demo spider that prints the persisted spider state (Python 2 syntax).

    Run with ``scrapy crawl state -s JOBDIR=<dir>`` to exercise the
    spider-state extension; ``self.state`` only exists when a job
    directory is configured.
    """
    name = 'state'

    def start_requests(self):
        # Fall back to None on a fresh run where no state was restored.
        print 'State:', getattr(self, 'state', None)
        yield Request('http://example.com')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def parse_category(self, response):
    """Yield one product request per URL found on a category page.

    Each request carries its own copy of the category item in
    ``request.meta`` so concurrent ``parse_product`` callbacks never
    mutate a shared Item instance.
    """
    item = Item()
    item['category'] = get_category(response)
    for url in get_product_urls(response):
        # Bug fix: the meta dict was closed with ']' instead of '}',
        # which is a syntax error.
        yield Request(url, callback=self.parse_product,
                      meta={'item': item.copy()})
def parse_product(self, response):
    """Populate and return the item started by ``parse_category``."""
    # Retrieve the per-request item copy passed via request meta;
    # defaults to an empty dict when the request carried no item.
    item = response.meta.get('item', {})
    ...  # body elided in the original snippet
    return item
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.spider import Spider | |
class SeveralNamesSpider(Spider): | |
def start_requests(self): | |
print 'name: {}, start_urls: {}'.format(self.name, self.start_urls) | |
for name, start_urls in (('name1', ('url1', )), ('name2', ('url2', ))): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/scrapy/selector/unified.py b/scrapy/selector/unified.py | |
index b8a3678..6ce1de1 100644 | |
--- a/scrapy/selector/unified.py | |
+++ b/scrapy/selector/unified.py | |
@@ -46,10 +46,40 @@ def _response_from_text(text, st): | |
body=unicode_to_str(text, 'utf-8')) | |
+import threading | |
+data = threading.local() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/scrapy/selector/unified.py b/scrapy/selector/unified.py | |
index b8a3678..28d8ac9 100644 | |
--- a/scrapy/selector/unified.py | |
+++ b/scrapy/selector/unified.py | |
@@ -46,6 +46,38 @@ def _response_from_text(text, st): | |
body=unicode_to_str(text, 'utf-8')) | |
+class SelectorContext(object): | |
+ |