Skip to content

Instantly share code, notes, and snippets.

@denysbutenko
Last active December 11, 2015 19:28
Show Gist options
  • Save denysbutenko/4648518 to your computer and use it in GitHub Desktop.
Save denysbutenko/4648518 to your computer and use it in GitHub Desktop.
Pipelines.py
from scrapy.contrib.pipeline.images import ImagesPipeline
from scrapy.exceptions import DropItem
from scrapy.http import Request
class MyImagesPipeline(ImagesPipeline):
    """Images pipeline that keys images by URL file name and drops items without images.

    Overrides three hooks of ``ImagesPipeline``: ``image_key``,
    ``get_media_requests`` and ``item_completed``.
    """

    def image_key(self, url):
        """Return the key for *url*: its last ``/``-separated segment plus ``.jpg``.

        The suffix is appended unconditionally, so a URL ending in
        ``photo.jpg`` yields ``photo.jpg.jpg``.

        NOTE(review): presumably the base pipeline uses this value as the
        stored image's relative path -- confirm against the Scrapy version
        in use.
        """
        image_guid = url.rsplit('/', 1)[-1]
        return '%s.jpg' % image_guid

    def get_media_requests(self, item, info):
        """Yield one ``Request`` per URL listed in the item's ``image_urls`` field.

        An item that has no ``image_urls`` value yields no requests instead
        of raising ``KeyError``; such an item is then rejected by
        ``item_completed`` because it ends up with no image paths.
        """
        for image_url in item.get('image_urls', []):
            yield Request(image_url)

    def item_completed(self, results, item, info):
        """Store the paths of the successful results on the item and return it.

        *results* is iterated as ``(ok, value)`` pairs; for every pair whose
        ``ok`` flag is truthy, ``value['path']`` is collected.

        Raises:
            DropItem: if no result succeeded, i.e. the item has no images.
        """
        # Only successful entries are indexed by 'path'; failed ones are skipped.
        image_paths = [x['path'] for ok, x in results if ok]
        if not image_paths:
            raise DropItem("Item contains no images")
        item['image_paths'] = image_paths
        return item
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment