# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import unicodedata
import re

import scrapy
from scrapy.loader.processors import TakeFirst, MapCompose


def slugify(value, allow_unicode=False):
    """
    Convert to ASCII if 'allow_unicode' is False. Convert spaces to hyphens.
    Remove characters that aren't alphanumerics, underscores, or hyphens.
    Convert to lowercase. Also strip leading and trailing whitespace.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        value = unicodedata.normalize('NFKD', value).encode(
            'ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value.lower()).strip()
    return re.sub(r'[-\s]+', '-', value)


class ScrapeimagesItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    image_urls = scrapy.Field()
    images = scrapy.Field()
    book_name = scrapy.Field(
        input_processor=MapCompose(slugify),
        output_processor=TakeFirst()
    )
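The item's processors only run when the item is populated through an ItemLoader. Below is a minimal sketch of a spider that does so; the spider name, start URL, and CSS selectors are placeholders for illustration and are not part of the gist.

from scrapy import Spider
from scrapy.loader import ItemLoader

from ..items import ScrapeimagesItem  # relative import, assuming the standard Scrapy project layout


class BooksSpider(Spider):
    name = 'books'  # hypothetical spider name
    start_urls = ['http://books.toscrape.com/']  # placeholder start URL

    def parse(self, response):
        for book in response.css('article.product_pod'):  # placeholder selector
            loader = ItemLoader(item=ScrapeimagesItem(), selector=book)
            # book_name passes through MapCompose(slugify), then TakeFirst()
            loader.add_css('book_name', 'h3 a::attr(title)')
            # ImagesPipeline expects image_urls to be a list of absolute URLs;
            # the default output processor keeps the collected values as a list
            loader.add_value(
                'image_urls',
                response.urljoin(book.css('img::attr(src)').get()))
            yield loader.load_item()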
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
from scrapy import Request
from scrapy.pipelines.images import ImagesPipeline


class ScrapeimagesPipeline(ImagesPipeline):

    def get_media_requests(self, item, info):
        # Pass the slugified book name along with each image request so that
        # file_path() can use it as the filename.
        return [Request(x, meta={'BookName': item.get('book_name')})
                for x in item.get(self.images_urls_field, [])]

    def file_path(self, request, response=None, info=None):
        # start of deprecation warning block (can be removed in the future)
        def _warn():
            from scrapy.exceptions import ScrapyDeprecationWarning
            import warnings
            warnings.warn('ImagesPipeline.image_key(url) and file_key(url) methods are deprecated, '
                          'please use file_path(request, response=None, info=None) instead',
                          category=ScrapyDeprecationWarning, stacklevel=1)

        # check if called from image_key or file_key with url as first argument
        if not isinstance(request, Request):
            _warn()
            url = request
        else:
            url = request.url

        # detect if file_key() or image_key() methods have been overridden
        if not hasattr(self.file_key, '_base'):
            _warn()
            return self.file_key(url)
        elif not hasattr(self.image_key, '_base'):
            _warn()
            return self.image_key(url)
        # end of deprecation warning block

        # Name the file after the book rather than the default URL hash;
        # ':' is stripped because it is not allowed in Windows filenames.
        filename = request.meta['BookName'].replace(':', '')
        return 'full/%s.jpg' % filename
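The middle of file_path() is a compatibility shim copied from Scrapy 1.x. On a Scrapy version where the deprecated file_key()/image_key() methods have been removed, the shim is not only unnecessary but breaks (self.file_key no longer exists), so the pipeline can be reduced to the sketch below; same behaviour, assuming the same item fields as above.

from scrapy import Request
from scrapy.pipelines.images import ImagesPipeline


class ScrapeimagesPipeline(ImagesPipeline):
    def get_media_requests(self, item, info):
        return [Request(x, meta={'BookName': item.get('book_name')})
                for x in item.get(self.images_urls_field, [])]

    def file_path(self, request, response=None, info=None):
        filename = request.meta['BookName'].replace(':', '')
        return 'full/%s.jpg' % filename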
ITEM_PIPELINES = {
    '<project_name>.pipelines.ScrapeimagesPipeline': 1,  # please replace <project_name> with your project name
}
IMAGES_STORE = 'C:\\Users\\username\\Desktop'  # please replace username with your username
DOWNLOAD_TIMEOUT = 1200
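With these settings and the file_path() override above, each downloaded image is stored under IMAGES_STORE as full/<slugified-book-name>.jpg, and the pipeline records the stored path and checksum of each download in the item's images field. DOWNLOAD_TIMEOUT raises the per-request timeout from Scrapy's default of 180 seconds to 20 minutes, which gives large image files more time to finish downloading.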