Created
January 2, 2013 14:30
-
-
Save dangra/4434935 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/scrapy/contrib/pipeline/images.py b/scrapy/contrib/pipeline/images.py | |
index 4775590..ec24c70 100644 | |
--- a/scrapy/contrib/pipeline/images.py | |
+++ b/scrapy/contrib/pipeline/images.py | |
@@ -25,6 +25,7 @@ from scrapy.contrib.pipeline.media import MediaPipeline | |
class NoimagesDrop(DropItem): | |
"""Product with no images exception""" | |
+ | |
class ImageException(Exception): | |
"""General image error exception""" | |
@@ -47,7 +48,7 @@ class FSImagesStore(object): | |
absolute_path = self._get_filesystem_path(key) | |
try: | |
last_modified = os.path.getmtime(absolute_path) | |
- except: # FIXME: catching everything! | |
+ except: # FIXME: catching everything! | |
return {} | |
with open(absolute_path, 'rb') as imagefile: | |
@@ -74,9 +75,9 @@ class S3ImagesStore(object): | |
POLICY = 'public-read' | |
HEADERS = { | |
- 'Cache-Control': 'max-age=172800', | |
- 'Content-Type': 'image/jpeg', | |
- } | |
+ 'Cache-Control': 'max-age=172800', | |
+ 'Content-Type': 'image/jpeg', | |
+ } | |
def __init__(self, uri): | |
assert uri.startswith('s3://') | |
@@ -113,8 +114,8 @@ class S3ImagesStore(object): | |
k.set_metadata('width', str(width)) | |
k.set_metadata('height', str(height)) | |
buf.seek(0) | |
- return threads.deferToThread(k.set_contents_from_file, buf, \ | |
- headers=self.HEADERS, policy=self.POLICY) | |
+ return threads.deferToThread(k.set_contents_from_file, buf, | |
+ headers=self.HEADERS, policy=self.POLICY) | |
class ImagesPipeline(MediaPipeline): | |
@@ -142,10 +143,10 @@ class ImagesPipeline(MediaPipeline): | |
EXPIRES = 90 | |
THUMBS = {} | |
STORE_SCHEMES = { | |
- '': FSImagesStore, | |
- 'file': FSImagesStore, | |
- 's3': S3ImagesStore, | |
- } | |
+ '': FSImagesStore, | |
+ 'file': FSImagesStore, | |
+ 's3': S3ImagesStore, | |
+ } | |
def __init__(self, store_uri, download_func=None): | |
if not store_uri: | |
@@ -166,7 +167,7 @@ class ImagesPipeline(MediaPipeline): | |
return cls(store_uri) | |
def _get_store(self, uri): | |
- if os.path.isabs(uri): # to support win32 paths like: C:\\some\dir | |
+ if os.path.isabs(uri): # to support win32 paths like: C:\\some\dir | |
scheme = 'file' | |
else: | |
scheme = urlparse.urlparse(uri).scheme | |
@@ -180,13 +181,13 @@ class ImagesPipeline(MediaPipeline): | |
log.msg(format='Image (code: %(status)s): Error downloading image from %(request)s referred in <%(referer)s>', | |
level=log.WARNING, spider=info.spider, | |
status=response.status, request=request, referer=referer) | |
- raise ImageException | |
+ raise ImageException('download-error') | |
if not response.body: | |
log.msg(format='Image (empty-content): Empty image from %(request)s referred in <%(referer)s>: no-content', | |
level=log.WARNING, spider=info.spider, | |
request=request, referer=referer) | |
- raise ImageException | |
+ raise ImageException('empty-content') | |
status = 'cached' if 'cached' in response.flags else 'downloaded' | |
log.msg(format='Image (%(status)s): Downloaded image from %(request)s referred in <%(referer)s>', | |
@@ -197,13 +198,15 @@ class ImagesPipeline(MediaPipeline): | |
try: | |
key = self.image_key(request.url) | |
checksum = self.image_downloaded(response, request, info) | |
- except ImageException, ex: | |
- log.err('image_downloaded hook failed: %s' % ex, | |
+ except ImageException: | |
+ whyfmt = 'Image (error): Error processing image from %(request)s referred in <%(referer)s>' | |
+ log.err(None, whyfmt % {'request': request, 'referer': referer}, | |
level=log.WARNING, spider=info.spider) | |
raise | |
- except Exception: | |
- log.err(spider=info.spider) | |
- raise ImageException | |
+ except Exception as exc: | |
+ whyfmt = 'Image (unknown-error): Error processing image from %(request)s referred in <%(referer)s>' | |
+ log.err(None, whyfmt % {'request': request, 'referer': referer}, spider=info.spider) | |
+ raise ImageException(str(exc)) | |
return {'url': request.url, 'path': key, 'checksum': checksum} | |
@@ -221,16 +224,16 @@ class ImagesPipeline(MediaPipeline): | |
def media_to_download(self, request, info): | |
def _onsuccess(result): | |
if not result: | |
- return # returning None force download | |
+ return # returning None force download | |
last_modified = result.get('last_modified', None) | |
if not last_modified: | |
- return # returning None force download | |
+ return # returning None force download | |
age_seconds = time.time() - last_modified | |
age_days = age_seconds / 60 / 60 / 24 | |
if age_days > self.EXPIRES: | |
- return # returning None force download | |
+ return # returning None force download | |
referer = request.headers.get('Referer') | |
log.msg(format='Image (uptodate): Downloaded %(medianame)s from %(request)s referred in <%(referer)s>', | |
@@ -243,7 +246,7 @@ class ImagesPipeline(MediaPipeline): | |
key = self.image_key(request.url) | |
dfd = defer.maybeDeferred(self.store.stat_image, key, info) | |
- dfd.addCallbacks(_onsuccess, lambda _:None) | |
+ dfd.addCallbacks(_onsuccess, lambda _: None) | |
dfd.addErrback(log.err, self.__class__.__name__ + '.store.stat_image') | |
return dfd | |
@@ -262,8 +265,9 @@ class ImagesPipeline(MediaPipeline): | |
width, height = orig_image.size | |
if width < self.MIN_WIDTH or height < self.MIN_HEIGHT: | |
- raise ImageException("Image too small (%dx%d < %dx%d): %s" % \ | |
- (width, height, self.MIN_WIDTH, self.MIN_HEIGHT, response.url)) | |
+ raise ImageException("Image too small (%dx%d < %dx%d): %s" % | |
+ (width, height, self.MIN_WIDTH, | |
+ self.MIN_HEIGHT, response.url)) | |
image, buf = self.convert_image(orig_image) | |
yield key, image, buf |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment