This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- a/scrapy/contrib/downloadermiddleware/httpcompression.py | |
+++ b/scrapy/contrib/downloadermiddleware/httpcompression.py | |
@@ -33,7 +33,11 @@ class HttpCompressionMiddleware(object): | |
def _decode(self, body, encoding): | |
if encoding == 'gzip' or encoding == 'x-gzip': | |
- body = gunzip(body) | |
+ try: | |
+ body = gunzip(body) | |
+ except IOError: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
def c_avrg(the_dict, exclude): | |
""" Calculate the average excluding the given element""" | |
i = 0 | |
total = 0 | |
for e in the_dict: | |
if e != exclude: | |
i += 1 | |
total += the_dict[e] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bash$ python mp3box.py http://contenidos.comteco.com.bo/component/content/article/15-mp3-box/6434-top-40-usa.html | |
Downloading adele-rolling_in_the_deep.mp3 to /home/rolando/adele-rolling_in_the_deep.mp3 | |
Downloading blake_shelton-honey_bee.mp3 to /home/rolando/blake_shelton-honey_bee.mp3 | |
Downloading bruno_mars-grenade.mp3 to /home/rolando/bruno_mars-grenade.mp3 | |
Downloading bruno_mars-just_the_way_you_are.mp3 to /home/rolando/bruno_mars-just_the_way_you_are.mp3 | |
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://i.imgur.com/Tg068.png | |
PS1='\[\033[01;34m\]`if [ \$? = 0 ]; then echo \[\e[0\;32m\]\(^_^\); else echo \[\e[0\;31m\]\(0_0\); fi` ~ ${SECONDS}s\n\[\e[1;34m\][\t \u@\h:\w]\n\[\033[1;35m\]$>\[\033[00m\] ' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from disco import func | |
from disco.core import Job | |
def mapper((id, tweet), params): | |
import rfc822 | |
from datetime import datetime, timedelta | |
from time import mktime | |
utc_dt = datetime.fromtimestamp(mktime(rfc822.parsedate(tweet['created_at']))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def merge_dicts(dict_list): | |
"""Merge all values from dict list into a single dict | |
>>> d1 = {'a': 1, 'b': 2} | |
>>> d2 = {'a': 2, 'b': 3} | |
>>> merge_dicts([d1, d2]) | |
{'a': [1, 2], 'b': [2, 3]} | |
""" | |
kviter = chain.from_iterable(d.iteritems() for d in dict_list) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import redis | |
from scrapy.dupefilter import BaseDupeFilter | |
from scrapy.utils.request import request_fingerprint | |
class RedisDupeFilter(BaseDupeFilter): | |
def __init__(self, host, port): | |
self.redis = redis.Redis(host, port) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The contents of this file are subject to the Python Software Foundation | |
# License Version 2.3 (the License). You may not copy or use this file, in | |
# either source code or executable form, except in compliance with the License. | |
# You may obtain a copy of the License at http://www.python.org/license. | |
# | |
# Software distributed under the License is distributed on an AS IS basis, | |
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | |
# for the specific language governing rights and limitations under the | |
# License. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
python parse_urls.py http://somesite/foo/ ".pdf\$" | |
""" | |
import sys | |
import urllib2 | |
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor | |
from scrapy.http import HtmlResponse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Simple script to convert an image into a data URI. | |
More info http://en.wikipedia.org/wiki/Data_URI_scheme | |
""" | |
import base64 | |
import mimetypes | |
import sys |