This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.http import HtmlResponse | |
def parse(self, response):
    """Re-wrap *response* as an ``HtmlResponse`` built from a cleaned body.

    The raw body is passed through ``cc_stripped`` and decoded as UTF-8;
    the result is used as the body of a fresh ``HtmlResponse`` for the
    same URL, which is then bound to the local name ``response``.
    """
    # Modify the existing body (or obtain a new one) before re-wrapping it.
    cleaned = cc_stripped(response.body)
    text = cleaned.decode('utf-8')
    response = HtmlResponse(response.url, encoding='utf-8', body=text)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# ata bus to harddisk mappings
#
# For every block device under /sys/block whose name contains "hd" or "sd",
# print which scsi_host it hangs off and that host's unique_id (printed as
# the "ata" number).
#
# Shell globs are used instead of parsing `ls` output, all expansions are
# quoted, and devices without a readable scsi_host entry are skipped rather
# than producing error noise.
for device_path in /sys/block/*; do
    device=${device_path##*/}

    # Same name filter the original applied with grep '[hs]d'.
    case "$device" in
        *[hs]d*) ;;
        *) continue ;;
    esac

    # The /sys/block entry is resolved first, then four levels up from the
    # device directory is where the scsi_host directory is looked up.
    host_dir="/sys/block/${device}/../../../../scsi_host"
    [ -d "$host_dir" ] || continue

    # Iterate instead of assuming exactly one entry in scsi_host/.
    for host_path in "$host_dir"/*; do
        [ -r "$host_path/unique_id" ] || continue
        host=${host_path##*/}
        unique_id=$(cat "$host_path/unique_id")
        echo "ata${unique_id} is scsi_host ${host} with attached device ${device}"
    done
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MySpider(Spider):
    """Spider that seeds its crawl from a generated list of category pages."""

    # [...]

    # start requests from generator
    def start_requests(self):
        """Yield one ``Request`` per numbered category page (pages 1-246).

        The page number is substituted into the ``%s`` placeholder of the
        URL template.
        """
        url = 'http://some.page.tld/%s/category'
        # ``range`` instead of ``xrange``: identical iteration on Python 2
        # (the 246-element list is negligible) and also valid on Python 3,
        # where ``xrange`` no longer exists.
        for page in range(1, 247):
            link = url % page
            yield Request(url=link)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DefaultsItem(Item):
    """Item with default values.

    Looking up a key that has no stored value falls back to the ``default``
    entry of that key's field declaration, when one is present.
    """

    def __getitem__(self, key):
        """Return the stored value for *key*, or the field's default.

        Raises:
            KeyError: if *key* is not a declared field, or if it is declared
                but has neither a stored value nor a ``'default'`` entry.
        """
        try:
            return self._values[key]
        except KeyError:
            # No value stored: consult the field declaration. An undeclared
            # key raises KeyError from this lookup.
            field = self.fields[key]
            if 'default' in field:
                return field['default']
            # Declared, unset, and without a default: re-raise the original
            # KeyError instead of silently returning None.
            raise
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cc_stripped(x, extended=False):
    """Strip control characters from a string.

    Only characters whose code point lies in the printable ASCII window are
    kept: 32..126 by default, 32..125 when *extended* is true.

    Args:
        x: text (``str``) or raw bytes (``bytes`` / ``bytearray``).
        extended: when true, use the narrower 32..125 window.

    Returns:
        The filtered data: ``str`` for text input, ``bytes`` for byte input.
    """
    # NOTE(review): the default window already drops everything >= 127, so
    # the only extra character removed by ``extended`` is '~' (126). The
    # bounds are kept exactly as they were; confirm the intent.
    upper = 126 if extended else 127

    # On Python 3, iterating bytes yields ints, which ``ord`` rejects; filter
    # byte values directly so a raw response body can be passed in. On
    # Python 2 ``bytes`` is ``str`` and takes the text path below, as before.
    if isinstance(x, (bytes, bytearray)) and not isinstance(x, str):
        return bytes(bytearray(b for b in bytearray(x) if 32 <= b < upper))

    # A bounds comparison replaces building a ``range`` per character.
    return "".join(ch for ch in x if 32 <= ord(ch) < upper)
def parse(self, response): | |
body = cc_stripped(response.body) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def __init__(self, *args, **kwargs):
    """Initialise the spider and subscribe to the ``spider_idle`` signal.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    # NOTE(review): ``super(Spider, self)`` starts the MRO lookup *after*
    # ``Spider``; if this class merely subclasses ``Spider``, then
    # ``Spider.__init__`` itself is skipped - confirm the intended class.
    super(Spider, self).__init__(*args, **kwargs)

    # Have ``self.spider_idle`` invoked whenever the idle signal fires.
    idle_signal = signals.spider_idle
    dispatcher.connect(self.spider_idle, signal=idle_signal)
logout_done = False | |
def spider_idle(self, spider):
    """Idle-signal handler that schedules a logout request before closing.

    Does nothing when the signal is for a different spider or when
    ``self.logout_done`` is already true. Otherwise the result of
    ``self.logout()`` is scheduled on the crawler engine and
    ``DontCloseSpider`` is raised.

    Raises:
        DontCloseSpider: after the logout request has been scheduled.
    """
    # Ignore other spiders' idle signals and repeat calls after logout.
    # NOTE(review): ``logout_done`` is not set here; presumably the logout
    # callback flips it - verify.
    if spider != self or self.logout_done:
        return

    logout_request = self.logout()
    self.crawler.engine.schedule(logout_request, spider)
    raise DontCloseSpider('Session logout proceeding')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from twisted.enterprise import adbapi | |
from twisted.python import log | |
import MySQLdb | |
class ReconnectingConnectionPool(adbapi.ConnectionPool): | |
"""Reconnecting adbapi connection pool for MySQL. | |
This class improves on the solution posted at | |
http://www.gelens.org/2008/09/12/reinitializing-twisted-connectionpool/ | |
by checking exceptions by error code and only disconnecting the current |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import subprocess | |
def spidermonk(script, user): | |
""" JavaScript wrapper | |
smjs notes: not recommended for production use, | |
as it contains dangerous (security-wise) debugging features. | |
-- Make sure not to load unsafe javascripts! | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
from rauth.service import OAuth1Service | |
# Create consumer key & secret in your Magento Admin interface | |
# For an API Guideline see: | |
# http://www.magentocommerce.com/api/rest/authentication/oauth_authentication.html | |
# | |
# Short Magento setup explanation: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @source http://stackoverflow.com/questions/807878/javascript-that-executes-after-page-load#807997 | |
*/ | |
var yourFunctionName = function(){}; | |
if(window.attachEvent) { | |
window.attachEvent('onload', yourFunctionName); | |
} else { | |
if(window.onload) { | |
var curronload = window.onload; |
Newer Older