Skip to content

Instantly share code, notes, and snippets.

View dangra's full-sized avatar
🦖

Daniel Graña dangra

🦖
View GitHub Profile
import scrapy
from scrapy.http import safeurl
class Spider(scrapy.Spider):
name = 'loremipsum'
start_urls = ('https://www.lipsum.com',)
def parse(self, response):
class ReplacementClass(object):
@property
def selector(self):
return custom selector
def process_response(self, response):
cls = type(response)
newclass = type('newclass', (ReplacementClass, getmro(response.__class__), cls))
class NewClass(ReplacementClass, response.__class__):
lxc-start 1448649277.700 DEBUG lxc_start - sigchild handler set
lxc-start 1448649277.700 INFO lxc_start - 'foo' is initialized
lxc-start 1448649277.701 DEBUG lxc_start - Not dropping cap_sys_boot or watching utmp
lxc-start 1448649277.701 DEBUG lxc_conf - instanciated veth 'vethei3Ehx/vethcLzLk5', index is '21'
lxc-start 1448649277.701 INFO lxc_conf - opened /var/lib/lxc/hublxc/rootfs.hold as fd 7
lxc-start 1448649277.702 DEBUG lxc_cgroup - checking '/' (rootfs)
lxc-start 1448649277.702 DEBUG lxc_cgroup - checking '/' (aufs)
lxc-start 1448649277.702 DEBUG lxc_cgroup - checking '/proc' (proc)
#!/usr/bin/env python
import sys
import uuid
import time
from argparse import ArgumentParser
from shove import Shove
from loremipsum import Generator
def _generator(count):
$ py.test test-json-envvar.py
======================================== test session starts ========================================
platform linux2 -- Python 2.7.6, pytest-2.9.2, py-1.4.31, pluggy-0.3.1
rootdir: /home/daniel, inifile:
plugins: hypothesis-3.4.0
collected 1 items
test-json-envvar.py .
===================================== 1 passed in 5.29 seconds ======================================
description = "DNS parser"
short_description = "dns packet parser"
category = "misc"
args = {}
function on_init()
io.stdout:setvbuf 'line'
sysdig.set_snaplen(512)
chisel.set_filter("fd.port=53 and evt.dir=< and evt.type=sendmsg")
chisel.set_event_formatter("")
$ docker run -it --rm scrapinghub/scrapinghub-stack-hworker scrapy fetch https://dk.trustpilot.com/review/www.telia.dk
[sudo] password for daniel:
Unable to find image 'scrapinghub/scrapinghub-stack-hworker:latest' locally
latest: Pulling from scrapinghub/scrapinghub-stack-hworker
4edf76921243: Already exists
044c0d9e0cd9: Already exists
331fbd6c3dec: Already exists
8f76788f1cb3: Already exists
a3ed95caeb02: Already exists
494 "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
278 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
242 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
206 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
192 "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
192 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
168 "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
148 "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"
94 "Mozilla/5.0 (Window
foo1 :~$ pip install 'https://github.com/scrapinghub/python-scrapinghub/archive/sc1467-1.zip#egg=scrapinghub==1.10.0dev0'
Processing ./Library/Caches/pip/wheels/d0/e8/cd/dab62ee79e76d94ca30817e60aeeba39eb33d6f52e0124720c/scrapinghub-1.10.0.dev0-py2-none-any.whl
Collecting six>=1.10.0 (from scrapinghub==1.10.0dev0)
Using cached six-1.10.0-py2.py3-none-any.whl
Collecting retrying>=1.3.3 (from scrapinghub==1.10.0dev0)
Collecting requests (from scrapinghub==1.10.0dev0)
Using cached requests-2.13.0-py2.py3-none-any.whl
Installing collected packages: six, retrying, requests, scrapinghub
Successfully installed requests-2.13.0 retrying-1.3.3 scrapinghub-1.10.0.dev0 six-1.10.0
You are using pip version 7.1.2, however version 9.0.1 is available.
#!/bin/sh
# Print the spider name followed by its arguments, both read from the JSON
# document held in the SHUB_JOB_DATA environment variable.

# Exit as soon as a command fails.
set -e
# Raw (unquoted) value of the top-level "spider" field.
SPIDER=$(echo "$SHUB_JOB_DATA" |jq -r .spider)
# One "--key=value" line for each entry of the "spider_args" object.
ARGS="$(echo "$SHUB_JOB_DATA" |jq -r '.spider_args|to_entries|.[]|"--\(.key)=\(.value)"')"
# $ARGS is left unquoted so the shell splits it into separate words; note that
# this also splits any argument value that itself contains whitespace.
echo "$SPIDER" $ARGS