Skip to content

Instantly share code, notes, and snippets.

View nside's full-sized avatar

nside

View GitHub Profile
from twisted.web.iweb import IBodyProducer
from twisted.internet import defer
from twisted.web.client import Agent
from twisted.web.http_headers import Headers
from zope.interface import implements
import urllib
class StringProducer(object):
implements(IBodyProducer)
@nside
nside / gist:8078726
Created December 22, 2013 05:18
Grok filter for parsing scrapyd crawl-stats log lines in Logstash (for Kibana / Elasticsearch)
# Parse crawl-stats log lines of the form
#   <ISO8601 timestamp> [<spider>] <LEVEL>: Crawled N pages (at N pages/min), scraped N items (at N items/min)
# into the fields ts, spider, level, pages2, pages_rate2, items2 and items_rate2.
# The four numeric captures are cast to integers via the ":int" type suffix.
# Grok captures must be written %{SYNTAX:SEMANTIC:TYPE}; a stray "%" before the
# closing brace (as in "int%}") makes the capture malformed.
filter {
  grok {
    match => [ "message", "%{TIMESTAMP_ISO8601:ts} \[%{USERNAME:spider}\] %{LOGLEVEL:level}: Crawled %{NUMBER:pages2:int} pages \(at %{NUMBER:pages_rate2:int} pages/min\), scraped %{NUMBER:items2:int} items \(at %{NUMBER:items_rate2:int} items/min\)" ]
  }
}
@nside
nside / json2csv.py
Created December 6, 2013 23:45
Converts a stream from JSON to CSV
#!/usr/bin/env python
import csv, json, sys, cStringIO, codecs
from operator import itemgetter
def fmt(s):
if s == None:
return ''
elif isinstance(s, basestring):
return s.encode('utf-8')
@nside
nside / distribute.py
Last active December 24, 2015 17:19
Distributes a callback using multiprocessing queues and writes the result back to a file
import multiprocessing, time
from multiprocessing import Pool, JoinableQueue, Process
# Value placed on the work queue to tell a worker to stop consuming.
SENTINEL = -1


def worker(q, rq, callback):
    """Consume items from ``q`` until ``SENTINEL`` is received.

    Each item is handed to ``callback(rq, item)`` and then acknowledged
    with ``q.task_done()``.

    Args:
        q: Work queue; must provide ``get()`` and ``task_done()``.
        rq: Passed through unchanged as the first argument of ``callback``
            (presumably the result queue -- confirm against the caller).
        callback: Callable invoked as ``callback(rq, item)`` for every item.
    """
    # iter(callable, sentinel) keeps calling q.get() until it returns a
    # value equal to SENTINEL.
    for item in iter(q.get, SENTINEL):
        callback(rq, item)
        q.task_done()
    # The sentinel was also fetched with get(), so it needs its own
    # acknowledgement; otherwise the queue's unfinished-task count would
    # never reach zero.
    q.task_done()