cheekybastard

from scrapy import log
from scrapy.item import Item
from scrapy.http import Request
from scrapy.contrib.spiders import XMLFeedSpider

def NextURL():
    """
    Generate a list of URLs to crawl. You can query a database or come up
    with some other means. Note that if you generate URLs to crawl from a
    scraped URL then you're better off yielding those Requests from a
    spider callback instead.
    """
    # Stand-in generator: replace with a database query or similar.
    for i in range(10):
        yield 'http://example.com/feed/%d.xml' % i
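A minimal sketch of a spider seeded from NextURL(); the class name, itertag and parse_node body are illustrative assumptions, not part of the original snippet:

class SeriesSpider(XMLFeedSpider):
    name = 'seriesspider'
    itertag = 'Series'  # assumed: iterate over <Series> nodes

    def start_requests(self):
        # Seed the crawl from the generator above.
        for url in NextURL():
            yield Request(url, dont_filter=True)

    def parse_node(self, response, node):
        log.msg('parsed a node from %s' % response.url)
        return Item()  # a real spider would populate item fields here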
<?xml version="1.0" encoding="UTF-8" ?>
<Data>
  <Series>
    <id>83462</id>
    <Actors>|Nathan Fillion|Stana Katic|Molly C. Quinn|Jon Huertas|Seamus Dever|Tamala Jones|Susan Sullivan|Ruben Santiago-Hudson|Monet Mazur|</Actors>
    <Airs_DayOfWeek>Monday</Airs_DayOfWeek>
    <Airs_Time>10:00 PM</Airs_Time>
    <ContentRating>TV-PG</ContentRating>
    <FirstAired>2009-03-09</FirstAired>
    <Genre>|Drama|</Genre>
  </Series>
</Data>
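A hedged sketch of extracting fields from one such <Series> node with the old-style scrapy selector API; parse_series and the field choices are illustrative only:

from scrapy.selector import XmlXPathSelector

def parse_series(xml_text):
    xxs = XmlXPathSelector(text=xml_text)
    series_id = xxs.select('//Series/id/text()').extract()[0]
    # Actor names are pipe-delimited with leading and trailing pipes.
    raw_actors = xxs.select('//Series/Actors/text()').extract()[0]
    actors = [name for name in raw_actors.split('|') if name]
    return series_id, actors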
section {
  padding-top: 60px;
}

.subnav {
  margin-bottom: 60px;
  width: 100%;
  height: 36px;
  background-color: #eeeeee; /* Old browsers */
  background-repeat: repeat-x; /* Repeat the gradient */
}
import grequests
from collections import deque

class RequestQueue(object):
    """
    This is a lame imitation of a Typhoeus Hydra using GRequests.
    The main thing this allows is building up a queue of requests and then
    executing them, and potentially adding requests to the queue in a callback
    so that you can build requests that depend on other requests more naturally.
    """
    def __init__(self):
        self.queue = deque()

    def add(self, request):
        self.queue.append(request)  # an unsent grequests request

    def run(self):
        while self.queue:  # callbacks may enqueue more requests as we drain
            batch, self.queue = list(self.queue), deque()
            grequests.map(batch)
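A hedged usage sketch: the add/run behaviour relies on the reconstruction above, the URL is made up, and the callback uses the plain requests hooks API:

def on_index(response, **kwargs):
    # Dependent requests: enqueue a fetch for every URL the index lists.
    for url in response.json():
        q.add(grequests.get(url))

q = RequestQueue()
q.add(grequests.get('http://example.com/urls.json',
                    hooks={'response': on_index}))
q.run()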
#!/usr/bin/env python
import redis
import random
import pylibmc
import sys

# Note: both clients point at non-default ports here (redis normally
# listens on 6379 and memcached on 11211).
r = redis.Redis(host='localhost', port=6389)
mc = pylibmc.Client(['localhost:11222'])
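A small assumed smoke test against both stores (it only works if servers are actually listening on those ports; the key name is arbitrary):

key = 'smoke:%d' % random.randint(0, 2 ** 31)
r.set(key, 'redis-value')
mc.set(key, 'memcached-value')
print r.get(key)
print mc.get(key)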
cheekybastard / python_exception_examples (created February 26, 2013 01:37)
# Python 2.x has an ambiguous except syntax, Python 3.x is stricter so the
# following examples help to identify the right way to handle Py2/3 compatible
# exceptions
# Background: http://www.python.org/dev/peps/pep-3110/
# Note that 'as' and ',' are both accepted in Python 2.x but only 'as' in Python 3.x:
# http://docs.python.org/reference/compound_stmts.html#try
# There are longer notes on re-raising, stack traces and tracebacks here:
# http://www.doughellmann.com/articles/how-tos/python-exception-handling/index.html
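A minimal sketch of the two spellings; only the 'as' form parses on Python 3:

# Py2-only (SyntaxError on Python 3.x):
#     except ValueError, e:
# Portable across Python 2.6+ and 3.x:
try:
    int('not a number')
except ValueError as e:
    print(e)

# Catching several exception types portably needs a parenthesized tuple:
try:
    {}['missing']
except (KeyError, TypeError) as e:
    print(repr(e))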
cheekybastard / datetime_parse_examples (created February 26, 2013 01:33)
datetime parse snippets
# NOTE these are code snippets
import datetime
import time
from dateutil import parser as dt_parser  # python-dateutil package

# Make a date 30 days ago, convert to a truncated string in a custom
# format, then convert back to a datetime.
filter_from = datetime.datetime.now() - datetime.timedelta(days=30)
print filter_from, type(filter_from)
filter_from_str = time.strftime("%Y-%m-%dT%H:%M", filter_from.timetuple())
print filter_from_str, type(filter_from_str)
filter_from_dt = dt_parser.parse(filter_from_str)
print filter_from_dt, type(filter_from_dt)
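dateutil's parser also copes with many formats without an explicit pattern; two assumed examples:

print dt_parser.parse("2013-02-26T01:33")      # ISO-ish string
print dt_parser.parse("Feb 26, 2013 1:33 AM")  # loose English date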

Icon resources collection

import os
from PIL import Image

def extractFrames(inGif, outFolder):
    frame = Image.open(inGif)
    nframes = 0
    while True:
        frame.save('%s/%s-%s.gif' % (outFolder, os.path.basename(inGif), nframes), 'GIF')
        nframes += 1
        try:
            frame.seek(nframes)  # advance to the next frame in the GIF
        except EOFError:
            break  # no more frames
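Assumed usage with hypothetical paths; one numbered GIF per frame lands in the output folder:

extractFrames('animation.gif', '/tmp/frames')  # /tmp/frames/animation.gif-0.gif, ...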
cheekybastard / craigslist_scrapy (created February 17, 2013 00:48)
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from myspider.items import CraigslistSampleItem

class MySpider(CrawlSpider):
    name = "craigs"
    allowed_domains = ["sfbay.craigslist.org"]
    start_urls = ["http://sfbay.craigslist.org/"]
    rules = (Rule(SgmlLinkExtractor(allow=(r"index\d+\.html",)),
                  callback="parse_items", follow=True),)  # assumed rule

    def parse_items(self, response):
        for row in HtmlXPathSelector(response).select("//p"):
            item = CraigslistSampleItem()
            item["title"] = row.select("a/text()").extract()
            item["link"] = row.select("a/@href").extract()
            yield item
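With the project's settings on the path, the spider runs via the standard scrapy crawl craigs command. Note that scrapy.contrib and SgmlLinkExtractor are old pre-1.0 Scrapy paths; modern releases moved these under scrapy.spiders and scrapy.linkextractors.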