cuimuxi’s gists

cuimuxi / gist:0d2993d176c0d1ff9a3f

Created January 13, 2015 09:07

关键词

	回民吃猪肉
	习近平
	TMD
	毛民进党
	妹妹淫水流
	机吧
	联国
	1989六四
	性爱电影
	李红智

cuimuxi / fans

Last active August 29, 2015 14:04

select fans_id from messages where content in ('形勢', '形式', '形势') and mp_id=35164 and created_time >= '2014-07-24 06:00:00' and created_time < '2014-07-24 07:00:00'group by fans_id

cuimuxi / websocket_tornado_redis.py

Created January 10, 2014 16:42 — forked from lbolla/websocket_tornado_redis.py

	"""
	This is a simple example of WebSocket + Tornado + Redis Pub/Sub usage.
	Do not forget to replace YOURSERVER by the correct value.
	Keep in mind that you need the very latest version of your web browser.
	You also need to add Jacob Kristhammar's websocket implementation to Tornado:
	Grab it here:
	http://gist.github.com/526746
	Or clone my fork of Tornado with websocket included:
	http://github.com/pelletier/tornado
	Oh and the Pub/Sub protocol is only available in Redis 2.0.0:

cuimuxi / gist:5451756

Created April 24, 2013 12:27 — forked from madebyjazz/gist:1090663

	from scrapy import log
	from scrapy.item import Item
	from scrapy.http import Request
	from scrapy.contrib.spiders import XMLFeedSpider


	def NextURL():
	"""
	Generate a list of URLs to crawl. You can query a database or come up with some other means.
	Note that if you generate URLs to crawl from a scraped URL then you're better off using a

cuimuxi / database.py

Created April 16, 2013 02:48

Facebook Tornado database

	#!/usr/bin/env python
	#
	# Copyright 2009 Facebook
	#
	# Licensed under the Apache License, Version 2.0 (the "License"); you may
	# not use this file except in compliance with the License. You may obtain
	# a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#

cuimuxi / setup.py

Created December 11, 2012 10:53 — forked from edwardgeorge/setup.py

python libxml2 binding package for easy installation with pip/easy_install into a virtualenv

	import os
	import shutil
	import subprocess
	import sys
	import tarfile
	import urllib2

	LIBXML2_PREFIX = "libxml2"
	LIBXSLT_PREFIX = "libxslt"
	LIBXML2_FTPURL = "ftp://xmlsoft.org/libxml2/"

cuimuxi / gist:3719535

Created September 14, 2012 02:54

gevent crawler test

	from gcrawler import GCrawler, Downloader
	import unittest
	import urllib2
	import logging
	import traceback
	from datetime import datetime
	import re

	logging.basicConfig(level=logging.DEBUG)

cuimuxi / gist:3719516

Created September 14, 2012 02:50

gevent crawler

	import gevent
	from gevent import monkey, queue

	monkey.patch_all()

	import urllib2
	from time import sleep
	import traceback
	import logging

cuimuxi / gist:777789

Created January 13, 2011 12:27

	# -- coding:utf-8 --
	import re
	import urllib2
	from lib.BeautifulSoup import BeautifulSoup

	agent="""Sosospider+(+http://help.soso.com/webspider.htm)"""

	blog_url = 'http://blog.sina.com.cn/s/articlelist_1517582220_0_1.html'
	spider_handle = urllib2.urlopen(blog_url)
	blog_content = spider_handle.read()