@swshan
swshan / script_01.py
Created November 7, 2015 15:07
http://zone.wooyun.org/content/23689, fetch Twitter location data
# coding=UTF-8
import urllib
from anonBrowser import *
import json
import re
import urllib2

class reconPerson:
    def __init__(self, handle):
        self.handle = handle
# coding=UTF-8
import urllib2
import optparse
from bs4 import BeautifulSoup
from urlparse import urlsplit
from os.path import basename
from PIL import Image
from PIL.ExifTags import TAGS

def findImages(url):
    # Body cut off in the preview; this completion is an assumption based on the imports
    urlContent = urllib2.urlopen(url).read()
    soup = BeautifulSoup(urlContent)
    return soup.findAll('img')
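A minimal sketch of the EXIF step the PIL imports above point at (get_exif is an illustrative helper, not part of the gist):
def get_exif(image_path):
    # Illustrative only: map numeric EXIF tag IDs to names such as 'GPSInfo'
    info = Image.open(image_path)._getexif()  # None when the file carries no EXIF data
    if info is None:
        return {}
    return dict((TAGS.get(tag, tag), value) for tag, value in info.items())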
@swshan
swshan / rc2.py
Created November 8, 2015 09:28
Slightly improved version of the data-scraping script; the main change is the gevent spawn usage
#-*- coding:utf-8 -*-
''' For avoiding KeyError '''
from gevent import monkey
monkey.patch_all()
import gevent
import sys
import re
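A minimal sketch of the gevent spawn pattern the description refers to (fetch and the URL list are illustrative, not from the gist):
import urllib2

def fetch(url):
    # Cooperative once monkey.patch_all() has patched the socket module
    return urllib2.urlopen(url).read()

urls = ['http://example.com/a', 'http://example.com/b']
jobs = [gevent.spawn(fetch, u) for u in urls]
gevent.joinall(jobs, timeout=10)
pages = [job.value for job in jobs]  # None for any job that failed or timed out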
@swshan
swshan / test-01.py
Created November 10, 2015 15:23
Learning Python example for argument
# coding=utf-8

def function_get_args(a, b, c):
    print a, b, c
    return_val = a * 5
    return return_val

def function_get_args_return_tuple(a, b, c):
    print a, b, c
    return a, b, c  # preview cuts off here; the name implies a tuple return
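Illustrative calls (not in the preview; the tuple return follows the completion sketched above):
print function_get_args(1, 2, 3)               # prints "1 2 3", then 5
print function_get_args_return_tuple(4, 5, 6)  # prints "4 5 6", then (4, 5, 6)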
@swshan
swshan / crawler.r.2.4.py
Last active November 11, 2015 03:29
Modified on 11.11 to rework the coupling
#-*- coding:utf-8 -*-
"""
Date 15-11-11 latest progress
"""
import time
import sys
import re
import gevent
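The preview stops at the imports; a sketch of the fetch/parse split that the coupling note suggests (download, parse, and the URL are illustrative, not from the gist):
import urllib2

def download(url):
    # I/O stage, kept separate from parsing
    return urllib2.urlopen(url).read()

def parse(html):
    # CPU stage: extract links with re
    return re.findall(r'href="([^"]+)"', html)

job = gevent.spawn(download, 'http://example.com')
job.join()
print parse(job.value)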
@swshan
swshan / crawler.r.2.6.py
Created November 25, 2015 01:44
data grab script
#-*- coding:utf-8 -*-
"""
Date 15-11-11 latest progress
"""
import time
import sys
import re
@swshan
swshan / crawler.r10.py
Last active January 13, 2016 04:41
my task data crawler, revision 10; add Redis
# -*- coding:utf-8 -*-
"""
Date 16-Jan-3q latest progress
"""
#from __future__ import print_function
import gevent
from gevent import monkey
import requests
import random
def getproxy():
    ''' Get a dict-format proxy at random. '''
    foo = ['124.200.33.146:8118', '112.90.72.83:80']
    rand = random.choice(foo)
    # Preview ends here; a requests-style dict matches the docstring's intent
    return {'http': 'http://' + rand}
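Illustrative use with the requests import above (the URL is a placeholder):
resp = requests.get('http://example.com', proxies=getproxy(), timeout=10)
print resp.status_code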
# coding=utf-8
import urllib2
from bs4 import BeautifulSoup

def get(url):
    req = urllib2.Request(url)
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)")
    req.add_header("Referer", url)
    req.add_header("Cookie", "jyean=flVZbRitNuOM44i3VjCPjP93MgxEj24GZ8yvM6fZ9HM4MfiQXQP6_eTlsA1lvSOqypz4l1Dj8OV4X-1xEwkoKZ6P1SyGgiNtNxi3n50-q6H_cTMs3jTa8oHBA47MLuyU0; JYE_FP2=58f1a56e5f3cea7fd10be747461acba5; CNZZDATA2018550=cnzz_eid%3D562178188-1448873225-http%253A%252F%252Fwww.jyeoo.com%252F%26ntime%3D1448873225")
    # Preview stops mid-function; issuing the request is the natural next step
    return urllib2.urlopen(req).read()