This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import simplejson as json | |
import lxml | |
class objectJSONEncoder(json.JSONEncoder): | |
"""A specialized JSON encoder that can handle simple lxml objectify types | |
>>> from lxml import objectify | |
>>> obj = objectify.fromstring("<Book><price>1.50</price><author>W. Shakespeare</author></Book>") | |
>>> objectJSONEncoder().encode(obj) | |
'{"price": 1.5, "author": "W. Shakespeare"}' | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv, httplib, json | |
from string import ascii_lowercase | |
con = httplib.HTTPSConnection('api.github.com') | |
languages = ['java', 'c', 'ruby', 'python', 'javascript'] | |
for lang in languages: | |
with open(lang + '.csv', 'wb') as csvfile: | |
csvwriter = csv.writer(csvfile, delimiter = ',', quotechar='"', quoting = csv.QUOTE_MINIMAL) | |
for ch in ascii_lowercase: | |
print("Processing repos with " + ch + " for language " + lang) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gcrawler import GCrawler, Downloader | |
import unittest | |
import urllib2 | |
import logging | |
import traceback | |
from datetime import datetime | |
import re | |
logging.basicConfig(level=logging.DEBUG) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
## | |
# Removes HTML markup from a text string. | |
# | |
# @param text The HTML source. | |
# @return The plain text. If the HTML source contains non-ASCII | |
# entities or character references, this is a Unicode string. | |
def strip_html(text): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#coding=utf8 | |
import urllib | |
import urllib2 | |
import cookielib | |
import base64 | |
import re | |
import json | |
import hashlib | |
import os |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// requires | |
var utils = require('utils'); | |
var casper = require('casper').create() | |
var casper = require('casper').create({ | |
verbose: true, | |
logLevel: "debug" | |
}); | |
// setup globals | |
var email = casper.cli.options['email'] || 'REPLACE THIS EMAIL'; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.common.exceptions import TimeoutException | |
import selenium.webdriver.support.wait | |
selenium.webdriver.support.wait.POLL_FREQUENCY = 0.05 | |
import re | |
import random | |
import collections | |
class AdwordsAutomater(object): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding=utf-8 | |
from datetime import datetime | |
import os | |
import re | |
import urllib.request | |
from html.parser import HTMLParser | |
from time import sleep | |
import socket | |
socket.setdefaulttimeout(60) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: UTF-8 -*- | |
# filename: AutoLogin.py | |
from __future__ import unicode_literals | |
import urllib2 | |
import cookielib | |
import urllib | |
import Image | |
from cStringIO import StringIO |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"hotsite":{"name":"热门网站","links":[{"name":"科学松鼠会","url":"http://songshuhui.net/","ico_url":"http://img1.guokr.com/gkimage/hf/9z/cy/hf9zcy.png","style":"","ico":""},{"name":"科学网","url":"http://www.sciencenet.cn/","ico_url":"http://img1.guokr.com/gkimage/ix/ep/rw/ixeprw.png","style":"","ico":""},{"name":"丁香园","url":"http://www.dxy.cn/","ico_url":"http://img1.guokr.com/gkimage/cu/m5/vf/cum5vf.png","style":"","ico":""},{"name":"PubMed","url":"http://www.ncbi.nlm.nih.gov/pubmed","ico_url":"http://img1.guokr.com/gkimage/2o/wv/mt/2owvmt.png","style":"","ico":""},{"name":"趣玩网","url":"http://www.quwan.com/","ico_url":"http://img1.guokr.com/gkimage/rr/50/0c/rr500c.png","style":"","ico":""},{"name":"穷游网","url":"http://www.qyer.com/","ico_url":"http://img1.guokr.com/gkimage/ym/6h/ne/ym6hne.png","style":"","ico":""},{"name":"糗事百科","url":"http://www.qiushibaike.com/","ico_url":"http://img1.guokr.com/gkimage/iz/cv/jx/izcvjx.png","style":"","ico":""},{"name":"36氪","url":"http://www.36kr.com/","ico_url":"http://img1.guokr.co |