This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Recognize the text of a single-word captcha image with OpenCV and Tesseract.
import cv2.cv as cv
import tesseract

# Load the captcha directly as a grayscale image.
gray = cv.LoadImage('captcha.jpeg', cv.CV_LOAD_IMAGE_GRAYSCALE)
# Binarize in place: pixels brighter than 231 become 255, all others become 0.
cv.Threshold(gray, gray, 231, 255, cv.CV_THRESH_BINARY)

api = tesseract.TessBaseAPI()
api.Init(".", "eng", tesseract.OEM_DEFAULT)
# Restrict recognition to digits and lowercase ASCII letters.
api.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyz")
# Treat the whole image as a single word.
api.SetPageSegMode(tesseract.PSM_SINGLE_WORD)
tesseract.SetCvImage(gray, api)
# Parenthesized so the statement parses under both Python 2 and Python 3.
print(api.GetUTF8Text())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import sys | |
import jieba | |
import numpy | |
from sklearn import metrics | |
from sklearn.feature_extraction.text import HashingVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'httparty' | |
require 'eventmachine' | |
class Request | |
include EM::Deferrable | |
@@requests = [] | |
attr_reader :method, :params |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# using such a setup requires `apt-get install lua-nginx-redis` under Ubuntu Trusty | |
# more info @ http://wiki.nginx.org/HttpLuaModule#access_by_lua | |
http { | |
lua_package_path "/etc/nginx/include.d/?.lua;;"; | |
lua_socket_pool_size 100; | |
lua_socket_connect_timeout 10ms; | |
lua_socket_read_timeout 10ms; | |
server { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var Searcher = { | |
a: function() { | |
so = this; | |
so.g(); | |
window._r_ = false; | |
window.google = { | |
td: function(a, b, c) { | |
if (window._r_) return; | |
var h = c.d; | |
var t = h.match(/<font size="-1">[^,\d]*([,\d]+)[^,\d]*条结果[^\d]*(\d+)[^\d]*(用时 <b>(.*?)<\/b> 秒) <\/font>/); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# EDIT: 2013/10/20 | |
# google has updated its kwt UI, this script doesn't work any more! | |
# Maybe I will update this script when I have time to investigate their new interface. | |
from selenium import webdriver | |
from selenium.common.exceptions import TimeoutException | |
import selenium.webdriver.support.wait | |
selenium.webdriver.support.wait.POLL_FREQUENCY = 0.05 | |
import re |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
$posting = $database->postlist_begin( $search_id ); | |
$enquire = new XapianEnquire( $database ); | |
$rset = new XapianRset(); | |
$rset->add_document( $posting->get_docid() ); | |
$eset = $enquire->get_eset(20, $rset); | |
$i = $eset->begin(); | |
$terms = array(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
# | |
#author: rex | |
#blog: http://iregex.org | |
#filename tr.py | |
#created: 2010-08-01 20:24 | |
#source uri: http://iregex.org/blog/trie-in-python.html | |
# escape bug fix by fcicq @ 2012.8.19 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
# | |
#author: rex | |
#blog: http://iregex.org | |
#filename tr.py | |
#created: 2010-08-01 20:24 | |
#source uri: http://iregex.org/blog/trie-in-python.html | |
# escape bug fix by fcicq @ 2012.8.19 |