c93614’s gists

c93614 / BeyesianAvg.py

Created June 7, 2018 08:34 — forked from lastland/BeyesianAvg.py

尝试用这篇post: http://www.matrix67.com/blog/archives/5044 中的方法实现的一个自动中文抽词算法的Python程序

	# -- coding=utf-8 --
	import collections

	# Usage:
	# 我的做法是把WordsDetector.py里的结果输出到文件，
	# 然后把文件名放到下面的names列表中，运行本程序。

	names = ['name0',
	'name1',
	'name2',

c93614 / test.py

Created October 10, 2016 03:55 — forked from christianroman/test.py

Bypass Captcha using 10 lines of code with Python, OpenCV & Tesseract OCR engine

	# Read 'captcha.jpeg', binarise it, and print the text Tesseract recognises in it.
	# NOTE(review): relies on the legacy `cv2.cv` bindings and a `tesseract` Python
	# module exposing TessBaseAPI -- confirm both exist in the target environment.
	import cv2.cv as cv
	import tesseract
	# Load the image with the grayscale flag so thresholding works on one channel.
	gray = cv.LoadImage('captcha.jpeg', cv.CV_LOAD_IMAGE_GRAYSCALE)
	# Binary threshold applied in place (`gray` is both source and destination),
	# using 231 as the threshold and 255 as the maximum value.
	cv.Threshold(gray, gray, 231, 255, cv.CV_THRESH_BINARY)
	api = tesseract.TessBaseAPI()
	# Presumably "." is the tessdata location and "eng" the language -- TODO confirm.
	api.Init(".","eng",tesseract.OEM_DEFAULT)
	# Restrict recognised characters to digits and lowercase ASCII letters.
	api.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyz")
	# Treat the whole image as a single word.
	api.SetPageSegMode(tesseract.PSM_SINGLE_WORD)
	# Hand the thresholded image to the Tesseract API instance.
	tesseract.SetCvImage(gray,api)
	# Parenthesised call: prints the same text on Python 2 and is valid syntax on Python 3.
	print(api.GetUTF8Text())

c93614 / sci_classifier.py

Created January 29, 2016 15:28 — forked from 2shou/sci_classifier.py

scikit-learn Naive Bayes example

	# coding: utf-8

	import sys
	import jieba
	import numpy
	from sklearn import metrics
	from sklearn.feature_extraction.text import HashingVectorizer
	from sklearn.naive_bayes import MultinomialNB

c93614 / request.rb

Created November 26, 2015 09:15 — forked from steved/request.rb

Eventmachine Deferrable example

c93614 / 100_base.conf

Created November 5, 2015 10:12 — forked from danackerson/100_base.conf

using nginx + lua + redis for redirects and rewrites

	# using such a setup requires `apt-get install lua-nginx-redis` under Ubuntu Trusty
	# more info @ http://wiki.nginx.org/HttpLuaModule#access_by_lua

	http {
	lua_package_path "/etc/nginx/include.d/?.lua;;";
	lua_socket_pool_size 100;
	lua_socket_connect_timeout 10ms;
	lua_socket_read_timeout 10ms;

	server {

c93614 / ugly.js

Last active August 29, 2015 14:21 — forked from cuber/ugly.js

	var Searcher = {
	a: function() {
	so = this;
	so.g();
	window._r_ = false;
	window.google = {
	td: function(a, b, c) {
	if (window._r_) return;
	var h = c.d;
	var t = h.match(/<font size="-1">[^,\d]([,\d]+)[^,\d]条结果[^\d](\d+)[^\d]（用时 <b>(.*?)<\/b> 秒） <\/font>/);

c93614 / gist:0a3adaffd514bcb9cebe

Last active August 29, 2015 14:13 — forked from observerss/gist:3798896

	# EDIT: 2013/10/20
	# Google has updated its kwt UI, so this script no longer works!
	# Maybe I will update this script when I have time to investigate their new interface.

	from selenium import webdriver
	from selenium.common.exceptions import TimeoutException
	import selenium.webdriver.support.wait
	selenium.webdriver.support.wait.POLL_FREQUENCY = 0.05

	import re

c93614 / xapianmlt.php

Last active August 29, 2015 14:12 — forked from ianbarber/xapianmlt.php

	<?php

	$posting = $database->postlist_begin( $search_id );
	$enquire = new XapianEnquire( $database );
	$rset = new XapianRset();
	$rset->add_document( $posting->get_docid() );
	$eset = $enquire->get_eset(20, $rset);

	$i = $eset->begin();
	$terms = array();

c93614 / regexp-trie.py

Last active August 29, 2015 14:12 — forked from atiking/regexp-trie.py

	#!/usr/bin/python
	# -- coding: utf-8 --
	#
	#author: rex
	#blog: http://iregex.org
	#filename tr.py
	#created: 2010-08-01 20:24
	#source uri: http://iregex.org/blog/trie-in-python.html

	# escape bug fix by fcicq @ 2012.8.19

c93614 / regexp-trie.py

Last active August 29, 2015 14:12 — forked from fcicq/regexp-trie.py

	#!/usr/bin/python
	# -- coding: utf-8 --
	#
	#author: rex
	#blog: http://iregex.org
	#filename tr.py
	#created: 2010-08-01 20:24
	#source uri: http://iregex.org/blog/trie-in-python.html

	# escape bug fix by fcicq @ 2012.8.19

admin c93614