This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
from fst import * | |
from struct import pack | |
def build_dict(enc, outfile, *files): | |
entries = {} | |
for file in files: | |
with open(file, encoding=enc) as f: | |
for line in f: | |
line = line.rstrip() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.TokenStream; | |
import org.apache.lucene.analysis.ja.JapaneseAnalyzer; | |
import org.apache.lucene.analysis.ja.JapaneseTokenizer; | |
import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute; | |
import org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute; | |
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | |
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; | |
import org.apache.lucene.analysis.util.CharArraySet; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import psycopg2 | |
import xml.sax | |
from xml.sax.handler import ContentHandler | |
from dicttoxml import dicttoxml | |
INSERT_STMT = "INSERT INTO pages (id, page) VALUES('%s', '%s')" | |
COMMIT_WINDOW = 10000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package indexer; | |
import java.io.File; | |
import java.io.IOException; | |
import java.util.HashMap; | |
import java.util.Map; | |
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field.Store; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package higlighter; | |
import java.io.File; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.core.WhitespaceAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.queryparser.classic.QueryParser; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import binascii | |
import random | |
TOTAL_ELEMENTS = 2000 | |
SHARD_SIZE = 512 | |
def shard_key(base, key, total_elements, shard_size): | |
""" | |
シャードキーを計算する関数 | |
「Redis入門」リスト9-7 から引用 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import redis | |
import json | |
import time | |
import datetime | |
import uuid | |
import threading | |
import argparse | |
def provider(conn, name, total_tasks, queue, sleep=0.1): | |
""" Provider """ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*- coding: utf-8 -*- | |
def ed(s1, s2, detail=False): | |
u""" s1, s2 の編集距離を計算する. ※置換のコストは 1 """ | |
len_s1 = len(s1) | |
len_s2 = len(s2) | |
# initialize | |
m = [[0 for i in range(len_s2 + 1)] for j in range(len_s1 + 1)] | |
for i in range(1, (len_s1+1)): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3 | |
import time | |
import traceback | |
conn_meta = sqlite3.connect('/db/meta.db') | |
# get current db file and connect | |
cur = conn_meta.cursor() | |
cur.execute('SELECT path FROM db_file WHERE status = 0 ORDER BY ver DESC') | |
row = cur.fetchone() | |
current_db = row[0] if row else None |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3 | |
import redis | |
import sys | |
from timeit import Timer | |
loop = int(sys.argv[1]) | |
def ranking_sqlite(conn, key): | |
cur = conn.cursor() | |
cur.execute("SELECT val FROM ranking WHERE key = '%s' ORDER BY score DESC limit 10" % key) |