Skip to content

Instantly share code, notes, and snippets.

@tomotaka
tomotaka / concurrentcrawler.py
Last active August 29, 2015 14:13
concurrent crawler using gevent
import gevent.queue as gq
# Holds the configuration and the bounded work queue for a crawler.
# NOTE(review): only the constructor is visible in this excerpt; the rest of
# the class is not shown here.
class ConcurrentCrawler(object):
# func: callable stored on the instance (how it is invoked is not visible here).
# concurrency: presumably the number of concurrent workers -- TODO confirm.
# q_max: passed to the gevent queue as its maxsize.
# NOTE(review): the default `crawl_all` must already be defined when this
# `def` executes; its definition is not visible in this excerpt.
def __init__(self, func=crawl_all, concurrency=10, q_max=100):
self._func = func
self._concurrency = concurrency
self._q_max = q_max
# Queue bounded by q_max (gq is gevent.queue).
self._queue = gq.Queue(maxsize=self._q_max)
# Initialised to None; nothing in this excerpt populates it.
self._workers = None
@tomotaka
tomotaka / schwartz.java
Created June 23, 2014 07:18
schwartz.java
/**
 * Mutable pair of an arbitrary object and an integer index.
 */
class Tuple {
    public Object obj;
    public int idx;

    /**
     * @param obj the object to hold (stored as-is, may be null)
     * @param idx the integer stored alongside it
     */
    Tuple(Object obj, int idx) {
        this.obj = obj;
        this.idx = idx;
    }
}
// Wrap every element of `objects` in a Tuple that also records its position
// in iteration order (0, 1, 2, ...).
// NOTE(review): `objects` is declared outside this excerpt; the file name
// suggests this is the decoration step of a Schwartzian transform -- confirm
// against the full gist.
List<Tuple> tmp = new ArrayList<Tuple>();
int i = 0;
for (Object obj : objects) {
// Post-increment: the current value of i is stored, then i advances.
tmp.add(new Tuple(obj, i++));
}
@tomotaka
tomotaka / stream_text_processor.py
Last active August 29, 2015 13:56
cat test.txt | python ./stream_text_processor.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import sys
import hashlib
import gevent
import gevent.queue
import gevent.fileobject as gfo
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pprint import pprint, pformat
import random
# Starts a new list `randomized` and loops over `items`, choosing a random
# position in it on each pass.
# NOTE(review): this excerpt ends right after the index is chosen; the rest of
# the loop body (and any return) is not visible here.
def randomize(items):
randomized = []
# Keep going for as long as `items` is non-empty.
while 0 < len(items):
# random.randint includes both endpoints, hence the upper bound len(items)-1.
idx = random.randint(0, len(items)-1)
@tomotaka
tomotaka / seitekidouteki.md
Last active June 4, 2018 08:02
静的型言語 vs 動的型言語

静的型 vs 動的型

※個人の見解です!!

静的型付け言語のいいところ

  • コード自体の情報量が多い
    • ドキュメントの自動生成が楽(少ないドキュメンテーション作業で質の高いドキュメントが作れる)
    • エディタ/IDEの自動補完をより強力にすることができる
  • 実行するまでもなく様々なことがコンパイル時に検査される
  • 実行速度が速い(言語が多い)
@tomotaka
tomotaka / urlshortener.py
Created December 16, 2013 10:18
mongo-based url shorten service engine
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# requirements: pip install mongoengine simplejson bottle
import mongoengine
import string
from bottle import (
route, run, HTTPResponse, request
)
import simplejson
@tomotaka
tomotaka / cssdatauriexpander.py
Last active December 31, 2015 05:29
expanding url(./img/hoge.png) => url(data:image/png;....) with filesystem-based caching
import re
import base64
import hashlib
import os.path
import gevent.fileobject as gfo
# Reads the whole of `filepath` in binary mode through a gevent FileObject.
# NOTE(review): this excerpt ends after the read; whether the handle is closed
# and what is returned are not visible here.
def _read_file(filepath):
# Open in binary mode, then wrap the handle in gevent.fileobject.FileObject.
fh = open(filepath, 'rb')
gfh = gfo.FileObject(fh, 'rb')
# read() with no size argument reads everything that remains.
ret = gfh.read()
@tomotaka
tomotaka / mongocounter.py
Created October 3, 2013 10:17
cache-enabled counter using MongoDB's '$inc' operator
#!/usr/bin/python
# -*- coding: utf-8 -*-
import time
import math
__all__ = ('MongoCounterError', 'MongoCounter')
class MongoCounterError(Exception):
    """Module-specific exception type; adds nothing to ``Exception``."""
@tomotaka
tomotaka / test_jinja2_escape.py
Created September 13, 2013 08:09
Q. How does Jinja2 escape filter HTML escape? A. MarkupSafe.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from nose.tools import eq_
import markupsafe
def jinja_escape(s):
    """Return ``s`` escaped by ``markupsafe.escape``, formatted through ``'%s'``."""
    # note: str(markupsafe.escape(s)) doesn't work in some cases,
    # so the escaped value is formatted through '%s' instead.
    return '%s' % markupsafe.escape(s)


# NOTE(review): `soup` is not defined anywhere in this excerpt.
scraped_string = soup.find('p', id='important').string # should be escaped by jinja2 escape filter
@tomotaka
tomotaka / beautiful_soup_sample.py
Created September 13, 2013 06:10
BeautifulSoup samples
#!/usr/bin/python
# -*- coding: utf-8 -*-
#from bs4 import BeautifulSoup
#soup = BeautifulSoup(some_html_string)
def get_string_by_id(soup, _tag, _id):
    """Return the ``.string`` of the element found by ``soup.find(_tag, id=_id)``.

    Returns ``None`` when ``find`` itself returns ``None``.
    """
    element = soup.find(_tag, id=_id)
    if element is None:
        return None
    return element.string