Skip to content

Instantly share code, notes, and snippets.

@clarksun
clarksun / chunks.py
Created December 28, 2017 02:58
python遍历大型dict, 分批事务写进数据库方法, 从frontera里面学来的
def chunks(l, n):
    """Yield successive slices of ``l`` holding at most ``n`` items each.

    ``l`` must support ``len()`` and slicing (list, tuple, str, ...).
    Each yielded chunk has the same type as a slice of ``l``; the last
    chunk is shorter when ``len(l)`` is not a multiple of ``n``.

    ``n`` is used as the ``range`` step, so ``n == 0`` raises ValueError
    once the generator is first advanced.
    """
    for start in range(0, len(l), n):
        yield l[start:start + n]
d = dict()

# Walk the dict in slices of 32768 items; each slice is written inside
# its own batch context.
for chunk in chunks(list(d.items()), 32768):
    # Because the batch uses transaction=True, batch_size cannot be used.
    with table.batch(transaction=True) as b:
        for k, v in chunk:
            b.put(...)
@clarksun
clarksun / struct_pack.py
Last active December 25, 2017 02:52
python struct binascii
# https://pymotw.com/3/struct/
# struct_pack.py
import struct
import binascii
# Describe the record layout once: unsigned int, 2-byte string, float.
s = struct.Struct('I 2s f')

# The values to pack, in the same order as the format fields.
values = (
    1,
    'ab'.encode('utf-8'),
    2.7,
)

# Pack the tuple into its binary representation.
packed_data = s.pack(*values)

print('Original values:', values)
@clarksun
clarksun / rand_timestamp_str.py
Last active December 24, 2017 08:51
python生成微秒
# frontera hbase backend
from time import time
# Epoch time in microseconds (a 16-digit integer).
random_str = int(1000000 * time())
# Epoch time in milliseconds (a 13-digit integer); this assignment
# replaces the microsecond value above.
random_str = int(1000 * time())
@clarksun
clarksun / happybase_create_delete_table.py
Created December 23, 2017 01:55
happybase create delete table
# frontera HBaseQueue class
# tablename一定要to_bytes一下
from w3lib.util import to_bytes
# The table name must always be passed through to_bytes before use.
table_name = to_bytes(table_name)
tables = set(connection.tables())
if drop and table_name in tables:
    # Delete the existing table (disabling it first) and keep the local
    # set of table names in sync with that deletion.
    connection.delete_table(table_name, disable=True)
    tables.remove(table_name)
@clarksun
clarksun / happybase_write.py
Last active December 24, 2017 08:54
happybase write hbase
# frontera hbase backend _schedule()
table = self.connection.table(self.table_name)
with table.batch(transaction=True) as b:
for rk, tuples in six.iteritems(data):
obj = dict()
for score, item in tuples:
column = 'f:%0.3f_%0.3f' % get_interval(score, 0.001)
obj.setdefault(column, []).append(item)
@clarksun
clarksun / debug_log_filter.py
Created December 22, 2017 09:30 — forked from bdarnell/debug_log_filter.py
Python logging filter for per-module debug logging
# A simple log filter to turn debug logging on and off on a per-module
# basis, when using tornado-style logging. Note that this works based
# on the module where the log statement occurs, not the name passed to
# logging.getLogger(), so it works even with libraries that write directly
# to the root logger.
#
# One drawback to this approach (as opposed to using distinct
# per-module loggers and setting their levels appropriately) is that
# logger.isEnabledFor(DEBUG) will return true even when called from a
# module where debug output is not enabled, so modules that perform
@clarksun
clarksun / gevent_demo.py
Last active December 15, 2017 08:33
gevent #python #gevent
import gevent
import random
import time
from functools import wraps
# http://sdiehl.github.io/gevent-tutorial/
def task(pid):
"""
Some non-deterministic task
@clarksun
clarksun / kafka_python.py
Created December 12, 2017 01:44
关键字参数使用 #python
# kafka/consumer/group.py
class KafkaConsumer(six.Iterator):
DEFAULT_CONFIG = {
'bootstrap_servers': 'localhost',
'xxx':'yyy',
}
def __init__(self, *topics, **configs):
self.config = copy.copy(self.DEFAULT_CONFIG)
@clarksun
clarksun / CaselessDict.py
Created November 23, 2017 08:31
忽略大小写key的dict from scrapy #python
class CaselessDict(dict):
__slots__ = ()
def __init__(self, seq=None):
super(CaselessDict, self).__init__()
if seq:
self.update(seq)
def __getitem__(self, key):
return dict.__getitem__(self, self.normkey(key))
@clarksun
clarksun / kafka_topic_msg_count.sh
Created October 9, 2017 06:36
get kafka topic message count
# Sum the third ':'-separated field of every line GetOffsetShell prints.
# NOTE(review): with --time -1 this presumably totals the latest offset of
# each partition, which equals the message count only if no earlier offsets
# have been removed — confirm against the Kafka tool documentation.
# "sum + 0" forces numeric output, so awk prints 0 rather than an empty line
# when the tool produces no output.
kafka-run-class kafka.tools.GetOffsetShell --broker-list localhost:9092 --topic xxx --time -1 --offsets 1 | awk -F ':' '{sum += $3} END {print sum + 0}'