Skip to content

Instantly share code, notes, and snippets.

View devdazed's full-sized avatar

Russ Bradberry devdazed

View GitHub Profile
@devdazed
devdazed / lp_counters.py
Created October 11, 2012 16:14
Simple Linear Probabilistic Counters
"""
Simple Linear Probabilistic Counters
Credit for idea goes to:
http://highscalability.com/blog/2012/4/5/big-data-counting-how-to-count-a-billion-distinct-objects-us.html
http://highlyscalable.wordpress.com/2012/05/01/probabilistic-structures-web-analytics-data-mining/
Installation:
pip install smhasher
pip install bitarray
import json
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
from pycassa.cassandra.ttypes import ConsistencyLevel, NotFoundException
HOSTS = ['localhost']
pool = ConnectionPool('my_keyspace', HOSTS)
my_cf = ColumnFamily(pool, 'my_cf')
# Describe every keyspace, then each column family inside that keyspace.
# SYSTEM_MANAGER, describe_keyspace and describe_column_family are defined
# outside this excerpt.
for ks in SYSTEM_MANAGER.list_keyspaces():
    describe_keyspace(ks)
    # The inner loop needs the current keyspace, so it is nested.
    for cf in SYSTEM_MANAGER.get_keyspace_column_families(ks):
        describe_column_family(ks, cf)
@devdazed
devdazed / load.py
Created January 2, 2014 21:45
Threa
for args in tables_for_load:
thread = FileLoaderWorker(*args)
thread.start()
completed = 0
while completed < len(tables_for_load):
try:
response, exception = response_queue.get(block=False, timeout=0.1)
if exception:
class Result(threading.Event):
    """A ``threading.Event`` that additionally exposes an ``exception`` slot."""

    # Class-level default; remains None unless an error is assigned to an
    # instance.
    exception = None
def execute():
cql = 'select * from events limit 10'
response = Result()
def _on_error(error):
response.exception = error
@devdazed
devdazed / .zuul.yml
Created February 24, 2014 19:52
There is only....
# Zuul test-runner configuration.
ui: mocha-qunit
browsers:
  # A single browser entry: one mapping holding both keys. The space after
  # "name:" is required, otherwise YAML reads "name:chrome" as a plain string.
  - name: chrome
    version: latest
scripts:
  - "http://d8rk54i4mohrb.cloudfront.net/js/reach.js"
server: ./server.js
class TunableRetryPolicy(RetryPolicy):
    """Retry policy with tunable retry counts and optional consistency downgrade.

    Lets you decide whether consistency should be downgraded before a retry
    is attempted, and how many retries to attempt.
    """

    def __init__(self, read_retries=3, write_retries=3, downgrade_consistency=True):
        """Store the retry settings on the instance.

        Args:
            read_retries: Number of retries to attempt for reads.
            write_retries: Number of retries to attempt for writes.
            downgrade_consistency: Whether to downgrade consistency before
                attempting a retry.
        """
        self._read_retries = read_retries
        self._write_retries = write_retries
        self._downgrade_consistency = downgrade_consistency
@devdazed
devdazed / tombstone_count.py
Created November 24, 2015 22:04
Count tombstones in a Cassandra Table
#!/usr/bin/env python
"""
Counts the number of tombstones in a keyspace.table and reports the top N highest counts
tombstone_count.py
[-h] This help screen
[--data-dir DATA_DIR] The C* data directory (/var/lib/cassandra/data)
[--top-k TOP_K] The top number of keys with highest tombstone counts to display.
keyspace The keyspace that contains the table
@devdazed
devdazed / tc.py
Created December 2, 2015 21:29
SSTable Tombstone Counter
import fileinput, re, operator
from collections import Counter
def sizeof_fmt(num, suffix='B'):
    """Format a quantity as a human-readable string with binary prefixes.

    Args:
        num: The quantity to format (e.g. a size in bytes). May be negative.
        suffix: Unit label appended after the prefix. Defaults to ``'B'``.

    Returns:
        ``num`` divided by 1024 until its magnitude is below 1024, rendered
        with one decimal place followed by the matching prefix and
        ``suffix`` (for example ``'1.5KiB'``). Values too large for the
        ``Zi`` prefix are reported in ``Yi``.
    """
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    # Every listed prefix was exhausted; num has been divided by 1024 once
    # per prefix in the table, so it is now expressed in Yi units.
    return "%.1f%s%s" % (num, 'Yi', suffix)
#!/usr/bin/env python
from __future__ import print_function
import json
import logging
import re
from base64 import b64decode, b64encode
from urllib2 import Request, urlopen, URLError, HTTPError