Last active
September 14, 2020 20:47
-
-
Save kmuthukk/49595e9020f9f3604faa68814f8b2117 to your computer and use it in GitHub Desktop.
Check performance of updating a column that doesn't have an index
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install yb-cassandra-driver | |
from cassandra.cluster import Cluster | |
import time | |
import random | |
from multiprocessing.dummy import Pool as ThreadPool | |
from functools import partial | |
# Load Phase params | |
num_write_threads=2 | |
num_users=1000 | |
cluster = Cluster(['127.0.0.1']) | |
session = cluster.connect() | |
def create_table(num_indexes): | |
start_time = time.time() | |
session.execute("""CREATE KEYSPACE IF NOT EXISTS k"""); | |
session.execute("""USE k"""); | |
session.execute("""DROP TABLE IF EXISTS users"""); | |
now_time = time.time() | |
print("Dropped (if exists): users table") | |
print("Time: %s ms" % ((now_time - start_time) * 1000)) | |
print("====================") | |
start_time = time.time() | |
session.execute(""" | |
CREATE TABLE IF NOT EXISTS users( | |
id text, | |
name0 text, | |
name1 text, | |
name2 text, | |
name3 text, | |
name4 text, | |
age int, | |
PRIMARY KEY(id) | |
) WITH TRANSACTIONS = {'enabled' : true} | |
""") | |
now_time = time.time() | |
start_time = time.time() | |
for idx in range(num_indexes): | |
column = "name"+str(idx) | |
session.execute("""CREATE INDEX IF NOT EXISTS {}_idx ON users({})""".format(column, column)) | |
now_time = time.time() | |
print("Created {} indexes on table".format(num_indexes)) | |
def update_data_worker(num_indexes, thread_num): | |
thread_id = str(thread_num) | |
prepared = session.prepare("""UPDATE users SET age = ? WHERE id = ?"""); | |
for idx in range(num_users): | |
session.execute(prepared, | |
(27 + (idx % 50), "user-"+thread_id+"-"+str(idx))) | |
def update_data(num_indexes): | |
pool = ThreadPool(num_write_threads) | |
t1 = time.time() | |
results = pool.map(partial(update_data_worker, num_indexes), range(num_write_threads)) | |
t2 = time.time() | |
total_rows=num_users*num_write_threads | |
def load_data_worker(num_indexes, thread_num): | |
thread_id = str(thread_num) | |
prepared = session.prepare("""INSERT INTO users (id, name0, name1, name2, name3, name4, age) VALUES (?, ?, ?, ?, ?, ?, ?)"""); | |
for idx in range(num_users): | |
session.execute(prepared, | |
("user-"+thread_id+"-"+str(idx), | |
"name0-"+str(idx), | |
"name1-"+str(idx), | |
"name2-"+str(idx), | |
"name3-"+str(idx), | |
"name4-"+str(idx), | |
20 + (idx % 50))) | |
def load_data(num_indexes): | |
pool = ThreadPool(num_write_threads) | |
t1 = time.time() | |
results = pool.map(partial(load_data_worker, num_indexes), range(num_write_threads)) | |
t2 = time.time() | |
total_rows=num_users*num_write_threads | |
# Run the program for 0 indexes and 5 indexes. | |
for num_indexes in (0, 5): | |
print("Running test with {} index(es)".format(num_indexes)) | |
create_table(num_indexes) | |
# print("Waiting 60 seconds for leader balancing to happen before starting experiment") | |
# time.sleep(60) | |
start_time = time.time() | |
load_data(num_indexes) | |
now_time = time.time() | |
total_rows=num_users*num_write_threads | |
print("Num Indexes={}: Rows={}; Inserts/sec: {}; Avg Latency (ms): {}" | |
.format(num_indexes, | |
total_rows, | |
round((total_rows / (now_time - start_time)), 2), | |
round(((now_time - start_time) * 1000 / (total_rows)), 2))) | |
print("====================") | |
print("Now update a non-indexed column (age)...") | |
start_time = time.time() | |
update_data(num_indexes) | |
now_time = time.time() | |
total_rows=num_users*num_write_threads | |
print("Num Indexes={}: Rows={}; Updates/sec: {}; Avg Latency (ms): {}" | |
.format(num_indexes, | |
total_rows, | |
round((total_rows / (now_time - start_time)), 2), | |
round(((now_time - start_time) * 1000 / (total_rows)), 2))) | |
print("====================") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment