python_performance_test_redis_vs_postgres
from datetime import datetime
import numpy as np
import pandas as pd
import cProfile
from pstats import SortKey, Stats
from pandas import DataFrame
'''
Create postgres and redis from docker-compose.yml:

    postgres:
      image: postgres:12-alpine
      ports: ["5432:5432"]
      environment:
        POSTGRES_USER: postgres
        POSTGRES_PASSWORD: shu7aeci3Noh5veGhoov
    redis:
      image: redis:alpine
      ports: ["6379:6379"]

Then:

    docker-compose up -d redis
    docker-compose up -d postgres

Then:

    python test_postgres_vs_redis.py

Result:

Sample size of one row:
    _publishToRedis took 0:00:00.024909 for SAMPLE_SIZE = 1
    _saveToPostgres took 0:00:00.475719 for SAMPLE_SIZE = 1

Sample size of a million rows:
    _publishToRedis took 0:00:00.064457 for SAMPLE_SIZE = 1000000
    _saveToPostgres took 0:00:32.407136 for SAMPLE_SIZE = 1000000
'''
def _saveToPostgres(df: DataFrame):
    from sqlalchemy import create_engine
    connstr = "postgresql://postgres:shu7aeci3Noh5veGhoov@localhost/postgres"
    engine = create_engine(connstr)
    '''
    CREATE TABLE random (
        id serial PRIMARY KEY,
        index int NOT NULL,
        rnd float NOT NULL
    );
    '''
    # to_sql issues INSERTs row by row by default, so the comparison is a
    # little unfair to Postgres.
    df.to_sql('random', con=engine, if_exists='append')
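
# For reference, a fairer bulk-load path (not part of the measured run): pandas
# to_sql accepts `method` and `chunksize` arguments that pack many rows into
# each INSERT statement. The function name and chunk size below are
# illustrative assumptions, not the benchmarked code.
def _saveToPostgresBulk(df: DataFrame):
    from sqlalchemy import create_engine
    engine = create_engine("postgresql://postgres:shu7aeci3Noh5veGhoov@localhost/postgres")
    # method='multi' emits multi-row INSERTs; chunksize bounds rows per statement.
    df.to_sql('random', con=engine, if_exists='append', method='multi', chunksize=10000)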
def _publishToRedis(df: DataFrame):
    from redis import Redis
    redis = Redis(
        host='localhost',
        port=6379,
        db=0
    )
    '''
    We don't want to write row by row; that would distort the stats. Time wasted
    in Python loops is not what we're after, it's the communication to Redis.
    for idx, row in df.iterrows():
        redis.hset(name="random", key=idx, value=row['rnd'])
    '''
    redis.hset(name="random", key="xxx", value=repr(df.to_records(index=False)))
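
# A sketch of a lossless round trip, since the repr() string above cannot
# easily be parsed back into a DataFrame. pickle is an assumption chosen for
# illustration; any binary serializer would do. Not part of the measured run.
def _publishToRedisPickled(df: DataFrame):
    import pickle
    from redis import Redis
    redis = Redis(host='localhost', port=6379, db=0)
    redis.set("random_df", pickle.dumps(df))         # one network round trip
    restored = pickle.loads(redis.get("random_df"))  # read it back
    assert restored.equals(df)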
if __name__ == "__main__":
    SAMPLE_SIZE = 1000000
    random_values = np.random.normal(0, 1, SAMPLE_SIZE)
    idx = [str(x) for x in range(SAMPLE_SIZE)]
    random_values = pd.Series(random_values, index=idx)
    random_values = pd.DataFrame(random_values, columns=['rnd'])

    with cProfile.Profile() as pr:
        start = datetime.now()
        _publishToRedis(random_values)
        finish = datetime.now()
        delta = finish - start
        print(f'_publishToRedis took {delta} for SAMPLE_SIZE = {SAMPLE_SIZE}')

        start = datetime.now()
        _saveToPostgres(random_values)
        finish = datetime.now()
        delta = finish - start
        print(f'_saveToPostgres took {delta} for SAMPLE_SIZE = {SAMPLE_SIZE}')

    with open('profiling_stats.txt', 'w') as stream:
        stats = Stats(pr, stream=stream)
        stats.strip_dirs()
        stats.sort_stats(SortKey.CUMULATIVE)
        stats.dump_stats('.prof_stats')
        stats.print_stats()
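
# The dumped '.prof_stats' file can be inspected later without re-running the
# benchmark; this is standard pstats usage, shown as a sketch (the 10 is just
# an illustrative line limit):
#
#   from pstats import SortKey, Stats
#   Stats('.prof_stats').strip_dirs().sort_stats(SortKey.CUMULATIVE).print_stats(10)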
'''
pip install --upgrade pip
pip install numpy
pip install pandas
pip install sqlalchemy
pip install psycopg2
pip install redis
'''