python_performance_test_redis_vs_postgres
from datetime import datetime
import cProfile
from pstats import SortKey, Stats

import numpy as np
import pandas as pd
from pandas import DataFrame
'''
Create postgres and redis from docker-compose.yml:

    postgres:
        image: postgres:12-alpine
        ports: ["5432:5432"]
        environment:
            POSTGRES_USER: postgres
            POSTGRES_PASSWORD: shu7aeci3Noh5veGhoov
    redis:
        image: redis:alpine
        ports: ["6379:6379"]

Then,
    docker-compose up -d redis
    docker-compose up -d postgres

Then,
    python test_postgres_vs_redis.py

Result:

Sample size of one row:
    _publishToRedis took 0:00:00.024909 for SAMPLE_SIZE = 1
    _saveToPostgres took 0:00:00.475719 for SAMPLE_SIZE = 1

Sample size of a million rows:
    _publishToRedis took 0:00:00.064457 for SAMPLE_SIZE = 1000000
    _saveToPostgres took 0:00:32.407136 for SAMPLE_SIZE = 1000000
'''
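# For reference, a complete docker-compose.yml would wrap the two service
# definitions above like this; the version/services wrapper is an assumption,
# since the docstring only shows the service blocks themselves:
#
#   version: "3"
#   services:
#       postgres:
#           image: postgres:12-alpine
#           ports: ["5432:5432"]
#           environment:
#               POSTGRES_USER: postgres
#               POSTGRES_PASSWORD: shu7aeci3Noh5veGhoov
#       redis:
#           image: redis:alpine
#           ports: ["6379:6379"]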
def _saveToPostgres(df: DataFrame):
    from sqlalchemy import create_engine
    connstr = "postgresql://postgres:shu7aeci3Noh5veGhoov@localhost/postgres"
    engine = create_engine(connstr)
    '''
    CREATE TABLE random (
        id serial PRIMARY KEY,
        index int NOT NULL,
        rnd float NOT NULL
    );
    '''
    # to_sql inserts row by row by default, so the comparison is a little unfair here.
    df.to_sql('random', con=engine, if_exists='append')
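# Hedged aside on the row-by-row point above: pandas can batch the INSERTs
# instead. method='multi' and chunksize are standard DataFrame.to_sql
# parameters; this variant is a sketch for comparison only (the function name
# is illustrative) and was not part of the measured results above.
def _saveToPostgresBatched(df: DataFrame):
    from sqlalchemy import create_engine
    engine = create_engine("postgresql://postgres:shu7aeci3Noh5veGhoov@localhost/postgres")
    # Each chunk of 10000 rows becomes a single multi-row INSERT statement.
    df.to_sql('random', con=engine, if_exists='append', method='multi', chunksize=10000)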
def _publishToRedis(df: DataFrame):
    from redis import Redis
    redis = Redis(
        host='localhost',
        port=6379,
        db=0
    )
    '''
    We don't publish row by row: that would distort the stats. Time spent in
    Python loops is not what we're after; it's the communication with redis.
    for index, row in df.iterrows():
        redis.hset(name="random", key=index, value=row['rnd'])
    '''
    redis.hset(name="random", key="xxx", value=repr(df.to_records(index=False)))
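# Hedged aside: repr() is a one-way serialization; it can't reliably be parsed
# back into a DataFrame. A round-trippable sketch (not part of the measured
# results; the function name is illustrative) could use pandas' JSON support:
def _publishToRedisJson(df: DataFrame):
    from redis import Redis
    redis = Redis(host='localhost', port=6379, db=0)
    # orient='split' keeps index, columns and values separate, so
    # pd.read_json(payload, orient='split') reconstructs the frame.
    redis.hset(name="random", key="xxx", value=df.to_json(orient='split'))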
if __name__ == "__main__":
    SAMPLE_SIZE = 1000000
    random_values = np.random.normal(0, 1, SAMPLE_SIZE)
    idx = [str(x) for x in range(SAMPLE_SIZE)]
    random_values = pd.Series(random_values, index=idx)
    random_values = pd.DataFrame(random_values, columns=['rnd'])

    with cProfile.Profile() as pr:
        start = datetime.now()
        _publishToRedis(random_values)
        finish = datetime.now()
        delta = finish - start
        print(f'_publishToRedis took {delta} for SAMPLE_SIZE = {SAMPLE_SIZE}')

        start = datetime.now()
        _saveToPostgres(random_values)
        finish = datetime.now()
        delta = finish - start
        print(f'_saveToPostgres took {delta} for SAMPLE_SIZE = {SAMPLE_SIZE}')

    with open('profiling_stats.txt', 'w') as stream:
        stats = Stats(pr, stream=stream)
        stats.strip_dirs()
        stats.sort_stats(SortKey.CUMULATIVE)
        stats.dump_stats('.prof_stats')
        stats.print_stats()
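    # Hedged aside: the '.prof_stats' dump can be reloaded later with the
    # standard pstats API, without re-running the benchmark, e.g.:
    #     from pstats import SortKey, Stats
    #     Stats('.prof_stats').sort_stats(SortKey.CUMULATIVE).print_stats(20)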
'''
pip install --upgrade pip
pip install numpy
pip install pandas
pip install sqlalchemy
pip install psycopg2
pip install redis
'''