Skip to content

Instantly share code, notes, and snippets.

@elena-roff
elena-roff / decorator.py
Created August 28, 2018 09:03
Decorator template
# source: https://realpython.com/primer-on-python-decorators/
# This formula is a good boilerplate template for building more complex decorators.
import functools
def decorator(func):
    """Wrap ``func`` so extra work can run before and after each call.

    Boilerplate template for building more complex decorators.

    Args:
        func: The callable to decorate.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns its result unchanged.
    """
    @functools.wraps(func)  # copy func's __name__, __doc__, etc. onto the wrapper
    def wrapper_decorator(*args, **kwargs):
        # Do something before
        value = func(*args, **kwargs)
        # Do something after
        return value

    return wrapper_decorator
@elena-roff
elena-roff / plot_by_groups.py
Created September 13, 2018 11:18
Plotting correlations using groups from groupby
# Reserve one subplot slot per sufficiently large group of the groupby.
grouped_df = data.groupby('col')
num_subplots = grouped_df.ngroups
fig_corr = plt.figure(figsize=(15, 250))
# The grid is 3 columns wide. The row count is derived from the total number
# of groups, so it is an upper bound once small groups are skipped below.
n_rows = round(num_subplots / 3) + 1
count = 0
for name, group in grouped_df:
    # limit by n of observations in a group
    if group.shape[0] >= 100:
        # Subplot positions are 1-based, so advance the counter first.
        count += 1
        ax_corr = fig_corr.add_subplot(n_rows, 3, count)
@elena-roff
elena-roff / custom_logger.py
Created September 14, 2018 09:34
Custom logger for both console printing (error level) and saving to a file
# example output
# __main__ - WARNING - This is a warning
# __main__ - ERROR - This is an error
import logging
# Create a custom logger named after the current module (`__name__`); in the
# example output above that name appears as `__main__`
logger = logging.getLogger(__name__)
# Create handlers
@elena-roff
elena-roff / setup.py
Last active January 29, 2019 11:34
Sets up the project environment
import os
import unittest
import shutil
from setuptools import setup
from setuptools import Command
from <module> import __version__
try:
import coverage
@elena-roff
elena-roff / suplots_time_lines.py
Last active January 29, 2019 11:04
Plot subplots, one per category, showing the count of items per time interval
# Font size applied to the x-axis tick labels of every plot
label_size = 30
plt.rcParams['xtick.labelsize'] = label_size
plt.figure(figsize=(100, 250))
# Put every timestamp on the same calendar date (2018-11-20) so that only the
# time of day differs between rows
# NOTE(review): assumes the column holds datetime-like values whose replace()
# accepts year/month/day - confirm against the loading code
df['departure_time_utc'] = df['departure_time_utc'].apply(lambda dt: dt.replace(year=2018, month=11, day=20))
# group by category
grouped = df.groupby('departure_and_destination')
@elena-roff
elena-roff / pivot_table.py
Created January 29, 2019 11:03
Advanced Pivot table example
# Count the non-zero booking_id values for every combination of the four
# index columns, restricted to the pairs in most_common_dep_dest_pairs, and
# list the largest counts first (count column renamed to 'n').
(
    pd.pivot_table(
        direct_roundtrip_flights.query(
            'departure_and_destination in @most_common_dep_dest_pairs'
        ),
        values='booking_id',
        index=[
            'departure_and_destination',
            'type_of_flight',
            'departure_time_interval',
            'departure_day_of_week',
        ],
        aggfunc={'booking_id': np.count_nonzero},
    )
    .rename(columns={'booking_id': 'n'})
    .sort_values('n', ascending=False)
)
@elena-roff
elena-roff / mongodb_docs_to_df.py
Created January 29, 2019 11:47
MongoDB flow for transforming JSON docs into pd.DataFrame
from pymongo import MongoClient
def get_db(host='localhost', port=27017):
    """Connect to a MongoDB server and return its ``warehouse`` database.

    Args:
        host: Hostname of the MongoDB server. Defaults to a locally run
            instance.
        port: Port of the MongoDB server.

    Returns:
        The ``warehouse`` database of the connected client.
    """
    # MongoClient comes from the module-level import; the duplicate
    # function-scope import was redundant.
    client = MongoClient(host=host, port=port)
    # Print the available databases as a quick check of what the server holds.
    print(client.list_database_names())
    return client.warehouse
@elena-roff
elena-roff / pylint.cfg
Created February 21, 2019 13:21
Pylint configuration
[MASTER]
# run on all available cores
jobs=0
# Specify a configuration file.
#rcfile=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
@elena-roff
elena-roff / setup.cfg
Created February 21, 2019 13:37
Flake8 Setup
[flake8]
max-line-length = 100
[coverage:run]
branch=True
source=<folder to run coverage on>
# Each pattern of a multi-line value goes on its own indented line;
# an unindented line would be read as a new option.
omit=
    */<folder to omit>/*
[coverage:report]
@elena-roff
elena-roff / Makefile
Created February 21, 2019 13:40
Lint and tests+coverage
.PHONY: test lint

# Static analysis: pylint with the project rcfile, then flake8.
# Recipe lines must start with a tab character.
lint:
	TEST=1 pylint <folder> --rcfile=pylint.cfg
	flake8

# Run pytest under coverage, then print the coverage report.
test:
	coverage run --rcfile=setup.cfg --source=<folder> -m pytest --ignore=<folder to ignore>
	coverage report --rcfile=setup.cfg