Skip to content

Instantly share code, notes, and snippets.

@elena-roff
elena-roff / get_obj_key.py
Created March 12, 2019 13:24
Creates a hash for an object
import hashlib
def get_obj_key(obj: Type) -> str:
    """Create and return a deterministic id for ``obj``.

    The id is the MD5 hex digest of the object's identifying attributes,
    joined with ``'+'`` and encoded as UTF-8.

    Args:
        obj: Object exposing ``attr1`` and ``attr2`` (strings) and
            ``num_attr1`` (converted with ``str`` before joining).

    Returns:
        The hexadecimal MD5 digest of the joined attribute values.
    """
    key_parts = [
        obj.attr1,
        str(obj.num_attr1),  # str.join only accepts strings
        obj.attr2,
        # Append further identifying attributes here. The snippet used to
        # carry a literal `...` in this list, which is the Ellipsis object
        # and made str.join raise TypeError on every call.
    ]
    obj_key = '+'.join(key_parts)
    return hashlib.md5(obj_key.encode('utf8')).hexdigest()
@elena-roff
elena-roff / api_get_data.py
Created March 12, 2019 13:21
Get data from the API, updating the offset between requests and using an extended limit
def api_get_data() -> List[Mapping]:
""" Gets data from the API. """
url = "{}/v1/booking".format(os.environ['CT_BOOKING_API_URI'])
params: Dict = {
...
'limit': 1000,
'offset': 0,
}
@elena-roff
elena-roff / test_proxy.py
Created March 12, 2019 13:12
Example tests for the API
import requests
from custom_module import api_get_data
uri = 'http://<.....>'
def fail(*args, **kwargs):
    """Accept any arguments and always raise ``RequestException``.

    The arguments are ignored; the function never returns.
    """
    error = requests.exceptions.RequestException()
    raise error
@elena-roff
elena-roff / Makefile
Created February 21, 2019 13:40
Lint and tests+coverage
# Both targets are commands, not files, so mark them phony.
.PHONY: test lint

# Static analysis: pylint with the project rcfile, then flake8.
# Recipe lines must start with a tab character, otherwise make stops with
# "missing separator".
lint:
	TEST=1 pylint <folder> --rcfile=pylint.cfg
	flake8

# Run the test suite under coverage, then print the coverage report.
test:
	coverage run --rcfile=setup.cfg --source=<folder> -m pytest --ignore=<folder to ignore>
	coverage report --rcfile=setup.cfg
@elena-roff
elena-roff / setup.cfg
Created February 21, 2019 13:37
Flake8 Setup
[flake8]
max-line-length = 100
[coverage:run]
branch=True
source=<folder to run coverage on>
omit=
*/<folder to omit>/*
[coverage:report]
@elena-roff
elena-roff / pylint.cfg
Created February 21, 2019 13:21
Pylint configuration
[MASTER]
# run on all available cores
jobs=0
# Specify a configuration file.
#rcfile=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
@elena-roff
elena-roff / mongodb_docs_to_df.py
Created January 29, 2019 11:47
MongoDB flow for transforming JSON docs into pd.DataFrame
from pymongo import MongoClient
def get_db():
    """Connect to MongoDB on localhost:27017 and return the ``warehouse`` database.

    Prints the names of the databases available on the server as a side effect.
    """
    # For local use
    from pymongo import MongoClient

    # The database server is expected to run on the local machine.
    mongo = MongoClient(host='localhost', port=27017)
    database_names = mongo.list_database_names()
    print(database_names)
    return mongo.warehouse
@elena-roff
elena-roff / pivot_table.py
Created January 29, 2019 11:03
Advanced Pivot table example
(
    direct_roundtrip_flights
    # Keep only the rows whose departure/destination pair is in the
    # `most_common_dep_dest_pairs` collection.
    .query('departure_and_destination in @most_common_dep_dest_pairs')
    # Aggregate booking_id with np.count_nonzero for every combination of
    # pair, flight type, departure time interval and departure weekday.
    .pivot_table(
        index=[
            'departure_and_destination',
            'type_of_flight',
            'departure_time_interval',
            'departure_day_of_week',
        ],
        values='booking_id',
        aggfunc={'booking_id': np.count_nonzero},
    )
    # Name the count column `n` and list the largest counts first.
    .rename(columns={'booking_id': 'n'})
    .sort_values('n', ascending=False)
)
@elena-roff
elena-roff / suplots_time_lines.py
Last active January 29, 2019 11:04
Plot subplots, one per category, showing the count of items per time interval
# Font size used for the x-axis tick labels.
label_size = 30
plt.rcParams['xtick.labelsize'] = label_size
# Open a new figure with figsize (100, 250).
plt.figure(figsize=(100, 250))
# Move every departure timestamp onto the same calendar date (2018-11-20);
# replace() leaves the time-of-day fields untouched, so only the time of day
# still distinguishes the rows.
df['departure_time_utc'] = df['departure_time_utc'].apply(lambda dt: dt.replace(year=2018, month=11, day=20))
# Group the rows by departure/destination pair (the plotting category).
# NOTE(review): presumably each group is drawn as its own subplot by code
# that follows this snippet -- confirm against the full script.
grouped = df.groupby('departure_and_destination')
@elena-roff
elena-roff / setup.py
Last active January 29, 2019 11:34
Sets up the project environment
import os
import unittest
import shutil
from setuptools import setup
from setuptools import Command
from <module> import __version__
try:
import coverage