import sys
!{sys.executable} -m pip install <package>
import sys
!conda install --yes --prefix {sys.prefix} <package>
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
from __future__ import unicode_literals |
import pytz | |
import pandas as pd | |
def localize_datetime(input_df, timezone='Europe/Paris', | |
datetime_column='tms_gmt'): | |
""" | |
Convert datetime column from UTC to another timezone. | |
""" | |
tmz = pytz.timezone(timezone) | |
df = input_df.copy() |
import sys
!{sys.executable} -m pip install <package>
import sys
!conda install --yes --prefix {sys.prefix} <package>
import pytz | |
def localize_datetime(input_df, timezone, tms_col): | |
""" | |
Convert datetime column from UTC to another timezone. | |
""" | |
tmz = pytz.timezone(timezone) | |
df = input_df.copy() | |
return (df.set_index(tms_col) | |
.tz_localize(pytz.utc) # UTC time |
A good technique to set configuration variables (from Keras): https://github.com/fchollet/keras/blob/master/keras/backend/common.py
from scipy.stats import lognorm | |
import numpy as np | |
def prepare_lognorm(mean, var): | |
# Formula from https://en.wikipedia.org/wiki/Log-normal_distribution | |
sigma = np.sqrt(np.log(1 + (float(var) / mean ** 2))) | |
mu = np.log(mean / np.sqrt(1 + (float(var) / mean ** 2))) | |
# Compute the scale for scipy |
Why isn't inplace=True a good thing?
from functools import wraps | |
from logs import logger | |
# Two decorators to log the shape and dtypes of a DataFrame | |
# Inspired by: https://tomaugspurger.github.io/method-chaining | |
def log_shape(func): | |
@wraps(func) | |
def wrapper(*args, **kwargs): |
# Inspired by: https://airflow.incubator.apache.org/_modules/airflow/models.html#BaseOperator | |
import pickle | |
import logging | |
from datetime import datetime | |
import traceback | |
def pickle_info(obj, session=None): | |
d = {} | |
d['is_picklable'] = True |
# This function is extracted from this file: https://github.com/dask/dask/blob/master/dask/diagnostics/progress.py | |
def format_time(t): | |
"""Format seconds into a human readable form. | |
>>> format_time(10.4) | |
'10.4s' | |
>>> format_time(1000.4) | |
'16min 40.4s' | |
""" |