Created
July 27, 2020 16:25
-
-
Save ericjang/8f8f9aaec79e24c3915d98249ba6ae3b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A lightweight experiment logbook for Jupyter/Colab-style ad hoc experiments.

Let's say you generate a plot with Matplotlib and want to re-run your notebook
with a different set of configurations, and then compare the resulting plot to
the one you saved (to see if the new configuration is better).

# Saving experiments (a project must be selected first)
elog.set_project('myproject')
f = plt.gcf()
elog.savefig(f,
             exp='mymodel_accuracy_moredata',
             train_set='1997-2015',
             eval_set='2016-2017',
             outcome='unknown',
             description='Trained on 1997-2015, eval on 2016-2017.')

# Retrieving experiments
f = elog.getfig('mymodel_accuracy_moredata')

# Rendering inline table (with inline figure images) in Jupyter/Colab.
elog.html(elog._df)
""" | |
import os | |
import pickle | |
import datetime | |
import pandas as pd | |
import glob | |
from typing import Text | |
from IPython.core.display import HTML | |
# All projects live under this root directory. It must be on the same path as
# the Jupyter server, or else the inline HTML images won't render correctly.
_root_dir = 'experiment_logs'
_default_values = {'root_dir': _root_dir}
# Location of the pickled experiment DataFrame inside the root directory.
_df_path = os.path.join(_default_values['root_dir'], 'experiment_df')
# Resume from the logbook pickled by an earlier session when one exists;
# otherwise start from an empty DataFrame.
_df = pd.read_pickle(_df_path) if os.path.exists(_df_path) else pd.DataFrame()
def set_project(project_name: Text):
    """Record the project name in the module-level defaults.

    Args:
      project_name: Value stored under the 'project_name' key of
        ``_default_values``.
    """
    _default_values.update(project_name=project_name)
def savefig(fig, exp: Text, **kwargs):
    """Save a figure, a PNG rendering of it, and a metadata row.

    The figure is pickled to
    ``<root_dir>/<project_name>/<YYYYMMDD>/<exp>_<unix timestamp>.pkl`` with a
    rasterized ``.png`` next to it, and one metadata row is appended to the
    pickled DataFrame at ``<root_dir>/experiment_df``.

    Args:
      fig: Object to store. It is pickled and must provide
        ``savefig(path, bbox_inches=...)`` (e.g. the result of ``plt.gcf()``).
      exp: Experiment name. A Unix timestamp is always appended to it, so
        repeated saves of the same experiment never overwrite each other.
      **kwargs: Extra metadata columns for this row. ``root_dir`` and
        ``project_name`` may be given here to override the module defaults
        for this call only.

    Raises:
      KeyError: If no project name is available (neither set in the module
        defaults nor passed as ``project_name=``).
    """
    global _df
    # Work on a copy: updating the shared defaults in place would leak this
    # call's metadata (and its timestamp/image entries) into every later call.
    meta = dict(_default_values)
    meta.update(kwargs)
    if 'project_name' not in meta:
        raise KeyError(
            'project_name is not set; call set_project() or pass project_name=')

    date = datetime.date.today().strftime('%Y%m%d')
    dirname = os.path.join(meta['root_dir'], meta['project_name'], date)
    os.makedirs(dirname, exist_ok=True)

    # Always timestamp the file name so an existing pickle is never clobbered.
    exp += '_%d' % int(datetime.datetime.now().timestamp())

    # Save the figure itself; the context manager closes the file handle.
    path = os.path.join(dirname, exp + '.pkl')
    with open(path, 'wb') as pkl_file:
        pickle.dump(fig, pkl_file)

    # Save a rasterized image for convenient inline display.
    image_path = os.path.join(dirname, exp + '.png')
    fig.savefig(image_path, bbox_inches='tight')

    # The one-element list gives pandas an index, so the scalar metadata
    # values are broadcast into a single-row DataFrame.
    meta['timestamp'] = [datetime.datetime.utcnow()]
    meta['image'] = image_path
    row = pd.DataFrame(meta)

    # Reload the logbook from disk (another session may have written to it)
    # before appending. DataFrame.append was removed in pandas 2.0, so the row
    # is attached with pd.concat, which keeps the same index/column semantics.
    df_path = os.path.join(meta['root_dir'], 'experiment_df')
    if os.path.exists(df_path):
        _df = pd.read_pickle(df_path)
    _df = pd.concat([_df, row])
    _df.to_pickle(df_path)
def undo(n=1):
    """Drop the last ``n`` rows of the logbook and persist the result.

    Only the in-memory DataFrame and its pickle are changed; this function
    does not delete any saved figure or image files.

    Args:
      n: Number of most recent rows to remove. ``0`` leaves the logbook
        untouched.

    Raises:
      ValueError: If ``n`` is negative.
    """
    global _df
    if n < 0:
        raise ValueError('n must be non-negative, got %r' % (n,))
    if n == 0:
        # head(-0) is head(0), which would wipe every row instead of none.
        return
    _df = _df.head(-n)
    df_path = os.path.join(_default_values['root_dir'], 'experiment_df')
    _df.to_pickle(df_path)
def path_to_image_html(path):
    """Return an ``<img>`` tag (width 200) whose source is ``path``.

    The path is first made relative to the current working directory. If the
    relative form starts with '..', a warning is printed; the tag is returned
    either way.
    """
    rel = os.path.relpath(path)
    outside_cwd = rel.startswith('..')
    if outside_cwd:
        print('Warning: Jupyter server needs to be started in parent to the elog root dir.')
    return ''.join(('<img src="', rel, '" width="200" >'))
def html(df):
    """Wrap ``df`` rendered as an HTML table in an IPython ``HTML`` object.

    Cells of the 'image' column are passed through ``path_to_image_html``.
    Escaping is turned off so the generated ``<img>`` tags are emitted as
    markup rather than as text.
    """
    column_formatters = {'image': path_to_image_html}
    table_markup = df.to_html(escape=False, formatters=column_formatters)
    return HTML(table_markup)
def getfig(f, exp: Text = None, **kwargs):
    """Load a previously pickled figure from the project directory.

    Args:
      f: Ignored when ``exp`` is given (kept so existing two-argument calls
        keep working). When ``exp`` is omitted, ``f`` is used as the
        experiment name, so ``getfig('name')`` works.
      exp: Experiment name. Both the bare name and the full file stem with
        the ``_<digits>`` timestamp suffix are accepted.
      **kwargs: Per-call options. ``root_dir`` / ``project_name`` override the
        module defaults, ``date`` restricts the search to one date directory
        (default: all of them), and a truthy ``latest`` picks the last match
        in sorted path order when several files match.

    Returns:
      The unpickled object, or ``None`` (after printing the reason) when the
      project directory is missing, nothing matches, or several files match
      and ``latest`` was not requested.

    Raises:
      KeyError: If no project name is available (neither set in the module
        defaults nor passed as ``project_name=``).
    """
    import re  # Local import: the module does not import re at top level.

    if exp is None:
        exp = f

    # Copy the defaults so per-call options such as latest= do not stick.
    opts = dict(_default_values)
    opts.update(kwargs)

    project_dir = os.path.join(opts['root_dir'], opts['project_name'])
    if not os.path.exists(project_dir):
        print('Project %s does not exist' % opts['project_name'])
        return None

    # Scan one date directory if date= was given, otherwise all of them.
    date_dir = str(opts.get('date') or '*')
    pattern = os.path.join(
        glob.escape(project_dir), date_dir, glob.escape(exp) + '*.pkl')
    # The glob above is deliberately loose; keep only "<exp>.pkl" and
    # "<exp>_<digits>.pkl" so that a name which is merely a prefix of another
    # experiment's name does not match that experiment's files.
    stem = re.compile(re.escape(exp) + r'(_\d+)?\.pkl')
    matches = sorted(
        p for p in glob.glob(pattern) if stem.fullmatch(os.path.basename(p)))

    if not matches:
        print('Could not find experiment %s.' % exp)
        return None
    if len(matches) > 1 and not opts.get('latest'):
        print('Multiple matching experiments. Specify date= attribute.')
        for p in matches:
            print(p)
        return None

    # Either the only match or, with latest=, the last one in sorted order.
    path = matches[-1]
    print(path)
    # NOTE(security): unpickling executes arbitrary code from the file; only
    # load logbooks that you created yourself.
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment