Skip to content

Instantly share code, notes, and snippets.

View dmyersturnbull's full-sized avatar

Douglas Myers-Turnbull dmyersturnbull

  • Stanford University
  • Stanford, CA
View GitHub Profile
@dmyersturnbull
dmyersturnbull / find_only_file_matching.py
Last active November 21, 2016 23:19
Find a unique file in a directory.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
from .scan_for_files import scan_for_files # see https://gist.github.com/dmyersturnbull/80845ba9ebab2da83963
from typing import Callable, Iterator
def find_only_file_matching(directory: str, matcher: Callable[[str], bool], file_iterator: Callable[[str], Iterator[str]]=scan_for_files) -> str:
"""Returns the full path of the matching file and raises an exception if none are found or more than 1 is found."""
file = None
@dmyersturnbull
dmyersturnbull / centeredness.py
Last active November 21, 2016 23:19
A measure of how much the data is concentrated toward the center of a multidimensional array
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import numpy as np
def centeredness(data: np.ndarray) -> np.float64:
"""A measure of how much the data is concentrated toward the center of a multidimensional array.
Could probably be defined better. centeredness(arr) == centeredness(arr * 100),
but centeredness values for arrays of different sizes are not comparable.
@dmyersturnbull
dmyersturnbull / ffmpeg_tools.py
Last active November 21, 2016 23:19
Run ffmpeg to generate a libx264 .mp4.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
from plumbum import local
import os
def make_video_from_frames(input_dir: str, video_path: str, framerate: str='10/1', ffmpeg_path: str='ffmpeg', input_image_extension: str='.jpg') -> None:
"""Runs ffmpeg on all selected files in a directory to generates an x264-encoded video in an MP4 container.
Warnings:
@dmyersturnbull
dmyersturnbull / bootstrap_subtract.py
Last active November 21, 2016 23:19
Bootstrap mean(X) - mean(Y) from a Pandas DataFrame.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import pandas as pd
import numpy as np
from typing import Optional
def bootstrap_subtract(X: pd.DataFrame, Y: pd.DataFrame, n_bootstrap_samples: int=200,
random_seed: Optional[int]=None) -> pd.DataFrame:
@dmyersturnbull
dmyersturnbull / scantree.py
Last active July 14, 2016 00:38
List the full path of every meaningful file in a directory recursively.
import os
from typing import Iterator
def is_proper_file(path: str) -> bool:
    """Return True if the last component of ``path`` is a visible, non-private name.

    The name is rejected when it is empty (for example, ``path`` is empty or
    ends with a path separator) or when it begins with '.', '~', or '_'.
    Only the string is inspected; the filesystem is not consulted.
    """
    name = os.path.basename(path)
    # An empty name has no first character to test, so reject it up front.
    return bool(name) and not name.startswith(('.', '~', '_'))
def scantree(path: str, follow_symlinks: bool=False) -> Iterator[str]:
"""List the full path of every file not beginning with '.', '~', or '_' in a directory, recursively."""
for entry in os.scandir(path):
@dmyersturnbull
dmyersturnbull / exists.py
Created July 11, 2016 18:05
Efficient existential quantifier for a filter() predicate.
from typing import Callable, Generic, TypeVar, Iterable
T = TypeVar('T')


def exists(keep_predicate: Callable[[T], bool], seq: Iterable[T]) -> bool:
    """Efficient existential quantifier for a filter() predicate.

    Returns True iff ``keep_predicate`` is truthy for one or more elements of
    ``seq``. Evaluation stops at the first match, so elements after it are
    never consumed from ``seq`` nor passed to ``keep_predicate``. An empty
    ``seq`` yields False.
    """
    # any() over a generator short-circuits on the first truthy result.
    return any(keep_predicate(element) for element in seq)
@dmyersturnbull
dmyersturnbull / atomic_task.py
Last active November 21, 2016 23:19
Luigi Task that can't be left in a partially completed state.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import luigi
from typing import Iterable
import warnings
from .hash_file_utils import * # see https://gist.github.com/dmyersturnbull/f0116c52feae3094f66b0c99b586d166
class AtomicTask(luigi.Task):
@dmyersturnbull
dmyersturnbull / hash_file_utils.py
Last active January 20, 2017 04:09
Add and check .sha1 files.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import hashlib
import os
import gzip
algorithm = hashlib.sha1
extension = '.sha1'
@dmyersturnbull
dmyersturnbull / timestamp.py
Last active August 1, 2016 18:16
YYYY-mm-dd_HH-MM-SS timestamp.
import datetime
def format_time(time: datetime.datetime) -> str:
    """Format ``time`` as a standard ``YYYY-mm-dd_HH-MM-SS`` timestamp.

    The date and the time of day are joined by an underscore, and the fields
    within each part are separated by hyphens. Ex: 2016-05-02_22-35-56.
    """
    return time.strftime("%Y-%m-%d_%H-%M-%S")
def timestamp() -> str:
    """Return the standard timestamp for the current time.

    Uses ``datetime.datetime.now()`` (called without a timezone argument) and
    formats it with ``format_time``. Ex: 2016-05-02_22-35-56.
    """
    return format_time(datetime.datetime.now())
@dmyersturnbull
dmyersturnbull / head.py
Last active July 6, 2016 23:05
Pretty-print the head of a Pandas table in a Jupyter notebook and show its dimensions. Allows showing multiple tables per cell.
from IPython.display import display, Markdown
import pandas as pd
def head(df: pd.DataFrame, n_rows: int = 1) -> None:
    """Pretty-print the head of a Pandas table in a Jupyter notebook and show its dimensions.

    Displays a Markdown caption giving the row and column counts of the whole
    table, followed by the first ``n_rows`` rows. Output goes through
    ``display`` and nothing is returned, so it can be called for several
    tables in the same cell.
    """
    n_total_rows, n_columns = len(df), len(df.columns)
    caption = "**whole table (below):** {} rows × {} columns".format(n_total_rows, n_columns)
    display(Markdown(caption))
    display(df.head(n_rows))