Skip to content

Instantly share code, notes, and snippets.

View dmyersturnbull's full-sized avatar

Douglas Myers-Turnbull dmyersturnbull

  • Stanford University
  • Stanford, CA
View GitHub Profile
@dmyersturnbull
dmyersturnbull / prettify_plot.py
Last active November 21, 2016 23:20
Plot something the way I prefer it.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
from typing import Callable, Optional, Tuple, Iterable
from matplotlib.axes import SubplotBase
import seaborn as sns
def prettify_plot(plot_fn: Callable[[], SubplotBase],
style :str='whitegrid',
@dmyersturnbull
dmyersturnbull / sliding_window.py
Last active June 21, 2016 03:08
Quickly calculate a sliding window of a Numpy array.
import numpy as np
def sliding_window(x: np.ndarray, n: int) -> np.ndarray:
    """Return the moving average of x over a window of n elements.

    Each output element is the mean of n consecutive elements of x. Only
    windows that lie entirely inside x are used, so the result has
    len(x) - n + 1 elements.

    Arguments:
        x: One-dimensional array of values.
        n: Window size; must satisfy 1 <= n <= len(x).

    Raises a ValueError if n < 1 or n > len(x).
    """
    if n < 1:
        raise ValueError("N must be at least 1")
    if n > len(x):
        raise ValueError("N must not be greater than the array length")
    # Convolving with a uniform kernel of weight 1/n yields the mean of each window.
    # Courtesy of https://stackoverflow.com/questions/13728392/moving-average-or-running-mean
    kernel = np.ones((n,)) / n
    return np.convolve(x, kernel, mode='valid')
@dmyersturnbull
dmyersturnbull / interpolation_experiment.jl
Created June 14, 2016 01:23
Julia string interpolation security experiments.
# Experiment: can text handed to a function that uses string interpolation
# end up exposing the value of the `password` global?
# This prints thisismypassword:
password = "thisismypassword"
# Print the given text wrapped in <p> tags; `$post_text` inserts the argument's value.
function display_post(post_text)
println("<p>$post_text</p>")
end
# NOTE(review): "$pass" interpolates a variable named `pass`, which is not
# defined anywhere in this snippet (only `password` is). Presumably this call
# raises UndefVarError rather than printing the password — confirm the intent.
display_post("$pass" * "word")
# This doesn't when given $password (escaped) as an argument:
# ARGS[1] is the first command-line argument passed to the script.
display_post(ARGS[1])
@dmyersturnbull
dmyersturnbull / search_hgnc.py
Last active April 19, 2016 20:48
Search HGNC when you can't remember the official gene symbol.
from http_get import http_get # uses https://gist.github.com/dmyersturnbull/fade1a5901beeb1003680f8267454640
from typing import Mapping, Union, Iterable
import json
# Names of the HGNC fields that may be used as search keys.
# Spelling follows HGNC: the Ensembl field is 'ensembl_gene_id' (no trailing 'e').
searchable_fields = {
    'alias_name', 'alias_symbol', 'ccds_id', 'ena', 'ensembl_gene_id',
    'entrez_id', 'hgnc_id', 'locus_group', 'locus_type', 'mgd_id',
    'name', 'prev_name', 'prev_symbol', 'refseq_accession', 'rgd_id',
    'status', 'symbol', 'ucsc_id', 'uniprot_ids', 'vega_id',
}
@dmyersturnbull
dmyersturnbull / srr_quake.py
Last active January 20, 2017 04:12
Extract metadata from Gene Expression Omnibus and other NCBI resources to match up identifiers, etc. For PMID:26060301.
# Douglas Myers-Turnbull wrote this while at UCSF. Because of this, the list of copyright owners is unknown and the code is not licensed (sorry!).
from dl_and_rezip import dl_and_rezip # see https://gist.github.com/dmyersturnbull/a6591676fc98da355c5250d48e26844e
from lines import lines
from typing import Mapping, Iterable, Optional, Iterator, Callable
import os
import warnings
import pandas as pd
import re
@dmyersturnbull
dmyersturnbull / tissue_expression_level.py
Last active April 19, 2016 21:59
Display per-tissue or per-cell type gene expression data from the Human Protein Atlas.
from typing import Callable
import pandas as pd
from dl_and_rezip import dl_and_rezip # see https://gist.github.com/dmyersturnbull/a6591676fc98da355c5250d48e26844e
def _load(filter_fn: Callable[[pd.DataFrame], pd.DataFrame]=pd.DataFrame.dropna) -> pd.DataFrame:
"""Get a DataFrame of Human Protein Atlas tissue expression data, indexed by Gene name and with the 'Gene' and 'Reliability' columns dropped.
The expression level ('Level') is replaced using this map: {'Not detected': 0, 'Low': 1, 'Medium': 2, 'High': 3}.
Downloads the file from http://www.proteinatlas.org/download/normal_tissue.csv.zip and reloads from normal_tissue.csv.gz thereafter.
@dmyersturnbull
dmyersturnbull / MinimalScalaCheck.scala
Last active April 3, 2016 04:59
A tiny example of using ScalaCheck and ScalaTest together.
import org.scalacheck.Gen
import org.scalatest.{PropSpec, Matchers}
import org.scalatest.prop.PropertyChecks
class MinimalScalaCheckExample extends PropSpec with PropertyChecks with Matchers {
property("A string's length should be constant") {
forAll { (s: String) =>
s.length should equal(s.length)
}
}
@dmyersturnbull
dmyersturnbull / Paddable.scala
Last active May 23, 2016 17:47
Left- and right-pad a string with ^ and $, for rare cases where it makes operations much clearer.
private implicit class Paddable(string: String) {

  /** Left-pad this string with spaces up to a total width of `end`. */
  def ^(end: Int): String = {
    val padding = " " * (end - string.length)
    padding + string
  }

  /** Right-pad this string with spaces up to a total width of `end`. */
  def $(end: Int): String = {
    val padding = " " * (end - string.length)
    string + padding
  }
}
@dmyersturnbull
dmyersturnbull / matlab_to_hdf5.jl
Created April 2, 2016 02:33
Convert a proprietary .mat file to a more modern HDF5-compatible version 7 .mat file.
using MAT
using Lumberjack
@doc """
Using MAT.jl, converts any MATLAB version >=5 .mat file to an HDF5-compatible MATLAB version 7 .mat file.
Warns if the file already exists.
""" ->
function convert_to_matlab7(input_file:: AbstractString, output_file:: AbstractString)
if ispath(output_file)
warn("File $output_file already exists")
@dmyersturnbull
dmyersturnbull / lines.py
Last active July 20, 2016 18:04
Lazily read a text file, gunzip based on filename extension, and return newline-stripped lines.
import gzip, io
from typing import Iterator
def lines(file_name: str, known_encoding='utf-8') -> Iterator[str]:
"""Lazily read a text file or gzipped text file, decode, and strip any newline character (\n or \r).
If the file name ends with '.gz' or '.gzip', assumes the file is Gzipped.
Arguments:
known_encoding: Applied only when decoding gzip
"""
if file_name.endswith('.gz') or file_name.endswith('.gzip'):