Skip to content

Instantly share code, notes, and snippets.

@mtholder
mtholder / count_species_rank_binomen.py
Created May 6, 2017 15:43
count species-ranked taxa in OTT with two-word names from the output of the otcetera command in https://gist.github.com/mtholder/0ab3ef34ac96d4eec38fcfbefd4e66cc
#!/usr/bin/env python
import sys
inp = sys.stdin
count = 0
for line in inp:
ls = [i.strip() for i in line.split(' |')]
if ls:
assert len(ls) == 3 and ls[2] == ''
rank = ls[0]
if rank == 'species':
@mtholder
mtholder / count_species_rank_binomen.sh
Created May 6, 2017 14:34
count_species_rank_binomen.sh from binomen
#!/bin/bash
OTT_DIR=$1
ROOT_TAXON=$2
echo "Extracting taxa that are descendants of ${ROOT_TAXON}"
otc-taxonomy-parser \
$OTT_DIR \
--format="%R | %N |" \
-r $ROOT_TAXON \
--cull-flags "major_rank_conflict,major_rank_conflict_inherited,environmental,viral,barren,not_otu,hidden,was_container,inconsistent,hybrid,merged" \
d = read.table('times_num_studies.tsv', header=TRUE, sep="\t");
print(d);
dt = as.POSIXct(d$time, origin="1970-01-01");
print(dt);
pdf("ot-time-series.pdf");
plot(dt, d$num_phyle_studies,
xlab="Date", ylab="num studies",
ylim=c(0, max(d$num_phyle_studies)),
type="l",
#!/usr/bin/env python
import sys
import json
import datetime
synth_j, unix_ts_and_num_studies = sys.argv[1:]
ts_ns = []
with open(unix_ts_and_num_studies, 'r') as inp:
for line in inp:
ls = line.strip().split()
if ls:
@mtholder
mtholder / num-studies-in-phylesystem.bash
Created May 2, 2017 14:09
get the unix timestamps (seconds since start of epoch) for every commit in phylesystem-1 that changes the # of studies
#!/bin/bash
git rev-list HEAD > commits.txt
prev_ns=0
for sha in $(cat commits.txt)
do
unix_time=$(git show -s --format=%at "${sha}")
git checkout $sha || exit
num_studies=$(find study -name "*.json" | wc -l)
if test $num_studies -ne $prev_ns
then
@mtholder
mtholder / find-taxonomy-only-tips.sh
Created April 23, 2017 14:52
finds taxonomy only tips in the open tree synth. companion to https://gist.github.com/mtholder/924dd1da9e6bc2775d2d6fe88acade8f
#!/bin/bash
# works on the unpacked dir that is posted under the link
# text "All pipeline outputs " on the release page.
# E.g. the release page https://tree.opentreeoflife.org/about/synthesis-release/v9.1
# points to http://files.opentreeoflife.org/synthesis/opentree9.1/opentree9.1_output.tgz
# as its versions of "All pipeline outputs"
#
# I downloaded that, unpacked it, and provide the path to that dir as the only
# argument to this script.
@mtholder
mtholder / parsimony-informative-check.tex
Created April 19, 2017 19:20
description of an algorithm for detecting whether or not a character pattern is parsimony informative when there are ambiguously scored OTUs
\documentclass{article}
\usepackage{amsmath}
\usepackage{paralist}
\DeclareMathOperator*{\argmin}{argmin}
\begin{document}
\section*{Note on detecting whether a pattern is parsimony informative}
Let $\ell(T, c)$ be the unordered parsimony length of discrete character pattern $c$ on
tree $T$ and $\ell(T, c, i)$ be the parsimony length of that combination of pattern and tree
when state $i$ is enforced as the state for the root of the tree.
Data pattern $c$ is ``parsimony informative'' if and only if,
@mtholder
mtholder / find-taxonomy-only-from-annotations.py
Created April 17, 2017 16:50
reads an opentree summary tree annotated_supertree/annotations.json file and reports on the taxonomy-only nodes
#!/usr/bin/env python
# Load the annotations.json file named by the first command-line argument
# into `obj`.
import sys
try:
    annot_fp = sys.argv[1]
except IndexError:
    # Only a missing argument is expected here; a bare `except` would also
    # swallow KeyboardInterrupt/SystemExit.
    sys.exit("Expecting a path to an annotations.json file as the only argument.\n")
import json
import codecs
# Plain 'r': the 'U' flag is rejected by open() on Python 3.11+, and
# codecs.open() opens the underlying file in binary mode anyway, so no
# newline translation is lost by dropping it.
with codecs.open(annot_fp, 'r', encoding='utf-8') as inp:
    obj = json.load(inp)
@mtholder
mtholder / synth-tree-from-scratch.sh
Created September 21, 2016 23:23
overwrites your ~/.opentree and builds a tree in ~/OpenTree/
#!/bin/bash
# Create ~/OpenTree, clone propinquity into it, and start a fresh
# ~/.opentree config file.

# Abort if the home directory cannot be entered.
cd ~ || exit 1
# -p: do not report an error when ~/OpenTree already exists.
mkdir -p OpenTree
# Abort rather than cloning (and recording "home") in the wrong directory.
cd OpenTree/ || exit 1
git clone https://github.com/mtholder/propinquity.git
# '>' truncates any existing ~/.opentree; the lines after it append.
echo '[opentree]' > ~/.opentree
echo "home = $PWD" >> ~/.opentree
echo 'peyotl = %(home)s/peyotl' >> ~/.opentree
@mtholder
mtholder / print_nexml_keys_from_local.py
Created August 24, 2016 14:05
print keys in the nexml element of a nexson using a local PhylesystemAPI object
#!/usr/bin/env python
from __future__ import print_function
from peyotl.api import PhylesystemAPI
import sys
studyid = sys.argv[1]
phy = PhylesystemAPI(get_from='local')
nx = phy.get_study(studyid)['data']['nexml']
print(nx.keys())