Skip to content

Instantly share code, notes, and snippets.

View philippmuench's full-sized avatar

Philipp Münch philippmuench

View GitHub Profile
The Human Microbiome Project Consortium
Curtis Huttenhower, Dirk Gevers, Rob Knight, Sahar Abubucker, Jonathan H. Badger, Asif T. Chinwalla, Heather H. Creasy, Ashlee M. Earl, Michael G. FitzGerald, Robert S. Fulton, Michelle G. Giglio, Kymberlie Hallsworth-Pepin, Elizabeth A. Lobos, Ramana Madupu, Vincent Magrini, John C. Martin, Makedonka Mitreva, Donna M. Muzny, Erica J. Sodergren, James Versalovic, Aye M. Wollam, Kim C. Worley, Jennifer R. Wortman, Sarah K. Young, Qiandong Zeng, Kjersti M. Aagaard, Olukemi O. Abolude, Emma Allen-Vercoe, Eric J. Alm, Lucia Alvarado, Gary L. Andersen, Scott Anderson, Elizabeth Appelbaum, Harindra M. Arachchi, Gary Armitage, Cesar A. Arze, Tulin Ayvaz, Carl C. Baker, Lisa Begg, Tsegahiwot Belachew, Veena Bhonagiri, Monika Bihan, Martin J. Blaser, Toby Bloom, Vivien Bonazzi, J. Paul Brooks, Gregory A. Buck, Christian J. Buhay, Dana A. Busam, Joseph L. Campbell, Shane R. Canon, Brandi L. Cantarel, Patrick S. G. Chain, I-Min A. Chen, Lei Chen, Shaila Chhibba, Ken Chu, Dawn M. C
# Reinstall deepG from the "prepro" branch and install the TensorFlow backend.
remove.packages("deepG")
devtools::install_github("hiddengenome/deepG@prepro")
tensorflow::install_tensorflow(
  version = "1.12.0-gpu",
  method = "conda",
  conda_python_version = "2.7"
)
library(deepG)

# Train a network on the data under `path`. Logical flags are spelled out as
# FALSE because `F` is an ordinary variable that can be reassigned.
hist <- trainNetwork(
  path = "/scratch/pmuench/crispr_refseq/with_crispr",
  use.cudnn = FALSE,
  use.codon.cnn = FALSE,
  maxlen = 80,
  batch.size = 500,
  run.name = "CrisprNet_v1_no_cnn",
  epochs = 50,
  steps.per.epoch = 1000,
  layers.lstm = 3,
  max.queue.size = 100,
  dropout.rate = 0.15,
  layer.size = 5,
  vocabulary.size = 5
)
# NOTE(review): the line below appears to record the error raised by the call
# above -- confirm.
# ValueError: No data provided for "lstm_input". Need data for each key in: ['lstm_input']
# requirement: anaconda
conda install virtualenv
# Create an isolated Python environment in ~/deepG_env and activate it so the
# pip3 installs below go into that environment.
python3 -m venv ~/deepG_env
source ~/deepG_env/bin/activate
# deepG is tested with tensorflow 1.14
# The wheel below is the GPU build of 1.14.0 for CPython 3.7 on Linux x86_64.
pip3 install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.14.0-cp37-cp37m-linux_x86_64.whl
# NOTE(review): keras is installed without a version pin -- confirm that the
# version pip resolves is compatible with tensorflow 1.14.
pip3 install keras
# setup cuda, required is cuda10 and cudnn 7.4
# NOTE(review): cudatoolkit is installed without a version pin and no cudnn
# package is installed here -- confirm this satisfies the requirement above.
conda install -c anaconda cudatoolkit
@philippmuench
philippmuench / install.sh
Last active August 12, 2019 18:21
installation of CRISPRCasFinder
# Download and unpack CRISPRCasFinder.
# The URL is quoted because it contains "?", which the shell treats as a glob
# character, and -O gives the archive a predictable name instead of one
# derived from the query string.
wget -O CRISPRCasFinder.zip "https://crisprcas.i2bc.paris-saclay.fr/Home/DownloadFile?filename=CRISPRCasFinder.zip"
unzip CRISPRCasFinder.zip
cd CRISPRCasFinder
# install perl modules
# The cpanminus bootstrap script is piped straight into perl, so fetch it over
# HTTPS rather than plain HTTP.
curl -L https://cpanmin.us | perl - App::cpanminus
cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib)
cpanm JSON::Parse
# install prodigal
@philippmuench
philippmuench / Snakefile
Last active March 24, 2022 03:58
pipeline OligoMM-Antibiotics
'''
oligomm_antibiotics.snakefile
Philipp Muench
Maps NGS data to OligoMM reference genomes and performs variant calling
-----------------------------------------------------------------------
Requirements:
samtools
@philippmuench
philippmuench / multiplicative_lstm.R
Created June 18, 2019 09:13 — forked from jjallaire/multiplicative_lstm.R
Custom Multiplicative LSTM Layer for R Keras
library(keras)
library(reticulate)
layer_multiplicative_lstm <-function(
object, units, activation = "tanh", recurrent_activation = "hard_sigmoid", use_bias = TRUE,
return_sequences = FALSE, return_state = FALSE, go_backwards = FALSE, stateful = FALSE, unroll = FALSE,
kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", bias_initializer = "zeros",
unit_forget_bias = TRUE, kernel_regularizer = NULL, recurrent_regularizer = NULL, bias_regularizer = NULL,
@philippmuench
philippmuench / gist:59cbbd304619df1bcbf1835c1b771247
Created May 27, 2019 14:18
code for rebuttal of reviewer comments, additional Bayes analysis
# Observed counts.
# NOTE(review): y1 and y2 are used below as the success counts out of n1 and
# n2 trials; confirm the wording of the y1/y2 descriptions matches that.
n1 <- 9196 # total number of lysis positive genomes
y1 <- 830 # number of lysis positive genomes among colicin B positive genomes
n2 <- 303 # total number of lysis negative genomes
y2 <- 20 # lysis negative genomes among colicin B positive genomes

# SIMULATION
# NOTE: `I` and `diff` shadow the base R functions I() and diff(); the names
# are kept because code beyond this excerpt may refer to them.
I <- 10000 # simulations
# Draw I samples per group from Beta(y + 1, n - y + 1) -- presumably the
# posterior of each proportion under a uniform Beta(1, 1) prior.
theta1 <- rbeta(I, y1 + 1, (n1 - y1) + 1)
theta2 <- rbeta(I, y2 + 1, (n2 - y2) + 1)
diff <- theta1 - theta2 # simulated diffs
@philippmuench
philippmuench / run.R
Last active March 7, 2019 12:25
biomartr Linux test
# Install BiocManager only when it is missing, so re-running the script does
# not reinstall it every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# NOTE(review): Bioconductor releases are tied to specific R versions --
# confirm that version "3.8" matches the R installation under test.
BiocManager::install("Biostrings", version = "3.8")
BiocManager::install("biomaRt", version = "3.8")
install.packages("biomartr", dependencies = TRUE)

library(biomartr)
# Retrieve the genome for the given GenBank assembly accession.
getGenome(db = "genbank", organism = "GCA_003138775.1", reference = FALSE)
@philippmuench
philippmuench / barplot.R
Created March 6, 2018 11:11
script to generate stacked barplot for community
# cleanup
# NOTE(review): rm(list = ls()) only removes objects from the calling
# environment; it does not detach packages or reset options, so it is not a
# substitute for a fresh R session. Kept because removing it would change the
# script's side effects.
rm(list = ls())
# load packages
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later at first use.
library(vegan)
library(pander)
library(ggplot2)
library(ape)
library(RColorBrewer)
@philippmuench
philippmuench / get_proteins.sh
Last active March 6, 2018 11:08
shell script to download all proteins from taxids listed in intersection_ids.txt
#! /bin/sh
# Start from a clean log on every run.
rm -rf log.txt # file where the taxid of skipped taxa will be written to
# NOTE(review): "treshold" looks like a misspelling of "threshold"; the name
# is left unchanged because later lines of the script may reference it.
max_treshold=20000
# Create the output directory if it does not exist yet.
mkdir -p out
while read line; do
pyla_name=$(echo $line | awk -F';' '{print $1}' | tr -s ' ' | tr ' ' '_')
txid=$(echo $line | awk -F';' '{print $2}')
echo "processing $pyla_name"
num_found=$(esearch -db protein -query "txid$txid[Organism:exp]"\