Philipp Münch philippmuench

Staff Scientist Helmholtz Centre for Infection Research & PostDoc Harvard T.H. Chan School of Public Health

philippmuench / gist:b428c1c3ab0b255342011ff52b2daa96

Created April 4, 2025 10:25

check_for_exact_copies.r

	#!/usr/bin/env Rscript

	# Script to find exact matches between evaluation and training datasets
	# Identifies which samples from evaluation set are identical to samples in training set

	# Load necessary libraries
	# Matrix is installed on demand. This script runs under Rscript, which is
	# non-interactive: install.packages() cannot prompt for a mirror there and
	# stops when the "repos" option still holds the "@CRAN@" placeholder, so a
	# concrete mirror is substituted in that case only. Repositories the user
	# has already configured are passed through untouched.
	if (!requireNamespace("Matrix", quietly = TRUE)) {
	  # local() keeps the temporary `repos` variable out of the script's
	  # global environment.
	  local({
	    repos <- getOption("repos")
	    if (length(repos) == 0L) {
	      repos <- c(CRAN = "@CRAN@")
	    }
	    repos[repos %in% "@CRAN@"] <- "https://cloud.r-project.org"
	    install.packages("Matrix", repos = repos)
	  })
	}
	library(Matrix)

philippmuench / LLaMA.R

Created October 15, 2024 23:03 — forked from t-kalinowski/LLaMA.R

LLaMA implemented in R Tensorflow and Keras

	## Setup
	# Set the tensorflow option `tensorflow.extract.warn_tensors_passed_asis`
	# to FALSE (presumably silences a warning raised when tensors are passed
	# as-is to `[` -- confirm against the tensorflow package docs).
	options(tensorflow.extract.warn_tensors_passed_asis = FALSE)
	# Export an empty CUDA_VISIBLE_DEVICES for this process (presumably to
	# force CPU-only execution -- confirm).
	Sys.setenv(CUDA_VISIBLE_DEVICES = "")

	library(dplyr, warn.conflicts = FALSE)
	library(purrr)
	library(glue)
	library(envir)

	library(tensorflow)

philippmuench / download_ref_from_list.py

Created October 14, 2024 12:21

download genomes from NCBI

	import os
	import hashlib
	from pathlib import Path
	import requests
	import logging
	from colorama import Fore, Style, init
	import gzip
	import shutil
	import time
	import random

philippmuench / Snakefile

Last active September 3, 2024 10:25

Snakemake file for gene list generation for Muench et al., 2024 manuscript

	import os
	import random

	# Get a list of all FASTA files in the fasta folder
	FASTA_FILES, = glob_wildcards("fasta/{fasta_file}.fasta")

	rule all:
	input:
	expand("gff/{fasta_file}.gff", fasta_file=FASTA_FILES),
	expand("reformatted_gff_shuffled/{fasta_file}.gff", fasta_file=FASTA_FILES),

philippmuench / check_fasta.py

Created July 13, 2023 14:06

check for malformed file

philippmuench / gist:5a03cccfd472cb33b2a3a12058b21277

Created March 16, 2022 22:31

shiny CRISPR

philippmuench / gist:46edf41b389da1882d5fa9338648ec51

Created August 19, 2021 19:02

flexdashboard hd5

	---
	title: "GenomeNet Viewer"
	output:
	  flexdashboard::flex_dashboard:
	    orientation: rows
	    social: menu
	    theme: united #cerulean
	    source_code: embed
	runtime: shiny
	---

philippmuench / gist:6b9bbb9f9f987ab22efb573f9f19160f

Created August 3, 2020 13:39

train for wavenet binary target

	#' @title Trains a (mostly) LSTM model on genomic data. Designed for developing genome based language models (GenomeNet)
	#'
	#' @description
	#' Depth and number of neurons per layer of the network can be specified. First layer can be a Convolutional Neural Network (CNN) that is designed to capture codons.
	#' If a path to a folder where FASTA files are located is provided, batches will be generated using an external generator, which
	#' is recommended for big training sets. Alternatively, a dataset can be supplied that holds the preprocessed batches (generated by \code{preprocessSemiRedundant()})
	#' and keeps them in RAM. Also supports training on instances with multiple GPUs and scales linearly with the number of GPUs present.
	#' @param train_type Either "lm" for language model, "label_header" or "label_folder". Language model is trained to predict next character in sequence.
	#' label_header/label_folder are trained to predict a corresponding class, given a sequence as input. If "label_header", class will be read from f

philippmuench / gist:4832a25f8a2693b90e7a9e96edfa9bfd

Last active December 23, 2019 16:51

WaveNet genomic

	trainMinimalFunctionalAPI <- function(path = "example_files/fasta") {
	library(wavenet)
	message("Initialize model! This can take a few minutes.")

	maxlen <- 1000
	input <- keras::layer_input(batch_shape = c(64, maxlen, 6))


	# https://github.com/ibab/tensorflow-wavenet/blob/master/wavenet/ops.py#L46
	first <- keras::layer_conv_1d(

philippmuench / gist:f8c41263647b175cf0b501539c32a959

Created December 23, 2019 15:46

functional api

	trainMinimalFunctionalAPI <- function(path = "example_files/fasta") {

	message("Initialize model! This can take a few minutes.")

	input <- keras::layer_input(batch_shape = c(256, 50, 6))

	cnn <-
	keras::layer_conv_1d(
	object = input,
	kernel_size = 3,