genomewalker’s gists

genomewalker / get_orf_coverage_from_bam-coAssembly.awk

Last active August 29, 2015 14:26

	#Read a hist file from bedtools coverage -hist and create a coverage per orf file.
	#Coverage per each gene is calculated as: coverage_orfA = sum(depth_of_coverage * fraction_covered)
	{
	if ($0 i ~ /^all/){
	next;
	}else{
	split($4,a,"_");
	b=$1"_"$6"_"$2+1"_"$3"_orf-"a[2]"\t"$13;
	c[b]=c[b] + ($11*$14)
	}

genomewalker / fa2sto.sh

Last active January 22, 2016 08:23

	#!/bin/bash

	# We need one fasta file export with the aligned sequences and another with the SS SAI filter
	ALIGN=${1}
	OFILE=${1/.fasta/.stk}

	# Creating the first part of the file
	# we need to linearize the fasta file
	awk '
	BEGIN{

genomewalker / OSD_primer_collection.txt

Last active April 5, 2016 08:30

genomewalker / source_rmd.R

Created February 20, 2017 10:24 — forked from noamross/source_rmd.R

Source an RMD file

	#' Source the R code from an knitr file, optionally skipping plots
	#'
	#' @param file the knitr file to source
	#' @param skip_plots whether to make plots. If TRUE (default) sets a null graphics device
	#'
	#' @return This function is called for its side effects
	#' @export
	source_rmd = function(file, skip_plots = TRUE) {
	temp = tempfile(fileext=".R")
	knitr::purl(file, output=temp)

genomewalker / example_workflow.sh

Last active September 27, 2017 13:13

Tiny script to create aminoacid sequences with different types of mutations

	# Example for N. maritimus
	# First we generate a set of mutants
	python generate_mutants.py -g GCF_000018465.1_ASM1846v1_protein.faa -n 3 -o nmarit

	# We label our seeds or references
	awk '{if ($0 ~ /^>/) {print $1"_ref"}else{print $0}}' ../GCF_000018465.1_ASM1846v1_protein.faa > nmarit_aa_ref.fasta

	# Concatenate mutants and references
	cat nmarsub.fasta nmarins.fasta nmardel.fasta nmarall.fasta nmarit_aa_ref.fasta > nmarit_all_aa.fasta

genomewalker / sqlite_blob.r

Last active September 22, 2017 06:37 — forked from jfaganUK/sqlite_blob.r

Storing R Objects as a SQLite Blob

	#' ## Storing R Objects in a SQLite Database

	#' Two packages we are using. The first is the ```RSQLite``` which will be used to create and manage an in-memory SQLite database. The second is ```igraph``` which I will use to create and visualize a random network. Some of the work I do is on network simulation. I often don't know the metrics I need from a simulated network when it's created, so I want to be able to store the networks that are created so that I can go back later and analyze them.
	library(RSQLite)
	library(igraph)

	#' Create a database in memory.
	con <- dbConnect(SQLite(), ":memory:")

	#' The table has two columns, an id column and a column called graph which is a blob type. This type just stores binary data.

genomewalker / otu_table_analysis.r

Last active October 2, 2017 16:05

Scripts for analysing original alma data set

	# UCLUST workflow ---------------------------------------------------------
	library(tidyverse)
	library(cowplot)
	library(ggpubr)

	setwd("~/Downloads/alma_original_test")

	uclust_df <- read_tsv("16s.insilico_mock.tsv", col_names = T)

	uclust_16S_summary <- uclust_df%>% dplyr::rename(otu_name = `#OTU ID`) %>%

genomewalker / big_dada.r

Created October 12, 2017 09:27

	big_dada <- function(X){
	cat("Processing:", X, "\n")
	cat("Derep F:", X, "\n")
	derepF <- derepFastq(filtpathF[[X]])
	cat("Inferring F:", X, "\n")
	ddF <- dada(derepF, err=errF, multithread=TRUE)
	cat("Derep R:", X, "\n")
	derepR <- derepFastq(filtpathR[[X]])
	cat("Inferring R:", X, "\n")
	ddR <- dada(derepR, err=errR, multithread=TRUE)

genomewalker / update_unk.sh

Last active October 31, 2017 16:44

	mmseqs concatdbs /PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/unkprot_db \
	/PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/HMPSOAP_db \
	/PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/unkprot_db.withNewSequences

	mmseqs concatdbs /PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/unkprot_db_h \
	/PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/HMPSOAP_db_h \
	/PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/unkprot_db.withNewSequences_h

	mawk '{print NR"\t"$1}' unkprot_db.withNewSequences_h \| sed 's/\x0//g' > unkprot_db.withNewSequences.lookup

genomewalker / sge_hmm.sh

Created November 2, 2017 22:56

Antonio Fernandez-Guerra genomewalker