Skip to content

Instantly share code, notes, and snippets.

View genomewalker's full-sized avatar

Antonio Fernandez-Guerra genomewalker

View GitHub Profile
#Read a hist file from bedtools coverage -hist and create a coverage per orf file.
#Coverage per each gene is calculated as: coverage_orfA = sum(depth_of_coverage * fraction_covered)
{
if ($0 i ~ /^all/){
next;
}else{
split($4,a,"_");
b=$1"_"$6"_"$2+1"_"$3"_orf-"a[2]"\t"$13;
c[b]=c[b] + ($11*$14)
}
#!/bin/bash
# We need one fasta file export with the aligned sequences and another with the SS SAI filter
ALIGN=${1}
OFILE=${1/.fasta/.stk}
# Creating the first part of the file
# we need to linearize the fasta file
awk '
BEGIN{
515F GTGCCAGCMGCCGCGGTAA
515F-Y GTGYCAGCMGCCGCGGTAA
515YF GTGYCAGCMGCCGCGGTAA
341F CCTACGGGNGGCWGCAG
U341F CCTAYGGGRBGCASCAG
Eu565F CCAGCASCYGCGGTAATTCC
B806R GGACTACNVGGGTWTCTAAT
806R GGACTACHVGGGTWTCTAAT
926R CCGYCAATTYMTTTRAGTTT
906R-jed CCGYCAATTYMTTTRAGTTT
@genomewalker
genomewalker / source_rmd.R
Created February 20, 2017 10:24 — forked from noamross/source_rmd.R
Source an RMD file
#' Source the R code from an knitr file, optionally skipping plots
#'
#' @param file the knitr file to source
#' @param skip_plots whether to make plots. If TRUE (default) sets a null graphics device
#'
#' @return This function is called for its side effects
#' @export
source_rmd = function(file, skip_plots = TRUE) {
temp = tempfile(fileext=".R")
knitr::purl(file, output=temp)
@genomewalker
genomewalker / example_workflow.sh
Last active September 27, 2017 13:13
Tiny script to create aminoacid sequences with different types of mutations
# Example for N. maritimus
# First we generate a set of mutants
python generate_mutants.py -g GCF_000018465.1_ASM1846v1_protein.faa -n 3 -o nmarit
# We label our seeds or references
awk '{if ($0 ~ /^>/) {print $1"_ref"}else{print $0}}' ../GCF_000018465.1_ASM1846v1_protein.faa > nmarit_aa_ref.fasta
# Concatenate mutants and references
cat nmar*sub.fasta nmar*ins.fasta nmar*del.fasta nmar*all.fasta nmarit_aa_ref.fasta > nmarit_all_aa.fasta
@genomewalker
genomewalker / sqlite_blob.r
Last active September 22, 2017 06:37 — forked from jfaganUK/sqlite_blob.r
Storing R Objects as a SQLite Blob
#' ## Storing R Objects in a SQLite Database
#' Two packages we are using. The first is the ```RSQLite``` which will be used to create and manage an in-memory SQLite database. The second is ```igraph``` which I will use to create and visualize a random network. Some of the work I do is on network simulation. I often don't know the metrics I need from a simulated network when it's created, so I want to be able to store the networks that are created so that I can go back later and analyze them.
library(RSQLite)
library(igraph)
#' Create a database in memory.
con <- dbConnect(SQLite(), ":memory:")
#' The table has two columns, an *id* column and a column called *graph* which is a **blob** type. This type just stores binary data.
@genomewalker
genomewalker / otu_table_analysis.r
Last active October 2, 2017 16:05
Scripts for analysing original alma data set
# UCLUST workflow ---------------------------------------------------------
library(tidyverse)
library(cowplot)
library(ggpubr)
setwd("~/Downloads/alma_original_test")
uclust_df <- read_tsv("16s.insilico_mock.tsv", col_names = T)
uclust_16S_summary <- uclust_df%>% dplyr::rename(otu_name = `#OTU ID`) %>%
big_dada <- function(X){
cat("Processing:", X, "\n")
cat("Derep F:", X, "\n")
derepF <- derepFastq(filtpathF[[X]])
cat("Inferring F:", X, "\n")
ddF <- dada(derepF, err=errF, multithread=TRUE)
cat("Derep R:", X, "\n")
derepR <- derepFastq(filtpathR[[X]])
cat("Inferring R:", X, "\n")
ddR <- dada(derepR, err=errR, multithread=TRUE)
mmseqs concatdbs /PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/unkprot_db \
/PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/HMPSOAP_db \
/PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/unkprot_db.withNewSequences
mmseqs concatdbs /PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/unkprot_db_h \
/PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/HMPSOAP_db_h \
/PROCESSING/OSD/UNKNOWNS/MMSEQ_MPI/unkdb_update_hmp/unkprot_db.withNewSequences_h
mawk '{print NR"\t"$1}' unkprot_db.withNewSequences_h | sed 's/\x0//g' > unkprot_db.withNewSequences.lookup
#!/bin/bash -l
#$ -cwd
#$ -j y
#$ -t 1-10:1
#$ -tc 25
#$ -N hmm_unk
#$ -pe threaded 2
#$ -V
# Where the models are