Skip to content

Instantly share code, notes, and snippets.

View chasemc's full-sized avatar
:octocat:

Chase Clark chasemc

:octocat:
View GitHub Profile
@chasemc
chasemc / parse_domtblout.R
Created September 3, 2021 17:43
Parse HMMER domtblout in R, no dependencies
parse_domtblout_file <- function(dom){
dom = readLines(dom)
dom <- dom[!grepl("^#", dom)]
dom <- lapply(strsplit(dom, "[ ]{1,}"),
function(x){
parsed <- c(x[1:22], paste0(x[23:length(x)], collapse = " "))
as.data.frame(t(parsed))
})
dom <- do.call(rbind, dom)
colnames(dom) <- c("target_name",
library(data.table)
library(ggplot2)
library(gganimate)
# State population table; the CSV was downloaded by hand from
# https://worldpopulationreview.com/states
# Only the "State" and "Pop" columns are kept, renamed to lower-case names.
population <- fread("/Users/chase/Downloads/csvData.csv")[, c("State", "Pop")]
names(population) <- c("state", "population")

# Per-state rolling averages read directly from the NYT covid-19-data repository.
covid <- fread(
  "https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-states.csv"
)
@chasemc
chasemc / parse_execution_trace.R
Created August 16, 2021 13:47
Parse nextflow execution_trace file
library(ggplot2)
# Path of the tab-separated Nextflow execution trace to analyse.
execution_trace <- "/home/chase/Documents/pipeline_info/execution_trace_2021-08-14_09-52-45.txt"

# Keep text columns as character: strsplit() below rejects factors, which is
# what read.delim() produces by default on R < 4.0.
execution_trace <- read.delim(
  execution_trace,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Text before the first space of every `name` entry.
# vapply() guarantees a character vector even when the trace has no rows
# (sapply() would return an empty list and break the next strsplit()).
process_names <- vapply(
  strsplit(execution_trace$name, " ", fixed = TRUE),
  function(x) x[[1]],
  character(1)
)

# Break each process name into its ":"-separated components.
split_names <- strsplit(process_names, ":", fixed = TRUE)

# Largest number of components found in any process name.
max_len <- max(lengths(split_names))
@chasemc
chasemc / read_assembly.r
Created May 14, 2021 21:27
Read NCBI assembly info from R
# Read the NCBI RefSeq assembly summary straight from the FTP mirror.
# The first two lines of the file are skipped and the column names are
# supplied explicitly; quoting is disabled so quote characters inside fields
# are read literally.
assembly_summary_refseq <- readr::read_delim(
  "https://ftp.ncbi.nih.gov/genomes/ASSEMBLY_REPORTS/assembly_summary_refseq.txt",
  delim = "\t",
  quote = "",
  skip = 2,
  col_names = c(
    "assembly_accession",
    "bioproject",
    "biosample",
    "wgs_master",
    "refseq_category",
    "taxid",
    "species_taxid",
    "organism_name",
    "infraspecific_name",
    "isolate",
    "version_status",
    "assembly_level",
    "release_type",
    "genome_rep",
    "seq_rel_date",
    "asm_name",
    "submitter",
    "gbrs_paired_asm",
    "paired_asm_comp",
    "ftp_path",
    "excluded_from_refseq",
    "relation_to_type_material"
  )
)
@chasemc
chasemc / md5_as_filename.sh
Created April 15, 2021 12:20
Rename files by md5 and chosen extension
#!/usr/bin/bash
# Rename every file found under $1 to "<md5 of its contents>.<extension>".
# $1 is the file(s') name to find and hash
# $2 is the extension to be given to each renamed file
#
# Note: the new name carries no directory part, so renamed files end up in
# the current working directory (unchanged from the original behaviour).

# Without both arguments `find` would walk the current directory and rename
# everything in it, so refuse to run.
if [ "$#" -lt 2 ]; then
  echo "usage: $0 <path> <extension>" >&2
  exit 1
fi

# $1 is left unquoted as in the original so it may expand to several paths
# NOTE(review): confirm this word splitting is intended.
# `! -type d` keeps directories away from md5sum; `xargs -r` avoids running
# md5sum on stdin (and then trying to rename "-") when nothing matches.
find $1 ! -type d -print0 | xargs -0 -r md5sum |
while read -r newname oldname; do
  mv -v -- "$oldname" "$newname.$2"
done
#!/usr/bin/env Rscript
# Command-line arguments given after the script name.
args <- commandArgs(trailingOnly = TRUE)

message("Installing necessary libraries if not already installed")

# BiocManager is the installer front-end for Bioconductor packages.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# mzR is distributed through Bioconductor, not CRAN, so it has to be installed
# with BiocManager; install.packages("mzR") cannot find it.
# update/ask are disabled so the script never prompts or touches other packages.
if (!requireNamespace("mzR", quietly = TRUE)) {
  BiocManager::install("mzR", update = FALSE, ask = FALSE)
}

if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
@chasemc
chasemc / ga_and_len.sh
Created February 21, 2021 18:56
Extract genomic_accessions and lengths from "ftp.ncbi.nlm.nih.gov/genomes............._assembly_report.txt"
#!/usr/bin/bash
# Print the "RefSeq-Accn" and "Sequence-Length" columns of an NCBI
# *_assembly_report.txt file.
# $1 is the URL of the assembly report to download.

if [ "$#" -lt 1 ]; then
  echo "usage: $0 <assembly_report_url>" >&2
  exit 1
fi

# 1. Download the report (URL quoted so characters such as ? & * are not
#    interpreted by the shell).
# 2. Keep everything from the column-header line to the end of the file.
# 3. Map header names to field positions on the first line, then print the
#    two wanted fields of every line.
# 4. Drop the first output line, which is the header row itself.
curl -s "$1" |\
sed -ne '/# Sequence-Name\tSequence-Role\tAssigned-Molecule\tAssigned-Molecule-Location\/Type\tGenBank-Accn\tRelationship\tRefSeq-Accn\tAssembly-Unit\tSequence-Length\tUCSC-style-name/,$ p' |\
awk -F"\t" 'NR==1 {for (i=1; i<=NF; i++) {f[$i] = i}}{ print $(f["RefSeq-Accn"]), $(f["Sequence-Length"])}' |\
sed 1d
https://github.com/Micromeda/InterProScan-Docker/blob/master/LICENSE
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
@chasemc
chasemc / antismash_get_cds.sh
Created January 28, 2021 14:21
Get CDS IDs from all antismash genbank files in all subdirectories
# Print the value of every /ID="..." qualifier that appears within three lines
# after a "CDS " line, for all files named *region*gbk below the current
# directory.
# -print0 / -0 keep filenames containing whitespace intact, and -r stops grep
# from running when no file matches.
find . -name "*region*gbk" -print0 | xargs -0 -r grep -A3 "CDS " | grep "/ID=" | cut -d'"' -f2
@chasemc
chasemc / _run.sh
Created January 24, 2021 19:14 — forked from jexp/_run.sh
Rendering large graphs with vivagraph.js, neo4j-javascript-driver (binary-bolt), meetup dataset and compiled runtime. Oh the joy :)
# Install the neo4j-driver package with npm.
npm install neo4j-driver
# Run the test-neo-driver.js script with Node.js.
node test-neo-driver.js