Skip to content

Instantly share code, notes, and snippets.

View sminot's full-sized avatar

Sam Minot sminot

View GitHub Profile
@sminot
sminot / convert_npz_to_tiff.py
Created November 8, 2024 17:58
Convert a NumPy array archive (.npz) to TIFF
#!/usr/bin/env python3
import click
import tifffile
import numpy as np
def convert_npz_to_tiff(input_file, output_file, pixelsize=0.05):
data = np.load(input_file)
@sminot
sminot / download_refseq.sh
Created June 6, 2022 20:26
Download from NCBI RefSeq
#!/bin/bash
set -e
rsync \
-a \
-m \
--copy-links \
--recursive \
--times \
#!/usr/bin/env python3
import click
import os
import pandas as pd
# Set up the command line processor
@click.command()
@click.option("--details-hdf5", help="Provide the path to the geneshot output file containing all gene abundances (*.details.hdf5)")
@click.option("--gene-name", help="Name of the gene to extract", prompt="Gene Name")
@sminot
sminot / geneshot_extract_gene_annot.py
Created April 4, 2022 19:48
Extract the gene annotation table from the geneshot results HDF5
#!/usr/bin/env python3
import click
import os
import pandas as pd
# Set up the command line processor
@click.command()
@click.option("--input_fp", help="Provide the path to the geneshot output file containing all gene abundances (*.results.hdf5)")
def geneshot_extract_gene_annot(input_fp):
@sminot
sminot / print_nextflow_logs.sh
Created January 14, 2022 18:26
Print the log file from a Nextflow task using its short hash
#!/bin/bash
set -euo pipefail
WORK_DIR="${1}"
# Input is the short task id prefix
TASK_PREFIX="${2}"
# Get the complete suffix
@sminot
sminot / merge_metaphlan_tables.py
Created March 2, 2021 00:30
Merge data from multiple files containing MetaPhlAn2 outputs
#!/usr/bin/env python3
import os
import pandas as pd
import sys
def read_metaphlan(fp):
"""Function to read a single file with metaphlan2 results encoded as a TSV."""
@sminot
sminot / fetch_kegg_genes.py
Last active December 2, 2020 12:24
Fetch gene sequences from KEGG
#!/usr/bin/env python3
import requests
from random import shuffle
# Argument parsing code courtesy of @wasade
import click
# Function to fetch the amino acid sequence for a single gene
@sminot
sminot / parse_fasta.py
Created August 13, 2020 16:35
Parse FASTA
from functools import lru_cache
import os
import pandas as pd
@lru_cache(maxsize=16)
def read_fasta(fasta_fp):
assert os.path.exists(fasta_fp)
fasta = {}
header = None
from functools import lru_cache
import pandas as pd
@lru_cache(maxsize=128)
def parse_gff(gff_fp):
df = pd.read_csv(
gff_fp,
sep="\t",
comment="#",
header=None,
@sminot
sminot / print_cloudwatch_logs_aws_batch.py
Last active June 15, 2023 16:46
Print CloudWatch logs for an AWS Batch job
#!/usr/bin/env python3
import boto3
import argparse
from datetime import datetime
parser = argparse.ArgumentParser()
parser.add_argument("job_id")
# Add the arguments