This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the distribution of CAG sizes | |
def plot_cag_size(hdf_fp, pdf=None, min_size=5, alpha=0.25): | |
cag_annot = pd.read_hdf(hdf_fp, "/annot/cag/all").set_index("CAG") | |
# Calculate the log10 size (number of genes per CAG) | |
cag_annot = cag_annot.assign( | |
size_log10 = cag_annot["size"].apply(np.log10) | |
) | |
# Filter by CAG size |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the number of genes detected and the proportion of reads aligned | |
def plot_specimen_summary(hdf_fp, pdf=None, alpha = 0.85): | |
specimen_summary = pd.read_hdf(hdf_fp, "/summary/all").set_index("specimen") | |
specimen_summary = specimen_summary.assign( | |
prop_reads = specimen_summary["aligned_reads"] / specimen_summary["n_reads"] | |
) | |
for col_name, axis_title in [ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read in data from the HDF store | |
assert os.path.exists(hdf_fp) | |
cag_annot = pd.read_hdf(hdf_fp, "/annot/cag/all").set_index("CAG") | |
cag_abund = pd.read_hdf(hdf_fp, "/abund/cag/wide").set_index("CAG") | |
corncob_df = pd.read_hdf(hdf_fp, "/stats/cag/corncob") | |
betta_df = pd.read_hdf(hdf_fp, "/stats/enrichment/betta") | |
manifest = pd.read_hdf(hdf_fp, "/manifest").set_index("specimen") | |
specimen_summary = pd.read_hdf(hdf_fp, "/summary/all").set_index("specimen") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import lru_cache | |
from collections import defaultdict | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from matplotlib.backends.backend_pdf import PdfPages | |
import matplotlib.patches as mpatches | |
from scipy import stats | |
import seaborn as sns | |
import os |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Execute a Jupyter notebook and write the executed result back over the
# same file (--to notebook --inplace --execute).
# --ExecutePreprocessor.timeout=-1 is passed so long-running cells are not
# cut off by the default per-cell timeout.
# Replace NOTEBOOK with the path to the .ipynb file to run.
jupyter nbconvert --ExecutePreprocessor.timeout=-1 --to notebook --inplace --execute NOTEBOOK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prepend each barcode read (Raw_Read2_Barcodes) onto its mate in Raw_Read1,
# then hand the merged FASTQ stream to fastx_barcode_splitter.pl.
#
# FASTQ records are 4 lines long. The awk filter passes through only the
# sequence line (NR % 4 == 2) and the quality line (NR % 4 == 0) of every
# barcode record and emits an empty line for the header and "+" lines, so
# `paste -d ''` (join with no delimiter) yields:
#   header  ->  Read 1 header, unchanged
#   seq     ->  barcode sequence + Read 1 sequence
#   "+"     ->  Read 1 separator, unchanged
#   qual    ->  barcode quality + Read 1 quality
#
# NOTE: the quality line must be selected with `NR % 4 == 0`; a modulo-4
# result is never 4, so testing `== 4` would drop the barcode qualities and
# leave every record with a sequence longer than its quality string.
#
# Requires bash (process substitution) and the FASTX-Toolkit. --bol is passed
# so barcodes are matched at the start of the sequence, where they were just
# prepended.
paste -d '' <(bunzip2 -c Raw_Read2_Barcodes.fq.bz2 | awk '{if(NR % 4 == 2 || NR % 4 == 0){print}else{print ""}}') <(bunzip2 -c Raw_Read1.fq.bz2) | fastx_barcode_splitter.pl --bcfile barcodes.tsv --prefix PREFIX --suffix _R1.fastq --bol
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import boto3 | |
import pandas as pd | |
def read_csv_from_s3(s3_url, s3=None, sep=","): | |
assert s3_url.startswith("s3://") | |
bucket_name, key_name = s3_url[5:].split("/", 1) | |
if s3 is None: | |
s3 = boto3.client('s3') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
def aws_s3_ls(bucket, prefix): | |
conn = boto3.client('s3') | |
fps = [] | |
r = conn.list_objects_v2( | |
Bucket=bucket, | |
Prefix=prefix |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Convert GlimmerHMM GFF3 gene predictions into protein sequences. | |
This works with the GlimmerHMM GFF3 output format: | |
##gff-version 3 | |
##sequence-region Contig5.15 1 47390 | |
Contig5.15 GlimmerHMM mRNA 323 325 . + . ID=Contig5.15.path1.gene1;Name=Contig5.15.path1.gene1 | |
Contig5.15 GlimmerHMM CDS 323 325 . + 0 ID=Contig5.15.cds1.1;Parent=Contig5.15.path1.gene1;Name=Contig5.15.path1.gene1;Note=final-exon |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -e | |
bucket=$1 | |
prefix=$2 | |
(( ${#bucket} > 0 )) | |
(( ${#prefix} > 0 )) |