This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import lru_cache | |
| import os | |
| import pandas as pd | |
| @lru_cache(maxsize=16) | |
| def read_fasta(fasta_fp): | |
| assert os.path.exists(fasta_fp) | |
| fasta = {} | |
| header = None |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import lru_cache | |
| import pandas as pd | |
| @lru_cache(maxsize=128) | |
| def parse_gff(gff_fp): | |
| df = pd.read_csv( | |
| gff_fp, | |
| sep="\t", | |
| comment="#", | |
| header=None, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import boto3 | |
| import argparse | |
| from datetime import datetime | |
| parser = argparse.ArgumentParser() | |
| parser.add_argument("job_id") | |
| # Add the arguments |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Plot the distribution of CAG sizes | |
| def plot_cag_size(hdf_fp, pdf=None, min_size=5, alpha=0.25): | |
| cag_annot = pd.read_hdf(hdf_fp, "/annot/cag/all").set_index("CAG") | |
| # Calculate the log10 size (number of genes per CAG) | |
| cag_annot = cag_annot.assign( | |
| size_log10 = cag_annot["size"].apply(np.log10) | |
| ) | |
| # Filter by CAG size |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Plot the number of genes detected and the proportion of reads aligned | |
| def plot_specimen_summary(hdf_fp, pdf=None, alpha = 0.85): | |
| specimen_summary = pd.read_hdf(hdf_fp, "/summary/all").set_index("specimen") | |
| specimen_summary = specimen_summary.assign( | |
| prop_reads = specimen_summary["aligned_reads"] / specimen_summary["n_reads"] | |
| ) | |
| for col_name, axis_title in [ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Read in data from the HDF store | |
| assert os.path.exists(hdf_fp) | |
| cag_annot = pd.read_hdf(hdf_fp, "/annot/cag/all").set_index("CAG") | |
| cag_abund = pd.read_hdf(hdf_fp, "/abund/cag/wide").set_index("CAG") | |
| corncob_df = pd.read_hdf(hdf_fp, "/stats/cag/corncob") | |
| betta_df = pd.read_hdf(hdf_fp, "/stats/enrichment/betta") | |
| manifest = pd.read_hdf(hdf_fp, "/manifest").set_index("specimen") | |
| specimen_summary = pd.read_hdf(hdf_fp, "/summary/all").set_index("specimen") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import lru_cache | |
| from collections import defaultdict | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from matplotlib.backends.backend_pdf import PdfPages | |
| import matplotlib.patches as mpatches | |
| from scipy import stats | |
| import seaborn as sns | |
| import os |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| jupyter nbconvert --ExecutePreprocessor.timeout=-1 --to notebook --inplace --execute NOTEBOOK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Prepend the barcode read's sequence and quality lines (lines 2 and 4 of every 4-line record) to the matching Read 1 lines, then split by the barcode found at the beginning of each read (--bol) | |
| paste -d '' <(bunzip2 -c Raw_Read2_Barcodes.fq.bz2 | awk '{if(NR % 4 == 2 || NR % 4 == 0){print}else{print ""}}') <(bunzip2 -c Raw_Read1.fq.bz2) | fastx_barcode_splitter.pl --bcfile barcodes.tsv --prefix PREFIX --suffix _R1.fastq --bol
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import io | |
| import boto3 | |
| import pandas as pd | |
| def read_csv_from_s3(s3_url, s3=None, sep=","): | |
| assert s3_url.startswith("s3://") | |
| bucket_name, key_name = s3_url[5:].split("/", 1) | |
| if s3 is None: | |
| s3 = boto3.client('s3') |