Sam Minot sminot

Microbiome scientist at the Fred Hutchinson Cancer Research Center

sminot / download_results.py

Last active July 11, 2016 18:23

Download all One Codex analysis results

	#!/usr/bin/python
	"""
	One Codex CSV Download Script.

	Simple 1 dependency (requests) Python 2/3 script for downloading
	One Codex analysis results and saving them to CSVs, as well as read-level results
	"""
	from __future__ import print_function
	import os
	import requests

sminot / fetch_ocx_analyses.R

Last active April 30, 2016 00:52

	#
	# Copyright Reference Genomics, Inc. 2016
	# Released under the MIT License
	#
	# Script for fetching analysis results from the One Codex API (v0)
	# See https://docs.onecodex.com for full documentation on the REST API
	#
	# Script can be run from the command line with:
	# `Rscript fetch_ocx_analyses.R $ONE_CODEX_API_KEY $output_filepath`
	#

sminot / download_result_table.py

Created May 9, 2016 23:07

Download results from One Codex and combine them into a single table (Python; requires pandas)

	#!/usr/bin/python
	"""Download results from One Codex and combine into a single table (note: requires pandas)."""

	import requests
	import os
	import pandas as pd

	# Expects the API key for One Codex to be stored in the ONE_CODEX_API_KEY environment variable
	api_key = os.environ['ONE_CODEX_API_KEY']

sminot / fetch_ocx_analyses.R

Last active August 22, 2016 23:55 — forked from boydgreenfield/fetch_ocx_analyses.R

Sample R script for generating a CSV with One Codex analysis results, one column per sample

	#!/usr/local/bin/Rscript

	# Copyright Reference Genomics, Inc. 2016
	# Released under the MIT License
	#
	# Script for fetching analysis results from the One Codex API (v0)
	# See https://docs.onecodex.com for full documentation on the REST API
	#
	# Script can be run from the command line with:
	# `Rscript fetch_ocx_analyses.R <API_KEY> <FILE> [-d DB] [-b BEGINNING_DATE -e ENDING_DATE]`

sminot / split_by_header.py

Created July 14, 2017 17:32

Split a FASTQ file by header

	#!/usr/bin/python
	"""Split up a FASTQ file based on the first field of the header."""

	from collections import defaultdict
	import gzip
	import sys
	import os

	fp = sys.argv[1]
	if not os.path.exists(fp):

sminot / compare_proteins_blastp.py

Created September 8, 2017 20:52

Compare protein FASTAs with BLASTP and output XLSX

	#!/usr/bin/python
	"""Given a set of protein FASTA files, perform pairwise comparison via BLAST, outputting an Excel spreadsheet."""

	import os
	import sys
	import json
	import subprocess
	import pandas as pd
	from collections import defaultdict
	from Bio.SeqIO.FastaIO import SimpleFastaParser

sminot / ncbi_taxonomy.py

Last active January 7, 2024 09:37

Class for using the NCBI taxonomy, reading from taxdump files

	import os
	from functools import lru_cache
	from collections import defaultdict

	# Read in the taxonomy
	class NCBITaxonomy():
	def __init__(self, folder):
	self.tax = defaultdict(dict)
	# Read in the file of taxid information
	names_fp = os.path.join(folder, 'names.dmp')

sminot / test_sparse_dataframe_creation.ipy

Created October 26, 2017 17:52

Profiling sparse DataFrame creation

	#!/usr/local/bin/ipython

	import pandas as pd
	from collections import defaultdict
	from random import choice

	alph = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

	# Function to make some test data
	def make_dat(nrows=1000, ncols=1000, nvals=1000):

sminot / make_mothur_tax.py

Last active December 8, 2017 23:08

Make a taxonomy file compatible with mothur

	#!/usr/bin/python
	"""Make a taxonomy file compatible with mothur."""

	import os
	import sys
	import pandas as pd

	if len(sys.argv) != 4:
	print("Please specify the seq_info.csv, tax_info.csv, and output.tsv files")

sminot / read_from_s3.py

Created February 20, 2018 22:23

Read JSON directly from AWS S3

	import io
	import json
	import gzip
	import boto3

	def read_gzipped_json_file_from_s3(bucket_name, key_name):
	s3 = boto3.client('s3')
	retr = s3.get_object(Bucket=bucket_name, Key=key_name)

	bytestream = io.BytesIO(retr['Body'].read())