This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os | |
import sys | |
import xml | |
import gzip | |
import json | |
import time | |
from collections import defaultdict | |
import pandas as pd |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
import os | |
import io | |
import json | |
import gzip | |
import boto3 | |
import argparse | |
import pandas as pd |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# encoding:utf8 | |
# authors: Erik Garrison, Sébastien Boisvert, Sam Minot | |
"""This script takes a folder of fastq(.gz) files and interleaves them | |
Usage: | |
interleave-fasta folder | |
""" | |
import os |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_fasta_from_s3(bucket_name, key_name, sep="\t"):
    """Yield the records of an uncompressed FASTA object stored in S3.

    The whole object is downloaded and decoded as UTF-8 before it is
    parsed, so memory use grows with the size of the object.

    Args:
        bucket_name: Name of the S3 bucket that holds the object.
        key_name: Key of the FASTA object inside the bucket.
        sep: Unused by this function; kept so that existing callers which
            pass it keep working.

    Yields:
        Each ``(header, seq)`` pair produced by ``SimpleFastaParser``.
    """
    s3 = boto3.client('s3')
    retr = s3.get_object(Bucket=bucket_name, Key=key_name)
    fasta_text = retr['Body'].read().decode('utf-8')
    yield from SimpleFastaParser(io.StringIO(fasta_text))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import sys | |
import xmltodict | |
from collections import OrderedDict | |
# Validate the command line explicitly rather than with ``assert``:
# assertions are skipped when Python runs with -O, which would let a
# missing or wrong argument through.
if len(sys.argv) < 2:
    sys.exit("Usage: {} <file.xml>".format(sys.argv[0]))
if not os.path.exists(sys.argv[1]):
    sys.exit("Input file does not exist: {}".format(sys.argv[1]))
if not sys.argv[1].endswith(".xml"):
    sys.exit("Input file must end with .xml: {}".format(sys.argv[1]))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_fasta_from_s3(bucket_name, key_name, sep="\t"):
    """Yield the records of a gzip-compressed FASTA object stored in S3.

    The whole object is downloaded, decompressed and decoded as UTF-8
    before it is parsed, so memory use grows with the uncompressed size.

    Args:
        bucket_name: Name of the S3 bucket that holds the object.
        key_name: Key of the gzipped FASTA object inside the bucket.
        sep: Unused by this function; kept so that existing callers which
            pass it keep working.

    Yields:
        Each ``(header, seq)`` pair produced by ``SimpleFastaParser``.
    """
    s3 = boto3.client('s3')
    retr = s3.get_object(Bucket=bucket_name, Key=key_name)
    bytestream = io.BytesIO(retr['Body'].read())
    # Close the decompressor as soon as the payload has been read.
    with gzip.GzipFile(None, 'rb', fileobj=bytestream) as gz:
        got_text = gz.read().decode('utf-8')
    yield from SimpleFastaParser(io.StringIO(got_text))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio import Entrez | |
import xmltodict | |
def get_name_from_assembly_id(assembly_name): | |
handle = Entrez.esearch("assembly", term=assembly_name) | |
search_result = xmltodict.parse("".join(handle.readlines())) | |
handle.close() | |
try: | |
assembly_id = search_result["eSearchResult"]["IdList"]["Id"] | |
except: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -e | |
module load PANDAseq/2.11-foss-2016b | |
find . -name "*_1.fq.gz" | sort -R | while read fwd; do | |
rev="${fwd/_1.fq.gz/_2.fq.gz}" | |
[[ -s "$rev" ]] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# KEGG names | |
@lru_cache(maxsize=None)
def get_kegg_name(ko):
    """Return the text KEGG lists for the identifier ``ko``.

    Queries the KEGG REST ``list`` endpoint and returns whatever follows
    the last tab in the response body, with trailing newlines removed.
    Results are memoized per ``ko`` for the life of the process, so each
    identifier is requested at most once.

    NOTE(review): the HTTP status is not checked and no timeout is set, so
    an error response is parsed (and cached) like a normal one -- confirm
    whether callers rely on that before tightening it.

    Args:
        ko: Identifier appended to the ``list/`` endpoint URL.

    Returns:
        The last tab-separated field of the response text.
    """
    r = requests.get("http://rest.kegg.jp/list/{}".format(ko))
    return r.text.split("\t")[-1].rstrip("\n")
get_kegg_name("K00975") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import boto3 | |
import pandas as pd | |
def read_feather_file_from_s3(s3_url): | |
assert s3_url.startswith("s3://") | |
bucket_name, key_name = s3_url[5:].split("/", 1) | |
s3 = boto3.client('s3') | |
retr = s3.get_object(Bucket=bucket_name, Key=key_name) |