This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import boto3 | |
| def aws_s3_ls(bucket, prefix): | |
| conn = boto3.client('s3') | |
| fps = [] | |
| r = conn.list_objects_v2( | |
| Bucket=bucket, | |
| Prefix=prefix |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """Convert GlimmerHMM GFF3 gene predictions into protein sequences. | |
| This works with the GlimmerHMM GFF3 output format: | |
| ##gff-version 3 | |
| ##sequence-region Contig5.15 1 47390 | |
| Contig5.15 GlimmerHMM mRNA 323 325 . + . ID=Contig5.15.path1.gene1;Name=Contig5.15.path1.gene1 | |
| Contig5.15 GlimmerHMM CDS 323 325 . + 0 ID=Contig5.15.cds1.1;Parent=Contig5.15.path1.gene1;Name=Contig5.15.path1.gene1;Note=final-exon |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| set -e | |
| bucket=$1 | |
| prefix=$2 | |
| (( ${#bucket} > 0 )) | |
| (( ${#prefix} > 0 )) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| from Bio.SeqIO.QualityIO import FastqGeneralIterator | |
| from collections import defaultdict | |
| import gzip | |
| from functools import lru_cache | |
| import os | |
| import sys | |
| assert len(sys.argv) == 3, "Please specify input file and output prefix" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import io | |
| import boto3 | |
| import pandas as pd | |
| def read_feather_file_from_s3(s3_url): | |
| assert s3_url.startswith("s3://") | |
| bucket_name, key_name = s3_url[5:].split("/", 1) | |
| s3 = boto3.client('s3') | |
| retr = s3.get_object(Bucket=bucket_name, Key=key_name) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # KEGG names | |
@lru_cache(maxsize=None)
def get_kegg_name(ko):
    """Return the description that the KEGG REST ``list`` endpoint gives for *ko*.

    The response body is split on tabs; the text after the last tab, with
    trailing newlines removed, is returned. Results are memoized per
    identifier, so repeated lookups do not hit the network again.

    Raises:
        requests.HTTPError: if the service answers with an error status.
            Raising (instead of parsing the error body) keeps bad
            responses out of the cache, since ``lru_cache`` does not
            memoize calls that raise.
        requests.Timeout: if the service does not respond in time.
    """
    # Without an explicit timeout, requests can block forever on a stalled
    # connection.
    r = requests.get("http://rest.kegg.jp/list/{}".format(ko), timeout=30)
    r.raise_for_status()
    return r.text.split("\t")[-1].rstrip("\n")
| get_kegg_name("K00975") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| set -e | |
| module load PANDAseq/2.11-foss-2016b | |
| find . -name "*_1.fq.gz" | sort -R | while read fwd; do | |
| rev="${fwd/_1.fq.gz/_2.fq.gz}" | |
| [[ -s "$rev" ]] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from Bio import Entrez | |
| import xmltodict | |
| def get_name_from_assembly_id(assembly_name): | |
| handle = Entrez.esearch("assembly", term=assembly_name) | |
| search_result = xmltodict.parse("".join(handle.readlines())) | |
| handle.close() | |
| try: | |
| assembly_id = search_result["eSearchResult"]["IdList"]["Id"] | |
| except: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_fasta_from_s3(bucket_name, key_name, sep="\t"):
    """Yield ``(header, sequence)`` pairs from a gzipped FASTA object in S3.

    The object is fetched in full, decompressed in memory, decoded as
    UTF-8 and then handed to ``SimpleFastaParser``. The ``sep`` argument
    is accepted but not used by this function.
    """
    client = boto3.client('s3')
    response = client.get_object(Bucket=bucket_name, Key=key_name)

    # Pull the whole compressed payload into memory before decompressing.
    compressed = io.BytesIO(response['Body'].read())
    raw = gzip.GzipFile(fileobj=compressed, mode='rb').read()
    fasta_text = raw.decode('utf-8')

    yield from SimpleFastaParser(io.StringIO(fasta_text))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import os | |
| import sys | |
| import xmltodict | |
| from collections import OrderedDict | |
| assert len(sys.argv) > 1 | |
| assert os.path.exists(sys.argv[1]) | |
| assert sys.argv[1].endswith(".xml") |