Skip to content

Instantly share code, notes, and snippets.

View chasemc's full-sized avatar
:octocat:

Chase Clark chasemc

:octocat:
View GitHub Profile
import base64
import hashlib
from Bio.SeqUtils import CheckSum
import sys
def sha512t24u(input):
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7714221/
# To standardize to caps-only input, use hash_aminos()
sha512_digest = hashlib.sha512(bytes(input,"utf8")).digest()[:24]
@chasemc
chasemc / gist:270ec6eaaa864269b9bed2f12816d55f
Last active May 16, 2023 12:54
hmmsearch a model with --cut_ga when the model has no defined cutoffs
hmmsearch --domtblout "a.dom" -Z 100 --cut_ga '/socialgene_nr_hmms_file_without_cutoffs_1_of_1.hmm' /example.fa
# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file: /socialgene_nr_hmms_file_without_cutoffs_1_of_1.hmm
# target sequence database: /example.fa
@chasemc
chasemc / mibig_activity.tsv
Created May 6, 2023 21:27
grep '"activity":' *
BGC0000001 antibacterial
BGC0000001 cytotoxic
BGC0000001 antibacterial
BGC0000001 cytotoxic
BGC0000002 antibacterial
BGC0000002 antifungal
BGC0000014 antifungal
BGC0000016 antifungal
BGC0000017 neurotoxic
BGC0000017 neurotoxic
PF10417
PF12574
PF09847
PF00244
PF16998
PF00389
PF02826
PF00198
PF16078
PF04029
# Bio::Pfam::HMM::HMMResults.pm
#
# Author: finnr
# Maintainer: $Id: HMMResults.pm,v 1.3 2009-12-15 14:38:08 jt6 Exp $
# Version: $Revision: 1.3 $
# Created: Nov 19, 2008
# Last Modified: $Date: 2009-12-15 14:38:08 $
=head1 NAME
@chasemc
chasemc / antismash_minimal_json.py
Created April 28, 2023 12:50
Extract minimal BGC region info from antismash json
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import json
import argparse
from multiprocessing import Pool
import tarfile
from pathlib import Path
parser = argparse.ArgumentParser(description="Extract from antismash json")
parser.add_argument(
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os
import hashlib
from pathlib import Path
import argparse
from collections import defaultdict
import glob, os
from rich.progress import (
{"import_files": {"/socialgene_neo4j/import/antismash_gbk_to_table.tsv.gz": "d7a8d113e8ed1412e48f8f02c345de58", "/socialgene_neo4j/import/versions.yml": "550ae99b30e769987a5ebb3be53ba196", "/socialgene_neo4j/import/taxdump_process/ea36b0a2f07c7f730058510f39656e70.nodes_taxid.gz": "ea36b0a2f07c7f730058510f39656e70", "/socialgene_neo4j/import/taxdump_process/versions.yml": "2a5caa6b018bbc8b5c9e3fa58591bfad", "/socialgene_neo4j/import/taxdump_process/8a3145f4e2f529124636f99dcc999878.taxid_to_taxid.gz": "8a3145f4e2f529124636f99dcc999878", "/socialgene_neo4j/import/protein_info/7fade5e885cd59d8493ae15b8e9ce1e3.protein_ids.gz": "7fade5e885cd59d8493ae15b8e9ce1e3", "/socialgene_neo4j/import/protein_info/8bcd5f910b49a24b4b36b66a147679d4.protein_info.gz": "8bcd5f910b49a24b4b36b66a147679d4", "/socialgene_neo4j/import/diamond_blastp/versions.yml": "45a30218f36138d65326e1332e1338f2", "/socialgene_neo4j/import/diamond_blastp/36063c1222c951511061b9fdc75fd8f3.blast6.gz": "36063c1222c951511061b9fdc75fd8f3", "/socialgene_neo4j
@chasemc
chasemc / NZ_CP070291.fna.gz
Created March 29, 2023 21:35
sim reads (after filter, so some missing)
This file has been truncated, but you can view the full file.
>NZ_CP070290.1_1-5000
GTGTCACTTTCGCTTTGGCAGCAGTGTCTTGCCCGATTGCAGGATGAGTTACCAGCCACA
GAATTCAGTATGTGGATACGCCCGTTGCAGGCGGAACTGAGCGATAACACGCTGGCCTTG
TACGCGCCAAATCGTTTTGTCCTCGATTGGGTACGGGACAAGTACCTCAATAATATCAAT
GGACTGCTAACCAGCTTCTGTGGCGCGGATGCCCCACAGTTGCGCTTTGAAGTTGGCACC
AGGCCGGTGACGAAAACCTCTCAGGCCGCAGTGACGAGCAACGTTACAGCGCCAGCTCAG
GTGGCGCAAATGCAACCGCAGCGCGCTGCGCCTGCAGCGCGTTCGGGTTGGGATAACGTT
CCTGCTCCGGCGGAACCGACCTATCGTTCCAACGTCAACGTCAAACATACGTTTGATAAC
TTCGTCGAAGGTAAATCTAACCAACTGGCGCGCGCGGCGGCTCGCCAGGTGGCGGATAAC
CCCGGTGGCGCTTATAACCCGCTGTTCCTTTATGGCGGCACGGGTCTGGGTAAAACTCAC
This file has been truncated, but you can view the full file.
>NZ_JADOTX010000001.1_1-5000_1
PGPHRRYGADLVVPRLVRAQCALLKGIALRYVMRRSGFRGRYERQRTMLAEVVAALVRRA
PEGLDPIFAPLWRAAPDDTARLRVVIDQVASLTDPAAVTWHTRLVGNGTPLTDN*
>NZ_JADOTX010000001.1_1-5000_2
MTERPRNVTSAQVLRTERPTPHLIRLVLGGDELVGLPVGEFTDHYIKVVFPQPGVAYPQP
LDLAAIRRDLPREQWPRLRAYTVRRWDPLAGELTVDVVHHGDEGLAGPWAAALRPGDPVH
FVGPGGAYAPSPDADWHLLVGDESALPAIAAALERLPLGARAHVFVEIADPAEEQKLLSP
GAVELTWLHRGDRPVGEALVAAVRALEFPAGQVHAFVHGEAAFVRELRRLLRGERGIPLG
QLSISGYWRRGMDDEGWRSTKADWNQQVAAEEVAVAAA*
>NZ_JADOTX010000001.1_1-5000_3