Skip to content

Instantly share code, notes, and snippets.

View apcamargo's full-sized avatar
🦖

Antônio Camargo apcamargo

🦖
  • University of São Paulo
  • São Paulo, SP, Brazil
  • X @apcamargo_
View GitHub Profile
#!/usr/bin/env python
import shutil
from pathlib import Path
from typing import Literal
import pyarrow as pa
import pyarrow.parquet as pq
@apcamargo
apcamargo / find_cutoff.py
Last active October 11, 2025 19:52
Automatic cutoff determination for an arbitrary distribution
from typing import Sequence
import math
def find_cutoff(values: Sequence[float]) -> float:
"""
Determine the cutoff point in a biphasic distribution curve by identifying
the "bending point" where the curve transitions from slowly growing values
to rapidly growing values, using the maximum perpendicular distance method.
@apcamargo
apcamargo / download_mg_rast.py
Last active July 12, 2025 21:20
Downloads all the assembled metagenomes available in MG-RAST
#!/usr/bin/env python
import json
import re
import sys
from typing import Generator, Dict, Any, Optional
import requests
from tqdm import tqdm
from pathlib import Path
from typing import Iterator, Optional, Union
import polars as pl
from needletail import parse_fastx_file
from polars.io.plugins import register_io_source
def scan_fastx(fastx_file: Union[str, Path]) -> pl.LazyFrame:
schema = pl.Schema(
@apcamargo
apcamargo / sam2tsv.py
Created March 10, 2025 03:53
Converts alignments stored in the SAM format to a BLAST-like table
#!/usr/bin/env python
"""
This script processes SAM (Sequence Alignment/Map format) inputs from standard
input and extracts alignment information that is then provided in a tab-separated
table. The following fields are produced: query, target, query_length, query_start,
query_end, target_start, target_end, alignment_length, alignment_identity.
This script was designed for use with SAM files produced by minimap2. However,
it will work with any SAM data that:
@apcamargo
apcamargo / calculate_neff.py
Created November 17, 2024 04:59
Calculate the number of effective sequences (Neff) of an A3M multiple sequence alignment
#!/usr/bin/env python
import math
import re
import click
from scipy.cluster.hierarchy import fcluster, linkage
from skbio import DistanceMatrix, Protein, TabularMSA, io
from skbio.sequence.distance import hamming
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from coloraide import Color
def lighten(
color: Color,
amount: float,
) -> Color:
"""
Lighten a color by a given amount.
"""
@apcamargo
apcamargo / retrieve_assembly_accession.py
Created September 19, 2023 21:03
Retrieve NCBI assembly accessions from GenBank accessions using E-utilities
import subprocess
def get_assembly_accession(genbank_accession):
p1 = subprocess.Popen(
["elink", "-db", "nuccore", "-target", "assembly", "-id", genbank_accession],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
p2 = subprocess.Popen(
["efetch", "-format", "docsum"],