Skip to content

Instantly share code, notes, and snippets.

@ricky-lim
Created March 25, 2019 09:38
Show Gist options
  • Save ricky-lim/a102f982808260d5d9d88734fe51a401 to your computer and use it in GitHub Desktop.
Save ricky-lim/a102f982808260d5d9d88734fe51a401 to your computer and use it in GitHub Desktop.
Dataclasses
import math
import typing
import hashlib
from dataclasses import dataclass, field
from pathlib import Path
@dataclass(order=True)
class ReferenceGenome:
    """A reference genome record that is compared and ordered by ``size`` only.

    ``name``, ``sourcepath`` and ``hash_id`` are declared with
    ``compare=False``, so the generated ``==`` and ordering methods look at
    ``size`` alone: two instances with equal ``size`` compare equal even when
    every other field differs.
    """

    # Label for the genome; excluded from comparisons.
    name: str = field(compare=False)
    # Location of the genome's source file; excluded from comparisons.
    sourcepath: Path = field(compare=False)
    # The only field used by the generated comparison methods.
    size: int = field(compare=True)
    # SHA-256 hex digest derived in __post_init__. A value passed to the
    # constructor is accepted for interface compatibility but always replaced.
    hash_id: typing.Optional[str] = field(compare=False, default=None)

    def generate_hash_id(self) -> str:
        """Return the SHA-256 hex digest of the string ``"name,sourcepath,size"``."""
        hash_str = f"{self.name},{self.sourcepath},{self.size}"
        return hashlib.sha256(hash_str.encode("utf-8")).hexdigest()

    def __post_init__(self) -> None:
        """Derive ``hash_id`` from the other fields once ``__init__`` has run."""
        self.hash_id = self.generate_hash_id()
@dataclass(order=True)
class BlockSynteny:
    """A synteny block on a contig whose ``size`` is derived from its endpoints.

    Every instance field keeps ``compare=True`` (explicitly or by default),
    so equality and ordering use the tuple
    ``(contig, start, end, block_id, size)``.
    """

    # Class-level threshold shared by all instances; ClassVar keeps it out of
    # the generated __init__ and comparison methods.
    min_size: typing.ClassVar[int] = 1000
    contig: str = field(compare=True)
    start: int = field(compare=True)
    end: int
    # NOTE(review): hash=True has no visible effect here -- with eq enabled
    # and frozen=False the dataclass machinery sets __hash__ to None.
    block_id: int = field(hash=True)
    # Filled in by __post_init__; any value passed to the constructor is
    # replaced.
    size: typing.Optional[int] = None

    def __post_init__(self) -> None:
        """Set ``size`` to the absolute distance between ``start`` and ``end``."""
        # abs() keeps integer arithmetic exact; math.fabs() would round-trip
        # through float and lose precision for differences above 2**53.
        # int() preserves the original truncation when endpoints are floats.
        self.size = int(abs(self.end - self.start))
reference_genomes = [
    ReferenceGenome(name="plant_a", sourcepath=Path("/mnt/Fasta/plant_a.fasta"),
                    size=3000),
    ReferenceGenome(name="plant_b", sourcepath=Path("/mnt/Fasta/plant_b.fasta"),
                    size=1000),
    ReferenceGenome(name="plant_c", sourcepath=Path("/mnt/Fasta/plant_c.fasta"),
                    size=2000),
]
# Sort largest-first; ReferenceGenome orders by its `size` field only.
reference_genomes.sort(reverse=True)
# ReferenceGenome equality compares `size` alone (all other fields are
# declared compare=False), so the list comparison further down cannot detect
# a wrong name. Check the resulting order by name explicitly.
assert [genome.name for genome in reference_genomes] == ["plant_a", "plant_c", "plant_b"]
# NOTE: because of compare=False, the name/sourcepath/hash_id values in the
# expected list below are illustrative; only the sizes are actually compared.
assert reference_genomes == [
    ReferenceGenome(name='plant_a', sourcepath=Path('/mnt/Fasta/plant_a.fasta'), size=3000,
                    hash_id='dee70d70ccf3854f8e09cbf48ddd3bcdbad39709402da9e6b618c3eefed54901'),
    ReferenceGenome(name='plant_c', sourcepath=Path('/mnt/Fasta/plant_c.fasta'),
                    size=2000,
                    hash_id='eca2c646def0c0cd88c08686aac1aca67380d31c901537b5ed94fe1d624cdc5c'),
    ReferenceGenome(name='plant_b', sourcepath=Path('/mnt/Fasta/plant_b.fasta'),
                    size=1000,
                    hash_id='e994c93558e67d075eb53b2bf03901acc01e336b22b47d470d05258515fb97a3'),
]
# Sample blocks, put into the dataclass-generated order by the in-place sort.
synteny_blocks = [
    BlockSynteny(contig="1", start=1000, end=5000, block_id=1),
    BlockSynteny(contig="3", start=3000, end=5000, block_id=1),
    BlockSynteny(contig="1", start=5000, end=5500, block_id=1),
]
synteny_blocks.sort()
# Keep only the blocks on contig "1" whose size is strictly above min_size.
filtered_synteny = [
    block
    for block in synteny_blocks
    if block.contig == "1" and block.size > block.min_size
]
assert filtered_synteny == [BlockSynteny("1", 1000, 5000, 1)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment