Created
March 25, 2019 09:38
-
-
Save ricky-lim/a102f982808260d5d9d88734fe51a401 to your computer and use it in GitHub Desktop.
Dataclasses
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import typing | |
import hashlib | |
from dataclasses import dataclass, field | |
from pathlib import Path | |
@dataclass(order=True)
class ReferenceGenome:
    """Reference genome record that is compared and ordered by ``size`` only.

    ``name``, ``sourcepath`` and ``hash_id`` are excluded from comparison, so
    two instances with the same ``size`` are equal regardless of their other
    fields.  ``hash_id`` is recomputed after initialisation; a value passed
    to the constructor is overwritten.
    """

    name: str = field(compare=False)
    sourcepath: Path = field(compare=False)
    size: int = field(compare=True)
    hash_id: str = field(default=None, compare=False)

    def __post_init__(self):
        """Derive ``hash_id`` from the other fields once they are set."""
        self.hash_id = self.generate_hash_id()

    def generate_hash_id(self):
        """Return the SHA-256 hex digest of ``"<name>,<sourcepath>,<size>"``."""
        hasher = hashlib.sha256()
        hasher.update(
            f"{self.name},{self.sourcepath},{self.size}".encode("utf-8")
        )
        return hasher.hexdigest()
@dataclass(order=True)
class BlockSynteny:
    """A block on a contig, described by its start/end coordinates.

    Instances compare and sort field by field in declaration order
    (``contig``, ``start``, ``end``, ``block_id``, ``size``).  ``size`` is
    always recomputed from ``start`` and ``end`` after initialisation, so a
    value passed for it is overwritten.
    """

    # Shared by all instances; ClassVar keeps it out of the dataclass fields
    # (it is not an __init__ parameter and takes no part in comparisons).
    min_size: typing.ClassVar[int] = 1000

    contig: str = field(compare=True)
    start: int = field(compare=True)
    end: int
    block_id: int = field(hash=True)
    # Derived in __post_init__; the None default only makes it optional.
    size: typing.Optional[int] = None

    def __post_init__(self):
        """Set ``size`` to the absolute distance between ``start`` and ``end``."""
        # Built-in abs() keeps integer arithmetic exact, whereas math.fabs()
        # converts to float and loses precision for spans above 2**53.
        # int() preserves the previous truncation of non-integer inputs.
        self.size = int(abs(self.end - self.start))
# Build the reference genomes and sort them based on reference size,
# largest first.
reference_genomes = sorted(
    [
        ReferenceGenome(
            name="plant_a",
            sourcepath=Path("/mnt/Fasta/plant_a.fasta"),
            size=3000,
        ),
        ReferenceGenome(
            name="plant_b",
            sourcepath=Path("/mnt/Fasta/plant_b.fasta"),
            size=1000,
        ),
        ReferenceGenome(
            name="plant_c",
            sourcepath=Path("/mnt/Fasta/plant_c.fasta"),
            size=2000,
        ),
    ],
    reverse=True,
)

# NOTE: ReferenceGenome equality considers only ``size``, so this assertion
# verifies the descending size order; the names and hash_id values listed
# here are not part of the comparison.
assert reference_genomes == [
    ReferenceGenome(
        name="plant_a",
        sourcepath=Path("/mnt/Fasta/plant_a.fasta"),
        size=3000,
        hash_id="dee70d70ccf3854f8e09cbf48ddd3bcdbad39709402da9e6b618c3eefed54901",
    ),
    ReferenceGenome(
        name="plant_c",
        sourcepath=Path("/mnt/Fasta/plant_c.fasta"),
        size=2000,
        hash_id="eca2c646def0c0cd88c08686aac1aca67380d31c901537b5ed94fe1d624cdc5c",
    ),
    ReferenceGenome(
        name="plant_b",
        sourcepath=Path("/mnt/Fasta/plant_b.fasta"),
        size=1000,
        hash_id="e994c93558e67d075eb53b2bf03901acc01e336b22b47d470d05258515fb97a3",
    ),
]
# Build the blocks and sort them with the dataclass ordering
# (fields compared in declaration order, contig first).
synteny_blocks = sorted(
    [
        BlockSynteny("1", 1000, 5000, 1),
        BlockSynteny("3", 3000, 5000, 1),
        BlockSynteny("1", 5000, 5500, 1),
    ]
)

# filter synteny blocks in contig 1 and greater than min_size
filtered_synteny = [
    block
    for block in synteny_blocks
    if block.contig == "1" and block.size > block.min_size
]
assert filtered_synteny == [
    BlockSynteny(contig="1", start=1000, end=5000, block_id=1),
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment