This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import simplesam

# SAM FLAG bits touched by this script.
SUPPLEMENTARY = 0x800
SECONDARY = 0x100

# Copy input.sam to output.sam, re-flagging every supplementary alignment
# as a secondary alignment; all other records pass through unchanged.
with simplesam.Reader(open('input.sam')) as sam, \
        simplesam.Writer(open('output.sam', 'w'), sam.header) as fixed:
    for read in sam:
        if read.flag & SUPPLEMENTARY:
            # clear the supplementary bit ...
            read.flag &= ~SUPPLEMENTARY
            # ... and set the secondary bit unless it is already set
            if not read.secondary:
                read.flag |= SECONDARY
        fixed.write(read)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyfaidx

# Write the records of file.fa to file.sorted.fa ordered by sequence name.
with pyfaidx.Fasta("file.fa") as peg_fasta:
    with open("file.sorted.fa", 'w') as sorted_fasta:
        for name in sorted(peg_fasta.keys()):
            record = peg_fasta[name]
            # Terminate the header and the sequence with a newline; without
            # them every record runs together on a single line and the
            # output is not valid FASTA.
            sorted_fasta.write('>' + record.long_name.rstrip() + '\n')
            sorted_fasta.write(str(record) + '\n')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyfaidx
import simplesam

# For every mapped, non-secondary alignment, look up the 20 reference
# bases starting at the alignment position.
with pyfaidx.Fasta('genome.fasta') as fasta, simplesam.Reader(open('orfH9_offtarget.sam')) as sam:
    for read in sam:
        # skip unmapped and secondary alignments
        if not read.mapped or read.secondary:
            continue
        # grab the reference sequence from the indexed FASTA
        zero_index = read.pos - 1  # read.pos is 1-based; slices are 0-based
        sequence_chunk = fasta[read.rname][zero_index:zero_index + 20]
        # or get the reference sequence from the SAM MD tag
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from simplesam import Reader
from pyfaidx import Fasta

# For each mapped read, fetch the reference base immediately before the
# read's leftmost aligned position.
with Reader(open('library.bam', 'r')) as sam_file, Fasta('hg38.fa', as_raw=True) as hg38:
    for read in sam_file:
        if read.mapped:
            # might also want to handle read.reverse here
            prior_pos = read.pos - 2  # read.pos is 1-based
            if prior_pos < 0:
                # The read starts at the first base of the contig, so no
                # preceding base exists. Indexing with -1 would silently
                # return the LAST base of the contig instead.
                prior_base = None
            else:
                prior_base = hg38[read.rname][prior_pos]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyfaidx import Fasta

# Extract chr02[146062:148216] (0-based, end-exclusive slice) from
# Genome.fasta and write it to the file 'chr02_18s' as a FASTA record.
with Fasta('Genome.fasta') as genes, open('chr02_18s', 'w') as f:
    seqFile = genes['chr02'][146062:148216]
    # The header and the sequence each need a trailing newline; otherwise
    # the sequence is glued onto the header line.
    f.write('>' + seqFile.name + '\n')
    f.write(seqFile.seq + '\n')  # or str(seqFile)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyfaidx import Fasta

# Build a lookup from the two tab-separated columns of file1.txt
# (first column -> second column).
file_1 = {}
with open('file1.txt', 'r') as ids:
    for line in ids:
        line = line.strip()
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on the tuple unpack below
            continue
        key, value = line.split('\t')
        file_1[key] = value

# Open file2.fa with each sequence key translated through the lookup.
# A key missing from file1.txt raises KeyError.
file_2 = Fasta('file2.fa', key_function=lambda key: file_1[key])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyfaidx import Fasta

# Map old sequence names to new ones using the whitespace-separated
# two-column table in newnames.txt.
name_map = {}
with open('newnames.txt') as newnames:
    next(newnames)  # remove header
    # every remaining row must split into exactly (old, new)
    name_map.update(row.rstrip().split() for row in newnames)
with open('seqnew.fa', 'w') as new_fasta: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
import sys

from pyfaidx import Fasta

# Print n randomly chosen records from file.faa: the record name followed
# by its sequence lines.
n = 10
faa = Fasta("file.faa")
# random.sample() needs a real sequence; a Fasta object is not one, so
# draw from the list of record names and look each record up afterwards.
names = list(faa.keys())
for name in random.sample(names, n):
    sample = faa[name]
    print(sample.name)
    for line in sample:
        print(line)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import argparse | |
import pkg_resources | |
from collections import deque | |
from collections import Counter | |
try: | |
from collections import OrderedDict | |
except ImportError: #python 2.6 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyfaidx import FastaVariant

# Build a consensus view of reference.fasta with the variants from
# sample1.vcf.gz applied (het=True, hom=True).
consensus = FastaVariant('reference.fasta', 'sample1.vcf.gz', het=True, hom=True)
chrom = 'chr1'
# first eight positions of the chromosome (0-based, end-exclusive)
seq = consensus[chrom][slice(0, 8)]
print(seq)  # example output given in the original snippet: AGTGCG
# if you don't want invariant sites masked, you're good to go. otherwise: