Created
September 28, 2021 21:17
-
-
Save rob-p/74087f1335de806a26144cf18a9d8aca to your computer and use it in GitHub Desktop.
filter containments out of a PAF file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pafpy import PafFile | |
import argparse | |
import sys | |
def main(argsj): | |
pi = args.paf | |
of = args.tblast | |
def frac_covered(r): | |
return float(r.qend - r.qstart) / r.qlen | |
ofile = open(of, 'w') | |
ofile.write('qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n') | |
with PafFile(pi) as paf: | |
for record in paf: | |
if record.qname != record.tname: | |
if record.qlen >= args.minlen: | |
if (frac_covered(record) >= args.fract) and (record.blast_identity() >= args.ident): | |
ofile.write(f"{record.qname}\t{record.tname}\t{record.blast_identity()}\t*\t*\t*\t*\t*\t*\t*\t*\t*\n") | |
if __name__ == "__main__": | |
parser = argparse.ArgumentParser(description='Filter minimap2 output to find high-quality containment.') | |
parser.add_argument('--paf', help='input PAF file') | |
parser.add_argument('--tblast', help='output tab-delimited BLAST file') | |
parser.add_argument('--minlen', default=500, type=int, help='minimum contig length to consider') | |
parser.add_argument('--ident', default=0.95, type=float, help='required percent identity') | |
parser.add_argument('--fract', default=0.95, type=float, help='fraction of the contained contig that must be covered by the match') | |
args = parser.parse_args() | |
main(args) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment