Last active
March 9, 2022 17:02
-
-
Save andrewyatz/652020eb5af7e731c4b2e0db2d42e555 to your computer and use it in GitHub Desktop.
Very basic code to find gaps in a single FASTA record. Min gap size is 10bp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import gzip | |
import sys | |
filename = sys.argv[1] | |
file = gzip.open(filename, "rt") | |
position = 0 | |
contigous_n = 0 | |
start = 0 | |
for line in file: | |
if ">" in line: | |
continue | |
stripped_line = line.rstrip() | |
for c in stripped_line: | |
# If we have an N then record the start if it is the first one | |
# and increment the counter | |
if c == "N": | |
if contigous_n == 0: | |
start = position | |
contigous_n += 1 | |
# Otherwise we are in a non-N, so that could mean stopping the gap | |
# and recording it | |
else: | |
if contigous_n >= 10: | |
print( | |
f"Found a gap from {start+1} to {position}. Length claims it was {contigous_n}" | |
) | |
contigous_n = 0 | |
start = 0 | |
position += 1 | |
if contigous_n >= 10: | |
print( | |
f"Found a gap from {start+1} to {position}. Length claims it was {contigous_n}" | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment