Created
January 31, 2014 16:41
-
-
Save holtgrewe/8735840 to your computer and use it in GitHub Desktop.
Script that takes a SAM file and removes leading/trailing insertions/deletions from CIGAR string such that samtools tview likes it better.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
def splitCigar(cigar): | |
result = [] | |
count = '' | |
operation = '' | |
for x in cigar: | |
if x not in '0123456789': | |
operation = x | |
if count: | |
result.append((int(count), operation)) | |
operation = '' | |
count = '' | |
else: | |
count = count + x | |
return result | |
def fixLine(line): | |
vals = line.split('\t') | |
if vals[3] == '*' or vals[5] == '*': | |
return line | |
pos = int(vals[3]) | |
cigar = vals[5] | |
if cigar != '*': | |
cigar = splitCigar(cigar) | |
if cigar[0][1] == 'I': | |
pos -= cigar[0][0] | |
if len(cigar) > 1 and cigar[1][1] == 'M': | |
cigar[1] = (cigar[0][0] + cigar[1][0], 'M') | |
cigar.pop(0) | |
else: | |
cigar[0] = (cigar[0][0], 'M') | |
elif cigar[0][1] == 'D': | |
pos += cigar[0][0] | |
cigar.pop(0) | |
if cigar[-1][1] == 'I': | |
if len(cigar) > 1 and cigar[-2][1] == 'M': | |
cigar[-2] = (cigar[-1][0] + cigar[-2][0], 'M') | |
cigar.pop() | |
else: | |
cigar[-1]= (cigar[-1][0], 'M') | |
elif cigar[-1][1] == 'D': | |
cigar.pop() | |
#if len(cigar) > 1 and cigar[-2][1] == 'M': | |
# cigar[-2] = (cigar[-1][0] + cigar[-2][0], 'M') | |
#cigar.pop() | |
#else: | |
#cigar[-1]= (cigar[-1][0], 'M') | |
cigar = ''.join(''.join(map(str, list(x))) for x in cigar) | |
vals[3] = str(pos) | |
vals[5] = cigar | |
return '\t'.join(vals) | |
if __name__ == '__main__': | |
for line in sys.stdin: | |
if line.startswith('@'): | |
print line.strip() | |
else: | |
print fixLine(line.strip()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also see https://gist.github.com/holtgrewe/8735877.