Last active
April 19, 2022 13:02
-
-
Save epaule/a5f09d3010bafc87dc419fdfc5c51698 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SUPER_1_1 is 1085289420bp | |
# SUPER_1_2 joins and both become SUPER_1 | |
samtools view -h split_1.mapped.bam |perl -pne 's/SUPER_1_1/SUPER_1/g' |perl -ne 'chomp;@F=split(/\t/,$_);next if $F[1] eq "SN:SUPER_1";if(/SN:SUPER_1_2/){$F[1]="SN:SUPER_1";$F[2]="LN:2170579067"};if($F[2] eq "SUPER_1_2"){$F[2]="SUPER_1";$F[3]+=1085289420;$F[7]+=1085289420 if $F[6] eq "="};if($F[6] eq "SUPER_1_2"){$F[6]="SUPER_1";$F[7]+=1085289420};print join "\t", @F;print "\n"' | /software/grit/conda/envs/snake_env/bin/PretextMap --sortby nosort --mapq 0 -o fixed.pretext --highRes |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/env perl | |
# background: | |
# SUPER_1 was split at 1085289420 into SUPER_1_1 and SUPER_1_2, to allow aligning | |
# we now want to convert the alignments back into a joined SUPER_1 and need to rename | |
# SUPER_1_1 and SUPER_1_2 -> SUPER_1 | |
# if they were SUPER_1_2 coordinates, they need shifting by 1085289420 | |
# we might have to calculate TLEN for cases where both are on SUPER_1 | |
# the header needs fixing | |
# in theory all SUPER_1_1 coordinates should be the same as SUPER_1 | |
s/SUPER_1_1/SUPER_1/g; | |
chomp; | |
@F=split(/\t/,$_); | |
# header | |
next if $F[1] eq "SN:SUPER_1"; | |
if(/SN:SUPER_1_2/){ | |
$F[1]="SN:SUPER_1"; | |
$F[2]="LN:2170579067" | |
} | |
# shift SUPER_1_2 coordinates | |
if($F[2] eq "SUPER_1_2"){ | |
$F[2]="SUPER_1"; | |
$F[3]+=1085289420 | |
} | |
if($F[6] eq "SUPER_1_2"){ | |
$F[6]="SUPER_1"; | |
$F[7]+=1085289420 | |
} | |
# fix TLEN | |
# is an approximation, as I don't have the second cigar at that point | |
if ($F[2] eq 'SUPER_1' && $F[6] eq 'SUPER_1'){ | |
$F[8]=$F[7]-$F[3]+1 | |
} | |
print join "\t", @F; | |
print "\n"' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment