Last active
December 27, 2022 12:46
-
-
Save gpertea/b83f1b32435e166afa92a2d388527f4b to your computer and use it in GitHub Desktop.
post-processing of StringTie merge output to append ref_gene_id info to the MSTRG gene_id
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Usage: mstrg_prep.pl merged.gtf > merged_prep.gtf
#
# Post-processes StringTie merge output: for every record whose gene_id is
# an MSTRG.<n> identifier, the ref_gene_id values seen anywhere in the input
# for that gene are appended to the gene_id, sorted and '|'-separated, e.g.
#     gene_id "MSTRG.12"   ->   gene_id "MSTRG.12|GENEA|GENEB"
# MSTRG genes without any ref_gene_id, and lines that are not MSTRG records
# (comments, lines with fewer than 9 columns, other gene_ids), pass through
# unchanged apart from removal of trailing spaces. Input order is preserved.
use strict;
use warnings;

# annotate_gene_ids($next_line) -> list of output lines
#
#   $next_line  code ref returning the next input line (newline included)
#               on each call, and undef once the input is exhausted.
#
# Returns every input line, in input order, with the MSTRG gene_ids
# rewritten as described in the header of this file.
sub annotate_gene_ids {
    my ($next_line) = @_;

    my %refs_for;    # MSTRG gene_id => { ref_gene_id => 1, ... }
    my @records;     # [ line, MSTRG gene_id or undef ], in input order

    # Pass 1: collect the reference ids of each MSTRG gene. A ref_gene_id
    # may first show up on a later transcript than the gene's first record,
    # so nothing can be rewritten until the whole input has been seen.
    while (defined(my $line = $next_line->())) {
        $line =~ s/ +$//;    # drop trailing spaces (before the newline)
        my @fields = split /\t/, $line;
        my $gid;
        if (@fields > 8) {
            # \b keeps "gene_id" from matching inside "ref_gene_id".
            ($gid) = $fields[8] =~ /\bgene_id "(MSTRG\.\d+)"/;
            if (defined $gid) {
                my ($ref) = $fields[8] =~ /\bref_gene_id "([^"]+)/;
                # or, to label genes by name instead:
                # my ($ref) = $fields[8] =~ /\bgene_name "([^"]+)/;
                $refs_for{$gid}{$ref} = 1 if defined $ref;
            }
        }
        push @records, [ $line, $gid ];
    }

    # Build each gene's suffix once instead of once per record.
    my %suffix_for;
    for my $gid (keys %refs_for) {
        $suffix_for{$gid} = '|' . join('|', sort keys %{ $refs_for{$gid} });
    }

    # Pass 2: rewrite the gene_id of MSTRG records that have reference ids.
    my @out;
    for my $rec (@records) {
        my ($line, $gid) = @$rec;
        my $suffix = defined $gid ? $suffix_for{$gid} : undef;
        $line =~ s/\bgene_id "MSTRG\.\d+/gene_id "$gid$suffix/
            if defined $suffix;
        push @out, $line;
    }
    return @out;
}

# Reads the GTF named on the command line (or STDIN) and prints the result.
sub main {
    print annotate_gene_ids(sub { scalar(<>) });
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without consuming STDIN.
main() unless caller;
drdna
commented
Aug 2, 2021
via email
Oh I gotcha. I read that as a double ==.
Mark L. Farman
Professor, Department of Plant Pathology
225 Plant Science Building<x-apple-data-detectors://1/1>
1405 Veteran's Dr.<x-apple-data-detectors://1/1>
University of Kentucky<x-apple-data-detectors://1/1>
Lexington, KY 40546 USA<x-apple-data-detectors://1/1>
tel: (859) 218-0728<tel:(859)%20218-0728>
fax: (859) 323-1961<tel:(859)%20323-1961>
Sent from my iPhone
On Aug 2, 2021, at 9:53 AM, Geo Pertea ***@***.***> wrote:
CAUTION: External Sender
@gpertea commented on this gist.
…________________________________
You mean the one at line 30, `if (my $gd=$g{$gid})`? (Even though that's not the last if statement ;)) If so, no. The single `=` in that test expression is an assignment, not a comparison: the hash reference stored in $g{$gid} is copied into the new variable $gd, and the assigned value is then what actually gets tested.
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub<https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgist.github.com%2Fb83f1b32435e166afa92a2d388527f4b%23gistcomment-3840249&data=04%7C01%7Cmark.farman%40uky.edu%7C5b5a9d7ee355429a3fee08d955bce092%7C2b30530b69b64457b818481cb53d42ae%7C0%7C0%7C637635091974156230%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=l%2BFyjr6xSeDKF7dnRafavhh6DR95UdG8p8X9mJECEf0%3D&reserved=0>, or unsubscribe<https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fnotifications%2Funsubscribe-auth%2FAAYUUDOA6AW2DVLAESHADKTT22PMXANCNFSM5BI5RBRQ&data=04%7C01%7Cmark.farman%40uky.edu%7C5b5a9d7ee355429a3fee08d955bce092%7C2b30530b69b64457b818481cb53d42ae%7C0%7C0%7C637635091974166236%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=DOb30Lw%2Btbe1wE9WXLYotZ36KsL%2BDmJMkCU84TqvqOs%3D&reserved=0>.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment