Skip to content

Instantly share code, notes, and snippets.

@sahrendt0
Created October 17, 2012 06:22
Show Gist options
  • Save sahrendt0/3903997 to your computer and use it in GitHub Desktop.
Save sahrendt0/3903997 to your computer and use it in GitHub Desktop.
GEN 220 Problem Set 3
#!/usr/bin/perl
use strict;
use warnings;
my $infile = shift;
my $num_utr3 = 0;
my (%GFF,%data,$ID,$gene_length,@exon,$gene_stop,$gene_start);
my($ex_start,$ex_stop,$ex_length);
my($in_start,$in_stop,$in_length);
my @introns;
my @gene_lengths;
open(GFF,"<$infile") or die "Can't open file \"$infile\"...\n";
foreach my $line (<GFF>)
{
my ($chr,$genome,$feature,$start,$stop,$a,$strand,$c,$desc) = split("\t",$line);
if($feature eq "gene")
{
my ($gene_id,$gene_type,$gene_name) = split(";",$desc);
my @tmp = split("=",$gene_id);
$ID = $tmp[1];
$data{"length"} = $gene_length;
$gene_stop = $stop;
$gene_start = $start;
$ex_start = -1;
$ex_stop = -1;
$in_start = -1;
$in_stop = -1;
if(($strand eq "+") && (($start >= 1001300) && ($stop <= 10000500)))
{
push(@gene_lengths,$gene_stop-$gene_start+1);
}
}
if($feature eq "exon")
{
if($ex_start != -1)
{
$ex_start = $start;
$in_stop = $ex_start-1;
$in_length = ($in_stop-$in_start);
if($strand eq "-")
{
$in_length *= -1;
}
if(($in_length >= 100) && ($in_length <= 300))
{
push(@introns,$in_stop-$in_start);
}
}
$ex_start = $start;
$ex_stop = $stop;
$in_start = $ex_stop+1;
}
if($feature =~ m/three/)
{
$num_utr3++;
}
}
close(GFF);
## OUTPUT
open(OUT,">ath_chr1_stats.txt");
print OUT "$num_utr3\t"; ## Part i
print OUT scalar @introns,"\t"; ## Part ii
my $len_sum; ## Part iii
foreach my $len (@gene_lengths)
{
$len_sum += $len;
}
print OUT int ($len_sum/(scalar @gene_lengths));
print OUT "\n";
close(OUT);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment