Created
April 29, 2014 22:36
-
-
Save alexpreynolds/8b3b915b1b56431dc237 to your computer and use it in GitHub Desktop.
Converts a matrix of set memberships to an Eulergrid runall statement with cardinalities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use Getopt::Long; | |
use Data::Dumper; | |
use EulerSet; | |
# | |
# sample input: | |
# | |
# GM06990.DS7748 TH1.DS7840 HepG2.DS7764 K562.DS9767 SKNSH.DS8482 | |
# 0 0 1 0 0 | |
# 1 0 1 0 0 | |
# 0 0 0 1 0 | |
# | |
my ($fdr); | |
my $optResult = GetOptions ("fdr=s" => \$fdr); | |
if (! defined $fdr) { die "specify --fdr=str\n"; } | |
# | |
# edit these run parameters, based on environment | |
# | |
my $eulergridScript = "/home/areynolds/proj/eulergrid/eulergrid.pl"; | |
my $resultsDir = "/home/areynolds/proj/eulergrid/results/footprintOverlaps/012510"; | |
my @headers = (); | |
my $lineCtr = 0; | |
my $powerSets; | |
my %eulerSets; | |
my $eulerSet; | |
my $maxEulerSetLength = 0; | |
while (my $inputLine = <STDIN>) { | |
chomp ($inputLine); | |
my @elements = split("\t", $inputLine); | |
if ($lineCtr == 0) { | |
# parse header row | |
foreach my $element (@elements) { | |
push (@headers, $element); | |
} | |
my $powerSets = powerset(@headers); | |
foreach my $powerSet (@{$powerSets}) { | |
my @subsets = @{$powerSet}; | |
my $indicatorStr = ""; | |
foreach my $subsetHeader (@headers) { | |
my %headerTable; | |
map { $headerTable{$_} = 1 } $subsetHeader; | |
my @intersection = grep ($headerTable{$_}, @subsets); | |
if (scalar @intersection == 0) { | |
$indicatorStr .= "0"; | |
} | |
else { | |
$indicatorStr .= "1"; | |
} | |
} | |
if (scalar @subsets > $maxEulerSetLength) { | |
$maxEulerSetLength = scalar @subsets; | |
} | |
$eulerSet = new EulerSet(\@subsets, $indicatorStr, scalar @subsets, 0); | |
$eulerSets{$indicatorStr} = $eulerSet; | |
} | |
} | |
else { | |
# parse remaining lines by joining @elements | |
my $elementIndicator = join("", @elements); | |
# increment count of euler set | |
my $count = $eulerSets{$elementIndicator}->getCount(); | |
$eulerSets{$elementIndicator}->setCount($count + 1); | |
} | |
$lineCtr++; | |
} | |
# group euler sets by length | |
my $eulerSetRefsByLength; | |
foreach my $key (sort ascendingLength keys %eulerSets) { | |
$eulerSet = $eulerSets{$key}; | |
my $length = $eulerSet->getLength(); | |
if (defined $eulerSetRefsByLength->[$length]) { | |
my $count = scalar @{ $eulerSetRefsByLength->[$length] }; | |
$eulerSetRefsByLength->[$length]->[$count] = $eulerSet; | |
} | |
else { | |
push (@{ $eulerSetRefsByLength->[$length] }, $eulerSet); | |
} | |
} | |
# sort within eulerSetByLength grouping | |
my $totalCnt = 0; | |
my $excEulerSetsRef; | |
for (my $index = 0; $index <= $maxEulerSetLength; $index++) { | |
# generate ref table | |
my %eulerSetRefs; | |
foreach my $eulerSetRef (@{$eulerSetRefsByLength->[$index]}) { | |
$eulerSetRefs{$eulerSetRef->getIndicator()} = $eulerSetRef; | |
} | |
# sort ref table in descending alphabetical key order | |
foreach my $eulerSetIndicatorKey (reverse sort keys(%eulerSetRefs)) { | |
my $ref = $eulerSetRefs{$eulerSetIndicatorKey}; | |
my $ind = $ref->getIndicator(); | |
my $cnt = $ref->getCount(); | |
$totalCnt += $cnt; | |
print STDERR "$index\t$ind\t$cnt\n"; | |
$excEulerSetsRef->[$index]->{$ind} = $cnt; | |
} | |
} | |
print STDERR "total count: $totalCnt\n"; | |
# combine scores | |
my $combEulerSetsRef; | |
for (my $index = 1; $index <= $maxEulerSetLength; $index++) { | |
my $excSetRef = $excEulerSetsRef->[$index]; | |
foreach my $excIndKey (reverse sort keys %{$excSetRef}) { | |
my $combCount = $excSetRef->{$excIndKey}; | |
my @excIndElements = split("", $excIndKey); | |
for (my $combIndex = $index + 1; $combIndex <= $maxEulerSetLength; $combIndex++) { | |
my $testSetRef = $excEulerSetsRef->[$combIndex]; | |
foreach my $testIndKey (reverse sort keys %{$testSetRef}) { | |
my @testIndElements = split("", $testIndKey); | |
my $opFlag = 0; # TRUE | |
for (my $testIndex = 0; $testIndex < scalar @testIndElements; $testIndex++) { | |
if (($excIndElements[$testIndex] == 1) && ($testIndElements[$testIndex] == 0)) { | |
$opFlag = 1; # FALSE | |
} | |
} | |
if ($opFlag == 0) { | |
$combCount += $testSetRef->{$testIndKey}; | |
} | |
} | |
} | |
$combEulerSetsRef->[$index]->{$excIndKey} = $combCount; | |
print STDERR "$index\t$excIndKey\t$combCount\n"; | |
} | |
} | |
# | |
# We now have excEulerSetsRef and combEulerSetsRef to make the runall.script | |
# | |
my @setNamesArray; | |
foreach my $header (@headers) { | |
my @setNameElements = split("\\.", $header); | |
push (@setNamesArray, $setNameElements[0]); | |
} | |
my $setNames = join(",", @setNamesArray); | |
my $plotTitle="Core__Footprint__Overlaps__($fdr)"; | |
my @setCardinalitiesArray; | |
foreach (my $combIndex = 1; $combIndex <= $maxEulerSetLength; $combIndex++) { | |
foreach my $indKey (reverse sort keys %{$combEulerSetsRef->[$combIndex]}) { | |
push (@setCardinalitiesArray, $combEulerSetsRef->[$combIndex]->{$indKey}); | |
} | |
} | |
my $setCardinalities = join(",", @setCardinalitiesArray); | |
my $setTotal = $totalCnt; | |
my $outputFilename = "$resultsDir/$fdr.overlaps.png"; | |
my $offCellColor = "gray80"; | |
my $onCellColor = "springgreen4"; | |
my @ctsCountsArray; | |
foreach my $excKey (reverse sort keys %{$excEulerSetsRef->[1]}) { | |
push (@ctsCountsArray, $excEulerSetsRef->[1]->{$excKey}); | |
} | |
my $ctsCounts = join(",", @ctsCountsArray); | |
print STDERR "\nmkdir -p $resultsDir; $eulergridScript --setNames=\"$setNames\" --plotTitle=\"$plotTitle\" --setCardinalities=$setCardinalities --setTotal=$setTotal --outputFilename=\"$outputFilename\" --offCellColor=\"$offCellColor\" --onCellColor=\"$onCellColor\" --ctsCounts=$ctsCounts\n"; | |
# cf. http://perl.plover.com/LOD/199803.html | |
sub powerset { | |
return [[]] if @_ == 0; | |
my $first = shift; | |
my $pow = &powerset; | |
[ map { [$first, @$_ ], [ @$_] } @$pow ]; | |
} | |
sub ascendingLength { | |
$eulerSets{$a}->getLength() <=> $eulerSets{$b}->getLength(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment