Skip to content

Instantly share code, notes, and snippets.

@amackey
Created November 26, 2012 16:24
Show Gist options
  • Save amackey/4149141 to your computer and use it in GitHub Desktop.
Save amackey/4149141 to your computer and use it in GitHub Desktop.
simple LCA in R
# Copyright 2012 Aaron J. Mackey
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
model {
theta ~ dbeta(1,2);
for (i in 1:N) {
fp[i] ~ dbeta(1,2);
fn[i] ~ dbeta(1,2);
tp[i] <- 1-fn[i];
tn[i] <- 1-fp[i];
}
for (j in 1:M) {
margprobs[j] <- theta * (fn[1]^(1-x[j,1])) * (tp[1]^x[j,1]) *
(fn[2]^(1-x[j,2])) * (tp[2]^x[j,2]) *
(fn[3]^(1-x[j,3])) * (tp[3]^x[j,3]) *
(fn[4]^(1-x[j,4])) * (tp[4]^x[j,4])
+ (1-theta) * (tn[1]^(1-x[j,1])) * (fp[1]^x[j,1]) *
(tn[2]^(1-x[j,2])) * (fp[2]^x[j,2]) *
(tn[3]^(1-x[j,3])) * (fp[3]^x[j,3]) *
(tn[4]^(1-x[j,4])) * (fp[4]^x[j,4]);
postprobs[j] <- theta * (fn[1]^(1-x[j,1])) * (tp[1]^x[j,1]) *
(fn[2]^(1-x[j,2])) * (tp[2]^x[j,2]) *
(fn[3]^(1-x[j,3])) * (tp[3]^x[j,3]) *
(fn[4]^(1-x[j,4])) * (tp[4]^x[j,4])
/ margprobs[j];
}
counts ~ dmulti(margprobs, total);
}
# Copyright 2012 Aaron J. Mackey
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
library(R2jags)
N <- 4;
x <- as.matrix(expand.grid(c(0,1), c(0,1), c(0,1), c(0,1)));
M <- nrow(x);
counts <- c(2942808473, 17491655, 21576, 23189, 339805, 89159,
168214, 76044, 43138288, 530963, 22682, 22169,
462052, 129472, 2804257, 3454104);
total <- sum(counts);
jags.data <- c("N", "M", "x", "counts", "total");
jags.params <- c("theta", "fp", "fn", "margprobs", "postprobs");
jags.inits <- NULL;
jagsfit1 <- jags(data=jags.data, inits=jags.inits, param=jags.params, DIC=FALSE,
n.chains=1, n.iter=120000, n.thin=10, n.burnin=10000,
model.file="lca.jags")
mcmc <- as.mcmc(jagsfit1);
print(params <- summary(mcmc)$statistics[c(1:8,41),1, drop=F])
print(pp <- summary(mcmc)$statistics[25:40,1, drop=F])
print(cbind(x, pp, pp >= 0.5))
#!/usr/bin/perl -w
# Copyright 2012 Aaron J. Mackey
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# run as: (tabulateVCF.pl file1.vcf file2.vcf file3.vcf > combined.vcf) >& combined.cts
# or whatever your shell syntax is to redirect stdout to combined.vcf and stderr to combined.cts
# combined.cts is what lca.R wants as the counts array.
use strict;
use List::Util qw(max sum);
my @files = @ARGV;
my $nfiles = scalar(@files);
my %sites;
my $ctr = 0;
for my $file (@files) {
open(FILE, $file) or die $!;
while (<FILE>) {
chomp;
next if m/^#/;
my ($chr, $pos) = split(" ", $_);
$sites{$chr}->{$pos} ||= [ (0) x $nfiles ];
$sites{$chr}->{$pos}->[$ctr]++;
}
$ctr++;
}
my @counts = (0) x (2 ** $ctr);
my $sum = 0;
while (my ($chr, $chrsites) = each %sites) {
$sum += max(keys %$chrsites);
while (my ($pos, $ctrs) = each %$chrsites) {
my @status = map { $_ > 0 ? 1 : 0 } @$ctrs;
print join("\t", $chr, $pos, ".", "N", "N", ".", ".",
join(";", @status, sum(@status))
) . "\n";
my $idx = 0;
for (my $i = 0 ; $i < $nfiles ; $i++) {
if ($ctrs->[$i] > 0) {
$idx += 2 ** $i;
}
}
$counts[$idx]++;
}
}
warn "Estimated sum: $sum\n";
$counts[0] = $sum - sum(@counts[1..$#counts]);
for (my $idx = 0 ; $idx < @counts ; $idx++) {
my @labels = (0) x $nfiles;
for (my $i = 0 ; $i < @labels ; $i++) {
if ($idx & (1 << $i)) {
$labels[$i] = 1;
}
}
warn join("\t", @labels, $counts[$idx]), "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment