Last active
August 10, 2022 10:06
-
-
Save vjcitn/0e68fe63cac0695b14e194294a51417b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' produce matrices of allele calls from VcfFile instance for a given genomic interval | |
#' @param vf instance of VcfFile, should be tabix indexed | |
#' @param rng compatible GRanges instance | |
#' @param genome character(1) obligatory for readVcf | |
#' @param pat1 character(1) gsub regexp to isolate first allele code (might need to have | |
#' / instead of | if unphased) | |
#' @param pat2 character(1) gsub regexp to isolate second allele code (might need to have | |
#' / instead of | if unphased) | |
#' x = VariantAnnotation::VcfFile("ALL.chr22.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz") | |
#' param = GRanges("22", IRanges(16e6, width=200000)) | |
#' m = allele_mats(x, param) | |
#' table(m$allele1[10,]) | |
#' # <CN0> <CN2> <CN3> <CN4> A | |
#' # 3 38 300 8 2155 | |
#' table(m$allele2[10,]) | |
#' @export | |
allele_mats = function(vf, rng, genome="hg19", pat1="\\|.*", pat2=".*\\|(*)") { | |
z = readVcf(vf, genome=genome, param=rng) # import a slice | |
az = alt(z) | |
rz = as.character(ref(z)) # ref has length 1 | |
# | |
# build a function that will map from VCF 0...codes to alt/ref codes | |
# | |
alleles = lapply(1:length(rz), function(i) c(rz[i], as.character(az[[i]]))) | |
decode = function(inds) alleles[[inds[1]]][inds[-1]+1] # add a column of rownums to a1 or a2 before applying | |
# | |
# get codes for alleles 1 and 2 | |
# | |
gt = geno(z)$GT | |
agt = as.character(gt) | |
a1 = as.numeric(gsub(pat1, "", agt)) | |
a1 = matrix(a1, nrow=nrow(gt)) | |
a2 = as.numeric(gsub(pat2, "\\1", agt)) | |
a2 = matrix(a2, nrow=nrow(gt)) | |
# | |
# decode | |
# | |
a1 = cbind(1:nrow(a1), a1) | |
da1 = t(apply(a1,1,decode)) | |
a1 = a1[,-1] | |
a2 = cbind(1:nrow(a2), a2) | |
da2 = t(apply(a2,1,decode)) | |
a2 = a2[,-1] | |
dimnames(da1) = dimnames(da2) = dimnames(gt) | |
list(allele1=da1, allele2=da2) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment