Last active
September 2, 2016 08:54
-
-
Save cheuerde/a50ee1fd315b3732a30f7e96e37c1afc to your computer and use it in GitHub Desktop.
Numerator Relationship Matrix for normal pedigrees and paternal pedigrees only
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Claas Heuer, February 2016 | |
// | |
// Note: This is copied and modified form the sources of the | |
// cran package: 'pedigreemm' | |
#include <R.h> | |
#include <Rdefines.h> | |
#include "Rcpp.h" | |
/** | |
* This is a creplacement for the former R-function "getGenAncestors" | |
* It takes integer pointers to the sire, dam, id and generation vectors | |
* and it changes the generations directly on the passed R-object, so there is no | |
* return value = void. | |
*/ | |
void calc_generation(int* sire, int* dam, int* id, int* gene, int this_id) { | |
/* j is the individual (label id) we are currently working with */ | |
int j = this_id; | |
/* integers to store the generations of dam and sire */ | |
int gene_sire, gene_dam; | |
/* If there are no parents assign 0 */ | |
gene[j] = 0; | |
/* check first parent */ | |
if(sire[j] != NA_INTEGER) { | |
/* if the parent is not a founder continue */ | |
if (gene[sire[j]-1] == NA_INTEGER) { | |
/* we call this function again to get the generation of this parent - recursion */ | |
/* The function doesnt return anything, it changes 'gene' directly */ | |
/* we need the '-1' for the indices because R starts at 1, C at 0 */ | |
calc_generation(sire, dam, id, gene, sire[j]-1); | |
gene_sire = 1 + gene[sire[j]-1]; | |
} else { | |
/* if the parent is a founder my generation is 1 */ | |
gene_sire = 1 + gene[sire[j]-1]; | |
} | |
/* we assign the generation derived from father for this individual */ | |
/* but this might get overriden by the dam's derived generation if it is larger */ | |
gene[j] = gene_sire; | |
} | |
/* check second parent */ | |
/* all the same as above */ | |
if(dam[j] != NA_INTEGER) { | |
if (gene[dam[j]-1] == NA_INTEGER) { | |
calc_generation(sire, dam, id, gene, dam[j]-1); | |
gene_dam = 1 + gene[dam[j]-1]; | |
} else { | |
gene_dam = 1 + gene[dam[j]-1]; | |
} | |
/* if the dam's derived generation is higher then override gene[j] with it */ | |
if (gene_dam > gene[j]) gene[j] = gene_dam; | |
} | |
} | |
/** | |
* This function iterates over every element of the pedigree and let | |
* "calc_generation" calculate the generations. Again without a return value. | |
*/ | |
// [[Rcpp::export]] | |
void GetGeneration(SEXP sire_in, SEXP dam_in, SEXP id_in, SEXP gene_in, SEXP verbose_in) { | |
// get R-vectors for sire, dam, id and generation | |
int *sire = INTEGER(sire_in), | |
*dam = INTEGER(dam_in), | |
*id = INTEGER(id_in), | |
*gene = INTEGER(gene_in); | |
int verbose = *INTEGER(verbose_in); | |
int n = LENGTH(sire_in); | |
for(int i=0; i<n; i++) { | |
if(verbose) Rprintf("%i \n", i); | |
// we only need to calculate the generation for non-founders, which | |
// is the case when we assigned 'NA' to generation (founders have 0) | |
if(gene[i] == NA_INTEGER) { | |
// call the function above to get the generation for individual 'i'. | |
// we pass the vectors as pointers to calc_generation() | |
calc_generation(sire, dam, id, gene, i); | |
} | |
} | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Claas Heuer, February 2016 | |
# | |
# Fast version of editPed form pedigreemm package | |
if (!require("pacman")) install.packages("pacman") | |
pacman::p_load(Rcpp, pedigreemm) | |
sourceCpp("EditPedFast.cpp") | |
EditPedFast <- function(sire, dam, label, verbose = FALSE) { | |
nped <- length(sire) | |
if (nped != length(dam)) stop("sire and dam have to be of the same length") | |
if (nped != length(label)) stop("label has to be of the same length than sire and dam") | |
tmp <- unique(sort(c(as.character(sire), as.character(dam)))) | |
missingP <-NULL | |
if(any(completeId <- ! tmp %in% as.character(label))) missingP <- tmp[completeId] | |
labelOl <- c(as.character(missingP),as.character(label)) | |
sireOl <- c(rep(NA, times=length(missingP)),as.character(sire)) | |
damOl <- c(rep(NA, times=length(missingP)),as.character(dam)) | |
sire <- as.integer(factor(sireOl, levels = labelOl)) | |
dam <- as.integer(factor(damOl, levels = labelOl)) | |
nped <-length(labelOl) | |
label <-1:nped | |
sire[!is.na(sire) & (sire<1 | sire>nped)] <- NA | |
dam[!is.na(dam) & (dam < 1 | dam > nped)] <- NA | |
pede <- data.frame(id= label, sire= sire, dam= dam, gene=rep(NA, times=nped)) | |
noParents <- (is.na(pede$sire) & is.na(pede$dam)) | |
pede$gene[noParents] <- 0 | |
pede$gene<-as.integer(pede$gene) | |
# new version that calls the C-function "get_generation" | |
# Note: there is no return value, the R-objects are directly being changed | |
GetGeneration(pede$sire, pede$dam, pede$id, pede$gene, as.integer(verbose)) | |
ord<- order(pede$gene) | |
ans<-data.frame(label=labelOl, sire=sireOl, dam=damOl, gene=pede$gene, | |
stringsAsFactors =F) | |
ans[ord,] | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Claas Heuer, March 2016 | |
# | |
# Very simple improvement of pedigreemm::getAInv. | |
# Replaced dense diagonal by sparse one | |
GetAinvFast <- function(ped) { | |
stopifnot(is(ped, "pedigree")) | |
T_Inv <- as(ped, "sparseMatrix") | |
D_Inv <- Diagonal(x=1/Dmat(ped)) | |
aiMx<-t(T_Inv) %*% D_Inv %*% T_Inv | |
dimnames(aiMx)[[1]]<-dimnames(aiMx)[[2]] <-ped@label | |
aiMx | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @name GetNRM | |
#' @title Constructing numerator relationship matrix using full pedigree or sire and MGS | |
#' @author Claas Heuer | |
#' @details April 2016 | |
#' @usage | |
#' GetNRM(ped, mgs = FALSE) | |
#' @description | |
#' Construct numerator relationship matrix based on regular pedigree file | |
#' or using sire and maternal grandsire information. | |
#' @param ped | |
#' \code{data.frame} with three columns: \code{id}, \code{sire} and \code{dam/mgs} | |
#' @param mgs | |
#' Indicate whether third column of \code{ped} is the maternal grandsire rather than the dam | |
#' @return Numerator Relationship Matrix as \code{dgCMatrix} | |
#' @examples | |
#' | |
#' A <- GetNRM(ped, mgs = FALSE) | |
if (!require("pacman")) install.packages("pacman") | |
pacman::p_load(pedigreemm) | |
source("EditPedFast.r") | |
#' @export | |
GetNRM <- function(ped, mgs = FALSE) { | |
if(!is.data.frame(ped)) stop("ped must be a data.frame") | |
colnames(ped)[1:3] <- c("id", "sire", "dam") | |
if(mgs) colnames(ped)[3] <- "mgs" | |
pednew <- ped | |
# if dam is replaced by mgs we expand pedigree with pseudo dams | |
if(mgs) { | |
damped <- data.frame(id = paste("PseudoDam_", 1:nrow(ped), sep = ""), | |
sire = ped$mgs, dam = NA, stringsAsFactors = FALSE) | |
# fill dam column with pseudodams | |
pednew <- data.frame(id = ped$id, sire = ped$sire, dam = damped$id, stringsAsFactors = FALSE) | |
# add dams to pedigree | |
pednew <- rbind(damped, pednew) | |
} | |
# add founders | |
PEDtemp <- EditPedFast(label = pednew$id, sire = pednew$sire, dam = pednew$dam) | |
PED <- pedigreemm::pedigree(label = PEDtemp$label, dam = PEDtemp$dam, | |
sire = PEDtemp$sire) | |
# get A without pseudodams | |
L <- t(relfactor(PED)) | |
if(mgs) { | |
A <- tcrossprod(L[!PED@label %in% damped$id,]) | |
ids <- PED@label[!PED@label %in% damped$id] | |
} else { | |
A <- tcrossprod(L) | |
ids <- PED@label | |
} | |
A <- as(A, "dgCMatrix") | |
rownames(A) <- ids | |
colnames(A) <- ids | |
return(A) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment