cheuerde · September 2, 2016 08:54
diff --git a/EditPedFast.cpp b/EditPedFast.cpp
 // Claas Heuer, February 2016
 //
 // Note: This is copied and modified form the sources of the 
 // cran package: 'pedigreemm'
 #include <R.h>
 #include <Rdefines.h>
 #include "Rcpp.h"


 /**
 * This is a creplacement for the former R-function "getGenAncestors"
 * It takes integer pointers to the sire, dam, id and generation vectors
 * and it changes the generations directly on the passed R-object, so there is no
 * return value = void.
 */

 void calc_generation(int* sire, int* dam, int* id, int* gene, int this_id) {

 /* j is the individual (label id) we are currently working with */
    int j = this_id;
 /* integers to store the generations of dam and sire */    
    int gene_sire, gene_dam;
    
 /* If there are no parents assign 0 */
    gene[j] = 0;

 /* check first parent */ 
    if(sire[j] != NA_INTEGER) {

 /* if the parent is not a founder continue */
      if (gene[sire[j]-1] == NA_INTEGER) {

 /* we call this function again to get the generation of this parent - recursion */
 /* The function doesnt return anything, it changes 'gene' directly */
 /* we need the '-1' for the indices because R starts at 1, C at 0 */
        calc_generation(sire, dam, id, gene, sire[j]-1);
        gene_sire = 1 + gene[sire[j]-1];

      } else {
 /* if the parent is a founder my generation is 1 */
          gene_sire = 1 + gene[sire[j]-1];      

        } 
 /* we assign the generation derived from father for this individual */
 /* but this might get overriden by the dam's derived generation if it is larger */
      gene[j] = gene_sire;

    }

 /* check second parent */ 
 /* all the same as above */
    if(dam[j] != NA_INTEGER) {

      if (gene[dam[j]-1] == NA_INTEGER) {

        calc_generation(sire, dam, id, gene, dam[j]-1);
        gene_dam = 1 + gene[dam[j]-1];

      } else {

          gene_dam = 1 + gene[dam[j]-1];      

        }
 /* if the dam's derived generation is higher then override gene[j] with it */        
      if (gene_dam > gene[j])  gene[j] = gene_dam;  

    }

 }

 /**
 * This function iterates over every element of the pedigree and let
 * "calc_generation" calculate the generations. Again without a return value.
 */


 // [[Rcpp::export]]
 void GetGeneration(SEXP sire_in, SEXP dam_in, SEXP id_in, SEXP gene_in, SEXP verbose_in) {

 // get R-vectors for sire, dam, id and generation
    int *sire = INTEGER(sire_in),
 	*dam = INTEGER(dam_in),
 	*id = INTEGER(id_in),
 	*gene = INTEGER(gene_in);
 
    int verbose = *INTEGER(verbose_in);
    int n = LENGTH(sire_in);

    for(int i=0; i<n; i++) {
      
      if(verbose) Rprintf("%i \n", i); 

 // we only need to calculate the generation for non-founders, which
 // is the case when we assigned 'NA' to generation (founders have 0)
      if(gene[i] == NA_INTEGER) {

 // call the function above to get the generation for individual 'i'.
 // we pass the vectors as pointers to calc_generation()
        calc_generation(sire, dam, id, gene, i);

      }

    }

 }


diff --git a/EditPedFast.r b/EditPedFast.r
 # Claas Heuer, February 2016
 #
 # Fast version of editPed form pedigreemm package

 if (!require("pacman")) install.packages("pacman")
 pacman::p_load(Rcpp, pedigreemm)

 sourceCpp("EditPedFast.cpp")

 EditPedFast <- function(sire, dam, label, verbose = FALSE) {

 	nped <- length(sire)
 	if (nped != length(dam))  stop("sire and dam have to be of the same length")
 	if (nped != length(label)) stop("label has to be of the same length than sire and dam")
 	tmp <- unique(sort(c(as.character(sire), as.character(dam))))

 	missingP <-NULL
 	if(any(completeId <- ! tmp %in% as.character(label))) missingP <- tmp[completeId]
 	labelOl <- c(as.character(missingP),as.character(label))
 	sireOl <- c(rep(NA, times=length(missingP)),as.character(sire))
 	damOl  <- c(rep(NA, times=length(missingP)),as.character(dam))
 	sire <- as.integer(factor(sireOl, levels = labelOl))
 	dam  <- as.integer(factor(damOl, levels = labelOl))
 	nped <-length(labelOl)
 	label <-1:nped
 	sire[!is.na(sire) & (sire<1 | sire>nped)] <- NA
 	dam[!is.na(dam) & (dam < 1 | dam > nped)] <- NA
 	pede <- data.frame(id= label, sire= sire, dam= dam, gene=rep(NA, times=nped))
 	noParents <- (is.na(pede$sire) & is.na(pede$dam))
 	pede$gene[noParents] <- 0
 	pede$gene<-as.integer(pede$gene)

 	# new version that calls the C-function "get_generation"
 	# Note: there is no return value, the R-objects are directly being changed
 	GetGeneration(pede$sire, pede$dam, pede$id, pede$gene, as.integer(verbose))

 	ord<- order(pede$gene)
 	ans<-data.frame(label=labelOl, sire=sireOl, dam=damOl, gene=pede$gene,
 			stringsAsFactors =F)
 	ans[ord,]

 }
diff --git a/GetAinvFast.r b/GetAinvFast.r
 # Claas Heuer, March 2016
 #
 # Very simple improvement of pedigreemm::getAInv.
 # Replaced dense diagonal by sparse one

 GetAinvFast <- function(ped) {

 	stopifnot(is(ped, "pedigree"))    
 	T_Inv <- as(ped, "sparseMatrix")
 	D_Inv <- Diagonal(x=1/Dmat(ped))
 	aiMx<-t(T_Inv) %*% D_Inv %*% T_Inv
 	dimnames(aiMx)[[1]]<-dimnames(aiMx)[[2]] <-ped@label
 	aiMx

 }
diff --git a/GetNRM.r b/GetNRM.r
 #' @name GetNRM
 #' @title Constructing numerator relationship matrix using full pedigree or sire and MGS
 #' @author Claas Heuer
 #' @details April 2016
 #' @usage 
 #' GetNRM(ped, mgs = FALSE)
 #' @description
 #' Construct numerator relationship matrix based on regular pedigree file
 #' or using sire and maternal grandsire information.
 #' @param ped 
 #' \code{data.frame} with three columns: \code{id}, \code{sire} and \code{dam/mgs}
 #' @param mgs 
 #' Indicate whether third column of \code{ped} is the maternal grandsire rather than the dam
 #' @return Numerator Relationship Matrix as \code{dgCMatrix}
 #' @examples
 #'
 #' A <- GetNRM(ped, mgs = FALSE)

 if (!require("pacman")) install.packages("pacman")
 pacman::p_load(pedigreemm)

 source("EditPedFast.r")

 #' @export
 GetNRM <- function(ped, mgs = FALSE) {

 	if(!is.data.frame(ped)) stop("ped must be a data.frame")

 	colnames(ped)[1:3] <- c("id", "sire", "dam")
 	if(mgs) colnames(ped)[3] <- "mgs"

 	pednew <- ped

 	# if dam is replaced by mgs we expand pedigree with pseudo dams
 	if(mgs) {

 		damped <- data.frame(id = paste("PseudoDam_", 1:nrow(ped), sep = ""), 
 				     sire = ped$mgs, dam = NA, stringsAsFactors = FALSE)

 		# fill dam column with pseudodams
 		pednew <- data.frame(id = ped$id, sire = ped$sire, dam = damped$id, stringsAsFactors = FALSE)

 		# add dams to pedigree
 		pednew <- rbind(damped, pednew)

 	}

 	# add founders
 	PEDtemp <- EditPedFast(label = pednew$id, sire = pednew$sire, dam = pednew$dam)
 	PED <- pedigreemm::pedigree(label = PEDtemp$label, dam = PEDtemp$dam,
 				    sire = PEDtemp$sire)

 	# get A without pseudodams
 	L <- t(relfactor(PED))

 	if(mgs) {

 		A <- tcrossprod(L[!PED@label %in% damped$id,])
 		ids <- PED@label[!PED@label %in% damped$id]

 	} else {

 		A <- tcrossprod(L)
 		ids <- PED@label

 	}

 	A <- as(A, "dgCMatrix")
 	rownames(A) <- ids
 	colnames(A) <- ids

 	return(A)

 }
	// Claas Heuer, February 2016
	//
	// Note: This is copied and modified form the sources of the
	// cran package: 'pedigreemm'
	#include <R.h>
	#include <Rdefines.h>
	#include "Rcpp.h"


	/**
	* This is a creplacement for the former R-function "getGenAncestors"
	* It takes integer pointers to the sire, dam, id and generation vectors
	* and it changes the generations directly on the passed R-object, so there is no
	* return value = void.
	*/

	void calc_generation(int* sire, int* dam, int* id, int* gene, int this_id) {

	/* j is the individual (label id) we are currently working with */
	int j = this_id;
	/* integers to store the generations of dam and sire */
	int gene_sire, gene_dam;

	/* If there are no parents assign 0 */
	gene[j] = 0;

	/* check first parent */
	if(sire[j] != NA_INTEGER) {

	/* if the parent is not a founder continue */
	if (gene[sire[j]-1] == NA_INTEGER) {

	/* we call this function again to get the generation of this parent - recursion */
	/* The function doesnt return anything, it changes 'gene' directly */
	/* we need the '-1' for the indices because R starts at 1, C at 0 */
	calc_generation(sire, dam, id, gene, sire[j]-1);
	gene_sire = 1 + gene[sire[j]-1];

	} else {
	/* if the parent is a founder my generation is 1 */
	gene_sire = 1 + gene[sire[j]-1];

	}
	/* we assign the generation derived from father for this individual */
	/* but this might get overriden by the dam's derived generation if it is larger */
	gene[j] = gene_sire;

	}

	/* check second parent */
	/* all the same as above */
	if(dam[j] != NA_INTEGER) {

	if (gene[dam[j]-1] == NA_INTEGER) {

	calc_generation(sire, dam, id, gene, dam[j]-1);
	gene_dam = 1 + gene[dam[j]-1];

	} else {

	gene_dam = 1 + gene[dam[j]-1];

	}
	/* if the dam's derived generation is higher then override gene[j] with it */
	if (gene_dam > gene[j]) gene[j] = gene_dam;

	}

	}

	/**
	* This function iterates over every element of the pedigree and let
	* "calc_generation" calculate the generations. Again without a return value.
	*/


	// [[Rcpp::export]]
	void GetGeneration(SEXP sire_in, SEXP dam_in, SEXP id_in, SEXP gene_in, SEXP verbose_in) {

	// get R-vectors for sire, dam, id and generation
	int *sire = INTEGER(sire_in),
	*dam = INTEGER(dam_in),
	*id = INTEGER(id_in),
	*gene = INTEGER(gene_in);

	int verbose = *INTEGER(verbose_in);
	int n = LENGTH(sire_in);

	for(int i=0; i<n; i++) {

	if(verbose) Rprintf("%i \n", i);

	// we only need to calculate the generation for non-founders, which
	// is the case when we assigned 'NA' to generation (founders have 0)
	if(gene[i] == NA_INTEGER) {

	// call the function above to get the generation for individual 'i'.
	// we pass the vectors as pointers to calc_generation()
	calc_generation(sire, dam, id, gene, i);

	}

	}

	}
	# Claas Heuer, February 2016
	#
	# Fast version of editPed form pedigreemm package

	if (!require("pacman")) install.packages("pacman")
	pacman::p_load(Rcpp, pedigreemm)

	sourceCpp("EditPedFast.cpp")

	EditPedFast <- function(sire, dam, label, verbose = FALSE) {

	nped <- length(sire)
	if (nped != length(dam)) stop("sire and dam have to be of the same length")
	if (nped != length(label)) stop("label has to be of the same length than sire and dam")
	tmp <- unique(sort(c(as.character(sire), as.character(dam))))

	missingP <-NULL
	if(any(completeId <- ! tmp %in% as.character(label))) missingP <- tmp[completeId]
	labelOl <- c(as.character(missingP),as.character(label))
	sireOl <- c(rep(NA, times=length(missingP)),as.character(sire))
	damOl <- c(rep(NA, times=length(missingP)),as.character(dam))
	sire <- as.integer(factor(sireOl, levels = labelOl))
	dam <- as.integer(factor(damOl, levels = labelOl))
	nped <-length(labelOl)
	label <-1:nped
	sire[!is.na(sire) & (sire<1 \| sire>nped)] <- NA
	dam[!is.na(dam) & (dam < 1 \| dam > nped)] <- NA
	pede <- data.frame(id= label, sire= sire, dam= dam, gene=rep(NA, times=nped))
	noParents <- (is.na(pede$sire) & is.na(pede$dam))
	pede$gene[noParents] <- 0
	pede$gene<-as.integer(pede$gene)

	# new version that calls the C-function "get_generation"
	# Note: there is no return value, the R-objects are directly being changed
	GetGeneration(pede$sire, pede$dam, pede$id, pede$gene, as.integer(verbose))

	ord<- order(pede$gene)
	ans<-data.frame(label=labelOl, sire=sireOl, dam=damOl, gene=pede$gene,
	stringsAsFactors =F)
	ans[ord,]

	}
	# Claas Heuer, March 2016
	#
	# Very simple improvement of pedigreemm::getAInv.
	# Replaced dense diagonal by sparse one

	GetAinvFast <- function(ped) {

	stopifnot(is(ped, "pedigree"))
	T_Inv <- as(ped, "sparseMatrix")
	D_Inv <- Diagonal(x=1/Dmat(ped))
	aiMx<-t(T_Inv) %% D_Inv %% T_Inv
	dimnames(aiMx)[[1]]<-dimnames(aiMx)[[2]] <-ped@label
	aiMx

	}
	#' @name GetNRM
	#' @title Constructing numerator relationship matrix using full pedigree or sire and MGS
	#' @author Claas Heuer
	#' @details April 2016
	#' @usage
	#' GetNRM(ped, mgs = FALSE)
	#' @description
	#' Construct numerator relationship matrix based on regular pedigree file
	#' or using sire and maternal grandsire information.
	#' @param ped
	#' \code{data.frame} with three columns: \code{id}, \code{sire} and \code{dam/mgs}
	#' @param mgs
	#' Indicate whether third column of \code{ped} is the maternal grandsire rather than the dam
	#' @return Numerator Relationship Matrix as \code{dgCMatrix}
	#' @examples
	#'
	#' A <- GetNRM(ped, mgs = FALSE)

	if (!require("pacman")) install.packages("pacman")
	pacman::p_load(pedigreemm)

	source("EditPedFast.r")

	#' @export
	GetNRM <- function(ped, mgs = FALSE) {

	if(!is.data.frame(ped)) stop("ped must be a data.frame")

	colnames(ped)[1:3] <- c("id", "sire", "dam")
	if(mgs) colnames(ped)[3] <- "mgs"

	pednew <- ped

	# if dam is replaced by mgs we expand pedigree with pseudo dams
	if(mgs) {

	damped <- data.frame(id = paste("PseudoDam_", 1:nrow(ped), sep = ""),
	sire = ped$mgs, dam = NA, stringsAsFactors = FALSE)

	# fill dam column with pseudodams
	pednew <- data.frame(id = ped$id, sire = ped$sire, dam = damped$id, stringsAsFactors = FALSE)

	# add dams to pedigree
	pednew <- rbind(damped, pednew)

	}

	# add founders
	PEDtemp <- EditPedFast(label = pednew$id, sire = pednew$sire, dam = pednew$dam)
	PED <- pedigreemm::pedigree(label = PEDtemp$label, dam = PEDtemp$dam,
	sire = PEDtemp$sire)

	# get A without pseudodams
	L <- t(relfactor(PED))

	if(mgs) {

	A <- tcrossprod(L[!PED@label %in% damped$id,])
	ids <- PED@label[!PED@label %in% damped$id]

	} else {

	A <- tcrossprod(L)
	ids <- PED@label

	}

	A <- as(A, "dgCMatrix")
	rownames(A) <- ids
	colnames(A) <- ids

	return(A)

	}