Last active
January 11, 2017 07:47
-
-
Save DASpringate/7804997 to your computer and use it in GitHub Desktop.
R code to produce a simulated dataset for an experiment on a made up insect. Measures include sex, body length, thorax width, number of thoracic bristles and some measure of aggression behaviour. Also there is exposure to some treatment stimulus/drug. This simulation uses Copulas to generate correlated variables from binomial, Gaussian and Poiss…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# R code to produce a simulated dataset for an experiment on a made up insect. | |
# Measures include sex, body length, thorax width, number of thoracic bristles and some measure of aggression behaviour. | |
# Also there is exposure to some treatment stimulus/drug. | |
# This simulation uses Copulas to generate correlated variables from binomial, Gaussian and Poisson distributions | |
require(copula) | |
set.seed(1888) | |
n <- 1000 | |
# 6 variables: | |
# sex (binomial) | |
# length (Gaussian) | |
# treatment (binomial) | |
# Number of bristles on Thorax (Poisson) | |
# aggression (Stratified normal distribution) | |
# Thorax width (Gaussian) | |
## build a normal copula for the underlying distibution: | |
# `param` is the lower triangle of a correlation matrix | |
underlying <- normalCopula(dim = 6, | |
param = c(-0.4,0,-0.2,-0.5,-0.2, # sex | |
0.3,0.05,0.5,0.7, # length | |
0.4,0.6,0.3, # treatment | |
0.3,0.1, # bristles | |
0.5), # aggression | |
dispstr = "un") # The Copula | |
## build the marginal distibutions from the underlying copula | |
marginals <- mvdc(copula = underlying, | |
margins = c("binom", "norm", "binom", "pois", "norm", "norm"), | |
paramMargins = list(list(size = 1, prob = 0.5), # parameters for the marginal distributions... | |
list(mean = 30, sd = 13), | |
list(size = 1, prob = 0.5), | |
list( lambda = 5), | |
list(mean = 0, sd = 1), | |
list(mean = 20, sd = 4))) | |
## generate the correlated random variables from the marginal distributions | |
dat <- as.data.frame(rMvdc(n, marginals)) | |
## rename the variables | |
names(dat) <- c("sex", "length", "treatment", "bristles", "agg_base", "width") | |
## Stratify the aggression variable | |
dat$aggression <- cut(round(dat$agg_base, 5), | |
seq(from=min(dat$agg_base), to = max(dat$agg_base),length.out = 6)) | |
levels(dat$aggression) <- paste0("aggress_",1:5) | |
# convert others to factors and clean up | |
dat$aggression <- ordered(dat$aggression) | |
dat$sex <- factor(dat$sex) | |
levels(dat$sex) <- c("Male", "Female") | |
dat$treatment <- factor(dat$treatment) | |
levels(dat$treatment) <- c("Control", "Treatment") | |
dat$agg_base <- NULL | |
dat <- dat[complete.cases(dat),] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment