-
-
Save AngelBerihuete/c8637ad704f353bd79ec8a6feea22737 to your computer and use it in GitHub Desktop.
Modelling dependence with copulas. Full article at: http://datascienceplus.com/modelling-dependence-with-copulas/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Load library mass and set seed | |
library(MASS) | |
set.seed(100) | |
# We are going to use 3 random variables | |
m <- 3 | |
# Number of samples to be drawn | |
n <- 2000 | |
# Covariance matrix | |
sigma <- matrix(c(1, 0.4, 0.2, | |
0.4, 1, -0.8, | |
0.2, -0.8, 1), | |
nrow=3) | |
# Random samples | |
z <- mvrnorm(n,mu=rep(0, m),Sigma=sigma,empirical=T) | |
# Pairplot library | |
library(psych) | |
# Correlation calculation | |
cor(z,method='spearman') | |
# Pairplot | |
pairs.panels(z) | |
# Transform samples into uniform data within [0,1] | |
u <- pnorm(z) | |
pairs.panels(u) | |
# 3D plot library | |
library(rgl) | |
# 3D plot | |
plot3d(u[,1],u[,2],u[,3],pch=20,col='navyblue') | |
# Apply the inverse of the selected marginals | |
x1 <- qgamma(u[,1],shape=2,scale=1) | |
x2 <- qbeta(u[,2],2,2) | |
x3 <- qt(u[,3],df=5) | |
#3D plot | |
plot3d(x1,x2,x3,pch=20,col='blue') | |
# Check correlation | |
df <- cbind(x1,x2,x3) | |
pairs.panels(df) | |
cor(df,meth='spearman') | |
# Shorten the steps using the copula package | |
library(copula) | |
set.seed(100) | |
myCop <- normalCopula(param=c(0.4,0.2,-0.8), dim = 3, dispstr = "un") | |
myMvd <- mvdc(copula=myCop, margins=c("gamma", "beta", "t"), | |
paramMargins=list(list(shape=2, scale=1), | |
list(shape1=2, shape2=2), | |
list(df=5)) ) | |
# Generate random samples | |
Z2 <- rmvdc(myMvd, 2000) | |
colnames(Z2) <- c("x1", "x2", "x3") | |
pairs.panels(Z2) | |
#------------------------------------------------------------------------------------------------------------------------------ | |
# Example with real data | |
# Load data | |
cree <- read.csv('cree_r.csv',header=F)$V2 | |
yahoo <- read.csv('yahoo_r.csv',header=F)$V2 | |
# Plot and correlation checks | |
plot(cree,yahoo,pch='.') | |
abline(lm(yahoo~cree),col='red',lwd=1) | |
cor(cree,yahoo,method='spearman') | |
# Select the copula to be used | |
library(VineCopula) | |
u <- pobs(as.matrix(cbind(cree,yahoo)))[,1] | |
v <- pobs(as.matrix(cbind(cree,yahoo)))[,2] | |
selectedCopula <- BiCopSelect(u,v,familyset=NA) | |
selectedCopula | |
# Fit a t-copula | |
t.cop <- tCopula(dim=2) | |
set.seed(500) | |
m <- pobs(as.matrix(cbind(cree,yahoo))) | |
fit <- fitCopula(t.cop,m,method='ml') | |
coef(fit) | |
# Set the parameters | |
rho <- coef(fit)[1] | |
df <- coef(fit)[2] | |
# Plot the density | |
persp(tCopula(dim=2,rho,df=df),dCopula) | |
# Sample random observation from the copula | |
u <- rCopula(3965,tCopula(dim=2,rho,df=df)) | |
plot(u[,1],u[,2],pch='.',col='blue') | |
cor(u,method='spearman') | |
# Estimate marginals parameters | |
cree_mu <- mean(cree) | |
cree_sd <- sd(cree) | |
yahoo_mu <- mean(yahoo) | |
yahoo_sd <- sd(yahoo) | |
# Plot the histograms of the marginals and the fitted lines | |
hist(cree,breaks=80,main='Cree returns',freq=F,density=30,col='cyan',ylim=c(0,20),xlim=c(-0.2,0.3)) | |
lines(seq(-0.5,0.5,0.01),dnorm(seq(-0.5,0.5,0.01),cree_mu,cree_sd),col='red',lwd=2) | |
legend('topright',c('Fitted normal'),col=c('red'),lwd=2) | |
hist(yahoo,breaks=80,main='Yahoo returns',density=30,col='cyan',freq=F,ylim=c(0,20),xlim=c(-0.2,0.2)) | |
lines(seq(-0.5,0.5,0.01),dnorm(seq(-0.5,0.5,0.01),yahoo_mu,yahoo_sd),col='red',lwd=2) | |
legend('topright',c('Fitted normal'),col=c('red'),lwd=2) | |
# Build the distribution from the copula | |
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=df), margins=c("norm","norm"), | |
paramMargins=list(list(mean=cree_mu, sd=cree_sd), | |
list(mean=yahoo_mu, sd=yahoo_sd))) | |
# Sample from the distribution | |
sim <- rmvdc(copula_dist, 3965) | |
# Compare observed and simulated values | |
plot(cree,yahoo,main='Returns') | |
points(sim[,1],sim[,2],col='red') | |
legend('bottomright',c('Observed','Simulated'),col=c('black','red'),pch=21) | |
# Take a look at what would have happened for df=1 and df=8 | |
set.seed(4258) | |
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=1), margins=c("norm","norm"), | |
paramMargins=list(list(mean=cree_mu, sd=cree_sd), | |
list(mean=yahoo_mu, sd=yahoo_sd))) | |
sim <- rmvdc(copula_dist, 3965) | |
plot(cree,yahoo,main='Returns') | |
points(sim[,1],sim[,2],col='red') | |
legend('bottomright',c('Observed','Simulated df=1'),col=c('black','red'),pch=21) | |
cor(sim[,1],sim[,2],method='spearman') | |
copula_dist <- mvdc(copula=tCopula(rho,dim=2,df=8), margins=c("norm","norm"), | |
paramMargins=list(list(mean=cree_mu, sd=cree_sd), | |
list(mean=yahoo_mu, sd=yahoo_sd))) | |
sim <- rmvdc(copula_dist, 3965) | |
plot(cree,yahoo,main='Returns') | |
points(sim[,1],sim[,2],col='red') | |
legend('bottomright',c('Observed','Simulated df=8'),col=c('black','red'),pch=21) | |
cor(sim[,1],sim[,2],method='spearman') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment