Skip to content

Instantly share code, notes, and snippets.

@CnrLwlss
Last active May 21, 2022 17:04
Show Gist options
  • Save CnrLwlss/989be615f28456c2f89e655ebaa23567 to your computer and use it in GitHub Desktop.
Save CnrLwlss/989be615f28456c2f89e655ebaa23567 to your computer and use it in GitHub Desktop.
Investigating linear regression classification of OXPHOS data
library("MASS")
# Generate some synthetic (NPC-corrected) control data
N_ctrl = 666
sigma_ctrl = matrix(c(2,3,3,6),nrow=2,ncol=2)
ctrl = data.frame(mvrnorm(n = N_ctrl, mu=c(5,7.5), sigma_ctrl),stringsAsFactors=FALSE)
colnames(ctrl) = c("VDAC1","NDUFB8")
# Generate some synthetic (NPC-corrected) patient data
# This dataset represents a patient with a nuclear-encoded defect in CI
# i.e. all fibres are in the same OXPHOS state
N_pat = 1000
sigma_pat = matrix(c(5,0,0,0.1),nrow=2,ncol=2)
pat = data.frame(mvrnorm(n = N_pat, mu=c(7.5,1.5), sigma_pat),stringsAsFactors=FALSE)
colnames(pat) = c("VDAC1","NDUFB8")
# Generate a 2Dmito plot to classify fibres by eye
plot(ctrl$VDAC1,ctrl$NDUFB8,xlab="VDAC1",ylab="NDUFB8",pch=16,col=rgb(0,0,0,0.1),xlim=c(0,15),ylim=c(0,15))
points(pat$VDAC1,pat$NDUFB8,pch=16,col=rgb(1,0,0,0.1))
# Use linear regression and 95% interval to classify fibres automatically, based on control data
# Make a synthetic dataset that spans the range of "real" data for plotting purposes only
synthetic = data.frame("VDAC1" = seq(0,20,0.1))
mod = lm(NDUFB8~VDAC1,data=ctrl)
pred_syn = predict.lm(mod,newdata = synthetic,se.fit=TRUE, interval = "prediction",na.action=na.omit, level=0.95)$fit
# Generates classification vectors pisd standard deviations away from the control data
# These will become solid and dashed lines in plots below
sds_from_controls = function(pred,pisd=qnorm(0.975)) {
res = list()
res$mid = pred[,'fit']
res$psd = pisd*(pred[,'upr'] - pred[,'lwr'])/(2*qnorm(0.975))
res$up = res$mid + res$psd
res$low = res$mid - res$psd
return(res)
}
# Calculate classification vectors using the Rocha et al. cutoffs
cutoffs = lapply(c(3,6,9,12),sds_from_controls,pred=pred_syn)
# Plotting the Rocha et al. cutoffs on 2Dmito plot, can see that patient data
# is sliced into 5 categories, despite all being simulated as belonging to a single "deficient" category
points(synthetic$VDAC1,cutoffs[[1]]$mid,lwd=3,type="l")
for(cutoff in cutoffs){
points(synthetic$VDAC1,cutoff$low,lwd=2,type="l",lty=2)
points(synthetic$VDAC1,cutoff$up,lwd=2,type="l",lty=2)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment