Last active
May 21, 2022 17:04
-
-
Save CnrLwlss/989be615f28456c2f89e655ebaa23567 to your computer and use it in GitHub Desktop.
Investigating linear regression classification of OXPHOS data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("MASS") | |
# Generate some synthetic (NPC-corrected) control data | |
N_ctrl = 666 | |
sigma_ctrl = matrix(c(2,3,3,6),nrow=2,ncol=2) | |
ctrl = data.frame(mvrnorm(n = N_ctrl, mu=c(5,7.5), sigma_ctrl),stringsAsFactors=FALSE) | |
colnames(ctrl) = c("VDAC1","NDUFB8") | |
# Generate some synthetic (NPC-corrected) patient data | |
# This dataset represents a patient with a nuclear-encoded defect in CI | |
# i.e. all fibres are in the same OXPHOS state | |
N_pat = 1000 | |
sigma_pat = matrix(c(5,0,0,0.1),nrow=2,ncol=2) | |
pat = data.frame(mvrnorm(n = N_pat, mu=c(7.5,1.5), sigma_pat),stringsAsFactors=FALSE) | |
colnames(pat) = c("VDAC1","NDUFB8") | |
# Generate a 2Dmito plot to classify fibres by eye | |
plot(ctrl$VDAC1,ctrl$NDUFB8,xlab="VDAC1",ylab="NDUFB8",pch=16,col=rgb(0,0,0,0.1),xlim=c(0,15),ylim=c(0,15)) | |
points(pat$VDAC1,pat$NDUFB8,pch=16,col=rgb(1,0,0,0.1)) | |
# Use linear regression and 95% interval to classify fibres automatically, based on control data | |
# Make a synthetic dataset that spans the range of "real" data for plotting purposes only | |
synthetic = data.frame("VDAC1" = seq(0,20,0.1)) | |
mod = lm(NDUFB8~VDAC1,data=ctrl) | |
pred_syn = predict.lm(mod,newdata = synthetic,se.fit=TRUE, interval = "prediction",na.action=na.omit, level=0.95)$fit | |
# Generates classification vectors pisd standard deviations away from the control data | |
# These will become solid and dashed lines in plots below | |
sds_from_controls = function(pred,pisd=qnorm(0.975)) { | |
res = list() | |
res$mid = pred[,'fit'] | |
res$psd = pisd*(pred[,'upr'] - pred[,'lwr'])/(2*qnorm(0.975)) | |
res$up = res$mid + res$psd | |
res$low = res$mid - res$psd | |
return(res) | |
} | |
# Calculate classification vectors using the Rocha et al. cutoffs | |
cutoffs = lapply(c(3,6,9,12),sds_from_controls,pred=pred_syn) | |
# Plotting the Rocha et al. cutoffs on 2Dmito plot, can see that patient data | |
# is sliced into 5 categories, despite all being simulated as belonging to a single "deficient" category | |
points(synthetic$VDAC1,cutoffs[[1]]$mid,lwd=3,type="l") | |
for(cutoff in cutoffs){ | |
points(synthetic$VDAC1,cutoff$low,lwd=2,type="l",lty=2) | |
points(synthetic$VDAC1,cutoff$up,lwd=2,type="l",lty=2) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment