Skip to content

Instantly share code, notes, and snippets.

@MJacobs1985
Created November 9, 2022 16:16
Show Gist options
  • Save MJacobs1985/e0a49e0c143f4e8c2d38cc9466c030a5 to your computer and use it in GitHub Desktop.
Save MJacobs1985/e0a49e0c143f4e8c2d38cc9466c030a5 to your computer and use it in GitHub Desktop.
## Clean all the datasets: each one contains many duplicated rows.
## For every table, duplicated() is evaluated on the listed column positions
## only, and only the first row of each duplicate group is kept.
## NOTE(review): single-bracket indexing `df[cols]` selects columns for a plain
## data.frame -- confirm none of these objects is a data.table.

# BPI tables: compare on columns 1-5.
Severity_BPI <- Severity_BPI[!duplicated(Severity_BPI[1:5]), ]
head(Severity_BPI)
Interference_BPI <- Interference_BPI[!duplicated(Interference_BPI[1:5]), ]
head(Interference_BPI)

# QOL tables compared on columns 1, 2 and 5-7.
PF_QOL <- PF_QOL[!duplicated(PF_QOL[c(1:2, 5:7)]), ]
head(PF_QOL)

# QOL tables compared on columns 1-5.
EF_QOL <- EF_QOL[!duplicated(EF_QOL[1:5]), ]
head(EF_QOL)
FA_QOL <- FA_QOL[!duplicated(FA_QOL[1:5]), ]
head(FA_QOL)
NV_QOL <- NV_QOL[!duplicated(NV_QOL[1:5]), ]
head(NV_QOL)

# QOL tables compared on columns 1, 2 and 5 only.
# NOTE(review): the original index was written `5:5`; the later merges select
# columns 5:7 from these tables -- confirm that column 5 alone is intended.
PA_QOL <- PA_QOL[!duplicated(PA_QOL[c(1, 2, 5)]), ]
head(PA_QOL)
DY_QOL <- DY_QOL[!duplicated(DY_QOL[c(1, 2, 5)]), ]
head(DY_QOL)
SL_QOL <- SL_QOL[!duplicated(SL_QOL[c(1, 2, 5)]), ]
head(SL_QOL)
AP_QOL <- AP_QOL[!duplicated(AP_QOL[c(1, 2, 5)]), ]
head(AP_QOL)
CO_QOL <- CO_QOL[!duplicated(CO_QOL[c(1, 2, 5)]), ]
head(CO_QOL)

# Compared on columns 1, 2 and 5-7, like PF_QOL.
Q12_QOL <- Q12_QOL[!duplicated(Q12_QOL[c(1:2, 5:7)]), ]
head(Q12_QOL)
## Combine all of the above into one wide table.
## merge() joins two data frames per call, so the tables are chained pairwise
## on Patientnr + time; every intermediate result (m1 ... m12) is kept.
## With merge()'s defaults, only key combinations present in both inputs
## survive each step.
## NOTE(review): BPI_6 is merged below but is not among the tables
## de-duplicated earlier in this script -- confirm it has one row per
## Patientnr/time, otherwise rows will multiply.
join_keys <- c("Patientnr", "time")

# Patient-level columns taken from the pancreas table (positions 1-33, 312-317).
names(pancreas)
demograph <- pancreas[, c(1:33, 312:317)]
head(demograph)

# BPI severity + interference.
m1 <- merge(
  x = Severity_BPI[, 1:5],
  y = Interference_BPI[, 1:5],
  by = join_keys
)
head(Severity_BPI)
head(Interference_BPI)
head(m1)

# Add BPI item 6 (columns 1 and 3-6 of BPI_6).
m2 <- merge(
  x = m1,
  y = BPI_6[, c(1, 3:6)],
  by = join_keys
)
head(m1)
head(BPI_6[, c(1, 3:6)])
head(m2)

# Add the QOL tables one at a time.
m3 <- merge(x = m2, y = PF_QOL[, c(1, 2, 5:7)], by = join_keys)
head(m2)
head(m3)

# EF_QOL is merged with all of its columns.
m4 <- merge(x = m3, y = EF_QOL, by = join_keys)
head(m3)
head(m4)

# Only the first three columns of FA_QOL are used.
m5 <- merge(x = m4, y = FA_QOL[, 1:3], by = join_keys)
head(m4)
head(m5)

m6 <- merge(x = m5, y = NV_QOL[, c(1, 2, 5:7)], by = join_keys)
head(m5)
head(m6)

m7 <- merge(x = m6, y = PA_QOL[, c(1, 2, 5:7)], by = join_keys)
head(m6)
head(m7)

m8 <- merge(x = m7, y = DY_QOL[, c(1, 2, 5:7)], by = join_keys)
head(m7)
head(m8)

m9 <- merge(x = m8, y = SL_QOL[, c(1, 2, 5:7)], by = join_keys)
head(m8)
head(m9)

m10 <- merge(x = m9, y = AP_QOL[, c(1, 2, 5:7)], by = join_keys)
head(m9)
head(m10)

m11 <- merge(x = m10, y = CO_QOL[, c(1, 2, 5:7)], by = join_keys)
head(m10)
head(m11)

m12 <- merge(x = m11, y = Q12_QOL[, c(1, 2, 5:7)], by = join_keys)
head(m11)
head(m12)

# Attach the patient-level columns; this join is on Patientnr alone.
total <- merge(x = m12, y = demograph, by = "Patientnr")
head(m12)
head(total)

# Inspect the structure and dimensions of the combined table.
str(total)
dim(total)
## Rename columns of the total dataset.
## Each entry maps an existing column name (left) to its new name (right).
## NOTE(review): setnames() is presumably data.table::setnames, which renames
## in place -- confirm data.table is attached before this point.
column_labels <- c(
  value = "Relief",
  pf2   = "Physical_Functioning",
  ef    = "Emotional_Functioning",
  q15   = "Overall_QOL",
  fa    = "Fatigue",
  nv    = "Nausea_Vomiting",
  sl    = "Insomnia",
  ap    = "Appetite_loss",
  co    = "Constipation",
  pa    = "Pain",
  dy    = "Dyspnoe"
)
setnames(
  total,
  old = names(column_labels),
  new = unname(column_labels)
)

# Re-check dimensions and structure after renaming, then plot missingness.
dim(total)
str(total)
DataExplorer::plot_missing(total)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment