Created
April 16, 2018 09:56
-
-
Save kambala-decapitator/94d50d86f35b42dc9db0fc9be887bded to your computer and use it in GitHub Desktop.
Batch processing of health data using QRISK®2-2015 algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <fstream> | |
#include <sstream> | |
#include <vector> | |
#include <cmath> | |
#include <cstring> | |
using std::string; | |
// source: https://qrisk.org/2015/QRISK2-2015-lgpl-source.tgz | |
/*static*/ double cvd_male_raw( | |
int age,int b_AF,int b_ra,int b_renal,int b_treatedhyp,int b_type1,int b_type2,double bmi,int ethrisk,int fh_cvd,double rati,double sbp,int smoke_cat,int surv,double town | |
) | |
{ | |
surv = 10; | |
double survivor[16] = { | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0.978794217109680, | |
0, | |
0, | |
0, | |
0, | |
0 | |
}; | |
/* The conditional arrays */ | |
double Iethrisk[10] = { | |
0, | |
0, | |
0.3173321430481919100000000, | |
0.4738590786081115500000000, | |
0.5171314655968145500000000, | |
0.1370301157366419200000000, | |
-0.3885522304972663900000000, | |
-0.3812495485312194500000000, | |
-0.4064461381650994500000000, | |
-0.2285715521377336100000000 | |
}; | |
double Ismoke[5] = { | |
0, | |
0.2684479158158020200000000, | |
0.6307674973877591700000000, | |
0.7178078883378695700000000, | |
0.8704172533465485100000000 | |
}; | |
/* Applying the fractional polynomial transforms */ | |
/* (which includes scaling) */ | |
double dage = age; | |
dage=dage/10; | |
double age_1 = pow(dage,-1); | |
double age_2 = pow(dage,2); | |
double dbmi = bmi; | |
dbmi=dbmi/10; | |
double bmi_2 = pow(dbmi,-2)*log(dbmi); | |
double bmi_1 = pow(dbmi,-2); | |
/* Centring the continuous variables */ | |
age_1 = age_1 - 0.233734160661697; | |
age_2 = age_2 - 18.304403305053711; | |
bmi_1 = bmi_1 - 0.146269768476486; | |
bmi_2 = bmi_2 - 0.140587374567986; | |
rati = rati - 4.321151256561279; | |
sbp = sbp - 130.589752197265620; | |
town = town - 0.551009356975555; | |
/* Start of Sum */ | |
double a=0; | |
/* The conditional sums */ | |
a += Iethrisk[ethrisk]; | |
a += Ismoke[smoke_cat]; | |
/* Sum from continuous values */ | |
a += age_1 * -18.0437312550377270000000000; | |
a += age_2 * 0.0236486454254306940000000; | |
a += bmi_1 * 2.5388084343581578000000000; | |
a += bmi_2 * -9.1034725871528597000000000; | |
a += rati * 0.1684397636136909500000000; | |
a += sbp * 0.0105003089380754820000000; | |
a += town * 0.0323801637634487590000000; | |
/* Sum from boolean values */ | |
a += b_AF * 1.0363048000259454000000000; | |
a += b_ra * 0.2519953134791012600000000; | |
a += b_renal * 0.8359352886995286000000000; | |
a += b_treatedhyp * 0.6603459695917862600000000; | |
a += b_type1 * 1.3309170433446138000000000; | |
a += b_type2 * 0.9454348892774417900000000; | |
a += fh_cvd * 0.5986037897136281500000000; | |
/* Sum from interaction terms */ | |
a += age_1 * (smoke_cat==1) * 0.6186864699379683900000000; | |
a += age_1 * (smoke_cat==2) * 1.5522017055600055000000000; | |
a += age_1 * (smoke_cat==3) * 2.4407210657517648000000000; | |
a += age_1 * (smoke_cat==4) * 3.5140494491884624000000000; | |
a += age_1 * b_AF * 8.0382925558108482000000000; | |
a += age_1 * b_renal * -1.6389521229064483000000000; | |
a += age_1 * b_treatedhyp * 8.4621771382346651000000000; | |
a += age_1 * b_type1 * 5.4977016563835504000000000; | |
a += age_1 * b_type2 * 3.3974747488766690000000000; | |
a += age_1 * bmi_1 * 33.8489881012767600000000000; | |
a += age_1 * bmi_2 * -140.6707025404897100000000000; | |
a += age_1 * fh_cvd * 2.0858333154353321000000000; | |
a += age_1 * sbp * 0.0501283668830720540000000; | |
a += age_1 * town * -0.1988268217186850700000000; | |
a += age_2 * (smoke_cat==1) * -0.0040893975066796338000000; | |
a += age_2 * (smoke_cat==2) * -0.0056065852346001768000000; | |
a += age_2 * (smoke_cat==3) * -0.0018261006189440492000000; | |
a += age_2 * (smoke_cat==4) * -0.0014997157296173290000000; | |
a += age_2 * b_AF * 0.0052471594895864343000000; | |
a += age_2 * b_renal * -0.0179663586193546390000000; | |
a += age_2 * b_treatedhyp * 0.0092088445323379176000000; | |
a += age_2 * b_type1 * 0.0047493510223424558000000; | |
a += age_2 * b_type2 * -0.0048113775783491563000000; | |
a += age_2 * bmi_1 * 0.0627410757513945650000000; | |
a += age_2 * bmi_2 * -0.2382914909385732100000000; | |
a += age_2 * fh_cvd * -0.0049971149213281010000000; | |
a += age_2 * sbp * -0.0000523700987951435090000; | |
a += age_2 * town * -0.0012518116569283104000000; | |
/* Calculate the score itself */ | |
double score = 100.0 * (1 - pow(survivor[surv], exp(a)) ); | |
return score; | |
} | |
/*static*/ double cvd_female_raw( | |
int age,int b_AF,int b_ra,int b_renal,int b_treatedhyp,int b_type1,int b_type2,double bmi,int ethrisk,int fh_cvd,double rati,double sbp,int smoke_cat,int surv,double town | |
) | |
{ | |
surv = 10; | |
double survivor[16] = { | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0, | |
0.989747583866119, | |
0, | |
0, | |
0, | |
0, | |
0 | |
}; | |
/* The conditional arrays */ | |
double Iethrisk[10] = { | |
0, | |
0, | |
0.2574099349831925900000000, | |
0.6129795430571779400000000, | |
0.3362159841669621300000000, | |
0.1512517303224336400000000, | |
-0.1794156259657768100000000, | |
-0.3503423610057745400000000, | |
-0.2778372483233216800000000, | |
-0.1592734122665366000000000 | |
}; | |
double Ismoke[5] = { | |
0, | |
0.2119377108760385200000000, | |
0.6618634379685941500000000, | |
0.7570714587132305600000000, | |
0.9496298251457036000000000 | |
}; | |
/* Applying the fractional polynomial transforms */ | |
/* (which includes scaling) */ | |
double dage = age; | |
dage=dage/10; | |
double age_2 = dage; | |
double age_1 = pow(dage,.5); | |
double dbmi = bmi; | |
dbmi=dbmi/10; | |
double bmi_2 = pow(dbmi,-2)*log(dbmi); | |
double bmi_1 = pow(dbmi,-2); | |
/* Centring the continuous variables */ | |
age_1 = age_1 - 2.086397409439087; | |
age_2 = age_2 - 4.353054523468018; | |
bmi_1 = bmi_1 - 0.152244374155998; | |
bmi_2 = bmi_2 - 0.143282383680344; | |
rati = rati - 3.506655454635620; | |
sbp = sbp - 125.040039062500000; | |
town = town - 0.416743695735931; | |
/* Start of Sum */ | |
double a=0; | |
/* The conditional sums */ | |
a += Iethrisk[ethrisk]; | |
a += Ismoke[smoke_cat]; | |
/* Sum from continuous values */ | |
a += age_1 * 4.4417863976316578000000000; | |
a += age_2 * 0.0281637210672999180000000; | |
a += bmi_1 * 0.8942365304710663300000000; | |
a += bmi_2 * -6.5748047596104335000000000; | |
a += rati * 0.1433900561621420900000000; | |
a += sbp * 0.0128971795843613720000000; | |
a += town * 0.0664772630011438850000000; | |
/* Sum from boolean values */ | |
a += b_AF * 1.6284780236484424000000000; | |
a += b_ra * 0.2901233104088770700000000; | |
a += b_renal * 1.0043796680368302000000000; | |
a += b_treatedhyp * 0.6180430562788129500000000; | |
a += b_type1 * 1.8400348250874599000000000; | |
a += b_type2 * 1.1711626412196512000000000; | |
a += fh_cvd * 0.5147261203665195500000000; | |
/* Sum from interaction terms */ | |
a += age_1 * (smoke_cat==1) * 0.7464406144391666500000000; | |
a += age_1 * (smoke_cat==2) * 0.2568541711879666600000000; | |
a += age_1 * (smoke_cat==3) * -1.5452226707866523000000000; | |
a += age_1 * (smoke_cat==4) * -1.7113013709043405000000000; | |
a += age_1 * b_AF * -7.0177986441269269000000000; | |
a += age_1 * b_renal * -2.9684019256454390000000000; | |
a += age_1 * b_treatedhyp * -4.2219906452967848000000000; | |
a += age_1 * b_type1 * 1.6835769546040080000000000; | |
a += age_1 * b_type2 * -2.9371798540034648000000000; | |
a += age_1 * bmi_1 * 0.1797196207044682300000000; | |
a += age_1 * bmi_2 * 40.2428166760658140000000000; | |
a += age_1 * fh_cvd * 0.1439979240753906700000000; | |
a += age_1 * sbp * -0.0362575233899774460000000; | |
a += age_1 * town * 0.3735138031433442600000000; | |
a += age_2 * (smoke_cat==1) * -0.1927057741748231000000000; | |
a += age_2 * (smoke_cat==2) * -0.1526965063458932700000000; | |
a += age_2 * (smoke_cat==3) * 0.2313563976521429400000000; | |
a += age_2 * (smoke_cat==4) * 0.2307165013868296700000000; | |
a += age_2 * b_AF * 1.1395776028337732000000000; | |
a += age_2 * b_renal * 0.4356963208330940600000000; | |
a += age_2 * b_treatedhyp * 0.7265947108887239600000000; | |
a += age_2 * b_type1 * -0.6320977766275653900000000; | |
a += age_2 * b_type2 * 0.4023270434871086800000000; | |
a += age_2 * bmi_1 * 0.1319276622711877700000000; | |
a += age_2 * bmi_2 * -7.3211322435546409000000000; | |
a += age_2 * fh_cvd * -0.1330260018273720400000000; | |
a += age_2 * sbp * 0.0045842850495397955000000; | |
a += age_2 * town * -0.0952370300845990780000000; | |
/* Calculate the score itself */ | |
double score = 100.0 * (1 - pow(survivor[surv], exp(a)) ); | |
return score; | |
} | |
// handle any line endings | |
// https://stackoverflow.com/a/6089413/1971301 | |
std::istream& safeGetline(std::istream& is, std::string& t) | |
{ | |
t.clear(); | |
// The characters in the stream are read one-by-one using a std::streambuf. | |
// That is faster than reading them one-by-one using the std::istream. | |
// Code that uses streambuf this way must be guarded by a sentry object. | |
// The sentry object performs various tasks, | |
// such as thread synchronization and updating the stream state. | |
std::istream::sentry se(is, true); | |
std::streambuf* sb = is.rdbuf(); | |
for(;;) { | |
int c = sb->sbumpc(); | |
switch (c) { | |
case '\n': | |
return is; | |
case '\r': | |
if(sb->sgetc() == '\n') | |
sb->sbumpc(); | |
return is; | |
case std::streambuf::traits_type::eof(): | |
// Also handle the case when the last line has no line ending | |
if(t.empty()) | |
is.setstate(std::ios::eofbit); | |
return is; | |
default: | |
t += (char)c; | |
} | |
} | |
} | |
const char *formattedDouble(double v) | |
{ | |
const char *fmt = "%.1f"; | |
int sz = std::snprintf(nullptr, 0, fmt, v); | |
std::vector<char> buf(sz + 1); | |
std::snprintf(&buf[0], buf.size(), fmt, v); | |
return buf.data(); | |
} | |
int main (int argc, char const *argv[]) | |
{ | |
const char delimeter = ';'; | |
std::ifstream in(argc > 1 ? argv[1] : "data.csv", std::ios::in); | |
if(!in) | |
{ | |
std::cerr << "error opening input CSV file\n"; | |
return 1; | |
} | |
// skip first line (with column names) | |
string foo; | |
in >> foo; | |
std::ofstream out("out.csv", std::ios::out); | |
if(!out) | |
{ | |
std::cerr << "error opening output file out.csv\n"; | |
return 2; | |
} | |
while(!in.eof()) | |
{ | |
string s; | |
safeGetline(in, s); | |
if (s.empty()) | |
continue; | |
// sex;age;AF;ra;renal;treatedhyp;type1;type2;bmi;ethrisk;fh_cvd;rati;sbp;smoke_cat | |
std::istringstream iss(s); | |
string sex; | |
std::getline(iss, sex, delimeter); | |
double(*scoreFunc)(int,int,int,int,int,int,int,double,int,int,double,double,int,int,double) = sex == "0" ? cvd_female_raw : cvd_male_raw; | |
string ageStr; | |
std::getline(iss, ageStr, delimeter); | |
int age = 0; | |
try { | |
age = std::stoi(ageStr); | |
} catch (std::exception e) {} | |
string afStr; | |
std::getline(iss, afStr, delimeter); | |
int af = 0; | |
try { | |
af = std::stoi(afStr); | |
} catch (std::exception e) {} | |
string raStr; | |
std::getline(iss, raStr, delimeter); | |
int ra = 0; | |
try { | |
ra = std::stoi(raStr); | |
} catch (std::exception e) {} | |
string renalStr; | |
std::getline(iss, renalStr, delimeter); | |
int renal = 0; | |
try { | |
renal = std::stoi(renalStr); | |
} catch (std::exception e) {} | |
string treatedhypStr; | |
std::getline(iss, treatedhypStr, delimeter); | |
int treatedhyp = 0; | |
try { | |
treatedhyp = std::stoi(treatedhypStr); | |
} catch (std::exception e) {} | |
string type1Str; | |
std::getline(iss, type1Str, delimeter); | |
int type1 = 0; | |
try { | |
type1 = std::stoi(type1Str); | |
} catch (std::exception e) {} | |
string type2Str; | |
std::getline(iss, type2Str, delimeter); | |
int type2 = 0; | |
try { | |
type2 = std::stoi(type2Str); | |
} catch (std::exception e) {} | |
string bmiStr; | |
std::getline(iss, bmiStr, delimeter); | |
double bmi = 0.0; | |
try { | |
bmi = std::stod(bmiStr); | |
} catch (std::exception e) {} | |
string ethriskStr; | |
std::getline(iss, ethriskStr, delimeter); | |
int ethrisk = 0; | |
try { | |
ethrisk = std::stoi(ethriskStr); | |
} catch (std::exception e) {} | |
string fh_cvdStr; | |
std::getline(iss, fh_cvdStr, delimeter); | |
int fh_cvd = 0; | |
try { | |
fh_cvd = std::stoi(fh_cvdStr); | |
} catch (std::exception e) {} | |
string ratiStr; | |
std::getline(iss, ratiStr, delimeter); | |
double rati = 4.0; | |
try { | |
rati = std::stod(ratiStr); | |
} catch (std::exception e) {} | |
string sbpStr; | |
std::getline(iss, sbpStr, delimeter); | |
double sbp = 0.0; | |
try { | |
sbp = std::stod(sbpStr); | |
} catch (std::exception e) {} | |
string smokeStr; | |
std::getline(iss, smokeStr, delimeter); | |
int smoke = 0; | |
try { | |
smoke = std::stoi(smokeStr); | |
} catch (std::exception e) {} | |
double score = scoreFunc(age, af, ra, renal, treatedhyp, type1, type2, bmi, ethrisk, fh_cvd, rati, sbp, smoke, 10, 0.0); | |
double healthyScore = scoreFunc(age, 0, 0, 0, 0, 0, 0, 25.0, ethrisk, 0, 4.0, 125.0, 0, 10, 0.0); | |
out << formattedDouble(score) << delimeter << formattedDouble(healthyScore) << delimeter << formattedDouble(score / healthyScore) << delimeter; | |
// heart age calculation | |
while (healthyScore < score && age <= 100) { | |
healthyScore = scoreFunc(++age, 0, 0, 0, 0, 0, 0, 25.0, ethrisk, 0, 4.0, 125.0, 0, 10, 0.0); | |
} | |
out << age << delimeter << formattedDouble(healthyScore) << std::endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment