Created
August 13, 2020 04:45
-
-
Save Planeshifter/5d18d991030c85154a5e5d6a70441697 to your computer and use it in GitHub Desktop.
Evaluation script for de-identification results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// MODULES // | |
const fs = require( 'fs' ); | |
const { join } = require( 'path' ); | |
// VARIABLES //

// Cache of record file contents, keyed by record file name:
const RECORDS = {};

// PHI categories tracked by the evaluation, in the order they are reported.
// FLAGGED and TALLY_IDENTIFIED are both derived from this list so that the two
// structures cannot drift apart.
const CATEGORIES = [
	'NAME',
	'LOCATION',
	'DATE',
	'PHONE',
	'EMAIL',
	'ORGANIZATION',
	'FAX'
];

// Per-category count of replacement tags found in the record files:
const FLAGGED = {};

// Maps the annotation types used in the gold-standard file onto the categories above:
const IDENTIFIERS_MAPPING = {
	'Date': 'DATE',
	'Doctor': 'NAME',
	'O': 'ORGANIZATION',
	'Street': 'LOCATION',
	'City': 'LOCATION',
	'State': 'LOCATION',
	'Zip': 'LOCATION',
	'Patient': 'NAME',
	'Organization': 'ORGANIZATION',
	'Phone': 'PHONE',
	'Fax': 'FAX',
	'Hospital': 'ORGANIZATION',
	'Country': 'LOCATION'
};

// Per-category tally of gold-standard PHI entries: `missed` counts entries whose
// text is still present in the record, `hits` counts entries whose text is gone.
const TALLY_IDENTIFIED = {};

for ( const category of CATEGORIES ) {
	FLAGGED[ category ] = 0;
	// Each category gets its own counter object (never a shared reference):
	TALLY_IDENTIFIED[ category ] = {
		missed: 0,
		hits: 0
	};
}
// FUNCTIONS //

/**
 * Counts the occurrences of a substring in a string.
 *
 * @param {String} string The string to search in (coerced to a string)
 * @param {String} subString The substring to search for (coerced to a string)
 * @param {Boolean} [allowOverlapping] Optional. (Default: false) When truthy, matches may overlap
 * @returns {Number} Number of occurrences; `string.length + 1` when `subString` is empty
 *
 * @author Vitim.us https://gist.github.com/victornpb/7736865
 * @see Unit Test https://jsfiddle.net/Victornpb/5axuh96u/
 * @see http://stackoverflow.com/questions/4009756/how-to-count-string-occurrence-in-string/7924240#7924240
 */
function occurrences( string, subString, allowOverlapping ) {
	// Coerce into locals instead of reassigning the parameters:
	const haystack = String( string );
	const needle = String( subString );
	if ( needle.length <= 0 ) {
		return haystack.length + 1;
	}
	// Advance by one character to allow overlaps, otherwise skip past the whole match:
	const step = allowOverlapping ? 1 : needle.length;
	let count = 0;
	let pos = haystack.indexOf( needle );
	while ( pos !== -1 ) {
		count += 1;
		pos = haystack.indexOf( needle, pos + step );
	}
	return count;
}
// MAIN //

// Usage: node <script> <goldStandardFile> <recordDir>
// `recordDir` is resolved relative to the directory containing this script.
const args = process.argv.slice( 2 );
const [ goldStandardFile, recordDir ] = args;
if ( typeof goldStandardFile !== 'string' ) {
	throw new Error( 'Must supply path of gold-standard file.' );
}
if ( typeof recordDir !== 'string' ) {
	// Without this check, `join` throws inside the read `try` below and the failure
	// is misreported as an unreadable record file.
	throw new Error( 'Must supply directory of record files (relative to the script directory).' );
}

// Replacement tag counted for each category when a record is first loaded:
const FLAG_TAGS = {
	DATE: '<dates>',
	NAME: '<names>',
	EMAIL: '<email>',
	LOCATION: '<locations>',
	PHONE: '<phone>',
	FAX: '<fax>',
	ORGANIZATION: '<organizations>'
};

const idPhi = fs.readFileSync( goldStandardFile, 'utf-8' )
	.split( /\r?\n/ ) // Split into an array for each line (tolerating CRLF line endings)
	.filter( line => line.trim() !== '' ) // Drop blank lines, e.g. the one after a trailing newline
	.map( line => line.split( ' ' ) ); // Split each line into its components: [ <patientID>, <noteID>, <start>, <end>, <type>, <original> ]

for ( const fields of idPhi ) {
	const [ patientID, noteID, , , type ] = fields;
	// The original text may itself contain spaces, so re-join everything after the type
	// (assumes the original text is the last component of the line):
	const phi = fields.slice( 5 ).join( ' ' );
	const fileName = '$record_' + patientID + '_' + noteID + '.txt';

	// Use `in` rather than truthiness so that empty and unreadable records are cached, too:
	if ( !( fileName in RECORDS ) ) {
		let fileContent = null;
		try {
			fileContent = fs.readFileSync( join( __dirname, recordDir, fileName ), 'utf-8' );
		} catch ( err ) {
			console.log( 'Not able to read file: '+fileName );
		}
		// Unreadable records are remembered as `null` so they are reported only once:
		RECORDS[ fileName ] = fileContent;
		if ( fileContent !== null ) {
			// Upon first reading file, count how many PIDs were flagged (each tag exactly once):
			for ( const category of Object.keys( FLAG_TAGS ) ) {
				FLAGGED[ category ] += occurrences( fileContent, FLAG_TAGS[ category ] );
			}
		}
	}

	const record = RECORDS[ fileName ];
	const tally = TALLY_IDENTIFIED[ IDENTIFIERS_MAPPING[ type ] ];
	// Skip entries with an unmapped type, an unreadable record, or no PHI text
	// (an empty string would match any record and be counted as missed):
	if ( !tally || typeof record !== 'string' || phi === '' ) {
		continue;
	}
	if ( record.includes( phi ) ) {
		tally.missed += 1;
		// Remove the first occurrence so that a repeated PHI entry has to match a further occurrence:
		RECORDS[ fileName ] = record.replace( phi, '' );
	} else {
		tally.hits += 1;
	}
}

// NOTE: categories without any data yield NaN scores (division by zero).
for ( const key of Object.keys( TALLY_IDENTIFIED ) ) {
	const obj = TALLY_IDENTIFIED[ key ];
	obj.recall = obj.hits / ( obj.hits + obj.missed );
	// Hits and flagged tags are counted independently, hence the ratio is clamped to [0,1]:
	obj.precision = Math.max( Math.min( obj.hits / FLAGGED[ key ], 1 ), 0 );
	obj.f1Score = 2.0 * ( obj.precision * obj.recall ) / ( obj.precision + obj.recall );
}
console.log( TALLY_IDENTIFIED );
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment