Skip to content

Instantly share code, notes, and snippets.

@Planeshifter
Created August 13, 2020 04:45
Show Gist options
  • Save Planeshifter/5d18d991030c85154a5e5d6a70441697 to your computer and use it in GitHub Desktop.
Save Planeshifter/5d18d991030c85154a5e5d6a70441697 to your computer and use it in GitHub Desktop.
Evaluation script for de-identification results
// MODULES //
const fs = require( 'fs' );
const { join } = require( 'path' );
// VARIABLES //
// Cache of record file contents keyed by record file name; populated by the main script as files are first read.
const RECORDS = {};

// Categories for which counters are kept (the order determines the key order of the tables below).
const PHI_CATEGORIES = [ 'NAME', 'LOCATION', 'DATE', 'PHONE', 'EMAIL', 'ORGANIZATION', 'FAX' ];

// FLAGGED: per-category count of placeholder tags found in the record files.
const FLAGGED = {};

// TALLY_IDENTIFIED: per-category counters of gold-standard entries that were removed (`hits`) or are still present (`missed`).
const TALLY_IDENTIFIED = {};

for ( const category of PHI_CATEGORIES ) {
	FLAGGED[ category ] = 0;
	TALLY_IDENTIFIED[ category ] = { missed: 0, hits: 0 };
}

// Translates the type label of a gold-standard entry into one of the categories above.
// NOTE(review): the meaning of the 'O' label is not visible here — confirm it should map to ORGANIZATION.
const IDENTIFIERS_MAPPING = {
	Date: 'DATE',
	Doctor: 'NAME',
	O: 'ORGANIZATION',
	Street: 'LOCATION',
	City: 'LOCATION',
	State: 'LOCATION',
	Zip: 'LOCATION',
	Patient: 'NAME',
	Organization: 'ORGANIZATION',
	Phone: 'PHONE',
	Fax: 'FAX',
	Hospital: 'ORGANIZATION',
	Country: 'LOCATION'
};
// FUNCTIONS //
/**
* Counts how many times a substring occurs in a string.
*
* Both arguments are coerced to strings. An empty substring yields `string.length + 1`.
*
* @param {String} string The string to search in
* @param {String} subString The substring to search for
* @param {Boolean} [allowOverlapping] When truthy, matches may overlap (the search advances one character per match instead of the substring length). (Default: false)
* @returns {Number} The number of occurrences
*
* @author Vitim.us https://gist.github.com/victornpb/7736865
* @see Unit Test https://jsfiddle.net/Victornpb/5axuh96u/
* @see http://stackoverflow.com/questions/4009756/how-to-count-string-occurrence-in-string/7924240#7924240
*/
function occurrences(string, subString, allowOverlapping) {
	const haystack = string + '';
	const needle = subString + '';
	if (needle.length === 0) {
		return haystack.length + 1;
	}
	// Distance to move past the start of a match before searching again.
	const stride = allowOverlapping ? 1 : needle.length;
	let count = 0;
	for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + stride)) {
		count += 1;
	}
	return count;
}
// MAIN //
// Usage: node <script> <goldStandardFile> <recordDir>
// The script reads the gold-standard list of PHI entries, checks each entry against the corresponding
// record file in `recordDir`, and prints per-category hit/miss counts plus recall, precision and F1.
const args = process.argv.slice( 2 );
const [ goldStandardFile, recordDir ] = args;
if ( typeof goldStandardFile !== 'string' ) {
	throw new Error( 'Must supply path of gold-standard file.' );
}
if ( typeof recordDir !== 'string' ) {
	throw new Error( 'Must supply path of record directory.' );
}
const idPhi = fs.readFileSync( goldStandardFile, 'utf-8' )
	.split( /\r?\n/ ) // Split into lines (tolerating Windows line endings)
	.filter( line => line.trim() !== '' ) // Ignore blank lines, e.g. a trailing newline at the end of the file
	.map( line => line.split( ' ' ) ); // Split each line into its components: [ <patientID>, <noteID>, <start>, <end>, <type>, <original> ]
for ( const fields of idPhi ) {
	const fileName = '$record_' + fields[ 0 ] + '_' + fields[ 1 ] + '.txt';
	const type = fields[ 4 ];
	// NOTE(review): only the first space-separated token of the original text is used — confirm this is intended for multi-word PHI.
	const phi = fields[ 5 ];
	if ( RECORDS[ fileName ] === undefined ) {
		try {
			// Upon first reading file, count how many PIDs were flagged (each tag exactly once):
			const fileContent = fs.readFileSync( join( __dirname, recordDir, fileName ) ).toString();
			RECORDS[ fileName ] = fileContent;
			FLAGGED.DATE += occurrences( fileContent, '<dates>' );
			FLAGGED.NAME += occurrences( fileContent, '<names>' );
			FLAGGED.EMAIL += occurrences( fileContent, '<email>' );
			FLAGGED.LOCATION += occurrences( fileContent, '<locations>' );
			FLAGGED.PHONE += occurrences( fileContent, '<phone>' );
			FLAGGED.FAX += occurrences( fileContent, '<fax>' );
			FLAGGED.ORGANIZATION += occurrences( fileContent, '<organizations>' );
		} catch( err ) {
			console.log( 'Not able to read file: '+fileName );
			// Remember the failure so the file is neither re-read nor re-reported for later entries:
			RECORDS[ fileName ] = null;
		}
	}
	const record = RECORDS[ fileName ];
	const tally = TALLY_IDENTIFIED[ IDENTIFIERS_MAPPING[ type ] ];
	if ( record === null || !tally || !phi ) {
		// Unreadable record, unmapped type, or entry without PHI text: nothing to score.
		continue;
	}
	if ( record.includes( phi ) ) {
		// PHI text is still present in the record, i.e., it was not removed:
		tally.missed += 1;
		// Consume the first occurrence so that a repeated entry has to match a separate occurrence.
		// A space is used as replacement because the PHI token itself never contains a space, so the
		// substitution cannot create a new match:
		RECORDS[ fileName ] = record.replace( phi, ' ' );
	} else {
		tally.hits += 1;
	}
}
for ( const key of Object.keys( TALLY_IDENTIFIED ) ) {
	const obj = TALLY_IDENTIFIED[ key ];
	// Ratios evaluate to NaN for categories without any gold-standard entries or flagged tags.
	obj.recall = obj.hits / ( obj.hits + obj.missed );
	// Precision is clamped to the interval [0,1]:
	obj.precision = Math.max( Math.min( obj.hits / FLAGGED[ key ], 1 ), 0 );
	obj.f1Score = 2.0 * ( obj.precision * obj.recall ) / ( obj.precision + obj.recall );
}
console.log( TALLY_IDENTIFIED );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment