Created
June 25, 2020 20:35
-
-
Save linuskohl/8bde152c22bb97273140a4d311a31057 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
#
# Author: Linus Kohl
# E-Mail: [email protected]
# Org: MunichResearch
#
use strict;
use warnings;
use UMLS::Interface;
use UMLS::Similarity::lch;
use UMLS::Similarity::path;
use Parse::CSV;
use Text::CSV;
use Try::Tiny;
use Term::ProgressBar;

# Reads a CSV file of concept pairings (columns: id, cui1, cui2), asks the
# UMLS::Similarity "path" and "lch" measures for the relatedness of every
# pair and writes one CSV row (cui1, cui2, lch, path) per scored pair.
#
# Usage: <script> <input.csv> <output.csv>
#
# Rows for which a measure throws are skipped; the number of skipped rows is
# reported on STDERR once processing has finished.

my ( $filenameInput, $filenameOutput ) = @ARGV;
die "Usage: $0 <input.csv> <output.csv>\n"
  if !defined $filenameInput || !defined $filenameOutput;

# Number of input lines, used only as the progress bar target. Counted in
# Perl so the file name is never handed to a shell.
my $numPairings = countLines($filenameInput);

# Open files
open( my $ioIn, '<:encoding(UTF-8)', $filenameInput )
  or die "Failed to open file '$filenameInput': $!";
open( my $ioOut, '>:encoding(UTF-8)', $filenameOutput )
  or die "Failed to create output file '$filenameOutput': $!";

# CSV input
my $csvIn = Parse::CSV->new( handle => $ioIn, sep_char => ',' );

# CSV output
my $csvOut = Text::CSV->new()
  or die "Unable to create CSV writer: " . Text::CSV->error_diag() . "\n";
$csvOut->eol("\r\n");

# NOTE: column_names() only registers the keys used by the *_hr methods;
# no header row is written to the output file.
$csvOut->column_names( 'cuid1', 'cuid2', 'lch', 'path' );

# Initialize UMLS interfaces
my $umls = UMLS::Interface->new();
die "Unable to create UMLS::Interface object.\n" if ( !$umls );
my $path = UMLS::Similarity::path->new($umls);
die "Unable to create Path Similarity object.\n" if ( !$path );
my $lch = UMLS::Similarity::lch->new($umls);
die "Unable to create LCH Similarity object.\n" if ( !$lch );

my $progress   = Term::ProgressBar->new($numPairings);
my $rowsDone   = 0;    # rows consumed so far; drives the progress bar
my $rowsFailed = 0;    # rows skipped because they could not be scored

while ( my $rowRef = $csvIn->fetch ) {

    # uncomment if file contains header row
    # if ( $csvIn->row == 1 ) { $progress->update( ++$rowsDone ); next; }

    # first column is the pairing id, which is not needed for scoring
    my ( undef, $cui1, $cui2 ) = @{$rowRef};

    # Only the scoring is best-effort; a failure here skips the row.
    my $scores = try {
        die "row does not contain two CUIs\n"
          if !defined $cui1 || !defined $cui2;

        # get distances
        my $pathValue = $path->getRelatedness( $cui1, $cui2 );
        my $lchValue  = $lch->getRelatedness( $cui1, $cui2 );
        [ $lchValue, $pathValue ];
    }
    catch {
        $rowsFailed++;
        undef;
    };

    # Writing happens outside the try block so that an output error is
    # fatal instead of being counted as an unscorable row.
    if ($scores) {
        $csvOut->print( $ioOut, [ $cui1, $cui2, @{$scores} ] )
          or die "Failed to write to '$filenameOutput': "
          . $csvOut->error_diag() . "\n";
    }

    # Advance the bar with an explicit counter for every consumed row.
    $progress->update( ++$rowsDone );
}

# fetch() returns false both at end of file and on a parse error; errstr
# is non-empty only in the latter case.
die "Failed to parse '$filenameInput': " . $csvIn->errstr . "\n"
  if $csvIn->errstr;

# Buffered write errors surface at close, so this check matters.
close $ioOut or die "Failed to close '$filenameOutput': $!";
close $ioIn  or die "Failed to close '$filenameInput': $!";

warn "$rowsFailed of $rowsDone pairings could not be scored and were skipped.\n"
  if $rowsFailed;

# Returns the number of lines in $file; dies if the file cannot be read.
sub countLines {
    my ($file) = @_;

    open( my $fh, '<', $file )
      or die "Failed to open file '$file': $!";

    my $count = 0;
    while ( defined( my $line = <$fh> ) ) {
        $count++;
    }

    close $fh or die "Failed to close '$file': $!";
    return $count;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment