Created
November 25, 2015 16:22
-
-
Save peaeater/bc2ed5d99a4c4bf84af5 to your computer and use it in GitHub Desktop.
Takes a tab-delimited input file (TSV) produced by ner.ps1 and writes one text file per category found, using the output file name as a template. If a single category is named, only that category is extracted and a single output text file is created. If no output file is provided, results are written to the console instead.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<#
.SYNOPSIS
Splits a tab-delimited entity list (as produced by ner.ps1) into per-category entity lists.

.DESCRIPTION
Reads a headerless, two-column, tab-delimited file; the first column is read as
"Entity" and the second as "Category". For each selected category the matching
rows are sorted by Entity with duplicates removed.

Categories: person, location, organization, misc, money, percent, date, time
(depending on classifier used to produce the tsv).

.PARAMETER file
Path to the tab-delimited input file.

.PARAMETER outfile
Optional output path. If omitted, the matching rows (Entity and Category) are
written to the pipeline/console instead of a file.
With -category, this is the exact file that is written (one entity per line).
Without -category, it is used as a filename template: "<dir>\<base><ext>"
becomes "<dir>\<base>.<category><ext>" for every category found.

.PARAMETER category
Optional single category to extract. If omitted, every non-empty category found
in the input is processed.
#>
param(
    [Parameter(Mandatory=$true,Position=0)]
    [string]$file,
    [Parameter(Mandatory=$false,Position=1)]
    [string]$outfile,
    [Parameter(Mandatory=$false,Position=2)]
    [string]$category
)

# Writes the Entity property of each row to $Path, one per line, creating the
# parent directory first if it does not exist. An existing file is overwritten.
function Write-EntityFile {
    param(
        [object[]]$Entities,
        [string]$Path
    )

    $directory = Split-Path -Path $Path -Parent
    if ($directory -and !(Test-Path -LiteralPath $directory)) {
        # Out-Null: keep the created DirectoryInfo out of the script's output.
        New-Item -ItemType Directory -Path $directory | Out-Null
    }

    $stream = New-Object System.IO.StreamWriter -ArgumentList $Path
    try {
        # foreach statement (not the pipeline) so a null/empty list writes nothing.
        foreach ($row in $Entities) {
            $stream.WriteLine($row.Entity)
        }
    }
    finally {
        # Always release the file handle, even if a write fails part-way.
        $stream.Dispose()
    }
}

$tsv = Import-Csv -Path $file -Delimiter "`t" -Header "Entity","Category"

# Resolve the output path only when one was supplied, so console mode never
# depends on how an empty path would resolve. The PowerShell path API is used
# (rather than .NET's working directory) so relative paths follow the current
# PowerShell location.
$outinfo = $null
if ($outfile) {
    $outinfo = New-Object System.IO.FileInfo($ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($outfile))
}

# A named category is processed on its own; otherwise every distinct non-empty
# category in the input is processed.
$singleCategory = ($category -ne '')
if ($singleCategory) {
    $categories = @($category)
}
else {
    $categories = @(
        $tsv |
            Where-Object { $_.Category -ne '' } |
            Sort-Object -Property Category -Unique |
            Select-Object -ExpandProperty Category
    )
}

foreach ($cat in $categories) {
    $entities = $tsv | Where-Object { $_.Category -eq $cat } | Sort-Object -Property Entity -Unique

    if ($outinfo) {
        if ($singleCategory) {
            # Named category: outfile is the literal target.
            $target = $outinfo.FullName
        }
        else {
            # All categories: outfile is a template, e.g. out.txt -> out.person.txt
            $leaf = '{0}.{1}{2}' -f $outinfo.BaseName, $cat, $outinfo.Extension
            $target = Join-Path -Path $outinfo.DirectoryName -ChildPath $leaf
        }
        Write-EntityFile -Entities $entities -Path $target
    }
    else {
        Write-Output $entities
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment