Last active
December 29, 2015 16:09
-
-
Save mjwillson/7695728 to your computer and use it in GitHub Desktop.
ann -- ultra-basic console-based multiclass text annotation tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# ann -- ultra-basic console-based multiclass text annotation tool.
#
# Command-line option handling: builds the OptionParser, records the parsed
# switches in OPTIONS and removes them from ARGV, so that only the positional
# arguments (INPUT_FILE followed by HOTKEY OUTPUT_FILE pairs) remain.
require 'optparse'

# Parsed command-line switches. Keys set by the handlers below:
# :append, :prompt, :save_progress and :progress_file.
OPTIONS = {}

PARSER = OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [OPTIONS] INPUT_FILE [HOTKEY OUTPUT_FILE]..."

  # Long-form description printed between the banner and the option list.
  opts.separator(<<END)
#{$0} -- ultra-basic console-based multiclass text annotation tool
Give it an input file with one example per line, and a series of hotkeys and
output files to be written to, e.g.
ann examples.txt y yes-file.txt n no-file.txt
For each line in the input it will issue a prompt, e.g.
Annotation? (y/n):
at the console. If you press y the example is written to yes-file.txt,
if n then no-file.txt. (hotkeys are case-insensitive). Supports saving
and resuming progress with -s.
END

  opts.separator("Options:")

  opts.on("-a", "--append", "Append to output files (default is to write)") do |v|
    OPTIONS[:append] = v
  end

  opts.on("-p", "--prompt PROMPT", "Overwrite the default prompt, which is",
          "'Annotation? (h/o/t/k/e/y/s)'") do |v|
    OPTIONS[:prompt] = v
  end

  # Saving progress only makes sense when earlier annotations are kept,
  # so both progress switches also turn on append mode.
  opts.on("-s", "--save-progress", "Saves/resumes progress to/from a file <INPUT_FILE>.ann-pos",
          "containing the byte offset within INPUT_FILE of the",
          "next line to be annotated. Requires input is a file.",
          "You can then Ctrl-C to stop annotating and your position",
          "will be saved. Implies --append.") do
    OPTIONS[:save_progress] = true
    OPTIONS[:append] = true
  end

  opts.on("--progress-file PROGRESS_FILENAME", "Like --save-progress but overrides the default progress",
          "filename. Useful if annotating same dataset for different tasks.") do |v|
    OPTIONS[:save_progress] = true
    OPTIONS[:progress_file] = v
    OPTIONS[:append] = true
  end

  opts.on_tail("-h", "--help", "This info") do
    puts opts
    exit
  end
end

# Destructively parse ARGV: recognised switches are removed, positionals stay.
PARSER.parse!(ARGV)
# Reports a command-line usage error and terminates the script.
#
# Writes +message+ (when given) followed by a blank line, then the usage
# text held in PARSER, to standard error, and exits with status 1.
#
# @param message [String, nil] optional explanation printed above the usage
# @return [void] never returns normally; exits with status 1
def bad_args(message = nil)
  if message
    STDERR.puts message
    STDERR.puts
  end
  STDERR.puts PARSER
  exit 1
end
# Positional arguments are one INPUT_FILE followed by at least one
# HOTKEY OUTPUT_FILE pair, so the count must be odd and greater than one.
unless ARGV.count > 1 && ARGV.count.odd?
  bad_args("Need an INPUT-FILE followed by pairs of HOTKEY OUTPUT-FILE arguments")
end

# The examples to annotate, one per line.
INPUT_FILE = File.open(ARGV.shift, "r")

# Path of the progress file, or nil when progress saving is disabled.
PROGRESS_FILENAME = if OPTIONS[:save_progress]
  bad_args("--save-progress requires input is a file") unless File.file?(INPUT_FILE)
  OPTIONS[:progress_file] || "#{INPUT_FILE.path}.ann-pos"
end

# Resume from the byte offset recorded by a previous run, if there is one.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
if PROGRESS_FILENAME && File.exist?(PROGRESS_FILENAME)
  INPUT_FILE.seek(File.read(PROGRESS_FILENAME).to_i)
end

WRITE_MODE = OPTIONS[:append] ? 'a' : 'w'

# Lower-cased hotkey => open output file for that class.
HOTKEY_TO_FILE = {}
# Lower-cased hotkeys in command-line order (used to build the prompt).
HOTKEYS = []

ARGV.each_slice(2) do |hotkey, filename|
  bad_args("Hotkeys must be single characters") if hotkey.length != 1
  key = hotkey.downcase
  # A repeated hotkey would replace the earlier mapping, leaving that output
  # file opened (and truncated in write mode) but never written to.
  bad_args("Duplicate hotkey: #{key}") if HOTKEY_TO_FILE.key?(key)
  HOTKEY_TO_FILE[key] = File.open(filename, WRITE_MODE)
  HOTKEYS << key
end

# Keypresses are read directly from the terminal, so STDIN cannot be a pipe.
bad_args("STDIN must be a terminal") unless STDIN.tty?
# Reads a single keypress from the terminal without waiting for Enter.
#
# This is a bit of a kludge and won't work on windows.
# I tried Curses.getch and Curses.cbreak but no joy.
#
# Saves the current terminal settings with `stty -g`, switches the terminal
# to raw, non-echoing mode (with `isig` set so that Ctrl-C can still stop the
# script), reads one character, and restores the saved settings afterwards
# even if the read is interrupted.
#
# @return [String] the character that was typed
# @raise [EOFError] if STDIN is at end of file
def get_char
  state = `stty -g`.chomp
  `stty raw -echo -icanon isig`
  # readchar raises EOFError at end of input, whereas getc returns nil and
  # the subsequent .chr call would fail with an unhelpful NoMethodError.
  STDIN.readchar
ensure
  # state is nil when the first stty call itself raised, and empty when it
  # produced no output; in either case there is nothing to restore.
  `stty #{state}` if state && !state.empty?
end
# Prompt shown for every example: the user-supplied text, or a default that
# lists the available hotkeys, followed by ": ". Built with + so the string
# stored in OPTIONS is not modified in place.
PROMPT = (OPTIONS[:prompt] || "Annotation? (#{HOTKEYS.join('/')})") + ": "

# Main loop: show each remaining input line, ask for a hotkey until a known
# one is pressed, and append the line to the file mapped to that hotkey.
until INPUT_FILE.eof?
  line = INPUT_FILE.readline
  puts
  puts line
  puts

  char = nil
  loop do
    print PROMPT
    STDOUT.flush
    # get_char gives character-at-a-time unbuffered input. Hotkeys are stored
    # lower-cased, so the keypress is lower-cased too; this is what makes the
    # hotkeys case-insensitive as the help text promises.
    char = get_char.downcase
    puts
    STDOUT.flush
    break if HOTKEY_TO_FILE.key?(char)
  end

  output = HOTKEY_TO_FILE[char]
  output.write(line)
  # Flush before recording progress so the saved position never runs ahead
  # of what has actually been written to the output file.
  output.flush

  if PROGRESS_FILENAME
    # Byte offset of the next unannotated line, read back on the next run.
    File.open(PROGRESS_FILENAME, 'w') { |f| f.write(INPUT_FILE.pos.to_s) }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment