Created
October 6, 2010 21:24
-
-
Save amundo/614119 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
invert.js | |
given two lines of interlinear linguistic annotation on stdin, like: | |
Ehiwac c-a-kәri sәgәbehem h-ә-k-ec әrɨgeh=i. | |
spirit:8PL 8PL-R-say spirit:7PL 7PL-R-give-8PL sick=REL | |
return a data structure like this: | |
[ | |
{ | |
'word': 'Ehiwac', | |
'gloss': 'spirit:8PL'
}, | |
{ | |
'word': 'c-a-kәri', | |
'gloss': '8PL-R-say' | |
} | |
// etc | |
] | |
*/ | |
// Module-level state and I/O handles used by the script.
var lines = [],
    words = [],
    glosses = [],
    // process.stdin is the supported replacement for the legacy
    // process.openStdin() helper.
    stdin = process.stdin,
    // 'sys' is a deprecated alias of 'util'; the binding keeps its old name
    // so existing references to `sys` continue to resolve.
    sys = require('util');

// Deliver stdin data as UTF-8 strings rather than Buffers.
stdin.setEncoding('utf8');
// process.openStdin() also resumed the stream; do the same explicitly.
stdin.resume();
/**
 * Strip leading and trailing whitespace (including U+00A0 no-break spaces)
 * from a string.
 *
 * @param {string} text - Value to trim; any falsy value (null, undefined, "")
 *   is treated as the empty string.
 * @returns {string} The input without surrounding whitespace.
 */
function trim(text){
  // U+00A0 is kept explicit in case the engine's \s class does not cover it.
  var rtrim = /^[\s\u00A0]+|[\s\u00A0]+$/g;
  return (text || "").replace( rtrim, "" );
}
/**
 * Split a block of text into its individual lines.
 *
 * The block as a whole is passed through trim() first, then split on "\n".
 *
 * @param {string} text - Raw text, possibly spanning several lines.
 * @returns {string[]} The lines of the trimmed text, in order.
 */
function extractLines(text){
  return trim(text).split('\n');
}
// Echo each line of every incoming stdin chunk to stdout.
// NOTE(review): each chunk is trimmed and split on its own, so a line that
// straddles two chunks would be printed as two pieces — confirm whether the
// input should be buffered until the stream ends.
stdin.on('data', function (chunk) {
  extractLines(chunk).forEach(function (line) {
    console.log(line);
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment