Skip to content

Instantly share code, notes, and snippets.

@also
Created March 26, 2012 18:39
Show Gist options
  • Save also/2208577 to your computer and use it in GitHub Desktop.
Save also/2208577 to your computer and use it in GitHub Desktop.
// fix a tsv with unescaped newlines
//
// Usage: pass the path of the TSV file as the first script argument.
//
// The header line defines how many tabs a complete record has. Physical
// lines are concatenated (with nothing inserted in place of the stray
// newline) until the running tab count reaches the header's tab count, and
// the joined record is then written to stdout as a single line.

/** Prints the repaired contents of the TSV file at `path` to stdout.
  *
  * An empty file produces no output. If the file ends in the middle of a
  * record, the partial record is still printed rather than failing.
  *
  * @param path path of the TSV file to repair
  */
def fixTsv(path: String): Unit = {
  val source = scala.io.Source.fromFile(path)
  try {
    val lines = source.getLines()
    // Guard the header read so an empty file does not throw.
    if (lines.hasNext) {
      val header = lines.next()
      val tabCount = header.count(_ == '\t')
      println(header)
      while (lines.hasNext) {
        // Always consume at least one physical line per record. Without
        // this, a header containing no tabs would never advance the iterator
        // and the outer loop would spin forever.
        val firstPart = lines.next()
        print(firstPart)
        var lineTabCount = firstPart.count(_ == '\t')
        // Keep appending continuation lines until the record is complete,
        // stopping early if the input runs out mid-record.
        while (lineTabCount < tabCount && lines.hasNext) {
          val continuation = lines.next()
          print(continuation)
          lineTabCount += continuation.count(_ == '\t')
        }
        println()
      }
    }
  } finally {
    // Release the file handle even if reading or printing fails.
    source.close()
  }
}

fixTsv(argv(0))
// check the number of tabs on each line in a tsv
//
// Usage: pass the path of the TSV file as the first script argument.
//
// The first line is treated as the header. Its column count is printed,
// followed by one message for every later line whose tab count differs from
// the header's.

/** Reports the lines of the TSV file at `path` whose column count differs
  * from the header's.
  *
  * Column counts are reported as tabs + 1, since N tabs separate N + 1
  * fields. Reported line numbers count the lines after the header, starting
  * at 1 (the header itself is not counted). An empty file produces no output.
  *
  * @param path path of the TSV file to check
  */
def checkTsv(path: String): Unit = {
  val source = scala.io.Source.fromFile(path)
  try {
    val lines = source.getLines()
    // Guard the header read so an empty file does not throw.
    if (lines.hasNext) {
      val tabCount = lines.next().count(_ == '\t')
      println("%d columns".format(tabCount + 1))
      for ((line, index) <- lines.zipWithIndex) {
        val lineTabCount = line.count(_ == '\t')
        if (lineTabCount != tabCount) {
          // index is 0-based; messages number the post-header lines from 1.
          println("line %10d has %d columns".format(index + 1, lineTabCount + 1))
        }
      }
    }
  } finally {
    // Release the file handle even if reading or printing fails.
    source.close()
  }
}

checkTsv(argv(0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment