Skip to content

Instantly share code, notes, and snippets.

@Sciss
Last active July 31, 2018 17:25
Show Gist options
  • Save Sciss/8043c206bc92aa47384100256ac6ffb5 to your computer and use it in GitHub Desktop.
Save Sciss/8043c206bc92aa47384100256ac6ffb5 to your computer and use it in GitHub Desktop.
import sys.process._
/** Reads the complete contents of a file and decodes them as UTF-8 text.
  *
  * The stream is drained in a loop until end-of-file. `InputStream.available()`
  * is only an estimate of how many bytes can be read without blocking, and a
  * single `read(array)` call may fill just part of the array, so neither can
  * be relied upon to obtain the whole file.
  *
  * @param f the file to read
  * @return   the decoded text; an empty string for an empty file
  * @throws java.io.IOException if the file cannot be opened or read
  */
def read(f: File): String = {
  val fin = new java.io.FileInputStream(f)
  try {
    val out = new java.io.ByteArrayOutputStream()
    val buf = new Array[Byte](8192)
    // `read` returns -1 at end-of-file; otherwise the number of bytes placed in `buf`.
    Iterator
      .continually(fin.read(buf))
      .takeWhile(_ != -1)
      .foreach(n => out.write(buf, 0, n))
    new String(out.toByteArray, "UTF-8")
  } finally {
    fin.close()
  }
}
/** Prints the name of every PDF directly inside `dir` whose extracted text
  * contains `word`, ignoring case.
  *
  * Each PDF is converted with the external `pdftotext` program into a scratch
  * file, which is then read back with `read` and searched. PDFs for which
  * `pdftotext` exits with a non-zero status are skipped silently.
  *
  * @param dir  directory whose `pdf` children are searched
  * @param word search term; matched case-insensitively
  */
def run(dir: File, word: String = "swarm"): Unit = {
  // The extracted text is lower-cased before searching, so the search term must be as well.
  val needle = word.toLowerCase
  val pdfs   = dir.children(_.extL == "pdf").sorted(File.NameOrdering)
  // A per-call scratch file avoids depending on a fixed, shared path.
  val tmp    = java.io.File.createTempFile("pdftotext", ".txt")
  try {
    pdfs.foreach { pdf =>
      // Remove the previous output so stale text can never be attributed to this PDF.
      tmp.delete()
      val exit = List("pdftotext", pdf.path, tmp.path).!
      if (exit == 0 && read(tmp).toLowerCase.contains(needle)) println(pdf.name)
    }
  } finally {
    tmp.delete()
  }
}
// Root folder holding the paper collection.
val d0 = file("/data/texts/Papers")
// Search the "B" entry below the root, using the default search word of `run`.
// NOTE(review): `/` presumably resolves a child path of `d0` - confirm against the file helper in scope.
run(d0 / "B")
@Sciss
Copy link
Author

Sciss commented Jul 31, 2018

Variant:

// Root folder holding the paper collection.
val base = file("/data/texts/Papers")
// Every child with a "pdf" extension found in the directories directly below `base`.
// NOTE(review): `extL` is presumably the lower-cased file extension - confirm against the file helper in scope.
val all = base.children(_.isDirectory).flatMap(_.children(_.extL == "pdf"))

/** Searches all collected PDFs for `word` on a background thread, ignoring case.
  *
  * The call returns as soon as the worker thread has been started. The worker
  * walks `all` in name order, prints a heading each time the first character
  * of the file name changes, and prints the name of every PDF whose extracted
  * text contains the search term. Text is obtained by running the external
  * `pdftotext` program with its output sent to standard output; if that fails
  * the PDF is treated as having no text.
  *
  * @param word search term; matched case-insensitively
  */
def run(word: String): Unit = {
  import scala.sys.process._
  import scala.util.Try

  // The extracted text is lower-cased before searching, so the search term must be as well.
  val needle = word.toLowerCase

  val worker = new Thread {
    override def run(): Unit = {
      var last = "?"
      all.sorted(File.NameOrdering).foreach { f =>
        // `take` tolerates an empty name, unlike `substring(0, 1)`.
        val h = f.name.take(1).toLowerCase
        if (h != last) {
          println(s"---- LETTER: $h ----")
          last = h
        }
        // Best effort: a failing conversion yields empty text rather than aborting the scan.
        val contents: String = Try(Seq("pdftotext", "-q", f.path, "-").!!).getOrElse("")
        if (contents.toLowerCase.contains(needle)) println(f.name)
      }
    }
  }

  // Start only after the thread object is fully constructed.
  worker.start()
}

// Example: search all collected PDFs for the word "gradient".
run("gradient")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment