Skip to content

Instantly share code, notes, and snippets.

@jmcarbo
Last active August 29, 2015 14:10
Show Gist options
  • Save jmcarbo/1ae3139df7193e80e62b to your computer and use it in GitHub Desktop.
Save jmcarbo/1ae3139df7193e80e62b to your computer and use it in GitHub Desktop.
Search pdf
package main
import (
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"regexp"
"path"
"github.com/cheggaaa/pb"
)
// main searches the PDF files directly inside a directory for a pattern.
//
// Usage: prog <pattern> [directory]
//
// pattern is a regular expression, matched case-insensitively against the
// text that the external "pdftotext" tool extracts from each file whose name
// ends in ".pdf". directory defaults to "./". The names of all matching files
// are printed once every entry has been examined. Files that pdftotext cannot
// process are logged and skipped.
func main() {
	if len(os.Args) < 2 {
		// Without a pattern there is nothing to search for; stop here rather
		// than carrying on with an empty directory name.
		fmt.Fprintln(os.Stderr, "Need search string")
		os.Exit(1)
	}

	dirname := "./"
	if len(os.Args) > 2 {
		dirname = os.Args[2]
	}

	// Compile the user-supplied pattern once, up front, so that an invalid
	// expression is reported instead of silently matching nothing.
	pattern, err := regexp.Compile("(?i)" + os.Args[1])
	if err != nil {
		log.Fatalf("invalid search pattern %q: %v", os.Args[1], err)
	}
	// The dot is escaped so that only a literal ".pdf" suffix qualifies.
	pdfName := regexp.MustCompile(`\.pdf$`)

	fmt.Printf("Reading %s\n", dirname)
	list, err := ioutil.ReadDir(dirname)
	if err != nil {
		log.Print("error reading directory")
		log.Fatal(err)
	}

	bar := pb.StartNew(len(list))
	// Start empty: a pre-filled slice would print blank "Match" lines.
	var found []string
	for _, l := range list {
		bar.Increment()
		if l.IsDir() || !pdfName.MatchString(l.Name()) {
			continue
		}
		// "-" makes pdftotext write the extracted text to stdout.
		out, err := exec.Command("pdftotext", "-enc", "UTF-8", path.Join(dirname, l.Name()), "-").Output()
		if err != nil {
			// Best effort: one unreadable PDF must not abort the whole scan.
			log.Printf("Error executing pdftotext ... %s: %v", l.Name(), err)
			continue
		}
		// Match on the raw bytes to avoid copying the text into a string.
		if pattern.Match(out) {
			found = append(found, l.Name())
		}
	}
	bar.FinishPrint("The End!")

	for _, f := range found {
		fmt.Printf("Match %s\n", f)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment