Skip to content

Instantly share code, notes, and snippets.

@arc279
Last active April 27, 2018 05:31
Show Gist options
  • Save arc279/fc82021d0d1b3639548a777dfe1cedc4 to your computer and use it in GitHub Desktop.
Save arc279/fc82021d0d1b3639548a777dfe1cedc4 to your computer and use it in GitHub Desktop.
groovyでpdfbox
curl -s -v -XPOST localhost:6001 --data-binary @all.pdf
@Grab('org.apache.pdfbox:pdfbox:2.0.8')
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.text.PDFTextStripper
/**
 * Extracts the text of a PDF file on disk.
 *
 * @param path path of the PDF file to load
 * @return the text produced by PDFTextStripper for the whole document
 */
def pdf2text(path) {
    PDDocument doc = PDDocument.load(new File(path))
    try {
        return new PDFTextStripper().getText(doc)
    } finally {
        // Close in finally so the document is released even when extraction throws.
        doc.close()
    }
}
// Print the extracted text of each PDF path passed on the command line.
for (pdfPath in args) {
    println(pdf2text(pdfPath))
}
@Grab('org.apache.pdfbox:pdfbox:2.0.8')
import com.sun.net.httpserver.HttpExchange
import com.sun.net.httpserver.HttpHandler
import com.sun.net.httpserver.HttpServer
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.text.PDFTextStripper
/**
 * Extracts the text of a PDF supplied as a stream.
 *
 * @param buf source handed to PDDocument.load (the HTTP handler in this script passes the request body stream)
 * @return the text produced by PDFTextStripper for the whole document
 */
def pdf2text(buf) {
    PDDocument doc = PDDocument.load(buf)
    try {
        return new PDFTextStripper().getText(doc)
    } finally {
        // Close in finally so the document is released even when extraction throws.
        doc.close()
    }
}
// Minimal HTTP endpoint: the request body is parsed as a PDF and its text is sent back.
def PORT = 6001
HttpServer server = HttpServer.create(new InetSocketAddress(PORT), 0)
server.createContext("/", new HttpHandler() {
    /**
     * Reads the request body as a PDF and replies with the extracted text as UTF-8.
     * When processing fails before the response has started, a 500 response is sent
     * so the client is not left with a connection closed without any HTTP reply.
     *
     * @param he the exchange for the current request; always closed before returning
     */
    @Override
    public void handle(HttpExchange he) throws IOException {
        // Tracks whether the status line was already sent; headers can only be sent once.
        boolean responseStarted = false
        try {
            def ifs = he.getRequestBody()
            def outText = pdf2text(ifs)
            def bs = outText.getBytes("UTF-8")
            println bs.length
            // The body is UTF-8 encoded, so declare the charset explicitly.
            he.getResponseHeaders().set("Content-Type", "text/plain; charset=UTF-8")
            he.sendResponseHeaders(200, bs.length)
            responseStarted = true
            he.getResponseBody().write(bs)
        } catch (Exception e) {
            println e
            if (!responseStarted) {
                try {
                    def msg = "Failed to extract text from PDF\n".getBytes("UTF-8")
                    he.getResponseHeaders().set("Content-Type", "text/plain; charset=UTF-8")
                    he.sendResponseHeaders(500, msg.length)
                    he.getResponseBody().write(msg)
                } catch (IOException sendFailure) {
                    // The connection is likely unusable; log and fall through to close().
                    println sendFailure
                }
            }
        } finally {
            he.close()
        }
    }
})
server.start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment