Last active
          April 27, 2018 05:31 
        
      - 
      
- 
        Save arc279/fc82021d0d1b3639548a777dfe1cedc4 to your computer and use it in GitHub Desktop. 
    groovyでpdfbox
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  # POST all.pdf as the raw request body to the local extraction server on port 6001.
  # --data-binary already implies POST, so -X is unnecessary; the Content-Type header
  # replaces curl's default form-encoding type, which would misdescribe a PDF body.
  curl -s -v -H 'Content-Type: application/pdf' --data-binary @all.pdf http://localhost:6001/
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | @Grab('org.apache.pdfbox:pdfbox:2.0.8') | |
| import org.apache.pdfbox.pdmodel.PDDocument | |
| import org.apache.pdfbox.text.PDFTextStripper | |
/**
 * Extracts the text of the PDF file at the given path.
 *
 * @param path file-system path of the PDF, passed to {@code new File(path)}
 * @return the text produced by {@code PDFTextStripper.getText} for the document
 */
def pdf2text(path) {
    PDDocument doc = PDDocument.load(new File(path))
    try {
        return new PDFTextStripper().getText(doc)
    } finally {
        // Close the document even when text extraction throws, so it is never leaked.
        doc.close()
    }
}
// Print the extracted text of each PDF path given on the command line, in order.
for (pdfPath in args) {
    println pdf2text(pdfPath)
}
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | @Grab('org.apache.pdfbox:pdfbox:2.0.8') | |
| import com.sun.net.httpserver.HttpExchange | |
| import com.sun.net.httpserver.HttpHandler | |
| import com.sun.net.httpserver.HttpServer | |
| import org.apache.pdfbox.pdmodel.PDDocument | |
| import org.apache.pdfbox.text.PDFTextStripper | |
/**
 * Extracts the text of a PDF supplied as in-memory or streamed data.
 *
 * @param buf PDF source handed straight to {@code PDDocument.load}; the HTTP
 *            handler in this script passes the request body stream
 * @return the text produced by {@code PDFTextStripper.getText} for the document
 */
def pdf2text(buf) {
    PDDocument doc = PDDocument.load(buf)
    try {
        return new PDFTextStripper().getText(doc)
    } finally {
        // Close the document even when text extraction throws, so it is never leaked.
        doc.close()
    }
}
// Port the extraction server listens on.
def PORT = 6001

// A backlog of 0 asks HttpServer to use its system default.
HttpServer server = HttpServer.create(new InetSocketAddress(PORT), 0)

// Every request under "/" is treated as a PDF upload: the request body is parsed
// and the extracted text is returned as UTF-8 plain text.
server.createContext("/", new HttpHandler() {
    @Override
    public void handle(HttpExchange he) throws IOException {
        // Tracks whether the status line has gone out; once it has, an error
        // status can no longer be sent for this exchange.
        boolean headersSent = false
        try {
            def outText = pdf2text(he.getRequestBody())
            def bs = outText.getBytes("UTF-8")
            println bs.length
            // Declare the charset so clients decode the UTF-8 body correctly.
            he.getResponseHeaders().add("Content-Type", "text/plain; charset=UTF-8")
            he.sendResponseHeaders(200, bs.length)
            headersSent = true
            he.getResponseBody().write(bs)
        } catch (Exception e) {
            println e
            if (!headersSent) {
                // Report the failure to the client instead of closing the
                // connection without any HTTP response.
                def msg = "PDF text extraction failed\n".getBytes("UTF-8")
                he.getResponseHeaders().set("Content-Type", "text/plain; charset=UTF-8")
                he.sendResponseHeaders(500, msg.length)
                he.getResponseBody().write(msg)
            }
        } finally {
            // Always release the exchange's streams, on success or failure.
            he.close()
        }
    }
})
server.start()
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment