Last active
January 30, 2018 10:37
-
-
Save onlyforbopi/1c6eb3301a3b117573c9633c6410b9e2 to your computer and use it in GitHub Desktop.
Two Acrobat JavaScript scripts: 1. Search a PDF document for a specific (hard-coded) string and extract all pages containing it into a new PDF. 2. Search a PDF document for a word entered by the user and extract all pages containing it into a new PDF.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Extract PDF based on Content (Acrobat JavaScript).
 *
 * Scans every page of the current document (`this`) for a word that contains
 * `stringToSearchFor`, then copies each matching page, in page order, into a
 * new document that is saved as <defaultpath><input name>.scanned.pdf and
 * closed.
 *
 * The script deliberately sticks to `var` and pre-ES2015 syntax, as the
 * original did.
 */

// 0-based indices of the pages that contain the search string.
var pageArray = [];

// The search string. Another value that appears in earlier notes: "ΚΙΣΣΑΜΟΥ".
var stringToSearchFor = "ΜΑΛΑΘΥΡΟΥ";

// Default folder the extracted document is saved to.
// NOTE(review): Acrobat documents saveAs's cPath as a device-independent path
// (e.g. "/z/cut_pdfs/") -- confirm this Windows-style path works as intended.
var defaultpath = "z:\\cut_pdfs\\";

// Input file name without its ".pdf" extension. The dot is escaped and the
// pattern anchored so that only a trailing extension (any letter case) is
// removed; the previous /.pdf/ removed the first "<any char>pdf" found
// anywhere in the name (e.g. "mypdf_report.pdf" became "m_report.pdf").
var inputfilename = this.documentFileName.replace(/\.pdf$/i, "");
var addendum = ".scanned";
var suffix = ".pdf";
var fullsuffix = addendum + suffix;

// Output file name and full output path.
var outputfilename = inputfilename + fullsuffix;
var foutpath = defaultpath + outputfilename;

// Collect every page that has at least one word containing the search string.
for (var p = 0; p < this.numPages; p++) {
    // Query the word count once per page instead of on every iteration.
    var wordCount = this.getPageNumWords(p);
    for (var n = 0; n < wordCount; n++) {
        if (this.getPageNthWord(p, n).indexOf(stringToSearchFor) >= 0) {
            pageArray.push(p);
            // One hit is enough; move on to the next page.
            break;
        }
    }
}

// Only create an output document when something matched.
if (pageArray.length > 0) {
    // The new document starts with a single blank page; matching pages are
    // appended after it and the blank page is removed once all are inserted.
    var d = app.newDoc();
    for (var i = 0; i < pageArray.length; i++) {
        d.insertPages({
            // Insert after the current last page so source order is kept.
            nPage: d.numPages - 1,
            cPath: this.path,
            nStart: pageArray[i],
            nEnd: pageArray[i]
        });
    }
    // Remove the leading blank page.
    d.deletePages(0);
    d.saveAs(foutpath);
    d.closeDoc();
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Extract PDF based on Content (Acrobat JavaScript), interactive variant.
 *
 * Asks the user for a search word, scans every page of the current document
 * (`this`) for a word that equals it exactly, then copies each matching page,
 * in page order, into a new document saved as
 * <defaultpath><input name>.scanned.pdf. The new document is left open after
 * saving.
 *
 * The script deliberately sticks to `var` and pre-ES2015 syntax, as the
 * original did.
 */

// 0-based indices of the pages that contain the search word.
var pageArray = [];

// The search word, entered by the user.
var stringToSearchFor = app.response("Enter search word");

// Default folder the extracted document is saved to.
// NOTE(review): Acrobat documents saveAs's cPath as a device-independent path
// (e.g. "/z/cut_pdfs/") -- confirm this Windows-style path works as intended.
var defaultpath = "z:\\cut_pdfs\\";

// Input file name without its ".pdf" extension. The dot is escaped and the
// pattern anchored so that only a trailing extension (any letter case) is
// removed; the previous /.pdf/ removed the first "<any char>pdf" found
// anywhere in the name (e.g. "mypdf_report.pdf" became "m_report.pdf").
var inputfilename = this.documentFileName.replace(/\.pdf$/i, "");
var addendum = ".scanned";
var suffix = ".pdf";
var fullsuffix = addendum + suffix;

// Output file name and full output path.
var outputfilename = inputfilename + fullsuffix;
var foutpath = defaultpath + outputfilename;

// app.response yields null when the dialog is cancelled; with no word (or an
// empty one) there is nothing to look for, so skip the scan entirely.
if (stringToSearchFor !== null && stringToSearchFor !== "") {
    // Collect every page that has at least one word equal to the search word.
    for (var p = 0; p < this.numPages; p++) {
        // Query the word count once per page instead of on every iteration.
        var wordCount = this.getPageNumWords(p);
        for (var n = 0; n < wordCount; n++) {
            if (this.getPageNthWord(p, n) === stringToSearchFor) {
                pageArray.push(p);
                // One hit is enough; move on to the next page.
                break;
            }
        }
    }
}

// Only create an output document when something matched.
if (pageArray.length > 0) {
    // The new document starts with a single blank page; matching pages are
    // appended after it and the blank page is removed once all are inserted.
    var d = app.newDoc();
    for (var i = 0; i < pageArray.length; i++) {
        d.insertPages({
            // Insert after the current last page so source order is kept.
            nPage: d.numPages - 1,
            cPath: this.path,
            nStart: pageArray[i],
            nEnd: pageArray[i]
        });
    }
    // Remove the leading blank page.
    d.deletePages(0);
    d.saveAs(foutpath);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment