Skip to content

Instantly share code, notes, and snippets.

@onlyforbopi
Last active January 30, 2018 10:37
Show Gist options
  • Select an option

  • Save onlyforbopi/1c6eb3301a3b117573c9633c6410b9e2 to your computer and use it in GitHub Desktop.

Select an option

Save onlyforbopi/1c6eb3301a3b117573c9633c6410b9e2 to your computer and use it in GitHub Desktop.
1. Search a PDF document for a specific string and extract all pages containing it into a new PDF. 2. Search a PDF document for a word entered by the user and extract all pages containing it into a new PDF.
/* Extract PDF based on Content */
// Acrobat JavaScript. Scans every page of the current document (`this`) for a
// word that CONTAINS the search string, copies each matching page into a new
// document, saves that document under the default output folder and closes it.
// Nothing is created when no page matches.

// Zero-based indices of the pages that contain at least one matching word.
var pageArray = [];

// Text to look for. A page matches when any of its words contains this text.
// The original comment also carried a mis-encoded second term, which appears
// to decode to "ΚΙΣΣΑΜΟΥ" -- TODO confirm before relying on it.
// To prompt for the term instead, assign app.response("Enter search word").
var stringToSearchFor = "ΜΑΛΑΘΥΡΟΥ";

// Folder the extracted document is written to.
// NOTE(review): this is a Windows-style path passed to Doc.saveAs unchanged;
// confirm it is accepted by the Acrobat version in use.
var defaultpath = "z:\\cut_pdfs\\";

// Base name of the current document without its ".pdf" extension.
// The pattern is escaped, anchored to the end and case-insensitive so that
// only a real trailing extension is removed (the previous /.pdf/ pattern also
// matched things like "xpdf" in the middle of a name and missed ".PDF").
var inputfilename = this.documentFileName.replace(/\.pdf$/i, "");

// Output name is "<input name>.scanned.pdf".
var addendum = ".scanned";
var suffix = ".pdf";
var fullsuffix = addendum.concat(suffix);
var outputfilename = inputfilename.concat(fullsuffix);

// Full path of the file to be written.
var foutpath = defaultpath.concat(outputfilename);

// Collect the index of every page that has at least one matching word.
for (var p = 0; p < this.numPages; p++) {
    // The word count of a page does not change while scanning it, so read it once.
    var wordCount = this.getPageNumWords(p);
    for (var n = 0; n < wordCount; n++) {
        if (this.getPageNthWord(p, n).indexOf(stringToSearchFor) >= 0) {
            pageArray.push(p);
            // One hit is enough for this page; move on to the next page.
            break;
        }
    }
}

if (pageArray.length > 0) {
    // app.newDoc() starts with one page of its own; matching pages are
    // appended after it and that initial page is removed afterwards.
    var d = app.newDoc();
    for (var n = 0; n < pageArray.length; n++) {
        d.insertPages({
            // Insert after the current last page so source order is preserved.
            nPage: d.numPages - 1,
            cPath: this.path,
            nStart: pageArray[n],
            nEnd: pageArray[n]
        });
    }
    // Remove the initial page that app.newDoc() created.
    d.deletePages(0);
    d.saveAs(foutpath);
    d.closeDoc();
}
/* Extract PDF based on Content */
// Acrobat JavaScript. Asks the user for a search word, scans every page of the
// current document (`this`) for a word that is EXACTLY equal to it, copies each
// matching page into a new document and saves that document under the default
// output folder. The new document is left open after saving. Nothing is
// created when no page matches or when the prompt is cancelled.

// Zero-based indices of the pages that contain at least one matching word.
var pageArray = [];

// Word to look for, entered by the user. app.response yields null when the
// dialog is cancelled; a fixed string can be assigned here instead.
var stringToSearchFor = app.response("Enter search word");

// Folder the extracted document is written to.
// NOTE(review): this is a Windows-style path passed to Doc.saveAs unchanged;
// confirm it is accepted by the Acrobat version in use.
var defaultpath = "z:\\cut_pdfs\\";

// Base name of the current document without its ".pdf" extension.
// The pattern is escaped, anchored to the end and case-insensitive so that
// only a real trailing extension is removed (the previous /.pdf/ pattern also
// matched things like "xpdf" in the middle of a name and missed ".PDF").
var inputfilename = this.documentFileName.replace(/\.pdf$/i, "");

// Output name is "<input name>.scanned.pdf".
var addendum = ".scanned";
var suffix = ".pdf";
var fullsuffix = addendum.concat(suffix);
var outputfilename = inputfilename.concat(fullsuffix);

// Full path of the file to be written.
var foutpath = defaultpath.concat(outputfilename);

// A cancelled prompt can never match a word, so skip the scan entirely.
if (stringToSearchFor !== null) {
    // Collect the index of every page that has at least one matching word.
    for (var p = 0; p < this.numPages; p++) {
        // The word count of a page does not change while scanning it, so read it once.
        var wordCount = this.getPageNumWords(p);
        for (var n = 0; n < wordCount; n++) {
            if (this.getPageNthWord(p, n) === stringToSearchFor) {
                pageArray.push(p);
                // One hit is enough for this page; move on to the next page.
                break;
            }
        }
    }
}

if (pageArray.length > 0) {
    // app.newDoc() starts with one page of its own; matching pages are
    // appended after it and that initial page is removed afterwards.
    var d = app.newDoc();
    for (var n = 0; n < pageArray.length; n++) {
        d.insertPages({
            // Insert after the current last page so source order is preserved.
            nPage: d.numPages - 1,
            cPath: this.path,
            nStart: pageArray[n],
            nEnd: pageArray[n]
        });
    }
    // Remove the initial page that app.newDoc() created.
    d.deletePages(0);
    d.saveAs(foutpath);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment