Created
December 9, 2022 04:02
-
-
Save khanhkhuu/605929648f82c0bcb8517391fe471370 to your computer and use it in GitHub Desktop.
Extract Table From PDF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Manual smoke test: runs extractDataFromPdf against a hard-coded Drive file
 * ID and logs the extracted rows.
 */
function test() {
  const data = extractDataFromPdf('1jVppnsxpiK56RY7vVFMkTa1_GV8SCWxo');
  console.log(data);
}
/**
 * Runs OCR on a PDF stored in Google Drive and returns the cell text of the
 * last table found in the converted document.
 *
 * The PDF blob is uploaded with `Drive.Files.insert` using `ocr: true`; the
 * resulting file is opened with `DocumentApp` and read. That intermediate
 * file is always moved to the trash before this function returns or throws,
 * so a failed extraction does not leave stray files behind.
 *
 * @param {string} pdfId - Drive file ID of the source PDF.
 * @param {string} [ocrLanguage='th'] - Value passed as the `ocrLanguage`
 *     option of the insert call.
 * @returns {string[][]} One array per table row, each holding the text of
 *     every cell in that row. Empty when the converted document contains no
 *     table.
 */
function extractDataFromPdf(pdfId, ocrLanguage = 'th') {
  const pdfFile = DriveApp.getFileById(pdfId);
  const { id } = Drive.Files.insert(
    {
      title: pdfFile.getName().replace(/\.pdf$/, ''),
      mimeType: pdfFile.getMimeType() || 'application/pdf',
    },
    pdfFile.getBlob(),
    {
      ocr: true,
      ocrLanguage,
      fields: 'id',
    }
  );

  try {
    const tables = DocumentApp.openById(id).getBody().getTables();
    if (tables.length === 0) {
      // Nothing to read; avoid dereferencing an undefined table.
      return [];
    }

    // Only the last table in the document is extracted.
    const table = tables[tables.length - 1];
    const data = [];
    const numberOfRows = table.getNumRows();
    for (let rowIndex = 0; rowIndex < numberOfRows; rowIndex++) {
      const row = table.getRow(rowIndex);
      const numberOfCells = row.getNumCells();
      const rowData = [];
      for (let cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
        rowData.push(row.getCell(cellIndex).getText());
      }
      data.push(rowData);
    }
    return data;
  } finally {
    // Clean up the intermediate OCR file even when reading it failed.
    DriveApp.getFileById(id).setTrashed(true);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment