Created
September 28, 2017 04:54
-
-
Save rob0tca/b7fd4488d84a49e5ca87536048629406 to your computer and use it in GitHub Desktop.
Google Apps Script for performing OCR on all JPEGs found in the specified Drive folder. Extracts text to a Google Sheet, where it's mapped to the JPEG's filename.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Runs OCR on every JPEG in a Drive folder and writes the extracted text to a
 * Google Sheet, one row per resulting Google Doc (columns: Filename, Text).
 *
 * Steps:
 *   1. Look up the project folder by name.
 *   2. Upload each JPEG through the Drive advanced service with `ocr: true`,
 *      which creates a Google Doc containing the recognised text. The Doc is
 *      created directly inside the project folder.
 *   3. Clear the target sheet and append one row for every Google Doc found
 *      in the folder.
 *
 * Uses `Drive.Files.insert`, so the Drive advanced service must be enabled
 * for the script project.
 *
 * NOTE(review): every run uploads every JPEG again, so repeated runs create
 * additional Docs (and therefore additional rows) for the same image.
 *
 * @throws {Error} If no Drive folder with the configured name exists.
 */
function extractTextOnOpen() {
  //ADD YOUR VALUES BELOW
  var folderName = "[YOUR PROJECT FOLDER]";
  var sheetId = "[YOUR SHEET ID]";

  //Define folder
  var folders = DriveApp.getFoldersByName(folderName);
  if (!folders.hasNext()) {
    // Fail with an actionable message instead of an opaque iterator error.
    throw new Error('No Drive folder named "' + folderName + '" was found.');
  }
  var folder = folders.next();
  var folderId = folder.getId();

  //Find all jpegs in folder
  var images = folder.getFilesByType("image/jpeg");
  while (images.hasNext()) {
    //Convert each jpeg to a Google Doc with OCR
    var image = images.next();
    var imageName = image.getName();
    // Strip only the final extension so "scan.v2.jpg" becomes "scan.v2";
    // fall back to the full name if stripping would leave nothing.
    var docName = imageName.replace(/\.[^.]*$/, "") || imageName;
    var resource = {
      title: docName,
      mimeType: "image/jpeg",
      // Create the Doc directly in the project folder. This avoids looking
      // the new file up again by name (which could match an unrelated file)
      // and moving it out of the root folder afterwards.
      parents: [{ id: folderId }]
    };
    Drive.Files.insert(resource, image.getBlob(), { ocr: true });
  }

  //Find all Google Docs in folder
  var docs = folder.getFilesByType("application/vnd.google-apps.document");

  //Set up spreadsheet
  var ss = SpreadsheetApp.openById(sheetId);
  SpreadsheetApp.setActiveSpreadsheet(ss);
  Logger.log('File name: ' + ss.getName());
  var sheet = SpreadsheetApp.getActiveSheet();
  sheet.clear();
  sheet.appendRow(["Filename", "Text"]);

  //Populate spreadsheet with OCR text
  while (docs.hasNext()) {
    var docFile = docs.next();
    var doc = DocumentApp.openById(docFile.getId());
    var name = doc.getName();
    var body = doc.getBody().getText();
    //Add item data to spreadsheet
    sheet.appendRow([name, body]);
  }
}
Thank you for this script.
@nafiz333, to solve this, you have to activate the Drive API by going to Resources > Advanced Google services > Drive API.
Thank you for this script.
Resources > Advanced Google services > Drive API.
Hi - I'm a noob... could you please point out how I can get this to run on a specific Google Drive, or a folder within it, every 12 hours, say? Thanks
Thank you for this! Very useful
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@nafiz333 use: Drive.Files.insert(file, image, { ocr: true, ocrLanguage: "en" });
Get the language code for ocrLanguage at https://cloud.google.com/vision/docs/languages