Created
November 6, 2023 01:39
-
-
Save elecnix/6cddad5e278e44a013749b810c9e98e3 to your computer and use it in GitHub Desktop.
OCR with ChatGPT Plus
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
OCR with ChatGPT Plus | |
Convert a bunch of image files into text, using Playwright to control your Chrome browser. | |
Prerequisites: | |
- Run `chrome --remote-debugging-port=9222` | |
- Log in to ChatGPT | |
- Select GPT-4 | |
""" | |
import asyncio | |
import base64 | |
import mimetypes | |
from pathlib import Path | |
from playwright.async_api import async_playwright | |
from playwright.async_api import Browser | |
from playwright.async_api import Page | |
async def drag_and_drop(page: Page, file: Path, drop_zone_selector: str) -> None:
    """Simulate dropping a local file onto an element of the page.

    The file is read in Python, sent to the browser as base64, rebuilt there
    as a ``File`` inside a ``DataTransfer`` object, and delivered to the
    target element with a synthetic ``drop`` event.

    Args:
        page: Playwright page that contains the drop target.
        file: Local file to upload (a plain string path is accepted as well).
        drop_zone_selector: Selector of the element receiving the ``drop`` event.
    """
    file = Path(file)
    payload_base64 = base64.b64encode(file.read_bytes()).decode("ascii")

    # guess_type() returns None for unknown extensions; forwarding None would
    # reach JavaScript as null instead of a MIME string, so fall back to the
    # empty string, which the File API uses for "type unknown".
    mime_type, _ = mimetypes.guess_type(file.name)
    if mime_type is None:
        mime_type = ""

    # Destructure in the parameter list so no implicit globals are created
    # in the page's JavaScript context.
    data_transfer = await page.evaluate_handle("""([data, filename, mimeType]) => {
        const dt = new DataTransfer();
        const bytes = Uint8Array.from(atob(data), c => c.charCodeAt(0));
        const file = new File([bytes], filename, { type: mimeType });
        dt.items.add(file);
        return dt;
    }""", [payload_base64, file.name, mime_type])
    await page.dispatch_event(drop_zone_selector, 'drop', {"dataTransfer": data_transfer})
async def send(page: Page, file_path: "str | Path", prompt: str) -> "str | None":
    """Attach one file to the chat, submit the prompt, and return the reply text.

    Args:
        page: Playwright page showing the chat UI.
        file_path: File to attach to the message; strings are converted to
            ``Path`` before being handed to ``drag_and_drop``.
        prompt: Text typed into the prompt box.

    Returns:
        The text content of the last conversation turn found on the page
        after the "Regenerate" button has appeared.

    Raises:
        playwright.async_api.TimeoutError: If one of the awaited selectors
            does not show up within its timeout.
        IndexError: If no conversation turn is present on the page.
    """
    file_path = Path(file_path)

    await page.wait_for_selector('#prompt-textarea')
    await drag_and_drop(page, file_path, '#prompt-textarea')
    # Fixed pause after the drop; presumably gives the UI time to register
    # the attachment before the prompt is typed (TODO confirm).
    await asyncio.sleep(3)
    await page.fill('#prompt-textarea', prompt)

    # Wait up to one minute for the send button to become enabled.
    await page.wait_for_selector('button[data-testid="send-button"]:enabled', timeout=1000*60)
    await page.press('#prompt-textarea', 'Enter')

    # Wait up to five minutes for the "Regenerate" button to appear.
    await page.wait_for_selector('button:has-text("Regenerate")', timeout=1000*60*5)

    turns = await page.query_selector_all('[data-testid^="conversation-turn-"]')
    if not turns:
        raise IndexError("no conversation turn found on the page")
    return await turns[-1].text_content()
async def main():
    """Send every file of the current directory to ChatGPT and print the replies.

    Connects to a Chrome instance listening on ``http://localhost:9222``,
    opens a new page in its first browser context, and calls ``send`` once per
    file. A file whose processing times out is retried up to five times
    (with a five-second pause between attempts) before it is skipped.
    """
    # Playwright raises its own TimeoutError, which does not derive from the
    # builtin one, so a bare ``except TimeoutError`` would never catch it.
    from playwright.async_api import TimeoutError as PlaywrightTimeoutError

    prompt = """Transcribe in Markdown. Write page number. Answer only with the transcription."""
    retries = 5

    async with async_playwright() as playwright:
        directory = Path('./')
        # iterdir() yields entries in arbitrary order; sort so that files are
        # processed in a stable, predictable order.
        files = sorted(entry for entry in directory.iterdir() if entry.is_file())

        browser: Browser = await playwright.chromium.connect_over_cdp("http://localhost:9222")
        page = await browser.contexts[0].new_page()
        await page.goto("https://chat.openai.com/")

        for file in files:
            for attempt in range(retries):
                try:
                    print(f"Processing file: {file}")
                    response = await send(page, file, prompt)
                except (PlaywrightTimeoutError, TimeoutError):
                    if attempt < retries - 1:
                        print(f"Retrying {attempt + 1}/{retries}...")
                        await asyncio.sleep(5)
                    else:
                        print("Max retries reached, skipping file.")
                else:
                    print(response)
                    break

        await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment