Skip to content

Instantly share code, notes, and snippets.

@flashlab
Created January 20, 2025 09:30
Show Gist options
  • Save flashlab/5262f0549f6a7d6c004a8a1e50caf0d9 to your computer and use it in GitHub Desktop.
Save flashlab/5262f0549f6a7d6c004a8a1e50caf0d9 to your computer and use it in GitHub Desktop.
Baidu OCR (text or table)
# encoding:utf-8
# https://console.bce.baidu.com/ai/#/ai/ocr/overview/index
# pip3 install requests pillow
import io
import os
import sys
import requests
import base64
import time
import tkinter as tk
from tkinter import filedialog
from PIL import ImageGrab
# Intended to make requests retry a failed connection twice.
# NOTE(review): requests' HTTPAdapter appears to capture DEFAULT_RETRIES as a
# default-argument value when the class is defined, so reassigning the module
# attribute here probably does not affect requests.get/post -- confirm, and
# mount an HTTPAdapter(max_retries=2) on a Session if retries are really wanted.
requests.adapters.DEFAULT_RETRIES = 2
# Base URL; every endpoint path used in this file is appended to it.
basic_url = 'https://aip.baidubce.com/'
# Placeholder credentials. get_token() sends api_id as `client_id` and
# api_key as `client_secret` in the OAuth token request.
api_id = 'xxxxx'
api_key = 'xxxxxxxxx'
def get_token():
    """Return the access token, reading it from the on-disk cache if possible.

    The token is cached in a file named ``token`` inside ``sys.path[0]``.
    When that file is missing, unreadable or empty, a new token is requested
    from the OAuth endpoint with ``api_id``/``api_key`` and written to the
    cache. On failure the user is prompted; pressing Enter retries and
    Ctrl+C aborts (KeyboardInterrupt is deliberately not caught).

    Returns:
        str: the access token.
    """
    token_path = sys.path[0] + "/token"
    host = '{0}oauth/2.0/token?grant_type=client_credentials&client_id={1}&client_secret={2}'.format(basic_url, api_id, api_key)
    # Loop instead of recursing so repeated failures cannot exhaust the stack.
    while True:
        try:
            with open(token_path) as t:
                # Strip so a trailing newline from a hand-edited cache file
                # does not end up inside the request URL.
                cached = t.read().strip()
        except (OSError, ValueError):
            # Missing/unreadable file, or undecodable content.
            cached = ''
        if cached:
            return cached
        print("token不存在!准备获取..")
        try:
            response = requests.get(host, timeout=(5, TOUT))
            aTok = response.json()['access_token']
            with open(token_path, "w") as code:
                code.write(aTok)
        except Exception:
            # `Exception` (not a bare except) lets Ctrl+C propagate.
            input('无法获取或保存token,请检查密钥! Ctrl+C取消重试')
        else:
            print('获取token完成!')
            return aTok
def send_img():
    """Upload the prepared image to the asynchronous table-OCR endpoint.

    Posts the module-level ``imgdata`` form payload and prints the raw
    response text. On any failure the exception info is printed and the
    user is prompted; pressing Enter retries and Ctrl+C aborts.

    Returns:
        The ``request_id`` taken from ``result[0]`` of the JSON response.
    """
    request_url = basic_url + "rest/2.0/solution/v1/form_ocr/request" + "?access_token=" + token
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    # Loop instead of recursing so repeated failures cannot exhaust the stack.
    while True:
        print('发送图片中..')
        try:
            response = requests.post(request_url, data=imgdata, headers=headers, timeout=(5, TOUT))
            print(response.text)
            requestId = response.json()['result'][0]['request_id']
        except Exception:
            # `Exception` (not a bare except) so Ctrl+C during the upload
            # really cancels, as the prompt below promises.
            print(sys.exc_info())
            input('上传失败,请尝试直接从网页复制图片! Ctrl+C取消重试')
        else:
            print('图片上传完成!')
            return requestId
def fetch_result(request_id):
    """Poll the asynchronous table-OCR job and return its result data.

    Polls the result endpoint up to 30 times, 2 seconds apart, drawing a
    progress bar from the reported ``percent``. Polling stops early at 100%.
    If no non-empty ``result_data`` was obtained, the last polling error (if
    any) is printed and the user is prompted; Enter polls again, Ctrl+C
    aborts.

    Args:
        request_id: identifier returned by ``send_img``.

    Returns:
        The ``result_data`` value of the response (requested with
        ``result_type='excel'``; the caller passes it to ``download_xls``
        as a URL).
    """
    request_url = basic_url + "rest/2.0/solution/v1/form_ocr/get_request_result" + "?access_token=" + token
    params = {"request_id": request_id, 'result_type': 'excel'}  # 'json' or 'excel'
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    tags = ['—', '\\', '|', '/']
    # Loop instead of recursing so repeated failures cannot exhaust the stack.
    while True:
        result = None       # last successfully parsed `result` object
        last_error = None   # last polling failure, shown if we give up
        for _ in range(30):
            try:
                response = requests.post(request_url, data=params, headers=headers, timeout=(5, TOUT))
                result = response.json()['result']
                percent = int(result['percent'])
            except Exception as err:
                # Best effort: a single failed poll must not abort the wait,
                # but remember why it failed instead of discarding it.
                last_error = err
            else:
                sys.stdout.write("\r识别中[%s]%3d%%|%s| %s/100" % (tags[(percent - 1) % 4], percent, "█" * (percent // 2), percent))
                sys.stdout.flush()
                if percent == 100:
                    break
            time.sleep(2)
        print()
        try:
            result_data = result['result_data']
        except (TypeError, KeyError):
            # No poll succeeded, or the response carried no result_data.
            result_data = None
        # An empty value means the job has not finished; do not hand it to
        # the caller as if it were a download URL.
        if result_data:
            print('识别完成->\033[4m{0}\033[0m'.format(result_data))
            return result_data
        if last_error is not None:
            print(last_error)
        input('无法获取识别状态!Ctrl+C取消重试')
def download_xls(url):
    """Download the recognised table and save it where the user chooses.

    Fetches ``url``, then opens a "save as" dialog and writes the response
    body to the chosen file. Cancelling the dialog is treated as a normal
    outcome (nothing is saved). On a download or write failure the user is
    prompted; Enter retries and Ctrl+C aborts.

    Args:
        url: location of the generated spreadsheet.

    Returns:
        int: always 0.
    """
    # Loop instead of recursing so repeated failures cannot exhaust the stack.
    while True:
        print('开始下载xls..')
        try:
            res = requests.get(url, timeout=(5, TOUT))
            # Without this an HTTP error page would be saved as the .xls.
            res.raise_for_status()
            root = tk.Tk()
            root.withdraw()
            try:
                f = filedialog.asksaveasfile(mode='wb', defaultextension=".xls", filetypes=[('Excel', '.xls')])
            finally:
                # Always release the hidden root window, even if the dialog
                # raises, so failed attempts do not leak Tk instances.
                root.destroy()
            if f is None:
                # asksaveasfile returns None when the dialog is cancelled;
                # `with None` would raise and be misreported as a failure.
                print('已取消保存!')
                return 0
            with f:
                f.write(res.content)
        except Exception:
            # `Exception` (not a bare except) lets Ctrl+C propagate.
            input('下载失败!Ctrl+C取消重试')
        else:
            print('表格下载完成!')
            return 0
def ocr_sync(flag):
    """Run synchronous text recognition and return the recognised text.

    Posts the module-level ``imgdata`` payload to ``rest/2.0/ocr/v1/<flag>``.
    On an HTTP error status the response body is printed and the user is
    prompted; Enter retries and Ctrl+C aborts. Any other failure (connection
    error, unexpected response shape) prints whatever diagnostic is available
    and gives up.

    Args:
        flag: endpoint name appended to the OCR base path
            (the caller passes ``"accurate_basic"``).

    Returns:
        str | None: every ``words`` entry of ``words_result`` followed by a
        single space, concatenated; ``None`` when recognition failed for a
        reason other than an HTTP error status.
    """
    request_url = basic_url + "rest/2.0/ocr/v1/" + flag + "?access_token=" + token
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    # Loop instead of recursing so repeated failures cannot exhaust the stack.
    while True:
        r = None
        try:
            r = requests.post(request_url, data=imgdata, headers=headers, timeout=(5, TOUT))
            r.raise_for_status()
            retString = ''.join(words['words'] + ' ' for words in r.json()['words_result'])
        except requests.exceptions.HTTPError as e:
            print(e.response.text)
            input('识别错误!Ctrl+C取消重试')
        except Exception:
            # `r` is still None when the request itself failed; reading
            # r.text unconditionally would raise inside this handler.
            print(r.text if r is not None else sys.exc_info())
            return None
        else:
            # Announce success only after the response has been parsed.
            print('识别完成,随后写入剪切板!\n==========')
            return retString
def parse_img():
    """Load the image selected by the global ``IMG`` into the global ``imgdata``.

    ``IMG`` selects the source:

    * ``''``  -- grab the image from the clipboard and encode it as PNG;
    * ``'f'`` -- let the user pick an image file in a dialog;
    * a string starting with ``'http'`` -- in table mode (``ISTBL`` truthy)
      download the image and send its bytes, otherwise pass the URL through
      as ``{"url": IMG}``.

    On failure the user is prompted; Enter retries and Ctrl+C aborts.

    Returns:
        tuple[str, str]: a description of the image source and the
        recognition type label ("表格" or "文本").

    Raises:
        ValueError: if ``IMG`` is none of the supported forms. (Previously
            the function returned None here and the caller failed while
            unpacking the result.)
    """
    global imgdata
    str2 = "表格" if ISTBL else "文本"
    if IMG != '' and IMG != 'f' and not IMG.startswith('http'):
        raise ValueError('unsupported image source: {0!r}'.format(IMG))
    # Loop instead of recursing so repeated failures cannot exhaust the stack.
    while True:
        if IMG == '':
            try:
                clipimg = ImageGrab.grabclipboard()
                img_bytes = io.BytesIO()
                # Raises if the clipboard does not hold an image
                # (grabclipboard then yields something without a usable .save).
                clipimg.save(img_bytes, format='PNG')
                imgdata = {"image": base64.b64encode(img_bytes.getvalue())}
                return '剪切板', str2
            except Exception:
                input('从剪切板获取失败!Ctrl+C取消重试')
        elif IMG == 'f':
            try:
                root = tk.Tk()
                root.withdraw()
                try:
                    f = filedialog.askopenfile(mode="rb", filetypes=[('image files', ('.png', '.jpg', '.jpeg', '.bmp'))])
                finally:
                    # Always release the hidden root window so cancelled or
                    # failed attempts do not leak Tk instances.
                    root.destroy()
                if f is None:
                    # Dialog cancelled: treat as a failed selection.
                    raise ValueError('no file selected')
                with f:
                    imgdata = {"image": base64.b64encode(f.read())}
                return f.name, str2
            except Exception:
                input('选择图片文件失败!Ctrl+C取消重试')
        else:
            if ISTBL:
                try:
                    resp = requests.get(IMG, timeout=(5, TOUT))
                    # Do not upload an HTTP error page as if it were an image.
                    resp.raise_for_status()
                    imgdata = {"image": base64.b64encode(resp.content)}
                except Exception:
                    input('图片下载失败!Ctrl+C取消重试')
                    continue
            else:
                imgdata = {"url": IMG}
            return '\033[4m{0}\033[0m'.format(IMG), str2
# Read timeout (seconds) used as the second element of request timeouts.
TOUT = 20
# Form payload for the OCR requests; filled in by parse_img().
imgdata = {}
# On Windows, running `color` is a common trick to make the console honour
# the ANSI escape sequences used in the prompts; elsewhere the command does
# not exist and would only print a shell error, so skip it.
if os.name == 'nt':
    os.system("color")
token = get_token()  # obtain the access token
while 1:
    IMG = input('输入图片链接(留空=剪切板;f=文件选择;q=退出):').strip()
    if IMG == 'q':
        break
    # Reject input parse_img() cannot handle instead of crashing on it.
    if IMG not in ('', 'f') and not IMG.startswith('http'):
        print('输入无效,请重新输入!')
        continue
    # Compare against 'y' explicitly: the raw input string is truthy for any
    # non-empty answer, so e.g. "n" used to select table mode as well.
    ISTBL = input("表格识别请输入y:").strip().lower() == 'y'
    imgsrc, imgtype = parse_img()
    if input('即将从 \033[4m{0}\033[0m 识别 \033[4m{1}\033[0m,取消输入c:'.format(imgsrc, imgtype)) == 'c':
        continue
    if ISTBL:
        request_id = send_img()  # upload the image, get the request_id
        url = fetch_result(request_id)  # wait for the result URL
        download_xls(url)  # download the result
    else:
        res = ocr_sync("accurate_basic")
        if res is None:
            # ocr_sync already printed the failure details; there is
            # nothing to print or copy.
            continue
        print(res)
        try:
            r = tk.Tk()
            r.withdraw()
            r.clipboard_clear()
            r.clipboard_append(res)
            r.update()
            r.destroy()
        except Exception:
            print('写入剪切板失败!请手动复制')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment