1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
|
import io import os import sys import requests import base64 import time
import tkinter as tk from tkinter import filedialog
from PIL import ImageGrab
# NOTE(review): presumably meant to make requests retry failed connections.
# Whether assigning this module attribute after import changes the behaviour
# of newly created adapters should be confirmed against the requests source.
requests.adapters.DEFAULT_RETRIES = 2

# Common prefix of every API URL built in this file.
basic_url = 'https://aip.baidubce.com/'


def get_token():
    """Return the API access token, fetching and caching it when needed.

    The token is cached in a file named ``token`` inside ``sys.path[0]``.
    If that file cannot be read or is empty, a new token is requested from
    the OAuth endpoint and written to the file.  On any failure the user is
    prompted and the whole procedure (cache lookup included) is retried.

    Reads the module global ``TOUT`` for the read timeout of the request.

    Returns:
        str: the access token.

    Raises:
        KeyboardInterrupt: if the user presses Ctrl+C at any point.
    """
    host = basic_url+'oauth/2.0/token?grant_type=client_credentials&client_id=xxx&client_secret=xxx'
    # os.path.join keeps the path relative when sys.path[0] is '' instead of
    # silently pointing at "/token" in the filesystem root.
    token_path = os.path.join(sys.path[0], "token")

    while True:
        try:
            with open(token_path) as t:
                cached = t.read().strip()
        except (OSError, ValueError):
            # Missing/unreadable/undecodable cache file: treat as "no token".
            cached = ''
        if cached:
            return cached

        print("token不存在!准备获取..")
        try:
            response = requests.get(host, timeout=(5, TOUT))
            aTok = response.json()['access_token']
            with open(token_path, "w") as code:
                code.write(aTok)
        except Exception:
            # Deliberately broad (network, JSON, missing key, disk errors) but
            # not bare, so Ctrl+C still aborts as the prompt promises.
            input('无法获取或保存token!Ctrl+C取消重试')
            continue
        print('获取token完成!')
        return aTok
def send_img():
    """Upload the current image to the asynchronous table-recognition endpoint.

    Reads the module globals ``token`` (access token), ``imgdata`` (form
    payload prepared by ``parse_img``) and ``TOUT`` (read timeout in seconds).
    The raw response text is echoed to stdout.  On any failure the exception
    info is printed, the user is prompted, and the upload is attempted again.

    Returns:
        The ``request_id`` field of the first entry in the response's
        ``result`` list.

    Raises:
        KeyboardInterrupt: if the user presses Ctrl+C at any point.
    """
    request_url = basic_url+"rest/2.0/solution/v1/form_ocr/request"
    request_url = request_url + "?access_token=" + token
    headers = {'content-type': 'application/x-www-form-urlencoded'}

    # Loop instead of recursing so repeated failures cannot exhaust the stack.
    while True:
        print('发送图片中..')
        try:
            response = requests.post(request_url, data=imgdata,
                                     headers=headers, timeout=(5, TOUT))
            print(response.text)
            requestId = response.json()['result'][0]['request_id']
        except Exception:
            # Broad on purpose (network/JSON/shape errors all mean "retry"),
            # but KeyboardInterrupt is no longer swallowed.
            print(sys.exc_info())
            input('上传失败,请尝试直接从网页复制图片! Ctrl+C取消重试')
            continue
        print('图片上传完成!')
        return requestId
def fetch_result(request_id, max_polls=30, poll_interval=2):
    """Poll the asynchronous table-recognition job and return its result.

    Posts ``request_id`` (with ``result_type`` set to ``excel``) to the
    result endpoint up to ``max_polls`` times, drawing a progress bar from
    the ``percent`` field of each response and stopping early at 100.  If no
    non-empty ``result_data`` is available afterwards, the user is prompted
    and polling starts over.

    Reads the module globals ``token`` and ``TOUT``.

    Args:
        request_id: identifier returned by the upload request.
        max_polls: maximum number of status requests per polling round.
        poll_interval: seconds to wait between status requests.

    Returns:
        The ``result_data`` field of the last successfully parsed response
        (presumably the download URL of the spreadsheet - confirm against
        the API documentation).

    Raises:
        KeyboardInterrupt: if the user presses Ctrl+C at any point.
    """
    request_url = basic_url+"rest/2.0/solution/v1/form_ocr/get_request_result"
    request_url = request_url + "?access_token=" + token
    params = {"request_id": request_id, 'result_type': 'excel'}
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    tags = ['—', '\\', '|', '/']  # spinner frames, selected by percent

    while True:
        last_result = None  # most recent parsed "result" object, if any
        last_error = None   # most recent polling error, shown on failure
        for _ in range(max_polls):
            try:
                response = requests.post(request_url, data=params,
                                         headers=headers, timeout=(5, TOUT))
                last_result = response.json()['result']
                percent = int(last_result['percent'])
                sys.stdout.write("\r识别中[%s]%3d%%|%s| %s/100" % (
                    tags[(percent-1) % 4], percent, "█"*(percent//2), percent))
                sys.stdout.flush()
                if percent == 100:
                    break
            except Exception as exc:
                # A single failed poll is not fatal; remember why it failed
                # and try again.  Not a bare except, so Ctrl+C still aborts.
                last_error = exc
            time.sleep(poll_interval)
        print()

        try:
            result_data = last_result['result_data']
        except (TypeError, LookupError):
            # No response was ever parsed, or it lacked the field.
            result_data = None

        # An empty result_data means the job did not finish in time; handing
        # it to the caller would only fail later with a less useful error.
        if result_data:
            print('识别完成->\033[4m{0}\033[0m'.format(result_data))
            return result_data

        if last_error is not None:
            print(repr(last_error))
        input('无法获取识别状态!Ctrl+C取消重试')
def download_xls(url):
    """Download the recognised spreadsheet and save it to a user-chosen file.

    Fetches ``url``, then opens a "save as" dialog (default extension
    ``.xls``) and writes the downloaded bytes to the selected file.  If the
    user cancels the dialog nothing is written.  On any failure the user is
    prompted and the download is attempted again.

    Reads the module global ``TOUT`` for the read timeout.

    Args:
        url: address of the file to download.

    Returns:
        int: always 0.

    Raises:
        KeyboardInterrupt: if the user presses Ctrl+C at any point.
    """
    while True:
        print('开始下载xls..')
        root = None
        try:
            r = requests.get(url, timeout=(5, TOUT))
            # Do not save an HTTP error page as if it were the spreadsheet.
            r.raise_for_status()
            root = tk.Tk()
            root.withdraw()  # hide the empty main window behind the dialog
            f = filedialog.asksaveasfile(mode='wb', defaultextension=".xls",
                                         filetypes=[('Excel', '.xls')])
            if f is None:
                # Dialog cancelled: asksaveasfile returns None, which cannot
                # be used as a context manager.  This is not a download error.
                return 0
            with f:
                f.write(r.content)
            print('表格下载完成!')
            return 0
        except Exception:
            # Broad on purpose, but not bare: Ctrl+C must still abort.
            input('下载失败!Ctrl+C取消重试')
        finally:
            # Release the hidden Tk root instead of leaking one per call.
            if root is not None:
                root.destroy()


def accurate_basic():
    """Run synchronous high-accuracy text recognition on the current image.

    Posts the module global ``imgdata`` to the ``accurate_basic`` endpoint
    using the module globals ``token`` and ``TOUT``.  On any failure the user
    is prompted and the request is sent again.

    Returns:
        str: the ``words`` field of every entry in the response's
        ``words_result`` list, each followed by a newline.

    Raises:
        KeyboardInterrupt: if the user presses Ctrl+C at any point.
    """
    request_url = basic_url+"rest/2.0/ocr/v1/accurate_basic"
    request_url = request_url + "?access_token=" + token
    headers = {'content-type': 'application/x-www-form-urlencoded'}

    while True:
        try:
            response = requests.post(request_url, data=imgdata,
                                     headers=headers, timeout=(5, TOUT))
            retString = ''.join(
                words['words'] + '\n'
                for words in response.json()['words_result']
            )
        except Exception:
            # Broad on purpose, but not bare: Ctrl+C must still abort.
            input('识别错误!Ctrl+C取消重试')
            continue
        # Announce success only after the response has actually been parsed.
        print('识别完成!\n==========')
        return retString
def parse_img():
    """Load the image selected by the global ``IMG`` into the global ``imgdata``.

    ``IMG`` selects the source:

    * ``''``  - grab the image from the clipboard and encode it as PNG;
    * ``'f'`` - let the user pick an image file in a dialog;
    * a string starting with ``http`` - in table mode (``ISTBL`` truthy) the
      image is downloaded, otherwise only its URL is stored.

    Image bytes are stored base64-encoded under the ``image`` key; a bare URL
    is stored under the ``url`` key.  Each failing step prompts the user and
    is then retried.  Also reads the module global ``TOUT``.

    Returns:
        tuple: ``(source_description, mode_label)`` where ``mode_label`` is
        "表格" in table mode and "文本" otherwise.

    Raises:
        ValueError: if ``IMG`` is none of the supported forms.
        KeyboardInterrupt: if the user presses Ctrl+C at any point.
    """
    global imgdata
    str2 = "表格" if ISTBL else "文本"

    if IMG == '':
        while True:
            try:
                clipimg = ImageGrab.grabclipboard()
                img_bytes = io.BytesIO()
                clipimg.save(img_bytes, format='PNG')
                imgdata = {"image": base64.b64encode(img_bytes.getvalue())}
                return '剪切板', str2
            except Exception:
                # Covers a clipboard without a usable image as well.
                input('从剪切板获取失败!Ctrl+C取消重试')

    if IMG == 'f':
        while True:
            root = tk.Tk()
            root.withdraw()  # hide the empty main window behind the dialog
            chosen = None
            try:
                chosen = filedialog.askopenfile(
                    mode="rb",
                    filetypes=[('image files', ('.png', '.jpg', '.jpeg', '.bmp'))])
                if chosen is not None:
                    with chosen:
                        raw = chosen.read()
            except Exception:
                chosen = None
            finally:
                # Release the hidden Tk root instead of leaking one per try.
                root.destroy()
            if chosen is not None:
                imgdata = {"image": base64.b64encode(raw)}
                return chosen.name, str2
            # Dialog cancelled or file unreadable: ask again, as before.
            input('选择图片文件失败!Ctrl+C取消重试')

    if IMG.startswith('http'):
        if ISTBL:
            while True:
                try:
                    resp = requests.get(IMG, timeout=(5, TOUT))
                    # Do not upload an HTTP error page as if it were an image.
                    resp.raise_for_status()
                    imgdata = {"image": base64.b64encode(resp.content)}
                    break
                except Exception:
                    input('图片下载失败!Ctrl+C取消重试')
        else:
            imgdata = {"url": IMG}
        return '\033[4m{0}\033[0m'.format(IMG), str2

    # Previously this fell off the end and returned None, which made the
    # caller fail with an opaque tuple-unpacking TypeError.
    raise ValueError('unsupported image source: {0!r}'.format(IMG))
# Read timeout in seconds; used as the second element of the
# (connect, read) timeout tuple passed to requests in send_img.
TOUT = 20
# Form payload for the OCR requests; filled in by parse_img().
imgdata = {}
# NOTE(review): presumably run to switch on ANSI escape handling in the
# Windows console (the prompts use \033[4m underlines).  "color" is a cmd.exe
# built-in, so it is only invoked on Windows to avoid a shell error elsewhere.
if os.name == 'nt':
    os.system("color")
token = get_token()

# Interactive loop: pick an image source, pick table/text mode, confirm, run.
while 1:
    # Strip stray whitespace so a pasted URL with a trailing space/newline
    # is still recognised.
    IMG = input('输入链接(留空=剪切板;f=文件选择;q=退出):').strip()
    if IMG == 'q':
        break
    # parse_img() only understands these three forms; reject anything else
    # here instead of crashing when its result is unpacked.
    if IMG not in ('', 'f') and not IMG.startswith('http'):
        print('无法识别的输入,请重新输入!')
        continue
    # Any non-empty answer selects table mode; an empty answer selects text.
    ISTBL = input("表格请输入文字再回车:")
    imgsrc, imgtype = parse_img()
    input('从 \033[4m{0}\033[0m 识别 \033[4m{1}\033[0m,回车确认上传..'.format(imgsrc, imgtype))
    if ISTBL:
        # Table mode: upload, poll for the result, then save the spreadsheet.
        request_id = send_img()
        url = fetch_result(request_id)
        download_xls(url)
    else:
        # Text mode: a single synchronous request; print the recognised text.
        print(accurate_basic())
|