| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 
 | 
 import io
 import os
 import sys
 import requests
 import base64
 import time
 
 import tkinter as tk
 from tkinter import filedialog
 
 from PIL import ImageGrab
 
 # Rebinds the DEFAULT_RETRIES attribute of the requests.adapters module to 2.
 # NOTE(review): presumably meant to make failed connections retry twice --
 # confirm it takes effect; a default argument that was already evaluated
 # from this constant would not see the rebind.
 requests.adapters.DEFAULT_RETRIES = 2
 # Common prefix that is concatenated with an endpoint path to form each API URL.
 basic_url = 'https://aip.baidubce.com/'
 def get_token():
     '''
     Return the API access token, fetching and caching it when necessary.

     The token is cached in a file named ``token`` inside ``sys.path[0]``.
     When that file cannot be read, or holds only whitespace, a new token is
     requested from the OAuth endpoint and written back to the cache file.
     If fetching or saving fails, the user is prompted and the whole
     procedure is repeated (Ctrl+C at the prompt aborts).

     Returns:
         str: the access token, without surrounding whitespace when it was
         read from the cache file.
     '''
     host = basic_url+'oauth/2.0/token?grant_type=client_credentials&client_id=xxx&client_secret=xxx'
     token_path = sys.path[0]+"/token"
     # Loop instead of recursing so repeated failures cannot exhaust the stack.
     while True:
         try:
             with open(token_path) as t:
                 # A trailing newline or a blank file would otherwise end up
                 # inside every request URL.
                 cached = t.read().strip()
         except (OSError, ValueError):
             # Missing/unreadable file, or undecodable contents.
             cached = ''
         if cached:
             return cached
         print("token不存在!准备获取..")
         try:
             response = requests.get(host, timeout=(5, TOUT))
             aTok = response.json()['access_token']
             with open(token_path, "w") as code:
                 code.write(aTok)
         except (requests.RequestException, ValueError, KeyError, TypeError, OSError):
             # Network error, non-JSON body, unexpected JSON shape, or the
             # cache file could not be written. KeyboardInterrupt is
             # deliberately not caught so Ctrl+C always cancels.
             input('无法获取或保存token!Ctrl+C取消重试')
             continue
         print('获取token完成!')
         return aTok
 
 def send_img():
     '''
     Submit the current image to the table recognition endpoint
     (asynchronous interface).

     Posts the module-level ``imgdata`` payload, authenticated with the
     module-level ``token``, and echoes the raw response text. On any
     failure the error is printed, the user is prompted, and the upload is
     attempted again (Ctrl+C at the prompt aborts).

     Returns:
         The ``request_id`` of the first entry in the response's ``result``
         list.
     '''
     request_url = basic_url+"rest/2.0/solution/v1/form_ocr/request"
     request_url = request_url + "?access_token=" + token
     headers = {'content-type': 'application/x-www-form-urlencoded'}
     # Loop instead of recursing so repeated failures cannot exhaust the stack.
     while True:
         print('发送图片中..')
         try:
             response = requests.post(request_url, data=imgdata, headers=headers, timeout=(5, TOUT))
             print(response.text)
             requestId = response.json()['result'][0]['request_id']
         except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
             # Network failure, non-JSON body, or a JSON body without the
             # expected result entry. KeyboardInterrupt is not caught here,
             # so Ctrl+C during the upload cancels immediately.
             print(sys.exc_info())
             input('上传失败,请尝试直接从网页复制图片! Ctrl+C取消重试')
             continue
         print('图片上传完成!')
         return requestId
 
 
 def fetch_result(request_id):
     '''
     Poll the table recognition job until it finishes and return its result
     (asynchronous interface).

     The result endpoint is queried up to 30 times, two seconds apart, with
     ``result_type`` set to ``excel``; a progress bar is redrawn on stdout
     after every successful poll. Polling stops early once the reported
     percentage reaches 100. If no usable ``result_data`` was obtained, the
     user is prompted and a fresh round of polling starts (Ctrl+C at the
     prompt aborts).

     Args:
         request_id: identifier returned when the image was submitted.

     Returns:
         The non-empty ``result_data`` value of the last successful poll
         (the caller passes it on as a download URL).
     '''
     request_url = basic_url+"rest/2.0/solution/v1/form_ocr/get_request_result"
     params = {"request_id": request_id,'result_type':'excel'}
     request_url = request_url + "?access_token=" + token
     headers = {'content-type': 'application/x-www-form-urlencoded'}
     tags=['—','\\','|','/']
     # Loop instead of recursing so repeated failures cannot exhaust the stack.
     while True:
         last_result = None
         for _ in range(30):
             try:
                 response = requests.post(request_url, data=params, headers=headers, timeout=(5, TOUT))
                 result = response.json()['result']
                 percent = int(result['percent'])
             except (requests.RequestException, ValueError, KeyError, TypeError):
                 # A failed or malformed poll is treated as transient: wait
                 # and try again. KeyboardInterrupt is not swallowed.
                 time.sleep(2)
                 continue
             last_result = result
             sys.stdout.write("\r识别中[%s]%3d%%|%s| %s/100" %(tags[(percent-1)%4], percent, "█"*(percent//2), percent))
             sys.stdout.flush()
             if percent == 100:
                 break
             time.sleep(2)
         print()
         result_data = last_result.get('result_data') if last_result else None
         # An empty value is useless to the caller (presumably the service
         # leaves it blank until the job is done -- TODO confirm), so it is
         # handled like a missing one instead of being returned.
         if result_data:
             print('识别完成->\033[4m{0}\033[0m'.format(result_data))
             return result_data
         input('无法获取识别状态!Ctrl+C取消重试')
 
 def download_xls(url):
     '''
     Download the recognised spreadsheet and save it where the user chooses.

     The file at ``url`` is fetched, then a "save as" dialog (defaulting to
     the ``.xls`` extension) asks for the destination. Cancelling the dialog
     skips saving and returns normally. On a download or write failure the
     user is prompted and the whole operation is repeated (Ctrl+C at the
     prompt aborts).

     Args:
         url: location of the spreadsheet to download.

     Returns:
         int: always 0.
     '''
     # Loop instead of recursing so repeated failures cannot exhaust the stack.
     while True:
         print('开始下载xls..')
         root = None
         try:
             r = requests.get(url, timeout=(5, TOUT))
             # Do not save an HTTP error page as if it were the spreadsheet.
             r.raise_for_status()
             root = tk.Tk()
             root.withdraw()
             f = filedialog.asksaveasfile(mode='wb', defaultextension=".xls", filetypes=[('Excel','.xls')])
             if f is None:
                 # The dialog returns None when cancelled; using that as a
                 # context manager would raise and be misreported as a
                 # download failure.
                 print('已取消保存。')
                 return 0
             with f:
                 f.write(r.content)
         except (requests.RequestException, OSError, tk.TclError):
             input('下载失败!Ctrl+C取消重试')
             continue
         finally:
             # Dispose of the hidden root window so each attempt does not
             # leave another Tk instance behind.
             if root is not None:
                 root.destroy()
         print('表格下载完成!')
         return 0
 def accurate_basic():
     '''
     Run high-accuracy text recognition on the current image
     (synchronous interface).

     Posts the module-level ``imgdata`` payload, authenticated with the
     module-level ``token``, and joins the ``words`` field of every entry in
     the response's ``words_result`` list. On failure the user is prompted
     and the request is repeated (Ctrl+C at the prompt aborts).

     Returns:
         str: the recognised lines, each followed by a newline.
     '''
     request_url = basic_url+"rest/2.0/ocr/v1/accurate_basic"
     request_url = request_url + "?access_token=" + token
     headers = {'content-type': 'application/x-www-form-urlencoded'}
     # Loop instead of recursing so repeated failures cannot exhaust the stack.
     while True:
         try:
             response = requests.post(request_url, data=imgdata, headers=headers, timeout=(5, TOUT))
             words_result = response.json()['words_result']
             retString = ''.join(words['words']+'\n' for words in words_result)
         except (requests.RequestException, ValueError, KeyError, TypeError):
             # Network failure, non-JSON body, or a body without the expected
             # words_result entries (e.g. an error reply).
             input('识别错误!Ctrl+C取消重试')
             continue
         # Announce success only after the reply has actually been parsed.
         print('识别完成!\n==========')
         return retString
 
 def _clipboard_png_bytes():
     '''Return the clipboard image encoded as PNG bytes, or None if unavailable.'''
     try:
         clipimg = ImageGrab.grabclipboard()
         # The clipboard may hold no bitmap at all (None or a list of file
         # names is handed back instead of an image object).
         if not hasattr(clipimg, 'save'):
             return None
         img_bytes = io.BytesIO()
         clipimg.save(img_bytes, format='PNG')
     except Exception:
         # Clipboard access and image encoding can fail in platform-specific
         # ways; all of them mean "no usable image". KeyboardInterrupt and
         # SystemExit are not Exception subclasses and still propagate.
         return None
     return img_bytes.getvalue()


 def _chosen_file_bytes():
     '''Let the user pick an image file; return (path, bytes) or None on cancel/failure.'''
     root = tk.Tk()
     root.withdraw()
     try:
         f = filedialog.askopenfile(mode="rb", filetypes=[('image files',('.png','.jpg','.jpeg','.bmp'))])
         if f is None:
             # Dialog was cancelled.
             return None
         with f:
             return f.name, f.read()
     except (OSError, tk.TclError):
         return None
     finally:
         # Dispose of the hidden root window so retries do not pile up Tk
         # instances.
         root.destroy()


 def parse_img():
     '''
     Load the image selected by the module-level ``IMG`` into ``imgdata``.

     ``IMG`` selects the source:
       * ``''``        - take the image from the clipboard (re-encoded as PNG);
       * ``'f'``       - let the user choose an image file;
       * ``'http...'`` - a link: downloaded and embedded when ``ISTBL`` is
                         truthy, otherwise passed to the service as ``url``.

     Image bytes are stored base64-encoded under the ``image`` key of the
     global ``imgdata``. Whenever a source fails, the user is prompted and
     the same source is tried again (Ctrl+C at the prompt aborts).

     Returns:
         tuple: (description of the source, "表格" or "文本" depending on
         ``ISTBL``).

     Raises:
         ValueError: if ``IMG`` is none of the supported forms (previously
         the function returned None and the caller failed while unpacking).
     '''
     global imgdata
     str2 = "表格" if ISTBL else "文本"
     if IMG == '':
         while True:
             png = _clipboard_png_bytes()
             if png is not None:
                 imgdata = {"image": base64.b64encode(png)}
                 return '剪切板', str2
             input('从剪切板获取失败!Ctrl+C取消重试')
     if IMG == 'f':
         while True:
             picked = _chosen_file_bytes()
             if picked is not None:
                 path, raw = picked
                 imgdata = {"image": base64.b64encode(raw)}
                 return path, str2
             input('选择图片文件失败!Ctrl+C取消重试')
     if IMG.startswith('http'):
         if ISTBL:
             while True:
                 try:
                     resp = requests.get(IMG, timeout=(5, TOUT))
                     # Do not upload an HTTP error page as if it were the image.
                     resp.raise_for_status()
                 except requests.RequestException:
                     input('图片下载失败!Ctrl+C取消重试')
                     continue
                 imgdata = {"image": base64.b64encode(resp.content)}
                 break
         else:
             imgdata = {"url": IMG}
         return '\033[4m{0}\033[0m'.format(IMG), str2
     raise ValueError('unsupported image source: {0!r}'.format(IMG))
 
 # Read timeout in seconds; send_img passes it as the second element of the
 # requests ``timeout`` tuple.
 TOUT = 20
 # Form payload for the OCR requests; filled in by parse_img().
 imgdata = {}
 # NOTE(review): presumably run so the Windows console honours the ANSI
 # underline escapes used in the prompts below -- confirm.
 os.system("color")
 token = get_token()
 while 1:
     # Strip stray whitespace so a pasted link still matches the 'http' prefix.
     IMG = input('输入链接(留空=剪切板;f=文件选择;q=退出):').strip()
     if IMG == 'q':
         break
     if IMG not in ('', 'f') and not IMG.startswith('http'):
         # parse_img() has no branch for other values; ask again rather than
         # letting the program fail on its result.
         print('无法识别的输入,请重新输入!')
         continue
     # Any non-empty answer selects table recognition; empty selects plain text.
     ISTBL = input("表格请输入文字再回车:")
     imgsrc, imgtype = parse_img()
     input('从 \033[4m{0}\033[0m 识别 \033[4m{1}\033[0m,回车确认上传..'.format(imgsrc, imgtype))
     if ISTBL:
         request_id = send_img()
         url = fetch_result(request_id)
         download_xls(url)
     else:
         print(accurate_basic())
 
 |