Python 3调用百度OCR API实现剪贴板文字识别

  • Post author:
  • Post category:python


本程序调用百度OCR API,对剪贴板中的图片进行文字识别;配合CaptureScreen等截图软件使用,可快速识别屏幕上的文字。

#!python3
import urllib.request, urllib.parse
import os, io, sys, json, socket
import base64
from PIL import ImageGrab

# Give every socket created after this point a 30-second default timeout.
# The urlopen() calls in this script pass no timeout of their own, so this
# keeps them from waiting forever on an unresponsive server.
socket.setdefaulttimeout(30)

def get_auth():
    """Request an OAuth access token from the Baidu token endpoint.

    The API key and secret key are the placeholder literals defined below;
    they are sent with the ``client_credentials`` grant type.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when
        the response body is empty or contains no ``access_token`` field.

    Raises:
        urllib.error.URLError: if the request fails (``HTTPError`` is a
            subclass and is raised for HTTP error statuses).
    """
    apikey = 'your apikey'
    secret_key = 'your secret key'
    # urlencode() percent-escapes the credentials so characters such as
    # spaces, '&' or '+' cannot corrupt the query string.
    query = urllib.parse.urlencode({
        'grant_type': 'client_credentials',
        'client_id': apikey,
        'client_secret': secret_key,
    })
    host = 'https://aip.baidubce.com/oauth/2.0/token?' + query
    req = urllib.request.Request(host)
    req.add_header('Content-Type', 'application/json; charset=UTF-8')
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(req) as res:
        content = res.read()
    if not content:
        return None
    # .get() yields None instead of KeyError when the token field is absent,
    # which is the value the caller already checks for.
    return json.loads(content.decode()).get('access_token')

def ocr_clipboard():
    """Run OCR on the image currently held in the clipboard and print the text.

    The clipboard image is encoded as a base64 JPEG and posted to the
    ``general_basic`` OCR endpoint, authenticated with the token returned by
    ``get_auth()``. Each recognized line is printed between ``>>>`` and
    ``<<<`` markers. A diagnostic message is printed instead when the
    clipboard holds no image, no access token is available, or the response
    has no ``words_result`` field.

    Returns:
        None. All output goes to stdout.
    """
    im = ImageGrab.grabclipboard()
    # grabclipboard() gives None for a clipboard without image data and may
    # give a list of file names when files were copied; neither has .size.
    if im is None or isinstance(im, list):
        print('No image in clipboard')
        return
    print('image size: %sx%s\n>>>\n' % (im.size[0], im.size[1]))

    # JPEG cannot store an alpha channel or a palette, so normalize any
    # other mode to RGB before saving.
    if im.mode not in ('RGB', 'L'):
        im = im.convert('RGB')
    mf = io.BytesIO()
    im.save(mf, 'JPEG')
    # b64encode() produces one unbroken string; encodebytes() would insert
    # a newline every 76 characters into the payload.
    b64 = base64.b64encode(mf.getvalue())

    access_token = get_auth()
    if access_token is None:
        print('access_token is none')
        return

    url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=%s' % access_token
    data = urllib.parse.urlencode({'image': b64}).encode()
    req = urllib.request.Request(url, method='POST')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded')
    with urllib.request.urlopen(req, data) as p:
        o = json.loads(p.read().decode('utf-8'))

    words_result = o.get('words_result')
    if words_result is not None:
        for w in words_result:
            print(w['words'])
    elif 'words_result' not in o:
        # A payload without words_result (e.g. an API error reply) is shown
        # as-is rather than crashing with KeyError.
        print('OCR request failed: %s' % o)
    print('\n<<<')

if __name__ == '__main__':
    # Interactive loop: 'z' runs OCR on the clipboard image, 'q' exits, and
    # any other input just re-prompts. One shared prompt keeps the
    # advertised key in sync with the key the loop actually checks.
    prompt = 'ocr from clipboard image: z to ocr, q to quit-->'
    x = input(prompt)
    while x != 'q':
        if x == 'z':
            ocr_clipboard()
        x = input(prompt)
    print('bye')



版权声明:本文为onestab原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。