paddleocr 实操笔记（前向后梳理）

要点：

参考：

基于PaddleOCR的数字显示器字符识别

工业仪表数值识别

前言

问题分析

要处理电表中的数据，可以分步骤进行，拆解为以下两个问题：

感兴趣区域定位问题
OCR读数问题

针对问题1，经过实验与探索，找到了三种方案：

方案1，直接利用PaddleOCR默认自带的检测器，筛选掉其他无效的框体和信息，剩下的就是有用的。（未经过训练，直接使用预训练模型）

方案2，通过OpenCV图像处理的方法，根据电表字符区域特征进行相应的轮廓提取和颜色筛选，从而保证其得到有效的定位。

方案3，收集场景下的大量电表字符识别数据，制作数据集并进行标记，分别训练其定位和识别模型。

考虑到时间成本和人工成本问题，这里优先选择前两种方案。下面是使用OpenCV进行ROI区域定位的方法。

一导包

# 导入依赖库
import os
from tqdm import tqdm
import cv2
import csv
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import warnings
from paddleocr import PaddleOCR, draw_ocr

# Suppress all warnings
warnings.filterwarnings("ignore")

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # prevents a runtime error (presumably the duplicate OpenMP library abort - TODO confirm)

最后一行（设置环境变量 KMP_DUPLICATE_LIB_OK）用于防止运行时报错。

二相关函数

2.1 plt画图

# Visualization helper
def imshow_image(img_path):
    """Display the image stored at ``img_path`` in a 5x5-inch matplotlib figure.

    Args:
        img_path: Path of the image file, opened with PIL.
    """
    # Image.open keeps the underlying file open; the context manager releases
    # it once the figure has been shown.
    with Image.open(img_path) as img:
        plt.figure("test_img", figsize=(5, 5))
        plt.imshow(img)
        plt.show()

2.2 画出最大轮廓

def find_biggest_contour(image):
    """Return the contour with the largest area found in ``image``.

    Args:
        image: Image handed to ``cv2.findContours`` (get_find_display passes
            the mask produced by ``cv2.inRange``).

    Returns:
        The contour whose ``cv2.contourArea`` is largest; on ties the first
        one found wins.

    Raises:
        ValueError: If ``cv2.findContours`` finds no contour at all.
    """
    # Work on a copy so the caller's array is never modified
    # (older OpenCV releases alter the input of findContours).
    image = image.copy()
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(image, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        raise ValueError("no contours found in image")
    return max(contours, key=cv2.contourArea)

2.3 查找ROI轮廓

def get_find_display(input_path, lower=(0, 0, 0), higher=(255, 255, 255), output_path='./'):
    """Locate the display ROI by HSV thresholding, crop it and save the crop.

    The image is blurred, converted to HSV, thresholded to the inclusive range
    ``[lower, higher]`` and cleaned with a morphological close followed by an
    open. The bounding rectangle of the largest remaining contour is cropped
    from the original image and written to ``output_path`` under the input's
    file name.

    Args:
        input_path: Path of the image to read.
        lower: Lower (H, S, V) bound of the color mask.
        higher: Upper (H, S, V) bound of the color mask.
        output_path: Directory that receives the cropped image; created if it
            does not exist.

    Returns:
        A ``(result_img, img_crop)`` tuple: the input image with everything
        outside the mask blacked out, and the cropped ROI. When the mask is
        empty nothing is saved and ``img_crop`` is the full input image.

    Raises:
        OSError: If ``input_path`` cannot be read as an image.
    """
    img = cv2.imread(input_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise OSError(f"cannot read image: {input_path!r}")
    filename = os.path.basename(input_path)

    # Other blur methods can be substituted here
    frame_bgr = cv2.GaussianBlur(img, (7, 7), 0)

    # Threshold in HSV color space
    hsv = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2HSV)
    color_low = np.array(lower[:3])
    color_high = np.array(higher[:3])
    mask = cv2.inRange(hsv, color_low, color_high)

    # Close, then open, with the same 7x7 elliptical kernel
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    # Keep only the masked pixels of the original image
    result_img = cv2.bitwise_and(img, img, mask=mask)

    # An all-zero mask has no contour; skip the search instead of letting it fail
    biggest_contour = find_biggest_contour(mask) if mask.any() else None
    if biggest_contour is None:
        return result_img, img

    print('cnt_len:', len(biggest_contour))
    x, y, w, h = cv2.boundingRect(biggest_contour)
    print(x, y, w, h)
    img_crop = img[y:y + h, x:x + w]

    # os.path.join also works when output_path has no trailing separator
    save_path = os.path.join(output_path, filename)
    print('wpath:', save_path)
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    cv2.imwrite(save_path, img_crop)

    return result_img, img_crop

三直接使用OCR算法识别

3.1 提取目标识别区域

（找出文字区域）

# Directory that receives the cropped ROI image.
output_path = 'work/roi/'
# img_roi = 'test/133102_steerPoint5_preset1255_20220917221726_v.jpeg'
img_roi = 'test/number_item.jpg'
# HSV bounds handed to get_find_display: the lower bound only restricts
# saturation (>= 80); hue and value are left unrestricted.
lower = (0, 80, 0)
higher = (255, 255, 255)
result_img, img_crop = get_find_display(img_roi, lower, higher, output_path)

3.2 查看识别区域

# Display the cropped image stored under work/roi/.
img_roi_path = 'work/roi/number_item.jpg'
imshow_image(img_roi_path)

3.3 直接识别

def rec_display_roi(img_roi):
    """Run text recognition only (no detection) on an image.

    Args:
        img_roi: Image to recognize, passed straight to ``PaddleOCR.ocr``.

    Returns:
        A ``(text, score)`` tuple for the first recognition result.
    """
    ocr = PaddleOCR()
    result = ocr.ocr(img_roi, det=False)
    first = result[0]
    # Newer PaddleOCR releases nest the output per image ([[(text, score)]]),
    # older ones return [(text, score)] directly; unwrap the extra level when
    # it is present so both layouts yield (text, score).
    if not isinstance(first[0], str):
        first = first[0]
    return first[0], first[1]


rec_display_roi(img_roi_path)

3.4 直接使用图片进行识别

获取识别区

# Directory that receives the cropped image.
output_path = 'work/roi/'
img_roi = 'test/number_use.jpg'
# Full 0-255 range on every HSV channel, so the color threshold excludes nothing.
lower = (0, 0, 0)
higher = (255, 255, 255)
result_img, img_crop = get_find_display(img_roi, lower, higher, output_path)
# Display the original input image.
imshow_image(img_roi)

进行识别

# Display the image stored under work/roi/.
img_roi_path = "work/roi/number_use.jpg"
imshow_image(img_roi_path)


def rec_display_roi(img_roi):
    """Recognize ``img_roi`` without detection, using a GPU-enabled PaddleOCR.

    Returns the first entry of the first element of the OCR output.
    """
    recognizer = PaddleOCR(use_gpu=True)
    return recognizer.ocr(img_roi, det=False)[0][0]


# NOTE(review): the original image path (img_roi) is passed here, not the
# cropped img_roi_path shown above - confirm this is intended.
rec_display_roi(img_roi)

识别结果：('0598', 0.7145649790763855)

四直接使用OCR进行检测和识别

方案1，直接使用PaddleOCR对所有可能是文字的对象进行检测和识别，再从中筛选需要的结果。

4.1 OCR识别读数

# PaddleOCR supports several languages, selected through the `lang` parameter,
# e.g. `ch`, `en`, `fr`, `german`, `korean`, `japan`
ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
# Visualize the result
# img_path = 'test/133102_steerPoint5_preset1255_20220917221726_v.jpeg'
img_path = 'test/number_item.jpg'
save_path = 'work/dst/result.jpg'
result = ocr.ocr(img_path, cls=True)
for line in result:
    print(line)

image = Image.open(img_path).convert('RGB')
# NOTE(review): the indexing below assumes every element of `result` is
# [box, (text, score)], as in the sample output recorded after this cell;
# other PaddleOCR releases may nest the result per image - confirm.
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
# Draw boxes, texts and scores onto the image and save the rendering.
im_show = draw_ocr(image, boxes, txts, scores, font_path='work/font/simfang.ttf')
im_show = Image.fromarray(im_show)
im_show.save(save_path)

"""[[[1936.0, 56.0], [2461.0, 56.0], [2461.0, 109.0], [1936.0, 109.0]], ('2022-11-28 07:38:28', 0.8835511)]
[[[2461.0, 450.0], [2557.0, 450.0], [2557.0, 500.0], [2461.0, 500.0]], ('原水', 0.99717796)]
[[[12.0, 1310.0], [483.0, 1322.0], [481.0, 1404.0], [10.0, 1392.0]], ('水泵房仪表间', 0.93879247)]
im_show <PIL.Image.Image image mode=RGB size=1200x338 at 0x2852F99EB00>"""

五算法优化

def write_to_csv(log_path, filename='', result=0.00, score=0, mode_head=True):
    """Append one row to the CSV log at ``log_path``.

    Args:
        log_path: Path of the CSV file. It is opened in append mode, and its
            parent directory is created when missing.
        filename: Value for the ``filename`` column of a data row.
        result: Value for the ``result`` column of a data row.
        score: Value for the ``score`` column of a data row.
        mode_head: When true, write the header row ``filename,result,score``
            and ignore the other values; otherwise write a data row.
    """
    parent = os.path.dirname(log_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    row = ['filename', 'result', 'score'] if mode_head else [filename, result, score]
    # newline='' leaves line endings to the csv module; the context manager
    # closes the file even if writing fails.
    with open(log_path, 'a', encoding='utf-8', newline='') as file:
        csv.writer(file).writerow(row)


def get_bbox_area(box):
    """Return the area of the axis-aligned rectangle enclosing ``box``.

    Args:
        box: Sequence of points, each indexable as ``(x, y)``, e.g. the four
            corners of a text box.

    Returns:
        ``(max_x - min_x) * (max_y - min_y)`` over all points.
    """
    # Compare x with x and y with y; never mix the two coordinates of a point
    xs = [point[0] for point in box]
    ys = [point[1] for point in box]
    return (max(xs) - min(xs)) * (max(ys) - min(ys))


def quadArea(nodes):
    """Return the area of a polygon using the shoelace (cross-product) formula.

    Args:
        nodes: Vertices, each indexable as ``(x, y)``, listed in order
            (clockwise or counter-clockwise).

    Returns:
        The non-negative polygon area; 0 for fewer than three vertices.
    """
    count = len(nodes)
    twice_area = 0
    for i in range(count):
        current = nodes[i]
        following = nodes[(i + 1) % count]  # wraps around to close the polygon
        twice_area += current[0] * following[1] - following[0] * current[1]
    # The summed cross products equal twice the signed area
    return abs(twice_area) / 2


def bboxes_choose(boxes, txts, scores):
    """Keep only the detection whose box has the largest ``quadArea``.

    Args:
        boxes: Detected boxes, each a sequence of (x, y) vertices.
        txts: Recognized texts, parallel to ``boxes``.
        scores: Recognition scores, parallel to ``boxes``.

    Returns:
        Three single-element lists holding the box, text and score of the
        largest box (the first one on ties). With no boxes, an empty list is
        returned together with ``txts`` and ``scores`` as they were passed in.
    """
    if len(boxes) == 0:
        return [], txts, scores

    areas = [quadArea(quad) for quad in boxes]
    best = areas.index(max(areas))
    return [boxes[best]], [txts[best]], [scores[best]]


def _ocr_lines(result):
    """Flatten a PaddleOCR detection result to a list of [box, (text, score)]."""
    if not result or result[0] is None:
        return []
    first = result[0]
    # In the flat layout the first element is itself a line, i.e.
    # [box, (text, score)] with a string at first[1][0].
    is_line = (
        len(first) == 2
        and isinstance(first[1], (list, tuple))
        and len(first[1]) == 2
        and isinstance(first[1][0], str)
    )
    # Otherwise the result is nested per image and first holds that image's lines
    return list(result) if is_line else list(first)


def ocr_roi_det(img_path, font, save_path='./work/save/'):
    """Detect and recognize text, keep the largest box and save a visualization.

    Uses the module-level ``ocr`` PaddleOCR instance. The rendering produced by
    ``draw_ocr`` is written to ``save_path`` as ``<image name>_result.jpg``.

    Args:
        img_path: Path of the image to process.
        font: Path of the font file handed to ``draw_ocr``.
        save_path: Directory for the rendered image; created if missing.

    Returns:
        ``(text, score)`` of the largest detected text box, or ``('', -1)``
        when nothing was detected.
    """
    from PIL import Image

    # Example line:
    # [[[151.0, 53.0], [277.0, 53.0], [277.0, 111.0], [151.0, 111.0]], ('00.2', 0.9423570036888123)]
    lines = _ocr_lines(ocr.ocr(img_path, cls=True))
    boxes = [line[0] for line in lines]
    txts = [line[1][0] for line in lines]
    scores = [line[1][1] for line in lines]
    boxes, txts, scores = bboxes_choose(boxes, txts, scores)

    # Render the kept detection
    image = Image.open(img_path).convert('RGB')
    im_show = Image.fromarray(draw_ocr(image, boxes, txts, scores, font_path=font))

    fname = os.path.splitext(os.path.basename(img_path))[0]
    if save_path:
        # makedirs also creates missing parents such as ./work for ./work/save/
        os.makedirs(save_path, exist_ok=True)
    im_show.save(os.path.join(save_path, fname + '_result.jpg'))

    if not txts:
        # No text found: return an empty result instead of raising IndexError
        return '', -1
    return txts[0], scores[0]


def all_test_det(path, log_path, font, save_path):
    """Run ``ocr_roi_det`` on every file under ``path`` and log results to CSV.

    A header row is written first; every image that yields a non-empty text
    adds a ``filename,result,score`` row. Images without text are recorded
    with score -1 in the printed summary only.

    Args:
        path: Root directory, searched recursively for files.
        log_path: CSV file that receives the results.
        font: Font path forwarded to ``ocr_roi_det``.
        save_path: Output directory forwarded to ``ocr_roi_det``.
    """
    img_list = []
    for dirpath, _dirnames, filenames in os.walk(path):  # walk nested directories
        for filename in filenames:
            # os.walk yields directories without a trailing separator, so the
            # parts must be joined rather than concatenated
            img_list.append(os.path.join(dirpath, filename))

    count = 0
    img_ans_dic = {}
    write_to_csv(log_path)
    # Iterate over the whole list, including the last image
    for img_roi in tqdm(img_list):
        fname = os.path.splitext(os.path.basename(img_roi))[0]
        try:
            result, score = ocr_roi_det(img_roi, font, save_path)
        except IndexError:
            # ocr_roi_det indexes the first detection; treat "nothing
            # detected" as an empty result rather than aborting the batch
            result, score = '', -1
        print('result:', result, 'score:', score)

        if result == '':
            img_ans_dic[fname] = -1
            continue

        img_ans_dic[fname] = score
        count += 1
        write_to_csv(log_path, fname, result, score, False)

    print('count:', count)
    print('dict_len:', len(img_ans_dic))
    print('ans_dict:', img_ans_dic)

if __name__ == '__main__':
    # PaddleOCR supports several languages, selected through the `lang` parameter,
    # e.g. `ch`, `en`, `fr`, `german`, `korean`, `japan`
    print('查看ocr数据模型')
    ocr = PaddleOCR(use_angle_cls=False, lang="en")  # need to run only once to download and load model into memory
    print('ocr:', ocr)
    # NOTE(review): the assignment below is commented out, so `img_path` must
    # already exist at module level (it is assigned in the section 4.1 cell);
    # without it the ocr_roi_det call raises NameError.
    # img_path = 'test/number_use.jpg'
    font_path = 'work/font/simfang.ttf'
    # Single-image check: recognized text and score of the largest box
    a, b = ocr_roi_det(img_path, font_path)
    print('查看识别结果：', a, b)

    # Batch run over the test directory, logging results to CSV
    log_path = 'work/log/result.csv'
    save_path = 'work/save_result/'
    test_path = 'work/dataset/test/'
    all_test_det(test_path, log_path, font_path, save_path)

    # Result analysis
    # rs_img = 'work/save_result/133102_steerPoint5_preset1255_20220917221726_v_result.jpg'
    # rs_img = 'test/72635_steerPoint12_preset1294_20220919123447_v.jpeg'
    rs_img = 'test/number_use.jpg'
    imshow_image(rs_img)

    print('执行到最后位置》')

优化后的算法

原文链接：https://blog.csdn.net/March_A/article/details/130185650

前言

一 导包

二 相关函数