基于 OpenCV 实现的手写数字识别

  • Post author:
  • Post category:其他


一、使用模板匹配算法

match.py:

import os

import Function

# Template-matching digit recogniser.
#
# Step 1: compress every training image to a 49-character 0/1 signature and
#         store it as "<file name without extension>:<signature>" per line.
# Step 2: for every test image, find the training signature with the smallest
#         Euclidean distance and tally correct / wrong / rejected per digit.
#
# File names are expected to start with the digit they depict (the first
# character is used as the label).

root_dir = "digits/train2"

with open("digits/picture7_7.txt", 'w') as file7_7:
    for fl in os.listdir(root_dir):
        img_str = fl[0:-4] + ":" + Function.image_compression(root_dir + '/' + fl)
        file7_7.write(img_str + '\n')

# Load the templates once instead of re-reading the file for every test image.
templates = []
with open("digits/picture7_7.txt", 'r') as file7_7:
    for line in file7_7:
        name, _, signature = line.rstrip('\n').rpartition(':')
        templates.append((name, signature))

root_dir = "digits/test"

# Per-digit counters (index == digit). They hold counts; percentages are
# derived when the summary is printed.
Correct_rate = [0] * 10
Error_rate = [0] * 10
Rejection_rate = [0] * 10

for fl in os.listdir(root_dir):
    true_digit = int(fl[0])
    test_img_str = Function.image_compression(root_dir + '/' + fl)

    # 7 == sqrt(49) is the largest distance two 49-bit signatures can have,
    # so a template only becomes the best match if it is strictly closer.
    min_dist = 7
    dist_img = ""
    # First characters (class labels) of later templates that tie with the
    # current best distance.
    tied_classes = []

    for name, signature in templates:
        temp_dist = Function.distance(test_img_str, signature)
        if temp_dist < min_dist:
            min_dist = temp_dist
            dist_img = name
            tied_classes = []
        elif temp_dist == min_dist:
            tied_classes.append(name[0:1])

    if dist_img and not tied_classes:
        # Exactly one nearest template.
        print("测试数字:", fl[0:-4], "  —  识别出来的结果:", dist_img)
        if fl[0] == dist_img[0]:
            Correct_rate[true_digit] += 1
        else:
            Error_rate[true_digit] += 1
    elif dist_img and all(cls == dist_img[0:1] for cls in tied_classes):
        # Several nearest templates, but every one of them is the same digit.
        Same_class = int(dist_img[0:1])
        print("测试数字:", fl[0:-4], "  —  识别出来的结果(类):", Same_class)
        if true_digit == Same_class:
            Correct_rate[true_digit] += 1
        else:
            Error_rate[true_digit] += 1
    else:
        # No template was closer than the sentinel, or the nearest templates
        # disagree about the digit: refuse to classify.
        print("测试数字:", fl[0:-4], "  —  该数字拒绝识别!")
        Rejection_rate[true_digit] += 1

print("————————————————")

for i in range(10):
    total = Correct_rate[i] + Error_rate[i] + Rejection_rate[i]
    # Convert counts to percentages of the images actually tested for digit i.
    scale = 100 / total if total else 0.0
    print("数字 {:d} 识别的正确率 = {:.2f}% ,错误率 = {:.2f}% ,拒绝识别率 = {:.2f}%".format(
        i, Correct_rate[i] * scale, Error_rate[i] * scale, Rejection_rate[i] * scale))

print("成功!")

Function.py:

import cv2


def image_compression(img_path):
    """Compress an image to a 49-character string of '0'/'1'.

    The top-left 28x28 pixel area is split into a 7x7 grid of 4x4 tiles,
    visited row by row. A tile yields '1' when at least 13 of its 16 pixels
    have a first-channel value above 127, otherwise '0'.

    Args:
        img_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A 49-character string, one character per tile.

    Raises:
        OSError: If ``cv2.imread`` could not load the image.
        IndexError: If the image is smaller than 28x28 pixels.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("cannot read image: %s" % img_path)

    tile = 4
    grid = 7
    bits = []
    for tile_row in range(grid):
        for tile_col in range(grid):
            top = tile_row * tile
            left = tile_col * tile
            # Index from the tile origin itself; subtracting 1 here would make
            # the first tile wrap around to the last row and column.
            bright = 0
            for i in range(tile):
                for j in range(tile):
                    if img[top + i][left + j][0] > 127:
                        bright += 1
            bits.append('1' if bright >= 13 else '0')
    return ''.join(bits)


def distance(test_str, train_str):
    """Return the Euclidean distance between two digit strings.

    Each character is read as an integer; the comparison covers the first
    ``len(train_str)`` positions, so extra trailing characters in
    ``test_str`` are ignored.

    Args:
        test_str: Signature of the image being classified.
        train_str: Signature of a training template.

    Returns:
        The distance as a float (0.0 for identical or empty signatures).

    Raises:
        ValueError: If ``test_str`` is shorter than ``train_str`` or a
            character is not a decimal digit.
    """
    if len(test_str) < len(train_str):
        raise ValueError("test_str is shorter than train_str")
    # int() parses the digit directly; eval() would execute arbitrary text.
    squared = 0.0
    for a, b in zip(test_str, train_str):
        squared += (int(a) - int(b)) ** 2
    return squared ** 0.5

二、k近邻算法实现

k.py:

import os

import numpy as np

import Function2


def handwritingClassficationTest():
    """Evaluate the k-nearest-neighbour classifier on the test directory.

    Every file in ``digits/trainingDigits`` becomes one 1024-element row of
    the training matrix; its label is the integer before the first
    underscore of the file name. Each file in ``digits/testDigits`` is then
    classified with k=3 and compared with the label from its own file name.
    The per-file result, the number of errors and the accuracy are printed.
    """
    trainingFileList = os.listdir('digits/trainingDigits')
    hwLabels = []
    trainingMat = np.zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = Function2.img2vector('digits/trainingDigits/%s' % fileNameStr)

    testFileList = os.listdir('digits/testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = Function2.img2vector('digits/testDigits/%s' % fileNameStr)
        # classify0 returns a (label, vote count) pair; only the label is the
        # recognised digit.
        classifierResult = Function2.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("测试出的数字是: %s, 正确的数字是: %s" % (classifierResult[0], classNumStr))
        if classifierResult[0] != classNumStr:
            errorCount += 1.0

    print("\n 识别错误的个数是: %s" % errorCount)
    # With no test files the accuracy is undefined; report NaN rather than
    # dividing by zero.
    accuracy = 1 - errorCount / float(mTest) if mTest else float('nan')
    print("\n 正确率为: %f" % accuracy)


# Run the k-nearest-neighbour evaluation when this script is executed.
handwritingClassficationTest()

Function2.py:

from os import listdir

from numpy import *

import operator


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training rows.

    Args:
        inX: Feature vector to classify; it must broadcast against the rows
            of ``dataSet`` (for example shape ``(n,)`` or ``(1, n)``).
        dataSet: 2-D array with one training sample per row.
        labels: Sequence of labels, ``labels[i]`` belonging to ``dataSet[i]``.
        k: Number of neighbours that vote. Values larger than the number of
            training rows are reduced to that number.

    Returns:
        A ``(label, votes)`` tuple for the label with the most votes. When
        several labels share the top count, the one whose first vote came
        from the nearest neighbour wins.

    Raises:
        ValueError: If ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    dataSetSize = dataSet.shape[0]
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    # Written as a conditional because `from numpy import *` may shadow the
    # builtin min().
    neighbours = k if k < dataSetSize else dataSetSize

    # Broadcasting subtracts inX from every row; the sign is irrelevant
    # because the differences are squared.
    diffMat = dataSet - inX
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    classCount = {}
    for i in range(neighbours):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # sorted() is stable, so equal counts keep their insertion order.
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0]


def img2vector(filename):
    """Read a 32x32 text image of digit characters into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and stored row by row.

    Args:
        filename: Path of the text file to read.

    Returns:
        A float array of shape ``(1, 1024)``.

    Raises:
        IndexError: If a line has fewer than 32 characters or the file has
            fewer than 32 lines.
        ValueError: If a character is not a decimal digit.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even if parsing fails.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            returnVect[0, 32 * i:32 * i + 32] = [int(lineStr[j]) for j in range(32)]
    return returnVect


def identify(image):
    """Classify a single 32x32 image against the training set on disk.

    The training matrix is rebuilt from ``digits/trainingDigits`` on every
    call, then ``classify0`` is run with k=3.

    Args:
        image: Object indexable as ``image[i][j]`` for ``i, j`` in 0..31,
            with elements accepted by ``int`` (presumably 0/1 pixel values —
            confirm against the caller).

    Returns:
        A dict with ``'result1'`` holding the first element of the
        ``classify0`` result (the label) and ``'result2'`` the second (the
        vote count).
    """
    imageVector = zeros((1, 1024))
    for i in range(32):
        for j in range(32):
            imageVector[0, 32 * i + j] = int(image[i][j])

    trainingFileList = listdir('digits/trainingDigits')
    hwLabels = []
    trainingMat = zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        # The label is the integer before the first underscore of the name.
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2vector('digits/trainingDigits/%s' % fileNameStr)

    classifierResult = classify0(imageVector[0], trainingMat, hwLabels, 3)
    return {'result1': classifierResult[0], 'result2': classifierResult[1]}

其中,模板匹配使用 28×28 像素的手写数字图像数据集;k近邻算法使用二进制文本形式的手写数字数据集(每个样本为 32 行、每行 32 个 0/1 字符)。



版权声明:本文为qq_56213081原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。