使用opencv的SVM和神经网络实现车牌识别

  • Post author:
  • Post category:其他




一、前言



本文参考自《深入理解Opencv 实用计算机视觉项目解析》中的自动车牌识别项目,并对其中的方法理解后,再进行实践。深刻认识到实际上要完成车牌区域准确定位、车牌区域中字符的准确分割,字符准确识别这一系列步骤的困难。所以最后的识别效果也是有待进一步提高。



二、程序流程


程序流程如下所示:



相应的main函数如下


#include "carID_Detection.h"


int main()
{    
	Mat img_input = imread("testCarID.jpg");
	//如果读入图像失败
	if(img_input.empty())
	{
		fprintf(stderr, "Can not load image %s\n", "testCarID.jpg");
		return -1;
	}

	Mat hsvImg ;
	cvtColor(img_input,hsvImg,CV_BGR2HSV);
	vector<Mat> planes;
	split(hsvImg,planes);
	Mat sImg;
	sImg = planes[1];  //获得红色分量
	blur(sImg,sImg,Size(3,3));   //3*3高斯滤波
	vector <RotatedRect>  rects_sImg;
	posDetect(sImg ,rects_sImg);      	


	Mat grayImg;
	RgbConvToGray(img_input ,grayImg);
	medianBlur(grayImg,grayImg,3);   //3*3中值滤波

	vector <RotatedRect>  rects_grayImg;
	posDetect(grayImg ,rects_grayImg);

	vector <RotatedRect>  rects_closeImg;  //车牌区域较为贴近
	posDetect_closeImg(sImg ,rects_closeImg);

	vector <RotatedRect>  rects_optimal;
	optimPosDetect(rects_sImg,rects_grayImg,rects_closeImg,rects_optimal);

	vector <Mat> output_area;
	normalPosArea(img_input ,rects_optimal,output_area);  //获得144*33的候选车牌区域output_area

	CvSVM  svmClassifier;
			
 	svm_train(svmClassifier);  //使用SVM对正负样本进行训练
 
 	vector<Mat> plates_svm;   //需要把候选车牌区域output_area图像中每个像素点作为一行特征向量,后进行预测
 	for(int i=0;i< output_area.size(); ++i)
 	{
 		Mat img = output_area[i];
 		Mat p = img.reshape(1,1);
 		p.convertTo(p,CV_32FC1);
 		int response = (int)svmClassifier.predict( p );
 		if (response == 1)
 			plates_svm.push_back(output_area[i]);    //保存预测结果
 	}
	
	if(plates_svm.size() != 0)  
	{
		imshow("Test", plates_svm[0]);     //正确预测的话,就只有一个结果plates_svm[0]
		waitKey(0);
	}
	else
	{
		std::cout<<"定位失败";
		return -1;
	}
		
	//从SVM预测获得得车牌区域中分割得字符区域
	vector <Mat> char_seg;
	char_segment(plates_svm[0],char_seg); 

	//获得7个字符矩阵的相应特征矩阵
	vector <Mat> char_feature;
	char_feature.resize(7);
	for (int i =0;i<char_seg.size() ;++ i)
	 	features(char_seg[i], char_feature[i],5);

	//神经网络训练
	CvANN_MLP ann_classify;
	ann_train(ann_classify, 34 ,48);   //34为样本的类别数,48为隐藏层的神经元数

	//字符预测
	vector<int>  char_result;
	classify(ann_classify,char_feature,char_result);


 	//此函数等待按键,按键盘任意键就返回
	svmClassifier.clear();

	return 0;
}



三、代码简介


本文编程没有使用类的概念,纯属面向过程编程,以下为主要使用的函数

//carID_Detection.h
#include "opencv2/core/core.hpp"
#include "opencv2//highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ml/ml.hpp"
#include <time.h>
#include <stdlib.h>

#include <iostream>
using namespace std;

using namespace cv;


void RgbConvToGray(const Mat& inputImage,Mat & outpuImage); //rgb转为灰度


void posDetect(Mat &, vector <RotatedRect> &); //粗步选取候选车牌区域
bool verifySizes(const RotatedRect & );  //车牌区域需要满足的条件
void posDetect_closeImg(Mat &inputImage , vector <RotatedRect> & rects  ) ;  //考虑到车牌距离非常近的时候的情况
bool verifySizes_closeImg(const RotatedRect & candidate); //距离近时的车牌区域需要满足的条件     


void optimPosDetect(vector <RotatedRect> & rects_sImg , vector <RotatedRect> & rects_grayImg, //车牌区域进一步定位
	vector <RotatedRect> & rects_closeImg,vector <RotatedRect> & rects_optimal );
float calOverlap(const Rect& box1,const Rect& box2);  //计算2个矩阵的重叠比例


void normalPosArea(Mat &intputImg, vector<RotatedRect> &rects_optimal, vector <Mat>& output_area );  //车牌裁剪,标准化为144*33


void svm_train(CvSVM & );  //取出SVM.xml中的特征矩阵和标签矩阵进行训练


void char_segment(const Mat & inputImg,vector <Mat>&); //对车牌区域中的字符进行分割
bool char_verifySizes(const RotatedRect &);   //字符区域需要满足的条件
void char_sort(vector <RotatedRect > & in_char ); //对字符区域进行排序


void features(const Mat & in , Mat & out ,int sizeData);  //获得一个字符矩阵对应的特征向量
Mat projectHistogram(const Mat& img ,int t);             //计算水平或累计直方图,取决于t为0还是1


void ann_train(CvANN_MLP &ann ,int numCharacters, int nlayers); //取出ann_xml中的数据,并进行神经网络训练
void classify(CvANN_MLP& ann, vector<Mat> &char_feature , vector<int> & char_result); //使用神经网络模型预测车牌字符,并打印至屏幕
//carID_Detection.cpp

#include "carID_Detection.h"



void RgbConvToGray(const Mat& inputImage,Mat & outpuImage)  //g = 0.3R+0.59G+0.11B
{
	outpuImage = Mat(inputImage.rows ,inputImage.cols ,CV_8UC1);  

	for (int i = 0 ;i<inputImage.rows ;++ i)
	{
		uchar *ptrGray = outpuImage.ptr<uchar>(i); 
		const Vec3b * ptrRgb = inputImage.ptr<Vec3b>(i);
		for (int j = 0 ;j<inputImage.cols ;++ j)
		{
			ptrGray[j] = 0.3*ptrRgb[j][2]+0.59*ptrRgb[j][1]+0.11*ptrRgb[j][0];	
		}
	}
}

void posDetect_closeImg(Mat &inputImage , vector <RotatedRect> & rects  )   //初步找到候选区域 rects
{
	
	Mat img_canny;
	Canny(inputImage, img_canny, 150, 220);
	Mat img_threshold;
	threshold(img_canny , img_threshold,0,255 , CV_THRESH_OTSU+CV_THRESH_BINARY); //otsu算法自动获得阈值

	Mat element = getStructuringElement(MORPH_RECT ,Size(15 ,3));  //闭形态学的结构元素
	morphologyEx(img_threshold ,img_threshold,CV_MOP_CLOSE,element);  //形态学处理

	//寻找车牌区域的轮廓
	vector< vector <Point> > contours;
	findContours(img_threshold ,contours,CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);//只检测外轮廓
	//对候选的轮廓进行进一步筛选
	vector< vector <Point> > ::iterator itc = contours.begin();

	while( itc != contours.end())
	{
		RotatedRect mr = minAreaRect(Mat( *itc )); //返回每个轮廓的最小有界矩形区域
  		if(!verifySizes_closeImg(mr))  //判断矩形轮廓是否符合要求
		{
			itc = contours.erase(itc);
		}
		else     
		{
			
			rects.push_back(mr);
			++itc;
		}      
	}

}


bool verifySizes_closeImg(const RotatedRect & candidate)
{
	float error = 0.4;
	const float aspect = 44/14; //长宽比
	int min = 100*aspect*100; //最小区域
	int max = 180*aspect*180;  //最大区域
	float rmin = aspect - aspect*error; //考虑误差后的最小长宽比
	float rmax = aspect + aspect*error; //考虑误差后的最大长宽比

	int area = candidate.size.height * candidate.size.width;
	float r = (float)candidate.size.width/(float)candidate.size.height;
	if(r <1)
		r = 1/r;

	if( (area < min || area > max) || (r< rmin || r > rmax)  )
		return false;
	else
		return true;
}

void posDetect(Mat &inputImage , vector <RotatedRect> & rects  )   //初步找到候选区域 rects
{
	Mat img_sobel;
	Sobel(inputImage , img_sobel , CV_8U, 1,0,3,1,0);

	Mat img_threshold;
	threshold(img_sobel , img_threshold,0,255 , CV_THRESH_OTSU+CV_THRESH_BINARY); //otsu算法自动获得阈值

	Mat element = getStructuringElement(MORPH_RECT ,Size(15 ,3));  //闭形态学的结构元素
	morphologyEx(img_threshold ,img_threshold,CV_MOP_CLOSE,element);  

	//寻找车牌区域的轮廓
	vector< vector <Point> > contours;
	findContours(img_threshold ,contours,CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);//只检测外轮廓
	//对候选的轮廓进行进一步筛选
	vector< vector <Point> > ::iterator itc = contours.begin();

	while( itc != contours.end())
	{
		RotatedRect mr = minAreaRect(Mat( *itc )); //返回每个轮廓的最小有界矩形区域
		if(!verifySizes(mr))  //判断矩形轮廓是否符合要求
		{
			itc = contours.erase(itc);
		}
		else     
		{
			rects.push_back(mr);
			++itc;
		}      
	}
}

bool verifySizes(const RotatedRect & candidate)
{
	float error = 0.4;
	const float aspect = 44/14; //长宽比
	int min = 20*aspect*20; //最小区域
	int max = 180*aspect*180;  //最大区域
	float rmin = aspect - 2*aspect*error; //考虑误差后的最小长宽比
	float rmax = aspect + 2*aspect*error; //考虑误差后的最大长宽比
	
	int area = candidate.size.height * candidate.size.width;
	float r = (float)candidate.size.width/(float)candidate.size.height;
	if(r <1)
		r = 1/r;

	if( (area < min || area > max) || (r< rmin || r > rmax)  ) //满足该条件才认为该candidate为车牌区域
		return false;
	else
		return true;
}

void optimPosDetect(vector <RotatedRect> & rects_sImg , vector <RotatedRect> & rects_grayImg,
	vector <RotatedRect> & rects_closeImg,vector <RotatedRect> & rects_optimal )
{
	for (int i=0;i<rects_sImg.size() ;++ i)
	{
		for (int j=0;j<rects_grayImg.size() ; ++j)
		{
			if (calOverlap(rects_sImg[i].boundingRect() , rects_grayImg[j].boundingRect()) > 0.2)
			{
				if(rects_sImg[i].boundingRect().width * rects_sImg[i].boundingRect().height
					>= rects_grayImg[j].boundingRect().width * rects_grayImg[j].boundingRect().height)
					rects_optimal.push_back(rects_sImg[i]);
				else
					rects_optimal.push_back(rects_grayImg[j]);	
			}
		}
	}

	if (rects_closeImg.size()<2 )  //只考虑1个,为了速度
	{
		for (int i =0;i < rects_optimal.size();++ i )
			for (int j =0;j < rects_closeImg.size();++ j)
			{			
				if ((    calOverlap(rects_optimal[i].boundingRect() , rects_closeImg[j].boundingRect()) < 0.2 &&
					calOverlap(rects_optimal[i].boundingRect() , rects_closeImg[j].boundingRect()) > 0.05))
				{
					rects_optimal.push_back(rects_closeImg[j]);
				}
			}
	}

}

float calOverlap(const Rect& box1,const Rect& box2)
{
	if (box1.x > box2.x+box2.width) { return 0.0; }
	if (box1.y > box2.y+box2.height) { return 0.0; }
	if (box1.x+box1.width < box2.x) { return 0.0; }
	if (box1.y+box1.height < box2.y) { return 0.0; }
	float colInt =  min(box1.x+box1.width,box2.x+box2.width) - max(box1.x, box2.x);
	float rowInt =  min(box1.y+box1.height,box2.y+box2.height) - max(box1.y,box2.y);
	float intersection = colInt * rowInt;
	float area1 = box1.width*box1.height;
	float area2 = box2.width*box2.height;
	return intersection / (area1 + area2 - intersection);
}

void normalPosArea(Mat &intputImg, vector<RotatedRect> &rects_optimal, vector <Mat>& output_area )
{
	float r,angle;
	for (int i = 0 ;i< rects_optimal.size() ; ++i)
	{
		//旋转区域
		angle = rects_optimal[i].angle;
		r = (float)rects_optimal[i].size.width / (float) (float)rects_optimal[i].size.height;
		if(r<1)
			angle = 90 + angle;
		Mat rotmat = getRotationMatrix2D(rects_optimal[i].center , angle,1);//获得变形矩阵对象
		Mat img_rotated;
		warpAffine(intputImg ,img_rotated,rotmat, intputImg.size(),CV_INTER_CUBIC);

		//裁剪图像
		Size rect_size = rects_optimal[i].size;
		if(r<1)
			swap(rect_size.width, rect_size.height);
		Mat  img_crop;
		getRectSubPix(img_rotated ,rect_size,rects_optimal[i].center , img_crop );

		//用光照直方图调整所有裁剪得到的图像,使具有相同宽度和高度,适用于训练和分类
		Mat resultResized;
		resultResized.create(33,144,CV_8UC3);
		resize(img_crop , resultResized,resultResized.size() , 0,0,INTER_CUBIC);
		Mat grayResult;
		RgbConvToGray(resultResized ,grayResult);
		//blur(grayResult ,grayResult,Size(3,3));
		equalizeHist(grayResult,grayResult);

		output_area.push_back(grayResult);
	}
}

void svm_train(CvSVM & svmClassifier)
{
	FileStorage fs;

	fs.open("SVM.xml" , FileStorage::READ);
	Mat SVM_TrainningData;
	Mat SVM_Classes;	

	fs["TrainingData"] >>SVM_TrainningData;
	fs["classes"] >>SVM_Classes;
	CvSVMParams SVM_params;
	SVM_params.kernel_type = CvSVM::LINEAR;

	svmClassifier.train(SVM_TrainningData,SVM_Classes ,Mat(),Mat(),SVM_params); //SVM训练模型
	fs.release();
}

void char_segment(const Mat & inputImg,vector <Mat>& dst_mat)//得到20*20的标准字符分割图像
{
	Mat img_threshold;
	threshold(inputImg ,img_threshold , 180,255 ,CV_THRESH_BINARY );
	Mat img_contours;
	img_threshold.copyTo(img_contours);
	
	vector < vector <Point> > contours;
	findContours(img_contours ,contours,CV_RETR_EXTERNAL,CV_CHAIN_APPROX_NONE);

	vector< vector <Point> > ::iterator itc = contours.begin();
	vector<RotatedRect> char_rects;
	
	while( itc != contours.end())
	{
		RotatedRect minArea = minAreaRect(Mat( *itc )); //返回每个轮廓的最小有界矩形区域

		Point2f vertices[4];
		minArea.points(vertices);

  		if(!char_verifySizes(minArea))  //判断矩形轮廓是否符合要求
		{
			itc = contours.erase(itc);
		}
		else     
		{
			++itc; 
			char_rects.push_back(minArea);  

		}      		
	}
	char_sort(char_rects); //对字符排序

	vector <Mat> char_mat;
	for (int i = 0; i<char_rects.size() ;++i )
	{
		char_mat.push_back( Mat(img_threshold,char_rects[i].boundingRect())) ;

	}
	
	Mat train_mat(2,3,CV_32FC1);
	int length ;
	dst_mat.resize(7);
	Point2f srcTri[3];  
	Point2f dstTri[3];

	for (int i = 0; i< char_mat.size();++i)
	{
		srcTri[0] = Point2f( 0,0 );  
		srcTri[1] = Point2f( char_mat[i].cols - 1, 0 );  
		srcTri[2] = Point2f( 0, char_mat[i].rows - 1 );
		length = char_mat[i].rows > char_mat[i].cols?char_mat[i].rows:char_mat[i].cols;
		dstTri[0] = Point2f( 0.0, 0.0 );  
		dstTri[1] = Point2f( length, 0.0 );  
		dstTri[2] = Point2f( 0.0, length ); 
		train_mat = getAffineTransform( srcTri, dstTri );
		dst_mat[i]=Mat::zeros(length,length,char_mat[i].type());		
		warpAffine(char_mat[i],dst_mat[i],train_mat,dst_mat[i].size(),INTER_LINEAR,BORDER_CONSTANT,Scalar(0));
		resize(dst_mat[i],dst_mat[i],Size(20,20));  //尺寸调整为20*20

	}

}

bool char_verifySizes(const RotatedRect & candidate)
{
	float aspect = 33.0f/20.0f;
	float charAspect = (float) candidate.size.width/ (float)candidate.size.height; //宽高比
	float error = 0.35;
	float minHeight = 11;  //最小高度11
	float maxHeight = 33;  //最大高度33

	float minAspect = 0.20;  //考虑到数字1,最小长宽比为0.15
	float maxAspect = aspect + aspect*error;

	if( charAspect > minAspect && charAspect < maxAspect
		&& candidate.size.height >= minHeight && candidate.size.width< maxHeight) //非0像素点数、长宽比、高度需满足条件
		return true;
	else
		return false;
}

void char_sort(vector <RotatedRect > & in_char ) //对字符区域进行排序
{
	vector <RotatedRect >  out_char;
	const int length = 7;           //7个字符
	int index[length] = {0,1,2,3,4,5,6};
	float centerX[length];
	for (int i=0;i < length ; ++ i)
	{
		centerX[i] = in_char[i].center.x;
	}

	for (int j=0;j <length;j++) {
		for (int i=length-2;i >= j;i--)
			if (centerX[i] > centerX[i+1])
			{
				float t=centerX[i];
				centerX[i]=centerX[i+1];
				centerX[i+1]=t;

				int tt = index[i];
				index[i] = index[i+1];
				index[i+1] = tt;
			}
	}
	
	for(int i=0;i<length ;i++)
		out_char.push_back(in_char[(index[i])]);

	in_char.clear();     //清空in_char
	in_char = out_char; //将排序好的字符区域向量重新赋值给in_char
}

void features(const Mat & in , Mat & out ,int sizeData)
{
	Mat vhist = projectHistogram(in , 1); //水平直方图
	Mat hhist = projectHistogram(in , 0);  //垂直直方图

	Mat lowData;
	resize(in , lowData ,Size(sizeData ,sizeData ));
	int numCols = vhist.cols + hhist.cols + lowData.cols * lowData.cols;
	out = Mat::zeros(1, numCols , CV_32F);

	int j = 0;
	for (int i =0 ;i<vhist.cols ; ++i)
	{
		out.at<float>(j) = vhist.at<float>(i);
		j++;
	}
	for (int i=0 ; i < hhist.cols ;++i)
	{
		out.at<float>(j) = hhist.at<float>(i);
	}
	for(int x =0 ;x<lowData.rows ;++x)
	{
		for (int y =0 ;y < lowData.cols ;++ y)
		{
			out.at<float>(j) = (float)lowData.at<unsigned char>(x,y);
			j++;
		}
	}

}

Mat projectHistogram(const Mat& img ,int t)  //水平或垂直直方图,0为按列统计
{                                            //1为按行统计
	int sz = (t)? img.rows: img.cols;
	Mat mhist = Mat::zeros(1, sz ,CV_32F);

	for(int j = 0 ;j < sz; j++ )
	{
		Mat data = (t)?img.row(j):img.col(j);
		mhist.at<float>(j) = countNonZero(data);
	}

	double min,max;
	minMaxLoc(mhist , &min ,&max);

	if(max > 0)
		mhist.convertTo(mhist ,-1,1.0f/max , 0);

	return mhist;
}

void ann_train(CvANN_MLP &ann ,int numCharacters, int nlayers)
{
	Mat trainData ,classes;
	FileStorage fs;
	fs.open("ann_xml.xml" , FileStorage::READ);
	
	fs["TrainingData"] >>trainData;
	fs["classes"] >>classes;

	Mat layerSizes(1,3,CV_32SC1);
	layerSizes.at<int>( 0 ) = trainData.cols;
	layerSizes.at<int>( 1 ) = nlayers; //隐藏神经元数,可设为3
	layerSizes.at<int>( 2 ) = numCharacters; //样本类数为34
	ann.create(layerSizes , CvANN_MLP::SIGMOID_SYM ,1,1);  //初始化ann

	Mat trainClasses;
	trainClasses.create(trainData.rows , numCharacters ,CV_32FC1);
	for (int i =0;i< trainData.rows; i++)
	{
		for (int k=0 ; k< trainClasses.cols ; k++ )
		{
			if ( k == (int)classes.at<uchar> (i))
			{
				trainClasses.at<float>(i,k)  = 1 ;
			}
			else
				trainClasses.at<float>(i,k)  = 0;			
		}		
	}

	Mat weights(1 , trainData.rows , CV_32FC1 ,Scalar::all(1) );
	ann.train( trainData ,trainClasses , weights);
}

void classify(CvANN_MLP& ann, vector<Mat> &char_feature , vector<int> & char_result)
{
	char_result.resize(char_feature.size());
	for (int i=0;i<char_feature.size(); ++i)
	{
		Mat output(1 ,34 , CV_32FC1); //1*34矩阵
		ann.predict(char_feature[i] ,output);
		Point maxLoc;
		double maxVal;
		minMaxLoc(output , 0 ,&maxVal , 0 ,&maxLoc);
		char_result[i] =  maxLoc.x;
	}

	std::cout<<"该车牌后6位为:";
	char  s[] = {'0','1','2','3','4','5','6','7','8','9','A','B',
		'C','D','E','F','G','H','J','K','L','M','N','P','Q',
		'R','S','T','U','V','W','X','Y','Z'};
	for (int i=1;i<char_result.size(); ++i)   //第一位是汉字,这里没实现对汉字的预测
	{
		std::cout<<s[char_result[i]];
	}
	std::cout<<'\n';
}



四、关于SVM.xml与ann_xml.xml


SVM.xml中保存的是用于SVM训练的训练矩阵和类别矩阵数据,标签为”TrainingData”对应训练矩阵,为195*4752大小,195表示有195个训练样本,4752表示每个样本的特征向量维度,因为每个图片大小为144*33,将其转为一行,也即将每个像素值都作为一个特征值,则可得到4752个特征值,保存为1行,所使用的样本图片有75个正样本和120个负样本,保存如下:



标签为”classes”对应类别矩阵,为195*1矩阵,前75个值对应正样本为1.0,后120个值对应负样本为-1.0。


同理,ann_xml.xml保存的是用于神经网络训练的数据,标签为”TrainingData”对应训练矩阵,为1700*65大小,1700表示训练样本数,因为车牌字符有34类,每类有50个,故总数1700,65为每个样本提取的特征向量。标签为”classes”对应类别矩阵,为1700*1大小,标记负样本数是哪一类的,训练样本取自




五、结果及分析


勉强找到2张图片,可以完整地识别出车牌的后6为字符,效果如下。





故该系统的性能仍有待提升,不过我认为在车牌定位方面可以有所改进,采用其他更好的车牌定位算法会更好,以及可以增加神经网络的训练样本。


六、完整代码文件下载


下载链接为:http://download.csdn.net/detail/ap1005834/9513328



版权声明:本文为AP1005834原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。