OpenCV+Tesseract自动识别文字区域并识别文字

  • Post author:
  • Post category:其他


OpenCV+Tesseract自动识别文字区域并识别文字

如果图片中有非文字的其他图形,直接用tesseract进行识别的话,会把非文字的图形当成文字进行识别(往往识别出来的是乱七八糟的字符)。因此首先需要把文字区域识别出来,再对文字区域进行处理,最后进行文字识别。



文字区域识别

/**
 * Turns a grayscale image into a binary mask in which text lines show up as blobs.
 *
 * Pipeline: x-direction Sobel gradient (8-bit output) -> Otsu binarization ->
 * dilate -> erode -> dilate. The four intermediate/final images are also written to
 * "binary.jpg", "dilate1.jpg", "erode1.jpg" and "dilate2.jpg" in the working directory.
 *
 * @param gray  Input image passed straight to Sobel (callers in this file pass a
 *              single-channel grayscale image).
 * @return      The mask after the second dilation.
 */
Mat preprocess(Mat gray)
{
	// Structuring elements. The height of the dilation kernel controls how strongly
	// neighbouring text lines merge vertically; per the original author's note, a
	// height of 3 separates lines better than 4 but may also miss some text.
	const Mat dilateKernel = getStructuringElement(MORPH_RECT, Size(24, 4));
	const Mat erodeKernel = getStructuringElement(MORPH_RECT, Size(30, 9));

	// Gradient along x (dx = 1, dy = 0, 3x3 aperture), stored as 8-bit.
	Mat gradientX;
	Sobel(gray, gradientX, CV_8U, 1, 0, 3);

	// Binarize; the threshold value is chosen automatically by Otsu's method.
	Mat binaryMask;
	threshold(gradientX, binaryMask, 0, 255, THRESH_BINARY | THRESH_OTSU);

	// First dilation: make the outlines stand out.
	Mat firstDilation;
	dilate(binaryMask, firstDilation, dilateKernel);

	// Erosion: drop fine detail such as table lines (the original author notes that
	// it is the vertical lines that get removed here).
	Mat eroded;
	erode(firstDilation, eroded, erodeKernel);

	// Second dilation: make the remaining outlines more pronounced.
	Mat secondDilation;
	dilate(eroded, secondDilation, dilateKernel);

	// Save every stage for inspection.
	imwrite("binary.jpg", binaryMask);
	imwrite("dilate1.jpg", firstDilation);
	imwrite("erode1.jpg", eroded);
	imwrite("dilate2.jpg", secondDilation);

	return secondDilation;
}


/**
 * Finds candidate text regions in a binary image.
 *
 * Contours are extracted with findContours (RETR_CCOMP, CHAIN_APPROX_SIMPLE). A contour
 * is kept when its area is at least 1000 and the upright bounding box of its
 * minimum-area rectangle is not taller than 1.2 times its width.
 *
 * The polygon approximation (approxPolyDP) that used to be computed here was never
 * read afterwards, so it has been dropped; the returned rectangles are unchanged.
 *
 * @param img  Image handed to findContours (callers in this file pass the result of
 *             preprocess()).
 * @return     Minimum-area rotated rectangles of the accepted contours, in contour order.
 */
vector<RotatedRect> findTextRegion(Mat img)
{
	vector<RotatedRect> rects;

	//1. Find the contours
	vector<vector<Point>> contours;
	vector<Vec4i> hierarchy;
	findContours(img, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE, Point(0, 0));

	//2. Filter the contours
	for (const vector<Point>& contour : contours)
	{
		// Discard everything with an area below 1000
		if (contourArea(contour) < 1000)
			continue;

		// Minimum-area rectangle; it may be rotated
		const RotatedRect rect = minAreaRect(contour);

		// Upright bounding box of that rectangle, computed once for width and height
		const Rect box = rect.boundingRect();

		// Discard tall, narrow rectangles and keep the flat ones
		if (box.height > box.width * 1.2)
			continue;

		rects.push_back(rect);
	}
	return rects;
}

/**
 * Detects text regions in an image, outlines them in green, then shows and saves the result.
 *
 * The outlines are drawn directly onto img. The annotated image is displayed in a
 * window named "img", written to "imgDrawRect.jpg", and the function then blocks in
 * waitKey(0) until a key is pressed.
 *
 * @param img  Input image; it is converted with COLOR_BGR2GRAY, so a 3-channel BGR
 *             image is expected.
 */
void detect(Mat img)
{
	//1. Convert to grayscale (C++ enum instead of the legacy C constant CV_BGR2GRAY)
	Mat gray;
	cvtColor(img, gray, COLOR_BGR2GRAY);

	//2. Morphological pre-processing to obtain blobs whose contours can be searched
	Mat dilation = preprocess(gray);

	//3. Find and filter the text regions
	vector<RotatedRect> rects = findTextRegion(dilation);

	//4. Outline every region in green (standard range-for instead of MSVC-only "for each")
	for (const RotatedRect& rect : rects)
	{
		Point2f P[4];
		rect.points(P);
		for (int j = 0; j < 4; j++)
		{
			// Connect each corner to the next one, wrapping from the last to the first
			line(img, P[j], P[(j + 1) % 4], Scalar(0, 255, 0), 2);
		}
	}

	//5. Show and save the annotated image
	imshow("img", img);
	imwrite("imgDrawRect.jpg", img);

	waitKey(0);
}



文字区域处理

用上面的方法得到的是文字区域的最小外接矩形,文字边缘的部分像素有可能被排除在外,因此还需要把文字区域适当外扩一点。

cv::Mat originalPicture = imread(picturePath, cv::IMREAD_GRAYSCALE);//读取一张图片
	Mat dilation = preprocess(originalPicture );
	vector<RotatedRect> rects = findTextRegion(dilation);
	int xmin = 0, xmax = 0, ymin = 0, ymax = 0;
	int count = 0;
	for each (RotatedRect rect in rects)
	{
		count++;
		Point2f P[4];
		rect.points(P);
		xmin = P[1].x;
		ymin = P[1].y;
		xmax = P[1].x;
		ymax = P[1].y;
		for (int j = 0; j <= 3; j++)
		{
			if (P[j].x < xmin)
			{
				xmin = P[j].x;
			}
			if (P[j].y < ymin)
			{
				ymin = P[j].y;
			}
			if (P[j].x > xmax)
			{
				xmax = P[j].x;
			}
			if (P[j].y > ymax)
			{
				ymax = P[j].y;
			}

		}
		Rect tempRect(xmin - 5, ymin - 5, xmax - xmin + 10, ymax - ymin + 10);//文字区域的最小外接矩形的4条边再外扩5个像素点。具体外扩多少,还取决于文字区域旁边有多少空白的地方可以扩,尽量往外扩。
	}

文字区域外扩之后,直接用tesseract识别仍有可能得到乱码(可能是因为此时截图的边沿离实际文字太近),这时还需要把外扩后的区域再放大。

		// Crop the (expanded) text region out of the grayscale picture.
		Mat temppicture = originalPicture(tempRect);

		// Enlarge the crop by a factor of 2. The best factor needs tuning; it depends on
		// the size and resolution of the crop before enlargement.
		double scale = 2;
		Mat img2;
		resize(temppicture, img2, Size(), scale, scale);

		// One byte per pixel; take the row stride from the Mat instead of assuming it
		// equals the width.
		tessChi_sim->SetImage(img2.data, img2.cols, img2.rows, 1, static_cast<int>(img2.step[0]));
		//tessChi_sim->SetSourceResolution(1000);
		tessChi_sim->SetVariable("textord_really_old_xheight", "1");

		// GetUTF8Text() returns a buffer the caller must release with delete[].
		char* out = tessChi_sim->GetUTF8Text();
		if (out != nullptr)
		{
			// Copy only after the null check, then free the Tesseract buffer.
			std::string temp(out);
			delete[] out;

			const char* textout = temp.c_str();
			// Print through "%s" so '%' characters in the recognized text are not
			// interpreted as format specifiers.
			printf("%s", textout);
			text = text + gcnew System::String(textout, 0, static_cast<int>(temp.size()), System::Text::UTF8Encoding::UTF8);
		}



完整代码

#include <opencv2\core\core.hpp>
#include <opencv2\highgui\highgui.hpp>
#include <opencv2/highgui/highgui_c.h>
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2\opencv.hpp>
#include <opencv2/imgcodecs.hpp>
#include <tesseract/baseapi.h>
#include <vector>

using namespace std;
using namespace cv;
using namespace tesseract;
/**
 * Turns a grayscale image into a binary mask in which text lines show up as blobs.
 *
 * Pipeline: x-direction Sobel gradient (8-bit output) -> Otsu binarization ->
 * dilate -> erode -> dilate. The four intermediate/final images are also written to
 * "binary.jpg", "dilate1.jpg", "erode1.jpg" and "dilate2.jpg" in the working directory.
 *
 * @param gray  Input image passed straight to Sobel (callers in this file pass a
 *              single-channel grayscale image).
 * @return      The mask after the second dilation.
 */
Mat preprocess(Mat gray)
{
	// Structuring elements. The height of the dilation kernel controls how strongly
	// neighbouring text lines merge vertically; per the original author's note, a
	// height of 3 separates lines better than 4 but may also miss some text.
	const Mat dilateKernel = getStructuringElement(MORPH_RECT, Size(24, 4));
	const Mat erodeKernel = getStructuringElement(MORPH_RECT, Size(30, 9));

	// Gradient along x (dx = 1, dy = 0, 3x3 aperture), stored as 8-bit.
	Mat gradientX;
	Sobel(gray, gradientX, CV_8U, 1, 0, 3);

	// Binarize; the threshold value is chosen automatically by Otsu's method.
	Mat binaryMask;
	threshold(gradientX, binaryMask, 0, 255, THRESH_BINARY | THRESH_OTSU);

	// First dilation: make the outlines stand out.
	Mat firstDilation;
	dilate(binaryMask, firstDilation, dilateKernel);

	// Erosion: drop fine detail such as table lines (the original author notes that
	// it is the vertical lines that get removed here).
	Mat eroded;
	erode(firstDilation, eroded, erodeKernel);

	// Second dilation: make the remaining outlines more pronounced.
	Mat secondDilation;
	dilate(eroded, secondDilation, dilateKernel);

	// Save every stage for inspection.
	imwrite("binary.jpg", binaryMask);
	imwrite("dilate1.jpg", firstDilation);
	imwrite("erode1.jpg", eroded);
	imwrite("dilate2.jpg", secondDilation);

	return secondDilation;
}


/**
 * Finds candidate text regions in a binary image.
 *
 * Contours are extracted with findContours (RETR_CCOMP, CHAIN_APPROX_SIMPLE). A contour
 * is kept when its area is at least 1000 and the upright bounding box of its
 * minimum-area rectangle is not taller than 1.2 times its width.
 *
 * The polygon approximation (approxPolyDP) that used to be computed here was never
 * read afterwards, so it has been dropped; the returned rectangles are unchanged.
 *
 * @param img  Image handed to findContours (callers in this file pass the result of
 *             preprocess()).
 * @return     Minimum-area rotated rectangles of the accepted contours, in contour order.
 */
vector<RotatedRect> findTextRegion(Mat img)
{
	vector<RotatedRect> rects;

	//1. Find the contours
	vector<vector<Point>> contours;
	vector<Vec4i> hierarchy;
	findContours(img, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE, Point(0, 0));

	//2. Filter the contours
	for (const vector<Point>& contour : contours)
	{
		// Discard everything with an area below 1000
		if (contourArea(contour) < 1000)
			continue;

		// Minimum-area rectangle; it may be rotated
		const RotatedRect rect = minAreaRect(contour);

		// Upright bounding box of that rectangle, computed once for width and height
		const Rect box = rect.boundingRect();

		// Discard tall, narrow rectangles and keep the flat ones
		if (box.height > box.width * 1.2)
			continue;

		rects.push_back(rect);
	}
	return rects;
}

/**
 * Detects text regions in an image, outlines them in green, then shows and saves the result.
 *
 * The outlines are drawn directly onto img. The annotated image is displayed in a
 * window named "img", written to "imgDrawRect.jpg", and the function then blocks in
 * waitKey(0) until a key is pressed.
 *
 * @param img  Input image; it is converted with COLOR_BGR2GRAY, so a 3-channel BGR
 *             image is expected.
 */
void detect(Mat img)
{
	//1. Convert to grayscale (C++ enum instead of the legacy C constant CV_BGR2GRAY)
	Mat gray;
	cvtColor(img, gray, COLOR_BGR2GRAY);

	//2. Morphological pre-processing to obtain blobs whose contours can be searched
	Mat dilation = preprocess(gray);

	//3. Find and filter the text regions
	vector<RotatedRect> rects = findTextRegion(dilation);

	//4. Outline every region in green (standard range-for instead of MSVC-only "for each")
	for (const RotatedRect& rect : rects)
	{
		Point2f P[4];
		rect.points(P);
		for (int j = 0; j < 4; j++)
		{
			// Connect each corner to the next one, wrapping from the last to the first
			line(img, P[j], P[(j + 1) % 4], Scalar(0, 255, 0), 2);
		}
	}

	//5. Show and save the annotated image
	imshow("img", img);
	imwrite("imgDrawRect.jpg", img);

	waitKey(0);
}
void main()
{
	cv::Mat originalPicture = imread(picturePath, cv::IMREAD_GRAYSCALE);//读取一张图片
	Mat dilation = preprocess(originalPicture );
	vector<RotatedRect> rects = findTextRegion(dilation);
	int xmin = 0, xmax = 0, ymin = 0, ymax = 0;
	int count = 0;
	for each (RotatedRect rect in rects)
	{
		count++;
		Point2f P[4];
		rect.points(P);
		xmin = P[1].x;
		ymin = P[1].y;
		xmax = P[1].x;
		ymax = P[1].y;
		for (int j = 0; j <= 3; j++)
		{
			if (P[j].x < xmin)
			{
				xmin = P[j].x;
			}
			if (P[j].y < ymin)
			{
				ymin = P[j].y;
			}
			if (P[j].x > xmax)
			{
				xmax = P[j].x;
			}
			if (P[j].y > ymax)
			{
				ymax = P[j].y;
			}

		}
		Rect tempRect(xmin - 5, ymin - 5, xmax - xmin + 10, ymax - ymin + 10);//文字区域的最小外接矩形的4条边再外扩5个像素点。具体外扩多少,还取决于文字区域旁边有多少空白的地方可以扩,尽量往外扩。
	}
		Mat temppicture = originalPicture(tempRect);

		double scale = 2;//文字区域截图放大,倍数为2。具体的放大位数还需要调试,这取决于未放大前的文字区域图片的大小和分辨率等。
		Size dsize = Size(temppicture.cols * scale, temppicture.rows * scale);
		Mat img2 = Mat(dsize, CV_32S);
		resize(temppicture, img2, dsize);
		tessChi_sim->SetImage((uchar*)img2.data, img2.cols, img2.rows, 1, img2.cols);
		//tessChi_sim->SetSourceResolution(1000);
		tessChi_sim->SetVariable("textord_really_old_xheight", "1");
		char* out = tessChi_sim->GetUTF8Text();

		std::string temp(out);
		std::string stdstr;
		if (out != NULL)
		{
			const char* textout = temp.c_str();
			printf(textout);
			text = text + gcnew System::String(textout, 0, strlen(textout), System::Text::UTF8Encoding::UTF8);
		}
}



版权声明:本文为bluesea089原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。