如果图片中有非文字的其他图形,直接用tesseract进行识别的话,会把非文字的图形当成文字进行识别(往往识别出来的是乱七八糟的字符)。因此首先需要把文字区域识别出来,再对文字区域进行处理,最后进行文字识别。
文字区域识别
// Turns a grayscale image into a binary mask in which text lines are merged
// into solid blobs, ready for contour extraction.
//
// gray: input image passed straight to Sobel (callers in this file pass a
//       single-channel grayscale image).
// Returns the mask after the final dilation. As a side effect the
// intermediate stages are written to binary.jpg, dilate1.jpg, erode1.jpg and
// dilate2.jpg in the current working directory.
Mat preprocess(Mat gray)
{
    // Horizontal gradient (dx = 1, dy = 0) with a 3x3 Sobel kernel.
    Mat gradientX;
    Sobel(gray, gradientX, CV_8U, 1, 0, 3);

    // Otsu picks the threshold automatically; the 0 passed here is ignored.
    Mat binary;
    threshold(gradientX, binary, 0, 255, THRESH_BINARY | THRESH_OTSU);

    // Structuring elements. Per the original author's note, the height of the
    // dilation kernel controls how much neighbouring text lines bleed into
    // each other: 3 separates lines better than 4 but may miss some text.
    const Mat dilateKernel = getStructuringElement(MORPH_RECT, Size(24, 4));
    const Mat erodeKernel = getStructuringElement(MORPH_RECT, Size(30, 9));

    // Dilate so that the characters of a line fuse together.
    Mat firstDilation;
    dilate(binary, firstDilation, dilateKernel);

    // Erode to drop fine detail such as table rules (vertical lines, per the
    // original author's note).
    Mat eroded;
    erode(firstDilation, eroded, erodeKernel);

    // Dilate again to make the remaining blobs more pronounced.
    Mat secondDilation;
    dilate(eroded, secondDilation, dilateKernel);

    // Persist every stage for visual debugging.
    imwrite("binary.jpg", binary);
    imwrite("dilate1.jpg", firstDilation);
    imwrite("erode1.jpg", eroded);
    imwrite("dilate2.jpg", secondDilation);

    return secondDilation;
}
// Finds candidate text regions in a preprocessed binary mask.
//
// img: binary mask whose blobs are extracted with findContours.
// Returns the minimum-area rotated rectangle of every contour that
//   - has an area of at least 1000 pixels, and
//   - is not "tall": the upright bounding box height must not exceed
//     1.2 times its width.
vector<RotatedRect> findTextRegion(Mat img)
{
    vector<RotatedRect> rects;

    // 1. Extract contours (two-level hierarchy, compressed segments).
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(img, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE, Point(0, 0));

    // 2. Filter the contours. Range-for avoids the signed/unsigned index
    //    comparison of the earlier index loop.
    for (const vector<Point>& contour : contours)
    {
        // Discard small blobs (noise).
        if (contourArea(contour) < 1000)
            continue;

        // Minimum-area enclosing rectangle; it may be rotated.
        // (A polygon approximation used to be computed here, but its result
        // was never read, so it has been removed.)
        const RotatedRect rect = minAreaRect(contour);

        // Upright bounding box, computed once and used for the shape test.
        const Rect bounds = rect.boundingRect();

        // Discard tall, narrow rectangles; keep the flat ones.
        if (bounds.height > bounds.width * 1.2)
            continue;

        rects.push_back(rect);
    }
    return rects;
}
// Detects text regions in a BGR image, outlines them in green, shows the
// result in a window named "img", writes it to imgDrawRect.jpg and blocks
// until a key is pressed.
//
// img: image converted with a BGR-to-gray conversion, so it is expected to be
//      a 3-channel BGR image. The outlines are drawn directly onto img.
void detect(Mat img)
{
    // 1. Convert to grayscale. COLOR_BGR2GRAY is the C++ API name of the
    //    legacy CV_BGR2GRAY constant.
    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);

    // 2. Morphological preprocessing to obtain blobs suitable for contour search.
    Mat dilation = preprocess(gray);

    // 3. Find and filter text regions.
    const vector<RotatedRect> rects = findTextRegion(dilation);

    // 4. Outline every region in green. Standard range-for replaces the
    //    MSVC-only `for each (... in ...)` extension.
    for (const RotatedRect& rect : rects)
    {
        Point2f corners[4];
        rect.points(corners);
        for (int j = 0; j < 4; j++)
        {
            // Connect each corner to the next one, wrapping around at the end.
            line(img, corners[j], corners[(j + 1) % 4], Scalar(0, 255, 0), 2);
        }
    }

    // 5. Show and save the annotated image.
    imshow("img", img);
    imwrite("imgDrawRect.jpg", img);
    waitKey(0);
}
文字区域处理
用上面的方法识别出来的文字区域是文字区域的最小外接矩形,有可能有些文字的边边角角有些像素就被排除在外了,因此还需要把文字区域扩大一点。
cv::Mat originalPicture = imread(picturePath, cv::IMREAD_GRAYSCALE);//读取一张图片
Mat dilation = preprocess(originalPicture );
vector<RotatedRect> rects = findTextRegion(dilation);
int xmin = 0, xmax = 0, ymin = 0, ymax = 0;
int count = 0;
for each (RotatedRect rect in rects)
{
count++;
Point2f P[4];
rect.points(P);
xmin = P[1].x;
ymin = P[1].y;
xmax = P[1].x;
ymax = P[1].y;
for (int j = 0; j <= 3; j++)
{
if (P[j].x < xmin)
{
xmin = P[j].x;
}
if (P[j].y < ymin)
{
ymin = P[j].y;
}
if (P[j].x > xmax)
{
xmax = P[j].x;
}
if (P[j].y > ymax)
{
ymax = P[j].y;
}
}
Rect tempRect(xmin - 5, ymin - 5, xmax - xmin + 10, ymax - ymin + 10);//文字区域的最小外接矩形的4条边再外扩5个像素点。具体外扩多少,还取决于文字区域旁边有多少空白的地方可以扩,尽量往外扩。
}
文字区域外扩之后,仍然可能无法直接用tesseract识别,识别结果可能是乱码(原因可能是截取出来的图片边缘离实际文字太近),此时还需要对外扩后的区域再进行放大。
Mat temppicture = originalPicture (tempRect);
double scale = 2;//文字区域截图放大,倍数为2。具体的放大位数还需要调试,这取决于未放大前的文字区域图片的大小和分辨率等。
Size dsize = Size(temppicture.cols * scale, temppicture.rows * scale);
Mat img2 = Mat(dsize, CV_32S);
resize(temppicture, img2, dsize);
tessChi_sim->SetImage((uchar*)img2.data, img2.cols, img2.rows, 1, img2.cols);
//tessChi_sim->SetSourceResolution(1000);
tessChi_sim->SetVariable("textord_really_old_xheight", "1");
char* out = tessChi_sim->GetUTF8Text();
std::string temp(out);
std::string stdstr;
if (out != NULL)
{
const char* textout = temp.c_str();
printf(textout);
text = text + gcnew System::String(textout, 0, strlen(textout), System::Text::UTF8Encoding::UTF8);
}
完整代码
#include <opencv2\core\core.hpp>
#include <opencv2\highgui\highgui.hpp>
#include <opencv2/highgui/highgui_c.h>
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2\opencv.hpp>
#include <opencv2/imgcodecs.hpp>
#include <tesseract/baseapi.h>
#include <vector>
using namespace std;
using namespace cv;
using namespace tesseract;
// Turns a grayscale image into a binary mask in which text lines are merged
// into solid blobs, ready for contour extraction.
//
// gray: input image passed straight to Sobel (callers in this file pass a
//       single-channel grayscale image).
// Returns the mask after the final dilation. As a side effect the
// intermediate stages are written to binary.jpg, dilate1.jpg, erode1.jpg and
// dilate2.jpg in the current working directory.
Mat preprocess(Mat gray)
{
    // Horizontal gradient (dx = 1, dy = 0) with a 3x3 Sobel kernel.
    Mat gradientX;
    Sobel(gray, gradientX, CV_8U, 1, 0, 3);

    // Otsu picks the threshold automatically; the 0 passed here is ignored.
    Mat binary;
    threshold(gradientX, binary, 0, 255, THRESH_BINARY | THRESH_OTSU);

    // Structuring elements. Per the original author's note, the height of the
    // dilation kernel controls how much neighbouring text lines bleed into
    // each other: 3 separates lines better than 4 but may miss some text.
    const Mat dilateKernel = getStructuringElement(MORPH_RECT, Size(24, 4));
    const Mat erodeKernel = getStructuringElement(MORPH_RECT, Size(30, 9));

    // Dilate so that the characters of a line fuse together.
    Mat firstDilation;
    dilate(binary, firstDilation, dilateKernel);

    // Erode to drop fine detail such as table rules (vertical lines, per the
    // original author's note).
    Mat eroded;
    erode(firstDilation, eroded, erodeKernel);

    // Dilate again to make the remaining blobs more pronounced.
    Mat secondDilation;
    dilate(eroded, secondDilation, dilateKernel);

    // Persist every stage for visual debugging.
    imwrite("binary.jpg", binary);
    imwrite("dilate1.jpg", firstDilation);
    imwrite("erode1.jpg", eroded);
    imwrite("dilate2.jpg", secondDilation);

    return secondDilation;
}
// Finds candidate text regions in a preprocessed binary mask.
//
// img: binary mask whose blobs are extracted with findContours.
// Returns the minimum-area rotated rectangle of every contour that
//   - has an area of at least 1000 pixels, and
//   - is not "tall": the upright bounding box height must not exceed
//     1.2 times its width.
vector<RotatedRect> findTextRegion(Mat img)
{
    vector<RotatedRect> rects;

    // 1. Extract contours (two-level hierarchy, compressed segments).
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(img, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE, Point(0, 0));

    // 2. Filter the contours. Range-for avoids the signed/unsigned index
    //    comparison of the earlier index loop.
    for (const vector<Point>& contour : contours)
    {
        // Discard small blobs (noise).
        if (contourArea(contour) < 1000)
            continue;

        // Minimum-area enclosing rectangle; it may be rotated.
        // (A polygon approximation used to be computed here, but its result
        // was never read, so it has been removed.)
        const RotatedRect rect = minAreaRect(contour);

        // Upright bounding box, computed once and used for the shape test.
        const Rect bounds = rect.boundingRect();

        // Discard tall, narrow rectangles; keep the flat ones.
        if (bounds.height > bounds.width * 1.2)
            continue;

        rects.push_back(rect);
    }
    return rects;
}
// Detects text regions in a BGR image, outlines them in green, shows the
// result in a window named "img", writes it to imgDrawRect.jpg and blocks
// until a key is pressed.
//
// img: image converted with a BGR-to-gray conversion, so it is expected to be
//      a 3-channel BGR image. The outlines are drawn directly onto img.
void detect(Mat img)
{
    // 1. Convert to grayscale. COLOR_BGR2GRAY is the C++ API name of the
    //    legacy CV_BGR2GRAY constant.
    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);

    // 2. Morphological preprocessing to obtain blobs suitable for contour search.
    Mat dilation = preprocess(gray);

    // 3. Find and filter text regions.
    const vector<RotatedRect> rects = findTextRegion(dilation);

    // 4. Outline every region in green. Standard range-for replaces the
    //    MSVC-only `for each (... in ...)` extension.
    for (const RotatedRect& rect : rects)
    {
        Point2f corners[4];
        rect.points(corners);
        for (int j = 0; j < 4; j++)
        {
            // Connect each corner to the next one, wrapping around at the end.
            line(img, corners[j], corners[(j + 1) % 4], Scalar(0, 255, 0), 2);
        }
    }

    // 5. Show and save the annotated image.
    imshow("img", img);
    imwrite("imgDrawRect.jpg", img);
    waitKey(0);
}
void main()
{
cv::Mat originalPicture = imread(picturePath, cv::IMREAD_GRAYSCALE);//读取一张图片
Mat dilation = preprocess(originalPicture );
vector<RotatedRect> rects = findTextRegion(dilation);
int xmin = 0, xmax = 0, ymin = 0, ymax = 0;
int count = 0;
for each (RotatedRect rect in rects)
{
count++;
Point2f P[4];
rect.points(P);
xmin = P[1].x;
ymin = P[1].y;
xmax = P[1].x;
ymax = P[1].y;
for (int j = 0; j <= 3; j++)
{
if (P[j].x < xmin)
{
xmin = P[j].x;
}
if (P[j].y < ymin)
{
ymin = P[j].y;
}
if (P[j].x > xmax)
{
xmax = P[j].x;
}
if (P[j].y > ymax)
{
ymax = P[j].y;
}
}
Rect tempRect(xmin - 5, ymin - 5, xmax - xmin + 10, ymax - ymin + 10);//文字区域的最小外接矩形的4条边再外扩5个像素点。具体外扩多少,还取决于文字区域旁边有多少空白的地方可以扩,尽量往外扩。
}
Mat temppicture = originalPicture(tempRect);
double scale = 2;//文字区域截图放大,倍数为2。具体的放大位数还需要调试,这取决于未放大前的文字区域图片的大小和分辨率等。
Size dsize = Size(temppicture.cols * scale, temppicture.rows * scale);
Mat img2 = Mat(dsize, CV_32S);
resize(temppicture, img2, dsize);
tessChi_sim->SetImage((uchar*)img2.data, img2.cols, img2.rows, 1, img2.cols);
//tessChi_sim->SetSourceResolution(1000);
tessChi_sim->SetVariable("textord_really_old_xheight", "1");
char* out = tessChi_sim->GetUTF8Text();
std::string temp(out);
std::string stdstr;
if (out != NULL)
{
const char* textout = temp.c_str();
printf(textout);
text = text + gcnew System::String(textout, 0, strlen(textout), System::Text::UTF8Encoding::UTF8);
}
}
版权声明:本文为bluesea089原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。