当前位置:网站首页>opencv+paddle ocr 识别图片提取表格信息
opencv+paddle ocr 识别图片提取表格信息
2022-07-28 06:30:00 【路新航】
思路:
1.提取出横线
2.提取出纵线
3.得到交叉点,删除错误的交叉点;相邻交叉点坐标相差不超过10时视为同一条线,只保留其中坐标值最大的那个交叉点,由此得到表格行列
4.对每个单元格使用paddle ocr提取文字
在原文代码基础上修改了2点
1.pytesseract识别准确率不高,使用paddle ocr代替 pytesseract
2.识别出的表格交叉点有些并非真实交叉点,通过判断该行像素点个数,丢掉错误横纵坐标
import cv2
import numpy as np
import pandas as pd
# import pytesseract
# import re
from paddleocr import PaddleOCR
src = 'image.png'
# Flag 1 is cv2.IMREAD_COLOR: load the image as a 3-channel BGR array.
raw = cv2.imread(src, 1)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, which would otherwise surface later as an opaque cvtColor error.
if raw is None:
    raise FileNotFoundError('cannot read image file: %s' % src)
# Grayscale version of the image (also used later when cropping cells).
gray = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
# Binarize the inverted grayscale image so the result has white strokes on a
# black background.
binary = cv2.adaptiveThreshold(~gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, -5)
# cv2.adaptiveThreshold arguments, in order:
#   src:            the grayscale input image
#   maxValue:       gray value assigned to pixels that satisfy the condition
#   adaptiveMethod: ADAPTIVE_THRESH_MEAN_C or ADAPTIVE_THRESH_GAUSSIAN_C
#   thresholdType:  THRESH_BINARY or THRESH_BINARY_INV
#   blockSize:      size of the neighbourhood used per pixel; must be odd
#   C:              constant subtracted from the threshold computed for each
#                   neighbourhood; may be negative
#   dst:            optional output image
# Uncomment to preview the binarized image:
# cv2.imshow("binary_picture", binary)
# cv2.waitKey()
def _cluster_ends(coords, min_gap):
    """Collapse runs of nearby coordinates to the last value of each run."""
    values = np.unique(coords)
    if values.size == 0:
        return []
    # A value closes its run when the next distinct value is more than
    # min_gap away; the final value always closes the last run.
    closes_run = np.append(np.diff(values) > min_gap, True)
    return [int(v) for v in values[closes_run]]


def _band_pixel_count(mask, center, axis, half_width):
    """Count non-zero pixels in the rows (axis 0) or columns (axis 1) near center."""
    # Clamp the lower edge so a line close to the border does not wrap around;
    # slicing already clips the upper edge.
    start = max(center - half_width, 0)
    stop = center + half_width + 1
    band = mask[start:stop, :] if axis == 0 else mask[:, start:stop]
    return int(np.count_nonzero(band))


def recognize_bgkx(binary, min_gap=10):
    """Locate the grid lines of a table in a binarized image.

    Horizontal and vertical strokes are isolated with morphological opening,
    their intersection pixels are collected, and nearby intersection
    coordinates are merged into one coordinate per grid line.  Coordinates
    whose surrounding band holds fewer than 5 intersection pixels are dropped
    as spurious.

    Args:
        binary: 2-D binarized image (white strokes on black background).
        min_gap: two coordinates further apart than this many pixels are
            treated as different grid lines.  The right value depends on the
            image; it should stay below the smallest cell height/width.

    Returns:
        A tuple ``(x_point_arr, y_point_arr)`` of ascending integer lists
        holding the x coordinates of the vertical lines and the y coordinates
        of the horizontal lines.  Both lists are empty when no intersection
        is found.

    Side effects:
        Shows the intersection mask in an OpenCV window and waits for a key
        press; prints diagnostic messages.
    """
    rows, cols = binary.shape

    # Horizontal lines: open with a wide, 1-pixel-high kernel.  A smaller
    # scale gives a longer kernel, so fewer horizontal strokes survive.
    scale = 30
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(cols // scale, 1), 1))
    eroded = cv2.erode(binary, kernel, iterations=1)
    dilated_col = cv2.dilate(eroded, kernel, iterations=1)

    # Vertical lines: same idea with a tall, 1-pixel-wide kernel.
    scale = 20
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(rows // scale, 1)))
    eroded = cv2.erode(binary, kernel, iterations=1)
    dilated_row = cv2.dilate(eroded, kernel, iterations=1)

    # Pixels present in both line images are the grid intersections.
    bitwise_and = cv2.bitwise_and(dilated_col, dilated_row)
    cv2.imshow("excel_bitwise_and", bitwise_and)
    cv2.waitKey()

    # Each intersection is a small blob of pixels; keep one coordinate
    # (the largest) per blob along each axis.
    ys, xs = np.where(bitwise_and > 0)
    x_point_arr = _cluster_ends(xs, min_gap)
    y_point_arr = _cluster_ends(ys, min_gap)

    band_half_width = 4  # inspect the line itself plus 4 pixels on each side
    min_band_pixels = 5  # fewer intersection pixels than this => not a real line

    kept_y = []
    for y in y_point_arr:
        y_dot_num = _band_pixel_count(bitwise_and, y, 0, band_half_width)
        if y_dot_num < min_band_pixels:
            print('纵坐标%s并不在框线上删除,该行只有%s个像素' % (y, y_dot_num))
        else:
            kept_y.append(y)
    y_point_arr = kept_y

    kept_x = []
    for x in x_point_arr:
        print('检测', x)
        x_dot_num = _band_pixel_count(bitwise_and, x, 1, band_half_width)
        if x_dot_num < min_band_pixels:
            print('横坐标坐标%s并不在框线上删除 ,该列只有%s个像素' % (x, x_dot_num))
        else:
            kept_x.append(x)
    x_point_arr = kept_x

    # N grid lines enclose N - 1 rows/columns; never report a negative count.
    print('该表格有%s行%s列 ' % (max(len(y_point_arr) - 1, 0), max(len(x_point_arr) - 1, 0)))
    return x_point_arr, y_point_arr
# Detect the table grid once; the resulting coordinate lists are kept at
# module level.
x_point_arr, y_point_arr = recognize_bgkx(binary)
# Original author's note here reads "step back"; presumably it refers to the
# 3-pixel upward shift applied to the row boundaries before cropping cells
# — TODO confirm.
# OCR engine built once at module level, with angle classification requested
# and lang="ch" (presumably the Chinese model — confirm against the PaddleOCR
# docs).
ocr = PaddleOCR(use_angle_cls=True, lang="ch")
def recognize_text_by_loop():
    """OCR every table cell and write the table to ``saomiaojian.xlsx``.

    Reads the module-level ``x_point_arr`` / ``y_point_arr`` grid
    coordinates, the ``gray`` image and the ``ocr`` engine.  Each cell is
    cropped from ``gray``, saved as ``cell_image_<row>_<col>.png`` in the
    current directory and passed to the OCR engine.  The first table row is
    used as the column header of the spreadsheet.

    Returns:
        None.  The recognized text is printed and written to the Excel file.
    """
    # Use fresh local names: assigning to ``y_point_arr`` here would make it a
    # local variable and raise UnboundLocalError when reading the global.
    # Row boundaries are moved up by 3 pixels (presumably so the crop starts
    # slightly above the detected line — TODO confirm) and clamped at 0 so a
    # line near the top edge cannot produce a negative, wrapping slice index.
    row_bounds = [max(y - 3, 0) for y in y_point_arr]
    col_bounds = list(x_point_arr)

    # N boundaries enclose N - 1 rows of cells.
    data = [[] for _ in range(len(row_bounds) - 1)]
    for i, (top, bottom) in enumerate(zip(row_bounds, row_bounds[1:])):
        for j, (left, right) in enumerate(zip(col_bounds, col_bounds[1:])):
            # NumPy images are indexed [y, x].
            cell = gray[top:bottom, left:right]
            img_path = "cell_image_%s_%s.png" % (i, j)
            cv2.imwrite(img_path, cell)
            result = ocr.ocr(img_path, cls=True)
            # NOTE(review): this assumes ocr.ocr returns a flat list of
            # [box, (text, score)] entries, exactly as the original code did;
            # verify against the installed PaddleOCR version.
            text1 = ''.join([x[1][0] for x in result])
            print(text1)
            data[i].append(text1)
    print(data)

    # First row is the header; with no rows at all write an empty sheet
    # instead of failing on data[0].
    if data:
        table_coordinate = pd.DataFrame(data[1:], columns=data[0])
    else:
        table_coordinate = pd.DataFrame()
    table_coordinate.to_excel('saomiaojian.xlsx', index=False)
待优化:
- 真实图片中的单元格边线并不严格落在同一条横坐标或纵坐标上,仅用框线交叉点坐标来确定单元格,会使裁剪出的单元格图像不够完整;待后续优化,改为获取每个单元格各自的坐标
边栏推荐
- Awk from introduction to earth (16) discussion on the types of awk variables -- about the two types of numbers and strings
- Allure use
- @The role of documented
- Kubernetes技术与架构(七)
- CarSim simulation quick start (XII) - Driver Model (2)
- Forward propagation of deep learning neural networks (1)
- Deep browser rendering principles
- Prescan quick start to master the road elements of lecture 15
- 记录一次mycat连接Communications link failure问题解决
- Meituan Er Mian: why does redis have sentinels?
猜你喜欢

These mobile security browsers are more than a little easy to use

Parse tree structure JS
![Redis of non relational database [detailed setup of redis cluster]](/img/0b/bd05fb91d17f6e0dc9f657a4047ccb.png)
Redis of non relational database [detailed setup of redis cluster]

sql server时间字段排序

Understand the propagation process of EMI electromagnetic interference through five diagrams - the influence of square wave steepness on high-frequency components, the spectrum graph from time sequenc

Record a MYCAT connection and solve the problems of communications link failure

UE4 engine customizes screenpass and MRT output

记录一次mycat连接Communications link failure问题解决

QT uses semaphores to control threads (qsemaphore)

protobuf 基本语法总结
随机推荐
Autodesk desktop licensing service error 1067 handling method
我们如何在mysql中运行批处理模式?
深度学习基础宝典---激活函数、Batch Size、归一化
Tensorflow uses deep learning (II)
2022/7/27 考试总结
What if the computer file cannot be deleted?
JS candy xiaoxiaole game source code
解决CNN固有缺陷!通用 CNN 架构CCNN来了| ICML2022
Puzzle (004.3) pattern puzzle
CarSim simulation quick start (XII) - Driver Model (2)
Some experience of gd32 using Hal Library of ST and Gd official library
Prescan quick start to master the transportation elements in lesson 14, prescan
Huawei Senior Engineer -- BGP routing filtering and community attributes
mysql,可以使用多少列创建索引?
Swm32 series tutorial 5-adc application
使用FFmpeg来批量生成单图+单音频的一图流视频
Understand CDN
Forward propagation of deep learning neural networks (1)
对spark算子aggregateByKey的理解
"Wei Lai Cup" 2022 Niuke summer multi school training camp 2 supplementary question record (dghjkl)