当前位置:网站首页>Opencv learning notes 5 -- document scanning +ocr character recognition
OpenCV learning notes 5 -- document scanning + OCR character recognition
2022-07-01 14:55:00 【Cloudy_ to_ sunny】
OpenCV learning notes 5 -- document scanning + OCR character recognition
Document scanning
# Import toolkit
import numpy as np
import argparse
import cv2
import matplotlib.pyplot as plt#Matplotlib yes RGB
Function definitions
# Graphic display
def cv_show(name, img):
    """Display an OpenCV image with matplotlib.

    OpenCV keeps colour images in BGR channel order while matplotlib
    expects RGB, so colour images are converted before plotting.

    Args:
        name: Caption shown as the plot title.
        img: Image array; either single-channel (2-D) or a BGR colour image.
    """
    if img.ndim == 2:
        # A single-channel image has no channels to swap (it cannot be
        # unpacked into b, g, r), so render it as grayscale instead.
        plt.imshow(img, cmap="gray")
    else:
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title(name)
    plt.show()
def cv_show1(name, img):
    """Show ``img`` as-is, first with matplotlib and then in an OpenCV window.

    No channel reordering is applied.

    Args:
        name: Title of the OpenCV window.
        img: Image array to display.
    """
    # Matplotlib preview
    plt.imshow(img)
    plt.show()

    # OpenCV window; waits for a key press before closing all windows
    cv2.imshow(name, img)
    cv2.waitKey()
    cv2.destroyAllWindows()
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Array-like of (x, y) points in image coordinates. Accepts a
            ``(4, 2)`` array, a nested list/tuple, or an OpenCV contour of
            shape ``(4, 1, 2)``.

    Returns:
        A ``float32`` array of shape ``(4, 2)`` holding the points in the
        order top-left, top-right, bottom-right, bottom-left.
    """
    # Normalise the input so plain lists and (N, 1, 2) contours work too.
    pts = np.asarray(pts).reshape(-1, 2)
    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    diff = pts[:, 1] - pts[:, 0]
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

    Args:
        image: Source image.
        pts: The four corner points of the region in ``image``.

    Returns:
        The perspective-corrected image whose size is derived from the
        longest opposing edges of the quadrilateral.
    """
    # Source corners in top-left, top-right, bottom-right, bottom-left order
    src = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = src

    def edge_length(p, q):
        # Euclidean distance between two corner points
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Output width: the longer of the bottom and top edges (truncated to int)
    out_w = max(
        int(edge_length(bottom_right, bottom_left)),
        int(edge_length(top_right, top_left)),
    )
    # Output height: the longer of the right and left edges (truncated to int)
    out_h = max(
        int(edge_length(top_right, bottom_right)),
        int(edge_length(top_left, bottom_left)),
    )

    # Where each source corner lands in the output image
    dst = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    # Matrix mapping src -> dst, then apply it to the image
    matrix = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: Source image.
        width: Target width in pixels; takes precedence when both are given.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither ``width`` nor
        ``height`` is given.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back untouched
    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    return cv2.resize(image, target, interpolation=inter)
# Read the input image
image = cv2.imread("./images/receipt.jpg")
if image is None:
    # cv2.imread returns None for a missing or unreadable file instead of
    # raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image: ./images/receipt.jpg")
cv_show("Image", image)
# Contours are detected on a copy resized to a height of 500; this ratio
# scales the detected coordinates back to the original resolution.
ratio = image.shape[0] / 500.0
print(image.shape[0])
orig = image.copy()

2448
# Work on a copy scaled to a height of 500; the full-resolution image stays in `orig`
image = resize(orig, height=500)
cv_show("Image", image)

Edge detection
# Pre-processing: grayscale -> Gaussian blur -> Canny edge map
gray = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)

# Show the pre-processing results
print("STEP 1: edge detection ")
cv_show("Image", image)
cv_show1("Edged", edged)
STEP 1: edge detection


# Contour detection
found = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
# (contours, hierarchy); pick the contour list in either case.
cnts = found[0] if len(found) == 2 else found[1]
# Keep only the five largest contours by area
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
# Uncomment to visualise the candidate contours:
#cv2.drawContours(image, cnts, -1, (0, 255, 0), 2)
#cv_show("Outline", image)

# Walk the candidates from largest to smallest
for c in cnts:
    # Perimeter of the closed contour
    peri = cv2.arcLength(c, True)
    # Approximate the contour by a polygon:
    #   c      - the input point set
    #   eps    - maximum distance between the contour and its approximation
    #            (accuracy parameter), here 2% of the perimeter
    #   True   - the polygon is closed
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # The first approximation with exactly four vertices is taken as the document
    if len(approx) == 4:
        screenCnt = approx
        break
else:
    # Without this the script would fail later with a NameError on screenCnt
    raise RuntimeError("No four-point contour found; cannot locate the document outline")
Get the contour
# Draw the detected four-point outline in green on the resized image and show it
print("STEP 2: Get profile ")
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
cv_show("Outline", image)
STEP 2: Get profile

# Perspective transform: the corners were found on the resized image, so
# scale them by `ratio` before warping the full-resolution original
corners = screenCnt.reshape(4, 2)
warped = four_point_transform(orig, corners * ratio)
print(corners)
print(corners.sum(axis=1))
[[465 110]
[113 137]
[147 375]
[474 323]]
[575 250 522 797]
Transformation
# Binarise the warped document with a fixed threshold of 100 and save it
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
_retval, ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)
cv2.imwrite('scan.jpg', ref)

# Show the original next to the scanned result
print("STEP 3: Transformation ")
cv_show("Original", resize(orig, height=650))
cv_show1("Scanned", resize(ref, height=650))
STEP 3: Transformation


OCR Character recognition
Environment configuration
Install tesseract-ocr-w64-setup-v5.0.1.20220118.exe
- Download: https://digi.bib.uni-mannheim.de/tesseract/
- Add the install directory (for example E:\Program Files (x86)\Tesseract-OCR) to the PATH environment variable
- Run `tesseract -v` to check the installation
- Run `tesseract XXX.png` on an image to get the recognition results
- pip install pytesseract
- Open pytesseract.py in Anaconda's lib/site-packages/pytesseract directory
- Change tesseract_cmd there to the absolute path of the Tesseract executable
Code
from PIL import Image
import pytesseract
import cv2
import os
import matplotlib.pyplot as plt#Matplotlib yes RGB
# Graphic display
def cv_show(name, img):
    """Display an OpenCV image with matplotlib.

    OpenCV keeps colour images in BGR channel order while matplotlib
    expects RGB, so colour images are converted before plotting.

    Args:
        name: Caption shown as the plot title.
        img: Image array; either single-channel (2-D) or a BGR colour image.
    """
    if img.ndim == 2:
        # A single-channel image has no channels to swap (it cannot be
        # unpacked into b, g, r), so render it as grayscale instead.
        plt.imshow(img, cmap="gray")
    else:
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title(name)
    plt.show()
def cv_show1(name, img):
    """Show ``img`` as-is, first with matplotlib and then in an OpenCV window.

    No channel reordering is applied.

    Args:
        name: Title of the OpenCV window.
        img: Image array to display.
    """
    # Matplotlib preview
    plt.imshow(img)
    plt.show()

    # OpenCV window; waits for a key press before closing all windows
    cv2.imshow(name, img)
    cv2.waitKey()
    cv2.destroyAllWindows()
# Pre-processing applied before OCR: "thresh" or "blur"
preprocess = 'thresh'

image = cv2.imread('scan.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

if preprocess == "thresh":
    # Otsu's method chooses the threshold automatically (the 0 is ignored)
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif preprocess == "blur":
    # Median blur with a 3x3 kernel
    gray = cv2.medianBlur(gray, 3)

# Write the pre-processed image to a temporary PNG named after the process id
filename = str(os.getpid()) + ".png"
cv2.imwrite(filename, gray)
True
# Run OCR on the temporary file. The image handle is closed by the `with`
# block and the temporary file is removed even if OCR fails.
try:
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
finally:
    os.remove(filename)
print(text)
cv_show("Image", image)
cv_show1("Output", gray)
we KK Re KK KK OK OK KK
WHOLE FOODS MARKET - WESTPORT, CT 06880
399 POST RD WEST - (203) 227-6858
365
365
365
365
BROTH
BACON
BACON
BACON
BACUN
LS
LS
LS
LS
CHIC
FLOUR ALMUND
CHKN BRST BNLSS SK
HEAVY CREAM
BALSMC REDUCT
GRND 85/15
BEEF
JUICE
COF CASHEW
L
DOCS PINT ORGANIC
HNY ALMOND BUTTER
xeene TAX
.00
BAL
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
4
4
4
99
.99
.99
mal
7 7 T
mana Ramm


边栏推荐
- [零基础学IoT Pwn] 复现Netgear WNAP320 RCE
- It's suitable for people who don't have eloquence. The benefits of joining the China Video partner program are really delicious. One video gets 3 benefits
- 适合没口才的人做,加入中视频伙伴计划收益是真香,一个视频拿3份收益
- NPDP产品经理国际认证报名有什么要求?
- 官宣:Apache Doris 顺利毕业,成为 ASF 顶级项目!
- 微服务追踪SQL(支持Isto管控下的gorm查询追踪)
- Sqlachemy common operations
- 深度分析数据在内存中的存储形式
- The first technology podcast month will be broadcast soon
- idea中新建的XML文件变成普通文件的解决方法.
猜你喜欢

What problems should be considered for outdoor LED display?

竣达技术丨多台精密空调微信云监控方案

2022-2-15 learning xiangniuke project - Section 1 filtering sensitive words

JVM第一话 -- JVM入门详解以及运行时数据区分析

微服务追踪SQL(支持Isto管控下的gorm查询追踪)

opencv学习笔记六--图像拼接
![[getting started with Django] 13 page Association MySQL](/img/78/cbf88ae3c3d311edd7d9af8c985749.jpg)
[getting started with Django] 13 page Association MySQL "multi" field table (check)
![[dynamic programming] interval dp:p1005 matrix retrieval](/img/c9/2091f51b905d2c0ebc978dab3d34d3.jpg)
[dynamic programming] interval dp:p1005 matrix retrieval

2022-2-15 learning the imitation Niuke project - Section 3 post details

The data in the database table recursively forms a closed-loop data. How can we get these data
随机推荐
Use the npoi package of net core 6 C to read excel Pictures in xlsx cells and stored to the specified server
适合没口才的人做,加入中视频伙伴计划收益是真香,一个视频拿3份收益
Ensure production safety! Guangzhou requires hazardous chemical enterprises to "not produce in an unsafe way, and keep constant communication"
Basic operations of SQL database
It's suitable for people who don't have eloquence. The benefits of joining the China Video partner program are really delicious. One video gets 3 benefits
Storage form of in-depth analysis data in memory
Written on the first day after Doris graduated
En utilisant le paquet npoi de net Core 6 c #, lisez Excel.. Image dans la cellule xlsx et stockée sur le serveur spécifié
opencv学习笔记六--图像拼接
使用net core 6 c# 的 NPOI 包,读取excel..xlsx单元格内的图片,并存储到指定服务器
Detailed explanation of ArrayList expansion, expansion principle [easy to understand]
首届技术播客月开播在即
数据产品经理需要掌握哪些数据能力?
Generate random numbers (4-bit, 6-bit)
Quelle valeur le pdnp peut - il apporter aux gestionnaires de produits? Vous savez tout?
QT capture interface is displayed as picture or label
数字化转型:数据可视化赋能销售管理
Tensorflow 2. X realizes iris classification
Solid basic structure and array, private / public function, return value and modifier of function, event
Vnctf2022 open web gocalc0