当前位置:网站首页>Opencv learning notes 5 -- document scanning +ocr character recognition
Opencv learning notes 5 -- document scanning +ocr character recognition
2022-07-01 14:55:00 【Cloudy_ to_ sunny】
OpenCV learning notes 5 -- document scanning + OCR character recognition
File scanning
# Import toolkit
import numpy as np
import argparse
import cv2
import matplotlib.pyplot as plt#Matplotlib yes RGB
Define helper functions
# Graphic display
def cv_show(name, img):
    """Display an OpenCV image with Matplotlib.

    Colour images are treated as BGR (the channel order the original code
    split into ``b, g, r``) and converted to RGB, which is what Matplotlib
    expects. Single-channel images are shown with a gray colormap instead of
    failing on the channel conversion.

    Args:
        name: Title shown above the plot.
        img: Image as a NumPy array, either 2-D (grayscale) or 3-channel BGR.
    """
    if img.ndim == 2:
        # A grayscale image has no channels to reorder.
        plt.imshow(img, cmap="gray")
    else:
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title(name)
    plt.show()
def cv_show1(name, img):
    """Show an image with Matplotlib and then in an OpenCV window.

    No channel reordering is done, so this is meant for single-channel
    images (edge maps, binarized scans). The OpenCV window stays open until
    a key is pressed and is then destroyed.

    Args:
        name: Title of the OpenCV window.
        img: Image as a NumPy array.
    """
    # Without an explicit colormap Matplotlib renders 2-D data in false colour.
    plt.imshow(img, cmap="gray" if img.ndim == 2 else None)
    plt.show()
    cv2.imshow(name, img)
    cv2.waitKey()
    cv2.destroyAllWindows()
def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Four ``(x, y)`` points, as an array-like of shape ``(4, 2)``.

    Returns:
        A ``float32`` array of shape ``(4, 2)`` holding the same points in
        the order top-left, top-right, bottom-right, bottom-left.
    """
    pts = np.asarray(pts)  # also accept plain lists/tuples of points
    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    rect[0] = pts[np.argmin(coord_sum)]
    rect[2] = pts[np.argmax(coord_sum)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    coord_diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(coord_diff)]
    rect[3] = pts[np.argmax(coord_diff)]
    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral given by ``pts`` into an axis-aligned rectangle.

    Args:
        image: Source image the points refer to.
        pts: The four corner points of the region on ``image``, in any order.

    Returns:
        The perspective-corrected ("top-down") view of the region.
    """
    # Put the corners into a known order before measuring the sides.
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Output width: the longer of the bottom and top edges.
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # Output height: the longer of the right and left edges.
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Destination corners, in the same order as ``rect``.
    dst = np.array(
        [
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1],
        ],
        dtype="float32",
    )

    # Transformation matrix from rect to dst, then apply it to the image.
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    return warped
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize an image to a target width or height, keeping the aspect ratio.

    Args:
        image: Image to resize.
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag passed to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither ``width`` nor
        ``height`` is given.
    """
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is None:
        # Scale by the height ratio and derive the matching width.
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # Scale by the width ratio and derive the matching height.
        r = width / float(w)
        dim = (width, int(h * r))
    return cv2.resize(image, dim, interpolation=inter)
# Read input
image = cv2.imread("./images/receipt.jpg")
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("could not read ./images/receipt.jpg")
cv_show("Image",image)
# The image is processed at a height of 500 px; contour coordinates found on
# that copy are multiplied by this ratio to map them back onto the original.
ratio = image.shape[0] / 500.0
print(image.shape[0])
# Keep the full-resolution image for the final perspective transform.
orig = image.copy()

2448
# Work on a copy scaled to a height of 500 px (the same 500 used for `ratio`).
image = resize(orig, height = 500)
cv_show("Image",image)

Edge detection
# Preprocessing: grayscale, 5x5 Gaussian blur, then Canny edge detection
gray = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)

# Show the working image next to its edge map
print("STEP 1: edge detection ")
cv_show("Image", image)
cv_show1("Edged", edged)
STEP 1: edge detection


# Contour detection
# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 4.x; index -2 is the contour list in both.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep only the five largest contours by area.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
# Uncomment to inspect the candidate contours:
#cv2.drawContours(image, cnts, -1, (0, 255, 0), 2)
#cv_show("Outline", image)

# Traverse the contours, largest first, looking for a quadrilateral.
screenCnt = None
for c in cnts:
    # Contour perimeter (True = closed contour).
    peri = cv2.arcLength(c, True)
    # Approximate the contour with a polygon; epsilon (2% of the perimeter)
    # is the maximum allowed distance from the original contour.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # Take the first approximation that has exactly four vertices.
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # Without this check the later steps fail with a NameError on screenCnt.
    raise RuntimeError("no 4-point contour found among the largest contours")
Get the document contour
# Display the results
print("STEP 2: Get profile ")
# Draw the selected 4-point contour onto `image` in green ((0, 255, 0) in BGR), 2 px thick.
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
cv_show("Outline", image)
STEP 2: Get profile

# Perspective transformation
# screenCnt was found on the 500-px-high copy; multiplying by `ratio` maps its
# corners back onto the full-resolution original before warping.
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
# Print the four corner points and their x + y sums (the sums order_points
# uses to pick the top-left and bottom-right corners).
print(screenCnt.reshape(4, 2))
print(screenCnt.reshape(4, 2).sum(axis = 1))
[[465 110]
[113 137]
[147 375]
[474 323]]
[575 250 522 797]
Transformation
# Binary processing
# Convert the warped document to grayscale, then binarize it with a fixed
# threshold of 100 (threshold() returns (retval, image); [1] is the image).
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
# Save the binarized scan; the OCR code below reads 'scan.jpg' back in.
cv2.imwrite('scan.jpg', ref)
# Display the results
print("STEP 3: Transformation ")
cv_show("Original", resize(orig, height = 650))
cv_show1("Scanned", resize(ref, height = 650))
STEP 3: Transformation


OCR Character recognition
Environment configuration
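Install tesseract-ocr-w64-setup-v5.0.1.20220118.exe
- Download it from https://digi.bib.uni-mannheim.de/tesseract/
- Add the installation directory (e.g. E:\Program Files (x86)\Tesseract-OCR) to the PATH environment variable
- Run `tesseract -v` to check that the installation works
- Run tesseract on an image (e.g. `tesseract XXX.png stdout`) to get the recognition results
- pip install pytesseract
- Open pytesseract.py in Anaconda's Lib\site-packages\pytesseract directory
- Change tesseract_cmd to the absolute path of the tesseract executable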
Code
from PIL import Image
import pytesseract
import cv2
import os
import matplotlib.pyplot as plt#Matplotlib yes RGB
# Graphic display
def cv_show(name, img):
    """Display an OpenCV image with Matplotlib.

    Colour images are treated as BGR (the channel order the original code
    split into ``b, g, r``) and converted to RGB, which is what Matplotlib
    expects. Single-channel images are shown with a gray colormap instead of
    failing on the channel conversion.

    Args:
        name: Title shown above the plot.
        img: Image as a NumPy array, either 2-D (grayscale) or 3-channel BGR.
    """
    if img.ndim == 2:
        # A grayscale image has no channels to reorder.
        plt.imshow(img, cmap="gray")
    else:
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title(name)
    plt.show()
def cv_show1(name, img):
    """Show an image with Matplotlib and then in an OpenCV window.

    No channel reordering is done, so this is meant for single-channel
    images (edge maps, binarized scans). The OpenCV window stays open until
    a key is pressed and is then destroyed.

    Args:
        name: Title of the OpenCV window.
        img: Image as a NumPy array.
    """
    # Without an explicit colormap Matplotlib renders 2-D data in false colour.
    plt.imshow(img, cmap="gray" if img.ndim == 2 else None)
    plt.show()
    cv2.imshow(name, img)
    cv2.waitKey()
    cv2.destroyAllWindows()
# Preprocessing mode: "thresh" (Otsu binarization) or "blur" (median blur)
preprocess = 'thresh'
image = cv2.imread('scan.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("could not read scan.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
if preprocess == "thresh":
    # With THRESH_OTSU the threshold is chosen automatically from the image
    # histogram, so the 0 passed as the threshold value is ignored.
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif preprocess == "blur":
    # 3x3 median blur to remove salt-and-pepper noise.
    gray = cv2.medianBlur(gray, 3)
# Write the preprocessed image to a temporary file named after the process id.
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
True
# Run OCR on the temporary file. The context manager closes the image file
# handle (an open handle can block deletion on Windows), and the finally
# clause removes the temporary file even if OCR fails.
try:
    with Image.open(filename) as pil_image:
        text = pytesseract.image_to_string(pil_image)
finally:
    os.remove(filename)
print(text)
cv_show("Image", image)
cv_show1("Output", gray)
we KK Re KK KK OK OK KK
WHOLE FOODS MARKET - WESTPORT, CT 06880
399 POST RD WEST - (203) 227-6858
365
365
365
365
BROTH
BACON
BACON
BACON
BACUN
LS
LS
LS
LS
CHIC
FLOUR ALMUND
CHKN BRST BNLSS SK
HEAVY CREAM
BALSMC REDUCT
GRND 85/15
BEEF
JUICE
COF CASHEW
L
DOCS PINT ORGANIC
HNY ALMOND BUTTER
xeene TAX
.00
BAL
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
4
4
4
99
.99
.99
mal
7 7 T
mana Ramm


边栏推荐
- C#学习笔记(5)类和继承
- MIT团队使用图神经网络,加速无定形聚合物电解质筛选,促进下一代锂电池技术开发
- After twists and turns, I finally found the method of SRC vulnerability mining [recommended collection]
- What data capabilities do data product managers need to master?
- What are the books that have greatly improved the thinking and ability of programming?
- Problem note - Oracle 11g uninstall
- 【牛客网刷题系列 之 Verilog快速入门】~ 多功能数据处理器、求两个数的差值、使用generate…for语句简化代码、使用子模块实现三输入数的大小比较
- 这3款在线PS工具,得试试
- TypeScript:var
- Chapter 4 of getting started with MySQL: creation, modification and deletion of data tables
猜你喜欢

241. 为运算表达式设计优先级

炎炎夏日,这份安全用气指南请街坊们收好!
![[Verilog quick start of Niuke series] ~ multi function data processor, calculate the difference between two numbers, use generate... For statement to simplify the code, and use sub modules to realize](/img/30/aea4ae24f418eb971bca77a1d46bef.png)
[Verilog quick start of Niuke series] ~ multi function data processor, calculate the difference between two numbers, use generate... For statement to simplify the code, and use sub modules to realize

What are the books that have greatly improved the thinking and ability of programming?

JVM second conversation -- JVM memory model and garbage collection

Fundamentals of C language

Guess lantern riddles, not programmers still can't understand?

111. Minimum depth of binary tree

Build your own website (14)
![[leetcode 324] swing sorting II thinking + sorting](/img/cb/26d89e1a1f548b75a5ef9f29eebeee.png)
[leetcode 324] swing sorting II thinking + sorting
随机推荐
Hidden rules of the workplace that must be understood before 30
生成随机数(4位、6位)
[leetcode 324] 摆动排序 II 思维+排序
Research Report on the development trend and competitive strategy of the global camera filter bracket industry
tensorflow2-savedmodel convert to pb(frozen_graph)
Opencv interpolation mode
En utilisant le paquet npoi de net Core 6 c #, lisez Excel.. Image dans la cellule xlsx et stockée sur le serveur spécifié
643. Maximum average number of subarrays I
[stage life summary] I gave up the postgraduate entrance examination and participated in the work. I have successfully graduated and just received my graduation certificate yesterday
NPDP产品经理国际认证报名有什么要求?
Internet hospital system source code hospital applet source code smart hospital source code online consultation system source code
数据产品经理需要掌握哪些数据能力?
关于重载运算符的再整理
从零开发小程序和公众号【第三期】
竣达技术丨室内空气环境监测终端 pm2.5、温湿度TVOC等多参数监测
[Verilog quick start of Niuke question series] ~ use functions to realize data size conversion
The State Administration of Chia Tai market supervision, the national development and Reform Commission and the China Securities Regulatory Commission jointly reminded and warned some iron ores
Fundamentals of C language
关于软件测试的一些思考
C#学习笔记(5)类和继承