当前位置:网站首页>Opencv learning notes 5 -- document scanning +ocr character recognition
Opencv learning notes 5 -- document scanning +ocr character recognition
2022-07-01 14:55:00 【Cloudy_ to_ sunny】
OpenCV learning notes 5 -- Document scanning + OCR character recognition
Document scanning
# Import toolkit
import numpy as np
import argparse
import cv2
import matplotlib.pyplot as plt#Matplotlib yes RGB
Function definitions
# Graphic display
def cv_show(name, img):
    """Display a three-channel BGR image with Matplotlib.

    The channels are split and re-merged in reverse order so that the
    image is plotted as RGB, which is what Matplotlib expects.

    Args:
        name: Unused by this function.
        img: Image that ``cv2.split`` separates into exactly three channels
            (blue, green, red).
    """
    blue, green, red = cv2.split(img)
    plt.imshow(cv2.merge((red, green, blue)))
    plt.show()
def cv_show1(name, img):
    """Display an image with Matplotlib, then in a blocking OpenCV window.

    Args:
        name: Title of the OpenCV window.
        img: Image to display. Two-dimensional (single-channel) arrays are
            plotted with a gray colormap; anything else is handed to
            Matplotlib unchanged.
    """
    # Without an explicit colormap Matplotlib renders 2-D arrays with its
    # default pseudo-colour map, so edge/binary images would not look gray.
    if getattr(img, "ndim", None) == 2:
        plt.imshow(img, cmap="gray")
    else:
        plt.imshow(img)
    plt.show()
    # Also show the image in a native OpenCV window and wait for a key press.
    cv2.imshow(name, img)
    cv2.waitKey()
    cv2.destroyAllWindows()
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Corner coordinates as (x, y) pairs. Accepts an ndarray of shape
            (N, 2), a raw OpenCV contour of shape (N, 1, 2), or a nested
            list/tuple; the input is flattened to (N, 2) before use.

    Returns:
        A float32 array of shape (4, 2) holding the selected points in the
        order top-left, top-right, bottom-right, bottom-left.
    """
    # Accept lists and (N, 1, 2) contour arrays as well as (N, 2) arrays.
    pts = np.asarray(pts).reshape(-1, 2)

    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: The four corner coordinates of the region in ``image``; they are
            ordered with ``order_points`` before use.

    Returns:
        The perspective-corrected image of size (max_width, max_height).
    """

    def edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges (truncated to int).
    bottom_edge = edge_length(bottom_right, bottom_left)
    top_edge = edge_length(top_right, top_left)
    max_width = max(int(bottom_edge), int(top_edge))

    # Output height: the longer of the right and left edges (truncated to int).
    right_edge = edge_length(top_right, bottom_right)
    left_edge = edge_length(top_left, bottom_left)
    max_height = max(int(right_edge), int(left_edge))

    # Destination corners, in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )

    # Transformation matrix mapping ``corners`` onto ``target``.
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (max_width, max_height))
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a given width or height, keeping the aspect ratio.

    Args:
        image: Image whose ``shape[:2]`` is (height, width).
        width: Target width. Takes precedence over ``height`` when given.
        height: Target height; used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The input image itself when neither ``width`` nor ``height`` is
        given, otherwise the resized image.
    """
    src_h, src_w = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    return cv2.resize(image, target, interpolation=inter)
# Read the input image
image = cv2.imread("./images/receipt.jpg")
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image: ./images/receipt.jpg")
cv_show("Image", image)
# Scale factor between the original height and the 500-pixel working height;
# coordinates found on the resized image are multiplied by it later.
ratio = image.shape[0] / 500.0
print(image.shape[0])
# Keep an untouched full-resolution copy for the final warp.
orig = image.copy()

2448
# Work on a copy scaled to a height of 500 pixels (matches ``ratio``).
image = resize(orig, height=500)
cv_show("Image", image)

Edge detection
# Preprocessing: grayscale -> 5x5 Gaussian blur -> Canny edge map
gray = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)

# Show the resized input next to the detected edges
print("STEP 1: edge detection ")
cv_show("Image", image)
cv_show1("Edged", edged)
STEP 1: edge detection


# Contour detection
# cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in OpenCV 2.x/4.x; the contour list is always the
# second-to-last element, so index it from the end.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep only the five largest contours by area
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
#cv2.drawContours(image, cnts, -1, (0, 255, 0), 2)
#cv_show("Outline", image)

# Walk the contours from largest to smallest and take the first one whose
# polygon approximation has exactly four vertices.
screenCnt = None
for c in cnts:
    # Perimeter of the (closed) contour
    peri = cv2.arcLength(c, True)
    # c is the input point set; epsilon (2% of the perimeter) is the maximum
    # distance allowed between the contour and its approximation; True means
    # the curve is closed.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # Fail here with a clear message rather than with a NameError later on.
    raise RuntimeError("No four-point contour found in the edge map")
Get the contour
# Draw the selected four-point contour on the resized image and show it
print("STEP 2: Get profile ")
outline_colour = (0, 255, 0)  # green in BGR
outline_thickness = 2
cv2.drawContours(image, [screenCnt], -1, outline_colour, outline_thickness)
cv_show("Outline", image)
STEP 2: Get profile

# Perspective transformation
# The contour was found on the resized image, so scale its corners by
# ``ratio`` before warping the full-resolution original.
corners = screenCnt.reshape(4, 2)
warped = four_point_transform(orig, corners * ratio)
print(corners)
print(corners.sum(axis=1))
[[465 110]
[113 137]
[147 375]
[474 323]]
[575 250 522 797]
Perspective transformation
# Binarisation: grayscale, then a fixed threshold of 100
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
_retval, ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)
cv2.imwrite('scan.jpg', ref)

# Show the original next to the scanned result, both scaled to 650 px high
print("STEP 3: Transformation ")
cv_show("Original", resize(orig, height=650))
cv_show1("Scanned", resize(ref, height=650))
STEP 3: Transformation


OCR Character recognition
Environment configuration
Install tesseract-ocr-w64-setup-v5.0.1.20220118.exe
- Download it from https://digi.bib.uni-mannheim.de/tesseract/
- Add the installation directory to the environment variables, e.g. E:\Program Files (x86)\Tesseract-OCR
- Run tesseract -v to test the installation
- Run tesseract XXX.png to get the recognition results
- pip install pytesseract
- Open pytesseract.py in Anaconda's lib\site-packages\pytesseract directory
- Change tesseract_cmd to the absolute path of the tesseract executable
Code
from PIL import Image
import pytesseract
import cv2
import os
import matplotlib.pyplot as plt#Matplotlib yes RGB
# Graphic display
def cv_show(name, img):
    """Display a three-channel BGR image with Matplotlib.

    The channels are split and re-merged in reverse order so that the
    image is plotted as RGB, which is what Matplotlib expects.

    Args:
        name: Unused by this function.
        img: Image that ``cv2.split`` separates into exactly three channels
            (blue, green, red).
    """
    blue, green, red = cv2.split(img)
    plt.imshow(cv2.merge((red, green, blue)))
    plt.show()
def cv_show1(name, img):
    """Display an image with Matplotlib, then in a blocking OpenCV window.

    Args:
        name: Title of the OpenCV window.
        img: Image to display. Two-dimensional (single-channel) arrays are
            plotted with a gray colormap; anything else is handed to
            Matplotlib unchanged.
    """
    # Without an explicit colormap Matplotlib renders 2-D arrays with its
    # default pseudo-colour map, so edge/binary images would not look gray.
    if getattr(img, "ndim", None) == 2:
        plt.imshow(img, cmap="gray")
    else:
        plt.imshow(img)
    plt.show()
    # Also show the image in a native OpenCV window and wait for a key press.
    cv2.imshow(name, img)
    cv2.waitKey()
    cv2.destroyAllWindows()
# Preprocessing mode applied before OCR: "thresh" or "blur"
preprocess = 'thresh'
image = cv2.imread('scan.jpg')
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image: scan.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
if preprocess == "thresh":
    # THRESH_OTSU selects the threshold automatically (Otsu's method)
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
if preprocess == "blur":
    # 3x3 median blur
    gray = cv2.medianBlur(gray, 3)
# Write the preprocessed image to a temporary file named after the process id
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
True
# Run Tesseract on the temporary file. The image handle is closed before the
# file is deleted, and the file is removed even if OCR raises.
try:
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
finally:
    os.remove(filename)
print(text)
cv_show("Image", image)
cv_show1("Output", gray)
we KK Re KK KK OK OK KK
WHOLE FOODS MARKET - WESTPORT, CT 06880
399 POST RD WEST - (203) 227-6858
365
365
365
365
BROTH
BACON
BACON
BACON
BACUN
LS
LS
LS
LS
CHIC
FLOUR ALMUND
CHKN BRST BNLSS SK
HEAVY CREAM
BALSMC REDUCT
GRND 85/15
BEEF
JUICE
COF CASHEW
L
DOCS PINT ORGANIC
HNY ALMOND BUTTER
xeene TAX
.00
BAL
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
4
4
4
99
.99
.99
mal
7 7 T
mana Ramm


边栏推荐
- How to view the state-owned enterprises have unloaded Microsoft office and switched to Kingsoft WPS?
- tensorflow2-savedmodel convert to pb(frozen_graph)
- What are the requirements for NPDP product manager international certification registration?
- What data capabilities do data product managers need to master?
- tensorflow2-savedmodel convert to pb(frozen_graph)
- JVM performance tuning and practical basic theory part II
- 项目中字符串判空总结
- 30 Devops interview questions and answers
- Zabbix API与PHP的配置
- What is the relationship between network speed, broadband, bandwidth and traffic?
猜你喜欢

Don't want to knock the code? Here comes the chance
![[zero basic IOT pwn] reproduce Netgear wnap320 rce](/img/f7/d683df1d4b1b032164a529d3d94615.png)
[zero basic IOT pwn] reproduce Netgear wnap320 rce
![[leetcode 324] 摆动排序 II 思维+排序](/img/cb/26d89e1a1f548b75a5ef9f29eebeee.png)
[leetcode 324] 摆动排序 II 思维+排序

对于编程思想和能力有重大提升的书有哪些?

JVM performance tuning and practical basic theory part II

2022-2-15 learning the imitation Niuke project - post in Section 2
![[leetcode 324] swing sorting II thinking + sorting](/img/cb/26d89e1a1f548b75a5ef9f29eebeee.png)
[leetcode 324] swing sorting II thinking + sorting

Opencv interpolation mode

Word2vec yyds dry goods inventory

How to view the state-owned enterprises have unloaded Microsoft office and switched to Kingsoft WPS?
随机推荐
idea中新建的XML文件变成普通文件的解决方法.
How to view the state-owned enterprises have unloaded Microsoft office and switched to Kingsoft WPS?
Build your own website (14)
Filter &(登录拦截)
Take you to API development by hand
[leetcode 324] swing sorting II thinking + sorting
Reorganize the trivial knowledge points at the end of the term
Some thoughts on software testing
The State Administration of Chia Tai market supervision, the national development and Reform Commission and the China Securities Regulatory Commission jointly reminded and warned some iron ores
opencv学习笔记四--银行卡号识别
What problems should be considered for outdoor LED display?
Semiconductor foundation of binary realization principle
The first technology podcast month will be broadcast soon
[零基础学IoT Pwn] 复现Netgear WNAP320 RCE
Apk signature principle
[Verilog quick start of Niuke series] ~ multi function data processor, calculate the difference between two numbers, use generate... For statement to simplify the code, and use sub modules to realize
Generate random numbers (4-bit, 6-bit)
111. Minimum depth of binary tree
opencv学习笔记六--图像拼接
DirectX repair tool v4.1 public beta! [easy to understand]