当前位置:网站首页>OpenCV learning notes 5 -- Document scanning + OCR character recognition
OpenCV learning notes 5 -- Document scanning + OCR character recognition
2022-07-01 14:55:00 【Cloudy_ to_ sunny】
OpenCV learning notes 5 -- Document scanning + OCR character recognition
Document scanning
# Import toolkit
import numpy as np
import argparse
import cv2
import matplotlib.pyplot as plt#Matplotlib yes RGB
Function definitions
# Graphic display
def cv_show(name, img):
    """Display an OpenCV image with Matplotlib.

    OpenCV keeps colour images in BGR channel order while Matplotlib expects
    RGB, so colour images are converted before being drawn.

    Args:
        name: Caption for the figure; shown as the plot title.
        img: NumPy image array, either single-channel (2-D) or BGR colour.
    """
    if img.ndim == 2:
        # A single-channel image has no channels to reorder; draw it with a
        # grey colormap instead of failing on a three-way channel split.
        plt.imshow(img, cmap="gray")
    else:
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title(name)
    plt.show()
def cv_show1(name, img):
    """Display an image with Matplotlib, then in an OpenCV window.

    Intended for images that need no BGR->RGB conversion, such as
    single-channel edge maps or binarized scans.

    Args:
        name: Figure title and OpenCV window name.
        img: NumPy image array to display.
    """
    # cmap only affects 2-D (single-channel) data; Matplotlib ignores it for
    # RGB(A) input, so passing it unconditionally is safe. Without it a
    # grayscale image is rendered with the false-colour default colormap.
    plt.imshow(img, cmap="gray")
    plt.title(name)
    plt.show()
    cv2.imshow(name, img)
    # waitKey() with no delay argument blocks until a key is pressed.
    cv2.waitKey()
    cv2.destroyAllWindows()
def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Four (x, y) points in any order. Anything NumPy can view as
            4 x 2 is accepted: a (4, 2) array, a nested list, or an OpenCV
            contour of shape (4, 1, 2).

    Returns:
        A (4, 2) float32 array whose rows are, in order, the top-left,
        top-right, bottom-right and bottom-left points.

    Raises:
        ValueError: If ``pts`` does not hold exactly four 2-D points.
    """
    # Normalise the input so lists and (4, 1, 2) contours work as well;
    # reshape raises ValueError when the point count is wrong.
    pts = np.asarray(pts).reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    rect[0] = pts[np.argmin(coord_sum)]
    rect[2] = pts[np.argmax(coord_sum)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    coord_diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(coord_diff)]
    rect[3] = pts[np.argmax(coord_diff)]
    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: The four corner points of the region in ``image``, in any order;
            they are sorted with ``order_points`` first.

    Returns:
        The warped image. Its width and height are the (truncated) lengths of
        the longer of each pair of opposite edges of the quadrilateral.
    """

    def edge_length(p, q):
        # Euclidean distance between two (x, y) corners.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    src = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = src

    # Output width: the longer of the bottom and top edges.
    out_w = max(
        int(edge_length(bottom_right, bottom_left)),
        int(edge_length(top_right, top_left)),
    )
    # Output height: the longer of the right and left edges.
    out_h = max(
        int(edge_length(top_right, bottom_right)),
        int(edge_length(top_left, bottom_left)),
    )

    # Destination corners, in the same TL, TR, BR, BL order as ``src``.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    # Homography mapping src -> target, then apply it to the image.
    matrix = cv2.getPerspectiveTransform(src, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a given width or height, keeping the aspect ratio.

    Args:
        image: Image array; its first two shape entries are (height, width).
        width: Target width in pixels. Takes precedence when both ``width``
            and ``height`` are given (``height`` is then ignored).
        height: Target height in pixels; used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself (not a copy) when neither
        ``width`` nor ``height`` is given.
    """
    src_h, src_w = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    # cv2.resize takes the size as (width, height).
    return cv2.resize(image, target, interpolation=inter)
# Read input
image = cv2.imread("./images/receipt.jpg")
if image is None:
    # cv2.imread does not raise on a missing or unreadable file; it returns
    # None, which would otherwise surface later as an unrelated error.
    raise FileNotFoundError("Could not read image: ./images/receipt.jpg")
cv_show("Image", image)
# Scale factor between the original height and a 500 px tall working copy;
# coordinates found on the small copy are multiplied by it to map them back
# onto the original image.
ratio = image.shape[0] / 500.0
print(image.shape[0])
# Keep the full-resolution image untouched.
orig = image.copy()

2448
# Work on a copy downscaled to a height of 500 px (the value `ratio` is based
# on); the full-resolution `orig` is left untouched.
image = resize(orig, height = 500)
cv_show("Image",image)

Edge detection
# Preprocessing: grayscale -> 5x5 Gaussian blur -> Canny edge map.
gray = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)

# Show the working image next to its edge map.
print("STEP 1: edge detection ")
cv_show("Image", image)
cv_show1("Edged", edged)
STEP 1: edge detection


# Contour detection
# cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in 2.x and 4.x; index -2 is the contour list in all
# of them, whereas index 1 is the hierarchy on 4.x.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep only the five largest contours by area.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
# Uncomment to inspect the candidate contours:
#cv2.drawContours(image, cnts, -1, (0, 255, 0), 2)
#cv_show("Outline", image)

# Walk the candidates from largest to smallest and take the first one that
# simplifies to a quadrilateral.
screenCnt = None
for c in cnts:
    # Perimeter of the closed contour.
    peri = cv2.arcLength(c, True)
    # Approximate the contour with a polygon. epsilon (2% of the perimeter)
    # is the maximum allowed distance between the contour and its
    # approximation; True marks the curve as closed.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # Four vertices: treat it as the outline of the document.
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # Fail here with a clear message instead of a NameError further down.
    raise RuntimeError(
        "No four-point contour found among the five largest contours")
Get the contour
# Display the results: draw the detected quadrilateral onto the working image.
print("STEP 2: Get profile ")
# contourIdx -1 draws every contour in the list; colour (0, 255, 0) is green,
# line thickness is 2 px. drawContours modifies `image` in place.
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
cv_show("Outline", image)
STEP 2: Get profile

# Perspective transformation
# screenCnt was found on the downscaled image; multiplying by `ratio` maps its
# corners onto the full-resolution `orig`, which is the image that is warped.
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
# Debug output: the four corners (in downscaled coordinates) and their x + y sums.
print(screenCnt.reshape(4, 2))
print(screenCnt.reshape(4, 2).sum(axis = 1))
[[465 110]
[113 137]
[147 375]
[474 323]]
[575 250 522 797]
Transformation
# Binarize the warped document: grayscale, then a fixed threshold at 100
# (THRESH_BINARY sets pixels above the threshold to 255 and the rest to 0).
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
# Save the scan; the OCR step below reads this file.
cv2.imwrite('scan.jpg', ref)
# Display the results: the original and the scan, both resized to 650 px tall.
print("STEP 3: Transformation ")
cv_show("Original", resize(orig, height = 650))
cv_show1("Scanned", resize(ref, height = 650))
STEP 3: Transformation


OCR Character recognition
Environment configuration
Install tesseract-ocr-w64-setup-v5.0.1.20220118.exe
- Download it from https://digi.bib.uni-mannheim.de/tesseract/
- Add the installation directory (for example E:\Program Files (x86)\Tesseract-OCR) to the environment variables
- Run `tesseract -v` to test the installation
- Run `tesseract XXX.png` on an image to get the recognition results
- pip install pytesseract
- In the Anaconda installation, open lib/site-packages/pytesseract/pytesseract.py
- Change `tesseract_cmd` to the absolute path of the tesseract executable
Code
from PIL import Image
import pytesseract
import cv2
import os
import matplotlib.pyplot as plt#Matplotlib yes RGB
# Graphic display
def cv_show(name, img):
    """Display an OpenCV image with Matplotlib.

    OpenCV keeps colour images in BGR channel order while Matplotlib expects
    RGB, so colour images are converted before being drawn.

    Args:
        name: Caption for the figure; shown as the plot title.
        img: NumPy image array, either single-channel (2-D) or BGR colour.
    """
    if img.ndim == 2:
        # A single-channel image has no channels to reorder; draw it with a
        # grey colormap instead of failing on a three-way channel split.
        plt.imshow(img, cmap="gray")
    else:
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title(name)
    plt.show()
def cv_show1(name, img):
    """Display an image with Matplotlib, then in an OpenCV window.

    Intended for images that need no BGR->RGB conversion, such as
    single-channel edge maps or binarized scans.

    Args:
        name: Figure title and OpenCV window name.
        img: NumPy image array to display.
    """
    # cmap only affects 2-D (single-channel) data; Matplotlib ignores it for
    # RGB(A) input, so passing it unconditionally is safe. Without it a
    # grayscale image is rendered with the false-colour default colormap.
    plt.imshow(img, cmap="gray")
    plt.title(name)
    plt.show()
    cv2.imshow(name, img)
    # waitKey() with no delay argument blocks until a key is pressed.
    cv2.waitKey()
    cv2.destroyAllWindows()
preprocess = 'thresh'  # preprocessing mode: 'thresh' or 'blur'
image = cv2.imread('scan.jpg')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read image: scan.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
if preprocess == "thresh":
    # Otsu binarization: the threshold argument (0) is a placeholder because
    # THRESH_OTSU picks the threshold from the image histogram.
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif preprocess == "blur":
    # Median blur with a 3x3 kernel.
    gray = cv2.medianBlur(gray, 3)
# Write the preprocessed image to a temporary PNG named after the process id
# so it can be reopened with PIL for OCR.
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
True
try:
    # Close the PIL file handle before the file is deleted (a still-open
    # handle can block removal on Windows).
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
finally:
    # Remove the temporary image even if OCR fails.
    os.remove(filename)
print(text)
cv_show("Image", image)
cv_show1("Output", gray)
we KK Re KK KK OK OK KK
WHOLE FOODS MARKET - WESTPORT, CT 06880
399 POST RD WEST - (203) 227-6858
365
365
365
365
BROTH
BACON
BACON
BACON
BACUN
LS
LS
LS
LS
CHIC
FLOUR ALMUND
CHKN BRST BNLSS SK
HEAVY CREAM
BALSMC REDUCT
GRND 85/15
BEEF
JUICE
COF CASHEW
L
DOCS PINT ORGANIC
HNY ALMOND BUTTER
xeene TAX
.00
BAL
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
4
4
4
99
.99
.99
mal
7 7 T
mana Ramm


边栏推荐
- Redis安装及Ubuntu 14.04下搭建ssdb主从环境
- Mongodb second call -- implementation of mongodb high availability cluster
- What data capabilities do data product managers need to master?
- Storage form of in-depth analysis data in memory
- The first technology podcast month will be broadcast soon
- It's suitable for people who don't have eloquence. The benefits of joining the China Video partner program are really delicious. One video gets 3 benefits
- The markdown editor uses basic syntax
- Solid basic basic grammar and definition function
- The data in the database table recursively forms a closed-loop data. How can we get these data
- 微服务开发步骤(nacos)
猜你喜欢

建立自己的网站(14)

JVM performance tuning and practical basic theory part II

问题随记 —— Oracle 11g 卸载

Problem note - Oracle 11g uninstall

The first word of JVM -- detailed introduction to JVM and analysis of runtime data area

手把手带你入门 API 开发

The data in the database table recursively forms a closed-loop data. How can we get these data

Filter &(登录拦截)

官宣:Apache Doris 顺利毕业,成为 ASF 顶级项目!

The markdown editor uses basic syntax
随机推荐
tensorflow2-savedmodel convert to tflite
Apk signature principle
The data in the database table recursively forms a closed-loop data. How can we get these data
What value can NPDP bring to product managers? Do you know everything?
微服务追踪SQL(支持Isto管控下的gorm查询追踪)
Opencv interpolation mode
Minimum spanning tree and bipartite graph in graph theory (acwing template)
Semiconductor foundation of binary realization principle
One of the data Lake series | you must love to read the history of minimalist data platforms, from data warehouse, data lake to Lake warehouse
tensorflow2-savedmodel convert to pb(frozen_graph)
Rearrangement of overloaded operators
DirectX修复工具V4.1公测![通俗易懂]
期末琐碎知识点再整理
opencv学习笔记六--图像拼接
Microservice development steps (Nacos)
Written on the first day after Doris graduated
30 Devops interview questions and answers
241. Design priorities for operational expressions
TypeScript:var
问题随记 —— Oracle 11g 卸载