当前位置:网站首页>Opencv learning notes 5 -- document scanning +ocr character recognition
Opencv learning notes 5 -- document scanning +ocr character recognition
2022-07-01 14:55:00 【Cloudy_ to_ sunny】
OpenCV learning notes 5 -- document scanning + OCR character recognition
Document scanning
# Import toolkit
import numpy as np
import argparse
import cv2
import matplotlib.pyplot as plt#Matplotlib yes RGB
Function definitions
# Graphic display
def cv_show(name, img):
    """Display a BGR image with matplotlib.

    The channels are split and re-merged in reverse order so that an image
    stored as B, G, R is handed to ``plt.imshow`` as R, G, B.

    Args:
        name: Accepted for call compatibility; not used by this function.
        img: Image with exactly three channels (it is unpacked into b, g, r).
    """
    blue, green, red = cv2.split(img)
    plt.imshow(cv2.merge((red, green, blue)))
    plt.show()
def cv_show1(name, img):
    """Show an image unchanged, first with matplotlib, then in an OpenCV window.

    No channel reordering is applied; the callers in this file pass
    single-channel images.

    Args:
        name: Title of the OpenCV window.
        img: Image to display.
    """
    # matplotlib view
    plt.imshow(img)
    plt.show()

    # OpenCV window; a delay of 0 blocks until a key is pressed.
    cv2.imshow(name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Array-like of (x, y) points. Anything that reshapes to
            ``(-1, 2)`` is accepted: a ``(4, 2)`` array, a nested list, or a
            ``(4, 1, 2)`` array.

    Returns:
        A ``(4, 2)`` float32 array laid out as ``[tl, tr, br, bl]``.
    """
    # Coerce up front so plain lists and (4, 1, 2) arrays work as well;
    # without this, ``.sum(axis=1)`` on a (4, 1, 2) array picks wrong corners.
    pts = np.asarray(pts).reshape(-1, 2)
    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    rect[0] = pts[np.argmin(coord_sum)]
    rect[2] = pts[np.argmax(coord_sum)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    coord_diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(coord_diff)]
    rect[3] = pts[np.argmax(coord_diff)]
    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an axis-aligned rectangle.

    Args:
        image: Source image handed to ``cv2.warpPerspective``.
        pts: Four corner coordinates in ``image``; they are put into
            top-left, top-right, bottom-right, bottom-left order by
            ``order_points`` before use.

    Returns:
        The warped image. Its width is the longer of the bottom and top
        edges and its height the longer of the right and left edges, each
        truncated to an integer.
    """
    def edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output size: longest opposing edge in each direction.
    out_w = max(
        int(edge_length(bottom_right, bottom_left)),
        int(edge_length(top_right, top_left)),
    )
    out_h = max(
        int(edge_length(top_right, bottom_right)),
        int(edge_length(top_left, bottom_left)),
    )

    # Destination corners, in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    # Matrix mapping ``corners`` onto ``target``, then apply it.
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: Input image; its first two ``shape`` entries are read as
            (height, width).
        width: Target width in pixels. Takes precedence when both ``width``
            and ``height`` are given.
        height: Target height in pixels; used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither dimension is given.
    """
    src_h, src_w = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    return cv2.resize(image, target_size, interpolation=inter)
# Read input
image = cv2.imread("./images/receipt.jpg")
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising, so fail here with a clear message.
    raise FileNotFoundError("could not read image: ./images/receipt.jpg")
cv_show("Image", image)
# Contours are detected on a copy resized to a height of 500 px; this ratio
# scales their coordinates back to the original resolution.
ratio = image.shape[0] / 500.0
print(image.shape[0])
# Keep the full-resolution image for the final perspective warp.
orig = image.copy()

2448
# Work on a copy scaled to a height of 500 px (matches the 500.0 used for ratio)
image = resize(orig, height = 500)
cv_show("Image",image)

Edge detection
# Preprocessing: grayscale -> 5x5 Gaussian blur -> Canny edge map
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
# 75 and 200 are the two Canny hysteresis thresholds
edged = cv2.Canny(gray, 75, 200)
# Show the resized input next to the edge map
print("STEP 1: edge detection ")
cv_show("Image", image)
cv_show1("Edged", edged)
STEP 1: edge detection


# Contour detection
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x, so select the contour list by the
# length of the returned tuple instead of a fixed index.
found = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]
# Keep only the five largest contours by area.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Walk the candidates from largest to smallest and take the first one whose
# polygonal approximation has exactly four vertices.
screenCnt = None
for c in cnts:
    # Perimeter of the closed contour.
    peri = cv2.arcLength(c, True)
    # epsilon (2% of the perimeter) is the maximum allowed distance between
    # the original contour and its approximation; True means closed.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # Fail here instead of hitting a NameError when screenCnt is used later.
    raise RuntimeError("no four-point contour found among the largest contours")
Get the contour
# Display the results: draw the selected four-point contour on the resized
# image (contour index -1 = draw all contours in the list, colour (0, 255, 0),
# line thickness 2)
print("STEP 2: Get profile ")
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
cv_show("Outline", image)
STEP 2: Get profile

# Perspective transformation
# The contour was found on the resized image, so its coordinates are
# multiplied by ratio before warping the full-resolution original.
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
# Debug output: the four corner points and their x + y sums
print(screenCnt.reshape(4, 2))
print(screenCnt.reshape(4, 2).sum(axis = 1))
[[465 110]
[113 137]
[147 375]
[474 323]]
[575 250 522 797]
Transformation
# Binary processing: grayscale, then a fixed threshold at 100
# (pixels above 100 become 255, the rest 0)
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
# Save the scan; the OCR section below reads this file back
cv2.imwrite('scan.jpg', ref)
# Display the original and the scanned result, both resized to a height of 650 px
print("STEP 3: Transformation ")
cv_show("Original", resize(orig, height = 650))
cv_show1("Scanned", resize(ref, height = 650))
STEP 3: Transformation


OCR character recognition
Environment configuration
Install tesseract-ocr-w64-setup-v5.0.1.20220118.exe
- Download it from https://digi.bib.uni-mannheim.de/tesseract/
- Add the installation directory to the environment variables, e.g. E:\Program Files (x86)\Tesseract-OCR
- Run `tesseract -v` to test the installation
- Run `tesseract XXX.png` to get the recognition results
- pip install pytesseract
- Open pytesseract.py under Anaconda's Lib\site-packages\pytesseract
- and change tesseract_cmd to the absolute path of the tesseract executable
Code
from PIL import Image
import pytesseract
import cv2
import os
import matplotlib.pyplot as plt#Matplotlib yes RGB
# Graphic display
def cv_show(name, img):
    """Display a BGR image with matplotlib.

    The channels are split and re-merged in reverse order so that an image
    stored as B, G, R is handed to ``plt.imshow`` as R, G, B.

    Args:
        name: Accepted for call compatibility; not used by this function.
        img: Image with exactly three channels (it is unpacked into b, g, r).
    """
    blue, green, red = cv2.split(img)
    plt.imshow(cv2.merge((red, green, blue)))
    plt.show()
def cv_show1(name, img):
    """Show an image unchanged, first with matplotlib, then in an OpenCV window.

    No channel reordering is applied.

    Args:
        name: Title of the OpenCV window.
        img: Image to display.
    """
    # matplotlib view
    plt.imshow(img)
    plt.show()

    # OpenCV window; a delay of 0 blocks until a key is pressed.
    cv2.imshow(name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Preprocessing mode: "thresh" (Otsu binarization) or "blur" (median blur).
preprocess = 'thresh'

# Load the scan written by the document-scanning step and convert to grayscale.
image = cv2.imread('scan.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

if preprocess == "thresh":
    # Otsu's method chooses the threshold automatically, so the 0 is ignored.
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif preprocess == "blur":
    # 3x3 median blur.
    gray = cv2.medianBlur(gray, 3)

# Write the preprocessed image to a temporary PNG named after the process id.
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
True
# Run OCR on the temporary file and always delete it afterwards.
try:
    # Image.open keeps the underlying file open; the context manager closes
    # it before os.remove runs (deleting an open file fails on Windows).
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
    print(text)
finally:
    # Remove the temporary file even if OCR raised.
    os.remove(filename)
cv_show("Image", image)
cv_show1("Output", gray)
we KK Re KK KK OK OK KK
WHOLE FOODS MARKET - WESTPORT, CT 06880
399 POST RD WEST - (203) 227-6858
365
365
365
365
BROTH
BACON
BACON
BACON
BACUN
LS
LS
LS
LS
CHIC
FLOUR ALMUND
CHKN BRST BNLSS SK
HEAVY CREAM
BALSMC REDUCT
GRND 85/15
BEEF
JUICE
COF CASHEW
L
DOCS PINT ORGANIC
HNY ALMOND BUTTER
xeene TAX
.00
BAL
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
NP
4
4
4
99
.99
.99
mal
7 7 T
mana Ramm


边栏推荐
- Fundamentals of C language
- [Verilog quick start of Niuke series] ~ multi function data processor, calculate the difference between two numbers, use generate... For statement to simplify the code, and use sub modules to realize
- [zero basic IOT pwn] reproduce Netgear wnap320 rce
- 手把手带你入门 API 开发
- 使用net core 6 c# 的 NPOI 包,读取excel..xlsx单元格内的图片,并存储到指定服务器
- Rearrangement of overloaded operators
- 竣达技术丨多台精密空调微信云监控方案
- tensorflow2-savedmodel convert to pb(frozen_graph)
- MIT team used graph neural network to accelerate the screening of amorphous polymer electrolytes and promote the development of next-generation lithium battery technology
- The State Administration of Chia Tai market supervision, the national development and Reform Commission and the China Securities Regulatory Commission jointly reminded and warned some iron ores
猜你喜欢
![[zero basic IOT pwn] reproduce Netgear wnap320 rce](/img/f7/d683df1d4b1b032164a529d3d94615.png)
[zero basic IOT pwn] reproduce Netgear wnap320 rce

One of the data Lake series | you must love to read the history of minimalist data platforms, from data warehouse, data lake to Lake warehouse

Word2vec yyds dry goods inventory

Take you to API development by hand

2022-2-15 learning xiangniuke project - Section 1 filtering sensitive words

The markdown editor uses basic syntax

Cannot link redis when redis is enabled
![[getting started with Django] 13 page Association MySQL](/img/78/cbf88ae3c3d311edd7d9af8c985749.jpg)
[getting started with Django] 13 page Association MySQL "multi" field table (check)

Filter &(登录拦截)

2022-2-15 learning the imitation Niuke project - Section 3 post details
随机推荐
对于编程思想和能力有重大提升的书有哪些?
Build MySQL master-slave server under Ubuntu 14.04
TypeScript:const
【阶段人生总结】放弃考研,参与到工作中,已经顺利毕业了,昨天刚领毕业证
Don't want to knock the code? Here comes the chance
Ensure production safety! Guangzhou requires hazardous chemical enterprises to "not produce in an unsafe way, and keep constant communication"
[零基础学IoT Pwn] 复现Netgear WNAP320 RCE
基于价值量化的需求优先级排序方法
Mongodb second call -- implementation of mongodb high availability cluster
Storage form of in-depth analysis data in memory
Task.Run(), Task.Factory.StartNew() 和 New Task() 的行为不一致分析
【牛客网刷题系列 之 Verilog快速入门】~ 多功能数据处理器、求两个数的差值、使用generate…for语句简化代码、使用子模块实现三输入数的大小比较
tensorflow2-savedmodel convert to pb(frozen_graph)
互联网医院系统源码 医院小程序源码 智慧医院源码 在线问诊系统源码
solidty-基础篇-基础语法和定义函数
C learning notes (5) class and inheritance
What problems should be considered for outdoor LED display?
Microservice development steps (Nacos)
Day-02 database
[dynamic programming] p1004 grid access (four-dimensional DP template question)