当前位置：网站首页>[opencv learning] small ticket recognition based on perspective transformation and OCR recognition

[OpenCV learning] Receipt recognition based on perspective transformation and OCR

2022-06-12 23:15:00 【A sea of stars】

This article builds on the perspective transformation and OCR recognition covered earlier to implement a simple receipt recognizer, as follows:

import cv2
import numpy as np
from PIL import Image
import pytesseract as tess

dsize = (55, 88)  # Unified scale; NOTE(review): not referenced anywhere in the visible code - confirm it is still needed


# Display helper, wrapped in a function so every debug view behaves the same way.
def cv_show_image(name, img):
    """Show ``img`` in a window titled ``name`` and block until a key is pressed.

    Args:
        name: Title of the window passed to ``cv2.imshow``.
        img: Image to display.

    All OpenCV windows are destroyed once a key has been pressed.
    """
    cv2.imshow(name, img)
    # The delay is given in milliseconds; 0 means "wait until any key is pressed".
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# =========================================================
# ================ Read the image and preprocess it ================
# =========================================================

# Read the original colour image.
ocr_img_path = 'images/ocr_qr_code.PNG'
ocr_img = cv2.imread(ocr_img_path)
if ocr_img is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # rather than raising, so fail here with a message that names the path.
    raise FileNotFoundError("cannot read image: {}".format(ocr_img_path))
h_src, w_src, c_src = ocr_img.shape

# Convert the BGR image to single-channel grayscale.
ocr_img_gray = cv2.cvtColor(ocr_img, cv2.COLOR_BGR2GRAY)

# Gaussian filtering with a 3x3 kernel to suppress noise before thresholding.
ocr_img_gray = cv2.GaussianBlur(ocr_img_gray, (3, 3), 1)

# Binarize: pixels brighter than 200 become 255, everything else becomes 0.
ret, ocr_img_thresh = cv2.threshold(ocr_img_gray, 200, 255, cv2.THRESH_BINARY)
cv_show_image('template_thresh', ocr_img_thresh)

# Find the contours. Only the outermost ones are needed (RETR_EXTERNAL), and
# every boundary point is kept (CHAIN_APPROX_NONE).
# OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
# (contours, hierarchy); taking the last two values works with both.
ocr_img_contours, hierarchy = cv2.findContours(ocr_img_thresh,
                                               cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]

# =========================================================
# ================ Find the contour with the largest area ================
# =========================================================
if len(ocr_img_contours) == 0:
    # Without this check the selection below would fail with an unhelpful error.
    raise RuntimeError("no contours found in the thresholded image")

draw_img = ocr_img.copy()  # scratch copy, used only for the intermediate visual check
# Pick the contour that encloses the largest area.
cont_max = max(ocr_img_contours, key=cv2.contourArea)

# Bounding box of the largest contour; w and h are used further down in the
# script as the size of the rectified rectangle.
x, y, w, h = cv2.boundingRect(cont_max)
# Draw the raw contour in red (colours are BGR); drawContours draws on the image passed in.
draw_img = cv2.drawContours(draw_img, [cont_max], -1, color=(0, 0, 255), thickness=2)
arcLength = cv2.arcLength(cont_max, True)  # perimeter of the (closed) largest contour

# The raw contour may consist of many points, but only a quadrilateral with four
# vertices is needed, so approximate it with a polygon. epsilon is the maximum
# distance allowed between the original contour and its approximation; it is
# raised step by step (as a fraction of the perimeter) until exactly four
# vertices remain.
#
# The approximation is always computed, even when the raw contour already has
# four points, so approx_max can never be left as None. The search is bounded so
# that a contour which never collapses to four vertices raises an error instead
# of looping forever. The rate is derived from an integer step to avoid
# accumulating floating-point error.
approx_max = None
for step in range(1, 101):
    rate = step * 0.01
    candidate = cv2.approxPolyDP(cont_max, epsilon=rate * arcLength, closed=True)
    if len(candidate) == 4:
        approx_max = candidate
        print("rate={}, epsilon={}".format(rate, rate * arcLength))
        break

if approx_max is None:
    raise RuntimeError("could not approximate the largest contour with a quadrilateral")

print("approx: ", approx_max)
# Draw the four-vertex approximation in green on the same scratch image.
draw_img = cv2.drawContours(draw_img, [approx_max], -1, color=(0, 255, 0), thickness=2)

cv_show_image('rectangle_contours_img', draw_img)
del draw_img


# =========================================================
# ================ We get four vertices , Do perspective transformation =========================
# =========================================================

# Sort the four vertices first, in the order (top-left, top-right, bottom-right, bottom-left).
# After the transform these four points map to ((0, 0), (w, 0), (w, h), (0, h)) plus a translation by the top-left vertex.

def sort_dotCnt(kps):
    """Order four (x, y) points as top-left, top-right, bottom-right, bottom-left.

    Args:
        kps: Array of four points, one ``(x, y)`` pair per row.

    Returns:
        A ``(4, 2)`` float32 array holding the same points in the order
        top-left, top-right, bottom-right, bottom-left.
    """
    coord_sum = kps.sum(axis=1)         # x + y for every point
    coord_diff = np.diff(kps, axis=1)   # y - x for every point

    # Smallest x + y is the top-left corner and largest is the bottom-right;
    # smallest y - x is the top-right corner and largest is the bottom-left.
    picks = (
        np.argmin(coord_sum),
        np.argmin(coord_diff),
        np.argmax(coord_sum),
        np.argmax(coord_diff),
    )

    rect = np.zeros((4, 2), dtype='float32')
    for slot, source_index in enumerate(picks):
        rect[slot] = kps[source_index]
    return rect


# Debug output: the shape of the approximated contour, then its points as a 4x2 array.
corner_points = approx_max.reshape(4, 2)
print(approx_max.shape)
print(corner_points)
rect_ordered = sort_dotCnt(corner_points)
top_left, top_right, bottom_right, bottom_left = rect_ordered

# Source quadrilateral: the four vertices of the object in the original image.
pts_src = np.array(rect_ordered, dtype="float32")
# Destination quadrilateral: a w x h axis-aligned rectangle whose top-left
# corner stays at the object's top-left vertex.
corner_offsets = np.array([(0, 0), (w, 0), (w, h), (0, h)], dtype="float32")
pts_dst = top_left + corner_offsets

# 3x3 perspective-transform matrix computed from the four point correspondences.
M = cv2.getPerspectiveTransform(pts_src, pts_dst)
# Warp the thresholded image with that matrix, keeping the source image size.
im_out = cv2.warpPerspective(ocr_img_thresh, M, (w_src, h_src))
cv_show_image('im_out', im_out)


# =========================================================
# ================ Run OCR on the rectified image ================
# =========================================================

# Wrap the NumPy image in a PIL image, then hand it to pytesseract.
textInImage = Image.fromarray(im_out)
text = tess.image_to_string(textInImage)
print("\nocr detect result:%s" % (text,))

The original image after preprocessing:
Please add a picture description
After contour detection, an outline with four vertices is obtained and drawn with green lines:

After perspective transformation ：

Finally, the result recognized by OCR:

At present the script can recognize the digits. Next time, I will learn how to recognize Simplified Chinese characters.