当前位置:网站首页>YOLOv5 detect.py with annotations
YOLOv5 detect.py with annotations
2022-07-02 23:43:00 【Recursions】
import argparse
import time
from pathlib import Path
import warnings
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized
def detect(save_img=False):
    """Run YOLOv5 inference on ``opt.source`` and optionally show/save the results.

    Every setting is read from the module-level ``opt`` namespace (the parsed
    command-line arguments), so ``opt`` must exist before this is called.

    Depending on the options, the function:
      * prints one summary line per processed image/frame,
      * displays annotated frames in an OpenCV window (``opt.view_img`` or
        a stream source),
      * writes annotated images/videos into the run directory
        (unless ``opt.nosave``),
      * appends normalized ``class x y w h [conf]`` label lines to
        ``<run dir>/labels/*.txt`` (``opt.save_txt``).

    Args:
        save_img: Kept for backward compatibility only. The value passed in is
            ignored: it is recomputed below from ``opt.nosave`` and the source.

    Returns:
        None.
    """
    # Unpack the options that are used repeatedly below.
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    # Numeric ids, *.txt lists and network URLs are handled by the stream loader.
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Output directory for this run (and its labels/ sub-directory when saving txt).
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize logging and pick the compute device.
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the FP32 model, then validate the requested image size against the
    # model's maximum stride (check_img_size returns the size to actually use).
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Optional second-stage classifier; disabled by default.
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() returns a report of missing/unexpected keys, not the
        # module, so it must not be chained with .to()/.eval().
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Choose the data loader that matches the kind of source.
    vid_path, vid_writer = None, None
    cudnn.benchmark = True  # set True to speed up constant image size inference
    if webcam:
        view_img = check_imshow()
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Class names and one random colour per class for drawing boxes.
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Warm-up: one forward pass on a dummy input (skipped on CPU).
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()

    # Each dataset item is (path, img, im0s, vid_cap). Per the original notes:
    #   path    - image/video path
    #   img     - the image after resize + pad (network input)
    #   im0s    - the image(s) at original size
    #   vid_cap - None when reading images, the video source when reading video
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch dimension

        # Inference.
        # Original author's notes on the raw output (not verifiable from this file):
        #   shape is (1, num_boxes, 5 + num_classes), with
        #   num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8 for an h x w input
        #   (rectangular inference is used, so h need not equal w);
        #   pred[..., 0:4] are box coordinates in xywh (centre + width/height),
        #   pred[..., 4] is the objectness confidence,
        #   pred[..., 5:] are the per-class scores.
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS.
        #   conf_thres - confidence threshold
        #   iou_thres  - IoU threshold
        #   classes    - keep only these classes (None keeps all)
        #   agnostic   - also suppress overlapping boxes of different classes
        # Per the original notes, the result is a list with one tensor per
        # image, each of shape (num_boxes, 6) = xyxy box + conf + cls.
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply the second-stage classifier (disabled by default).
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections, one entry per image in the batch.
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            # Label file path; video/stream frames get a _<frame> suffix.
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from the network input size to the original image size (xyxy).
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Summarize the number of detections per class.
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results.
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # xyxy -> xywh, normalized by the original image width/height.
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS).
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results to a window.
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections).
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    # Finalize the last output video; earlier writers are released when the
    # source changes, but the final one was otherwise left open.
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
if __name__ == '__main__':
    # Command-line interface: declare every option, then parse into the
    # module-level ``opt`` namespace that detect() reads.
    cli = argparse.ArgumentParser()
    add_option = cli.add_argument

    # Model and input.
    add_option('--weights', nargs='+', type=str, default='yolov5l.pt', help='model.pt path(s)')
    add_option('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
    add_option('--img-size', type=int, default=640, help='inference size (pixels)')

    # Detection thresholds and device.
    add_option('--conf-thres', type=float, default=0.5, help='object confidence threshold')
    add_option('--iou-thres', type=float, default=0.20, help='IOU threshold for NMS')
    add_option('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    # Display / output switches.
    add_option('--view-img', action='store_true', help='display results')
    add_option('--save-txt', action='store_true', help='save results to *.txt')
    add_option('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    add_option('--nosave', action='store_true', help='do not save images/videos')

    # Filtering and inference behaviour; --classes accepts several class ids.
    add_option('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    add_option('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    add_option('--augment', action='store_true', help='augmented inference')
    add_option('--update', action='store_true', help='update all models')

    # Where results are written (project/name); --exist-ok reuses the
    # directory instead of incrementing to a new one.
    add_option('--project', default='runs/detect', help='save results to project/name')
    add_option('--name', default='exp', help='save results to project/name')
    add_option('--exist-ok', action='store_true', help='existing project/name ok, do not increment')

    opt = cli.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))

    # Inference only: gradients are never needed.
    with torch.no_grad():
        if not opt.update:
            detect()
        else:
            # update all models (to fix SourceChangeWarning): run each standard
            # checkpoint once, then strip its optimizer state.
            for checkpoint in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                opt.weights = checkpoint
                detect()
                strip_optimizer(opt.weights)
边栏推荐
- Flexible combination of applications is a false proposition that has existed for 40 years
- 基于FPGA的VGA协议实现
- Win11麦克风测试在哪里?Win11测试麦克风的方法
- SharedPreferences save list < bean > to local and solve com google. gson. internal. Linkedtreemap cannot be cast to exception
- Go project operation method
- [error record] the flutter reports an error (could not resolve io.flutter:flutter_embedding_debug:1.0.0.)
- 面试过了,起薪16k
- Yolox enhanced feature extraction network panet analysis
- Pandora IOT development board learning (HAL Library) - Experiment 4 serial port communication experiment (learning notes)
- 判断二叉树是否为满二叉树
猜你喜欢

请求与响应

"A good programmer is worth five ordinary programmers!"

Go basic data type

内网渗透 | 手把手教你如何进行内网渗透

Container runtime analysis
![[analysis of STL source code] imitation function (to be supplemented)](/img/40/a02a04a24f385a31e0484d1071ecec.jpg)
[analysis of STL source code] imitation function (to be supplemented)

JDBC practice cases

LINQ usage collection in C #

Go project operation method

Yolox enhanced feature extraction network panet analysis
随机推荐
Leetcode relaxation question - day of the week
公司里只有一个测试是什么体验?听听他们怎么说吧
How much do you know about synchronized?
How does win11 turn on visual control? Win11 method of turning on visual control
LINQ usage collection in C #
leetcode 650. 2 keys keyboard with only two keys (medium)
2022年最新最全软件测试面试题大全
CADD课程学习(4)-- 获取没有晶体结构的蛋白(SWISS-Model)
MFC 获取当前时间
顶级 DevOps 工具链大盘点
Where is the win11 automatic shutdown setting? Two methods of setting automatic shutdown in win11
CDN 加速,需要域名先备案
ArrayList分析2 :Itr、ListIterator以及SubList中的坑
(stinger) use pystinger Socks4 to go online and not go out of the network host
Remote connection of raspberry pie by VNC viewer
返回二叉树中最大的二叉搜索子树的根节点
判断二叉树是否为满二叉树
Bean加载控制
Why can't the start method be called repeatedly? But the run method can?
Pandora IOT development board learning (HAL Library) - Experiment 4 serial port communication experiment (learning notes)