当前位置：网站首页>C3D模型pytorch源码逐句详析（一）

C3D模型pytorch源码逐句详析（一）

2022-07-25 09:26:00 【zzh1370894823】

论文链接：http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf

代码链接：https://github.com/jfzhang95/pytorch-video-recognition

1.源码准备

git clone --recursive https://github.com/jfzhang95/pytorch-video-recognition.git
下载完成后得到C3D源码

2.源码结构

文件名称	功能
train.py	训练脚本
mypath.py	配置数据集和预训练模型的路径
dataset.py	数据读取和数据处理脚本
C3D_model.py	C3D模型网络结构构建脚本
ucf101-caffe.pth	预训练模型

3.源码分析

3.1数据读取和数据处理脚本

注意：第一次运行时需要先对数据进行预处理，即设置 preprocess=True

   def __init__(self, dataset='ucf101', split='train', clip_len=16, preprocess=False):
        """Resolve the dataset paths and record clip/frame settings.

        Args:
            dataset: Dataset name handed to ``Path.db_dir``.
            split: Name of the sub-directory of the output directory to use
                (joined onto ``self.output_dir`` below).
            clip_len: Stored as ``self.clip_len``.
            preprocess: Checked further down in this constructor to force
                preprocessing.
        """
        self.root_dir, self.output_dir = Path.db_dir(dataset)   # source path and output path of the dataset
        folder = os.path.join(self.output_dir, split)   # directory of the requested split, i.e. train, test or val
        self.clip_len = clip_len  # how many frames are taken at a time
        self.split = split

        # The following three parameters are chosen as described in the paper section 4.1
        # Frame height/width pipeline: (h x w) --> 128 x 171 --> 112 x 112; resize first, then crop.
        self.resize_height = 128
        self.resize_width = 171
        self.crop_size = 112  # crop size

check_integrity()

判断是否存在Dataset的源路径，若不存在，则报错

        # check_integrity(): per the author's note it tests whether the dataset
        # source path exists; a falsy result aborts construction with an error.
        if not self.check_integrity():
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' You need to download it from official website.')

        # Run preprocessing when check_preprocess() returns a falsy value or
        # when the caller passed preprocess=True.
        if (not self.check_preprocess()) or preprocess:  # decide whether preprocessing is needed
            print('Preprocessing of {} dataset, this will take long, but it will be done only once.'.format(dataset))
            self.preprocess()

def preprocess(self):

预处理视频

    def preprocess(self):   # preprocess the videos
        """Split the videos of every class into train/val/test and process them.

        Each entry of ``self.root_dir`` is treated as one action class; its
        video file names are split per class with a fixed random seed.
        """
        # Create the output directory and its split sub-directories.
        # NOTE(review): the train/val/test folders are only created when the
        # output directory itself is missing — confirm that an existing but
        # empty output directory cannot occur.
        if not os.path.exists(self.output_dir):
            os.mkdir(self.output_dir)
            os.mkdir(os.path.join(self.output_dir, 'train'))
            os.mkdir(os.path.join(self.output_dir, 'val'))
            os.mkdir(os.path.join(self.output_dir, 'test'))

        # Split train/val/test sets
        for file in os.listdir(self.root_dir):  # iterate over every folder of the dataset, i.e. of the ucf101 folder
            file_path = os.path.join(self.root_dir, file)  # path of one action class
            video_files = [name for name in os.listdir(file_path)]  # names of the videos in this class, as a list

            # Split the data in two parts: 0.2 is the test fraction, 42 the random seed.
            # The split is done per class, once per loop iteration.
            # The second call takes 20% of the remaining videos as validation,
            # which gives roughly 64% train / 16% val / 20% test overall.
            train_and_valid, test = train_test_split(video_files, test_size=0.2, random_state=42)
            train, val = train_test_split(train_and_valid, test_size=0.2, random_state=42)

test, train, val 为分成的三个list,存储的为视频名称

            # Create the matching per-class folder inside each split directory
            train_dir = os.path.join(self.output_dir, 'train', file)
            val_dir = os.path.join(self.output_dir, 'val', file)
            test_dir = os.path.join(self.output_dir, 'test', file)

            if not os.path.exists(train_dir):
                os.mkdir(train_dir)
            if not os.path.exists(val_dir):
                os.mkdir(val_dir)
            if not os.path.exists(test_dir):
                os.mkdir(test_dir)

            # Process every video into the folder of the split it was assigned to
            for video in train:   # train is a list of video file names
                self.process_video(video, file, train_dir)

            for video in val:
                self.process_video(video, file, val_dir)

            for video in test:
                self.process_video(video, file, test_dir)

        print('Preprocessing finished.')

process_video（）函数

处理视频：逐帧读取视频（每一帧为numpy的ndarray），按间隔抽帧、缩放后保存为jpg图片

    def process_video(self, video, action_name, save_dir):
        """Open one video and read its basic properties.

        Args:
            video: File name of the video inside its class folder.
            action_name: Class folder name, joined between ``self.root_dir``
                and ``video`` to locate the file.
            save_dir: Directory in which a folder named after the video is
                created.
        """
        # Text before the first '.', e.g. 'v_YoYo_g17_c03.avi' -> 'v_YoYo_g17_c03'
        video_filename = video.split('.')[0]

        # One output folder per video
        if not os.path.exists(os.path.join(save_dir, video_filename)):
            os.mkdir(os.path.join(save_dir, video_filename))

        capture = cv2.VideoCapture(os.path.join(self.root_dir, action_name, video))  # open the video

        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))  # number of frames in the video
        frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))  # frame width of the video
        frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))  # frame height of the video

split方法获取视频名如：video = ‘v_YoYo_g17_c03.avi’,将’.avi’去掉，返回’v_YoYo_g17_c03’

      # Decide the sampling stride so that more than 16 frames get extracted
        EXTRACT_FREQUENCY = 4  # keep one frame every EXTRACT_FREQUENCY frames; starts at 4 and is lowered when too few frames would result
        # At most three decrements, so the stride never drops below 1.
        if frame_count // EXTRACT_FREQUENCY <= 16:
            EXTRACT_FREQUENCY -= 1
            if frame_count // EXTRACT_FREQUENCY <= 16:
                EXTRACT_FREQUENCY -= 1
                if frame_count // EXTRACT_FREQUENCY <= 16:
                    EXTRACT_FREQUENCY -= 1


        count = 0  # index of the frame currently being read
        i = 0   # number of frames written so far; also used in the file name
        retaining = True  # result flag of the last capture.read()

        while (count < frame_count and retaining):
            retaining, frame = capture.read()

            # No image was returned: skip the rest of the body (count is not
            # advanced); the loop then ends once retaining is False.
            if frame is None:
                continue
            # Handle each frame of the video
            if count % EXTRACT_FREQUENCY == 0:  # keep only every EXTRACT_FREQUENCY-th frame
                # Resize unless the video already has the target size; the size is passed as (width, height).
                if (frame_height != self.resize_height) or (frame_width != self.resize_width):
                    frame = cv2.resize(frame, (self.resize_width, self.resize_height))
                # Files are named '0000<i>.jpg' ('00000.jpg', '00001.jpg', ..., '000010.jpg'),
                # so the names are not zero-padded to a fixed width.
                cv2.imwrite(filename=os.path.join(save_dir, video_filename, '0000{}.jpg'.format(str(i))), img=frame)
                i += 1
            count += 1

        # Release the VideoCapture once it is no longer needed (frees the resource)
        capture.release()

count = 0 表示第几次调用capture.read()，即当前读取的是第几帧
i = 0 记录已抽取的帧数，同时用于保存图片时的命名
capture.read()按帧读取视频，retaining和frame是capture.read()方法的两个返回值。
其中retaining是布尔值，如果正确读取到帧则返回True，如果已读取到文件结尾，则返回False。
frame就是读取到的那一帧图像，是一个三维矩阵，类型为ndarray

原网站

版权声明
本文为[zzh1370894823]所创，转载请带上原文链接，感谢
https://blog.csdn.net/zzh1370894823/article/details/113790994