当前位置：网站首页>(2) CBAM integrated two stream project construction - data preparation

(2) CBAM integrated two stream project construction - data preparation

2022-07-27 17:18:00 【Bald head whining devil】

Data preparation

1、 Optical flow extraction

For the dense_flow installation tutorial, see: install dense_flow
For a walkthrough of the dense_flow extraction process, see: dense_flow Code understanding

The process is simple. First, the parameters are read: the dataset path, the output path for the extracted frames and optical flow, the path of the optical flow extraction tool, the width and height to resize frames to, the number of worker processes, the number of GPUs, the output format (dir or zip), and the video file extension (avi or mp4). All video files in the dataset are collected in vid_list, and the number of videos is printed. Each video path is packed into a tuple together with its index in the list and passed to the extraction function run_optical_flow, which calls the dense_flow tool to extract the RGB frames and the optical flow. See the code comments for details.

from __future__ import print_function
import os
import sys
import glob
import argparse
from pipes import quote
from multiprocessing import Pool, current_process
def run_optical_flow(vid_item):
    """Extract RGB frames and optical flow for one video with dense_flow.

    Relies on the module-level settings ``out_path``, ``df_path``,
    ``NUM_GPU``, ``out_format`` and ``new_size``, which the ``__main__``
    section assigns before the worker pool is created.

    Args:
        vid_item: ``(video_path, video_index)`` pair.

    Returns:
        Always ``True``; the exit status of the external command is not
        checked.
    """
    vid_path = vid_item[0]
    vid_id = vid_item[1]
    # Video name = file name up to its first dot; it names the output folder.
    vid_name = vid_path.split('/')[-1].split('.')[0]
    out_full_path = os.path.join(out_path, vid_name)
    try:
        os.mkdir(out_full_path)
    except OSError:
        # Best effort: the folder usually already exists from an earlier run.
        pass
    # Pool workers carry a 1-based number in the private ``_identity`` tuple;
    # use it to spread workers round-robin over the GPUs. The tuple is empty
    # when this function runs in the main process, so fall back to GPU 0.
    identity = current_process()._identity
    worker_index = int(identity[0]) - 1 if identity else 0
    dev_id = worker_index % NUM_GPU
    # Output prefixes for the RGB frames and the x/y flow images.
    image_path = '{}/img'.format(out_full_path)
    flow_x_path = '{}/flow_x'.format(out_full_path)
    flow_y_path = '{}/flow_y'.format(out_full_path)
    # Join with two arguments so a df_path without a trailing slash still
    # yields ".../build/extract_gpu".
    extract_bin = os.path.join(df_path, 'build/extract_gpu')
    # quote() shell-escapes each path so spaces or special characters survive.
    # -f video path, -x / -y flow outputs, -i RGB output, -d GPU id,
    # -o output format, -w / -h new width / height.
    # NOTE(review): -b, -t and -s are passed through unchanged; see the
    # dense_flow extract_gpu usage text for their meaning.
    cmd = extract_bin + ' -f {} -x {} -y {} -i {} -b 20 -t 1 -d {} -s 1 -o {} -w {} -h {}'.format(
        quote(vid_path), quote(flow_x_path), quote(flow_y_path), quote(image_path),
        dev_id, out_format, new_size[0], new_size[1])
    print(cmd)
    os.system(cmd)
    print('{} {} done'.format(vid_id, vid_name))
    # Flush so progress lines from different workers show up promptly.
    sys.stdout.flush()
    return True
    
if __name__ == '__main__':
    # Command-line entry point: collect the videos and extract frames/flow
    # for each of them in a pool of worker processes.
    parser = argparse.ArgumentParser(description="extract optical flows")
    # Root folder of the video dataset.
    parser.add_argument("--src_dir", type=str, default='./UCF-101',
                        help='path to the video data')
    # Where the RGB frames and optical flow images are written.
    parser.add_argument("--out_dir", type=str, default='./ucf101_frames',
                        help='path to store frames and optical flow')
    # Location of the dense_flow toolbox.
    parser.add_argument("--df_path", type=str, default='./dense_flow/',
                        help='path to the dense_flow toolbox')
    # Target width / height passed to the extractor via -w / -h.
    parser.add_argument("--new_width", type=int, default=0, help='resize image width')
    parser.add_argument("--new_height", type=int, default=0, help='resize image height')
    # Number of worker processes in the pool.
    parser.add_argument("--num_worker", type=int, default=8)
    # Number of GPUs the workers are spread over.
    parser.add_argument("--num_gpu", type=int, default=2, help='number of GPU')
    # Output format passed to the extractor via -o.
    parser.add_argument("--out_format", type=str, default='dir', choices=['dir','zip'],
                        help='output format of the extracted frames and optical flow')
    # Extension of the video files to look for.
    parser.add_argument("--ext", type=str, default='avi', choices=['avi','mp4'],
                        help='video file extensions')
    args = parser.parse_args()

    # run_optical_flow reads these names as module-level globals.
    # NOTE(review): workers only see them when processes are forked; with the
    # "spawn" start method they would be undefined in the workers - confirm
    # the target platform.
    out_path = args.out_dir
    src_path = args.src_dir
    num_worker = args.num_worker
    df_path = args.df_path
    out_format = args.out_format
    ext = args.ext
    new_size = (args.new_width, args.new_height)
    NUM_GPU = args.num_gpu

    if not os.path.isdir(out_path):
        print("creating folder: "+out_path)
        os.makedirs(out_path)

    # Videos are expected one folder level below src_dir: src_dir/*/*.<ext>
    vid_list = glob.glob(src_path+'/*/*.'+ext)
    # Print how many videos were found.
    print(len(vid_list))

    pool = Pool(num_worker)
    # Each work item is (video path, index of the video in vid_list).
    pool.map(run_optical_flow, [(vid, idx) for idx, vid in enumerate(vid_list)])
    # Release the worker processes once all videos have been handled.
    pool.close()
    pool.join()

2、Generating the RGB and flow training / test list files

The UCF-101 dataset ships with official list files that divide the dataset into a training set and a test set in three different ways, giving six list files: testlist01.txt, testlist02.txt, testlist03.txt, trainlist01.txt, trainlist02.txt and trainlist03.txt. Each line of these files has the form category/video name. There is also a classInd.txt file that gives the label of each category (labels start from 1).

Because the network takes RGB frames and optical flow as input, the six video list files above need to be converted into RGB and flow list files. build_file_list.py generates these list files. For the training list and the test list of each split scheme, it counts the RGB frames and the flow images of every video and writes these counts to a file together with the video's category. In other words, the six files above are the training and test sets under three different splits; each of them is turned into an RGB list and a flow list, producing 12 files in total.

Implementation process ：
1、parse_ucf101_splits()
Reads the six official list files (category/video name) and uses classInd to look up the label of each video.
It first reads the training set and the test set of the first split scheme, producing a list of (video name, label) tuples for the training set and another for the test set, and appends this pair to splits; the other two split schemes are then read in the same way. In the returned list splits, the first dimension selects the split scheme, the second dimension selects the subset (index 0 is the training set, index 1 is the test set), and the third dimension holds the (video name, label) tuples.

2、parse_directory(path, rgb_prefix='img_', flow_x_prefix='flow_x_', flow_y_prefix='flow_y_')
Given the root path of the extracted frames, counts the RGB frames and flow images of each video and returns two dictionaries that map the video name to its RGB count and to its flow count.

3、build_split_list(split_tuple, frame_info, split_idx, shuffle=False)
split_tuple is the list produced in step 1. First the split scheme with index split_idx is selected: split = split_tuple[split_idx]. Then, for every video in the training set split[0] and in the test set split[1], the RGB and flow counts are looked up and stored as a string in the format "video name, RGB/flow count, label".

4、 The results of step 3 are written line by line to the corresponding files, producing the 12 frame list files.

####
# Prerequisite: frame extraction and optical flow computation have already been run, with the results stored in one folder per video
# This script creates list files describing, for each video, the number of frames, the number of optical flow images and the category of the video
####
import argparse
import os
import glob
import random
import fnmatch

# Count the RGB frames and flow images stored under `path`, where every
# sub-folder holds the extracted files of one video.
def parse_directory(path, rgb_prefix='img_', flow_x_prefix='flow_x_', flow_y_prefix='flow_y_'):
    """Count RGB frames and optical-flow images for every video folder.

    Args:
        path: Directory whose sub-folders each hold the files of one video.
        rgb_prefix: File-name prefix of RGB frames.
        flow_x_prefix: File-name prefix of x-direction flow images.
        flow_y_prefix: File-name prefix of y-direction flow images.

    Returns:
        ``(rgb_counts, flow_counts)``: two dicts keyed by folder name, giving
        the number of RGB frames and of flow images respectively.

    Raises:
        ValueError: If a folder holds a different number of x and y flow
            images.
    """
    print('parse frames under folder {}'.format(path))
    # Only directories are video folders; skip stray files so that listing
    # them does not fail.
    frame_folders = [entry for entry in glob.glob(os.path.join(path, '*'))
                     if os.path.isdir(entry)]

    def count_files(directory, prefix_list):
        """Return, for each prefix, how many names in `directory` start with it."""
        names = os.listdir(directory)
        return [len(fnmatch.filter(names, prefix + '*')) for prefix in prefix_list]

    rgb_counts = {}
    flow_counts = {}
    for i, f in enumerate(frame_folders):
        rgb_cnt, x_cnt, y_cnt = count_files(f, (rgb_prefix, flow_x_prefix, flow_y_prefix))
        # The folder name is the video name used as dictionary key.
        k = os.path.basename(f)
        rgb_counts[k] = rgb_cnt
        # x and y are the two channels of the flow field; they must pair up.
        if x_cnt != y_cnt:
            raise ValueError('x and y direction have different number of flow images. video: '+f)
        flow_counts[k] = x_cnt
        if i % 200 == 0:
            print('{} videos parsed'.format(i))
    print('frame folder analysis done')
    return rgb_counts, flow_counts

# split_tuple holds one (train_list, test_list) pair per split scheme.
# frame_info is the (rgb_counts, flow_counts) pair of per-video counts.
# Returns the list-file lines (video name, count, label) for the RGB and the
# flow modality of the selected split.
def build_split_list(split_tuple, frame_info, split_idx, shuffle=False):
    """Build the RGB and flow list-file lines for one split scheme.

    Args:
        split_tuple: Sequence of ``(train_list, test_list)`` pairs whose
            items are ``(video_name, label)`` records.
        frame_info: ``(rgb_counts, flow_counts)`` dicts keyed by video name.
        split_idx: Index of the split scheme to use.
        shuffle: When true, shuffle every produced list with ``random``.

    Returns:
        ``((train_rgb, test_rgb), (train_flow, test_flow))`` where every
        element is a list of ``"<name> <count> <label>\\n"`` strings.
    """
    chosen_split = split_tuple[split_idx]

    def render(records):
        """Turn (name, label) records into RGB and flow text lines."""
        rgb_lines = []
        flow_lines = []
        for record in records:
            video_name = record[0]
            n_rgb = frame_info[0][video_name]
            n_flow = frame_info[1][video_name]
            rgb_lines.append('{} {} {}\n'.format(video_name, n_rgb, record[1]))
            flow_lines.append('{} {} {}\n'.format(video_name, n_flow, record[1]))
        if shuffle:
            # The two lists are shuffled independently of each other.
            for lines in (rgb_lines, flow_lines):
                random.shuffle(lines)
        return rgb_lines, flow_lines

    # Index 0 of a split is the training set, index 1 the test set.
    train_pair = render(chosen_split[0])
    test_pair = render(chosen_split[1])
    rgb_result = (train_pair[0], test_pair[0])
    flow_result = (train_pair[1], test_pair[1])
    return rgb_result, flow_result

# Parse the official UCF101 split files and return, for every split scheme,
# the (video name, label) records of its training and test lists.
def parse_ucf101_splits(split_dir='ucf101_splits'):
    """Read the UCF101 class index and the three train/test split files.

    Args:
        split_dir: Folder holding ``classInd.txt``, ``trainlist0N.txt`` and
            ``testlist0N.txt`` for N = 1..3.

    Returns:
        A list with one ``(train_list, test_list)`` tuple per split scheme;
        each list holds ``(video_name, label)`` tuples with 0-based labels.
    """
    # classInd.txt lines look like "<1-based index> <class name>".
    with open(os.path.join(split_dir, 'classInd.txt')) as class_file:
        class_ind = [x.strip().split() for x in class_file if x.strip()]
    # Map class name -> label, shifting the file's 1-based index to 0-based.
    class_mapping = {x[1]: int(x[0]) - 1 for x in class_ind}

    def line2rec(line):
        """Convert one "<class>/<video file> ..." line to (video name, label)."""
        items = line.strip().split('/')
        label = class_mapping[items[0]]
        # Keep the part before the first dot, i.e. drop the file extension
        # and anything that follows it on the line.
        vid = items[1].split('.')[0]
        return vid, label

    def read_list(file_name):
        """Parse every non-blank line of one split file."""
        with open(os.path.join(split_dir, file_name)) as list_file:
            return [line2rec(x) for x in list_file if x.strip()]

    splits = []
    # range() instead of xrange() so the function also runs on Python 3.
    for i in range(1, 4):
        train_list = read_list('trainlist{:02d}.txt'.format(i))
        test_list = read_list('testlist{:02d}.txt'.format(i))
        splits.append((train_list, test_list))
    return splits
    
if __name__ == '__main__':
    # Command-line entry point: write the RGB / flow train and val list
    # files for each split scheme.
    parser = argparse.ArgumentParser()
    # Dataset to process; defaults to ucf101.
    parser.add_argument('--dataset', type=str, default='ucf101', choices=['ucf101', 'hmdb51'])
    # Folder holding the extracted RGB frames and flow images.
    parser.add_argument('--frame_path', type=str, default='./ucf101_frames',
                        help="root directory holding the frames")
    # Folder the list files are written to (a per-dataset sub-folder is used).
    parser.add_argument('--out_list_path', type=str, default='./settings')
    # File-name prefix of RGB frames.
    parser.add_argument('--rgb_prefix', type=str, default='img_',
                        help="prefix of RGB frames")
    # File-name prefix of x-direction flow images.
    parser.add_argument('--flow_x_prefix', type=str, default='flow_x',
                        help="prefix of x direction flow images")
    # File-name prefix of y-direction flow images.
    parser.add_argument('--flow_y_prefix', type=str, default='flow_y',
                        help="prefix of y direction flow images", )
    # Number of split schemes to generate list files for.
    parser.add_argument('--num_split', type=int, default=3,
                        help="number of split building file list")
    # Whether to shuffle the lines of each list file.
    parser.add_argument('--shuffle', action='store_true', default=False)
    args = parser.parse_args()

    dataset = args.dataset
    frame_path = args.frame_path
    rgb_p = args.rgb_prefix
    flow_x_p = args.flow_x_prefix
    flow_y_p = args.flow_y_prefix
    num_split = args.num_split
    shuffle = args.shuffle

    # List files go to <out_list_path>/<dataset>; create the folder if needed.
    out_path = os.path.join(args.out_list_path, dataset)
    if not os.path.isdir(out_path):
        print("creating folder: "+out_path)
        os.makedirs(out_path)

    print('processing dataset {}'.format(dataset))
    # Parse the official split files into (video name, label) records.
    if dataset=='ucf101':
        split_tp = parse_ucf101_splits()
    else:
        # NOTE(review): parse_hmdb51_splits is not defined in the code shown
        # here; this branch needs that function to be provided.
        split_tp = parse_hmdb51_splits()

    # Per-video counts of RGB frames and flow images under frame_path.
    f_info = parse_directory(frame_path, rgb_p, flow_x_p, flow_y_p)

    print('writing list files for training/testing')
    # Never index past the parsed splits: min() caps the request at what is
    # available and honours a smaller --num_split.
    for i in range(min(num_split, len(split_tp))):
        lists = build_split_list(split_tp, f_info, i, shuffle)
        # lists[0] is (train, val) for RGB, lists[1] is (train, val) for flow.
        outputs = (
            ('train_rgb_split{}.txt', lists[0][0]),
            ('val_rgb_split{}.txt', lists[0][1]),
            ('train_flow_split{}.txt', lists[1][0]),
            ('val_flow_split{}.txt', lists[1][1]),
        )
        for name_template, lines in outputs:
            # "with" closes (and thereby flushes) each list file.
            with open(os.path.join(out_path, name_template.format(i + 1)), 'w') as list_file:
                list_file.writelines(lines)

3、ucf101 Data sets

Realization ：
1、 First, the dataset directory is parsed into a list of class names and a class-name-to-index mapping. Then the specified frame list file is parsed into a list of (video path, number of frames, video category) tuples.

2、The __getitem__() function: num_segments is the number of segments sampled from the RGB frames or flow images of one video. First the entry of the video is fetched (video path, frame count duration, video category). The frames are then divided into num_segments chunks of duration/num_segments frames each, and new_length consecutive RGB frames or flow images are taken from every chunk: at a random position for the training set, and from the middle of the chunk for the validation set. The sampled images are preprocessed (transforms / data augmentation), and the function returns the sampled clip together with its label.
Note: the training set is sampled randomly so that different frames are seen every time, which helps the trained model generalize. The validation set must be sampled identically every time, because results of different models are only comparable when they are evaluated on the same frames.

This file cannot be run on its own; it is used by the main program to load data during model training.

import torch.utils.data as data

import os
import sys
import random
import numpy as np
import cv2

# Given a dataset folder, return its sub-folder names (sorted) and a dict
# mapping each of those names to its position in the sorted list.
def find_classes(dir):
    """List the sub-directories of `dir` and number them in sorted order.

    Args:
        dir: Directory to scan.

    Returns:
        ``(classes, class_to_idx)``: the sorted sub-directory names and a
        dict mapping each name to its index in that list.
    """
    subdirs = []
    for entry in os.listdir(dir):
        # Plain files are ignored; only folders count as classes.
        if os.path.isdir(os.path.join(dir, entry)):
            subdirs.append(entry)
    classes = sorted(subdirs)
    class_to_idx = dict((name, position) for position, name in enumerate(classes))
    return classes, class_to_idx

def make_dataset(root, source):
    """Parse a list file into ``(clip_path, duration, target)`` tuples.

    Every non-blank line of `source` is expected to hold three
    whitespace-separated fields: video folder name, number of frames and
    integer class label.

    Args:
        root: Directory that the video folder names are joined onto.
        source: Path of the list file.

    Returns:
        List of ``(clip_path, duration, target)`` tuples in file order.

    Raises:
        SystemExit: If `source` does not exist (after printing a message).
    """
    if not os.path.exists(source):
        print("Setting file %s for ucf101 dataset doesn't exist." % (source))
        # Non-zero status so callers and shells can tell this is a failure.
        sys.exit(1)
    clips = []
    with open(source) as split_f:
        for line in split_f:
            line_info = line.split()
            if not line_info:
                # Tolerate blank lines, e.g. a trailing newline at file end.
                continue
            # Folder holding the frames of this video.
            clip_path = os.path.join(root, line_info[0])
            # Number of frames available for this video.
            duration = int(line_info[1])
            # Class label of the video.
            target = int(line_info[2])
            clips.append((clip_path, duration, target))
    return clips

# path: folder holding the frames of one video
def ReadSegmentRGB(path, offsets, new_height, new_width, new_length, is_color, name_pattern):
    """Load the sampled RGB frames of one video and stack them channel-wise.

    For every offset, `new_length` consecutive frames are read; frame
    numbers are ``offset + 1 .. offset + new_length``.

    Args:
        path: Folder holding the frame images of the video.
        offsets: Start offset of each sampled segment.
        new_height: Target height; resizing only happens when both
            `new_height` and `new_width` are positive.
        new_width: Target width (see `new_height`).
        new_length: Number of consecutive frames read per segment.
        is_color: Read frames as color images when true, grayscale otherwise.
        name_pattern: ``%``-format pattern producing a file name from a
            frame number.

    Returns:
        Array with all frames concatenated along axis 2, in sampling order.

    Raises:
        SystemExit: If a frame cannot be loaded (after printing a message).
    """
    cv_read_flag = cv2.IMREAD_COLOR if is_color else cv2.IMREAD_GRAYSCALE
    interpolation = cv2.INTER_LINEAR

    sampled_list = []
    for offset in offsets:
        for length_id in range(1, new_length+1):
            frame_name = name_pattern % (length_id + offset)
            frame_path = path + "/" + frame_name
            cv_img_origin = cv2.imread(frame_path, cv_read_flag)
            if cv_img_origin is None:
                print("Could not load file %s" % (frame_path))
                # Non-zero status marks the exit as a failure.
                sys.exit(1)
            if new_width > 0 and new_height > 0:
                # Pass interpolation by keyword: the third positional
                # parameter of cv2.resize is `dst`, not the interpolation.
                cv_img = cv2.resize(cv_img_origin, (new_width, new_height),
                                    interpolation=interpolation)
            else:
                cv_img = cv_img_origin
            if is_color:
                # Frames are converted from BGR to RGB channel order.
                cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
            else:
                # Grayscale frames have no channel axis; add one so they can
                # be concatenated along axis 2 like color frames.
                cv_img = np.expand_dims(cv_img, 2)
            sampled_list.append(cv_img)
    clip_input = np.concatenate(sampled_list, axis=2)
    return clip_input

def ReadSegmentFlow(path, offsets, new_height, new_width, new_length, is_color, name_pattern):
    """Load the sampled x/y flow images of one video and stack them.

    For every offset, `new_length` consecutive flow pairs are read; image
    numbers are ``offset + 1 .. offset + new_length``. Each pair contributes
    its x image followed by its y image.

    Args:
        path: Folder holding the flow images of the video.
        offsets: Start offset of each sampled segment.
        new_height: Target height; resizing only happens when both
            `new_height` and `new_width` are positive.
        new_width: Target width (see `new_height`).
        new_length: Number of consecutive flow pairs read per segment.
        is_color: Read images as color when true, grayscale otherwise.
        name_pattern: ``%``-format pattern taking the direction (``"x"`` or
            ``"y"``) and the image number.

    Returns:
        Array with all images concatenated along axis 2, ordered
        x, y, x, y, ... in sampling order.

    Raises:
        SystemExit: If an image cannot be loaded (after printing a message).
    """
    cv_read_flag = cv2.IMREAD_COLOR if is_color else cv2.IMREAD_GRAYSCALE
    interpolation = cv2.INTER_LINEAR

    sampled_list = []
    for offset in offsets:
        for length_id in range(1, new_length+1):
            frame_name_x = name_pattern % ("x", length_id + offset)
            frame_path_x = path + "/" + frame_name_x
            cv_img_origin_x = cv2.imread(frame_path_x, cv_read_flag)
            frame_name_y = name_pattern % ("y", length_id + offset)
            frame_path_y = path + "/" + frame_name_y
            cv_img_origin_y = cv2.imread(frame_path_y, cv_read_flag)
            if cv_img_origin_x is None or cv_img_origin_y is None:
                print("Could not load file %s or %s" % (frame_path_x, frame_path_y))
                # Non-zero status marks the exit as a failure.
                sys.exit(1)
            if new_width > 0 and new_height > 0:
                # Pass interpolation by keyword: the third positional
                # parameter of cv2.resize is `dst`, not the interpolation.
                cv_img_x = cv2.resize(cv_img_origin_x, (new_width, new_height),
                                      interpolation=interpolation)
                cv_img_y = cv2.resize(cv_img_origin_y, (new_width, new_height),
                                      interpolation=interpolation)
            else:
                cv_img_x = cv_img_origin_x
                cv_img_y = cv_img_origin_y
            # NOTE(review): adding axis 2 suits 2-D (grayscale) images; with
            # is_color=True the arrays would become 4-D - confirm that
            # callers use is_color=False for flow.
            sampled_list.append(np.expand_dims(cv_img_x, 2))
            sampled_list.append(np.expand_dims(cv_img_y, 2))
    clip_input = np.concatenate(sampled_list, axis=2)
    return clip_input
    
class ucf101(data.Dataset):
    """Dataset serving sampled RGB or optical-flow clips listed in a file.

    Every item is ``(clip_input, target)``: the frames sampled from one
    video, stacked along the channel axis by ``ReadSegmentRGB`` or
    ``ReadSegmentFlow``, and the label read from the list file.
    """

    def __init__(self,
                 root,  # directory the video folder names are joined onto
                 source,  # list file: "<video folder> <frame count> <label>"
                 phase,  # "train" or "val"; selects the sampling strategy
                 modality,  # "rgb" or "flow"
                 name_pattern=None,  # %-pattern of the image file names
                 is_color=True,  # read images as color (True) or grayscale
                 num_segments=1,  # number of segments sampled per video
                 new_length=1,  # consecutive frames read per segment
                 new_width=0,  # resize target; only used when both > 0
                 new_height=0,
                 transform=None,  # applied to the clip first
                 target_transform=None,  # applied to the label
                 video_transform=None):  # applied to the clip last
        """Parse the list file and store the sampling configuration.

        Raises:
            RuntimeError: If the list file yields no clips.
        """
        # Sub-directory names of root and their indices in sorted order.
        classes, class_to_idx = find_classes(root)
        # One (video path, frame count, label) tuple per listed video.
        clips = make_dataset(root, source)
        if len(clips) == 0:
            raise(RuntimeError("Found 0 video clips in subfolders of: " + root + "\n"
                               "Check your data directory."))
        self.root = root
        self.source = source
        self.phase = phase
        self.modality = modality
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.clips = clips
        if name_pattern:
            self.name_pattern = name_pattern
        elif self.modality == "rgb":
            self.name_pattern = "img_%05d.jpg"
        elif self.modality == "flow":
            self.name_pattern = "flow_%s_%05d.jpg"
        else:
            # Unknown modality: keep the attribute defined; __getitem__
            # rejects the modality with a clear error.
            self.name_pattern = None
        self.is_color = is_color
        self.num_segments = num_segments
        self.new_length = new_length
        self.new_width = new_width
        self.new_height = new_height
        self.transform = transform
        self.target_transform = target_transform
        self.video_transform = video_transform

    def __getitem__(self, index):
        """Return ``(clip_input, target)`` for the video at `index`.

        The frames of the video are divided into ``num_segments`` chunks of
        equal length. For "train" a random start offset is drawn inside
        every chunk, for "val" the start is centred in the chunk. When a
        chunk is shorter than ``new_length`` the offset 0 is used instead.

        Raises:
            ValueError: If ``phase`` is not "train"/"val" or ``modality`` is
                not "rgb"/"flow".
        """
        # Entry of the video: frame folder, frame count, label.
        path, duration, target = self.clips[index]
        # Fail with a clear message instead of printing and crashing later.
        if self.phase not in ("train", "val"):
            raise ValueError("Only phase train and val are supported.")
        if self.modality not in ("rgb", "flow"):
            raise ValueError("No such modality %s" % (self.modality))

        average_duration = duration // self.num_segments
        offsets = []
        for seg_id in range(self.num_segments):
            if average_duration < self.new_length:
                # Chunk too short to hold new_length frames.
                offsets.append(0)
            elif self.phase == "train":
                # randint includes both ends, hence no +1 on the upper bound.
                offset = random.randint(0, average_duration - self.new_length)
                offsets.append(offset + seg_id * average_duration)
            else:
                # "val": centre the new_length frames inside the chunk.
                offsets.append((average_duration - self.new_length + 1) // 2
                               + seg_id * average_duration)

        if self.modality == "rgb":
            read_segment = ReadSegmentRGB
        else:
            read_segment = ReadSegmentFlow
        # Read (and optionally resize) the sampled frames.
        clip_input = read_segment(path,
                                  offsets,
                                  self.new_height,
                                  self.new_width,
                                  self.new_length,
                                  self.is_color,
                                  self.name_pattern
                                  )
        # Optional preprocessing of the clip.
        if self.transform is not None:
            clip_input = self.transform(clip_input)
        # Optional processing of the integer label.
        if self.target_transform is not None:
            target = self.target_transform(target)
        if self.video_transform is not None:
            clip_input = self.video_transform(clip_input)
        # Network input and its label.
        return clip_input, target

    def __len__(self):
        """Return the number of videos in the list file."""
        return len(self.clips)