当前位置:网站首页>TensorRTx-YOLOv5工程解读(二)
TensorRTx-YOLOv5工程解读(二)
2022-08-04 05:24:00 【单胖】
TensorRTx-YOLOv5工程解读(二)
具体Workflow
if (!wts_name.empty()) {
IHostMemory* modelStream{
nullptr };
APIToModel(BATCH_SIZE, &modelStream, is_p6, gd, gw, wts_name);
assert(modelStream != nullptr);
std::ofstream p(engine_name, std::ios::binary);
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
return -1;
}
p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
modelStream->destroy();
return 0;
}
在加载完毕权重之后,建立IHostMemory*
类型的变量modelStream
,并将构建好的engine保存在本地磁盘之中。
之所以要把engine文件保存在本地,是因为如果每次运行程序之前都要重新制作一遍engine,整个过程将会非常的缓慢。而具体的engine制作工艺的流程就在函数APItoModel
之内。因此具体分析一下APItoModel
。
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, bool& is_p6, float& gd, float& gw, std::string& wts_name) {
// Create builder
IBuilder* builder = createInferBuilder(gLogger);
IBuilderConfig* config = builder->createBuilderConfig();
// Create model to populate the network, then set the outputs and create an engine
ICudaEngine *engine = nullptr;
if (is_p6) {
engine = build_engine_p6(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
} else {
engine = build_engine(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
}
assert(engine != nullptr);
// Serialize the engine
(*modelStream) = engine->serialize();
// Close everything down
engine->destroy();
builder->destroy();
config->destroy();
}
APItoModel
中的就是engine的一般传统工艺。首先声明IBuilder*
类型的builder
,后面很多组件都会需要builder来build之。接着声明IBuilderConfig*
类型的config
,使用builder
中的方法createBuilderConfig
来创建,顾名思义,就是创建了一个build过程中的配置变量。
然后声明一个ICudaEngine*
类型的engine
变量。这个变量初始化为空指针即可。
接着就是调用build_engine()
函数去创建engine
,具体build_engine
函数在下面分析。拿到engine后,将其序列化,赋值给*modelStream
。engine在序列化后才能够保存到本地。
回到主函数,创建一个std::ofstream
的对象p,并用p.write()
函数保存modelStream->data()
和modelStream->size()
,再destroy()
modelStream。
回过头来看build_engine()
函数。
ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
INetworkDefinition* network = builder->createNetworkV2(0U);
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{
3, INPUT_H, INPUT_W });
assert(data);
std::map<std::string, Weights> weightMap = loadWeights(wts_name);
/* ------ yolov5 backbone------ */
auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8");
/* ------ yolov5 head ------ */
auto bottleneck_csp9 = C3(network, weightMap, *spp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.9");
auto conv10 = convBlock(network, weightMap, *bottleneck_csp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
auto upsample11 = network->addResize(*conv10->getOutput(0));
assert(upsample11);
upsample11->setResizeMode(ResizeMode::kNEAREST);
upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
ITensor* inputTensors12[] = {
upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
auto cat12 = network->addConcatenation(inputTensors12, 2);
auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
auto upsample15 = network->addResize(*conv14->getOutput(0));
assert(upsample15);
upsample15->setResizeMode(ResizeMode::kNEAREST);
upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
ITensor* inputTensors16[] = {
upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
auto cat16 = network->addConcatenation(inputTensors16, 2);
auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
/* ------ detect ------ */
IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{
1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
ITensor* inputTensors19[] = {
conv18->getOutput(0), conv14->getOutput(0) };
auto cat19 = network->addConcatenation(inputTensors19, 2);
auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{
1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
ITensor* inputTensors22[] = {
conv21->getOutput(0), conv10->getOutput(0) };
auto cat22 = network->addConcatenation(inputTensors22, 2);
auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{
1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{
det0, det1, det2});
yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
#if defined(USE_FP16)
config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
assert(builder->platformHasFastInt8());
config->setFlag(BuilderFlag::kINT8);
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
config->setInt8Calibrator(calibrator);
#endif
std::cout << "Building engine, please wait for a while..." << std::endl;
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
std::cout << "Build engine successfully!" << std::endl;
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem : weightMap)
{
free((void*)(mem.second.values));
}
return engine;
}
这个函数很明显是在手写YOLOv5的network结构。首先先声明INetworkDefinition*
类型的变量network
,此处也是需要使用builder->createNetworkV2()
来创建。再创建网络的输入张量,即声明ITensor*
类型的data
变量,由network->addInput
来创建即可。这个输入张量需要在函数中指定输入张量的大小。
接下来就是定义YOLOv5的各个层。自定义层稍后做分析。
网络搭建完成后,通过builder->setMaxBatchSize()
设置最大Batch的大小,再使用config->setMaxWorkspaceSize()
设置每一层在实现时最大需要多少临时工作空间。最后使用config->setFlag
设置推理的数据类型,一般设置成kFP16
,不少平台不支持int8。
设置完毕后,在builder
中调用builder->buildEngineWithConfig(*network, *config);
即可完成engine的创建,再将netwok销毁,并释放内存空间。
边栏推荐
猜你喜欢
3面头条,花7天整理了面试题和学习笔记,已正式入职半个月
编程大杂烩(三)
自动化测试的成本高效果差,那么自动化测试的意义在哪呢?
嵌入式系统驱动初级【4】——字符设备驱动基础下_并发控制
企业需要知道的5个 IAM 最佳实践
Turn: Management is the love of possibility, and managers must have the courage to break into the unknown
Get the selected content of the radio box
想低成本保障软件安全?5大安全任务值得考虑
Can 't connect to MySQL server on' localhost3306 '(10061) simple solutions
Do you think border-radius is just rounded corners?【Various angles】
随机推荐
npm init [email protected] 构建项目报错SyntaxError: Unexpected token ‘.‘解决办法
C1认证之web基础知识及习题——我的学习笔记
CentOS7 - yum install mysql
[SemiDrive source code analysis] [MailBox inter-core communication] 47 - Analysis of RPMSG_IPCC_RPC mode limit size of single transmission and limit bandwidth test
嵌入式系统驱动初级【4】——字符设备驱动基础下_并发控制
MySQL date functions
C专家编程 第5章 对链接的思考 5.3 函数库链接的5个特殊秘密
Will the 2023 PMP exam use the new version of the textbook?Reply is here!
DP4398:国产兼容替代CS4398立体声24位/192kHz音频解码芯片
MySql数据恢复方法个人总结
有趣的 Kotlin 0x0E:DeepRecursiveFunction
数的划分之动态规划
C专家编程 第4章 令人震惊的事实:数组和指针并不相同 4.2 我的代码为什么无法运行
C专家编程 第5章 对链接的思考 5.1 函数库、链接和载入
8大软件供应链攻击事件概述
力扣:62.不同路径
Plus版SBOM:流水线物料清单PBOM
DataTable uses Linq for grouping and summarization, and converts the Linq result set into DataTable
力扣:63. 不同路径 II
7、特殊SQL的执行