"IRuntime": undeclared identifier
This error usually means that NvInferRuntime.h was not included, or that IRuntime was used without its nvinfer1:: namespace qualifier. A minimal illustration of the fix is below, followed by the complete usage.
Reference: "TensorRT series (1): Model Inference" (CSDN blog)
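As a minimal sketch of the fix (assuming the TensorRT 8.x headers used in this article):

// NvInferRuntime.h declares nvinfer1::IRuntime; without this include,
// or without the nvinfer1:: qualifier, the compiler reports
// "IRuntime": undeclared identifier.
#include <NvInferRuntime.h>

nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);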
// tensorRT include
#include <NvInfer.h>
#include <NvInferRuntime.h>
// cuda include
#include <cuda_runtime.h>
// system include
#include <stdio.h>
#include <math.h>
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;
// The code from the previous section
class TRTLogger : public nvinfer1::ILogger
{
public:
    virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override
    {
        // Print messages at severity kINFO or more severe (lower enum value = more severe)
        if(severity <= Severity::kINFO)
        {
            printf("%d: %s\n", static_cast<int>(severity), msg);
        }
    }
} logger;
nvinfer1::Weights make_weights(float* ptr, int n)
{
    nvinfer1::Weights w;
    w.count  = n;
    w.type   = nvinfer1::DataType::kFLOAT;
    w.values = ptr;
    return w;
}
bool build_model()
{
    TRTLogger logger;

    // Basic builder components
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1); // 1 = explicit batch

    // Build the model
    /*
        Network definition:

        image
          |
        linear (fully connected)  input = 3, output = 2, bias = True  w=[[1.0, 2.0, 0.5], [0.1, 0.2, 0.5]], b=[0.3, 0.8]
          |
        sigmoid
          |
        prob
    */
    const int num_input = 3;
    const int num_output = 2;
    float layer1_weight_values[] = {1.0, 2.0, 0.5, 0.1, 0.2, 0.5};
    float layer1_bias_values[]   = {0.3, 0.8};

    nvinfer1::ITensor* input = network->addInput("image", nvinfer1::DataType::kFLOAT, nvinfer1::Dims4(1, num_input, 1, 1));
    nvinfer1::Weights layer1_weight = make_weights(layer1_weight_values, 6);
    nvinfer1::Weights layer1_bias   = make_weights(layer1_bias_values, 2);
    auto layer1 = network->addFullyConnected(*input, num_output, layer1_weight, layer1_bias);
    auto prob   = network->addActivation(*layer1->getOutput(0), nvinfer1::ActivationType::kSIGMOID);

    // Mark prob as the network output
    network->markOutput(*prob->getOutput(0));

    printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f);
    config->setMaxWorkspaceSize(1 << 28);
    builder->setMaxBatchSize(1);

    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    if(engine == nullptr)
    {
        printf("Build engine failed.\n");
        return false;
    }

    // Serialize the model and save it to a file
    nvinfer1::IHostMemory* model_data = engine->serialize();
    FILE* f = fopen("engine.trtmodel", "wb");
    fwrite(model_data->data(), 1, model_data->size(), f);
    fclose(f);

    // Release objects in reverse order of construction
    model_data->destroy();
    engine->destroy();
    network->destroy();
    config->destroy();
    builder->destroy();
    printf("Done.\n");
    return true;
}
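An aside on the destroy() chain above: the toolchain used here is TensorRT 8 (see the trt8cuda112cudnn8 path in the Makefile), where destroy() is deprecated in favor of plain delete. A minimal sketch, assuming TensorRT 8.x, of letting std::unique_ptr release the builder objects automatically instead:

#include <memory>

// std::unique_ptr deletes each object when it goes out of scope, in
// reverse order of declaration, so no manual destroy()/delete calls
// are needed. (Assumes TensorRT 8.x, where the interfaces can be
// deleted directly.)
std::unique_ptr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(logger)};
std::unique_ptr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};
std::unique_ptr<nvinfer1::INetworkDefinition> network{builder->createNetworkV2(1)};
// ... addInput / addFullyConnected / markOutput as above ...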
vector<unsigned char> load_file(const string& file)
{
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};

    in.seekg(0, ios::end);
    size_t length = in.tellg();

    std::vector<uint8_t> data;
    if (length > 0){
        in.seekg(0, ios::beg);
        data.resize(length);
        in.read((char*)&data[0], length);
    }
    in.close();
    return data;
}
void inference(){
    // ------------------------------ 1. Prepare and load the model ------------------------------
    TRTLogger logger;
    auto engine_data = load_file("engine.trtmodel");

    // Before running inference, create a runtime interface instance.
    // Like the builder, the runtime needs a logger.
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);

    // Deserialize engine_data to obtain the engine
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());
    if(engine == nullptr){
        printf("Deserialize cuda engine failed.\n");
        runtime->destroy();
        return;
    }

    nvinfer1::IExecutionContext* execution_context = engine->createExecutionContext();
    cudaStream_t stream = nullptr;
    // Create a CUDA stream so that inference for this batch runs independently
    cudaStreamCreate(&stream);

    /*
        Network definition:

        image
          |
        linear (fully connected)  input = 3, output = 2, bias = True  w=[[1.0, 2.0, 0.5], [0.1, 0.2, 0.5]], b=[0.3, 0.8]
          |
        sigmoid
          |
        prob
    */

    // ------------------------------ 2. Prepare the input data and move it to the GPU ------------------------------
    float input_data_host[] = {1, 2, 3};
    float* input_data_device = nullptr;

    float output_data_host[2];
    float* output_data_device = nullptr;

    cudaMalloc(&input_data_device, sizeof(input_data_host));
    cudaMalloc(&output_data_device, sizeof(output_data_host));
    cudaMemcpyAsync(input_data_device, input_data_host, sizeof(input_data_host), cudaMemcpyHostToDevice, stream);

    // An array of pointers giving the GPU addresses of the input and output
    float* bindings[] = {input_data_device, output_data_device};

    // ------------------------------ 3. Run inference and copy the result back to the CPU ------------------------------
    bool success = execution_context->enqueueV2((void**)bindings, stream, nullptr);
    cudaMemcpyAsync(output_data_host, output_data_device, sizeof(output_data_host), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
    printf("output_data_host = %f, %f\n", output_data_host[0], output_data_host[1]);

    // ------------------------------ 4. Free resources ------------------------------
    printf("Clean memory\n");
    cudaStreamDestroy(stream);
    execution_context->destroy();
    engine->destroy();
    runtime->destroy();

    // ------------------------------ 5. Verify the result manually ------------------------------
    const int num_input = 3;
    const int num_output = 2;
    float layer1_weight_values[] = {1.0, 2.0, 0.5, 0.1, 0.2, 0.5};
    float layer1_bias_values[]   = {0.3, 0.8};

    printf("Manually verify the results:\n");
    for(int io = 0; io < num_output; ++io)
    {
        float output_host = layer1_bias_values[io];
        for(int ii = 0; ii < num_input; ++ii)
        {
            output_host += layer1_weight_values[io * num_input + ii] * input_data_host[ii];
        }
        // sigmoid
        float prob = 1 / (1 + exp(-output_host));
        printf("output_prob[%d] = %f\n", io, prob);
    }
}
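For the input {1, 2, 3} used above, the manual check works out to (these values follow directly from the weights and biases defined in the network):

output[0] = 0.3 + 1.0*1 + 2.0*2 + 0.5*3 = 6.8,  sigmoid(6.8) ≈ 0.998887
output[1] = 0.8 + 0.1*1 + 0.2*2 + 0.5*3 = 2.8,  sigmoid(2.8) ≈ 0.942676

The enqueueV2 path should print the same two numbers.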
int main()
{
    if(!build_model())
    {
        return -1;
    }
    inference();
    return 0;
}
makefile:
cc := g++
name := pro
workdir := workspace
srcdir := src
objdir := objs
stdcpp := c++11
cuda_home := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/trt8cuda112cudnn8
syslib := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/lib
cpp_pkg := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/cpp-packages
cuda_arch :=
nvcc := $(cuda_home)/bin/nvcc -ccbin=$(cc)
# Define the .cpp source search and their dependency .mk files
cpp_srcs := $(shell find $(srcdir) -name "*.cpp")
cpp_objs := $(cpp_srcs:.cpp=.cpp.o)
cpp_objs := $(cpp_objs:$(srcdir)/%=$(objdir)/%)
cpp_mk := $(cpp_objs:.cpp.o=.cpp.mk)
# Define the .cu source search and their dependency .mk files
cu_srcs := $(shell find $(srcdir) -name "*.cu")
cu_objs := $(cu_srcs:.cu=.cu.o)
cu_objs := $(cu_objs:$(srcdir)/%=$(objdir)/%)
cu_mk := $(cu_objs:.cu.o=.cu.mk)
# Define the OpenCV and CUDA libraries needed for linking
link_cuda := cudart cudnn
link_trtpro :=
link_tensorRT := nvinfer
link_opencv :=
link_sys := stdc++ dl
link_librarys := $(link_cuda) $(link_tensorRT) $(link_sys) $(link_opencv)
# Define include paths; note there must be no trailing spaces after the backslashes
# Write only the path; -I is added automatically below
include_paths := src \
$(cuda_home)/include/cuda \
$(cuda_home)/include/tensorRT \
$(cpp_pkg)/opencv4.2/include
# Define library paths; write only the path, -L is added automatically below
library_paths := $(cuda_home)/lib64 $(syslib) $(cpp_pkg)/opencv4.2/lib
# Join the library paths into a single string, e.g. a b c => a:b:c,
# which is then exported as LD_LIBRARY_PATH=a:b:c
empty :=
library_path_export := $(subst $(empty) $(empty),:,$(library_paths))
# Expand the include paths, library paths, and libraries, adding -I, -L, -l automatically
run_paths := $(foreach item,$(library_paths),-Wl,-rpath=$(item))
include_paths := $(foreach item,$(include_paths),-I$(item))
library_paths := $(foreach item,$(library_paths),-L$(item))
link_librarys := $(foreach item,$(link_librarys),-l$(item))
# For a different GPU, change -gencode=arch=compute_75,code=sm_75 to match its compute capability
# GPU compute capabilities are listed here: https://developer.nvidia.com/zh-cn/cuda-gpus#compute
# On Jetson Nano, if -m64 is reported as an unknown option, delete the -m64 flag; this does not affect the result
cpp_compile_flags := -std=$(stdcpp) -w -g -O0 -m64 -fPIC -fopenmp -pthread
cu_compile_flags := -std=$(stdcpp) -w -g -O0 -m64 $(cuda_arch) -Xcompiler "$(cpp_compile_flags)"
link_flags := -pthread -fopenmp -Wl,-rpath='$$ORIGIN'
cpp_compile_flags += $(include_paths)
cu_compile_flags += $(include_paths)
link_flags += $(library_paths) $(link_librarys) $(run_paths)
# If a header file changes, these included .mk files let make recompile the dependent .cpp or .cu files automatically
ifneq ($(MAKECMDGOALS), clean)
-include $(cpp_mk) $(cu_mk)
endif
$(name) : $(workdir)/$(name)

all : $(name)
run : $(name)
	@cd $(workdir) && ./$(name) $(run_args)

$(workdir)/$(name) : $(cpp_objs) $(cu_objs)
	@echo Link $@
	@mkdir -p $(dir $@)
	@$(cc) $^ -o $@ $(link_flags)

$(objdir)/%.cpp.o : $(srcdir)/%.cpp
	@echo Compile CXX $<
	@mkdir -p $(dir $@)
	@$(cc) -c $< -o $@ $(cpp_compile_flags)

$(objdir)/%.cu.o : $(srcdir)/%.cu
	@echo Compile CUDA $<
	@mkdir -p $(dir $@)
	@$(nvcc) -c $< -o $@ $(cu_compile_flags)

# Generate .mk dependency files for .cpp sources
$(objdir)/%.cpp.mk : $(srcdir)/%.cpp
	@echo Compile depends C++ $<
	@mkdir -p $(dir $@)
	@$(cc) -M $< -MF $@ -MT $(@:.cpp.mk=.cpp.o) $(cpp_compile_flags)

# Generate .mk dependency files for .cu sources
$(objdir)/%.cu.mk : $(srcdir)/%.cu
	@echo Compile depends CUDA $<
	@mkdir -p $(dir $@)
	@$(nvcc) -M $< -MF $@ -MT $(@:.cu.mk=.cu.o) $(cu_compile_flags)

# Cleanup target
clean :
	@rm -rf $(objdir) $(workdir)/$(name) $(workdir)/*.trtmodel

# Prevent these target names from being treated as files
.PHONY : clean run $(name)

# Export the library path so the binary can find its shared libraries at run time
export LD_LIBRARY_PATH:=$(library_path_export)
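With this Makefile, a typical workflow looks like the following (assuming the sources live under src/ and the binary runs from workspace/, as the variables above define):

make        # compile src/*.cpp and src/*.cu, link workspace/pro
make run    # run ./pro from the workspace directory
make clean  # remove objs/, the binary, and any *.trtmodel files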
Key points:
1. Use createNetworkV2 and pass 1 (explicit batch); createNetwork is deprecated, and implicit batch is officially discouraged. This choice determines whether inference goes through enqueue or enqueueV2.
2. Pointers such as builder and config must be released, otherwise memory leaks; release them with ptr->destroy() (or see the unique_ptr sketch after build_model above).
3. markOutput marks a tensor as a model output: marking N tensors gives N outputs, just as calling addInput N times gives N inputs.
4. workspaceSize is the size of the workspace. Some layers need extra storage during execution; instead of allocating it themselves, they request workspace memory from TensorRT so that the memory can be reused.
5. Remember: a saved engine only matches the TensorRT version and the device it was compiled on, and is only guaranteed to be optimal for that configuration. An engine can sometimes run on a different device, but it will not be optimal, so this is not recommended.
6. bindings is TensorRT's description of the input and output tensors: bindings = input tensors + output tensors. For example, with input a and outputs b, c, d, bindings = [a, b, c, d], so bindings[0] = a and bindings[2] = c (see the sketch after this list).
7. enqueueV2 performs asynchronous inference: the work is added to the stream's queue and waits to execute. The bindings passed in are pointers to the tensors (note: device pointers).
8. createExecutionContext can be called multiple times, allowing one engine to have several execution contexts.
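As a minimal sketch of point 6 (assuming TensorRT 8.x, where getBindingIndex is available, and assuming the output tensor was explicitly named during network construction), bindings can be filled by tensor name instead of by assumed order:

// Look bindings up by name rather than relying on their order.
// "image" is the name passed to addInput above; "prob" is hypothetical
// and assumes prob->getOutput(0)->setName("prob") was called when building.
void* bindings[2] = {nullptr, nullptr};
bindings[engine->getBindingIndex("image")] = input_data_device;
bindings[engine->getBindingIndex("prob")]  = output_data_device;
execution_context->enqueueV2(bindings, stream, nullptr);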
————————————————
Copyright notice: this is an original article by the CSDN blogger 「The source of the torrent」, licensed under CC 4.0 BY-SA. Please attach the original source link and this notice when reposting.
Link to the original text :https://blog.csdn.net/weicao1990/article/details/125034572