Current location: Home > CANN operator: using iterators to efficiently slice tensor data and process it in blocks
CANN operator: using iterators to efficiently slice tensor data and process it in blocks
2022-07-04 19:53:00 【InfoQ】
Task scenario and objectives
Conventional approach:
Background knowledge and analysis
1. Stride
2. Iterator
/// Row-major counter over the positions of an N-dimensional shape.
///
/// Starting from a given position, each ++ advances the last dimension by one
/// and carries overflow into the preceding dimensions. Iteration is finished
/// when the first dimension has reached its extent (see End()).
///
/// An iterator that is default-constructed, or constructed from invalid
/// arguments (rank mismatch, or a start coordinate that is not below the
/// extent of its dimension), holds no position and reports End() immediately.
/// Because a start coordinate must be below its extent, a shape containing a
/// zero extent is always rejected, so operator++ never divides by zero.
///
/// @tparam T integral coordinate type.
template <typename T>
class PositionIterator {
 public:
  /// Creates an empty iterator; End() is true and GetPos()/GetShape() are empty.
  PositionIterator() = default;

  /// @param stt start position, one coordinate per dimension.
  /// @param sh  extent of each dimension.
  ///
  /// If the arguments are invalid the iterator is left empty.
  PositionIterator(std::vector<T> stt, std::vector<T> sh) {
    if (stt.size() != sh.size()) {
      return;
    }
    for (std::size_t i = 0; i < sh.size(); ++i) {
      if (stt[i] >= sh[i]) {
        return;
      }
    }
    pos_ = std::move(stt);
    shape_ = std::move(sh);
  }

  /// Advances to the next position. No-op on an empty iterator.
  /// Returns a reference so stepping does not copy the two vectors.
  PositionIterator &operator++() {
    if (shape_.empty()) {
      return *this;
    }
    const std::size_t last = shape_.size() - 1;
    pos_[last] += 1;
    // Propagate carries from the last dimension towards the first one.
    // Dimension 0 is deliberately not wrapped: reaching its extent marks the end.
    for (std::size_t i = last; i > 0; --i) {
      if (pos_[i] / shape_[i] != 0) {
        pos_[i - 1] += pos_[i] / shape_[i];
        pos_[i] = pos_[i] % shape_[i];
      }
    }
    return *this;
  }

  /// @return true when there is no position left to visit (or the iterator is empty).
  bool End() const { return pos_.empty() || pos_[0] >= shape_[0]; }

  /// @return a copy of the current position.
  std::vector<T> GetPos() const { return pos_; }

  /// @return a copy of the shape being iterated.
  std::vector<T> GetShape() const { return shape_; }

 private:
  std::vector<T> pos_;
  std::vector<T> shape_;
};
Implementation of the Diagonal operator
/// Extracts the diagonal spanned by dimensions dim1 and dim2 of input 0 and
/// writes it to output 0, with the diagonal as the last output dimension.
///
/// @tparam T     element type the input/output buffers are reinterpreted as.
/// @param ctx    kernel context providing input 0 and output 0.
/// @param offset diagonal offset, forwarded to OffsetSize() and get_data().
/// @param dim1   first dimension of the 2-D planes the diagonal is taken from.
/// @param dim2   second dimension of those planes.
/// @return KERNEL_STATUS_OK (the only status this function produces).
///
/// NOTE(review): the pointers obtained from ctx, GetTensorShape() and GetData()
/// are dereferenced without null checks, and dim1/dim2 index the stride vector
/// directly - presumably the caller validates and normalizes them; confirm.
template <typename T>
uint32_t DiagonalCpuKernel::DoComputeType(CpuKernelContext &ctx,
                                          const int64_t &offset,
                                          const int64_t &dim1,
                                          const int64_t &dim2) {
  // Fetch the input and output tensors.
  Tensor *input_x = ctx.Input(0);
  Tensor *y = ctx.Output(0);
  // Input shape, rank and raw data pointers.
  auto x_shape = input_x->GetTensorShape();
  std::vector<int64_t> x_shape_ = x_shape->GetDimSizes();
  const int64_t x_dim = x_shape->GetDims();
  auto dataptr = reinterpret_cast<T *>(input_x->GetData());
  auto y_dataptr = reinterpret_cast<T *>(y->GetData());

  // Number of elements on the requested diagonal.
  const int64_t dsize = OffsetSize(offset, dim1, dim2, x_shape_);
  // Stride vector of the input tensor.
  std::vector<int64_t> x_stride = ConstructStride<int64_t>(x_shape_);
  // Loop-invariant strides: moving one step along the diagonal advances both
  // dim1 and dim2 by one element.
  const int64_t stride_dim1 = x_stride[dim1];
  const int64_t stride_dim2 = x_stride[dim2];
  const int64_t diag_step = stride_dim1 + stride_dim2;

  // Two cases: input with a rank other than 2, and plain 2-D input.
  if (x_dim != N2) {
    // vx_shape / vx_stride: input shape and strides with dim1 and dim2 removed.
    std::vector<int64_t> vx_shape, vx_stride;
    for (std::size_t tmp_dim = 0; tmp_dim < x_shape_.size(); tmp_dim++) {
      const int64_t cur_dim = static_cast<int64_t>(tmp_dim);
      if (cur_dim != dim1 && cur_dim != dim2) {
        vx_shape.push_back(x_shape_[tmp_dim]);
        vx_stride.push_back(x_stride[tmp_dim]);
      }
    }
    // Output shape is the remaining dimensions followed by the diagonal length.
    std::vector<int64_t> y_shape = vx_shape;
    y_shape.push_back(dsize);
    std::vector<int64_t> y_stride = ConstructStride<int64_t>(y_shape);
    // vy_stride: output strides without the trailing (diagonal) dimension.
    std::vector<int64_t> vy_stride = y_stride;
    vy_stride.pop_back();

    // Walk every position of the remaining dimensions and copy its diagonal.
    std::vector<int64_t> v_start(vx_shape.size(), 0);
    for (PositionIterator<int64_t> myiter(v_start, vx_shape); !myiter.End();
         ++myiter) {
      // Coordinates in all dimensions except dim1 and dim2.
      auto p = myiter.GetPos();
      // Base offsets of this position in the input and in the output.
      const int64_t base_pos1 = MulSum<int64_t>(p, vx_stride);
      const int64_t outbase_pos = MulSum<int64_t>(p, vy_stride);
      // int64_t index: dsize is int64_t, an int counter could overflow.
      for (int64_t i = 0; i < dsize; i++) {
        const int64_t base_pos2 = i * diag_step;
        // get_data() receives the base offset, the diagonal offset and both
        // strides; presumably it picks the upper/lower diagonal element.
        int64_t arr[N2] = {stride_dim1, stride_dim2};
        y_dataptr[outbase_pos + i] =
            get_data(base_pos1 + base_pos2, offset, arr, dataptr);
      }
    }
  } else {
    // 2-D input: a single diagonal, written contiguously to the output.
    for (int64_t i = 0; i < dsize; i++) {
      const int64_t base_pos = i * diag_step;
      int64_t arr[N2] = {stride_dim1, stride_dim2};
      y_dataptr[i] = get_data(base_pos, offset, arr, dataptr);
    }
  }
  return KERNEL_STATUS_OK;
}
Other uses of iterators
// Excerpt: the loop is not closed within this snippet.
// For every position produced by iterating v_shape, gather the axis_len
// elements that lie along tmp_axis, pairing each value with its index i.
// NOTE(review): position_iterator / end() / get_pos() / mul_sum are spelled in
// snake_case here, while the class shown earlier in this file is
// PositionIterator with End() / GetPos(), and its helper is MulSum -
// presumably the same helpers from an older revision; confirm before reuse.
for (position_iterator<int64_t> mit(v_start, v_shape); !mit.end(); ++mit) {
// Current coordinates.
auto p = mit.get_pos();
// Number of elements along tmp_axis.
int axis_len = input_shape_[tmp_axis];
// One (value, index) record per element along tmp_axis.
std::vector<ValueIndex<T>> data_(axis_len);
// Base offset of this position.
// NOTE(review): mul_sum is instantiated with int64_t but the result is stored
// in an int - confirm that offsets always fit.
int base_pos = mul_sum<int64_t>(p, v_stride);
for (int32_t i = 0; i < axis_len; i++) {
// Step along tmp_axis using that axis' stride in the input.
data_[i].value = x_dataptr[base_pos + i * input_stride[tmp_axis]];
data_[i].index = i;
}
std::vector<std::vector<T1>> data_;
for (int64_t i = 0; i < dim0; i++) {
std::vector<T1> tmp_v1;
for (PositionIterator<int64_t> mit(v_start, v_shape); !mit.End(); ++mit) {
auto pos = mit.GetPos();
tmp_v1.push_back(
x_dataptr[MulSum<int64_t>(pos, v_stride) + i * input_stride[axis]]);
}
data_.push_back(tmp_v1);
}
边栏推荐
- FTP, SFTP file transfer
- 上线首月,这家露营地游客好评率高达99.9%!他是怎么做到的?
- HDU 1372 & POJ 2243 Knight Moves(广度优先搜索)
- 1007 Maximum Subsequence Sum(25 分)(PAT甲级)
- An example of multi module collaboration based on NCF
- 线上数据库迁移的几种方法
- 1006 sign in and sign out (25 points) (PAT class a)
- 1008 Elevator(20 分)(PAT甲级)
- 做社交媒体营销应该注意些什么?Shopline卖家的成功秘笈在这里!
- There are multiple divs in the large div, which are displayed on the same line. After overflow, scroll bars are generated without line breaks
猜你喜欢
There are multiple divs in the large div, which are displayed on the same line. After overflow, scroll bars are generated without line breaks
Stream stream
如何使用Async-Awati异步任務處理代替BackgroundWorker?
Master the use of auto analyze in data warehouse
CANN算子:利用迭代器高效实现Tensor数据切割分块处理
Actual combat simulation │ JWT login authentication
记一次 .NET 某工控数据采集平台 线程数 爆高分析
欧拉函数
PointNeXt:通过改进的模型训练和缩放策略审视PointNet++
Dark horse programmer - software testing - stage 08 2-linux and database-23-30-process port related, modify file permissions, obtain port number information, program and process related operations, Li
随机推荐
1011 World Cup Betting (20 分)(PAT甲级)
CANN算子:利用迭代器高效实现Tensor数据切割分块处理
1002. A+b for Polynomials (25) (PAT class a)
1006 Sign In and Sign Out(25 分)(PAT甲级)
kotlin 条件控制
1005 Spell It Right(20 分)(PAT甲级)
2022 version of stronger jsonpath compatibility and performance test (snack3, fastjson2, jayway.jsonpath)
BCG 使用之CBCGPProgressDlg进度条使用
Find the nth power of 2
如何使用Async-Awati异步任務處理代替BackgroundWorker?
Niuke Xiaobai month race 7 who is the divine Archer
"Only one trip", active recommendation and exploration of community installation and maintenance tasks
Niuke Xiaobai month race 7 F question
Allure of pytest visual test report
水晶光电:长安深蓝SL03的AR-HUD产品由公司供应
HDU 1097 A hard puzzle
Dark horse programmer - software testing - stage 08 2-linux and database-23-30-process port related, modify file permissions, obtain port number information, program and process related operations, Li
English grammar_ Noun - use
Dark horse programmer - software testing - 09 stage 2-linux and database -31-43 instructions issued by modifying the file permission letter, - find the link to modify the file, find the file command,
Add namespace declaration