当前位置:网站首页>Cann operator: using iterators to efficiently realize tensor data cutting and blocking processing
CANN operators: using iterators to efficiently slice Tensor data and process it in blocks
2022-07-04 19:53:00 【InfoQ】
Task scenario and objectives
Conventional approach:
Background knowledge and analysis
1. Stride
2. Iterator
// Multi-dimensional position counter.
//
// Stores a coordinate vector (pos_) together with the extent of every
// dimension (shape_). Each operator++ adds one to the last coordinate and
// carries any overflow into the preceding coordinates, so the last coordinate
// varies fastest. Iteration is finished once the leading coordinate has
// reached the leading extent (see End()).
//
// An iterator that is default-constructed, or constructed from invalid
// arguments, is "empty": End() is true right away and operator++ does nothing.
// No destructor is declared; the vector members clean up after themselves.
template <typename T>
class PositionIterator {
 private:
  using SizeType = typename std::vector<T>::size_type;

 public:
  // Creates an empty iterator (End() == true).
  PositionIterator() = default;

  // Creates an iterator that starts at `stt` inside a space of extents `sh`.
  //
  // The arguments are accepted only if both vectors have the same length and
  // every start coordinate is strictly below the matching extent (which also
  // rules out zero-sized dimensions, so operator++ never divides by zero).
  // Otherwise the iterator is left empty.
  PositionIterator(std::vector<T> stt, std::vector<T> sh) {
    if (stt.size() != sh.size()) {
      return;
    }
    for (SizeType i = 0; i < sh.size(); ++i) {
      if (stt[i] >= sh[i]) {
        return;
      }
    }
    pos_ = stt;
    shape_ = sh;
  }

  // Advances to the next position and returns this iterator.
  // The leading coordinate is never wrapped: it is the one End() inspects.
  PositionIterator& operator++() {
    if (shape_.empty()) {
      return *this;  // nothing to advance on an empty iterator
    }
    const SizeType last = shape_.size() - 1;
    pos_[last] += 1;
    for (SizeType i = last; i > 0; --i) {
      if (pos_[i] / shape_[i] == 0) {
        break;  // no carry out of this dimension, so higher ones are unchanged
      }
      pos_[i - 1] += pos_[i] / shape_[i];
      pos_[i] = pos_[i] % shape_[i];
    }
    return *this;
  }

  // True when the iterator is empty or the leading coordinate has reached
  // (or passed) the leading extent.
  bool End() const { return pos_.empty() || pos_[0] >= shape_[0]; }

  // Returns a copy of the current coordinates.
  std::vector<T> GetPos() const { return pos_; }

  // Returns a copy of the extents being iterated.
  std::vector<T> GetShape() const { return shape_; }

 private:
  std::vector<T> pos_;    // current coordinates
  std::vector<T> shape_;  // extent of each dimension
};
// Implementation of the Diagonal operator

// Extracts the diagonal selected by (offset, dim1, dim2) from input 0 and
// writes it to output 0, treating both buffers as arrays of T.
//
// For an input with exactly N2 dimensions the diagonal is copied directly.
// Otherwise every coordinate over the dimensions other than dim1/dim2 is
// visited with a PositionIterator, and the diagonal belonging to that
// coordinate is stored as one contiguous run of `dsize` output elements.
//
// Parameters:
//   ctx    - kernel context providing input 0 and output 0.
//   offset - diagonal offset, forwarded to OffsetSize() and get_data().
//   dim1   - first dimension of the diagonal; used directly as an index.
//   dim2   - second dimension of the diagonal; used directly as an index.
// Returns KERNEL_STATUS_OK.
//
// NOTE(review): dim1/dim2 index x_stride without a range check here —
// presumably the caller has already normalised and validated them; confirm.
template <typename T>
uint32_t DiagonalCpuKernel::DoComputeType(CpuKernelContext &ctx,
                                          const int64_t &offset,
                                          const int64_t &dim1,
                                          const int64_t &dim2) {
  // Fetch the input and output tensors.
  Tensor *input_x = ctx.Input(0);
  Tensor *y = ctx.Output(0);

  // Shape information and raw data pointers.
  auto x_shape = input_x->GetTensorShape();
  std::vector<int64_t> x_shape_ = x_shape->GetDimSizes();
  const int64_t x_dim = x_shape->GetDims();
  auto dataptr = reinterpret_cast<T *>(input_x->GetData());
  auto y_dataptr = reinterpret_cast<T *>(y->GetData());

  // Number of elements on the requested diagonal.
  const int64_t dsize = OffsetSize(offset, dim1, dim2, x_shape_);
  // Stride vector of the input tensor.
  std::vector<int64_t> x_stride = ConstructStride<int64_t>(x_shape_);

  // Moving one step along the diagonal advances dim1 and dim2 together.
  const int64_t diag_step = x_stride[dim1] + x_stride[dim2];

  // Copies one diagonal: reads start at in_base, writes start at out_base.
  // The counter is int64_t to match dsize (an int counter could overflow).
  auto copy_diagonal = [&](int64_t in_base, int64_t out_base) {
    for (int64_t i = 0; i < dsize; ++i) {
      // Rebuilt for every call because get_data receives it as a mutable
      // array. NOTE(review): hoist if get_data never modifies it.
      int64_t arr[N2] = {x_stride[dim1], x_stride[dim2]};
      // get_data is defined elsewhere; per the original author's note it
      // accounts for taking elements from the upper or lower diagonal.
      y_dataptr[out_base + i] =
          get_data(in_base + i * diag_step, offset, arr, dataptr);
    }
  };

  // Two cases: input with exactly N2 dimensions, and everything else.
  if (x_dim != N2) {
    // vx_shape / vx_stride: x_shape_ / x_stride with dim1 and dim2 removed.
    std::vector<int64_t> vx_shape, vx_stride;
    const int64_t rank = static_cast<int64_t>(x_shape_.size());
    for (int64_t d = 0; d < rank; ++d) {
      if (d != dim1 && d != dim2) {
        vx_shape.push_back(x_shape_[d]);
        vx_stride.push_back(x_stride[d]);
      }
    }

    // Output shape: the remaining dimensions followed by the diagonal length.
    std::vector<int64_t> y_shape = vx_shape;
    y_shape.push_back(dsize);
    std::vector<int64_t> y_stride = ConstructStride<int64_t>(y_shape);
    // vy_stride: output strides without the trailing (diagonal) dimension.
    std::vector<int64_t> vy_stride = y_stride;
    vy_stride.pop_back();

    // Visit every coordinate over the dimensions other than dim1/dim2.
    std::vector<int64_t> v_start(vx_shape.size(), 0);
    for (PositionIterator<int64_t> myiter(v_start, vx_shape); !myiter.End();
         ++myiter) {
      auto p = myiter.GetPos();
      // Base offsets of this coordinate in the input and in the output.
      const int64_t base_pos1 = MulSum<int64_t>(p, vx_stride);
      const int64_t outbase_pos = MulSum<int64_t>(p, vy_stride);
      copy_diagonal(base_pos1, outbase_pos);
    }
  } else {
    // Only dim1 and dim2 exist, so there is a single diagonal to copy.
    copy_diagonal(0, 0);
  }
  return KERNEL_STATUS_OK;
}
// Other uses of iterators
// Excerpt: for every position produced by the iterator, collect the elements
// along tmp_axis together with their index on that axis.
// NOTE(review): the excerpt is truncated — the outer `for` opened below is not
// closed within it, and whatever consumes data_ is not shown.
// NOTE(review): this snippet uses snake_case names (position_iterator, end(),
// get_pos(), mul_sum) whereas the class listed earlier in this article is
// PositionIterator with End()/GetPos(); presumably the same helpers under an
// older naming — confirm before reusing.
for (position_iterator<int64_t> mit(v_start, v_shape); !mit.end(); ++mit) {
// Current coordinates of the iterator.
auto p = mit.get_pos();
// Extent of the input along tmp_axis.
int axis_len = input_shape_[tmp_axis];
// One (value, index) record per element along tmp_axis.
std::vector<ValueIndex<T>> data_(axis_len);
// Offset of the current position, combined from the coordinates and v_stride.
int base_pos = mul_sum<int64_t>(p, v_stride);
for (int32_t i = 0; i < axis_len; i++) {
// Step along tmp_axis using that axis's input stride.
data_[i].value = x_dataptr[base_pos + i * input_stride[tmp_axis]];
// Remember the element's index along tmp_axis.
data_[i].index = i;
}
// Excerpt: build one row per index along `axis`; each row holds the elements
// found at every position the iterator yields for v_start / v_shape.
std::vector<std::vector<T1>> data_;
for (int64_t i = 0; i < dim0; i++) {
  // Offset contributed by the current index along `axis`.
  auto axis_offset = i * input_stride[axis];
  std::vector<T1> row;
  PositionIterator<int64_t> cursor(v_start, v_shape);
  while (!cursor.End()) {
    auto coords = cursor.GetPos();
    // Offset of these coordinates plus the offset along `axis`.
    row.push_back(x_dataptr[MulSum<int64_t>(coords, v_stride) + axis_offset]);
    ++cursor;
  }
  data_.push_back(row);
}
// Sidebar recommendations
- abc229 总结(区间最长连续字符 图的联通分量计数)
- [problem] Druid reports exception SQL injection violation, part always true condition not allow solution
- TCP两次挥手,你见过吗?那四次握手呢?
- Multi table operation inner join query
- 需求开发思考
- 记一次 .NET 某工控数据采集平台 线程数 爆高分析
- Educational codeforces round 22 E. Army Creation
- New wizard effect used by BCG
- @Data source connection pool exhaustion caused by transactional abuse
- HDU 1372 & POJ 2243 Knight moves (breadth first search)
猜你喜欢

New wizard effect used by BCG

Multi table operation inner join query

CANN算子:利用迭代器高效实现Tensor数据切割分块处理

Siemens HMI download prompts lack of panel image solution

HMM隐马尔可夫模型最详细讲解与代码实现

黑马程序员-软件测试--08阶段2-linux和数据库-23-30-进程端口相关,修改文件权限,端口号信息的获取,程序和进程相关操作,linux命令案例

BCG 使用之CBCGPTabWnd控件(相当于MFC TabControl)

实战模拟│JWT 登录认证

PointNeXt:通过改进的模型训练和缩放策略审视PointNet++

C# 使用StopWatch测量程序运行时间
随机推荐
1011 World Cup betting (20 points) (pat a)
c# . Net MVC uses Baidu ueditor rich text box to upload files (pictures, videos, etc.)
1006 sign in and sign out (25 points) (PAT class a)
Kotlin condition control
Kotlin cycle control
BCG 使用之CBCGPTabWnd控件(相当于MFC TabControl)
更强的 JsonPath 兼容性及性能测试之2022版(Snack3,Fastjson2,jayway.jsonpath)
Master the use of auto analyze in data warehouse
1002. A+b for Polynomials (25) (PAT class a)
1006 Sign In and Sign Out(25 分)(PAT甲级)
Dark horse programmer - software testing - stage 08 2-linux and database-23-30-process port related, modify file permissions, obtain port number information, program and process related operations, Li
1003 emergency (25 points) (PAT class a)
欧拉函数
c# .net mvc 使用百度Ueditor富文本框上传文件(图片,视频等)
C# 使用StopWatch测量程序运行时间
Multi table operation inner join query
【问题】druid报异常sql injection violation, part alway true condition not allow 解决方案
FPGA timing constraint sharing 01_ Brief description of the four steps
[QNX hypervisor 2.2 user manual]6.3.1 factory page and control page
做社交媒体营销应该注意些什么?Shopline卖家的成功秘笈在这里!