VART-ML  0.3.0
vart_npu_tensor.hpp
Go to the documentation of this file.
/*
 * Copyright (C) 2025-2026 Advanced Micro Devices, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#pragma once

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
39 
/**
 * @brief VART (Vitis AI Runtime) ML inference API namespace.
 */
namespace vart {

// Forward declaration of the private NpuTensor implementation
class NpuTensorPriv;

/**
 * @brief Enumerates the supported data types for tensors in the VART API.
 */
enum class DataType {
  UNKNOWN,  ///< Unknown data type.
  BOOLEAN,  ///< Boolean type.
  INT8,     ///< 8-bit signed integer.
  UINT8,    ///< 8-bit unsigned integer.
  INT16,    ///< 16-bit signed integer.
  UINT16,   ///< 16-bit unsigned integer.
  BF16,     ///< 16-bit Brain Floating Point format.
  FP16,     ///< 16-bit floating point.
  INT32,    ///< 32-bit signed integer.
  UINT32,   ///< 32-bit unsigned integer.
  FLOAT32,  ///< 32-bit floating point.
  INT64,    ///< 64-bit signed integer.
  UINT64,   ///< 64-bit unsigned integer.
};

/**
 * @brief Enumerates the supported memory layouts for tensors in the VART API.
 */
enum class MemoryLayout {
  UNKNOWN,    ///< Unknown memory layout.
  NC,         ///< Model batch, Channels (packed format).
  NCH,        ///< Model batch, Channels (packed format), Height.
  NHC,        ///< Model batch, Height, Channels (packed format).
  NHW,        ///< Model batch, Height, Width.
  NWC,        ///< Model batch, Width, Channels (packed format).
  NHWC,       ///< Model batch, Height, Width, Channels (packed format).
  NCHW,       ///< Model batch, Channels, Height, Width (planar format).
  NHWC4,      ///< Model batch, Height, Width, Channel groups of 4 (e.g. RGBA).
  NHWC8,      ///< Model batch, Height, Width, Channel groups of 8.
  NC4HW4,     ///< Model batch, Channels / 4, Height, Width, Channel groups of 4.
  NC8HW8,     ///< Model batch, Channels / 8, Height, Width, Channel groups of 8.
  HCWNC4,     ///< Height, Channels / 4, Width, N = 1, Channel groups of 4.
  HCWNC8,     ///< Height, Channels / 8, Width, N = 1, Channel groups of 8.
  HCWNC16,    ///< Height, Channels / 16, Width, N = 1, Channel groups of 16.
  NHW16C4WC,  ///< Model batch, Height, Width / 16, Channels / 4, Width groups of 16, Channel groups of 4.
  NHW16WC4C,  ///< Model batch, Height, Width / 16, Width groups of 16, Channels / 4, Channel groups of 4.
  GENERIC,    ///< Generic layout. See NpuTensorInfo::memory_layout_order for more info.
};

/**
 * @brief Enumerates the various memory types utilized for tensors in the VART API.
 */
enum class MemoryType {
  UNKNOWN,  ///< Memory type is not specified or recognized.
  XRT_BO,   ///< Buffer object associated with XRT.
  DMA_FD,   ///< File descriptor used for Direct Memory Access (DMA).
  // NOTE(review): the two enumerators below are documented in the generated
  // index but their lines were missing from the source listing; their relative
  // order is assumed -- confirm against the shipped header before relying on
  // the numeric values.
  USER_POINTER_CMA,      ///< User-provided pointer to a contiguous physical memory block.
  USER_POINTER_NON_CMA,  ///< User-provided pointer without contiguous memory guarantee (e.g. new, malloc).
};

/**
 * @brief Enumerates the supported tensor directions in the VART API.
 */
enum class TensorDirection {
  INPUT,   ///< Input tensor direction.
  OUTPUT,  ///< Output tensor direction.
};

/**
 * @brief Specifies the tensor types supported in the VART API.
 */
enum class TensorType {
  CPU,  ///< Tensor metadata from the ONNX model, as defined for standard CPU execution.
  HW,   ///< AMD hardware-specific tensor metadata, formatted for direct execution on AMD AI engines.
};

/**
 * @brief Metadata structure describing a tensor used in VART.
 */
struct NpuTensorInfo {
  std::string name;
  // NOTE(review): the next four members are restored from the generated index
  // (type, name and declaration order); any default member initializers they
  // carry in the shipped header are not visible here -- confirm.
  DataType data_type;
  TensorDirection direction;
  TensorType tensor_type;
  MemoryLayout memory_layout;
  // Referenced by the documentation of MemoryLayout::GENERIC.
  std::vector<uint32_t> memory_layout_order;
  size_t size = 0;           // presumably an element count -- TODO confirm
  size_t size_in_bytes = 0;
  std::vector<uint32_t> shape;
  std::vector<uint32_t> strides;

  void print() const;
};

/**
 * @brief This class represents a tensor in the VART API.
 */
class NpuTensor {
 public:
  /**
   * @brief Construct a NpuTensor from a user-supplied buffer.
   */
  NpuTensor(const NpuTensorInfo& info, void* buffer, MemoryType mem_type);

  /**
   * @brief Construct a NpuTensor from a user-supplied constant buffer.
   */
  NpuTensor(const NpuTensorInfo& info, const void* buffer, MemoryType mem_type);

  /**
   * @brief Default-constructs an empty NpuTensor with no buffer or metadata.
   *
   * NOTE(review): restored from the generated index; whether the shipped
   * header declares it out-of-line or as `= default` is not visible -- confirm.
   */
  NpuTensor();

  /**
   * @brief Retrieves a pointer to the tensor's buffer.
   */
  void* get_buffer();

  /**
   * @brief Retrieves a pointer to the tensor's buffer.
   */
  const void* get_buffer() const;

  /**
   * @brief Returns the virtual address of the tensor buffer.
   *
   * NOTE(review): this non-const overload is restored from the generated index.
   */
  void* get_virtual_address();

  /**
   * @brief Returns the virtual address of the tensor buffer.
   */
  const void* get_virtual_address() const;

  /**
   * @brief Returns the physical address of the tensor buffer.
   */
  uint64_t get_physical_address() const;

  /**
   * @brief Returns the NpuTensorInfo metadata of the tensor.
   */
  const NpuTensorInfo& get_info() const;

  /**
   * @brief Get the memory type of the tensor.
   *
   * NOTE(review): restored from the generated index.
   */
  MemoryType get_memory_type() const;

  /**
   * @brief Synchronizes the tensor buffer between CPU and AIE.
   */
  void sync_buffer() const;

  /**
   * @brief Export the tensor buffer as a dma-buf file descriptor.
   *
   * NOTE(review): the int result is presumably the descriptor; the error
   * convention is not visible here -- confirm.
   */
  int export_buffer() const;

  /**
   * @brief Prints the metadata of the tensor.
   */
  void print_info() const;

 private:
  friend class NpuTensorPrivAccess;      // Allow controlled access to the private implementation
  std::shared_ptr<NpuTensorPriv> priv_;  // NpuTensor private implementation
};

}  // namespace vart
This class represents a tensor in the VART API.
Definition: vart_npu_tensor.hpp:225
const void * get_buffer() const
Retrieves a pointer to the tensor's buffer.
friend class NpuTensorPrivAccess
Definition: vart_npu_tensor.hpp:383
void * get_buffer()
Retrieves a pointer to the tensor's buffer.
int export_buffer() const
Export the tensor buffer as a dma-buf file descriptor.
NpuTensor(const NpuTensorInfo &info, const void *buffer, MemoryType mem_type)
Construct a NpuTensor from a user-supplied constant buffer.
NpuTensor(const NpuTensorInfo &info, void *buffer, MemoryType mem_type)
Construct a NpuTensor from a user-supplied buffer.
NpuTensor()
Default-constructs an empty NpuTensor with no buffer or metadata.
MemoryType get_memory_type() const
Get the memory type of the tensor.
uint64_t get_physical_address() const
Returns the physical address of the tensor buffer.
void print_info() const
Prints the metadata of the tensor.
const void * get_virtual_address() const
Returns the virtual address of the tensor buffer.
void * get_virtual_address()
Returns the virtual address of the tensor buffer.
const NpuTensorInfo & get_info() const
Returns the NpuTensorInfo metadata of the tensor.
void sync_buffer() const
Synchronizes the tensor buffer between CPU and AIE.
VART (Vitis AI Runtime) ML inference API namespace.
MemoryType
Enumerates the various memory types utilized for tensors in the VART API.
Definition: vart_npu_tensor.hpp:116
@ USER_POINTER_NON_CMA
User-provided pointer without contiguous memory guarantee (e.g. new, malloc).
@ DMA_FD
File descriptor used for Direct Memory Access (DMA).
@ UNKNOWN
Memory type is not specified or recognized.
@ XRT_BO
Buffer object associated with XRT.
@ USER_POINTER_CMA
User-provided pointer to a contiguous physical memory block.
TensorType
Specifies the tensor types supported in the VART API.
Definition: vart_npu_tensor.hpp:146
@ CPU
Tensor metadata from the ONNX model, as defined for standard CPU execution.
@ HW
AMD hardware-specific tensor metadata, formatted for direct execution on AMD AI engines.
DataType
Enumerates the supported data types for tensors in the VART API.
Definition: vart_npu_tensor.hpp:62
@ UINT32
32-bit unsigned integer.
@ UINT16
16-bit unsigned integer.
@ INT64
64-bit signed integer.
@ INT16
16-bit signed integer.
@ INT32
32-bit signed integer.
@ UNKNOWN
Unknown data type.
@ UINT64
64-bit unsigned integer.
@ FLOAT32
32-bit floating point.
@ FP16
16-bit floating point.
@ BOOLEAN
Boolean type.
@ UINT8
8-bit unsigned integer.
@ INT8
8-bit signed integer.
@ BF16
16-bit Brain Floating Point format.
TensorDirection
Enumerates the supported tensor directions in the VART API.
Definition: vart_npu_tensor.hpp:131
@ OUTPUT
Output tensor direction.
@ INPUT
Input tensor direction.
MemoryLayout
Enumerates the supported memory layouts for tensors in the VART API.
Definition: vart_npu_tensor.hpp:89
@ NWC
Model batch, Width, Channels (packed format).
@ NCH
Model batch, Channels (packed format), Height.
@ NHWC8
Model batch, Height, Width, Channel groups of 8.
@ HCWNC4
Height, Channels / 4, Width, N = 1, Channel groups of 4.
@ NC4HW4
Model batch, Channels / 4, Height, Width, Channel groups of 4.
@ NC8HW8
Model batch, Channels / 8, Height, Width, Channel groups of 8.
@ NHWC4
Model batch, Height, Width, Channel groups of 4 (e.g. RGBA).
@ HCWNC16
Height, Channels / 16, Width, N = 1, Channel groups of 16.
@ NHC
Model batch, Height, Channels (packed format).
@ UNKNOWN
Unknown memory layout.
@ NCHW
Model batch, Channels, Height, Width (planar format).
@ HCWNC8
Height, Channels / 8, Width, N = 1, Channel groups of 8.
@ NC
Model batch, Channels (packed format).
@ NHW
Model batch, Height, Width.
@ NHW16C4WC
Model batch, Height, Width / 16, Channels / 4, Width groups of 16, Channel groups of 4.
@ GENERIC
Generic layout. See NpuTensorInfo::memory_layout_order for more info.
@ NHW16WC4C
Model batch, Height, Width / 16, Width groups of 16, Channels / 4, Channel groups of 4.
@ NHWC
Model batch, Height, Width, Channels (packed format).
Metadata structure describing a tensor used in VART.
Definition: vart_npu_tensor.hpp:195
MemoryLayout memory_layout
Definition: vart_npu_tensor.hpp:200
std::vector< uint32_t > shape
Definition: vart_npu_tensor.hpp:204
TensorType tensor_type
Definition: vart_npu_tensor.hpp:199
size_t size
Definition: vart_npu_tensor.hpp:202
std::vector< uint32_t > memory_layout_order
Definition: vart_npu_tensor.hpp:201
void print() const
TensorDirection direction
Definition: vart_npu_tensor.hpp:198
std::vector< uint32_t > strides
Definition: vart_npu_tensor.hpp:205
std::string name
Definition: vart_npu_tensor.hpp:196
size_t size_in_bytes
Definition: vart_npu_tensor.hpp:203
DataType data_type
Definition: vart_npu_tensor.hpp:197