_static/vart-ml-api/vart__npu__tensor_8hpp_source.html

 /*

  * Copyright (C) 2025-2026 Advanced Micro Devices, Inc.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */


 #pragma once


 #include <cstddef>
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>


 namespace vart {


 // Forward declaration of the private NpuTensor implementation. An incomplete
 // type is sufficient here: NpuTensor only stores it through std::shared_ptr.

 class NpuTensorPriv;


 /// Enumerates the supported data types for tensors in the VART API.
 ///
 /// Enumerators carry no explicit values, so they are numbered consecutively
 /// from 0 in declaration order.
 enum class DataType {
   UNKNOWN,  ///< Unknown data type.
   BOOLEAN,  ///< Boolean type.
   INT8,     ///< 8-bit signed integer.
   UINT8,    ///< 8-bit unsigned integer.
   INT16,    ///< 16-bit signed integer.
   UINT16,   ///< 16-bit unsigned integer.
   BF16,     ///< 16-bit Brain Floating Point format.
   FP16,     ///< 16-bit floating point.
   INT32,    ///< 32-bit signed integer.
   UINT32,   ///< 32-bit unsigned integer.
   FLOAT32,  ///< 32-bit floating point.
   INT64,    ///< 64-bit signed integer.
   UINT64,   ///< 64-bit unsigned integer.
 };


 /// Enumerates the supported memory layouts for tensors in the VART API.
 ///
 /// Enumerators carry no explicit values, so they are numbered consecutively
 /// from 0 in declaration order.
 enum class MemoryLayout {
   /// Unknown memory layout.
   UNKNOWN,
   /// Model batch, Channels (packed format).
   NC,
   /// Model batch, Channels (packed format), Height.
   NCH,
   /// Model batch, Height, Channels (packed format).
   NHC,
   /// Model batch, Height, Width.
   NHW,
   /// Model batch, Width, Channels (packed format).
   NWC,
   /// Model batch, Height, Width, Channels (packed format).
   NHWC,
   /// Model batch, Channels, Height, Width (planar format).
   NCHW,
   /// Model batch, Height, Width, Channel groups of 4 (e.g. RGBA).
   NHWC4,
   /// Model batch, Height, Width, Channel groups of 8.
   NHWC8,
   /// Model batch, Channels / 4, Height, Width, Channel groups of 4.
   NC4HW4,
   /// Model batch, Channels / 8, Height, Width, Channel groups of 8.
   NC8HW8,
   /// Height, Channels / 4, Width, N = 1, Channel groups of 4.
   HCWNC4,
   /// Height, Channels / 8, Width, N = 1, Channel groups of 8.
   HCWNC8,
   /// Height, Channels / 16, Width, N = 1, Channel groups of 16.
   HCWNC16,
   /// Model batch, Height, Width / 16, Channels / 4, Width groups of 16,
   /// Channel groups of 4.
   NHW16C4WC,
   /// Model batch, Height, Width / 16, Width groups of 16, Channels / 4,
   /// Channel groups of 4.
   NHW16WC4C,
   /// Generic layout. See NpuTensorInfo::memory_layout_order for more info.
   GENERIC,
 };


 /// Enumerates the various memory types utilized for tensors in the VART API.
 ///
 /// Enumerators carry no explicit values, so they are numbered consecutively
 /// from 0 in declaration order.
 enum class MemoryType {
   /// Memory type is not specified or recognized.
   UNKNOWN,
   /// Buffer object associated with XRT.
   XRT_BO,
   /// File descriptor used for Direct Memory Access (DMA).
   DMA_FD,
   /// User-provided pointer to a contiguous physical memory block.
   USER_POINTER_CMA,
   /// User-provided pointer without contiguous memory guarantee
   /// (e.g. new, malloc).
   USER_POINTER_NON_CMA,
 };


 /// Enumerates the supported tensor directions in the VART API.
 enum class TensorDirection {
   INPUT,   ///< Input tensor direction.
   OUTPUT,  ///< Output tensor direction.
 };


 /// Specifies the tensor types supported in the VART API.
 enum class TensorType {
   /// Tensor metadata from the ONNX model, as defined for standard CPU
   /// execution.
   CPU,
   /// AMD hardware-specific tensor metadata, formatted for direct execution
   /// on AMD AI engines.
   HW,
 };


 /// Metadata structure describing a tensor used in VART.
 ///
 /// Every member has an in-class default (or is an empty container), so a
 /// default-constructed NpuTensorInfo has UNKNOWN data type and layout, INPUT
 /// direction, HW tensor type, zero sizes, and empty name/vectors.
 struct NpuTensorInfo {
   /// Tensor name; empty by default.
   std::string name;

   /// Element data type; defaults to DataType::UNKNOWN.
   DataType data_type = DataType::UNKNOWN;

   /// Whether the tensor is an input or an output; defaults to INPUT.
   TensorDirection direction = TensorDirection::INPUT;

   /// Which flavour of metadata this describes (CPU vs. HW); defaults to HW.
   TensorType tensor_type = TensorType::HW;

   /// Memory layout of the tensor data; defaults to MemoryLayout::UNKNOWN.
   MemoryLayout memory_layout = MemoryLayout::UNKNOWN;

   /// Companion to MemoryLayout::GENERIC, whose documentation points here.
   /// NOTE(review): the exact encoding of the entries is not visible in this
   /// header - confirm against the implementation.
   std::vector<uint32_t> memory_layout_order;

   // std::size_t (from <cstddef>) rather than unqualified size_t: this header
   // must not depend on another include happening to declare ::size_t.

   /// NOTE(review): presumably the element count, given the separate
   /// size_in_bytes member - confirm.
   std::size_t size = 0;

   /// Size of the tensor in bytes.
   std::size_t size_in_bytes = 0;

   /// Tensor shape (one entry per dimension).
   std::vector<uint32_t> shape;

   /// Tensor strides.
   /// NOTE(review): unit (elements vs. bytes) is not visible here - confirm.
   std::vector<uint32_t> strides;

   /// Declared here, defined elsewhere.
   /// NOTE(review): presumably prints this metadata; the output destination
   /// and format are not visible in this header.
   void print() const;
 };


 /// This class represents a tensor in the VART API.
 ///
 /// The only data member is a std::shared_ptr to the private implementation
 /// and no copy/move operations are declared, so copying an NpuTensor copies
 /// that pointer: the copies refer to the same NpuTensorPriv object.
 class NpuTensor {
  public:
   // ---- Construction ------------------------------------------------------

   /// Construct a NpuTensor from a user-supplied buffer.
   /// @param info     Metadata describing the tensor.
   /// @param buffer   User-supplied buffer.
   /// @param mem_type Memory type of @p buffer.
   /// NOTE(review): ownership and required lifetime of @p buffer are not
   /// visible in this header - confirm against the implementation.
   NpuTensor(const NpuTensorInfo& info, void* buffer, MemoryType mem_type);

   /// Construct a NpuTensor from a user-supplied constant buffer.
   /// @param info     Metadata describing the tensor.
   /// @param buffer   User-supplied constant buffer.
   /// @param mem_type Memory type of @p buffer.
   NpuTensor(const NpuTensorInfo& info, const void* buffer, MemoryType mem_type);

   /// Default-constructs an empty NpuTensor with no buffer or metadata.
   NpuTensor();

   // ---- Buffer and address access -----------------------------------------

   /// Retrieves a pointer to the tensor's buffer.
   void* get_buffer();

   /// Retrieves a pointer to the tensor's buffer (read-only overload).
   const void* get_buffer() const;

   /// Returns the virtual address of the tensor buffer.
   void* get_virtual_address();

   /// Returns the virtual address of the tensor buffer (read-only overload).
   const void* get_virtual_address() const;

   /// Returns the physical address of the tensor buffer.
   uint64_t get_physical_address() const;

   // ---- Metadata ----------------------------------------------------------

   /// Returns the NpuTensorInfo metadata of the tensor.
   const NpuTensorInfo& get_info() const;

   /// Get the memory type of the tensor.
   MemoryType get_memory_type() const;

   // ---- Operations --------------------------------------------------------

   /// Synchronizes the tensor buffer between CPU and AIE.
   void sync_buffer() const;

   /// Export the tensor buffer as a dma-buf file descriptor.
   /// NOTE(review): the value returned on failure is not visible here.
   int export_buffer() const;

   /// Prints the metadata of the tensor.
   void print_info() const;

  private:
   // Allow controlled access to the private implementation.
   friend class NpuTensorPrivAccess;

   // NpuTensor private implementation.
   std::shared_ptr<NpuTensorPriv> priv_;
 };


 }  // namespace vart

vart::NpuTensor
This class represents a tensor in the VART API.
Definition: vart_npu_tensor.hpp:225

vart::NpuTensor::get_buffer
const void * get_buffer() const
Retrieves a pointer to the tensor's buffer.

vart::NpuTensor::NpuTensorPrivAccess
friend class NpuTensorPrivAccess
Definition: vart_npu_tensor.hpp:383

vart::NpuTensor::get_buffer
void * get_buffer()
Retrieves a pointer to the tensor's buffer.

vart::NpuTensor::export_buffer
int export_buffer() const
Export the tensor buffer as a dma-buf file descriptor.

vart::NpuTensor::NpuTensor
NpuTensor(const NpuTensorInfo &info, const void *buffer, MemoryType mem_type)
Construct a NpuTensor from a user-supplied constant buffer.

vart::NpuTensor::NpuTensor
NpuTensor(const NpuTensorInfo &info, void *buffer, MemoryType mem_type)
Construct a NpuTensor from a user-supplied buffer.

vart::NpuTensor::NpuTensor
NpuTensor()
Default-constructs an empty NpuTensor with no buffer or metadata.

vart::NpuTensor::get_memory_type
MemoryType get_memory_type() const
Get the memory type of the tensor.

vart::NpuTensor::get_physical_address
uint64_t get_physical_address() const
Returns the physical address of the tensor buffer.

vart::NpuTensor::print_info
void print_info() const
Prints the metadata of the tensor.

vart::NpuTensor::get_virtual_address
const void * get_virtual_address() const
Returns the virtual address of the tensor buffer.

vart::NpuTensor::get_virtual_address
void * get_virtual_address()
Returns the virtual address of the tensor buffer.

vart::NpuTensor::get_info
const NpuTensorInfo & get_info() const
Returns the NpuTensorInfo metadata of the tensor.

vart::NpuTensor::sync_buffer
void sync_buffer() const
Synchronizes the tensor buffer between CPU and AIE.

vart
VART (Vitis AI Runtime) ML inference API namespace.

vart::MemoryType
MemoryType
Enumerates the various memory types utilized for tensors in the VART API.
Definition: vart_npu_tensor.hpp:116

vart::MemoryType::USER_POINTER_NON_CMA
@ USER_POINTER_NON_CMA
User-provided pointer without contiguous memory guarantee (e.g. new, malloc).

vart::MemoryType::DMA_FD
@ DMA_FD
File descriptor used for Direct Memory Access (DMA).

vart::MemoryType::UNKNOWN
@ UNKNOWN
Memory type is not specified or recognized.

vart::MemoryType::XRT_BO
@ XRT_BO
Buffer object associated with XRT.

vart::MemoryType::USER_POINTER_CMA
@ USER_POINTER_CMA
User-provided pointer to a contiguous physical memory block.

vart::TensorType
TensorType
Specifies the tensor types supported in the VART API.
Definition: vart_npu_tensor.hpp:146

vart::TensorType::CPU
@ CPU
Tensor metadata from the ONNX model, as defined for standard CPU execution.

vart::TensorType::HW
@ HW
AMD hardware-specific tensor metadata, formatted for direct execution on AMD AI engines.

vart::DataType
DataType
Enumerates the supported data types for tensors in the VART API.
Definition: vart_npu_tensor.hpp:62

vart::DataType::UINT32
@ UINT32
32-bit unsigned integer.

vart::DataType::UINT16
@ UINT16
16-bit unsigned integer.

vart::DataType::INT64
@ INT64
64-bit signed integer.

vart::DataType::INT16
@ INT16
16-bit signed integer.

vart::DataType::INT32
@ INT32
32-bit signed integer.

vart::DataType::UNKNOWN
@ UNKNOWN
Unknown data type.

vart::DataType::UINT64
@ UINT64
64-bit unsigned integer.

vart::DataType::FLOAT32
@ FLOAT32
32-bit floating point.

vart::DataType::FP16
@ FP16
16-bit floating point.

vart::DataType::BOOLEAN
@ BOOLEAN
Boolean type.

vart::DataType::UINT8
@ UINT8
8-bit unsigned integer.

vart::DataType::INT8
@ INT8
8-bit signed integer.

vart::DataType::BF16
@ BF16
16-bit Brain Floating Point format.

vart::TensorDirection
TensorDirection
Enumerates the supported tensor directions in the VART API.
Definition: vart_npu_tensor.hpp:131

vart::TensorDirection::OUTPUT
@ OUTPUT
Output tensor direction.

vart::TensorDirection::INPUT
@ INPUT
Input tensor direction.

vart::MemoryLayout
MemoryLayout
Enumerates the supported memory layouts for tensors in the VART API.
Definition: vart_npu_tensor.hpp:89

vart::MemoryLayout::NWC
@ NWC
Model batch, Width, Channels (packed format).

vart::MemoryLayout::NCH
@ NCH
Model batch, Channels (packed format), Height.

vart::MemoryLayout::NHWC8
@ NHWC8
Model batch, Height, Width, Channel groups of 8.

vart::MemoryLayout::HCWNC4
@ HCWNC4
Height, Channels / 4, Width, N = 1, Channel groups of 4.

vart::MemoryLayout::NC4HW4
@ NC4HW4
Model batch, Channels / 4, Height, Width, Channel groups of 4.

vart::MemoryLayout::NC8HW8
@ NC8HW8
Model batch, Channels / 8, Height, Width, Channel groups of 8.

vart::MemoryLayout::NHWC4
@ NHWC4
Model batch, Height, Width, Channel groups of 4 (e.g. RGBA).

vart::MemoryLayout::HCWNC16
@ HCWNC16
Height, Channels / 16, Width, N = 1, Channel groups of 16.

vart::MemoryLayout::NHC
@ NHC
Model batch, Height, Channels (packed format).

vart::MemoryLayout::UNKNOWN
@ UNKNOWN
Unknown memory layout.

vart::MemoryLayout::NCHW
@ NCHW
Model batch, Channels, Height, Width (planar format).

vart::MemoryLayout::HCWNC8
@ HCWNC8
Height, Channels / 8, Width, N = 1, Channel groups of 8.

vart::MemoryLayout::NC
@ NC
Model batch, Channels (packed format).

vart::MemoryLayout::NHW
@ NHW
Model batch, Height, Width.

vart::MemoryLayout::NHW16C4WC
@ NHW16C4WC
Model batch, Height, Width / 16, Channels / 4, Width groups of 16, Channel groups of 4.

vart::MemoryLayout::GENERIC
@ GENERIC
Generic layout. See NpuTensorInfo::memory_layout_order for more info.

vart::MemoryLayout::NHW16WC4C
@ NHW16WC4C
Model batch, Height, Width / 16, Width groups of 16, Channels / 4, Channel groups of 4.

vart::MemoryLayout::NHWC
@ NHWC
Model batch, Height, Width, Channels (packed format).

vart::NpuTensorInfo
Metadata structure describing a tensor used in VART.
Definition: vart_npu_tensor.hpp:195

vart::NpuTensorInfo::memory_layout
MemoryLayout memory_layout
Definition: vart_npu_tensor.hpp:200

vart::NpuTensorInfo::shape
std::vector< uint32_t > shape
Definition: vart_npu_tensor.hpp:204

vart::NpuTensorInfo::tensor_type
TensorType tensor_type
Definition: vart_npu_tensor.hpp:199

vart::NpuTensorInfo::size
size_t size
Definition: vart_npu_tensor.hpp:202

vart::NpuTensorInfo::memory_layout_order
std::vector< uint32_t > memory_layout_order
Definition: vart_npu_tensor.hpp:201

vart::NpuTensorInfo::print
void print() const

vart::NpuTensorInfo::direction
TensorDirection direction
Definition: vart_npu_tensor.hpp:198

vart::NpuTensorInfo::strides
std::vector< uint32_t > strides
Definition: vart_npu_tensor.hpp:205

vart::NpuTensorInfo::name
std::string name
Definition: vart_npu_tensor.hpp:196

vart::NpuTensorInfo::size_in_bytes
size_t size_in_bytes
Definition: vart_npu_tensor.hpp:203

vart::NpuTensorInfo::data_type
DataType data_type
Definition: vart_npu_tensor.hpp:197