_static/vart-ml-api/vart__runner__factory_8hpp_source.html

 /*

  * Copyright (C) 2025-2026 Advanced Micro Devices, Inc.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */


 #pragma once


 #include <any>

 #include <chrono>

 #include <cstdint>

 #include <functional>

 #include <memory>

 #include <string>

 #include <unordered_map>

 #include <vector>


 #include "vart_npu_tensor.hpp"


 namespace vart {


 /// Enumerates the types of runner implementations supported.
 enum class RunnerType {
   VAIML = 0,  ///< VAIML-based runner implementation.
 };


 /// Enumerates the rounding modes used in quantization.
 enum class RoundingMode {
   UNKNOWN = 0,                ///< Unknown rounding mode.
   ROUND_TO_NEAREST_EVEN = 1,  ///< Round to nearest even value.
   ROUND_TOWARD_ZERO = 2,      ///< Truncate towards zero (no rounding).
 };


 /// Quantization parameters for a tensor.
 ///
 /// A default-constructed instance has scale 0.0, zero point 0 and an
 /// UNKNOWN rounding mode.
 struct QuantParameters {
   double scale{0.0};                                  ///< Quantization scale.
   int32_t zero_point{0};                              ///< Quantization zero point.
   RoundingMode rounding_mode{RoundingMode::UNKNOWN};  ///< Rounding mode used in quantization.
 };


 /// Enumerates the status codes used in the VART API.
 enum class StatusCode {
   SUCCESS = 0,               ///< Operation completed successfully.
   FAILURE = 1,               ///< Operation failed.
   INVALID_INPUT = 2,         ///< Invalid input parameters.
   INVALID_OUTPUT = 3,        ///< Invalid output parameters.
   OUT_OF_MEMORY = 4,         ///< Memory allocation failed.
   RUNTIME_ERROR = 5,         ///< Runtime error occurred.
   JOB_PENDING = 6,           ///< Job is still pending.
   INVALID_JOB_ID = 7,        ///< Provided job ID is invalid.
   RESOURCE_UNAVAILABLE = 8,  ///< Required resource is unavailable. Transient; retry the operation.
 };


 struct JobHandle {

   StatusCode status;  // Submission status of the job.

   uint32_t job_id;    // Unique identifier for the job.


   JobHandle(StatusCode stat, uint32_t id) : status(stat), job_id(id) {}


   JobHandle() : status(StatusCode::FAILURE), job_id(0) {}

 };


 /// Abstract base class for executing model inference operations.
 ///
 /// Every operation is pure virtual; concrete runners derive from this class.
 /// The constructor is protected, so instances are only created through a
 /// derived type.
 class Runner {

  protected:

   /// Constructs a Runner object with the specified model path and options.
   /// The base-class body is empty: neither argument is used or stored here.
   /// @param model_path Path of the model.
   /// @param options    String-keyed option values (defaults to empty).
   explicit Runner(const std::string& model_path, const std::unordered_map<std::string, std::any>& options = {}) {}


  public:

   /// Destroys the Runner object. Virtual so derived runners can be destroyed
   /// through a base pointer.
   virtual ~Runner() = default;


   /// Unified API to retrieve tensor information based on direction and tensor
   /// type (CPU/HW).
   /// @return Reference to the list of matching tensor descriptions.
   /// NOTE(review): lifetime of the returned reference is not visible here;
   /// presumably tied to the runner — confirm with the implementation.
   virtual const std::vector<NpuTensorInfo>& get_tensors_info(TensorDirection direction, TensorType type) const = 0;


   /// Unified API to retrieve tensor information by name and tensor type
   /// (CPU/HW).
   /// NOTE(review): behavior for an unknown tensor_name is not visible here.
   virtual const NpuTensorInfo& get_tensor_info_by_name(const std::string& tensor_name, TensorType type) const = 0;


   /// Retrieves the quantization parameters for a specific tensor.
   /// @param tensor_name Name of the tensor to look up.
   virtual const QuantParameters& get_quant_parameters(const std::string& tensor_name) const = 0;


   /// Returns the number of input tensors.
   virtual size_t get_num_input_tensors() const = 0;


   /// Returns the number of output tensors.
   virtual size_t get_num_output_tensors() const = 0;


   /// Returns the device batch size.
   virtual size_t get_batch_size() const = 0;


   /// Executes the main computation using the provided input tensors and
   /// produces output tensors.
   /// Declared noexcept: the outcome is reported through the returned
   /// StatusCode.
   /// NOTE(review): the meaning of the outer/inner vector levels is not
   /// visible here — confirm with the implementation.
   virtual StatusCode execute(const std::vector<std::vector<NpuTensor>>& inputs,

                              std::vector<std::vector<NpuTensor>>& outputs) noexcept = 0;


   /// Executes the job asynchronously with the given input tensors.
   /// @return JobHandle carrying the submission status and the job ID.
   virtual JobHandle execute_async(const std::vector<std::vector<NpuTensor>>& inputs,

                                   std::vector<std::vector<NpuTensor>>& outputs) noexcept = 0;


   /// Waits for the completion of an asynchronous job.
   /// @param job_handle Handle of the job to wait for.
   /// @param timeout    Wait limit in milliseconds.
   /// NOTE(review): the status returned when the timeout expires is not
   /// visible here (presumably JOB_PENDING) — confirm.
   virtual StatusCode wait(const JobHandle& job_handle, std::chrono::milliseconds timeout) noexcept = 0;


   /// Type alias for the callback function used in asynchronous execution
   /// operations. The callback receives the job's JobHandle.
   using ExecuteAsyncCallback = std::function<void(const JobHandle&)>;


   /// Executes the operation asynchronously with the given input tensors.
   /// @param cb Callback taking the JobHandle.
   /// NOTE(review): when and on which thread cb runs is not visible here;
   /// presumably on job completion — confirm.
   virtual JobHandle execute_async(const std::vector<std::vector<NpuTensor>>& inputs,

                                   std::vector<std::vector<NpuTensor>>& outputs,

                                   ExecuteAsyncCallback cb) noexcept = 0;


   /// Allocates memory for an NPU tensor.
   /// @param info Metadata describing the tensor to allocate.
   virtual NpuTensor allocate_npu_tensor(const NpuTensorInfo& info) const = 0;


   /// Creates a sub-tensor from a parent tensor with the specified metadata and
   /// offset.
   /// NOTE(review): the unit of offset (bytes vs. elements) is not visible
   /// here — confirm.
   virtual NpuTensor allocate_sub_tensor(const NpuTensor& parent, const NpuTensorInfo& info, size_t offset) const = 0;

 };


 /// Factory class for creating Runner instances.
 class RunnerFactory {

  public:

   /// Creates and returns a shared pointer to a Runner instance.
   /// @param runner_type Which runner implementation to create.
   /// @param model_path  Path of the model.
   /// @param options     String-keyed option values (defaults to empty).
   /// NOTE(review): only the declaration is visible here; failure behavior
   /// (null return vs. exception) must be confirmed against the definition.
   static std::shared_ptr<Runner> create_runner(RunnerType runner_type,

                                                const std::string& model_path,

                                                const std::unordered_map<std::string, std::any>& options = {});

 };


 }  // namespace vart

vart::NpuTensor
This class represents a tensor in the VART API.
Definition: vart_npu_tensor.hpp:225

vart::RunnerFactory
Factory class for creating Runner instances.
Definition: vart_runner_factory.hpp:435

vart::RunnerFactory::create_runner
static std::shared_ptr< Runner > create_runner(RunnerType runner_type, const std::string &model_path, const std::unordered_map< std::string, std::any > &options={})
Creates and returns a shared pointer to a Runner instance.

vart::Runner
Abstract base class for executing model inference operations.
Definition: vart_runner_factory.hpp:169

vart::Runner::~Runner
virtual ~Runner()=default
Destroys the Runner object.

vart::Runner::Runner
Runner(const std::string &model_path, const std::unordered_map< std::string, std::any > &options={})
Constructs a Runner object with the specified model path and options.
Definition: vart_runner_factory.hpp:180

vart::Runner::wait
virtual StatusCode wait(const JobHandle &job_handle, std::chrono::milliseconds timeout) noexcept=0
Waits for the completion of an asynchronous job.

vart::Runner::allocate_sub_tensor
virtual NpuTensor allocate_sub_tensor(const NpuTensor &parent, const NpuTensorInfo &info, size_t offset) const =0
Creates a sub-tensor from a parent tensor with the specified metadata and offset.

vart::Runner::get_tensor_info_by_name
virtual const NpuTensorInfo & get_tensor_info_by_name(const std::string &tensor_name, TensorType type) const =0
Unified API to retrieve tensor information by name and tensor type (CPU/HW).

vart::Runner::ExecuteAsyncCallback
std::function< void(const JobHandle &)> ExecuteAsyncCallback
Type alias for the callback function used in asynchronous execution operations.
Definition: vart_runner_factory.hpp:347

vart::Runner::execute_async
virtual JobHandle execute_async(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs, ExecuteAsyncCallback cb) noexcept=0
Executes the operation asynchronously with the given input tensors.

vart::Runner::allocate_npu_tensor
virtual NpuTensor allocate_npu_tensor(const NpuTensorInfo &info) const =0
Allocates memory for an NPU tensor.

vart::Runner::get_quant_parameters
virtual const QuantParameters & get_quant_parameters(const std::string &tensor_name) const =0
Retrieves the quantization parameters for a specific tensor.

vart::Runner::execute
virtual StatusCode execute(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs) noexcept=0
Executes the main computation using the provided input tensors and produces output tensors.

vart::Runner::get_num_input_tensors
virtual size_t get_num_input_tensors() const =0
Returns the number of input tensors.

vart::Runner::get_tensors_info
virtual const std::vector< NpuTensorInfo > & get_tensors_info(TensorDirection direction, TensorType type) const =0
Unified API to retrieve tensor information based on direction and tensor type (CPU/HW).

vart::Runner::get_num_output_tensors
virtual size_t get_num_output_tensors() const =0
Returns the number of output tensors.

vart::Runner::get_batch_size
virtual size_t get_batch_size() const =0
Returns the device batch size.

vart::Runner::execute_async
virtual JobHandle execute_async(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs) noexcept=0
Executes the job asynchronously with the given input tensors.

vart
VART (Vitis AI Runtime) ML inference API namespace.

vart::StatusCode
StatusCode
Enumerates the status codes used in the VART API.
Definition: vart_runner_factory.hpp:107

vart::StatusCode::FAILURE
@ FAILURE
Operation failed.

vart::StatusCode::JOB_PENDING
@ JOB_PENDING
Job is still pending.

vart::StatusCode::RUNTIME_ERROR
@ RUNTIME_ERROR
Runtime error occurred.

vart::StatusCode::INVALID_OUTPUT
@ INVALID_OUTPUT
Invalid output parameters.

vart::StatusCode::INVALID_JOB_ID
@ INVALID_JOB_ID
Provided job ID is invalid.

vart::StatusCode::INVALID_INPUT
@ INVALID_INPUT
Invalid input parameters.

vart::StatusCode::OUT_OF_MEMORY
@ OUT_OF_MEMORY
Memory allocation failed.

vart::StatusCode::SUCCESS
@ SUCCESS
Operation completed successfully.

vart::StatusCode::RESOURCE_UNAVAILABLE
@ RESOURCE_UNAVAILABLE
Required resource is unavailable. Transient; retry the operation.

vart::RoundingMode
RoundingMode
Enumerates the rounding modes used in quantization.
Definition: vart_runner_factory.hpp:74

vart::RoundingMode::UNKNOWN
@ UNKNOWN
Unknown rounding mode.

vart::RoundingMode::ROUND_TO_NEAREST_EVEN
@ ROUND_TO_NEAREST_EVEN
Round to nearest even value.

vart::RoundingMode::ROUND_TOWARD_ZERO
@ ROUND_TOWARD_ZERO
Truncate towards zero (no rounding).

vart::TensorType
TensorType
Specifies the tensor types supported in the VART API.
Definition: vart_npu_tensor.hpp:146

vart::RunnerType
RunnerType
Enumerates the types of runner implementations supported.
Definition: vart_runner_factory.hpp:63

vart::RunnerType::VAIML
@ VAIML
VAIML-based runner implementation.

vart::TensorDirection
TensorDirection
Enumerates the supported tensor directions in the VART API.
Definition: vart_npu_tensor.hpp:131

vart::JobHandle
Struct representing a job handle for asynchronous execution.
Definition: vart_runner_factory.hpp:132

vart::JobHandle::job_id
uint32_t job_id
Definition: vart_runner_factory.hpp:134

vart::JobHandle::status
StatusCode status
Definition: vart_runner_factory.hpp:133

vart::JobHandle::JobHandle
JobHandle()
Default constructor. Initializes status to FAILURE and job_id to 0 (invalid).
Definition: vart_runner_factory.hpp:144

vart::JobHandle::JobHandle
JobHandle(StatusCode stat, uint32_t id)
Constructs a JobHandle with the given status and job ID.
Definition: vart_runner_factory.hpp:141

vart::NpuTensorInfo
Metadata structure describing a tensor used in VART.
Definition: vart_npu_tensor.hpp:195

vart::QuantParameters
Struct representing quantization parameters for a tensor.
Definition: vart_runner_factory.hpp:94

vart::QuantParameters::scale
double scale
Definition: vart_runner_factory.hpp:95

vart::QuantParameters::zero_point
int32_t zero_point
Definition: vart_runner_factory.hpp:96

vart::QuantParameters::rounding_mode
RoundingMode rounding_mode
Definition: vart_runner_factory.hpp:97

vart_npu_tensor.hpp
Tensor data types, memory descriptors, and the NpuTensor handle for VART ML.