VART-ML  0.3.0
vart_runner_factory.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2025-2026 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
36 #include <any>
37 #include <chrono>
38 #include <cstdint>
39 #include <functional>
40 #include <memory>
41 #include <string>
42 #include <unordered_map>
43 #include <vector>
44 
45 #include "vart_npu_tensor.hpp"
46 
53 namespace vart {
54 
/// Enumerates the types of runner implementations supported.
///
/// Passed as the first argument of the factory's `create_runner` to select
/// which implementation is instantiated.
enum class RunnerType { VAIML = 0 /**< VAIML-based runner implementation. */ };
66 
// Enumerates the rounding modes used in quantization.
//
// NOTE(review): this rendered listing jumps from embedded line 75 to 78. The
// symbol index at the end of the page describes two further enumerators,
// ROUND_TO_NEAREST_EVEN ("Round to nearest even value.") and
// ROUND_TOWARD_ZERO ("Truncate towards zero (no rounding)."). Their
// declaration order (and therefore their numeric values) is not visible
// here -- confirm against the real header before relying on it.
74 enum class RoundingMode {
75  UNKNOWN,  // Unknown rounding mode; first enumerator, so its value is 0.
78 };
79 
// Members of the struct representing quantization parameters for a tensor.
//
// NOTE(review): the struct's opening line (embedded line 94, presumably
// `struct QuantParameters {` since that is the type returned by
// Runner::get_quant_parameters) and a third member, `RoundingMode
// rounding_mode` (embedded line 97), appear in the page's symbol index but
// are missing from this rendered listing. The initializer of `rounding_mode`
// is not visible anywhere on the page -- confirm against the real header.
95  double scale = 0.0;  // Quantization scale; defaults to 0.0.
96  int32_t zero_point = 0;  // Quantization zero point; defaults to 0.
98 };
99 
// Enumerates the status codes used in the VART.
//
// NOTE(review): embedded lines 111, 115 and 116 are missing from this
// rendered listing. The symbol index at the end of the page describes three
// enumerators that are not shown here:
//   INVALID_OUTPUT        - "Invalid output parameters."
//   INVALID_JOB_ID        - "Provided job ID is invalid."
//   RESOURCE_UNAVAILABLE  - "Required resource is unavailable. Transient;
//                            retry the operation."
// Which of the missing lines each one occupies cannot be determined from this
// page, so the numeric values of every enumerator after INVALID_INPUT are
// unconfirmed -- check the real header before depending on them.
107 enum class StatusCode {
108  SUCCESS = 0,  // Operation completed successfully.
109  FAILURE,  // Operation failed.
110  INVALID_INPUT,  // Invalid input parameters.
112  OUT_OF_MEMORY,  // Memory allocation failed.
113  RUNTIME_ERROR,  // Runtime error occurred.
114  JOB_PENDING,  // Job is still pending.
117 };
118 
132 struct JobHandle {
133  StatusCode status; // Submission status of the job.
134  uint32_t job_id; // Unique identifier for the job.
135 
141  JobHandle(StatusCode stat, uint32_t id) : status(stat), job_id(id) {}
142 
145 };
146 
169 class Runner {
170  protected:
180  explicit Runner(const std::string& model_path, const std::unordered_map<std::string, std::any>& options = {}) {}
181 
182  public:
186  virtual ~Runner() = default;
187 
201  virtual const std::vector<NpuTensorInfo>& get_tensors_info(TensorDirection direction, TensorType type) const = 0;
202 
218  virtual const NpuTensorInfo& get_tensor_info_by_name(const std::string& tensor_name, TensorType type) const = 0;
219 
232  virtual const QuantParameters& get_quant_parameters(const std::string& tensor_name) const = 0;
233 
242  virtual size_t get_num_input_tensors() const = 0;
243 
252  virtual size_t get_num_output_tensors() const = 0;
253 
263  virtual size_t get_batch_size() const = 0;
264 
286  virtual StatusCode execute(const std::vector<std::vector<NpuTensor>>& inputs,
287  std::vector<std::vector<NpuTensor>>& outputs) noexcept = 0;
288 
313  virtual JobHandle execute_async(const std::vector<std::vector<NpuTensor>>& inputs,
314  std::vector<std::vector<NpuTensor>>& outputs) noexcept = 0;
315 
334  virtual StatusCode wait(const JobHandle& job_handle, std::chrono::milliseconds timeout) noexcept = 0;
335 
347  using ExecuteAsyncCallback = std::function<void(const JobHandle&)>;
348 
378  virtual JobHandle execute_async(const std::vector<std::vector<NpuTensor>>& inputs,
379  std::vector<std::vector<NpuTensor>>& outputs,
380  ExecuteAsyncCallback cb) noexcept = 0;
381 
395  virtual NpuTensor allocate_npu_tensor(const NpuTensorInfo& info) const = 0;
396 
423  virtual NpuTensor allocate_sub_tensor(const NpuTensor& parent, const NpuTensorInfo& info, size_t offset) const = 0;
424 };
425 
// Factory class for creating Runner instances.
//
// NOTE(review): the class-head line (embedded line 435) is missing from this
// rendered listing, so the class name is not visible here; presumably
// `RunnerFactory`, going by the file name -- confirm against the real header.
436  public:
// Creates and returns a shared pointer to a Runner instance.
//
// Static, so it is called without constructing a factory object. Only the
// declaration appears in this header; the definition lives elsewhere.
//
//   runner_type: which runner implementation to create (a RunnerType value).
//   model_path:  model path, as a string.
//   options:     string-keyed map of std::any values; defaults to an empty
//                map when omitted.
//   returns:     std::shared_ptr<Runner> to the created runner.
//                NOTE(review): behaviour on failure (null pointer vs.
//                exception) is not visible from this declaration -- confirm.
454  static std::shared_ptr<Runner> create_runner(RunnerType runner_type,
455  const std::string& model_path,
456  const std::unordered_map<std::string, std::any>& options = {});
457 };
458 
459 } // namespace vart
This class represents a tensor in the VART API.
Definition: vart_npu_tensor.hpp:225
Factory class for creating Runner instances.
Definition: vart_runner_factory.hpp:435
static std::shared_ptr< Runner > create_runner(RunnerType runner_type, const std::string &model_path, const std::unordered_map< std::string, std::any > &options={})
Creates and returns a shared pointer to a Runner instance.
vart::Runner: Abstract base class for executing model inference operations.
Definition: vart_runner_factory.hpp:169
virtual ~Runner()=default
Destroys the Runner object.
Runner(const std::string &model_path, const std::unordered_map< std::string, std::any > &options={})
Constructs a Runner object with the specified model path and options.
Definition: vart_runner_factory.hpp:180
virtual StatusCode wait(const JobHandle &job_handle, std::chrono::milliseconds timeout) noexcept=0
Waits for the completion of an asynchronous job.
virtual NpuTensor allocate_sub_tensor(const NpuTensor &parent, const NpuTensorInfo &info, size_t offset) const =0
Creates a sub-tensor from a parent tensor with the specified metadata and offset.
virtual const NpuTensorInfo & get_tensor_info_by_name(const std::string &tensor_name, TensorType type) const =0
Unified API to retrieve tensor information by name and tensor type (CPU/HW).
std::function< void(const JobHandle &)> ExecuteAsyncCallback
Type alias for the callback function used in asynchronous execution operations.
Definition: vart_runner_factory.hpp:347
virtual JobHandle execute_async(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs, ExecuteAsyncCallback cb) noexcept=0
Executes the operation asynchronously with the given input tensors.
virtual NpuTensor allocate_npu_tensor(const NpuTensorInfo &info) const =0
Allocates memory for an NPU tensor.
virtual const QuantParameters & get_quant_parameters(const std::string &tensor_name) const =0
Retrieves the quantization parameters for a specific tensor.
virtual StatusCode execute(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs) noexcept=0
Executes the main computation using the provided input tensors and produces output tensors.
virtual size_t get_num_input_tensors() const =0
Returns the number of input tensors.
virtual const std::vector< NpuTensorInfo > & get_tensors_info(TensorDirection direction, TensorType type) const =0
Unified API to retrieve tensor information based on direction and tensor type (CPU/HW).
virtual size_t get_num_output_tensors() const =0
Returns the number of output tensors.
virtual size_t get_batch_size() const =0
Returns the device batch size.
virtual JobHandle execute_async(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs) noexcept=0
Executes the job asynchronously with the given input tensors.
vart: VART (Vitis AI Runtime) ML inference API namespace.
StatusCode
Enumerates the status codes used in the VART.
Definition: vart_runner_factory.hpp:107
@ FAILURE
Operation failed.
@ JOB_PENDING
Job is still pending.
@ RUNTIME_ERROR
Runtime error occurred.
@ INVALID_OUTPUT
Invalid output parameters.
@ INVALID_JOB_ID
Provided job ID is invalid.
@ INVALID_INPUT
Invalid input parameters.
@ OUT_OF_MEMORY
Memory allocation failed.
@ SUCCESS
Operation completed successfully.
@ RESOURCE_UNAVAILABLE
Required resource is unavailable. Transient; retry the operation.
RoundingMode
Enumerates the rounding modes used in quantization.
Definition: vart_runner_factory.hpp:74
@ UNKNOWN
Unknown rounding mode.
@ ROUND_TO_NEAREST_EVEN
Round to nearest even value.
@ ROUND_TOWARD_ZERO
Truncate towards zero (no rounding).
TensorType
Specifies the tensor types supported in the VART API.
Definition: vart_npu_tensor.hpp:146
RunnerType
Enumerates the types of runner implementations supported.
Definition: vart_runner_factory.hpp:63
@ VAIML
VAIML-based runner implementation.
TensorDirection
Enumerates the supported tensor directions in the VART API.
Definition: vart_npu_tensor.hpp:131
vart::JobHandle: Struct representing a job handle for asynchronous execution.
Definition: vart_runner_factory.hpp:132
uint32_t job_id
Definition: vart_runner_factory.hpp:134
StatusCode status
Definition: vart_runner_factory.hpp:133
JobHandle()
Default constructor. Initializes status to FAILURE and job_id to 0 (invalid).
Definition: vart_runner_factory.hpp:144
JobHandle(StatusCode stat, uint32_t id)
Constructs a JobHandle with the given status and job ID.
Definition: vart_runner_factory.hpp:141
Metadata structure describing a tensor used in VART.
Definition: vart_npu_tensor.hpp:195
vart::QuantParameters: Struct representing quantization parameters for a tensor.
Definition: vart_runner_factory.hpp:94
double scale
Definition: vart_runner_factory.hpp:95
int32_t zero_point
Definition: vart_runner_factory.hpp:96
RoundingMode rounding_mode
Definition: vart_runner_factory.hpp:97
Tensor data types, memory descriptors, and the NpuTensor handle for VART ML.