42 #include <unordered_map>
180 explicit Runner(
const std::string& model_path,
const std::unordered_map<std::string, std::any>& options = {}) {}
287 std::vector<std::vector<NpuTensor>>& outputs) noexcept = 0;
314 std::vector<std::vector<NpuTensor>>& outputs) noexcept = 0;
379 std::vector<std::vector<NpuTensor>>& outputs,
455 const std::string& model_path,
456 const std::unordered_map<std::string, std::any>& options = {});
This class represents a tensor in the VART API.
Definition: vart_npu_tensor.hpp:225
Factory class for creating Runner instances.
Definition: vart_runner_factory.hpp:435
static std::shared_ptr< Runner > create_runner(RunnerType runner_type, const std::string &model_path, const std::unordered_map< std::string, std::any > &options={})
Creates and returns a shared pointer to a Runner instance.
Abstract base class for executing model inference operations.
Definition: vart_runner_factory.hpp:169
virtual ~Runner()=default
Destroys the Runner object.
Runner(const std::string &model_path, const std::unordered_map< std::string, std::any > &options={})
Constructs a Runner object with the specified model path and options.
Definition: vart_runner_factory.hpp:180
virtual StatusCode wait(const JobHandle &job_handle, std::chrono::milliseconds timeout) noexcept=0
Waits for the completion of an asynchronous job.
virtual NpuTensor allocate_sub_tensor(const NpuTensor &parent, const NpuTensorInfo &info, size_t offset) const =0
Creates a sub-tensor from a parent tensor with the specified metadata and offset.
virtual const NpuTensorInfo & get_tensor_info_by_name(const std::string &tensor_name, TensorType type) const =0
Unified API to retrieve tensor information by name and tensor type (CPU/HW).
std::function< void(const JobHandle &)> ExecuteAsyncCallback
Type alias for the callback function used in asynchronous execution operations.
Definition: vart_runner_factory.hpp:347
virtual JobHandle execute_async(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs, ExecuteAsyncCallback cb) noexcept=0
Executes the operation asynchronously with the given input tensors.
virtual NpuTensor allocate_npu_tensor(const NpuTensorInfo &info) const =0
Allocates memory for an NPU tensor.
virtual const QuantParameters & get_quant_parameters(const std::string &tensor_name) const =0
Retrieves the quantization parameters for a specific tensor.
virtual StatusCode execute(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs) noexcept=0
Executes the main computation using the provided input tensors and produces output tensors.
virtual size_t get_num_input_tensors() const =0
Returns the number of input tensors.
virtual const std::vector< NpuTensorInfo > & get_tensors_info(TensorDirection direction, TensorType type) const =0
Unified API to retrieve tensor information based on direction and tensor type (CPU/HW).
virtual size_t get_num_output_tensors() const =0
Returns the number of output tensors.
virtual size_t get_batch_size() const =0
Returns the device batch size.
virtual JobHandle execute_async(const std::vector< std::vector< NpuTensor >> &inputs, std::vector< std::vector< NpuTensor >> &outputs) noexcept=0
Executes the job asynchronously with the given input tensors.
VART (Vitis AI Runtime) ML inference API namespace.
StatusCode
Enumerates the status codes used in the VART API.
Definition: vart_runner_factory.hpp:107
@ FAILURE
Operation failed.
@ JOB_PENDING
Job is still pending.
@ RUNTIME_ERROR
Runtime error occurred.
@ INVALID_OUTPUT
Invalid output parameters.
@ INVALID_JOB_ID
Provided job ID is invalid.
@ INVALID_INPUT
Invalid input parameters.
@ OUT_OF_MEMORY
Memory allocation failed.
@ SUCCESS
Operation completed successfully.
@ RESOURCE_UNAVAILABLE
Required resource is temporarily unavailable; retry the operation.
RoundingMode
Enumerates the rounding modes used in quantization.
Definition: vart_runner_factory.hpp:74
@ UNKNOWN
Unknown rounding mode.
@ ROUND_TO_NEAREST_EVEN
Round to the nearest value; ties are rounded to the nearest even value.
@ ROUND_TOWARD_ZERO
Round toward zero (truncation).
TensorType
Specifies the tensor types supported in the VART API.
Definition: vart_npu_tensor.hpp:146
RunnerType
Enumerates the types of runner implementations supported.
Definition: vart_runner_factory.hpp:63
@ VAIML
VAIML-based runner implementation.
TensorDirection
Enumerates the supported tensor directions in the VART API.
Definition: vart_npu_tensor.hpp:131
Struct representing a job handle for asynchronous execution.
Definition: vart_runner_factory.hpp:132
uint32_t job_id
Definition: vart_runner_factory.hpp:134
StatusCode status
Definition: vart_runner_factory.hpp:133
JobHandle()
Default constructor. Initializes status to FAILURE and job_id to 0 (invalid).
Definition: vart_runner_factory.hpp:144
JobHandle(StatusCode stat, uint32_t id)
Constructs a JobHandle with the given status and job ID.
Definition: vart_runner_factory.hpp:141
Metadata structure describing a tensor used in VART.
Definition: vart_npu_tensor.hpp:195
Struct representing quantization parameters for a tensor.
Definition: vart_runner_factory.hpp:94
double scale
Definition: vart_runner_factory.hpp:95
int32_t zero_point
Definition: vart_runner_factory.hpp:96
RoundingMode rounding_mode
Definition: vart_runner_factory.hpp:97
Tensor data types, memory descriptors, and the NpuTensor handle for VART ML.