/**
* Step1: get model handle
 * Step2: set input data to nv12
* Step3: prepare roi mem
* Step4: prepare input and output tensor
* Step5: run inference
* Step6: do postprocess with output data for every task
* Step7: release resources
*/
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <vector>

#include "hobot/dnn/hb_dnn.h"
#include "hobot/hb_ucp.h"
#include "hobot/hb_ucp_sys.h"
// Path of the packed model file loaded in Step1.
const char *model_file = "model.hbm";
// Default input image, pre-split into NV12 y and uv plane binaries.
std::string data_y_path = "ILSVRC2012_val_00000001_y.bin";
std::string data_uv_path = "ILSVRC2012_val_00000001_uv.bin";
// Region of interest, expressed as pixel corner coordinates.
// Note: the C-style `typedef struct` wrapper is redundant in C++; a plain
// struct declaration introduces the same type name `Roi`.
struct Roi {
  int32_t left;    // x coordinate of the left edge
  int32_t top;     // y coordinate of the top edge
  int32_t right;   // x coordinate of the right edge
  int32_t bottom;  // y coordinate of the bottom edge
};
/**
 * read the y/uv plane binary files into two cached system mems
 * @param[in] y_path: path of the y plane binary file
 * @param[in] uv_path: path of the uv plane binary file
 * @param[out] image_mem: image_mem[0] receives y data, image_mem[1] uv data
 * @param[in] input_h: input image height in pixels
 * @param[in] input_w: input image width in pixels
 */
int read_image_2_nv12(std::string &y_path, std::string &uv_path,
                      std::vector<hbUCPSysMem> &image_mem, int &input_h,
                      int &input_w);
/**
 * pack roi corner coordinates into cached system mems, one mem per roi
 * @param[in] rois: rois to pack
 * @param[out] roi_mem: mems receiving {left, top, right, bottom} per roi
 */
int prepare_roi_mem(const std::vector<Roi> &rois,
                    std::vector<hbUCPSysMem> &roi_mem);
/**
 * bind the image mems to the y/uv input tensors and patch shape/stride
 * @param[in] image_mem: mems holding y (index 0) and uv (index 1) data
 * @param[in] input_h: input image height in pixels
 * @param[in] input_w: input image width in pixels
 * @param[in] dnn_handle: dnn handle
 * @param[out] input_tensor: input tensors to fill (y at 0, uv at 1)
 */
int prepare_image_tensor(const std::vector<hbUCPSysMem> &image_mem, int input_h,
                         int input_w, hbDNNHandle_t dnn_handle,
                         std::vector<hbDNNTensor> &input_tensor);
/**
 * read a whole binary file into a newly allocated buffer owned by the caller
 * @param[in] file_path: path of the file to read
 * @param[out] bin: receives the allocated buffer
 * @param[out] length: receives the file size in bytes
 */
int read_binary_file(std::string file_path, char **bin, int *length);
/**
 * prepare roi tensor
 * @param[in] roi_mem: roi mem info
 * @param[in] dnn_handle: dnn handle
 * @param[in] roi_tensor_id: tensor id of roi input in model
 * @param[out] roi_tensor: roi tensor
 */
int prepare_roi_tensor(const hbUCPSysMem *roi_mem, hbDNNHandle_t dnn_handle,
                       int32_t roi_tensor_id, hbDNNTensor *roi_tensor);
/**
 * prepare out tensor
 * @param[in] dnn_handle: dnn handle
 * @param[out] output: output tensor
 */
int prepare_output_tensor(hbDNNHandle_t dnn_handle,
                          std::vector<hbDNNTensor> &output);
/**
 * Sample entry: run a roi model over the same NV12 image with two rois,
 * submit both tasks as one batch, and print the argmax label per roi.
 *
 * Fixes vs the previous revision:
 *  - output mems are cache-invalidated before the CPU reads them (the
 *    hardware wrote them to DDR; the inputs were CLEANed, but the outputs
 *    were never INVALIDATEd before postprocess);
 *  - the argmax is seeded from result[0] so all-negative score vectors no
 *    longer report label -1.
 */
int main(int argc, char **argv) {
  // Step1: get model handle
  hbDNNPackedHandle_t packed_dnn_handle;
  hbDNNHandle_t dnn_handle;
  const char **model_name_list;
  int model_count = 0;
  hbDNNInitializeFromFiles(&packed_dnn_handle, &model_file, 1);
  hbDNNGetModelNameList(&model_name_list, &model_count, packed_dnn_handle);
  hbDNNGetModelHandle(&dnn_handle, packed_dnn_handle, model_name_list[0]);

  // Step2: set input data to nv12
  // In the sample every task uses the same image, so a single pair of y/uv
  // mems is allocated and shared by all input tensors.
  std::vector<hbUCPSysMem> image_mems(2);
  // image input size
  int input_h = 224;
  int input_w = 224;
  read_image_2_nv12(data_y_path, data_uv_path, image_mems, input_h, input_w);

  // Step3: prepare roi mem
  // Two roi tasks are inferred, so two ROIs are prepared.
  Roi roi_1 = {0, 0, 223, 223};  // left, top, right, bottom
  Roi roi_2 = {1, 1, 223, 223};
  std::vector<Roi> rois;
  rois.push_back(roi_1);
  rois.push_back(roi_2);
  int roi_num = 2;
  std::vector<hbUCPSysMem> roi_mems(2);
  prepare_roi_mem(rois, roi_mems);

  // Step4: prepare input and output tensor
  std::vector<std::vector<hbDNNTensor>> input_tensors(roi_num);
  std::vector<std::vector<hbDNNTensor>> output_tensors(roi_num);
  for (int i = 0; i < roi_num; ++i) {
    // prepare input tensor
    int input_count = 0;
    hbDNNGetInputCount(&input_count, dnn_handle);
    input_tensors[i].resize(input_count);
    /** Tips:
     * All tasks here share the same image, so the y/uv tensors reuse the
     * same mems. If your model takes different input images, allocate
     * separate memory for every input.
     * */
    prepare_image_tensor(image_mems, input_h, input_w, dnn_handle,
                         input_tensors[i]);
    // the roi input occupies tensor slot 2 in this model
    auto roi_tensor_id = 2;
    prepare_roi_tensor(&roi_mems[i], dnn_handle, roi_tensor_id,
                       &input_tensors[i][roi_tensor_id]);
    // prepare output tensor
    int output_count = 0;
    hbDNNGetOutputCount(&output_count, dnn_handle);
    output_tensors[i].resize(output_count);
    prepare_output_tensor(dnn_handle, output_tensors[i]);
  }

  // Step5: run inference
  hbUCPTaskHandle_t task_handle{nullptr};
  /** Tips:
   * Multiple tasks are submitted at the same time: when task_handle is
   * nullptr the first call creates a new task; subsequent calls (handle
   * created but not yet submitted) attach to it, forming one multi-model
   * task.
   * */
  for (int i = 0; i < roi_num; ++i) {
    hbDNNInferV2(&task_handle, output_tensors[i].data(),
                 input_tensors[i].data(), dnn_handle);
  }
  // submit multi tasks
  hbUCPSchedParam infer_ctrl_param;
  HB_UCP_INITIALIZE_SCHED_PARAM(&infer_ctrl_param);
  hbUCPSubmitTask(task_handle, &infer_ctrl_param);
  // wait task done
  hbUCPWaitTaskDone(task_handle, 0);

  // Step6: do postprocess with output data for every task
  for (auto roi_idx = 0; roi_idx < roi_num; roi_idx++) {
    // The output mems are cached and were written by the inference
    // hardware: invalidate the CPU cache before reading them.
    for (auto &tensor : output_tensors[roi_idx]) {
      hbUCPMemFlush(&tensor.sysMem, HB_SYS_MEM_CACHE_INVALIDATE);
    }
    auto result =
        reinterpret_cast<float *>(output_tensors[roi_idx][0].sysMem.virAddr);
    // Find the max score and corresponding label. Seed with element 0 so
    // an all-negative score vector still yields a valid label (seeding
    // with 0.0 used to report label -1 in that case).
    float max_score = result[0];
    int label = 0;
    for (auto i = 1; i < 1000; i++) {
      float score = result[i];
      if (score > max_score) {
        label = i;
        max_score = score;
      }
    }
    std::cout << "label: " << label << std::endl;
  }

  // Step7: release resources
  // release task handle
  hbUCPReleaseTask(task_handle);
  // free input mem
  for (auto &mem : image_mems) {
    hbUCPFree(&mem);
  }
  for (auto &mem : roi_mems) {
    hbUCPFree(&mem);
  }
  // free output mem
  for (auto &tensors : output_tensors) {
    for (auto &tensor : tensors) {
      hbUCPFree(&(tensor.sysMem));
    }
  }
  // release model
  hbDNNRelease(packed_dnn_handle);
  return 0;
}
// Round `value` up to the next multiple of `alignment`.
// `alignment` must be a power of two (bit-mask based rounding).
// constexpr functions are type-checked and debuggable, unlike the
// function-like macros they replace.
constexpr int32_t ALIGN(int32_t value, int32_t alignment) {
  return (value + (alignment - 1)) & ~(alignment - 1);
}
// 32-byte row alignment, as required for the NV12 y/uv strides below.
constexpr int32_t ALIGN_32(int32_t value) { return ALIGN(value, 32); }
/**
 * Bind the pre-allocated y/uv image mems to the first two input tensors
 * and patch their valid shape and strides to the real input image.
 * @param[in] image_mem: mems holding y (index 0) and uv (index 1) data
 * @param[in] input_h: input image height in pixels
 * @param[in] input_w: input image width in pixels
 * @param[in] dnn_handle: dnn handle
 * @param[out] input_tensor: input tensors to fill (y at 0, uv at 1)
 */
int prepare_image_tensor(const std::vector<hbUCPSysMem> &image_mem, int input_h,
                         int input_w, hbDNNHandle_t dnn_handle,
                         std::vector<hbDNNTensor> &input_tensor) {
  // tensor 0 is the y plane, tensor 1 is the uv plane
  for (int idx = 0; idx < 2; idx++) {
    auto &tensor = input_tensor[idx];
    hbDNNGetInputTensorProperties(&tensor.properties, dnn_handle, idx);
    tensor.sysMem = image_mem[idx];
    /** Tips:
     * roi model should modify input valid shape to input image shape.
     * here the struct of y/uv shape is NHWC
     * */
    auto &shape = tensor.properties.validShape;
    const bool is_uv_plane = (idx == 1);
    // the uv plane is subsampled by two in both directions
    shape.dimensionSize[1] = is_uv_plane ? input_h / 2 : input_h;
    shape.dimensionSize[2] = is_uv_plane ? input_w / 2 : input_w;
    /** Tips:
     * For input tensor, stride should be set according to real padding
     * of the user's data. And 32 bytes alignment is the requirement of y/uv
     **/
    auto &stride = tensor.properties.stride;
    stride[1] = ALIGN_32(stride[2] * shape.dimensionSize[2]);
    stride[0] = stride[1] * shape.dimensionSize[1];
  }
  return 0;
}
/**
 * Prepare one roi input tensor: query the model-declared properties for
 * the roi slot and attach the caller-provided roi mem.
 * @param[in] roi_mem: mem holding the packed roi coordinates
 * @param[in] dnn_handle: dnn handle
 * @param[in] roi_tensor_id: tensor id of the roi input in the model
 * @param[out] roi_tensor: roi tensor to fill
 */
int prepare_roi_tensor(const hbUCPSysMem *roi_mem, hbDNNHandle_t dnn_handle,
                       int32_t roi_tensor_id, hbDNNTensor *roi_tensor) {
  hbDNNGetInputTensorProperties(&roi_tensor->properties, dnn_handle,
                                roi_tensor_id);
  // the roi data itself was already packed and flushed by prepare_roi_mem
  roi_tensor->sysMem = *roi_mem;
  return 0;
}
/**
 * prepare out tensor: query the properties of every model output and
 * allocate a cached mem large enough for each.
 * @param[in] dnn_handle: dnn handle
 * @param[out] output: output tensors, pre-sized to the model output count
 */
int prepare_output_tensor(hbDNNHandle_t dnn_handle,
                          std::vector<hbDNNTensor> &output) {
  // int32_t index with an explicit cast avoids the signed/unsigned mix of
  // the previous size_t loop (the API takes an int32_t tensor index)
  for (int32_t i = 0; i < static_cast<int32_t>(output.size()); i++) {
    hbDNNGetOutputTensorProperties(&output[i].properties, dnn_handle, i);
    // alignedByteSize is the full padded size the hardware writes
    hbUCPMallocCached(&output[i].sysMem, output[i].properties.alignedByteSize,
                      0);
  }
  return 0;
}
/**
 * Read a whole binary file into a newly allocated buffer.
 * The buffer is allocated with malloc so the callers' existing free()
 * calls are correct (it was previously allocated with new[], and freeing
 * a new[] buffer with free() is undefined behavior).
 * @param[in] file_path: path of the file to read
 * @param[out] bin: receives the malloc'ed buffer; owned by the caller
 * @param[out] length: receives the file size in bytes
 * @return 0 on success, -1 on failure (file missing or allocation failed)
 */
int read_binary_file(std::string file_path, char **bin, int *length) {
  *bin = nullptr;
  *length = 0;
  std::ifstream ifs(file_path, std::ios::in | std::ios::binary);
  if (!ifs) {
    // previously a missing file went undetected and tellg() returned -1
    return -1;
  }
  ifs.seekg(0, std::ios::end);
  const auto file_size = static_cast<int>(ifs.tellg());
  ifs.seekg(0, std::ios::beg);
  char *buffer = static_cast<char *>(malloc(file_size));
  if (buffer == nullptr) {
    return -1;
  }
  ifs.read(buffer, file_size);
  *bin = buffer;
  *length = file_size;
  return 0;
}
/** You can define read_image_2_other_type to prepare your data **/
int read_image_2_nv12(std::string &y_path, std::string &uv_path,
std::vector<hbUCPSysMem> &image_mem, int &input_h,
int &input_w) {
// copy y data
auto w_stride = ALIGN_32(input_w);
int32_t y_mem_size = input_h * w_stride;
hbUCPMallocCached(&image_mem[0], y_mem_size, 0);
uint8_t *y_data_dst = reinterpret_cast<uint8_t *>(image_mem[0].virAddr);
int32_t y_data_length = 0;
char *y_data = nullptr;
read_binary_file(y_path, &y_data, &y_data_length);
memcpy(reinterpret_cast<char *>(image_mem[0].virAddr), y_data, y_mem_size);
// copy uv data
int32_t uv_height = input_h / 2;
int32_t uv_width = input_w / 2;
int32_t uv_mem_size = uv_height * w_stride;
hbUCPMallocCached(&image_mem[1], uv_mem_size, 0);
int32_t uv_data_length = 0;
char *uv_data = nullptr;
read_binary_file(uv_path, &uv_data, &uv_data_length);
memcpy(reinterpret_cast<char *>(image_mem[1].virAddr), uv_data, uv_mem_size);
// make sure cahced mem data is flushed to DDR before inference
hbUCPMemFlush(&image_mem[0], HB_SYS_MEM_CACHE_CLEAN);
hbUCPMemFlush(&image_mem[1], HB_SYS_MEM_CACHE_CLEAN);
free(y_data);
free(uv_data);
return 0;
}
int prepare_roi_mem(const std::vector<Roi> &rois,
std::vector<hbUCPSysMem> &roi_mem) {
auto roi_size = rois.size();
roi_mem.resize(roi_size);
for (auto i = 0; i < roi_size; ++i) {
int32_t mem_size = 4 * sizeof(int32_t);
hbUCPMallocCached(&roi_mem[i], mem_size, 0);
int32_t *roi_data = reinterpret_cast<int32_t *>(roi_mem[i].virAddr);
// The order of filling in the corner points of roi tensor is left, top, right, bottom
roi_data[0] = rois[i].left;
roi_data[1] = rois[i].top;
roi_data[2] = rois[i].right;
roi_data[3] = rois[i].bottom;
// make sure cahced mem data is flushed to DDR before inference
hbUCPMemFlush(&roi_mem[i], HB_SYS_MEM_CACHE_CLEAN);
}
return 0;
}