We have a vision application like the one below, with five nodes in the app graph. The InputNode reads a YUV image from disk as the input of the graph; the LDCNode performs operations such as crop, resize, and de-distortion; the ColorConvertNode performs the color conversion and the conversion to the model input vx_tensor; and the two TIDL nodes load the lane segmentation model and the obstacle detection model, respectively, and run inference on the input image.
When we ran the application, we found that the graph executed the first frame normally, but the second frame was stuck in the TIDLNode and could not continue to execute.
We then checked the C7x log and found many errors, as shown in the figure below.
To locate this problem, we tried adding some debugging code to tiovx/kernels_j7/tidl/dsp/vx_tidl_target.c.
First, we defined two file-scope static pointers of type tivxTIDLObj to hold the tidlObj of each of the two TIDLNode instances.
/* Debug-only markers: each one remembers the tivxTIDLObj of one of the two
 * TIDL node instances, so that the state of the instance that is NOT currently
 * executing can be printed while the other instance runs. Both start as NULL. */
static tivxTIDLObj *tidl_obj_mark_ptr1 = NULL;
static tivxTIDLObj *tidl_obj_mark_ptr2 = NULL;
tidlObj = tivxMemAlloc(sizeof(tivxTIDLObj), (vx_enum)TIVX_MEM_EXTERNAL);
if (tidl_obj_mark_ptr1 == NULL) {
tidl_obj_mark_ptr1 = tidlObj;
} else {
tidl_obj_mark_ptr2 = tidlObj;
}
Then, we print some properties of tidlObj in tivxKernelTIDLProcess. Note that the first print shows the tidlObj properties of the TIDLNode instance that is currently executing, whereas all of the later prints show the tidlObj properties of the other TIDLNode instance, which is not currently executing. The tags such as '###111' are there to distinguish the different stages of tivxKernelTIDLProcess.
printf("### [%s:%d] tidlObj address: %p tidlObj->inBufs.numBufs: %d tidlObj->outBufs.numBufs: %d\n", __func__, __LINE__, tidlObj, tidlObj->inBufs.numBufs, tidlObj->outBufs.numBufs);
if (tidlObj == tidl_obj_mark_ptr1) {
printf("###111 [%s:%d] tidl_obj_mark_ptr2 address: %p tidl_obj_mark_ptr2->inBufs.numBufs: %d tidl_obj_mark_ptr2->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr2, tidl_obj_mark_ptr2->inBufs.numBufs, tidl_obj_mark_ptr2->outBufs.numBufs);
} else {
printf("###111 [%s:%d] tidl_obj_mark_ptr1 address: %p tidl_obj_mark_ptr1->inBufs.numBufs: %d tidl_obj_mark_ptr1->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr1, tidl_obj_mark_ptr1->inBufs.numBufs, tidl_obj_mark_ptr1->outBufs.numBufs);
}
for(id = 0; id < tidlObj->inBufs.numBufs; id++) {
inTensor = (tivx_obj_desc_tensor_t *)obj_desc[in_tensor_idx + id];
printf("### [%s:%d] id: %d target_ptr: %llu mem_size: %d\n", __func__, __LINE__, id, inTensor->mem_ptr.shared_ptr, inTensor->mem_size);
in_tensor_target_ptr = tivxMemShared2TargetPtr(&inTensor->mem_ptr);
tivxCheckStatus(&status, tivxMemBufferMap(in_tensor_target_ptr, inTensor->mem_size, (vx_enum)VX_MEMORY_TYPE_HOST, (vx_enum)VX_READ_ONLY));
tidlObj->inBufDesc[id].bufPlanes[0].buf = in_tensor_target_ptr;
}
for(id = 0; id < tidlObj->outBufs.numBufs; id++) {
outTensor = (tivx_obj_desc_tensor_t *)obj_desc[out_tensor_idx + id];
out_tensor_target_ptr = tivxMemShared2TargetPtr(&outTensor->mem_ptr);
tivxCheckStatus(&status, tivxMemBufferMap(out_tensor_target_ptr, outTensor->mem_size, (vx_enum)VX_MEMORY_TYPE_HOST, (vx_enum)VX_WRITE_ONLY));
tidlObj->outBufDesc[id].bufPlanes[0].buf = out_tensor_target_ptr;
}
if (tidlObj == tidl_obj_mark_ptr1) {
printf("###222 [%s:%d] tidl_obj_mark_ptr2 address: %p tidl_obj_mark_ptr2->inBufs.numBufs: %d tidl_obj_mark_ptr2->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr2, tidl_obj_mark_ptr2->inBufs.numBufs, tidl_obj_mark_ptr2->outBufs.numBufs);
} else {
printf("###222 [%s:%d] tidl_obj_mark_ptr1 address: %p tidl_obj_mark_ptr1->inBufs.numBufs: %d tidl_obj_mark_ptr1->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr1, tidl_obj_mark_ptr1->inBufs.numBufs, tidl_obj_mark_ptr1->outBufs.numBufs);
}
status = tivxAlgiVisionProcess
(
tidlObj->algHandle,
&tidlObj->inBufs,
&tidlObj->outBufs,
(IVISION_InArgs *)tidlObj->inArgs,
(IVISION_OutArgs *)tidlObj->outArgs,
tidlObj->tidlParams.optimize_ivision_activation
);
if (tidlObj == tidl_obj_mark_ptr1) {
printf("###333 [%s:%d] tidl_obj_mark_ptr2 address: %p tidl_obj_mark_ptr2->inBufs.numBufs: %d tidl_obj_mark_ptr2->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr2, tidl_obj_mark_ptr2->inBufs.numBufs, tidl_obj_mark_ptr2->outBufs.numBufs);
} else {
printf("###333 [%s:%d] tidl_obj_mark_ptr1 address: %p tidl_obj_mark_ptr1->inBufs.numBufs: %d tidl_obj_mark_ptr1->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr1, tidl_obj_mark_ptr1->inBufs.numBufs, tidl_obj_mark_ptr1->outBufs.numBufs);
}
tivxCheckStatus(&status, tivxMemBufferUnmap(in_args_target_ptr, inArgs->mem_size, (vx_enum)VX_MEMORY_TYPE_HOST, (vx_enum)VX_READ_ONLY));
tivxCheckStatus(&status, tivxMemBufferUnmap(out_args_target_ptr, outArgs->mem_size, (vx_enum)VX_MEMORY_TYPE_HOST, (vx_enum)VX_WRITE_ONLY));
if((tidlObj->createParams.traceWriteLevel > 0) && (traceData != NULL))
{
tivxTIDLTraceWriteEOB(&tidlObj->mgr);
tivxCheckStatus(&status, tivxMemBufferUnmap(trace_data_target_ptr, traceData->mem_size, VX_MEMORY_TYPE_HOST, VX_WRITE_ONLY));
}
if (tidlObj == tidl_obj_mark_ptr1) {
printf("###444 [%s:%d] tidl_obj_mark_ptr2 address: %p tidl_obj_mark_ptr2->inBufs.numBufs: %d tidl_obj_mark_ptr2->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr2, tidl_obj_mark_ptr2->inBufs.numBufs, tidl_obj_mark_ptr2->outBufs.numBufs);
} else {
printf("###444 [%s:%d] tidl_obj_mark_ptr1 address: %p tidl_obj_mark_ptr1->inBufs.numBufs: %d tidl_obj_mark_ptr1->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr1, tidl_obj_mark_ptr1->inBufs.numBufs, tidl_obj_mark_ptr1->outBufs.numBufs);
}
for(id = 0; id < tidlObj->inBufs.numBufs; id++) {
inTensor = (tivx_obj_desc_tensor_t *)obj_desc[in_tensor_idx + id];
in_tensor_target_ptr = tivxMemShared2TargetPtr(&inTensor->mem_ptr);
tivxCheckStatus(&status, tivxMemBufferUnmap(in_tensor_target_ptr, inTensor->mem_size, (vx_enum)VX_MEMORY_TYPE_HOST, (vx_enum)VX_READ_ONLY));
}
if (tidlObj == tidl_obj_mark_ptr1) {
printf("###555 [%s:%d] tidl_obj_mark_ptr2 address: %p tidl_obj_mark_ptr2->inBufs.numBufs: %d tidl_obj_mark_ptr2->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr2, tidl_obj_mark_ptr2->inBufs.numBufs, tidl_obj_mark_ptr2->outBufs.numBufs);
} else {
printf("###555 [%s:%d] tidl_obj_mark_ptr1 address: %p tidl_obj_mark_ptr1->inBufs.numBufs: %d tidl_obj_mark_ptr1->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr1, tidl_obj_mark_ptr1->inBufs.numBufs, tidl_obj_mark_ptr1->outBufs.numBufs);
}
for(id = 0; id < tidlObj->outBufs.numBufs; id++) {
outTensor = (tivx_obj_desc_tensor_t *)obj_desc[out_tensor_idx + id];
out_tensor_target_ptr = tivxMemShared2TargetPtr(&outTensor->mem_ptr);
tivxCheckStatus(&status, tivxMemBufferUnmap(out_tensor_target_ptr, outTensor->mem_size, (vx_enum)VX_MEMORY_TYPE_HOST, (vx_enum)VX_WRITE_ONLY));
}
// add by nullmax
if (tidlObj == tidl_obj_mark_ptr1) {
printf("###666 [%s:%d] tidl_obj_mark_ptr2 address: %p tidl_obj_mark_ptr2->inBufs.numBufs: %d tidl_obj_mark_ptr2->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr2, tidl_obj_mark_ptr2->inBufs.numBufs, tidl_obj_mark_ptr2->outBufs.numBufs);
} else {
printf("###666 [%s:%d] tidl_obj_mark_ptr1 address: %p tidl_obj_mark_ptr1->inBufs.numBufs: %d tidl_obj_mark_ptr1->outBufs.numBufs: %d\n", __func__, __LINE__, tidl_obj_mark_ptr1, tidl_obj_mark_ptr1->inBufs.numBufs, tidl_obj_mark_ptr1->outBufs.numBufs);
}
}
Next, we ran the vision application and got the C7x output shown in the image below.
We can see that the detection model is executed in the blue box, and its input number is 1 and the output number is 18, which is consistent with our model. During the execution of the detection model, the input number of another lane model printed out is 1, and the output number is 6, which is also consistent with our model.
Now let's look at the situation in the red box, where the lane model is executed. The first print shows that it has 1 input and 6 outputs, which is normal. The tidlObj properties of the detection model printed at '###111' and '###222' are also normal, but from '###333' onward its numbers of inputs and outputs suddenly become very strange values.
So the next time the detection model is executed, it tries to iterate over 436861197 inputs, although we only passed in one input_tensor, which causes the subsequent tivxMemBufferMap errors.
The function called between the '###222' and '###333' prints is tivxAlgiVisionProcess. We suspect that memory corruption occurs inside this call, and we need TI's help to locate it.
Four additional points:
1. We modified the graph to run only one TIDLNode after the ColorConvertNode; it then runs normally, whether with the detection model or the lane model.
2. SDK currently used: ti-processor-sdk-rtos-j721e-evm-08_02_00_05
3. Our same set of application code can run normally on both 7.0 and 7.1 SDK;
4. We confirmed that the input of the TIDLNode (input_tensor) is correct.
Thanks, I would appreciate it if you could give a prompt reply.