This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

AM68A: System Halt Issue with 16-bit Grayscale Input in Deep Learning Model on QNX and TIDL

Part Number: AM68A

Tool/software:

My Environment

  • QNX 7.1 & RTOS 9.2
  • J721S2
  • TIDL 9.2

Problem

  • I’m encountering an issue with a deep learning model that takes 16-bit grayscale input.
  • The model does not work as expected when used with the code shown below.
  • Models with 8-bit grayscale and 16-bit RGB inputs work fine, but the 16-bit grayscale model does not.

Symptoms

  • No error logs are generated.
  • The system halts or freezes.

Error Occurrence

  • The system runs without issues until a value is provided to the network input tensor.
  • The system stops during the vxProcessGraph function call.
  • The printf("Optimized conversion time: %.3f ms\n", optimized_time); statement inside the custom kernel is executed, but none of the output that should follow the vxProcessGraph call is printed.

/*
 * Enumeration IDs for the custom image-to-tensor user kernels.
 * KERNEL_ID_CUSTOM_IMAGE_TO_TENSOR_16bit_grey is the one passed to
 * vxAddUserKernel() and vxGetKernelByEnum() in the code below; the other two
 * are presumably used by sibling kernels that are not shown here.
 *
 * NOTE(review): these are raw integers rather than values built with
 * VX_KERNEL_BASE(VX_ID_USER, ...) or obtained from vxAllocateUserKernelId();
 * confirm they cannot collide with kernel enums already registered in the
 * context.
 */
#define KERNEL_ID_CUSTOM_IMAGE_TO_TENSOR_GREY 123459
#define KERNEL_ID_CUSTOM_IMAGE_TO_TENSOR_16bit 123460
#define KERNEL_ID_CUSTOM_IMAGE_TO_TENSOR_16bit_grey 123461

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Validator callback for the "custom.image_to_tensor_16bit_grey" user kernel.
 *
 * Accepts the node only when exactly two parameters are supplied and
 * parameter 0 is an image whose format is VX_DF_IMAGE_U8.
 *
 * NOTE(review): parameter 1 (the output tensor) is not inspected and metas[]
 * is left untouched; confirm the framework accepts that for this output.
 *
 * @param node        Node being validated (unused).
 * @param parameters  Node parameters; index 0 is read as the input image.
 * @param num         Number of entries in @p parameters; must be 2.
 * @param metas       Output meta formats (not modified).
 * @return VX_SUCCESS when the input is accepted,
 *         VX_ERROR_INVALID_PARAMETERS when @p num is not 2,
 *         the vxQueryImage() status when the format query fails,
 *         VX_ERROR_INVALID_FORMAT when the image is not VX_DF_IMAGE_U8.
 */
vx_status VX_CALLBACK customImageToTensorValidator_16bit_grey(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
    (void)node;
    (void)metas;

    if (num != 2) {
        return VX_ERROR_INVALID_PARAMETERS;
    }

    vx_image input = (vx_image)parameters[0];

    /* Initialised so a failed query can never leave an indeterminate value
     * to be compared against VX_DF_IMAGE_U8. */
    vx_df_image df = 0;
    vx_status status = vxQueryImage(input, VX_IMAGE_FORMAT, &df, sizeof(df));
    if (status != VX_SUCCESS) {
        return status;
    }

    if (df != VX_DF_IMAGE_U8) {
        return VX_ERROR_INVALID_FORMAT;
    }

    return VX_SUCCESS;
}

/**
 * Initialize callback for the "custom.image_to_tensor_16bit_grey" user kernel.
 *
 * There is nothing to set up: the arguments are ignored and the callback
 * always reports success.
 *
 * @return VX_SUCCESS, unconditionally.
 */
vx_status VX_CALLBACK customImageToTensorInitialize_16bit_grey(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    /* None of the callback arguments are needed. */
    (void)node;
    (void)parameters;
    (void)num;

    const vx_status result = VX_SUCCESS;
    return result;
}

/**
 * Deinitialize callback for the "custom.image_to_tensor_16bit_grey" user
 * kernel.
 *
 * There is nothing to tear down: the arguments are ignored and the callback
 * always reports success.
 *
 * @return VX_SUCCESS, unconditionally.
 */
vx_status VX_CALLBACK customImageToTensorDeinitialize_16bit_grey(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    /* None of the callback arguments are needed. */
    (void)node;
    (void)parameters;
    (void)num;

    const vx_status result = VX_SUCCESS;
    return result;
}

/**
 * Processing callback for the "custom.image_to_tensor_16bit_grey" user kernel.
 *
 * Maps the whole input image for reading and the output tensor for writing,
 * then copies every 8-bit source pixel into a 16-bit tensor element
 * (zero-extended, no scaling). Pixel (x, y) is written at byte offset
 * y * stride[1] + x * stride[0] from the mapped tensor base.
 *
 * NOTE(review): the copy loop is sized by the image width/height only. The
 * tensor's dimensions and element size are never compared with the image, so
 * a tensor that is smaller than the image, or whose element is not 2 bytes
 * wide, would be written out of bounds. Confirm the caller guarantees a
 * matching tensor, or add an explicit check.
 *
 * @param node        Node being executed (unused).
 * @param parameters  parameters[0] is the input image, parameters[1] the
 *                    output tensor.
 * @param num         Number of parameters (unused).
 * @return VX_SUCCESS on success, otherwise the status of the first
 *         query/map/unmap call that failed.
 */
vx_status VX_CALLBACK customImageToTensorKernel_16bit_grey(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    (void)node;
    (void)num;

    vx_image input = (vx_image)parameters[0];
    vx_tensor output = (vx_tensor)parameters[1];

    vx_uint32 width = 0;
    vx_uint32 height = 0;
    vx_status status = vxQueryImage(input, VX_IMAGE_WIDTH, &width, sizeof(vx_uint32));
    if (status != VX_SUCCESS) {
        return status;
    }
    status = vxQueryImage(input, VX_IMAGE_HEIGHT, &height, sizeof(vx_uint32));
    if (status != VX_SUCCESS) {
        return status;
    }

    /* Map the full image. */
    vx_rectangle_t rect;
    rect.start_x = 0;
    rect.start_y = 0;
    rect.end_x = width;
    rect.end_y = height;

    vx_imagepatch_addressing_t addr;
    vx_map_id map_id;
    vx_uint8 *src_ptr = NULL;

    status = vxMapImagePatch(input, &rect, 0, &map_id, &addr, (void **)&src_ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0);
    if (status != VX_SUCCESS) {
        /* Nothing is mapped yet, so there is nothing to undo. */
        return status;
    }

    /* Only three strides are requested below; zero-fill so the diagnostic
     * print of stride[3] never reads an indeterminate value. */
    vx_size stride[4] = {0};
    vx_map_id map_id_tensor;
    vx_uint16 *tensor_ptr = NULL;

    status = tivxMapTensorPatch(output, 3, NULL, NULL, &map_id_tensor, stride, (void **)&tensor_ptr, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
    if (status != VX_SUCCESS) {
        /* Do not leave the image mapped on the error path. */
        vxUnmapImagePatch(input, map_id);
        return status;
    }

    printf("    [DIS_DL] [LM] Image to customImageToTensorKernel_16bit_grey\n");
    printf("stride : %zu, %zu, %zu %zu \n", stride[0], stride[1], stride[2], stride[3]);
    printf("addr.stride_x : %d, addr.stride_y : %d\n", addr.stride_x, addr.stride_y);

    /* Start the clock after the diagnostics so that console I/O is not
     * counted as conversion time. */
    vx_uint64 start_time = tivxPlatformGetTimeInUsecs();

    for (vx_size y = 0; y < rect.end_y; y++) {
        const vx_uint8 *src_row = src_ptr + y * addr.stride_y;
        vx_uint8 *dst_row = (vx_uint8 *)tensor_ptr + y * stride[1];

        for (vx_size x = 0; x < rect.end_x; x++) {
            vx_uint16 *tensor_element = (vx_uint16 *)(dst_row + x * stride[0]);

            /* Widen the 8-bit pixel to 16 bits. */
            *tensor_element = src_row[x * addr.stride_x];
        }
    }

    vx_uint64 end_time = tivxPlatformGetTimeInUsecs();

    /* Always attempt both unmaps; report the first failure, if any. */
    status = vxUnmapImagePatch(input, map_id);
    vx_status tensor_unmap_status = tivxUnmapTensorPatch(output, map_id_tensor);
    if (status == VX_SUCCESS) {
        status = tensor_unmap_status;
    }

    float optimized_time = (end_time - start_time) / 1000.0f;
    printf("Optimized conversion time: %.3f ms\n", optimized_time);

    return status;
}

/**
 * Registers the "custom.image_to_tensor_16bit_grey" user kernel in @p context.
 *
 * The kernel takes two required parameters: an input image (index 0) and an
 * output tensor (index 1). If any registration step fails, the failure is
 * printed and the partially registered kernel is removed again; the function
 * itself returns nothing.
 *
 * @param context  OpenVX context in which the kernel is registered.
 */
void registerImage2TensorKernel_16bit_grey(vx_context context) {
    vx_kernel kernel = vxAddUserKernel(context,
                                       "custom.image_to_tensor_16bit_grey",
                                       KERNEL_ID_CUSTOM_IMAGE_TO_TENSOR_16bit_grey,
                                       customImageToTensorKernel_16bit_grey,
                                       2,
                                       customImageToTensorValidator_16bit_grey,
                                       customImageToTensorInitialize_16bit_grey,
                                       customImageToTensorDeinitialize_16bit_grey);

    /* vxAddUserKernel reports failure through the returned reference. */
    vx_status status = vxGetStatus((vx_reference)kernel);
    if (status != VX_SUCCESS) {
        printf("registerImage2TensorKernel_16bit_grey: vxAddUserKernel failed (%d)\n", (int)status);
        return;
    }

    status = vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED);
    if (status == VX_SUCCESS) {
        status = vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED);
    }
    if (status == VX_SUCCESS) {
        status = vxFinalizeKernel(kernel);
    }

    if (status != VX_SUCCESS) {
        printf("registerImage2TensorKernel_16bit_grey: kernel setup failed (%d)\n", (int)status);
        /* Do not leave a half-configured kernel registered in the context. */
        vxRemoveKernel(kernel);
    }
}

/**
 * Creates a node for the custom 16-bit grey image-to-tensor kernel in
 * @p graph and binds its two parameters.
 *
 * The kernel is looked up by KERNEL_ID_CUSTOM_IMAGE_TO_TENSOR_16bit_grey in
 * the graph's context. The node-creation status is printed; a failure to
 * bind a parameter is printed as well.
 *
 * @param graph   Graph that will own the node.
 * @param input   Image bound to parameter 0.
 * @param output  Tensor bound to parameter 1.
 * @return The node returned by vxCreateGenericNode(); callers should check it
 *         with vxGetStatus() before use.
 */
vx_node vxCustomImage2TensorNode_16bit_grey(vx_graph graph, vx_image input, vx_tensor output) {
    vx_context context = vxGetContext((vx_reference)graph);
    vx_kernel kernel = vxGetKernelByEnum(context, KERNEL_ID_CUSTOM_IMAGE_TO_TENSOR_16bit_grey);
    vx_node node = vxCreateGenericNode(graph, kernel);

    /* The reference obtained from vxGetKernelByEnum is no longer needed once
     * the node has been created; release it so it is not leaked. */
    vxReleaseKernel(&kernel);

    vx_status status = vxGetStatus((vx_reference)node);
    printf("status : %d\n", (int)status);
    if (status != VX_SUCCESS) {
        /* Node creation failed; there is nothing to bind parameters to. */
        return node;
    }

    status = vxSetParameterByIndex(node, 0, (vx_reference)input);
    if (status != VX_SUCCESS) {
        printf("vxCustomImage2TensorNode_16bit_grey: setting parameter 0 failed (%d)\n", (int)status);
    }

    status = vxSetParameterByIndex(node, 1, (vx_reference)output);
    if (status != VX_SUCCESS) {
        printf("vxCustomImage2TensorNode_16bit_grey: setting parameter 1 failed (%d)\n", (int)status);
    }

    return node;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  • Hi,

    Do you see the hang during the layer execution on C7x?

    Can you set traceLogLevel = 2 in the TIDL node, enable the remote core logs by running "./vision_apps_init.sh" before running the application, and share the additional logs from the algorithm?

    This would help us identify where the hang occurs.

    Regards,

    Nikhil