TDA4AL-Q1: Delay in consuming released reference

Part Number: TDA4AL-Q1

Tool/software:

Hi Nikhil Dasan,

I am experimenting with a custom graph pipeline that contains 4 nodes, schematic diagram is shown.

Here, circles represent nodes and squares represent data reference queues. D1 and D4 are Graph parameters. Others are intermediate data reference queues. The function of the nodes is as follows: Nodes N1, N2 and N3 sleep for 1 sec, 2 sec, 3 sec respectively. Node N4 sleeps for 10 sec. So by the time node N3 has finished execution, node N4 will be sleeping further for 5 seconds. Since the data reference queue D4 is a graph parameter, it will send the reference consumed command to the host once N3 has completed execution, which will signal the event that waits to dequeue the reference from the graph parameter D4.

In my case, even though the reference consumed command is sent from the target side, the reference is not being consumed by the host till node N4 has completed its execution. Can you guide me what could cause this delay in the above case? 

The code for the same is attached below.

#include <stdio.h>
#include <unistd.h>
#include <VX/vx.h>
#include <VX/vx_khr_pipelining.h>
#include <TI/tivx.h>
#include <utility.h>
#include <tivx_openvx_core_kernels.h>


#define IN0_IMG_IDX (0u)
#define SLEEP_PARAM_IDX (1u)
#define OUT0_IMG_IDX (2u)
#define MAX_PARAMS (3u)
#define BUF_SIZE (2u)


static vx_enum kernel_id = (vx_status)VX_ERROR_INVALID_PARAMETERS;
static vx_kernel g_kernel = NULL;


static vx_status VX_CALLBACK kernel_init(vx_node node, vx_reference parameters[], vx_int32 num){
    // printf(" vx_custom_node_1: init SUCCESS ... \n");
    return VX_SUCCESS;
}

static vx_status VX_CALLBACK kernel_run(vx_node node, vx_reference parameters[], vx_uint32 num){
    // vx_image in_image = (vx_image)parameters[IN0_IMG_IDX];
    // vx_image out_image = (vx_image)parameters[OUT0_IMG_IDX];
    vx_scalar sleep_time = (vx_scalar)parameters[SLEEP_PARAM_IDX];
    int t;
    vxCopyScalar(sleep_time, &t, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
    printf(" \t[ %3.4f ] sleeping for %d seconds \n", tivxPlatformGetTimeInUsecs()/1000000.0, t);
    sleep(t);
    printf(" \t[ %3.4f ] done sleeping for %d seconds \n", tivxPlatformGetTimeInUsecs()/1000000.0, t);
    return VX_SUCCESS;
}

static vx_status VX_CALLBACK kernel_deinit(vx_node node, vx_reference parameters[], vx_uint32 num){
    return VX_SUCCESS;
}

static vx_status VX_CALLBACK kernel_validate(vx_node node,
        const vx_reference parameters[ ],
        vx_uint32 num, vx_meta_format metas[]){
    return VX_SUCCESS;
}

static vx_node get_kernel_node(vx_graph graph, vx_image in, vx_scalar sleep_time, vx_image out){
    vx_node node;
    vx_reference refs[] = {(vx_reference)in, (vx_reference)sleep_time, (vx_reference)out};
    
    node = tivxCreateNodeByKernelEnum(graph,kernel_id,refs,3);
    vxSetReferenceName((vx_reference)node, "NODE");
    return node;
}

static vx_status kernel_create(vx_context context){
    vx_kernel kernel = NULL;
    vx_status status;
    int index=0;

    status = vxAllocateUserKernelId(context, &kernel_id);
    kernel = vxAddUserKernel(
        context,
        "kernel_1",
        kernel_id,
        kernel_run,
        3,
        kernel_validate,
        kernel_init,
        kernel_deinit
    );

    tivxKernelsHostUtilsAddKernelTargetDsp(kernel);
    tivxAddKernelTarget(kernel, TIVX_TARGET_MPU_3);

    // tivxAddKernelTarget(kernel, TIVX_TARGET_DSP1);
    // #ifndef SOC_AM62A
    // tivxAddKernelTarget(kernel, TIVX_TARGET_DSP2);
    // #endif
    // tivxAddKernelTarget(kernel, TIVX_CPU_ID_MPU_0);

    status = vxGetStatus((vx_reference)kernel);

    if(status==VX_SUCCESS){
        status = vxAddParameterToKernel(
            kernel,
            index++,
            (vx_enum)VX_INPUT,
            (vx_enum)VX_TYPE_IMAGE,
            (vx_enum)VX_PARAMETER_STATE_REQUIRED
        );
    }
    printf(" done adding input image parameter \n");
    if(status==VX_SUCCESS){
        status = vxAddParameterToKernel(
            kernel,
            index++,
            (vx_enum)VX_INPUT,
            (vx_enum)VX_TYPE_SCALAR,
            (vx_enum)VX_PARAMETER_STATE_REQUIRED
        );
    }
    printf(" done adding input int parameter \n");
    if(status==VX_SUCCESS){
        status = vxAddParameterToKernel(
            kernel,
            index++,
            (vx_enum)VX_OUTPUT,
            (vx_enum)VX_TYPE_IMAGE,
            (vx_enum)VX_PARAMETER_STATE_REQUIRED
        );
    }
    printf(" done adding output image parameter \n");
    if(status == VX_SUCCESS){
        status = vxFinalizeKernel(kernel);
    }
    if(status != VX_SUCCESS){
        vxReleaseKernel(&kernel);
        kernel = NULL;
    }else{
        g_kernel = kernel;
    }
}

static vx_status kernel_remove(vx_context context){
    vx_status status;
    status = vxRemoveKernel(g_kernel);
    g_kernel = NULL;

    return status;
}


static void add_graph_parameter_by_node_index(vx_graph graph, vx_node node, vx_uint32 node_parameter_index)
{
    vx_parameter parameter = vxGetParameterByIndex(node, node_parameter_index);

    vxAddParameterToGraph(graph, parameter);
    vxReleaseParameter(&parameter);
}

void vx_custom_pipeline_split_data_q(){
    vx_context context;
    vx_graph graph;
    vx_image img1[BUF_SIZE], img2, img3, img4[BUF_SIZE], img5;
    vx_node node_1, node_2, node_3, node_4;
    vx_uint32 width=640, height=480;
    vx_status status;

    vx_uint32 num_buf=BUF_SIZE, pipeline_depth=2, buf_id, loop_id, loop_cnt=2, exe_time;
    vx_graph_parameter_queue_params_t graph_params_list[2];

    printf(" Tutorial started !!! \n");

    context = vxCreateContext();

    status = kernel_create(context);
    printf(" done creating kernel, kernel_id = %d \n", kernel_id);
    
    for(int i=0;i<num_buf;i++){
        img1[i] = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
        img4[i] = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
    }

    img2 = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
    img3 = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
    img5 = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);

    printf(" done initializing image variables \n");
    
    graph = vxCreateGraph(context);
    vx_int32 tmp = 1;
    vx_scalar sleep_time_1 = vxCreateScalar(context, VX_TYPE_INT32, &tmp);
    tmp = 2;
    vx_scalar sleep_time_2 = vxCreateScalar(context, VX_TYPE_INT32, &tmp);
    tmp = 3;
    vx_scalar sleep_time_3 = vxCreateScalar(context, VX_TYPE_INT32, &tmp);
    tmp = 10;
    vx_scalar sleep_time_4 = vxCreateScalar(context, VX_TYPE_INT32, &tmp);

    node_1 = get_kernel_node(graph, img1[0], sleep_time_1, img2);
    node_2 = get_kernel_node(graph, img2, sleep_time_2, img3);
    node_3 = get_kernel_node(graph, img3, sleep_time_3, img4[0]);
    node_4 = get_kernel_node(graph, img2, sleep_time_4, img5);

    printf(" node creation done \n");

    vxSetNodeTarget(node_1, (vx_enum)VX_TARGET_STRING, TIVX_TARGET_MPU_3);
    vxSetNodeTarget(node_2, (vx_enum)VX_TARGET_STRING, TIVX_TARGET_MPU_3);
    vxSetNodeTarget(node_3, (vx_enum)VX_TARGET_STRING, TIVX_TARGET_MPU_3);
    vxSetNodeTarget(node_4, (vx_enum)VX_TARGET_STRING, TIVX_TARGET_DSP_C7_1);
    // TIVX_TARGET_DSP_C7_1

    add_graph_parameter_by_node_index(graph, node_1, 0);
    add_graph_parameter_by_node_index(graph, node_3, 2);

    graph_params_list[0].graph_parameter_index = 0;
    graph_params_list[0].refs_list_size = num_buf;
    graph_params_list[0].refs_list = (vx_reference *)&img1[0];

    graph_params_list[1].graph_parameter_index = 1;
    graph_params_list[1].refs_list_size = num_buf;
    graph_params_list[1].refs_list = (vx_reference *)&img4[0];

    vxSetGraphScheduleConfig(
        graph,
        (vx_enum)VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
        2,
        graph_params_list
    );

    tivxSetGraphPipelineDepth(graph, pipeline_depth);
    tivxSetNodeParameterNumBufByIndex(node_1, 2, 1);
    tivxSetNodeParameterNumBufByIndex(node_2, 2, num_buf);
    tivxSetNodeParameterNumBufByIndex(node_4, 2, num_buf);

    status = vxVerifyGraph(graph);

    printf("\n================= Verify graph done =================\n\n");

    exe_time = tivxPlatformGetTimeInUsecs();

    vx_image cur_out_img, cur_in_img;
    uint32_t num_refs;

    /* Running the loop */
    for(buf_id=0;buf_id<BUF_SIZE;buf_id++){
        vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference *)&img1[buf_id], 1);
        vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference *)&img4[buf_id], 1);
    }

    for(loop_id=0;loop_id<loop_cnt;loop_id++){
        vx_image cur_out_img=img4[0], cur_in_img=img1[0];
        uint32_t num_refs;
        vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference *)&cur_in_img, 1, &num_refs);
        vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference *)&cur_out_img, 1, &num_refs);
        // printf(" dequeing done \n");

        // Use the output

        if(loop_id>=loop_cnt-num_buf)continue;
        vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference *)&cur_in_img, 1);
        vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference *)&cur_out_img, 1);
    }

    vxWaitGraph(graph);

    exe_time = tivxPlatformGetTimeInUsecs() - exe_time;
    printf(" execution time in seconds: %d \n", exe_time/1000000);

    for(int i=0;i<num_buf;i++){
        vxReleaseImage(&img1[i]);
        vxReleaseImage(&img4[i]);
    }
    vxReleaseImage(&img2);
    vxReleaseImage(&img3);
    vxReleaseScalar(&sleep_time_1);
    vxReleaseScalar(&sleep_time_2);
    vxReleaseScalar(&sleep_time_3);
    vxReleaseNode(&node_1);
    vxReleaseNode(&node_2);
    vxReleaseNode(&node_3);
    vxReleaseGraph(&graph);
    kernel_remove(context);
    vxReleaseContext(&context);

    printf(" Tutorial completed !!! \n");
}

FYI, I am running the above code in host emulation mode. Additionally, I got to know about the exact time stamps when the reference is released and consumed with the help of logs in the respective function calls.

Kindly explain me the scenario what is happening internally. Thanks in advance!

  • Hello,

    Most of the team members are out of office. Please expect delayed response.

    - Keerthy

  • Hi,

    In my case, even though the reference consumed command is sent from the target side, the reference is not being consumed by the host till node N4 has completed its execution. Can you guide me what could cause this delay in the above case?

    Could you elaborate this? 

    Do you mean, the dequeue from D4 happens after N4 is done? 

    Regards,

    Nikhil Dasan

  • Hi

    Yes you are right, the dequeue from D4 is happening after N4 is done, even though N3 has completed its execution. In an ideal case, I expect the dequeue from D4 to happen right after N3 has completed its execution. 

    Kindly explain why this is not the case.

    Thanks

  • As this is PC emulation mode, all the nodes would be running on some specific tasks right on PC right? Could you help me understand the task priority of each node and also the task priority of the application?

    Regards

    Nikhil

  • Hi

    The targets for the nodes are as follows: nodes N1, N2 and N3 are running on TIVX_TARGET_MPU_3 and the node N4 is running on 

    TIVX_TARGET_DSP_C7_1. The task priority for each node and application is the default priority. 
  • Hi,

      In the kernel_create() add this target TIVX_TARGET_DSP_C7_1 using tivxAddKernelTarget(kernel, TIVX_TARGET_DSP_C7_1);

    After that dequeueing is done after execution of N3.

    I am getting this output on my side, can you share your logs.

    Regards,

    Gokul

  • Hi

    Thank you for the reply.

    This is the output I am getting.

    I am running in PC emulation mode, FYI. Are you using the same?

  • Hi,

     Yes, I am running in PC emulation mode. Can you share your sdk version, host os (ubuntu) version. And I am attaching the code below that I used to get that output.

    #include <stdio.h>
    #include <unistd.h>
    #include <VX/vx.h>
    #include <VX/vx_khr_pipelining.h>
    #include <TI/tivx.h>
    // #include <utility.h>
    #include <tivx_openvx_core_kernels.h>
    
    
    
    #define IN0_IMG_IDX (0u)
    #define SLEEP_PARAM_IDX (1u)
    #define OUT0_IMG_IDX (2u)
    #define MAX_PARAMS (3u)
    #define BUF_SIZE (2u)
    
    
    static vx_enum kernel_id = (vx_status)VX_ERROR_INVALID_PARAMETERS;
    static vx_kernel g_kernel = NULL;
    
    
    static vx_status VX_CALLBACK kernel_init(vx_node node, vx_reference parameters[], vx_int32 num){
        // printf(" vx_custom_node_1: init SUCCESS ... \n");
        return VX_SUCCESS;
    }
    
    static vx_status VX_CALLBACK kernel_run(vx_node node, vx_reference parameters[], vx_uint32 num){
        // vx_image in_image = (vx_image)parameters[IN0_IMG_IDX];
        // vx_image out_image = (vx_image)parameters[OUT0_IMG_IDX];
        vx_scalar sleep_time = (vx_scalar)parameters[SLEEP_PARAM_IDX];
        int t;
        vxCopyScalar(sleep_time, &t, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
        printf(" \t[ %3.4f ] sleeping for %d seconds \n", tivxPlatformGetTimeInUsecs()/1000000.0, t);
        sleep(t);
        printf(" \t[ %3.4f ] done sleeping for %d seconds \n", tivxPlatformGetTimeInUsecs()/1000000.0, t);
        return VX_SUCCESS;
    }
    
    static vx_status VX_CALLBACK kernel_deinit(vx_node node, vx_reference parameters[], vx_uint32 num){
        return VX_SUCCESS;
    }
    
    static vx_status VX_CALLBACK kernel_validate(vx_node node,
            const vx_reference parameters[ ],
            vx_uint32 num, vx_meta_format metas[]){
        return VX_SUCCESS;
    }
    
    static vx_node get_kernel_node(vx_graph graph, vx_image in, vx_scalar sleep_time, vx_image out){
        vx_node node;
        vx_reference refs[] = {(vx_reference)in, (vx_reference)sleep_time, (vx_reference)out};
        
        node = tivxCreateNodeByKernelEnum(graph,kernel_id,refs,3);
        vxSetReferenceName((vx_reference)node, "NODE");
        return node;
    }
    
    static vx_status kernel_create(vx_context context){
        vx_kernel kernel = NULL;
        vx_status status;
        int index=0;
    
        status = vxAllocateUserKernelId(context, &kernel_id);
        kernel = vxAddUserKernel(
            context,
            "kernel_1",
            kernel_id,
            kernel_run,
            3,
            kernel_validate,
            kernel_init,
            kernel_deinit
        );
        printf("%d\n",status); 
        tivxKernelsHostUtilsAddKernelTargetDsp(kernel);
        tivxAddKernelTarget(kernel, TIVX_TARGET_MPU_3);
    
        tivxAddKernelTarget(kernel, TIVX_TARGET_DSP_C7_1);
        // #ifndef SOC_AM62A
        // tivxAddKernelTarget(kernel, TIVX_TARGET_DSP2);
        // #endif
        // tivxAddKernelTarget(kernel, TIVX_CPU_ID_MPU_0);
    
        status = vxGetStatus((vx_reference)kernel);
        printf("%d\n",status); // -7
        if(status==VX_SUCCESS){
            status = vxAddParameterToKernel(
                kernel,
                index++,
                (vx_enum)VX_INPUT,
                (vx_enum)VX_TYPE_IMAGE,
                (vx_enum)VX_PARAMETER_STATE_REQUIRED
            );
            printf(" done adding input image parameter \n");
        }
        else{
            printf("Error adding user kernel\n");
        }
        
        if(status==VX_SUCCESS){
            status = vxAddParameterToKernel(
                kernel,
                index++,
                (vx_enum)VX_INPUT,
                (vx_enum)VX_TYPE_SCALAR,
                (vx_enum)VX_PARAMETER_STATE_REQUIRED
            );
            printf(" done adding input int parameter \n");
        }
        
        if(status==VX_SUCCESS){
            status = vxAddParameterToKernel(
                kernel,
                index++,
                (vx_enum)VX_OUTPUT,
                (vx_enum)VX_TYPE_IMAGE,
                (vx_enum)VX_PARAMETER_STATE_REQUIRED
            );
            printf(" done adding output image parameter \n");
        }
        
        if(status == VX_SUCCESS){
            status = vxFinalizeKernel(kernel);
        }
        if(status != VX_SUCCESS){
            vxReleaseKernel(&kernel);
            kernel = NULL;
        }else{
            g_kernel = kernel;
        }
    }
    
    static vx_status kernel_remove(vx_context context){
        vx_status status;
        status = vxRemoveKernel(g_kernel);
        g_kernel = NULL;
    
        return status;
    }
    
    
    static void add_graph_parameter_by_node_index(vx_graph graph, vx_node node, vx_uint32 node_parameter_index)
    {
        vx_parameter parameter = vxGetParameterByIndex(node, node_parameter_index);
    
        vxAddParameterToGraph(graph, parameter);
        vxReleaseParameter(&parameter);
    }
    
    void vx_custom_pipeline_split_data_q(){
        vx_context context;
        vx_graph graph;
        vx_image img1[BUF_SIZE], img2, img3, img4[BUF_SIZE], img5;
        vx_node node_1, node_2, node_3, node_4;
        vx_uint32 width=640, height=480;
        vx_status status;
    
        vx_uint32 num_buf=BUF_SIZE, pipeline_depth=2, buf_id, loop_id, loop_cnt=2, exe_time;
        vx_graph_parameter_queue_params_t graph_params_list[2];
    
        printf(" Tutorial started !!! \n");
    
        context = vxCreateContext();
        printf("context:%d\n",vxGetStatus((vx_reference)context));
    
        status = kernel_create(context);
        printf(" done creating kernel, kernel_id = %d \n", kernel_id);
        
        for(int i=0;i<num_buf;i++){
            img1[i] = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
            img4[i] = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
        }
    
        img2 = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
        img3 = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
        img5 = vxCreateImage(context, width, height, (vx_df_image)VX_DF_IMAGE_U8);
    
        printf(" done initializing image variables \n");
        
        graph = vxCreateGraph(context);
        vx_int32 tmp = 1;
        vx_scalar sleep_time_1 = vxCreateScalar(context, VX_TYPE_INT32, &tmp);
        tmp = 2;
        vx_scalar sleep_time_2 = vxCreateScalar(context, VX_TYPE_INT32, &tmp);
        tmp = 3;
        vx_scalar sleep_time_3 = vxCreateScalar(context, VX_TYPE_INT32, &tmp);
        tmp = 10;
        vx_scalar sleep_time_4 = vxCreateScalar(context, VX_TYPE_INT32, &tmp);
    
        node_1 = get_kernel_node(graph, img1[0], sleep_time_1, img2);
        node_2 = get_kernel_node(graph, img2, sleep_time_2, img3);
        node_3 = get_kernel_node(graph, img3, sleep_time_3, img4[0]);
        node_4 = get_kernel_node(graph, img2, sleep_time_4, img5);
    
        printf(" node creation done \n");
    
        vxSetNodeTarget(node_1, (vx_enum)VX_TARGET_STRING, TIVX_TARGET_MPU_3);
        printf(" target 1 \n");
        vxSetNodeTarget(node_2, (vx_enum)VX_TARGET_STRING, TIVX_TARGET_MPU_3);
        printf(" target 2 \n");
        vxSetNodeTarget(node_3, (vx_enum)VX_TARGET_STRING, TIVX_TARGET_MPU_3);
        printf(" target 3 \n");
        vxSetNodeTarget(node_4, (vx_enum)VX_TARGET_STRING, TIVX_TARGET_DSP_C7_1);
        // TIVX_TARGET_MPU_3
        printf(" target 4 \n");
    
    
        add_graph_parameter_by_node_index(graph, node_1, 0);
        add_graph_parameter_by_node_index(graph, node_3, 2);
    
        graph_params_list[0].graph_parameter_index = 0;
        graph_params_list[0].refs_list_size = num_buf;
        graph_params_list[0].refs_list = (vx_reference *)&img1[0];
    
        graph_params_list[1].graph_parameter_index = 1;
        graph_params_list[1].refs_list_size = num_buf;
        graph_params_list[1].refs_list = (vx_reference *)&img4[0];
    
        vxSetGraphScheduleConfig(
            graph,
            (vx_enum)VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
            2,
            graph_params_list
        );
    
        tivxSetGraphPipelineDepth(graph, pipeline_depth);
        tivxSetNodeParameterNumBufByIndex(node_1, 2, 1);
        tivxSetNodeParameterNumBufByIndex(node_2, 2, num_buf);
        tivxSetNodeParameterNumBufByIndex(node_4, 2, num_buf);
    
        status = vxVerifyGraph(graph);
        sleep(5);
    
        printf("\n================= Verify graph done =================\n\n");
    
        exe_time = tivxPlatformGetTimeInUsecs();
    
        vx_image cur_out_img, cur_in_img;
        uint32_t num_refs;
    
        /* Running the loop */
        for(buf_id=0;buf_id<BUF_SIZE;buf_id++){
            vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference *)&img1[buf_id], 1);
            vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference *)&img4[buf_id], 1);
        }
        // printf("enqueued 2 buffers success\n");
        for(loop_id=0;loop_id<loop_cnt;loop_id++){
            vx_image cur_out_img=img4[0], cur_in_img=img1[0];
            uint32_t num_refs;
            // printf("dequeuing \n");
            vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference *)&cur_in_img, 1, &num_refs);
            vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference *)&cur_out_img, 1, &num_refs);
    
            printf(" dequeing done \n");
    
            // Use the output
            // printf("enqueueing \n");
            if(loop_id>=loop_cnt-num_buf)continue;
            // printf("enqueueing1 \n");
            vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference *)&cur_in_img, 1);
            vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference *)&cur_out_img, 1);
        }
    
        vxWaitGraph(graph);
    
        exe_time = tivxPlatformGetTimeInUsecs() - exe_time;
        printf(" execution time in seconds: %d \n", exe_time/1000000);
    
        for(int i=0;i<num_buf;i++){
            vxReleaseImage(&img1[i]);
            vxReleaseImage(&img4[i]);
        }
        vxReleaseImage(&img2);
        vxReleaseImage(&img3);
        vxReleaseScalar(&sleep_time_1);
        vxReleaseScalar(&sleep_time_2);
        vxReleaseScalar(&sleep_time_3);
        vxReleaseNode(&node_1);
        vxReleaseNode(&node_2);
        vxReleaseNode(&node_3);
        vxReleaseGraph(&graph);
        kernel_remove(context);
        vxReleaseContext(&context);
    
        printf(" Tutorial completed !!! \n");
    }

    Regards,

    Gokul

  • Thanks for the reply. My sdk version is as follows:

    PROCESSOR-SDK-RTOS-J721S2, 09.00.00.02.

    Host os version: Ubuntu 22.04.5 LTS.

  • Hi Harsha,
      I have tested it in sdk version 09.00.00.02 as well. And got the same output that I posted before. Have you made the changes that I sent,

    In the kernel_create() add this target TIVX_TARGET_DSP_C7_1 using tivxAddKernelTarget(kernel, TIVX_TARGET_DSP_C7_1);

    After that dequeueing is done after execution of N3.

    Regards,

    Gokul

  • Thanks for pointing it to adding target for the kernel. I actually overlooked your earlier reply. 

    After adding the target, the dequeuing is done after sleeping for 3 seconds.

    The issue is solved, and I am closing this thread.

    Thanks again!