AM5728: TIDL error

user4652979

Part Number: AM5728

When I use the TIDL, there is a error which is weird. After the program runing a few seconds, the tiocl fatal error would be shown like following:

recvfrom failed: Link has been severed (67)
rpmsgThreadFxn: transportGet failed on fd 24, returned -20
TIOCL FATAL: Communication to a DSP has been lost (likely due to an MMU fault). Please wait while the DSPs are reset and the runtime attempts to terminate. A reboot may be required before running another OpenCL application if this fails. See the kernel log for fault information.

So, why?

My program is like these.

tidl_set.h

#ifndef TIDL_SET_H
#define TIDL_SET_H

#include <QObject>
#include <QMetaType>
#include <QVariant>

//#include "configuration.h"
//#include "common/object_classes.h"
#include "common/utils.h"
#include "common/video_utils.h"


using namespace std;
using namespace tidl;
using namespace cv;

class Configuration;
class ObjectClasses;

typedef struct Result
{
    cv::Mat image;         //the image
    uint32_t frame_index;
    bool status;            //success or fail
    float time;
    float ratio_xmin[20];
    float ratio_ymin[20];
    float ratio_xmax[20];
    float ratio_ymax[20];
    CvScalar color[20];
    int index; //roi num
}result;

Q_DECLARE_METATYPE(result)

typedef struct Model_size
{
    int width;
    int height;
}model;

class TIDL_SET : public QObject
{
    Q_OBJECT
 public:
    TIDL_SET();
    ~TIDL_SET();

public:
    int dl_initial(int eve_num, int dsp_num, int orighight, int origwidth);
    model dl_config();
    bool dl_run(Mat bgr_frames[], int channel_size);
    result dl_run_img();
    void dl_set_threshold(uint32_t confidece_num);

signals:
    void send_processed_image(QVariant variant);
    //void send_processed_image(result resultdl);

public slots:
    void dl_run_img1();

private:
    bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
                          const Configuration& c, const cmdline_opts_t& opts,
                          float confidence_value);

public:
    // Process arguments
    // Read the TI DL configuration file
    Configuration c;
    //Executor* e_dsp;
    uint32_t pipeline_depth;
    std::vector<ExecutionObjectPipeline *> eops;
    uint32_t num_eops;
    uint32_t frame_idx;

    int orig_width;
    int orig_height;
    int channel_size;
    float img_scale_h,img_scale_w;//to scale the outimage to original image

    Mat bgr_frames[3];

    //yes or not have image to process
    volatile uint32_t have_image;
    //the num of the processing_image for multi DSP
    volatile uint32_t processing_image;

};

#endif // TIDL_SET_H

tild_set.cpp

#include "tidl_set.h"
#include "configuration.h"
#include "common/object_classes.h"

#include "executor.h"
#include "execution_object.h"
#include "execution_object_pipeline.h"

#include <signal.h>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cassert>
#include <functional>
#include <algorithm>
#include <time.h>
#include <unistd.h>

#include <queue>
#include <vector>
#include <cstdio>
#include <string>
#include <chrono>

#define NUM_VIDEO_FRAMES 100
#define DEFAULT_CONFIG "myjdetnet"
#define DEFAULT_INPUT "../test/testvecs/input/horse_768_320.y"
#define DEFAULT_INPUT_FRAMES (1)
#define DEFAULT_OBJECT_CLASSES_LIST_FILE "./barcode_objects.json"
#define DEFAULT_OUTPUT_PROB_THRESHOLD 40

Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
                         int layers_group_id);

std::unique_ptr<ObjectClasses> object_classes;
cmdline_opts_t opts;
//static Executor* e_dsp;
/* Enable this macro to record individual output files and */
/* resized, cropped network input files                    */
//#define DEBUG_FILES

TIDL_SET::TIDL_SET()
{
    orig_height=200;
    orig_width=200;
    frame_idx=0;
    have_image = 0;
    processing_image = 0;
}

TIDL_SET::~TIDL_SET()
{
    //FreeMemory(eops);
    //for (auto eop : eops)  delete eop;
    //delete e_dsp;
    //delete e_eve;
}

int TIDL_SET::dl_initial(int eve_num, int dsp_num, int orighight, int origwidth)
{
    // Process arguments
    //cmdline_opts_t opts;
    opts.config = DEFAULT_CONFIG;
    opts.object_classes_list_file = DEFAULT_OBJECT_CLASSES_LIST_FILE;
    opts.num_eves = eve_num;
    opts.num_dsps = dsp_num;
    opts.output_prob_threshold = DEFAULT_OUTPUT_PROB_THRESHOLD;
    opts.num_frames=1;
    opts.is_camera_input=0;
    opts.is_video_input=0;
    opts.is_preprocessed_input=0;

    orig_width = origwidth;
    orig_height = orighight;
    // Get object classes list

    object_classes = std::unique_ptr<ObjectClasses>(new ObjectClasses(opts.object_classes_list_file));
    if (object_classes->GetNumClasses() == 0)
    {
        cout << "the file path is "<<opts.object_classes_list_file<<endl;
        cout << "No object classes defined for this config." << endl;
        return EXIT_FAILURE;
    }

    return 0;
}

model TIDL_SET::dl_config()
{
    //read the config file
    model model_size;
    std::string config_file = "./tidl_config.txt";
    bool status = c.ReadFromFile(config_file);
    if (!status)
    {
        cerr << "Error in configuration file: " << config_file << endl;
        model_size.height = -1;
        model_size.width  = -1;
        return model_size;
    }
    img_scale_h = 1.0 / c.inHeight;
    img_scale_w = 1.0 / c.inWidth;
    channel_size = c.inHeight * c.inWidth;
    qDebug("Scale_h = %f     scale_w =%f",img_scale_h,img_scale_w);

    model_size.height = c.inHeight;
    model_size.width  = c.inWidth;

    return model_size;
}


void TIDL_SET::dl_run_img1()
{

    try
    {
        Executor* e_dsp = CreateExecutor(DeviceType::DSP, opts.num_dsps,c,1);
        std::vector<ExecutionObjectPipeline *> eops;
        //set the depth of the pipeline
        pipeline_depth = 1;
        for (uint32_t j = 0; j < pipeline_depth; j++)
        {
            for (uint32_t i = 0; i < opts.num_dsps; i++)
            {
                eops.push_back(new ExecutionObjectPipeline({(*e_dsp)[i]}));
            }
        }
        qDebug("lalalla");
        num_eops = eops.size();
        // Allocate input/output memory for each EOP
        AllocateMemory(eops);
        while(1)
        {
            if(have_image == 0 && processing_image == 0)
            {
                //qDebug("there isn't a image");
                sleep(0.05);
                continue;
            }else{
                qDebug("have_imgae = %d  processing_image = %d", have_image, processing_image);
            }
            chrono::time_point<chrono::steady_clock> tloop0, tloop1,tloop2;
            tloop0 = chrono::steady_clock::now();

            result result_img;
            result_img.index=0;

            tloop2 = chrono::steady_clock::now();
            // Process frames with available eops in a pipelined manner
            // additional num_eops iterations to flush pipeline (epilogue)
            ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];

            // Wait for previous frame on the same eop to finish processing
            Mat frame, bgr[3];
            qDebug("1111");
            if (processing_image > 0)
            {
                if (eop->ProcessFrameWait())
                {
                    qDebug("2222");
                    //WriteFrameOutput(*eop, c, opts, opts.output_prob_threshold);
                    //Mat frame, bgr[3];
                    int height= c.inHeight;
                    int width = c.inWidth;

                    unsigned char *in = (unsigned char *) eop->GetInputBufferPtr();
                    bgr[0] = Mat(height, width, CV_8UC(1), in);
                    bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
                    bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
                    cv::merge(bgr, 3, frame);

                    // Draw boxes around classified objects
                    float *out = (float *) eop->GetOutputBufferPtr();
                    int num_floats = eop->GetOutputBufferSizeInBytes() / sizeof(float);
                    for (int i = 0; i < num_floats / 7; i++)
                    {
                        int index = (int)    out[i * 7 + 0];
                        if (index < 0)  break;

                        float score =        out[i * 7 + 2];
                        if (score * 100 < opts.output_prob_threshold)  continue;

                        int   label = (int)  out[i * 7 + 1];
                        int   xmin  = (int) (out[i * 7 + 3] * width);
                        int   ymin  = (int) (out[i * 7 + 4] * height);
                        int   xmax  = (int) (out[i * 7 + 5] * width);
                        int   ymax  = (int) (out[i * 7 + 6] * height);

                        const ObjectClass& object_class = object_classes->At(label);

                        if(opts.verbose) {
                            printf("%2d: (%d, %d) -> (%d, %d): %s, score=%f\n",
                                i, xmin, ymin, xmax, ymax, object_class.label.c_str(), score);
                    }

                        if (xmin < 0)       xmin = 0;
                        if (ymin < 0)       ymin = 0;
                        if (xmax > width)   xmax = width;
                        if (ymax > height)  ymax = height;
                        cv::rectangle(frame, Point(xmin, ymin), Point(xmax, ymax),
                                  Scalar(object_class.color.blue,
                                         object_class.color.green,
                                         object_class.color.red), 2);
                        result_img.ratio_xmin[result_img.index] = xmin * img_scale_w;//0.00390625;
                        result_img.ratio_ymin[result_img.index] = ymin * img_scale_h;//0.00390625;
                        result_img.ratio_xmax[result_img.index] = xmax * img_scale_w;//0.00390625;
                        result_img.ratio_ymax[result_img.index] = ymax * img_scale_h;//0.00390625;
                        result_img.color     [result_img.index] = Scalar(object_class.color.blue,object_class.color.green,object_class.color.red);
                        result_img.index++;
                    }
                    result_img.image = frame;
                    result_img.frame_index = eop->GetFrameIndex();
                    result_img.status = true;
                    cv::imwrite("out.jpg",frame);

                    std::string name = eop->GetDeviceName();
                    qDebug("the name is %s", name.c_str());
                    QVariant variant;
                    variant.setValue(result_img);

                    tloop1 = chrono::steady_clock::now();
                    chrono::duration<float> elapsed = tloop1 - tloop0;
                    chrono::duration<float> elapsed1 = tloop1 - tloop2;
                    cout << "Loop total time (including read/write/opencv/print/etc): "
                          << setw(6) << setprecision(4)
                          << (elapsed.count() * 1000) << "ms" << endl;
                    cout << "run time: "
                          << setw(6) << setprecision(4)
                          << (elapsed1.count() * 1000) << "ms" << endl;
                    result_img.time = elapsed1.count()*1000;
                    emit send_processed_image(variant);
                }
                processing_image--;
            }
            if (have_image > 0)
            {
                char* frame_buffer = eop->GetInputBufferPtr();
                memcpy(frame_buffer,                bgr_frames[0].ptr(), channel_size);
                memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
                memcpy(frame_buffer+2*channel_size, bgr_frames[2].ptr(), channel_size);

                eop->SetFrameIndex(frame_idx);
                eop->ProcessFrameStartAsync();

                frame_idx++;
                processing_image++;
                have_image--;
            }
        }//for while
        FreeMemory(eops);
        for (auto eop : eops)  delete eop;
        delete e_dsp;
    }
    catch (tidl::Exception &e)
    {
        cerr << e.what() << endl;
        //result_img.status = false;
    }
}

// Create frame with boxes drawn around classified objects
bool TIDL_SET::WriteFrameOutput(const ExecutionObjectPipeline& eop,
                      const Configuration& c, const cmdline_opts_t& opts,
                      float confidence_value)
{
    // Asseembly original frame
    int width  = c.inWidth;
    int height = c.inHeight;
    int channel_size = width * height;
    Mat frame, bgr[3];

    unsigned char *in = (unsigned char *) eop.GetInputBufferPtr();
    bgr[0] = Mat(height, width, CV_8UC(1), in);
    bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
    bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
    cv::merge(bgr, 3, frame);

    int frame_index = eop.GetFrameIndex();
    char outfile_name[64];

    // Draw boxes around classified objects
    float *out = (float *) eop.GetOutputBufferPtr();
    int num_floats = eop.GetOutputBufferSizeInBytes() / sizeof(float);
    for (int i = 0; i < num_floats / 7; i++)
    {
        int index = (int)    out[i * 7 + 0];
        if (index < 0)  break;

        float score =        out[i * 7 + 2];
        if (score * 100 < confidence_value)  continue;

        int   label = (int)  out[i * 7 + 1];
        int   xmin  = (int) (out[i * 7 + 3] * width);
        int   ymin  = (int) (out[i * 7 + 4] * height);
        int   xmax  = (int) (out[i * 7 + 5] * width);
        int   ymax  = (int) (out[i * 7 + 6] * height);

        const ObjectClass& object_class = object_classes->At(label);

        if(opts.verbose) {
            printf("%2d: (%d, %d) -> (%d, %d): %s, score=%f\n",
               i, xmin, ymin, xmax, ymax, object_class.label.c_str(), score);
        }

        if (xmin < 0)       xmin = 0;
        if (ymin < 0)       ymin = 0;
        if (xmax > width)   xmax = width;
        if (ymax > height)  ymax = height;
        cv::rectangle(frame, Point(xmin, ymin), Point(xmax, ymax),
                      Scalar(object_class.color.blue,
                             object_class.color.green,
                             object_class.color.red), 2);
    }

    if (opts.is_camera_input || opts.is_video_input)
    {
        cv::imshow("SSD_Multibox", frame);
#ifdef DEBUG_FILES
        // Image files can be converted into video using, example script
        // (on desktop Ubuntu, with ffmpeg installed):
        // ffmpeg -i multibox_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
        // Update width 768, height 320, if necessary
        snprintf(outfile_name, 64, "multibox_%04d.png", frame_index);
        cv::imwrite(outfile_name, r_frame);
#endif
        waitKey(1);
    }
    else
    {
        // Resize to output width/height, keep aspect ratio
        Mat r_frame;
        uint32_t output_width = opts.output_width;
        if (output_width == 0)  output_width = orig_width;
        uint32_t output_height = (output_width*1.0f) / orig_width * orig_height;
        cv::resize(frame, r_frame, Size(output_width, output_height));

        snprintf(outfile_name, 64, "multibox_%d.png", frame_index);
        cv::imwrite(outfile_name, frame);
        printf("Saving frame %d with SSD multiboxes to: %s\n",
               frame_index, outfile_name);
    }

    return true;
}

void TIDL_SET::dl_set_threshold(uint32_t confidece_num)
{
    opts.output_prob_threshold = confidece_num;
}

// Create an Executor with the specified type and number of EOs
Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
                         int layers_group_id)
{
    if (num == 0) return nullptr;

    DeviceIds ids;
    for (uint32_t i = 0; i < num; i++)
        ids.insert(static_cast<DeviceId>(i));

    //return new Executor(dt, ids, c, layers_group_id);
    printf("2222 \n");
    return new Executor(dt, ids, c);
}

I wil use the dl_run_img1() function to process the image after the dl_initial() and dl_config. Please help me and my project has stuck here for a long time.

over 6 years ago

0 user4652979 over 6 years ago

Expert 1860 points

Is there some update?

The problem is werid. When I process a video continuously, the program will run properly。 But when I process image and the interval between two images is greater than seven seconds, the program will crash. So, Is there any mechanism to automatically reclaim the resources of DSP? May be opencl?

0 Manisha Agrawal over 6 years ago in reply to user4652979

TI__Mastermind 22946 points

Hi,

Can you try disabling the DSP watchdog timer, and see if the issue goes away? You can do it by running below command.

echo "on" >"/sys/bus/platform/devices/40800000.dsp/power/control"

echo "on" >"/sys/bus/platform/devices/41000000.dsp/power/control"

Regards,

Manisha

0 Yuan Zhao over 6 years ago in reply to user4652979

TI__Expert 4045 points

In addition to try the workaround that Manisha mentioned, could you also try turn dynamic quantization to off when processing images, e.g.

if (input is image) { c.quantHistoryParam1 = c.quantHistoryParam2 = c.quantMargin = 0; }

Dynamic quantization makes sense for video input because they are strong correlation in between frames. However, if you are processing random images consecutively, you may want to turn dynamic quantization off.

Let us know how it goes. Thanks!

- Yuan

0 user4652979 over 6 years ago in reply to Yuan Zhao

Expert 1860 points

Thanks for your reply.

The problem has been solved. And can you tell me the mechanism of dynamic quantization？

0 Manisha Agrawal over 5 years ago in reply to user4652979

TI__Mastermind 22946 points

Check this document for dynamic quantization

Processors

Processors forum

AM5728: TIDL error