Part Number: AM5728
Tool/software: TI-RTOS
Hello. I'm developing software on the AM5728 using OpenCL.
I'm using ti-processor-sdk-linux-rt-am57xx-evm-04.03.00.05.
But there is a problem.
I understand that OpenCL uses the DSPs on the AM5728.
I have also referred to http://processors.wiki.ti.com/index.php/OpenCV.
As you can see, it says the DSP is faster than the ARM for GaussianBlur.
But in my experience, the DSP is not faster.
The following are my source code and results.
What is my problem?
Thanks.
#source code
//opencv
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/videoio.hpp"
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
#include <opencv2/core/ocl.hpp>
//C
#include <stdio.h>
#include <time.h>
#include <unistd.h>
//C++
#include<iostream>
#include<sstream>
#include<vector>
#include<algorithm>
#include<cstdio>
#include <time.h>
using namespace std;
using namespace cv;
/* Time difference calculation, in ms units */
double tdiff_calc(struct timespec &tp_start, struct timespec &tp_end)
{
    /* Nanosecond remainder, scaled to milliseconds; may be negative when the
       end sample's tv_nsec is smaller than the start sample's. */
    const double nsec_part_ms = (double)(tp_end.tv_nsec - tp_start.tv_nsec) * 0.000001;
    /* Whole-second difference, scaled to milliseconds. */
    const double sec_part_ms = (double)(tp_end.tv_sec - tp_start.tv_sec) * 1000.0;
    /* The two parts together give the elapsed time from tp_start to tp_end in ms. */
    return nsec_part_ms + sec_part_ms;
}
using namespace cv;
int main(int argc, char** argv)
{
struct timespec tp0, tp1, tp2, tp3, tp4;
Mat img_mat, img1;
UMat img, gray, img_bak, img2;
imread("1.jpg", 1).copyTo(img_mat);
cv::ocl::setUseOpenCL(false); /* suspend dispatch to DSP - from now on kernels are executed on A15 only! */
clock_gettime(CLOCK_MONOTONIC, &tp0);
cvtColor(img_mat, img_mat, COLOR_BGR2GRAY);
clock_gettime(CLOCK_MONOTONIC, &tp1);
cv::ocl::setUseOpenCL(true); /* resume DSP dispatch - from now on kernels, based on above decision tree, can be dispatched to DSP */
img_mat.copyTo(gray);
clock_gettime(CLOCK_MONOTONIC, &tp2);
for (int k = 0; k<5; k++)
{
GaussianBlur(gray, img2, Size(7, 7), 1.25);
Canny(img2, img_bak, 0, 30);
}
clock_gettime(CLOCK_MONOTONIC, &tp3);
usleep(4000000);-------------------
cv::ocl::setUseOpenCL(false); /* suspend dispatch to DSP - from now on kernels are executed on A15 only! */
vector<vector<Point> > contours;
findContours(img_bak, contours,CV_RETR_EXTERNAL,CV_CHAIN_APPROX_SIMPLE);
clock_gettime(CLOCK_MONOTONIC, &tp4);
printf ("BGR2GRAY tdiff=%lf ms \n", tdiff_calc(tp0, tp1));
printf ("Copy2UMat tdiff=%lf ms \n", tdiff_calc(tp1, tp2));
printf ("ImgProc tdiff=%lf ms \n", tdiff_calc(tp2, tp3));
printf ("Contour tdiff=%lf ms \n", tdiff_calc(tp2, tp4));
printf ("\n");
imwrite("canny_proc.jpg", gray);
return 0;
}
#test.sh
# Benchmark driver: runs ./test twice, first with OPENCV_OPENCL_DEVICE naming
# the TI C66 DSP accelerator, then with OPENCV_OPENCL_DEVICE set to 'disabled'.
# NOTE(review): the two TI_OCL_* variables are presumably TI OpenCL runtime
# settings (on-chip kernel loading, kernel caching) - confirm against the TI docs.
# They stay exported for the second run as well.
export TI_OCL_LOAD_KERNELS_ONCHIP=Y
export TI_OCL_CACHE_KERNELS=Y
# First run: point OpenCV at the DSP device.
export OPENCV_OPENCL_DEVICE='TI AM57:ACCELERATOR:TI Multicore C66 DSP'
echo "OpenCL on, canny"
./test
# Second run: same binary with the OpenCL device disabled, for comparison.
export OPENCV_OPENCL_DEVICE='disabled'
echo "OpenCL off, canny"
./test
#result
root@am57xx-evm:~/nfs/Documents/big_work/example-applications/test# ./test.sh
OpenCL on, canny
[72887.978500] omap-iommu 41501000.mmu: 41501000.mmu: version 3.0
[72887.984569] omap-iommu 41502000.mmu: 41502000.mmu: version 3.0
[72887.998851] omap-iommu 40d01000.mmu: 40d01000.mmu: version 3.0
[72888.004855] omap-iommu 40d02000.mmu: 40d02000.mmu: version 3.0
BGR2GRAY tdiff=13.505725 ms
Copy2UMat tdiff=61.719637 ms
ImgProc tdiff=13.152576 ms
Contour tdiff=4578.537824 ms
OpenCL off, canny
BGR2GRAY tdiff=13.960378 ms
Copy2UMat tdiff=3.409982 ms
ImgProc tdiff=526.907913 ms
Contour tdiff=557.267206 ms