Tool/software:
Hi TI,
My goal is to check whether we can reach the maximum MAC performance of 80 GFLOPS mentioned in the datasheet (TDA4VM processors). To do so, I wrote a program using DSPLIB functions that runs a FIR algorithm on the C7x. I mainly based my code on DSPLIB_fir_example.cpp:
/**
 * main.cpp
 *
 * Runs the DSPLIB FIR kernel on a fixed input signal, measures the elapsed
 * time with the __TSC counter and prints the resulting MAC throughput.
 */
#include "dsplib.h"
#include <stdint.h>
#include <cstdlib>
#include <iostream>
#include "signalFiltreInfloat.h"
#include "IndicesFiltreNyquistV2.h" // contains the Nyquist filter coefficients
#include <c7x.h>

#define CLOCK_PER_SEC 1000000000 // 1 GHz

using namespace std;

/**
 * Entry point.
 *
 * Sets up the FIR kernel (buffer descriptors, init arguments, handle),
 * validates the parameters, executes the kernel nb_ap_fir times and prints
 * the elapsed time and the MAC rate.
 *
 * @return EXIT_SUCCESS when every DSPLIB call returned DSPLIB_SUCCESS,
 *         EXIT_FAILURE on allocation failure or on any DSPLIB error.
 */
int main(void)
{
    /* --- FIR input / output vectors --- */
    const long nbSymboles = 131704;

    /* x(t): input samples (presumably defined in signalFiltreInfloat.h). */
    float* x = signalFiltreInfloat;

    /* h(t): FIR coefficients (presumably defined in IndicesFiltreNyquistV2.h). */
    float* h = IndicesFiltreNyquist;

    /* --- FIR size parameters --- */
    uint32_t dataSize = nbSymboles; // number of samples in x(t)

    // Row pitches in bytes.
    // NOTE(review): the pitches assume 8 bytes per sample (one complex value),
    // while every buffer below is declared DSPLIB_FLOAT32 and y is allocated
    // with sizeof(float) per sample. Confirm which element size is intended.
    uint32_t dataPitchInSize  = nbSymboles * 8;
    uint32_t dataPitchOutSize = nbSymboles * 8;

    uint32_t batchSize = 1;

    // NOTE(review): the filter pitch below is computed for 129 coefficients,
    // but filterSize is 128. Confirm the real tap count.
    uint32_t filterSize = 128;

    uint32_t shift = 1; // original note: "float data type"

    /* Pitch of the filter buffer in bytes (129 coefficients * 8 bytes). */
    uint32_t dataPitchFilterSize = 129 * 8;

    /* --- Kernel init arguments ---
     * Filled in BEFORE DSPLIB_fir_getHandleSize() is called, so that the
     * library never reads an indeterminate structure. */
    DSPLIB_fir_InitArgs kerInitArgs = {};
    kerInitArgs.dataSize   = dataSize;
    kerInitArgs.batchSize  = batchSize;
    kerInitArgs.filterSize = filterSize;
    kerInitArgs.shift      = shift;
    kerInitArgs.funcStyle  = DSPLIB_FUNCTION_OPTIMIZED; // optimized implementation

    /* --- Buffer descriptors --- */
    DSPLIB_bufParams2D_t bufParamsIn     = {};
    DSPLIB_bufParams2D_t bufParamsOut    = {};
    DSPLIB_bufParams2D_t bufParamsFilter = {};

    /* x(t) */
    bufParamsIn.data_type = DSPLIB_FLOAT32;
    bufParamsIn.dim_x     = dataSize;
    bufParamsIn.stride_y  = dataPitchInSize;
    bufParamsIn.dim_y     = batchSize;

    /* y(t) */
    bufParamsOut.data_type = DSPLIB_FLOAT32;
    bufParamsOut.dim_x     = dataSize;
    bufParamsOut.stride_y  = dataPitchOutSize;
    bufParamsOut.dim_y     = batchSize;

    /* h(t) */
    bufParamsFilter.data_type = DSPLIB_FLOAT32;
    bufParamsFilter.dim_x     = filterSize;
    bufParamsFilter.stride_y  = dataPitchFilterSize;
    bufParamsFilter.dim_y     = batchSize;

    /* --- Allocations ---
     * y(t) has as many samples as x(t); the FIR kernel needs the output
     * buffer to be allocated by the caller. Both allocations are checked:
     * the output buffer alone is about 515 KiB, which can exceed the heap
     * configured by a linker command file. */
    float* y = static_cast<float*>(malloc(nbSymboles * sizeof(float)));
    if (y == nullptr) {
        cout << "Echec d'allocation du buffer de sortie y" << endl;
        return EXIT_FAILURE;
    }

    int32_t handleSize = DSPLIB_fir_getHandleSize(&kerInitArgs);
    DSPLIB_kernelHandle handle = malloc(handleSize);
    if (handle == nullptr) {
        cout << "Echec d'allocation du handle du kernel" << endl;
        free(y);
        return EXIT_FAILURE;
    }

    /* --- FIR initialisation ---
     * Both calls take the descriptors in the same order: In, Filter, Out.
     * The three arguments share one type, so a swapped order compiles
     * silently. (Order taken from DSPLIB_fir.h; verify against the SDK
     * version in use.) */
    DSPLIB_STATUS status = DSPLIB_SUCCESS;

    if (status == DSPLIB_SUCCESS) {
        status = DSPLIB_fir_init_checkParams(handle, &bufParamsIn, &bufParamsFilter,
                                             &bufParamsOut, &kerInitArgs);
    }
    if (status == DSPLIB_SUCCESS) {
        status = DSPLIB_fir_init(handle, &bufParamsIn, &bufParamsFilter,
                                 &bufParamsOut, &kerInitArgs);
    }

    /* --- Pre-execution check --- */
    if (status == DSPLIB_SUCCESS) {
        status = DSPLIB_fir_exec_checkParams(handle, x, h, y);
    }

    /* --- FIR execution (timed) --- */
    const int nb_ap_fir = 1; // number of kernel invocations
    unsigned long start_timer = 0;
    unsigned long stop_timer  = 0;

    if (status == DSPLIB_SUCCESS) {
        start_timer = __TSC;
        // Stop at the first failing call so that the error is reported once
        // and no console I/O happens inside the timed region.
        for (int s = 0; s < nb_ap_fir && status == DSPLIB_SUCCESS; s++) {
            status = DSPLIB_fir_exec(handle, x, h, y);
        }
        stop_timer = __TSC;
    }

    int exitCode = EXIT_SUCCESS;

    if (status != DSPLIB_SUCCESS) {
        // Print the numeric status so the failing check can be identified,
        // and do not print a throughput figure for a run that did not happen.
        cout << "Impossible d'appliquer l'algo FIR " << "(DSPLIB status = "
             << static_cast<int>(status) << ")" << endl;
        exitCode = EXIT_FAILURE;
    } else {
        /* Elapsed cycle count between the two __TSC reads. */
        unsigned long nb_cycles = stop_timer - start_timer;

        /* Elapsed time in seconds (cycles / cycles-per-second). */
        double tec = (double)(nb_cycles) / CLOCK_PER_SEC;
        cout << "Le temps écoulé lors de l'exécution algo FIR vaut : " << tec << " s" << endl;

        /* MAC count per run is the original author's working assumption.
         * Computed in 64-bit so that larger nb_ap_fir values cannot
         * overflow int. */
        long long MAC = static_cast<long long>(nb_ap_fir) * (33979632LL / 2);

        /* Throughput in MAC per second. */
        double MAC_par_sec = (double)MAC / tec;
        cout << "Le temps d'exécution est de " << MAC_par_sec << " MAC/s " << endl;
    }

    free(handle);
    free(y);
    return exitCode;
}
Then I used the linker script lnk.cmd provided by TI, located in the C7100 folder at the path: ti-processor-sdk-rtos-j721e-evm-10_01_00_04\dsplib\cmake\linkers. However, whenever I run my project (in Release mode), I always get a status different from DSPLIB_SUCCESS, and therefore I never enter the DSPLIB_fir_exec() function...
Could someone help me solve this issue?
Regards,
Mélanie