Hi, everyone,
I'm using a TMS320C6670 DSP and trying to use OpenMP in my project. However, I need to initialize the QMSS manually, since it is also used by other peripherals (Ethernet, FFTC). So I took the Hello_world example from OpenMP 2_01_16_03 and modified it a bit:
#define PAD   (64)
#define PNUM  (16384)
#define OPAD  (8)
#define TRNUM (4)

#pragma DATA_SECTION(iqs, ".inputbuff");
#pragma DATA_SECTION(data, ".inputbuff");
#pragma DATA_ALIGN(iqs, 128);
#pragma DATA_ALIGN(data, 128);
#pragma DATA_ALIGN(win, 128);

/* Interleaved I/Q output; results are written starting at float index PAD. */
float data[2*(PNUM+PAD)];
/* Interleaved I/Q input: pair k occupies iqs[2*k] and iqs[2*k+1]. */
float iqs[2*PNUM];
/* One window coefficient per I/Q pair. */
float win[PNUM];

/*
 * Fill the first n entries of the global window table with a constant
 * amplitude and return a pointer to the table.
 *
 * n - number of sampled data (window coefficients to write); the caller
 *     must keep it within PNUM, the size of win[].
 */
inline static float * restrict windowing(
    int n // number of sampled data
    )
{
    int i; /* int rather than short, so that n > 32767 cannot overflow the index */
    float ampl = (float)(1./(1<<0));

    for (i = 0; i < n; i++)
        win[i] = (float) ampl;
    return win;
}

/*
 * Multiply every I/Q pair of iqs[] by its window coefficient, store the
 * result in data[] (offset by PAD floats), and print the elapsed time.
 *
 * The work is distributed with a single work-sharing loop over all PNUM
 * pairs. The static chunk size is a whole number of 128-byte cache lines
 * (the alignment requested with DATA_ALIGN), so every thread writes whole
 * cache lines of its own. The previous version offset the pointers per
 * thread AND used "omp for" on the same loop, which split the iterations
 * twice, and it mixed float offsets with pair counts, so only part of the
 * input was processed and coefficients were paired with the wrong samples.
 */
int main (int argc, char *argv[])
{
    int i, iq_pairs, c_l_n, cl_pc, num_p_pc;
    double st_o = 0.0, fn_o = 0.0;
    float * restrict pout;
    const float * restrict pin;
    const float * restrict pw;

    pw = windowing(PNUM);
    pin = iqs;
    pout = &data[PAD];

    omp_set_num_threads(TRNUM);

    iq_pairs = (int)(128 / (2 * sizeof(float)));  // number of IQ pairs in one cache line (128 bytes)
    c_l_n = (PNUM + iq_pairs - 1) / iq_pairs;     // cache lines number
    cl_pc = (c_l_n + TRNUM - 1) / TRNUM;          // cache lines per core number
    num_p_pc = cl_pc * iq_pairs;                  // number of IQ pairs per core

    st_o = omp_get_wtime();

    /* One chunk of num_p_pc consecutive pairs per thread; i indexes pairs. */
    #pragma omp parallel for schedule(static, num_p_pc) shared(pout, pin, pw)
    for (i = 0; i < PNUM; i++)
    {
        pout[2*i]   = pin[2*i]   * pw[i];
        pout[2*i+1] = pin[2*i+1] * pw[i];
    }

    fn_o = omp_get_wtime();

    printf("Calculation time = %f mus\n", (fn_o-st_o)*1000000);
    return 0;
}
If I use the standard example platform and config file with this code, everything is calculated just fine, ~4 times faster than the sequential code.
But I need to specify additional regions for other peripherals, so I made some changes to the config file (as described in the OpenMP user guide):
// The OpenMP runtime is told not to initialize QMSS itself.
var ompSettings = xdc.useModule('ti.runtime.openmp.Settings');
ompSettings.runtimeInitializesQmss = false;

var OpenMP = xdc.useModule('ti.runtime.ompbios.OpenMP');
// Region 0 is occupied by Ethernet, so OpenMP gets region 1.
OpenMP.qmssMemRegionIndex = 1;
// Region 0 is 32 descriptors long, so OpenMP starts at linking RAM index 32.
OpenMP.qmssFirstDescIdxInLinkingRam = 32;

// The lines below were added after the shared region and heapOM config.
// Startup hooks are registered in this order: qmssInitOmp first, then
// __TI_omp_initialize_rtsc_mode.
// NOTE(review): the original comment here said "__TI_omp_start_rtsc_mode
// configures the runtime and calls main", which names a different function
// from the one registered below; confirm which one is intended.
var Startup = xdc.useModule('xdc.runtime.Startup');
Startup.lastFxns.$add('&qmssInitOmp');
Startup.lastFxns.$add('&__TI_omp_initialize_rtsc_mode');
The QMSS initialization function looks like this:
int qmssInitOmp (void) { int result = 0; Qmss_MemRegInfo memCfg; Qmss_InitCfg qmssInitConfig; // Set up QMSS configuration if (DNUM==0) { memset (&qmssInitConfig, 0, sizeof (Qmss_InitCfg)); // Use internal linking RAM qmssInitConfig.linkingRAM0Base = 0; qmssInitConfig.linkingRAM0Size = 0; qmssInitConfig.linkingRAM1Base = 0x0; qmssInitConfig.maxDescNum = NUM_HOST_DESC+256; qmssInitConfig.pdspFirmware[0].pdspId = Qmss_PdspId_PDSP1; qmssInitConfig.pdspFirmware[0].firmware = (void *) &acc48_le; qmssInitConfig.pdspFirmware[0].size = sizeof (acc48_le); // Initialize the Queue Manager result = Qmss_init (&qmssInitConfig, qmssGblCfgParams); if(result != QMSS_SOK) { #if DEBUG_ERRORS System_printf("Error initializing Queue Manager SubSystem, Error code : %d\n", result); #endif // DEBUG_ERRORS return -1; } // Initialize and setup CPSW Host Descriptors memset (gHostDesc, 0, SIZE_HOST_DESC * NUM_HOST_DESC); memCfg.descBase = (unsigned int *) Convert_CoreLocal2GlobalAddr ((unsigned int) gHostDesc); memCfg.descSize = SIZE_HOST_DESC; memCfg.descNum = NUM_HOST_DESC; memCfg.manageDescFlag = Qmss_ManageDesc_MANAGE_DESCRIPTOR; memCfg.memRegion = Qmss_MemRegion_MEMORY_REGION0; memCfg.startIndex = 0; // Insert Host Descriptor memory region result = Qmss_insertMemoryRegion(&memCfg); if(result == QMSS_MEMREGION_ALREADY_INITIALIZED) { #if DEBUG_ERRORS System_printf("Memory Region %d already Initialized \n", memCfg.memRegion); #endif // DEBUG_ERRORS } else if(result < QMSS_SOK) { #if DEBUG_ERRORS System_printf("Error: Inserting memory region for Eth %d, Error code : %d\n", memCfg.memRegion, result); #endif // DEBUG_ERRORS return -1; } Qmss_start(); } return 0; }
After I added all the includes and other setup needed for the QMSS, the project builds without errors, but at run time, when core 0 enters the #pragma omp region, it just hangs at lines 228-229 of the tomp_util.h file:
while (mysense != barrier->sense) tomp_completePendingTasks();
I think I made a mistake in the QMSS initialization, but I can't find it on my own. Thank you in advance.
Best Regards,
Pavlo!