Hi, everyone,
I'm using a TMS320C6670 DSP and trying to use OpenMP in my project. However, I need to initialize the QMSS manually, since it is also used by other peripherals (Ethernet, FFTC). So I took the Hello_world example from OpenMP 2_01_16_03 and modified it a bit:
/* PAD: extra elements reserved in `data`; main() starts writing PAD floats in. */
#define PAD (64)
/* PNUM: number of IQ pairs in `iqs` and number of window coefficients in `win`. */
#define PNUM (16384)
/* OPAD: not referenced by the code shown here. */
#define OPAD (8)
/* TRNUM: thread count passed to omp_set_num_threads() in main(). */
#define TRNUM (4)
/* Place the input and output buffers in the ".inputbuff" section
 * (the section itself is defined outside this listing). */
#pragma DATA_SECTION(iqs, ".inputbuff");
#pragma DATA_SECTION(data, ".inputbuff");
/* Align all three buffers to 128 bytes, the cache-line size main() assumes
 * when it splits the work between threads. */
#pragma DATA_ALIGN(iqs, 128);
#pragma DATA_ALIGN(data, 128);
#pragma DATA_ALIGN(win, 128);
/* Output buffer: 2 floats per IQ pair, with room for PAD extra pairs. */
float data[2*(PNUM+PAD)];
/* Input samples, interleaved as I, Q, I, Q, ... (2 floats per pair). */
float iqs[2*PNUM];
/* Window coefficients, one per IQ pair. */
float win[PNUM];
/*
 * Fill the first n entries of the file-level window table `win` with a
 * constant amplitude of 1.0f and return a pointer to the table.
 *
 * n: number of window coefficients to write. The caller must keep n within
 *    the number of elements of `win` (PNUM); no bounds check is done here.
 *
 * Returns: `win`.
 */
inline static float * restrict windowing(
    int n // number of sampled data
)
{
    /* The counter has the same type as n. A narrower counter (e.g. short)
     * would wrap before reaching a large n and the loop would never end. */
    int i;
    float ampl = (float)(1./(1<<0)); /* 1.0 / 2^0 == 1.0 */

    for (i = 0; i < n; i++)
        win[i] = ampl;
    return win;
}
/*
 * Multiply every IQ pair in `iqs` by the matching window coefficient, write
 * the result to `data` starting PAD floats into the buffer, and print the
 * elapsed wall-clock time of the parallel region.
 *
 * The PNUM IQ pairs are split into one contiguous chunk per OpenMP thread.
 * Each chunk is a whole number of 128-byte cache lines (16 IQ pairs per
 * line), so chunk boundaries fall on 128-byte multiples from the start of
 * each buffer.
 *
 * The split is done by hand from the thread id, so the loop inside the
 * parallel region must NOT also carry "#pragma omp for": that would divide
 * each thread's iterations between the threads a second time and leave most
 * of the samples unprocessed.
 *
 * Returns 0.
 */
int main (int argc, char *argv[]) {
    int c_l_n, iq_pairs;
    double st_o = 0.0, fn_o = 0.0;
    float *window; /* named differently from the global `win` to avoid shadowing it */

    (void)argc;
    (void)argv;

    window = windowing(PNUM);
    omp_set_num_threads(TRNUM);

    /* An IQ pair is two floats, so a 128-byte cache line holds
     * 128 / (2 * sizeof(float)) == 64 / sizeof(float) pairs. */
    iq_pairs = 64/sizeof(float);
    /* Number of cache lines needed for PNUM pairs, rounded up. */
    c_l_n = (PNUM + iq_pairs - 1)/iq_pairs;

    st_o = omp_get_wtime();
    #pragma omp parallel shared (data,iqs,window)
    {
        int id = omp_get_thread_num();
        /* Use the team size actually granted by the runtime, not TRNUM. */
        int n_treds = omp_get_num_threads();
        int cl_pc = (c_l_n + n_treds - 1)/n_treds; /* cache lines per thread, rounded up */
        int num_p_pc = cl_pc*iq_pairs;             /* IQ pairs per thread */
        int first = id*num_p_pc;                   /* first IQ pair of this thread */

        /* Threads whose chunk would start past the end have nothing to do. */
        if (first < PNUM) {
            int i;
            int count = PNUM - first;
            /* `data` and `iqs` hold two floats per pair, so their float
             * index is 2*first; `win` holds one float per pair. */
            float * restrict pout = &data[PAD + 2*first];
            const float * restrict pin = &iqs[2*first];
            const float * restrict pw = &window[first];

            /* The last chunk may be shorter than num_p_pc. */
            if (count > num_p_pc)
                count = num_p_pc;

            for (i = 0; i < count; i++) {
                pout[2*i]   = pin[2*i]  *pw[i];
                pout[2*i+1] = pin[2*i+1]*pw[i];
            }
        }
    }
    fn_o = omp_get_wtime();

    /* omp_get_wtime() reports seconds; scale to microseconds for printing. */
    printf("Calculation time = %f mus\n", (fn_o-st_o)*1000000);
    return 0;
}
If I use the standard example platform and config file with this code, everything is calculated just fine, ~4 times faster than the sequential code.
But I need to specify additional regions for other peripherals, so I made some changes to the config file (as described in the OpenMP user guide):
// Tell the OpenMP runtime not to initialize the QMSS itself; the
// application does it in qmssInitOmp(), registered below.
var ompSettings = xdc.useModule("ti.runtime.openmp.Settings");
ompSettings.runtimeInitializesQmss = false;
var OpenMP = xdc.useModule('ti.runtime.ompbios.OpenMP');
// NOTE(review): these two values must agree with what qmssInitOmp() inserts
// as region 0 (NUM_HOST_DESC descriptors starting at index 0) - confirm
// that NUM_HOST_DESC is really 32.
OpenMP.qmssMemRegionIndex = 1; // region 0 is occupied by Ethernet
OpenMP.qmssFirstDescIdxInLinkingRam = 32; // region 0 is 32 descriptors long
// The lines below were added after the shared region and heapOM configuration.
// NOTE(review): the next comment names __TI_omp_start_rtsc_mode, but the
// function actually registered is __TI_omp_initialize_rtsc_mode - confirm.
// __TI_omp_start_rtsc_mode configures the runtime and calls main
var Startup = xdc.useModule('xdc.runtime.Startup');
// qmssInitOmp is added first, i.e. ahead of the OpenMP runtime initialization.
Startup.lastFxns.$add('&qmssInitOmp');
Startup.lastFxns.$add('&__TI_omp_initialize_rtsc_mode');
where the QMSS initialization function looks like this:
/*
 * Application-side QMSS initialization, registered as a startup function.
 *
 * Only the core with DNUM == 0 does any work: it initializes the Queue
 * Manager with internal linking RAM and the acc48_le PDSP1 firmware, inserts
 * the CPSW host-descriptor memory region as region 0 (start index 0,
 * NUM_HOST_DESC descriptors), and then calls Qmss_start(). Every other core
 * returns immediately.
 *
 * Returns 0 on success and on cores other than core 0, -1 if Qmss_init(),
 * Qmss_insertMemoryRegion() or Qmss_start() reports an error.
 *
 * NOTE(review): Qmss_start() is called on core 0 only. If the QMSS LLD in
 * use expects every core to call it after core 0 has finished Qmss_init()
 * (and the OpenMP runtime does not do so itself when
 * runtimeInitializesQmss is false), the other cores stay unconfigured -
 * confirm against the QMSS LLD and OpenMP runtime documentation.
 */
int qmssInitOmp (void)
{
    int result = 0;
    Qmss_MemRegInfo memCfg;
    Qmss_InitCfg qmssInitConfig;

    // Set up QMSS configuration
    if (DNUM==0)
    {
        memset (&qmssInitConfig, 0, sizeof (Qmss_InitCfg));
        // Use internal linking RAM
        qmssInitConfig.linkingRAM0Base = 0;
        qmssInitConfig.linkingRAM0Size = 0;
        qmssInitConfig.linkingRAM1Base = 0x0;
        // Room for the Ethernet host descriptors plus 256 more.
        // NOTE(review): presumably the extra 256 are for the OpenMP runtime - confirm.
        qmssInitConfig.maxDescNum = NUM_HOST_DESC+256;
        qmssInitConfig.pdspFirmware[0].pdspId = Qmss_PdspId_PDSP1;
        qmssInitConfig.pdspFirmware[0].firmware = (void *) &acc48_le;
        qmssInitConfig.pdspFirmware[0].size = sizeof (acc48_le);

        // Initialize the Queue Manager
        result = Qmss_init (&qmssInitConfig, qmssGblCfgParams);
        if(result != QMSS_SOK) {
            #if DEBUG_ERRORS
            System_printf("Error initializing Queue Manager SubSystem, Error code : %d\n", result);
            #endif // DEBUG_ERRORS
            return -1;
        }

        // Initialize and setup CPSW Host Descriptors
        memset (gHostDesc, 0, SIZE_HOST_DESC * NUM_HOST_DESC);
        // Zero the region descriptor first so that any field not assigned
        // below does not reach the LLD holding stack garbage.
        memset (&memCfg, 0, sizeof (memCfg));
        memCfg.descBase = (unsigned int *) Convert_CoreLocal2GlobalAddr ((unsigned int) gHostDesc);
        memCfg.descSize = SIZE_HOST_DESC;
        memCfg.descNum = NUM_HOST_DESC;
        memCfg.manageDescFlag = Qmss_ManageDesc_MANAGE_DESCRIPTOR;
        memCfg.memRegion = Qmss_MemRegion_MEMORY_REGION0;
        memCfg.startIndex = 0;

        // Insert Host Descriptor memory region
        result = Qmss_insertMemoryRegion(&memCfg);
        if(result == QMSS_MEMREGION_ALREADY_INITIALIZED) {
            // Not treated as an error: the region is assumed to be usable as is.
            #if DEBUG_ERRORS
            System_printf("Memory Region %d already Initialized \n", memCfg.memRegion);
            #endif // DEBUG_ERRORS
        } else if(result < QMSS_SOK) {
            #if DEBUG_ERRORS
            System_printf("Error: Inserting memory region for Eth %d, Error code : %d\n", memCfg.memRegion, result);
            #endif // DEBUG_ERRORS
            return -1;
        }

        // A failed start must not be silently ignored.
        result = Qmss_start();
        if(result != QMSS_SOK) {
            #if DEBUG_ERRORS
            System_printf("Error starting Queue Manager SubSystem, Error code : %d\n", result);
            #endif // DEBUG_ERRORS
            return -1;
        }
    }
    return 0;
}
So, after adding all the includes and so on for the QMSS, the project builds without errors, but at run time, when core 0 enters the #pragma omp region, it just hangs at lines 228 and 229 of the tomp_util.h file:
while (mysense != barrier->sense)
tomp_completePendingTasks();
I think I made a mistake in the QMSS initialization, but I can't find it on my own. Thank you in advance.
Best Regards,
Pavlo!

