This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

TMS320F28379D: Bad CLA FFT Flash STANDALONE

Part Number: TMS320F28379D
Other Parts Discussed in Thread: C2000WARE

Hello, I am struggling with incorrect CLA computation in STANDALONE mode. My code runs from FLASH and everything works fine when it is launched over JTAG, but when I test my release program with standalone booting, the computation goes wrong. I've been looking for the problem, and so far I can see it in the CLA: the contents of my MagBuf and IOBuffer2 differ between running from FLASH under JTAG and running standalone.

What could cause this? The twiddle factors (I have them in GSRAM), or a bad memcpy?

My linker: 3757.2837xD_FLASH_lnk_cpu1.rar

main.c (running with _STANDALONE, _FLASH predefined symbols)

extern uint32_t Cla1ProgRunStart, Cla1ProgLoadStart, Cla1ProgLoadSize;
extern uint32_t Cla1ConstRunStart, Cla1ConstLoadStart, Cla1ConstLoadSize;
extern uint32_t CLA1fftTablesRunStart, CLA1fftTablesLoadStart, CLA1fftTablesLoadSize;

/*
 * CPU1 entry point as posted in the thread.
 *
 * Sequence visible here: system and GS RAM setup helpers, interrupt/PIE
 * initialisation, an optional boot command for CPU2 (only when _STANDALONE
 * is defined), three load-address -> run-address copies driven by linker
 * symbols, and finally the CLA FFT and CLA initialisation routines.
 *
 * NOTE(review): in this excerpt main() falls off the end after init_Cla();
 * no idle loop or return statement is shown - confirm against the full file.
 */
int main(void) {

	InitSysCtrl();
	GS_RAMaccessControl();

	// Clear the CPU interrupt enable and flag registers before the PIE
	// helpers run. DINT is a TI macro; presumably it masks interrupts globally.
	DINT;
	IER = 0x0000;
	IFR = 0x0000;
	InitPieCtrl();
	InitPieVectTable();

// The CPU2 boot command is only issued for standalone builds; the boot mode
// passed depends on whether _FLASH is also defined.
#ifdef _STANDALONE
#ifdef _FLASH
//
// Send boot command to allow the CPU2 application to begin execution
//
IPCBootCPU2(C1C2_BROM_BOOTMODE_BOOT_FROM_FLASH);
#else
//
// Send boot command to allow the CPU2 application to begin execution
//
IPCBootCPU2(C1C2_BROM_BOOTMODE_BOOT_FROM_RAM);
#endif
#endif

    // Copy each section from its load address to its run address.
    // The third argument is the ADDRESS of the *LoadSize symbol cast to an
    // integer, i.e. the linker-assigned value of that symbol is the length.
    // If the linker leaves a LoadSize symbol at zero, the matching memcpy
    // copies nothing and the run region keeps whatever it held before.
    // NOTE(review): these copies run before init_Cla() changes the LS RAM
    // configuration - presumably so the CPU can still write the run regions.
    memcpy((uint32_t *)&Cla1ProgRunStart, (uint32_t *)&Cla1ProgLoadStart, (uint32_t)&Cla1ProgLoadSize );
    memcpy((uint32_t *)&Cla1ConstRunStart, (uint32_t *)&Cla1ConstLoadStart, (uint32_t)&Cla1ConstLoadSize );
    memcpy((uint32_t *)&CLA1fftTablesRunStart, (uint32_t *)&CLA1fftTablesLoadStart, (uint32_t)&CLA1fftTablesLoadSize);

    // FFT handle/buffer setup first, then CLA memory, task and interrupt setup.
    init_ClaFFT();
    init_Cla();
}


/*
 * Prepare the real-FFT handle used by the CLA.
 *
 * Zeroes IOBuffer, IOBuffer2 and RFFTmagBuff (RFFTphaseBuff is not cleared
 * here), attaches the input/output/magnitude/phase buffers to rfftHandleCla,
 * sets the stage count and FFT size, and finally selects the twiddle-factor
 * source.
 */
void init_ClaFFT(void)
{
    // With this define left commented out, the #else branch below is the one
    // compiled (unless TWFACTORINTABLE is defined elsewhere, e.g. on the
    // compiler command line).
    //#define TWFACTORINTABLE

    memset(&IOBuffer, 0, sizeof(IOBuffer));
    memset(&IOBuffer2, 0, sizeof(IOBuffer2));
    memset(&RFFTmagBuff, 0, sizeof(RFFTmagBuff));

    // Older direct-struct-field setup, superseded by the setter calls below.
    //rfft.InBuf     = &IOBuffer[0];     //Input buffer
    //rfft.OutBuf    = &IOBuffer2[0];    //Output buffer
    //rfft.CosSinBuf = &RFFTF32Coef[0];  //Twiddle factor buffer
    //rfft.MagBuf    = &RFFTmagBuff[0];  //Magnitude buffer

    RFFT_f32_setInputPtr(rfftHandleCla, IOBuffer);
    RFFT_f32_setOutputPtr(rfftHandleCla, IOBuffer2);
    RFFT_f32_setMagnitudePtr(rfftHandleCla, RFFTmagBuff);
    RFFT_f32_setPhasePtr(rfftHandleCla, RFFTphaseBuff);
    RFFT_f32_setStages(rfftHandleCla, FFT_STAGES);
    RFFT_f32_setFFTSize(rfftHandleCla, FFT_SIZE);

#ifdef TWFACTORINTABLE
    RFFT_f32_setTwiddlesPtr(rfftHandleCla, RFFT_f32_twiddleFactors); // twiddles taken from the table
#else
    // Point the handle at RFFTF32Coef, then call the sin/cos table routine.
    // NOTE(review): presumably RFFT_f32_sincostable() fills RFFTF32Coef using
    // the FFT size set above, so it must stay after the setters - confirm
    // against the library documentation.
    RFFT_f32_setTwiddlesPtr(rfftHandleCla, RFFTF32Coef);
    RFFT_f32_sincostable(rfftHandleCla);
#endif
}


Thanks for help.
- Marek.

  • void init_Cla(void)
    {
        // Initialize and wait for CLA1ToCPUMsgRAM
        MemCfgRegs.MSGxINIT.bit.INIT_CLA1TOCPU = 1;
        while(MemCfgRegs.MSGxINITDONE.bit.INITDONE_CLA1TOCPU != 1){};

        // Initialize and wait for CPUToCLA1MsgRAM
        MemCfgRegs.MSGxINIT.bit.INIT_CPUTOCLA1 = 1;
        while(MemCfgRegs.MSGxINITDONE.bit.INITDONE_CPUTOCLA1 != 1){};

        //LS RAM CONTROL pre CLA

        //PROGRAM SPACE
        MemCfgRegs.LSxMSEL.bit.MSEL_LS2 = 1;
        MemCfgRegs.LSxCLAPGM.bit.CLAPGM_LS2 = 1;

        MemCfgRegs.LSxMSEL.bit.MSEL_LS3 = 1;
        MemCfgRegs.LSxCLAPGM.bit.CLAPGM_LS3 = 1;

        //DATA SPACE
        MemCfgRegs.LSxMSEL.bit.MSEL_LS0 = 1;
        MemCfgRegs.LSxCLAPGM.bit.CLAPGM_LS0 = 0;

        MemCfgRegs.LSxMSEL.bit.MSEL_LS1 = 1;
        MemCfgRegs.LSxCLAPGM.bit.CLAPGM_LS1 = 0;

        MemCfgRegs.LSxMSEL.bit.MSEL_LS4 = 1;
        MemCfgRegs.LSxCLAPGM.bit.CLAPGM_LS4 = 0;

        MemCfgRegs.LSxMSEL.bit.MSEL_LS5 = 1;
        MemCfgRegs.LSxCLAPGM.bit.CLAPGM_LS5 = 0;



    /*
        // Detect any CLA fetch access violations, enable
        // interrupt for it (TRM SPRUHM8, 2.11.1.7.4 & 2.14.18)
        AccessProtectionRegs.NMAVSET.bit.CLA1FETCH = 1;
        AccessProtectionRegs.NMAVINTEN.bit.CLA1FETCH = 1;
        // Set the ISR for access violation fault
        PieVectTable.RAM_ACCESS_VIOLATION_INT = accessViolationISR;
        PieCtrlRegs.PIEIER12.bit.INTx12       = 1;
        IER                                  |= M_INT12;
        */

        EDIS;

        //TASKS
        EALLOW;
        Cla1Regs.MVECT1 = (uint16_t)(&Cla1Task1);
    //  Cla1Regs.MVECT2 = (uint16_t)(&Cla1Task2);
    //  Cla1Regs.MVECT3 = (uint16_t)(&Cla1Task3);
    //  Cla1Regs.MVECT4 = (uint16_t)(&Cla1Task4);
    //  Cla1Regs.MVECT5 = (uint16_t)(&Cla1Task5);
    //  Cla1Regs.MVECT6 = (uint16_t)(&Cla1Task6);
    //  Cla1Regs.MVECT7 = (uint16_t)(&Cla1Task7);
        Cla1Regs.MVECT8 = (uint16_t)(&Cla1Task8);


        //Trigger set
        DmaClaSrcSelRegs.CLA1TASKSRCSEL1.bit.TASK1  = CLA_TRIG_NOPERPH;
        DmaClaSrcSelRegs.CLA1TASKSRCSEL2.bit.TASK8  = CLA_TRIG_NOPERPH;
        Cla1Regs.MIER.all                           = (M_INT1 | M_INT8);

        //Sw task force enable
        Cla1Regs.MCTL.bit.IACKE = 1;

        //Cla ISR
        PieVectTable.CLA1_1_INT          = &cla1Isr1;
        PieCtrlRegs.PIEIER11.bit.INTx1   = 0x01;
        IER |= (M_INT11);
        EDIS;

    }

  • Hi Marek,

    Please look at the following post (scroll to the bottom, to the suggestion marked in green). Those are the things to try. It's likely that the watchdog is being tripped. In CCS this does not happen, because the debugger scripts (GEL files) that run in the background disable the watchdog.

    https://e2e.ti.com/support/microcontrollers/c2000-microcontrollers-group/c2000/f/c2000-microcontrollers-forum/918882/launchxl-f28379d-standalone-bootup-from-flash

    Thanks,

    Ashwini

  • Hello, I'll check this tomorrow. I'm using two watchdogs: one external, fed from CPU1, and one internal for CPU2.

  • Hi Marek, sounds good. -Thanks.

  • Hello,

    So I tried it: I physically disabled the external watchdog (I desoldered it), I turned off the internal watchdog for CPU2, and I even disabled the watchdog for CPU1 and CPU2 in F2837xD_CodeStartBranch.asm, without any change... I still get weird data in IOBuffer and IOBuffer2 after the CLA FFT calculation, and only in STANDALONE mode. I did not see XRS being triggered at startup (measured with an oscilloscope).

    I also test my DMA buffers for valid data. These are correct, and the IOBuffer is filled correctly before the calculation (before ClaTask1 trigger).

    Am I doing the upload process properly for STANDALONE? First I flash CPU1, then CPU2; after that I click the Resume button for CPU1, then the same for CPU2. Then I power-cycle the board and it boots. Shouldn't I also load the CLA symbols, as is done when debugging the CLA?

    Note: I do not have init_ClaFFT and init_Cla in the TI ramfuncs section. Should I copy these functions there? Also, my .stack is in a higher GSRAM block (GS3).

    My map for CPU1: CPU1_CLA_map.rar

    Thanks.
    -Marek.

     

  • The JTAG debugger also enables the CLA clock, so this could be an issue too.
     

  • Hi Marek,

    I have not been able to figure out any other reasons yet. One suggestion I do have is to try one of the C2000Ware Flash examples and check if that works in standalone mode. If it does then you can use that to compare with your application.

    Thanks,
    Ashwini

  • The CLA clocks should be enabled within the application during device initialization. Example code snippet from SDK example that does this:

    SysCtl_enablePeripheral(SYSCTL_PERIPH_CLK_CLA1);

    Thanks,
    Ashwini

  • Thanks, I'll try this tomorrow. I have no idea why standalone doesn't work with the CLA, because I have a functional second version of the code without the CLA (the FFT runs purely on CPU1), and standalone works there without the slightest error.

    The CLA should work, but in standalone i have bad output fft IOBuffer2.
    Example for same input values in IOBuffer, then:
    IOBuffer2[0] = 33573976.0 (JTAG DEBUG)
    IOBuffer2[0] = 233241.0 (STANDALONE) 
    same code, same input values, flash linker, but different data.

    Thanks for the help, I will try and test something and then write how I ended up.

    - Marek.

  • Hi, I tried everything and still have the same issue. DMA, PWM, CMPSS, EMIF and all other peripherals work fine, but the CLA does not. On every start I get an access violation for the CLA, but AccessProtectionRegs.NMCLA1FAVADDR stays at 0 the whole time. I found out that my buffer is corrupted after the CLA FFT computation.

    For fixed input data, the contents of IOBuffer2/IOBuffer do not match between JTAG and standalone.

    Data:
    JTAG CLA FFT OUTPUT (correct):
    IOBuffer2[0] = 33569724.0
    IOBuffer2[1] = 0.0
    IOBuffer2[2] = 29.064
    IOBuffer2[3] = 5.5161

    STANDALONE CLA FFT OUTPUT (bad, but SAME input):
    IOBuffer2[0] = 262263.0
    IOBuffer2[1] = 0.00048
    IOBuffer2[2] = oscillates between +-1 and 0
    IOBuffer2[3] = oscillates between +-1 and 0
    IOBuffer2[1023] = oscillates between +-1 and 0

    If I hard-code the correct output data into IOBuffer2, my program runs well. But if I let the CLA calculate it, everything is wrong, and only in standalone... In standalone I am also booting CPU2. CPU2 is synchronized via IPC flags, so CPU1 always boots first. Could the CPU2 initialization overwrite any important registers?

    Then it looks like the buffers in the LS rams were not initialized correctly, or the LS rams were not well initialized without jtag GEL files.

    I tried all this:
    calling SysCtl_enablePeripheral(SYSCTL_PERIPH_CLK_CLA1) after the CLA init functions; changing the IOBuffers from float32 to float; booting CPU2 after the CLA init; external watchdog off; watchdog disabled in CodeStartBranch for both CPUs; a simple computation with two numbers in another CLA task, checking the CLA output on CPU1 (this works well in standalone); moving the twiddles to LS RAM 4_5...

    Thanks for advice.

    -Marek.

  • Hi Marek,

    Let me reach out to some other experts to get more inputs. Please give me a day or two to get back with suggestions.

    Thanks,
    Ashwini

  • Thank you, I'll wait. One more thing: I copy the data with memcpy(IOBuffer, InputBuffer, 1024*sizeof(float32)); The data seems to be good, so I hope the problem is not caused by this function. In any case, I will wait for further help. Thank you so far.

    -Marek.

  • Hi,

    Can you compare the GS RAM contents for both flash mode and standalone.

    I suspect the memcpy function didn't copy the contents properly may be due to incorrect size parameter. remember in c28x each address holds 16bits of data vs most of the other CPUs which holds 8bits of data.

    Regards

    Baskaran

  • Hello, this probably won't be the issue, because I wrote a custom copy function and tried copying the data to IOBuffer with it:

    /* Copy `size` float32 elements from srcArray into dstArray, lowest index first. */
    void CopyArray(float32 *srcArray, float32 *dstArray, Uint16 size)
    {
        Uint16 idx = 0;

        while (idx < size)
        {
            dstArray[idx] = srcArray[idx];
            idx++;
        }
    }


    But I will try comparing the GS RAM contents too. Thanks.

    - Marek.

  • So, I tried to read the data directly from the memory addresses in standalone, and after copying all data with memcpy, the read data match for IOBuffer, and same for InputBuffer located in GSrams. 

    - Marek

  • Hi again, I was able to reproduce the problem in the debugger (JTAG) by using the macro --define = CLA_FFT_TABLES_IN_ROM = 1. It behaves exactly the same as with = 0 in STANDALONE mode, with exactly the same values. Currently the twiddle factors are stored in LS RAM at address 0xA000. For a better idea, I will attach pictures.

    JTAG GOOD CLA OUTPUT (--define = CLA_FFT_TABLES_IN_ROM = 0):


    JTAG BAD CLA OUTPUT with --define = CLA_FFT_TABLES_IN_ROM = 1. Same behavior as standalone with --define = CLA_FFT_TABLES_IN_ROM = 0.


    JTAG GOOD VALUES 0xA000 twiddles


    JTAG BAD 0xA000 twiddles, with --define = CLA_FFT_TABLES_IN_ROM = 1. Same behavior as standalone with --define = CLA_FFT_TABLES_IN_ROM = 0.


    I think something similar to what I have now simulated happens in standalone (bad LS initialization, address mismatch, corrupted memory addresses, etc.), so I end up with bad twiddle factors for the FFT computation.

    Any ideas what this might cause in standalone ?

    - Marek.

  • In further research, I found that in standalone the memory at 0xA000 (RAMLS_4_5) is empty (all zeros). I would attribute this to an error in copying Const or ClaFFtTable into RAM with the memcpy function.

  • Another error symptom: the load sizes are ZERO. This affects CLA1fftTablesLoadSize, Cla1ConstRunSize and Cla1ConstLoadSize.

    Linker snippet:

    #if !(CLA_FFT_TABLES_IN_ROM)
       	CLA1fftTables    : > RAMLS_4_5,            PAGE = 1
    #endif
    #if !(CLA_FFT_TABLES_IN_ROM)
       CLA1fftTables    :  LOAD = FLASHE,
                           RUN = RAMLS_4_5,
                           RUN_START(_CLA1fftTablesRunStart),
                           LOAD_START(_CLA1fftTablesLoadStart),
                           LOAD_SIZE(_CLA1fftTablesLoadSize),
                           PAGE = 1
    #endif //CLA_FFT_TABLES_IN_ROM
    
    
    #ifdef CLA_C
       CLAscratch       :
                         { *.obj(CLAscratch)
                         . += CLA_SCRATCHPAD_SIZE;
                         *.obj(CLAscratch_end) } >  RAMLS_4_5,  PAGE = 1
       .scratchpad      : > RAMLS_4_5,       PAGE = 1
       .bss_cla		    : > RAMLS_4_5,       PAGE = 1
       .const_cla	    :  LOAD = FLASHA,
                           RUN = RAMLS_4_5,
                           RUN_START(_Cla1ConstRunStart),
                           LOAD_START(_Cla1ConstLoadStart),
                           LOAD_SIZE(_Cla1ConstLoadSize),
                           LOAD_END(_Cla1ConstLoadEnd),
                           RUN_SIZE(_Cla1ConstRunSize),
                           RUN_END(_Cla1ConstRunEnd),
                           PAGE = 1
        Cla1DataRam     : > RAMLS_4_5,       PAGE = 1
    #endif //CLA_C


    - Marek.

  • So, Cla1fftTables (c28x_cla_dsp_library.lib<cla_fft_tables.obj>) is not loaded in standalone. How can I force this library section to be loaded into RAM, please? Thanks.

    - Marek.

  • Hi guys, I finally solved the problem. The problem was in Cla1fftTables (as I mentioned above). I had Cla1fftTables defined in the linker command file twice, once placed directly in RAM and once with LOAD = FLASH / RUN = RAM; in standalone the two definitions probably interfered with each other, and nothing was written to RAM.

    Correct linker snippet:

    //THIS MUST BE OFF in FLASH STANDALONE !!
    /*
    #if !(CLA_FFT_TABLES_IN_ROM)
           CLA1fftTables    : > RAMLS_4_5,            PAGE = 0
    #endif
    */
    //Also LSRam must be on page 0 with FLASHE
    #if !(CLA_FFT_TABLES_IN_ROM)
       CLA1fftTables    :  LOAD = FLASHE,
                           RUN = RAMLS_4_5,
                           RUN_START(_CLA1fftTablesRunStart),
                           LOAD_START(_CLA1fftTablesLoadStart),
                           LOAD_SIZE(_CLA1fftTablesLoadSize),
                           PAGE = 0
    #endif //CLA_FFT_TABLES_IN_ROM
    
    
    #ifdef CLA_C
       CLAscratch       :
                         { *.obj(CLAscratch)
                         . += CLA_SCRATCHPAD_SIZE;
                         *.obj(CLAscratch_end) } >  RAMLS_4_5,  PAGE = 0
       .scratchpad      : > RAMLS_4_5,       PAGE = 0
       .bss_cla            : > RAMLS_4_5,       PAGE = 0
       .const_cla        :  LOAD = FLASHA,
                           RUN = RAMLS_4_5,
                           RUN_START(_Cla1ConstRunStart),
                           LOAD_START(_Cla1ConstLoadStart),
                           LOAD_SIZE(_Cla1ConstLoadSize),
                           LOAD_END(_Cla1ConstLoadEnd),
                           RUN_SIZE(_Cla1ConstRunSize),
                           RUN_END(_Cla1ConstRunEnd),
                           PAGE = 0
        Cla1DataRam     : > RAMLS_4_5,       PAGE = 0
    #endif //CLA_C


    Simple mistake, but hard to find :)

    - Marek.