The result of QDMA on my 6678 EVM board confuse me.
why is the time of call QDMA_copy_fast is so long?
Thank you!
6678,1GHz,EVM board
My QDMA 1D-1D copy code:
----------------------------------------------------
void QDMA_copy_fast(Uint8 *src, Uint8 *dst, Uint32 len, Uint32 Tccnum)
{
Uint32 * restrict p=(Uint32 *)(0x2704000+Tccnum*32); //global region
#if 0 /* 32bit write */
*p = 0x80100808| (Tccnum <<12); //OPT,dma early end, 1d->1d
*(p+1) = (Uint32)src; //SRC
*(p+2) = (1<<16)|len; //BCNT, ACNT
*(p+3) = (Uint32)dst; //DST
*(p+4) = (1<<16)|1; //DSTBDIX, SRCBDIX;
*(p+5) = 0xFFFF; //BCNTRLD, LINK;
*(p+6) = 1<<16; //DSTCIDX, SRCCDIX;
*(p+7) = 1; //CCNT
#else /* 64bit write */
_amemd8(p) = _itod((Uint32)src, (0x80100808| (Tccnum <<12)));
_amemd8(p+2) = _itod((Uint32)dst, (1<<16)|len);
_amemd8(p+4) = _itod( 0xFFFF, (1<<16)|1);
_amemd8(p+6) = _itod(1, (1<<16));
#endif
}
The out asm file:(O3)
-------------------------------------------------------
QDMA_copy_fast:
MVKL .S2 0x2704000,B5
SHL .S2 B6,5,B4 ; |207|
|| MV .L1X B4,A7 ; |206|
MVKH .S2 0x2704000,B5
|| MVKL .S1 0x10001,A8
RET .S2 B3 ; |233|
|| SET .S1 A6,16,16,A6 ; |222|
ADD .L2 B5,B4,B4 ; |207|
|| MVKH .S1 0x10001,A8
|| MVKL .S2 0x80100808,B7
STDW .D2T1 A7:A6,*+B4(8) ; |222|
|| ADD .L1 -2,A8,A9
|| MVKH .S2 0x80100808,B7
STDW .D2T1 A9:A8,*+B4(16) ; |223|
|| SHL .S2 B6,12,B5 ; |221|
|| ADD .L1 -1,A8,A4
|| MVK .S1 1,A5 ; |224|
|| MV .L2X A4,B9 ; |206|
STDW .D2T1 A5:A4,*+B4(24) ; |224|
|| OR .L2 B7,B5,B8 ; |221|
STDW .D2T2 B9:B8,*B4 ; |221|
; BRANCH OCCURS {B3} ; |233|
The Call code :
----------------------------------------------------------
CACHE_setL1DSize(CACHE_32KCACHE);
CACHE_setL1PSize(CACHE_32KCACHE);
CACHE_setL2Size (CACHE_256KCACHE);
for(i=0;i<256;i++)
CACHE_enableCaching(i);
...
...
...
InitQDMA();//my code
startClock = GET_Clock();//read TSCL&TSCH, return 64bit clock
for(i=0;i<1*1024*1024;i++)
{
QDMA_copy_fast(srcBuff1, dstBuff1, 3*1024, 0);
//QDMA_copy_wait(0);
}
endClock = GET_Clock();
ftime = (float)((endClock - startClock) / 1000)/1000;
printf("ftime = %f ms\n",ftime);
...
...
...
Run output
--------------------------------------------------
disableQDMA_copy_wait(0):
ftime = 281.364990ms //268 clocks per loop , too long!!!!
enable QDMA_copy_wait(0):
ftime = 374.341003ms //356 clocks per loop , too long!!!!