I use EDMA3 to move one frame of video data from DDR to DDR. It takes 39 ms, but memcpy() takes only 7 ms. Why? Could you help me?
My EDMA program is shown below:
// Frame geometry used by the copy test below. The transfer and check code
// treats every row as IMG_WIDTH*2 bytes and processes IMG_HEIGHT rows.
#define IMG_WIDTH (720)
#define IMG_HEIGHT (576)
// Alignment, in bytes, applied to every buffer below through DATA_ALIGN.
#define EDMA3_CACHE_LINE_SIZE_IN_BYTES (128u)
// buffer in L1 SRAM, 32K (0x8000 bytes), placed in linker section ".L1Buffer"
#pragma DATA_SECTION( bufL1, ".L1Buffer" )
#pragma DATA_ALIGN( bufL1, EDMA3_CACHE_LINE_SIZE_IN_BYTES );
signed char bufL1[0x8000];
// buffer in L2 SRAM, 64K (0x10000 bytes), placed in linker section ".L2Buffer"
#pragma DATA_SECTION( bufL2, ".L2Buffer" )
#pragma DATA_ALIGN( bufL2, EDMA3_CACHE_LINE_SIZE_IN_BYTES );
signed char bufL2[0x10000];
// buffer in DDR2, 32M (0x2000000 bytes), placed in linker section ".ExtBuffer";
// passed to test_2D2D() as the source frame by edma3_test()
#pragma DATA_SECTION( bufExt1, ".ExtBuffer" )
#pragma DATA_ALIGN( bufExt1, EDMA3_CACHE_LINE_SIZE_IN_BYTES );
signed char bufExt1[0x2000000];
// buffer in DDR2, 32M (0x2000000 bytes), placed in linker section ".ExtBuffer";
// passed to test_2D2D() as the destination frame by edma3_test()
#pragma DATA_SECTION( bufExt2, ".ExtBuffer" )
#pragma DATA_ALIGN( bufExt2, EDMA3_CACHE_LINE_SIZE_IN_BYTES );
signed char bufExt2[0x2000000];
void main() {
// set entire external memory to be cacheable (256M)
BCACHE_setMar( (Ptr)(0x80000000), 0x10000000, BCACHE_MAR_ENABLE );
edma3init();
return;
}
// Runs the EDMA3 memory-to-memory copy example and logs the outcome.
//
// The frame is copied from bufExt1 to bufExt2. Both buffers are placed in the
// ".ExtBuffer" section (DDR2), so the log text names a DDR to DDR copy rather
// than a copy into L2.
void edma3_test() {
    int status;

    LOG_printf( &trace, "Starting EDMA3 examples!\n");

    // test_2D2D() is treated as passed only when it returns EDMA3_DRV_SOK
    status = test_2D2D( hEdma, bufExt1, bufExt2 );
    if ( status != EDMA3_DRV_SOK ) {
        LOG_printf( &trace, "test_2D2D() DDR to DDR failed!\n");
    } else {
        LOG_printf( &trace, "test_2D2D() DDR to DDR passed!\n");
    }

    LOG_printf( &trace, "All EDMA3 examples are done!\n");
}
//======================================================================
// Compares two frames of IMG_HEIGHT rows, each IMG_WIDTH*2 bytes long and
// stored back to back.
//
// src, dst : first byte of the two frames to compare
// returns  : 0 when every byte matches, -1 at the first mismatch
int check_2D2D( signed char *src, signed char *dst ) {
    const int rowBytes = IMG_WIDTH*2;
    const signed char *srcRow = src;
    const signed char *dstRow = dst;
    int row;

    for ( row = 0; row < IMG_HEIGHT; row++ ) {
        int col = 0;

        while ( col < rowBytes ) {
            if ( srcRow[col] != dstRow[col] ) {
                return -1;
            }
            col++;
        }

        // step both cursors to the start of the next row
        srcRow += rowBytes;
        dstRow += rowBytes;
    }

    return 0;
}
// Copies one frame (IMG_HEIGHT rows of IMG_WIDTH*2 bytes) from src to dst with
// EDMA3, verifies the copy, then times a plain memcpy() of the same frame.
//
// hEdma : EDMA3 driver handle passed to every EDMA3_DRV_* call
// src   : source frame; overwritten here with a test pattern
// dst   : destination frame; cleared here, then written by the DMA
//
// returns EDMA3_DRV_SOK on success, which is the value edma3_test() compares
//         the result against;
//         -1 if requesting or programming the channel failed,
//         -2 if the transfer could not be started or an event miss was counted,
//         -3 if dst does not match src after the transfer.
//
// The channel obtained from EDMA3_DRV_requestChannel() is released again on
// every exit path, so repeated calls do not use up channels.
int test_2D2D( EDMA3_DRV_Handle hEdma,
               signed char *src, signed char *dst ) {
    unsigned int chId = EDMA3_DRV_DMA_CHANNEL_ANY;
    unsigned int tcc = EDMA3_DRV_TCC_ANY;
    int i, j;
    int fail = EDMA3_DRV_SOK;
    int chRequested = 0;   // set once a channel is owned and must be freed
    signed char *src1, *dst1;
    unsigned int ts, te;
    Uint32 startime, endtime;

    // prepare data for src and dst buffer: each src row holds the column
    // index truncated to a signed char, dst is cleared
    src1 = src;
    dst1 = dst;
    for ( i = 0; i < IMG_HEIGHT; i++ ) {
        for ( j = 0; j < IMG_WIDTH*2; j++ ) {
            src1[j] = (signed char)j;
            dst1[j] = 0;
        }
        src1 += IMG_WIDTH*2;
        dst1 += IMG_WIDTH*2;
    }

    LOG_printf( &trace, "test_2D2D() started:" );
    evtMiss = 0; // clear event miss count

    ts = C64P_getltime();
    // write back src
    BCACHE_wb ( (void *)src, IMG_WIDTH*2*IMG_HEIGHT, 1 ); // wait for it to finish
    // invalidate dst
    BCACHE_inv( (void *)dst, IMG_WIDTH*2*IMG_HEIGHT, 1 ); // wait for it to finish
    te = C64P_getltime();
    LOG_printf( &trace, " cache operation cycles = %u", te - ts );

    // "edma3 costs time" is measured from here, so it includes the channel
    // request and parameter setup as well as the transfer itself
    startime = CLK_getltime();

    // request any free channel / TCC on event queue 0
    if ( EDMA3_DRV_requestChannel ( hEdma, &chId, &tcc, (EDMA3_RM_EventQueue)0,
                                    &callback_my, &myCbData[0] ) != EDMA3_DRV_SOK ) {
        fail = -1;
        goto func_return;
    }
    chRequested = 1;

    // Fill the PaRAM set. The frame is programmed as a 3-D transfer:
    // acnt = 1, bcnt = IMG_WIDTH*2, ccnt = IMG_HEIGHT, AB-synchronized.
    //
    // NOTE(review): with acnt = 1 every array the DMA moves is a single byte,
    // and with ITCINTEN set and numTrs = IMG_HEIGHT the copy appears to be
    // completed row by row with one interrupt per row. This is the likely
    // reason the DMA is slower than memcpy(). Programming
    // acnt = IMG_WIDTH*2, bcnt = IMG_HEIGHT, ccnt = 1 with B indices of
    // IMG_WIDTH*2 would move the whole frame with one trigger; confirm how
    // callback_my uses numTrs before changing it.
    myCbData[0].chId = chId;
    myCbData[0].hEdma = hEdma;
    myCbData[0].numTrs = IMG_HEIGHT;
    myCbData[0].numTrCnt = 0;

    if ( EDMA3_DRV_setSrcParams ( hEdma, chId, (unsigned int)(src),
                                  EDMA3_DRV_ADDR_MODE_INCR,
                                  EDMA3_DRV_W8BIT ) != EDMA3_DRV_SOK ) {
        fail = -1;
        goto func_return;
    }
    if ( EDMA3_DRV_setDestParams ( hEdma, chId, (unsigned int)(dst),
                                   EDMA3_DRV_ADDR_MODE_INCR,
                                   EDMA3_DRV_W8BIT ) != EDMA3_DRV_SOK ) {
        fail = -1;
        goto func_return;
    }
    if ( EDMA3_DRV_setSrcIndex ( hEdma, chId, 1, IMG_WIDTH*2 ) != EDMA3_DRV_SOK ) {
        fail = -1;
        goto func_return;
    }
    if ( EDMA3_DRV_setDestIndex ( hEdma, chId, 1, IMG_WIDTH*2 ) != EDMA3_DRV_SOK ) {
        fail = -1;
        goto func_return;
    }
    if ( EDMA3_DRV_setTransferParams ( hEdma, chId, 1, IMG_WIDTH*2, IMG_HEIGHT, IMG_WIDTH*2,
                                       EDMA3_DRV_SYNC_AB ) != EDMA3_DRV_SOK ) {
        fail = -1;
        goto func_return;
    }
    // set the TCINTEN and ITCINTEN option fields to 1
    if ( EDMA3_DRV_setOptField ( hEdma, chId, EDMA3_DRV_OPT_FIELD_TCINTEN, 1u )
         != EDMA3_DRV_SOK ) {
        fail = -1;
        goto func_return;
    }
    if ( EDMA3_DRV_setOptField ( hEdma, chId, EDMA3_DRV_OPT_FIELD_ITCINTEN, 1u )
         != EDMA3_DRV_SOK ) {
        fail = -1;
        goto func_return;
    }

    trFinished = 0;
    ts = C64P_getltime();
    if ( EDMA3_DRV_enableTransfer ( hEdma, chId, EDMA3_DRV_TRIG_MODE_MANUAL )
         != EDMA3_DRV_SOK ) {
        fail = -2;
        goto func_return;
    }
    // busy-wait until trFinished becomes non-zero; it is not written in this
    // function (presumably callback_my sets it - confirm)
    while ( trFinished == 0 );
    endtime = CLK_getltime();
    te = C64P_getltime();
    LOG_printf(&trace,"edma3 costs time =%d",endtime-startime);
    LOG_printf( &trace, " Tran(Y) cycles = %u", te - ts );

    // checking correctness
    if ( check_2D2D( src, dst ) ) {
        fail = -3;
        goto func_return;
    }
    if ( evtMiss ) { // is there any event miss?
        fail = -2;
        goto func_return;
    }

    // reference measurement: the same frame copied by the CPU
    startime = CLK_getltime();
    memcpy(dst,src, IMG_WIDTH*2*IMG_HEIGHT);
    endtime = CLK_getltime();
    LOG_printf(&trace,"memcpy costs time =%d",endtime-startime);

func_return:
    // release the channel on success and on failure alike; a failure to free
    // is logged but does not change the test result
    if ( chRequested ) {
        if ( EDMA3_DRV_freeChannel ( hEdma, chId ) != EDMA3_DRV_SOK ) {
            LOG_printf( &trace, "test_2D2D(): freeing the EDMA3 channel failed" );
        }
    }
    return fail;
}