• Join
  • Sign In with my.TI Login
Texas Instruments
  • Products
  • Applications
  • Tools & Software
  • Support & Community
  • Sample & Buy
  • About TI
Sample & Purchase Cart Sample & Purchase Cart
  • Search
  • Advanced
TI E2E™ Community
  • Support Forums
  • Blogs
  • Groups
  • Videos
  • 简体中文
  • More ...
TI Home » TI E2E Community » Support Forums » Embedded Software » BIOS » BIOS forum » EDMA cost more time than memcpy()
Share
BIOS
  • Forum
  • Announcements
Options
  • Subscribe via RSS

EDMA cost more time than memcpy()

EDMA cost more time than memcpy()

This question is not answered
zhao yanjie
Posted by zhao yanjie
on Mar 30 2012 04:39 AM
Intellectual410 points

I use EDMA3 to move one frame of video data from DDR to DDR. It takes 39 ms, but memcpy() takes only 7 ms. Why? Could you help me?

My EDMA program is shown below:

// Frame geometry. The code in this file sizes every row as IMG_WIDTH*2
// bytes (presumably 2 bytes per pixel -- TODO confirm the pixel format).
#define IMG_WIDTH  (720)
#define IMG_HEIGHT (576)
// Alignment, in bytes, requested for every buffer below via DATA_ALIGN.
#define EDMA3_CACHE_LINE_SIZE_IN_BYTES      (128u)

// buffer in L1 SRAM, 32K (0x8000 bytes).
// Placed in section ".L1Buffer"; the section-to-memory mapping comes from
// the linker command file, which is not part of this listing.
#pragma DATA_SECTION( bufL1, ".L1Buffer" )
#pragma DATA_ALIGN( bufL1, EDMA3_CACHE_LINE_SIZE_IN_BYTES );
signed char bufL1[0x8000];

// buffer in L2 SRAM, 64K (0x10000 bytes), placed in section ".L2Buffer".
#pragma DATA_SECTION( bufL2, ".L2Buffer" )
#pragma DATA_ALIGN( bufL2, EDMA3_CACHE_LINE_SIZE_IN_BYTES );
signed char bufL2[0x10000];

// buffer in DDR2, 32M (0x2000000 bytes), placed in section ".ExtBuffer".
// Passed to test_2D2D() as the source frame.
#pragma DATA_SECTION( bufExt1, ".ExtBuffer" )
#pragma DATA_ALIGN( bufExt1, EDMA3_CACHE_LINE_SIZE_IN_BYTES );
signed char bufExt1[0x2000000];

// buffer in DDR2, 32M (0x2000000 bytes), placed in section ".ExtBuffer".
// Passed to test_2D2D() as the destination frame.
#pragma DATA_SECTION( bufExt2, ".ExtBuffer" )
#pragma DATA_ALIGN( bufExt2, EDMA3_CACHE_LINE_SIZE_IN_BYTES );
signed char bufExt2[0x2000000];

void main() {
  /*
   * Mark the external memory window starting at 0x80000000 and spanning
   * 0x10000000 bytes (256M) as cacheable, then call edma3init().
   */
  BCACHE_setMar( (Ptr)(0x80000000),
                 0x10000000,
                 BCACHE_MAR_ENABLE );
  edma3init();
}

/*
 * Runs the memory-to-memory EDMA3 test on the two external buffers and
 * logs whether it passed.
 *
 * The test is reported as passed only when test_2D2D() returns
 * EDMA3_DRV_SOK; any other value is reported as a failure.
 */
void edma3_test() {
  int status;

  LOG_printf( &trace, "Starting EDMA3 examples!\n");

  // bufExt1 and bufExt2 are both placed in the ".ExtBuffer" (DDR2) section,
  // so the result messages name DDR, not L2, as the memory under test.
  status = test_2D2D( hEdma, bufExt1, bufExt2 );
  if ( status != EDMA3_DRV_SOK ) {
    LOG_printf( &trace, "test_2D2D() DDR to DDR failed!\n");
  } else {
    LOG_printf( &trace, "test_2D2D() DDR to DDR passed!\n");
  }

  LOG_printf( &trace, "All EDMA3 examples are done!\n");
}
//======================================================================
/*
 * Compares the source and destination frames byte by byte.
 *
 * Walks IMG_HEIGHT rows of IMG_WIDTH*2 bytes each, advancing both pointers
 * by one row after each row has been compared.
 *
 * Returns 0 when every byte matches, -1 as soon as a mismatch is found.
 */
int check_2D2D( signed char *src, signed char *dst ) {
  int row = 0;

  while ( row < IMG_HEIGHT ) {
    int col;

    for ( col = 0; col < IMG_WIDTH*2; ++col ) {
      if ( src[col] == dst[col] ) {
        continue;
      }
      return -1;
    }

    /* step both frames to the start of the next row */
    src += IMG_WIDTH*2;
    dst += IMG_WIDTH*2;
    ++row;
  }

  return 0;
}

/*
 * Copies one frame of IMG_HEIGHT rows x IMG_WIDTH*2 bytes from src to dst
 * with EDMA3, verifies the copy, and then times a memcpy() of the same
 * frame for comparison. Timings are written to the trace log.
 *
 *   hEdma  EDMA3 driver handle used for every driver call.
 *   src    source frame; overwritten with a test pattern by this function.
 *   dst    destination frame; cleared by this function before the transfer.
 *
 * Returns EDMA3_DRV_SOK on success, so callers can compare the result with
 * EDMA3_DRV_SOK, or a negative code on failure:
 *   -1  the channel request or one of the PaRAM setup calls failed
 *   -2  enabling the transfer failed, or evtMiss was non-zero afterwards
 *   -3  dst did not match src after the transfer
 *
 * The channel obtained from EDMA3_DRV_requestChannel() is released on every
 * exit path, so repeated calls do not use up channels.
 */
int test_2D2D( EDMA3_DRV_Handle hEdma,
               signed char *src, signed char *dst ) {
  unsigned int chId = EDMA3_DRV_DMA_CHANNEL_ANY;
  unsigned int tcc = EDMA3_DRV_TCC_ANY;
  int i, j;
  int fail = EDMA3_DRV_SOK;
  int chRequested = 0;   // set once a channel has actually been obtained
  signed char *src1, *dst1;
  unsigned int ts, te;
  Uint32 startime, endtime;

  // prepare data for src and dst buffer: each src row holds the column
  // index (converted to signed char), each dst row is cleared
  src1 = src;
  dst1 = dst;
  for ( i = 0; i < IMG_HEIGHT; i++ ) {
    for ( j = 0; j < IMG_WIDTH*2; j++ ) {
      src1[j] = j;
      dst1[j] = 0;
    }
    src1 += IMG_WIDTH*2;
    dst1 += IMG_WIDTH*2;
  }

  LOG_printf( &trace, "test_2D2D() started:" );

  evtMiss = 0; // clear event miss count

  ts = C64P_getltime();
  // write back src
  BCACHE_wb ( (void *)src, IMG_WIDTH*2*IMG_HEIGHT, 1 ); // wait for it to finish
  // invalidate dst
  BCACHE_inv( (void *)dst, IMG_WIDTH*2*IMG_HEIGHT, 1 ); // wait for it to finish
  te = C64P_getltime();
  LOG_printf( &trace, "    cache operation cycles = %u", te - ts );

  // startime is taken before the channel request, so the "edma3 costs time"
  // figure logged below includes channel allocation and PaRAM setup as well
  // as the transfer itself; ts/te further down bracket only the transfer.
  startime = CLK_getltime();

  // request channel
  if ( EDMA3_DRV_requestChannel ( hEdma, &chId, &tcc, (EDMA3_RM_EventQueue)0,
                                  &callback_my, &myCbData[0] ) != EDMA3_DRV_SOK ) {
    fail = -1;
    goto func_return;
  }
  chRequested = 1;

  // Fill the PaRAM set.
  // Programmed geometry: acnt = 1, bcnt = IMG_WIDTH*2 (1440),
  // ccnt = IMG_HEIGHT (576), AB-synchronized. Each trigger therefore moves
  // one row as 1440 one-byte arrays, and IMG_HEIGHT triggers are needed for
  // the whole frame.
  // NOTE(review): this is the likely reason the EDMA path is slower than
  // memcpy(). Programming acnt = IMG_WIDTH*2, bcnt = IMG_HEIGHT, ccnt = 1
  // with B indexes of IMG_WIDTH*2 would move the frame with a single
  // trigger, but numTrs and callback_my (not visible here) would have to be
  // changed to match -- confirm before changing.
  myCbData[0].chId = chId;
  myCbData[0].hEdma = hEdma;
  myCbData[0].numTrs = IMG_HEIGHT; // presumably the number of completions
                                   // callback_my waits for -- TODO confirm
  myCbData[0].numTrCnt = 0;

  if ( EDMA3_DRV_setSrcParams ( hEdma, chId, (unsigned int)(src),
                                EDMA3_DRV_ADDR_MODE_INCR,
                                EDMA3_DRV_W8BIT ) != EDMA3_DRV_SOK ) {
    fail = -1;
    goto func_return;
  }
  if ( EDMA3_DRV_setDestParams ( hEdma, chId, (unsigned int)(dst),
                                 EDMA3_DRV_ADDR_MODE_INCR,
                                 EDMA3_DRV_W8BIT ) != EDMA3_DRV_SOK ) {
    fail = -1;
    goto func_return;
  }
  if ( EDMA3_DRV_setSrcIndex ( hEdma, chId, 1, IMG_WIDTH*2 ) != EDMA3_DRV_SOK ) {
    fail = -1;
    goto func_return;
  }
  if ( EDMA3_DRV_setDestIndex ( hEdma, chId, 1, IMG_WIDTH*2 ) != EDMA3_DRV_SOK ) {
    fail = -1;
    goto func_return;
  }
  if ( EDMA3_DRV_setTransferParams ( hEdma, chId, 1, IMG_WIDTH*2, IMG_HEIGHT, IMG_WIDTH*2,
                                     EDMA3_DRV_SYNC_AB ) != EDMA3_DRV_SOK ) {
    fail = -1;
    goto func_return;
  }
  // enable both the final and the intermediate completion interrupts
  if ( EDMA3_DRV_setOptField ( hEdma, chId, EDMA3_DRV_OPT_FIELD_TCINTEN, 1u )
                               != EDMA3_DRV_SOK ) {
    fail = -1;
    goto func_return;
  }
  if ( EDMA3_DRV_setOptField ( hEdma, chId, EDMA3_DRV_OPT_FIELD_ITCINTEN, 1u )
                               != EDMA3_DRV_SOK ) {
    fail = -1;
    goto func_return;
  }

  trFinished = 0;
  ts = C64P_getltime();
  if ( EDMA3_DRV_enableTransfer ( hEdma, chId, EDMA3_DRV_TRIG_MODE_MANUAL )
       != EDMA3_DRV_SOK ) {
    fail = -2;
    goto func_return;
  }
  // Busy-wait until trFinished becomes non-zero.
  // NOTE(review): trFinished is declared elsewhere; it needs to be volatile
  // for this loop to be safe under optimization -- confirm.
  while ( trFinished == 0 ); // wait for transferring Y to finish
  endtime = CLK_getltime();
  te = C64P_getltime();
  LOG_printf(&trace,"edma3 costs time =%d",endtime-startime);
  LOG_printf( &trace, "    Tran(Y) cycles = %u", te - ts );

  // checking correctness
  if ( check_2D2D( src, dst ) ) {
    fail = -3;
    goto func_return;
  }

  if ( evtMiss ) { // is there any event miss?
    fail = -2;
    goto func_return;
  }

  // Reference measurement: CPU copy of the same frame.
  startime = CLK_getltime();
  memcpy( dst, src, IMG_WIDTH*2*IMG_HEIGHT );
  endtime = CLK_getltime();
  LOG_printf(&trace,"memcpy costs time =%d",endtime-startime);

func_return:
  // Release the channel on success and on failure alike.
  if ( chRequested ) {
    if ( EDMA3_DRV_freeChannel ( hEdma, chId ) != EDMA3_DRV_SOK ) {
      LOG_printf( &trace, "    EDMA3_DRV_freeChannel() failed" );
    }
  }
  return fail;
}

EDMA3
Report Abuse
  • Reply
You have posted to a forum that requires a moderator to approve posts before they are publicly available.
All Replies
  • Prasad Konnur
    Posted by Prasad Konnur
    on May 07 2012 09:48 AM
    Intellectual1385 points

    Hi,

    Are you still facing the issue you mentioned?

    In memcpy() you are passing dst and src buffers that lie in a cacheable area, so a cache flush is required to write the data back to the DDR locations. The EDMA hardware, on the other hand, writes to the DDR locations directly, which may be why you are seeing this difference.

    Regarding the EDMA programming: you are transferring acnt * bcnt bytes per transfer trigger (your acnt is 1), and you need ccnt triggers. I am not sure from the attached code whether you are seeing the full transfer of acnt * bcnt * ccnt bytes. Maybe you can rearrange the PaRAM set so that ccnt is 1 and the whole data is transferred at once. For smaller transfers the EDMA3 LLD overhead is proportionally larger, so you might get slightly better EDMA performance if you transfer everything at once.

    Also, please mention which platform you are running on and which EDMA3 LLD and BIOS versions you are using.

    Regards,
    Prasad

    Report Abuse
    • Reply
    You have posted to a forum that requires a moderator to approve posts before they are publicly available.
  • RCReddy
    Posted by RCReddy
    on May 10 2012 00:14 AM
    Genius3265 points

    Hi,

    I am a novice with EDMA. I just wanted to know whether you are doing

    "data transfer from one chip DDR to another chip DDR"

    or

    "data transfer from one chip DDR to same chip DDR" [If so, why is EDMA required? Can't I relocate the contents in memory directly? I mean, define an array/structure at a certain memory location, say xyz[0] sitting at 0x80000024 and pqr[0] sitting at 0x80000056, and then copy directly. Is there any advantage I get with EDMA?]

    Can you give more details on what you are trying to do? [I am learning a lot here.]

    Thanks

    RC Reddy

    Report Abuse
    • Reply
    You have posted to a forum that requires a moderator to approve posts before they are publicly available.
  • RCReddy
    Posted by RCReddy
    on May 10 2012 08:59 AM
    Genius3265 points

    I understand that memcpy() involves the processor and takes more cycles or time. Is there any comparison of the throughput of memcpy() vs. EDMA in any TI forum? [Is that kind of comparison apt?]

    Thanks

    RC Reddy

    Report Abuse
    • Reply
    You have posted to a forum that requires a moderator to approve posts before they are publicly available.
TI E2E™ Community
  • Support Forums
  • Blogs
  • Videos
  • Groups
  • Site Support & Feedback
  • Settings
TI E2E™ Community Groups
  • TI University Program
  • Make the Switch
  • Microcontroller Projects
  • Motor Drive & Control
Other Communities
  • Deyisupport
  • Designsomething.org
  • beagleboard.org
  • TI on Element 14
  • TI on TechXchangeSM
Other Technical & Support Resources
  • WEBENCH® Design Center
  • Product Information Centers
  • Technical Documents
  • TI Design Network
  • TI Technical Articles
  • TI Training

All content and materials on this site are provided "as is". TI and its respective suppliers and providers of content make no representations about the suitability of these materials for any purpose and disclaim all warranties and conditions with regard to these materials, including but not limited to all implied warranties and conditions of merchantability, fitness for a particular purpose, title and non-infringement of any third party intellectual property right. TI and its respective suppliers and providers of content make no representations about the suitability of these materials for any purpose and disclaim all warranties and conditions with respect to these materials. No license, either express or implied, by estoppel or otherwise, is granted by TI. Use of the information on this site may require a license from a third party, or a license from TI.

Content on this site may contain or be subject to specific guidelines or limitations on use. All postings and use of the content on this site are subject to the Terms of Use of the site; third parties using this content agree to abide by any limitations or guidelines and to comply with the Terms of Use of this site. TI, its suppliers and providers of content reserve the right to make corrections, deletions, modifications, enhancements, improvements and other changes to the content and materials, its products, programs and services at any time or to move or discontinue any content, products, programs, or services without notice.

Follow Us Texas Instruments on Facebook Texas Instruments on Twitter Texas Instruments on LinkedIn Texas Instruments on Google+
TI Worldwide | Contact Us | my.TI Login | Site Map | Corporate Citizenship | mobile m.ti.com (Mobile Version)

TI is a global semiconductor design and manufacturing company. Innovate with 100,000+ analog ICs and
embedded processors, along with software, tools and the industry’s largest sales/support staff.

© Copyright 1995-2013 Texas Instruments Incorporated. All rights reserved.
Trademarks | Privacy Policy | Terms of Use