Hi experts,
I am using the TDA4VM SDK 8.1, and I am trying to set a non-cached memory region (64 MB) to zero with the C library memset function on mcu3_0,
but it takes about 14 seconds.
When I replace memset with the mem_set8_arm function shown below, it takes about 500 ms.
/**
 * Fill n bytes starting at dest with the byte value (unsigned char)c.
 *
 * Same contract as memset(): returns dest. The bulk of the region is
 * written with 32-bit stores; on ARM targets 64-byte chunks are written
 * with four 16-byte store-multiple instructions.
 *
 * @param dest  start of the region to fill
 * @param c     fill value; only the low 8 bits are used
 * @param n     number of bytes to fill
 * @return      dest
 */
static void *mem_set8_arm(void *dest, int c, size_t n)
{
    uint8_t *bytes = dest;
    const uint8_t fill8 = (uint8_t)c;
    /* Replicate the byte into all four lanes without shifting a signed int. */
    const uint32_t fill32 = (uint32_t)fill8 * 0x01010101u;

    /* Byte-fill the head so every following word store is 4-byte aligned. */
    while (n > 0 && ((uintptr_t)bytes & 3u) != 0) {
        *bytes++ = fill8;
        n--;
    }

    uint32_t *words = (uint32_t *)(void *)bytes;

#if defined(__arm__)
    while (n >= 64) {
        /*
         * Four STMIA of r4-r7 store 4 * 16 = 64 bytes. The destination is a
         * read-write operand because STMIA with '!' writes the advanced
         * address back to the register, so no separate pointer increment is
         * needed. The pattern is passed by value, and every scratch register
         * plus "memory" is listed as clobbered.
         */
        __asm__ __volatile__(
            "mov r4, %[pat]\n\t"
            "mov r5, %[pat]\n\t"
            "mov r6, %[pat]\n\t"
            "mov r7, %[pat]\n\t"
            "stmia %[dst]!, {r4-r7}\n\t"
            "stmia %[dst]!, {r4-r7}\n\t"
            "stmia %[dst]!, {r4-r7}\n\t"
            "stmia %[dst]!, {r4-r7}"
            : [dst] "+r" (words)
            : [pat] "r" (fill32)
            : "r4", "r5", "r6", "r7", "memory");
        n -= 64;
    }
#endif

    /* Remaining whole words (on non-ARM builds this handles the bulk too). */
    while (n >= 4) {
        *words++ = fill32;
        n -= 4;
    }

    /* Trailing 0-3 bytes. */
    bytes = (uint8_t *)words;
    while (n > 0) {
        *bytes++ = fill8;
        n--;
    }

    return dest;
}
I have two questions:
1. Why are the LLVM C library memset/memcpy functions so slow?
2. Why is the R5 DDR access bandwidth only about 100 MB/s?
Regards,
Li quan





