Hello,
I've been having trouble with DMA and stale cache so I wrote a some cache coherency tests. Writeback seems to work properly but invalidate does not.
I'm already aware of advisory 6 (http://www.ti.com/lit/er/sprz332b/sprz332b.pdf) and this thread (http://e2e.ti.com/support/dsp/c6000_multi-core_dsps/f/639/t/253690.aspx). Following the advisory (see cache_clean()) in the sample code below) doesn't fix the issue.
Here's the relevant parts of my test code. Any referenced function or type that isn't defined here should be obvious (e.g. u32 is an unsigned 32-bit int) and has been independently verified (e.g. GetUncachedPointer() passes its unit test):
// Returns cache aligned address by rounding up if needed.
#define MON_CACHE_ALIGN_UP(addr) (u08*)((((u32)addr)+CACHELINE_LEN-1) & ~(CACHELINE_LEN-1))
// Returns cache aligned address by rounding down if needed.
#define MON_CACHE_ALIGN_DOWN(addr) (u08*)(((u32)addr) & ~(CACHELINE_LEN-1))
static int MonCacheInvalidateTest(u32 iterations)
{
u08 * pBuffer = NULL;
u08 * pDst = NULL;
u08 * pUncached = NULL;
u32 maxSize = 100000;
u32 minSize = maxSize/10;
u32 bufSize = 0;
u32 offset = 0;
i32 index = 0;
u32 errorCount = 0;
u32 iteration = 0;
u08 clearValue = 0xCC;
u08 setValue = 0x2B;
u08 * cacheAlignedStart = NULL;
u08 * cacheAlignedEnd = NULL;
u32 cacheAlignedLen = 0;
volatile u32 junk = 0;
printf("Performing cache invalidate test ...\n");
// Grab the necessary buffer
if ((pBuffer = malloc(maxSize)) == NULL)
{
printf("Out of memory in %s line %d.\n",__FILE__,__LINE__);
return(0);
}
memset(pBuffer,clearValue,maxSize);
// This test needs to be repeated a large number of time to detect problems
for (iteration=0; iteration < iterations; iteration++)
{
// Give periodic progress updates
if (iteration%(iterations/10) == 0)
{
printf("Iteration %d of %d ... \n",iteration,iterations);
}
// For each iteration, pick a random size and starting offset
if (MonCacheRandSizeAndLength(maxSize,minSize,&bufSize,&offset) == 0)
{
printf("Error determining random buffer size and offset!\n");
return(0);
}
pDst = pBuffer + offset;
// Force to cache aligned
cacheAlignedStart = MON_CACHE_ALIGN_UP(pDst);
cacheAlignedEnd = MON_CACHE_ALIGN_DOWN(pDst+bufSize);
cacheAlignedLen = cacheAlignedEnd - cacheAlignedStart;
bufSize = cacheAlignedLen;
pDst = cacheAlignedStart;
// Access access the buffer so it makes it into cache
for (index=0; index<bufSize; index++)
{
junk += *(pDst+index);
}
// Get an uncached pointer to the destination
pUncached = (u08 *)GetUncachedPointer(pDst);
// Set the destination directly in external memory
memset(pUncached,++setValue,bufSize);
// Clean cache for the destination area
clean_words((int *)cacheAlignedStart,cacheAlignedLen / sizeof(int));
// Check each byte to ensure the copy worked, check backwards to catch timing case
for (index=bufSize-1; index>=0; index--)
{
// Use normal pointer to ensure that cache resolves to match what's in external memory
if (*(pDst+index) != setValue)
{
printf("Mismatch at 0x%08x[%d of %d]=0x%02x != 0x%02x) on iteration %d\n",
pDst,index,bufSize,*(pDst+index),setValue,iteration);
errorCount++;
break;
}
}
} // for (iterations...)
// Report success or failure
if (errorCount != 0)
{
printf("Cache invalidate test FAILED!!! %d failures\n",errorCount);
}
else
{
printf("Cache invalidate test passed.\n");
}
// Release memory used for this test
if (pBuffer)
{
free(pBuffer);
pBuffer = NULL;
}
return(0);
}
void clean_words (int *pt, int word_count)
{
int words_done = 0;
while (words_done < word_count)
{
int words_to_do = word_count - words_done;
if (words_to_do > 0x4000)
words_to_do = 0x4000; // Limit to well below maximum
CacheRegs->L2WIBAR = (Uint32)&pt[words_done];
CacheRegs->L2WIWC = words_to_do;
__asm("\t MFENCE"); // Block until all memory operations are complete.
__asm("\t NOP 5"); // 16 NOP's (See sprz334d Advisory 7)
__asm("\t NOP 5");
__asm("\t NOP 5");
__asm("\t NOP");
words_done += words_to_do; // Accumulate the done words
}
}
Here's a sample of the output it generates:
[C66xx_0] Performing cache invalidate test ...
[C66xx_0] Iteration 0 of 10000 ...
[C66xx_0] Mismatch at 0x8202ba00[64895 of 64896]=0xcc != 0x2c) on iteration 0
[C66xx_0] Mismatch at 0x82033600[16383 of 16512]=0xcc != 0x2d) on iteration 1
[C66xx_0] Mismatch at 0x82026600[86015 of 86144]=0xcc != 0x2e) on iteration 2
[C66xx_0] Mismatch at 0x82029a80[10879 of 11008]=0x2e != 0x2f) on iteration 3
[C66xx_0] Mismatch at 0x82027d00[79487 of 79616]=0x2e != 0x30) on iteration 4
[C66xx_0] Mismatch at 0x82029b00[76031 of 76032]=0xcc != 0x31) on iteration 5
[C66xx_0] Mismatch at 0x82024d00[50687 of 50816]=0x31 != 0x32) on iteration 6
[C66xx_0] Mismatch at 0x8202ac80[46847 of 46976]=0x31 != 0x33) on iteration 7
[C66xx_0] Mismatch at 0x82026000[46463 of 46592]=0x33 != 0x34) on iteration 8
[C66xx_0] Mismatch at 0x82025300[88063 of 88192]=0x31 != 0x35) on iteration 9
[C66xx_0] Mismatch at 0x82023f00[93183 of 93824]=0x35 != 0x36) on iteration 10
Is my test flawed or have I isolated a bug in the cache invalidate functionality? Any suggestions are greatly appreciated! :)
Thanks,
Swade