Hi,
I have been working with CMEM on a Cortex-A9 (ARMv7) platform and have found that the cache operations do not work properly when they go through the L2 (outer_*) cache calls.
It looks like the __pa() macro used in the outer_*_range calls does not return the correct physical address when the buffer has been allocated / mmapped in a non-CMA block. This is because the virtp passed to __pa() is a user-space virtual address, not a kernel logical address, and __pa() is only valid for kernel logical addresses.
Also, for invalidation, ARM's documentation suggests that the caches be invalidated from the outside in (L2 first, then L1), which is consistent with what dma_sync_single_for_cpu() does.
The patch below shows what I had to change to get invalidation working properly. Can anyone confirm this or check it over?
diff --git a/src/cmem/module/cmemk.c b/src/cmem/module/cmemk.c
index e757345..717547d 100644
--- a/src/cmem/module/cmemk.c
+++ b/src/cmem/module/cmemk.c
@@ -1473,9 +1473,8 @@ alloc:
      */
     virtp_end = virtp + size;
 #if 1
+    outer_inv_range(physp, physp + size);
     dmac_map_area(virtp, size, DMA_FROM_DEVICE);
-    outer_inv_range(__pa((u32)virtp),
-                    __pa((u32)virtp_end));
 #else
     dma_sync_single_for_device(NULL, (dma_addr_t)physp, size, DMA_FROM_DEVICE);
 #endif
@@ -1749,8 +1748,7 @@ alloc:
     case CMEM_IOCCACHEWB:
 #if 1
         dmac_map_area(virtp, block.size, DMA_TO_DEVICE);
-        outer_clean_range(__pa((u32)virtp),
-                          __pa((u32)virtp + block.size));
+        outer_clean_range(physp, physp + block.size);
 #else
         dma_sync_single_for_device(NULL, (dma_addr_t)physp, block.size, DMA_TO_DEVICE);
 #endif
@@ -1761,9 +1759,8 @@ alloc:
 
     case CMEM_IOCCACHEINV:
 #if 1
+        outer_inv_range(physp, physp + block.size);
         dmac_map_area(virtp, block.size, DMA_FROM_DEVICE);
-        outer_inv_range(__pa((u32)virtp),
-                        __pa((u32)virtp + block.size));
 #else
         dma_sync_single_for_device(NULL, (dma_addr_t)physp, block.size, DMA_FROM_DEVICE);
 #endif
@@ -1775,8 +1772,7 @@ alloc:
     case CMEM_IOCCACHEWBINV:
 #if 1
         dmac_map_area(virtp, block.size, DMA_BIDIRECTIONAL);
-        outer_flush_range(__pa((u32)virtp),
-                          __pa((u32)virtp + block.size));
+        outer_flush_range(physp, physp + block.size);
 #else
         dma_sync_single_for_device(NULL, (dma_addr_t)physp, block.size, DMA_TO_DEVICE);
         dma_sync_single_for_device(NULL, (dma_addr_t)physp, block.size, DMA_FROM_DEVICE);