Hi,
We have configured the UDMA DRU by referring to the DRU examples that came with the PDK. Our use case has 16 scattered blocks of data that need to be transferred at once. I have configured 8 + 8 = 16 TRs, so that when triggered all of them are transferred, and then I wait for the response.
For me, configuring the 16 TRs, pushing them to the ring queue, and then waiting for completion, dequeuing, and checking the response of each of the 16 transfers takes a significant number of cycles.
Is there a better way of implementing such a UDMA transfer of scattered blocks of data?
I'm attaching the code used to configure and de-configure the UDMA DRU for the 16 blocks.
Please guide.
void DMATransfer() { CSL_UdmapCppi5TRPD *pTrpd = (CSL_UdmapCppi5TRPD *)pTrpdMem; CSL_UdmapTR15 *pTr = (CSL_UdmapTR15 *)(pTrpdMem); uint32_t *pTrResp = (uint32_t *)(pTrpdMem + (sizeof(CSL_UdmapTR15) * (numOfPatches + 1U))); uint32_t cqRingNum = Udma_chGetCqRingNum(chHandle); uint32_t i; uint32_t hNumOfPatches = numOfPatches / 2; CSL_UdmapTR15 Tr; int32_t searchoffset; /* Make TRPD */ UdmaUtils_makeTrpd(pTrpd, UDMA_TR_TYPE_9, numOfPatches, cqRingNum); Tr.flags = CSL_FMK(UDMAP_TR_FLAGS_TYPE, CSL_UDMAP_TR_FLAGS_TYPE_4D_BLOCK_MOVE) | CSL_FMK(UDMAP_TR_FLAGS_STATIC, 0U) | CSL_FMK(UDMAP_TR_FLAGS_EOL, 0U) | /* NA */ CSL_FMK(UDMAP_TR_FLAGS_EVENT_SIZE, CSL_UDMAP_TR_FLAGS_EVENT_SIZE_COMPLETION) | CSL_FMK(UDMAP_TR_FLAGS_TRIGGER0, CSL_UDMAP_TR_FLAGS_TRIGGER_NONE) | CSL_FMK(UDMAP_TR_FLAGS_TRIGGER0_TYPE, CSL_UDMAP_TR_FLAGS_TRIGGER_TYPE_ALL) | CSL_FMK(UDMAP_TR_FLAGS_TRIGGER1, CSL_UDMAP_TR_FLAGS_TRIGGER_NONE) | CSL_FMK(UDMAP_TR_FLAGS_TRIGGER1_TYPE, CSL_UDMAP_TR_FLAGS_TRIGGER_TYPE_ALL) | CSL_FMK(UDMAP_TR_FLAGS_CMD_ID, 0x25U) | /* This will come back in TR response */ CSL_FMK(UDMAP_TR_FLAGS_SA_INDIRECT, 0U) | CSL_FMK(UDMAP_TR_FLAGS_DA_INDIRECT, 0U) | CSL_FMK(UDMAP_TR_FLAGS_EOP, 1U); Tr.icnt0 = searchPatchWidth * sizeof(uint16_t); Tr.icnt1 = searchPatchHeight; Tr.icnt2 = 1U; Tr.icnt3 = 1U; Tr.dim1 = imgWidth * sizeof(uint16_t); Tr.dim2 = (Tr.icnt0 * Tr.icnt1); Tr.dim3 = (Tr.icnt0 * Tr.icnt1 * Tr.icnt2); Tr.fmtflags = 0x00000000U; /* Linear addressing, 1 byte per elem. 
Replace with CSL-FL API */ Tr.dicnt0 = searchPatchWidth * sizeof(uint16_t); Tr.dicnt1 = searchPatchHeight; Tr.dicnt2 = 1U; Tr.dicnt3 = 1U; Tr.ddim1 = Tr.dicnt0; Tr.ddim2 = (Tr.dicnt0 * Tr.dicnt1); Tr.ddim3 = (Tr.dicnt0 * Tr.dicnt1 * Tr.dicnt2); /* Setup TR */ for (i = 0; i < hNumOfPatches; i++) { pTr++; *pTr = Tr; searchoffset = ((patchList[i].lower_left_y - searchMarginHeight + yPosSearch[i]) * imgWidth) + (patchList[i].lower_left_x - searchMarginwidth + xPosSearch[i]); // printf("patch:%d - searchOffset = %d\n", i, searchoffset); pTr->addr = (uint64_t)(searchImage + searchoffset); pTr->daddr = (uint64_t)(searchPatches + (i * searchPatchWidth * searchPatchHeight)); /* Clear TR response memory */ *pTrResp = 0xFFFFFFFFU; pTrResp++; } Tr.icnt0 = refPatchWidth * sizeof(uint16_t); Tr.icnt1 = refPatchHeight; Tr.dim2 = (Tr.icnt0 * Tr.icnt1); Tr.dim3 = (Tr.icnt0 * Tr.icnt1 * Tr.icnt2); Tr.dicnt0 = refPatchWidth * sizeof(uint16_t); Tr.dicnt1 = refPatchHeight; Tr.ddim1 = Tr.dicnt0; Tr.ddim2 = (Tr.dicnt0 * Tr.dicnt1); Tr.ddim3 = (Tr.dicnt0 * Tr.dicnt1 * Tr.dicnt2); for (i = 0; i < hNumOfPatches; i++) { pTr++; *pTr = Tr; pTr->addr = (uint64_t)(refImage + ((patchList[i].lower_left_y - 1) * imgWidth) + (patchList[i].lower_left_x - 1)); pTr->daddr = (uint64_t)(refPatches + (i * refPatchWidth * refPatchHeight)); /* Clear TR response memory */ *pTrResp = 0xFFFFFFFFU; pTrResp++; } /* Writeback cache */ Udma_appUtilsCacheWb(pTrpdMem, UDMA_TEST_APP_TRPD_SIZE); tempRetVal = Udma_ringQueueRaw(Udma_chGetFqRingHandle(chHandle0), (uint64_t)trpdMemCh0); if (UDMA_SOK != tempRetVal) { App_print("[Error] Channel 0 queue failed!!\n"); } if (UDMA_SOK == tempRetVal) { /* Wait for return descriptor in completion ring - this marks the * transfer completion */ SemaphoreP_pend(gUdmaAppDoneSemCh0, SemaphoreP_WAIT_FOREVER); SemaphoreP_pend(gUdmaAppDoneSemCh1, SemaphoreP_WAIT_FOREVER); // cycTransferA = __TSC; /* Response received in completion queue */ tempRetVal = 
Udma_ringDequeueRaw(Udma_chGetCqRingHandle(chHandle0), &pDescCh0); if (UDMA_SOK != tempRetVal) { App_print("[Error] No descriptor after callback Ch 0!!\n"); tempRetVal = UDMA_EFAIL; } #ifdef DEBUG_PRINTS App_print("[Debug] ringdequeue Ch 0 passed!!\n"); #endif } if (UDMA_SOK == tempRetVal) { /* * Sanity check */ /* Check returned descriptor pointer */ if (pDescCh0 != ((uint64_t)trpdMemCh0)) { App_print("[Error] TR descriptor pointer returned doesn't " "match the submitted address for Ch 0!!\n"); tempRetVal = UDMA_EFAIL; } } if (UDMA_SOK == tempRetVal) { /* Wait for return descriptor in completion ring - this marks the * transfer completion */ /* Response received in completion queue */ tempRetVal = Udma_ringDequeueRaw(Udma_chGetCqRingHandle(chHandle1), &pDescCh1); if (UDMA_SOK != tempRetVal) { App_print("[Error] No descriptor after callback Ch 1!!\n"); tempRetVal = UDMA_EFAIL; } } #ifdef DEBUG_PRINTS App_print("[Debug] ringdequeue Ch 1 passed!!\n"); #endif if (UDMA_SOK == tempRetVal) { /* * Sanity check */ /* Check returned descriptor pointer */ if (pDescCh1 != ((uint64_t)trpdMemCh1)) { App_print("[Error] TR descriptor pointer returned doesn't " "match the submitted address for Ch 1!!\n"); tempRetVal = UDMA_EFAIL; } } if (UDMA_SOK == tempRetVal) { /* Invalidate cache */ Udma_appUtilsCacheInv(gUdmaTrpdMem, UDMA_TEST_APP_TRPD_SIZE); /* check TR response status */ pTrResp = (uint32_t *)(trpdMemCh0 + (sizeof(CSL_UdmapTR15) * (numOfPatches + 1U))); for (i = 0; i < numOfPatches; i++) { trRespStatus = CSL_FEXT(*pTrResp, UDMAP_TR_RESPONSE_STATUS_TYPE); if (trRespStatus != CSL_UDMAP_TR_RESPONSE_STATUS_COMPLETE) { App_print("[Error] TR Response not completed for Ch 0!!\n"); tempRetVal = UDMA_EFAIL; } pTrResp++; } #ifdef DEBUG_PRINTS App_print("[Debug] TR Response completed for Ch 0 passed!!\n"); #endif } if (UDMA_SOK == tempRetVal) { /* Invalidate cache */ Udma_appUtilsCacheInv(gUdmaTrpdMem + UDMA_TEST_APP_TRPD_SIZE_ALIGN, UDMA_TEST_APP_TRPD_SIZE); /* check TR response 
status */ pTrResp = (uint32_t *)(trpdMemCh1 + (sizeof(CSL_UdmapTR15) * (numOfPatches + 1U))); for (i = 0; i < numOfPatches; i++) { trRespStatus = CSL_FEXT(*pTrResp, UDMAP_TR_RESPONSE_STATUS_TYPE); if (trRespStatus != CSL_UDMAP_TR_RESPONSE_STATUS_COMPLETE) { App_print("[Error] TR Response not completed for Ch 1!!\n"); tempRetVal = UDMA_EFAIL; } pTrResp++; } #ifdef DEBUG_PRINTS App_print("[Debug] TR Response completed for Ch 1 passed!!\n"); #endif } if (tempRetVal != UDMA_SOK) retVal = _CALL_LK_RETURN_DMA_ERROR_; }