/* ======================================================================== */ /* TEXAS INSTRUMENTS, INC. */ /* */ /* VICP Signal Processing Library */ /* */ /* This library contains proprietary intellectual property of Texas */ /* Instruments, Inc. The library and its source code are protected by */ /* various copyrights, and portions may also be protected by patents or */ /* other legal protections. */ /* */ /* This software is licensed for use with Texas Instruments TMS320 */ /* family DSPs. This license was provided to you prior to installing */ /* the software. You may review this license by consulting the file */ /* TI_license.PDF which accompanies the files in this library. */ /* */ /* ------------------------------------------------------------------------ */ /* */ /* NAME */ /* _imgproclib.c -- imgproc module */ /* */ /* DESCRIPTION */ /* This file implements interface functions with IP_run library */ /* */ /* REV */ /* version 0.0.2: 3rd March, 2009 */ /* Added _CPIS_updateSrcDstPtr */ /* */ /* version 0.0.1: Feb 9th, 2009 */ /* Initial version */ /* */ /* ------------------------------------------------------------------------ */ /* Copyright (c) 2008 Texas Instruments, Incorporated. */ /* All Rights Reserved. */ /* ======================================================================== */ //#define _BENCHMARK_VICP /* Include the lib interface header files */ #include "cpisCore.h" #include "_cpisCore.h" #include "cpisSched.h" #include "cpisCompu.h" #include "_cpis_utils.h" #include extern Uint8 _CPIS_demoMode; static Uint16 _CPIS2VICP_inputFormat[]= { IMXTYPE_BYTE, IMXTYPE_SHORT, IMXTYPE_LONG, 0, IMXTYPE_UBYTE, IMXTYPE_USHORT, \ IMXTYPE_ULONG, 0, 0, 0, IMXTYPE_BYTE, IMXTYPE_SHORT, IMXTYPE_LONG, IMXTYPE_UBYTE, IMXTYPE_USHORT, IMXTYPE_ULONG}; static Uint16 _CPIS2VICP_outputFormat[]= { IMXOTYPE_BYTE, IMXOTYPE_SHORT, IMXOTYPE_LONG, 0, IMXOTYPE_BYTE, IMXOTYPE_SHORT, \ IMXOTYPE_LONG, 0, 0, 0, IMXOTYPE_BYTE, IMXOTYPE_SHORT, IMXOTYPE_LONG, IMXOTYPE_BYTE, IMXOTYPE_SHORT, IMXOTYPE_LONG}; static Int16 _CPIS2VICP_sizeOfFormat[]= { 2, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 3, 3, 3, 1, 1, 2, 4, 8, 1, 2, 4, 8, -8, 1, 2, 4, 8, 2, 4, 8}; Int32 _CPIS_genericCall( CPIS_Handle *handle, CPIS_BaseParms *base, void *params, CPIS_ExecType execType, CPIS_FuncStruct *funcStruct ){ Uint32 maxBlockSize; Uint16 smallestDataSize; CHECK_INIT CPIS_obj.ipRunTab[0].functions= *funcStruct; if ((*funcStruct->checkFunc)(base, params)==-1) return -1; /* If functions that compute best block width and block height are defined */ if (funcStruct->getMaxBlockSizeFunc && funcStruct->getBlockDimFunc) { /* If processing block width or height is 0 then it means API has to figure out optimum block dimensions by calling (*funcStruct->getBlockDimFunc) otherwise it calls (*funcStruct->getMaxBlockSizeFunc) to get the value maxBlockSize and smallestDataSize which will be used by _CPIS_isBlockDimValid() to verify whether the passed values are valid. */ if (base->procBlockSize.width== 0 || base->procBlockSize.height== 0) { if ((*funcStruct->getBlockDimFunc)(base, params, &base->procBlockSize.width, &base->procBlockSize.height)== -1) { CPIS_errno= CPIS_NOSUPPORTDIM_ERROR; return -1; } } else { (*funcStruct->getMaxBlockSizeFunc)(base, params, &maxBlockSize, &smallestDataSize); /* Validate the block dimension */ if (_CPIS_isBlockDimValid(base, _CPIS_alignWithDMA2(4, smallestDataSize), maxBlockSize)== -1){ CPIS_errno= CPIS_NOSUPPORTDIM_ERROR; return -1; } } } CPIS_obj.lock(CPIS_obj.lockArg); if (_CPIS_setFirst(&CPIS_obj.ipRunTab[0])== -1) { CPIS_obj.unlock(CPIS_obj.unlockArg); return -1; } if (funcStruct->setDmaInFunc) { if ((*funcStruct->setDmaInFunc)(&CPIS_obj.ipRunTab[0], base, params)==-1){ CPIS_obj.unlock(CPIS_obj.unlockArg); return -1; } } else { if (_CPIS_setDmaInTransfers(&CPIS_obj.ipRunTab[0], base, funcStruct->arg)==-1){ CPIS_obj.unlock(CPIS_obj.unlockArg); return -1; } } if ((*funcStruct->procFunc)(&CPIS_obj.ipRunTab[0], base, params)==-1){ CPIS_obj.unlock(CPIS_obj.unlockArg); return -1; } if (funcStruct->setDmaOutFunc) { if ((*funcStruct->setDmaOutFunc)(&CPIS_obj.ipRunTab[0], base, params)==-1){ CPIS_obj.unlock(CPIS_obj.unlockArg); return -1; } } else { if (_CPIS_setDmaOutTransfers(&CPIS_obj.ipRunTab[0], base, funcStruct->arg)==-1){ CPIS_obj.unlock(CPIS_obj.unlockArg); return -1; } } /* if not single block, check whether the DMA settings meet constraints such as alignment */ if (CPIS_obj.ipRunTab[0].singleBlockProcessing!= 1) { if(_CPIS_validateDMAconstraints(&CPIS_obj.ipRunTab[0])== -1){ CPIS_obj.unlock(CPIS_obj.unlockArg); return -1; } } if (_CPIS_setLast(&CPIS_obj.ipRunTab[0], execType)== -1){ CPIS_obj.unlock(CPIS_obj.unlockArg); return -1; } if (execType== CPIS_SYNC) { #if !defined(_4MACS) || defined(_SIMCOP) CPIS_start(NULL); CPIS_wait(NULL); #endif CPIS_delete(&CPIS_obj.ipRunTab[0]); *handle= NULL; } else { *handle= &CPIS_obj.ipRunTab[0]; } return 0; } void _CPIS_dummyVicpLock(void*dummy){ return; } void _CPIS_dummyVicpUnlock(void*dummy){ return; } Int32 _CPIS_setDmaInTransfers(CPIS_IpRun *ipRun, CPIS_BaseParms *base, CPIS_Arg *arg){ /* This Dma structures are used to pass information about dma transfers to IP_RUN library */ Int32 dmaIdx; IP_run *ipRunObj; Uint16 trueSrcFormat, alphaOn; Int32 oldScaleFactor; Uint16 roundBlockWidth; Uint32 ddrOfstNextBlock, ddrOfstNextBlockRow; Uint16 numHorzBlocks, numVertBlocks; CPIS_Arg defArg; Int32 scaleFactorSrc= 1; Uint32 shiftFactorUVhorzSrc=0; Uint32 shiftFactorUVvertSrc=0; ipRunObj= &ipRun->ipRunObj; if (arg== NULL) { arg= &defArg; arg->multiCoreMode= ipRun->multiCoreMode; arg->alignImgBufWidth= 1; } numHorzBlocks= base->roiSize.width / base->procBlockSize.width; numVertBlocks= base->roiSize.height / base->procBlockSize.height; ipRun->multiCoreMode= arg->multiCoreMode; if ((numHorzBlocks * numVertBlocks)<= 2){ // This will disable the scheduling in IP_run ipRun->singleBlockProcessing= 1; if ((numHorzBlocks * numVertBlocks)== 1) { ipRun->multiCoreMode= CPIS_MCORE_MODE_SINGLE; ipRunObj->numHorzBlocks= 1; ipRunObj->numVertBlocks= 1; } else { ipRun->multiCoreMode= CPIS_MCORE_MODE_DUAL_CLONE; if (numVertBlocks== 2) ipRun->dmaBlockTferType= CPIS_DMATFER_COL_WISE; else ipRun->dmaBlockTferType= CPIS_DMATFER_ROW_WISE; ipRunObj->numHorzBlocks= 2; ipRunObj->numVertBlocks= 1; } } else { if (ipRun->dmaBlockTferType== CPIS_DMATFER_AUTO) { if ((ipRun->dmaBlockTferType=_CPIS_getBestDmaTferType(ipRun, numHorzBlocks, numVertBlocks))== -1) return -1; } if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) { ipRunObj->numHorzBlocks= numHorzBlocks; ipRunObj->numVertBlocks= numVertBlocks; } else { ipRunObj->numVertBlocks= numHorzBlocks; ipRunObj->numHorzBlocks= numVertBlocks; } } trueSrcFormat= base->srcFormat[0] & ~CPIS_ALPHA; alphaOn= base->srcFormat[0] & CPIS_ALPHA; /* Calculate scaleFactor used to rescale the input widths */ if ((trueSrcFormat) <= CPIS_BAYER_P){ /* if format is planar */ if (alphaOn) ipRunObj->numDmaIn= 4; else ipRunObj->numDmaIn= 3; switch (trueSrcFormat){ case CPIS_YUV_420P: shiftFactorUVhorzSrc= shiftFactorUVvertSrc= 1; break; case CPIS_YUV_422P: shiftFactorUVhorzSrc= 1; break; case CPIS_YUV_411P: shiftFactorUVhorzSrc= 2; break; } /* switch */ }/* if */ else { if (trueSrcFormat== CPIS_YUV_420SP) { shiftFactorUVvertSrc= 1; if (alphaOn) ipRunObj->numDmaIn= 3; else ipRunObj->numDmaIn= 2; } else { if (alphaOn) ipRunObj->numDmaIn= 2; else ipRunObj->numDmaIn= 1; } scaleFactorSrc= _CPIS_sizeof(base->srcFormat[0]); }/* else */ ipRunObj->numDmaIn= ipRunObj->numDmaIn*base->numInput; if (ipRunObj->numDmaIn > CPIS_MAX_SRC_CHANNELS) { CPIS_errno= CPIS_NOT_ENOUGH_EDMACHAN_ERROR; return -1; } /*ipRunObj->numDmaOut= 1; */ /*ipRunObj->numDmaIn= 1; */ /* * * Initialize dma parameters * */ ipRunObj->dmaIn= &CPIS_obj.dmaIn[0]; /* IP_RUN will do dynamic channel allocation through dmac module. */ ipRunObj->dmaIn[0].dmaChNo = DMAC_CHAN_ANY;\ /* We are going to initialize the dma transfer parameters normally */ /* in which each block is separated from the previous one by a fixed offset. */ /* so no need of transfer param table. */ ipRunObj->dmaIn[0].useTferParamTable= 0; if (arg->alignImgBufWidth) roundBlockWidth= _CPIS_alignWithSIMD(base->procBlockSize.width); else roundBlockWidth= base->procBlockSize.width; ipRunObj->dmaIn[0].ddrAddr= (Uint32)base->srcBuf[0].ptr; ipRunObj->dmaIn[0].imgBufAddr= (Int32)(ipRun->imgbufInOfst + IMGBUF_A_BASE); if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) { ddrOfstNextBlock= base->procBlockSize.width; ddrOfstNextBlockRow= base->srcBuf[0].stride * base->procBlockSize.height; } else { ddrOfstNextBlock= base->procBlockSize.height*base->srcBuf[0].stride; ddrOfstNextBlockRow= base->procBlockSize.width; } if (scaleFactorSrc > 0) { /* For any positif scaleFactorSrc, the scaleFactorSrc is a multiplier */ ipRunObj->dmaIn[0].ddrWidth = scaleFactorSrc*base->srcBuf[0].stride; ipRunObj->dmaIn[0].imgBufWidth = scaleFactorSrc*roundBlockWidth; ipRunObj->dmaIn[0].blockWidth = scaleFactorSrc*base->procBlockSize.width; ipRunObj->dmaIn[0].ddrOfstNextBlock= scaleFactorSrc*ddrOfstNextBlock; ipRunObj->dmaIn[0].ddrOfstNextBlockRow= scaleFactorSrc*ddrOfstNextBlockRow; } else { /* otherwise it is a divisor in order to handle type such as CPIS_1BIT */ scaleFactorSrc= -scaleFactorSrc; ipRunObj->dmaIn[0].ddrWidth = base->srcBuf[0].stride/scaleFactorSrc; ipRunObj->dmaIn[0].imgBufWidth = roundBlockWidth/scaleFactorSrc; ipRunObj->dmaIn[0].blockWidth = base->procBlockSize.width/scaleFactorSrc; ipRunObj->dmaIn[0].ddrOfstNextBlock= ddrOfstNextBlock/scaleFactorSrc; ipRunObj->dmaIn[0].ddrOfstNextBlockRow= ddrOfstNextBlockRow/scaleFactorSrc; } ipRunObj->dmaIn[0].blockHeight = base->procBlockSize.height; ipRun->imgbufLen= ipRunObj->dmaIn[0].imgBufWidth*ipRunObj->dmaIn[0].blockHeight; oldScaleFactor= scaleFactorSrc; for (dmaIdx=1; dmaIdxnumDmaIn; dmaIdx++) { if (alphaOn && (trueSrcFormat > CPIS_BAYER_P)) { if (trueSrcFormat!= CPIS_YUV_420SP && dmaIdx==1) scaleFactorSrc= 1; else if (trueSrcFormat== CPIS_YUV_420SP && dmaIdx==2) { scaleFactorSrc= 1; shiftFactorUVhorzSrc= 0; shiftFactorUVvertSrc= 0; } else scaleFactorSrc= 1; } else if (trueSrcFormat == CPIS_YUV_420SP) /* if 420 semi-planar */ scaleFactorSrc= 1; else if (trueSrcFormat >= CPIS_YUV_422IBE) /* if not planar */ scaleFactorSrc= _CPIS_sizeof(base->srcFormat[dmaIdx]); else /* if planar */ scaleFactorSrc= oldScaleFactor; ipRunObj->dmaIn[dmaIdx].ddrAddr= (Uint32)base->srcBuf[dmaIdx].ptr; ipRunObj->dmaIn[dmaIdx].imgBufAddr= ipRunObj->dmaIn[0].imgBufAddr \ + ipRun->imgbufLen; ipRunObj->dmaIn[dmaIdx].blockHeight = base->procBlockSize.height >> shiftFactorUVvertSrc; if (scaleFactorSrc > 0) { /* For any positif scaleFactorSrc, the scaleFactorSrc is a multiplier */ ipRunObj->dmaIn[dmaIdx].ddrWidth = scaleFactorSrc*base->srcBuf[dmaIdx].stride; ipRunObj->dmaIn[dmaIdx].imgBufWidth = (scaleFactorSrc*roundBlockWidth) >> shiftFactorUVhorzSrc; ipRunObj->dmaIn[dmaIdx].blockWidth = (scaleFactorSrc*base->procBlockSize.width) >> shiftFactorUVhorzSrc; if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) { ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlock= (scaleFactorSrc*base->procBlockSize.width) >> shiftFactorUVhorzSrc; ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlockRow= ipRunObj->dmaIn[dmaIdx].ddrWidth * ipRunObj->dmaIn[dmaIdx].blockHeight; } else { ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlock= ipRunObj->dmaIn[dmaIdx].ddrWidth *ipRunObj->dmaIn[dmaIdx].blockHeight; ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlockRow= (scaleFactorSrc*base->procBlockSize.width) >> shiftFactorUVhorzSrc; } } else { /* otherwise it is a divisor in order to handle type such as CPIS_1BIT */ scaleFactorSrc= -scaleFactorSrc; ipRunObj->dmaIn[dmaIdx].ddrWidth = base->srcBuf[dmaIdx].stride/scaleFactorSrc; ipRunObj->dmaIn[dmaIdx].imgBufWidth = (roundBlockWidth/scaleFactorSrc) >> shiftFactorUVhorzSrc; ipRunObj->dmaIn[dmaIdx].blockWidth = (base->procBlockSize.width/scaleFactorSrc) >> shiftFactorUVhorzSrc; if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) { ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlock= (base->procBlockSize.width/scaleFactorSrc) >> shiftFactorUVhorzSrc; ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlockRow= ipRunObj->dmaIn[dmaIdx].ddrWidth * ipRunObj->dmaIn[dmaIdx].blockHeight; } else { ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlock= ipRunObj->dmaIn[dmaIdx].ddrWidth * ipRunObj->dmaIn[dmaIdx].blockHeight; ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlockRow= (base->procBlockSize.width/scaleFactorSrc) >> shiftFactorUVhorzSrc; } } ipRun->imgbufLen+= ipRunObj->dmaIn[dmaIdx].imgBufWidth*ipRunObj->dmaIn[dmaIdx].blockHeight; #ifdef _BENCHMARK_VICP ipRunObj->dmaIn[dmaIdx].blockWidth = 16; ipRunObj->dmaIn[dmaIdx].blockHeight = 1; #endif } #ifdef _BENCHMARK_VICP ipRunObj->dmaIn[0].blockWidth = 16; ipRunObj->dmaIn[0].blockHeight = 1; #endif return 0; } Int32 _CPIS_setDmaOutTransfers(CPIS_IpRun *ipRun, CPIS_BaseParms *base, CPIS_Arg *arg){ /* This Dma structures are used to pass information about dma transfers to IP_RUN library */ Int32 dmaIdx; IP_run *ipRunObj; Uint16 trueDstFormat, alphaOn; Int32 oldScaleFactor; Uint16 roundBlockWidth; Uint32 ddrOfstNextBlock, ddrOfstNextBlockRow; CPIS_Arg defArg; Int32 scaleFactorDst= 1; Uint32 shiftFactorUVhorzDst=0; Uint32 shiftFactorUVvertDst=0; ipRunObj= &ipRun->ipRunObj; if (arg== NULL) { arg= &defArg; arg->multiCoreMode= ipRun->multiCoreMode; arg->alignImgBufWidth= 1; } trueDstFormat= base->dstFormat[0] & ~CPIS_ALPHA; alphaOn= base->dstFormat[0] & CPIS_ALPHA; /* Calculate scaleFactor used to rescale the output widths */ if (trueDstFormat<= CPIS_BAYER_P){ /* if format is planar */ if (alphaOn) ipRunObj->numDmaOut= 4; else ipRunObj->numDmaOut= 3; switch (trueDstFormat){ case CPIS_YUV_420P: shiftFactorUVhorzDst= shiftFactorUVvertDst= 1; break; case CPIS_YUV_422P: shiftFactorUVhorzDst= 1; break; case CPIS_YUV_411P: shiftFactorUVhorzDst= 2; break; } /* switch */ }/* if */ else { if (trueDstFormat== CPIS_YUV_420SP) { shiftFactorUVvertDst= 1; if (alphaOn) ipRunObj->numDmaOut= 3; else ipRunObj->numDmaOut= 2; } else { if (alphaOn) ipRunObj->numDmaOut= 2; else ipRunObj->numDmaOut= 1; } scaleFactorDst= _CPIS_sizeof(base->dstFormat[0]); }/* else */ /* * * Initialize dma parameters * */ if (ipRun->dmaBlockTferType== CPIS_DMATFER_AUTO) ipRun->dmaBlockTferType= CPIS_DMATFER_ROW_WISE; ipRunObj->numDmaOut= ipRunObj->numDmaOut*base->numOutput; if (ipRunObj->numDmaOut > CPIS_MAX_DST_CHANNELS || (ipRunObj->numDmaIn + ipRunObj->numDmaOut > CPIS_obj.maxNumDma) ) { printf("Embedded Vision Framework error ! Number of DMA channels required by this function is %d and exceeds maximum allowed of %d\n", ipRunObj->numDmaIn + ipRunObj->numDmaOut, CPIS_obj.maxNumDma); CPIS_errno= CPIS_NOT_ENOUGH_EDMACHAN_ERROR; return -1; } ipRunObj->dmaOut= &CPIS_obj.dmaOut[0]; /* IP_RUN will do dynamic channel allocation through dmac module. */ ipRunObj->dmaOut[0].dmaChNo = DMAC_CHAN_ANY; /* We are going to initialize the dma transfer parameters normally */ /* in which each block is separated from the previous one by a fixed offset. */ /* so no need of transfer param table. */ ipRunObj->dmaOut[0].useTferParamTable= 0; if (arg->alignImgBufWidth) roundBlockWidth= _CPIS_alignWithSIMD(base->procBlockSize.width); else roundBlockWidth= base->procBlockSize.width; if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) { ddrOfstNextBlock= base->procBlockSize.width; ddrOfstNextBlockRow= base->dstBuf[0].stride * base->procBlockSize.height; } else { ddrOfstNextBlock= base->procBlockSize.height*base->dstBuf[0].stride; ddrOfstNextBlockRow= base->procBlockSize.width; } ipRunObj->dmaOut[0].ddrAddr= (Uint32)base->dstBuf[0].ptr; ipRunObj->dmaOut[0].imgBufAddr= (Int32)(ipRun->imgbufOutOfst[0] + IMGBUF_A_BASE); if (scaleFactorDst > 0) { /* For any positif scaleFactorDst, the scaleFactorDst is a multiplier */ ipRunObj->dmaOut[0].ddrWidth = scaleFactorDst*base->dstBuf[0].stride; ipRunObj->dmaOut[0].imgBufWidth = scaleFactorDst*roundBlockWidth; ipRunObj->dmaOut[0].blockWidth = scaleFactorDst*base->procBlockSize.width; ipRunObj->dmaOut[0].ddrOfstNextBlock= scaleFactorDst*ddrOfstNextBlock; ipRunObj->dmaOut[0].ddrOfstNextBlockRow= scaleFactorDst*ddrOfstNextBlockRow; } else { /* otherwise it is a divisor in order to handle type such as CPIS_1BIT */ scaleFactorDst= -scaleFactorDst; ipRunObj->dmaOut[0].ddrWidth = base->dstBuf[0].stride/scaleFactorDst; ipRunObj->dmaOut[0].imgBufWidth = roundBlockWidth/scaleFactorDst; ipRunObj->dmaOut[0].blockWidth = base->procBlockSize.width/scaleFactorDst; ipRunObj->dmaOut[0].ddrOfstNextBlock= base->procBlockSize.width/scaleFactorDst; ipRunObj->dmaOut[0].ddrOfstNextBlockRow= ddrOfstNextBlockRow/scaleFactorDst; } ipRunObj->dmaOut[0].blockHeight = base->procBlockSize.height; oldScaleFactor= scaleFactorDst; for (dmaIdx=1; dmaIdxnumDmaOut; dmaIdx++) { if (alphaOn && (trueDstFormat > CPIS_BAYER_P)) { if (trueDstFormat!= CPIS_YUV_420SP && dmaIdx==1) scaleFactorDst= 1; else if (trueDstFormat== CPIS_YUV_420SP && dmaIdx==2) { scaleFactorDst= 1; shiftFactorUVhorzDst= 0; shiftFactorUVvertDst= 0; } else scaleFactorDst= 1; } else if (trueDstFormat == CPIS_YUV_420SP) /* if 420 semi-planar */ scaleFactorDst= 1; else if (trueDstFormat >= CPIS_YUV_422IBE) /* if not planar */ scaleFactorDst= _CPIS_sizeof(base->dstFormat[dmaIdx]); else scaleFactorDst= oldScaleFactor; ipRunObj->dmaOut[dmaIdx].ddrAddr= (Uint32)base->dstBuf[dmaIdx].ptr; ipRunObj->dmaOut[dmaIdx].imgBufAddr= (Int32)(ipRun->imgbufOutOfst[dmaIdx] + IMGBUF_A_BASE); ipRunObj->dmaOut[dmaIdx].blockHeight = base->procBlockSize.height >> shiftFactorUVvertDst; if (scaleFactorDst > 0) { /* For any positif scaleFactorDst, the scaleFactorDst is a multiplier */ ipRunObj->dmaOut[dmaIdx].ddrWidth = scaleFactorDst*base->dstBuf[dmaIdx].stride; ipRunObj->dmaOut[dmaIdx].imgBufWidth = (scaleFactorDst*roundBlockWidth) >> shiftFactorUVhorzDst; ipRunObj->dmaOut[dmaIdx].blockWidth = (scaleFactorDst*base->procBlockSize.width) >> shiftFactorUVhorzDst; if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) { ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlock= (scaleFactorDst*base->procBlockSize.width) >> shiftFactorUVhorzDst; ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlockRow= ipRunObj->dmaOut[dmaIdx].ddrWidth * ipRunObj->dmaOut[dmaIdx].blockHeight; } else { ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlock= ipRunObj->dmaOut[dmaIdx].ddrWidth * ipRunObj->dmaOut[dmaIdx].blockHeight; ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlockRow= (scaleFactorDst*base->procBlockSize.width) >> shiftFactorUVhorzDst; } } else { /* otherwise it is a divisor in order to handle type such as CPIS_1BIT */ scaleFactorDst= -scaleFactorDst; ipRunObj->dmaOut[dmaIdx].ddrWidth = base->dstBuf[dmaIdx].stride/scaleFactorDst; ipRunObj->dmaOut[dmaIdx].imgBufWidth = (roundBlockWidth/scaleFactorDst) >> shiftFactorUVhorzDst; ipRunObj->dmaOut[dmaIdx].blockWidth = (base->procBlockSize.width/scaleFactorDst) >> shiftFactorUVhorzDst; if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) { ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlock= (base->procBlockSize.width/scaleFactorDst) >> shiftFactorUVhorzDst; ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlockRow= ipRunObj->dmaOut[dmaIdx].ddrWidth * ipRunObj->dmaOut[dmaIdx].blockHeight; } else { ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlock= ipRunObj->dmaOut[dmaIdx].ddrWidth * ipRunObj->dmaOut[dmaIdx].blockHeight; ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlockRow= (base->procBlockSize.width/scaleFactorDst) >> shiftFactorUVhorzDst; } } #ifdef _BENCHMARK_VICP ipRunObj->dmaOut[dmaIdx].blockWidth = 16; ipRunObj->dmaOut[dmaIdx].blockHeight = 1; #endif } #ifdef _BENCHMARK_VICP ipRunObj->dmaOut[0].blockWidth = 16; ipRunObj->dmaOut[0].blockHeight = 1; #endif return 0; } Int32 _CPIS_setFirst(CPIS_IpRun *ipRun){ if (CPIS_obj.numProcFunc== CPIS_obj.maxNumProcFunc){ CPIS_errno= CPIS_MAXNUMFUNCREACHED; return -1; } _CPIS_setDemoMode(0); #if 0 /* enable this for later future vsersion */ ipRun->imgbufOfst= CPIS_obj.imgbufOfst; ipRun->coefOfst= CPIS_obj.coefOfst; ipRun->coef1Ofst= CPIS_obj.coef1Ofst; ipRun->cmdOfst= CPIS_obj.cmdOfst; ipRun->cmd1Ofst= CPIS_obj.cmd1Ofst; #else ipRun->imgbufInOfst= 0; ipRun->imgbufOutOfst[0]= 0; ipRun->coefOfst= 0; ipRun->cmdOfst= 0; ipRun->coef1Ofst= 0; ipRun->cmd1Ofst= 0; #endif #ifdef _DUAL_CORE ipRun->multiCoreMode= CPIS_MCORE_MODE_MAXNUM_CLONE;//CPIS_MCORE_MODE_SINGLE;//CPIS_MCORE_MODE_MAXNUM_CLONE; #else ipRun->multiCoreMode= CPIS_MCORE_MODE_SINGLE; #endif ipRun->dmaBlockTferType= CPIS_DMATFER_AUTO; ipRun->context.action= CPIS_CONTEXT_NONE; ipRun->context.cmd0Ptr= NULL; ipRun->context.cmd1Ptr= NULL; ipRun->context.coef0Ptr= NULL; ipRun->context.coef1Ptr= NULL; ipRun->singleBlockProcessing= 0; ipRun->resetFlag= 0; ipRun->ipRunObj.cmdptr_ofst= ipRun->cmdOfst>>1; ipRun->ipRunObj.extension= NULL; /* NULL for DM350 */ ipRun->ipRunObj.customExtension= NULL; _CPIS_switchBuffersToCPU(); return 0; } Int32 _CPIS_setLast(CPIS_IpRun *ipRun, CPIS_ExecType execType){ #if defined(_4MACS) IP_RUN_extension extension; #endif _CPIS_switchBuffersToCPU(); #ifdef _SIMCOP if (ipRun->imgbufLen > 2*IMGBUF_SIZE_BYTES || ipRun->coefLen > COEFFBUF_SIZE_BYTES || ipRun->cmdLen > CMDBUF_SIZE_BYTES) { #else if (ipRun->imgbufLen > IMGBUF_SIZE_BYTES || ipRun->coefLen > COEFFBUF_SIZE_BYTES || ipRun->cmdLen > CMDBUF_SIZE_BYTES) { #endif CPIS_errno= CPIS_OUTOFMEM; printf("Out of mem, img buf= %d, coef = %d, cmd= %d\n", ipRun->imgbufLen, ipRun->coefLen, ipRun->cmdLen); return -1; }; CPIS_obj.imgbufOfst= ipRun->imgbufInOfst + ipRun->imgbufLen; CPIS_obj.coefOfst= ipRun->coefOfst + ipRun->coefLen; CPIS_obj.cmdOfst= ipRun->cmdOfst + ipRun->cmdLen; CPIS_obj.coef1Ofst= ipRun->coef1Ofst + ipRun->coef1Len; CPIS_obj.cmd1Ofst= ipRun->cmd1Ofst + ipRun->cmd1Len; ipRun->ipRunObj.cmdptr_ofst= ipRun->cmdOfst>>1; ipRun->ipRunObj.compCode= 0; #ifdef _BENCHMARK_EDMA *((Uint16*)CMDBUF_BASE + ipRun->ipRunObj.cmdptr_ofst)= 0x8000; *((Uint16*)CMD1BUF_BASE + (ipRun->cmd1Len>>1))= 0x8000; #endif if (CPIS_obj.waitCB) ipRun->ipRunObj.compIntEna= 1; #if defined(_4MACS) extension.magicWord= 0x12344321; extension.async= execType; if (ipRun->singleBlockProcessing== 1) extension.async|= 0x8000; ipRun->ipRunObj.extension= &extension; if ((ipRun->multiCoreMode & (~0x8000))== CPIS_MCORE_MODE_MAXNUM_CLONE){ if (ipRun->context.action & CPIS_CONTEXT_RESTORE_CMD0) { ipRun->context.action |= CPIS_CONTEXT_RESTORE_CMD1; ipRun->context.cmd1Ptr= ipRun->context.cmd0Ptr; ipRun->cmd1Len= ipRun->cmdLen; } if (ipRun->context.action & CPIS_CONTEXT_RESTORE_COEF0) { ipRun->context.action |= CPIS_CONTEXT_RESTORE_COEF1; ipRun->context.coef1Ptr= ipRun->context.coef0Ptr; ipRun->coef1Len= ipRun->coefLen; } if (_CPIS_multiCoreCloneDmaSetup(ipRun, 2)== -1) { _CPIS_printf("Reversing to single core processing."); ipRun->multiCoreMode= CPIS_MCORE_MODE_SINGLE; ipRun->context.action &= ~(CPIS_CONTEXT_SAVE_CMD1 | CPIS_CONTEXT_RESTORE_CMD1); CPIS_obj.cmd1Ofst= ipRun->cmd1Ofst + ipRun->cmd1Len; } else { _CPIS_multiCoreCloneMemSetup(ipRun, 2); extension.cmd1ptr_ofst= ipRun->cmdOfst>>1; ipRun->cmd1Ofst= ipRun->cmdOfst; ipRun->coef1Ofst= ipRun->coefOfst; } } else if ((ipRun->multiCoreMode & (~0x8000))== CPIS_MCORE_MODE_MAXNUM_SPLIT){ if (ipRun->ipRunObj.numHorzBlocks <=3) { _CPIS_printf("Fatal error: Dual-core split mode cannot be enabled because number of horizontal block <=3"); ipRun->multiCoreMode= CPIS_MCORE_MODE_SINGLE; } extension.cmd1ptr_ofst= ipRun->cmd1Ofst>>1; CACHE_writeBack(COEFF1BUF_BASE + ipRun->coef1Ofst, ipRun->coef1Len, 0); CACHE_writeBack(CMD1BUF_BASE + ipRun->cmd1Ofst, ipRun->cmd1Len, 1); } extension.iMXid= ipRun->multiCoreMode; extension.context= ipRun->context; extension.context.cmd0Len= ipRun->cmdLen; extension.context.cmd1Len= ipRun->cmd1Len; extension.context.coef0Len= ipRun->coefLen; extension.context.coef0Ofst= ipRun->coefOfst; extension.context.coef1Len= ipRun->coef1Len; extension.context.coef1Ofst= ipRun->coef1Ofst; #else ipRun->ipRunObj.extension= NULL; #endif ipRun->ipRunObj.customExtension= NULL; CACHE_writeBack(COEFFBUF_BASE + ipRun->coefOfst, ipRun->coefLen, 0); CACHE_writeBack(CMDBUF_BASE + ipRun->cmdOfst, ipRun->cmdLen, 1); if (IP_RUN_registerAlgo(&ipRun->ipRunObj, 0)== -1) { CPIS_errno= CPIS_ALGO_REGISTRATION_ERROR; return -1; } else CPIS_obj.numProcFunc++; /* In demo mode, we want to leave the other half unprocessed so we put a sleep */ if (_CPIS_demoMode== 1) { IMGBUF_switch(SELCMD1BUF, CMD1BUFCPU); *((Uint16*)CMD1BUF_BASE)= 0x8000; IMGBUF_switch(SELCMD1BUF, CMD1BUFVICP); } return 0; } Int32 _CPIS_delete(CPIS_Handle handle){ CPIS_IpRun *ipRunObj= (CPIS_IpRun *)handle; CPIS_obj.imgbufOfst-= ipRunObj->imgbufLen; CPIS_obj.coefOfst-= ipRunObj->coefLen; CPIS_obj.cmdOfst-= ipRunObj->cmdLen; CPIS_obj.coef1Ofst-= ipRunObj->coef1Len; CPIS_obj.cmd1Ofst-= ipRunObj->cmd1Len; CPIS_obj.numProcFunc--; CPIS_obj.unlock(CPIS_obj.unlockArg); return 0; } /* Return number of bytes of the CPIS_Format */ Int16 _CPIS_sizeof(CPIS_Format format){ Uint16 trueFormat, alphaOn; trueFormat= format & ~CPIS_ALPHA; alphaOn= format & CPIS_ALPHA; if (alphaOn && trueFormat== CPIS_RGB_888) return 4; else return (_CPIS2VICP_sizeOfFormat[trueFormat]); } /* Return the corresponding VICP macro symbol for the CPIS_Format */ Uint16 _CPIS_translateInputFormat(CPIS_Format format){ return(_CPIS2VICP_inputFormat[format - CPIS_8BIT]); } /* Return the corresponding VICP macro symbol for the CPIS_Format */ Uint16 _CPIS_translateOutputFormat(CPIS_Format format){ return(_CPIS2VICP_outputFormat[format - CPIS_8BIT]); } /* This function validate the processing block dimensions against some predefined criteria and returns 0 if valid or -1 if not valid */ Int32 _CPIS_isBlockDimValid(CPIS_BaseParms *base, Uint16 divisor, Uint16 maxNumPixels) { /* At this point none of the block dimensions should be 0. If yes then something has gone wrong ... */ if (base->procBlockSize.width== 0 || base->procBlockSize.height== 0) { return -1; } /* procBlockSize.width must be multiple of divisor it must divide the frame width */ if (base->procBlockSize.width & (divisor-1) || (base->roiSize.width % (base->procBlockSize.width) )) { return -1; } /* procBlockSize.height must divide the frame height */ if (base->roiSize.height % base->procBlockSize.height) { return -1; } if (base->procBlockSize.width * base->procBlockSize.height > maxNumPixels){ return -1; } return 0; } Int32 _CPIS_getFilterCascadeBlockDim(Uint16 numFilters, Uint16 outputBlockWidth, Uint16 outputBlockHeight, Uint16 *filterWidth, Uint16 *filterHeight, Uint16 *inBlockWidth, Uint16 *inBlockHeight) { Int16 i; inBlockWidth[numFilters-1]= _CPIS_alignWithSIMD(outputBlockWidth + filterWidth[numFilters-1]-1); inBlockHeight[numFilters-1]= outputBlockHeight + filterHeight[numFilters-1]-1; for(i=numFilters-2;i>=0;i--){ inBlockWidth[i]= _CPIS_alignWithSIMD(inBlockWidth[i+1] + filterWidth[i]-1); inBlockHeight[i]= inBlockHeight[i+1] + filterHeight[i]-1; } return 0; } Int32 _CPIS_getFilterCascadeBlockDim2(Uint16 numFilters, Uint16 outputBlockWidth, Uint16 outputBlockHeight, Uint16 *filterWidth, Uint16 *filterHeight, Uint16 *inBlockWidth, Uint16 *inBlockHeight, Uint8 *dsW, Uint8 *dsH) { Int16 i; inBlockWidth[numFilters-1]= (outputBlockWidth<=0;i--){ inBlockWidth[i]= (inBlockWidth[i+1]<