/* ======================================================================== */
/*  TEXAS INSTRUMENTS, INC.                                                 */
/*                                                                          */
/*  VICP  Signal Processing Library                                         */
/*                                                                          */
/*  This library contains proprietary intellectual property of Texas        */
/*  Instruments, Inc.  The library and its source code are protected by     */
/*  various copyrights, and portions may also be protected by patents or    */
/*  other legal protections.                                                */
/*                                                                          */
/*  This software is licensed for use with Texas Instruments TMS320         */
/*  family DSPs.  This license was provided to you prior to installing      */
/*  the software.  You may review this license by consulting the file       */
/*  TI_license.PDF which accompanies the files in this library.             */
/*                                                                          */
/* ------------------------------------------------------------------------ */
/*                                                                          */
/*     NAME                                                                 */
/*        _imgproclib.c -- imgproc module                                   */
/*                                                                          */
/*     DESCRIPTION                                                          */
/*         This file implements interface functions with IP_run library     */
/*                                                                          */
/*     REV                                                                  */
/*        version 0.0.2:  3rd March, 2009                                   */
/*        Added _CPIS_updateSrcDstPtr                                       */
/*                                                                          */
/*        version 0.0.1:  Feb 9th, 2009                                     */
/*        Initial version                                                   */
/*                                                                          */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2008 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */

//#define _BENCHMARK_VICP

/* Include the lib interface header files */
#include "cpisCore.h"
#include "_cpisCore.h"
#include "cpisSched.h"
#include "cpisCompu.h"
#include "_cpis_utils.h"
#include <tistdtypes.h>

/* Defined in another translation unit; _CPIS_setLast() compares it against 1. */
extern Uint8 _CPIS_demoMode;

/*
 * Input data-type translation table (16 entries).
 * Read by _CPIS_translateInputFormat() with index (format - CPIS_8BIT).
 */
static Uint16 _CPIS2VICP_inputFormat[]= {
    IMXTYPE_BYTE,  IMXTYPE_SHORT,  IMXTYPE_LONG,   0,
    IMXTYPE_UBYTE, IMXTYPE_USHORT, IMXTYPE_ULONG,  0,
    0,             0,              IMXTYPE_BYTE,   IMXTYPE_SHORT,
    IMXTYPE_LONG,  IMXTYPE_UBYTE,  IMXTYPE_USHORT, IMXTYPE_ULONG
};

/*
 * Output data-type translation table (16 entries).
 * Read by _CPIS_translateOutputFormat() with index (format - CPIS_8BIT).
 */
static Uint16 _CPIS2VICP_outputFormat[]= {
    IMXOTYPE_BYTE, IMXOTYPE_SHORT, IMXOTYPE_LONG,  0,
    IMXOTYPE_BYTE, IMXOTYPE_SHORT, IMXOTYPE_LONG,  0,
    0,             0,              IMXOTYPE_BYTE,  IMXOTYPE_SHORT,
    IMXOTYPE_LONG, IMXOTYPE_BYTE,  IMXOTYPE_SHORT, IMXOTYPE_LONG
};

/*
 * Per-format size table (32 entries), read by _CPIS_sizeof() with the
 * alpha bit stripped from the format.  The DMA setup routines use a
 * positive entry as a width multiplier and a negative entry as a width
 * divisor.
 */
static Int16 _CPIS2VICP_sizeOfFormat[]= {
     2, 2, 3, 2, 2, 3, 2, 2,
     2, 2, 2, 2, 3, 3, 3, 1,
     1, 2, 4, 8, 1, 2, 4, 8,
    -8, 1, 2, 4, 8, 2, 4, 8
};

/*
 * Common driver that runs one processing function through slot 0 of
 * CPIS_obj.ipRunTab.
 *
 *   handle     - receives NULL after a synchronous run, or the address of
 *                the ipRunTab[0] entry for an asynchronous one
 *   base       - common parameters; procBlockSize may be filled in here
 *   params     - function-specific parameters, passed through to callbacks
 *   execType   - CPIS_SYNC runs and deletes immediately; any other value
 *                only prepares the entry
 *   funcStruct - callback table; checkFunc and procFunc are always called,
 *                the other entries are optional
 *
 * Returns 0 on success, -1 on failure.  Once the lock has been taken,
 * every failure path releases it before returning.  On asynchronous
 * success this function itself does not release the lock.
 */
Int32 _CPIS_genericCall(
 CPIS_Handle *handle,
 CPIS_BaseParms *base,
 void *params,
 CPIS_ExecType execType,
 CPIS_FuncStruct *funcStruct
){

  Uint32 blockSizeLimit;
  Uint16 minDataSize;

  CHECK_INIT

  CPIS_obj.ipRunTab[0].functions= *funcStruct;

  /* Function-specific parameter validation comes first */
  if ((*funcStruct->checkFunc)(base, params)== -1)
    return -1;

  /* Block sizing is only handled when both sizing callbacks are provided */
  if (funcStruct->getMaxBlockSizeFunc && funcStruct->getBlockDimFunc) {
    if (base->procBlockSize.width!= 0 && base->procBlockSize.height!= 0) {
      /* Caller chose the block size: fetch the limits and validate it */
      (*funcStruct->getMaxBlockSizeFunc)(base, params, &blockSizeLimit, &minDataSize);

      /* NOTE(review): blockSizeLimit is Uint32 while the matching parameter of
         _CPIS_isBlockDimValid() is Uint16 - confirm limits never exceed 65535 */
      if (_CPIS_isBlockDimValid(base, _CPIS_alignWithDMA2(4, minDataSize), blockSizeLimit)== -1) {
        CPIS_errno= CPIS_NOSUPPORTDIM_ERROR;
        return -1;
      }
    }
    else if ((*funcStruct->getBlockDimFunc)(base, params, &base->procBlockSize.width, &base->procBlockSize.height)== -1) {
      /* A zero width or height asks the callback to pick the block size */
      CPIS_errno= CPIS_NOSUPPORTDIM_ERROR;
      return -1;
    }
  }

  CPIS_obj.lock(CPIS_obj.lockArg);

  if (_CPIS_setFirst(&CPIS_obj.ipRunTab[0])== -1)
    goto unlockAndFail;

  /* Input DMA: custom callback if present, default setup otherwise */
  if (funcStruct->setDmaInFunc) {
    if ((*funcStruct->setDmaInFunc)(&CPIS_obj.ipRunTab[0], base, params)== -1)
      goto unlockAndFail;
  }
  else if (_CPIS_setDmaInTransfers(&CPIS_obj.ipRunTab[0], base, funcStruct->arg)== -1)
    goto unlockAndFail;

  if ((*funcStruct->procFunc)(&CPIS_obj.ipRunTab[0], base, params)== -1)
    goto unlockAndFail;

  /* Output DMA: custom callback if present, default setup otherwise */
  if (funcStruct->setDmaOutFunc) {
    if ((*funcStruct->setDmaOutFunc)(&CPIS_obj.ipRunTab[0], base, params)== -1)
      goto unlockAndFail;
  }
  else if (_CPIS_setDmaOutTransfers(&CPIS_obj.ipRunTab[0], base, funcStruct->arg)== -1)
    goto unlockAndFail;

  /* DMA constraints (such as alignment) are only checked for multi-block runs */
  if (CPIS_obj.ipRunTab[0].singleBlockProcessing!= 1
      && _CPIS_validateDMAconstraints(&CPIS_obj.ipRunTab[0])== -1)
    goto unlockAndFail;

  if (_CPIS_setLast(&CPIS_obj.ipRunTab[0], execType)== -1)
    goto unlockAndFail;

  if (execType!= CPIS_SYNC) {
    /* Asynchronous: hand the prepared entry back to the caller */
    *handle= &CPIS_obj.ipRunTab[0];
    return 0;
  }

  /* Synchronous: run to completion (where this build requires it) and delete */
#if !defined(_4MACS) || defined(_SIMCOP)
  CPIS_start(NULL);
  CPIS_wait(NULL);
#endif
  CPIS_delete(&CPIS_obj.ipRunTab[0]);
  *handle= NULL;
  return 0;

unlockAndFail:
  CPIS_obj.unlock(CPIS_obj.unlockArg);
  return -1;

}

/* Lock callback that does nothing; its argument is ignored. */
void _CPIS_dummyVicpLock(void*dummy){
  (void)dummy;
}

/* Unlock callback that does nothing; its argument is ignored. */
void _CPIS_dummyVicpUnlock(void*dummy){
  (void)dummy;
}


/*
 * Default setup of the input (DDR -> image buffer) DMA descriptors.
 *
 *   ipRun - entry being prepared; its ipRunObj.dmaIn[], block counts,
 *           multiCoreMode, singleBlockProcessing, dmaBlockTferType and
 *           imgbufLen are written here
 *   base  - source format, buffers, ROI size and processing block size
 *   arg   - optional settings; when NULL, the entry's current
 *           multiCoreMode and alignImgBufWidth = 1 are used
 *
 * Returns 0 on success, or -1 when:
 *   - a processing block dimension is zero (CPIS_errno is set to
 *     CPIS_NOSUPPORTDIM_ERROR),
 *   - no transfer type could be selected,
 *   - more than CPIS_MAX_SRC_CHANNELS input channels would be needed
 *     (CPIS_errno is set to CPIS_NOT_ENOUGH_EDMACHAN_ERROR).
 */
Int32 _CPIS_setDmaInTransfers(CPIS_IpRun *ipRun, CPIS_BaseParms *base, CPIS_Arg *arg){

  /* These DMA structures are used to pass information about dma transfers to IP_RUN library */
  Int32 dmaIdx;
  IP_run *ipRunObj;
  Uint16 trueSrcFormat, alphaOn;
  Int32 oldScaleFactor;
  Int32 bestTferType;
  Uint16 roundBlockWidth;
  Uint32 ddrOfstNextBlock, ddrOfstNextBlockRow;
  Uint16 numHorzBlocks, numVertBlocks;
  CPIS_Arg defArg;

  Int32 scaleFactorSrc= 1;
  Uint32 shiftFactorUVhorzSrc=0;
  Uint32 shiftFactorUVvertSrc=0;

  ipRunObj= &ipRun->ipRunObj;

  /* No settings supplied: fall back to a local default set */
  if (arg== NULL) {
     arg= &defArg;
     arg->multiCoreMode= ipRun->multiCoreMode;
     arg->alignImgBufWidth= 1;
  }

  /* The block counts below divide by the block dimensions; the caller only
     validates them when its optional sizing callbacks are present, so a
     zero dimension must be rejected here. */
  if (base->procBlockSize.width== 0 || base->procBlockSize.height== 0) {
     CPIS_errno= CPIS_NOSUPPORTDIM_ERROR;
     return -1;
  }

  numHorzBlocks= base->roiSize.width / base->procBlockSize.width;
  numVertBlocks= base->roiSize.height / base->procBlockSize.height;

  ipRun->multiCoreMode= arg->multiCoreMode;

  if ((numHorzBlocks * numVertBlocks)<= 2){

     /* This will disable the scheduling in IP_run */
     ipRun->singleBlockProcessing= 1;

     if ((numHorzBlocks * numVertBlocks)== 1) {
        ipRun->multiCoreMode= CPIS_MCORE_MODE_SINGLE;
        ipRunObj->numHorzBlocks= 1;
        ipRunObj->numVertBlocks= 1;
        }
     else {
        ipRun->multiCoreMode= CPIS_MCORE_MODE_DUAL_CLONE;

        /* Two blocks stacked vertically are walked column-wise so that they
           can still be described as a 2x1 arrangement */
        if (numVertBlocks== 2)
           ipRun->dmaBlockTferType= CPIS_DMATFER_COL_WISE;
        else
           ipRun->dmaBlockTferType= CPIS_DMATFER_ROW_WISE;

        ipRunObj->numHorzBlocks= 2;
        ipRunObj->numVertBlocks= 1;
        }
  }
  else {
     if (ipRun->dmaBlockTferType== CPIS_DMATFER_AUTO) {
        /* Test the returned value itself, so that the failure check does not
           depend on the signedness of the dmaBlockTferType field */
        bestTferType= _CPIS_getBestDmaTferType(ipRun, numHorzBlocks, numVertBlocks);
        if (bestTferType== -1)
           return -1;
        ipRun->dmaBlockTferType= bestTferType;
     }

     if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) {
        ipRunObj->numHorzBlocks= numHorzBlocks;
        ipRunObj->numVertBlocks= numVertBlocks;
     } else {
        /* Column-wise: horizontal and vertical counts are exchanged */
        ipRunObj->numVertBlocks= numHorzBlocks;
        ipRunObj->numHorzBlocks= numVertBlocks;
     }
  }

  trueSrcFormat= base->srcFormat[0] & ~CPIS_ALPHA;
  alphaOn= base->srcFormat[0] & CPIS_ALPHA;

  /* Calculate scaleFactor used to rescale the input widths */
  if ((trueSrcFormat) <= CPIS_BAYER_P){ /* if format is planar */

    if (alphaOn)
      ipRunObj->numDmaIn= 4;
    else
      ipRunObj->numDmaIn= 3;

    /* Shift factors applied to the dimensions of the channels after the first */
    switch (trueSrcFormat){
      case CPIS_YUV_420P:
        shiftFactorUVhorzSrc= shiftFactorUVvertSrc= 1;
        break;
      case CPIS_YUV_422P:
        shiftFactorUVhorzSrc= 1;
        break;
      case CPIS_YUV_411P:
        shiftFactorUVhorzSrc= 2;
        break;
      default:
        break;
    } /* switch */

  }/* if */
  else {

    if (trueSrcFormat== CPIS_YUV_420SP) {

      shiftFactorUVvertSrc= 1;
      if (alphaOn)
        ipRunObj->numDmaIn= 3;
      else
        ipRunObj->numDmaIn= 2;

    } else {
      if (alphaOn)
        ipRunObj->numDmaIn= 2;
      else
        ipRunObj->numDmaIn= 1;
    }

    scaleFactorSrc= _CPIS_sizeof(base->srcFormat[0]);

  }/* else */

  ipRunObj->numDmaIn= ipRunObj->numDmaIn*base->numInput;

  if (ipRunObj->numDmaIn > CPIS_MAX_SRC_CHANNELS) {
     CPIS_errno= CPIS_NOT_ENOUGH_EDMACHAN_ERROR;
     return -1;
  }

  /*
   *
   * Initialize dma parameters
   *
   */
  ipRunObj->dmaIn= &CPIS_obj.dmaIn[0];

  /* IP_RUN will do dynamic channel allocation through dmac module. */
  ipRunObj->dmaIn[0].dmaChNo = DMAC_CHAN_ANY;
  /* We are going to initialize the dma transfer parameters normally */
  /* in which each block is separated from the previous one by a fixed offset. */
  /* so no need of transfer param table. */
  ipRunObj->dmaIn[0].useTferParamTable= 0;
  /* NOTE(review): dmaChNo and useTferParamTable are only written for entry 0
     in this function - confirm the other entries are initialized elsewhere */

  if (arg->alignImgBufWidth)
     roundBlockWidth= _CPIS_alignWithSIMD(base->procBlockSize.width);
  else
     roundBlockWidth= base->procBlockSize.width;

  ipRunObj->dmaIn[0].ddrAddr= (Uint32)base->srcBuf[0].ptr;
  ipRunObj->dmaIn[0].imgBufAddr= (Int32)(ipRun->imgbufInOfst + IMGBUF_A_BASE);

  /* Unscaled DDR steps between consecutive blocks and between block rows */
  if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) {
     ddrOfstNextBlock= base->procBlockSize.width;
     ddrOfstNextBlockRow= base->srcBuf[0].stride * base->procBlockSize.height;
  }
  else {
     ddrOfstNextBlock= base->procBlockSize.height*base->srcBuf[0].stride;
     ddrOfstNextBlockRow= base->procBlockSize.width;
  }

  if (scaleFactorSrc > 0) { /* For any positive scaleFactorSrc, the scaleFactorSrc is a multiplier */
     ipRunObj->dmaIn[0].ddrWidth = scaleFactorSrc*base->srcBuf[0].stride;
     ipRunObj->dmaIn[0].imgBufWidth = scaleFactorSrc*roundBlockWidth;
     ipRunObj->dmaIn[0].blockWidth  = scaleFactorSrc*base->procBlockSize.width;
     ipRunObj->dmaIn[0].ddrOfstNextBlock= scaleFactorSrc*ddrOfstNextBlock;
     ipRunObj->dmaIn[0].ddrOfstNextBlockRow= scaleFactorSrc*ddrOfstNextBlockRow;
  }
  else { /* otherwise it is a divisor in order to handle type such as CPIS_1BIT */
     scaleFactorSrc= -scaleFactorSrc;
     ipRunObj->dmaIn[0].ddrWidth = base->srcBuf[0].stride/scaleFactorSrc;
     ipRunObj->dmaIn[0].imgBufWidth = roundBlockWidth/scaleFactorSrc;
     ipRunObj->dmaIn[0].blockWidth  = base->procBlockSize.width/scaleFactorSrc;
     ipRunObj->dmaIn[0].ddrOfstNextBlock= ddrOfstNextBlock/scaleFactorSrc;
     ipRunObj->dmaIn[0].ddrOfstNextBlockRow= ddrOfstNextBlockRow/scaleFactorSrc;
  }

  ipRunObj->dmaIn[0].blockHeight = base->procBlockSize.height;

  /* imgbufLen accumulates the image-buffer footprint of all input channels */
  ipRun->imgbufLen= ipRunObj->dmaIn[0].imgBufWidth*ipRunObj->dmaIn[0].blockHeight;
  oldScaleFactor= scaleFactorSrc;


  for (dmaIdx=1; dmaIdx<ipRunObj->numDmaIn; dmaIdx++) {

    if (alphaOn && (trueSrcFormat > CPIS_BAYER_P)) {
      /* Non-planar format with alpha: every extra channel uses a scale of 1;
         for 420SP, channel 2 additionally drops the UV shift factors */
      scaleFactorSrc= 1;
      if (trueSrcFormat== CPIS_YUV_420SP && dmaIdx==2) {
         shiftFactorUVhorzSrc= 0;
         shiftFactorUVvertSrc= 0;
      }
    }
    else if (trueSrcFormat == CPIS_YUV_420SP) /* if 420 semi-planar */
      scaleFactorSrc= 1;
    else if (trueSrcFormat >= CPIS_YUV_422IBE) /* if not planar */
      scaleFactorSrc= _CPIS_sizeof(base->srcFormat[dmaIdx]);
    else /* if planar */
      scaleFactorSrc= oldScaleFactor;

    ipRunObj->dmaIn[dmaIdx].ddrAddr= (Uint32)base->srcBuf[dmaIdx].ptr;
    /* Channels are packed back to back in the image buffer */
    ipRunObj->dmaIn[dmaIdx].imgBufAddr= ipRunObj->dmaIn[0].imgBufAddr
                                  + ipRun->imgbufLen;

    ipRunObj->dmaIn[dmaIdx].blockHeight = base->procBlockSize.height >> shiftFactorUVvertSrc;

    if (scaleFactorSrc > 0) { /* For any positive scaleFactorSrc, the scaleFactorSrc is a multiplier */
     ipRunObj->dmaIn[dmaIdx].ddrWidth = scaleFactorSrc*base->srcBuf[dmaIdx].stride;
     ipRunObj->dmaIn[dmaIdx].imgBufWidth = (scaleFactorSrc*roundBlockWidth) >> shiftFactorUVhorzSrc;
     ipRunObj->dmaIn[dmaIdx].blockWidth = (scaleFactorSrc*base->procBlockSize.width) >> shiftFactorUVhorzSrc;
     if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) {
        ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlock= (scaleFactorSrc*base->procBlockSize.width) >> shiftFactorUVhorzSrc;
        ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlockRow= ipRunObj->dmaIn[dmaIdx].ddrWidth * ipRunObj->dmaIn[dmaIdx].blockHeight;
     }
     else {
        ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlock= ipRunObj->dmaIn[dmaIdx].ddrWidth *ipRunObj->dmaIn[dmaIdx].blockHeight;
        ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlockRow= (scaleFactorSrc*base->procBlockSize.width) >> shiftFactorUVhorzSrc;
     }
    }
    else { /* otherwise it is a divisor in order to handle type such as CPIS_1BIT */
     scaleFactorSrc= -scaleFactorSrc;
     ipRunObj->dmaIn[dmaIdx].ddrWidth = base->srcBuf[dmaIdx].stride/scaleFactorSrc;
     ipRunObj->dmaIn[dmaIdx].imgBufWidth = (roundBlockWidth/scaleFactorSrc) >> shiftFactorUVhorzSrc;
     ipRunObj->dmaIn[dmaIdx].blockWidth  = (base->procBlockSize.width/scaleFactorSrc) >> shiftFactorUVhorzSrc;
     if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) {
        ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlock= (base->procBlockSize.width/scaleFactorSrc) >> shiftFactorUVhorzSrc;
        ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlockRow= ipRunObj->dmaIn[dmaIdx].ddrWidth * ipRunObj->dmaIn[dmaIdx].blockHeight;
     }
     else {
        ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlock= ipRunObj->dmaIn[dmaIdx].ddrWidth * ipRunObj->dmaIn[dmaIdx].blockHeight;
        ipRunObj->dmaIn[dmaIdx].ddrOfstNextBlockRow= (base->procBlockSize.width/scaleFactorSrc) >> shiftFactorUVhorzSrc;
     }
    }

    ipRun->imgbufLen+= ipRunObj->dmaIn[dmaIdx].imgBufWidth*ipRunObj->dmaIn[dmaIdx].blockHeight;
#ifdef _BENCHMARK_VICP
    /* Benchmark builds shrink every transfer to a fixed 16x1 block */
    ipRunObj->dmaIn[dmaIdx].blockWidth  = 16;
    ipRunObj->dmaIn[dmaIdx].blockHeight  = 1;
#endif
  }
#ifdef _BENCHMARK_VICP
  ipRunObj->dmaIn[0].blockWidth  = 16;
  ipRunObj->dmaIn[0].blockHeight  = 1;
#endif
  return 0;
}



/*
 * Default setup of the output (image buffer -> DDR) DMA descriptors.
 *
 *   ipRun - entry being prepared; its ipRunObj.dmaOut[] and numDmaOut are
 *           written here.  ipRunObj.numDmaIn, imgbufOutOfst[] and
 *           dmaBlockTferType are read, so the input setup and the
 *           processing callback are expected to have run already.
 *   base  - destination format, buffers and processing block size
 *   arg   - optional settings; when NULL, alignImgBufWidth = 1 is used
 *
 * Returns 0 on success, or -1 with CPIS_errno set to
 * CPIS_NOT_ENOUGH_EDMACHAN_ERROR when the channel budget is exceeded.
 */
Int32 _CPIS_setDmaOutTransfers(CPIS_IpRun *ipRun, CPIS_BaseParms *base, CPIS_Arg *arg){

  /* These DMA structures are used to pass information about dma transfers to IP_RUN library */
  Int32 dmaIdx;
  IP_run *ipRunObj;
  Uint16 trueDstFormat, alphaOn;
  Int32 oldScaleFactor;
  Uint16 roundBlockWidth;
  Uint32 ddrOfstNextBlock, ddrOfstNextBlockRow;
  CPIS_Arg defArg;

  Int32 scaleFactorDst= 1;
  Uint32 shiftFactorUVhorzDst=0;
  Uint32 shiftFactorUVvertDst=0;

  ipRunObj= &ipRun->ipRunObj;

  /* No settings supplied: fall back to a local default set */
  if (arg== NULL) {
     arg= &defArg;
     arg->multiCoreMode= ipRun->multiCoreMode;
     arg->alignImgBufWidth= 1;
  }

  trueDstFormat= base->dstFormat[0] & ~CPIS_ALPHA;
  alphaOn= base->dstFormat[0] & CPIS_ALPHA;

  /* Calculate scaleFactor used to rescale the output widths */
  if (trueDstFormat<= CPIS_BAYER_P){ /* if format is planar */

    if (alphaOn)
      ipRunObj->numDmaOut= 4;
    else
      ipRunObj->numDmaOut= 3;

    /* Shift factors applied to the dimensions of the channels after the first */
    switch (trueDstFormat){
      case CPIS_YUV_420P:
        shiftFactorUVhorzDst= shiftFactorUVvertDst= 1;
        break;
      case CPIS_YUV_422P:
        shiftFactorUVhorzDst= 1;
        break;
      case CPIS_YUV_411P:
        shiftFactorUVhorzDst= 2;
        break;
      default:
        break;
    } /* switch */

  }/* if */
  else {
    if (trueDstFormat== CPIS_YUV_420SP) {
      shiftFactorUVvertDst= 1;
      if (alphaOn)
        ipRunObj->numDmaOut= 3;
      else
        ipRunObj->numDmaOut= 2;

    } else {
      if (alphaOn)
        ipRunObj->numDmaOut= 2;
      else
        ipRunObj->numDmaOut= 1;
    }

    scaleFactorDst= _CPIS_sizeof(base->dstFormat[0]);

  }/* else */


  /*
   *
   * Initialize dma parameters
   *
   */

  /* A transfer type still left on AUTO at this point defaults to row-wise */
  if (ipRun->dmaBlockTferType== CPIS_DMATFER_AUTO)
     ipRun->dmaBlockTferType= CPIS_DMATFER_ROW_WISE;

  ipRunObj->numDmaOut= ipRunObj->numDmaOut*base->numOutput;

  if (ipRunObj->numDmaOut > CPIS_MAX_DST_CHANNELS || (ipRunObj->numDmaIn + ipRunObj->numDmaOut > CPIS_obj.maxNumDma) ) {
      printf("Embedded Vision Framework error ! Number of DMA channels required by this function is %d and exceeds maximum allowed of %d\n", ipRunObj->numDmaIn + ipRunObj->numDmaOut, CPIS_obj.maxNumDma);
      CPIS_errno= CPIS_NOT_ENOUGH_EDMACHAN_ERROR;
      return -1;
  }

  ipRunObj->dmaOut= &CPIS_obj.dmaOut[0];

  /* IP_RUN will do dynamic channel allocation through dmac module. */
  ipRunObj->dmaOut[0].dmaChNo = DMAC_CHAN_ANY;
  /* We are going to initialize the dma transfer parameters normally */
  /* in which each block is separated from the previous one by a fixed offset. */
  /* so no need of transfer param table. */
  ipRunObj->dmaOut[0].useTferParamTable= 0;

  if (arg->alignImgBufWidth)
     roundBlockWidth= _CPIS_alignWithSIMD(base->procBlockSize.width);
  else
     roundBlockWidth= base->procBlockSize.width;

  /* Unscaled DDR steps between consecutive blocks and between block rows */
  if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) {
     ddrOfstNextBlock= base->procBlockSize.width;
     ddrOfstNextBlockRow= base->dstBuf[0].stride * base->procBlockSize.height;
  }
  else {
     ddrOfstNextBlock= base->procBlockSize.height*base->dstBuf[0].stride;
     ddrOfstNextBlockRow= base->procBlockSize.width;
  }

  ipRunObj->dmaOut[0].ddrAddr= (Uint32)base->dstBuf[0].ptr;
  ipRunObj->dmaOut[0].imgBufAddr= (Int32)(ipRun->imgbufOutOfst[0] + IMGBUF_A_BASE);

  if (scaleFactorDst > 0) { /* For any positive scaleFactorDst, the scaleFactorDst is a multiplier */
     ipRunObj->dmaOut[0].ddrWidth = scaleFactorDst*base->dstBuf[0].stride;
     ipRunObj->dmaOut[0].imgBufWidth = scaleFactorDst*roundBlockWidth;
     ipRunObj->dmaOut[0].blockWidth  = scaleFactorDst*base->procBlockSize.width;
     ipRunObj->dmaOut[0].ddrOfstNextBlock= scaleFactorDst*ddrOfstNextBlock;
     ipRunObj->dmaOut[0].ddrOfstNextBlockRow= scaleFactorDst*ddrOfstNextBlockRow;
  }
  else { /* otherwise it is a divisor in order to handle type such as CPIS_1BIT */
     scaleFactorDst= -scaleFactorDst;
     ipRunObj->dmaOut[0].ddrWidth = base->dstBuf[0].stride/scaleFactorDst;
     ipRunObj->dmaOut[0].imgBufWidth = roundBlockWidth/scaleFactorDst;
     ipRunObj->dmaOut[0].blockWidth  = base->procBlockSize.width/scaleFactorDst;
     /* Scale the transfer-type dependent step computed above, as the
        multiplier branch and the input setup do; using the block width
        here is only right for row-wise transfers */
     ipRunObj->dmaOut[0].ddrOfstNextBlock= ddrOfstNextBlock/scaleFactorDst;
     ipRunObj->dmaOut[0].ddrOfstNextBlockRow= ddrOfstNextBlockRow/scaleFactorDst;
  }

  ipRunObj->dmaOut[0].blockHeight = base->procBlockSize.height;

  oldScaleFactor= scaleFactorDst;
  for (dmaIdx=1; dmaIdx<ipRunObj->numDmaOut; dmaIdx++) {

    if (alphaOn && (trueDstFormat > CPIS_BAYER_P)) {
      /* Non-planar format with alpha: every extra channel uses a scale of 1;
         for 420SP, channel 2 additionally drops the UV shift factors */
      scaleFactorDst= 1;
      if (trueDstFormat== CPIS_YUV_420SP && dmaIdx==2) {
         shiftFactorUVhorzDst= 0;
         shiftFactorUVvertDst= 0;
      }
    }
    else if (trueDstFormat == CPIS_YUV_420SP) /* if 420 semi-planar */
      scaleFactorDst= 1;
    else if (trueDstFormat >= CPIS_YUV_422IBE) /* if not planar */
      scaleFactorDst= _CPIS_sizeof(base->dstFormat[dmaIdx]);
    else /* if planar */
      scaleFactorDst= oldScaleFactor;

    ipRunObj->dmaOut[dmaIdx].ddrAddr= (Uint32)base->dstBuf[dmaIdx].ptr;
    ipRunObj->dmaOut[dmaIdx].imgBufAddr= (Int32)(ipRun->imgbufOutOfst[dmaIdx] + IMGBUF_A_BASE);

    ipRunObj->dmaOut[dmaIdx].blockHeight = base->procBlockSize.height >> shiftFactorUVvertDst;

    if (scaleFactorDst > 0) { /* For any positive scaleFactorDst, the scaleFactorDst is a multiplier */
     ipRunObj->dmaOut[dmaIdx].ddrWidth = scaleFactorDst*base->dstBuf[dmaIdx].stride;
     ipRunObj->dmaOut[dmaIdx].imgBufWidth = (scaleFactorDst*roundBlockWidth) >> shiftFactorUVhorzDst;
     ipRunObj->dmaOut[dmaIdx].blockWidth  = (scaleFactorDst*base->procBlockSize.width) >> shiftFactorUVhorzDst;
     if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) {
        ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlock= (scaleFactorDst*base->procBlockSize.width) >> shiftFactorUVhorzDst;
        ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlockRow= ipRunObj->dmaOut[dmaIdx].ddrWidth * ipRunObj->dmaOut[dmaIdx].blockHeight;
     }
     else {
        ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlock= ipRunObj->dmaOut[dmaIdx].ddrWidth * ipRunObj->dmaOut[dmaIdx].blockHeight;
        ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlockRow= (scaleFactorDst*base->procBlockSize.width) >> shiftFactorUVhorzDst;
     }
    }
    else { /* otherwise it is a divisor in order to handle type such as CPIS_1BIT */
     scaleFactorDst= -scaleFactorDst;
     ipRunObj->dmaOut[dmaIdx].ddrWidth = base->dstBuf[dmaIdx].stride/scaleFactorDst;
     ipRunObj->dmaOut[dmaIdx].imgBufWidth = (roundBlockWidth/scaleFactorDst) >> shiftFactorUVhorzDst;
     ipRunObj->dmaOut[dmaIdx].blockWidth  = (base->procBlockSize.width/scaleFactorDst) >> shiftFactorUVhorzDst;
     if (ipRun->dmaBlockTferType== CPIS_DMATFER_ROW_WISE) {
        ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlock= (base->procBlockSize.width/scaleFactorDst) >> shiftFactorUVhorzDst;
        ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlockRow= ipRunObj->dmaOut[dmaIdx].ddrWidth * ipRunObj->dmaOut[dmaIdx].blockHeight;
     }
     else {
        ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlock= ipRunObj->dmaOut[dmaIdx].ddrWidth * ipRunObj->dmaOut[dmaIdx].blockHeight;
        ipRunObj->dmaOut[dmaIdx].ddrOfstNextBlockRow= (base->procBlockSize.width/scaleFactorDst) >> shiftFactorUVhorzDst;
     }
    }
#ifdef _BENCHMARK_VICP
    /* Benchmark builds shrink every transfer to a fixed 16x1 block */
    ipRunObj->dmaOut[dmaIdx].blockWidth  = 16;
    ipRunObj->dmaOut[dmaIdx].blockHeight  = 1;
#endif
  }

#ifdef _BENCHMARK_VICP
  ipRunObj->dmaOut[0].blockWidth  = 16;
  ipRunObj->dmaOut[0].blockHeight  = 1;
#endif
  return 0;
}


/*
 * Reset an ipRun entry to its defaults before a function is set up in it.
 *
 * Returns -1 with CPIS_errno = CPIS_MAXNUMFUNCREACHED when
 * CPIS_obj.numProcFunc equals CPIS_obj.maxNumProcFunc; otherwise turns
 * demo mode off, re-initializes the entry, switches the buffers to the
 * CPU and returns 0.
 */
Int32 _CPIS_setFirst(CPIS_IpRun *ipRun){

  /* No free slot left: report it before touching the entry */
  if (CPIS_obj.numProcFunc== CPIS_obj.maxNumProcFunc){
    CPIS_errno= CPIS_MAXNUMFUNCREACHED;
    return -1;
  }

  _CPIS_setDemoMode(0);

  /* Execution-mode defaults */
#ifdef _DUAL_CORE
  ipRun->multiCoreMode= CPIS_MCORE_MODE_MAXNUM_CLONE;
#else
  ipRun->multiCoreMode= CPIS_MCORE_MODE_SINGLE;
#endif
  ipRun->dmaBlockTferType= CPIS_DMATFER_AUTO;
  ipRun->singleBlockProcessing= 0;
  ipRun->resetFlag= 0;

  /* Buffer offsets: every entry currently starts at offset 0 */
#if 0 /* enable this for a later version */
  ipRun->imgbufOfst= CPIS_obj.imgbufOfst;
  ipRun->coefOfst= CPIS_obj.coefOfst;
  ipRun->coef1Ofst= CPIS_obj.coef1Ofst;
  ipRun->cmdOfst= CPIS_obj.cmdOfst;
  ipRun->cmd1Ofst= CPIS_obj.cmd1Ofst;
#else
  ipRun->imgbufInOfst= ipRun->imgbufOutOfst[0]= 0;
  ipRun->cmdOfst= ipRun->cmd1Ofst= 0;
  ipRun->coefOfst= ipRun->coef1Ofst= 0;
#endif

  /* No context save/restore requested by default */
  ipRun->context.action= CPIS_CONTEXT_NONE;
  ipRun->context.coef0Ptr= NULL;
  ipRun->context.coef1Ptr= NULL;
  ipRun->context.cmd0Ptr= NULL;
  ipRun->context.cmd1Ptr= NULL;

  /* IP_run object defaults; the command offset is passed on halved */
  ipRun->ipRunObj.customExtension= NULL;
  ipRun->ipRunObj.extension= NULL; /* NULL for DM350 */
  ipRun->ipRunObj.cmdptr_ofst= ipRun->cmdOfst>>1;

  _CPIS_switchBuffersToCPU();

  return 0;

}

/*
 * Final step of setting up an ipRun entry: checks the buffer budgets,
 * records the end offsets in CPIS_obj, fills in the IP_run extension
 * (only in _4MACS builds), writes back the coefficient and command
 * buffers and registers the entry with IP_RUN_registerAlgo().
 *
 *   ipRun    - entry whose lengths, offsets and context have been filled in
 *   execType - only used in _4MACS builds, where it is stored in
 *              extension.async
 *
 * Returns 0 on success.  Returns -1 with CPIS_errno = CPIS_OUTOFMEM when
 * a length exceeds its buffer size, or -1 with CPIS_errno =
 * CPIS_ALGO_REGISTRATION_ERROR when registration fails.
 *
 * NOTE(review): the CPIS_obj offsets updated below are not restored when
 * registration fails - confirm that this is harmless.
 */
Int32 _CPIS_setLast(CPIS_IpRun *ipRun, CPIS_ExecType execType){
  
#if defined(_4MACS)
  /* NOTE(review): this is an automatic variable and its address is stored in
     ipRun->ipRunObj.extension further down; it stops being valid when this
     function returns - confirm IP_RUN_registerAlgo() does not keep the pointer */
  IP_RUN_extension extension;
#endif
  
  _CPIS_switchBuffersToCPU();
  
  /* Budget check; _SIMCOP builds allow twice the image buffer size.
     The two preprocessor branches share the body that follows. */
#ifdef _SIMCOP
  if (ipRun->imgbufLen > 2*IMGBUF_SIZE_BYTES || ipRun->coefLen > COEFFBUF_SIZE_BYTES || ipRun->cmdLen > CMDBUF_SIZE_BYTES) {
#else
  if (ipRun->imgbufLen > IMGBUF_SIZE_BYTES || ipRun->coefLen > COEFFBUF_SIZE_BYTES || ipRun->cmdLen > CMDBUF_SIZE_BYTES) {
#endif
      CPIS_errno= CPIS_OUTOFMEM;
      printf("Out of mem, img buf= %d, coef = %d, cmd= %d\n", ipRun->imgbufLen, ipRun->coefLen, ipRun->cmdLen);
      return -1;
      };

  /* Record where this entry's data ends in each buffer */
  CPIS_obj.imgbufOfst=  ipRun->imgbufInOfst + ipRun->imgbufLen;
  CPIS_obj.coefOfst= ipRun->coefOfst + ipRun->coefLen;
  CPIS_obj.cmdOfst= ipRun->cmdOfst + ipRun->cmdLen;
  CPIS_obj.coef1Ofst= ipRun->coef1Ofst + ipRun->coef1Len;
  CPIS_obj.cmd1Ofst= ipRun->cmd1Ofst + ipRun->cmd1Len;

  /* The command offset is passed on halved */
  ipRun->ipRunObj.cmdptr_ofst= ipRun->cmdOfst>>1;
  ipRun->ipRunObj.compCode= 0;
  
#ifdef _BENCHMARK_EDMA
  /* Benchmark builds overwrite command words with 0x8000 (the same value
     that the demo-mode path at the end of this function writes) */
  *((Uint16*)CMDBUF_BASE + ipRun->ipRunObj.cmdptr_ofst)= 0x8000;
  *((Uint16*)CMD1BUF_BASE + (ipRun->cmd1Len>>1))= 0x8000;
#endif

  /* Enable the completion interrupt when a wait callback is installed.
     NOTE(review): compIntEna is never cleared in this function */
  if (CPIS_obj.waitCB)
    ipRun->ipRunObj.compIntEna= 1;
#if defined(_4MACS)
  extension.magicWord= 0x12344321;
  
  extension.async= execType;
  
  /* Bit 15 of async flags single-block processing */
  if (ipRun->singleBlockProcessing== 1)
     extension.async|= 0x8000;
        
  ipRun->ipRunObj.extension= &extension;
  
  /* Bit 15 of multiCoreMode is masked off before comparing the mode */
  if ((ipRun->multiCoreMode & (~0x8000))== CPIS_MCORE_MODE_MAXNUM_CLONE){
     
    /* Clone mode: restore requests for set 0 are duplicated for set 1,
       using the same pointers and lengths */
    if (ipRun->context.action & CPIS_CONTEXT_RESTORE_CMD0) {
        ipRun->context.action |= CPIS_CONTEXT_RESTORE_CMD1;
        ipRun->context.cmd1Ptr= ipRun->context.cmd0Ptr;
        ipRun->cmd1Len= ipRun->cmdLen; 
    }
    
    if (ipRun->context.action & CPIS_CONTEXT_RESTORE_COEF0) {
        ipRun->context.action |= CPIS_CONTEXT_RESTORE_COEF1;
        ipRun->context.coef1Ptr= ipRun->context.coef0Ptr;
        ipRun->coef1Len= ipRun->coefLen;
    }
    
    /* If the cloned DMA setup fails, fall back to single-core mode */
    if (_CPIS_multiCoreCloneDmaSetup(ipRun, 2)== -1) {
        _CPIS_printf("Reversing to single core processing.");
        ipRun->multiCoreMode= CPIS_MCORE_MODE_SINGLE;
        ipRun->context.action &= ~(CPIS_CONTEXT_SAVE_CMD1 | CPIS_CONTEXT_RESTORE_CMD1);
        CPIS_obj.cmd1Ofst= ipRun->cmd1Ofst + ipRun->cmd1Len;
     }
     else {
        /* Set 1 reuses the command and coefficient offsets of set 0 */
        _CPIS_multiCoreCloneMemSetup(ipRun, 2);
        extension.cmd1ptr_ofst= ipRun->cmdOfst>>1;
        ipRun->cmd1Ofst= ipRun->cmdOfst;
        ipRun->coef1Ofst= ipRun->coefOfst;
        }
  }
  else if ((ipRun->multiCoreMode & (~0x8000))== CPIS_MCORE_MODE_MAXNUM_SPLIT){
        /* Split mode needs more than 3 horizontal blocks; otherwise the mode
           is downgraded to single core and setup continues */
        if (ipRun->ipRunObj.numHorzBlocks <=3) {
           _CPIS_printf("Fatal error: Dual-core split mode cannot be enabled because number of horizontal block <=3");
           ipRun->multiCoreMode= CPIS_MCORE_MODE_SINGLE;
        }
        extension.cmd1ptr_ofst= ipRun->cmd1Ofst>>1;
        CACHE_writeBack(COEFF1BUF_BASE + ipRun->coef1Ofst, ipRun->coef1Len, 0);
        CACHE_writeBack(CMD1BUF_BASE + ipRun->cmd1Ofst, ipRun->cmd1Len, 1);
  }
  /* NOTE(review): extension.cmd1ptr_ofst is not assigned when neither branch
     above (or only the clone-failure path) is taken */
  
  extension.iMXid= ipRun->multiCoreMode;
  
  /* Copy the context, then complete it with the lengths and offsets */
  extension.context= ipRun->context;
  extension.context.cmd0Len= ipRun->cmdLen;
  extension.context.cmd1Len= ipRun->cmd1Len;
  extension.context.coef0Len= ipRun->coefLen;
  extension.context.coef0Ofst= ipRun->coefOfst;
  extension.context.coef1Len= ipRun->coef1Len;
  extension.context.coef1Ofst= ipRun->coef1Ofst;
  
#else
  ipRun->ipRunObj.extension= NULL;
#endif
  ipRun->ipRunObj.customExtension= NULL;
  
  /* Write back the coefficient and command data before registration */
  CACHE_writeBack(COEFFBUF_BASE + ipRun->coefOfst, ipRun->coefLen, 0);
  CACHE_writeBack(CMDBUF_BASE + ipRun->cmdOfst, ipRun->cmdLen, 1);

  /* The registered-function count only grows on successful registration */
  if (IP_RUN_registerAlgo(&ipRun->ipRunObj, 0)== -1) {
     CPIS_errno= CPIS_ALGO_REGISTRATION_ERROR;
     return -1;
  }
  else
     CPIS_obj.numProcFunc++;
  
  /* In demo mode, we want to leave the other half unprocessed so we put a sleep */
  /* The second command buffer is switched to the CPU, its first word is
     overwritten with 0x8000, and it is switched back to the VICP */
  if (_CPIS_demoMode== 1) {
     IMGBUF_switch(SELCMD1BUF, CMD1BUFCPU);
     *((Uint16*)CMD1BUF_BASE)= 0x8000;
     IMGBUF_switch(SELCMD1BUF, CMD1BUFVICP);
  }
     
  return 0;

}


/*
 * Release the bookkeeping held for a prepared entry.
 *
 * Subtracts the entry's buffer lengths from the running offsets in
 * CPIS_obj, decrements the registered-function count and calls the
 * unlock callback.
 *
 *   handle - entry to release (a CPIS_IpRun pointer behind the opaque handle)
 *
 * Returns 0 on success.  Returns -1 without modifying any state and
 * without calling the unlock callback when handle is NULL;
 * _CPIS_genericCall() hands NULL back to the caller after a synchronous
 * run, so a NULL handle can reach this function.
 */
Int32 _CPIS_delete(CPIS_Handle handle){
  CPIS_IpRun *ipRun= (CPIS_IpRun *)handle;

  /* Nothing to release: do not dereference a NULL handle */
  if (ipRun== NULL)
    return -1;

  CPIS_obj.imgbufOfst-=  ipRun->imgbufLen;
  CPIS_obj.coefOfst-= ipRun->coefLen;
  CPIS_obj.cmdOfst-= ipRun->cmdLen;
  CPIS_obj.coef1Ofst-= ipRun->coef1Len;
  CPIS_obj.cmd1Ofst-= ipRun->cmd1Len;
  CPIS_obj.numProcFunc--;
  CPIS_obj.unlock(CPIS_obj.unlockArg);

  return 0;
}

/*
 * Return the size factor of a CPIS_Format.
 *
 * RGB_888 combined with the alpha flag yields 4.  Every other format
 * yields its entry in _CPIS2VICP_sizeOfFormat, looked up with the alpha
 * flag stripped.  The DMA setup code in this file uses a positive result
 * as a width multiplier and a negative one as a divisor.
 * The table index is not range-checked.
 */
Int16 _CPIS_sizeof(CPIS_Format format){

    Uint16 baseFormat= format & ~CPIS_ALPHA;
    Uint16 hasAlpha= format & CPIS_ALPHA;

    return (hasAlpha && baseFormat== CPIS_RGB_888)
               ? 4
               : _CPIS2VICP_sizeOfFormat[baseFormat];

}

/*
 * Map a CPIS_Format to the matching VICP input-type symbol.
 * The lookup index is the format's distance from CPIS_8BIT; it is not
 * range-checked.
 */
Uint16 _CPIS_translateInputFormat(CPIS_Format format){

 Uint16 vicpType= _CPIS2VICP_inputFormat[format - CPIS_8BIT];

 return vicpType;

}

/*
 * Map a CPIS_Format to the matching VICP output-type symbol.
 * The lookup index is the format's distance from CPIS_8BIT; it is not
 * range-checked.
 */
Uint16 _CPIS_translateOutputFormat(CPIS_Format format){

 Uint16 vicpType= _CPIS2VICP_outputFormat[format - CPIS_8BIT];

 return vicpType;

}

/*
 * Validate the processing block dimensions held in base->procBlockSize.
 *
 *   divisor      - the block width must have none of the bits of
 *                  (divisor-1) set, i.e. be a multiple of divisor when
 *                  divisor is a power of two
 *   maxNumPixels - upper bound for width * height
 *
 * Returns 0 when the dimensions are valid and -1 otherwise.
 */
Int32 _CPIS_isBlockDimValid(CPIS_BaseParms *base, Uint16 divisor, Uint16 maxNumPixels) {

  /* A zero dimension at this point means something went wrong earlier;
     rejecting it also keeps the modulo operations below defined */
  if (base->procBlockSize.width== 0 || base->procBlockSize.height== 0)
     return -1;

  /* The block width must respect the divisor ... */
  if ((base->procBlockSize.width & (divisor-1))!= 0)
     return -1;

  /* ... and divide the frame width exactly */
  if ((base->roiSize.width % base->procBlockSize.width)!= 0)
     return -1;

  /* The block height must divide the frame height exactly */
  if ((base->roiSize.height % base->procBlockSize.height)!= 0)
     return -1;

  /* Finally the block area must fit within the allowed pixel count */
  return (base->procBlockSize.width * base->procBlockSize.height > maxNumPixels) ? -1 : 0;
}

/*
 * Compute the input block size needed by each stage of a filter cascade,
 * working backwards from the final output block.
 *
 *   numFilters        - number of stages; all four arrays hold this many entries
 *   outputBlockWidth  - width of the block produced by the last stage
 *   outputBlockHeight - height of the block produced by the last stage
 *   filterWidth/filterHeight     - per-stage filter dimensions (input)
 *   inBlockWidth/inBlockHeight   - per-stage input block dimensions (output)
 *
 * A stage's input is its output grown by (filter size - 1); widths are
 * additionally passed through _CPIS_alignWithSIMD().
 *
 * Returns 0 on success, or -1 without writing anything when numFilters
 * is 0 (there would be no last stage to start from).
 */
Int32 _CPIS_getFilterCascadeBlockDim(Uint16 numFilters, Uint16 outputBlockWidth, Uint16 outputBlockHeight, Uint16 *filterWidth, Uint16 *filterHeight,  Uint16 *inBlockWidth, Uint16 *inBlockHeight) {

    Int32 i; /* wide enough for any Uint16 stage count */

    /* With no stage, index numFilters-1 would fall before the arrays */
    if (numFilters== 0)
        return -1;

    /* Last stage is sized from the requested output block */
    inBlockWidth[numFilters-1]= _CPIS_alignWithSIMD(outputBlockWidth + filterWidth[numFilters-1]-1);
    inBlockHeight[numFilters-1]= outputBlockHeight + filterHeight[numFilters-1]-1;

    /* Each earlier stage must produce the input of the stage after it */
    for(i=(Int32)numFilters-2;i>=0;i--){
        inBlockWidth[i]= _CPIS_alignWithSIMD(inBlockWidth[i+1] + filterWidth[i]-1);
        inBlockHeight[i]= inBlockHeight[i+1] + filterHeight[i]-1;
    }

    return 0;
}

/*
 * Variant of _CPIS_getFilterCascadeBlockDim() with per-stage shift
 * factors and no SIMD alignment of the widths.
 *
 *   numFilters        - number of stages; all six arrays hold this many entries
 *   outputBlockWidth  - width of the block produced by the last stage
 *   outputBlockHeight - height of the block produced by the last stage
 *   filterWidth/filterHeight     - per-stage filter dimensions (input)
 *   inBlockWidth/inBlockHeight   - per-stage input block dimensions (output)
 *   dsW/dsH           - per-stage left-shift counts applied to the stage's
 *                       output width/height before the filter margin is added
 *
 * Returns 0 on success, or -1 without writing anything when numFilters
 * is 0 (there would be no last stage to start from).
 */
Int32 _CPIS_getFilterCascadeBlockDim2(Uint16 numFilters, Uint16 outputBlockWidth, Uint16 outputBlockHeight, Uint16 *filterWidth, Uint16 *filterHeight,  Uint16 *inBlockWidth, Uint16 *inBlockHeight, Uint8 *dsW, Uint8 *dsH) {

    Int32 i; /* wide enough for any Uint16 stage count */

    /* With no stage, index numFilters-1 would fall before the arrays */
    if (numFilters== 0)
        return -1;

    /* Last stage is sized from the requested output block */
    inBlockWidth[numFilters-1]= (outputBlockWidth<<dsW[numFilters-1]) + filterWidth[numFilters-1]-1;
    inBlockHeight[numFilters-1]= (outputBlockHeight<<dsH[numFilters-1]) + filterHeight[numFilters-1]-1;

    /* Each earlier stage must produce the input of the stage after it */
    for(i=(Int32)numFilters-2;i>=0;i--){
        inBlockWidth[i]= (inBlockWidth[i+1]<<dsW[i]) + filterWidth[i]-1;
        inBlockHeight[i]= (inBlockHeight[i+1]<<dsH[i]) + filterHeight[i]-1;
    }

    return 0;
}
/* ======================================================================== */
/*                       End of file                                        */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2008 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */