
#include "stdio.h"
#include "stdlib.h"
#include <limits>

#include "itidl_ti.h"
#include "tidl_dataflow.h"

#include "tidl_custom.h"
#include "tidl_custom_equal.h"
#include "tidsp/tidl_custom_maxpool_ixX_oxX.h"
#include "../algo/inc/tidl_commonUtils.h"

#define EQUAL_OP_EPSILON                    0.000001f

// template <typename T>
// void TIDL_customOpsProcessCore(T (*inPtr)[], T (*outPtr)[], sTIDL_Layer_t *tidlLayer)
// {
//     constexpr float epsilonSquared    = EQUAL_OP_EPSILON * EQUAL_OP_EPSILON;
//   T* __restrict__ in1DataPtr  = static_cast<T*>(inPtr[0]);
//   T* __restrict__ in2DataPtr  = static_cast<T*>(inPtr[1]);
//   T* __restrict__ outDataPtr        = static_cast<T*>(outPtr[0]);
//   const float in1TensorScaleInv     = 1.0f / tidlLayer->inDataPtr[0].tensorScale;
//   const float in2TensorScaleInv     = 1.0f / tidlLayer->inDataPtr[1].tensorScale;
//   const float outTensorScale        = tidlLayer->outData.tensorScale;
//   int32_t outDataCount=1;
//   for (int i=0; i<TIDL_DIM_MAX; i++)
//   {
//     outDataCount*=tidlLayer->outData.dimValues[i];
//   }

//   for (int32_t i = 0; i < outDataCount; i++)
//   {
//     /*
//      * Floating-point rounding, plus the loss from converting float to fixed point and back, makes an
//      * exact equality test unreliable. Instead of checking whether the two values are exactly equal,
//      * check whether they are close enough, i.e. whether they differ by less than a small tolerance (epsilon).
//      * TODO: Implement in fixed point instead.
//     */
//     float diff    = (in1DataPtr[i] * in1TensorScaleInv) - (in2DataPtr[i] * in2TensorScaleInv);
//     outDataPtr[i] = ((diff * diff) < epsilonSquared) * outTensorScale;
//   }
// }

template<class Tin> void TIDL_refCustomcustomEqualCore(Tin *pIn1,Tin *inPtr2,
                                                      int32_t width,
                                                      int32_t height,
                                                      int32_t inPitch,
                                                      int32_t inChPitch,
                                                      int32_t outPitch,
                                                      int32_t outChPitch,
                                                      int32_t numOutChannels,
                                                      Tin initValue,
                                                      Tin *pOut,
                                                      sTIDL_Layer_t *tidlLayer)
{
  // int32_t i1;
  // constexpr float epsilonSquared    = EQUAL_OP_EPSILON * EQUAL_OP_EPSILON;

  // const float in1TensorScaleInv     = 1.0f / tidlLayer->inDataPtr[0].tensorScale;
  // const float in2TensorScaleInv     = 1.0f / tidlLayer->inDataPtr[1].tensorScale;
  // const float outTensorScale        = tidlLayer->outData.tensorScale;

  // int32_t outDataCount= width * height * numOutChannels;

  // Tin *in1DataPtr = (Tin *)pIn1;
  // Tin *in2DataPtr = (Tin *)pIn1;
  // Tin *outData = (Tin *)pOut;

  // for (int32_t i = 0; i < outDataCount; i++)
  // {
  //   /* 
  //    * Handle floating-point precision error and the loss from converting float to fixed and back to float by
  //    * Instead of checking if two floating-point numbers are exactly equal, check if they are close enough using an epsilon,
  //    * a small value representing the tolerance for error.
  //    * TODO: Implement in fixed point instead.
  //   */
  //   float diff    = (in1DataPtr[i] * in1TensorScaleInv) - (in2DataPtr[i] * in2TensorScaleInv);
  //   outData[i] = ((diff * diff) < epsilonSquared) * outTensorScale;
  // }


}

int32_t TIDL_refCustomcustomEqualProcess(sTIDL_Layer_t *tidlLayer,
                                        TIDL_CustomParams0_t *customMaxPoolingParams,
                                        void *inPtrs[],
                                        void *outPtrs[],
                                        float32_tidl *currMin,
                                        float32_tidl *currMax)
{
  int32_t status = CUSTOM_SUCCESS;
  uint8_t (*inPtr1)[];
  uint8_t (*inPtr2)[];
  uint8_t (*outPtr)[];
  sTIDL_DataParams_t *inDataParams = tidlLayer->inDataPtr;
  sTIDL_DataParams_t *outDataParams = &tidlLayer->outData;

  int32_t width = inDataParams->dimValues[TIDL_DIM_WIDTH];
  int32_t height = inDataParams->dimValues[TIDL_DIM_HEIGHT];
  int32_t inPitch = inDataParams->pitch[TIDL_LINE_PITCH];
  int32_t inChPitch = inDataParams->pitch[TIDL_CHANNEL_PITCH];
  int32_t outPitch = outDataParams->pitch[TIDL_LINE_PITCH];
  int32_t outChPitch = outDataParams->pitch[TIDL_CHANNEL_PITCH];
  int32_t numOutChannels = outDataParams->dimValues[TIDL_DIM_NUMCH];

  int32_t elementType = inDataParams->elementType;
  int32_t numTotRoi = inDataParams->dimValues[TIDL_DIM_BATCH];
  int32_t i;

  inPtr1 = (uint8_t (*)[])(inPtrs[0]);
  inPtr2 = (uint8_t (*)[])(inPtrs[1]);
  outPtr = (uint8_t (*)[])(outPtrs[0]);

  for (i = 0; i < numTotRoi; i++)
  {
    if (elementType == TIDL_UnsignedChar)
    {
      TIDL_refCustomcustomEqualCore((uint8_t *)inPtr1,(uint8_t *)inPtr2,
                                   width,
                                   height,
                                   inPitch,
                                   inChPitch,
                                   outPitch,
                                   outChPitch,
                                   numOutChannels,
                                   (uint8_t)0,
                                   (uint8_t *)outPtr,
                                   tidlLayer);
    }
    else if (elementType == TIDL_SignedChar)
    {
      TIDL_refCustomcustomEqualCore((int8_t *)inPtr1,(int8_t *)inPtr2,
                                   width,
                                   height,
                                   inPitch,
                                   inChPitch,
                                   outPitch,
                                   outChPitch,
                                   numOutChannels,
                                   (int8_t)0x80,
                                   (int8_t *)outPtr,
                                   tidlLayer);
    }
    else if (elementType == TIDL_UnsignedShort)
    {
      TIDL_refCustomcustomEqualCore((uint16_t *)inPtr1,(uint16_t *)inPtr2,
                                   width,
                                   height,
                                   inPitch,
                                   inChPitch,
                                   outPitch,
                                   outChPitch,
                                   numOutChannels,
                                   (uint16_t)0,
                                   (uint16_t *)outPtr,
                                   tidlLayer);
    }
    else if (elementType == TIDL_SignedShort)
    {
      TIDL_refCustomcustomEqualCore((int16_t *)inPtr1,(int16_t *)inPtr2,
                                   width,
                                   height,
                                   inPitch,
                                   inChPitch,
                                   outPitch,
                                   outChPitch,
                                   numOutChannels,
                                   (int16_t)0x8000,
                                   (int16_t *)outPtr,
                                   tidlLayer);
    }
    else if (elementType == TIDL_SinglePrecFloat)
    {
      TIDL_refCustomcustomEqualCore((float32_tidl *)inPtr1,(float32_tidl *)inPtr2,
                                   width,
                                   height,
                                   inPitch,
                                   inChPitch,
                                   outPitch,
                                   outChPitch,
                                   numOutChannels,
                                   (float32_tidl)std::numeric_limits<float32_tidl>::lowest(),
                                   (float32_tidl *)outPtr,
                                   tidlLayer);
    }
    else
    {
      status = CUSTOM_FAIL;
      break;
    }
  }

  return status;
}


int32_t TIDL_customEqualProcess(void* tidlHandle,
    sTIDL_Layer_t* tidlLayer,
    void* inPtrs[],
    void* outPtrs[],
    void* params,
    void* dmaUtilsContext,
    const sTIDL_sysMemHandle_t sysMems[TIDL_SYSMEM_MAX],
    int32_t execMode)
{
//   int32_t status = CUSTOM_SUCCESS;
//   uint8_t (*inPtr)[];
//   uint8_t (*outPtr)[];
//   inPtr = (uint8_t (*)[])(inPtrs[0]);
//   outPtr = (uint8_t (*)[])(outPtrs[0]);

//   sTIDL_DataParams_t *inDataParams = tidlLayer->inDataPtr;

//   /* Assuming that all inputs and the output are of the same type , exept scatter elements case */
//   int32_t elementType = inDataParams->elementType;

//   if(TIDL_UnsignedChar == elementType)
//   {
//     status = TIDL_customOpsProcessCore<uint8_t>(inPtr, outPtr, tidlLayer);
//   }
//   else if(TIDL_SignedChar == elementType)
//   {
//     status = TIDL_customOpsProcessCore<int8_t>(inPtr, outPtr, tidlLayer);
//   }
//   else if(TIDL_UnsignedShort == elementType)
//   {
//     status = TIDL_customOpsProcessCore<uint16_t>(inPtr, outPtr, tidlLayer);
//   }
//   else if(TIDL_SignedShort == elementType)
//   {
//     status = TIDL_customOpsProcessCore<int16_t>(inPtr, outPtr, tidlLayer);
//   }
//   else if (TIDL_SinglePrecFloat == elementType)
//   {
//     status = TIDL_customOpsProcessCore<float32_tidl>(inPtr, outPtr, tidlLayer);
//   }
//   else
//   {
//     status = CUSTOM_FAIL;
//   }

    int32_t status = CUSTOM_SUCCESS;
  float32_tidl currMin = 0, currMax = 0;

  /* When execMode == TIDL_EXEC_MODE_STATS_COLLECTION then user is expected to find
       run a platform agnostic code for a given layer. This code is expected to support both float
       and fixed point implemenation. User can create template based code to handle multiple data types and
      appropriately call the right template as its done for 8 bit and 16 bit
      element type in this example */

  if ((TIDL_EXEC_MODE_STATS_COLLECTION == execMode) ||
      (TIDL_EXEC_MODE_INFER_PROCESS_REF == execMode))
  {

    /* Note that for max pooling layer we don't need to call the reference function
    during stats collection as max pooling stats are same as its producers layers.
    But in general user is expected to write a function to do stats collection.
    */

    /* Call reference implementation of the layer */
    TIDL_refCustomcustomEqualProcess(tidlLayer,
                                    (TIDL_CustomParams0_t *)params,
                                    inPtrs,
                                    outPtrs,
                                    &currMin,
                                    &currMax);
  }
  else if (TIDL_EXEC_MODE_INFER_PROCESS == execMode)
  {
    /* This is the location where user is supposed to call the optimized
    implementation of their layer */
    status = TIDL_refCustomcustomEqualProcess(tidlLayer,
                                    (TIDL_CustomParams0_t *)params,
                                    inPtrs,
                                    outPtrs,
                                    &currMin,
                                    &currMax);
    // TIDL_customcustomOpsDspProcess(tidlHandle,
    //                                       tidlLayer,
    //                                       (TIDL_CustomParams0_t *)params,
    //                                       inPtrs,
    //                                       outPtrs);
  }
  else
  {
    status = CUSTOM_FAIL;
  }
  return status;

}