/*
*
* Copyright (c) {2015 - 2017} Texas Instruments Incorporated
*
* All rights reserved not granted herein.
*
* Limited License.
*
* Texas Instruments Incorporated grants a world-wide, royalty-free, non-exclusive
* license under copyrights and patents it now or hereafter owns or controls to make,
* have made, use, import, offer to sell and sell ("Utilize") this software subject to the
* terms herein.  With respect to the foregoing patent license, such license is granted
* solely to the extent that any such patent is necessary to Utilize the software alone.
* The patent license shall not apply to any combinations which include this software,
* other than combinations with devices manufactured by or for TI ("TI Devices").
* No hardware patent is licensed hereunder.
*
* Redistributions must preserve existing copyright notices and reproduce this license
* (including the above copyright notice and the disclaimer and (if applicable) source
* code license limitations below) in the documentation and/or other materials provided
* with the distribution
*
* Redistribution and use in binary form, without modification, are permitted provided
* that the following conditions are met:
*
* *       No reverse engineering, decompilation, or disassembly of this software is
* permitted with respect to any software provided in binary form.
*
* *       any redistribution and use are licensed by TI for use only with TI Devices.
*
* *       Nothing shall obligate TI to provide you with source code for the software
* licensed and provided to you in object code.
*
* If software source code is provided to you, modification and redistribution of the
* source code are permitted provided that the following conditions are met:
*
* *       any redistribution and use of the source code, including any resulting derivative
* works, are licensed by TI for use only with TI Devices.
*
* *       any redistribution and use of any object code compiled from the source code
* and any resulting derivative works, are licensed by TI for use only with TI Devices.
*
* Neither the name of Texas Instruments Incorporated nor the names of its suppliers
*
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* DISCLAIMER.
*
* THIS SOFTWARE IS PROVIDED BY TI AND TI'S LICENSORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL TI AND TI'S LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/message.h>
#include <google/protobuf/text_format.h>
#if defined(GCC_BUILD)
#include <unistd.h>
#define O_BINARY (0)
#endif
#if defined(MSVC_BUILD)
#include <io.h>
#endif
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <cmath>
#include <float.h>

using namespace std;
using ::google::protobuf::Message;
using ::google::protobuf::io::FileInputStream;
using ::google::protobuf::io::FileOutputStream;
using ::google::protobuf::io::ZeroCopyInputStream;
using ::google::protobuf::io::CodedInputStream;
using ::google::protobuf::io::ZeroCopyOutputStream;
using ::google::protobuf::io::CodedOutputStream;

#include "ti_dl.h"
#include "tidl_import_config.h"
#include "tidl_import_common.h"
#include "perfsim.h"
#include "tidl_custom.h"
#define ALIGN_CHANNEL_PITCH (1)
#define TIDL_MSMC_NUM_PHY_BANKS ( 4U)
#define TIDL_MSMC_PHY_BANK_SIZE (256U)
#define TIDL_MSMC_BANK_PITCH (TIDL_MSMC_NUM_PHY_BANKS * TIDL_MSMC_PHY_BANK_SIZE)
#define TIDL_MSMC_CACHE_LINE_SIZE (128U)
#define ALIGN_SIZE(x,y)       ((((x) + ((y)-1)) / (y)) * (y))
#define QUANT_MODIFICATION (1)
extern int32_t gloab_data_format;
extern sTIDL_OrgNetwork_t      orgTIDLNetStructure;
extern sTIDL_OrgNetwork_t      tempTIDLNetStructure;
extern sTIDL_Network_t         tIDLNetStructure;


#define QUAN_STYLE2_ROUND ((gParams.quantRoundAdd*1.0 / 100))

static int totalMemAllocation = 0;
FILE *fpAlloc = NULL;

/**
 * Allocate `size` bytes with malloc() and add the request to the running
 * total kept in totalMemAllocation.
 *
 * Failure is detected with an explicit runtime check instead of assert(), so
 * the check is still performed when the file is built with NDEBUG.
 *
 * @param size  Number of bytes requested; must not be negative.
 * @return The pointer returned by malloc(). The process is terminated with
 *         exit(-1) when `size` is negative or a non-empty request fails.
 */
void * my_malloc(int size)
{
  //if(fpAlloc == NULL) fpAlloc = fopen ("MemAllocation.txt", "w");
  if (size < 0)
  {
    /* A negative int would be converted to a huge size_t by malloc(). */
    printf("ERROR: my_malloc called with a negative size (%d) \n", size);
    exit(-1);
  }

  void *ptr = malloc((size_t)size);
  /* malloc(0) is allowed to return NULL, so only a non-empty request that
     comes back NULL is treated as an allocation failure. */
  if ((ptr == NULL) && (size != 0))
  {
    printf("ERROR: Could not allocate %d bytes \n", size);
    exit(-1);
  }
  totalMemAllocation += size;

  //fprintf(fpAlloc, "Alloc: Ptr: %0x, Size: %0x\n",ptr,size);
  //fflush(fpAlloc);
  return ptr;
}

/* Releases a heap buffer by passing it straight to free().
   The allocation-trace logging below is kept disabled. */
void my_free(void *ptr)
{
  free(ptr);
  //fprintf(fpAlloc, "Free: Ptr: %0x\n",ptr);
  //fflush(fpAlloc);
}
FILE *paramDebugFile = NULL;
int debugLayeId = 0;

#define HIST_SIZE (1000)
/**
 * Build a histogram of the magnitudes of a float array.
 *
 * The largest magnitude found is mapped to the last bin (HIST_SIZE - 1) and
 * every other value is placed in bin
 * (int)(|value| * (HIST_SIZE - 1) / largestMagnitude).
 *
 * @param origParams  Input values.
 * @param paramNum    Number of entries read from origParams.
 * @param histPtr     Output array of HIST_SIZE counters; cleared first.
 * @param orgMax      Receives the largest magnitude, or 1 when every input
 *                    is zero (or paramNum is not positive).
 */
void TIDL_computeHist(float *origParams, int paramNum, int *histPtr, float *orgMax)
{
  int i;
  float orgMaxFloat = 0;

  for (i = 0; i < HIST_SIZE; i++)
  {
    histPtr[i] = 0;
  }

  /* std::fabs always selects a floating-point overload. An unqualified abs()
     can bind to int abs(int) depending on which headers and using-directives
     are in effect, which would truncate the values before binning. */
  for (i = 0; i < paramNum; i++)
  {
    const float magnitude = std::fabs(origParams[i]);
    orgMaxFloat = (orgMaxFloat <= magnitude) ? magnitude : orgMaxFloat;
  }

  /* Avoid dividing by zero below when all inputs are zero. */
  if (orgMaxFloat == 0)
  {
    orgMaxFloat = 1;
  }

  for (i = 0; i < paramNum; i++)
  {
    histPtr[(int)((std::fabs(origParams[i]) * (HIST_SIZE - 1)) / orgMaxFloat)] += 1;
  }
  *orgMax = orgMaxFloat;
}

/*
 * Measures how closely a quantized parameter array follows its float original.
 *
 * Each quantized value is divided by `scale` and compared with the matching
 * entry of origParams. Mean/maximum absolute error, mean/maximum relative
 * error (in percent, only for entries whose quantized magnitude exceeds 2),
 * the largest original and de-quantized magnitudes and the mean original
 * magnitude are gathered. When paramDebugFile is open, one comma separated
 * line with these numbers followed by a HIST_SIZE-bin histogram of the
 * original values is appended to it. The function always returns 0.
 */
template <class quantParamType>
int TIDL_CompareParams(quantParamType *quantizedParams, float *origParams, int paramNum, float scale) {
  float avgAbsErr = 0;
  float peakAbsErr = 0;
  float avgRelErr = 0;
  float peakRelErr = 0;
  float peakOrig = 0;
  float peakDequant = 0;
  float avgAbsOrig = 0;
  int relCount = 0;
  int peakRelIdx = 0;

  for (int idx = 0; idx < paramNum; idx++)
  {
    const float dequant = quantizedParams[idx] / scale;
    const float orig = origParams[idx];

    /* Absolute error between the de-quantized and the original value. */
    float absErr;
    if (dequant > orig)
    {
      absErr = dequant - orig;
    }
    else
    {
      absErr = orig - dequant;
    }

    /* Magnitudes are formed by negating non-positive values. */
    float absOrig;
    if (orig > 0)
    {
      absOrig = orig;
    }
    else
    {
      absOrig = -orig;
    }

    float absDequant;
    if (dequant > 0)
    {
      absDequant = dequant;
    }
    else
    {
      absDequant = -dequant;
    }

    int absQuant;
    if (quantizedParams[idx] > 0)
    {
      absQuant = quantizedParams[idx];
    }
    else
    {
      absQuant = -quantizedParams[idx];
    }

    avgAbsOrig += absOrig;
    avgAbsErr += absErr;

    peakAbsErr = (peakAbsErr < absErr) ? absErr : peakAbsErr;
    peakOrig = (peakOrig < absOrig) ? absOrig : peakOrig;
    peakDequant = (peakDequant < absDequant) ? absDequant : peakDequant;

    /* Relative error is only evaluated for quantized magnitudes above 2. */
    float relErr = 0;
    if (absQuant > 2)
    {
      relErr = (absErr / absOrig) * 100;
      relCount++;
    }
    if (peakRelErr < relErr)
    {
      peakRelErr = relErr;
      peakRelIdx = idx;
    }
    avgRelErr += relErr;
  }

  avgAbsErr /= paramNum;
  if (relCount == 0)
  {
    /* No entry qualified for the relative error: report -1. */
    avgRelErr = -1;
  }
  else
  {
    avgRelErr /= relCount;
  }
  avgAbsOrig /= paramNum;

  if (paramDebugFile == NULL)
  {
    return 0;
  }

  int * histPtr = (int *)my_malloc(HIST_SIZE * sizeof(int));
  float histMax;
  TIDL_computeHist(origParams, paramNum, histPtr, &histMax);
  fprintf(paramDebugFile, "%d,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f, , , ,", debugLayeId, avgAbsErr, peakAbsErr, avgAbsOrig, avgRelErr, peakOrig, peakDequant, origParams[peakRelIdx], quantizedParams[peakRelIdx] / scale, peakRelErr, scale);
  for (int bin = 0; bin < HIST_SIZE; bin++)
  {
    fprintf(paramDebugFile, "%d,", histPtr[bin]);
  }
  fprintf(paramDebugFile, "\n");
  my_free(histPtr);
  return 0;
}


/**
 * Quantize a float array to unsigned integer codes relative to [min, max].
 *
 * The scale is (1 << NUM_WHGT_BITS) / |max - min|. Each value is multiplied
 * by the scale, QUAN_STYLE2_ROUND is added (positive values) or subtracted
 * (all other values), the result is truncated to an integer, the equally
 * quantized `min` is subtracted, and the code is saturated to
 * [0, (1 << weightsElementSizeInBits) - 1].
 *
 * NOTE(review): the scale is derived from NUM_WHGT_BITS and not from
 * weightsElementSizeInBits - confirm this is intended.
 *
 * @param params                    Output buffer. Written as uint8_t elements
 *                                  when weightsElementSizeInBits <= 8,
 *                                  otherwise accessed as uint16_t elements.
 * @param data                      Input values.
 * @param dataSize                  Number of values to convert.
 * @param min                       Lower end of the range.
 * @param max                       Upper end of the range.
 * @param weightsElementSizeInBits  Bit width that selects the saturation
 *                                  limit and the output element size.
 * @return The scale multiplied by 256, truncated to int32_t.
 */
int32_t TIDL_QuantizeUnsignedMax(uint8_t * params, float * data, int32_t dataSize, float min, float max, int32_t weightsElementSizeInBits)
{
  int32_t i;
  const float absRange = std::fabs(max - min);

  float quantPrec;
  if (absRange != 0)
  {
    quantPrec = ((1.0*(1 << NUM_WHGT_BITS)) / absRange);
  }
  else
  {
    /* Degenerate range: dividing by zero would give an infinite scale and an
       undefined float-to-int conversion below. Use the same unit-scale
       fallback as TIDL_QuantizeSignedMax. */
    quantPrec = 1;
  }

  int32_t offset;
  if (min > 0)
  {
    offset = (int32_t)(min *  quantPrec + QUAN_STYLE2_ROUND);
  }
  else
  {
    offset = (int32_t)(min *  quantPrec - QUAN_STYLE2_ROUND);
  }

  const int32_t maxCode = (1 << weightsElementSizeInBits) - 1;
  const bool store8 = (weightsElementSizeInBits <= 8);
  uint16_t *params16 = (uint16_t *)params;

  //Convert float params to 8-bit or 16-bit
  for (i = 0; i < dataSize; i++)
  {
    const float pData = data[i];
    int32_t param;
    if (pData > 0)
    {
      param = (int32_t)(pData *  quantPrec + QUAN_STYLE2_ROUND);
    }
    else
    {
      param = (int32_t)(pData *  quantPrec - QUAN_STYLE2_ROUND);
    }
    param = param - offset;

    /* Saturate on both sides; without the lower bound a value below `min`
       would wrap around when stored into the unsigned output. */
    if (param > maxCode)
    {
      param = maxCode;
    }
    if (param < 0)
    {
      param = 0;
    }

    if (store8)
    {
      params[i] = (uint8_t)param;
    }
    else
    {
      params16[i] = (uint16_t)param;
    }
  }
  return ((int32_t)(quantPrec * 256));
}

/*
 * Symmetric signed quantization of a float array.
 *
 * The range is the larger magnitude of `min` and `max`; when
 * gParams.quantizationStyle is TIDL_QuantStyleP2Dynamic it is first raised to
 * 2^ceil(log2(range)). The scale is (1 << (weightsElementSizeInBits - 1))
 * divided by the range, or 1 when the range is zero. Every value is scaled,
 * QUAN_STYLE2_ROUND is added (positive values) or subtracted (all others),
 * the result is truncated and clamped to the signed range of
 * weightsElementSizeInBits bits. TIDL_CompareParams is then called on the
 * result and the scale is returned.
 */
template <class Tout>
float TIDL_QuantizeSignedMax(Tout * params, float * data, int32_t dataSize, float min, float max, int32_t weightsElementSizeInBits)
{
  float absRange = abs(min);
  if (abs(max) > abs(min))
  {
    absRange = abs(max);
  }

  if (gParams.quantizationStyle == TIDL_QuantStyleP2Dynamic)
  {
    const double exponent = log((double)absRange) / log((double)2);
    absRange = (float)ceil(exponent);
    absRange = pow(2.0, (double)absRange);
  }

  float quantPrec = 1;
  if (absRange != 0)
  {
    quantPrec = ((1.0*(1 << (weightsElementSizeInBits - 1))) / absRange);
  }

  const int32_t upperCode = ((1 << (weightsElementSizeInBits - 1)) - 1);
  const int32_t lowerCode = (-1 * (1 << (weightsElementSizeInBits - 1)));

  for (int32_t idx = 0; idx < dataSize; idx++)
  {
    const float pData = data[idx];
    int32_t param;
    if (pData > 0)
    {
      param = (pData *  quantPrec + QUAN_STYLE2_ROUND);
    }
    else
    {
      param = (pData *  quantPrec - QUAN_STYLE2_ROUND);
    }

    if (param > upperCode)
    {
      param = upperCode;
    }
    if (param < lowerCode)
    {
      param = lowerCode;
    }
    params[idx] = param;
  }

  TIDL_CompareParams(params, data, dataSize, quantPrec);
  return (quantPrec);
}

/* Explicit instantiations for 8-bit and 16-bit signed outputs. */
template float TIDL_QuantizeSignedMax<signed char>(signed char * params, float * data, int32_t dataSize, float min, float max, int32_t weightsElementSizeInBits);
template float TIDL_QuantizeSignedMax<signed short>(signed short * params, float * data, int32_t dataSize, float min, float max, int32_t weightsElementSizeInBits);


/*
 * Scales one value by (1 << NUM_WHGT_BITS) / |max - min|, adds
 * QUAN_STYLE2_ROUND for positive inputs or subtracts it otherwise, and
 * returns the result truncated to int32_t.
 */
int32_t TIDL_normalize(float data, float min, float max)
{
  const float absRange = abs(max - min);
  const float quantPrec = ((1.0*(1 << NUM_WHGT_BITS)) / absRange);

  if (data > 0)
  {
    return (int32_t)(data *  quantPrec + QUAN_STYLE2_ROUND);
  }
  return (int32_t)(data *  quantPrec - QUAN_STYLE2_ROUND);
}

/**
 * Parse a protobuf text-format file into `proto`.
 *
 * @param fileName  Path of the text file to read.
 * @param proto     Message that receives the parsed content.
 * @return true on success. The process is terminated with exit(-1) when the
 *         file cannot be opened or the text cannot be parsed, so a false
 *         return is never observed by the caller.
 */
bool TIDL_readProtoFromTextFile(const char* fileName, Message* proto)
{
  bool success;

  /* open() reports failure with -1; descriptor 0 is a valid handle, so the
     result must not be compared against NULL. */
  const int fd = open(fileName, O_RDONLY);
  if (fd < 0)
  {
    printf("ERROR: Could not open prototext file for reading \n");
    exit(-1);
  }

  {
    /* Scoped so that the stream is destroyed before the descriptor is closed. */
    FileInputStream input(fd);
    success = google::protobuf::TextFormat::Parse(&input, proto);
  }
  close(fd);

  if (!success)
  {
    printf("ERROR: google::protobuf::TextFormat::Parse proto file(%s) FAILED !!!\n", fileName);
    exit(-1);
  }
  return success;
}

#define APP_CNN_INTEROP_CAFFE_READ_BINARY_TOTAL_BYTE_LIMIT  2147483647
#define APP_CNN_INTEROP_CAFFE_READ_BINARY_WARNING_THRESHOLD 1073741824

bool TIDL_readProtoFromBinaryFile(const char* fileName, Message* proto)
{
  int                   fd;
  ZeroCopyInputStream   *rawInput;
  CodedInputStream      *codedInput;
  bool                  success;

  fd = open(fileName, O_BINARY);
  if (fd == NULL)
  {
    printf("ERROR: Could not open caffe model(%s) for reading \n", fileName);
    exit(-1);
  }

  rawInput = new FileInputStream(fd);
  codedInput = new CodedInputStream(rawInput);

  codedInput->SetTotalBytesLimit(
    APP_CNN_INTEROP_CAFFE_READ_BINARY_TOTAL_BYTE_LIMIT,
    APP_CNN_INTEROP_CAFFE_READ_BINARY_WARNING_THRESHOLD
    );

  success = proto->ParseFromCodedStream(codedInput);
  delete codedInput;
  delete rawInput;
  close(fd);
  if (!success)
  {
    printf("ERROR: proto->ParseFromCodedStream proto file(%s)\n", fileName);
  }
  return success;
}
#if 0 // Code to read ResNet_mean.binaryproto"
BlobProto       blob;
TIDL_readProtoFromBinaryFile((const char *)params->inputParamsFile, &blob);
float*blob_data = (float*)my_malloc(sizeof(float)*blob.data_size());

for (i = 0; i < blob.data_size(); i++)
{
  blob_data[i] = blob.data(i);
}
fp1 = fopen((const char *)params->outputNetFile, "wb+");
if (fp1 == NULL)
{
  printf("Could not open %s file for writing \n", (const char *)params->outputNetFile);
}
fwrite(blob_data, 1, sizeof(float)*blob.data_size(), fp1);
if (fp1 != NULL)
{
  fclose(fp1);
}
exit(0);
#endif

/*
 * Looks for a layer that has an output buffer named bufName.
 * Layers are scanned from index (numLayer - 1) down to 0 and the index of the
 * first layer with a matching output name is returned; -1 means no match.
 */
int32_t TIDL_getLayerIdx(sTIDL_OrgNetwork_t * pOrgTIDLNetStructure, int32_t numLayer, const char *bufName)
{
  for (int32_t layerIdx = numLayer - 1; layerIdx >= 0; --layerIdx)
  {
    int32_t bufIdx = 0;
    while (bufIdx < pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].numOutBufs)
    {
      const char *candidate = (const char*)pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].outDataNames[bufIdx];
      if (strcmp(bufName, candidate) == 0)
      {
        return layerIdx;
      }
      bufIdx++;
    }
  }
  return (-1);
}

/*
 * Returns the name of the first output buffer whose dataId equals the given
 * one, scanning layers 0 .. numLayers-1 in order. NULL is returned when no
 * output buffer carries that id.
 */
const char* TIDL_getOutDataName(sTIDL_OrgNetwork_t * pOrgTIDLNetStructure, int32_t dataId)
{
  for (int32_t layerIdx = 0; layerIdx < pOrgTIDLNetStructure->numLayers; ++layerIdx)
  {
    for (int32_t bufIdx = 0; bufIdx < pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].numOutBufs; ++bufIdx)
    {
      if (pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].outData[bufIdx].dataId != dataId)
      {
        continue;
      }
      return (const char*)pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].outDataNames[bufIdx];
    }
  }
  return (NULL);
}

/*
 * Copies the descriptor of the output buffer named bufName into *data.
 * Layers are scanned from index (numLayer - 1) down to 0; the first match is
 * used. Returns 0 when a buffer was found and -1 otherwise (in which case
 * *data is left untouched).
 */
int32_t TIDL_getDataID(sTIDL_DataParams_t *data,
  sTIDL_OrgNetwork_t * pOrgTIDLNetStructure,
  int32_t            numLayer,
  int8_t             *bufName)
{
  const char *wanted = (const char*)bufName;

  for (int32_t layerIdx = numLayer - 1; layerIdx >= 0; --layerIdx)
  {
    for (int32_t bufIdx = 0; bufIdx < pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].numOutBufs; ++bufIdx)
    {
      const char *candidate = (const char*)pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].outDataNames[bufIdx];
      if (strcmp(wanted, candidate) != 0)
      {
        continue;
      }
      *data = pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].outData[bufIdx];
      return 0;
    }
  }
  return -1;
}

/*
 * Tells whether any of the first numLayer layers lists dataId among its
 * input buffers. Returns 1 when it does and 0 when it does not.
 */
int32_t TIDL_isDataBufUsed(int32_t           dataId,
  sTIDL_Network_t   *pTIDLNetStructure,
  int32_t           numLayer)
{
  int32_t used = 0;

  for (int32_t layerIdx = 0; (layerIdx < numLayer) && (used == 0); ++layerIdx)
  {
    for (int32_t bufIdx = 0; (bufIdx < pTIDLNetStructure->TIDLLayers[layerIdx].numInBufs) && (used == 0); ++bufIdx)
    {
      if (pTIDLNetStructure->TIDLLayers[layerIdx].inData[bufIdx].dataId == dataId)
      {
        used = 1;
      }
    }
  }
  return used;
}

/*
 * Checks whether the buffer named bufName is produced by a convolution layer
 * that has exactly one output buffer.
 * Layers are scanned from index (numLayer - 1) down to 0 and only the first
 * layer with a matching output name is examined. Returns 1 when that layer is
 * a TIDL_ConvolutionLayer with numOutBufs == 1, otherwise 0 (also when no
 * layer produces the buffer).
 */
int32_t TIDL_isInputConv2D(sTIDL_OrgNetwork_t   *pOrgTIDLNetStruct,
  int32_t              numLayer,
  const char           *bufName)
{
  for (int32_t layerIdx = numLayer - 1; layerIdx >= 0; --layerIdx)
  {
    for (int32_t bufIdx = 0; bufIdx < pOrgTIDLNetStruct->TIDLPCLayers[layerIdx].numOutBufs; ++bufIdx)
    {
      const char *candidate = (const char*)pOrgTIDLNetStruct->TIDLPCLayers[layerIdx].outDataNames[bufIdx];
      if (strcmp(bufName, candidate) != 0)
      {
        continue;
      }

      /* The producer is found: the answer depends only on this layer. */
      if (pOrgTIDLNetStruct->TIDLPCLayers[layerIdx].numOutBufs != 1)
      {
        return 0;
      }
      if (pOrgTIDLNetStruct->TIDLPCLayers[layerIdx].layerType != TIDL_ConvolutionLayer)
      {
        return 0;
      }
      return 1;
    }
  }
  return 0;
}

/*
 * Overwrites every input-buffer descriptor whose dataId equals
 * dataBuf.dataId with a copy of dataBuf, across the first numLayers layers.
 */
void TIDL_UpdateInDataBuff(sTIDL_OrgNetwork_t * pOrgTIDLNetStructure,
  uint32_t numLayers, sTIDL_DataParams_t dataBuf)
{
  for (uint32_t layerIdx = 0; layerIdx < numLayers; layerIdx++)
  {
    /* Layers without a positive input count are skipped entirely. */
    if (!(pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].numInBufs > 0))
    {
      continue;
    }

    for (uint32_t bufIdx = 0; bufIdx < pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].numInBufs; bufIdx++)
    {
      if (pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].inData[bufIdx].dataId != dataBuf.dataId)
      {
        continue;
      }
      pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].inData[bufIdx] = dataBuf;
    }
  }
}
#define ENABLE_HIST_BASED_RANGE (0)
/*
 * Widens the range [*minOut, *maxOut] so that it covers every data[i] * scale.
 * The outputs are only ever moved outwards: *minOut is lowered when a smaller
 * scaled value exists and *maxOut is raised when a larger one exists.
 *
 * When ENABLE_HIST_BASED_RANGE is non-zero the outputs are instead replaced
 * by a symmetric range taken from the histogram bin at which 99.5% of the
 * samples have been accumulated.
 */
void TIDL_findRange(float * data, int32_t dataSize, float * minOut, float * maxOut, float scale)
{
  int32_t i;
  float lowest = FLT_MAX;
  float highest = -FLT_MAX;

  for (i = 0; i < dataSize; i++)
  {
    const float scaled = data[i] * scale;
    if (scaled < lowest)
    {
      lowest = scaled;
    }
    if (scaled > highest)
    {
      highest = scaled;
    }
  }

  if (lowest < *minOut)
  {
    *minOut = lowest;
  }
  if (highest > *maxOut)
  {
    *maxOut = highest;
  }
#if ENABLE_HIST_BASED_RANGE
  int * histPtr = (int *)my_malloc(HIST_SIZE * sizeof(int));
  float orgMax;
  TIDL_computeHist(data, dataSize, histPtr, &orgMax);
  int curPer = 0;
  int maxPer = dataSize*0.995;
  for (i = 0; i < HIST_SIZE; i++)
  {
    curPer += histPtr[i];
    if (curPer >= maxPer)
      break;
  }
  my_free(histPtr);

  *maxOut = (orgMax*i) / (HIST_SIZE);
  *minOut = -1 * *maxOut;
#endif

}

/**
 * Find the power-of-two exponent for symmetric quantization of [min, max].
 *
 * The number of steps per unit is (1 << (NUM_WHGT_BITS - 1)) divided by the
 * larger magnitude of `min` and `max`, truncated to an integer. The return
 * value is floor(log2(steps)), i.e. the exponent of the largest power of two
 * that does not exceed the step count; 0 is returned when the step count is
 * 0 or 1.
 *
 * A step count that does not fit (for example when both bounds are zero) is
 * saturated to 2^30, giving a result of 30, instead of performing an
 * out-of-range float-to-int conversion.
 *
 * @param min  Lower end of the range.
 * @param max  Upper end of the range.
 * @return The exponent described above, in [0, 30].
 */
int32_t TIDL_findSymQ(float  min, float max)
{
  const float absMin = std::fabs(min);
  const float absMaxIn = std::fabs(max);
  const float absMax = (absMin > absMaxIn) ? absMin : absMaxIn;

  const double steps = ((1.0*(1 << (NUM_WHGT_BITS - 1))) / absMax);

  /* 2^30 is the largest power of two representable in int32_t. The negated
     comparison also routes infinity and NaN to the saturated value. */
  int32_t quantSteps;
  if (steps < 1073741824.0)
  {
    quantSteps = (int32_t)steps;
  }
  else
  {
    quantSteps = (int32_t)1 << 30;
  }

  /* Halve until one bit is left: this counts floor(log2(quantSteps)) without
     the 1 << 31 / 1 << 32 shifts of a full bit scan and without stepping
     through every value down to the next power of two. */
  int32_t qValue = 0;
  while (quantSteps > 1)
  {
    quantSteps >>= 1;
    qValue++;
  }
  return(qValue);
}


/**
 * Divide `val` by 2^bits with round-half-up, then saturate to [min, max].
 *
 * The rounded quotient is floor((val + 2^(bits-1)) / 2^bits). It is computed
 * as (val >> bits) plus bit (bits-1) of val, which gives the same result but
 * cannot overflow and stays within 64-bit shifts for every `bits` below 64.
 * (As before, this relies on >> of a negative value being an arithmetic
 * shift.)
 *
 * @param val   Value to scale down.
 * @param bits  Number of fractional bits to remove; 0 leaves `val` unscaled.
 * @param min   Lower saturation bound.
 * @param max   Upper saturation bound.
 * @return The rounded and saturated value.
 */
int64_t TIDL_roundSat(int64_t val, uint8_t bits, int64_t min, int64_t max)
{
  if (bits >= 64)
  {
    /* |val| <= 2^63, so val / 2^bits lies in [-0.5, 0.5) and rounds to 0.
       Shifting a 64-bit value by 64 or more would be undefined. */
    val = 0;
  }
  else if (bits > 0)
  {
    const int64_t roundUp = (val >> (bits - 1)) & 1;
    val = (val >> bits) + roundUp;
  }
  val = val < min ? min : val;
  val = val > max ? max : val;
  return val;
}

/**
 * Append one parameter buffer to the model file on a 64-byte boundary.
 *
 * *totalParamSize is the running byte offset. It is first padded up to the
 * next multiple of 64 (zero bytes are written when `fp` is non-NULL), then
 * advanced by buf->bufSize * numBytes. With a NULL `fp` only the offset
 * bookkeeping is done: nothing is written and buf->ptr is not released.
 *
 * @param fp              Output file, or NULL for an offset-only pass.
 * @param buf             Buffer to write. When `fp` is non-NULL, buf->ptr is
 *                        released with free() and reset to NULL.
 * @param totalParamSize  In/out running offset in bytes.
 * @param numBytes        Size of one buffer element in bytes.
 * @return The aligned offset at which the buffer starts. The process is
 *         terminated with exit(-1) when a write to `fp` is incomplete.
 */
int32_t TIDL_alignParamsWrite(FILE *fp, sBuffer_t * buf, uint32_t *totalParamSize, uint32_t numBytes)
{
  /* The padding never exceeds 63 bytes, so a fixed zero block replaces a heap
     allocation per call. */
  static const uint8_t zeroPad[64] = { 0 };

  const uint32_t misalign = (*totalParamSize % 64);
  if (misalign)
  {
    const uint32_t padSize = 64 - misalign;
    *totalParamSize += padSize;
    if (fp && (fwrite(zeroPad, 1, padSize, fp) != padSize))
    {
      printf("ERROR: Could not write alignment padding \n");
      exit(-1);
    }
  }

  const uint32_t writeSize = buf->bufSize * numBytes;
  if (fp)
  {
    if (fwrite(buf->ptr, 1, writeSize, fp) != writeSize)
    {
      printf("ERROR: Could not write parameter data \n");
      exit(-1);
    }
    free(buf->ptr);
    /* Do not leave a dangling pointer behind in the layer description. */
    buf->ptr = NULL;
  }

  const int32_t offset = *totalParamSize;
  *totalParamSize += writeSize;
  return (offset);
}

/**
 * Write the network structure and the layer parameter buffers to a file.
 *
 * The bytes of *tIDLNetStructure up to the end of
 * TIDLLayers[tIDLNetStructure->numLayers - 1] are written first. Then, for
 * every layer of orgTIDLNetStructure that is not a Split, DropOut, PriorBox,
 * Unsupported or ConstData layer, the parameter buffers that belong to its
 * layer type are appended through TIDL_alignParamsWrite and the returned
 * offsets are stored in the matching entry of tIDLNetStructure->TIDLLayers.
 *
 * NOTE(review): the structure is written to the file before the offsets are
 * stored in it, so offsets computed during this call are not part of the
 * header written by this call. Presumably callers run a first pass with
 * name == NULL to fill in the offsets - confirm.
 *
 * @param tIDLNetStructure     Network structure to write; its parameter
 *                             offsets are updated.
 * @param orgTIDLNetStructure  Source of the layer types and parameter buffers.
 * @param name                 Output file name, or NULL to compute offsets
 *                             without writing anything.
 * @param numLayers            Number of layers of orgTIDLNetStructure to visit.
 * @return 0. The process is terminated with exit(-1) when the output file
 *         cannot be opened.
 */
int32_t TIDL_writeModel(sTIDL_Network_t * tIDLNetStructure, sTIDL_OrgNetwork_t * orgTIDLNetStructure, const char * name, uint32_t numLayers)
{
  FILE * fp1 = NULL;
  int32_t writeSize, tiLayerIndex;
  uint32_t i;
  if (name)
  {
    fp1 = fopen(name, "wb+");
    if (fp1 == NULL)
    {
      printf("ERROR: Could not open %s file for writing \n", (const char *)name);
      exit(-1);
    }
  }

  /* Header size: everything up to one past the last used layer entry. */
  writeSize = ((uint8_t*)(&tIDLNetStructure->TIDLLayers[tIDLNetStructure->numLayers])) - ((uint8_t*)tIDLNetStructure);
  if (fp1) fwrite(tIDLNetStructure, 1, writeSize, fp1);
  uint32_t totalParamSize = writeSize;

  tiLayerIndex = 0;
  for (i = 0; i < numLayers; i++)
  {
    /* These layer types have no entry in tIDLNetStructure->TIDLLayers. */
    if ((orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_SplitLayer) ||
      (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_DropOutLayer) ||
      (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_PriorBoxLayer) ||
      (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_UnsupportedLayer) ||
      (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_ConstDataLayer))
    {
      continue;
    }

    /* Bytes per weight element, rounded up from the bit width. */
    const uint32_t weightBytes = ((orgTIDLNetStructure->TIDLPCLayers[i].weightsElementSizeInBits + 7) / 8);

    if ((orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_ConvolutionLayer) ||
      (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_Deconv2DLayer))
    {
      tIDLNetStructure->TIDLLayers[tiLayerIndex].layerParams.convParams.weights =
        TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].weights, &totalParamSize, weightBytes);
      if (orgTIDLNetStructure->TIDLPCLayers[i].layerParams.convParams.enableBias)
      {
        tIDLNetStructure->TIDLLayers[tiLayerIndex].layerParams.convParams.bias =
          TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].bias, &totalParamSize, 2);
      }
    }
    else if (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_InnerProductLayer)
    {
      tIDLNetStructure->TIDLLayers[tiLayerIndex].layerParams.innerProductParams.weights =
        TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].weights, &totalParamSize, weightBytes);
      tIDLNetStructure->TIDLLayers[tiLayerIndex].layerParams.innerProductParams.bias =
        TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].bias, &totalParamSize, 2);
    }
    else if (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_BatchNormLayer)
    {
      tIDLNetStructure->TIDLLayers[tiLayerIndex].layerParams.batchNormParams.weights =
        TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].weights, &totalParamSize, weightBytes);
      tIDLNetStructure->TIDLLayers[tiLayerIndex].layerParams.batchNormParams.bias =
        TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].bias, &totalParamSize, 2);

      if (orgTIDLNetStructure->TIDLPCLayers[i].actParams.actType == TIDL_PRelU)
      {
        tIDLNetStructure->TIDLLayers[tiLayerIndex].actParams.slope =
          TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].slope, &totalParamSize, weightBytes);
      }
    }
    else if (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_DetectionOutputLayer)
    {
      tIDLNetStructure->TIDLLayers[tiLayerIndex].layerParams.detectOutParams.priorBox =
        TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].priorBox, &totalParamSize, sizeof(float));
    }
    else if (orgTIDLNetStructure->TIDLPCLayers[i].layerType == TIDL_CustomLayer)
    {
      tIDLNetStructure->TIDLLayers[tiLayerIndex].layerParams.customParams.customLayerParamsOffset =
        TIDL_alignParamsWrite(fp1, &orgTIDLNetStructure->TIDLPCLayers[i].weights, &totalParamSize, 1);
    }
    tiLayerIndex++;
  }

  if (fp1 != NULL)
  {
    fclose(fp1);
  }

  /* The function is declared to return int32_t; flowing off the end without
     a return statement is undefined behavior. */
  return 0;
}


/*
 * Fills in the padding and pitch fields of every data buffer of the network.
 *
 * Pass 1 (output buffers): for each output buffer the padding starts at 1 and
 * is raised to the largest padding requested by any later layer that reads
 * the buffer (convolution/deconvolution: convParams.padW/padH; pooling:
 * poolParams.padW/padH, or (kernel - 1) / 2 when that field is zero). Line,
 * channel and ROI pitches are then derived from the dimensions and padding.
 *
 * Pass 2 (input buffers): each input buffer copies padding and pitches from
 * the output buffer that has the same dataId; a message is printed when no
 * such output buffer exists.
 */
void  TIDL_fillDataBufPadRequirements(sTIDL_Network_t * tIDLNetStructure)
{
  int32_t prodIdx, outIdx, consIdx, inIdx;
  int32_t padW, padH, curPadW, curPadH;
  int32_t foundInData;

  /* Pass 1: padding and pitches of the output buffers. */
  for (prodIdx = 0; prodIdx < tIDLNetStructure->numLayers; prodIdx++)
  {
    for (outIdx = 0; outIdx < tIDLNetStructure->TIDLLayers[prodIdx].numOutBufs; outIdx++)
    {
      padW = 1;//:TODO: Temporary change to make same padding for all layers in mobile to be 1
      padH = 1;//:TODO: Temporary change to make same padding for all layers in mobile to be 1

      /* Only layers after the producer are examined as consumers. */
      for (consIdx = prodIdx + 1; consIdx < tIDLNetStructure->numLayers; consIdx++)
      {
        for (inIdx = 0; inIdx < tIDLNetStructure->TIDLLayers[consIdx].numInBufs; inIdx++)
        {
          if (tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].dataId != tIDLNetStructure->TIDLLayers[consIdx].inData[inIdx].dataId)
          {
            continue;
          }

          curPadW = 0;
          curPadH = 0;
          if ((tIDLNetStructure->TIDLLayers[consIdx].layerType == TIDL_ConvolutionLayer) ||
            (tIDLNetStructure->TIDLLayers[consIdx].layerType == TIDL_Deconv2DLayer))
          {
            curPadW = tIDLNetStructure->TIDLLayers[consIdx].layerParams.convParams.padW;
            curPadH = tIDLNetStructure->TIDLLayers[consIdx].layerParams.convParams.padH;
          }
          else if (tIDLNetStructure->TIDLLayers[consIdx].layerType == TIDL_PoolingLayer)
          {
            /* A zero pad field falls back to half of (kernel size - 1). */
            if (tIDLNetStructure->TIDLLayers[consIdx].layerParams.poolParams.padW == 0)
            {
              curPadW = ((tIDLNetStructure->TIDLLayers[consIdx].layerParams.poolParams.kernelW - 1) / 2);
            }
            else
            {
              curPadW = tIDLNetStructure->TIDLLayers[consIdx].layerParams.poolParams.padW;
            }
            if (tIDLNetStructure->TIDLLayers[consIdx].layerParams.poolParams.padH == 0)
            {
              curPadH = ((tIDLNetStructure->TIDLLayers[consIdx].layerParams.poolParams.kernelH - 1) / 2);
            }
            else
            {
              curPadH = tIDLNetStructure->TIDLLayers[consIdx].layerParams.poolParams.padH;
            }
          }

          if (curPadW > padW)
          {
            padW = curPadW;
          }
          if (curPadH > padH)
          {
            padH = curPadH;
          }
        }
      }

      tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].padW = padW;
      tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].padH = padH;

      /* Line pitch: width plus one horizontal pad. */
      tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_LINE_PITCH] = tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].dimValues[TIDL_DIM_WIDTH] + tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].padW;

      /* Channel pitch: (height + 2 * padH + 1) lines. */
      tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_CHANNEL_PITCH] = (tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].dimValues[TIDL_DIM_HEIGHT] + 2 * tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].padH + 1) * tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_LINE_PITCH];
#if ALIGN_CHANNEL_PITCH
      /* Align pitch across channels so that we don't access the same bank */
      tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_CHANNEL_PITCH] = ALIGN_SIZE(tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_CHANNEL_PITCH], TIDL_MSMC_BANK_PITCH) + TIDL_MSMC_CACHE_LINE_SIZE;
#endif
      /* ROI pitch: one channel pitch per channel. */
      tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_ROI_PITCH] = tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].dimValues[TIDL_DIM_NUMCH] * tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_CHANNEL_PITCH];

      //tIDLNetStructure->TIDLLayers[i].outData[j].padW = 4;
      //tIDLNetStructure->TIDLLayers[i].outData[j].padH = 4;
    }
  }

  /* Pass 2: input buffers inherit the layout of the output buffer they read. */
  for (consIdx = 0; consIdx < tIDLNetStructure->numLayers; consIdx++)
  {
    for (inIdx = 0; inIdx < tIDLNetStructure->TIDLLayers[consIdx].numInBufs; inIdx++)
    {
      foundInData = 0;
      for (prodIdx = 0; ((prodIdx < tIDLNetStructure->numLayers) && (foundInData == 0)); prodIdx++)
      {
        for (outIdx = 0; ((outIdx < tIDLNetStructure->TIDLLayers[prodIdx].numOutBufs) && (foundInData == 0)); outIdx++)
        {
          if (tIDLNetStructure->TIDLLayers[consIdx].inData[inIdx].dataId != tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].dataId)
          {
            continue;
          }
          tIDLNetStructure->TIDLLayers[consIdx].inData[inIdx].padW = tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].padW;
          tIDLNetStructure->TIDLLayers[consIdx].inData[inIdx].padH = tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].padH;
          tIDLNetStructure->TIDLLayers[consIdx].inData[inIdx].pitch[TIDL_LINE_PITCH]       = tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_LINE_PITCH];
          tIDLNetStructure->TIDLLayers[consIdx].inData[inIdx].pitch[TIDL_CHANNEL_PITCH]    = tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_CHANNEL_PITCH];
          tIDLNetStructure->TIDLLayers[consIdx].inData[inIdx].pitch[TIDL_ROI_PITCH]        = tIDLNetStructure->TIDLLayers[prodIdx].outData[outIdx].pitch[TIDL_ROI_PITCH];
          foundInData = 1;
        }
      }
      if (foundInData == 0)
      {
        printf("Could not find Indata for data ID %d \n", tIDLNetStructure->TIDLLayers[consIdx].inData[inIdx].dataId);
      }
    }
  }
}



/*
 * Returns 1 when dataId is an input buffer of at least one layer whose
 * layersGroupId differs from the given group, otherwise 0.
 */
int32_t tidltb_isOutDataBuff(sTIDL_Network_t *pTIDLNetStructure, int32_t dataId,
  int32_t layersGroupId)
{
  for (int32_t layerIdx = 0; layerIdx < pTIDLNetStructure->numLayers; ++layerIdx)
  {
    /* Layers of the given group never count as outside consumers. */
    if (pTIDLNetStructure->TIDLLayers[layerIdx].layersGroupId == layersGroupId)
    {
      continue;
    }
    for (int32_t bufIdx = 0; bufIdx < pTIDLNetStructure->TIDLLayers[layerIdx].numInBufs; ++bufIdx)
    {
      if (pTIDLNetStructure->TIDLLayers[layerIdx].inData[bufIdx].dataId == dataId)
      {
        return 1;
      }
    }
  }
  return 0;
}

/*
 * Returns 1 when dataId is an input buffer of at least one layer whose
 * layersGroupId equals the given group, otherwise 0.
 */
int32_t tidltb_isInDataBuff(sTIDL_Network_t * pTIDLNetStructure, int32_t dataId,
  int32_t layersGroupId)
{
  for (int32_t layerIdx = 0; layerIdx < pTIDLNetStructure->numLayers; ++layerIdx)
  {
    /* Only layers that belong to the given group are of interest. */
    if (pTIDLNetStructure->TIDLLayers[layerIdx].layersGroupId != layersGroupId)
    {
      continue;
    }
    for (int32_t bufIdx = 0; bufIdx < pTIDLNetStructure->TIDLLayers[layerIdx].numInBufs; ++bufIdx)
    {
      if (pTIDLNetStructure->TIDLLayers[layerIdx].inData[bufIdx].dataId == dataId)
      {
        return 1;
      }
    }
  }
  return 0;
}
/*
 * Writes one text line of the form "<name> = v0 v1 ... " to fp1.
 *
 * fp1      open stream that receives the text
 * intData  array holding at least n values
 * n        number of values to print; nothing but the label is printed when n <= 0
 * name     label printed in front of the values
 *
 * Every value is right-aligned in a field of eight characters and followed by a blank;
 * the line is terminated by a newline.
 */
void TIDL_writeNInts(FILE * fp1, int32_t * intData, int32_t n, const char * name)
{
  const int32_t * cursor = intData;
  int32_t remaining = n;

  fprintf(fp1, "%s = ", name);
  while (remaining > 0)
  {
    fprintf(fp1, "%8d ", *cursor);
    cursor++;
    remaining--;
  }
  fprintf(fp1, "\n");
}


/*
 * Builds the I/O buffer descriptor (sTIDL_IOBufDesc_t) of one layers group and writes it
 * as a raw binary blob to the file "<name><currLayersGroupId>.bin".
 *
 * Inputs of the group are the outputs of layers outside the group that are consumed by a
 * layer inside it; outputs of the group are the outputs of layers inside the group that
 * are consumed by a layer outside it.
 *
 * tIDLNetStructure     network whose first numLayers layers are scanned
 * orgTIDLNetStructure  handed to TIDL_getOutDataName() to look up buffer names
 * name                 prefix of the output file name
 * numLayers            number of layers to scan
 * currLayersGroupId    layers group being described; also appended to the file name
 * perfSimInfo          may be NULL; when it supplies data-flow info, that info is used to
 *                      derive the output channel padding and (if gParams.fileNameGrpInfo
 *                      is empty) the L2/L3 memory sizes
 *
 * Returns 0 on success and -1 when the file name does not fit into the local buffer or
 * the descriptor could not be written completely. The process exits when the file
 * cannot be opened.
 */
int32_t TIDL_writeInfo(sTIDL_Network_t * tIDLNetStructure, sTIDL_OrgNetwork_t * orgTIDLNetStructure, const char * name, uint32_t numLayers, uint32_t currLayersGroupId, sPerfSim_t * perfSimInfo)
{
  FILE * fp1 = NULL;
  int32_t i, j;
  int32_t numDataBuf = 0;
  int32_t nameLen;
  int32_t status = 0;
  char fileName[500];
  sTIDL_IOBufDesc_t  gIOParams;

  sDataFlowInfo_t *dataFlowInfo = NULL;

  /* The descriptor is dumped byte for byte, so clear it first: unused slots and buffers
     without a name must not carry stack garbage into the file. */
  memset(&gIOParams, 0, sizeof(gIOParams));

  /* Bounded formatting instead of strcpy/strcat so that a long prefix cannot overflow. */
  nameLen = snprintf(fileName, sizeof(fileName), "%s%d.bin", name, (int)currLayersGroupId);
  if ((nameLen < 0) || ((size_t)nameLen >= sizeof(fileName)))
  {
    printf("ERROR: Output file name for %s is too long \n", name);
    return -1;
  }

  if(perfSimInfo != NULL)
  {
    dataFlowInfo  = perfSimInfo->sdataFlowInfo;
  }

  fp1 = fopen(fileName, "wb+");
  if (fp1 == NULL)
  {
    printf("ERROR: Could not open %s file for writing \n", (const char *)fileName);
    exit(-1);
  }

  /* Pass 1: buffers produced outside the group and consumed inside it are group inputs. */
  for (i = 0; (uint32_t)i < numLayers; i++)
  {
    if (tIDLNetStructure->TIDLLayers[i].layersGroupId != currLayersGroupId)
    {
      for (j = 0; j < tIDLNetStructure->TIDLLayers[i].numOutBufs; j++)
      {
        if (tidltb_isInDataBuff(tIDLNetStructure, tIDLNetStructure->TIDLLayers[i].outData[j].dataId,
          currLayersGroupId))
        {

          gIOParams.inWidth[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].dimValues[TIDL_DIM_WIDTH];
          gIOParams.inHeight[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].dimValues[TIDL_DIM_HEIGHT];
          gIOParams.inNumChannels[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].dimValues[TIDL_DIM_NUMCH];

          gIOParams.inPadL[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].padW;
          gIOParams.inPadT[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].padH;
          gIOParams.inPadR[numDataBuf] = 0;
          gIOParams.inChannelPitch[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].pitch[TIDL_CHANNEL_PITCH];

          /* Number of (width + left pad) sized rows covered by one channel, rounded up;
             whatever is not top padding or payload is reported as bottom padding. */
          int32_t totalHeight = (gIOParams.inChannelPitch[numDataBuf] +
                                   gIOParams.inWidth[numDataBuf]  + gIOParams.inPadL[numDataBuf]  -1)/ (gIOParams.inWidth[numDataBuf] + gIOParams.inPadL[numDataBuf]);
          gIOParams.inPadB[numDataBuf] = totalHeight - gIOParams.inPadT[numDataBuf] - gIOParams.inHeight[numDataBuf];

          gIOParams.inPadCh[numDataBuf] = 0;

          gIOParams.inElementType[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].elementType;
          gIOParams.inDataId[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].dataId;
          gIOParams.inDataFormat[numDataBuf] = gParams.inDataFormat[numDataBuf];
          gIOParams.inResizeType[numDataBuf] = gParams.inResizeType[numDataBuf];

          /* A configured resize size of -1 means "use the input size itself". */
          if (gParams.resizeWidth[numDataBuf] == -1)
          {
            gIOParams.resizeWidth[numDataBuf] = gIOParams.inWidth[numDataBuf];
          }
          else
          {
            gIOParams.resizeWidth[numDataBuf] = gParams.resizeWidth[numDataBuf];
          }
          if (gParams.resizeHeight[numDataBuf] == -1)
          {
            gIOParams.resizeHeight[numDataBuf] = gIOParams.inHeight[numDataBuf];
          }
          else
          {
            gIOParams.resizeHeight[numDataBuf] = gParams.resizeHeight[numDataBuf];
          }
          const char * str = TIDL_getOutDataName(orgTIDLNetStructure, tIDLNetStructure->TIDLLayers[i].outData[j].dataId);
          if(str)
          {
            strcpy((char *)gIOParams.inDataName[numDataBuf], str);
          }
          numDataBuf++;
        }
      }
    }
  }
  gIOParams.numInputBuf = numDataBuf;

  /* Pass 2: buffers produced inside the group and consumed outside it are group outputs. */
  numDataBuf = 0;
  for (i = 0; (uint32_t)i < numLayers; i++)
  {
    if (tIDLNetStructure->TIDLLayers[i].layersGroupId == currLayersGroupId)
    {
      for (j = 0; j < tIDLNetStructure->TIDLLayers[i].numOutBufs; j++)
      {
        if (tidltb_isOutDataBuff(tIDLNetStructure, tIDLNetStructure->TIDLLayers[i].outData[j].dataId,
          currLayersGroupId))
        {

          gIOParams.outWidth[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].dimValues[TIDL_DIM_WIDTH];
          gIOParams.outHeight[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].dimValues[TIDL_DIM_HEIGHT];
          gIOParams.outNumChannels[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].dimValues[TIDL_DIM_NUMCH];
          gIOParams.outPadL[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].padW;
          gIOParams.outPadT[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].padH;
          gIOParams.outPadR[numDataBuf] = 0;

          gIOParams.outChannelPitch[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].pitch[TIDL_CHANNEL_PITCH];

          /* Same row-count derivation as for the inputs. */
          int32_t totalHeight = (gIOParams.outChannelPitch[numDataBuf] +
                                   gIOParams.outWidth[numDataBuf] + gIOParams.outPadL[numDataBuf]  -1)/ (gIOParams.outWidth[numDataBuf] + gIOParams.outPadL[numDataBuf]);
          gIOParams.outPadB[numDataBuf] = totalHeight - gIOParams.outPadT[numDataBuf] - gIOParams.outHeight[numDataBuf];

          gIOParams.outPadCh[numDataBuf] = 0;
          if ( dataFlowInfo  != NULL )
          {
            /* Memory the data-flow info reserves beyond pitch * channels is expressed
               as additional whole channels, rounded up. */
            int32_t totOutMemReq = gIOParams.outChannelPitch[numDataBuf]  *  gIOParams.outNumChannels[numDataBuf];
            int32_t extraMemReq = 0;

            extraMemReq = (dataFlowInfo[i].bufInfo[OUT_FEAT_MAP][WRITE].bufSize - totOutMemReq);
            if ( extraMemReq > 0 )
            {
              gIOParams.outPadCh[numDataBuf] = (extraMemReq + (gIOParams.outChannelPitch[numDataBuf] -1 )) /gIOParams.outChannelPitch[numDataBuf];
            }
          }

          gIOParams.outElementType[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].elementType;
          gIOParams.outDataId[numDataBuf] = tIDLNetStructure->TIDLLayers[i].outData[j].dataId;
          const char * str = TIDL_getOutDataName(orgTIDLNetStructure, tIDLNetStructure->TIDLLayers[i].outData[j].dataId);
          if(str)
          {
            strcpy((char *)gIOParams.outDataName[numDataBuf], str);
          }
          numDataBuf++;
        }
      }
    }
  }
  gIOParams.numOutputBuf = numDataBuf;

  /* Default memory sizes; replaced below when performance simulation data is present
     and no group-info file name is configured. */
  gIOParams.l1MemSize = (16 * 1024);
  gIOParams.l2MemSize = (448 * 1024);
  gIOParams.l3MemSize = (7968 * 1024);
  if (( dataFlowInfo  != NULL )  &&  (strcmp((char *)gParams.fileNameGrpInfo,"") == 0))
  {
    gIOParams.l2MemSize = (perfSimInfo->simConfig.sizeL2MemKB - perfSimInfo->simConfig.sizeL2Cache + 1) * 1024;
    gIOParams.l3MemSize = (perfSimInfo->simConfig.sizeL3MemKB + 56) * 1024;
  }

  if (fwrite(&gIOParams, 1, sizeof(sTIDL_IOBufDesc_t), fp1) != sizeof(sTIDL_IOBufDesc_t))
  {
    printf("ERROR: Could not write %s file \n", (const char *)fileName);
    status = -1;
  }
  fclose(fp1);

  /* The function is declared int32_t; falling off the end would be undefined behaviour. */
  return status;
}


/*
 * Output reshape hook for data layers. Nothing is computed here: neither argument is
 * read and the network structure is left untouched.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeDataLayer(sTIDL_OrgNetwork_t *pOrgTIDLNetStructure, int32_t layerIndex)
{
  (void)pOrgTIDLNetStructure;
  (void)layerIndex;
  return 0;
}
/*
 * Derives the output tensor description of a convolution layer from its first input and
 * its convolution parameters, and makes sure weights (and bias, if enabled) exist.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the convolution layer
 *
 * Side effects on the layer: outData[0] (element type, numDim, dimValues), numMacs,
 * convParams.numInChannels and, when the coefficient buffers are missing or have an
 * unexpected size, freshly allocated weights/bias filled with pseudo-random values.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeConvLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  sTIDL_ConvParams_t &conv = layer.layerParams.convParams;

  /* Element type requested with 1 by default, with 0 when a ReLU/ReLU6 activation follows. */
  layer.outData[0].elementType = tidl_getElementType(1);
  if ((layer.actParams.actType == TIDL_RelU6) || (layer.actParams.actType == TIDL_RelU))
  {
    layer.outData[0].elementType = tidl_getElementType(0);
  }

  /* Spatial size: floor((in + 2 * pad - dilatedKernel) / stride) + 1. */
  const int32_t dilatedKernelH = (conv.kernelH - 1) * conv.dilationH + 1;
  const int32_t dilatedKernelW = (conv.kernelW - 1) * conv.dilationW + 1;

  layer.outData[0].numDim = layer.inData[0].numDim;
  layer.outData[0].dimValues[0] = layer.inData[0].dimValues[0];
  layer.outData[0].dimValues[1] = conv.numOutChannels;
  layer.outData[0].dimValues[2] =
    ((layer.inData[0].dimValues[2] + (conv.padH * 2) - dilatedKernelH) / conv.strideH) + 1;
  layer.outData[0].dimValues[3] =
    ((layer.inData[0].dimValues[3] + (conv.padW * 2) - dilatedKernelW) / conv.strideW) + 1;

  conv.numInChannels = layer.inData[0].dimValues[1];

  /* MAC count: output elements * kernel area * input channels, shared among the groups. */
  int64_t macs = (int64_t)layer.outData[0].dimValues[0];
  macs = macs * layer.outData[0].dimValues[1];
  macs = macs * layer.outData[0].dimValues[2];
  macs = macs * layer.outData[0].dimValues[3];
  macs = macs * conv.kernelW;
  macs = macs * conv.kernelH;
  macs = macs * layer.inData[0].dimValues[1];
  layer.numMacs = (int64_t)(macs / conv.numGroups);

  /*
   * Random coefficient fallback: used when no weights were loaded or their element
   * count differs from what the layer geometry requires.
   */
  int32_t dataSize = conv.kernelH * conv.kernelW *
                     conv.numOutChannels *
                     layer.inData[0].dimValues[1] / conv.numGroups;

  const bool weightsUnusable = (layer.weights.ptr == NULL) || (layer.weights.bufSize != dataSize);
  if (weightsUnusable)
  {
    printf("WARNING: Conv Layer %s's coeff cannot be found(or not match) in coef file, "
           "Random coeff will be generated! "
           "Only for evaluation usage! "
           "Results are all random!\n", layer.name);

    float *coeff = (float *)my_malloc(dataSize*sizeof(float));

    for (int n = 0; n < dataSize; n++)
    {
      /* First rand() gives the magnitude, the second one decides the sign. */
      const int32_t magnitude = (rand() & (0X7FFFFFFF));
      const int32_t signedVal = (rand() & 1) ? magnitude : -magnitude;
      coeff[n] = ((float)signedVal) / (0X3FFFFF);
    }

    layer.weights.ptr = coeff;
    layer.weights.bufSize = dataSize;
  }

  /*
   * Random bias fallback: one value per output channel, only when bias is enabled.
   */
  dataSize = conv.numOutChannels;

  if (conv.enableBias &&
      ((layer.bias.ptr == NULL) || (layer.bias.bufSize != dataSize)))
  {
    printf("WARNING: Conv Layer %s's bias cannot be found(or not match) in coef file, "
           "Random bias will be generated! "
           "Only for evaluation usage! "
           "Results are all random!\n", layer.name);

    float *biasVals = (float *)my_malloc(dataSize*sizeof(float));

    for (int n = 0; n < dataSize; n++)
    {
      const int32_t magnitude = (rand() & (0X7FFFFFFF));
      const int32_t signedVal = (rand() & 1) ? magnitude : -magnitude;
      biasVals[n] = ((float)signedVal) / (0XFFFFFF);
    }

    layer.bias.ptr = biasVals;
    layer.bias.bufSize = dataSize;
  }

  return 0;
}

/*
 * Derives the output tensor description of a resize layer from its first input and the
 * layer's resizeParams.resizeRatio values.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the resize layer
 *
 * Element type, numDim and dimensions 0 and 1 are copied from the input. Dimensions 2
 * and 3 are the input dimensions multiplied by resizeRatio[2] and resizeRatio[3].
 * When either ratio is negative, BOTH ratios are first replaced by
 * (-ratio / input dimension), so the stored values are updated in place.
 * NOTE(review): presumably a negative ratio encodes an absolute target size - confirm
 * with the importers. If only one of the two ratios is negative the other one turns
 * negative here; verify that callers never produce that combination.
 *
 * numMacs is set to 4 operations per output element. Always returns 0.
 */
int32_t TIDL_tfOutReshapeResize(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &TIDLPCLayers = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];

  TIDLPCLayers.outData[0].elementType = TIDLPCLayers.inData[0].elementType;
  TIDLPCLayers.outData[0].numDim = TIDLPCLayers.inData[0].numDim;
  TIDLPCLayers.outData[0].dimValues[0] = TIDLPCLayers.inData[0].dimValues[0];
  TIDLPCLayers.outData[0].dimValues[1] = TIDLPCLayers.inData[0].dimValues[1];

  if ((TIDLPCLayers.layerParams.resizeParams.resizeRatio[2] < 0) ||
    (TIDLPCLayers.layerParams.resizeParams.resizeRatio[3] < 0))
  {
    TIDLPCLayers.layerParams.resizeParams.resizeRatio[2] =
      -TIDLPCLayers.layerParams.resizeParams.resizeRatio[2] / TIDLPCLayers.inData[0].dimValues[2];
    TIDLPCLayers.layerParams.resizeParams.resizeRatio[3] =
      -TIDLPCLayers.layerParams.resizeParams.resizeRatio[3] / TIDLPCLayers.inData[0].dimValues[3];
  }

  TIDLPCLayers.outData[0].dimValues[2] =
    TIDLPCLayers.inData[0].dimValues[2] * TIDLPCLayers.layerParams.resizeParams.resizeRatio[2];
  TIDLPCLayers.outData[0].dimValues[3] =
    TIDLPCLayers.inData[0].dimValues[3] * TIDLPCLayers.layerParams.resizeParams.resizeRatio[3];

  TIDLPCLayers.numMacs =
    (int64_t)((int64_t)TIDLPCLayers.outData[0].dimValues[0] * TIDLPCLayers.outData[0].dimValues[1] *
      TIDLPCLayers.outData[0].dimValues[2] * TIDLPCLayers.outData[0].dimValues[3] * 4);
  return 0;
}


/*
 * Derives the output tensor description of a pooling layer from its first input.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the pooling layer
 *
 * Element type, numDim and dimensions 0 and 1 always follow the input. When neither
 * kernelH nor kernelW is positive the spatial output is 1x1; otherwise each spatial
 * dimension is round((in + 2 * pad - kernel) / stride) + 1, where the rounding is ceil()
 * if poolParams.useCeil is set and floor() if not. numMacs is the number of output
 * elements, multiplied by the kernel area in the windowed case.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapePoolingLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  sTIDL_PoolingParams_t &pool = layer.layerParams.poolParams;

  layer.outData[0].elementType = layer.inData[0].elementType;
  layer.outData[0].numDim = layer.inData[0].numDim;
  layer.outData[0].dimValues[0] = layer.inData[0].dimValues[0];
  layer.outData[0].dimValues[1] = layer.inData[0].dimValues[1];

  if (!(pool.kernelH > 0 || pool.kernelW > 0))
  {
    /* No kernel size given: the whole plane collapses into a single value. */
    layer.outData[0].dimValues[2] = 1;
    layer.outData[0].dimValues[3] = 1;
    layer.numMacs =
      (int64_t)((int64_t)layer.outData[0].dimValues[0] * layer.outData[0].dimValues[1] *
        layer.outData[0].dimValues[2] * layer.outData[0].dimValues[3]);
    return 0;
  }

  /* Window positions minus one, still fractional (2.0 forces double arithmetic). */
  const double stepsH = ((layer.inData[0].dimValues[2] + pool.padH*2.0) - (pool.kernelH)) / pool.strideH;
  const double stepsW = ((layer.inData[0].dimValues[3] + pool.padW*2.0) - (pool.kernelW)) / pool.strideW;

  if (pool.useCeil)
  {
    layer.outData[0].dimValues[2] = ceil(stepsH) + 1;
    layer.outData[0].dimValues[3] = ceil(stepsW) + 1;
  }
  else
  {
    layer.outData[0].dimValues[2] = floor(stepsH) + 1;
    layer.outData[0].dimValues[3] = floor(stepsW) + 1;
  }

  layer.numMacs =
    (int64_t)((int64_t)layer.outData[0].dimValues[0] * layer.outData[0].dimValues[1] *
      layer.outData[0].dimValues[2] * layer.outData[0].dimValues[3] *
      pool.kernelW * pool.kernelH);
  return 0;
}
/*
 * Output reshape for layers whose output has the same description as their first input.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the layer
 *
 * Copies element type, numDim and the four dimValues from inData[0] to outData[0] and
 * sets numMacs to the number of output elements. Always returns 0.
 */
int32_t TIDL_tfOutReshapeIdentity(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  int32_t dim;
  int64_t elemCount;

  layer.outData[0].elementType = layer.inData[0].elementType;
  layer.outData[0].numDim = layer.inData[0].numDim;
  for (dim = 0; dim < 4; dim++)
  {
    layer.outData[0].dimValues[dim] = layer.inData[0].dimValues[dim];
  }

  /* Product of all four output dimensions, accumulated in 64 bits. */
  elemCount = (int64_t)layer.outData[0].dimValues[0];
  for (dim = 1; dim < 4; dim++)
  {
    elemCount = elemCount * layer.outData[0].dimValues[dim];
  }
  layer.numMacs = elemCount;

  return 0;
}

/*
 * Output reshape for custom layers, selected by customParams.customLayerType.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the custom layer
 *
 * Type 0: output is (input dim 0) x 1 x 1 x 512 with the input's element type and numDim.
 * Type 2: numDim is 1, the channel count is three times the input's, and height/width are
 *         taken from the y/x ranges of the TIDL_CustomParams2_t found in weights.ptr.
 *         NOTE(review): weights.ptr is dereferenced without a NULL check - confirm the
 *         importer always fills it for this layer type.
 * Any other type leaves the layer untouched.
 *
 * For types 0 and 2 numMacs becomes the number of output elements. Always returns 0.
 */
int32_t TIDL_tfOutCustomIdentity(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  sTIDL_CustomParams_t &custom = layer.layerParams.customParams;

  if (custom.customLayerType == 0)
  {
    layer.outData[0].elementType = layer.inData[0].elementType;
    layer.outData[0].numDim = layer.inData[0].numDim;
    layer.outData[0].dimValues[0] = layer.inData[0].dimValues[0];
    layer.outData[0].dimValues[1] = 1;
    layer.outData[0].dimValues[2] = 1;
    layer.outData[0].dimValues[3] = 512 ;
    layer.numMacs =
      (int64_t)((int64_t)layer.outData[0].dimValues[0] * layer.outData[0].dimValues[1] *
        layer.outData[0].dimValues[2] * layer.outData[0].dimValues[3]);
    return 0;
  }

  if (custom.customLayerType == 2)
  {
    printf("Custom 2 in common import\n");
    TIDL_CustomParams2_t* region = (TIDL_CustomParams2_t*) layer.weights.ptr;

    layer.outData[0].numDim = 1;
    layer.outData[0].elementType  = layer.inData[0].elementType;
    layer.outData[0].dimValues[0] = layer.inData[0].dimValues[0];
    layer.outData[0].dimValues[1] = layer.inData[0].dimValues[1]*3;
    layer.outData[0].dimValues[2] = region->y_max - region->y_min;
    layer.outData[0].dimValues[3] = region->x_max - region->x_min;
    printf("width : %d , height : %d\n", layer.outData[0].dimValues[3] , layer.outData[0].dimValues[2]);

    layer.numMacs =
      (int64_t)((int64_t)layer.outData[0].dimValues[0] * layer.outData[0].dimValues[1] *
        layer.outData[0].dimValues[2] * layer.outData[0].dimValues[3]);
  }

  return 0;
}


/*
 * Output reshape for PReLU layers.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the PReLU layer
 *
 * The output keeps the input's numDim and dimensions; its element type is the one
 * returned by tidl_getElementType(1). batchNormParams.numChannels is set to the input
 * channel count and numMacs to the number of output elements.
 *
 * Unless bias, weights and slope are all present with exactly one entry per channel, all
 * three are replaced by new buffers holding weight 1, bias 0 and slope 1 for every
 * channel, and a warning is printed.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapePRelu(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  int32_t dim;

  layer.outData[0].elementType = tidl_getElementType(1);
  layer.layerParams.batchNormParams.numChannels = layer.inData[0].dimValues[1];
  layer.outData[0].numDim = layer.inData[0].numDim;
  for (dim = 0; dim < 4; dim++)
  {
    layer.outData[0].dimValues[dim] = layer.inData[0].dimValues[dim];
  }
  layer.numMacs =
    (int64_t)((int64_t)layer.outData[0].dimValues[0] * layer.outData[0].dimValues[1] *
      layer.outData[0].dimValues[2] * layer.outData[0].dimValues[3]);

  /* One coefficient of each kind is expected per channel. */
  const int32_t perChannel = layer.layerParams.batchNormParams.numChannels;

  const bool coeffsComplete =
    (layer.bias.ptr != NULL) && (layer.bias.bufSize == perChannel) &&
    (layer.weights.ptr != NULL) && (layer.weights.bufSize == perChannel) &&
    (layer.slope.ptr != NULL) && (layer.slope.bufSize == perChannel);
  if (coeffsComplete)
  {
    return 0;
  }

  printf("WARNING: PReLU Layer %s's bias cannot be found(or not match) in coef file, "
         "Random bias will be generated! "
         "Only for evaluation usage! "
         "Results are all random!\n", layer.name);

  float * scaleBuf = (float*)my_malloc(perChannel*sizeof(float));
  float * biasBuf = (float*)my_malloc(perChannel*sizeof(float));
  float * slopeBuf = (float*)my_malloc(perChannel*sizeof(float));
  for (int ch = 0; ch < perChannel; ch++)
  {
    scaleBuf[ch] = 1;
    biasBuf[ch]  = 0;
    slopeBuf[ch] = 1;
  }

  layer.bias.ptr = biasBuf;
  layer.bias.bufSize = perChannel;
  layer.weights.ptr = scaleBuf;
  layer.weights.bufSize = perChannel;
  layer.slope.ptr = slopeBuf;
  layer.slope.bufSize = perChannel;

  return 0;
}

/*
 * Output reshape for batch-norm layers.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the batch-norm layer
 *
 * Shape and numMacs come from TIDL_tfOutReshapeIdentity(). The element type is requested
 * with tidl_getElementType(1), or with tidl_getElementType(0) when the activation is
 * ReLU or ReLU6. batchNormParams.numChannels is set to the input channel count.
 *
 * Unless bias and weights are both present with exactly one entry per channel, both are
 * replaced by new buffers holding weight 1 and bias 0, and a warning is printed.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeBN(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  TIDL_tfOutReshapeIdentity(pOrgTIDLNetStructure, layerIndex);

  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  layer.outData[0].elementType = tidl_getElementType(1);
  layer.layerParams.batchNormParams.numChannels = layer.inData[0].dimValues[1];

  if ((layer.actParams.actType == TIDL_RelU6) || (layer.actParams.actType == TIDL_RelU))
  {
    layer.outData[0].elementType = tidl_getElementType(0);
  }

  /* One scale and one bias value are expected per channel. */
  const int32_t perChannel = layer.layerParams.batchNormParams.numChannels;

  const bool coeffsComplete =
    (layer.bias.ptr != NULL) && (layer.bias.bufSize == perChannel) &&
    (layer.weights.ptr != NULL) && (layer.weights.bufSize == perChannel);
  if (coeffsComplete)
  {
    return 0;
  }

  printf("WARNING: Batch Norm Layer %s's coeff cannot be found(or not match) in coef file, "
         "Random bias will be generated! "
         "Only for evaluation usage! "
         "Results are all random!\n", layer.name);

  float * scaleBuf = (float*)my_malloc(perChannel*sizeof(float));
  float * biasBuf = (float*)my_malloc(perChannel*sizeof(float));
  for (int ch = 0; ch < perChannel; ch++)
  {
    scaleBuf[ch] = 1;
    biasBuf[ch]  = 0;
  }

  layer.bias.ptr = biasBuf;
  layer.bias.bufSize = perChannel;
  layer.weights.ptr = scaleBuf;
  layer.weights.bufSize = perChannel;

  return 0;
}


/*
 * Output reshape for ReLU layers: same shape as the input (via
 * TIDL_tfOutReshapeIdentity()), with the element type returned by tidl_getElementType(0).
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeRelu(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  TIDL_tfOutReshapeIdentity(pOrgTIDLNetStructure, layerIndex);

  /* The identity step copied the input's element type; override it here. */
  pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].outData[0].elementType = tidl_getElementType(0);
  return 0;
}

/*
 * Output reshape for clip layers: same shape as the input (via
 * TIDL_tfOutReshapeIdentity()). The element type is requested with
 * tidl_getElementType(0) when actParams.clipMin is not negative and with
 * tidl_getElementType(1) otherwise.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeClip(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  TIDL_tfOutReshapeIdentity(pOrgTIDLNetStructure, layerIndex);

  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  const int32_t typeSelector = (layer.actParams.clipMin >= 0) ? 0 : 1;
  layer.outData[0].elementType = tidl_getElementType(typeSelector);
  return 0;
}

/*
 * Output reshape for softmax layers.
 *
 * The output element type is TIDL_SinglePrecFloat and numDim follows the input. The
 * shape is (input dim 0) x 1 x 1 x N, where N is the product of input dimensions 1, 2
 * and 3. numMacs is the number of output elements.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeSoftmax(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];

  layer.outData[0].elementType = TIDL_SinglePrecFloat;
  layer.outData[0].numDim = layer.inData[0].numDim;

  /* Everything except dimension 0 is folded into the last dimension. */
  layer.outData[0].dimValues[0] = layer.inData[0].dimValues[0];
  layer.outData[0].dimValues[1] = 1;
  layer.outData[0].dimValues[2] = 1;
  layer.outData[0].dimValues[3] =
    layer.inData[0].dimValues[3] * layer.inData[0].dimValues[1] * layer.inData[0].dimValues[2];

  int64_t elemCount = (int64_t)layer.outData[0].dimValues[0];
  elemCount = elemCount * layer.outData[0].dimValues[1];
  elemCount = elemCount * layer.outData[0].dimValues[2];
  elemCount = elemCount * layer.outData[0].dimValues[3];
  layer.numMacs = elemCount;

  return 0;
}

/*
 * Output reshape for inner-product layers.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the inner-product layer
 *
 * The output is (input dim 0) x 1 x 1 x numOutNodes with the element type returned by
 * tidl_getElementType(1). numMacs is (input dim 0) * (numOutNodes * numInNodes + numOutNodes).
 *
 * When the weights are missing or their element count differs from numOutNodes times
 * the product of all four input dimensions, numInNodes is recomputed from that count and
 * zero-filled weight and bias buffers are installed, with a warning printed.
 * NOTE(review): numMacs is computed before numInNodes is recomputed, so in the fallback
 * case it reflects the previous numInNodes - confirm this is intended.
 * NOTE(review): the fallback bias buffer gets the same element count as the weights
 * rather than one entry per output node - verify against the consumers of bias.bufSize.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeIPLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  sTIDL_InnerProductParams_t &ip = layer.layerParams.innerProductParams;

  layer.outData[0].elementType = tidl_getElementType(1);
  layer.outData[0].numDim = layer.inData[0].numDim;
  layer.outData[0].dimValues[0] = layer.inData[0].dimValues[0];
  layer.outData[0].dimValues[1] =  1;
  layer.outData[0].dimValues[2] =  1;
  layer.outData[0].dimValues[3] = ip.numOutNodes;

  layer.numMacs =
    (int64_t)((int64_t)layer.outData[0].dimValues[0] * (ip.numOutNodes* ip.numInNodes + ip.numOutNodes));

  /* Element count the weight buffer must have for this layer geometry. */
  const int32_t dataSize = ip.numOutNodes *
                           layer.inData[0].dimValues[0] *
                           layer.inData[0].dimValues[1] *
                           layer.inData[0].dimValues[2] *
                           layer.inData[0].dimValues[3];

  const bool weightsUsable = (layer.weights.ptr != NULL) && (layer.weights.bufSize == dataSize);
  if (weightsUsable)
  {
    return 0;
  }

  printf("WARNING: Inner Product Layer %s's coeff cannot be found(or not match) in coef file, "
         "Random coeff will be generated! "
         "Only for evaluation usage! "
         "Results are all random!\n", layer.name);

  ip.numInNodes = dataSize / ip.numOutNodes;

  float  * weightBuf = (float *)my_malloc(dataSize*sizeof(float));
  float  * biasBuf = (float *)my_malloc(dataSize*sizeof(float));

  /* Both fallback buffers are simply cleared. */
  for (int idx = 0; idx < dataSize; idx++)
  {
    weightBuf[idx] = 0;
    biasBuf[idx] = 0;
  }

  layer.weights.ptr = weightBuf;
  layer.weights.bufSize = dataSize;
  layer.bias.ptr = biasBuf;
  layer.bias.bufSize = dataSize;

  return 0;
}

/*
 * Derives the output tensor description of a deconvolution layer and makes sure weights
 * (and bias, if enabled) exist.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the deconvolution layer
 *
 * If outData[0] already carries a negative height or width on entry, both values are
 * negated and used as the output size; otherwise each spatial dimension is
 * stride * (in - 1) + kernel - 2 * pad.
 * NOTE(review): presumably the importer stores an explicit output shape as negative
 * numbers - confirm against the callers.
 *
 * Also updates the element type, numDim, dimensions 0 and 1, convParams.numInChannels and
 * numMacs. Missing or mis-sized weights/bias are replaced by pseudo-random buffers with a
 * warning; for gParams.modelType 0 or 2 the generated weights are additionally passed
 * through TIDL_caffeReorderDeconvFilters().
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeDeConvLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  sTIDL_ConvParams_t &deconv = layer.layerParams.convParams;

  /* Currently validated with ONNX */
  layer.outData[0].elementType = tidl_getElementType(1);
  if ((layer.actParams.actType == TIDL_RelU6) || (layer.actParams.actType == TIDL_RelU))
  {
    layer.outData[0].elementType = tidl_getElementType(0);
  }
  layer.outData[0].numDim = layer.inData[0].numDim;
  layer.outData[0].dimValues[0] = layer.inData[0].dimValues[0];
  layer.outData[0].dimValues[1] = deconv.numOutChannels;

  /* stride[i] * (input_size[i] - 1) + output_padding[i] + kernel_shape[i] - pads[start_i] - pads[end_i]*/
  const bool sizePreset = (layer.outData[0].dimValues[2] < 0) || (layer.outData[0].dimValues[3] < 0);
  if (sizePreset)
  {
    layer.outData[0].dimValues[2] = -layer.outData[0].dimValues[2];
    layer.outData[0].dimValues[3] = -layer.outData[0].dimValues[3];
  }
  else
  {
    layer.outData[0].dimValues[2] =
      (deconv.strideH * (layer.inData[0].dimValues[2] - 1) + deconv.kernelH - (deconv.padH * 2));
    layer.outData[0].dimValues[3] =
      (deconv.strideW * (layer.inData[0].dimValues[3] - 1) + deconv.kernelW - (deconv.padW * 2));
  }
  deconv.numInChannels = layer.inData[0].dimValues[1];

  /* MAC count: output elements * kernel area * input channels, shared among the groups. */
  int64_t macs = (int64_t)layer.outData[0].dimValues[0];
  macs = macs * layer.outData[0].dimValues[1];
  macs = macs * layer.outData[0].dimValues[2];
  macs = macs * layer.outData[0].dimValues[3];
  macs = macs * deconv.kernelW;
  macs = macs * deconv.kernelH;
  macs = macs * layer.inData[0].dimValues[1];
  layer.numMacs = (int64_t)(macs / deconv.numGroups);

  /*
   * Random coefficient fallback: used when no weights were loaded or their element
   * count differs from what the layer geometry requires.
   */
  int32_t dataSize = deconv.kernelH * deconv.kernelW *
                     deconv.numOutChannels *
                     layer.inData[0].dimValues[1] / deconv.numGroups;

  const bool weightsUnusable = (layer.weights.ptr == NULL) || (layer.weights.bufSize != dataSize);
  if (weightsUnusable)
  {
    printf("WARNING: Deconv Layer %s's coeff cannot be found(or not match) in coef file, "
           "Random coeff will be generated! "
           "Only for evaluation usage! "
           "Results are all random!\n", layer.name);

    float *coeff = (float *)my_malloc(dataSize*sizeof(float));

    for (int n = 0; n < dataSize; n++)
    {
      /* First rand() gives the magnitude, the second one decides the sign. */
      const int32_t magnitude = (rand() & (0X7FFFFFFF));
      const int32_t signedVal = (rand() & 1) ? magnitude : -magnitude;
      coeff[n] = ((float)signedVal) / (0X3FFFFF);
    }

    layer.weights.ptr = coeff;
    layer.weights.bufSize = dataSize;
    if ((gParams.modelType == 0) || (gParams.modelType == 2))
    {
      TIDL_caffeReorderDeconvFilters(layer.weights, deconv.numOutChannels, deconv.numInChannels, deconv.numGroups, deconv.kernelW*deconv.kernelH);
    }
  }

  /*
   * Random bias fallback: one value per output channel, only when bias is enabled.
   */
  dataSize = deconv.numOutChannels;

  if (deconv.enableBias &&
      ((layer.bias.ptr == NULL) || (layer.bias.bufSize != dataSize)))
  {
    printf("WARNING: Deconv Layer %s's bias cannot be found(or not match) in coef file, "
           "Random bias will be generated! "
           "Only for evaluation usage! "
           "Results are all random!\n", layer.name);

    float *biasVals = (float *)my_malloc(dataSize*sizeof(float));

    for (int n = 0; n < dataSize; n++)
    {
      const int32_t magnitude = (rand() & (0X7FFFFFFF));
      const int32_t signedVal = (rand() & 1) ? magnitude : -magnitude;
      biasVals[n] = ((float)signedVal) / (0X3FFFFF);
    }

    layer.bias.ptr = biasVals;
    layer.bias.bufSize = dataSize;
  }
  return 0;
}

/*
 * Derives the output tensor description of a concat layer.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the concat layer
 *
 * For gParams.modelType 0 (Caffe import) the producer of every input is looked up with
 * tidl_getInLayer(); if all of them are flatten layers the concat axis is forced to
 * TIDL_DIM_WIDTH. The output copies numDim and all dimensions from the first input,
 * except along the concat axis, where the sizes of all inputs are summed. The element
 * type is requested with tidl_getElementType(0), or tidl_getElementType(1) as soon as
 * one input is TIDL_SignedChar or TIDL_SignedShort. numMacs is the number of output
 * elements.
 *
 * Returns 0 on success, -1 when a producing layer cannot be found during the Caffe check.
 */
int32_t TIDL_tfOutReshapeConcatLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  int32_t inIdx;
  int32_t dim;
  int32_t axisTotal = 0;
  int32_t axisId;

  // Special code for Caffe Concat import
  if (gParams.modelType == 0)
  {
    bool onlyFlattenProducers = true;
    for (inIdx = 0; inIdx < layer.numInBufs; inIdx++)
    {
      const int32_t producerIdx = tidl_getInLayer(*pOrgTIDLNetStructure, layerIndex, layer.inData[inIdx].dataId);
      if (producerIdx == -1)
      {
        printf("ERROR: TIDL_tfOutReshapeConcatLayer ConcatLayer find prev layer failed!\n");
        return -1;
      }
      /* Keep scanning even after a mismatch so that every input is still validated. */
      if (pOrgTIDLNetStructure->TIDLPCLayers[producerIdx].layerType != TIDL_FlattenLayer)
      {
        onlyFlattenProducers = false;
      }
    }

    if (onlyFlattenProducers)
    {
      layer.layerParams.concatParams.axis = TIDL_DIM_WIDTH;
    }
  }
  axisId = layer.layerParams.concatParams.axis;

  /* Start from the first input's description. */
  layer.outData[0].elementType = tidl_getElementType(0);
  layer.outData[0].numDim = layer.inData[0].numDim;
  for (dim = 0; dim < 4; dim++)
  {
    layer.outData[0].dimValues[dim] = layer.inData[0].dimValues[dim];
  }

  /* Accumulate the size along the concat axis and watch for signed inputs. */
  for (inIdx = 0; inIdx < layer.numInBufs; inIdx++)
  {
    const bool signedInput = (layer.inData[inIdx].elementType == TIDL_SignedChar) ||
                             (layer.inData[inIdx].elementType == TIDL_SignedShort);
    if (signedInput)
    {
      layer.outData[0].elementType = tidl_getElementType(1);
    }
    axisTotal += layer.inData[inIdx].dimValues[axisId];
  }
  layer.outData[0].dimValues[axisId] = axisTotal;

  layer.numMacs =
    (int64_t)((int64_t)layer.outData[0].dimValues[0] * layer.outData[0].dimValues[1] *
      layer.outData[0].dimValues[2] * layer.outData[0].dimValues[3]);
  return 0;
}
/*
 * Derives the output tensor descriptions of a slice layer.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the slice layer
 *
 * Slice points equal to -1 are replaced by an even split of the input along
 * sliceParams.axis, and the entry after the last output is set to the full input size.
 * Every output copies the element type and all four dimensions of the first input, except
 * along the slice axis, where it receives the distance between its two slice points.
 * numDim is only written for outData[0]. numMacs is set to 0.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeSliceLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  int32_t outIdx;
  int32_t dim;

  layer.outData[0].numDim = layer.inData[0].numDim;

  const int32_t axisId = layer.layerParams.sliceParams.axis;
  const int32_t axisSize = layer.inData[0].dimValues[axisId];

  /* Fill in unspecified slice points, then close the list with the full size. */
  for (outIdx = 0; outIdx < layer.numOutBufs; outIdx++)
  {
    if (layer.layerParams.sliceParams.slicePoints[outIdx] == -1)
    {
      layer.layerParams.sliceParams.slicePoints[outIdx] = outIdx * (axisSize / layer.numOutBufs);
    }
  }
  layer.layerParams.sliceParams.slicePoints[layer.numOutBufs] = axisSize;

  for (outIdx = 0; outIdx < layer.numOutBufs; outIdx++)
  {
    layer.outData[outIdx].elementType = layer.inData[0].elementType;
    for (dim = 0; dim < 4; dim++)
    {
      layer.outData[outIdx].dimValues[dim] = layer.inData[0].dimValues[dim];
    }
    /* Along the slice axis the output spans [slicePoints[outIdx], slicePoints[outIdx + 1]). */
    layer.outData[outIdx].dimValues[axisId] =
      layer.layerParams.sliceParams.slicePoints[outIdx + 1] - layer.layerParams.sliceParams.slicePoints[outIdx];
  }
  layer.numMacs = 0;

  return 0;
}

/*
 * Derives the output tensor description of a crop layer.
 *
 * pOrgTIDLNetStructure  network holding the layer in TIDLPCLayers[layerIndex]
 * layerIndex            index of the crop layer
 *
 * numDim and dimensions 0 and 1 follow the first input; dimensions 2 and 3 are taken from
 * the second input, which carries the cropped output size. cropParams.numChannels is set
 * to the first input's channel count and numMacs to 1.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeCropLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t dim;

  pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].outData[0].numDim =
    pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].inData[0].numDim;

  /* Dimensions 0 and 1 come from the data input. */
  for (dim = 0; dim < 2; dim++)
  {
    pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].outData[0].dimValues[dim] =
      pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].inData[0].dimValues[dim];
  }

  /* Second Input has cropped output sizes */
  for (dim = 2; dim < 4; dim++)
  {
    pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].outData[0].dimValues[dim] =
      pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].inData[1].dimValues[dim];
  }

  pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].layerParams.cropParams.numChannels =
    pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].inData[0].dimValues[1];

  pOrgTIDLNetStructure->TIDLPCLayers[layerIndex].numMacs = 1;

  return 0;
}
/*
 * Output-shape handler for the Flatten layer.
 *
 * The output keeps the element type, numDim and dimValues[0] of the input.
 * dimValues[1] and dimValues[2] become 1 and dimValues[3] becomes the product
 * of the input's dimValues[1..3]. numMacs is set to the product of the four
 * output dimensions, accumulated in 64 bits.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeFlattenLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &flattenLayer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  int64_t macCount;
  int32_t dim;

  flattenLayer.outData[0].elementType = flattenLayer.inData[0].elementType;
  flattenLayer.outData[0].numDim = flattenLayer.inData[0].numDim;

  flattenLayer.outData[0].dimValues[0] = flattenLayer.inData[0].dimValues[0];
  flattenLayer.outData[0].dimValues[1] = 1;
  flattenLayer.outData[0].dimValues[2] = 1;
  flattenLayer.outData[0].dimValues[3] = flattenLayer.inData[0].dimValues[1] *
    flattenLayer.inData[0].dimValues[2] *
    flattenLayer.inData[0].dimValues[3];

  /* Multiply the output dimensions left to right in 64-bit arithmetic */
  macCount = (int64_t)flattenLayer.outData[0].dimValues[0];
  for (dim = 1; dim < 4; dim++)
  {
    macCount = macCount * flattenLayer.outData[0].dimValues[dim];
  }
  flattenLayer.numMacs = (int64_t)macCount;

  return 0;
}
/*
 * Output-shape handler for the ArgMax layer.
 *
 * The output element type is tidl_getElementType(0). The output keeps numDim
 * and dimValues[0], dimValues[2], dimValues[3] of the input while dimValues[1]
 * becomes 1. The input's dimValues[1] is stored in argMaxParams.numChannels.
 * numMacs is set to the product of the four output dimensions, accumulated in
 * 64 bits.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeArgmaxLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &argMaxLayer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  int64_t macCount;
  int32_t dim;

  argMaxLayer.outData[0].elementType = tidl_getElementType(0);
  argMaxLayer.outData[0].numDim = argMaxLayer.inData[0].numDim;

  argMaxLayer.outData[0].dimValues[0] = argMaxLayer.inData[0].dimValues[0];
  argMaxLayer.outData[0].dimValues[1] = 1;
  argMaxLayer.outData[0].dimValues[2] = argMaxLayer.inData[0].dimValues[2];
  argMaxLayer.outData[0].dimValues[3] = argMaxLayer.inData[0].dimValues[3];

  argMaxLayer.layerParams.argMaxParams.numChannels = argMaxLayer.inData[0].dimValues[1];

  /* Multiply the output dimensions left to right in 64-bit arithmetic */
  macCount = (int64_t)argMaxLayer.outData[0].dimValues[0];
  for (dim = 1; dim < 4; dim++)
  {
    macCount = macCount * argMaxLayer.outData[0].dimValues[dim];
  }
  argMaxLayer.numMacs = (int64_t)macCount;

  return 0;
}
/*
 * Output-shape handler for the Pad layer.
 *
 * Every output dimension is the matching input dimension plus a pair of
 * entries from padParams.padTensor (entries 2*k and 2*k+1 for some pair
 * index k). Which pair index is used for each output dimension depends on
 * gloab_data_format and gParams.modelType:
 *   - gloab_data_format == 0 or gParams.modelType == 3:
 *       dim 0 -> pair 0, dim 1 -> pair 3, dim 2 -> pair 1, dim 3 -> pair 2
 *   - otherwise:
 *       dim k -> pair k
 * The output element type is tidl_getElementType(0) and numMacs is set to 0.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapePadLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &padLayer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  /* padPair[d] is the padTensor pair index applied to output dimension d */
  int32_t padPair[4] = { 0, 1, 2, 3 };
  int32_t dim;

  if (gloab_data_format == 0 || gParams.modelType == 3)
  {
    padPair[1] = 3;
    padPair[2] = 1;
    padPair[3] = 2;
  }

  padLayer.outData[0].elementType = tidl_getElementType(0);
  padLayer.outData[0].numDim = padLayer.inData[0].numDim;

  for (dim = 0; dim < 4; dim++)
  {
    padLayer.outData[0].dimValues[dim] = padLayer.inData[0].dimValues[dim]
      + padLayer.layerPCParams.padParams.padTensor[padPair[dim] * 2 + 0]
      + padLayer.layerPCParams.padParams.padTensor[padPair[dim] * 2 + 1];
  }

  padLayer.numMacs = 0;
  return 0;
}

/*
 * Output-shape handler for the Detection Output layer.
 *
 * The output is described with numDim = 1, dimValues[0..2] = 1 and
 * dimValues[3] = 4 + keepTopK * (7 + 2 * numKeypoints), with element type
 * TIDL_SinglePrecFloat.
 *
 * Always returns 0.
 */
int32_t TIDL_tfOutReshapeDetOutLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &detLayer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];

  const int32_t maxObjects = detLayer.layerParams.detectOutParams.keepTopK;
  const int32_t valuesPerObject = 7 + detLayer.layerParams.detectOutParams.numKeypoints * 2;
  int32_t dim;

  detLayer.outData[0].numDim = 1;
  for (dim = 0; dim < 3; dim++)
  {
    detLayer.outData[0].dimValues[dim] = 1;
  }
  detLayer.outData[0].dimValues[3] = 4 + maxObjects*valuesPerObject;
  detLayer.outData[0].elementType = TIDL_SinglePrecFloat;

  return 0;
}

/*
 * Output-shape handler for the ROI Pooling layer.
 *
 * The layer is not supported: an error message is printed and -1 is returned.
 * Neither argument is used.
 */
int32_t TIDL_tfOutReshapeRoiPoolingLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  const int32_t notSupported = -1;

  (void)pOrgTIDLNetStructure;
  (void)layerIndex;

  printf("ERROR: TIDL_tfOutReshapeRoiPoolingLayer is not supported for current version.\n");
  return notSupported;
}

/*
 * Output-shape handler for the OD post-processing layer.
 *
 * The layer is not supported: an error message is printed and -1 is returned.
 * Neither argument is used.
 */
int32_t TIDL_tfOutReshapeOdPostProcessingLayer(sTIDL_OrgNetwork_t   *pOrgTIDLNetStructure, int32_t layerIndex)
{
  const int32_t notSupported = -1;

  (void)pOrgTIDLNetStructure;
  (void)layerIndex;

  printf("ERROR: TIDL_tfOutReshapeOdPostProcessingLayer is not supported for current version.\n");
  return notSupported;
}

/*
 * Links the inputs of layer 'layerIndex' to outputs of earlier layers by name.
 *
 * For every input of the layer, all layers before it are scanned from
 * layerIndex - 1 down to 0. Whenever an output whose linked-consumer count is
 * still below its expected consumer count has the same name as the input, the
 * input takes over that output's dataId and the output's linked-consumer
 * count is incremented.
 *
 * Always returns 0.
 */
int32_t tidl_linkInputTensors(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &consumer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  int32_t inIdx, prodIdx, outIdx;

  for (inIdx = 0; inIdx < consumer.numInBufs; inIdx++)
  {
    for (prodIdx = layerIndex - 1; prodIdx >= 0; prodIdx--)
    {
      sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure->TIDLPCLayers[prodIdx];

      for (outIdx = 0; outIdx < producer.numOutBufs; outIdx++)
      {
        /* Skip outputs whose consumers have all been linked already */
        if (producer.outConsumerLinked[outIdx] >= producer.outConsumerCnt[outIdx])
        {
          continue;
        }
        if (strcmp((const char *)consumer.inDataNames[inIdx], (const char *)producer.outDataNames[outIdx]) != 0)
        {
          continue;
        }
        consumer.inData[inIdx].dataId = producer.outData[outIdx].dataId;
        producer.outConsumerLinked[outIdx]++;
      }
    }
  }
  return 0;
}


/*
 * Links the outputs of layer 'layerIndex' to inputs of earlier layers by name.
 *
 * For every output of the layer, the inputs of all layers before it are
 * scanned from layerIndex - 1 down to 0. While the output's linked-consumer
 * count is below its expected consumer count, every input with the same name
 * as the output takes over the output's dataId and the linked-consumer count
 * is incremented.
 *
 * Always returns 0.
 */
int32_t tidl_linkOutputTensors(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, int32_t layerIndex)
{
  sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure->TIDLPCLayers[layerIndex];
  int32_t outIdx, consIdx, inIdx;

  for (outIdx = 0; outIdx < producer.numOutBufs; outIdx++)
  {
    for (consIdx = layerIndex - 1; consIdx >= 0; consIdx--)
    {
      sTIDL_LayerPC_t &consumer = pOrgTIDLNetStructure->TIDLPCLayers[consIdx];

      for (inIdx = 0; inIdx < consumer.numInBufs; inIdx++)
      {
        /* Nothing left to link once every expected consumer is attached */
        if (producer.outConsumerLinked[outIdx] >= producer.outConsumerCnt[outIdx])
        {
          continue;
        }
        if (strcmp((const char *)producer.outDataNames[outIdx], (const char *)consumer.inDataNames[inIdx]) != 0)
        {
          continue;
        }
        consumer.inData[inIdx].dataId = producer.outData[outIdx].dataId;
        producer.outConsumerLinked[outIdx]++;
      }
    }
  }
  return 0;
}
/*
 * Counts how many inputs of the layers with index below 'layerIndex' are
 * named 'name'.
 *
 * Returns the number of matching inputs (0 when there is none).
 */
int32_t tidl_getConsumerCount(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, int32_t layerIndex, const char * name)
{
  int32_t matches = 0;
  int32_t lyr = layerIndex - 1;
  int32_t inIdx;

  while (lyr >= 0)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (inIdx = 0; inIdx < layer.numInBufs; inIdx++)
    {
      if (strcmp((const char *)layer.inDataNames[inIdx], (const char *)name) == 0)
      {
        matches++;
      }
    }
    lyr--;
  }
  return matches;
}

/*
 * Checks whether the inputs of 'orgLayer' are produced by the first
 * 'layerIndex' layers of 'ptempTIDLNetStructure'.
 *
 * Every (input name, output name) pair that matches is counted. The function
 * returns 1 when the layer's numInBufs is less than or equal to that count,
 * or when numInBufs is -1; otherwise it returns 0.
 */
int32_t tidl_isAllInsAvailable(sTIDL_LayerPC_t  *orgLayer, sTIDL_OrgNetwork_t  *ptempTIDLNetStructure, int32_t layerIndex)
{
  int32_t inIdx, lyr, outIdx;
  int32_t matchedIns = 0;

  for (inIdx = 0; inIdx < orgLayer->numInBufs; inIdx++)
  {
    for (lyr = 0; lyr < layerIndex; lyr++)
    {
      sTIDL_LayerPC_t &producer = ptempTIDLNetStructure->TIDLPCLayers[lyr];

      for (outIdx = 0; outIdx < producer.numOutBufs; outIdx++)
      {
        if (strcmp((const char *)producer.outDataNames[outIdx], (const char *)orgLayer->inDataNames[inIdx]) != 0)
        {
          continue;
        }
        matchedIns++;
      }
    }
  }

  /* The "numInBufs <= matchedIns" comparison is a temporary fix to keep the
     caffe import working.
     TODO : revisit after caffe is migrated to the new import framework */
  return ((orgLayer->numInBufs <= matchedIns) || (orgLayer->numInBufs == -1)) ? 1 : 0;
}

/*
 * Re-orders the first 'layerIndex' layers of pOrgTIDLNetStructure so that a
 * layer is placed only after tidl_isAllInsAvailable() reports that its inputs
 * are available among the layers already placed.
 *
 * The sorted list is built in ptempTIDLNetStructure and then copied back over
 * pOrgTIDLNetStructure. If a full pass over the layers places nothing new, an
 * error is printed and sorting stops; numLayers then holds the number of
 * layers that could be placed.
 *
 * Returns 0, or -1 when the bookkeeping array cannot be allocated (in that
 * case neither network structure is modified).
 */
int32_t tidl_sortLayersInProcOrder(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, sTIDL_OrgNetwork_t  *ptempTIDLNetStructure, int32_t layerIndex)
{
  int32_t i0;
  int32_t newNetIdx = 0;
  int32_t lastNetIdx = 0;
  int32_t *isAddedToList = (int32_t *)my_malloc(layerIndex*sizeof(int32_t));

  /* Do not write through a failed allocation */
  if ((layerIndex > 0) && (isAddedToList == NULL))
  {
    printf("Error : Could not allocate memory to sort %d layers \n", (int)layerIndex);
    return -1;
  }
  if (isAddedToList != NULL)
  {
    memset(isAddedToList, 0, sizeof(int32_t)*layerIndex);
  }

  while (newNetIdx < layerIndex)
  {
    lastNetIdx = newNetIdx;
    for (i0 = 0; i0 < layerIndex; i0++)
    {
      if ((isAddedToList[i0] == 0) &&
          tidl_isAllInsAvailable(&pOrgTIDLNetStructure->TIDLPCLayers[i0], ptempTIDLNetStructure, newNetIdx))
      {
        ptempTIDLNetStructure->TIDLPCLayers[newNetIdx] = pOrgTIDLNetStructure->TIDLPCLayers[i0];
        newNetIdx++;
        isAddedToList[i0] = 1;
      }
    }

    /* A whole pass without progress: the remaining layers can never be placed */
    if ((newNetIdx == lastNetIdx) && (newNetIdx < layerIndex))
    {
      if (newNetIdx > 0)
      {
        printf("Error : Input for Layer %s:%s Not found in the input network \n",
               ptempTIDLNetStructure->TIDLPCLayers[newNetIdx - 1].name,
               ptempTIDLNetStructure->TIDLPCLayers[newNetIdx - 1].outDataNames[0]);
      }
      else
      {
        /* Nothing has been placed yet, so there is no "last placed" layer to
           report (index newNetIdx - 1 would be -1). Every layer is still
           unplaced here; report the first one of the original network. */
        printf("Error : Input for Layer %s:%s Not found in the input network \n",
               pOrgTIDLNetStructure->TIDLPCLayers[0].name,
               pOrgTIDLNetStructure->TIDLPCLayers[0].outDataNames[0]);
      }
      break;
    }
  }
  my_free(isAddedToList);
  ptempTIDLNetStructure->numLayers = newNetIdx;
  memset((void *)pOrgTIDLNetStructure, 0, sizeof(orgTIDLNetStructure));
  memcpy((void *)pOrgTIDLNetStructure, (void *)ptempTIDLNetStructure, sizeof(orgTIDLNetStructure));
  return 0;
}

/*
 * Renames input tensors: in the first 'layerIndex' layers, every input whose
 * name equals 'orgTensorName' gets its name overwritten with 'newTensorName'
 * (copied with strcpy, so the destination must be large enough).
 */
void tidl_replaceInTensorName(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, int32_t layerIndex, char * orgTensorName, char * newTensorName)
{
  int32_t lyr, inIdx;

  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (inIdx = 0; inIdx < layer.numInBufs; inIdx++)
    {
      if (strcmp((const char *)layer.inDataNames[inIdx], (const char *)orgTensorName) != 0)
      {
        continue;
      }
      strcpy((char *)layer.inDataNames[inIdx], (const char *)newTensorName);
    }
  }
}

/*
 * Compacts the network by dropping layers that are marked with both
 * numInBufs == -1 and numOutBufs == -1.
 *
 * The surviving layers among the first 'layerIndex' ones are copied, in
 * order, into ptempTIDLNetStructure, whose numLayers is set to the number of
 * survivors; the result is then copied back over pOrgTIDLNetStructure.
 *
 * Always returns 0.
 */
int32_t tidl_removeMergedLayersFromNet(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, sTIDL_OrgNetwork_t  *ptempTIDLNetStructure, int32_t layerIndex)
{
  int32_t srcIdx;
  int32_t keptCount = 0;

  for (srcIdx = 0; srcIdx < layerIndex; srcIdx++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[srcIdx];

    /* Both buffer counts at -1 marks a layer to be dropped */
    if ((layer.numInBufs == -1) && (layer.numOutBufs == -1))
    {
      continue;
    }
    ptempTIDLNetStructure->TIDLPCLayers[keptCount] = layer;
    keptCount++;
  }

  ptempTIDLNetStructure->numLayers = keptCount;
  memset((void *)pOrgTIDLNetStructure, 0, sizeof(orgTIDLNetStructure));
  memcpy((void *)pOrgTIDLNetStructure, (void *)ptempTIDLNetStructure, sizeof(orgTIDLNetStructure));
  return 0;
}
/*
 * Replaces the dataId 'oldId' with 'currId' on every input of the first
 * 'layerIndex' layers.
 *
 * Always returns 0.
 */
int32_t tidl_upateAInDataId(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, int32_t layerIndex, int32_t oldId, int32_t currId)
{
  int32_t lyr, inIdx;

  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (inIdx = 0; inIdx < layer.numInBufs; inIdx++)
    {
      if (layer.inData[inIdx].dataId != oldId)
      {
        continue;
      }
      layer.inData[inIdx].dataId = currId;
    }
  }
  return 0;
}

/*
 * Renumbers the output dataIds of the first 'layerIndex' layers as
 * 0, 1, 2, ... in layer order, then output order, and updates the inputs that
 * referenced the old ids.
 *
 * Steps:
 *   1. Find the largest output dataId (starting from 0) and add one.
 *   2. Add that value to every output and input dataId.
 *   3. Walk the outputs in order, assign consecutive ids starting at 0 and
 *      patch the matching inputs through tidl_upateAInDataId().
 *
 * Always returns 0.
 */
int32_t tidl_sortDataIds(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t lyr, buf;
  int32_t idShift = 0;
  int32_t nextId = 0;

  /* Step 1: largest output id in use */
  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (buf = 0; buf < layer.numOutBufs; buf++)
    {
      if (layer.outData[buf].dataId >= idShift)
      {
        idShift = layer.outData[buf].dataId;
      }
    }
  }
  idShift = idShift + 1;

  /* Step 2: shift every id by the same amount */
  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (buf = 0; buf < layer.numOutBufs; buf++)
    {
      layer.outData[buf].dataId += idShift;
    }
    for (buf = 0; buf < layer.numInBufs; buf++)
    {
      layer.inData[buf].dataId += idShift;
    }
  }

  /* Step 3: hand out consecutive ids and re-link the consumers */
  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (buf = 0; buf < layer.numOutBufs; buf++)
    {
      const int32_t shiftedId = layer.outData[buf].dataId;

      layer.outData[buf].dataId = nextId;
      tidl_upateAInDataId(pOrgTIDLNetStructure, layerIndex, shiftedId, nextId);
      nextId++;
    }
  }
  return 0;
}

/*
 * Renumbers the output dataIds of the first 'layerIndex' layers so that each
 * output carries the index of the layer that owns it, and updates the inputs
 * that referenced the old ids.
 *
 * Steps:
 *   1. Find the largest output dataId (starting from 0) and add one.
 *   2. Add that value to every output and input dataId.
 *   3. Walk the outputs in order, set each id to its layer index and patch the
 *      matching inputs through tidl_upateAInDataId().
 *
 * Always returns 0.
 */
int32_t tidl_makeDataIdLayerIdSame(sTIDL_OrgNetwork_t  *pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t lyr, buf;
  int32_t idShift = 0;

  /* Step 1: largest output id in use */
  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (buf = 0; buf < layer.numOutBufs; buf++)
    {
      if (layer.outData[buf].dataId >= idShift)
      {
        idShift = layer.outData[buf].dataId;
      }
    }
  }
  idShift = idShift + 1;

  /* Step 2: shift every id by the same amount */
  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (buf = 0; buf < layer.numOutBufs; buf++)
    {
      layer.outData[buf].dataId += idShift;
    }
    for (buf = 0; buf < layer.numInBufs; buf++)
    {
      layer.inData[buf].dataId += idShift;
    }
  }

  /* Step 3: the new id of an output is the index of its layer */
  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure->TIDLPCLayers[lyr];

    for (buf = 0; buf < layer.numOutBufs; buf++)
    {
      const int32_t shiftedId = layer.outData[buf].dataId;

      layer.outData[buf].dataId = lyr;
      tidl_upateAInDataId(pOrgTIDLNetStructure, layerIndex, shiftedId, lyr);
    }
  }
  return 0;
}

/*
 * Recomputes the output shapes of layers startIdx .. layerIndex - 1 and
 * propagates them to the consuming inputs.
 *
 * For each layer the reshape handler registered for its layerType in
 * sTIDL_tfOutRehapeTable is called. Unless the handler returns -1, every
 * output descriptor of the layer is copied over each input (of any of the
 * first 'layerIndex' layers) that has the same dataId.
 *
 * Returns the status of the last handler that was called (0 if none ran).
 */
int32_t tidl_updateOutDataShape(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t startIdx, int32_t layerIndex, sTIDL_tfOutRehapeMap_t * sTIDL_tfOutRehapeTable)
{
  int32_t srcIdx, outIdx, dstIdx, inIdx;
  int32_t status = 0;

  for (srcIdx = startIdx; srcIdx < layerIndex; srcIdx++)
  {
    sTIDL_LayerPC_t &srcLayer = pOrgTIDLNetStructure.TIDLPCLayers[srcIdx];

    status = sTIDL_tfOutRehapeTable[srcLayer.layerType].tidl_tfOutReshape(&pOrgTIDLNetStructure, srcIdx);
    if (status == -1)
    {
      /* Handler failed: leave the consumers of this layer untouched */
      continue;
    }

    for (outIdx = 0; outIdx < srcLayer.numOutBufs; outIdx++)
    {
      for (dstIdx = 0; dstIdx < layerIndex; dstIdx++)
      {
        sTIDL_LayerPC_t &dstLayer = pOrgTIDLNetStructure.TIDLPCLayers[dstIdx];

        for (inIdx = 0; inIdx < dstLayer.numInBufs; inIdx++)
        {
          if (dstLayer.inData[inIdx].dataId == srcLayer.outData[outIdx].dataId)
          {
            dstLayer.inData[inIdx] = srcLayer.outData[outIdx];
          }
        }
      }
    }
  }
  return status;
}
/*
 * Finds the layer that produces the tensor 'dataId'.
 *
 * Returns the index of the first layer (among the first 'layerIndex' ones)
 * that has an output with this dataId, or -1 when there is none.
 */
int32_t tidl_getInLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex, int32_t dataId)
{
  int32_t found = -1;
  int32_t lyr, outIdx;

  for (lyr = 0; (lyr < layerIndex) && (found == -1); lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure.TIDLPCLayers[lyr];

    for (outIdx = 0; outIdx < layer.numOutBufs; outIdx++)
    {
      if (layer.outData[outIdx].dataId == dataId)
      {
        found = lyr;
        break;
      }
    }
  }
  return found;
}
/*
 * Finds a layer that consumes the tensor 'dataId'.
 *
 * Returns the index of the first layer (among the first 'layerIndex' ones)
 * that has an input with this dataId, or -1 when there is none.
 */
int32_t tidl_getOutLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex, int32_t dataId)
{
  int32_t found = -1;
  int32_t lyr, inIdx;

  for (lyr = 0; (lyr < layerIndex) && (found == -1); lyr++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure.TIDLPCLayers[lyr];

    for (inIdx = 0; inIdx < layer.numInBufs; inIdx++)
    {
      if (layer.inData[inIdx].dataId == dataId)
      {
        found = lyr;
        break;
      }
    }
  }
  return found;
}
/*
 * Folds Flatten layers into a preceding InnerProduct layer.
 *
 * For every Flatten layer whose producer (found through tidl_getInLayer) is
 * an InnerProduct layer with exactly one consumer on output 0, the producer
 * takes over the Flatten layer's output descriptor, output name and consumer
 * count, and the Flatten layer's numMacs is added to the producer's. The
 * Flatten layer is then marked with numInBufs = numOutBufs = -1.
 *
 * Returns 0, or -1 as soon as a Flatten layer has no producer.
 */
int32_t tidl_mergeFalttenLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t lyr;

  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &flatten = pOrgTIDLNetStructure.TIDLPCLayers[lyr];

    if (flatten.layerType != TIDL_FlattenLayer)
    {
      continue;
    }

    const int32_t prodIdx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, flatten.inData[0].dataId);
    if (prodIdx == -1)
    {
      return -1;
    }

    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[prodIdx];
    if ((producer.layerType != TIDL_InnerProductLayer) ||
      (producer.outConsumerCnt[0] != 1))
    {
      continue;
    }

    /* The producer now emits what the Flatten layer used to emit */
    producer.numMacs += flatten.numMacs;
    producer.outData[0] = flatten.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)flatten.outDataNames[0]);
    producer.outConsumerCnt[0] = flatten.outConsumerCnt[0];

    /* Mark the Flatten layer as merged */
    flatten.numInBufs = -1;
    flatten.numOutBufs = -1;
  }

  return 0;
}


/*
 * Folds Bias layers into a preceding Convolution or InnerProduct layer.
 *
 * For every Bias layer whose producer (found through tidl_getInLayer) is a
 * Convolution or InnerProduct layer with exactly one consumer on output 0:
 *   - the producer takes over the Bias layer's output descriptor, output name
 *     and consumer count, and the Bias layer's numMacs is added to its own;
 *   - the Bias layer is marked with numInBufs = numOutBufs = -1;
 *   - a Convolution producer that had bias disabled gets bias enabled and
 *     adopts the Bias layer's bias buffer; in every other case the Bias
 *     layer's values are added element-wise (as float) to the producer's
 *     existing bias, over the producer's bias.bufSize entries.
 *
 * Returns 0, or -1 as soon as a Bias layer has no producer.
 */
int32_t tidl_mergeBiasLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t lyr, ch;

  for (lyr = 0; lyr < layerIndex; lyr++)
  {
    sTIDL_LayerPC_t &biasLayer = pOrgTIDLNetStructure.TIDLPCLayers[lyr];

    if (biasLayer.layerType != TIDL_BiasLayer)
    {
      continue;
    }

    const int32_t prodIdx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, biasLayer.inData[0].dataId);
    if (prodIdx == -1)
    {
      return -1;
    }

    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[prodIdx];
    const bool isConv = (producer.layerType == TIDL_ConvolutionLayer);
    const bool isInnerProduct = (producer.layerType == TIDL_InnerProductLayer);

    if (!(isConv || isInnerProduct) || (producer.outConsumerCnt[0] != 1))
    {
      continue;
    }

    /* Steps shared by both producer kinds */
    producer.numMacs += biasLayer.numMacs;
    producer.outData[0] = biasLayer.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)biasLayer.outDataNames[0]);
    producer.outConsumerCnt[0] = biasLayer.outConsumerCnt[0];
    biasLayer.numInBufs = -1;
    biasLayer.numOutBufs = -1;

    if (isConv && (producer.layerParams.convParams.enableBias == 0))
    {
      /* Convolution without a bias of its own: adopt the Bias layer's buffer */
      producer.layerParams.convParams.enableBias = 1;
      producer.bias = biasLayer.bias;
    }
    else
    {
      /* Producer already has a bias: accumulate into it */
      float * addend = (float *)biasLayer.bias.ptr;
      float * target = (float *)producer.bias.ptr;
      for (ch = 0; ch < producer.bias.bufSize; ch++)
      {
        target[ch] += addend[ch];
      }
    }
  }

  return 0;
}

/*
 * Folds Pad layers into the layer that consumes them.
 *
 * For every Pad layer among the first 'layerIndex' layers:
 *   - its producer (tidl_getInLayer) and consumer (tidl_getOutLayer) are
 *     looked up; -1 is returned if either is missing;
 *   - left/top (padL, padT) and right/bottom (padW, padH) amounts are read
 *     from padParams.padTensor; the entries used depend on gParams.modelType
 *     (1, 2 or 3) and, for modelType 1, on gloab_data_format. Any other
 *     modelType is reported as an error and -1 is returned;
 *   - padW / padH are raised to padL / padT when those are larger;
 *   - if the consumer is a Convolution, the Pad layer has a single consumer
 *     and kernelW/2 == padW and kernelH/2 == padH, the padding is written to
 *     the convolution's padW / padH and the convolution reads the Pad layer's
 *     input directly;
 *   - else if the consumer is an average Pooling layer and the Pad layer has
 *     a single consumer, the padding is added to the pooling padW / padH and
 *     the pooling layer reads the Pad layer's input directly;
 *   - otherwise an error is printed and -1 is returned.
 * A merged Pad layer is marked with numInBufs = numOutBufs = -1.
 *
 * Returns 0 on success, -1 on any of the failures above.
 */
int32_t tidl_mergePadLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t i1;
  int32_t padW, padH;
  int32_t padL = 0, padT = 0;
  for (i1 = 0; i1 < layerIndex; i1++)
  {
    if (pOrgTIDLNetStructure.TIDLPCLayers[i1].layerType == TIDL_PadLayer)
    {
      sTIDL_LayerPC_t &padLayer = pOrgTIDLNetStructure.TIDLPCLayers[i1];

      int32_t  inIdx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, padLayer.inData[0].dataId);
      if (inIdx == -1)
      {
        return -1;
      }
      int32_t  outIdx = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, padLayer.outData[0].dataId);
      if (outIdx == -1)
      {
        return -1;
      }

      sTIDL_LayerPC_t &TIDLPCLayersIn = pOrgTIDLNetStructure.TIDLPCLayers[inIdx];
      sTIDL_LayerPC_t &TIDLPCLayersOut = pOrgTIDLNetStructure.TIDLPCLayers[outIdx];

      if (gParams.modelType == 1)
      {
        if (gloab_data_format == 0)
        {
          padL = padLayer.layerPCParams.padParams.padTensor[2 * 2 + 0];
          padT = padLayer.layerPCParams.padParams.padTensor[1 * 2 + 0];
          padW = padLayer.layerPCParams.padParams.padTensor[2 * 2 + 1];
          padH = padLayer.layerPCParams.padParams.padTensor[1 * 2 + 1];
        }
        else
        {
          padL = padLayer.layerPCParams.padParams.padTensor[3 * 2 + 0];
          padT = padLayer.layerPCParams.padParams.padTensor[2 * 2 + 0];
          padW = padLayer.layerPCParams.padParams.padTensor[3 * 2 + 1];
          padH = padLayer.layerPCParams.padParams.padTensor[2 * 2 + 1];
        }
      }
      else if (gParams.modelType == 2)
      {
        /* Only padW / padH are read here; padL / padT keep their initial 0 */
        padW = padLayer.layerPCParams.padParams.padTensor[3];
        padH = padLayer.layerPCParams.padParams.padTensor[2];
      }
      else if (gParams.modelType == 3)
      {
        padL = padLayer.layerPCParams.padParams.padTensor[4];
        padT = padLayer.layerPCParams.padParams.padTensor[2];
        padW = padLayer.layerPCParams.padParams.padTensor[5];
        padH = padLayer.layerPCParams.padParams.padTensor[3];
      }
      else
      {
        /* modelType is an integer: it must be printed with %d. Passing it to
           %s makes printf treat the number as a string pointer. */
        printf("ERROR: PAD layer is NOT supported for modelType %d\n", (int)gParams.modelType);
        return -1;
      }

      /* Use the larger of the two paddings on each axis */
      padW = padW < padL ? padL : padW;
      padH = padH < padT ? padT : padH;
      if ((TIDLPCLayersOut.layerType == TIDL_ConvolutionLayer) &&
        (padLayer.outConsumerCnt[0] == 1) &&
        /*(TIDLPCLayersIn.outConsumerCnt[0] == 1) &&*/
        ((TIDLPCLayersOut.layerParams.convParams.kernelW/2) == padW) &&
        ((TIDLPCLayersOut.layerParams.convParams.kernelH/2) == padH))
      {
        TIDLPCLayersOut.numMacs += padLayer.numMacs;

        TIDLPCLayersOut.layerParams.convParams.padW = padW;
        TIDLPCLayersOut.layerParams.convParams.padH = padH;
        if (((gParams.modelType == 1 || gParams.modelType == 3)) && (padL == padW) && (padT == padH))
        {
          TIDLPCLayersOut.strideOffsetMethod = TIDL_StrideOffsetTopLeft;
        }

        /* The convolution now consumes the Pad layer's input directly */
        TIDLPCLayersOut.inData[0] = padLayer.inData[0];
        strcpy((char *)TIDLPCLayersOut.inDataNames[0], (char *)padLayer.inDataNames[0]);
        padLayer.numInBufs = -1;
        padLayer.numOutBufs = -1;
      }
      else if ((TIDLPCLayersOut.layerType == TIDL_PoolingLayer) &&
        (TIDLPCLayersOut.layerParams.poolParams.poolingType == TIDL_AveragePooling) &&
        (padLayer.outConsumerCnt[0] == 1))
      {
        /* NOTE(review): the MACs are credited to the producer here, while the
           convolution branch credits the consumer - confirm this is intended */
        TIDLPCLayersIn.numMacs += padLayer.numMacs;

        TIDLPCLayersOut.layerParams.poolParams.padW += padW;
        TIDLPCLayersOut.layerParams.poolParams.padH += padH;

        /* The pooling layer now consumes the Pad layer's input directly */
        TIDLPCLayersOut.inData[0] = padLayer.inData[0];
        strcpy((char *)TIDLPCLayersOut.inDataNames[0], (char *)padLayer.inDataNames[0]);
        padLayer.numInBufs = -1;
        padLayer.numOutBufs = -1;

        if ( gParams.modelType == 2 )
        {
          printf("Warning : Merging Pad layer with Average Pooling layer. This is expected to work but  this flow is functionally not validated with ONNX model format. \n");
        }
      }
      else
      {
        printf("ERROR: Currently PAD layer is supported only when the following layer is convolution with stride > 1\n");
        return -1;
      }
    }
  }

  return 0;
}

/*
 * Folds BatchNorm layers into a neighbouring layer.
 *
 * For every BatchNorm layer among the first 'layerIndex' layers, the producer
 * of its input 0 (tidl_getInLayer) and a consumer of its output 0
 * (tidl_getOutLayer) are looked up; -1 is returned if either is missing.
 * Two merges are attempted, in this order:
 *   1. Consumer is also a BatchNorm layer, this layer has one consumer and no
 *      activation: this layer's scale and bias are folded into the consumer,
 *      which then reads this layer's input directly.
 *   2. Producer is a Convolution or Deconv2D layer with one consumer and no
 *      activation: this layer's scale and bias are folded into the producer's
 *      weights and bias, and the producer takes over this layer's output
 *      descriptor, output name, consumer count and activation type.
 * In both cases the merged layer is marked with numInBufs = numOutBufs = -1
 * and its weights / bias buffers are released with my_free (the pointers
 * stored in the merged layer are not reset here).
 *
 * weights.ptr and bias.ptr are accessed as float arrays; bias.bufSize is used
 * as the number of entries to process.
 *
 * Returns 0, or -1 when a producer or consumer lookup fails.
 */
int32_t tidl_mergeBNLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t i1, i2, i3, i4;
  int32_t status = 0;
  for (i1 = 0; i1 < layerIndex; i1++)
  {
    if (pOrgTIDLNetStructure.TIDLPCLayers[i1].layerType == TIDL_BatchNormLayer)
    {
      /* Producer of the BN layer's first input */
      int32_t  idx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, pOrgTIDLNetStructure.TIDLPCLayers[i1].inData[0].dataId);
      if (idx == -1)
      {
        return -1;
      }
      sTIDL_LayerPC_t &TIDLPCLayers = pOrgTIDLNetStructure.TIDLPCLayers[idx];
      /* First layer that consumes the BN layer's first output */
      idx = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, pOrgTIDLNetStructure.TIDLPCLayers[i1].outData[0].dataId);
      if (idx == -1)
      {
        return -1;
      }
      sTIDL_LayerPC_t &outTIDLPCLayers = pOrgTIDLNetStructure.TIDLPCLayers[idx];

       /* Case 1: BN followed by another BN - fold this layer into the next one */
       if ((outTIDLPCLayers.layerType == TIDL_BatchNormLayer) &&
        (pOrgTIDLNetStructure.TIDLPCLayers[i1].outConsumerCnt[0] == 1) &&
        (pOrgTIDLNetStructure.TIDLPCLayers[i1].actParams.actType == TIDL_NoAct))
      {
        outTIDLPCLayers.numMacs += pOrgTIDLNetStructure.TIDLPCLayers[i1].numMacs;
        /* The next BN now reads this layer's input directly */
        outTIDLPCLayers.inData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i1].inData[0];
        strcpy((char *)outTIDLPCLayers.inDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i1].inDataNames[0]);
        /* Mark this layer as merged */
        pOrgTIDLNetStructure.TIDLPCLayers[i1].numInBufs = -1;
        pOrgTIDLNetStructure.TIDLPCLayers[i1].numOutBufs = -1;
        /* Merge BN scale and Bias */
        float * weights = (float *)outTIDLPCLayers.weights.ptr;
        float * bias = (float *)outTIDLPCLayers.bias.ptr;

        float * weights0 = (float *)pOrgTIDLNetStructure.TIDLPCLayers[i1].weights.ptr;
        float * bias0 = (float *)pOrgTIDLNetStructure.TIDLPCLayers[i1].bias.ptr;
        /*
           T1 = T0*W0 + B0
           T2 = T1*W + B
           T2 = (T0*W0 + B0)*W + B
           T2 = (T0*W0*W + B0*W + B)
        */
        for (i2 = 0; i2 < outTIDLPCLayers.bias.bufSize; i2++)
        {
          /* The bias must be updated first: it needs W before W is scaled by W0 */
          bias[i2] = bias0[i2] * weights[i2] + bias[i2];
          weights[i2] *= weights0[i2];
        }
        /* The merged layer's parameter buffers are no longer needed */
        my_free(weights0);
        my_free(bias0);
      }
      /* Case 2: Conv / Deconv followed by BN - fold this layer into the producer */
      else if (((TIDLPCLayers.layerType == TIDL_ConvolutionLayer) || (TIDLPCLayers.layerType == TIDL_Deconv2DLayer)) &&
        (TIDLPCLayers.outConsumerCnt[0] == 1) && (TIDLPCLayers.actParams.actType == TIDL_NoAct))
      {
        TIDLPCLayers.numMacs += pOrgTIDLNetStructure.TIDLPCLayers[i1].numMacs;
        /* The producer takes over the BN layer's activation and output */
        TIDLPCLayers.actParams.actType = pOrgTIDLNetStructure.TIDLPCLayers[i1].actParams.actType;
        TIDLPCLayers.outData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i1].outData[0];
        strcpy((char *)TIDLPCLayers.outDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i1].outDataNames[0]);
        TIDLPCLayers.outConsumerCnt[0] = pOrgTIDLNetStructure.TIDLPCLayers[i1].outConsumerCnt[0];
        /* Mark this layer as merged */
        pOrgTIDLNetStructure.TIDLPCLayers[i1].numInBufs = -1;
        pOrgTIDLNetStructure.TIDLPCLayers[i1].numOutBufs = -1;

        /* Producer had no bias: create a zero bias with outData[0].dimValues[1] entries */
        if (TIDLPCLayers.layerParams.convParams.enableBias == 0)
        {
          TIDLPCLayers.layerParams.convParams.enableBias = 1;
          TIDLPCLayers.bias.ptr = my_malloc(sizeof(float)*TIDLPCLayers.outData[0].dimValues[1]);
          TIDLPCLayers.bias.bufSize = TIDLPCLayers.outData[0].dimValues[1];
          float * dst = (float *)TIDLPCLayers.bias.ptr;
          for (i2 = 0; i2 < TIDLPCLayers.bias.bufSize; i2++)
          {
            dst[i2] = 0;
          }
        }
        /* Merge BN scale and Bias to Conv2d */
        float * weights = (float *)TIDLPCLayers.weights.ptr;
        float * bias = (float *)TIDLPCLayers.bias.ptr;

        float * scale = (float *)pOrgTIDLNetStructure.TIDLPCLayers[i1].weights.ptr;
        float * bias2 = (float *)pOrgTIDLNetStructure.TIDLPCLayers[i1].bias.ptr;
        /* Number of weights that belong to one bias entry */
        int32_t weightsSize = (TIDLPCLayers.weights.bufSize / TIDLPCLayers.bias.bufSize);
        for (i2 = 0; i2 < TIDLPCLayers.bias.bufSize; i2++)
        {
          /* Scale the contiguous group of weights of entry i2 */
          for (i3 = 0; i3 < weightsSize; i3++)
          {
            weights[i2*weightsSize + i3] *= scale[i2];
          }
          /* (x + b) * s + b2  ==  x * s + (b * s + b2) */
          bias[i2] = bias[i2] * scale[i2] + bias2[i2];
        }
        /* The merged layer's parameter buffers are no longer needed */
        my_free(scale);
        my_free(bias2);
      }
    }
  }

  return 0;
}


/*
 * Folds a BatchNorm layer into the Convolution layer that follows it.
 *
 * Applies to every BatchNorm layer (among the first 'layerIndex' layers) that
 * has exactly one consumer on output 0 and no activation. If the consumer
 * found through tidl_getOutLayer is a Convolution layer:
 *   - the convolution reads the BatchNorm layer's input directly;
 *   - the BatchNorm layer is marked with numInBufs = numOutBufs = -1;
 *   - a zero bias is created for the convolution if it had none;
 *   - the BatchNorm bias is folded into the convolution bias and the
 *     BatchNorm scale into the convolution weights (see the loop below);
 *   - the BatchNorm weights / bias buffers are released with my_free (the
 *     pointers stored in the merged layer are not reset here).
 *
 * weights.ptr and bias.ptr are accessed as float arrays. The convolution
 * weights are indexed as [output entry][input entry][weightsSize], where the
 * number of input entries is the BatchNorm layer's weights.bufSize.
 *
 * Returns 0, or -1 when a qualifying BatchNorm layer has no consumer.
 */
int32_t tidl_mergePreBNLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t i1, i2, i3, i4;
  int32_t status = 0;
  for (i1 = 0; i1 < layerIndex; i1++)
  {
    if ((pOrgTIDLNetStructure.TIDLPCLayers[i1].layerType == TIDL_BatchNormLayer) &&
       (pOrgTIDLNetStructure.TIDLPCLayers[i1].outConsumerCnt[0] == 1) &&
       (pOrgTIDLNetStructure.TIDLPCLayers[i1].actParams.actType == TIDL_NoAct))
    {
      /* First layer that consumes the BN layer's first output */
      int32_t  idx = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, pOrgTIDLNetStructure.TIDLPCLayers[i1].outData[0].dataId);
      if (idx == -1)
      {
        return -1;
      }
      sTIDL_LayerPC_t &TIDLPCLayers = pOrgTIDLNetStructure.TIDLPCLayers[idx];
      if (TIDLPCLayers.layerType == TIDL_ConvolutionLayer)
      {
        /* The convolution now reads the BN layer's input directly */
        TIDLPCLayers.inData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i1].inData[0];
        strcpy((char *)TIDLPCLayers.inDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i1].inDataNames[0]);
        /* Mark the BN layer as merged */
        pOrgTIDLNetStructure.TIDLPCLayers[i1].numInBufs = -1;
        pOrgTIDLNetStructure.TIDLPCLayers[i1].numOutBufs = -1;

        /* Convolution had no bias: create a zero bias with outData[0].dimValues[1] entries */
        if (TIDLPCLayers.layerParams.convParams.enableBias == 0)
        {
          TIDLPCLayers.layerParams.convParams.enableBias = 1;
          TIDLPCLayers.bias.ptr = my_malloc(sizeof(float)*TIDLPCLayers.outData[0].dimValues[1]);
          TIDLPCLayers.bias.bufSize = TIDLPCLayers.outData[0].dimValues[1];
          float * dst = (float *)TIDLPCLayers.bias.ptr;
          for (i2 = 0; i2 < TIDLPCLayers.bias.bufSize; i2++)
          {
            dst[i2] = 0;
          }
        }
        /* Merge BN scale and Bias to Conv2d */
        float * weights = (float *)TIDLPCLayers.weights.ptr;
        float * bias = (float *)TIDLPCLayers.bias.ptr;

        float * scale = (float *)pOrgTIDLNetStructure.TIDLPCLayers[i1].weights.ptr;
        float * bias2 = (float *)pOrgTIDLNetStructure.TIDLPCLayers[i1].bias.ptr;

        /* Input entry count comes from the BN scale buffer, output entry
           count from the convolution bias buffer */
        int32_t numInCh  = pOrgTIDLNetStructure.TIDLPCLayers[i1].weights.bufSize;
        int32_t numOutCh = TIDLPCLayers.bias.bufSize;
        /* Weights per (output entry, input entry) pair */
        int32_t weightsSize = ((TIDLPCLayers.weights.bufSize / TIDLPCLayers.bias.bufSize))/ numInCh;


        for (i2 = 0; i2 < numOutCh; i2++)
        {
          /* Contribution of the BN bias to output entry i2 */
          float bnBias = 0;
          for (i3 = 0; i3 < numInCh; i3++)
          {
            for (i4 = 0; i4 < weightsSize; i4++)
            {
              /* Accumulate with the unscaled weight, then apply the BN scale to it */
              bnBias += bias2[i3] * weights[i2*numInCh*weightsSize + i3*weightsSize + i4];
              weights[i2*numInCh*weightsSize + i3*weightsSize + i4] *= scale[i3];
            }
          }
          bias[i2] = bnBias + bias[i2];
        }
        /* The merged layer's parameter buffers are no longer needed */
        my_free(scale);
        my_free(bias2);
      }
    }
  }

  return 0;
}

/*
 * Folds Flatten layers into the layer that produces their input.
 *
 * A Flatten layer is folded when either
 *   - dimValues[0..2] of both its first input and its first output are all 1, or
 *   - its producer is an average-pooling layer with kernelW == 0 and
 *     kernelH == 0 whose first output has exactly one consumer.
 *
 * Folding hands the Flatten output (descriptor, name and consumer count) to
 * the producer, adds the Flatten numMacs to the producer and flags the
 * Flatten layer as removed by setting numInBufs/numOutBufs to -1.
 *
 * pOrgTIDLNetStructure : network whose TIDLPCLayers are updated in place.
 * layerIndex           : number of entries of TIDLPCLayers to scan.
 *
 * Returns 0 on success, -1 when a Flatten layer qualifies for folding but
 * tidl_getInLayer() finds no producer for its input.
 */
int32_t tidl_mergeFlattenLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t i1 = 0; i1 < layerIndex; i1++)
  {
    sTIDL_LayerPC_t &flatten = pOrgTIDLNetStructure.TIDLPCLayers[i1];
    if (flatten.layerType != TIDL_FlattenLayer)
    {
      continue;
    }

    /* First criterion: the leading three dimensions are 1 on both sides. */
    bool merged = true;
    for (int32_t i2 = 0; i2 < 3; i2++)
    {
      if ((flatten.inData[0].dimValues[i2] != 1) ||
          (flatten.outData[0].dimValues[i2] != 1))
      {
        merged = false;
        break;
      }
    }

    /* The producer is looked up once and reused for both the second
     * criterion and the merge itself. */
    const int32_t inIdx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, flatten.inData[0].dataId);
    if (inIdx == -1)
    {
      if (merged)
      {
        /* Eligible for folding but there is nothing to fold into. */
        return -1;
      }
      continue;
    }

    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[inIdx];

    /* Second criterion: single-consumer average pooling with a 0x0 kernel. */
    if ((producer.layerType == TIDL_PoolingLayer) &&
        (producer.layerParams.poolParams.poolingType == TIDL_AveragePooling) &&
        (producer.outConsumerCnt[0] == 1) &&
        (producer.layerParams.poolParams.kernelW == 0) &&
        (producer.layerParams.poolParams.kernelH == 0))
    {
      merged = true;
    }

    if (!merged)
    {
      continue;
    }

    /* The producer takes over the Flatten output; the Flatten layer is dropped. */
    producer.numMacs += flatten.numMacs;
    producer.outData[0] = flatten.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)flatten.outDataNames[0]);
    producer.outConsumerCnt[0] = flatten.outConsumerCnt[0];
    flatten.numInBufs = -1;
    flatten.numOutBufs = -1;
  }

  return 0;
}
/*
 * Folds ReLU layers into the layer that produces their input.
 *
 * A ReLU layer is folded when the producer's first output has exactly one
 * consumer and the producer is one of:
 *   - Convolution or Deconv2D, and the ReLU layer's actParams.actType is
 *     TIDL_RelU or TIDL_RelU6;
 *   - EltWise, BatchNorm or InnerProduct (no check on actType).
 *
 * Folding copies the ReLU layer's actParams into the producer, adds its
 * numMacs, hands over its output (descriptor, name and consumer count) and
 * flags the ReLU layer as removed by setting numInBufs/numOutBufs to -1.
 * ReLU layers that do not qualify are left untouched.
 *
 * pOrgTIDLNetStructure : network whose TIDLPCLayers are updated in place.
 * layerIndex           : number of entries of TIDLPCLayers to scan.
 *
 * Returns 0 on success, -1 when tidl_getInLayer() finds no producer for a
 * ReLU layer's input.
 */
int32_t tidl_mergeReluLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t i1 = 0; i1 < layerIndex; i1++)
  {
    sTIDL_LayerPC_t &relu = pOrgTIDLNetStructure.TIDLPCLayers[i1];
    if (relu.layerType != TIDL_ReLULayer)
    {
      continue;
    }

    const int32_t idx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, relu.inData[0].dataId);
    if (idx == -1)
    {
      return -1;
    }
    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[idx];

    bool merged = false;
    if (producer.outConsumerCnt[0] == 1)
    {
      if ((producer.layerType == TIDL_ConvolutionLayer) ||
          (producer.layerType == TIDL_Deconv2DLayer))
      {
        /* Convolution-type producers only absorb plain ReLU / ReLU6. */
        merged = (relu.actParams.actType == TIDL_RelU) ||
                 (relu.actParams.actType == TIDL_RelU6);
      }
      else if ((producer.layerType == TIDL_EltWiseLayer) ||
               (producer.layerType == TIDL_BatchNormLayer) ||
               (producer.layerType == TIDL_InnerProductLayer))
      {
        merged = true;
      }
    }

    if (!merged)
    {
      /* Not foldable: the ReLU layer stays in the network as it is.
       * TODO: Confirm whether it should instead be converted, e.g.
       *   relu.layerType = TIDL_BatchNormLayer;
       *   producer.actParams.actType = TIDL_RelU;
       */
      continue;
    }

    /* Copying the whole actParams struct also transfers actType, so no
     * separate actType assignment is needed. */
    producer.actParams = relu.actParams;
    producer.numMacs += relu.numMacs;
    producer.outData[0] = relu.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)relu.outDataNames[0]);
    producer.outConsumerCnt[0] = relu.outConsumerCnt[0];
    relu.numInBufs = -1;
    relu.numOutBufs = -1;
  }

  return 0;
}
/*
 * Folds Minimum layers whose minimumParams.min equals 6 into a producer
 * that already carries a TIDL_RelU activation, turning that activation
 * into TIDL_RelU6.
 *
 * The fold only happens when the producer's first output has exactly one
 * consumer. The producer then takes over the Minimum layer's output
 * (descriptor, name and consumer count) and numMacs, and the Minimum layer
 * is flagged as removed by setting numInBufs/numOutBufs to -1.
 *
 * Returns 0 on success, -1 when tidl_getInLayer() finds no producer for a
 * matching Minimum layer.
 */
int32_t tidl_mergeMinimumLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t layerNo = 0; layerNo < layerIndex; layerNo++)
  {
    sTIDL_LayerPC_t &minLayer = pOrgTIDLNetStructure.TIDLPCLayers[layerNo];

    /* Only "min(x, 6)" layers are of interest. */
    if ((minLayer.layerType != TIDL_MinimumLayer) ||
        (minLayer.layerPCParams.minimumParams.min != 6))
    {
      continue;
    }

    const int32_t producerNo = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, minLayer.inData[0].dataId);
    if (producerNo == -1)
    {
      return -1;
    }

    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[producerNo];
    const bool foldable = (producer.actParams.actType == TIDL_RelU) &&
                          (producer.outConsumerCnt[0] == 1);
    if (!foldable)
    {
      continue;
    }

    /* ReLU followed by min(x, 6) becomes ReLU6 on the producer. */
    producer.actParams.actType = TIDL_RelU6;
    producer.numMacs += minLayer.numMacs;
    producer.outData[0] = minLayer.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)minLayer.outDataNames[0]);
    producer.outConsumerCnt[0] = minLayer.outConsumerCnt[0];
    minLayer.numInBufs = -1;
    minLayer.numOutBufs = -1;
  }

  return 0;
}


/*
 * Folds Clip layers into the layer that produces their input.
 *
 * A Clip layer is folded when the producer's first output has exactly one
 * consumer and the producer is one of:
 *   - Convolution, EltWise, Concat or InnerProduct, and the Clip layer's
 *     actParams.actType is TIDL_Clip;
 *   - BatchNorm (no check on actType).
 *
 * Folding copies the Clip layer's actParams into the producer, adds its
 * numMacs, hands over its output (descriptor, name and consumer count) and
 * flags the Clip layer as removed by setting numInBufs/numOutBufs to -1.
 * Clip layers that do not qualify are left untouched.
 *
 * NOTE(review): the eligibility flag used to start at 1, which made every
 * check below dead code and folded the Clip layer into any producer, even
 * one whose output has other consumers (their input would then be renamed
 * away from under them). It now starts at 0, as in tidl_mergeReluLayer.
 * Confirm that no supported model relied on the unconditional fold.
 *
 * pOrgTIDLNetStructure : network whose TIDLPCLayers are updated in place.
 * layerIndex           : number of entries of TIDLPCLayers to scan.
 *
 * Returns 0 on success, -1 when tidl_getInLayer() finds no producer for a
 * Clip layer's input.
 */
int32_t tidl_mergeClipLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t i1 = 0; i1 < layerIndex; i1++)
  {
    sTIDL_LayerPC_t &clip = pOrgTIDLNetStructure.TIDLPCLayers[i1];
    if (clip.layerType != TIDL_ClipLayer)
    {
      continue;
    }

    const int32_t idx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, clip.inData[0].dataId);
    if (idx == -1)
    {
      return -1;
    }
    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[idx];

    bool merged = false;
    if (producer.outConsumerCnt[0] == 1)
    {
      if ((producer.layerType == TIDL_ConvolutionLayer) ||
          (producer.layerType == TIDL_EltWiseLayer) ||
          (producer.layerType == TIDL_ConcatLayer) ||
          (producer.layerType == TIDL_InnerProductLayer))
      {
        merged = (clip.actParams.actType == TIDL_Clip);
      }
      else if (producer.layerType == TIDL_BatchNormLayer)
      {
        merged = true;
      }
    }

    if (!merged)
    {
      /* Not foldable: the Clip layer stays in the network as it is. */
      continue;
    }

    producer.actParams = clip.actParams;
    producer.numMacs += clip.numMacs;
    producer.outData[0] = clip.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)clip.outDataNames[0]);
    producer.outConsumerCnt[0] = clip.outConsumerCnt[0];
    clip.numInBufs = -1;
    clip.numOutBufs = -1;
  }

  return 0;
}


/*
 * Removes DropOut layers by folding each one into the layer that produces
 * its input, provided that producer's first output has exactly one consumer.
 *
 * The producer takes over the DropOut layer's output (descriptor, name and
 * consumer count) and numMacs; the DropOut layer is flagged as removed by
 * setting numInBufs/numOutBufs to -1.
 *
 * Returns 0 on success, -1 when tidl_getInLayer() finds no producer for a
 * DropOut layer's input.
 */
int32_t tidl_mergeDropoutLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t layerNo = 0; layerNo < layerIndex; layerNo++)
  {
    sTIDL_LayerPC_t &dropout = pOrgTIDLNetStructure.TIDLPCLayers[layerNo];
    if (dropout.layerType != TIDL_DropOutLayer)
    {
      continue;
    }

    const int32_t producerNo = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, dropout.inData[0].dataId);
    if (producerNo == -1)
    {
      return -1;
    }

    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[producerNo];
    if (producer.outConsumerCnt[0] != 1)
    {
      /* The producer output is shared; leave the DropOut layer in place. */
      continue;
    }

    producer.numMacs += dropout.numMacs;
    producer.outData[0] = dropout.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)dropout.outDataNames[0]);
    producer.outConsumerCnt[0] = dropout.outConsumerCnt[0];
    dropout.numInBufs = -1;
    dropout.numOutBufs = -1;
  }

  return 0;
}


/*
 * Removes max-pooling layers with a 1x1 kernel and stride 1 in both
 * directions by folding each one into the layer that produces its input,
 * provided that producer's first output has exactly one consumer.
 *
 * The producer takes over the pooling layer's output (descriptor, name and
 * consumer count) and numMacs; the pooling layer is flagged as removed by
 * setting numInBufs/numOutBufs to -1.
 *
 * Returns 0 on success, -1 when tidl_getInLayer() finds no producer for a
 * matching pooling layer.
 */
int32_t tidl_merge1x1MaxPoolingLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t layerNo = 0; layerNo < layerIndex; layerNo++)
  {
    sTIDL_LayerPC_t &pool = pOrgTIDLNetStructure.TIDLPCLayers[layerNo];
    if (pool.layerType != TIDL_PoolingLayer)
    {
      continue;
    }

    /* Max pooling over a 1x1 window with unit stride. */
    const bool isUnitMaxPool =
        (pool.layerParams.poolParams.poolingType == TIDL_MaxPooling) &&
        (pool.layerParams.poolParams.kernelH == 1) &&
        (pool.layerParams.poolParams.kernelW == 1) &&
        (pool.layerParams.poolParams.strideH == 1) &&
        (pool.layerParams.poolParams.strideW == 1);
    if (!isUnitMaxPool)
    {
      continue;
    }

    const int32_t producerNo = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, pool.inData[0].dataId);
    if (producerNo == -1)
    {
      return -1;
    }

    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[producerNo];
    if (producer.outConsumerCnt[0] != 1)
    {
      /* The producer output is shared; leave the pooling layer in place. */
      continue;
    }

    producer.numMacs += pool.numMacs;
    producer.outData[0] = pool.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)pool.outDataNames[0]);
    producer.outConsumerCnt[0] = pool.outConsumerCnt[0];
    pool.numInBufs = -1;
    pool.numOutBufs = -1;
  }

  return 0;
}

/*
 * Rewrites the layers around a DetectionOutput layer in three passes over
 * the first layerIndex entries of TIDLPCLayers. A layer is "removed" by
 * setting its numInBufs/numOutBufs to -1.
 *
 * Pass 1 - Permute: every Permute layer must be followed by Flatten and
 *   then Concat. The layer feeding the Permute takes over the Permute
 *   output (descriptor, name, consumer count) and the Permute is removed.
 *
 * Pass 2 - Softmax/Reshape: for every Softmax layer feeding a
 *   DetectionOutput layer, the chain Concat -> Reshape -> Softmax must be
 *   present. Reshape and Softmax are removed and the Concat output takes
 *   the Softmax output's dataId and name.
 *
 * Pass 3 - PriorBox: for every Concat layer that feeds a DetectionOutput
 *   layer and whose first input comes from a PriorBox layer, all Concat
 *   inputs must come from PriorBox layers. One sTIDL_AnchorBoxParams_t per
 *   input is copied from the PriorBox layers into a new array attached to
 *   detectionOut.priorBox (fields equal to -1 are filled in from the
 *   PriorBox input dimensions), the consumer counts of the layers feeding
 *   each PriorBox are decremented, and the PriorBox and Concat layers are
 *   removed.
 *
 * Returns 0 on success. Returns -1 after printing an ERROR line when an
 * expected neighbour or layer combination is not found; layers already
 * rewritten at that point stay rewritten.
 *
 * NOTE(review): the anchor-box array allocated in pass 3 is not released on
 * the error returns that follow its allocation.
 * NOTE(review): priorbox.priorBox.ptr is released with free(); confirm this
 * matches the allocator used where that buffer is created.
 */
int32_t tidl_mergeDetectionoutLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t i1, i2, i3, i4;
  const int32_t SINGLE_INPUT = 0;
  const int32_t SINGLE_OUTPUT = 0;

  // Pass 1: Remove Permute First
  for (i1 = 0; i1 < layerIndex; i1++)
  {
    if (pOrgTIDLNetStructure.TIDLPCLayers[i1].layerType == TIDL_PermuteLayer)
    {
      sTIDL_LayerPC_t& permute = pOrgTIDLNetStructure.TIDLPCLayers[i1];

      // previous -> Permute
      int32_t  idx_previous = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, permute.inData[SINGLE_INPUT].dataId);
      if (idx_previous == -1)
      {
        printf("ERROR: tidl_mergeDetectionoutLayer PermuteLayer find prev layer failed!\n");
        return -1;
      }
      sTIDL_LayerPC_t &previous = pOrgTIDLNetStructure.TIDLPCLayers[idx_previous];

      // Permute -> Flatten
      int32_t  idx_flatten = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, permute.outData[SINGLE_OUTPUT].dataId);
      if (idx_flatten == -1 || pOrgTIDLNetStructure.TIDLPCLayers[idx_flatten].layerType != TIDL_FlattenLayer)
      {
        printf("ERROR: tidl_mergeDetectionoutLayer PermuteLayer find next layer failed! or Permute + Flatten combo not found!\n");
        return -1;
      }
      sTIDL_LayerPC_t &flatten = pOrgTIDLNetStructure.TIDLPCLayers[idx_flatten];

      // Flatten -> Concat (only checked; the Concat layer itself is not modified here)
      int32_t  idx_concat = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, flatten.outData[SINGLE_OUTPUT].dataId);
      if (idx_concat == -1 || pOrgTIDLNetStructure.TIDLPCLayers[idx_concat].layerType != TIDL_ConcatLayer)
      {
        printf("ERROR: tidl_mergeDetectionoutLayer Flatten find next layer failed! or Permute + Flatten + Concat combo not found!\n");
        return -1;
      }

      // The layer in front of the Permute now produces the Permute output directly.
      previous.outData[SINGLE_OUTPUT] = permute.outData[SINGLE_OUTPUT];
      strcpy((char *)previous.outDataNames[0], (char *)permute.outDataNames[0]);
      previous.outConsumerCnt[0] = permute.outConsumerCnt[0];
      permute.numInBufs = -1;
      permute.numOutBufs = -1;
    }
  }

  // Pass 2: Remove Softmax/Reshape
  for(i1 = 0; i1 < layerIndex; i1++)
  {
    if (pOrgTIDLNetStructure.TIDLPCLayers[i1].layerType == TIDL_SoftMaxLayer)
    {
      sTIDL_LayerPC_t &softmax = pOrgTIDLNetStructure.TIDLPCLayers[i1];

      // Softmax -> DetectionOut
      int32_t  idx_detectionOut = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, softmax.outData[SINGLE_OUTPUT].dataId);
      if (idx_detectionOut == -1)
      {
        printf("ERROR: tidl_mergeDetectionoutLayer Softmax find next layer failed!\n");
        return -1;
      }
      if(pOrgTIDLNetStructure.TIDLPCLayers[idx_detectionOut].layerType != TIDL_DetectionOutputLayer)
      {
        // Check next softmax.
        continue;
      }

      // Reshape -> Softmax
      int32_t  idx_reshape = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, softmax.inData[SINGLE_INPUT].dataId);
      if (idx_reshape == -1 || pOrgTIDLNetStructure.TIDLPCLayers[idx_reshape].layerType != TIDL_ReshapeLayer)
      {
        printf("ERROR: tidl_mergeDetectionoutLayer Softmax find prev layer failed! or Reshape + Softmax + DetectionOut combo not found!\n");
        return -1;
      }
      sTIDL_LayerPC_t &reshape = pOrgTIDLNetStructure.TIDLPCLayers[idx_reshape];

      // Concat -> Reshape
      int32_t  idx_concat = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, reshape.inData[SINGLE_INPUT].dataId);
      if (idx_concat == -1 || pOrgTIDLNetStructure.TIDLPCLayers[idx_concat].layerType != TIDL_ConcatLayer)
      {
        printf("ERROR: tidl_mergeDetectionoutLayer Reshape find prev layer failed! or Concat + Reshape + Softmax + DetectionOut combo not found!\n");
        return -1;
      }
      sTIDL_LayerPC_t &concat = pOrgTIDLNetStructure.TIDLPCLayers[idx_concat];

      reshape.numInBufs = -1;
      reshape.numOutBufs = -1;
      softmax.numInBufs = -1;
      softmax.numOutBufs = -1;

      // Only the dataId and the name move over; the rest of the Concat output descriptor is kept.
      concat.outData[SINGLE_OUTPUT].dataId = softmax.outData[SINGLE_OUTPUT].dataId;
      strcpy((char*)concat.outDataNames[SINGLE_OUTPUT], (char*)softmax.outDataNames[SINGLE_OUTPUT]);
    }
  }

  // Pass 3: Remove PriorBox Layer
  for (i1 = 0; i1 < layerIndex; i1++)
  {
    if (pOrgTIDLNetStructure.TIDLPCLayers[i1].layerType == TIDL_ConcatLayer)
    {
      sTIDL_LayerPC_t &concat = pOrgTIDLNetStructure.TIDLPCLayers[i1];

      // check the detection out
      int32_t  idx_detectionOut = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, concat.outData[SINGLE_OUTPUT].dataId);
      if (idx_detectionOut == -1)
      {
        // This lookup is for the consumer of the Concat output, i.e. the next layer.
        printf("ERROR: tidl_mergeDetectionoutLayer Concat find next layer failed!\n");
        return -1;
      }
      if (pOrgTIDLNetStructure.TIDLPCLayers[idx_detectionOut].layerType != TIDL_DetectionOutputLayer)
      {
        continue;
      }

      // check the priorbox (first Concat input only; the rest are validated below)
      int32_t  idx_firstInput = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, concat.inData[0].dataId);
      if (idx_firstInput == -1)
      {
        printf("ERROR: tidl_mergeDetectionoutLayer Concat find prev layer failed!\n");
        return -1;
      }
      if(pOrgTIDLNetStructure.TIDLPCLayers[idx_firstInput].layerType != TIDL_PriorBoxLayer)
      {
        continue;
      }

      // Priorbox Path confirmed
      sTIDL_LayerPC_t &detectionOut = pOrgTIDLNetStructure.TIDLPCLayers[idx_detectionOut];
      detectionOut.numInBufs = 2;

      // Validate every Concat input before anything is allocated or removed.
      for(i2 = 0; i2 < concat.numInBufs; i2++)
      {
        int32_t  idx_priorBox = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, concat.inData[i2].dataId);
        if (idx_priorBox == -1 || pOrgTIDLNetStructure.TIDLPCLayers[idx_priorBox].layerType != TIDL_PriorBoxLayer)
        {
          printf("ERROR: tidl_mergeDetectionoutLayer Concat find prev layer failed! or Priorbox + Concat + DetectionOut combo not found!\n");
          return -1;
        }
      }

      // One anchor-box record per Concat input ("head").
      sTIDL_AnchorBoxParams_t* anchorBox;
      anchorBox = (sTIDL_AnchorBoxParams_t*)my_malloc(concat.numInBufs*sizeof(sTIDL_AnchorBoxParams_t));
      memset(anchorBox, 0, concat.numInBufs*sizeof(sTIDL_AnchorBoxParams_t));

      detectionOut.layerParams.detectOutParams.numHeads = concat.numInBufs;

      // parse each priorbox
      for(i2 = 0; i2 < concat.numInBufs; i2++)
      {
        int32_t  idx_priorBox = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, concat.inData[i2].dataId);
        if (idx_priorBox == -1 || pOrgTIDLNetStructure.TIDLPCLayers[idx_priorBox].layerType != TIDL_PriorBoxLayer)
        {
          printf("ERROR: tidl_mergeDetectionoutLayer Concat find prev layer failed! or Priorbox + Concat + DetectionOut combo not found!\n");
          return -1;
        }
        sTIDL_LayerPC_t &priorbox = pOrgTIDLNetStructure.TIDLPCLayers[idx_priorBox];

        /*
         * Parsing PriorBox Layer params before remove this layer.
         * Input 1 supplies the image size and input 0 the head (feature
         * map) size; height is dimValues[2], width is dimValues[3].
         */
        int32_t img_height = priorbox.inData[1].dimValues[2];
        int32_t img_width  = priorbox.inData[1].dimValues[3];
        int32_t layer_height = priorbox.inData[0].dimValues[2];
        int32_t layer_width  = priorbox.inData[0].dimValues[3];
        // The float quotient is truncated to an integer step.
        int32_t step_h = ((float)img_height)/layer_height;
        int32_t step_w = ((float)img_width)/layer_width;

        detectionOut.layerParams.detectOutParams.imHeight = img_height;
        detectionOut.layerParams.detectOutParams.imWidth = img_width;

        anchorBox[i2] = *(sTIDL_AnchorBoxParams_t*)priorbox.priorBox.ptr;

        // -1 means "not specified": derive the value from the tensor sizes.
        if(anchorBox[i2].headWidth == -1) anchorBox[i2].headWidth = layer_width;
        if(anchorBox[i2].headHeight == -1) anchorBox[i2].headHeight = layer_height;
        if(anchorBox[i2].strideW == -1) anchorBox[i2].strideW = step_w;
        if(anchorBox[i2].strideH == -1) anchorBox[i2].strideH = step_h;

        free(priorbox.priorBox.ptr);
        priorbox.priorBox.ptr = NULL;
        priorbox.priorBox.bufSize = 0;

        /*
         * Remove PriorBox Layer & Previous layer output:
         * each layer feeding the PriorBox loses one consumer.
         */
        for(i4 = 0; i4 < priorbox.numInBufs; i4++)
        {
          int32_t  idx_previous = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, priorbox.inData[i4].dataId);
          if (idx_previous == -1)
          {
            printf("ERROR: tidl_mergeDetectionoutLayer PriorBox find prev layer failed!\n");
            return -1;
          }
          sTIDL_LayerPC_t &previous = pOrgTIDLNetStructure.TIDLPCLayers[idx_previous];

          // Find which output of the previous layer feeds this PriorBox input.
          for(i3 = 0; i3 < previous.numOutBufs; i3++)
          {
            if(previous.outData[i3].dataId == priorbox.inData[i4].dataId)
            {
              break;
            }
          }
          int32_t dataIdx_priorbox = i3;
          // Remove Previous layer's output
          previous.outConsumerCnt[dataIdx_priorbox]--;
          previous.outConsumerLinked[dataIdx_priorbox]--;
        }

        // Remove PriorBox Layer
        priorbox.numInBufs = -1;
        priorbox.numOutBufs = -1;
      }

      // bufSize is the array size expressed in float-sized units.
      detectionOut.priorBox.ptr = anchorBox;
      detectionOut.priorBox.bufSize = concat.numInBufs*sizeof(sTIDL_AnchorBoxParams_t)/sizeof(float);

      // Remove Concat Layer
      concat.numInBufs = -1;
      concat.numOutBufs = -1;
    }
  }

  return 0;
}


/*
 * Removes Split layers by connecting each consumer of a Split output
 * directly to the output that feeds the Split layer.
 *
 * For every Split layer:
 *   - the producer of its first input and the matching producer output
 *     are located, and that output loses the Split layer as a consumer;
 *   - for each Split output, the consuming layer returned by
 *     tidl_getOutLayer() has its matching input replaced by the producer
 *     output (descriptor and name), the producer output gains a consumer,
 *     and the Split layer's numMacs is added to the producer (once per
 *     rewired output);
 *   - the Split layer is flagged as removed by setting
 *     numInBufs/numOutBufs to -1.
 *
 * A Split layer whose producer cannot be found is skipped and left in
 * place; a Split output without a consumer is skipped. Neither case is
 * reported as an error, and a -1 lookup result is never used as an index.
 *
 * pOrgTIDLNetStructure : network whose TIDLPCLayers are updated in place.
 * layerIndex           : number of entries of TIDLPCLayers to scan.
 *
 * Always returns 0.
 */
int32_t tidl_mergeSplitLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t i1, i2, i3, i4;

  for (i1 = 0; i1 < layerIndex; i1++)
  {
    sTIDL_LayerPC_t &currentLayer = pOrgTIDLNetStructure.TIDLPCLayers[i1];
    if (currentLayer.layerType != TIDL_SplitLayer)
    {
      continue;
    }

    // Get previous layer index
    int32_t  idx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, currentLayer.inData[0].dataId);
    if (idx == -1)
    {
      // Treated as debug info rather than an error: without a previous
      // layer there is nothing to rewire, so this Split layer is skipped.
      continue;
    }
    sTIDL_LayerPC_t &previousLayer = pOrgTIDLNetStructure.TIDLPCLayers[idx];

    // Match Split input with Previous Layer's output number
    for(i2 = 0; i2 < previousLayer.numOutBufs; i2++)
    {
      if(currentLayer.inData[0].dataId == previousLayer.outData[i2].dataId)
        break;
    }
    if (i2 >= previousLayer.numOutBufs)
    {
      // No output of the previous layer carries this dataId; do not index
      // past the populated outputs.
      continue;
    }
    // The Split layer itself stops being a consumer of that output.
    previousLayer.outConsumerCnt[i2]--;

    for(i4 = 0; i4 < currentLayer.numOutBufs; i4++)
    {
      // Get next layer index
      int32_t  idx_next = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, currentLayer.outData[i4].dataId);
      if (idx_next == -1)
      {
        // Treated as debug info: this Split output has no consumer to rewire.
        continue;
      }
      sTIDL_LayerPC_t &nextLayer = pOrgTIDLNetStructure.TIDLPCLayers[idx_next];

      // Find which input of the next layer is fed by this Split output.
      for(i3 = 0; i3 < nextLayer.numInBufs; i3++)
      {
        if(currentLayer.outData[i4].dataId == nextLayer.inData[i3].dataId)
          break;
      }
      if (i3 >= nextLayer.numInBufs)
      {
        continue;
      }

      // Split Layer only have single output, but previous layer has multiple output, need to parse in order
      previousLayer.numMacs += currentLayer.numMacs;
      nextLayer.inData[i3] = previousLayer.outData[i2];
      previousLayer.outConsumerCnt[i2]++;
      strcpy((char*)nextLayer.inDataNames[i3], (char*)previousLayer.outDataNames[i2]);
    }

    currentLayer.numInBufs = -1;
    currentLayer.numOutBufs = -1;
  }

  return 0;
}


/*
 * Replaces every Slice layer that has more than one output by one
 * single-output Slice layer per original output.
 *
 * The new layers are written to TIDLPCLayers starting at position
 * layerIndex, and layerIndex (passed by reference) is incremented for each
 * layer added. Each new layer shares the original input, takes one output
 * of the original (descriptor, name, consumer counters), gets
 * slicePoints[0] from the original's slicePoints[k] and is named
 * "<original name>_TIDL_<k>". The producer's consumer count for the shared
 * input is adjusted from one consumer to one per new layer, and the
 * original Slice layer is flagged as removed by setting
 * numInBufs/numOutBufs to -1.
 *
 * Returns 0 on success, -1 (after printing an ERROR line) when
 * tidl_getInLayer() finds no producer for a Slice layer's input.
 *
 * NOTE(review): nothing here checks that TIDLPCLayers has room for the
 * appended layers - confirm the caller guarantees the capacity.
 */
int32_t tidl_duplicateSliceLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t& layerIndex)
{
  for (int32_t layerNo = 0; layerNo < layerIndex; layerNo++)
  {
    sTIDL_LayerPC_t &srcSlice = pOrgTIDLNetStructure.TIDLPCLayers[layerNo];
    if ((srcSlice.layerType != TIDL_SliceLayer) || !(srcSlice.numOutBufs > 1))
    {
      continue;
    }

    const int32_t producerNo = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, srcSlice.inData[0].dataId);
    if (producerNo == -1)
    {
      printf("ERROR: tidl_duplicateSliceLayer slice layer search for previous layer failed!\n");
      return -1;
    }
    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[producerNo];

    /* Locate the producer output that feeds the Slice layer. */
    int32_t outNo = 0;
    while ((outNo < producer.numOutBufs) &&
           (producer.outData[outNo].dataId != srcSlice.inData[0].dataId))
    {
      outNo++;
    }
    /* One consumer (the original) goes away, numOutBufs new ones arrive. */
    producer.outConsumerCnt[outNo]--;
    producer.outConsumerCnt[outNo] += srcSlice.numOutBufs;

    for (int32_t part = 0; part < srcSlice.numOutBufs; part++)
    {
      /* Next free slot; layerIndex advances at the end of each iteration. */
      sTIDL_LayerPC_t &dstSlice = pOrgTIDLNetStructure.TIDLPCLayers[layerIndex];

      /* Input side: identical to the original Slice layer. */
      dstSlice.inData[0] = srcSlice.inData[0];
      strcpy((char*)dstSlice.inDataNames[0], (char*)srcSlice.inDataNames[0]);

      /* Slice parameters: same axis, the slice point of this output only. */
      dstSlice.layerParams.sliceParams.slicePoints[0] = srcSlice.layerParams.sliceParams.slicePoints[part];
      dstSlice.layerParams.sliceParams.axis = srcSlice.layerParams.sliceParams.axis;

      dstSlice.layerType = srcSlice.layerType;
      sprintf((char*)dstSlice.name, "%s_TIDL_%d", (char*)srcSlice.name, part);
      dstSlice.numInBufs = 1;
      dstSlice.numOutBufs = 1;

      /* Output side: output number `part` of the original Slice layer. */
      dstSlice.outConsumerCnt[0] = srcSlice.outConsumerCnt[part];
      dstSlice.outConsumerLinked[0] = srcSlice.outConsumerLinked[part];
      dstSlice.outData[0] = srcSlice.outData[part];
      strcpy((char*)dstSlice.outDataNames[0], (char*)srcSlice.outDataNames[part]);

      layerIndex++;
    }

    srcSlice.numInBufs = -1;
    srcSlice.numOutBufs = -1;
  }

  return 0;
}


/*
 * Folds pooling layers with a 2x2 kernel into a preceding Convolution
 * layer whose first output has exactly one consumer.
 *
 * Only pooling layers with non-zero kernelH and kernelW are considered.
 * When folding, the Convolution layer takes over the pooling layer's
 * output (descriptor, name and consumer count) and numMacs, stores the
 * pooling parameters in convParams.poolParams with
 * convParams.enablePooling set to 1, and the pooling layer is flagged as
 * removed by setting numInBufs/numOutBufs to -1.
 *
 * A pooling layer whose producer cannot be found is skipped.
 *
 * pOrgTIDLNetStructure : network whose TIDLPCLayers are updated in place.
 * layerIndex           : number of entries of TIDLPCLayers to scan.
 *
 * Always returns 0.
 */
int32_t tidl_mergePoolingLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t i1 = 0; i1 < layerIndex; i1++)
  {
    sTIDL_LayerPC_t &pool = pOrgTIDLNetStructure.TIDLPCLayers[i1];
    if ((pool.layerType != TIDL_PoolingLayer) ||
        (pool.layerParams.poolParams.kernelH == 0) ||
        (pool.layerParams.poolParams.kernelW == 0))
    {
      continue;
    }

    /* Single lookup: the producer is both checked and updated below. */
    const int32_t idx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, pool.inData[0].dataId);
    if (idx == -1)
    {
      continue;
    }
    sTIDL_LayerPC_t &conv = pOrgTIDLNetStructure.TIDLPCLayers[idx];

    const bool merged = (conv.layerType == TIDL_ConvolutionLayer) &&
                        (conv.outConsumerCnt[0] == 1) &&
                        (pool.layerParams.poolParams.kernelH == 2) &&
                        (pool.layerParams.poolParams.kernelW == 2);
    if (!merged)
    {
      continue;
    }

    conv.numMacs += pool.numMacs;
    conv.outData[0] = pool.outData[0];
    strcpy((char*)conv.outDataNames[0], (char*)pool.outDataNames[0]);
    conv.outConsumerCnt[0] = pool.outConsumerCnt[0];
    conv.layerParams.convParams.poolParams = pool.layerParams.poolParams;
    conv.layerParams.convParams.enablePooling = 1;
    pool.numInBufs = -1;
    pool.numOutBufs = -1;
  }

  return 0;
}

/*
 * Folds Reshape layers into a producing InnerProduct layer or
 * average-pooling layer. The producer's consumer count is not checked.
 *
 * When folding:
 *   - an average-pooling producer gets kernelW and kernelH set to 0;
 *   - the producer takes over the Reshape output (descriptor, name and
 *     consumer count) and numMacs, and the Reshape layer is flagged as
 *     removed by setting numInBufs/numOutBufs to -1;
 *   - the producer's output shape is collapsed: dimValues[3] becomes the
 *     product of dimValues[1..3] and dimValues[1], dimValues[2] become 1;
 *   - if tidl_getOutLayer() finds a consumer of the Reshape output, its
 *     first input is overwritten with the producer's new output descriptor
 *     and tidl_updateOutDataShape() is called for that consumer.
 *
 * Returns 0 on success, -1 when tidl_getInLayer() finds no producer for a
 * Reshape layer's input.
 */
int32_t tidl_mergeReshapeLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex, sTIDL_tfOutRehapeMap_t * sTIDL_tfOutRehapeTable)
{
  for (int32_t layerNo = 0; layerNo < layerIndex; layerNo++)
  {
    sTIDL_LayerPC_t &reshape = pOrgTIDLNetStructure.TIDLPCLayers[layerNo];
    if (reshape.layerType != TIDL_ReshapeLayer)
    {
      continue;
    }

    const int32_t producerNo = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, reshape.inData[0].dataId);
    if (producerNo == -1)
    {
      return -1;
    }
    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[producerNo];

    const bool producerIsAvgPool =
        (producer.layerType == TIDL_PoolingLayer) &&
        (producer.layerParams.poolParams.poolingType == TIDL_AveragePooling);
    if ((producer.layerType != TIDL_InnerProductLayer) && !producerIsAvgPool)
    {
      continue;
    }

    if (producerIsAvgPool)
    {
      producer.layerParams.poolParams.kernelW = 0;
      producer.layerParams.poolParams.kernelH = 0;
    }

    /* The producer takes over the Reshape output; the Reshape layer is dropped. */
    producer.numMacs += reshape.numMacs;
    producer.outData[0] = reshape.outData[0];
    strcpy((char *)producer.outDataNames[0], (char *)reshape.outDataNames[0]);
    producer.outConsumerCnt[0] = reshape.outConsumerCnt[0];
    reshape.numInBufs = -1;
    reshape.numOutBufs = -1;

    /* Collapse dimensions 1..3 of the producer output into dimension 3. */
    producer.outData[0].dimValues[3] = producer.outData[0].dimValues[1] *
                                       producer.outData[0].dimValues[2] *
                                       producer.outData[0].dimValues[3];
    producer.outData[0].dimValues[2] = 1;
    producer.outData[0].dimValues[1] = 1;

    /* Propagate the new shape to the consumer of the former Reshape output. */
    const int32_t consumerNo = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, reshape.outData[0].dataId);
    if (consumerNo != -1)
    {
      sTIDL_LayerPC_t *consumer = &pOrgTIDLNetStructure.TIDLPCLayers[consumerNo];
      consumer->inData[0] = producer.outData[0];
      tidl_updateOutDataShape(pOrgTIDLNetStructure, consumerNo, layerIndex, sTIDL_tfOutRehapeTable);
    }
  }

  return 0;
}


/*
 * Makes the input of every InnerProduct layer a flattened tensor.
 *
 * When the producer of the first input is an average-pooling layer whose
 * first output has exactly one consumer, that pooling layer gets kernelW
 * and kernelH set to 0, and both the pooling output shape and the
 * InnerProduct input shape are collapsed: dimValues[3] becomes the product
 * of dimValues[1..3] and dimValues[1], dimValues[2] become 1.
 *
 * For any other producer, the producer output must already have
 * dimValues[1] == 1 and dimValues[2] == 1. If it does not, a message is
 * printed and the process is terminated with a non-zero exit status so the
 * failure is visible to whatever launched the import.
 *
 * pOrgTIDLNetStructure : network whose TIDLPCLayers are updated in place.
 * layerIndex           : number of entries of TIDLPCLayers to scan.
 *
 * Returns 0 on success, -1 when tidl_getInLayer() finds no producer for an
 * InnerProduct layer's input.
 */
int32_t tidl_convertIpLayerInputShape(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t i1 = 0; i1 < layerIndex; i1++)
  {
    sTIDL_LayerPC_t &ipLayer = pOrgTIDLNetStructure.TIDLPCLayers[i1];
    if (ipLayer.layerType != TIDL_InnerProductLayer)
    {
      continue;
    }

    const int32_t inIdx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, ipLayer.inData[0].dataId);
    if (inIdx == -1)
    {
      return -1;
    }
    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[inIdx];

    if ((producer.layerType == TIDL_PoolingLayer) &&
        (producer.layerParams.poolParams.poolingType == TIDL_AveragePooling) &&
        (producer.outConsumerCnt[0] == 1))
    {
      producer.layerParams.poolParams.kernelW = 0;
      producer.layerParams.poolParams.kernelH = 0;

      /* Collapse dimensions 1..3 of the pooling output into dimension 3. */
      producer.outData[0].dimValues[3] = producer.outData[0].dimValues[1] * producer.outData[0].dimValues[2] * producer.outData[0].dimValues[3];
      producer.outData[0].dimValues[2] = 1;
      producer.outData[0].dimValues[1] = 1;

      /* Same collapse on the InnerProduct layer's own input descriptor. */
      ipLayer.inData[0].dimValues[3] = ipLayer.inData[0].dimValues[1] * ipLayer.inData[0].dimValues[2] * ipLayer.inData[0].dimValues[3];
      ipLayer.inData[0].dimValues[2] = 1;
      ipLayer.inData[0].dimValues[1] = 1;
    }
    else if ((producer.outData[0].dimValues[1] != 1) || (producer.outData[0].dimValues[2] != 1))
    {
      printf("In put of TIDL_InnerProductLayer layer needs to be Faltten. Please add Flatten layer to import this mdoels \n");
      /* Fatal import error: report failure, not success, to the caller process. */
      exit(1);
    }
  }

  return 0;
}


/*
 * Makes the input of SoftMax layers a flattened tensor where possible.
 *
 * When the producer of the first input is an average-pooling layer whose
 * first output has exactly one consumer, that pooling layer gets kernelW
 * and kernelH set to 0, and both the pooling output shape and the SoftMax
 * input shape are collapsed: dimValues[3] becomes the product of
 * dimValues[1..3] and dimValues[1], dimValues[2] become 1.
 *
 * For any other producer whose output has dimValues[1] != 1 or
 * dimValues[2] != 1, a message is printed and processing continues.
 *
 * Returns 0 on success, -1 when tidl_getInLayer() finds no producer for a
 * SoftMax layer's input.
 */
int32_t tidl_convertSoftMaxLayerInputShape(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t layerNo = 0; layerNo < layerIndex; layerNo++)
  {
    sTIDL_LayerPC_t &softmax = pOrgTIDLNetStructure.TIDLPCLayers[layerNo];
    if (softmax.layerType != TIDL_SoftMaxLayer)
    {
      continue;
    }

    const int32_t producerNo = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, softmax.inData[0].dataId);
    if (producerNo == -1)
    {
      return -1;
    }
    sTIDL_LayerPC_t &producer = pOrgTIDLNetStructure.TIDLPCLayers[producerNo];

    const bool singleUseAvgPool =
        (producer.layerType == TIDL_PoolingLayer) &&
        (producer.layerParams.poolParams.poolingType == TIDL_AveragePooling) &&
        (producer.outConsumerCnt[0] == 1);

    if (!singleUseAvgPool)
    {
      /* Warning only: the import carries on with the shape as it is. */
      if ((producer.outData[0].dimValues[1] != 1) || (producer.outData[0].dimValues[2] != 1))
      {
        printf("In put of TIDL_SoftMaxLayer layer needs to be Faltten. Please add Flatten layer to import this mdoels. Except SSD. \n");
      }
      continue;
    }

    producer.layerParams.poolParams.kernelW = 0;
    producer.layerParams.poolParams.kernelH = 0;

    /* Collapse dimensions 1..3 of the pooling output into dimension 3. */
    producer.outData[0].dimValues[3] = producer.outData[0].dimValues[1] *
                                       producer.outData[0].dimValues[2] *
                                       producer.outData[0].dimValues[3];
    producer.outData[0].dimValues[2] = 1;
    producer.outData[0].dimValues[1] = 1;

    /* Same collapse on the SoftMax layer's own input descriptor. */
    softmax.inData[0].dimValues[3] = softmax.inData[0].dimValues[1] *
                                     softmax.inData[0].dimValues[2] *
                                     softmax.inData[0].dimValues[3];
    softmax.inData[0].dimValues[2] = 1;
    softmax.inData[0].dimValues[1] = 1;
  }

  return 0;
}


/*
 * Convert qualifying 1x1 convolution layers into inner-product layers.
 *
 * A convolution layer among the first `layerIndex` layers is converted when
 * all of the following hold:
 *   - its kernel is 1x1 and inData[0].dimValues[2] and dimValues[3] are 1,
 *   - its producer is an average-pooling layer whose output has one consumer,
 *   - it has no consumer, or its consumer is an InnerProduct, Data, SoftMax,
 *     Flatten or Reshape layer.
 *
 * On conversion the producing pooling layer gets kernel size 0x0, the input
 * and output descriptors are flattened (dims 1..3 collapsed into
 * dimValues[3]), and numInNodes / numOutNodes are taken from the flattened
 * sizes. When a consumer exists, its inData[0] is updated and
 * tidl_updateOutDataShape() is invoked starting at the consumer.
 *
 * sTIDL_tfOutRehapeTable is forwarded unchanged to tidl_updateOutDataShape().
 *
 * Returns 0 on success, -1 when the producer of a candidate layer is missing.
 */
int32_t tidl_convertConv2DToIpLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex, sTIDL_tfOutRehapeMap_t * sTIDL_tfOutRehapeTable)
{
  int32_t i1;
  for (i1 = 0; i1 < layerIndex; i1++)
  {
    if (pOrgTIDLNetStructure.TIDLPCLayers[i1].layerType == TIDL_ConvolutionLayer)
    {
      sTIDL_LayerPC_t &TIDLPCLayers = pOrgTIDLNetStructure.TIDLPCLayers[i1];
      sTIDL_ConvParams_t &convParams = pOrgTIDLNetStructure.TIDLPCLayers[i1].layerParams.convParams;
      if ((convParams.kernelW == 1) && (convParams.kernelH == 1) && (TIDLPCLayers.inData[0].dimValues[2] == 1) && (TIDLPCLayers.inData[0].dimValues[3] == 1))
      {
        int32_t  inIdx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, pOrgTIDLNetStructure.TIDLPCLayers[i1].inData[0].dataId);
        if (inIdx == -1)
        {
          return -1;
        }
        sTIDL_LayerPC_t &TIDLPCLayersIn = pOrgTIDLNetStructure.TIDLPCLayers[inIdx];

        if ((TIDLPCLayersIn.layerType == TIDL_PoolingLayer) && (TIDLPCLayersIn.layerParams.poolParams.poolingType == TIDL_AveragePooling) && (TIDLPCLayersIn.outConsumerCnt[0] == 1))
        {
          /* NULL means "this layer has no consumer"; never dereferenced in that case. */
          sTIDL_LayerPC_t *TIDLPCLayersOut = NULL;
          int32_t  outIdx = tidl_getOutLayer(pOrgTIDLNetStructure, layerIndex, pOrgTIDLNetStructure.TIDLPCLayers[i1].outData[0].dataId);
          if (outIdx != -1)
          {
            TIDLPCLayersOut = &pOrgTIDLNetStructure.TIDLPCLayers[outIdx];
          }
          if ((TIDLPCLayersOut == NULL) ||
            (TIDLPCLayersOut->layerType == TIDL_InnerProductLayer) ||
            (TIDLPCLayersOut->layerType == TIDL_DataLayer) ||
            (TIDLPCLayersOut->layerType == TIDL_SoftMaxLayer) ||
            (TIDLPCLayersOut->layerType == TIDL_FlattenLayer) ||
            (TIDLPCLayersOut->layerType == TIDL_ReshapeLayer))
          {
            TIDLPCLayersIn.layerParams.poolParams.kernelW = 0;
            TIDLPCLayersIn.layerParams.poolParams.kernelH = 0;

            TIDLPCLayers.layerType = TIDL_InnerProductLayer;

            /* Flatten the input descriptor and mirror it on the producer's output. */
            TIDLPCLayers.inData[0].dimValues[3] = TIDLPCLayers.inData[0].dimValues[1] * TIDLPCLayers.inData[0].dimValues[2] * TIDLPCLayers.inData[0].dimValues[3];
            TIDLPCLayers.inData[0].dimValues[2] = 1;
            TIDLPCLayers.inData[0].dimValues[1] = 1;
            TIDLPCLayersIn.outData[0] = TIDLPCLayers.inData[0];

            /* Flatten the output descriptor. */
            TIDLPCLayers.outData[0].dimValues[3] = TIDLPCLayers.outData[0].dimValues[1] * TIDLPCLayers.outData[0].dimValues[2] * TIDLPCLayers.outData[0].dimValues[3];
            TIDLPCLayers.outData[0].dimValues[2] = 1;
            TIDLPCLayers.outData[0].dimValues[1] = 1;

            /* Only propagate the new shape downstream when a consumer exists;
               with no consumer there is nothing to update and outIdx (-1) is
               not a valid layer index. */
            if (TIDLPCLayersOut != NULL)
            {
              TIDLPCLayersOut->inData[0] = TIDLPCLayers.outData[0];
              tidl_updateOutDataShape(pOrgTIDLNetStructure, outIdx, layerIndex, sTIDL_tfOutRehapeTable);
            }

            TIDLPCLayers.layerParams.innerProductParams.numInNodes = TIDLPCLayers.inData[0].dimValues[3];
            TIDLPCLayers.layerParams.innerProductParams.numOutNodes = TIDLPCLayers.outData[0].dimValues[3];
          }
        }
      }
    }
  }
  return 0;
}


/*
 * Re-type every ReLU / PReLU layer among the first `layerIndex` layers as a
 * BatchNorm layer with per-channel scale 1 and bias 0.
 *
 * The number of scale/bias entries is outData[0].dimValues[1]. Both buffers
 * are obtained from my_malloc() and stored in the layer's `weights` and `bias`
 * descriptors; bufSize holds the element count. The layer's actParams are
 * left as they were.
 *
 * Always returns 0.
 */
int32_t tidl_convertRelUToBNLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  for (int32_t layerIdx = 0; layerIdx < layerIndex; layerIdx++)
  {
    sTIDL_LayerPC_t &layer = pOrgTIDLNetStructure.TIDLPCLayers[layerIdx];

    const bool isReluVariant =
      (layer.layerType == TIDL_ReLULayer) || (layer.layerType == TIDL_PReLULayer);
    if (!isReluVariant)
    {
      continue;
    }

    layer.layerType = TIDL_BatchNormLayer;

    const int32_t numEntries = layer.outData[0].dimValues[1];

    layer.weights.ptr     = my_malloc(numEntries*sizeof(float));
    layer.weights.bufSize = numEntries;
    layer.bias.ptr        = my_malloc(numEntries*sizeof(float));
    layer.bias.bufSize    = numEntries;

    float * const scale  = (float*)layer.weights.ptr;
    float * const offset = (float*)layer.bias.ptr;

    /* Identity transform: y = 1 * x + 0 for every entry. */
    for (int32_t entry = 0; entry < numEntries; entry++)
    {
      scale[entry]  = 1;
      offset[entry] = 0;
    }
  }
  return 0;
}

/*
 * Copy the importer-side network description into the device network
 * structure and write a tabular per-layer log.
 *
 * pOrgTIDLNetStructure : importer-side network; the first `layerIndex` layers
 *                        are scanned.
 * tIDLNetStructure     : device network to fill. Element sizes,
 *                        quantizationStyle and netVersion are set here.
 * layerIndex           : number of importer-side layers to consider.
 *
 * Layers of type TIDL_UnsupportedLayer and TIDL_ConstDataLayer are skipped;
 * every other layer is copied to the next free slot of TIDLLayers. Data
 * layers get layersGroupId 0; all others get coreID 1 and layersGroupId 1.
 *
 * The log is written to "<gParams.outputNetFile>_netLog.txt" and, when
 * gParams.debugTraceLevel > 0, echoed to stdout.
 *
 * Returns the number of layers copied into tIDLNetStructure.
 */
int32_t tidl_copyPCNetToDeviceNet(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, sTIDL_Network_t  &tIDLNetStructure, int32_t layerIndex)
{
  int32_t i, j;
  int64_t                    totalMacs = 0;
  int32_t tiLayerIndex = 0;
  const char * const tableRule = "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n";

  tIDLNetStructure.dataElementSize = (gParams.numFeatureBits + 7)>>3; /* Convert to number of bytes */
  tIDLNetStructure.biasElementSize = 2;
  tIDLNetStructure.weightsElementSize = ((NUM_WHGT_BITS - 1) / 8 + 1);
  tIDLNetStructure.slopeElementSize = tIDLNetStructure.weightsElementSize;
  tIDLNetStructure.interElementSize = 4;
  tIDLNetStructure.quantizationStyle = gParams.quantizationStyle;
  tIDLNetStructure.netVersion = TIDL_NET_VERSION;


  for (i = 0; i < layerIndex; i++)
  {
    const sTIDL_LayerPC_t &srcLayer = pOrgTIDLNetStructure.TIDLPCLayers[i];
    if ((srcLayer.layerType != TIDL_UnsupportedLayer) &&
      (srcLayer.layerType != TIDL_ConstDataLayer))
    {
      tIDLNetStructure.TIDLLayers[tiLayerIndex].layerType = srcLayer.layerType;
      tIDLNetStructure.TIDLLayers[tiLayerIndex].layerParams = srcLayer.layerParams;
      tIDLNetStructure.TIDLLayers[tiLayerIndex].actParams  = srcLayer.actParams;
      tIDLNetStructure.TIDLLayers[tiLayerIndex].numInBufs = srcLayer.numInBufs;
      tIDLNetStructure.TIDLLayers[tiLayerIndex].numOutBufs = srcLayer.numOutBufs;
      tIDLNetStructure.TIDLLayers[tiLayerIndex].weightsElementSizeInBits = srcLayer.weightsElementSizeInBits;
      /* Model types 0 and 2 always use the top-left stride offset; other
         model types keep the per-layer setting. */
      if ((gParams.modelType == 2) || (gParams.modelType == 0))
      {
        tIDLNetStructure.TIDLLayers[tiLayerIndex].strideOffsetMethod = TIDL_StrideOffsetTopLeft;
      }
      else
      {
        tIDLNetStructure.TIDLLayers[tiLayerIndex].strideOffsetMethod = srcLayer.strideOffsetMethod;
      }

      if (tIDLNetStructure.TIDLLayers[tiLayerIndex].layerType == TIDL_DataLayer)
      {
        tIDLNetStructure.TIDLLayers[tiLayerIndex].layersGroupId = 0;
      }
      else
      {
        tIDLNetStructure.TIDLLayers[tiLayerIndex].coreID = 1;
        tIDLNetStructure.TIDLLayers[tiLayerIndex].layersGroupId = 1;
      }

      for (j = 0; j < srcLayer.numInBufs; j++)
      {
        tIDLNetStructure.TIDLLayers[tiLayerIndex].inData[j] = srcLayer.inData[j];
      }
      for (j = 0; j < srcLayer.numOutBufs; j++)
      {
        tIDLNetStructure.TIDLLayers[tiLayerIndex].outData[j] = srcLayer.outData[j];
      }
      totalMacs += srcLayer.numMacs;
      tiLayerIndex++;
    }
  }

  char filenameStr[1000];
  sprintf(filenameStr, "%s_netLog.txt", gParams.outputNetFile);
  FILE * nfp = fopen(filenameStr, "w+");
  if(nfp)
  {
    fprintf(nfp,"Num of Layer Detected : %3d \n", layerIndex);
    fputs(tableRule, nfp);
    fprintf(nfp,"%5s|%-30s|%-50s|%-6s|%-6s|%-6s|%-32s|%-10s|%-36s|%-36s|%-11s|\n", "Num", "TIDL Layer Name", "Out Data Name", "Group", "#Ins", "#Outs", "Inbuf Ids", "Outbuf Id", "In NCHW", "Out NCHW", "MACS");
    fputs(tableRule, nfp);
    /* Index of the device-side layer that corresponds to the current row;
       advances under the same filter used by the copy loop above. */
    int32_t layerIndexCount = 0;
    for (i = 0; i < layerIndex; i++)
    {
      const sTIDL_LayerPC_t &srcLayer = pOrgTIDLNetStructure.TIDLPCLayers[i];
      if ((srcLayer.layerType != TIDL_UnsupportedLayer) &&
        (srcLayer.layerType != TIDL_ConstDataLayer))
      {
        const char * outName = (const char *)srcLayer.outDataNames[0];
        const size_t outNameLen = strlen(outName);

        fprintf(nfp,"%5d|%-30s|", layerIndexCount, TIDL_LayerString[srcLayer.layerType]);
        /* Names longer than the 50-character column show only their tail. */
        if (outNameLen > 50)
        {
          fprintf(nfp,"%-50s|", &outName[outNameLen - 50]);
        }
        else
        {
          fprintf(nfp,"%-50s|", outName);
        }

        /* Report the group id of this row's own device layer. */
        fprintf(nfp,"%6d|%6d|%6d|", tIDLNetStructure.TIDLLayers[layerIndexCount].layersGroupId, srcLayer.numInBufs, srcLayer.numOutBufs);

        for (j = 0; j < srcLayer.numInBufs; j++)
        {
          fprintf(nfp,"%3d ", srcLayer.inData[j].dataId);
        }
        /* Pad the input-id column up to 8 entries. */
        for (j = (srcLayer.numInBufs > 0 ? srcLayer.numInBufs : 0); j < 8; j++)
        {
          fprintf(nfp,"  x ");
        }
        fprintf(nfp,"|");
        fprintf(nfp,"%3d ", srcLayer.outData[0].dataId);
        fprintf(nfp,"      |");
        for (j = 0; j < TIDL_DIM_MAX; j++)
        {
          fprintf(nfp,"%8d ", srcLayer.inData[0].dimValues[j]);
        }
        fprintf(nfp,"|");

        for (j = 0; j < TIDL_DIM_MAX; j++)
        {
          fprintf(nfp,"%8d ", srcLayer.outData[0].dimValues[j]);
        }
        fprintf(nfp,"|");
        /* Cast so the argument matches %lld on every platform. */
        fprintf(nfp,"%10lld |", (long long)srcLayer.numMacs);
        fprintf(nfp,"\n");
        layerIndexCount++;
      }
    }
    fputs(tableRule, nfp);
    fprintf(nfp,"Total Giga Macs : %4.4f\n", ((float)totalMacs / 1000000000));
    fputs(tableRule, nfp);
    fclose(nfp);
  }
  if(gParams.debugTraceLevel > 0)
  {
    /* Echo the freshly written log to stdout. */
    nfp = fopen(filenameStr, "r");
    if(nfp)
    {
      char logLine[1000];
      while(fgets(logLine, sizeof(logLine),nfp))
      {
        printf("%s", logLine);
      }
      fclose(nfp);
    }
  }
  return tiLayerIndex;
}
/*
 * Prepare an output data layer at slot `tiLayerIndex` of the device network.
 *
 * The slot is initialised as a TIDL_DataLayer with numInBufs 0, numOutBufs -1
 * and coreID 255. Every output buffer of a non-data layer in
 * [0, tiLayerIndex) that TIDL_isDataBufUsed() reports as unused is appended
 * to the slot's inData list.
 *
 * numLayers becomes tiLayerIndex + 1 when at least one such buffer was found
 * and tiLayerIndex otherwise. Always returns 0.
 */
int32_t tidl_addOutDataLayer(sTIDL_Network_t  &tIDLNetStructure, int32_t tiLayerIndex)
{
  int32_t outLayerNeeded = 0;

  tIDLNetStructure.TIDLLayers[tiLayerIndex].layerType  = TIDL_DataLayer;
  tIDLNetStructure.TIDLLayers[tiLayerIndex].numInBufs  = 0;
  tIDLNetStructure.TIDLLayers[tiLayerIndex].numOutBufs = -1;
  tIDLNetStructure.TIDLLayers[tiLayerIndex].coreID     = 255;

  for (int32_t srcIdx = 0; srcIdx < tiLayerIndex; srcIdx++)
  {
    if (tIDLNetStructure.TIDLLayers[srcIdx].layerType == TIDL_DataLayer)
    {
      continue;
    }

    for (int32_t bufIdx = 0; bufIdx < tIDLNetStructure.TIDLLayers[srcIdx].numOutBufs; bufIdx++)
    {
      if (TIDL_isDataBufUsed(tIDLNetStructure.TIDLLayers[srcIdx].outData[bufIdx].dataId, &tIDLNetStructure, tiLayerIndex))
      {
        continue;
      }

      /* Unconsumed buffer: route it into the output data layer. */
      tIDLNetStructure.TIDLLayers[tiLayerIndex].inData[tIDLNetStructure.TIDLLayers[tiLayerIndex].numInBufs] =
        tIDLNetStructure.TIDLLayers[srcIdx].outData[bufIdx];
      tIDLNetStructure.TIDLLayers[tiLayerIndex].numInBufs++;
      outLayerNeeded = 1;
    }
  }

  tIDLNetStructure.numLayers = tiLayerIndex + outLayerNeeded;
  return 0;
}
/*
 * Append an input data layer for every layer input that has no producer.
 *
 * For each non-data layer, every input buffer is looked up with
 * tidl_getInLayer(). When no producer is found, a new TIDL_DataLayer is
 * created at slot `layerIndex`: it has numInBufs -1 and numOutBufs 1, takes
 * the consumer's input name as its output name, receives a fresh data id from
 * *dataIndex (post-incremented), and is linked via tidl_getConsumerCount() /
 * tidl_linkOutputTensors().
 *
 * `layerIndex` grows as layers are appended, so later lookups also see the
 * new data layers. numLayers is set to the final count. Always returns 0.
 */
int32_t tidl_addInDataLayer(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex, int32_t * dataIndex)
{
  /* The bound is re-evaluated every iteration because layerIndex may grow. */
  for (int32_t consumerIdx = 0; consumerIdx < layerIndex; consumerIdx++)
  {
    sTIDL_LayerPC_t &consumer = pOrgTIDLNetStructure.TIDLPCLayers[consumerIdx];
    if (consumer.layerType == TIDL_DataLayer)
    {
      continue;
    }

    for (int32_t inBuf = 0; inBuf < consumer.numInBufs; inBuf++)
    {
      const int32_t producerIdx = tidl_getInLayer(pOrgTIDLNetStructure, layerIndex, consumer.inData[inBuf].dataId);
      if (producerIdx != -1)
      {
        continue;
      }

      /* No producer: create a data layer in the next free slot. */
      sTIDL_LayerPC_t &newDataLayer = pOrgTIDLNetStructure.TIDLPCLayers[layerIndex];
      newDataLayer.layerType  = TIDL_DataLayer;
      newDataLayer.numInBufs  = -1;
      newDataLayer.numOutBufs = 1;
      strcpy((char *)newDataLayer.outDataNames[0], (char *)consumer.inDataNames[inBuf]);
      newDataLayer.outData[0].dataId = (*dataIndex)++;
      newDataLayer.outConsumerCnt[0] =
        tidl_getConsumerCount(&pOrgTIDLNetStructure, layerIndex, (char *)newDataLayer.outDataNames[0]);
      newDataLayer.outConsumerLinked[0] = 0;
      tidl_linkOutputTensors(&pOrgTIDLNetStructure, layerIndex);
      layerIndex++;
    }
  }

  pOrgTIDLNetStructure.numLayers = layerIndex;
  return 0;
}

/*
 * Insert a BatchNorm layer after every data layer that has outputs.
 *
 * For each TIDL_DataLayer with numOutBufs > 0 a new TIDL_BatchNormLayer is
 * created at slot `layerIndex`:
 *   - the new layer takes over the data layer's output name, output
 *     descriptor and consumer bookkeeping;
 *   - the data layer's output is renamed with an "_original" suffix, given a
 *     fresh data id from *dataIndex (post-incremented) and becomes the new
 *     layer's single input;
 *   - per-entry scale is params->inScale[k] and bias is
 *     -(params->inMean[k] * scale), where k runs continuously across all
 *     inserted layers; the entry count is outData[0].dimValues[1].
 *
 * The process exits if k reaches TIDL_MAX_ALG_IN_BUFS*TIDL_IN_NUF_MAX_CH.
 * numLayers is set to the final layer count. Always returns 0.
 */
int32_t tidl_addNormLayerToInData(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex, int32_t * dataIndex, tidl_import_config * params)
{
  int32_t paramCursor = 0;

  /* layerIndex grows while scanning; appended layers are BatchNorm and are skipped. */
  for (int32_t srcIdx = 0; srcIdx < layerIndex; srcIdx++)
  {
    sTIDL_LayerPC_t &dataLayer = pOrgTIDLNetStructure.TIDLPCLayers[srcIdx];
    const bool isInputData = (dataLayer.layerType == TIDL_DataLayer) && (dataLayer.numOutBufs > 0);
    if (!isInputData)
    {
      continue;
    }

    /* Disabled filter kept for reference (:TODO: Temp change for RCNN):
       dataLayer.outData[0].dimValues[TIDL_DIM_NUMCH] != 1024 */
    sTIDL_LayerPC_t &normLayer = pOrgTIDLNetStructure.TIDLPCLayers[layerIndex];

    normLayer.layerType  = TIDL_BatchNormLayer;
    normLayer.numInBufs  = 1;
    normLayer.numOutBufs = dataLayer.numOutBufs;
    dataLayer.numOutBufs = 1;
    normLayer.weightsElementSizeInBits = NUM_WHGT_BITS;

    /* The norm layer inherits the original tensor name; the data layer's
       output is renamed and feeds the norm layer. */
    strcpy((char *)normLayer.outDataNames[0], (char *)dataLayer.outDataNames[0]);
    strcat((char *)dataLayer.outDataNames[0], "_original");
    strcpy((char *)normLayer.inDataNames[0], (char *)dataLayer.outDataNames[0]);

    normLayer.outData[0]           = dataLayer.outData[0];
    normLayer.outConsumerCnt[0]    = dataLayer.outConsumerCnt[0];
    normLayer.outConsumerLinked[0] = dataLayer.outConsumerLinked[0];

    /* Copies back the descriptor that was just taken from the data layer. */
    dataLayer.outData[0] = normLayer.outData[0];

    normLayer.outData[0].elementType = tidl_getElementType(1);
    normLayer.actParams.actType      = TIDL_NoAct;

    /* The data layer now has exactly one, already linked, consumer. */
    dataLayer.outConsumerCnt[0]    = 1;
    dataLayer.outConsumerLinked[0] = 1;
    dataLayer.outData[0].dataId    = (*dataIndex)++;

    normLayer.inData[0] = dataLayer.outData[0];

    const int32_t numEntries = normLayer.outData[0].dimValues[1];

    normLayer.weights.ptr     = my_malloc(numEntries*sizeof(float));
    normLayer.weights.bufSize = numEntries;
    normLayer.bias.ptr        = my_malloc(numEntries*sizeof(float));
    normLayer.bias.bufSize    = numEntries;
    float * const offset = (float *)normLayer.bias.ptr;
    float * const scale  = (float *)normLayer.weights.ptr;

    for (int32_t entry = 0; entry < numEntries; entry++)
    {
      if (paramCursor >= TIDL_MAX_ALG_IN_BUFS*TIDL_IN_NUF_MAX_CH)
      {
        printf(" normParamIdx >= TIDL_MAX_ALG_IN_BUFS*TIDL_IN_NUF_MAX_CH \n ");
        exit(0);
      }
      scale[entry]  = params->inScale[paramCursor];
      offset[entry] = - (params->inMean[paramCursor]* scale[entry]);
      paramCursor++;
    }

    normLayer.numMacs =
      (int64_t)((int64_t)normLayer.outData[0].dimValues[0] * normLayer.outData[0].dimValues[1] *
        normLayer.outData[0].dimValues[2] * normLayer.outData[0].dimValues[3] * 4);

    TIDL_UpdateInDataBuff(&pOrgTIDLNetStructure, layerIndex, normLayer.outData[0]);

    layerIndex++;
  }

  pOrgTIDLNetStructure.numLayers = layerIndex;
  return 0;
}

/* Meta-architecture configuration tables; declared here, defined elsewhere. */
/* SSD configuration entries (presumably one per TF SSD meta layer; confirm at the definition). */
extern TIDL_TFSSDConfig_t tidl_TFSSDConfigs[TIDL_MAX_TF_SSD_LAYERS];
/* Faster-RCNN configuration entries; tidl_addFasterRCNNLayersToNet() reads entry 0. */
extern TIDL_TFFasterRCNNConfig_t tidl_TFFasterRCNNConfigs[TIDL_MAX_TF_FASTER_RCNN_LAYERS];

/* Count of meta-architecture entries; tidl_addFasterRCNNLayersToNet() fails when it is below 1. */
extern int32_t numTFMetaLayers;

int32_t tidl_addFasterRCNNLayersToNet(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t * numLayers, int32_t * dataIndex, tidl_import_config * params)
{

    uint32_t boxFlattenLayerId;
    uint32_t classFlattenLayerId;
    uint32_t featureLayerIdx;
    uint32_t secondStageLayerIdx;
    int32_t roiPoolingLayerIdx =0;
    int32_t detOutLayerIdx;
    int32_t secondStagePostProcessingLayerIdx;
    int32_t dataIdIdx =0;
    int32_t i, j;
    int32_t numHeads = 1;
    int32_t layerIndex;
    float widthStride;
    float heightStride;
    int32_t numOutputPerDataObject;
    sTIDL_DetectOutputParams_t * odPostProcessingParams;
    sTIDL_DetectOutputParams_t * doParams;
    if (numTFMetaLayers < 1)
    {
      printf("Could not find Meta Arch confg file \n");
      return -1;
    }
    TIDL_TFFasterRCNNConfig_t &tidl_TFFasterRCNNConfig = tidl_TFFasterRCNNConfigs[0];

    layerIndex = *numLayers;


    /* Add detection output layer, data id for this layer is assigned later which will be connected to ROI pooling layer */
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType = TIDL_DetectionOutputLayer;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs = 2;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tf_faster_rcnn_detection_output_layer");
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 0;

    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerParams.detectOutParams.numKeypoints = 0;

    numOutputPerDataObject = 7;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].numDim       = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[1] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[2] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[3] = 4 + tidl_TFFasterRCNNConfig.firstStageConfig.max_proposals * numOutputPerDataObject;/* Reusing same structure as ssd*/
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].elementType = TIDL_SinglePrecFloat;
    detOutLayerIdx = layerIndex;
    layerIndex++;

    doParams = &pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].layerParams.detectOutParams;
    numHeads = 1;

    doParams->processingType = TIDL_processingTypeDetectionOutput;
    doParams->numClasses    = 2;
    doParams->topK              = tidl_TFFasterRCNNConfig.firstStageConfig.max_proposals;//:TODO: Check this
    doParams->keepTopK          = tidl_TFFasterRCNNConfig.firstStageConfig.max_proposals;//:TODO: Check this
    doParams->nmsThreshold      = tidl_TFFasterRCNNConfig.firstStageConfig.nms_iou_threshold;
    doParams->confThreshold     = tidl_TFFasterRCNNConfig.firstStageConfig.nms_score_threshold;
    doParams->backgroundLabelId = 0;
    doParams->codeType          = 0;
    doParams->varianceEncoded   = 0;
    doParams->eta               = 0;
    doParams->numKeypoints      = 0;
    doParams->shareLocation     = 1;
    doParams->imWidth  = params->inWidth[0];
    doParams->imHeight = params->inHeight[0];
    doParams->metaArchType = TIDL_metaArchTFFasterRcnn;
    doParams->numHeads     = numHeads;

    orgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].priorBox.ptr    = (void*)malloc(sizeof(sTIDL_AnchorBoxParams_t)*numHeads);
    orgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].priorBox.bufSize = (sizeof(sTIDL_AnchorBoxParams_t)*numHeads)/sizeof(float);
    sTIDL_AnchorBoxParams_t *anchorBoxParams = (sTIDL_AnchorBoxParams_t*) orgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].priorBox.ptr;

    /* Find the data layer which matches the output data list as mentioned in config file */
    for (i = 0; i < layerIndex; i++)
    {
      if ((pOrgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_DataLayer) && (pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs ==1)
         && (strcmp((const char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0],(const char *)outDataNames[0]) == 0))
      {
        int32_t dataSize;
        /* Rename this data layer to Flatten Layer */
        pOrgTIDLNetStructure.TIDLPCLayers[i].layerType  = TIDL_FlattenLayer;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs  = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0];
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dataId = (*dataIndex)++;/* assign a new output data id which will be connected to detection output layer*/
        strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].inDataNames[0]);
        strcat((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], "_flatten");


        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[0] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[1] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[2] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[3] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[0] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[1] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[3];
        anchorBoxParams[0].headWidth  = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[3];
        anchorBoxParams[0].headHeight = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2];
        boxFlattenLayerId = i;
      }
    }

    for (i = 0; i < layerIndex; i++)
    {
      /* Store the feature layer index, to be used later */
      if (strcmp((const char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (const char *)"FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block3/unit_6/bottleneck_v1/Relu") == 0)
      {
        featureLayerIdx = i;
      }
      if (strcmp((const char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (const char *)"MaxPool2D/MaxPool") == 0)
      {
        secondStageLayerIdx = i;
      }
      if ((pOrgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_DataLayer) && (pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs == 1)
        && (strcmp((const char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (const char *)outDataNames[1]) == 0))
      {
        int32_t dataSize;
        pOrgTIDLNetStructure.TIDLPCLayers[i].layerType = TIDL_FlattenLayer;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0];
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dataId = (*dataIndex)++;
        strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].inDataNames[0]);
        strcat((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], "_flatten");

        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[0] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[1] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[2] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[3] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[0] *
          pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[1] *
          pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2] *
          pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2];
        classFlattenLayerId = i;
      }
    }

    pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dataId = (*dataIndex)++;

    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].inDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[boxFlattenLayerId].outDataNames[0]);
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].inDataNames[1], (char *)pOrgTIDLNetStructure.TIDLPCLayers[classFlattenLayerId].outDataNames[0]);

    pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].inData[0] = pOrgTIDLNetStructure.TIDLPCLayers[boxFlattenLayerId].outData[0];
    pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].inData[1] = pOrgTIDLNetStructure.TIDLPCLayers[classFlattenLayerId].outData[0];
    pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].outData[0].dataId = (*dataIndex)++;

    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType = TIDL_RoiPoolingLayer;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs = 2;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tf_faster_rcnn_roi_pooling_layer");
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 0;

    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].inDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[featureLayerIdx].outDataNames[0]);
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].inDataNames[1], (char *)pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].outDataNames[0]);

    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].numDim       = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[0] = tidl_TFFasterRCNNConfig.firstStageConfig.max_proposals;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[1] = (tidl_TFFasterRCNNConfig.initial_crop_size /  tidl_TFFasterRCNNConfig.maxpool_kernel_size);
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[2] = (tidl_TFFasterRCNNConfig.initial_crop_size /  tidl_TFFasterRCNNConfig.maxpool_kernel_size);
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[3] = pOrgTIDLNetStructure.TIDLPCLayers[featureLayerIdx].outData[0].dimValues[TIDL_DIM_NUMCH];
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].elementType = TIDL_SinglePrecFloat;
    roiPoolingLayerIdx = layerIndex;
    layerIndex++;

    pOrgTIDLNetStructure.TIDLPCLayers[roiPoolingLayerIdx].inData[0] = pOrgTIDLNetStructure.TIDLPCLayers[featureLayerIdx].outData[0];
    pOrgTIDLNetStructure.TIDLPCLayers[roiPoolingLayerIdx].inData[1] = pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].outData[0];

    pOrgTIDLNetStructure.TIDLPCLayers[roiPoolingLayerIdx].outData[0]= pOrgTIDLNetStructure.TIDLPCLayers[secondStageLayerIdx].outData[0];
    pOrgTIDLNetStructure.TIDLPCLayers[roiPoolingLayerIdx].outConsumerCnt[0] = 2;
    pOrgTIDLNetStructure.TIDLPCLayers[roiPoolingLayerIdx].outConsumerLinked[0] = 2;
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[roiPoolingLayerIdx].outDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[secondStageLayerIdx].outDataNames[0]);

    /* Remove one of the layer which was created because of second part of the network */
    pOrgTIDLNetStructure.TIDLPCLayers[secondStageLayerIdx].numInBufs = -1;
    pOrgTIDLNetStructure.TIDLPCLayers[secondStageLayerIdx].numOutBufs = -1;

    /* Add second stage post processing layer, data id for this layer is assigned later which will be connected to ROI pooling layer */
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType = TIDL_OdPostProcessingLayer;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs = 3;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tf_faster_rcnn_post_processing_layer");
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 0;

    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dataId = (*dataIndex)++;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].numDim       = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[1] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[2] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[3] = 4 * tidl_TFFasterRCNNConfig.secondStageConfig.max_detections_per_class *
                                                                                                                            tidl_TFFasterRCNNConfig.secondStageConfig.max_total_detections;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].elementType = TIDL_SinglePrecFloat;
    secondStagePostProcessingLayerIdx = layerIndex;
    layerIndex++;

    odPostProcessingParams =
       &pOrgTIDLNetStructure.TIDLPCLayers[secondStagePostProcessingLayerIdx].layerParams.detectOutParams;
    odPostProcessingParams->processingType = TIDL_processingTypeOdPostProcessing;
    odPostProcessingParams->numClasses    = tidl_TFFasterRCNNConfig.num_classes;
    odPostProcessingParams->topK              = tidl_TFFasterRCNNConfig.secondStageConfig.max_detections_per_class;//:TODO: Check this
    odPostProcessingParams->keepTopK          = tidl_TFFasterRCNNConfig.secondStageConfig.max_total_detections;//:TODO: Check this
    odPostProcessingParams->nmsThreshold      = tidl_TFFasterRCNNConfig.secondStageConfig.nms_iou_threshold;
    odPostProcessingParams->confThreshold     = tidl_TFFasterRCNNConfig.secondStageConfig.nms_score_threshold;
    odPostProcessingParams->backgroundLabelId = 0;
    odPostProcessingParams->codeType          = 0;
    odPostProcessingParams->varianceEncoded   = 0;
    odPostProcessingParams->eta               = 0;
    odPostProcessingParams->numKeypoints      = 0;
    odPostProcessingParams->shareLocation     = 1;
    odPostProcessingParams->imWidth  = params->inWidth[0];
    odPostProcessingParams->imHeight = params->inHeight[0];
    odPostProcessingParams->metaArchType = TIDL_metaArchTFFasterRcnn;
    odPostProcessingParams->numHeads     = 1;

    for (i = 0; i < layerIndex; i++)
    {
      if ((pOrgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_DataLayer) && (pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs == -1) &&
        (pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs == 1))
      {
        int32_t dataSize;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs = -1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs = -1;
        pOrgTIDLNetStructure.TIDLPCLayers[secondStagePostProcessingLayerIdx].inData[dataIdIdx] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0];

        strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[secondStagePostProcessingLayerIdx].inDataNames[dataIdIdx], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].inDataNames[0]);

        dataIdIdx++;
      }
    }
    /* Connect detection output layer output to second stage post processing layer*/
    pOrgTIDLNetStructure.TIDLPCLayers[secondStagePostProcessingLayerIdx].inData[dataIdIdx] = pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].outData[0];
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[secondStagePostProcessingLayerIdx].inDataNames[dataIdIdx], (char *)pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].outDataNames[0]);

    sTIDL_RoiPoolingLayerParams_t * roiPoolingParams = &pOrgTIDLNetStructure.TIDLPCLayers[roiPoolingLayerIdx].layerParams.roiPoolingParams;
    roiPoolingParams->poolingType = TIDL_RoiPoolingFasterRCNN;
    roiPoolingParams->imWidth  =params->inWidth[0];
    roiPoolingParams->imHeight = params->inHeight[0];

    float scales[TIDL_MAX_AR_PER_HEAD];
    float aspect_ratios[TIDL_MAX_AR_PER_HEAD];
    int num_ars, min_im_shape;
    float ratio_sqrts;

    for (j = 0; j < tidl_TFFasterRCNNConfig.firstStageConfig.num_aspect_ratios; j++)
    {
      aspect_ratios[j] = tidl_TFFasterRCNNConfig.firstStageConfig.aspect_ratios[j];
    }

    for (j = 0; j < tidl_TFFasterRCNNConfig.firstStageConfig.num_scales; j++)
    {
      scales[j] = tidl_TFFasterRCNNConfig.firstStageConfig.scales[j];
    }

    num_ars = tidl_TFFasterRCNNConfig.firstStageConfig.num_aspect_ratios * tidl_TFFasterRCNNConfig.firstStageConfig.num_scales;

    widthStride =  tidl_TFFasterRCNNConfig.firstStageConfig.width_stride;
    heightStride = tidl_TFFasterRCNNConfig.firstStageConfig.height_stride;

    tidl_TFFasterRCNNConfig.firstStageConfig.width_stride =  widthStride;
    tidl_TFFasterRCNNConfig.firstStageConfig.height_stride = heightStride;

    anchorBoxParams[0].numAnchors = num_ars;
    anchorBoxParams[0].numKeyPoints =0;
    anchorBoxParams[0].strideW = tidl_TFFasterRCNNConfig.firstStageConfig.width_stride;
    anchorBoxParams[0].strideH = tidl_TFFasterRCNNConfig.firstStageConfig.height_stride;
    anchorBoxParams[0].offsetW =tidl_TFFasterRCNNConfig.firstStageConfig.width_offset;
    anchorBoxParams[0].offsetH = tidl_TFFasterRCNNConfig.firstStageConfig.height_offset;

    anchorBoxParams[0].boxScales[0] = 10.0;//tidl_TFSSDConfig.y_scale;
    anchorBoxParams[0].boxScales[1] = 10.0;//tidl_TFSSDConfig.x_scale;
    anchorBoxParams[0].boxScales[2] = 5.0;//tidl_TFSSDConfig.height_scale;
    anchorBoxParams[0].boxScales[3] = 5.0;//tidl_TFSSDConfig.width_scale;

    /* Need to fill
    anchorBoxParams[i].kpScales[0] = 0;
    */
    for ( j = 0; j < tidl_TFFasterRCNNConfig.firstStageConfig.num_aspect_ratios ; j++)
    {
      ratio_sqrts = sqrt(aspect_ratios[j]);
      for (i = 0; i < tidl_TFFasterRCNNConfig.firstStageConfig.num_scales; i++)
      {
        anchorBoxParams[0].boxHeight[i + (j * tidl_TFFasterRCNNConfig.firstStageConfig.num_scales)] = (scales[i] / ratio_sqrts) * tidl_TFFasterRCNNConfig.firstStageConfig.height;
        anchorBoxParams[0].boxWidth[i + (j * tidl_TFFasterRCNNConfig.firstStageConfig.num_scales)]  = scales[i] * ratio_sqrts * tidl_TFFasterRCNNConfig.firstStageConfig.width;
      }
    }
    *numLayers = layerIndex;

  return 0;
}

/* Detection post-processing tables that are defined elsewhere in the project.
 * Each table has TIDL_MAX_TF_SSD_LAYERS entries; tidl_addTidlSSDPostProc()
 * below only reads entry [0] of each. */
/* Prior-box buffers; entry [0] becomes the detection layer's priorBox.ptr. */
extern float *tidlPriorPtrs[TIDL_MAX_TF_SSD_LAYERS];
/* Detection-output parameter sets; entry [0] is copied into the detection layer. */
extern sTIDL_DetectOutputParams_t tidl_OdPostProc[TIDL_MAX_TF_SSD_LAYERS];
/* Names matched against data-layer output names to locate the box inputs. */
extern uint8_t tidl_OdBoxInputNames[TIDL_MAX_TF_SSD_LAYERS][TIDL_MAX_ALG_OUT_BUFS][TIDL_STRING_SIZE];
/* Names matched against data-layer output names to locate the class inputs. */
extern uint8_t tidl_OdClassInputNames[TIDL_MAX_TF_SSD_LAYERS][TIDL_MAX_ALG_OUT_BUFS][TIDL_STRING_SIZE];
/* Not referenced by the functions visible in this part of the file. */
extern uint8_t tidl_OdOutputNames[TIDL_MAX_TF_SSD_LAYERS][TIDL_STRING_SIZE];


/*
 * Appends the TIDL-SSD post-processing layers to the network:
 *   - a concat layer for the box heads,
 *   - a concat layer for the class heads,
 *   - a detection output layer that consumes both concat outputs.
 *
 * Existing data layers whose output name matches tidl_OdBoxInputNames[0][h] /
 * tidl_OdClassInputNames[0][h] are converted in place into flatten layers and
 * wired to input h of the corresponding concat layer. Names are matched in
 * order: head h+1 is only searched for after head h has been found.
 *
 * pOrgTIDLNetStructure : network being built (modified in place).
 * numLayers            : in/out, number of layers in use; updated on return.
 * dataIndex            : in/out, next free data id; advanced for every new output.
 * params               : import configuration (not used by this function).
 *
 * Returns 0.
 */
int32_t tidl_addTidlSSDPostProc(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t * numLayers, int32_t * dataIndex, tidl_import_config * params)
{
  int32_t i;
  int32_t hidx;
  int32_t keep_top_k;
  int32_t numOutDataPerObject;
  int32_t boxCatLayerIdx;
  int32_t classCatLayerIdx;
  int32_t detOutLayerIdx;
  int32_t layerIndex = *numLayers;
  const int32_t numHeads = tidl_OdPostProc[0].numHeads;

  (void)params;

  /* Concat layer gathering the flattened box heads. */
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType  = TIDL_ConcatLayer;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs  = numHeads;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
  strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tidl_ssd_box_conat_layer");
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerParams.concatParams.axis = TIDL_DIM_WIDTH;
  boxCatLayerIdx = layerIndex;
  layerIndex++;

  /* Concat layer gathering the flattened class heads. */
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType = TIDL_ConcatLayer;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs = numHeads;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
  strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tidl_ssd_class_conat_layer");
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerParams.concatParams.axis = TIDL_DIM_WIDTH;
  classCatLayerIdx = layerIndex;
  layerIndex++;

  /* Detection output layer: input 0 = box concat, input 1 = class concat. */
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType = TIDL_DetectionOutputLayer;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs = 2;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
  strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tidl_ssd_detection_output_layer");
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 0;

  strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].inDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outDataNames[0]);
  strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].inDataNames[1], (char *)pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outDataNames[0]);

  /* Start from the pre-parsed parameter set and force the head count used here. */
  sTIDL_DetectOutputParams_t * doParams =
    &pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerParams.detectOutParams;
  *doParams = tidl_OdPostProc[0];
  doParams->numHeads = numHeads;

  /* Output is a flat vector of 4 + keepTopK * (7 + 2 * numKeypoints) floats. */
  numOutDataPerObject = 7 + doParams->numKeypoints * 2;
  keep_top_k = doParams->keepTopK;

  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].numDim       = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[0] = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[1] = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[2] = 1;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[3] = 4 + keep_top_k*numOutDataPerObject;
  pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].elementType = TIDL_SinglePrecFloat;
  detOutLayerIdx = layerIndex;
  layerIndex++;

  /* Attach the prior-box buffer to the network passed in by the caller
     (the original wrote to the global orgTIDLNetStructure here). */
  pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].priorBox.ptr     = (void*)tidlPriorPtrs[0];
  pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].priorBox.bufSize = doParams->priorBoxSize;

  /* Box heads: turn each matching data layer into a flatten layer feeding the box concat. */
  hidx = 0;
  for (i = 0; i < layerIndex; i++)
  {
    /* hidx is bounded so neither the name table nor the concat inputs are indexed past numHeads. */
    if ((pOrgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_DataLayer) && (pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs == 1)
        && (hidx < numHeads)
        && (strcmp((const char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (const char *)tidl_OdBoxInputNames[0][hidx]) == 0))
    {
      pOrgTIDLNetStructure.TIDLPCLayers[i].layerType  = TIDL_FlattenLayer;
      pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs  = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0];
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dataId = (*dataIndex)++;
      strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].inDataNames[0]);
      strcat((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], "_flatten");

      /* Flattened shape: all four input dimensions collapsed into dimValues[3]. */
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[0] = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[1] = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[2] = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[3] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[0] *
                                                                      pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[1] *
                                                                      pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2] *
                                                                      pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[3];
      pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].inData[hidx] = pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0];
      strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].inDataNames[hidx], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0]);
      hidx++;
    }
  }

  /* Class heads: same conversion, feeding the class concat. */
  hidx = 0;
  for (i = 0; i < layerIndex; i++)
  {
    if ((pOrgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_DataLayer) && (pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs == 1)
        && (hidx < numHeads)
        && (strcmp((const char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (const char *)tidl_OdClassInputNames[0][hidx]) == 0))
    {
      pOrgTIDLNetStructure.TIDLPCLayers[i].layerType = TIDL_FlattenLayer;
      pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0];
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dataId = (*dataIndex)++;
      strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].inDataNames[0]);
      strcat((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], "_flatten");

      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[0] = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[1] = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[2] = 1;
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[3] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[0] *
                                                                      pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[1] *
                                                                      pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2] *
                                                                      pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[3];
      pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].inData[hidx] = pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0];
      strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].inDataNames[hidx], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0]);
      hidx++;
    }
  }

  /* Concat outputs: start from input 0 and add up dimValues[3] of the remaining inputs. */
  pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outData[0]    = pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].inData[0];
  pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outData[0]  = pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].inData[0];
  for (i = 1; i < numHeads; i++)
  {
    pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outData[0].dimValues[3]   += pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].inData[i].dimValues[3];
    pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outData[0].dimValues[3] += pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].inData[i].dimValues[3];
  }
  pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outData[0].dataId = (*dataIndex)++;
  pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outData[0].dataId = (*dataIndex)++;

  /* Wire the concat outputs into the detection layer and give its output a data id. */
  pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].inData[0] = pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outData[0];
  pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].inData[1] = pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outData[0];
  pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].outData[0].dataId = (*dataIndex)++;

  *numLayers = layerIndex;
  return 0;
}

/*
 * Appends the meta-architecture (detection post-processing) layers selected by
 * params->metaArchType to the network and updates pOrgTIDLNetStructure.numLayers.
 *
 *   TIDL_metaArchTFSSD        : built inline below (box concat, class concat,
 *                               detection output layer and per-head anchor boxes).
 *   TIDL_metaArchTFFasterRcnn : delegated to tidl_addFasterRCNNLayersToNet().
 *   TIDL_metaArchTidlSsd      : delegated to tidl_addTidlSSDPostProc().
 *   anything else             : no layers are added.
 *
 * pOrgTIDLNetStructure : network being built (modified in place).
 * layerIndex           : number of layers already in use.
 * dataIndex            : in/out, next free data id.
 * params               : import configuration (metaArchType, inWidth[0], inHeight[0]).
 *
 * Returns 0 on success, -1 if no TF SSD meta-arch config was loaded or the
 * anchor-box buffer cannot be allocated.
 */
int32_t tidl_addMetaArchLayersTONet(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex, int32_t * dataIndex, tidl_import_config * params)
{
  int32_t i, j;

  if (params->metaArchType == TIDL_metaArchTFSSD)
  {
    int32_t hidx;
    int32_t numOutDataPerObject;
    int32_t boxCatLayerIdx;
    int32_t classCatLayerIdx;
    int32_t detOutLayerIdx;
    /* NOTE(review): the detection output is sized for a fixed 40 detections even
       though keepTopK is taken from the config further down - confirm that a
       config with max_total_detections > 40 is handled downstream. */
    const int32_t keep_top_k = 40;

    if (numTFMetaLayers < 1)
    {
      printf("Could not find Meta Arch confg file \n");
      return -1;
    }
    TIDL_TFSSDConfig_t &tidl_TFSSDConfig = tidl_TFSSDConfigs[0];
    const int32_t numHeads = tidl_TFSSDConfig.num_layers;

    /* Concat layer gathering the flattened box heads. */
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType  = TIDL_ConcatLayer;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs  = numHeads;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tf_ssd_box_conat_layer");
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerParams.concatParams.axis = TIDL_DIM_WIDTH;
    boxCatLayerIdx = layerIndex;
    layerIndex++;

    /* Concat layer gathering the flattened class heads. */
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType = TIDL_ConcatLayer;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs = numHeads;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tf_ssd_class_conat_layer");
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerParams.concatParams.axis = TIDL_DIM_WIDTH;
    classCatLayerIdx = layerIndex;
    layerIndex++;

    /* Detection output layer: input 0 = box concat, input 1 = class concat. */
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerType = TIDL_DetectionOutputLayer;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numInBufs = 2;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].numOutBufs = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].weightsElementSizeInBits = NUM_WHGT_BITS;
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outDataNames[0], "tf_ssd_detection_output_layer");
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerCnt[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outConsumerLinked[0] = 0;

    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].inDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outDataNames[0]);
    strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].inDataNames[1], (char *)pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outDataNames[0]);

    /* Output is a flat vector of 4 + keep_top_k * (7 + 2 * numKeypoints) floats. */
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerParams.detectOutParams.numKeypoints = 0;
    numOutDataPerObject = 7 + pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].layerParams.detectOutParams.numKeypoints * 2;

    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].numDim       = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[0] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[1] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[2] = 1;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].dimValues[3] = 4 + keep_top_k*numOutDataPerObject;
    pOrgTIDLNetStructure.TIDLPCLayers[layerIndex].outData[0].elementType = TIDL_SinglePrecFloat;
    detOutLayerIdx = layerIndex;
    layerIndex++;

    /* Detection parameters taken from the TF SSD config and the input size. */
    sTIDL_DetectOutputParams_t * doParams =
      &pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].layerParams.detectOutParams;

    doParams->numClasses        = tidl_TFSSDConfig.num_classes;
    doParams->topK              = tidl_TFSSDConfig.max_detections_per_class;
    doParams->keepTopK          = tidl_TFSSDConfig.max_total_detections;
    doParams->nmsThreshold      = tidl_TFSSDConfig.iou_threshold;
    doParams->confThreshold     = tidl_TFSSDConfig.score_threshold;
    doParams->backgroundLabelId = 0;
    doParams->codeType          = 0;
    doParams->varianceEncoded   = 0;
    doParams->eta               = 0;
    doParams->numKeypoints      = 0;
    doParams->shareLocation     = 1;
    doParams->imWidth  = params->inWidth[0];
    doParams->imHeight = params->inHeight[0];
    doParams->metaArchType = TIDL_metaArchTFSSD;
    doParams->numHeads     = numHeads;

    /* One sTIDL_AnchorBoxParams_t per head, stored as the layer's prior-box buffer
       (bufSize is expressed in floats). The buffer is zeroed so that fields not
       filled below have a defined value. The original wrote this through the
       global orgTIDLNetStructure; the caller's network is used instead. */
    sTIDL_AnchorBoxParams_t *anchorBoxParams = (sTIDL_AnchorBoxParams_t*)malloc(sizeof(sTIDL_AnchorBoxParams_t)*numHeads);
    if ((anchorBoxParams == NULL) && (numHeads > 0))
    {
      printf("Could not allocate memory for anchor box parameters \n");
      return -1;
    }
    if (anchorBoxParams != NULL)
    {
      memset(anchorBoxParams, 0, sizeof(sTIDL_AnchorBoxParams_t)*numHeads);
    }
    pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].priorBox.ptr     = (void*)anchorBoxParams;
    pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].priorBox.bufSize = (sizeof(sTIDL_AnchorBoxParams_t)*numHeads)/sizeof(float);

    /* Box heads are expected at the even entries of outDataNames (2*h). Each
       matching data layer becomes a flatten layer feeding the box concat. */
    hidx = 0;
    for (i = 0; i < layerIndex; i++)
    {
      /* hidx is bounded so anchorBoxParams[] is never written past numHeads entries. */
      if ((pOrgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_DataLayer) && (pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs == 1)
         && (hidx < numHeads)
         && (strcmp((const char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (const char *)outDataNames[2*hidx]) == 0))
      {
        pOrgTIDLNetStructure.TIDLPCLayers[i].layerType  = TIDL_FlattenLayer;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs  = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0];
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dataId = (*dataIndex)++;
        strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].inDataNames[0]);
        strcat((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], "_flatten");

        /* Flattened shape: all four input dimensions collapsed into dimValues[3]. */
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[0] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[1] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[2] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[3] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[0] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[1] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[3];
        pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].inData[hidx] = pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0];
        strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].inDataNames[hidx], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0]);
        /* Remember the head's spatial size; it drives the anchor stride below. */
        anchorBoxParams[hidx].headWidth  = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[3];
        anchorBoxParams[hidx].headHeight = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2];
        hidx++;
      }
    }

    /* Class heads are expected at the odd entries of outDataNames (2*h + 1). */
    hidx = 0;
    for (i = 0; i < layerIndex; i++)
    {
      if ((pOrgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_DataLayer) && (pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs == 1)
        && (hidx < numHeads)
        && (strcmp((const char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (const char *)outDataNames[2*hidx+1]) == 0))
      {
        pOrgTIDLNetStructure.TIDLPCLayers[i].layerType = TIDL_FlattenLayer;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numInBufs = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0];
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dataId = (*dataIndex)++;
        strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].inDataNames[0]);
        strcat((char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0], "_flatten");

        /* Same flattening as for the box heads. The original multiplied
           dimValues[2] twice and never used dimValues[3]. */
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[0] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[1] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[2] = 1;
        pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[3] = pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[0] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[1] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[2] *
                                                                       pOrgTIDLNetStructure.TIDLPCLayers[i].inData[0].dimValues[3];
        pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].inData[hidx] = pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0];
        strcpy((char *)pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].inDataNames[hidx], (char *)pOrgTIDLNetStructure.TIDLPCLayers[i].outDataNames[0]);
        hidx++;
      }
    }

    /* Concat outputs: start from input 0 and add up dimValues[3] of the remaining inputs. */
    pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outData[0]    = pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].inData[0];
    pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outData[0]  = pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].inData[0];
    for (i = 1; i < numHeads; i++)
    {
      pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outData[0].dimValues[3]   += pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].inData[i].dimValues[3];
      pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outData[0].dimValues[3] += pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].inData[i].dimValues[3];
    }
    pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outData[0].dataId = (*dataIndex)++;
    pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outData[0].dataId = (*dataIndex)++;

    /* Wire the concat outputs into the detection layer and give its output a data id. */
    pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].inData[0] = pOrgTIDLNetStructure.TIDLPCLayers[boxCatLayerIdx].outData[0];
    pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].inData[1] = pOrgTIDLNetStructure.TIDLPCLayers[classCatLayerIdx].outData[0];
    pOrgTIDLNetStructure.TIDLPCLayers[detOutLayerIdx].outData[0].dataId = (*dataIndex)++;

    /* ---- Anchor box generation, one set per head ---- */
    float scales[TIDL_MAX_AR_PER_HEAD];
    float aspect_ratios[TIDL_MAX_AR_PER_HEAD];
    int num_ars, min_im_shape;
    float ratio_sqrts;

    /* Rescale the base anchor by (shorter image side / image side). The ratio is
       computed in floating point: an integer division would collapse it to 0 for
       the longer side of a non-square input. Note that this updates the shared
       config entry in place. */
    min_im_shape = doParams->imHeight < doParams->imWidth ? doParams->imHeight : doParams->imWidth;
    tidl_TFSSDConfig.base_anchor_height = ((float)min_im_shape / (float)doParams->imHeight) * tidl_TFSSDConfig.base_anchor_height;
    tidl_TFSSDConfig.base_anchor_width  = ((float)min_im_shape / (float)doParams->imWidth) * tidl_TFSSDConfig.base_anchor_width;

    for (i = 0; i < numHeads; i++)
    {
      if ((i == 0) && (tidl_TFSSDConfig.reduce_boxes_in_lowest_layer))
      {
        /* Reduced set for the first head: three fixed (scale, aspect ratio) pairs. */
        num_ars = 3;
        scales[0] = 0.1;
        aspect_ratios[0] = 1.0;
        scales[1] = tidl_TFSSDConfig.scales[i];
        aspect_ratios[1] = 2.0;
        scales[2] = tidl_TFSSDConfig.scales[i];
        aspect_ratios[2] = 0.5;
      }
      else
      {
        /* One anchor per configured aspect ratio, all at this head's scale.
           The original indexed aspect_ratios by the head index i, which gave
           every anchor of a head the same aspect ratio. */
        for (j = 0; j < tidl_TFSSDConfig.num_aspect_ratios; j++)
        {
          scales[j] = tidl_TFSSDConfig.scales[i];
          aspect_ratios[j] = tidl_TFSSDConfig.aspect_ratios[j];
        }
        num_ars = tidl_TFSSDConfig.num_aspect_ratios;
        if (tidl_TFSSDConfig.interpolated_scale_aspect_ratio > 0.0)
        {
          /* Extra anchor at the geometric mean of this scale and the next one.
             NOTE(review): reads scales[i + 1], so the config is presumably
             expected to hold num_layers + 1 scales - confirm against the parser. */
          scales[num_ars] = sqrt(tidl_TFSSDConfig.scales[i] * tidl_TFSSDConfig.scales[i + 1]);
          aspect_ratios[num_ars] = tidl_TFSSDConfig.interpolated_scale_aspect_ratio;
          num_ars += 1;
        }
      }

      /* Stride is the reciprocal of the head size; the offset is half a stride. */
      tidl_TFSSDConfig.width_stride[i] = 1.0 / anchorBoxParams[i].headWidth;
      tidl_TFSSDConfig.height_stride[i] = 1.0 / anchorBoxParams[i].headHeight;
      tidl_TFSSDConfig.width_offset[i] = 0.5 * tidl_TFSSDConfig.width_stride[i];
      tidl_TFSSDConfig.height_offset[i] = 0.5 * tidl_TFSSDConfig.height_stride[i];

      anchorBoxParams[i].numAnchors = num_ars;
      anchorBoxParams[i].numKeyPoints = tidl_TFSSDConfig.num_keypoints;
      anchorBoxParams[i].strideW = tidl_TFSSDConfig.width_stride[i];
      anchorBoxParams[i].strideH = tidl_TFSSDConfig.height_stride[i];
      anchorBoxParams[i].offsetW = tidl_TFSSDConfig.width_offset[i];
      anchorBoxParams[i].offsetH = tidl_TFSSDConfig.height_offset[i];

      anchorBoxParams[i].boxScales[0] = tidl_TFSSDConfig.y_scale;
      anchorBoxParams[i].boxScales[1] = tidl_TFSSDConfig.x_scale;
      anchorBoxParams[i].boxScales[2] = tidl_TFSSDConfig.height_scale;
      anchorBoxParams[i].boxScales[3] = tidl_TFSSDConfig.width_scale;

      /* Need to fill
      anchorBoxParams[i].kpScales[0] = 0;
      */
      /* height = scale / sqrt(ar) * base height, width = scale * sqrt(ar) * base width. */
      for (j = 0; j < num_ars; j++)
      {
        ratio_sqrts = sqrt(aspect_ratios[j]);
        anchorBoxParams[i].boxHeight[j] = scales[j] / ratio_sqrts * tidl_TFSSDConfig.base_anchor_height;
        anchorBoxParams[i].boxWidth[j]  = scales[j] * ratio_sqrts * tidl_TFSSDConfig.base_anchor_width;
      }
    }
  }
  else if ( params->metaArchType == TIDL_metaArchTFFasterRcnn )
  {
    tidl_addFasterRCNNLayersToNet(pOrgTIDLNetStructure, &layerIndex, dataIndex, params);
  }
  else if ( params->metaArchType == TIDL_metaArchTidlSsd )
  {
    tidl_addTidlSSDPostProc(pOrgTIDLNetStructure, &layerIndex, dataIndex, params);
  }
  pOrgTIDLNetStructure.numLayers = layerIndex;
  return 0;
}


/*
 * Fills in the output tensor description of every input data layer, i.e. every
 * TIDL_DataLayer among the first layerIndex layers that has numOutBufs > 0.
 *
 * The n-th such layer takes its values from entry n of the per-input arrays in
 * params. Shape values (numRoi, inNumChannels, inHeight, inWidth) are written
 * only when inWidth[0], inHeight[0] and inNumChannels[0] are all different from
 * -1; otherwise only dimValues[0] is forced to 1 and the other dimensions are
 * left untouched. Element type, value range (0..255) and tensor scale are
 * always written.
 *
 * The original body ignored pOrgTIDLNetStructure and edited the global
 * orgTIDLNetStructure; the network passed by the caller is used instead.
 *
 * Returns 0.
 */
int32_t tidl_fillInDataLayerShape(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, tidl_import_config * params, int32_t layerIndex)
{
  int32_t i;
  int32_t inDataIdx = 0;
  /* The override decision is taken from input 0 only and applied to all inputs. */
  const int overWritefirstNode =
    ((params->inWidth[0] == -1) || (params->inHeight[0] == -1) || (params->inNumChannels[0] == -1)) ? 0 : 1;

  for (i = 0; i < layerIndex; i++)
  {
    if ((pOrgTIDLNetStructure.TIDLPCLayers[i].layerType != TIDL_DataLayer) || (pOrgTIDLNetStructure.TIDLPCLayers[i].numOutBufs <= 0))
    {
      continue;
    }

    pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[0] = 1;
    if (overWritefirstNode)
    {
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[0] = params->numRoi[inDataIdx];
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[1] = params->inNumChannels[inDataIdx];
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[2] = params->inHeight[inDataIdx];
      pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].dimValues[3] = params->inWidth[inDataIdx];
    }
    pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].elementType = tidl_getElementType(params->inElementType[inDataIdx]);
    pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].maxTensorValue = 255;
    pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].minTensorValue = 0;
    pOrgTIDLNetStructure.TIDLPCLayers[i].outData[0].tensorScale = params->inQuantFactor[inDataIdx];
    inDataIdx++;
  }
  return 0;
}

#if QUANT_MODIFICATION


/* Returns max(|largest value|, |smallest value|) over data[0..count-1]. */
static float TIDL_dynThrChannelAbsMax(const float * data, uint32_t count)
{
  float max = -FLT_MAX;
  float min = FLT_MAX;
  uint32_t k;
  for (k = 0; k < count; k++)
  {
    max = (max < data[k]) ? data[k] : max;
    min = (min > data[k]) ? data[k] : min;
  }
  max = fabs(max);
  min = fabs(min);
  return (max > min) ? max : min;
}

/*
 * Clamps the values of one parameter buffer in place when the spread between
 * its channels is too large.
 *
 * The buffer is treated as No channels of kerSize consecutive floats. For each
 * channel the absolute maximum is taken; gmax / gmin are the largest / smallest
 * of those. If gmax / gmin exceeds tSpacing, gmin is re-estimated from a
 * histogram of the per-channel maxima (percent_gmin selects the percentile),
 * and if the ratio still exceeds tSpacing every value is clamped to
 * [-gmin * tSpacing, +gmin * tSpacing]. A summary line is printed when the
 * clamping changed the global maximum.
 *
 * data         : parameter buffer, No * kerSize floats, modified in place.
 * dataSize     : not used.
 * No           : number of channels.
 * kerSize      : number of values per channel.
 * tSpacing     : largest allowed gmax / gmin ratio.
 * percent_gmin : percentile (0..100) used when re-estimating gmin.
 * i            : layer index, only used in the printed summary.
 */
void TIDL_dynamicThresholdOneParam(float * data, uint32_t dataSize, uint32_t No, uint32_t kerSize, int32_t tSpacing, float percent_gmin , uint32_t i)
{
  float gmax = -FLT_MAX;
  float gmin = FLT_MAX;
  float orgGmax;
  float threshold = 0; /* only meaningful once clamping has been applied */
  uint32_t j, k;
  int32_t countplus = 0;
  int32_t countminus = 0;
  float *  chMax = (float *)malloc(sizeof(float)*No);
  int32_t *  chHist = (int32_t *)malloc(sizeof(int32_t)*HIST_SIZE);

  (void)dataSize;

  /* malloc(0) may legitimately return NULL, so only treat it as a failure when No > 0. */
  if (((chMax == NULL) && (No > 0)) || (chHist == NULL))
  {
    printf("TIDL_dynamicThresholdOneParam: could not allocate work buffers\n");
    free(chMax);
    free(chHist);
    return;
  }

  /* Per-channel absolute maxima and their global max / min. */
  for (j = 0; j < No; j++)
  {
    const float max = TIDL_dynThrChannelAbsMax(&data[j*kerSize], kerSize);
    gmax = gmax > max ? gmax : max;
    gmin = gmin < max ? gmin : max;
    chMax[j] = max;
  }
  orgGmax = gmax;

  if (gmax / gmin > tSpacing)
  {
    /* Recalculating gmin: frequency based, from the histogram of channel maxima. */
    float x;
    float acc = 0;
    float a2 = 0;
    int32_t binno = 0;

    TIDL_computeHist(chMax, No, chHist, &x);

    /* Running sum.
       NOTE(review): this adds the running total onto the bin's own count
       (count + cumulative) instead of replacing it with the cumulative count;
       kept as in the original so results do not change - confirm whether a
       plain assignment was intended. */
    for (int a1 = 0; a1 < HIST_SIZE; a1++)
    {
      acc += (float)chHist[a1];
      chHist[a1] += (int32_t)acc;
    }
    acc = (acc * percent_gmin)/100; /* percentile target */

    /* First bin whose accumulated count exceeds the target. The bin index is
       bounded so the search cannot read past the end of the histogram. */
    while ((a2 <= acc) && (binno < HIST_SIZE))
    {
      a2 = chHist[binno];
      binno++;
    }
    gmin = (binno + 1)*(gmax / HIST_SIZE);
  }

  if (gmax / gmin > tSpacing) /* thresholding condition, checked again with the new gmin */
  {
    threshold = gmin * tSpacing;
    for (j = 0; j < No; j++)
    {
      for (k = 0; k < kerSize; k++)
      {
        float * const value = &data[j*kerSize + k];
        if (*value > 0)
        {
          if (*value > threshold)
          {
            countplus++;
            *value = threshold;
          }
        }
        else
        {
          if (*value < (-1 * threshold))
          {
            countminus++;
            *value = -1 * threshold;
          }
        }
      }
    }
  }

  /* Global maximum after clamping, to detect whether anything changed. */
  gmax = -FLT_MAX;
  for (j = 0; j < No; j++)
  {
    const float max = TIDL_dynThrChannelAbsMax(&data[j*kerSize], kerSize);
    gmax = gmax > max ? gmax : max;
  }
  if (orgGmax != gmax)
  {
    printf("LyrIdx = %6d, GMIN = %10.7f ORG GMAX = %10.7f,  GMAX = %10.7f  && %10.6f && Squash count = %8d %8d && TPAM = %8d Perc = %10.5f @@THRESHOLD = %10.7f\n", i, gmin, orgGmax, gmax, orgGmax / gmin, countplus, countminus, kerSize*No, (100.0*(countplus+countminus)) / (kerSize*No), threshold);
  }
  free(chMax);
  free(chHist);

}

/*
 * Applies dynamic weight thresholding to the first layerIndex layers.
 *
 * Only convolution layers whose weights are stored in 8 bits or fewer and
 * whose group count equals their input channel count are processed; for
 * those, TIDL_dynamicThresholdOneParam is invoked on the float weight buffer.
 *
 * Note: the body works on the name orgTIDLNetStructure; the reference
 * parameter pOrgTIDLNetStructure is not referenced.
 */
void TIDL_thresholdParams(sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t layerIdx;
  for (layerIdx = 0; layerIdx < layerIndex; layerIdx++)
  {
    /* Inner-product and batch-norm layers are never thresholded here. */
    if (orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerType != TIDL_ConvolutionLayer)
    {
      continue;
    }

    int32_t weightBits = orgTIDLNetStructure.TIDLPCLayers[layerIdx].weightsElementSizeInBits;
    if (weightBits > 8)
    {
      continue;
    }

    int32_t kernelH   = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.kernelH;
    int32_t kernelW   = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.kernelW;
    int32_t numInCh   = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.numInChannels;
    int32_t numOutCh  = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.numOutChannels;
    int32_t numGroups = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.numGroups;

    /* Restrict to layers with one group per input channel. */
    if (numGroups != numInCh)
    {
      continue;
    }

    float  *weights     = (float *)orgTIDLNetStructure.TIDLPCLayers[layerIdx].weights.ptr;
    int32_t weightCount = orgTIDLNetStructure.TIDLPCLayers[layerIdx].weights.bufSize;
    TIDL_dynamicThresholdOneParam(weights, weightCount, numOutCh, (kernelH*kernelW*numInCh / numGroups), 32, 5, layerIdx);

    /* Bias thresholding is intentionally disabled:
       TIDL_dynamicThresholdOneParam(bias, biasSize, No, 1, 32, 50, i+1000); */
  }
}

#endif

/*
 * Rotates a row-major kh x kw matrix by 180 degrees (a horizontal flip
 * followed by a vertical flip).
 *
 * dstPtr : receives the rotated matrix (kw*kh floats).
 * srcPtr : input matrix (kw*kh floats). It is also used as scratch space:
 *          on return it holds the horizontally flipped input.
 * kw, kh : matrix width (columns) and height (rows).
 *
 * dstPtr and srcPtr must not overlap.
 */
void TIDL_transpose2DMatrix(float * dstPtr, float * srcPtr, int32_t kw, int32_t kh)
{
  int32_t i,j;
  /* Horizontal flip: mirror each row left-to-right. The source column is
     (kw - 1 - i) within the SAME row j, so every index stays inside the
     matrix (the first row must not read in front of the buffer). */
  for(j = 0; j < kh; j++)
  {
    for(i = 0; i < kw; i++)
    {
      dstPtr[j*kw + i] = srcPtr[j*kw + (kw - 1 - i)];
    }
  }
  /* Stage the intermediate result in srcPtr so the second pass can write
     dstPtr without reading what it has already overwritten. */
  memcpy(srcPtr,dstPtr,kw*kh*sizeof(float));
  /* Vertical flip: swap the row order top-to-bottom. */
  for(j = 0; j < kh; j++)
  {
    for(i = 0; i < kw; i++)
    {
      dstPtr[j*kw + i] = srcPtr[(kh-j-1)*kw + i];
    }
  }

}

/*
 * Rewrites every stride-1 (both directions) Deconv2D layer among the first
 * layerIndex layers as a convolution layer.
 *
 * The layer type is switched to TIDL_ConvolutionLayer and each kernelH x
 * kernelW weight plane is passed through TIDL_transpose2DMatrix in place.
 * The number of planes visited is numOutChannels * (numInChannels / numGroups).
 *
 * Note: the body works on the name orgTIDLNetStructure; the reference
 * parameter pOrgTIDLNetStructure is not referenced.
 */
void TIDL_convertDeconv2DtoConv(sTIDL_OrgNetwork_t   &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t layerIdx, outCh, inCh;
  for (layerIdx = 0; layerIdx < layerIndex; layerIdx++)
  {
    if (orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerType != TIDL_Deconv2DLayer)
    {
      continue;
    }
    /* Only unit-stride deconvolutions are converted. */
    if ((orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.strideH != 1) ||
        (orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.strideW != 1))
    {
      continue;
    }

    orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerType = TIDL_ConvolutionLayer;

    int32_t kernelW      = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.kernelW;
    int32_t kernelH      = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.kernelH;
    int32_t numOutCh     = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.numOutChannels;
    int32_t inChPerGroup = orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.numInChannels;
    inChPerGroup = inChPerGroup / orgTIDLNetStructure.TIDLPCLayers[layerIdx].layerParams.convParams.numGroups;

    int32_t planeElems = kernelW*kernelH;
    float * weightBase = (float *)orgTIDLNetStructure.TIDLPCLayers[layerIdx].weights.ptr;
    /* One plane of scratch; each weight plane is copied here and then
       written back transformed. */
    float * scratch    = (float *)my_malloc(kernelW*kernelH*sizeof(float));

    for (outCh = 0; outCh < numOutCh; outCh++)
    {
      for (inCh = 0; inCh < inChPerGroup; inCh++)
      {
        float * plane = &weightBase[(outCh*inChPerGroup + inCh)*planeElems];
        memcpy(scratch, plane, kernelW*kernelH*sizeof(float));
        TIDL_transpose2DMatrix(plane, scratch, kernelW, kernelH);
      }
    }
    free(scratch);
  }
}

/*
 * Quantizes the float parameter buffers of the first layerIndex layers and
 * replaces them in place with fixed-point buffers.
 *
 * A debug CSV named "<gParams.outputNetFile>_paramDebug.csv" is opened (and
 * its header row written) before processing and closed at the end; the
 * process exits if the file cannot be opened.
 *
 * Per layer type:
 *  - Convolution / Deconv2D: weights (if bufSize > 0) and, when enableBias is
 *    set, bias (if bufSize > 0). Sets convParams.weightScale / biasScale and
 *    biasB = 1.
 *  - InnerProduct: weights and bias; the corresponding scale is set to -1
 *    when the buffer is empty.
 *  - BatchNorm: weights, bias and, for TIDL_PRelU activation, the slope buffer.
 *  - DetectionOutput: passes the outer filter but has no handling branch.
 *
 * For every quantized buffer the original float buffer is released with
 * my_free and the layer's ptr is redirected to the new buffer. Weights/slope
 * use one byte per element when weightsElementSizeInBits <= 8 and int16_t
 * otherwise; bias is always stored as int16_t.
 *
 * Note: the body works on the name orgTIDLNetStructure; the reference
 * parameter pOrgTIDLNetStructure is not referenced.
 */
void TIDL_importQuantLayerParams(sTIDL_OrgNetwork_t   &pOrgTIDLNetStructure, int32_t layerIndex)
{
  int32_t i;
  char filenameStr[1000];
  /* Bounded formatting: an over-long output path is truncated instead of
     writing past the end of filenameStr. */
  snprintf(filenameStr, sizeof(filenameStr), "%s_paramDebug.csv", gParams.outputNetFile);

  paramDebugFile = fopen(filenameStr, "w+");
  if (paramDebugFile == NULL)
  {
    printf("Coudl not open %s file \n", filenameStr);
    /* NOTE(review): exits with status 0 even though this is a failure path -
       confirm whether callers/scripts rely on that. */
    exit(0);
  }
  fprintf(paramDebugFile, "LayerId , meanDifference, maxDifference, meanOrigFloat, meanRelDifference, orgmax, quantizedMax,orgAtmaxDiff, quantizedAtMaxDiff,maxRelDifference, Scale , , , , Hist \n");

  for (i = 0; i < layerIndex; i++)
  {
    int32_t weightsElementSizeInBits;
    /* Publish the current layer index through the debugLayeId variable. */
    debugLayeId = i;
    if  ((orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_ConvolutionLayer) ||
        (orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_InnerProductLayer) ||
        (orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_Deconv2DLayer) ||
        (orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_BatchNormLayer) ||
        (orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_DetectionOutputLayer)
        )
    {
      float min = FLT_MAX;
      float max = -FLT_MAX;

      if ((orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_ConvolutionLayer) ||
        (orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_Deconv2DLayer))
      {
        weightsElementSizeInBits = orgTIDLNetStructure.TIDLPCLayers[i].weightsElementSizeInBits;

        /* ---- Convolution / deconvolution weights ---- */
        float *  data = (float *)orgTIDLNetStructure.TIDLPCLayers[i].weights.ptr;
        uint32_t dataSize = orgTIDLNetStructure.TIDLPCLayers[i].weights.bufSize;
        if (dataSize > 0)
        {
          /* (bits - 1) / 8 + 1 is the number of bytes needed per element. */
          uint8_t * params = (uint8_t *)my_malloc(dataSize * ((weightsElementSizeInBits - 1) / 8 + 1));
          TIDL_findRange(data, dataSize, &min, &max, 1.0);

          if (weightsElementSizeInBits <= 8)
          {
            orgTIDLNetStructure.TIDLPCLayers[i].layerParams.convParams.weightScale =
              TIDL_QuantizeSignedMax((int8_t *)params, data, dataSize, min, max, weightsElementSizeInBits);
          }
          else /* weightsElementSizeInBits > 8: stored as int16_t */
          {
            orgTIDLNetStructure.TIDLPCLayers[i].layerParams.convParams.weightScale =
              TIDL_QuantizeSignedMax((int16_t *)params, data, dataSize, min, max, weightsElementSizeInBits);
          }
          /* The float buffer is no longer needed; hand the layer the quantized one. */
          my_free(data);
          orgTIDLNetStructure.TIDLPCLayers[i].weights.ptr = params;
        }
        /* ---- Convolution / deconvolution bias ---- */
        if (orgTIDLNetStructure.TIDLPCLayers[i].layerParams.convParams.enableBias)
        {
          data = (float *)orgTIDLNetStructure.TIDLPCLayers[i].bias.ptr;
          dataSize = orgTIDLNetStructure.TIDLPCLayers[i].bias.bufSize;
          if (dataSize > 0)
          {
            int16_t * params = (int16_t *)my_malloc(dataSize * 2);
            /* Reset the range so the weight range does not leak into the bias range. */
            min = FLT_MAX;
            max = -FLT_MAX;
#if 0
            int j;
            for (j = 0; j < dataSize; j++)
            {
              data[j] *= (8.0 / 6);
            }
#endif
            TIDL_findRange(data, dataSize, &min, &max, 1.0);
            if (weightsElementSizeInBits <= 8)
            {
              orgTIDLNetStructure.TIDLPCLayers[i].layerParams.convParams.biasScale =
                TIDL_QuantizeSignedMax(params, data, dataSize, min, max, (NUM_BIAS_BITS));
            }
            else
            {
              /* Wider weights get four extra bias bits. */
              orgTIDLNetStructure.TIDLPCLayers[i].layerParams.convParams.biasScale =
                TIDL_QuantizeSignedMax(params, data, dataSize, min, max, (NUM_BIAS_BITS+4));
            }
            orgTIDLNetStructure.TIDLPCLayers[i].layerParams.convParams.biasB = 1;

            my_free(data);
            orgTIDLNetStructure.TIDLPCLayers[i].bias.ptr = params;
          }
        }

      }
      else if (orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_InnerProductLayer)
      {
        weightsElementSizeInBits = orgTIDLNetStructure.TIDLPCLayers[i].weightsElementSizeInBits;
        /* ---- Inner-product weights ---- */
        float *  data = (float *)orgTIDLNetStructure.TIDLPCLayers[i].weights.ptr;
        uint32_t dataSize = orgTIDLNetStructure.TIDLPCLayers[i].weights.bufSize;
        if (dataSize > 0)
        {
          uint8_t * params = (uint8_t *)my_malloc(dataSize * ((weightsElementSizeInBits - 1) / 8 + 1));
          TIDL_findRange(data, dataSize, &min, &max, 1.0);
          if (weightsElementSizeInBits <= 8)
          {
            orgTIDLNetStructure.TIDLPCLayers[i].layerParams.innerProductParams.weightScale =
              TIDL_QuantizeSignedMax((int8_t *)params, data, dataSize, min, max, weightsElementSizeInBits);
          }
          else /* weightsElementSizeInBits > 8: stored as int16_t */
          {
            orgTIDLNetStructure.TIDLPCLayers[i].layerParams.innerProductParams.weightScale =
              TIDL_QuantizeSignedMax((int16_t *)params, data, dataSize, min, max, weightsElementSizeInBits);

          }
          my_free(data);
          orgTIDLNetStructure.TIDLPCLayers[i].weights.ptr = params;
        }
        else
        {
          /* No weights present: mark the scale with -1. */
          orgTIDLNetStructure.TIDLPCLayers[i].layerParams.innerProductParams.weightScale = -1;
        }
        /* ---- Inner-product bias ---- */
        data = (float *)orgTIDLNetStructure.TIDLPCLayers[i].bias.ptr;

        dataSize = orgTIDLNetStructure.TIDLPCLayers[i].bias.bufSize;
        if (dataSize > 0)
        {
          int16_t *params = (int16_t *)my_malloc(dataSize * 2);

          min = FLT_MAX;
          max = -FLT_MAX;
#if 0
          int j;
          for (j = 0; j < dataSize; j++)
          {
            data[j] *= (8.0 / 6);
          }
#endif
          TIDL_findRange(data, dataSize, &min, &max, 1.0);
          orgTIDLNetStructure.TIDLPCLayers[i].layerParams.innerProductParams.biasScale =
            TIDL_QuantizeSignedMax(params, data, dataSize, min, max, NUM_BIAS_BITS);
          my_free(data);
          orgTIDLNetStructure.TIDLPCLayers[i].bias.ptr = params;
        }
        else
        {
          /* No bias present: mark the scale with -1. */
          orgTIDLNetStructure.TIDLPCLayers[i].layerParams.innerProductParams.biasScale = -1;
        }
      }
      else if (orgTIDLNetStructure.TIDLPCLayers[i].layerType == TIDL_BatchNormLayer)
      {
        weightsElementSizeInBits = orgTIDLNetStructure.TIDLPCLayers[i].weightsElementSizeInBits;
        /* ---- Batch-norm weights ---- */
        float *  data = (float *)orgTIDLNetStructure.TIDLPCLayers[i].weights.ptr;
        uint32_t dataSize = orgTIDLNetStructure.TIDLPCLayers[i].weights.bufSize;
        if (dataSize > 0)
        {
          uint8_t * params = (uint8_t *)my_malloc(dataSize * ((weightsElementSizeInBits - 1) / 8 + 1));
          TIDL_findRange(data, dataSize, &min, &max, 1.0);

          if (weightsElementSizeInBits <= 8)
          {
            orgTIDLNetStructure.TIDLPCLayers[i].layerParams.batchNormParams.weightScale =
              TIDL_QuantizeSignedMax((int8_t *)params, data, dataSize, min, max, weightsElementSizeInBits);
          }
          else /* weightsElementSizeInBits > 8: stored as int16_t */
          {
            orgTIDLNetStructure.TIDLPCLayers[i].layerParams.batchNormParams.weightScale =
              TIDL_QuantizeSignedMax((int16_t *)params, data, dataSize, min, max, weightsElementSizeInBits);
          }
          my_free(data);
          orgTIDLNetStructure.TIDLPCLayers[i].weights.ptr = params;
        }

        /* ---- Batch-norm bias ---- */
        data = (float *)orgTIDLNetStructure.TIDLPCLayers[i].bias.ptr;
        dataSize = orgTIDLNetStructure.TIDLPCLayers[i].bias.bufSize;
        if (dataSize > 0)
        {
          int16_t *params = (int16_t *)my_malloc(dataSize * 2);
          min = FLT_MAX;
          max = -FLT_MAX;
          TIDL_findRange(data, dataSize, &min, &max, 1.0);
          orgTIDLNetStructure.TIDLPCLayers[i].layerParams.batchNormParams.biasScale =
            TIDL_QuantizeSignedMax(params, data, dataSize, min, max, NUM_BIAS_BITS);
          my_free(data);
          orgTIDLNetStructure.TIDLPCLayers[i].bias.ptr = params;
        }
        /* ---- PReLU slope (quantized with the weight bit width) ---- */
        if (orgTIDLNetStructure.TIDLPCLayers[i].actParams.actType == TIDL_PRelU)
        {
          float * slopeData = (float *)orgTIDLNetStructure.TIDLPCLayers[i].slope.ptr;
          uint32_t slopeDataSize = orgTIDLNetStructure.TIDLPCLayers[i].slope.bufSize;
          uint8_t * params = (uint8_t *)my_malloc(slopeDataSize * ((weightsElementSizeInBits - 1) / 8 + 1));
          /* Fresh range for the slope values (shadows the outer min/max). */
          float min = FLT_MAX;
          float max = -FLT_MAX;
          TIDL_findRange(slopeData, slopeDataSize, &min, &max, (1.0));
          if (weightsElementSizeInBits <= 8)
          {
            orgTIDLNetStructure.TIDLPCLayers[i].actParams.slopeScale =
              TIDL_QuantizeSignedMax((int8_t *)params, slopeData, slopeDataSize, min, max, weightsElementSizeInBits);
          }
          else /* weightsElementSizeInBits > 8: stored as int16_t */
          {
            orgTIDLNetStructure.TIDLPCLayers[i].actParams.slopeScale =
              TIDL_QuantizeSignedMax((int16_t *)params, slopeData, slopeDataSize, min, max, weightsElementSizeInBits);
          }
          my_free(slopeData);
          orgTIDLNetStructure.TIDLPCLayers[i].slope.ptr = params;
        }
      }
    }
  }
  fclose(paramDebugFile);
}

/*
 * Scans layers numLayer-1 down to 0 for the first one that has an output
 * buffer named bufName.
 *
 * Returns 1 when that layer has exactly one output buffer and its layerType
 * equals the requested layerType; returns 0 otherwise, including when no
 * layer produces a buffer with that name.
 */
int32_t TIDL_isInputLayer(sTIDL_OrgNetwork_t * pOrgTIDLNetStructure, int32_t numLayer, const char *bufName, int32_t layerType)
{
  int32_t layerIdx = numLayer;
  while (layerIdx-- > 0)
  {
    int32_t outIdx;
    for (outIdx = 0; outIdx < pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].numOutBufs; outIdx++)
    {
      if (strcmp((const char*)bufName, (const char*)pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].outDataNames[outIdx]) != 0)
      {
        continue;
      }
      /* The most recent producer decides the answer; earlier layers are not examined. */
      return ((pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].numOutBufs == 1) &&
              (pOrgTIDLNetStructure->TIDLPCLayers[layerIdx].layerType == layerType)) ? 1 : 0;
    }
  }
  return 0;
}


int32_t tf_getLayreTypeMapIdx(char* layerName, TIDL_TFLayerMapping_t* TIDL_TFLayerMap, int32_t tblSize)
{
  int32_t idx;
  for (idx = 0; idx < tblSize; idx++)
  {
    if (strcmp(layerName, TIDL_TFLayerMap[idx].layerName) == 0)
    {
      return (idx);
    }
  }
  return -1;
}

/*
 * Checks whether the chain of layers starting at startLayer matches the
 * operator sequence registered for layerName in TIDL_TFLayerMap.
 *
 * The layer-type strings (TIDL_LayerString) of up to NumOps consecutive
 * layers are concatenated; the walk only continues through a layer that has
 * exactly one output buffer with exactly one consumer. The concatenation is
 * then compared with the map entry's layerOpsString.
 *
 * Returns 1 on an exact match, 0 otherwise (including when layerName is not
 * in the table).
 */
int32_t tf_isLayerType(char* layerName, int32_t  startLayer, sTIDL_OrgNetwork_t  &pOrgTIDLNetStructure, TIDL_TFLayerMapping_t* TIDL_TFLayerMap, int32_t tblSize)
{
  int32_t i, numOps;
  int32_t mapIdx = tf_getLayreTypeMapIdx(layerName, TIDL_TFLayerMap, tblSize);
  if (mapIdx != -1)
  {
    char layerOpsString[300] = "";
    numOps = TIDL_TFLayerMap[mapIdx].NumOps;
    int32_t nextlayerIdx = startLayer;
    for (i = 0; i < numOps; i++)
    {
      /* Bounded append: never write past the end of layerOpsString. A
         truncated string simply fails the comparison below. */
      strncat(layerOpsString,
              TIDL_LayerString[pOrgTIDLNetStructure.TIDLPCLayers[nextlayerIdx].layerType],
              sizeof(layerOpsString) - strlen(layerOpsString) - 1);
      if ((pOrgTIDLNetStructure.TIDLPCLayers[nextlayerIdx].numOutBufs == 1) && (pOrgTIDLNetStructure.TIDLPCLayers[nextlayerIdx].outConsumerCnt[0] == 1))
      {
        nextlayerIdx = tidl_getOutLayer(pOrgTIDLNetStructure, pOrgTIDLNetStructure.numLayers, pOrgTIDLNetStructure.TIDLPCLayers[nextlayerIdx].outData[0].dataId);
        /* NOTE(review): assumes tidl_getOutLayer reports "no consumer found"
           with a negative index - confirm. Either way a negative value must
           not be used to index TIDLPCLayers on the next iteration. */
        if (nextlayerIdx < 0)
        {
          break;
        }
      }
      else
      {
        /* Fan-out or multiple outputs: the linear chain ends here. */
        break;
      }
    }
    if (strcmp(layerOpsString, TIDL_TFLayerMap[mapIdx].layerOpsString) == 0)
    {
      return (1);
    }
  }
  return (0);
}
/* Returns non-zero for the characters that separate tokens once commas have
   been rewritten to spaces: the same whitespace set that the "%s" scanf
   conversion stops at. */
static int32_t tidl_isListWhitespace(char c)
{
  return ((c == ' ') || (c == '\t') || (c == '\n') ||
          (c == '\r') || (c == '\v') || (c == '\f')) ? 1 : 0;
}

/*
 * Splits a comma/whitespace separated list into fixed-size string slots.
 *
 * list   : NUL-terminated input. It is modified in place: every ',' is
 *          replaced by ' '.
 * names  : output buffer treated as consecutive slots of strLen chars; token
 *          k is written NUL-terminated at names[strLen * k]. The caller must
 *          provide enough slots for all tokens in the list.
 * strLen : slot size in chars. Tokens longer than strLen - 1 characters are
 *          truncated so that a token never spills into the following slot.
 *
 * Returns the number of tokens found (0 for an empty or separator-only list).
 */
int32_t tidl_getStringsFromList(char *list, char * names, int strLen)
{
  int32_t numStrings = 0;
  char *ptr = list;
  while (ptr[0] != '\0')
  {
    if (ptr[0] == ',') ptr[0] = ' ';
    ptr++;
  }
  ptr = list;
  while (ptr[0] != '\0' )
  {
    if (tidl_isListWhitespace(ptr[0]))
    {
      /* Skip every kind of whitespace, including a trailing newline or
         carriage return left over from reading a line of a text file. */
      ptr++;
    }
    else
    {
      char *dst = &names[strLen*numStrings];
      int32_t len = 0;
      /* Consume the whole token from the input, but store at most
         strLen - 1 characters of it. */
      while ((ptr[0] != '\0') && !tidl_isListWhitespace(ptr[0]))
      {
        if (len < (strLen - 1))
        {
          dst[len] = ptr[0];
          len++;
        }
        ptr++;
      }
      if (strLen > 0)
      {
        dst[len] = '\0';
      }
      numStrings++;
    }
  }
  return numStrings;
}

/*
 * Selects the feature element type from gParams.numFeatureBits and the
 * requested signedness.
 *
 * numFeatureBits <= 8 : TIDL_SignedChar  / TIDL_UnsignedChar
 * otherwise (16 bits) : TIDL_SignedShort / TIDL_UnsignedShort
 *
 * sign : non-zero selects the signed variant, zero the unsigned one.
 */
int32_t tidl_getElementType(int32_t sign)
{
  const bool fitsInByte = (gParams.numFeatureBits <= 8);
  if (sign)
  {
    return fitsInByte ? (TIDL_SignedChar) : (TIDL_SignedShort);
  }
  return fitsInByte ? (TIDL_UnsignedChar) : (TIDL_UnsignedShort);
}
