TDA4VH-Q1: Adding Custom TIDL Layer

Amine Hamidi

Part Number: TDA4VH-Q1

Tool/software:

Hello,

SDK version: 09_02_00_05

Currently we are working on implementing our custom layers (in this case SoftMax) using the documentation provided in c7x-mma-tidl/ti_dl/docs/user_guide_nda_html/md_tidl_custom_layer.html.

So we started by defining our custom layer params in tidl_custom.h

/*
 * Parameter block for the custom SoftMax layer.
 *
 * The ONNX import code allocates one instance of this struct, fills in `axis`
 * and `nDims`, and hands it to TIDL as the layer's weights buffer
 * (weights.ptr / weights.bufSize = sizeof(TIDL_CustomSoftMaxParams_t)).
 * None of the other fields is written by the code shown alongside this
 * definition; their descriptions below are inferred from the names only.
 *
 * NOTE(review): the struct carries raw pointers and a DMA context pointer.
 * Pointer values captured on the import host cannot be meaningful on the
 * target, so these presumably have to be (re)initialised on the C7x side
 * before use - TODO confirm.
 */
typedef struct {
  /* Softmax axis as read from the ONNX "axis" attribute. */
  int32_t axis;

  /* Number of dimensions of the input tensor as seen at import time. */
  int32_t nDims;
  /* Presumably the per-dimension sizes of the tensor - TODO confirm. */
  int32_t dim[TIDL_DIM_MAX];
  /* Presumably input/output pitches per dimension - TODO confirm units. */
  int32_t inStride[TIDL_PITCH_MAX];
  int32_t outStride[TIDL_PITCH_MAX];

  /* Presumably quantization description of the input and output tensors
     (element type, scale / inverse scale, zero point) - TODO confirm. */
  int32_t inElementType;
  int32_t outElementType;
  float inScaleInv;
  int32_t inZeroPoint;
  float outScale;
  float outScaleInv;
  int32_t outZeroPoint;


  /* Presumably L2 SRAM scratch bookkeeping: total size, strides, base pointer,
     two input and two output buffers, an intermediate buffer and DMA transfer
     descriptors - TODO confirm against the kernel implementation. */
  uint32_t l2SramSize;
  uint32_t l2IoStride;
  uint32_t l2DmaTrStride;
  uint8_t* l2SramPtr;
  uint8_t* l2Input[2];
  uint8_t* l2Output[2];
  uint8_t* l2Inter;
  uint8_t* l2InDmaTr;
  uint8_t* l2OutDmaTr;

  /* Presumably the DDR locations of the layer's input and output tensors. */
  uint8_t* inPtrDdr;
  uint8_t* outPtrDdr;

  /* Opaque DMA utility context; owner and lifetime are not visible here. */
  void *dmaUtilsContext;
} TIDL_CustomSoftMaxParams_t;

Next, we modified the TIDL_MapCustomParamsOnnx function in the tidl_custom_import.c file to parse the ONNX node and check the Softmax attributes:

/**
 * Decides whether an ONNX node should be imported as a TIDL custom layer and,
 * if so, fills in the custom-layer description on TIDLPCLayers.
 *
 * Only one case is mapped here: a "Softmax" node whose "axis" attribute is 2
 * and whose first variable input tensor has 3 dimensions. For that case the
 * layer is marked as TIDL_CustomLayer / TIDL_CUSTOM_TYPE_1 and a zero-filled
 * TIDL_CustomSoftMaxParams_t (with axis and nDims set) is attached as the
 * layer's weights buffer.
 *
 * @param TIDLPCLayers  Layer descriptor to populate. Its customParams are
 *                      always reset to defaults first.
 * @param nodeIndex     Index of the node inside onnxGraph.
 * @param onnxGraph     Graph that owns the node being inspected.
 * @return 0 when the node was mapped to a custom layer, -1 otherwise
 *         (including when the parameter block could not be allocated).
 */
int32_t TIDL_MapCustomParamsOnnx (sTIDL_LayerPC_t &TIDLPCLayers,
                                  int32_t nodeIndex,
                                  onnx::GraphProto* onnxGraph)
{
  int32_t status = -1;

  TIDL_setDefaultCustomParams(&(TIDLPCLayers.layerParams.customParams));

  /* Bind the node by reference; copying a NodeProto duplicates the whole
     message and is not needed for read-only inspection. */
  const onnx::NodeProto &node = onnxGraph->node(nodeIndex);
  const char *currLayerName = node.op_type().c_str();

  if (strcmp(currLayerName, "MaxPool") == 0)
  {
    /* Parameters can be parsed from the node object based on the layer */
  }

  if (strcmp(currLayerName, "Softmax") == 0)
  {
    /* Stays at -1 when the node carries no integer "axis" attribute, in which
       case the node is not mapped to the custom layer. */
    int32_t axis = -1;

    sTIDL_CustomParams_t &customParams = TIDLPCLayers.layerParams.customParams;
    sTIDL_allowlistingMetaData &md = TIDLPCLayers.allowlistingMetaData;
    /* NOTE(review): assumes varTensorsDims has at least one entry for this
       node - confirm against the caller. */
    const int32_t numDim = static_cast<int32_t>(md.varTensorsDims[0].size());

    for (const auto& attr : node.attribute())
    {
      if ((attr.name() == "axis") && (attr.type() == onnx::AttributeProto::INT))
      {
        axis = static_cast<int32_t>(attr.i());
        break;
      }
    }

    // attributes specific for the transformers layers
    if ((2 == axis) && (3 == numDim))
    {
      /* calloc instead of malloc: the complete struct (bufSize = sizeof) is
         exposed as the weights buffer but only axis/nDims are set below, so
         every other field must have a defined (zero / null) value. */
      TIDL_CustomSoftMaxParams_t *customUserParams =
        static_cast<TIDL_CustomSoftMaxParams_t *>(calloc(1, sizeof(TIDL_CustomSoftMaxParams_t)));
      if (NULL == customUserParams)
      {
        printf("ERROR: TIDL_MapCustomParamsOnnx: could not allocate custom params for SoftMax Layer(%d)\n", nodeIndex);
        return status;
      }

      printf("\n##################################################################################################################\n");
      printf("######################## Using TIDL_CustomLayer for SoftMax Layer(%d) with input's dimensions nbr: %d and axis: %d\n", nodeIndex, numDim, axis);
      printf("##################################################################################################################\n\n");

      customUserParams->axis = axis;
      customUserParams->nDims = numDim;

      customParams.customLayerType = TIDL_CUSTOM_TYPE_1;
      customParams.doesLayerChangePadding = 0;

      /* Ownership of customUserParams passes to the layer descriptor here;
         nothing in this function frees it. */
      TIDLPCLayers.layerType = TIDL_CustomLayer;
      TIDLPCLayers.weights.ptr = (void*)customUserParams;
      TIDLPCLayers.weights.bufSize = sizeof(TIDL_CustomSoftMaxParams_t);

      TIDLPCLayers.parseStatus = TIDL_ParsePassed;

      status = 0;
    }
  }
  return status;
}

Next, we modified the TIDL_tfOutReshapeCustomLayer function

else if(TIDL_CUSTOM_TYPE_1 == customParams.customLayerType)
  {
    /* Output shape/type for the custom SoftMax layer: same dimensions as the
       input, element type forced to the unsigned variant of the input width. */
    TIDLPCLayers.outData[0].elementType = TIDLPCLayers.inData[0].elementType;

    /* always have unsigned output with ZeroPoint = 0 when dealing with 8/16 bits */
    if((TIDL_UnsignedShort == TIDLPCLayers.inData[0].elementType) || (TIDL_SignedShort == TIDLPCLayers.inData[0].elementType))
    {
      TIDLPCLayers.outData[0].elementType = TIDL_UnsignedShort;
    }
    if((TIDL_UnsignedChar == TIDLPCLayers.inData[0].elementType) || (TIDL_SignedChar == TIDLPCLayers.inData[0].elementType))
    {
      TIDLPCLayers.outData[0].elementType = TIDL_UnsignedChar;
    }

    /* NOTE(review): numDim is hard-coded to 6 rather than copied from the
       input (see the commented-out expression) - confirm this is intended. */
    TIDLPCLayers.outData[0].numDim = 6;//TIDLPCLayers.inData[0].numDim;
    TIDLPCLayers.outData[0].dimValues[TIDL_DIM_BATCH] = TIDLPCLayers.inData[0].dimValues[TIDL_DIM_BATCH];
    TIDLPCLayers.outData[0].dimValues[TIDL_DIM_DIM1] = TIDLPCLayers.inData[0].dimValues[TIDL_DIM_DIM1];
    TIDLPCLayers.outData[0].dimValues[TIDL_DIM_DIM2] = TIDLPCLayers.inData[0].dimValues[TIDL_DIM_DIM2];
    TIDLPCLayers.outData[0].dimValues[TIDL_DIM_NUMCH] = TIDLPCLayers.inData[0].dimValues[TIDL_DIM_NUMCH];
    TIDLPCLayers.outData[0].dimValues[TIDL_DIM_HEIGHT] = TIDLPCLayers.inData[0].dimValues[TIDL_DIM_HEIGHT];
    TIDLPCLayers.outData[0].dimValues[TIDL_DIM_WIDTH] = TIDLPCLayers.inData[0].dimValues[TIDL_DIM_WIDTH];

    /* One "MAC" is counted per output element; the first factor is widened to
       64 bits so the product is evaluated in 64-bit arithmetic. */
    TIDLPCLayers.numMacs =
    (int64_t)((int64_t)TIDLPCLayers.outData[0].dimValues[TIDL_DIM_BATCH] * TIDLPCLayers.outData[0].dimValues[TIDL_DIM_DIM1] *TIDLPCLayers.outData[0].dimValues[TIDL_DIM_DIM2]* TIDLPCLayers.outData[0].dimValues[TIDL_DIM_NUMCH] *
      TIDLPCLayers.outData[0].dimValues[TIDL_DIM_HEIGHT] * TIDLPCLayers.outData[0].dimValues[TIDL_DIM_WIDTH]);

    printf("\n##################################################################################################################\n");
    printf("######################## Computing  outTensorShape for SoftMax Layer\n");
    /* Explicit cast: %lld requires a long long argument, while a 64-bit
       integer typedef may be plain long on LP64 hosts. */
    printf("######################## \t TIDLPCLayers.numMacs                               : %lld\n", (long long)TIDLPCLayers.numMacs);
    printf("######################## \t TIDLPCLayers.outData[0].elementType                : %d\n", TIDLPCLayers.outData[0].elementType);
    printf("######################## \t TIDLPCLayers.outData[0].numDim                     : %d\n", TIDLPCLayers.outData[0].numDim);
    printf("######################## \t TIDLPCLayers.outData[0].dimValues[TIDL_DIM_BATCH]  : %d\n", TIDLPCLayers.outData[0].dimValues[TIDL_DIM_BATCH]);
    printf("######################## \t TIDLPCLayers.outData[0].dimValues[TIDL_DIM_DIM1]   : %d\n", TIDLPCLayers.outData[0].dimValues[TIDL_DIM_DIM1]);
    printf("######################## \t TIDLPCLayers.outData[0].dimValues[TIDL_DIM_DIM2]   : %d\n", TIDLPCLayers.outData[0].dimValues[TIDL_DIM_DIM2]);
    printf("######################## \t TIDLPCLayers.outData[0].dimValues[TIDL_DIM_NUMCH]  : %d\n", TIDLPCLayers.outData[0].dimValues[TIDL_DIM_NUMCH]);
    printf("######################## \t TIDLPCLayers.outData[0].dimValues[TIDL_DIM_HEIGHT] : %d\n", TIDLPCLayers.outData[0].dimValues[TIDL_DIM_HEIGHT]);
    printf("######################## \t TIDLPCLayers.outData[0].dimValues[TIDL_DIM_WIDTH]  : %d\n", TIDLPCLayers.outData[0].dimValues[TIDL_DIM_WIDTH]);
    printf("##################################################################################################################\n\n");
  }

Next, we modified the TIDL_getCustomLayerOutputTensorScale function

else if(TIDL_CUSTOM_TYPE_1 == customParams.customLayerType)
  {
    TIDL_CustomSoftMaxParams_t*  prms = (TIDL_CustomSoftMaxParams_t*)customUserParams;
    if ((2 == prms->axis) && (3 == prms->nDims))
    {
      sTIDL_DataParams_t& outDataParams = TIDLPCLayers->outData[0];

      outDataParams.tensorZeroPoint = 0;
      outDataParams.roundBits = 0;

      if (TIDL_UnsignedShort == outDataParams.elementType)
      {
        outTensorScale = 256.0f * 256.0f;
      }
      else if (TIDL_UnsignedChar == outDataParams.elementType)
      {
        outTensorScale = 256.0f;
      }
      else
      {
        outTensorScale = 1.0f;
      }

      printf("\n##################################################################################################################\n");
      printf("######################## Computing  outTensorScale for SoftMax Layer: %f\n", outTensorScale);
      printf("##################################################################################################################\n\n");
    }

And in TIDL_customLayerProcess (tidl_custom.c) we only added a printf as a placeholder for the actual processing for now:

else if(TIDL_CUSTOM_TYPE_2 == tidlLayer->layerParams.customParams.customLayerType)
  {
    /* NOTE(review): the ONNX import code (TIDL_MapCustomParamsOnnx) tags the
       Softmax layer with customLayerType = TIDL_CUSTOM_TYPE_1, whereas this
       branch is selected for TIDL_CUSTOM_TYPE_2. As written, this printf would
       not be reached for that layer - confirm which type is intended and make
       the import and process sides agree. */
    /* Call the corresponding custom layer's process function */
    printf("############################ Running Custom TIDL SotMax\n");
  }

Under the c7x-mma-tidl we ran:

make clean
make

Afterwards we copy the import tool .out and .so file to our model compilation folder and we update the c7x firmware.

Compiling the model seems to work correctly, since no warnings or errors were emitted.

----------------------------- Optimization Summary -----------------------------
-------------------------------------------------------------------------------------
|            Layer           | Nodes before optimization | Nodes after optimization |
-------------------------------------------------------------------------------------
| TIDL_BatchNormLayer        |                         0 |                        1 |
| TIDL_EltWiseLayer          |                         3 |                        2 |
| TIDL_CustomLayer           |                         1 |                        1 |
| TIDL_InnerProductLayer     |                         4 |                        4 |
| TIDL_ConstDataLayer        |                         0 |                        7 |
| TIDL_TransposeLayer        |                         1 |                        0 |
| TIDL_ReLULayer             |                         1 |                        0 |
| TIDL_ConcatLayer           |                         4 |                        4 |
-------------------------------------------------------------------------------------

Below are the compile options

#    compile_options = {
#        'tidl_tools_path': tidl_tools_path,
#        'artifacts_folder': output_dir,
#        'tensor_bits': artifacts_quant_bits,
#        'accuracy_level': 1,
#        'advanced_options:calibration_frames': 1,
#        'advanced_options:calibration_iterations': 1,
#        'advanced_options:quantization_scale_type': 4,
#        'advanced_options:add_data_convert_ops': 3,
#        'debug_level': 1,
#        'advanced_options:network_name': 'FormersTop',
#        'ti_internal_nc_flag': 83886080 + 1601,
#        'advanced_options:inference_mode': inference_mode,
#        'advanced_options:num_cores': num_cores,
#        "deny_list:layer_type": deny_list,
#    }

Once we try running the inference on the board, we end up with these error messages:

libtidl_onnxrt_EP loaded 0xae22d50 
artifacts_folder                                = /mnt/ssd/amine/custom_tidl_tests/Artifacts 
debug_level                                     = 3 
target_priority                                 = 0 
max_pre_empt_delay                              = 340282346638528859811704183484516925440.000000 
Final number of subgraphs created are : 1, - Offloaded Nodes - 17, Total Nodes - 17 
In TIDL_createStateInfer 
Compute on node : TIDLExecutionProvider_TIDL_0_0
************ in TIDL_subgraphRtCreate ************ 
 604657.136074 s:  VX_ZONE_ERROR:[ownContextSendCmd:875] Command ack message returned failure cmd_status: -1
604657.136117 s:  VX_ZONE_ERROR:[ownNodeKernelInit:590] Target kernel, TIVX_CMD_NODE_CREATE failed for node TIDLNode
604657.136137 s:  VX_ZONE_ERROR:[ownNodeKernelInit:591] Please be sure the target callbacks have been registered for this core
604657.136155 s:  VX_ZONE_ERROR:[ownNodeKernelInit:592] If the target callbacks have been registered, please ensure no errors are occurring within the create callback of this kernel
604657.136176 s:  VX_ZONE_ERROR:[ownGraphNodeKernelInit:608] kernel init for node 0, kernel com.ti.tidl:4:1 ... failed !!!
604657.136217 s:  VX_ZONE_ERROR:[vxVerifyGraph:2159] Node kernel init failed
604657.136237 s:  VX_ZONE_ERROR:[vxVerifyGraph:2213] Graph verify failed
TIDL_RT_OVX: ERROR: Verifying TIDL graph ... Failed !!!
TIDL_RT_OVX: ERROR: Verify OpenVX graph failed
************ TIDL_subgraphRtCreate done ************

[C7x_1 ] 604657.135601 s: [tidsp/tidl_matmul_device.c:357] MMALIB init failed
[C7x_1 ] 604657.135637 s: [tidsp/tidl_matmul_device.c:506] Alg init fail for handle 0
[C7x_1 ] 604657.135684 s: WorkloadUnitExec_Init: initParams->linkInitParams[linkIdx].initFuncPtr Failed, Link Id 35183969436048 
[C7x_1 ] 604657.135719 s:  VX_ZONE_ERROR:[tivxAlgiVisionCreate:335] Calling ialg.algInit failed with status = 1
[C7x_1 ] 604657.135772 s: Error: handle (100000000) doesn't exist in priority table
[C7x_1 ] 604657.135797 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:961] tivxAlgiVisionCreate returned NULL
[C7x_1 ] 604657.182095 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.184676 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.186301 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.187795 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.189231 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.190667 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.192096 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.193481 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.194967 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.196334 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.197720 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719
[C7x_1 ] 604657.199080 s:  VX_ZONE_ERROR:[tivxKernelTIDLCreate:945] Network version - 0x00000000, Expected version - 0x20240719

Any idea on how to solve this?

Regards

6 months ago

0 Chris Tsongas 6 months ago

TI__Genius 14730 points

Hi,

If you take the custom layer out, does the model run? I ask this because it is similar to an SD card file system issue. As a general recommendation, I do not recommend doing custom layers as, most of the time, it ends up being a longer and more painful enterprise.

Chris

Processors

Processors forum

TDA4VH-Q1: Adding Custom TIDL Layer