According to the document www.ti.com/.../sprabg7.pdf, we write a demo to test matrix operation on DSP.
Now the question is why the output of the "_cmatmpyr1" API under line 56 is always 0. I have tried many times but I cannot solve this error. What is the problem? See the appendix for the code.(This code is written in #D3_VISION_SDK_03_02_103_00_TDA3X_RVP\tmp\vision_sdk\apps\src\rtos\alg_plugins\clr\clrLink_algPlugin.c)
void MatrixMulTest()
{
Vps_printf("$$$$$$$$$$$$$$$$$$$$ begin MatrixMul $$$$$$$$$$$$$$$$$$$$$$$\n");
int32_t input_mat1[16] = {1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6};
int32_t input_mat2[16] = {6,6,7,7,7,8,8,8,9,9,9,10,10,10,11,11};
int32_t MatrixOutput[16] = {1};
int64_t *input_vec1 = (long long *) &input_mat1[0];
int64_t *input_vec2 = (long long *) &input_mat1[4];
int64_t *input_vec3 = (long long *) &input_mat1[8];
int64_t *input_vec4 = (long long *) &input_mat1[12];
int64_t *inputMatPtr1 = (long long *) &input_mat2[0];
int64_t *inputMatPtr2 = (long long *) &input_mat2[4];
int64_t *inputMatPtr3 = (long long *) &input_mat2[8];
int64_t *inputMatPtr4 = (long long *) &input_mat2[12];
int64_t llinputV1 = _amem8(input_vec1++);
Vps_printf("_loll(llinputV1) = %d\n",_loll(llinputV1));
Vps_printf("_hill(llinputV1) = %d\n",_hill(llinputV1));
int64_t llinputV2 = _amem8(input_vec2++);
Vps_printf("_loll(llinputV2) = %d\n",_loll(llinputV2));
Vps_printf("_hill(llinputV2) = %d\n",_hill(llinputV2));
int64_t llinputV3 = _amem8(input_vec3++);
Vps_printf("_loll(llinputV3) = %d\n",_loll(llinputV3));
Vps_printf("_hill(llinputV3) = %d\n",_hill(llinputV3));
int64_t llinputV4 = _amem8(input_vec4++);
Vps_printf("_loll(llinputV4) = %d\n",_loll(llinputV4));
Vps_printf("_hill(llinputV4) = %d\n",_hill(llinputV4));
int64_t llinputM1 = _amem8(inputMatPtr1++);
Vps_printf("_loll(llinputM1) = %d\n",_loll(llinputM1));
Vps_printf("_hill(llinputM1) = %d\n",_hill(llinputM1));
int64_t llinputM2 = _amem8(inputMatPtr2++);
Vps_printf("_loll(llinputM2) = %d\n",_loll(llinputM2));
Vps_printf("_hill(llinputM2) = %d\n",_hill(llinputM2));
/* _dadds test */
int64_t daddTest = _dadd2(llinputM1,llinputM2);
Vps_printf("_loll(daddTest) = %d\n",_loll(daddTest));
Vps_printf("_hill(daddTest) = %d\n",_hill(daddTest));
__x128_t inputMat;
#ifdef _LITTLE_ENDIAN
inputMat = _llto128(llinputM2,llinputM1);
#else
inputMat = _llto128(llinputM1,llinputM2);
#endif
/*print __x128_t inputMat*/
int32_t *testTmp = (int32_t *)&inputMat;
Vps_printf("inputMat.word0 = %d\n",*(testTmp++));
Vps_printf("inputMat.word1 = %d\n",*(testTmp++));
Vps_printf("inputMat.word2 = %d\n",*(testTmp++));
Vps_printf("inputMat.word3 = %d\n",*(testTmp));
int64_t acc1 = _cmatmpyr1(llinputV1,inputMat);
Vps_printf("_loll(acc1) = %d\n",_loll(acc1));
Vps_printf("_hill(acc1) = %d\n",_hill(acc1));
int64_t acc2 = _cmatmpyr1(llinputV2,inputMat);
Vps_printf("_loll(acc2) = %d\n",_loll(acc2));
Vps_printf("_hill(acc2) = %d\n",_hill(acc2));
int64_t acc3 = _cmatmpyr1(llinputV3,inputMat);
Vps_printf("_loll(acc3) = %d\n",_loll(acc3));
Vps_printf("_hill(acc3) = %d\n",_hill(acc3));
int64_t acc4 = _cmatmpyr1(llinputV4,inputMat);
Vps_printf("_loll(acc4) = %d\n",_loll(acc4));
Vps_printf("_hill(acc4) = %d\n",_hill(acc4));
llinputM1 = _amem8(inputMatPtr1++);
Vps_printf("_loll(llinputM1) = %d\n",_loll(llinputM1));
Vps_printf("_hill(llinputM1) = %d\n",_hill(llinputM1));
llinputM2 = _amem8(inputMatPtr2++);
Vps_printf("_loll(llinputM2) = %d\n",_loll(llinputM2));
Vps_printf("_hill(llinputM2) = %d\n",_hill(llinputM2));
#ifdef _LITTLE_ENDIAN
inputMat = _llto128(llinputM2,llinputM1);
#else
inputMat = _llto128(llinputM1,llinputM2);
#endif
int64_t acc5 = _cmatmpyr1(llinputV1,inputMat);
Vps_printf("_loll(acc5) = %d\n",_loll(acc5));
Vps_printf("_hill(acc5) = %d\n",_hill(acc5));
int64_t acc6 = _cmatmpyr1(llinputV2,inputMat);
Vps_printf("_loll(acc6) = %d\n",_loll(acc6));
Vps_printf("_hill(acc6) = %d\n",_hill(acc6));
int64_t acc7 = _cmatmpyr1(llinputV3,inputMat);
Vps_printf("_loll(acc7) = %d\n",_loll(acc7));
Vps_printf("_hill(acc7) = %d\n",_hill(acc7));
int64_t acc8 = _cmatmpyr1(llinputV4,inputMat);
Vps_printf("_loll(acc8) = %d\n",_loll(acc8));
Vps_printf("_hill(acc8) = %d\n",_hill(acc8));
llinputV1 = _amem8(input_vec1++);
Vps_printf("_loll(llinputV1) = %d\n",_loll(llinputV1));
Vps_printf("_hill(llinputV1) = %d\n",_hill(llinputV1));
llinputV2 = _amem8(input_vec2++);
Vps_printf("_loll(llinputV2) = %d\n",_loll(llinputV2));
Vps_printf("_hill(llinputV2) = %d\n",_hill(llinputV2));
llinputV3 = _amem8(input_vec3++);
Vps_printf("_loll(llinputV3) = %d\n",_loll(llinputV3));
Vps_printf("_hill(llinputV3) = %d\n",_hill(llinputV3));
llinputV4 = _amem8(input_vec4++);
Vps_printf("_loll(llinputV4) = %d\n",_loll(llinputV4));
Vps_printf("_hill(llinputV4) = %d\n",_hill(llinputV4));
llinputM1 = _amem8(inputMatPtr3++);
Vps_printf("_loll(llinputM1) = %d\n",_loll(llinputM1));
Vps_printf("_hill(llinputM1) = %d\n",_hill(llinputM1));
llinputM2 = _amem8(inputMatPtr4++);
Vps_printf("_loll(llinputM2) = %d\n",_loll(llinputM2));
Vps_printf("_hill(llinputM2) = %d\n",_hill(llinputM2));
#ifdef _LITTLE_ENDIAN
inputMat = _llto128(llinputM2,llinputM1);
#else
inputMat = _llto128(llinputM1,llinputM2);
#endif
acc1 = _dadd2(_cmatmpyr1(llinputV1,inputMat),acc1);
Vps_printf("_loll(acc1) = %d\n",_loll(acc1));
Vps_printf("_hill(acc1) = %d\n",_hill(acc1));
acc2 = _dadd2(_cmatmpyr1(llinputV2,inputMat),acc2);
Vps_printf("_loll(acc2) = %d\n",_loll(acc2));
Vps_printf("_hill(acc2) = %d\n",_hill(acc2));
acc3 = _dadd2(_cmatmpyr1(llinputV3,inputMat),acc3);
Vps_printf("_loll(acc3) = %d\n",_loll(acc3));
Vps_printf("_hill(acc3) = %d\n",_hill(acc3));
acc4 = _dadd2(_cmatmpyr1(llinputV4,inputMat),acc4);
Vps_printf("_loll(acc4) = %d\n",_loll(acc4));
Vps_printf("_hill(acc4) = %d\n",_hill(acc4));
llinputM1 = _amem8(inputMatPtr3++);
Vps_printf("_loll(llinputM1) = %d\n",_loll(llinputM1));
Vps_printf("_hill(llinputM1) = %d\n",_hill(llinputM1));
llinputM2 = _amem8(inputMatPtr4++);
Vps_printf("_loll(llinputM2) = %d\n",_loll(llinputM2));
Vps_printf("_hill(llinputM2) = %d\n",_hill(llinputM2));
#ifdef _LITTLE_ENDIAN
inputMat = _llto128(llinputM2,llinputM1);
#else
inputMat = _llto128(llinputM1,llinputM2);
#endif
acc5 = _dadd2(_cmatmpyr1(llinputV1,inputMat),acc5);
Vps_printf("_loll(acc5) = %d\n",_loll(acc5));
Vps_printf("_hill(acc5) = %d\n",_hill(acc5));
acc6 = _dadd2(_cmatmpyr1(llinputV2,inputMat),acc6);
Vps_printf("_loll(acc6) = %d\n",_loll(acc6));
Vps_printf("_hill(acc6) = %d\n",_hill(acc6));
acc7 = _dadd2(_cmatmpyr1(llinputV3,inputMat),acc7);
Vps_printf("_loll(acc7) = %d\n",_loll(acc7));
Vps_printf("_hill(acc7) = %d\n",_hill(acc7));
acc8 = _dadd2(_cmatmpyr1(llinputV4,inputMat),acc8);
Vps_printf("_loll(acc8) = %d\n",_loll(acc8));
Vps_printf("_hill(acc8) = %d\n",_hill(acc8));
_amem8(&MatrixOutput[0]) = acc1;
_amem8(&MatrixOutput[2]) = acc5;
_amem8(&MatrixOutput[4]) = acc2;
_amem8(&MatrixOutput[6]) = acc6;
_amem8(&MatrixOutput[8]) = acc3;
_amem8(&MatrixOutput[10]) = acc7;
_amem8(&MatrixOutput[12]) = acc4;
_amem8(&MatrixOutput[14]) = acc8;
Vps_printf("MatrixOutput[0] = %d\n",MatrixOutput[0]);
Vps_printf("MatrixOutput[1] = %d\n",MatrixOutput[1]);
Vps_printf("MatrixOutput[2] = %d\n",MatrixOutput[2]);
Vps_printf("MatrixOutput[3] = %d\n",MatrixOutput[3]);
Vps_printf("MatrixOutput[4] = %d\n",MatrixOutput[4]);
Vps_printf("MatrixOutput[5] = %d\n",MatrixOutput[5]);
Vps_printf("MatrixOutput[6] = %d\n",MatrixOutput[6]);
Vps_printf("MatrixOutput[7] = %d\n",MatrixOutput[7]);
Vps_printf("MatrixOutput[8] = %d\n",MatrixOutput[8]);
Vps_printf("MatrixOutput[9] = %d\n",MatrixOutput[9]);
Vps_printf("MatrixOutput[10] = %d\n",MatrixOutput[10]);
Vps_printf("MatrixOutput[11] = %d\n",MatrixOutput[11]);
Vps_printf("MatrixOutput[12] = %d\n",MatrixOutput[12]);
Vps_printf("MatrixOutput[13] = %d\n",MatrixOutput[13]);
Vps_printf("MatrixOutput[14] = %d\n",MatrixOutput[14]);
Vps_printf("MatrixOutput[15] = %d\n",MatrixOutput[15]);
Vps_printf("$$$$$$$$$$$$$$$$$$$$ end MatrixMul $$$$$$$$$$$$$$$$$$$$$$$\n");
}