This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

TDA2: EVE/VCOP deinterleave(), interleave() problem

Part Number: TDA2

Hi,

I am trying to implement a simple mean filter with a 3x3 window. When I use npt(), I get the same result as host emulation (Visual Studio, vcop.h library), but when I change npt() to deinterleave()/interleave(), the result is different.

__vptr_uint16 bufx,

__vptr_uint8  output,

w = 640; 

npt loops:

// npt() variant: the outer loop walks blockH rows, the inner loop walks one
// row in steps of VCOP_SIMD_WIDTH elements.
// NOTE(review): the closing brace of the outer loop is missing from this
// fragment as posted.
for (int I1 = 0; I1 < blockH; I1++)
{


for (int I2 = 0; I2 < w / VCOP_SIMD_WIDTH; I2++)
{
// Address offsets for row I1 / vector I2, scaled by the element size of
// output and bufx respectively. Rows of bufx are bufWidth elements apart,
// rows of output are w elements apart.
__agen indexOut = I1 * w * sizeof(*output) + I2 * sizeof(*output) * VCOP_SIMD_WIDTH;
__agen index = I1 * bufWidth * sizeof(*bufx) + I2 * sizeof(*bufx) * VCOP_SIMD_WIDTH;

// Three loads from bufx at base offsets 0, 2 and 4.
// NOTE(review): presumably byte offsets, i.e. three horizontally adjacent
// uint16 elements -- confirm against the VCOP pointer arithmetic rules.
Vec10 = (bufx)[index].npt();
Vec20 = (bufx + 2)[index].npt();
Vec30 = (bufx + 4)[index].npt();

// Sum of the three loaded vectors.
Vec10 += Vec20 + Vec30;

// Multiply the sum by VecMul, then shift by Vshift (both defined outside
// this fragment).
Vec20 = Vec10 * VecMul;
Vec30 = Vec20 << Vshift;

output[indexOut].npt() = Vec30;
}


deinterleave(), interleave() loops:

// deinterleave()/interleave() variant: the inner loop advances by
// VCOP_2SIMD_WIDTH elements per iteration, and each load fills a pair of
// vector registers.
for (int I1 = 0; I1 < blockH; I1++)

{

for (int I2 = 0; I2 < w / VCOP_2SIMD_WIDTH; I2++)

{


// Address offsets for row I1 / vector group I2, scaled by element size.
__agen indexout = I1 * w * sizeof(*output) + I2 * sizeof(*output) * VCOP_2SIMD_WIDTH;
__agen index0 = I1 * bufWidth * sizeof(*bufx) + I2 * sizeof(*bufx) * VCOP_2SIMD_WIDTH;

// Three deinterleaving loads at base offsets of 0, 1 and 2 times sizeof(*bufx).
// NOTE(review): with bufx declared as __vptr_uint16 (as in the post), the
// second load starts sizeof(*bufx) = 2 bytes past bufx. The thread's
// resolution says the alignment must be 4 bytes, so this load looks like the
// source of the mismatch -- confirm against the deinterleave() alignment
// requirement in the VCOP documentation.
(Vec10, Vec11) = (bufx)[index0].deinterleave();
(Vec20, Vec21) = (bufx + sizeof(*bufx))[index0].deinterleave();
(Vec30, Vec31) = (bufx + 2 * sizeof(*bufx))[index0].deinterleave();

// Sum the three loads separately for each register of the pair.
Vec10 += Vec20 + Vec30;
Vec11 += Vec21 + Vec31;

// Multiply each sum by its multiplier vector (VecMul / VecMul1).
Vec20 = Vec10 * VecMul;
Vec21 = Vec11 * VecMul1;

// Shift each product by its shift vector (Vshift / Vshift1).
Vec30 = Vec20 << Vshift;
Vec31 = Vec21 << Vshift1;

// Store the register pair back through an interleaving store.
output[indexout].interleave() = (Vec30, Vec31);


}

}

Where is the problem? Thank you for your help.

  • The following loop for VCOP_2SIMD_WIDTH works correctly. The documentation says that deinterleave requires word alignment. Does "word" mean 4 or 2 bytes?
    // npt() variant that advances by VCOP_2SIMD_WIDTH elements per inner
    // iteration, using two npt() loads/stores per tap instead of
    // deinterleave()/interleave(). The poster reports that this version works.
    for (int I1 = 0; I1 < blockH; I1++)
    {
    for (int I2 = 0; I2 < w / VCOP_2SIMD_WIDTH; I2++)
    {
    // Address offsets for row I1 / vector group I2, scaled by element size.
    __agen indexout = I1 * w * sizeof(*output) + I2 * sizeof(*output) * VCOP_2SIMD_WIDTH;
    __agen index0 = I1 * bufWidth * sizeof(*bufx) + I2 * sizeof(*bufx) * VCOP_2SIMD_WIDTH;

    // Each tap (base offset 0, 1 and 2 times sizeof(*bufx)) is loaded twice:
    // VecN0 at the tap offset and VecN1 a further 16 past it.
    // NOTE(review): 16 is presumably a byte offset equal to one SIMD vector of
    // uint16 elements -- confirm VCOP_SIMD_WIDTH and the pointer arithmetic units.
    Vec10 = (bufx)[index0].npt();
    Vec11 = (bufx + 16)[index0].npt();
    Vec20 = (bufx + sizeof(*bufx ) )[index0].npt();
    Vec21 = (bufx + 16 + sizeof(*bufx) )[index0].npt();
    Vec30 = (bufx + 2 * sizeof(*bufx) )[index0].npt();
    Vec31 = (bufx + 16 + 2 * sizeof(*bufx) )[index0].npt();

    // Sum the three taps for each of the two vectors.
    Vec10 += Vec20 + Vec30;
    Vec11 += Vec21 + Vec31;

    // Multiply each sum by its multiplier vector (VecMul / VecMul1).
    Vec20 = Vec10 * VecMul;
    Vec21 = Vec11 * VecMul1;

    // Shift each product by its shift vector (Vshift / Vshift1).
    Vec30 = Vec20 << Vshift;
    Vec31 = Vec21 << Vshift1;

    // Two stores: the first vector at the output offset, the second 8 past it.
    // NOTE(review): 8 is presumably one SIMD vector of uint8 outputs -- confirm.
    output[indexout].npt() = Vec30;
    (output+8)[indexout].npt() = Vec31;
    }
    }
  • It was my mistake: I used an alignment of 2 bytes, but it must be 4 bytes.
  • Hi Danil,

    thanks for updating the thread.

    Regards,
    Yordan