I am running the attached code on a DSP core (C66) of the AM5728.
This code was purchased from TI. We have unit test code that uses this function that was developed on a 6482 and is my working baseline. I am now porting this code to the AM5728. The function in question is LTELIB_genRefSignals_shft(). For a given set of inputs,
numSubCarrAlloc = 36
N_ZC = 31
u = 1
v = 0
alpha = 10
I should get an output vector in OutputSequence[].
That output vector changes depending on the optimizer setting: with Opt_level = off, 0, or 1 the result is correct; with Opt_level = 2 or 3 it is not. The sequence changes quite dramatically, actually.
Working vector: 0 32766 -32537 3864 5508 -32300 21343 24860 30322 -12407 31089 10332
-25906 -20049 5509 32288 27192 -18257 -18711 -26875 -31955 7138 -17778 27490
-6582 32064 -7659 31818 -20456 25540 -32673 1668 -11891 -30483 31384 -9277
Broken vector: 0 32766 -32537 3864 17796 -32768 21343 24860 -30322 -12407 31089 10332
0 0 3 1 32767 -14864 0 0 -31955 7138 -17778 27490
-6582 32064 -7659 31818 -32768 26573 -32673 1668 -11891 -30483 31384 -9277
Sure, I can turn the optimizer off for this file, but we have dozens of files in this library that use intrinsics and could potentially be broken in the same way without my knowing it. I am hoping to identify the offending code so that, through inspection, we can find other possible offenders.
/**
 *  \file   LTELIB_genUL_PUSCH_demodRS.c
 *
 *  \brief  Functions to generate UL PUSCH demodulator reference signal
 *
 *  Copyright (c) Texas Instruments Incorporated 2007
 *
 *  Use of this software is controlled by the terms and conditions found in the
 *  license agreement under which this software has been supplied or provided.
 *
 */

#include "swpform.h"
#include "LTELIB_genUL_PUSCH_demodRS.h"
#include "table_short_PUSCH_ref_sig.h"
#include "table_sin_cos.h"

#ifdef _TMS320C6X
#include "C6x.h"
#endif

/* Stride, in output samples, between table-based recomputations of the
 * recursion state (c, d) in the generator loops below.
 * Original note: 0 means never, 1 means always */
#define RESETINTV 16

/**
 *  \fn     LTELIB_genRefSignals(
 *              OUT cplx16_t OutputSequence[RESTRICT],
 *              IN Uint16 numSubCarrAlloc,
 *              IN Uint16 N_ZC,
 *              IN Uint16 u,
 *              IN Uint16 v);
 *
 *  \brief  Generates the UL PUSCH demodulator reference signal excluding the shift with the factor alpha
 *
 *  The first N_ZC entries are written symmetrically (entry i and entry
 *  N_ZC - 1 - i receive the same value). Entries N_ZC .. numSubCarrAlloc - 1
 *  repeat the entries from the start of the sequence. Entry N_ZC itself is
 *  always written, so the caller must provide at least N_ZC + 1 entries.
 *
 *  \param[out] OutputSequence
 *      Pointer to a vector where the uplink demodulation reference signal is stored. The length of
 *      the vector is the numSubCarrAlloc entries. Entries are written with 32-bit accesses.
 *
 *  \param[in]  numSubCarrAlloc
 *      Number of subcarriers allocated to the allocation.
 *
 *  \param[in]  N_ZC
 *      Number of Zadoff-Chu sequence elements; related to numSubCarrAlloc as the largest prime number
 *      less than numSubCarrAlloc.
 *
 *  \param[in]  u
 *      Base sequence group number. Incremented by one inside the function before use.
 *
 *  \param[in]  v
 *      Base sequence number within the group. Added to or subtracted from the root index q
 *      depending on the parity of the intermediate value qbar.
 *
 *  \pre    none
 *
 *  \post   none
 *
 *  \sa     none
 *
 */
void LTELIB_genRefSignals(
    OUT cplx16_t OutputSequence[RESTRICT],
    IN Uint16 numSubCarrAlloc,
    IN Uint16 N_ZC,
    IN Uint16 u,
    IN Uint16 v)
{
    Int32 expn1,expn;
    Int32 normal;
    Uint32 a, b;
    Int32 i;
    Uint32 xr, mask;
    Int32 c,d,k;
    Uint32 a2;
    Uint32 a1;
    Int32 cscoartab, csfinetab;
    Uint32 CosSinIndx;
    Int32 rc;
    Uint32 ttmp1, ttmp2;
    Uint64 lltemp;
    Int32 * RESTRICT tempPtr1, * RESTRICT tempPtr2;
    Uint16 q;
    Int32 qbar;

    /* ---- Root index q from (u + 1), N_ZC and ONE_OVER_31 ---- */
    /* Earlier variants, kept for reference: */
    // qbar=_mpyhl(ONE_OVER_31,u+1)+1;
    // qbar=_mpylir(u+1,ONE_OVER_31)+1;
    u++;
    normal = _norm(u);                            /* normalise u so the high half carries the value */
    qbar = _mpyhir(u<<normal,ONE_OVER_31)<<1;
    qbar = _mpylir(N_ZC,qbar);
    // q= (Uint16) ( (qbar + 0x00004000)>>15); /* Add 1/2 and do floor operator */
    q = (Uint16) ( (qbar + (1 << (normal-15)))>>(normal-14)); /* Add 1/2 and do floor operator */
    // qbar >>= 14; /* Shift by 14 because I'm doing floor operation after multiplying by 2 */
    qbar >>= (normal-15); /* Shift by normal-15 because I'm doing floor operation after multiplying by 2 */
    if( (qbar & 0x00000001) == 0) /* even ? */
    {
        q += v;
    }
    else
    {
        q -= v;
    }

    /* ---- Fixed-point reciprocal of N_ZC using conditional-subtract steps ---- */
    normal = _norm(N_ZC);
    a = (N_ZC << normal) & 0x7fff0000;
    b = 0x80000000; /* dividend = 1 */
#ifdef _TMS320C6X
#pragma MUST_ITERATE(15,15);
#endif
    for( i = 15; i > 0; i-- )
    {
        b = _subc(b,a); /* divide */
    }

    /* Work with remainder to improve precision */
    xr = ( b & 0x7fff ) << ( normal - 14 );       /* low 15 bits of the first pass */
    expn = _norm(xr) + 1;
    xr <<= expn;
    b = ( b & 0xffff0000 ) >> 1;                  /* upper half of the first pass feeds a second pass */
    expn1 = _norm(b);
    b = ( b << expn1 ) & 0xffff0000;
#ifdef _TMS320C6X
#pragma MUST_ITERATE(15,15);
#endif
    for( i = 15; i > 0; i-- )
    {
        b = _subc(b,a); /* divide */
    }
    /* NOTE: i is 0 after this loop; the tail loop at the end of the function
     * relies on that when the while loop below does not execute. */
    b = _sshvl( b & 0xffff, normal + expn - expn1 - 28 );
    xr |= b;                                      /* merge both passes into one word */
    a1 = xr;                                      /* kept for the re-synchronisation step below */

    /* ---- Initial step value d = k from the coarse/fine sine-cosine tables ---- */
    lltemp = _mpy32u(xr, q);
    ttmp1 = _hill(lltemp) << ( 33 - expn);
    ttmp2 = _loll(lltemp) >> (expn - 1);
    xr = ttmp1 | ttmp2;
    /* '-' binds tighter than '>>': this is xr >> (32 - HALFTABLERESOL) */
    CosSinIndx = xr >> 32 - HALFTABLERESOL;
    cscoartab = _amem4( (Int32 *)CosSinCoarse + CosSinIndx );
    /* likewise 0xffffffff >> (32 - HALFTABLERESOL): low HALFTABLERESOL bits set */
    mask = 0xffffffff>>32 - HALFTABLERESOL;
    CosSinIndx = (xr >> ( 32-(HALFTABLERESOL << 1 ) ) ) & mask;
    csfinetab = _amem4( (Int32 *)CosSinFine + CosSinIndx );
    /* Combine coarse and fine table entries; sign/packing fix-up differs per endianness */
#ifdef _LITTLE_ENDIAN
    d = _ssub2(0, _cmpyr1(cscoartab, csfinetab));
#else
    d = _cmpyr1(cscoartab, csfinetab);
    d = _packhl2(d, _ssub2(0, d));
#endif
    k = d;                                        /* constant multiplier applied to d every step */

    /* ---- Seed value and the three entries that take it unchanged ---- */
    c = 0x7fff0000;
    _mem4(&OutputSequence[0]) = c;
    _mem4(&OutputSequence[N_ZC - 1])= c;
    _mem4(&OutputSequence[N_ZC]) = c;

    /* ---- Main generation, in chunks of RESETINTV samples ---- */
    rc = 0;
    while(( RESETINTV + rc ) <= ( N_ZC>>1 ))
    {
        tempPtr1 = (Int32 *) &OutputSequence[1 + rc];            /* walks upward from the start   */
        tempPtr2 = (Int32 *) &OutputSequence[N_ZC - 2 - rc];     /* walks downward, mirror of it  */
#ifdef _TMS320C6X
#pragma MUST_ITERATE(RESETINTV - 1);
#endif
        for( i = 1 + rc; i < RESETINTV + rc; i++)
        {
            /* Recursion: c is multiplied by d, and d by the constant k, every step */
            c = _cmpyr1(c, d);
            d = _cmpyr1(k, d);
            if( i + N_ZC < numSubCarrAlloc )
            {
                _amem4( tempPtr1 + N_ZC) = c;                    /* repeated entry beyond N_ZC */
            }
            _amem4(tempPtr1++) = c;
            _amem4(tempPtr2--) = c;
        }

        /* Re-synchronisation: recompute d and c for index i directly from the
         * tables instead of continuing the recursion (presumably to bound the
         * accumulated rounding error -- TODO confirm). */
        a2 = q * (i + 1);
        lltemp = _mpy32u(a2, a1);
        ttmp1 = _hill(lltemp) << ( 33 - expn);
        ttmp2 = _loll(lltemp) >> (expn - 1);
        xr = ttmp1 | ttmp2;
        csfinetab = _amem4( (Int32 *)CosSinFine + ((xr >> (32 - (HALFTABLERESOL << 1)) ) & mask ) );
        cscoartab = _amem4( (Int32 *)CosSinCoarse + (xr >> 32 - HALFTABLERESOL) );
#ifdef _LITTLE_ENDIAN
        d = _ssub2(0, _cmpyr1(cscoartab, csfinetab));
#else
        d = _cmpyr1(cscoartab, csfinetab);
        d = _packhl2(d, _ssub2(0, d));
#endif
        a2 *= i;                                  /* a2 is now q * (i + 1) * i */
        lltemp = _mpy32u(a2, a1);
        ttmp1 = _hill(lltemp) << ( 32 - expn);    /* one bit less than for d above */
        ttmp2 = _loll(lltemp) >> (expn);
        xr = ttmp1 | ttmp2;
        csfinetab = _amem4( (Int32 *)CosSinFine + (( xr >> (32 - (HALFTABLERESOL << 1))) & mask ));
        cscoartab = _amem4( (Int32 *)CosSinCoarse + (xr >> (32 - HALFTABLERESOL)));
#ifdef _LITTLE_ENDIAN
        c = _ssub2(0, _cmpyr1(cscoartab, csfinetab));
#else
        c = _cmpyr1(cscoartab, csfinetab);
        c = _packhl2(c, _ssub2(0, c));
#endif
        _amem4(&OutputSequence[i]) = c;
        _amem4(&OutputSequence[N_ZC - i - 1]) = c;
        if(i + N_ZC < numSubCarrAlloc)
        {
            _amem4( &OutputSequence[ i + N_ZC ] ) = c;
        }
        rc += RESETINTV;
    }

    /* ---- Tail: remaining samples up to the middle of the sequence ----
     * i starts from the last re-synchronised index, or from 0 when the
     * while loop above did not run. */
    for( i++; i <= (N_ZC >> 1); i++ )
    {
        c = _cmpyr1(c, d);
        d = _cmpyr1(k, d);
        _amem4(&OutputSequence[i]) = c;
        _amem4(&OutputSequence[N_ZC - i - 1]) = c;
        if( i + N_ZC < numSubCarrAlloc )
        {
            _amem4( &OutputSequence[ i + N_ZC ] ) = c;
        }
    }
    return;
}

/**
 *  \fn     void LTELIB_genRefSignals_shft(
 *              OUT cplx16_t OutputSequence[RESTRICT],
 *              IN Uint16 numSubCarrAlloc,
 *              IN Uint16 N_ZC,
 *              IN Uint16 u,
 *              IN Uint16 v,
 *              IN Uint16 alpha);
 *
 *  \brief  Generates the UL PUSCH demodulator reference signal including the shift with the factor alpha
 *
 *  \param[out] OutputSequence
 *      Pointer to a vector where the uplink demodulation reference signal is stored. The length of
 *      the vector is numSubCarrAlloc entries.
 *
 *  \param[in]  numSubCarrAlloc
 *      Number of subcarriers allocated to the allocation.
 *
 *  \param[in]  N_ZC
 *      Number of Zadoff-Chu elements in sequence; related to numSubCarrAlloc as the largest prime number
 *      less than numSubCarrAlloc.
 *
 *  \param[in]  u
 *      Base sequence group number.
 *
 *  \param[in]  v
 *      Base sequence number within the group.
 *
 *  \param[in]  alpha
 *      Cyclic shift in the time domain. Alpha takes integer values between 0 and 11.
The cyclic shift is actually * calculated as exp(j*2*pi*alpha*index/12). * * \pre none * * \post none * * \sa none * */ void LTELIB_genRefSignals_shft( OUT cplx16_t OutputSequence[RESTRICT], IN Uint16 numSubCarrAlloc, IN Uint16 N_ZC, IN Uint16 u, IN Uint16 v, IN Uint16 alpha) { Int32 expn1,expn; Int32 normal, normal1; Uint32 a, b; Int32 i; Uint32 xr, mask; Int32 c,d,k; Uint32 a2; Uint32 a1; Int32 cscoartab, csfinetab; Uint32 CosSinIndx; Int32 rc; Int32 z, s; Uint32 ShiftExp; Uint32 ttmp1, ttmp2; Uint64 lltemp; Int32 * RESTRICT tempPtr1, * RESTRICT tempPtr2; Int32 MkeMdl12, MkeMdl12h, MkeMdl12o; Int32 ShiftSequence[12]; Uint16 q; Int32 qbar; u++; normal = _norm(u); qbar = _mpyhir(u << normal, ONE_OVER_31) << 1; qbar = _mpylir(N_ZC, qbar); q = (Uint16) ( (qbar + (1 << (normal-15)))>>(normal - 14)); /* Add 1/2 and do floor operator */ qbar >>= (normal - 15); /* Shift by normal-15 because I'm doing floor operation after multiplying by 2 */ if( (qbar & 0x00000001) == 0) /* even ? */ { q += v; } else { q -= v; } normal = _norm(N_ZC); a = (N_ZC << normal) & 0x7fff0000; b = 0x80000000; /* dividend = 1 */ #ifdef _TMS320C6X #pragma MUST_ITERATE(15,15); #endif for(i = 15; i > 0; i--) { b = _subc(b,a); /* divide */ } /* Work with reminder to improve precision */ xr = ( b & 0x7fff ) << (normal - 14); expn = _norm(xr) + 1; xr <<= expn; b = ( b & 0xffff0000 ) >> 1; expn1 = _norm(b); b = (b << expn1) & 0xffff0000; #ifdef _TMS320C6X #pragma MUST_ITERATE(15,15); #endif for( i = 15; i > 0; i--) { b = _subc(b,a); /* divide */ } b = _sshvl( b & 0xffff, normal + expn - expn1 - 28); xr |= b; a1 = xr; lltemp = _mpy32u(xr, q); ttmp1 = _hill(lltemp) << ( 33 - expn); ttmp2 = _loll(lltemp) >> (expn - 1); xr = ttmp1 | ttmp2; CosSinIndx = xr >> 32 - HALFTABLERESOL; cscoartab = _amem4((Int32 *)CosSinCoarse + CosSinIndx); mask = 0xffffffff >> 32 - HALFTABLERESOL; CosSinIndx = (xr >> (32-(HALFTABLERESOL<<1))) & mask; csfinetab = _amem4( (Int32 *)CosSinFine+CosSinIndx ); #ifdef _LITTLE_ENDIAN d = 
_ssub2(0, _cmpyr1(cscoartab, csfinetab)); #else d = _cmpyr1(cscoartab, csfinetab); d = _packhl2(d, _ssub2(0, d)); #endif k = d; c = 0x7fff0000; normal1 = _norm(alpha); ShiftExp = alpha << normal1; ShiftExp = _mpyhir( ShiftExp, ONE_OVER_THREE ) << (30 - normal1); /* Implicit in the shift of mpyhir is the division by to have division by 12 in Q32 */ CosSinIndx = ShiftExp >> (32 - HALFTABLERESOL); /* Since alpha is Q32 and normalized to 2*pi it can be used directly as index */ cscoartab = _amem4( (Int32 *)CosSinCoarse + CosSinIndx); CosSinIndx = (ShiftExp >> (32-(HALFTABLERESOL<<1)) ) & mask; csfinetab = _amem4( (Int32 *)CosSinFine + CosSinIndx ); #ifdef _LITTLE_ENDIAN z = _packh2( _sshvl( (_mpy(cscoartab,csfinetab) - _mpyh(cscoartab,csfinetab)), 1), _sshvl( (_mpyhl(cscoartab,csfinetab) + _mpylh(cscoartab,csfinetab)), 1) ); #else z = _packh2( _sshvl( (_mpyh(cscoartab,csfinetab) - _mpy(cscoartab,csfinetab)), 1), _sshvl( (_mpyhl(cscoartab,csfinetab) + _mpylh(cscoartab,csfinetab)), 1) ); #endif s = 0x7fff0000; /* For the cyclic shift given by alpha */ ShiftSequence[0]= 0x7fff0000; ShiftSequence[1]= z; i = 0; rc = 0; #ifdef _TMS320C6X #pragma MUST_ITERATE(11,11); #endif for( i = 2; i < 12; i++ ) { s = alpha * i; normal1 = _norm(s); ShiftExp = s << normal1; ShiftExp = _mpyhir( ShiftExp, ONE_OVER_THREE ) << (30 - normal1); /* Implicit in the shift of mpyhir is the division by to have division by 12 in Q32 */ CosSinIndx = ShiftExp >> (32 - HALFTABLERESOL); /* Since alpha is Q32 and normalized to 2*pi it can be used directly as index */ cscoartab = _amem4( (Int32 *)CosSinCoarse + CosSinIndx); CosSinIndx = (ShiftExp >> (32-(HALFTABLERESOL<<1)) ) & mask; csfinetab = _amem4( (Int32 *)CosSinFine + CosSinIndx ); #ifdef _LITTLE_ENDIAN z = _packh2( _sshvl( (_mpy(cscoartab,csfinetab) - _mpyh(cscoartab,csfinetab)), 1), _sshvl( (_mpyhl(cscoartab,csfinetab) + _mpylh(cscoartab,csfinetab)), 1) ); #else z = _packh2( _sshvl( (_mpyh(cscoartab,csfinetab) - _mpy(cscoartab,csfinetab)), 1), 
_sshvl( (_mpyhl(cscoartab,csfinetab) + _mpylh(cscoartab,csfinetab)), 1) ); #endif ShiftSequence[i] = z; } i = 0; MkeMdl12 = 1; MkeMdl12h = N_ZC - 2; while( MkeMdl12h > 0 ) { MkeMdl12h -= 12; } MkeMdl12h += 12; MkeMdl12o = MkeMdl12h + 3; if( MkeMdl12o > 11 ) { MkeMdl12o -= 12; } _amem4(&OutputSequence[0]) = _cmpyr1(ShiftSequence[0], c); if( (MkeMdl12h + 1) < 12) { _amem4(&OutputSequence[N_ZC - 1]) = _cmpyr1(ShiftSequence[MkeMdl12h + 1], c); } else { _amem4(&OutputSequence[N_ZC - 1]) = _cmpyr1(ShiftSequence[MkeMdl12h - 11], c); } if(( MkeMdl12h + 2 ) < 12) { _amem4( &OutputSequence[ N_ZC ] ) = _cmpyr1(ShiftSequence[MkeMdl12h + 2], c); } else { _amem4( &OutputSequence[ N_ZC ] ) = _cmpyr1(ShiftSequence[MkeMdl12h - 10], c); } rc = 0; while( (RESETINTV + rc) <= ( N_ZC >> 1 ) ) { tempPtr1 = (Int32 *) &OutputSequence[1 + rc]; tempPtr2 = (Int32 *) &OutputSequence[N_ZC - 2 - rc]; #ifdef _TMS320C6X #pragma MUST_ITERATE(RESETINTV-1,RESETINTV-1); #endif for( i = 1 + rc; i < RESETINTV + rc; i++ ) { c = _cmpyr1(c, d); d = _cmpyr1(k, d); if( i + N_ZC < numSubCarrAlloc ) { _amem4( tempPtr1 + N_ZC) = _cmpyr1(ShiftSequence[MkeMdl12o++], c); if(MkeMdl12o == 12) { MkeMdl12o = 0; } } _amem4(tempPtr1++) = _cmpyr1(ShiftSequence[MkeMdl12++], c); if(MkeMdl12 == 12) { MkeMdl12 = 0; } _amem4(tempPtr2--) = _cmpyr1(ShiftSequence[MkeMdl12h--], c); if( MkeMdl12h < 0 ) { MkeMdl12h = 11; } } a2 = q * (i + 1); lltemp = _mpy32u(a2, a1); ttmp1 = _hill(lltemp) << ( 33 - expn); ttmp2 = _loll(lltemp) >> (expn - 1); xr = ttmp1 | ttmp2; csfinetab = _amem4( (Int32 *)CosSinFine + ((xr >> (32 - (HALFTABLERESOL << 1)) ) & mask ) ); cscoartab = _amem4( (Int32 *)CosSinCoarse + (xr >> 32 - HALFTABLERESOL) ); #ifdef _LITTLE_ENDIAN d = _ssub2(0, _cmpyr1(cscoartab, csfinetab)); #else d = _cmpyr1(cscoartab, csfinetab); d = _packhl2(d, _ssub2(0, d)); #endif a2 *= i; lltemp = _mpy32u(a2, a1); ttmp1 = _hill(lltemp) << ( 32 - expn); ttmp2 = _loll(lltemp) >> (expn); xr = ttmp1 | ttmp2; csfinetab = _amem4( (Int32 
*)CosSinFine + (( xr >> (32 - (HALFTABLERESOL << 1))) & mask )); cscoartab = _amem4( (Int32 *)CosSinCoarse + (xr >> (32 - HALFTABLERESOL))); #ifdef _LITTLE_ENDIAN c = _ssub2(0, _cmpyr1(cscoartab, csfinetab)); #else c = _cmpyr1(cscoartab, csfinetab); c = _packhl2(c, _ssub2(0, c)); #endif if(i + N_ZC < numSubCarrAlloc) { _amem4( &OutputSequence[ i + N_ZC ] ) = _cmpyr1(ShiftSequence[MkeMdl12o++], c); if(MkeMdl12o == 12) { MkeMdl12o = 0; } } _amem4(&OutputSequence[i]) = _cmpyr1(ShiftSequence[MkeMdl12++], c); if(MkeMdl12 == 12) { MkeMdl12 = 0; } _amem4(&OutputSequence[N_ZC - i - 1]) = _cmpyr1(ShiftSequence[MkeMdl12h--], c); if(MkeMdl12h < 0) { MkeMdl12h = 11; } rc += RESETINTV; } for( i++; i <= ( N_ZC >> 1); i++ ) { c = _cmpyr1(c, d); d = _cmpyr1(k, d); if(i + N_ZC < numSubCarrAlloc) { _amem4( &OutputSequence[ i + N_ZC ] ) = _cmpyr1(ShiftSequence[MkeMdl12o++], c); if( MkeMdl12o == 12 ) { MkeMdl12o = 0; } } _amem4(&OutputSequence[i]) = _cmpyr1(ShiftSequence[MkeMdl12++], c); if( MkeMdl12 == 12 ) { MkeMdl12 = 0; } _amem4(&OutputSequence[N_ZC - i - 1]) = _cmpyr1(ShiftSequence[MkeMdl12h--], c); if( MkeMdl12h < 0 ) { MkeMdl12h = 11; } } return; } /** * \fn LTELIB_genRefSignals_short( * OUT cplx16_t OutputSequence[RESTRICT], * IN Uint16 numSubCarrAlloc, * IN Uint16 SeqIndx); * * \brief Generates the UL PUSCH demodulator reference signal for lengths 12 or 24. * * \param[out] OutputSequence * Pointer to a vector where the uplink demodulation reference signal is stored. The length of * the vector is numSubCarrAlloc entries. * * \param[in] numSubCarrAlloc * Number of subcarriers allocated to the allocation (possible values are 12 or 24). * * \param[in] SeqIndx * Index into the table containing the sequences. 
* * * \pre none * * \post none * * \sa none * */ void LTELIB_genRefSignals_short( OUT cplx16_t OutputSequence[RESTRICT], IN Uint16 numSubCarrAlloc, IN Uint16 SeqIndx) { Int32 Count; Int64 * RESTRICT tempPtr1; if( numSubCarrAlloc == 12 ) { tempPtr1 = ((Int64 *) refSeqLen12) + SeqIndx * 6; #ifdef _TMS320C6X #pragma MUST_ITERATE(6,6); #endif for( Count = 0; Count < 6; Count++ ) _amem8( (Int64 *)OutputSequence + Count) = _amem8(tempPtr1 + Count); } else if( numSubCarrAlloc == 24 ) { tempPtr1 = ((Int64 *) refSeqLen24) + SeqIndx * 12; #ifdef _TMS320C6X #pragma MUST_ITERATE(12, 12); #endif for( Count = 0; Count < 12; Count++ ) _amem8( (Int64 *)OutputSequence + Count) = _amem8(tempPtr1 + Count); } return; } /** * \fn LTELIB_genRefSignals_short_shft( * OUT cplx16_t OutputSequence[RESTRICT], * IN Uint16 numSubCarrAlloc, * IN Uint16 SeqIndx); * * \brief Generates the UL PUSCH demodulator reference signal for lengths 12 or 24 with cyclic shift to separate streams. * * \param[out] OutputSequence * Pointer to a vector where the uplink demodulation reference signal is stored. The length of * the vector is numSubCarrAlloc entries. * * \param[in] numSubCarrAlloc * Number of subcarriers allocated to the allocation (possible values are 12 or 24). * * \param[in] SeqIndx * Index into the table containing the sequences. * * \param[in] alpha * Cyclic shift in the time domain. Alpha takes integer values between 0 and 11. The cyclic shift is actually * calculated as exp(j*2*pi*alpha*index/12). 
* * \pre none * * \post none * * \sa none * */ void LTELIB_genRefSignals_short_shft( OUT cplx16_t OutputSequence[RESTRICT], IN Uint16 numSubCarrAlloc, IN Uint16 SeqIndx, IN Uint16 alpha) { Int32 Count; Int32 * RESTRICT tempPtr1; Int32 ShiftSequence[12]; Int32 cscoartab, csfinetab; Uint32 CosSinIndx; Int32 z,s,i; Uint32 ShiftExp; Int32 normal1; Uint32 mask; normal1 = _norm(alpha); ShiftExp = alpha << normal1; ShiftExp = _mpyhir( ShiftExp, ONE_OVER_THREE ) << (30 - normal1); /* Implicit in the shift of mpyhir is the division by to have division by 12 in Q32 */ CosSinIndx = ShiftExp >> (32 - HALFTABLERESOL); /* Since alpha is Q32 and normalized to 2*pi it can be used directly as index */ cscoartab = _amem4( (Int32 *)CosSinCoarse + CosSinIndx); mask = 0xffffffff >> (32 - HALFTABLERESOL); CosSinIndx = (ShiftExp >> (32 - (HALFTABLERESOL << 1)) ) & mask; csfinetab = _amem4( (Int32 *)CosSinFine + CosSinIndx ); #ifdef _LITTLE_ENDIAN z = _packh2( _sshvl( (_mpy(cscoartab,csfinetab) - _mpyh(cscoartab,csfinetab)), 1), _sshvl( (_mpyhl(cscoartab,csfinetab) + _mpylh(cscoartab,csfinetab)), 1) ); #else z = _packh2( _sshvl( (_mpyh(cscoartab,csfinetab) - _mpy(cscoartab,csfinetab)), 1), _sshvl( (_mpyhl(cscoartab,csfinetab) + _mpylh(cscoartab,csfinetab)), 1) ); #endif ShiftSequence[0]= 0x7fff0000; ShiftSequence[1]= z; #ifdef _TMS320C6X #pragma MUST_ITERATE(11,11); #endif for( i = 2; i < 12; i++ ) { s = alpha * i; normal1 = _norm(s); ShiftExp = s << normal1; ShiftExp = _mpyhir( ShiftExp, ONE_OVER_THREE ) << (30 - normal1); /* Implicit in the shift of mpyhir is the division by to have division by 12 in Q32 */ CosSinIndx = ShiftExp >> (32 - HALFTABLERESOL); /* Since alpha is Q32 and normalized to 2*pi it can be used directly as index */ cscoartab = _amem4( (Int32 *)CosSinCoarse + CosSinIndx); CosSinIndx = (ShiftExp >> (32 - (HALFTABLERESOL << 1)) )&mask; csfinetab = _amem4( (Int32 *)CosSinFine + CosSinIndx ); #ifdef _LITTLE_ENDIAN z = _packh2( _sshvl( (_mpy(cscoartab,csfinetab) - 
_mpyh(cscoartab,csfinetab)), 1), _sshvl( (_mpyhl(cscoartab,csfinetab) + _mpylh(cscoartab,csfinetab)), 1) ); #else z = _packh2( _sshvl( (_mpyh(cscoartab,csfinetab) - _mpy(cscoartab,csfinetab)), 1), _sshvl( (_mpyhl(cscoartab,csfinetab) + _mpylh(cscoartab,csfinetab)), 1) ); #endif ShiftSequence[i] = z; } if( numSubCarrAlloc == 12 ) { tempPtr1 = ((Int32 *) refSeqLen12) + SeqIndx * 12; #ifdef _TMS320C6X #pragma MUST_ITERATE(12,12); #endif for( Count = 0; Count < 12; Count++ ) _amem4( OutputSequence + Count) = _cmpyr1(ShiftSequence[Count], _amem4(tempPtr1 + Count)); } else if( numSubCarrAlloc == 24 ) { tempPtr1 = ((Int32 *) refSeqLen24) + SeqIndx * 24; #ifdef _TMS320C6X #pragma MUST_ITERATE(12,12); #endif for( Count = 0; Count < 12; Count++ ) _amem4( OutputSequence + Count) = _cmpyr1(ShiftSequence[Count], _amem4(tempPtr1 + Count)); #ifdef _TMS320C6X #pragma MUST_ITERATE(12,12); #endif for( Count = 12; Count < 24; Count++ ) _amem4( OutputSequence+Count) = _cmpyr1(ShiftSequence[Count - 12], _amem4(tempPtr1 + Count)); } return; }