Tool/software: Code Composer Studio
Hi,
We are facing a performance issue when all the audio effects are turned on. We tried optimizing the code to reduce the instruction count, and we do see the clock count decrease in debug mode (from 500k+ clocks down to 360k+ clocks). However, when the modified code is built in release mode and downloaded to the board, the cycle period seems to become longer. (We use an LED to observe the cycle period; it flickers more slowly when the cycle period gets longer.)
Attached are the original code and the optimized code; please help us analyze them. Thanks.
/*
 * Runs a cascade of numFilters lattice-ladder IIR stages over one stereo block.
 *
 * in, out    : accessed as 2*len floats each; the left channel occupies
 *              [0, len) and the right channel [len, 2*len).
 * filt       : array of numFilters stage records. Each record supplies the
 *              coefficients k1, k2, c1, c2, v0, v1, v2 and holds the
 *              per-channel state (w3, w5 for left; x3, x5 for right), which
 *              is updated in place for every sample.
 * len        : number of samples per channel.
 * numFilters : number of cascaded stages.
 * on         : FALSE copies the input to the output and leaves filt untouched.
 *
 * The MUST_ITERATE pragmas tell the TI compiler that each loop runs at least
 * once, so callers must still pass len >= 1 and numFilters >= 1 when the
 * filter is on.
 */
#pragma CODE_SECTION(iirLatticeLadderStereoProcess, ".criticalSectionInternal")
void iirLatticeLadderStereoProcess(float *restrict in, float *restrict out, iirLatticeLadder *filt, int len, int numFilters, Bool on)
{
    iirLatticeLadder *restrict filt1 = filt;
    float *signalInL = in;
    float *signalInR = in + len;
    float *signalOutL = out;
    float *signalOutR = out + len;
    int i, j;

    // bypass filter
    if (on == FALSE)
    {
        for (j = 0; j < len; j++)
        {
            *signalOutL++ = *signalInL++;
            *signalOutR++ = *signalInR++;
        }
        return;
    }

    #pragma MUST_ITERATE(1, , )
    for (j = 0; j < len; j++)
    {
        // Running value per channel: starts as the input sample and becomes
        // each stage's output in turn. Writing this to the output (instead of
        // a separate "sum" variable set only inside the stage loop) means the
        // stored value is always assigned, even where the pragma is ignored
        // and the stage loop runs zero times.
        float curL = *signalInL++;
        float curR = *signalInR++;

        #pragma MUST_ITERATE(1, , )
        for (i = 0; i < numFilters; i++)
        {
            iirLatticeLadder *stage = &filt1[i];

            // Second lattice section (k2/c2) using the old w5/x5 state.
            float w1L = stage->c2 * curL - stage->k2 * stage->w5;
            float w1R = stage->c2 * curR - stage->k2 * stage->x5;
            float allpassL = stage->k2 * curL + stage->c2 * stage->w5;
            float allpassR = stage->k2 * curR + stage->c2 * stage->x5;

            // First lattice section (k1/c1) using the old w3/x3 state.
            float allpoleL = stage->c1 * w1L - stage->k1 * stage->w3;
            float allpoleR = stage->c1 * w1R - stage->k1 * stage->x3;
            float w4L = stage->k1 * w1L + stage->c1 * stage->w3;
            float w4R = stage->k1 * w1R + stage->c1 * stage->x3;

            // Commit the new state only after every read of the old state.
            stage->w5 = w4L;
            stage->w3 = allpoleL;
            stage->x5 = w4R;
            stage->x3 = allpoleR;

            // Ladder taps: weighted sum that feeds the next stage.
            curL = allpassL * stage->v2 + w4L * stage->v1 + allpoleL * stage->v0;
            curR = allpassR * stage->v2 + w4R * stage->v1 + allpoleR * stage->v0;
        }

        *signalOutL++ = curL;
        *signalOutR++ = curR;
    }
}
/*
 * Runs a cascade of numFilters direct-form-1 second-order IIR sections over
 * one stereo block.
 *
 * in, out    : accessed as 2*len floats each; the left channel occupies
 *              [0, len) and the right channel [len, 2*len).
 * x, y       : per-section input and output history, one row of 6 floats per
 *              section. Columns 0..2 hold the left channel's current, previous
 *              and second-previous values; columns 3..5 hold the same for the
 *              right channel. Rows 0..numFilters-1 are read and updated.
 * filt       : array of numFilters coefficient records (b0, b1, b2, a1, a2).
 * len        : number of samples per channel.
 * numFilters : number of cascaded sections.
 * on         : FALSE copies the input to the output and leaves x, y untouched.
 *
 * The value passed between sections is carried in locals, and each section
 * stores its own input into its own x row. The earlier form stored the last
 * section's output into x[numFilters][0] and x[numFilters][3], a row no
 * section reads, which requires x to have numFilters + 1 rows; this version
 * never touches that row. Rows 0..numFilters-1 end up with the same contents.
 *
 * The MUST_ITERATE pragmas tell the TI compiler that each loop runs at least
 * once, so callers must still pass len >= 1 and numFilters >= 1 when the
 * filter is on.
 */
#pragma CODE_SECTION(iirDirectform1StereoProcess, ".criticalSectionInternal")
void iirDirectform1StereoProcess(float *restrict in, float *restrict out, float (*x)[6], float (*y)[6], iirdirectform1 *filt, int len, int numFilters, Bool on)
{
    iirdirectform1 *restrict filt1 = filt;
    float *signalInL = in;
    float *signalInR = in + len;
    float *signalOutL = out;
    float *signalOutR = out + len;
    int i, j;

    // bypass filter
    if (on == FALSE)
    {
        for (j = 0; j < len; j++)
        {
            *signalOutL++ = *signalInL++;
            *signalOutR++ = *signalInR++;
        }
        return;
    }

    #pragma MUST_ITERATE(1, , )
    for (j = 0; j < len; j++)
    {
        // Value travelling down the cascade: the input sample at first, then
        // each section's output.
        float curL = *signalInL++;
        float curR = *signalInR++;

        #pragma MUST_ITERATE(1, , )
        for (i = 0; i < numFilters; i++)
        {
            iirdirectform1 *sec = &filt1[i];
            float *xs = x[i];
            float *ys = y[i];
            float outL, outR;

            // Record this section's input in its own history row.
            xs[0] = curL;
            xs[3] = curR;

            // Left channel: difference equation, then shift the history.
            outL = sec->b0 * curL + sec->b1 * xs[1] + sec->b2 * xs[2] - sec->a1 * ys[1] - sec->a2 * ys[2];
            ys[0] = outL;
            ys[2] = ys[1];
            ys[1] = outL;
            xs[2] = xs[1];
            xs[1] = curL;

            // Right channel: same equation on columns 3..5.
            outR = sec->b0 * curR + sec->b1 * xs[4] + sec->b2 * xs[5] - sec->a1 * ys[4] - sec->a2 * ys[5];
            ys[3] = outR;
            ys[5] = ys[4];
            ys[4] = outR;
            xs[5] = xs[4];
            xs[4] = curR;

            // This section's output is the next section's input.
            curL = outL;
            curR = outR;
        }

        *signalOutL++ = curL;
        *signalOutR++ = curR;
    }
}
Thanks,
Zhanjun Li

