I use the following code:
/*
 * For every i in 0..9, starts from A[i], subtracts each product A[j]*B[j]
 * (j = 0..9, one at a time, in index order) and stores the result in C[i].
 *
 * A, B: input arrays; elements 0..9 are read.
 * C:    output array; elements 0..9 are written.
 *
 * Returns the sum of the ten values written to C.
 */
float calc(float *A, float *B,float *C)
{
    enum { COUNT = 10 };   /* number of elements processed in A, B and C */
    float Ret = 0;

    for (int i = 0; i < COUNT; i++) {
        float Temp = A[i];

        /*
         * Subtract the products one by one instead of pre-summing them:
         * this keeps the floating-point rounding order exactly as written.
         */
        for (int j = 0; j < COUNT; j++) {
            Temp -= A[j] * B[j];
        }

        Ret += Temp;
        C[i] = Temp;
    }

    /* Ret is the running total of all C[i]; the last Temp alone is just C[9]. */
    return Ret;
}
/*
 * Test driver: runs calc() on two fixed 10-element vectors and prints
 * each element of the result array, one per line.
 */
int main(void)
{
    float A[10] = {0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
    float B[10] = {0.2, 1.2, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9};
    float C[10];

    /* Only the values written to C are inspected; the return value is not used. */
    calc(A, B, C);

    for (int idx = 0; idx < 10; idx++) {
        printf("Result Temp=%f\n", C[idx]);
    }

    return 0;
}
Results with Local Optimization (Level 1), C[0] to C[9] (these are the expected values):
-344.96 -343.86 -342.76 -341.66 -340.56 -339.46 -338.36 -337.26 -336.16 -335.06
Results with Global Optimization (Level 2), C[0] to C[9] (these are wrong):
-54.56 -53.45999 -52.36 -51.25999 -50.15999 -49.06 -47.95999 -46.86 -45.75999 -44.65999
6 {
calc():
00f8d4: B2BD MOVL *SP++, XAR1
00f8d5: BE09 MOVB XAR6, #0x09
00f8d6: AABD MOVL *SP++, XAR2
00f8d7: 8BA4 MOVL XAR1, @XAR4
00f8d8: E20306BD MOV32 *SP++, R6H
00f8da: E20307BD MOV32 *SP++, R7H
00f8dc: 8E4C MOVL XAR0, *-SP[12]
10 for (i=0;i<10;i++)
00f8dd: B51800A6 RPTB #24, @AR6
12 Temp=A[i];
C$L1:
00f8df: E2AF0081 MOV32 R0H, *XAR1++, UNCF
13 for(j=0;j<10;j++)
00f8e1: E591 ZERO R1
00f8e2: 86A4 MOVL XAR2, @XAR4
00f8e3: C5A5 MOVL XAR7, @XAR5
15 Temp-=A[j]*B[j];
00f8e4: E597 ZERO R7
00f8e5: E596 ZERO R6
00f8e6: E6CF0003 MOV32 R3H, R0H, UNCF
00f8e8: E6CF000A MOV32 R2H, R1H, UNCF
00f8ea: F609 RPT #9
00f8eb: E2501F82 || MACF32 R7H, R3H, *XAR2++, *XAR7++ // wrong below
00f8ed: E7100098 ADDF32 R0H, R3H, R2H
00f8ef: E71001B9 ADDF32 R1H, R7H, R6H
00f8f1: 7700 NOP
19 C[i]=Temp;
00f8f2: E7200040 SUBF32 R0H, R0H, R1H
00f8f4: 7700 NOP
00f8f5: E2030080 MOV32 *XAR0++, R0H
C$L2:
00f8f7: E2AF07BE MOV32 R7H, *--SP, UNCF
00f8f9: E2AF06BE MOV32 R6H, *--SP, UNCF
00f8fb: 86BE MOVL XAR2, *--SP
00f8fc: 8BBE MOVL XAR1, *--SP
00f8fd: 0006 LRETR