I believe there is some inconsistency when using the CCS profile clock to perform computational benchmarking on the F28027. I am using the C2000 LaunchPad to perform matrix multiplications for two cases: int32 matrices in the first case and float32 matrices in the second case. In the two programs shown below, I place breakpoints at line 25 and line 66 (marked "first breakpoint" and "second breakpoint" in the code comments). I zero out the profile clock after the first breakpoint is reached, and I focus on the value shown by the profile clock after the second breakpoint is reached. Here are my observations:
1. int32 matrices: For this case, the profile clock shows the correct value at the second breakpoint after one execution of the program. Without leaving the debug session, if I click CPU Reset followed by Restart in order to repeat the experiment, the profile clock value at the second breakpoint will be identical to what was displayed during the first experiment. This is CORRECT behavior (and is consistent with and without compiler optimizations).
2. float32 matrices: For this case, the profile clock shows the correct value at the second breakpoint after one execution of the program. Without leaving the debug session, if I click CPU Reset followed by Restart in order to repeat the experiment, the profile clock value at the second breakpoint will be significantly smaller than what was displayed during the first experiment, and it continues to decrease every time I repeat the experiment. This is INCORRECT behavior (and is consistent with and without compiler optimizations).
I have done similar testing with an F28069 device on a Peripheral Explorer board, and I get CORRECT behavior for both int32 matrices and float32 matrices, so the problem occurs on F28027 but not on F28069. I realize that the F28027 does the floating-point math in software whereas the F28069 does the floating-point math in hardware, but this doesn't change the fact that the profile clock tool provides FALSE information after using CPU Reset followed by Restart only on the F28027.
Here is the first program (int32):
#include "F2802x_Device.h"

/*
 * Benchmark program 1 (int32).
 *
 * Configures the clock and four GPIO pins through the device register
 * structures, then computes the 5x5 matrix product m3 = m1 * m2 with int32
 * elements 100000 times. GPIO0..GPIO3 are written through GPASET before the
 * benchmark and through GPACLEAR in the final endless loop.
 *
 * The profile clock is zeroed at the statement marked "first breakpoint" and
 * read at the statement marked "second breakpoint".
 *
 * The breakpoint markers are block comments on purpose: a line comment
 * swallows the rest of the program if the listing is ever re-flowed onto a
 * single line.
 */
void main(void)
{
    /* Protected-register section (EALLOW ... EDIS). */
    EALLOW;
    /* Presumably disables the watchdog (WDDIS plus WDCHK key) - confirm
     * against the device reference manual. */
    SysCtrlRegs.WDCR = 0x68;
    /* PLL setup sequence; statement order is significant and kept exactly
     * as in the original report. */
    SysCtrlRegs.PLLSTS.bit.DIVSEL = 0;
    SysCtrlRegs.PLLSTS.bit.MCLKOFF = 1;
    SysCtrlRegs.PLLCR.bit.DIV = 6;
    /* Busy-wait until the PLL reports lock. */
    while(SysCtrlRegs.PLLSTS.bit.PLLLOCKS != 1);
    SysCtrlRegs.PLLSTS.bit.MCLKOFF = 0;
    /* NOTE(review): TI's reference InitPll() appears to step DIVSEL through
     * 2 (with a short delay) before selecting 3 - confirm whether the direct
     * 0 -> 3 switch is acceptable here. */
    SysCtrlRegs.PLLSTS.bit.DIVSEL = 3;
    /* Direction bits for GPIO0..GPIO3 (presumably 1 = output - confirm). */
    GpioCtrlRegs.GPADIR.bit.GPIO0 = 1;
    GpioCtrlRegs.GPADIR.bit.GPIO1 = 1;
    GpioCtrlRegs.GPADIR.bit.GPIO2 = 1;
    GpioCtrlRegs.GPADIR.bit.GPIO3 = 1;
    EDIS;

    /* Write the SET bits for GPIO0..GPIO3 before the benchmark starts. */
    GpioDataRegs.GPASET.bit.GPIO0 = 1;
    GpioDataRegs.GPASET.bit.GPIO1 = 1;
    GpioDataRegs.GPASET.bit.GPIO2 = 1;
    GpioDataRegs.GPASET.bit.GPIO3 = 1;

    int32 j, m, n, p;
    int32 m3[5][5] = {                          /* first breakpoint */
        {0 , 0 , 0 , 0 , 0},
        {0 , 0 , 0 , 0 , 0},
        {0 , 0 , 0 , 0 , 0},
        {0 , 0 , 0 , 0 , 0},
        {0 , 0 , 0 , 0 , 0}
    };
    const int32 m1[5][5] = {
        {1, 2, 3, 4, 5},
        {6, 7, 8, 9, 10},
        {11, 12, 13, 14, 15},
        {16, 17, 18, 19, 20},
        {21, 22, 23, 24, 25}
    };
    const int32 m2[5][5] = {
        {1, 2, 3, 4, 5},
        {6, 7, 8, 9, 10},
        {11, 12, 13, 14, 15},
        {16, 17, 18, 19, 20},
        {21, 22, 23, 24, 25}
    };

    /*
     * Benchmark kernel: 100000 repetitions of m3 = m1 * m2.
     * NOTE(review): m3 is never read after this loop in the listing, so an
     * optimizing build may be allowed to drop some or all of this work -
     * check the generated code when comparing cycle counts.
     */
    for(j = 0; j < 100000; j++)
    {
        for(m = 0; m < 5; m++)
        {
            for(p = 0; p < 5; p++)
            {
                m3[m][p] = 0;
                for(n = 0; n < 5; n++)
                {
                    m3[m][p] += m1[m][n] * m2[n][p];
                }
            }
        }
    }

    /* Benchmark finished: keep writing the CLEAR bits for GPIO0..GPIO3. */
    while(1)
    {
        GpioDataRegs.GPACLEAR.bit.GPIO0 = 1;
        GpioDataRegs.GPACLEAR.bit.GPIO1 = 1;
        GpioDataRegs.GPACLEAR.bit.GPIO2 = 1;
        GpioDataRegs.GPACLEAR.bit.GPIO3 = 1;    /* second breakpoint */
    }
}
Here is the second program (float32):
#include "F2802x_Device.h"

/*
 * Benchmark program 2 (float32).
 *
 * Configures the clock and four GPIO pins through the device register
 * structures, then computes the 5x5 matrix product m3 = m1 * m2 with float32
 * elements 100000 times. GPIO0..GPIO3 are written through GPASET before the
 * benchmark and through GPACLEAR in the final endless loop.
 *
 * The profile clock is zeroed at the statement marked "first breakpoint" and
 * read at the statement marked "second breakpoint".
 *
 * The breakpoint markers are block comments on purpose: a line comment
 * swallows the rest of the program if the listing is ever re-flowed onto a
 * single line.
 */
void main(void)
{
    /* Protected-register section (EALLOW ... EDIS). */
    EALLOW;
    /* Presumably disables the watchdog (WDDIS plus WDCHK key) - confirm
     * against the device reference manual. */
    SysCtrlRegs.WDCR = 0x68;
    /* PLL setup sequence; statement order is significant and kept exactly
     * as in the original report. */
    SysCtrlRegs.PLLSTS.bit.DIVSEL = 0;
    SysCtrlRegs.PLLSTS.bit.MCLKOFF = 1;
    SysCtrlRegs.PLLCR.bit.DIV = 6;
    /* Busy-wait until the PLL reports lock. */
    while(SysCtrlRegs.PLLSTS.bit.PLLLOCKS != 1);
    SysCtrlRegs.PLLSTS.bit.MCLKOFF = 0;
    /* NOTE(review): TI's reference InitPll() appears to step DIVSEL through
     * 2 (with a short delay) before selecting 3 - confirm whether the direct
     * 0 -> 3 switch is acceptable here. */
    SysCtrlRegs.PLLSTS.bit.DIVSEL = 3;
    /* Direction bits for GPIO0..GPIO3 (presumably 1 = output - confirm). */
    GpioCtrlRegs.GPADIR.bit.GPIO0 = 1;
    GpioCtrlRegs.GPADIR.bit.GPIO1 = 1;
    GpioCtrlRegs.GPADIR.bit.GPIO2 = 1;
    GpioCtrlRegs.GPADIR.bit.GPIO3 = 1;
    EDIS;

    /* Write the SET bits for GPIO0..GPIO3 before the benchmark starts. */
    GpioDataRegs.GPASET.bit.GPIO0 = 1;
    GpioDataRegs.GPASET.bit.GPIO1 = 1;
    GpioDataRegs.GPASET.bit.GPIO2 = 1;
    GpioDataRegs.GPASET.bit.GPIO3 = 1;

    int32 j, m, n, p;
    float32 m3[5][5] = {                        /* first breakpoint */
        {0.0 , 0.0 , 0.0 , 0.0 , 0.0},
        {0.0 , 0.0 , 0.0 , 0.0 , 0.0},
        {0.0 , 0.0 , 0.0 , 0.0 , 0.0},
        {0.0 , 0.0 , 0.0 , 0.0 , 0.0},
        {0.0 , 0.0 , 0.0 , 0.0 , 0.0}
    };
    const float32 m1[5][5] = {
        {0.0001, 0.001, 0.01, 0.1, 1},
        {0.001, 0.01, 0.1, 1, 10},
        {0.01, 0.1, 1, 10, 100},
        {0.1, 1.0, 10, 100, 1000},
        {1, 10, 100, 1000, 10000}
    };
    const float32 m2[5][5] = {
        {0.0001, 0.001, 0.01, 0.1, 1},
        {0.001, 0.01, 0.1, 1, 10},
        {0.01, 0.1, 1, 10, 100},
        {0.1, 1.0, 10, 100, 1000},
        {1, 10, 100, 1000, 10000}
    };

    /*
     * Benchmark kernel: 100000 repetitions of m3 = m1 * m2.
     * NOTE(review): m3 is never read after this loop in the listing, so an
     * optimizing build may be allowed to drop some or all of this work -
     * check the generated code when comparing cycle counts.
     */
    for(j = 0; j < 100000; j++)
    {
        for(m = 0; m < 5; m++)
        {
            for(p = 0; p < 5; p++)
            {
                m3[m][p] = 0;
                for(n = 0; n < 5; n++)
                {
                    m3[m][p] += m1[m][n] * m2[n][p];
                }
            }
        }
    }

    /* Benchmark finished: keep writing the CLEAR bits for GPIO0..GPIO3. */
    while(1)
    {
        GpioDataRegs.GPACLEAR.bit.GPIO0 = 1;
        GpioDataRegs.GPACLEAR.bit.GPIO1 = 1;
        GpioDataRegs.GPACLEAR.bit.GPIO2 = 1;
        GpioDataRegs.GPACLEAR.bit.GPIO3 = 1;    /* second breakpoint */
    }
}