I am seeing what appears to be a C2000 compiler bug. When the C++ code shown below is compiled, it produces a 32-bit write that clears the upper 16 bits instead of preserving them through the OR.
Notes:
1. The struct labeled "fpga" within gd is a group of pointers that point to a memory-mapped I/O device. gd.fpga.control->header is defined
as unsigned long.
2. If I exclude the "proprietary code" the problem does not manifest.
3. If I change the two lines labeled "W1" and "W2" to the single line labeled "W3", the problem does not manifest.
4. The problem is new to CCS version 6.1 tools. It is not seen with version 5.5.
So it appears the problem is somehow related to optimization, especially given note 2. The compiler appears to be setting up the registers to optimize for
the code that follows.
When trying to produce a version of a pre-compiled file that doesn't contain a bunch of proprietary code, I observed
the assembly language would take on a 2nd non-working variant; after reading in the initial 32-bit value to ACC, it would copy the lower 16 bits to P,
operate on P and then write the full 32 bits of P back to the output. This had the effect of making the upper 16 bits arbitrary depending on what was already in P.
Also, I tried writing it without using the local variable "header" and just doing the bit-wise OR directly on the IO register and it produced a similar failure.
Below the non-working assembly language code, I've included a snippet of the working assembly code that results from the changes mentioned in notes 2 and 3 above.
The compile command line is:
"C:/ti/ccsv6/tools/compiler/c2000_6.4.11/bin/cl2000" -v28 -ml -mt --float_support=fpu32 -O3 --opt_for_speed=3 --include_path= ... -g
--define= ... --diag_warning=225 --display_error_number --rpt_threshold=8 -k --preproc_with_compile --preproc_dependency=".../src/myFile.pp" --obj_directory=".../src" "D:/xfpWorkspace/2.4/xfp/3L/BaseSoftware/src/myFile.cpp"
_______________________________________________________________________________
C++ source code:
// Debug capture: calc() stores a copy of the header register here when bit 16 of
// that register is set.
volatile Uint32 ledDebug = 0;
// Minimal reproduction of the suspected code-generation fault.
//
// When gd.numToAvg is non-zero: take a snapshot of the 32-bit header register,
// optionally latch the register into ledDebug, OR 0x20 into the snapshot and
// write the snapshot back. Expected result: only bit 5 changes; bits 31..16 of
// the register keep the value that was read.
//
// The statement form matters for the reproduction: the separate W1/W2 statements
// trigger the fault, while the single-statement form W3 does not.
void calc(void)
{
// NOTE(review): i and counter are not referenced in the code shown here;
// presumably they are used by the omitted proprietary section.
int i;
static unsigned int counter = 0;
if(gd.numToAvg) {
// Snapshot of the register; W1/W2 below modify and write back this value.
unsigned long header = gd.fpga.control->header;
// Bit 16 is tested with a fresh read of the register, not with the snapshot.
if (gd.fpga.control->header&0x00010000)
{
// Another fresh read of the register, captured for debugging.
ledDebug = gd.fpga.control->header;
}
/* W1 */ header |= 0x00000020;
/* W2 */ gd.fpga.control->header = header; /*** BUG: the generated 32-bit store carries the correct lower 16 bits but 0s the upper 16 bits ****/
// Working alternative (replaces W1 and W2 with one statement):
// /* W3*/ gd.fpga.control->header = header | 0x00000020;
}
/***** PROPRIETARY CODE HERE *****/
}
_______________________________________________________________________________
NON-WORKING Generated assembly code:
MOVL *SP++,XAR1 ; [CPU_]
MOVL *SP++,XAR2 ; [CPU_]
MOVL *SP++,XAR3 ; [CPU_]
MOV32 *SP++,R4H ; [CPU_]
MOV32 *SP++,R5H ; [CPU_]
MOV32 *SP++,R6H ; [CPU_]
MOV32 *SP++,R7H ; [CPU_]
MOVZ AR2,SP ; [CPU_]
SUBB FP,#16 ; [CPU_U]
ADDB SP,#64 ; [CPU_U]
MOVL XAR1,#_gd ; [CPU_U] |14|
MOVL ACC,XAR1 ; [CPU_] |14|
ADD ACC,#31 << 4 ; [CPU_] |14|
MOVL XAR4,ACC ; [CPU_] |14|
MOV AL,*+XAR4[0] ; [CPU_] |14|
BF $C$L2,EQ ; [CPU_] |14|
MOVL ACC,XAR1 ; [CPU_] |16|
ADD ACC,#69 << 2 ; [CPU_] |16|
MOVL XAR4,ACC ; [CPU_] |16|
MOVL XAR5,*+XAR4[0] ; [CPU_] |16|
MOVL ACC,XAR1 ; [CPU_] |17|
ADD ACC,#69 << 2 ; [CPU_] |17|
MOVL XAR4,ACC ; [CPU_] |16|
MOVL XAR6,*+XAR5[0] ; [CPU_] |16| ******** READS FULL 32 BITS FROM I/O REGISTER INTO XAR6 ********
MOVL XAR4,*+XAR4[0] ; [CPU_] |17|
TBIT *+XAR4[1],#0 ; [CPU_] |17|
BF $C$L1,NTC ; [CPU_] |17|
MOVL ACC,XAR1 ; [CPU_] |19|
ADD ACC,#69 << 2 ; [CPU_] |19|
MOVL XAR4,ACC ; [CPU_] |19|
MOVL XAR4,*+XAR4[0] ; [CPU_] |19|
MOVW DP,#_ledDebug ; [CPU_U]
MOVL ACC,*+XAR4[0] ; [CPU_] |19|
MOVL @_ledDebug,ACC ; [CPU_] |19|
$C$L1:
MOV AL,AR6 ; [CPU_] ******* COPIES ONLY LOWER 16 BITS FROM XAR6 TO ACC ******
ORB AL,#0x20 ; [CPU_] |22| ******* BIT-WISE OPERATION ON LOWER 16 BITS *********
MOVZ AR6,AL ; [CPU_] |22| ****** CLEARS THE UPPER 16 BITS OF XAR6 ********
MOVL ACC,XAR1 ; [CPU_] |22|
ADD ACC,#69 << 2 ; [CPU_] |22|
MOVL XAR4,ACC ; [CPU_] |22|
MOVL XAR4,*+XAR4[0] ; [CPU_] |22|
MOVL *+XAR4[0],XAR6 ; [CPU_] |22| ****** WRITES THE FULL 32 BITS ************
/***** PROPRIETARY CODE HERE *****/
_____________________________________________________________________________________________________________
WORKING Assembly CODE
MOVL XAR1,#_gd ; [CPU_U] |14|
MOVL ACC,XAR1 ; [CPU_] |14|
ADD ACC,#31 << 4 ; [CPU_] |14|
MOVL XAR4,ACC ; [CPU_] |14|
MOV AL,*+XAR4[0] ; [CPU_] |14|
BF $C$L2,EQ ; [CPU_] |14|
MOVL ACC,XAR1 ; [CPU_] |16|
ADD ACC,#69 << 2 ; [CPU_] |16|
MOVL XAR4,ACC ; [CPU_] |16|
MOVL XAR5,*+XAR4[0] ; [CPU_] |16|
MOVL ACC,XAR1 ; [CPU_] |17|
ADD ACC,#69 << 2 ; [CPU_] |17|
MOVL XAR4,ACC ; [CPU_] |16|
MOVL XAR6,*+XAR5[0] ; [CPU_] |16| ******** READS FULL 32 BITS FROM I/O REGISTER INTO XAR6 ********
MOVL XAR4,*+XAR4[0] ; [CPU_] |17|
TBIT *+XAR4[1],#0 ; [CPU_] |17|
BF $C$L1,NTC ; [CPU_] |17|
MOVL ACC,XAR1 ; [CPU_] |19|
ADD ACC,#69 << 2 ; [CPU_] |19|
MOVL XAR4,ACC ; [CPU_] |19|
MOVL XAR4,*+XAR4[0] ; [CPU_] |19|
MOVW DP,#_ledDebug ; [CPU_U]
MOVL ACC,*+XAR4[0] ; [CPU_] |19|
MOVL @_ledDebug,ACC ; [CPU_] |19|
MOVL ACC,XAR1 ; [CPU_] |23|
ADD ACC,#69 << 2 ; [CPU_] |23|
MOVL XAR4,ACC ; [CPU_] |23|
MOVL XAR4,*+XAR4[0] ; [CPU_] |23|
MOVL ACC,XAR6 ; [CPU_] |23| ******* COPIES FULL 32 BITS FROM XAR6 TO ACC ******
ORB AL,#0x20 ; [CPU_] |23| *** BIT-WISE OPERATION ON LOW 8 BITS ****
MOVL *+XAR4[0],ACC ; [CPU_] |23| /*** 32-BIT WRITE ***/