This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

ARM compiler v5.2.4 generates incorrect code for Optimization Level 2

Other Parts Discussed in Thread: AM3352

The attachment arm_compiler_incorrect_optimization.zip contains bl_hsmmcsd.pp which is a pre-processed source file, and bl_hsmmcsd.asm which is the assembler listing file produced by the compiler when the following build options were used:

"/opt/ti/ti_ccs6_1/ccsv6/tools/compiler/ti-cgt-arm_5.2.4/bin/armcl" -mv7A8 --code_state=32 --abi=eabi -me -O2 --include_path="/opt/ti/ti_ccs6_1/ccsv6/tools/compiler/ti-cgt-arm_5.2.4/include" --include_path="/home/Mr_Halfword/AM3352-SOM-EVB_bare_metal/bootloader" --include_path="/home/Mr_Halfword/AM3352-SOM-EVB_bare_metal/AM3352_SOM_platform" --include_path="/opt/ti/AM335X_StarterWare_02_00_01_01/include/armv7a" --include_path="/opt/ti/AM335X_StarterWare_02_00_01_01/include" --include_path="/opt/ti/AM335X_StarterWare_02_00_01_01/include/hw" --include_path="/opt/ti/AM335X_StarterWare_02_00_01_01/include/armv7a/am335x" --include_path="/opt/ti/AM335X_StarterWare_02_00_01_01/third_party/fatfs/src" --include_path="/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/include" --include_path="/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/include/armv7a/am335x" --include_path="/opt/ti/AM335X_StarterWare_02_00_01_01/mmcsdlib/include" -g --gcc --preproc_with_comment --preproc_with_compile --define=am335x --define=AM3352_SOM --define=MMCSD --define=am3352 --display_error_number --diag_warning=225 --diag_wrap=off --src_interlist "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c"

When compiled with the optimization level of 2, the ARM v5.2.4 compiler was found to generate incorrect code for the following loop in the HSMMCSDXferStatusGet function:

                for (i = 0; i < hsmmcsd_dataLen; i+=4)
                {
                    temp = HWREG(MMCSD_BASE + MMCHS_DATA);
                    hsmmcsd_buffer[i] = *((char*)&temp);
                    hsmmcsd_buffer[i+1] = *((char*)&temp + 1);
                    hsmmcsd_buffer[i+2] = *((char*)&temp + 2);
                    hsmmcsd_buffer[i+3] = *((char*)&temp + 3);
                }

The symptom of the incorrect code observed in the debugger was elements for the "hsmmcsd_buffer[i] = *((char*)&temp)" writes were not correct. E.g.:

- The value of hsmmcsd_buffer[0] was zero.
- The expected value for hsmmcsd_buffer[0] appeared in hsmmcsd_buffer[4].
- The expected value for hsmmcsd_buffer[4] appeared in hsmmcsd_buffer[8].
- ...

The pre-processed version of the loop is:

                for (i = 0; i < hsmmcsd_dataLen; i+=4)
                {
                    temp = (*((volatile unsigned int *)((0x48060000) + (0x220))));
                    hsmmcsd_buffer[i] = *((char*)&temp);
                    hsmmcsd_buffer[i+1] = *((char*)&temp + 1);
                    hsmmcsd_buffer[i+2] = *((char*)&temp + 2);
                    hsmmcsd_buffer[i+3] = *((char*)&temp + 3);
                }

The assembler listing file shows the following code has been generated for the loop:

;* --------------------------------------------------------------------------*
;**  	-----------------------    U$25 = &U$10[-4];
;** 272	-----------------------    L$1 = U$16+3u>>2;
;**  	-----------------------    #pragma MUST_ITERATE(1, 1073741823, 1)
;**  	-----------------------    #pragma LOOP_FLAGS(4096u)
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 272,column 21,is_stmt,isa 2
        ADD       A1, A1, #3            ; [DPU_8_PIPE0] |272| 
        SUB       V9, V9, #4            ; [DPU_8_PIPE1] 
        MOV       A1, A1, LSR #2        ; [DPU_8_PIPE0] |272| 
;* --------------------------------------------------------------------------*
;*   BEGIN LOOP ||$C$L3||
;*
;*   Loop source line                : 270
;*   Loop closing brace source line  : 277
;*   Known Minimum Trip Count        : 1
;*   Known Maximum Trip Count        : 1073741823
;*   Known Max Trip Count Factor     : 1
;* --------------------------------------------------------------------------*
||$C$L3||:    
;**	-----------------------g6:
;** 272	-----------------------    temp = *(volatile unsigned *)0x48060220u;
;** 273	-----------------------    *(U$25 += 4) = *&temp;
;** 274	-----------------------    U$25[1] = *(&temp+1);
;** 275	-----------------------    U$25[2] = *(&temp+2);
;** 276	-----------------------    U$25[3] = *(&temp+3);
;** 270	-----------------------    if ( --L$1 ) goto g6;
        LDR       A3, [V1, #0]          ; [DPU_8_PIPE0] |272| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 273,column 21,is_stmt,isa 2
        LDRB      A2, [SP, #4]          ; [DPU_8_PIPE0] |273| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 272,column 21,is_stmt,isa 2
        STR       A3, [SP, #4]          ; [DPU_8_PIPE0] |272| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 273,column 21,is_stmt,isa 2
        STRB      A2, [V9, #4]!         ; [DPU_8_PIPE0] |273| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 274,column 21,is_stmt,isa 2
        LDRB      LR, [SP, #5]          ; [DPU_8_PIPE0] |274| 
        STRB      LR, [V9, #1]          ; [DPU_8_PIPE0] |274| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 275,column 21,is_stmt,isa 2
        LDRB      A2, [SP, #6]          ; [DPU_8_PIPE0] |275| 
        STRB      A2, [V9, #2]          ; [DPU_8_PIPE0] |275| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 276,column 21,is_stmt,isa 2
        LDRB      A2, [SP, #7]          ; [DPU_8_PIPE0] |276| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 270,column 29,is_stmt,isa 2
        SUBS      A1, A1, #1            ; [DPU_8_PIPE1] |270| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 276,column 21,is_stmt,isa 2
        STRB      A2, [V9, #3]          ; [DPU_8_PIPE0] |276| 
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 270,column 29,is_stmt,isa 2
        BNE       ||$C$L3||             ; [DPU_8_PIPE1] |270| 
        ; BRANCHCC OCCURS {||$C$L3||}    ; [] |270| 

In the generated loop, the temp variable is at offset 4 on the stack, and V1 holds the address for MMCSD_BASE + MMCHS_DATA (which is the hardware register which is the source of the data). Analysis on the generated assembler which I think is incorrect:

        LDR       A3, [V1, #0]          ; [DPU_8_PIPE0] |272| // Load A3 from HWREG(MMCSD_BASE + MMCHS_DATA)
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 273,column 21,is_stmt,isa 2
        LDRB      A2, [SP, #4]          ; [DPU_8_PIPE0] |273| // Load A2 from the byte at address of temp+0
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 272,column 21,is_stmt,isa 2
        STR       A3, [SP, #4]          ; [DPU_8_PIPE0] |272| // Write A3 to the temp variable
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 273,column 21,is_stmt,isa 2
        STRB      A2, [V9, #4]!         ; [DPU_8_PIPE0] |273| // Write A2 to the byte hsmmcsd_buffer[i], which is the value from temp on the *previous* loop iteration
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 274,column 21,is_stmt,isa 2

i.e. believe the correct instruction sequence should be:

        LDR       A3, [V1, #0]          ; [DPU_8_PIPE0] |272| // Load A3 from HWREG(MMCSD_BASE + MMCHS_DATA)
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 273,column 21,is_stmt,isa 2
        STR       A3, [SP, #4]          ; [DPU_8_PIPE0] |272| // Write A3 to the temp variable
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 273,column 21,is_stmt,isa 2
        LDRB      A2, [SP, #4]          ; [DPU_8_PIPE0] |273| // Load A2 from the byte at address of temp+0
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 272,column 21,is_stmt,isa 2
        STRB      A2, [V9, #4]!         ; [DPU_8_PIPE0] |273| // Write A2 to the byte hsmmcsd_buffer[i], which is the value from temp on the current iteration
	.dwpsn	file "/opt/ti/AM335X_StarterWare_02_00_01_01/bootloader/src/bl_hsmmcsd.c",line 274,column 21,is_stmt,isa 2

As a work-around the following was used to compile the HSMMCSDXferStatusGet function at optimization level 1, leaving the rest of the source file compiled at optimization level 2:

#pragma FUNCTION_OPTIONS(HSMMCSDXferStatusGet,"--opt_level=1")

With this change the program operated correctly.

Note that I haven't tried to create a smaller standalone test case, hopefully the attached preprocessed source file will allow the problem to be re-created.

  • Two thoughts:
    - *(TYPE*)&X is the hallmark of a type pun that is likely to cause trouble.
    - Why not "memcpy(&temp, &hsmmcsd_buffer[0], hsmmcsd_dataLen)"?
  • I forgot to mention, but I didn't write the code in question. I was just using the unmodified source file bootloader/src/bl_hsmmcsd.c from StarterWare 02.00.01.01. When the bootloader didn't operate correctly when compiled for Optimization Level 2 I traced the problem to the above mentioned loop in HSMMCSDXferStatusGet.

    pf said:
    - *(TYPE*)&X is the hallmark of a type pun that is likely to cause trouble.

    Do you mean the code is implementation undefined in terms of the C standard, or that such expressions can cause problems for the optimizer?

    pf said:
    - Why not "memcpy(&temp, &hsmmcsd_buffer[0], hsmmcsd_dataLen)"?

    The loop is reading 4 bytes from a hardware FIFO register each iteration, and storing each word read into memory. Therefore, a single memcpy copy for the entire hsmmcsd_dataLen bytes can't be used.

  • Chester Gillon said:

    Do you mean the code is implementation undefined in terms of the C standard, or that such expressions can cause problems for the optimizer?

    Both.  Technically one is not supposed to access an object of one type by treating it as another.  On the other hand, certain idioms are so common that we do try to accommodate it.  If it's in code we supply, then we'd better get it right.

    Chester Gillon said:

    The loop is reading 4 bytes from a hardware FIFO register each iteration, and storing each word read into memory. Therefore, a single memcpy copy for the entire hsmmcsd_dataLen bytes can't be used.

    You're right, I had that wrong.  I should have said, why not "for(i=0;  i < hsmmcsd_dataLen;  i+=4) memcpy(&temp, &hsmmcsd_buffer[i], sizeof(int))".  But I suppose that might run into the same issue, depending on how the memcpy() call compiles.

  • I am pursuing this as a bug in Starterware.  I don't know how that process works.  But I expect I will post an update here once the bug is filed.

    Thanks and regards,

    -George

  • After further investigation, it now appears this is a bug in the compiler.  I filed SDSCM00052185 in the SDOWP system to have this investigated.  If it is a compiler bug, it will be fixed.  If it is not a compiler bug, then a fix will be pursued with the development team which supplies Starterware.  Feel free to follow it with the SDOWP link below in my signature.

    Thanks and regards,

    -George

  • George Mock said:
    I filed SDSCM00052185 in the SDOWP system to have this investigated.  If it is a compiler bug, it will be fixed.

    SDSCM00052185 has been fixed in ARM compiler v5.2.6.

    I confirm that ARM compiler v5.2.6 now generates the correct code for the HSMMCSDXferStatusGet function, in which the bug was detected, when Optimization Level 2 is used.