Hello E2E Experts,
Good day.
I would like to implement a minmax function for three values. I first implemented it as follows:
/**
 * Computes the minimum and maximum of three floats with plain ternary comparisons.
 *
 * @param a   first value
 * @param b   second value
 * @param c   third value
 * @param min receives the result of the two `<` selections (written last)
 * @param max receives the result of the two `>` selections (written first)
 */
void minmax(float a, float b, float c, float &min, float &max) {
    // Pick the larger of a and b, then compare that winner against c.
    float max_ab = a > b ? a : b;
    max = max_ab > c ? max_ab : c;
    // Pick the smaller of a and b, then compare that winner against c.
    float min_ab = a < b ? a : b;
    min = min_ab < c ? min_ab : c;
}
I built it with this Makefile:
# Builds test.obj from test.cpp with the TI C2000 code generation tools (cl2000).

# Path to the cl2000 compiler driver.
CCX = /home/ycr/ti/ccs1230/ccs/tools/compiler/ti-cgt-c2000_22.6.0.LTS/bin/cl2000

# Compiler options. Every line except the last ends with a continuation
# backslash; the last one must not, or the next rule would be absorbed into
# this variable.
CXXFLAGS = --define=CPU1 \
    -v28 -ml -mt --float_support=fpu32 --tmu_support=tmu0 --vcu_support=vcu0 -O4 --fp_mode=relaxed \
    --c11 --c++03 --relaxed_ansi --cpp_default --float_operations_allowed=all \
    --display_error_number --gen_func_subsections=on --gen_data_subsections=on \
    --include_path=/home/ycr/ti/ccs1230/ccs/tools/compiler/ti-cgt-c2000_22.6.0.LTS/include/ \
    --abi=eabi

# `all` names no real file, so mark it phony.
.PHONY: all
all: test.obj

# Pattern rule: compile one .cpp into one .obj. The recipe line must start with
# a TAB character. The trailing options request the assembly listing shown in
# this post.
%.obj : %.cpp
	$(CCX) $(CXXFLAGS) -c $< --asm_cross_reference_listing -al -ss
This generates the following assembly:
; Body emitted for the ternary implementation of minmax: one instruction per line.
; R0H accumulates the minimum and R3H the maximum of the values in R0H, R1H, R2H
; (presumably a, b, c in argument order - TODO confirm against the C28x calling convention).
        MOV32     R3H,R0H               ; [CPU_FPU] copy R0H so min and max can be tracked separately
        MINF32    R0H,R1H               ; [CPU_FPU]
        MAXF32    R3H,R1H               ; [CPU_FPU]
        MINF32    R0H,R2H               ; [CPU_FPU]
        MAXF32    R3H,R2H               ; [CPU_FPU]
        MOV32     *+XAR5[0],R3H         ; [CPU_FPU] store the maximum through XAR5
        MOV32     *+XAR4[0],R0H         ; [CPU_FPU] store the minimum through XAR4
However, for the sake of portability, I rewrote the example using <algorithm>:
#include <algorithm>

/**
 * Computes the minimum and maximum of three floats with std::min / std::max.
 *
 * @param a   first value
 * @param b   second value
 * @param c   third value
 * @param min receives std::min(std::min(a, b), c) (written first)
 * @param max receives std::max(std::max(a, b), c) (written last)
 *
 * NOTE(review): std::min and std::max take their arguments by const reference
 * and return a const reference. That is presumably why the generated code
 * stores a, b and c to memory and selects between addresses instead of using
 * register-only min/max instructions - confirm against the compiler's inlining
 * of <algorithm>.
 */
void minmax(float a, float b, float c, float &min, float &max) {
    min = std::min(std::min(a, b), c);
    max = std::max(std::max(a, b), c);
}
The generated code is far from optimal:
;----------------------------------------------------------------------
; 4 | void minmax(float a, float b, float c, float &min, float &max) {
; 5 | min = std::min(std::min(a, b), c);
; 6 | max = std::max(std::max(a, b), c);
; 8 |
;----------------------------------------------------------------------
; Excerpt of the function body generated for the <algorithm> version; the function
; label and the instruction that reserves the stack frame are not part of this excerpt.
; Tags |4|, |5|, |6| refer to the source lines echoed in the header above. Tags |146|
; and |148| are outside that file - presumably lines of std::min / std::max inside
; <algorithm> (TODO confirm).
;
; Compare R1H with R0H and copy the FPU ZF/NF flags into ST0 for the branch further down.
CMPF32 R1H,R0H ; [CPU_FPU] |146|
MOVST0 ZF, NF ; [CPU_FPU] |146|
; Store the three float arguments (R0H, R1H, R2H - presumably a, b, c) at SP-4, SP-6, SP-8.
MOV32 *-SP[4],R0H ; [CPU_FPU] |4|
MOV32 *-SP[6],R1H ; [CPU_FPU] |4|
MOV32 *-SP[8],R2H ; [CPU_FPU] |4|
; Select an address, not a value: LT -> XAR6 = SP-6, otherwise XAR6 = SP-4.
B ||$C$L1||,LT ; [CPU_ALU] |146|
; branchcc occurs ; [] |146|
MOVZ AR6,SP ; [CPU_ALU] |146|
SUBB XAR6,#4 ; [CPU_ARAU] |146|
B ||$C$L2||,UNC ; [CPU_ALU] |146|
; branch occurs ; [] |146|
||$C$L1||:
MOVZ AR6,SP ; [CPU_ALU] |146|
SUBB XAR6,#6 ; [CPU_ARAU] |146|
||$C$L2||:
; Reload the value XAR6 points at and compare it with the copy stored at SP-8.
MOVZ AR6,AR6 ; [CPU_ALU] |146|
MOV32 R1H,*-SP[8] ; [CPU_FPU] |146|
MOV32 R0H,*+XAR6[0] ; [CPU_FPU] |146|
CMPF32 R1H,R0H ; [CPU_FPU] |146|
MOVST0 ZF, NF ; [CPU_FPU] |146|
; GEQ -> keep the current selection; otherwise redirect XAR6 to SP-8.
B ||$C$L3||,GEQ ; [CPU_ALU] |146|
; branchcc occurs ; [] |146|
MOVZ AR6,SP ; [CPU_ALU] |146|
SUBB XAR6,#8 ; [CPU_ARAU] |146|
MOVZ AR6,AR6 ; [CPU_ALU] |146|
||$C$L3||:
; Source line 5 (`min = ...`): load the selected value through XAR6 and store it through
; XAR4. The loads and compare tagged |148| are interleaved with that store.
MOVL ACC,*+XAR6[0] ; [CPU_ALU] |5|
MOV32 R0H,*-SP[6] ; [CPU_FPU] |148|
MOV32 R1H,*-SP[4] ; [CPU_FPU] |148|
MOVL *+XAR4[0],ACC ; [CPU_ALU] |5|
CMPF32 R1H,R0H ; [CPU_FPU] |148|
MOVST0 ZF, NF ; [CPU_FPU] |148|
; Same address-selection pattern again, now with XAR4 as the scratch pointer (the store
; through XAR4 has already been issued): LT -> XAR4 = SP-6, otherwise XAR4 = SP-4.
B ||$C$L4||,LT ; [CPU_ALU] |148|
; branchcc occurs ; [] |148|
MOVZ AR4,SP ; [CPU_ALU] |148|
SUBB XAR4,#4 ; [CPU_ARAU] |148|
B ||$C$L5||,UNC ; [CPU_ALU] |148|
; branch occurs ; [] |148|
||$C$L4||:
MOVZ AR4,SP ; [CPU_ALU] |148|
SUBB XAR4,#6 ; [CPU_ARAU] |148|
||$C$L5||:
; Reload the value XAR4 points at and compare it with the copy stored at SP-8.
MOVZ AR4,AR4 ; [CPU_ALU] |148|
MOV32 R0H,*-SP[8] ; [CPU_FPU] |148|
MOV32 R1H,*+XAR4[0] ; [CPU_FPU] |148|
CMPF32 R1H,R0H ; [CPU_FPU] |148|
MOVST0 ZF, NF ; [CPU_FPU] |148|
; GEQ -> keep the current selection; otherwise redirect XAR4 to SP-8.
B ||$C$L6||,GEQ ; [CPU_ALU] |148|
; branchcc occurs ; [] |148|
MOVZ AR4,SP ; [CPU_ALU] |148|
SUBB XAR4,#8 ; [CPU_ARAU] |148|
MOVZ AR4,AR4 ; [CPU_ALU] |148|
||$C$L6||:
; Source line 6 (`max = ...`): load the selected value through XAR4 and store it through
; XAR5. SP is reduced by 8 in between (the matching increase is outside this excerpt),
; then the function returns.
MOVL ACC,*+XAR4[0] ; [CPU_ALU] |6|
SUBB SP,#8 ; [CPU_ARAU]
MOVL *+XAR5[0],ACC ; [CPU_ALU] |6|
LRETR ; [CPU_ALU]
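Why does the compiler not generate MINF32/MAXF32 for the std::min/std::max version, as it does for the hand-written one?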
Regards,
CSC