This thread has been locked.
If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.
Tool/software: TI C/C++ Compiler
Compiling the code snippet:
/*
 * Convert num float32 values read from a into int16 values written to b.
 * Each element is passed through the __f32toi16r intrinsic; both pointers
 * advance by one element per iteration.
 *
 * a   - source values (read only)
 * b   - destination for the converted values
 * num - number of elements to convert
 *
 * NOTE(review): NASSERT is defined outside this snippet -- presumably a
 * debug assertion that num is positive; confirm against its definition.
 */
void vec_f32toi16r( const float32_t * a, int16_t * b, int num )
{
int i = num;
NASSERT( num > 0 );
/* Count i down from num; the loop itself runs zero times for num <= 0. */
while ( i > 0 )
{
/* The intrinsic's result is cast to int16_t before the store. */
*b++ = ( int16_t )__f32toi16r( *a++ );
i--;
}
}
using the command line:
"C:/ti/ccsv6/tools/compiler/ti-cgt-c2000_16.9.3.LTS/bin/cl2000" -v28 -ml -mt --cla_support=cla1 --float_support=fpu32 --tmu_support=tmu0 --vcu_support=vcu2 -O4 --opt_for_speed=5 --fp_mode=relaxed --include_path="C:/ti/ccsv6/tools/compiler/ti-cgt-c2000_16.9.3.LTS/include" --advice:performance=all --define=_INLINE --symdebug:none --c99 --diag_warning=225 --diag_wrap=off --display_error_number --gen_func_subsections=on --sat_reassoc=on --asm_listing --src_interlist --gen_func_info_listing --gen_opt_info=1 --gen_preprocessor_listing --section_sizes=on --preproc_with_compile --preproc_dependency="cast.d" "cast.c"
produces the assembly:
;***************************************************************
;* TMS320C2000 C/C++ Codegen PC v16.9.3.LTS *
;* Date/Time created: Mon Jul 31 13:30:47 2017 *
;***************************************************************
.compiler_opts --abi=coffabi --cla_support=cla1 --diag_wrap=off --float_support=fpu32 --hll_source=on --mem_model:code=flat --mem_model:data=large --object_format=coff --section_sizes=on --silicon_version=28 --symdebug:none --tmu_support=tmu0
.asg XAR2, FP
; C:\ti\ccsv6\tools\compiler\ti-cgt-c2000_16.9.3.LTS\bin\opt2000.exe --gen_opt_info=1 C:\\Users\\rist-1\\AppData\\Local\\Temp\\{5E007277-930E-4FFF-89A3-E5D2264630F8} C:\\Users\\rist-1\\AppData\\Local\\Temp\\{E99D5E60-315B-4450-BAEF-20CF0C532130} --opt_info_filename=cast.nfo
; C:\ti\ccsv6\tools\compiler\ti-cgt-c2000_16.9.3.LTS\bin\ac2000.exe -@C:\\Users\\rist-1\\AppData\\Local\\Temp\\{71B923CF-4803-4321-8D4C-D9A0DD543BA3}
.sect ".text:_vec_f32toi16r"
.clink
.global _vec_f32toi16r
;***************************************************************
;* FNAME: _vec_f32toi16r FR SIZE: 0 *
;* *
;* FUNCTION ENVIRONMENT *
;* *
;* FUNCTION PROPERTIES *
;* 0 Parameter, 0 Auto, 0 SOE *
;***************************************************************
_vec_f32toi16r:
;*** 11 ----------------------- if ( num < 2 ) goto g4;
CMPB AL,#2 ; [CPU_] |11|
B $C$L2,LT ; [CPU_] |11|
; branchcc occurs ; [] |11|
;*** ----------------------- L$1 = (num>>1)-1;
;*** ----------------------- #pragma MUST_ITERATE(1, 16383, 1)
;*** ----------------------- // LOOP BELOW UNROLLED BY FACTOR(2)
;*** ----------------------- #pragma LOOP_FLAGS(4102u)
;*** -----------------------g3:
;*** 13 ----------------------- *b++ = __f32toi16r(*a++);
;*** 13 ----------------------- *b++ = __f32toi16r(*a++);
;*** 11 ----------------------- if ( (--L$1) != (-1) ) goto g3;
MOV AH,AL ; [CPU_]
ASR AH,1 ; [CPU_]
ADDB AH,#-1 ; [CPU_]
MOVZ AR6,AH ; [CPU_]
RPTB $C$L2,AR6 ; [CPU_] |11|
; repeat block starts ; []
$C$L1:
MOV32 R0H,*XAR4++ ; [CPU_] |13|
MOV32 R1H,*XAR4++ ; [CPU_] |13|
F32TOI16R R3H,R1H ; [CPU_] |13|
F32TOI16R R0H,R0H ; [CPU_] |13|
NOP ; [CPU_]
MOV32 XAR7,R3H ; [CPU_] |13|
MOV32 P,R0H ; [CPU_] |13|
MOV *XAR5++,P ; [CPU_] |13|
MOV *XAR5++,AR7 ; [CPU_] |13|
; repeat block ends ; []
$C$L2:
;*** -----------------------g4:
;*** ----------------------- if ( !(num&1) ) goto g6;
TBIT AL,#0 ; [CPU_]
B $C$L3,NTC ; [CPU_]
; branchcc occurs ; []
; Peeled loop iterations for unrolled loop:
;*** 13 ----------------------- *b = __f32toi16r(*a);
;*** -----------------------g6:
;*** ----------------------- return;
MOV32 R0H,*+XAR4[0] ; [CPU_] |13|
F32TOI16R R0H,R0H ; [CPU_] |13|
NOP ; [CPU_]
NOP ; [CPU_]
MOV32 ACC,R0H ; [CPU_] |13|
MOV *+XAR5[0],AL ; [CPU_] |13|
$C$L3:
LRETR ; [CPU_]
; return occurs ; []
My issue is that the result of the F32TOI16R instruction is placed into a floating-point register, then moved to an integer register before being saved to memory. Why is a MOV16 instruction not emitted, to produce an inner loop of:
; Proposed inner loop: two elements per pass, stored straight from the FPU
; registers with MOV16 instead of going through CPU registers first.
; R0H holds the first loaded element and R3H the converted second one, so
; R0H must be stored first to keep the output in source order.
; NOTE(review): the two conversions are interleaved so that one instruction
; separates each F32TOI16R from the MOV16 that reads its result -- confirm
; the required delay against the FPU instruction-set reference.
MOV32 R0H,*XAR4++ ; [CPU_] |13|
MOV32 R1H,*XAR4++ ; [CPU_] |13|
F32TOI16R R0H,R0H ; [CPU_] |13|
F32TOI16R R3H,R1H ; [CPU_] |13|
MOV16 *XAR5++,R0H ; [CPU_] |13| first element of the pair
MOV16 *XAR5++,R3H ; [CPU_] |13| second element of the pair
Thanks