;******************************************************************************
;* TMS320C6x C/C++ Codegen                                          PC v7.2.3 *
;* Date/Time created: Sun Apr 15 15:16:40 2012                                *
;******************************************************************************
        .compiler_opts --abi=coffabi --c64p_l1d_workaround=default --endian=little --hll_source=linasm --long_precision_bits=40 --mem_model:code=near --mem_model:const=data --mem_model:data=far_aggregates --object_format=coff --silicon_version=6400 --symdebug:none

;******************************************************************************
;* GLOBAL FILE PARAMETERS                                                     *
;*                                                                            *
;*   Architecture      : TMS320C64xx                                          *
;*   Optimization      : Enabled at level 3                                   *
;*   Optimizing for    : Speed                                                *
;*                       Based on options: -o3, no -ms                        *
;*   Endian            : Little                                               *
;*   Interrupt Thrshld : Disabled                                             *
;*   Data Access Model : Far Aggregate Data                                   *
;*   Pipelining        : Enabled                                              *
;*   Speculate Loads   : Disabled                                             *
;*   Memory Aliases    : Presume not aliases (optimistic)                     *
;*   Debug Info        : No Debug Info                                        *
;*                                                                            *
;******************************************************************************

        .asg    A15, FP
        .asg    B14, DP
        .asg    B15, SP
        .global $bss

        .sect   ".text"
        .clink

;******************************************************************************
;* FUNCTION NAME: test                                                        *
;*                                                                            *
;*   Regs Modified     : A0,A1,A2,A3,A4,A5,A6,B0,B1,B2,B4,B5,B6,B7,B8,B9,B16  *
;*   Regs Used         : A0,A1,A2,A3,A4,A5,A6,B0,B1,B2,B3,B4,B5,B6,B7,B8,B9,  *
;*                           DP,SP,B16                                        *
;******************************************************************************
;* Summary (derived from the .map directives, the echoed linear-assembly
;* source and the single scheduled iteration listed further down):
;*
;*   Inputs   : p_u arrives in A4, N in B4 and p_v in A6.  Before the loop
;*              p_u is copied to A3 and B5, N to A0, A1, A2 and B8, and p_v
;*              to A5.
;*   Per pass : j is decremented while its predicate copy is non-zero, the
;*              halfwords p_u[j] and p_v[j] are loaded and added, and the
;*              halfword sum is stored back through the B-side copy of p_u
;*              using the B-side twin of the index.
;*   Exit     : when j is 0, break_flag (B0) is set to 1, which clears B1.
;*              B1 predicates the loop branch and, in the prolog and kernel,
;*              both loads; B2, a delayed copy of B1 taken through B4,
;*              predicates the store.
;*   CSR      : saved in B16, rewritten with bit 0 cleared before the loop
;*              ("interrupts off") and restored from B16 right after RETNOP
;*              ("interrupts on").
;*   Return   : RETNOP through B3.
;******************************************************************************
_test:
        .map    N/B4
        .map    N'/A0
        .map    ref_u/B6
        .map    break_flag/B0
        .map    j/A1
        .map    j$1/A0
        .map    j$2/B8
        .map    j$3/B9
        .map    j$4/B7
        .map    j$5/A2
        .map    j$6/A6
        .map    j$7/B4
        .map    u/A4
        .map    u'/A6
        .map    p_u/A3
        .map    p_u'/B5
        .map    p_u''/A4
        .map    p_v/A5
        .map    p_v'/A6
;** --------------------------------------------------------------------------*
;          EXCLUSIVE CPU CYCLES: 7
;
; _test:        .cproc          p_u, N, p_v
;               .reg            j, u, ref_u, break_flag
;               .no_mdep
; loop:         .trip 16

; Seed the A-side copies of N: N' (shares A0 with j$1, the predicate of the
; first decrement), j (the index) and j$5 (predicate of the B-side twin).
           MV      .L1X    N,N'              ; |2|
           MV      .L1X    N,j               ; |2|
           MV      .L1X    N,j$5             ; |2|

; j = N - 1 when the predicate copy of N is non-zero; CSR is saved in B16 and
; p_v is moved out of its entry register.
   [ j$1]  ADD     .L1X    0xffffffff,N,j    ; |7|
||         ZERO    .L2     break_flag        ; |13| (P) <0,2>
||         MVC     .S2     CSR,B16
||         MV      .S1     p_v',p_v          ; |2|

; B1 = 1 arms the loop-continue predicate; p_u is copied to both register
; files and N is copied into the B-side twin index j$2.
           MV      .L1     j,j$1             ; |7|
||         LDH     .D1T2   *+p_v[j],ref_u    ; |10| (P) <0,4>
||         MVK     .L2     0x1,B1
||         MV      .S1     p_u'',p_u         ; |2|
||         MV      .S2X    p_u'',p_u'        ; |2|
||         MV      .D2     N,j$2             ; |2|

; B4 = saved CSR with bit 0 cleared (written to CSR in the next packet).
   [ j$1]  ADD     .L1     0xffffffff,j,j    ; |7| (P) <1,1>  ^
||         AND     .L2     -2,B16,B4
||         LDH     .D1T1   *+p_u[j],u        ; |9| (P) <0,3>
||         MV      .S1     j,j$5             ; |7|
|| [!j]    MVK     .S2     0x1,break_flag    ; |15| (P) <0,3>
|| [ j$5]  ADD     .D2     0xffffffff,N,j$2  ; |7|

           MV      .L1     j,j$6             ; |7| (P) <1,2>  ^ Split a long life(pre-sched)
||         ZERO    .L2     break_flag        ; |13| (P) <1,2>
||         MVC     .S2     B4,CSR            ; interrupts off
|| [ break_flag] ZERO .D2  B1                ; |15| (P) <0,5>

;*----------------------------------------------------------------------------*
;*   SOFTWARE PIPELINE INFORMATION
;*
;*      Loop source line                 : 7
;*      Loop closing brace source line   : 16
;*      Known Minimum Trip Count         : 16
;*      Known Max Trip Count Factor      : 1
;*      Loop Carried Dependency Bound(^) : 3
;*      Unpartitioned Resource Bound     : 3
;*      Partitioned Resource Bound(*)    : 3
;*      Resource Partition:
;*                                A-side   B-side
;*      .L units                     0        0
;*      .S units                     1        0
;*      .D units                     2        1
;*      .M units                     0        0
;*      .X cross paths               1        0
;*      .T address paths             2        1
;*      Long read paths              0        0
;*      Long write paths             0        0
;*      Logical  ops (.LS)           0        0     (.L or .S unit)
;*      Addition ops (.LSD)          6        8     (.L or .S or .D unit)
;*      Bound(.L .S .LS)             1        0
;*      Bound(.L .S .D .LS .LSD)     3*       3*
;*
;*      Searching for software pipeline schedule at ...
;*         ii = 3  Schedule found with 5 iterations in parallel
;*
;*      Register Usage Table:
;*          +-----------------------------------------------------------------+
;*          |AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA|BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB|
;*          |00000000001111111111222222222233|00000000001111111111222222222233|
;*          |01234567890123456789012345678901|01234567890123456789012345678901|
;*          |--------------------------------+--------------------------------|
;*       0: |** ***                          |*** ****                        |
;*       1: |******                          |*** *****                       |
;*       2: |** ****                         |*** ******                      |
;*          +-----------------------------------------------------------------+
;*
;*      Done
;*
;*      Collapsed epilog stages       : 4
;*      Prolog not removed
;*      Collapsed prolog stages       : 0
;*
;*      Minimum required memory pad   : 0 bytes
;*
;*      For further improvement on this loop, try option -mh14
;*
;*      Minimum safe trip count       : 1
;*      Min. prof. trip count  (est.) : 3
;*
;*      Mem bank conflicts/iter(est.) : { min 0.000, est 0.000, max 0.000 }
;*      Mem bank perf. penalty (est.) : 0.0%
;*
;*
;*      Total cycles (est.)         : 11 + trip_cnt * 3
;*----------------------------------------------------------------------------*
;*        SETUP CODE
;*
;*                  MVK             0x1,B1
;*                  MV              A1,B8
;*                  MV              A3,B5
;*                  MV              B1,B2
;*                  MV              A1,A0
;*
;*        SINGLE SCHEDULED ITERATION
;*
;*        $C$C29:
;*   0              NOP             1
;*   1      [ A0]   ADD     .S1     0xffffffff,A1,A1  ; |7|  ^
;*   2              MV      .L1     A0,A2             ; |7| Split a long life(pre-sched)
;*       ||         MV      .D1     A1,A0             ; |7|  ^ Split a long life(pre-sched)
;*       ||         ZERO    .S2     B0                ; |13|
;*   3      [ A2]   ADD     .S2     0xffffffff,B8,B8  ; |7| Define a twin register
;*       || [ B1]   LDH     .D1T1   *+A3[A1],A4       ; |9|
;*       || [!A1]   MVK     .D2     0x1,B0            ; |15|
;*   4      [ B1]   LDH     .D1T2   *+A5[A1],B6       ; |10|
;*   5      [ B0]   ZERO    .L2     B1                ; |15|
;*   6              MV      .L2     B8,B9             ; |7| Split a long life(pre-sched)
;*   7              MV      .S2     B9,B7             ; |7| Split a long life(pre-sched)
;*   8              MV      .D2     B1,B4             ; |15| Split a long life(pre-sched)
;*       || [ B1]   B       .S1     $C$C29            ; |16|
;*   9              ADD     .L1X    A4,B6,A6          ; |11|
;*  10      [ B2]   STH     .D2T1   A6,*+B5[B7]       ; |12|
;*       ||         MV      .L2     B4,B2             ; |15| Split a long life(pre-sched)
;*  11              NOP             3
;*  14              ; BRANCHCC OCCURS {$C$C29}        ; |16|
;*----------------------------------------------------------------------------*
$C$L1:    ; PIPED LOOP PROLOG
;          EXCLUSIVE CPU CYCLES: 5

; The prolog fills the pipeline.  The "(P) <i,c>" tags on each instruction are
; the generator's own annotations and are reproduced unchanged.
           MV      .L2     j$2,j$7           ; |7| (P) <0,6> Split a long life(pre-sched)
|| [ j$5]  ADD     .S2     0xffffffff,j$2,j$2 ; |7| (P) <1,3> Define a twin register
|| [ B1]   LDH     .D1T1   *+p_u[j],u        ; |9| (P) <1,3>
|| [!j]    MVK     .D2     0x1,break_flag    ; |15| (P) <1,3>
||         MV      .L1     j$6,j$1           ; |7| (P) <2,0>  ^ Copy to predicate register

; B2 = 1 so that the first predicated store is allowed to execute.
           MVK     .L2     0x1,B2
||         MV      .S2     j$7,j$4           ; |7| (P) <0,7> Split a long life(pre-sched)
|| [ B1]   LDH     .D1T2   *+p_v[j],ref_u    ; |10| (P) <1,4>
||         MV      .L1     j$1,j$6           ; |7| (P) <2,1> Split a long life(pre-sched)
|| [ j$1]  ADD     .S1     0xffffffff,j,j    ; |7| (P) <2,1>  ^

   [ B1]   B       .S1     $C$L2             ; |16| (P) <0,8>
||         MV      .L2     B1,B4             ; |15| (P) <0,8> Split a long life(pre-sched)
|| [ break_flag] ZERO .S2  B1                ; |15| (P) <1,5>
||         ZERO    .D2     break_flag        ; |13| (P) <2,2>
||         MV      .L1     j$6,j$5           ; |7| (P) <2,2> Split a long life(pre-sched)
||         MV      .D1     j,j$6             ; |7| (P) <2,2>  ^ Split a long life(pre-sched)

           ADD     .L1X    u,ref_u,u'        ; |11| (P) <0,9>
||         MV      .L2     j$2,j$3           ; |7| (P) <1,6> Split a long life(pre-sched)
|| [ j$5]  ADD     .S2     0xffffffff,j$2,j$2 ; |7| (P) <2,3> Define a twin register
|| [ B1]   LDH     .D1T1   *+p_u[j],u        ; |9| (P) <2,3>
|| [!j]    MVK     .D2     0x1,break_flag    ; |15| (P) <2,3>
||         MV      .S1     j$6,j$1           ; |7| (P) <3,0>  ^ Copy to predicate register

           MV      .L2     B4,B2             ; |15| (P) <0,10> Split a long life(pre-sched)
|| [ B2]   STH     .D2T1   u',*+p_u'[j$4]    ; |12| (P) <0,10>
||         MV      .S2     j$3,j$4           ; |7| (P) <1,7> Split a long life(pre-sched)
|| [ B1]   LDH     .D1T2   *+p_v[j],ref_u    ; |10| (P) <2,4>
|| [ j$1]  ADD     .S1     0xffffffff,j,j    ; |7| (P) <3,1>  ^
;** --------------------------------------------------------------------------*
$C$L2:    ; PIPED LOOP KERNEL
;          EXCLUSIVE CPU CYCLES: 3

; Steady state: three execute packets per pass (ii = 3 in the schedule summary
; above).  The branch is predicated on B1, so the kernel repeats until B1 has
; been cleared by the break_flag test.
   [ B1]   B       .S1     $C$L2             ; |16| <1,8>
||         MV      .D2     B1,B4             ; |15| <1,8> Split a long life(pre-sched)
|| [ break_flag] ZERO .L2  B1                ; |15| <2,5>
||         ZERO    .S2     break_flag        ; |13| <3,2>
||         MV      .L1     j$1,j$5           ; |7| <3,2> Split a long life(pre-sched)
||         MV      .D1     j,j$1             ; |7| <3,2>  ^ Split a long life(pre-sched)

           ADD     .L1X    u,ref_u,u'        ; |11| <1,9>
||         MV      .L2     j$2,j$3           ; |7| <2,6> Split a long life(pre-sched)
|| [ j$5]  ADD     .S2     0xffffffff,j$2,j$2 ; |7| <3,3> Define a twin register
|| [ B1]   LDH     .D1T1   *+p_u[j],u        ; |9| <3,3>
|| [!j]    MVK     .D2     0x1,break_flag    ; |15| <3,3>

           MV      .L2     B4,B2             ; |15| <1,10> Split a long life(pre-sched)
|| [ B2]   STH     .D2T1   u',*+p_u'[j$4]    ; |12| <1,10>
||         MV      .S2     j$3,j$4           ; |7| <2,7> Split a long life(pre-sched)
|| [ B1]   LDH     .D1T2   *+p_v[j],ref_u    ; |10| <3,4>
|| [ j$1]  ADD     .S1     0xffffffff,j,j    ; |7| <4,1>  ^
;** --------------------------------------------------------------------------*
$C$L3:    ; PIPED LOOP EPILOG
;** --------------------------------------------------------------------------*
;          EXCLUSIVE CPU CYCLES: 6

; All epilog stages were collapsed (see "Collapsed epilog stages : 4" above),
; so only the return and the CSR restore remain here.
           RETNOP  .S2     B3,4              ; |18|
           MVC     .S2     B16,CSR           ; interrupts on
           ; BRANCH OCCURS {B3}              ; |18|
        .clearmap
;       .endproc


;******************************************************************************
;* BUILD ATTRIBUTES                                                           *
;******************************************************************************
        .battr "TI", Tag_File, 1, Tag_ABI_stack_align_needed(0)
        .battr "TI", Tag_File, 1, Tag_ABI_stack_align_preserved(0)