This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

Question about optimizing IF and ELSE statements

In the following example, running this section takes 26 ms.

    for(; src_ptr < src_end_ptr;)

    {

       g = src_ptr[1];

       r = src_ptr[2];

       if(r == g)

       {

           h = 0;

       }

       else

       {

           h = r + g;

       }

       s = ((r == 0) ? 0 : g) << 8;

       dest_ptr+=3;

       src_ptr+=3;

    }

 

But if I change it to the following section, it takes more than 80 ms.

    for(; src_ptr < src_end_ptr;)

    {

       g = src_ptr[1];

       r = src_ptr[2];

       if(r == g)

       {

           h = 0;

       }

       else

       {

           100 * r/(g+1);

       }

       s = ((r == 0) ? 0 : g) << 8;

       dest_ptr+=3;

       src_ptr+=3;

    }

The only difference is in the ELSE section, and I’m sure the program never enters the ELSE section when it runs. Why is the duration so different? It should not cost so much time even if it did enter the ELSE section.

  • helloti,

    Your else statement does not include an equals sign. Should it be

    h = 100 * r/(g+1);

     

    What processor is this for? If you can post the generated assembly, that would also be very helpful.

  • hello Quote

    The code is below. The generated assembly looks quite different, and the assembly for the 80 ms version is shorter.

    20ms

    register unsigned int r, g;

    register unsigned int h, s;

    for(; src_ptr < src_end_ptr;)

    {

       g = src_ptr[1];

       r = src_ptr[2];

       if(r == g)

       {

           h = 0;

       }

       else

       {

           h = r + g;

       }

       s = ((r == 0) ? 0 : g) << 8;

       *((unsigned int *)dest_ptr) = h | s;

       dest_ptr+=3;

       src_ptr+=3;

    }

     

    $C$L3:    ; PIPED LOOP PROLOG

     

               LDBU    .D1T1   *A6++(3),A4       ; |316| (P) <0,0>

    ||         B       .S2     $C$L4             ; |273| (P) <0,6>

     

               ZERO    .L2     B0                ; init loop condition

    ||         SUB     .S2     B5,9,B5

    ||         LDBU    .D1T2   *-A6(2),B1        ; |316| (P) <0,1>

     

               MV      .S2X    A3,B4

    ||         MVK     .L1     0x2,A1            ; init prolog collapse predicate

    || [!B0]   CMPLTU  .L2     B5,3,B0           ; |273| (P) <1,5>

     

    ;** --------------------------------------------------------------------------*

    $C$L4:    ; PIPED LOOP KERNEL

     

       [!B1]   ZERO    .S1     A5                ; |316| <1,6>  ^

    ||         CMPEQ   .L1X    B1,A4,A0          ; |316| <1,6>

    || [!B0]   B       .S2     $C$L4             ; |273| <1,6>

    ||         LDBU    .D1T1   *A6++(3),A4       ; |316| <3,0>

     

               OR      .S1X    B6,A3,A3          ; |316| <0,10>

    || [ B1]   MV      .L1     A4,A5             ; |316| <1,7>  ^

    || [!A0]   ADD     .S2X    A4,B1,B6          ; |316| <1,7>  ^

    || [ A0]   ZERO    .D2     B6                ; |316| <1,7>  ^

    ||         SUB     .L2     B5,3,B5           ; |273| <2,4>

    ||         LDBU    .D1T2   *-A6(2),B1        ; |316| <3,1>

     

       [ A1]   SUB     .L1     A1,1,A1           ; <0,11>

    || [!A1]   STW     .D2T1   A3,*B4            ; |316| <0,11>  ^

    || [!A1]   ADD     .S2     3,B4,B4           ; |316| <0,11>  ^

    ||         SHL     .S1     A5,8,A3           ; |316| <1,8>

    || [!B0]   CMPLTU  .L2     B5,3,B0           ; |273| <2,5>

     

    ;** --------------------------------------------------------------------------*

    $C$L5:    ; PIPED LOOP EPILOG

     

               ADD     .L2     3,B4,B5           ; |316| (E) <1,11>  ^

    || [!B1]   ZERO    .S1     A5                ; |316| (E) <2,6>  ^

    ||         CMPEQ   .L1X    B1,A4,A0          ; |316| (E) <2,6>

     

               ADD     .L2     3,B5,B31          ; |316| (E) <2,11>  ^

    ||         OR      .L1X    B6,A3,A3          ; |316| (E) <1,10>

    || [ B1]   MV      .S1     A4,A5             ; |316| (E) <2,7>  ^

    || [!A0]   ADD     .S2X    A4,B1,B6          ; |316| (E) <2,7>  ^

    || [ A0]   ZERO    .D2     B6                ; |316| (E) <2,7>  ^

     

    ;** --------------------------------------------------------------------------*

               STW     .D2T1   A3,*B4            ; |316| (E) <1,11>  ^

     

               SHL     .S1     A5,8,A3           ; |316| (E) <2,8>

    ||         CMPEQ   .L1X    B1,A4,A0          ; |316| (E) <3,6>

     

               OR      .L1X    B6,A3,A3          ; |316| (E) <2,10>

    || [ A0]   ZERO    .L2     B6                ; |316| (E) <3,7>  ^

    || [!A0]   ADD     .S2X    A4,B1,B6          ; |316| (E) <3,7>  ^

    || [ B1]   MV      .S1     A4,A5             ; |316| (E) <3,7>  ^

    || [!B1]   ZERO    .D1     A5                ; |316| (E) <3,6>  ^

     

               SHL     .S1     A5,8,A31          ; |316| (E) <3,8>

     

               STW     .D2T1   A3,*B5            ; |316| (E) <2,11>  ^

    ||         OR      .L1X    B6,A31,A3         ; |316| (E) <3,10>

     

               STW     .D2T1   A3,*B31           ; |316| (E) <3,11>  ^

    ||         RINT                              ; interrupts on

    80ms

    register unsigned int r, g;

    register unsigned int h, s;

    for(; src_ptr < src_end_ptr;)

    {

       g = src_ptr[1];

       r = src_ptr[2];

       if(r == g)

       {

           h = 0;

       }

       else

       {

           h = 100 * r/(g+1);

       }

       s = ((r == 0) ? 0 : g) << 8;

       *((unsigned int *)dest_ptr) = h | s;

       dest_ptr+=3;

       src_ptr+=3;

    }

    ;*----------------------------------------------------------------------------*

    ;*   SOFTWARE PIPELINE INFORMATION

    ;*      Disqualified loop: Loop contains control code

    ;*----------------------------------------------------------------------------*

    $C$L1:   

               LDBU    .D2T1   *+B5(1),A3        ; |316|

               LDBU    .D2T2   *+B5(2),B4        ; |316|

               NOP             4

               CMPEQ   .L1X    B4,A3,A0          ; |316|

     

       [ A0]   B       .S2     $C$L2             ; |316|

    ||         MV      .L1     A0,A1             ; guard predicate rewrite

    || [!A0]   MPYU    .M2     B4,B8,B7          ; |316|

    ||         MV      .S1X    B4,A5             ; |316|

    || [ A0]   ZERO    .D1     A4                ; |316|

     

       [!A1]   MVK     .L2     0x1,B0            ; |325| nullify predicate

    || [ A0]   MV      .L1     A5,A0

    || [!A0]   CALL    .S1     __c6xabi_divu     ; |316|

     

       [ A1]   SUB     .L2     B6,3,B6           ; |273|

       [ A1]   CMPLTU  .L2     B6,3,B0           ; |273|

       [!B0]   BNOP    .S1     $C$L1,1           ; |273|

               ; BRANCHCC OCCURS {$C$L2}         ; |316|

    ;** --------------------------------------------------------------------------*

     

               ADD     .L2X    1,A3,B4           ; |316|

    ||         MV      .L1X    B7,A4             ; |316|

    ||         ADDKPC  .S2     $C$RL0,B3,0       ; |316|

    ||         SUB     .D2     B6,3,B6           ; |273|

     

    $C$RL0:    ; CALL OCCURS {__c6xabi_divu} {0}  ; |316|

     

               CMPLTU  .L2     B6,3,B0           ; |273|

    ||         MV      .L1     A5,A0

     

       [!B0]   BNOP    .S1     $C$L1,1           ; |273|

    ;** --------------------------------------------------------------------------*

    $C$L2:   

     

       [!A0]   ZERO    .L1     A3                ; |316|

    ||         ADD     .L2     3,B5,B5           ; |273|

     

               SHL     .S1     A3,8,A3           ; |316|

               OR      .L1     A4,A3,A3          ; |316|

     

               STW     .D1T1   A3,*A7            ; |316|

    ||         ADD     .L1     3,A7,A7           ; |325|

     

               ; BRANCHCC OCCURS {$C$L1}         ; |273|

    ;** --------------------------------------------------------------------------*

  • Your problem is with the division required in the second version of the code. Notice the line:

    $C$RL0:    ; CALL OCCURS {__c6xabi_divu} {0}  ; |316|

    A software-pipelined loop cannot contain function calls. Division is not a single instruction on this processor, so the compiler calls a library routine (__c6xabi_divu) to perform it, and that call disqualifies the loop from software pipelining. Maybe there is a way you can use a look-up table to replace the division?