Is it possible to tell compilers that it is safe to move IFs outside the loop?

Imagine such code in Fortran format

DO i=1,n
  IF (alpha == 0.0) THEN
    x(i) = y(i)
  ELSE
    x(i) = alpha*x(i)+y(i)
  END IF
END DO

(this, of course, is just a stupid example, in real situations the loop will be more complicated and not just rewritten as array constructions)

Since it alphais a constant, an operator IFcan move outside and have two different cycles: one for alpha==0and one for other cases. This should be more efficient, as it IFis evaluated only once, but this leads to duplication of code and complicates its reading and maintenance.

So my question is: are compilers generally smart enough to make changes on their own? Can I do something (with preprocessor instructions) to tell the compiler that this IFcan be safely moved outside the loop?

+3
source share
3 answers

I don't know Fortran, but when they tell him to optimize, C compilers are usually smart enough to do this. Here's what GCC does with the following equivalent C code:

void foo(int n, float x[], float y[], float alpha) 
{
  int i;
  for (i=0; i<n; i++) 
    {
      if (alpha == 0.0) x[i] = y[i];
      else x[i] = alpha*x[i]+y[i];
    }
}

Here are two snippets of compiled code, optimization level 2 ( -O3). My comment.

Vectorized loop without multiplication:

    movl    %edi, %r8d              ; %r8d = n
    xorps   %xmm1, %xmm1            ; clear xmm1
    shrl    $2, %r8d                ; %r8d = n >> 2
    xorl    %eax, %eax              ; clear eax = pointeur increment 
    xorl    %ecx, %ecx              ; clear ecx = (i>>2)
    leal    0(,%r8,4), %r9d         ; not relevant here
L19:
    movaps  %xmm1, %xmm0            ; xmm0 = 0
    addl    $1, %ecx                ; (i>>2) ++
    movlps  (%rdx,%rax), %xmm0      ; Load floats into xmm0 (vector registers)
    movhps  8(%rdx,%rax), %xmm0     ; Load floats into xmm0 (vector registers)
    movlps  %xmm0, (%rsi,%rax)      ; store floats in xmm0 into memory
    movhps  %xmm0, 8(%rsi,%rax)     ; store floats in xmm0 into memory
    addq    $16, %rax               ; increment pointer by 16
    cmpl    %r8d, %ecx              ; if (i>>2) < (n>>2)
    jb      .L19                    ; go back to .L19
                                    ; else finish the non vectorized part of the loop

Vectorized loop with multiplication:

    movaps  %xmm0, %xmm4            ; alpha -> xmm4
    movl    %edi, %r8d              ; %r8d = n
    shrl    $2, %r8d                ; %r8d = n >> 2
    xorps   %xmm3, %xmm3            ; clear xmm3
    shufps  $0, %xmm4, %xmm4        ; distribute xmm4 to all vector elements
    leal    0(,%r8,4), %r9d         ; not relevant here
    xorl    %eax, %eax              ; clear eax = pointeur increment 
    xorl    %ecx, %ecx              ; clear ecx = (i>>2)
.L11:
    movaps  %xmm3, %xmm1            ; xmm1 = 0
    addl    $1, %ecx                ; (i>>2) ++
    movaps  %xmm3, %xmm2            ; xmm2 = 0
    movlps  (%rsi,%rax), %xmm1      ; Load floats X into xmm1 (vector registers)
    movlps  (%rdx,%rax), %xmm2      ; Load floats Y into xmm2 (vector registers)
    movhps  8(%rsi,%rax), %xmm1     ; Load floats X into xmm1 (vector registers)
    movhps  8(%rdx,%rax), %xmm2     ; Load floats Y into xmm2 (vector registers)
    mulps   %xmm4, %xmm1            ; multiply xmm1 by xmm4
    addps   %xmm2, %xmm1            ; add xmm2 to xmm1
    movlps  %xmm1, (%rsi,%rax)      ; store floats in xmm1 into memory
    movhps  %xmm1, 8(%rsi,%rax)     ; store floats in xmm1 into memory
    addq    $16, %rax               ; increment pointer by 16
    cmpl    %r8d, %ecx              ; if i>>2 < n>>2 then
    jb      .L11                    ; go back to .L19
                                    ; else finish the non vectorized part of the loop

- , -, n 4. , , . , Fortran .

SO , , , , , , . ,

for i in 1..n for j in 1..n

for j in 1..n for i in 1..n

: , , , http://en.wikipedia.org/wiki/Polytope_model

+4

.

IF (alpha == 0.0) THEN
    DO i=1,n
        x(i) = y(i)
    END DO
ELSE
    DO i=1,n
       x(i) = alpha*x(i)+y(i)
    END DO
END IF

, , , . LLVM (-loop-unswitch) gcc ( -funswitch-loops).

+2

, #. , , #, JITter ! :

static void foo( int n, float[ ] x, float[ ] y, float alpha ) {
    System.Diagnostics.Debugger.Break( );

    for ( int i = 0; i < n; i++ ) {
        if ( alpha == 0.0 ) x[ i ] = y[ i ];
        else x[ i ] = alpha * x[ i ] + y[ i ];
    }

    Console.WriteLine( "END" );
}

. 32- .NET 4.0 VS2010 :

001F00C3    57              PUSH EDI
001F00C4    56              PUSH ESI
001F00C5    53              PUSH EBX
001F00C6    83EC 0C         SUB ESP,0C
001F00C9    8BF9            MOV EDI,ECX
001F00CB    8BF2            MOV ESI,EDX
001F00CD    8B5D 0C         MOV EBX,DWORD PTR SS:[EBP+0C]
001F00D0    D945 08         FLD DWORD PTR SS:[EBP+8]
001F00D3    D95D F0         FSTP DWORD PTR SS:[EBP-10]

                 System.Diagnostics.Debugger.Break( );
001F00D6    E8 9D7C915C     CALL 5CB07D78
001F00DB    D945 F0         FLD DWORD PTR SS:[EBP-10]
001F00DE    33D2            XOR EDX,EDX
001F00E0    85FF            TEST EDI,EDI
001F00E2    7F 04           JG SHORT 001F00E8
001F00E4    DDD8            FSTP ST
001F00E6    EB 45           JMP SHORT 001F012D

                 if ( alpha == 0.0 )
001F00E8    D9C0            FLD ST
001F00EA    DD5D E8         FSTP QWORD PTR SS:[EBP-18]
001F00ED    DD45 E8         FLD QWORD PTR SS:[EBP-18]
001F00F0    D9EE            FLDZ
001F00F2    DFF1            FCOMIP ST,ST(1)
001F00F4    DDD8            FSTP ST
001F00F6    7A 16           JPE SHORT 001F010E
001F00F8    75 14           JNE SHORT 001F010E

                 x[ i ] = y[ i ]
001F00FA    3B53 04         CMP EDX,DWORD PTR DS:[EBX+4]
001F00FD    73 4D           JNB SHORT 001F014C
001F00FF    D94493 08       FLD DWORD PTR DS:[EDX*4+EBX+8]
001F0103    3B56 04         CMP EDX,DWORD PTR DS:[ESI+4]
001F0106    73 44           JNB SHORT 001F014C
001F0108    D95C96 08       FSTP DWORD PTR DS:[EDX*4+ESI+8]
001F010C    EB 18           JMP SHORT 001F0126

                 else x[ i ] = alpha * x[ i ] + y[ i ];
001F010E    D9C0            FLD ST
001F0110    3B56 04         CMP EDX,DWORD PTR DS:[ESI+4]
001F0113    73 37           JNB SHORT 001F014C
001F0115    D84C96 08       FMUL DWORD PTR DS:[EDX*4+ESI+8]
001F0119    3B53 04         CMP EDX,DWORD PTR DS:[EBX+4]
001F011C    73 2E           JNB SHORT 001F014C
001F011E    D84493 08       FADD DWORD PTR DS:[EDX*4+EBX+8]
001F0122    D95C96 08       FSTP DWORD PTR DS:[EDX*4+ESI+8]

                 End of loop body
001F0126    42              INC EDX
001F0127    3BD7            CMP EDX,EDI
001F0129    7C BD           JL SHORT 001F00E8

                 Console.WriteLine( "END" );
001F012B    DDD8            FSTP ST
001F012D    E8 12F9305C     CALL 5C4FFA44
001F0132    8BC8            MOV ECX,EAX
001F0134    8B15 30201203   MOV EDX,DWORD PTR DS:[3122030]
001F013A    8B01            MOV EAX,DWORD PTR DS:[ECX]
001F013C    8B40 3C         MOV EAX,DWORD PTR DS:[EAX+3C]
001F013F    FF50 10         CALL DWORD PTR DS:[EAX+10]

                 return;
001F0142    8D65 F4         LEA ESP,[EBP-0C]
001F0145    5B              POP EBX
001F0146    5E              POP ESI
001F0147    5F              POP EDI
001F0148    5D              POP EBP
001F0149    C2 0800         RETN 8

, ; , . , .

0

All Articles