Speed difference between If-Else and Ternary operator in C...?

There's a good chance that the ternary operator gets compiled into a cmov while the if/else results in a cmp+jmp. Just take a look at the assembly (using -S) to be sure. With optimizations enabled, it won't matter any more anyway, as any good compiler should produce the same code in both cases.

You could also go completely branchless and measure if it makes any difference:

int m = -(i & 1);
a = (b & m) | (c & ~m);

On today's architectures, this style of programming has grown a bit out of fashion.

This is a nice explanation: http://www.nynaeve.net/?p=178

Basically, there are "conditional set" processor instructions, which are faster than branching and setting in separate instructions.

If there is any difference, change your compiler!

For this kind of question I use the Try Out LLVM page. It's an old release of LLVM (still using the gcc front-end), but those are old tricks.

Here is my little sample program (simplified version of yours):

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/*
 * Sample program contrasting an if/else statement with the ternary
 * operator. Each loop picks b+i or c+i depending on the parity of i;
 * the first loop does so with if/else (result in a), the second with
 * ?: (result in d). Both final values are printed.
 *
 * Reads three integers from the command line with atoi: N (iteration
 * count), b and c (the two candidate addends).
 */
int main (int argc, char* argv[]) {
  /* NOTE(review): N is parsed from argv[0], which is conventionally the
   * program name, so atoi will most likely yield 0 and both loops are
   * skipped. Presumably argv[1], argv[2], argv[3] were intended, and argc
   * is never checked before argv[1]/argv[2] are read. Left unchanged so the
   * listing stays in sync with the IR generated from it. */
  int N = atoi(argv[0]);

  int a = 0, d = 0, b = atoi(argv[1]), c = atoi(argv[2]);

  int i;
  /* Variant 1: if/else statement. a first holds the parity bit of i,
   * then is overwritten with b+i (odd i) or c+i (even i). */
  for(i = 0; i < N; i++)
  {
     a = i & 1;
     if(a) a = b+i; else a = c+i;
  }

  /* Variant 2: ternary operator, same selection as above but stored in d. */
  for(i = 0; i < N; i++)
  {
     d = i & 1;
     d = d ? b+i : c+i;
  }

  /* Both results are printed (no trailing newline); presumably this is
   * also what keeps the loop results observable to the optimizer. */
  printf("%d %d", a, d);

  return 0;
}

And here is the corresponding LLVM IR that was generated:

; Optimized IR for main(). Both loops have been reduced to bare counting
; loops; the final values of a and d are each computed once, after their
; loop, from the last index value (N-1) using a select instead of a branch.
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
  ; N = atoi(argv[0])
  %0 = load i8** %argv, align 8                   ; <i8*> [#uses=1]
  %N = tail call i32 @atoi(i8* %0) nounwind readonly ; <i32> [#uses=5]

  ; b = atoi(argv[1])
  %2 = getelementptr inbounds i8** %argv, i64 1   ; <i8**> [#uses=1]
  %3 = load i8** %2, align 8                      ; <i8*> [#uses=1]
  %b = tail call i32 @atoi(i8* %3) nounwind readonly ; <i32> [#uses=2]

  ; c = atoi(argv[2])
  %5 = getelementptr inbounds i8** %argv, i64 2   ; <i8**> [#uses=1]
  %6 = load i8** %5, align 8                      ; <i8*> [#uses=1]
  %c = tail call i32 @atoi(i8* %6) nounwind readonly ; <i32> [#uses=2]

  ; Skip straight to the printf block when N <= 0.
  %8 = icmp sgt i32 %N, 0                         ; <i1> [#uses=2]
  br i1 %8, label %bb, label %bb11

; First loop (the if/else one): only the induction variable is left.
bb:                                               ; preds = %bb, %entry
  %9 = phi i32 [ %10, %bb ], [ 0, %entry ]        ; <i32> [#uses=2]
  %10 = add nsw i32 %9, 1                         ; <i32> [#uses=2]
  %exitcond22 = icmp eq i32 %10, %N               ; <i1> [#uses=1]
  br i1 %exitcond22, label %bb10.preheader, label %bb

; Final value of a: (last index even ? c : b) + (N - 1).
bb10.preheader:                                   ; preds = %bb
  %11 = and i32 %9, 1                             ; <i32> [#uses=1]
  %12 = icmp eq i32 %11, 0                        ; <i1> [#uses=1]
  %.pn13 = select i1 %12, i32 %c, i32 %b          ; <i32> [#uses=1]
  %tmp21 = add i32 %N, -1                         ; <i32> [#uses=1]
  %a.1 = add i32 %.pn13, %tmp21                   ; <i32> [#uses=2]
  br i1 %8, label %bb6, label %bb11

; Second loop (the ternary one): again just the induction variable.
bb6:                                              ; preds = %bb6, %bb10.preheader
  %13 = phi i32 [ %14, %bb6 ], [ 0, %bb10.preheader ] ; <i32> [#uses=2]
  %14 = add nsw i32 %13, 1                        ; <i32> [#uses=2]
  %exitcond = icmp eq i32 %14, %N                 ; <i1> [#uses=1]
  br i1 %exitcond, label %bb10.bb11_crit_edge, label %bb6

; Final value of d: same select + add sequence as for a.
bb10.bb11_crit_edge:                              ; preds = %bb6
  %15 = and i32 %13, 1                            ; <i32> [#uses=1]
  %16 = icmp eq i32 %15, 0                        ; <i1> [#uses=1]
  %.pn = select i1 %16, i32 %c, i32 %b            ; <i32> [#uses=1]
  %tmp = add i32 %N, -1                           ; <i32> [#uses=1]
  %d.1 = add i32 %.pn, %tmp                       ; <i32> [#uses=1]
  br label %bb11

; Merge point: pick the computed a/d (or 0 when coming from entry) and print them.
bb11:                                             ; preds = %bb10.bb11_crit_edge, %bb10.preheader, %entry
  %a.0 = phi i32 [ %a.1, %bb10.bb11_crit_edge ], [ %a.1, %bb10.preheader ], [ 0, %entry ] ; <i32> [#uses=1]
  %d.0 = phi i32 [ %d.1, %bb10.bb11_crit_edge ], [ 0, %bb10.preheader ], [ 0, %entry ] ; <i32> [#uses=1]
  %17 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %a.0, i32 %d.0) nounwind ; <i32> [#uses=0]
  ret i32 0
}

Okay, this will probably look like gibberish at first, even though I went ahead and renamed some variables to make it a bit easier to read.

The important bits are these two blocks:

  %.pn13 = select i1 %12, i32 %c, i32 %b          ; <i32> [#uses=1]
  %tmp21 = add i32 %N, -1                         ; <i32> [#uses=1]
  %a.1 = add i32 %.pn13, %tmp21                   ; <i32> [#uses=2]

  %.pn = select i1 %16, i32 %c, i32 %b            ; <i32> [#uses=1]
  %tmp = add i32 %N, -1                           ; <i32> [#uses=1]
  %d.1 = add i32 %.pn, %tmp                       ; <i32> [#uses=1]

These set a and d, respectively.

And the conclusion is: No difference

Note: in a simpler example the two variables actually got merged; here, it seems the optimizer did not detect the similarity.

Speed difference between If-Else and Ternary operator in C...?

Related

Recent Posts