Optimize Binary Search Algorithm
Solution 1
I would try the bsearch() standard function first. Chances are good that it performes better than your approach.
Solution 2
It's not advisable to try and optimize in the way you describe. From the Binary Search Algorithm article on Wikipedia:
About half the time, the first test will be true so that there will be only one comparison of a and b, but the other half of the time it will be false, and a second comparison forced. This is so grievous that some versions are recast so as not to make a second test at all thus not determining equality until the span has been reduced to zero, and thereby foregoing the possibility of early termination – remember that about half the time the search will happen on a matching value one iteration short of the limit.
It is quite easy to make this problem still worse (e.g. as in 3) by using an order such as
if a = b then action3
else if a > b then action2
else action1;
Rather than detecting equality early (as it might appear to), this will force two comparisons to be performed for all but the last iteration of a search.
Some languages, like Fortran, have a three-way comparison that allows this step to be done with one comparison that branches to three different sections (see the tenth line of the Three-way comparison example). If your language doesn't support a three-way test (most languages don't) then two comparisons is the best you can do.
I would advise you to check out the iterative implementation from the same article, though.
Solution 3
I tried to reconstruct the optimization steps of the binary search algorithm. I start with this iterative version:
int binsearch_1( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
int found=0;
while( size > 0 ){
size_t w=size/2;
if( p[w] < key ){ p+=w+1; size-=w+1; }
else if( p[w] > key ){ size =w; }
else /* p[w] == key */{ p+=w; found=1; break; }
}
*index=p-array; return found;
}
Eliminating comparisons from the loop:
int binsearch_2( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 0 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
*index=p-array; return p[0]==key;
}
Unrolling small sizes from the loop:
int binsearch_3( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==7 ){ if( p[4] <= key ){ p+=4; size=3; } else size=3; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==3 ){ if( p[2] <= key ){ p+=2; size=1; } else size=1; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
if( size==1 ){ if( p[1] <= key ){ p+=1; } }
*index=p-array; return p[0]==key;
}
Reordering if statements, moving special cases [size==pow(2,N)-1] to the end:
int binsearch_4( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
if( size==7 ){ if( p[4] <= key ){ p+=4; size=3; } else size=3; }
if( size==3 ){ if( p[2] <= key ){ p+=2; size=1; } else size=1; }
if( size==1 ){ if( p[1] <= key ){ p+=1; } }
*index=p-array; return p[0]==key;
}
Changing if statements to a switch statement:
int binsearch_5( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
switch(size){
case 7: if( p[4] <= key ) p+=4;
case 3: if( p[2] <= key ) p+=2;
case 1: if( p[1] <= key ) p+=1;
}
*index=p-array; return p[0]==key;
}
Extending switch statement to handle all of the special cases:
int binsearch_6( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
switch( size ){
#define C(n) case ((size_t)1<<n)-1: if( p[1<<(n-1)]<=key ) p+=1<<(n-1);
#if SIZE_MAX == UINT64_MAX
C(63) C(62) C(61)
C(60) C(59) C(58) C(57) C(56) C(55) C(54) C(53) C(52) C(51)
C(50) C(49) C(48) C(47) C(46) C(45) C(44) C(43) C(42) C(41)
C(40) C(39) C(38) C(37) C(36) C(35) C(34) C(33) C(32)
#endif
C(31)
C(30) C(29) C(28) C(27) C(26) C(25) C(24) C(23) C(22) C(21)
C(20) C(19) C(18) C(17) C(16) C(15) C(14) C(13) C(12) C(11)
C(10) C( 9) C( 8) C( 7) C( 6) C( 5) C( 4) C( 3) C( 2) C( 1)
#undef C
break;
default:
while( size > 0 ){
size_t w=size/2;
if( p[w] < key ){ p+=w+1; size-=w+1; } else size=w;
}
}
*index=p-array; return p[0]==key;
}
Eliminating the general case handling from the code: [ w is the smallest number: w==pow(2,N)-1; size<=2*(w+1) ]
int binsearch_7( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
size_t w=(size-1)>>1; w|=w>>1; w|=w>>2; w|=w>>4; w|=w>>8; w|=w>>16;
#if SIZE_MAX == UINT64_MAX
w|=w>>32;
#endif
if( p[w]<key ) p+=size-w-1;
switch( w ){
#define C(n) case ((size_t)1<<n)-1: if( p[1<<(n-1)]<=key ) p+=1<<(n-1);
#if SIZE_MAX == UINT64_MAX
C(63) C(62) C(61)
C(60) C(59) C(58) C(57) C(56) C(55) C(54) C(53) C(52) C(51)
C(50) C(49) C(48) C(47) C(46) C(45) C(44) C(43) C(42) C(41)
C(40) C(39) C(38) C(37) C(36) C(35) C(34) C(33) C(32)
#endif
C(31)
C(30) C(29) C(28) C(27) C(26) C(25) C(24) C(23) C(22) C(21)
C(20) C(19) C(18) C(17) C(16) C(15) C(14) C(13) C(12) C(11)
C(10) C( 9) C( 8) C( 7) C( 6) C( 5) C( 4) C( 3) C( 2) C( 1)
#undef C
}
*index=p-array; return p[0]==key;
}
The last step what i made was the simplifying of the case labels [from: '((size_t)1<<n)-1' to: 'n'] but i found that the modified code was slower on my old PC than the previous version.
Solution 4
In the code you posted, you can remove the last comparison. That is, if key
is not less than array[mid]
or greater than array[mid]
, then by definition it's equal. So your code becomes:
mid = left + (right-left)/2;
if (key < array[mid])
return binSearch(array, key, left, mid-1);
else if (key > array[mid])
return binSearch(array, key, mid+1, right);
else
return TRUE; // Found
Also note that I've removed the last line. In your original code, it's impossible for the return FALSE;
ever to be executed. I would assume that you have a check at the beginning of your binSearch
which checks to see if left
<= right
.
In C, there is no way to do a three-way comparison/branch based on less than, equal, greater than, so you have to do two comparisons to select among three possibilities.
Solution 5
If you want to optimize your binary search algorithm you must replace recursion with iteration. See examples at wikipedia.
Let me know if you need more details.
Ganesh M
Updated on June 28, 2022Comments
-
Ganesh M almost 2 years
In a binary search, we have two comparisons one for greater than and other for less than, otherwise its the mid value. How would you optimize so that we need to check only once?
bool binSearch(int array[], int key, int left, int right) { mid = left + (right-left)/2; if (key < array[mid]) return binSearch(array, key, left, mid-1); else if (key > array[mid]) return binSearch(array, key, mid+1, right); else if (key == array[mid]) return TRUE; // Found return FALSE; // Not Found }
-
Admin over 15 yearsactually, I think it is worth it for integers as it potentially removes two array accesses. You'd have to profile to be sure, of course.
-
starblue over 15 years@Neil If the array were accessed more than once that would be a really lousy compiler.
-
paperhorse almost 15 yearsI did the equality test first and it was faster than equality test last (probably modern superscalar processor weirdness). You may want to check Knuth ex 6.2.1 23 where he states the one way comparison is only faster for N> 2**36
-
sambowry over 14 yearsglibc implementation of bsearch() looks unoptimized (ftp.gnu.org/gnu/glibc)
-
Hasturkun over 14 yearsthis looks quite decent to me, as an iterative implementation sourceware.org/git/?p=glibc.git;a=blob_plain;f=stdlib/…
-
quinmars over 14 years@sambowry, how would you optimize it?
-
sambowry over 14 years@quinmars, a good book describe the optimization process cs.bell-labs.com/cm/cs/pearls
-
quinmars over 14 years@sambowry, sorry, I don't have that book, and I won't buy it just to see how he is implementing a binary search.
-
jason over 14 years@quinmars: Use your local library.
-
t0mm13b over 11 yearsWhat is that language - hardly looks like C to me... copy 'n' paste not good enough....
-
Nick almost 9 yearsno such big prblem that is not C, but it doesn not show anything new?
-
Peter Cordes over 5 yearsYou could do
end = cmp>0 ? mid-1 : end
to make it branchless. That's not always best, though, if the data is cold in cache; speculative execution can effectively trigger prefetch and avoid a data dependency chain of cache misses. About the branchless binary search has more, and shows a way of writing it that only needs onecmov
/ other branchless ALU select. -
Peter Cordes over 5 years@Hasturkun: and more importantly, it's defined in
<bits/stdlib-bsearch.h>
so it can inline and optimize away the indirection through the callback function. (Modern compilers can inline away function pointers when the pointer is a compile-time constant.) This is a major advantage over stdlibqsort
which does actually have tocall
even a trivial tiny compare function passing it pointers, not values, even for simple types. -
jjxtra almost 5 yearsUpdated with pointer array. It’s possible the compiler is smarter but this way absolutely ensures there is no inner branch at the cost of an array index and add instruction.