What is the fastest search method for a sorted array?


Solution 1

If you have some control over the in-memory layout of the data, you might want to look at Judy arrays.

Or to put a simpler idea out there: a binary search always cuts the search space in half. An optimal cut point can be found with interpolation (the cut point should NOT be the place where the key is expected to be, but the point which minimizes the statistical expectation of the size of the search space for the next step). This minimizes the number of steps, but not all steps have equal cost. Hierarchical memories allow executing a number of tests in the same time as a single test, provided locality can be maintained. Since the first M steps of a binary search touch at most 2**M unique elements, storing these together yields a much better reduction of search space per cacheline fetch (not per comparison), which is what counts for real-world performance.

n-ary trees work on that basis, and then Judy arrays add a few less important optimizations.

Bottom line: even "Random Access Memory" (RAM) is faster when accessed sequentially than randomly. A search algorithm should use that fact to its advantage.
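
To make the idea concrete, here is a minimal sketch (not part of the original answer; the function names are illustrative) of a breadth-first "Eytzinger" layout, which is the simplest form of the cache-friendly arrangement described above: the first few comparisons touch slots 1, 2-3, 4-7, ..., which sit in the same few cachelines. It assumes GCC or Clang for the __builtin_ffs intrinsic.

    #include <vector>

    // Fill t[1..n] with the sorted values in breadth-first order, so that the
    // children of slot k are slots 2k and 2k+1 (slot 0 is unused).
    static void buildEytzinger(const std::vector<int> &sorted, std::vector<int> &t,
                               int &next, int k = 1) {
       if (k < (int)t.size()) {
          buildEytzinger(sorted, t, next, 2 * k);       // left subtree
          t[k] = sorted[next++];                        // this node, in sorted order
          buildEytzinger(sorted, t, next, 2 * k + 1);   // right subtree
       }
    }

    // Returns the 1-based slot of key in the breadth-first array, or 0 if absent.
    static int eytzingerFind(const std::vector<int> &t, int key) {
       int n = (int)t.size() - 1;
       int k = 1;
       while (k <= n)
          k = 2 * k + (t[k] < key);         // descend; the top levels share cachelines
       k >>= __builtin_ffs(~k);             // cancel the trailing "go right" steps
       return (k != 0 && t[k] == key) ? k : 0;
    }

Build with std::vector<int> t(sorted.size() + 1); int next = 0; buildEytzinger(sorted, t, next); and then call eytzingerFind(t, key). Note that the returned slot is a position in the breadth-first array, not in the original sorted one, so keep any payload alongside the keys.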

Solution 2

Benchmarked on Win32, Core 2 Quad Q6600, gcc v4.3 under MSYS. Compiled with g++ -O3, nothing fancy.

Observation: the asserts, timing and loop overhead account for about 40% of the run time, so any gains listed below should be divided by 0.6 to get the actual improvement in the algorithms under test.

Simple answers:

  1. On my machine, replacing the int64_t with int for "low", "high" and "mid" in interpolationSearch gives a 20% to 40% speed up. This is the fastest easy method I could find. It takes about 150 cycles per look-up on my machine (for an array size of 100,000). That's roughly the same number of cycles as a cache miss, so in real applications looking after your cache is probably going to be the biggest factor.

  2. Replacing binarySearch's "/2" with a ">>1" gives a 4% speed up.

  3. Using STL's binary_search algorithm, on a vector containing the same data as "arr", is about the same speed as the hand-coded binarySearch, although at the smaller array sizes STL is much slower (around 40%). A sketch of the STL variant is shown after this list.
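
For reference, a minimal sketch of the STL variant from point 3 (stlFind and the vector name are illustrative). std::binary_search only answers yes/no, so std::lower_bound is used to recover the index as well:

    #include <algorithm>
    #include <vector>

    // Returns the index of key in the sorted vector v, or -1 if it is absent.
    int stlFind(const std::vector<int> &v, int key) {
       std::vector<int>::const_iterator it = std::lower_bound(v.begin(), v.end(), key);
       return (it != v.end() && *it == key) ? (int)(it - v.begin()) : -1;
    }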

Solution 3

I have an excessively complicated solution, which requires a specialized sorting function. The sort is slightly slower than a good quicksort, but all of my tests show that the search function is much faster than a binary or interpolation search. I called it a regression sort before I found out that the name was already taken, but didn't bother to think of a new name (ideas?).

There are three files to demonstrate.

The regression sort/search code:

#include <iostream>
#include <cmath>
#include <climits>

void insertionSort(int array[], int length) {
   int key, j;
   for(int i = 1; i < length; i++) {
      key = array[i];
      j = i - 1;
      while (j >= 0 && array[j] > key) {
         array[j + 1] = array[j];
         --j;
      }
      array[j + 1] = key;
   }
}

class RegressionTable {
   public:
      RegressionTable(int arr[], int s, int lower, int upper, double mult, int divs);
      RegressionTable(int arr[], int s);
      void sort(void);
      int find(int key);
      void printTable(void);
      void showSize(void);
   private:
      void createTable(void);
      inline unsigned int resolve(int n);
      int * array;
      int * table;
      int * tableSize;
      int size;
      int lowerBound;
      int upperBound;
      int divisions;
      int divisionSize;
      int newSize;
      double multiplier;
};

RegressionTable::RegressionTable(int arr[], int s) {
   array = arr;
   size = s;
   multiplier = 1.35;
   divisions = (int)sqrt((double)size);
   upperBound = INT_MIN;
   lowerBound = INT_MAX;
   for (int i = 0; i < size; ++i) {
      if (array[i] > upperBound)
         upperBound = array[i];
      if (array[i] < lowerBound)
         lowerBound = array[i];
   }
   createTable();
}

RegressionTable::RegressionTable(int arr[], int s, int lower, int upper, double mult, int divs) {
   array = arr;
   size = s;
   lowerBound = lower;
   upperBound = upper;
   multiplier = mult;
   divisions = divs;
   createTable();
}

void RegressionTable::showSize(void) {
   // Report the memory footprint of the object plus its two index tables.
   int bytes = sizeof(*this);
   bytes = bytes + sizeof(int) * 2 * (divisions + 1);
   std::cout << "RegressionTable uses " << bytes << " bytes" << std::endl;
}

void RegressionTable::createTable(void) {
   divisionSize = size / divisions;
   newSize = multiplier * double(size);
   table = new int[divisions + 1];
   tableSize = new int[divisions + 1];

   // Zero every slot, including the last one used by the counting pass below.
   for (int i = 0; i <= divisions; ++i) {
      table[i] = 0;
      tableSize[i] = 0;
   }

   // Count how many elements fall in each division, shifted up one slot so the
   // prefix sum below turns the counts into starting offsets.
   for (int i = 0; i < size; ++i) {
      int d = (array[i] - lowerBound) / divisionSize;
      if (d >= divisions)
         d = divisions - 1;   // clamp the top value into the last division
      ++table[d + 1];
   }

   for (int i = 1; i <= divisions; ++i) {
      table[i] += table[i - 1];
   }
   table[0] = 0;

   for (int i = 0; i < divisions; ++i) {
      tableSize[i] = table[i + 1] - table[i];
   }
}

int RegressionTable::find(int key) {
   // Keys outside the stored range cannot be present.
   if (key < lowerBound || key > upperBound)
      return -1;

   double temp = multiplier;
   multiplier = 1;

   int division = (key - lowerBound) / divisionSize;
   if (division >= divisions)
      division = divisions - 1;   // clamp the top value into the last division

   int minIndex = table[division];
   int maxIndex = minIndex + tableSize[division] - 1;   // inclusive upper bound of the division
   if (maxIndex < minIndex) {                           // empty division: key not present
      multiplier = temp;
      return -1;
   }
   int guess = resolve(key);
   double t;
   while (array[guess] != key) {
      // uncomment this line if you want to see where it is searching.
      //cout << "Regression Guessing " << guess << ", not there." << endl;
      if (array[guess] < key) {
         minIndex = guess + 1;
      }
      if (array[guess] > key) {
         maxIndex = guess - 1;
      }
      if (array[minIndex] > key || array[maxIndex] < key) {
         multiplier = temp;   // restore before bailing out
         return -1;
      }
      t = ((double)key - array[minIndex]) / ((double)array[maxIndex] - array[minIndex]);
      guess = minIndex + t * (maxIndex - minIndex);
   }

   multiplier = temp;

   return guess;
}

inline unsigned int RegressionTable::resolve(int n) {
   // Estimate n's position: start of its division plus a linear interpolation
   // of where it should fall inside that division.
   float temp;
   int subDomain = (n - lowerBound) / divisionSize;
   if (subDomain >= divisions)
      subDomain = divisions - 1;   // clamp the top value into the last division
   temp = (n - lowerBound) % divisionSize;
   temp /= divisionSize;
   temp *= tableSize[subDomain];
   temp += table[subDomain];
   temp *= multiplier;
   return (unsigned int)temp;
}

void RegressionTable::sort(void) {
   int * out = new int[newSize];
   bool * used = new bool[newSize]();   // value-initialized to false
   int higher, lower;
   bool placed;

   for (int i = 0; i < size; ++i) {

      /* Figure out where to put the darn thing */
      higher = resolve(array[i]);
      lower = higher - 1;

      if (higher > newSize) {
         higher = size;
         lower = size - 1;
      } else if (lower < 0) {
         higher = 0;
         lower = 0;
      }
      placed = false;
      while (!placed) {
         if (higher < size && !used[higher]) {
            out[higher] = array[i];
            used[higher] = true;
            placed = true;
         } else if (lower >= 0 && !used[lower]) {
            out[lower] = array[i];
            used[lower] = true;
            placed = true;
         }
         --lower;
         ++higher;
      }
   }
   // Gather the placed elements back into the input array; the regression
   // placement leaves it nearly sorted, so the final insertion sort is cheap.
   int index = 0;
   for (int i = 0; i < newSize; ++i) {
      if (used[i]) {
         array[index] = out[i];
         ++index;
      }
   }
   delete[] out;
   delete[] used;

   insertionSort(array, size);
}

And then there are the regular search functions:

#include <iostream>
using namespace std;

int binarySearch(int array[], int start, int end, int key) {
   // If the bounds have crossed, the key is not here.
   if (start > end)
      return -1;
   // Determine the search point.
   int searchPos = (start + end) / 2;
   // Search the bottom half of the array if the query is smaller.
   if (array[searchPos] > key)
      return binarySearch (array, start, searchPos - 1, key);
   // Search the top half of the array if the query is larger.
   if (array[searchPos] < key)
      return binarySearch (array, searchPos + 1, end, key);
   // Otherwise we found it.
   return searchPos;
}

int binarySearch(int array[], int size, int key) {
   return binarySearch(array, 0, size - 1, key);
}

int interpolationSearch(int array[], int size, int key) {
   int guess = 0;
   double t;
   int minIndex = 0;
   int maxIndex = size - 1;
   while (array[guess] != key) {
      // Bail out before interpolating if the key lies outside the current
      // range; this also keeps "guess" inside the array bounds.
      if (array[minIndex] > key || array[maxIndex] < key) {
         return -1;
      }
      // All remaining values are equal (and equal to key), so any index will do.
      if (array[minIndex] == array[maxIndex]) {
         return minIndex;
      }

      t = ((double)key - array[minIndex]) / ((double)array[maxIndex] - array[minIndex]);
      guess = minIndex + t * (maxIndex - minIndex);

      if (array[guess] < key) {
         minIndex = guess + 1;
      }
      if (array[guess] > key) {
         maxIndex = guess - 1;
      }
   }

   return guess;
}

And then I wrote a simple main to test out the different sorts.

    #include <iostream>
    #include <iomanip>
    #include <cstdlib>
    #include <ctime>
    #include <string>
    #include "regression.h"
    #include "search.h"
    using namespace std;

    void randomizeArray(int array[], int size) {
       for (int i = 0; i < size; ++i) {
          array[i] = rand() % size;
       }
    }

    int main(int argc, char * argv[]) {

       int size = 100000;
       string arg;
       if (argc > 1) {
          arg = argv[1];
          size = atoi(arg.c_str());
       }
       srand(time(NULL));
       int * array;
       cout << "Creating Array Of Size " << size << "...\n";
       array = new int[size];

       randomizeArray(array, size);
       cout << "Sorting Array...\n";
       RegressionTable t(array, size, 0, size*2.5, 1.5, size);
       //RegressionTable t(array, size);
       t.sort();
       int trials = 10000000;
       int start;

       cout << "Binary Search...\n";
       start = clock();
       for (int i = 0; i < trials; ++i) {
          binarySearch(array, size, i % size);
       }
       cout << clock() - start << endl;

       cout << "Interpolation Search...\n";
       start = clock();
       for (int i = 0; i < trials; ++i) {
          interpolationSearch(array, size, i % size);
       }
       cout << clock() - start << endl;

       cout << "Regression Search...\n";
       start = clock();
       for (int i = 0; i < trials; ++i) {
          t.find(i % size);
       }
       cout << clock() - start << endl;

       return 0;
    }

Give it a try and tell me if it's faster for you. It's super complicated, so it's really easy to break it if you don't know what you are doing. Be careful about modifying it.

I compiled the main with g++ on Ubuntu.

Solution 4

Look first at the data, and at whether a big gain can be had from a data-specific method rather than a general one.

For large static sorted datasets, you can create an additional index to provide partial pigeonholing, based on the amount of memory you're willing to use. E.g., say we create a 256x256 two-dimensional array of ranges, populated with the start and end positions (in the search array) of the elements with the corresponding high-order bytes. When we come to search, we then use the high-order bytes of the key to find the range / subset of the array we need to search. If our binary search over 100,000 elements took ~17 comparisons (O(log2 n)), and each subset holds on the order of 16 elements, we're now down to ~4 comparisons, or O(log2 m) where m is the subset size. The memory cost here is about 512k. A sketch of such an index follows below.
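
As a rough sketch of such an index (not from the original text; HighByteIndex and its members are illustrative names, and non-negative int keys are assumed): only the 65537 bucket start offsets are stored here, since each range ends where the next begins, which roughly halves the 512k figure.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Bucket the sorted array by the top 16 bits of each value; bucket b spans
    // [start[b], start[b + 1]) and only that slice needs a binary search.
    struct HighByteIndex {
       std::vector<int> start;               // 65537 offsets, roughly 256 KB
       const std::vector<int> *data;

       explicit HighByteIndex(const std::vector<int> &sorted) : start(65537), data(&sorted) {
          for (int b = 0; b <= 65536; ++b)
             start[b] = (int)(std::lower_bound(sorted.begin(), sorted.end(),
                                               (std::int64_t)b << 16) - sorted.begin());
       }

       int find(int key) const {
          int b = key >> 16;                 // the high-order bytes pick the bucket
          std::vector<int>::const_iterator lo = data->begin() + start[b];
          std::vector<int>::const_iterator hi = data->begin() + start[b + 1];
          std::vector<int>::const_iterator it = std::lower_bound(lo, hi, key);
          return (it != hi && *it == key) ? (int)(it - data->begin()) : -1;
       }
    };

Building the table costs one binary search per bucket boundary, but for a large static dataset that is a one-off cost paid before any look-ups.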

Another method, again suited to data that doesn't change much, is to divide the data into arrays of commonly sought items and rarely sought items. For example, if you leave your existing search in place, run a wide range of real-world cases over a protracted testing period, and log the details of the items being sought, you may well find that the distribution is very uneven, i.e. some values are sought far more often than others. If that is the case, break your array into a much smaller array of commonly sought values and a larger remaining array, and search the smaller array first. If the data is right (big if!), you can often achieve broadly similar improvements to the first suggestion without the memory cost; a sketch follows below.
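
A minimal sketch of that split, assuming both arrays are kept sorted (hotColdContains and the array names are illustrative):

    #include <algorithm>
    #include <vector>

    // Probe the small array of frequently requested values first, and only fall
    // back to the full array on a miss.
    bool hotColdContains(const std::vector<int> &hot, const std::vector<int> &cold, int key) {
       if (std::binary_search(hot.begin(), hot.end(), key))
          return true;                       // cheap hit in the hot set
       return std::binary_search(cold.begin(), cold.end(), key);
    }

Whether this wins depends entirely on the hit rate of the hot array, which is why the logging step above matters.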

There are many other data specific optimizations which score far better than trying to improve on tried, tested and far more widely used general solutions.

Solution 5

Unless your data is known to have special properties, pure interpolation search has the risk of taking linear time. If you expect interpolation to help with most data but don't want it to hurt in the case of pathological data, I would use a (possibly weighted) average of the interpolated guess and the midpoint, ensuring a logarithmic bound on the run time.
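
A minimal sketch of that guard (not from the original answer; the function name and the equal 50/50 weighting are illustrative). Averaging the interpolated position with the midpoint means at least roughly a quarter of the range is discarded per step, so the run time stays logarithmic even on adversarial data, while uniform data still benefits from the interpolation half of the guess:

    // Interpolation search with a midpoint blend to bound the worst case.
    int guardedInterpolationSearch(const int array[], int size, int key) {
       int low = 0, high = size - 1;
       while (low <= high && key >= array[low] && key <= array[high]) {
          int mid = low + (high - low) / 2;
          int interp = (array[high] > array[low])
             ? low + (int)(((double)key - array[low]) /
                           ((double)array[high] - array[low]) * (high - low))
             : mid;
          int guess = (interp + mid) / 2;    // blend the two estimates
          if (array[guess] < key)
             low = guess + 1;
          else if (array[guess] > key)
             high = guess - 1;
          else
             return guess;
       }
       return -1;
    }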

Author: kriss

Updated on July 18, 2022

Comments

  • kriss, almost 2 years ago

    While answering another question, I wrote the program below to compare different search methods on a sorted array. Basically, I compared two implementations of interpolation search against one of binary search, and measured performance by counting the cycles spent (with the same set of data) by the different variants.

    However, I'm sure there are ways to optimize these functions to make them even faster. Does anyone have any ideas on how I can make this search function faster? A solution in C or C++ is acceptable, but I need it to process an array with 100,000 elements.

    #include <stdlib.h>
    #include <stdio.h>
    #include <time.h>
    #include <stdint.h>
    #include <assert.h>
    
    static __inline__ unsigned long long rdtsc(void)
    {
      // Read the x86 time-stamp counter (0F 31 is the RDTSC opcode). The "=A"
      // constraint is only correct on 32-bit builds; use __rdtsc() on x86-64.
      unsigned long long int x;
         __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
         return x;
    }
    
    int interpolationSearch(int sortedArray[], int toFind, int len) {
        // Returns index of toFind in sortedArray, or -1 if not found
        int64_t low = 0;
        int64_t high = len - 1;
        int64_t mid;
    
        int l = sortedArray[low];
        int h = sortedArray[high];
    
        while (l <= toFind && h >= toFind) {
            mid = low + (int64_t)((int64_t)(high - low)*(int64_t)(toFind - l))/((int64_t)(h-l));
    
            int m = sortedArray[mid];
    
            if (m < toFind) {
                l = sortedArray[low = mid + 1];
            } else if (m > toFind) {
                h = sortedArray[high = mid - 1];
            } else {
                return mid;
            }
        }
    
        if (sortedArray[low] == toFind)
            return low;
        else
            return -1; // Not found
    }
    
    int interpolationSearch2(int sortedArray[], int toFind, int len) {
        // Returns index of toFind in sortedArray, or -1 if not found
        int low = 0;
        int high = len - 1;
        int mid;
    
        int l = sortedArray[low];
        int h = sortedArray[high];
    
        while (l <= toFind && h >= toFind) {
            mid = low + ((float)(high - low)*(float)(toFind - l))/(1+(float)(h-l));
            int m = sortedArray[mid];
    
            if (m < toFind) {
                l = sortedArray[low = mid + 1];
            } else if (m > toFind) {
                h = sortedArray[high = mid - 1];
            } else {
                return mid;
            }
        }
    
        if (sortedArray[low] == toFind)
            return low;
        else
            return -1; // Not found
    }
    
    int binarySearch(int sortedArray[], int toFind, int len) 
    {
        // Returns index of toFind in sortedArray, or -1 if not found
        int low = 0;
        int high = len - 1;
        int mid;
    
        int l = sortedArray[low];
        int h = sortedArray[high];
    
        while (l <= toFind && h >= toFind) {
            mid = (low + high)/2;
    
            int m = sortedArray[mid];
    
            if (m < toFind) {
                l = sortedArray[low = mid + 1];
            } else if (m > toFind) {
                h = sortedArray[high = mid - 1];
            } else {
                return mid;
            }
        }
    
        if (sortedArray[low] == toFind)
            return low;
        else
            return -1; // Not found
    }
    
    int order(const void *p1, const void *p2) { return *(int*)p1-*(int*)p2; }
    
    int main(void) {
        int i = 0, j = 0, size = 100000, trials = 10000;
        int searched[trials];
        srand(-time(0));
        for (j=0; j<trials; j++) { searched[j] = rand()%size; }
    
        while (size > 10){
            int arr[size];
            for (i=0; i<size; i++) { arr[i] = rand()%size; }
            qsort(arr,size,sizeof(int),order);
    
            unsigned long long totalcycles_bs = 0;
            unsigned long long totalcycles_is_64 = 0;
            unsigned long long totalcycles_is_float = 0;
            unsigned long long totalcycles_new = 0;
            int res_bs, res_is_64, res_is_float, res_new;
            for (j=0; j<trials; j++) {
                unsigned long long tmp, cycles = rdtsc();
                res_bs = binarySearch(arr,searched[j],size);
                tmp = rdtsc(); totalcycles_bs += tmp - cycles; cycles = tmp;
    
                res_is_64 = interpolationSearch(arr,searched[j],size);
                assert(res_is_64 == res_bs || arr[res_is_64] == searched[j]); 
                tmp = rdtsc(); totalcycles_is_64 += tmp - cycles; cycles = tmp;
    
                res_is_float = interpolationSearch2(arr,searched[j],size);
                assert(res_is_float == res_bs || arr[res_is_float] == searched[j]); 
                tmp = rdtsc(); totalcycles_is_float += tmp - cycles; cycles = tmp;
            }
            printf("----------------- size = %10d\n", size);
            printf("binary search          = %10llu\n", totalcycles_bs);
            printf("interpolation uint64_t = %10llu\n",  totalcycles_is_64);
            printf("interpolation float    = %10llu\n",  totalcycles_is_float);
            printf("new                    = %10llu\n",  totalcycles_new);
            printf("\n");
            size >>= 1;
        }
    }