finding quartiles

17,618

Solution 1

Instead of doing std::sort(quantile.begin(), quantile.end()) a somewhat cheaper way would be

auto const Q1 = quantile.size() / 4;
auto const Q2 = quantile.size() / 2;
auto const Q3 = Q1 + Q2;

std::nth_element(quantile.begin(),          quantile.begin() + Q1, quantile.end());
std::nth_element(quantile.begin() + Q1 + 1, quantile.begin() + Q2, quantile.end());
std::nth_element(quantile.begin() + Q2 + 1, quantile.begin() + Q3, quantile.end());

This would not sort the complete array, but only do a "between groups" sort of the 4 quartile. This saves on the "within groups" sort that a full std::sort would do.

If your quantile array is not large, it's a small optimization. But the scaling behavior of std::nth_element is O(N) however, rather than O(N log N) of a std::sort.

Solution 2

Here is Quantile function which is MATLAB's equivalent with linear interpolation:

#include <algorithm>
#include <cmath>
#include <vector>

template<typename T>
static inline double Lerp(T v0, T v1, T t)
{
    return (1 - t)*v0 + t*v1;
}

template<typename T>
static inline std::vector<T> Quantile(const std::vector<T>& inData, const std::vector<T>& probs)
{
    if (inData.empty())
    {
        return std::vector<T>();
    }

    if (1 == inData.size())
    {
        return std::vector<T>(1, inData[0]);
    }

    std::vector<T> data = inData;
    std::sort(data.begin(), data.end());
    std::vector<T> quantiles;

    for (size_t i = 0; i < probs.size(); ++i)
    {
        T poi = Lerp<T>(-0.5, data.size() - 0.5, probs[i]);

        size_t left = std::max(int64_t(std::floor(poi)), int64_t(0));
        size_t right = std::min(int64_t(std::ceil(poi)), int64_t(data.size() - 1));

        T datLeft = data.at(left);
        T datRight = data.at(right);

        T quantile = Lerp<T>(datLeft, datRight, poi - left);

        quantiles.push_back(quantile);
    }

    return quantiles;
}

Find quartiles:

std::vector<double> in = { 1,2,3,4,5,6,7,8,9,10,11 };
auto quartiles = Quantile<double>(in, { 0.25, 0.5, 0.75 });

Solution 3

You need to preallocate first and third vectors before you set the contents.

vector<double> first(mid);
vector<double> third(size-mid);

or use push_back instead of assignments to first[i] and third[i]

Solution 4

This C++ template function calculates quartile for you. It assumes x to be sorted.

#include <assert.h>

template <typename T1, typename T2> typename T1::value_type quant(const T1 &x, T2 q)
{
    assert(q >= 0.0 && q <= 1.0);
    
    const auto n  = x.size();
    const auto id = (n - 1) * q;
    const auto lo = floor(id);
    const auto hi = ceil(id);
    const auto qs = x[lo];
    const auto h  = (id - lo);
    
    return (1.0 - h) * qs + h * x[hi];
}

To use it do:

std::vector<float> x{1,1,2,2,3,4,5,6};
std::cout << quant(x, 0.25) << std::endl;
std::cout << quant(x, 0.50) << std::endl;
std::cout << quant(x, 0.75) << std::endl;
Share:
17,618
Emir
Author by

Emir

Updated on June 14, 2022

Comments

  • Emir
    Emir almost 2 years

    I've written a program where the user can enter any number of values into a vector and it's supposed to return the quartiles, but I keep getting a "vector subscript out of range" error :

    #include "stdafx.h"
    #include <iostream>
    #include <string>
    #include <algorithm>
    #include <iomanip>
    #include <ios>
    #include <vector>
    
    int main () {
        using namespace std;
    
        cout << "Enter a list of numbers: ";
    
        vector<double> quantile;
        double x;
        //invariant: homework contains all the homework grades so far
        while (cin >> x)
            quantile.push_back(x);
    
        //check that the student entered some homework grades
        //typedef vector<double>::size_type vec_sz;
        int size = quantile.size();
    
        if (size == 0) {
            cout << endl << "You must enter your numbers . "
                            "Please try again." << endl;
            return 1;
        }
    
        sort(quantile.begin(), quantile.end());
    
        int mid = size/2;
        double median;
        median = size % 2 == 0 ? (quantile[mid] + quantile[mid-1])/2 : quantile[mid];
    
        vector<double> first;
        vector<double> third;
    
        for (int i = 0; i!=mid; ++i)
        {
            first[i] = quantile[i];
    
        }
    
            for (int i = mid; i!= size; ++i)
        {
            third[i] = quantile[i];
        }
            double fst;
            double trd;
    
            int side_length = 0;
    
            if (size % 2 == 0) 
            {
                side_length = size/2;
            }
            else {
                side_length = (size-1)/2;
            }
    
            fst = (size/2) % 2 == 0 ? (first[side_length/2]/2 + first[(side_length-1)/2])/2 : first[side_length/2];
            trd = (size/2) % 2 == 0 ? (third[side_length/2]/2 + third[(side_length-1)/2])/2 : third[side_length/2];
    
        streamsize prec = cout.precision();
        cout << "The quartiles are" <<  setprecision(3) << "1st"
            << fst << "2nd" << median << "3rd" << trd << setprecision(prec) << endl;
    
        return 0;   
    
    }