finding quartiles
Solution 1
Instead of doing std::sort(quantile.begin(), quantile.end())
a somewhat cheaper way would be
auto const Q1 = quantile.size() / 4;
auto const Q2 = quantile.size() / 2;
auto const Q3 = Q1 + Q2;
std::nth_element(quantile.begin(), quantile.begin() + Q1, quantile.end());
std::nth_element(quantile.begin() + Q1 + 1, quantile.begin() + Q2, quantile.end());
std::nth_element(quantile.begin() + Q2 + 1, quantile.begin() + Q3, quantile.end());
This would not sort the complete array, but only do a "between groups" sort of the 4 quartile. This saves on the "within groups" sort that a full std::sort
would do.
If your quantile
array is not large, it's a small optimization. But the scaling behavior of std::nth_element
is O(N)
however, rather than O(N log N)
of a std::sort
.
Solution 2
Here is Quantile function which is MATLAB's equivalent with linear interpolation:
#include <algorithm>
#include <cmath>
#include <vector>
template<typename T>
static inline double Lerp(T v0, T v1, T t)
{
return (1 - t)*v0 + t*v1;
}
template<typename T>
static inline std::vector<T> Quantile(const std::vector<T>& inData, const std::vector<T>& probs)
{
if (inData.empty())
{
return std::vector<T>();
}
if (1 == inData.size())
{
return std::vector<T>(1, inData[0]);
}
std::vector<T> data = inData;
std::sort(data.begin(), data.end());
std::vector<T> quantiles;
for (size_t i = 0; i < probs.size(); ++i)
{
T poi = Lerp<T>(-0.5, data.size() - 0.5, probs[i]);
size_t left = std::max(int64_t(std::floor(poi)), int64_t(0));
size_t right = std::min(int64_t(std::ceil(poi)), int64_t(data.size() - 1));
T datLeft = data.at(left);
T datRight = data.at(right);
T quantile = Lerp<T>(datLeft, datRight, poi - left);
quantiles.push_back(quantile);
}
return quantiles;
}
Find quartiles:
std::vector<double> in = { 1,2,3,4,5,6,7,8,9,10,11 };
auto quartiles = Quantile<double>(in, { 0.25, 0.5, 0.75 });
Solution 3
You need to preallocate first
and third
vectors before you set the contents.
vector<double> first(mid);
vector<double> third(size-mid);
or use push_back
instead of assignments to first[i]
and third[i]
Solution 4
This C++ template function calculates quartile for you. It assumes x
to be sorted.
#include <assert.h>
template <typename T1, typename T2> typename T1::value_type quant(const T1 &x, T2 q)
{
assert(q >= 0.0 && q <= 1.0);
const auto n = x.size();
const auto id = (n - 1) * q;
const auto lo = floor(id);
const auto hi = ceil(id);
const auto qs = x[lo];
const auto h = (id - lo);
return (1.0 - h) * qs + h * x[hi];
}
To use it do:
std::vector<float> x{1,1,2,2,3,4,5,6};
std::cout << quant(x, 0.25) << std::endl;
std::cout << quant(x, 0.50) << std::endl;
std::cout << quant(x, 0.75) << std::endl;
Emir
Updated on June 14, 2022Comments
-
Emir almost 2 years
I've written a program where the user can enter any number of values into a vector and it's supposed to return the quartiles, but I keep getting a "vector subscript out of range" error :
#include "stdafx.h" #include <iostream> #include <string> #include <algorithm> #include <iomanip> #include <ios> #include <vector> int main () { using namespace std; cout << "Enter a list of numbers: "; vector<double> quantile; double x; //invariant: homework contains all the homework grades so far while (cin >> x) quantile.push_back(x); //check that the student entered some homework grades //typedef vector<double>::size_type vec_sz; int size = quantile.size(); if (size == 0) { cout << endl << "You must enter your numbers . " "Please try again." << endl; return 1; } sort(quantile.begin(), quantile.end()); int mid = size/2; double median; median = size % 2 == 0 ? (quantile[mid] + quantile[mid-1])/2 : quantile[mid]; vector<double> first; vector<double> third; for (int i = 0; i!=mid; ++i) { first[i] = quantile[i]; } for (int i = mid; i!= size; ++i) { third[i] = quantile[i]; } double fst; double trd; int side_length = 0; if (size % 2 == 0) { side_length = size/2; } else { side_length = (size-1)/2; } fst = (size/2) % 2 == 0 ? (first[side_length/2]/2 + first[(side_length-1)/2])/2 : first[side_length/2]; trd = (size/2) % 2 == 0 ? (third[side_length/2]/2 + third[(side_length-1)/2])/2 : third[side_length/2]; streamsize prec = cout.precision(); cout << "The quartiles are" << setprecision(3) << "1st" << fst << "2nd" << median << "3rd" << trd << setprecision(prec) << endl; return 0; }