Binning an array in javascript for a histogram

13,064

Solution 1

With the release of D3.js v6, d3.layout.histogram has been superseded by d3.bin(), which now belongs to the d3-array module.

To bin your array you create a histogram generator:

// Histogram generator for values in [0,1]; call it with the data array to get the bins.
var histGenerator = d3.bin()
  .domain([0,1])    // Set the domain to cover the entire interval [0,1]
  .thresholds(19);  // number of thresholds; this will create 19+1 bins

There are some more options available to configure your thresholds and thereby your bins, but this generator will do exactly what you asked for. You retrieve the computed bins as an array by calling the generator with your array of values:

// Calling the generator with the array of values returns the computed bins as an array.
var bins = histGenerator(arr);

Have a look at this working example:

// Sample data: values between 0 and 1, stored as strings.
var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];

// Histogram generator configured for the domain [0,1].
var histGenerator = d3.bin()
  .domain([0,1])    // Set the domain to cover the entire interval [0,1]
  .thresholds(19);  // number of thresholds; this will create 19+1 bins

// Bin the sample values and print the resulting array of bins.
var bins = histGenerator(arr);
console.log(bins);
<!-- Load D3 v6 over HTTPS so the script is not blocked as mixed content on HTTPS pages. -->
<script src="https://d3js.org/d3.v6.js"></script>

Solution 2

The d3js library has a d3.layout.histogram() function that returns a histogram layout object for grouping data into bins. The layout object is both an object and a function. You can call methods on the layout object to set the desired behavior of the layout. You can then call the layout object to group the data into an array of bins. Each bin is an array of values. Each bin has the additional properties x, dx, and y.

For example, the following code will group the data into 20 bins that cover the range from 0 to 1.

// Sample data: values between 0 and 1, stored as strings.
var arr = ["0.362743", "0.357969", "0.356322", "0.355757", "0.358511", "0.357218", "0.356696", "0.354579", "0.828295", "0.391186", "0.378577", "0.39372", "0.396416", "0.395641", "0.37573", "0.379666", "0.377443", "0.391842", "0.402021", "0.377516", "0.38936", "0.38936", "0.400883", "0.393171", "0.374419", "0.400821", "0.380502", "0.396098", "0.388256", "0.398968", "0.392525", "0.401858", "0.387297", "0.376471", "0.378183", "0.379787", "0.382024", "0.387928", "0.395367", "0.391972", "0.381295", "0.391183", "0.383598", "0.386424", "0.384338", "0.401834", "0.406253", "0.392854", "0.399266", "0.400804", "0.391146", "0.395441", "0.396265", "0.397894", "0.384822", "0.385181", "0.395443", "0.400981", "0.401716", "0.406633", "0.406887", "0.40694", "0.391219", "0.387946", "0.398858", "0.402233", "0.388583", "0.389772", "0.397084", "0.711566", "0.954557", "0.524007", "0.672288", "0.668441", "0.421726", "0.549536", "0.932952", "0.397851", "0.395536", "0.354818", "0.374355", "0.375257", "0.362613", "0.391271", "0.379219", "0.363316", "0.866006", "0.862254", "0.864403", "0.861346", "0.845225", "0.784467", "0.801275", "0.638579", "0.847282", "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"];
// Build a histogram layout, configure it, and immediately call it with the data.
// NOTE: per Solution 1 on this page, d3.layout.histogram has been superseded by d3.bin() as of D3 v6.
var bins = d3.layout.histogram()  // create layout object
    .bins(20)       // to use 20 bins
    .range([0, 1])  // to cover range from 0 to 1
    (arr);          // group the data into the bins

After the code runs...

bins[i] is an array of values in the ith bin
bins[i].x is the lower bound of the ith bin
bins[i].dx is the width of the ith bin
bins[i].x + bins[i].dx is the upper bound of the ith bin
bins[i].y is the number of values in the ith bin

The documentation for the histogram layout object is at...

https://github.com/d3/d3/wiki/Histogram-Layout

Note: By default, the layout object converts string values to number values. Thus, the layout function will work with your string values.

Solution 3

You could make the bins yourself using some JS:

// Sample data: numeric strings between 0 and 1.
var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];

var interval = .05;  // desired bin width
var rangeMin = 0;    // lower edge of the first bin
var rangeMax = 1;    // upper edge of the last bin

// Derive the number of bins once from the range and the width. Rounding
// absorbs the floating-point error of the division (1 / .05 is not
// guaranteed to be exactly 20).
var numOfBuckets = Math.round((rangeMax - rangeMin) / interval);

/**
 * Build `count` equal-width, empty bins covering [min, max].
 *
 * Every edge is computed directly from the bin index rather than by
 * repeatedly adding the width, so the edges do not accumulate rounding
 * error (e.g. 0.15 instead of 0.15000000000000002) and the upper edge of
 * one bin is always exactly the lower edge of the next.
 *
 * @param {number} min - lower edge of the first bin
 * @param {number} max - upper edge of the last bin
 * @param {number} count - number of bins to create
 * @returns {Array<{binNum: number, minNum: number, maxNum: number, count: number}>}
 */
function createBins(min, max, count) {
  var result = [];
  var span = max - min;
  for (var k = 0; k < count; k++) {
    result.push({
      binNum: k,
      minNum: min + span * k / count,
      maxNum: min + span * (k + 1) / count,
      count: 0
    });
  }
  return result;
}

/**
 * Count each value into the bin whose interval contains it.
 *
 * Bins are half-open on the left, (minNum, maxNum], except the first bin,
 * which also includes its lower edge so that a value equal to the range
 * minimum is not dropped. Values are parsed with parseFloat; entries that
 * are not numeric or fall outside every bin are ignored.
 *
 * @param {Array<string|number>} values - data to bin
 * @param {Array<Object>} targetBins - bins as returned by createBins; their
 *   `count` fields are incremented in place
 * @returns {Array<Object>} the same `targetBins` array
 */
function countIntoBins(values, targetBins) {
  for (var n = 0; n < values.length; n++) {
    // Convert explicitly instead of relying on string/number coercion in
    // the comparisons below. NaN fails every comparison and is skipped.
    var num = parseFloat(values[n]);
    for (var j = 0; j < targetBins.length; j++) {
      var bin = targetBins[j];
      var aboveLower = j === 0 ? num >= bin.minNum : num > bin.minNum;
      if (aboveLower && num <= bin.maxNum) {
        bin.count++;
        break;  // An item can only be in one bin.
      }
    }
  }
  return targetBins;
}

// Setup bins, then loop through the data and add to each bin's count.
var bins = countIntoBins(arr, createBins(rangeMin, rangeMax, numOfBuckets));
var binCount = bins.length;

https://jsbin.com/keropoyadu/edit?js,output

Solution 4

The feature you want is the histogram layout. You can do something like this:

// Group `arr` into 20 equal-width bins with the D3 histogram layout.
var data = d3.layout.histogram()
    .bins(20)
    .range([0, 1])  // fix the range; otherwise the layout bins over the data's own min/max
    (arr);

This is just a general example; you'll have to adjust the values. Check the documentation: https://github.com/d3/d3/wiki/Histogram-Layout

Share:
13,064
NodeJS_dev
Author by

NodeJS_dev

Updated on June 09, 2022

Comments

  • NodeJS_dev
    NodeJS_dev almost 2 years

    I have the array below in JavaScript, which I need to bin into 20 buckets. The data values are between 0 and 1, so the bin size would be 0.05. I feel like there should be a function out there that takes two arguments, an array and a bin size, but I cannot find one. I know that D3.js has some features that help build such an array, but I cannot figure out which function might help.

    // Values to bin: numbers between 0 and 1, stored as strings.
    var arr = [
      "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
      "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
      "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
      "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
      "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
      "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
      "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
      "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
      "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
      "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
      "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
      "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
      "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
      "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
      "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
      "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
      "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
      "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
      "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
      "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
    ]
    
  • Emman
    Emman over 2 years
    I'm trying to get this solution to work on node.js but so far unsuccessfully. There seems to be an issue with loading the d3 module. This is frustrating because this solution seems perfect.