Elasticsearch show all results using scroll in node js

19,368

Solution 1

You need to repeatedly call client.scroll until no more records are returned. There's a good example in the Elasticsearch documentation. I've reproduced their example code below, slightly modified to match your question:

// Accumulates every hit returned by the initial search and all scroll pages.
const allRecords = [];

// First we do a search and specify a scroll timeout; the same callback is
// then reused for every subsequent scroll page.
client.search({
  index: 'test',
  type: 'records',
  scroll: '10s',
  body: {
    query: {
      match_all: {},
    },
  },
}, function getMoreUntilDone(error, response) {
  // Without this guard a failed request would crash on `response.hits`.
  if (error) {
    console.error('scroll search failed', error);
    return;
  }

  const { total, hits: pageHits } = response.hits;

  // Collect all the records of this page.
  pageHits.forEach((hit) => {
    allRecords.push(hit);
  });

  // Stop on an empty page as well as on the total count: `hits.total` is
  // only comparable to a number when the server reports it as an integer,
  // so relying on it alone could scroll forever.
  const isDone = pageHits.length === 0 || total === allRecords.length;

  if (!isDone) {
    // Now we can call scroll over and over.
    client.scroll({
      scrollId: response._scroll_id,
      scroll: '10s',
    }, getMoreUntilDone);
  } else {
    console.log('all done', allRecords);
  }
});

Solution 2

Thanks @Ceilingfish. Here's a modified ES6 version of the above using await

// Accumulates every hit returned by the initial search and all scroll pages.
const allRecords = [];

// First we do a search and specify a scroll timeout. `scrollId` and `hits`
// are declared once here and reassigned for each page, instead of being
// redeclared with `var` inside the loop.
let { _scroll_id: scrollId, hits } = await esclient.search({
    index: 'test',
    type: 'records',
    scroll: '10s',
    body: {
        query: {
            match_all: {},
        },
        _source: false,
    },
});

// An empty page means the scroll is exhausted.
while (hits && hits.hits.length) {
    // Append all new hits.
    allRecords.push(...hits.hits);

    console.log(`${allRecords.length} of ${hits.total}`);

    // Always continue with the scroll id from the most recent response.
    ({ _scroll_id: scrollId, hits } = await esclient.scroll({
        scrollId,
        scroll: '10s',
    }));
}

console.log(`Complete: ${allRecords.length} records retrieved`);

Solution 3

Fetching all documents from Elasticsearch with the Node.js client, using the scroll API with async/await.

const elasticsearch = require('@elastic/elasticsearch');
/**
 * Creates an Elasticsearch client.
 *
 * Kept `async` so existing callers that `await esconnection()` keep working,
 * but the constructor itself is synchronous and is therefore not awaited.
 *
 * @param {string} [node="http://192.168.1.1:7200"] - URL of the node to connect to.
 * @returns {Promise<object>} Resolves to the new `elasticsearch.Client` instance.
 */
async function esconnection(node = "http://192.168.1.1:7200") {
  return new elasticsearch.Client({ node });
}
/**
 * Reads every document of the index through the scroll API and returns the
 * `_source` of each hit.
 *
 * The loop is driven by the number of hits in the current page rather than by
 * `hits.total`: when `hits.total` is an object instead of a number, a
 * `total > 0` check is always false and no records would be collected.
 *
 * @returns {Promise<Array<object>|undefined>} The collected `_source` objects,
 *   or `undefined` when an error occurred (the error is logged, not rethrown).
 */
async function getAllUserList() {
    try {
        const es = await esconnection();
        try {
            const userArray = [];
            const query = {
                "query": {
                    "match_all": {}
                }
            };
            let { body } = await es.search({
                index: 'esIndex',
                type: "esIndexType",
                scroll: '2m', // how long the search context is kept alive
                size: 100,    // number of hits per page (default: 10)
                body: query
            });

            // An empty page signals that the scroll is exhausted.
            while (body['hits']['hits'].length > 0) {
                for (const hit of body['hits']['hits']) {
                    userArray.push(hit['_source']);
                }
                // Always continue with the scroll id of the latest response.
                ({ body } = await es.scroll({
                    scrollId: body['_scroll_id'],
                    scroll: '10s'
                }));
            }
            return userArray;
        } finally {
            // Release the client on the error path as well.
            await es.close();
        }
    } catch (error) {
        console.log("Code not working properly: ", `${error}`);
    }
}

Solution 4

Node.js failed when Elasticsearch had over 10,000 results. This is how I used scroll.

/**
 * Fetches every document matching `match_all` by paging through the scroll
 * API, and returns them in a response-shaped object.
 *
 * @returns {Promise<{hits: {hits: Array<object>}}>} Object whose `hits.hits`
 *   holds the hits of all pages, in the order they were received.
 */
async function getResultsFromElastic() {
    // Declared locally: assigning to an undeclared name would create an
    // implicit global and throws a ReferenceError in strict mode / ES modules.
    const searchQuery = {
        index: 'test',
        type: 'records',
        scroll: '10s',
        size: 10000,
        body: {
            query: {
                "match_all": {}
            }
        }
    };
    const responseAll = { hits: { hits: [] } };

    let response = await esclient.search(searchQuery);

    // An empty page ends the loop, so it terminates even when `hits.total`
    // is not a plain number and can never equal the collected count.
    while (response.hits.hits.length > 0) {
        for (const hit of response.hits.hits) {
            responseAll.hits.hits.push(hit);
        }

        // When the total is a number, stop as soon as it is reached and
        // save the extra round trip for the final empty page.
        if (response.hits.total === responseAll.hits.hits.length) {
            break;
        }

        // Get the next page using the scroll id of the latest response.
        response = await esclient.scroll({
            scrollId: response._scroll_id,
            scroll: '30s'
        });
    }

    return responseAll;
}
Share:
19,368
Jane
Author by

Jane

Updated on June 22, 2022

Comments

  • Jane
    Jane almost 2 years

    I am basically trying to show all records of an index type. Now, if you use match_all() in query elasticsearch shows 10 results by default. One can show all results using scroll. I am trying to implement scroll api, but can't get it to work. It is showing only 10 results, my code:

    // Asker's original attempt: runs one search with a scroll timeout, then
    // issues a single client.scroll call. `searchData` is not used in this body.
    module.exports.searchAll = function (searchData, callback) {
    
    client.search({
        index: 'test',
        type: 'records',
        scroll: '10s',
        //search_type: 'scan', //if I use search_type then it requires size otherwise it shows 0 result
        body: {
            query: {
                "match_all": {}
            }
        }
    }, function (err, resp) {
        // `err` is not checked before `resp` is dereferenced.
        // `callback(resp.hits.hits)` is invoked right here with the hits of the
        // initial search; its return value (not `callback` itself) is what gets
        // passed to client.scroll as the completion handler, so the hits of the
        // scrolled page are never handed to the caller.
        client.scroll({
            scrollId: resp._scroll_id,
            scroll: '10s'
        }, callback(resp.hits.hits));
    });
    }
    

    Can anyone help, please?

  • Jane
    Jane over 7 years
    I tried this before asking this question here, but it didn't work. I figured it out though. If I use search_type: 'scan' then nothing shows up in output, if you omit that then this works
  • Pieter Schreurs
    Pieter Schreurs over 7 years
    Good investigation @Jane. I've updated the code so that future googlers can get some working code from the outset.
  • Mohd Shahid
    Mohd Shahid almost 6 years
    Which param is correct, scrollId or scroll_id in scroll function?
  • Mohd Shahid
    Mohd Shahid almost 6 years
    Which param is correct, scrollId or scroll_id in scroll function?
  • DirtyMind
    DirtyMind about 5 years
    Very helpful !! Simple and running perfectly.
  • Calle Engene
    Calle Engene almost 5 years
    This query can only handle small datasets. If you know you're querying a large dataset, you should split the query according to the documentation: elastic.co/guide/en/elasticsearch/reference/6.7/…
  • Pieter Schreurs
    Pieter Schreurs almost 5 years
    @CarlEngene splits allow you to process data sets in parallel. There's no reason that the above code couldn't process a large number of records; it would just take longer.
  • Rahul Saini
    Rahul Saini over 4 years
    I tried, but no data come in the block of >>es_stream.on('data', function(data) { // Process your results here });
  • Sri Harsha Kappala
    Sri Harsha Kappala over 4 years
    What is the elasticsearch version you are using?
  • Rahul Saini
    Rahul Saini over 4 years
    elasticsearch version = "^16.4.0"
  • Dipesh Raichana
    Dipesh Raichana over 2 years
    i checked and for me, scroll_id is working