Elasticsearch show all results using scroll in node js
Solution 1
You need to repeatedly call client.scroll until no more records are returned. There's a good example in the Elasticsearch documentation. I've reproduced their example code below, slightly modified to match your question.
// Accumulates every hit returned across the initial search and all scroll pages.
const allRecords = [];

// First we do a search and specify a scroll timeout; the response carries a
// `_scroll_id` that is handed back to `client.scroll` to fetch the next page.
client.search({
  index: 'test',
  type: 'records',
  scroll: '10s',
  body: {
    query: {
      "match_all": {}
    }
  }
}, function getMoreUntilDone(error, response) {
  // A failed request has no usable response, so stop instead of crashing on
  // `response.hits`.
  if (error) {
    console.error('scroll request failed', error);
    return;
  }

  // Collect all the records of this page.
  const page = response.hits.hits;
  for (const hit of page) {
    allRecords.push(hit);
  }

  // `hits.total` is a plain number on older clusters but an object
  // ({ value, relation }) on Elasticsearch 7+, so it is only trusted when it
  // is numeric. An empty page is the stop condition that works everywhere.
  const total = response.hits.total;
  const reachedTotal = typeof total === 'number' && allRecords.length >= total;

  if (page.length === 0 || reachedTotal) {
    console.log('all done', allRecords);
    return;
  }

  // Now we can call scroll over and over, re-using this function as callback.
  client.scroll({
    scrollId: response._scroll_id,
    scroll: '10s'
  }, getMoreUntilDone);
});
Solution 2
Thanks @Ceilingfish. Here's a modified version of the above using async/await (ES2017):
// Accumulates every hit returned across the initial search and all scroll pages.
const allRecords = [];

// First we do a search and specify a scroll timeout. A single `response`
// binding is reassigned on each page instead of redeclaring destructured
// `var`s inside the loop.
let response = await esclient.search({
  index: 'test',
  type: 'records',
  scroll: '10s',
  body: {
    query: {
      "match_all": {}
    },
    _source: false
  }
});

while (response.hits && response.hits.hits.length) {
  // Append all new hits.
  allRecords.push(...response.hits.hits);

  // `hits.total` is a number on older clusters and an object
  // ({ value, relation }) on Elasticsearch 7+; unwrap it so the progress line
  // never prints "[object Object]".
  const rawTotal = response.hits.total;
  const total = rawTotal !== null && typeof rawTotal === 'object' ? rawTotal.value : rawTotal;
  console.log(`${allRecords.length} of ${total}`);

  // Fetch the next page using the scroll id of the page just processed.
  response = await esclient.scroll({
    scrollId: response._scroll_id,
    scroll: '10s'
  });
}

console.log(`Complete: ${allRecords.length} records retrieved`);
Solution 3
Query for getting all data from elastic search using Node.js client using scroll with async/await.
const elasticsearch = require('@elastic/elasticsearch');
/**
 * Creates a client for the hard-coded Elasticsearch node.
 *
 * @returns {Promise<object>} Resolves to a new `elasticsearch.Client` instance
 *   configured with the node URL below.
 */
async function esconnection() {
  const clientOptions = { node: "http://192.168.1.1:7200" };
  const connection = new elasticsearch.Client(clientOptions);
  return await connection;
}
/**
 * Fetches the `_source` of every document in the index by paging through the
 * scroll API until a page comes back empty.
 *
 * @returns {Promise<Array|undefined>} All `_source` objects in the order they
 *   were returned, or `undefined` if any request failed (the error is logged).
 */
async function getAllUserList() {
  let es;
  try {
    const userArray = [];
    const query = {
      "query": {
        "match_all": {}
      }
    };
    es = await esconnection();
    let { body } = await es.search({
      index: 'esIndex',
      type: "esIndexType",
      scroll: '2m', // How long a consistent view of the index is kept for the scrolled search
      size: 100, // Number of hits to return per page (default: 10)
      body: query
    });

    // Loop on the size of the current page rather than on `hits.total`:
    // on Elasticsearch 7+ `hits.total` is an object ({ value, relation }),
    // so `total > 0` is false and no records would ever be collected.
    while (body['hits']['hits'].length > 0) {
      for (const hit of body['hits']['hits']) {
        if (hit) {
          userArray.push(hit['_source']);
        }
      }
      // Request the next page with the scroll id of the page just consumed.
      const next = await es.scroll({
        scrollId: body['_scroll_id'],
        scroll: '10s'
      });
      body = next.body;
    }
    return userArray;
  } catch (error) {
    console.log("Code not working properly: ", `${error}`);
  } finally {
    // Release the client on the failure path too, not only on success.
    if (es) {
      es.close();
    }
  }
}
Solution 4
Node.js failed when Elasticsearch had over 10000 results. This is how I used scroll.
/**
 * Collects every hit for a match_all query by paging with the scroll API.
 *
 * @returns {Promise<{hits: {hits: Array}}>} An object shaped like a search
 *   response whose `hits.hits` holds the hits of all pages, in order.
 */
async function getResultsFromElastic() {
  const responseAll = { hits: { hits: [] } };
  const collected = responseAll.hits.hits;

  // Declared locally: assigning to an undeclared `searchQuery` creates an
  // implicit global and throws a ReferenceError in strict mode / ES modules.
  const searchQuery = {
    index: 'test',
    type: 'records',
    scroll: '10s',
    size: 10000,
    body: {
      query: {
        "match_all": {}
      }
    }
  };

  let response = await esclient.search(searchQuery);
  while (true) {
    const page = response.hits.hits;
    // Append in place; concatenating would copy the whole accumulator on
    // every page.
    for (const hit of page) {
      collected.push(hit);
    }

    // `hits.total` is only trusted when numeric (it is an object on
    // Elasticsearch 7+); an empty page always ends the loop.
    const total = response.hits.total;
    const reachedTotal = typeof total === 'number' && collected.length >= total;
    if (page.length === 0 || reachedTotal) {
      break;
    }

    // Get the next response if there are more to fetch.
    response = await esclient.scroll({
      scrollId: response._scroll_id,
      scroll: '30s'
    });
  }
  return responseAll;
}
Jane
Updated on June 22, 2022
Comments
-
Jane almost 2 years
I am basically trying to show all records of an index type. Now, if you use match_all() in query elasticsearch shows 10 results by default. One can show all results using scroll. I am trying to implement scroll api, but can't get it to work. It is showing only 10 results, my code:
// Asker's original (non-working) snippet, collapsed onto one line by extraction.
// `callback(resp.hits.hits)` is invoked immediately with the hits of the initial
// search response, and its return value (not a function reference) is what gets
// passed to client.scroll as the callback. The scroll response is never read and
// `err` is ignored, so the caller only ever receives the first page of results.
module.exports.searchAll = function (searchData, callback) { client.search({ index: 'test', type: 'records', scroll: '10s', //search_type: 'scan', //if I use search_type then it requires size otherwise it shows 0 result body: { query: { "match_all": {} } } }, function (err, resp) { client.scroll({ scrollId: resp._scroll_id, scroll: '10s' }, callback(resp.hits.hits)); }); }
Can anyone help, please?
-
Jane over 7 years: I tried this before asking this question here, but it didn't work. I figured it out though. If I use search_type: 'scan' then nothing shows up in the output; if you omit that, then this works.
-
Pieter Schreurs over 7 years: Good investigation @Jane. I've updated the code so that future googlers can get some working code from the outset.
-
Mohd Shahid almost 6 yearsWhich param is correct, scrollId or scroll_id in scroll function?
-
Mohd Shahid almost 6 yearsWhich param is correct, scrollId or scroll_id in scroll function?
-
DirtyMind about 5 yearsVery helpful !! Simple and running perfectly.
-
Calle Engene almost 5 years: This query can only handle small datasets. If you know you're querying a large dataset, you should split the query according to the documentation: elastic.co/guide/en/elasticsearch/reference/6.7/…
-
Pieter Schreurs almost 5 years: @CarlEngene splitting allows you to process data sets in parallel. There's no reason that the above code couldn't process a large number of records; it would just take longer.
-
Rahul Saini over 4 yearsI tried, but no data come in the block of >>es_stream.on('data', function(data) { // Process your results here });
-
Sri Harsha Kappala over 4 yearsWhat is the elasticsearch version you are using?
-
Rahul Saini over 4 yearselasticsearch version = "^16.4.0"
-
Dipesh Raichana over 2 years: I checked, and for me scroll_id is working.