node.js - Mongoose / MongoDB streams vs. array performance

问题描述:

I'm seeing some unexpected performance numbers when sending a collection of documents from MongoDB using Mongoose, Express, and (where applicable) JSONStream. I wanted to compare Mongoose's find (buffered into an array) against streaming. I had expected streaming to be faster for larger document collections, but was surprised to see the array variants consistently outperform the stream variants. I suspect the wild card is my use of JSONStream to pipe the response to Express. My simple endpoints are below:

// Benchmark setup shared by all endpoints below.
// Run this after connecting to mongoose.

// Express application on which the endpoints are registered.
var app = express();

// Placeholder: obtain the mongoose model to benchmark here.
var myModel = ...; // get mongoose model

// Collection object exposed by the model; the /mongo* endpoints query it
// directly instead of going through the model.
// NOTE(review): presumably the underlying MongoDB driver collection - confirm.
var myCollection = myModel.collection;

// Query options reused by every endpoint: fetch 500 documents, and use
// `lean` with mongoose.

var queryOpts = { lean : true, limit : 500 };

// Average request time reported by ab: 35.958 ms

/**
 * GET /api/v1/stream
 *
 * Streams the result of the Mongoose query through JSONStream.stringify()
 * straight into the Express response.
 *
 * Errors emitted by the query stream are handed to Express via `next`.
 */
app.get("/api/v1/stream", function(req, res, next) {
  // The body is piped rather than sent with res.json(), so the content type
  // is set explicitly.
  res.set('Content-Type', 'application/json');

  var docStream = myModel.find({ }, null, queryOpts).stream();

  // pipe() does not forward 'error' events, and an 'error' event without a
  // listener is thrown as an uncaught exception, so listen for it explicitly.
  docStream.on('error', next);

  docStream.pipe(JSONStream.stringify()).pipe(res);
});

// Average request time reported by ab: 36.228 ms

/**
 * GET /api/v1/mongostream
 *
 * Same as /api/v1/stream, but queries `myCollection` directly instead of
 * going through the Mongoose model.
 *
 * Errors emitted by the cursor stream are handed to Express via `next`.
 */
app.get("/api/v1/mongostream", function(req, res, next) {
  // The body is piped rather than sent with res.json(), so the content type
  // is set explicitly.
  res.set('Content-Type', 'application/json');

  var cursorStream = myCollection.find({ }, queryOpts).stream();

  // pipe() does not forward 'error' events, and an 'error' event without a
  // listener is thrown as an uncaught exception, so listen for it explicitly.
  cursorStream.on('error', next);

  cursorStream.pipe(JSONStream.stringify()).pipe(res);
});

// Average request time reported by ab: 23.399ms

/**
 * GET /api/v1/mongoarray
 *
 * Loads the whole result set of the `myCollection` query into an array with
 * toArray() and sends it in a single res.json() call.
 *
 * A query error is handed to Express via `next` instead of being ignored.
 */
app.get("/api/v1/mongoarray", function(req, res, next) {
  myCollection.find({ }, queryOpts).toArray(function(err, results) {
    // Without this check a failed query would be answered like a successful
    // one, with `results` left undefined.
    if (err) {
      return next(err);
    }

    res.json(results);
  });
});

// Average request time reported by ab: 23.908 ms

/**
 * GET /api/v1/array
 *
 * Runs the Mongoose query with a callback, so the complete result array is
 * available at once, and sends it in a single res.json() call.
 *
 * A query error is handed to Express via `next` instead of being ignored.
 */
app.get("/api/v1/array", function(req, res, next) {
  myModel.find({ }, null, queryOpts, function(err, results) {
    // Without this check a failed query would be answered like a successful
    // one, with `results` left undefined.
    if (err) {
      return next(err);
    }

    res.json(results);
  });
});

// Start listening for requests on port 4000.
app.listen(4000);

The comment above each endpoint indicates the average request time reported by ab -k -n 1000 <endpoint>. I am surprised that piping a cursor stream to JSONStream to the express response is around 50% slower than just fetching all at once and sending. I had expected streaming the data to perform better.

Is there something I am doing that is obviously wrong? Am I wrong to think that streams should be faster? If JSONStream is the culprit, what is the best way to go from cursor stream to express response - if I buffer it all, wouldn't that be the same thing as one of the array variants?

Note the DB server is mongo 2.4.x and the mongo driver is 1.4.x.

Update: I timed only the fetching / streaming portion, without serialization. The array and streaming variants took similar times, with a slight edge to the array variants (14.9ms vs. 15.3ms). The endpoints are below:

// No serialization - just timing

// all are nearly the same - slight edge to arrays

/**
 * GET /api/v2/stream
 *
 * Times the Mongoose query stream only; no document is serialized.
 * Responds with process.hrtime(start), the elapsed [seconds, nanoseconds]
 * between entering the handler and the stream's 'end' event.
 *
 * Errors emitted by the query stream are handed to Express via `next`.
 */
app.get("/api/v2/stream", function(req, res, next) {
  var start = process.hrtime();

  res.set('Content-Type', 'application/json');

  // NOTE(review): no 'data' listener is attached, so this relies on the
  // stream flowing on its own - confirm for the mongoose version in use.
  var docStream = myModel.find({ }, null, queryOpts).stream();

  // An 'error' event without a listener is thrown as an uncaught exception,
  // and the request would never be answered.
  docStream.on('error', next);

  docStream.on('end', function() {
    res.json(process.hrtime(start));
  });
});

/**
 * GET /api/v2/mongostream
 *
 * Times the `myCollection` cursor stream only; no document is serialized.
 * Responds with process.hrtime(start), the elapsed [seconds, nanoseconds]
 * between entering the handler and the stream's 'end' event.
 *
 * Errors emitted by the cursor stream are handed to Express via `next`.
 */
app.get("/api/v2/mongostream", function(req, res, next) {
  var start = process.hrtime();

  res.set('Content-Type', 'application/json');

  // NOTE(review): no 'data' listener is attached, so this relies on the
  // stream flowing on its own - confirm for the driver version in use.
  var cursorStream = myCollection.find({ }, queryOpts).stream();

  // An 'error' event without a listener is thrown as an uncaught exception,
  // and the request would never be answered.
  cursorStream.on('error', next);

  cursorStream.on('end', function() {
    res.json(process.hrtime(start));
  });
});

/**
 * GET /api/v2/mongoarray
 *
 * Times how long toArray() on the `myCollection` query takes; the documents
 * themselves are discarded. Responds with process.hrtime(start), the elapsed
 * [seconds, nanoseconds] since entering the handler.
 *
 * A query error is handed to Express via `next` instead of being ignored.
 */
app.get("/api/v2/mongoarray", function(req, res, next) {
  var start = process.hrtime();

  // `results` is deliberately unused: only the fetch time is measured.
  myCollection.find({ }, queryOpts).toArray(function(err, results) {
    // A failed query must not be reported as a valid timing.
    if (err) {
      return next(err);
    }

    res.json(process.hrtime(start));
  });
});

/**
 * GET /api/v2/array
 *
 * Times how long the callback-style Mongoose query takes; the documents
 * themselves are discarded. Responds with process.hrtime(start), the elapsed
 * [seconds, nanoseconds] since entering the handler.
 *
 * A query error is handed to Express via `next` instead of being ignored.
 */
app.get("/api/v2/array", function(req, res, next) {
  var start = process.hrtime();

  // `results` is deliberately unused: only the fetch time is measured.
  myModel.find({ }, null, queryOpts, function(err, results) {
    // A failed query must not be reported as a valid timing.
    if (err) {
      return next(err);
    }

    res.json(process.hrtime(start));
  });
});

Update 2: The outputs of collection.stats() and collection.find({}).explain() are below:

> db.myCollection.stats();

{

"ns" : "myDb.myCollection",

"count" : 1000,

"size" : 419264,

"avgObjSize" : 419.264,

"storageSize" : 847872,

"numExtents" : 4,

"nindexes" : 2,

"lastExtentSize" : 655360,

"paddingFactor" : 1,

"systemFlags" : 1,

"userFlags" : 0,

"totalIndexSize" : 98112,

"indexSizes" : {

"_id_" : 40880,

"_meta.tags_1" : 57232

},

"ok" : 1

}

> db.myCollection.find({}).explain();

{

"cursor" : "BasicCursor",

"isMultiKey" : false,

"n" : 1000,

"nscannedObjects" : 1000,

"nscanned" : 1000,

"nscannedObjectsAllPlans" : 1000,

"nscannedAllPlans" : 1000,

"scanAndOrder" : false,

"indexOnly" : false,

"nYields" : 0,

"nChunkSkips" : 0,

"millis" : 0,

"indexBounds" : {

},

"server" : "LOCAL:27017"

}

网友答案:

It would be helpful to see the output of db.mycollection.stats() and db.mycollection.find({}).explain() from the mongo shell.

Given the performance numbers you're seeing, it may be simply that for a large enough collection, a cold btree hit for a non-indexed query is where all the time goes, rather than to any overhead in Node per se.