Why are my mongodb queries so slow (on Swisscom cloud)? - mongodb

I am using a (small, 256 MB) MongoDB 3.2.9 service instance through Swisscom CloudFoundry. As long as our entire DB fits into the available RAM, we see somewhat acceptable query performance.
However, we are experiencing very long query times on aggregation operations when our DB does not fit into RAM. We have created indexes for the accessed fields, but as far as I can tell it doesn't help.
Example document entry:
_id: 5a31...
description: Object
location: "XYZ"
name: "ABC"
status: "A"
m_nr: null
k_nr: null
city: "QWE"
high_value: 17
right_value: 71
more_data: Object
number: 101
interval: 1
next_date: "2016-01-16T00:00:00Z"
last_date: null
status: null
classification: Object
priority_value: "?"
redundancy_value: "?"
active_value: "0"
Example Query:
db.getCollection('a').aggregate(
[{ $sort:
{"description.location": 1}
},
{ $group:
{_id: "$description.location"}
}],
{ explain: true }
)
This query takes 25sec on a DB that only has 20k entries and produces 1k output fields.
The explain info for this query:
db.getCollection('a').aggregate([{ $group: {_id: "$description.location"} }], { explain: true }):
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {},
"fields" : {
"description.location" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "Z.a",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : []
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : []
},
"direction" : "forward"
},
"rejectedPlans" : []
}
}
},
{
"$group" : {
"_id" : "$description.location"
}
}
],
"ok" : 1.0
}
[UPDATE] Output of db.a.getIndexes():
/* 1 */
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "db.a"
},
{
"v" : 1,
"key" : {
"description.location" : 1.0
},
"name" : "description.location_1",
"ns" : "db.a"
}
]

Looks like it's doing a collection scan, have you tried adding an index on description.location?
db.a.createIndex({"description.location" : 1});

Related

MongoDB $group + $sum aggregation is very slow

I have an aggregation query in MongoDB:
[{
$group: {
_id: '$status',
status: {
$sum: 1
}
}
}]
It is running on a collection that has ~80 million documents. The status field is indexed, yet the query is very slow and runs for around 60 seconds or more.
I did an explain() on the query, but still got almost nowhere:
{
"explainVersion" : "1",
"stages" : [
{
"$cursor" : {
"queryPlanner" : {
"namespace" : "loa.document",
"indexFilterSet" : false,
"parsedQuery" : {
},
"queryHash" : "B9878693",
"planCacheKey" : "8EAA28C6",
"maxIndexedOrSolutionsReached" : false,
"maxIndexedAndSolutionsReached" : false,
"maxScansToExplodeReached" : false,
"winningPlan" : {
"stage" : "PROJECTION_SIMPLE",
"transformBy" : {
"status" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "COLLSCAN",
"direction" : "forward"
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : "$status",
"status" : {
"$sum" : {
"$const" : 1
}
}
}
}
],
"serverInfo" : {
"host" : "rack-compute-2",
"port" : 27017,
"version" : "5.0.6",
"gitVersion" : "212a8dbb47f07427dae194a9c75baec1d81d9259"
},
"serverParameters" : {
"internalQueryFacetBufferSizeBytes" : 104857600,
"internalQueryFacetMaxOutputDocSizeBytes" : 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes" : 104857600,
"internalDocumentSourceGroupMaxMemoryBytes" : 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes" : 104857600,
"internalQueryProhibitBlockingMergeOnMongoS" : 0,
"internalQueryMaxAddToSetBytes" : 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes" : 104857600
},
"command" : {
"aggregate" : "document",
"pipeline" : [
{
"$group" : {
"_id" : "$status",
"status" : {
"$sum" : 1
}
}
}
],
"explain" : true,
"cursor" : {
},
"lsid" : {
"id" : UUID("a07e17fe-65ff-4d38-966f-7517b7a5d3f2")
},
"$db" : "loa"
},
"ok" : 1
}
I see that it does a full COLLSCAN, I just can't understand why.
I plan on supporting a couple hundred million (or even a billion) documents in that collection, but this problem hijacks my plans for seemingly no reason.
You can advice the query planner to use the index as follow:
db.test.explain("executionStats").aggregate(
[
{$group:{ _id:"$status" ,status:{$sum:1} }}
],
{hint:"status_1"}
)
Make sure the index name in the hint is same as created ...
(db.test.getIndexes() will show you the exact index name )

MongoDB aggregate count is too much slow

I have around 60 thousand document in users collection, and have the following query:
db.getCollection('users').aggregate([
{"$match":{"userType":"employer"}},
{"$lookup":{"from":"companies","localField":"_id","foreignField":"owner.id","as":"company"}},
{"$unwind":"$company"},
{"$lookup":{"from":"companytypes","localField":"company.type.id","foreignField":"_id","as":"companyType"}},
{"$unwind":"$companyType"},
{ $group: { _id: null, count: { $sum: 1 } } }
])
It takes around 12 seconds to count, even I call count function before list function, but my list function with limit: 10 response faster than count.
And following is explain result:
{
"stages" : [
{
"$cursor" : {
"query" : {
"userType" : "employer"
},
"fields" : {
"company" : 1,
"_id" : 1
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "jobs.users",
"indexFilterSet" : false,
"parsedQuery" : {
"userType" : {
"$eq" : "employer"
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"userType" : {
"$eq" : "employer"
}
},
"direction" : "forward"
},
"rejectedPlans" : []
}
}
},
{
"$lookup" : {
"from" : "companies",
"as" : "company",
"localField" : "_id",
"foreignField" : "owner.id",
"unwinding" : {
"preserveNullAndEmptyArrays" : false
}
}
},
{
"$match" : {
"$nor" : [
{
"company" : {
"$eq" : []
}
}
]
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"total" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : false,
"total" : true
}
}
],
"ok" : 1.0
}
$lookup operations are slow since they mimic the left join behavior, from the DOCS:
$lookup performs an equality match on the localField to the
foreignField from the documents of the from collection
Hence if there are no indexes in the fields used for joining the collections Mongodb is force to do a collection scan.
Adding an index for the foreignField attributes should prevent a collection scan and increase the performance even of a magnitude

MongoDB performs slow query on sum() based on monthly groups

This is What I tried so far on aggregated query:
db.getCollection('storage').aggregate([
{
"$match": {
"user_id": 2
}
},
{
"$project": {
"formattedDate": {
"$dateToString": { "format": "%Y-%m", "date": "$created_on" }
},
"size": "$size"
}
},
{ "$group": {
"_id" : "$formattedDate",
"size" : { "$sum": "$size" }
} }
])
This is the result:
/* 1 */
{
"_id" : "2018-02",
"size" : NumberLong(10860595386)
}
/* 2 */
{
"_id" : "2017-12",
"size" : NumberLong(524288)
}
/* 3 */
{
"_id" : "2018-01",
"size" : NumberLong(21587971)
}
And this is the document structure:
{
"_id" : ObjectId("5a59efedd006b9036159e708"),
"user_id" : NumberLong(2),
"is_transferred" : false,
"is_active" : false,
"process_id" : NumberLong(0),
"ratio" : 0.000125759169459343,
"type_id" : 201,
"size" : NumberLong(1687911),
"is_processed" : false,
"created_on" : ISODate("2018-01-13T11:39:25.000Z"),
"processed_on" : ISODate("1970-01-01T00:00:00.000Z")
}
And last, the explain result:
/* 1 */
{
"stages" : [
{
"$cursor" : {
"query" : {
"user_id" : 2.0
},
"fields" : {
"created_on" : 1,
"size" : 1,
"_id" : 1
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "data.storage",
"indexFilterSet" : false,
"parsedQuery" : {
"user_id" : {
"$eq" : 2.0
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"user_id" : 1
},
"indexName" : "user_id",
"isMultiKey" : false,
"multiKeyPaths" : {
"user_id" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"user_id" : [
"[2.0, 2.0]"
]
}
}
},
"rejectedPlans" : []
}
}
},
{
"$project" : {
"_id" : true,
"formattedDate" : {
"$dateToString" : {
"format" : "%Y-%m",
"date" : "$created_on"
}
},
"size" : "$size"
}
},
{
"$group" : {
"_id" : "$formattedDate",
"size" : {
"$sum" : "$size"
}
}
}
],
"ok" : 1.0
}
The problem:
I can navigate and get all results in almost instantly like in 0,002sec. However, when I specify user_id and sum them by grouping on each month, My result came in between 0,300s to 0,560s. I do similar tasks in one request and it becaomes more than a second to finish.
What I tried so far:
I've added an index for user_id
I've added an index for created_on
I used more $match conditions. However, This makes even worse.
This collection have almost 200,000 documents in it currently and approximately 150,000 of them are belongs to user_id = 2
How can I minimize the response time for this query?
Note: MongoDB 3.4.10 used.
Pratha,
try to add sort on "created_on" and "size" fields as the first stage in aggregation pipeline.
db.getCollection('storage').aggregate([
{
"$sort": {
"created_on": 1, "size": 1
}
}, ....
Before that, add compound key index:
db.getCollection('storage').createIndex({created_on:1,size:1})
If you sort data before the $group stage, it will improve the efficiency of accumulation of the totals.
Note about sort aggregation stage:
The $sort stage has a limit of 100 megabytes of RAM. By default, if the stage exceeds this limit, $sort will produce an error. To allow for the handling of large datasets, set the allowDiskUse option to true to enable $sort operations to write to temporary files.
P.S
get rid of match stage by userID to test performance, or add userID to compound key also.

Should mongodb insert data ordered according with their indexes?

I would like to know if mongodb should re-order data after insert data according with the indexes configured previously, for instance:
After insert data according with sequence bellow:
db.test.insert(_id:1)
db.test.insert(_id:5)
db.test.insert(_id:2)
And executing the following search:
db.test.find();
We can see the result:
{ "_id" : 1 }
{ "_id" : 5 }
{ "_id" : 3 }
As we know, the field _id by default has a index, the question here is why after executing search the results are not return in sequence as presented bellow?
{ "_id" : 1 }
{ "_id" : 3 }
{ "_id" : 5 }
MongoDB indexes are separate data structures that contain the portion of the collection being indexed. The index does not dictate the order of documents in storage during insert.
Indexes are only used for sorting when a sort order is specified in the search:
db.test.find().sort({ _id: 1 })
{ "_id" : 1 }
{ "_id" : 2 }
{ "_id" : 5 }
You can verify the index usage using explain:
db.test.explain().find().sort({_id:1})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "so.test",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [ ]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "KSA303096TX2",
"port" : 27017,
"version" : "3.0.11",
"gitVersion" : "48f8b49dc30cc2485c6c1f3db31b723258fcbf39 modules: enterprise"
},
"ok" : 1
}

MongoDB Aggregate Slow Performance When Using Sort

I have collection (tvshow episodes) with more than 1,200,000 document,
here is my schema :
var episodeSchema = new Schema({
imdbId: { type : String },
showId: {type : String},
episodeId: { type : String },
episodeIdNumber:{ type : Number },
episodeTitle:{ type : String },
showTitle:{type : String},
seasonNumber:{type : Number},
episodeNumber:{type : Number},
airDate : {type : String},
summary:{type : String}
});
I created Index for episodeTitle episodeIdNumber seasonNumber episodeNumber episodeId and showId
Now i used mongodb aggregate group to get every tvshow episodes
here is the aggregate query i used :
episode.aggregate( [
{ $match : { showId : "scorpion" } },
{$sort:{"episodeNumber":-1}},
{ $group: {
_id: "$seasonNumber", count: { $sum: 1 } ,
episodes : { $push: { episodeId : "$episodeId" , episodeTitle: "$episodeTitle" , episodeNumber: "$episodeNumber" , seasonNumber: "$seasonNumber" , airDate: "$airDate" } }
} }
,
{ $sort : { _id : -1 } }
] )
Now when i am run this query its take more than 2605.907 ms , after some digging i found out why its slow , it was because of using {$sort:{"episodeNumber":-1}} , without using {$sort:{"episodeNumber":-1}} its take around 19.178 ms to run.
As i mentioned above i create an index for episodeNumber field and based on MongoDB Aggregation Pipeline Optimization
i used sort after match so basically everything was ok , and i didn't anything wrong.
So after this i thought something wrong with my indexes , so i removed episodeNumber index and reindexd , but i had same time nothing changed.
At end one time i tried run aggregate group query without episodeNumber indexed and surprisingly it was faster ! its take around 20.118 ms .
I wants know why this happened , isn't indexes to get faster query ?
Update
query explain output :
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"showId" : "scorpion"
},
"sort" : {
"episodeNumber" : -1
},
"fields" : {
"airDate" : 1,
"episodeId" : 1,
"episodeNumber" : 1,
"episodeTitle" : 1,
"seasonNumber" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.episodes",
"indexFilterSet" : false,
"parsedQuery" : {
"showId" : {
"$eq" : "scorpion"
}
},
"winningPlan" : {
"stage" : "EOF"
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : "$seasonNumber",
"count" : {
"$sum" : {
"$const" : 1
}
},
"episodes" : {
"$push" : {
"episodeId" : "$episodeId",
"episodeTitle" : "$episodeTitle",
"episodeNumber" : "$episodeNumber",
"seasonNumber" : "$seasonNumber",
"airDate" : "$airDate"
}
}
}
},
{
"$sort" : {
"sortKey" : {
"_id" : -1
}
}
}
],
"ok" : 1
}