Mongodb Searching sharding key is very slow - mongodb

I set up a mongodb with 3 shards by using bitnami's chart: mongodb-sharded
and I got two problems.
My sharded collection has more than 38 million records.
The first problem is some mongodb pods take a lot of memory, more than 10G from the whole memory of 32G in a node. I found some similar issues from stackoverflow and I would try to tackle this problem.
The other problem is that when I run a search that matches the shard key, it is very slow and sometimes it times out.
Here is my collection's indexes:
ice-shard-mongodb-sharded-shard-0:PRIMARY> db.LogInfo.getIndexes()
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
},
{
"v" : 2,
"key" : {
"Properties.Time" : 1
},
"name" : "Properties.Time_1",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
},
{
"v" : 2,
"key" : {
"Properties.Plot" : 1,
"Properties.Time" : 1
},
"name" : "Properties.Plot_1_Properties.Time_1",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
},
{
"v" : 2,
"key" : {
"Properties.Scenario" : 1,
"Properties.Time" : 1
},
"name" : "Properties.Scenario_1_Properties.Time_1",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
},
{
"v" : 2,
"key" : {
"Properties.SecsFunction" : 1,
"Properties.Time" : 1
},
"name" : "Properties.SecsFunction_1_Properties.Time_1",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
},
{
"v" : 2,
"key" : {
"Properties.EapId" : "hashed",
"Properties.Time" : 1
},
"name" : "Properties.EapId_hashed_Properties.Time_1"
}
]
The key "Properties.EapId_hashed_Properties.Time_1" is the sharding key.
If I search
db.LogInfo.find({"Properties.SecsFunction": "s6f11", "Properties.Time": {$gte: ISODate("2021-03-10")}})
This search matches an index and it will search all the 3 shards. It's very fast within 0.01 seconds.
But if I search
db.LogInfo.find(
{
"Properties.EapId": "12eap012",
"Properties.Time": {
"$gte": ISODate("2021-03-17")
}
}
)
which I think should use the shard key, it's very slow. Sometimes it takes more than 5 seconds and sometimes it times out. I've checked the indexes several times but I can't figure out why.
Please advise. Thanks.
update: Mongodb version: 4.4.3
update: shards are built as replica set = 3.
update: below is the query plan for the problematic search. (I dropped the executionStats section due to Stack Overflow's 30,000-character limit.)
{
"queryPlanner" : {
"mongosPlannerVersion" : NumberInt(1),
"winningPlan" : {
"stage" : "SHARD_MERGE",
"shards" : [
{
"shardName" : "ice-shard-mongodb-sharded-shard-1",
"connectionString" : "ice-shard-mongodb-sharded-shard-1/ice-shard-mongodb-sharded-shard1-data-0.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017,ice-shard-mongodb-sharded-shard1-data-1.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017,ice-shard-mongodb-sharded-shard1-data-2.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017",
"serverInfo" : {
"host" : "ice-shard-mongodb-sharded-shard1-data-1",
"port" : NumberInt(27017),
"version" : "4.4.3",
"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
},
"plannerVersion" : NumberInt(1),
"namespace" : "LogCenter.LogInfo",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"Properties.EapId" : {
"$eq" : "12eap012"
}
},
{
"Properties.Time" : {
"$gte" : ISODate("2021-03-17T00:00:00.000+0000")
}
}
]
},
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : NumberInt(2),
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"winningPlan" : {
"stage" : "SHARDING_FILTER",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"Properties.EapId" : {
"$eq" : "12eap012"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"Properties.Time" : 1.0
},
"indexName" : "Properties.Time_1",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : NumberInt(2),
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : false,
"multiKeyPaths" : {
"Properties.Time" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"Properties.Time" : [
"[new Date(1615939200000), new Date(9223372036854775807)]"
]
}
}
}
},
"rejectedPlans" : [
]
},
{
"shardName" : "ice-shard-mongodb-sharded-shard-0",
"connectionString" : "ice-shard-mongodb-sharded-shard-0/ice-shard-mongodb-sharded-shard0-data-0.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017,ice-shard-mongodb-sharded-shard0-data-1.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017,ice-shard-mongodb-sharded-shard0-data-2.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017",
"serverInfo" : {
"host" : "ice-shard-mongodb-sharded-shard0-data-0",
"port" : NumberInt(27017),
"version" : "4.4.3",
"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
},
"plannerVersion" : NumberInt(1),
"namespace" : "LogCenter.LogInfo",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"Properties.EapId" : {
"$eq" : "12eap012"
}
},
{
"Properties.Time" : {
"$gte" : ISODate("2021-03-17T00:00:00.000+0000")
}
}
]
},
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : NumberInt(2),
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"winningPlan" : {
"stage" : "SHARDING_FILTER",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"Properties.EapId" : {
"$eq" : "12eap012"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"Properties.Time" : 1.0
},
"indexName" : "Properties.Time_1",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : NumberInt(2),
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : false,
"multiKeyPaths" : {
"Properties.Time" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"Properties.Time" : [
"[new Date(1615939200000), new Date(9223372036854775807)]"
]
}
}
}
},
"rejectedPlans" : [
]
},
{
"shardName" : "ice-shard-mongodb-sharded-shard-2",
"connectionString" : "ice-shard-mongodb-sharded-shard-2/ice-shard-mongodb-sharded-shard2-data-0.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017,ice-shard-mongodb-sharded-shard2-data-1.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017,ice-shard-mongodb-sharded-shard2-data-2.ice-shard-mongodb-sharded-headless.ice-system.svc.cluster.local:27017",
"serverInfo" : {
"host" : "ice-shard-mongodb-sharded-shard2-data-0",
"port" : NumberInt(27017),
"version" : "4.4.3",
"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
},
"plannerVersion" : NumberInt(1),
"namespace" : "LogCenter.LogInfo",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"Properties.EapId" : {
"$eq" : "12eap012"
}
},
{
"Properties.Time" : {
"$gte" : ISODate("2021-03-17T00:00:00.000+0000")
}
}
]
},
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : NumberInt(2),
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"winningPlan" : {
"stage" : "SHARDING_FILTER",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"Properties.EapId" : {
"$eq" : "12eap012"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"Properties.Time" : 1.0
},
"indexName" : "Properties.Time_1",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : NumberInt(2),
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : false,
"multiKeyPaths" : {
"Properties.Time" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "forward",
"indexBounds" : {
"Properties.Time" : [
"[new Date(1615939200000), new Date(9223372036854775807)]"
]
}
}
}
},
"rejectedPlans" : [
]
}
]
}
},
"serverInfo" : {
"host" : "ice-shard-mongodb-sharded-mongos-59576fb8b8-n47n5",
"port" : NumberInt(27017),
"version" : "4.4.3",
"gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
},
"ok" : 1.0,
"operationTime" : Timestamp(1616372015, 12),
"$clusterTime" : {
"clusterTime" : Timestamp(1616372015, 14),
"signature" : {
"hash" : BinData(0, "vVxiZGic6un9QIInadoHCfBCX2Y="),
"keyId" : NumberLong(6924833040434724866)
}
}
}

I found the root cause why my search didn't use the sharding key. It's all about the collation.
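The shard key index always uses collation : { locale : "simple" } to do a binary comparison, while in my case the collection and its other indexes were created with "collation" : { "locale" : "en_US", "strength" : 2 }. Because my query inherits the collection's default en_US collation, it cannot use the simple-collation shard key index, so it falls back to the Properties.Time index and is sent to all shards.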
After redesigning my collection without any collation, now everything is fine.

Related

MongoDB performing COLLSCAN even with an index specified in the compound index

I have a database with the following compound index NameHandleCreationTime
"metadata" : {
"formatVersion" : 8,
"infoObj" : "{ \"v\" : 2, \"key\" : { \"Name\" : 1, \"Handle\" : 1, \"CreationTime\" : -1 }, \"name\" : \"NameHandleCreationTime\", \"ns\" : \"intenv.UserCollection_test\", \"collation\" : { \"locale\" : \"en\", \"caseLevel\" : false, \"caseFirst\" : \"off\", \"strength\" : 1, \"numericOrdering\" : false, \"alternate\" : \"non-ignorable\", \"maxVariable\" : \"punct\", \"normalization\" : false, \"backwards\" : false, \"version\" : \"57.1\" } }"
},
...
So I thought if I make a query with Name: xyz to get a single document from the UserCollection, I'd get it through an IXSCAN, because the compound index's first field is Name. However, the logs show that I am COLLSCANing and even the winning plan is a COLLSCAN.
//db.getCollection("intenv.UserCollection_test").find({Name:"xyz"}).collation({locale:"en"}).explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "intenv.UserCollection_test",
"indexFilterSet" : false,
"parsedQuery" : {
"Name" : {
"$eq" : "xyz"
}
},
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"Name" : {
"$eq" : "xyz"
}
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
...
What is going on...?
Also, I toyed around with collation (https://docs.mongodb.com/manual/reference/collation/) while writing this question: setting collation({ locale: "en", strength: 1}) gives me an IXSCAN as the winning plan...
Why is this?
The collation requested in the query is different than the collation the index was created with, so it can't be used for that query.

mongodb is not using an index for a find command

I have approximately 40M documents in a mongo collection. There is an index on the location.country field:
MongoDB Enterprise cluster-0-shard-0:PRIMARY> db.cases.getIndexes()
[
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_"
},
//...
{
"v" : 2,
"key" : {
"location.country" : -1
},
"name" : "countriesIdx",
"collation" : {
"locale" : "en_US",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
},
//...
]
But queries don't use it:
MongoDB Enterprise cluster-0-shard-0:PRIMARY> db.cases.find({'location.country':'ghana'}).explain({verbosity: 'executionStats'})
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "covid19.cases",
"indexFilterSet" : false,
"parsedQuery" : {
"location.country" : {
"$eq" : "ghana"
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"location.country" : {
"$eq" : "ghana"
}
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 195892,
"totalKeysExamined" : 0,
"totalDocsExamined" : 39264034,
"executionStages" : {
"stage" : "COLLSCAN",
"filter" : {
"location.country" : {
"$eq" : "ghana"
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 99032,
"works" : 39264036,
"advanced" : 0,
"needTime" : 39264035,
"needYield" : 0,
"saveState" : 39503,
"restoreState" : 39503,
"isEOF" : 1,
"direction" : "forward",
"docsExamined" : 39264034
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "cluster-0-shard-00-01-vwhx6.mongodb.net",
"port" : 27017,
"version" : "4.4.8",
"gitVersion" : "83b8bb8b6b325d8d8d3dfd2ad9f744bdad7d6ca0"
},
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1629732226, 1),
"signature" : {
"hash" : BinData(0,"piKWDwLDv7FRcnwCe51PZDLR4UM="),
"keyId" : NumberLong("6958739380580122625")
}
},
"operationTime" : Timestamp(1629732226, 1)
}
Do I need to set up the index differently or do something else to get mongo to use the index? I have tried to hint that it should, but it still does a COLLSCAN. While the examples I've shown above are using mongosh, the behaviour is the same in my node app using mongoose.

MongoDB index doing a collection scan instead index scan [duplicate]

Here are the compound index and single index I have for this Collection:
///db.Collection.getIndexes()
/* 1 */
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "service.Collection"
},
/* 2 */
{
"v" : 2,
"key" : {
"FirstId" : 1,
"SecondId" : 1,
"CreationTime" : -1
},
"name" : "FirstIdSecondIdCreationTime",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 1,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"ns" : "service.Collection"
},
/* 3 */
{
"v" : 2,
"key" : {
"CreationTime" : 1
},
"name" : "CreationTime",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 1,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"ns" : "service.Collection"
}
The expected result is an IXSCAN using the FirstIdSecondIdCreationTime index:
///service.Collection.find({ FirstId: "771367b7-4bef-49ab-bda1-6230254c6349", ///SecondId: "3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f" })
/// .projection({})
/// .sort({_id:-1}).hint("FirstIdSecondIdCreationTime").explain('executionStats')
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "service.Collection",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"FirstId" : {
"$eq" : "771367b7-4bef-49ab-bda1-6230254c6349"
}
},
{
"SecondId" : {
"$eq" : "3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f"
}
}
]
},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"_id" : -1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"FirstId" : {
"$eq" : "771367b7-4bef-49ab-bda1-6230254c6349"
}
},
{
"SecondId" : {
"$eq" : "3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f"
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"FirstId" : 1,
"SecondId" : 1,
"CreationTime" : -1
},
"indexName" : "FirstIdSecondIdCreationTime",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 1,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : false,
"multiKeyPaths" : {
"FirstId" : [ ],
"SecondId" : [ ],
"CreationTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"FirstId" : [
"[MinKey, MaxKey]"
],
"SecondId" : [
"[MinKey, MaxKey]"
],
"CreationTime" : [
"[MaxKey, MinKey]"
]
}
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 5491,
"totalKeysExamined" : 856730,
"totalDocsExamined" : 856730,
"executionStages" : {
"stage" : "SORT",
"nReturned" : 1,
"executionTimeMillisEstimate" : 5261,
"works" : 856734,
"advanced" : 1,
"needTime" : 856732,
"needYield" : 0,
"saveState" : 6697,
"restoreState" : 6697,
"isEOF" : 1,
"invalidates" : 0,
"sortPattern" : {
"_id" : -1
},
"memUsage" : 432,
"memLimit" : 33554432,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : 1,
"executionTimeMillisEstimate" : 5201,
"works" : 856732,
"advanced" : 1,
"needTime" : 856730,
"needYield" : 0,
"saveState" : 6697,
"restoreState" : 6697,
"isEOF" : 1,
"invalidates" : 0,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"FirstId" : {
"$eq" : "771367b7-4bef-49ab-bda1-6230254c6349"
}
},
{
"SecondId" : {
"$eq" : "3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f"
}
}
]
},
"nReturned" : 1,
"executionTimeMillisEstimate" : 5131,
"works" : 856731,
"advanced" : 1,
"needTime" : 856729,
"needYield" : 0,
"saveState" : 6697,
"restoreState" : 6697,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 856730,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 856730,
"executionTimeMillisEstimate" : 820,
"works" : 856731,
"advanced" : 856730,
"needTime" : 0,
"needYield" : 0,
"saveState" : 6697,
"restoreState" : 6697,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"FirstId" : 1,
"SecondId" : 1,
"CreationTime" : -1
},
"indexName" : "FirstIdSecondIdCreationTime",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 1,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : false,
"multiKeyPaths" : {
"FirstId" : [ ],
"SecondId" : [ ],
"CreationTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"FirstId" : [
"[MinKey, MaxKey]"
],
"SecondId" : [
"[MinKey, MaxKey]"
],
"CreationTime" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 856730,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"indexDef" : {
"indexName" : "FirstIdSecondIdCreationTime",
"isMultiKey" : false,
"multiKeyPaths" : {
"FirstId" : [ ],
"SecondId" : [ ],
"CreationTime" : [ ]
},
"keyPattern" : {
"FirstId" : 1,
"SecondId" : 1,
"CreationTime" : -1
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"direction" : "forward"
}
}
}
}
}
but the actual result is a COLLSCAN that takes over 8000ms:
"event": {
"dataset": "mongodb.log",
"module": "mongodb"
},
"service": {
"type": "mongodb"
},
"message": "command service.Collection command: find { find: \"Collection\",
filter: { FirstId: \"771367b7-4bef-49ab-bda1-6230254c6349\", SecondId: \"3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f\" }, sort: { CreationTime: -1 }, limit: 1,
planSummary: COLLSCAN keysExamined:0 docsExamined:784787 hasSortStage:1 cursorExhausted:1 numYields:6175 nreturned:1 reslen:677
locks:{ Global: { acquireCount: { r: 12352 } }, Database: { acquireCount: { r: 6176 } }, Collection: { acquireCount: { r: 6176 } } } protocol:op_msg 8441ms",
"mongodb.docsExamined": 784787,
"fileset": {
"name": "log"
},
Why am I COLLSCANing instead of IXSCANing with the FirstIdSecondIdCreationTime compound index? Is there a way to change my index or my query to speed up the query?
Per a suggestion in the comments, I've run explain("allPlansExecution").
///db.Collection.find({ FirstId: "771367b7-4bef-49ab-bda1-6230254c6349", ///SecondId: "3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f" })
/// .projection({})
/// .sort({_id:-1}).explain('allPlansExecution')
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "service.Collection",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"FirstId" : {
"$eq" : "771367b7-4bef-49ab-bda1-6230254c6349"
}
},
{
"SecondId" : {
"$eq" : "3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f"
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"FirstId" : {
"$eq" : "771367b7-4bef-49ab-bda1-6230254c6349"
}
},
{
"SecondId" : {
"$eq" : "3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f"
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"_id" : [
"[MaxKey, MinKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1,
"executionTimeMillis" : 5408,
"totalKeysExamined" : 856748,
"totalDocsExamined" : 856748,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"FirstId" : {
"$eq" : "771367b7-4bef-49ab-bda1-6230254c6349"
}
},
{
"SecondId" : {
"$eq" : "3bffb3cd-fb5e-43e5-abd1-e0b48c97f78f"
}
}
]
},
"nReturned" : 1,
"executionTimeMillisEstimate" : 4862,
"works" : 856749,
"advanced" : 1,
"needTime" : 856747,
"needYield" : 0,
"saveState" : 6694,
"restoreState" : 6694,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 856748,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 856748,
"executionTimeMillisEstimate" : 1220,
"works" : 856749,
"advanced" : 856748,
"needTime" : 0,
"needYield" : 0,
"saveState" : 6694,
"restoreState" : 6694,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : [ ]
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"_id" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 856748,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
}
}
The "FirstIdSecondIdCreationTime" index was not automatically considered because it was created with a collation, and the query is being run without a collation.
Use the .collation() cursor method to specify the same collation for the query that was used for the index.
The 5.5 second run time using that index is pretty slow as well.
You may see some improvement in that query if you create an index on {FirstId: 1, SecondId: 1, _id: 1} so that the query executor can use the index to satisfy the sort instead of doing an in-memory sort.
Can you please sort by the leading index fields, i.e. FirstId, SecondId and CreationTime, in the same sequence and see whether the index is used? That will give an idea of whether the leading index fields need to be in the sort as well.

Why is MongoDB treating queries differently when served from same index?

I have a collection where my documents looks like:
{
"_id" : ObjectId("591dbe4a77d4ede22d765250"),
"name" : [
{
"de" : true,
"text" : "Brunhilde"
},
{
"sk" : true,
"text" : "Šimon"
}
]
}
I have defined an index as:
> db.names.createIndex({ 'name.de': 1, 'name.text': 1 }, { name: 'name_de', partialFilterExpression: { 'name.de': { $exists: true } }, collation: { locale: 'de' } });
When I do a query like:
> db.names.find({ 'name.de': true, 'name.text': 'Rüdi' }).collation({ locale: 'de' });
The explain plan looks like:
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"name.text" : {
"$eq" : "Rüdi"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
},
"indexName" : "name_de",
"collation" : {
"locale" : "de",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"name.de" : [
"name"
],
"name.text" : [
"name"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : true,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name.de" : [
"[true, true]"
],
"name.text" : [
"[MinKey, MaxKey]"
]
}
}
}
It does an IXSCAN followed by a FETCH stage with a filter. I've already created a question about the filter here.
The more interesting is what will happen when I just change the matching part of the query to:
> db.names.find({ 'name.de': { $exists: true }, 'name.text': 'Rüdi' }).collation({ locale: 'de' });
i.e. expression 'name.de': { $exists: true } should be still subset of partialFilterExpression. As stated in documentation:
To use the partial index, a query must contain the filter expression (or a modified filter expression that specifies a subset of the filter expression) as part of its query condition.
But the explain plan looks like this:
...
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"name.de" : {
"$exists" : true
}
},
{
"name.text" : {
"$eq" : "Rüdi"
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
},
"indexName" : "name_de",
"collation" : {
"locale" : "de",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 3,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"name.de" : [
"name"
],
"name.text" : [
"name"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : true,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"name.de" : [
"[MinKey, MaxKey]"
],
"name.text" : [
"[MinKey, MaxKey]"
]
}
}
}
...
As you can see, the index is used, but the whole filtering is happening in the FETCH stage.
The question is: why is the filtering done in the FETCH stage, and what is so different between these 2 queries that MongoDB treats them differently?
Additionally, a sort query with $exists such as:
> db.names.find({ 'name.de': { $exists: true } }).sort({ 'name.text': 1 }).collation({ locale: "de" })
Behaves the same, whole filtering and sorting is done after IXSCAN stage:
...
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"name.text" : 1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"name.de" : {
"$exists" : true
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"name.de" : 1,
"name.text" : 1
}
...
It even produces incorrect results, while the index is not used for sorting.

MongoDB use index with $nin seems not to work in combination with $regex

It seems that my index in my MongoDB is not correct.
I have created 3 indexes. These:
{
_id: 1
}
{
isbn: 1
}
{
_id: 1,
isbn: 1
}
When doing a query with isbn or _id it works perfectly. Even with both isbn and _id. For example:
db.getCollection('books').find({
isbn: {
$regex: '^978048627.*'
},
_id: 'vGXejKQH5kw8Kfutk'
}
needs around 3ms.
But lets now say I want to search for an ISBN and need to exclude some _ids - I do this:
db.getCollection('books').find({
isbn: {
$regex: '^97804862731.*'
},
_id: {
$nin:['vGXejKQH5kw8Kfutk']
}
})
Now it's not working as it should. The query took more than 10 seconds!
When I do an isbn search without $regex but with $nin, it works perfectly - again around 3ms for the query. Example:
db.getCollection('books').find({
isbn: '9780486273136',
_id: {
$nin:['vGXejKQH5kw8Kfutk']
}
})
Am I doing something wrong? And why is the index not working as it should?
Here is the .explain() output when querying the 10 seconds query:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "***.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"isbn" : /^97804862731.*/
},
{
"$not" : {
"_id" : {
"$in" : [
"vGXejKQH5kw8Kfutk"
]
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1.0,
"_id" : 1.0
},
"indexName" : "isbn_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
],
"_id" : [
"[MinKey, \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"isbn" : /^97804862731.*/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$not" : {
"_id" : {
"$in" : [
"vGXejKQH5kw8Kfutk"
]
}
}
},
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1
},
"indexName" : "isbn_1",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "Ubuntu-1604-xenial-64-minimal",
"port" : 27017,
"version" : "3.2.11",
"gitVersion" : "009580ad490190ba33d1c6253ebd8d91808923e4"
},
"ok" : 1.0
}
Solution
My solution - though I do not know why it works - is to use $and and $ne instead of $nin.
My query looks like this now:
db.getCollection('books').find({isbn:{$regex: '^97804862731.*'}, $and: [
{
_id: {
$ne: 'vGXejKQH5kw8Kfutk'
}
},
{
_id: {
$ne: 'another-id'
}
}
]})
and it takes only around 3ms
Maybe someone can explain how this can happen ?
The explain() of this query
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "***.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"isbn" : /^97804862731.*/
},
{
"$not" : {
"_id" : {
"$eq" : "vGXejKQH5kw8Kfutk"
}
}
},
{
"$not" : {
"_id" : {
"$eq" : "another-id"
}
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1.0,
"_id" : 1.0
},
"indexName" : "isbn_1__id_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
],
"_id" : [
"[MinKey, \"another-id\")",
"(\"another-id\", \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "FETCH",
"filter" : {
"isbn" : /^97804862731.*/
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"_id" : [
"[MinKey, \"another-id\")",
"(\"another-id\", \"vGXejKQH5kw8Kfutk\")",
"(\"vGXejKQH5kw8Kfutk\", MaxKey]"
]
}
}
},
{
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"$not" : {
"_id" : {
"$eq" : "vGXejKQH5kw8Kfutk"
}
}
},
{
"$not" : {
"_id" : {
"$eq" : "another-id"
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"filter" : {
"isbn" : /^97804862731.*/
},
"keyPattern" : {
"isbn" : 1
},
"indexName" : "isbn_1",
"isMultiKey" : false,
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"isbn" : [
"[\"97804862731\", \"97804862732\")",
"[/^97804862731.*/, /^97804862731.*/]"
]
}
}
}
]
},
"serverInfo" : {
"host" : "Ubuntu-1604-xenial-64-minimal",
"port" : 27017,
"version" : "3.2.11",
"gitVersion" : "009580ad490190ba33d1c6253ebd8d91808923e4"
},
"ok" : 1.0
}