MongoDB not using Index - possible collation issue?
We have a MongoDB Query, fully indexed, that should be scanning at most 4/5 rows. However the query appears to use one only element of the index (the integer) and ignore the string portion.
We are using a case-insensitive collation (strength=2), but it makes no difference if we specify this or not. Documentation: https://docs.mongodb.com/manual/core/index-case-insensitive/
Do collations use indexes? Is there a more efficient way of do we manually need to de-normalise? NOTE: we're not actually using any non-standard characters, the collation is specified purely for case insensitivity.
Version (supports collation):
MongoDB server version: 3.6.13
db.version() => 3.6.13
db.adminCommand( { getParameter: 1, featureCompatibilityVersion: 1 } )
gives:
{
"featureCompatibilityVersion" : {
"version" : "3.6"
},
"ok" : 1,
"operationTime" : Timestamp(1565754388, 51),
"$clusterTime" : {
"clusterTime" : Timestamp(1565754388, 51),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
DB Structure (summarised) is
{
"PrimaryID": (int)XXX,
"aTables": {
"userExt": [
{
"userExtPlatform": (int)XXX,
"userExtID": (string)XXX,
"userExtActive": (int 1 | 0)XXX,
},
{
"userExtPlatform": (int)XXX,
"userExtID": (string)XXX,
"userExtActive": (int 1 | 0)XXX,
},
...
],
"userOtherData": [
{
"otherDataField1": XXX,
"otherDataField2": XXX,
},
...
],
...
}
}
Index is set up as follows (note - collation is specified as {locale:en, strength: 2}:
{
"v" : 2,
"key" : {
"aTables.userExt.userExtPlatform" : 1,
"aTables.userExt.userExtID" : 1
},
"name" : "extPlatform",
"background" : false,
"ns" : "archive.users",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
}
The query (trying with/without specifying collation)
use archive;
db.users.find(
{
"aTables.userExt.userExtPlatform": 4,
"aTables.userExt.userExtID": "AStringValue",
"aTables.userExt.userExtActive": 1,
"deleted": { "$exists": false }
}
)
db.users.find(
{
"aTables.userExt.userExtPlatform": 4,
"aTables.userExt.userExtID": "AStringValue",
"aTables.userExt.userExtActive": 1,
"deleted": { "$exists": false }
}
).collation( { locale: "en", strength: 2 } )
Note: Removing the 'deleted' clause makes no difference to the speed / results / explain.
Here is the explain, and it shows a vast number of keys and documents queried.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "archive.users",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"aTables.userExt.userExtActive" : {
"$eq" : 1
}
},
{
"aTables.userExt.userExtID" : {
"$eq" : "PrivateStringRemoved"
}
},
{
"aTables.userExt.userExtPlatform" : {
"$eq" : 4
}
},
{
"$nor" : [
{
"deleted" : {
"$exists" : true
}
}
]
}
]
},
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"aTables.userExt.userExtActive" : {
"$eq" : 1
}
},
{
"aTables.userExt.userExtID" : {
"$eq" : "PrivateStringRemoved"
}
},
{
"$nor" : [
{
"deleted" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"aTables.userExt.userExtPlatform" : 1,
"aTables.userExt.userExtID" : 1
},
"indexName" : "extPlatform",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"aTables.userExt.userExtPlatform" : [
"aTables.userExt"
],
"aTables.userExt.userExtID" : [
"aTables.userExt"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"aTables.userExt.userExtPlatform" : [
"[4.0, 4.0]"
],
"aTables.userExt.userExtID" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 1304,
"totalKeysExamined" : 290114,
"totalDocsExamined" : 290114,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"aTables.userExt.userExtActive" : {
"$eq" : 1
}
},
{
"aTables.userExt.userExtID" : {
"$eq" : "PrivateStringRemoved"
}
},
{
"$nor" : [
{
"deleted" : {
"$exists" : true
}
}
]
}
]
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 1245,
"works" : 290115,
"advanced" : 0,
"needTime" : 290114,
"needYield" : 0,
"saveState" : 2267,
"restoreState" : 2267,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 290114,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 290114,
"executionTimeMillisEstimate" : 270,
"works" : 290115,
"advanced" : 290114,
"needTime" : 0,
"needYield" : 0,
"saveState" : 2267,
"restoreState" : 2267,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"aTables.userExt.userExtPlatform" : 1,
"aTables.userExt.userExtID" : 1
},
"indexName" : "extPlatform",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"aTables.userExt.userExtPlatform" : [
"aTables.userExt"
],
"aTables.userExt.userExtID" : [
"aTables.userExt"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"aTables.userExt.userExtPlatform" : [
"[4.0, 4.0]"
],
"aTables.userExt.userExtID" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 290114,
"seeks" : 1,
"dupsTested" : 290114,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "api-mdb-archive-03",
"port" : 27017,
"version" : "3.6.13",
"gitVersion" : "db3c76679b7a3d9b443a0e1b3e45ed02b88c539f"
},
"ok" : 1,
"operationTime" : Timestamp(1565753056, 9),
"$clusterTime" : {
"clusterTime" : Timestamp(1565753056, 9),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
The log output also confirms that it's checking a huge number of documents, and that collation comes through.
2019-08-14T03:23:53.912+0000 I COMMAND [conn20679] command archive.users appName: "MongoDB Shell" command: find { find: "users", filter: { aTables.userExt.userExtPlatform: 4.0, aTables.userExt.userExtID: "PrivateStringRemoved", aTables.userExt.userExtActive: 1.0, deleted: { $exists: false } }, collation: { locale: "en", strength: 2.0 }, lsid: { id: UUID("3178aa31-5ee9-4a79-9848-f01c1842f542") }, $clusterTime: { clusterTime: Timestamp(1565753015, 41), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } }, $db: "archive" } planSummary: IXSCAN { aTables.userExt.userExtPlatform: 1, aTables.userExt.userExtID: 1 } keysExamined:289966 docsExamined:289966 cursorExhausted:1 numYields:2267 nreturned:0 reslen:228 locks:{ Global: { acquireCount: { r: 4536 } }, Database: { acquireCount: { r: 2268 } }, Collection: { acquireCount: { r: 2268 } } } protocol:op_msg 1546ms
2019-08-14T03:24:16.864+0000 I COMMAND [conn20679] command archive.users appName: "MongoDB Shell" command: explain { explain: { find: "users", filter: { aTables.userExt.userExtPlatform: 4.0, aTables.userExt.userExtID: "PrivateStringRemoved", aTables.userExt.userExtActive: 1.0, deleted: { $exists: false } }, collation: { locale: "en", strength: 2.0 } }, verbosity: "executionStats", lsid: { id: UUID("3178aa31-5ee9-4a79-9848-f01c1842f542") }, $clusterTime: { clusterTime: Timestamp(1565753033, 128), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } }, $db: "archive" } numYields:2267 reslen:3578 locks:{ Global: { acquireCount: { r: 4536 } }, Database: { acquireCount: { r: 2268 } }, Collection: { acquireCount: { r: 2268 } } } protocol:op_msg 1341ms
For completeness, this is driven by PHP, but verified in Mongo CMD as above. Here is the PHP:
$aParams = [
'aTables.userExt.userExtID' => 4,
'aTables.userExt.userExtPlatform' => 'PrivateStringRemoved',
'aTables.userExt.userExtActive' => 1,
'deleted': [
'$exists' => false
]
];
$aOptions = [
'readPreference' => new \MongoDB\Driver\ReadPreference(\MongoDB\Driver\ReadPreference::RP_NEAREST),
'skip' => $start,
'limit' => $limit,
'typeMap' => [
'root' => 'array',
'document' => 'array',
'array' => 'array'
],
'collation' => [
'locale' => 'en',
'strength' => 2
],
];
try {
$aResults = $collectionArchive->find($aParams, $aOptions);
} catch (\Exception $exception) {
throw new ArchiverException('Mongo Error', ArchiverRequest::ERROR_MONGO, $exception->getMessage());
}