Sync MongoDB databases across 3 AWS instances with each other - mongodb

We are setting up replica set "rs0" in MongoDB, but it reports the state shown below.
We have been trying for many days but have not found a proper solution.
Replica configurations
rs0 [direct: primary] test> rs.config()
{
_id: 'rs0',
version: 17,
term: 66,
members: [
{
_id: 0,
host: 'ip-172-31-35-197.ec2.internal:27017',
arbiterOnly: false,
buildIndexes: true,
hidden: false,
priority: 1,
tags: {},
secondaryDelaySecs: Long("0"),
votes: 1
},
{
_id: 1,
host: '172.31.40.27:27017',
arbiterOnly: false,
buildIndexes: true,
hidden: false,
priority: 2,
tags: {},
secondaryDelaySecs: Long("0"),
votes: 1
},
{
_id: 2,
host: '172.31.38.235:27017',
arbiterOnly: false,
buildIndexes: true,
hidden: false,
priority: 3,
tags: {},
secondaryDelaySecs: Long("0"),
votes: 1
}
],
protocolVersion: Long("1"),
writeConcernMajorityJournalDefault: true,
settings: {
chainingAllowed: true,
heartbeatIntervalMillis: 2000,
heartbeatTimeoutSecs: 10,
electionTimeoutMillis: 10000,
catchUpTimeoutMillis: -1,
catchUpTakeoverDelayMillis: 30000,
getLastErrorModes: {},
getLastErrorDefaults: { w: 1, wtimeout: 0 },
replicaSetId: ObjectId("63ce58d964a80e070f6c2ee1")
}
}
Replica status
rs0 [direct: primary] test> rs.status()
{
set: 'rs0',
date: ISODate("2023-02-10T07:58:37.751Z"),
myState: 1,
term: Long("66"),
syncSourceHost: '',
syncSourceId: -1,
heartbeatIntervalMillis: Long("2000"),
majorityVoteCount: 1,
writeMajorityCount: 1,
votingMembersCount: 1,
writableVotingMembersCount: 1,
optimes: {
lastCommittedOpTime: { ts: Timestamp({ t: 1676015917, i: 40 }), t: Long("66") },
lastCommittedWallTime: ISODate("2023-02-10T07:58:37.695Z"),
readConcernMajorityOpTime: { ts: Timestamp({ t: 1676015917, i: 40 }), t: Long("66") },
appliedOpTime: { ts: Timestamp({ t: 1676015917, i: 40 }), t: Long("66") },
durableOpTime: { ts: Timestamp({ t: 1676015917, i: 40 }), t: Long("66") },
lastAppliedWallTime: ISODate("2023-02-10T07:58:37.695Z"),
lastDurableWallTime: ISODate("2023-02-10T07:58:37.695Z")
},
lastStableRecoveryTimestamp: Timestamp({ t: 1676015890, i: 35 }),
electionCandidateMetrics: {
lastElectionReason: 'electionTimeout',
lastElectionDate: ISODate("2023-02-09T06:40:25.035Z"),
electionTerm: Long("66"),
lastCommittedOpTimeAtElection: { ts: Timestamp({ t: 0, i: 0 }), t: Long("-1") },
lastSeenOpTimeAtElection: { ts: Timestamp({ t: 1675924812, i: 15 }), t: Long("65") },
numVotesNeeded: 1,
priorityAtElection: 1,
electionTimeoutMillis: Long("10000"),
numCatchUpOps: Long("0"),
newTermStartDate: ISODate("2023-02-09T06:40:25.038Z"),
wMajorityWriteAvailabilityDate: ISODate("2023-02-09T06:40:25.041Z")
},
members: [
{
_id: 0,
name: 'ip-172-31-35-197.ec2.internal:27017',
health: 1,
state: 1,
stateStr: 'PRIMARY',
uptime: 91105,
optime: { ts: Timestamp({ t: 1676015917, i: 40 }), t: Long("66") },
optimeDate: ISODate("2023-02-10T07:58:37.000Z"),
lastAppliedWallTime: ISODate("2023-02-10T07:58:37.695Z"),
lastDurableWallTime: ISODate("2023-02-10T07:58:37.695Z"),
syncSourceHost: '',
syncSourceId: -1,
infoMessage: '',
electionTime: Timestamp({ t: 1675924825, i: 1 }),
electionDate: ISODate("2023-02-09T06:40:25.000Z"),
configVersion: 17,
configTerm: 66,
self: true,
lastHeartbeatMessage: ''
},
{
_id: 1,
name: '172.31.40.27:27017',
health: 0,
state: 8,
stateStr: '(not reachable/healthy)',
uptime: 0,
optime: { ts: Timestamp({ t: 0, i: 0 }), t: Long("-1") },
optimeDurable: { ts: Timestamp({ t: 0, i: 0 }), t: Long("-1") },
optimeDate: ISODate("1970-01-01T00:00:00.000Z"),
optimeDurableDate: ISODate("1970-01-01T00:00:00.000Z"),
lastAppliedWallTime: ISODate("1970-01-01T00:00:00.000Z"),
lastDurableWallTime: ISODate("1970-01-01T00:00:00.000Z"),
lastHeartbeat: ISODate("2023-02-10T07:58:36.506Z"),
lastHeartbeatRecv: ISODate("1970-01-01T00:00:00.000Z"),
pingMs: Long("0"),
lastHeartbeatMessage: "replica set IDs do not match, ours: 63ce58d964a80e070f6c2ee1; remote node's: 63cfe6dc919add7e21a86867",
syncSourceHost: '',
syncSourceId: -1,
infoMessage: '',
configVersion: -1,
configTerm: -1
},
{
_id: 2,
name: '172.31.38.235:27017',
health: 0,
state: 8,
stateStr: '(not reachable/healthy)',
uptime: 0,
optime: { ts: Timestamp({ t: 0, i: 0 }), t: Long("-1") },
optimeDurable: { ts: Timestamp({ t: 0, i: 0 }), t: Long("-1") },
optimeDate: ISODate("1970-01-01T00:00:00.000Z"),
optimeDurableDate: ISODate("1970-01-01T00:00:00.000Z"),
lastAppliedWallTime: ISODate("1970-01-01T00:00:00.000Z"),
lastDurableWallTime: ISODate("1970-01-01T00:00:00.000Z"),
lastHeartbeat: ISODate("2023-02-10T07:58:36.502Z"),
lastHeartbeatRecv: ISODate("1970-01-01T00:00:00.000Z"),
pingMs: Long("0"),
lastHeartbeatMessage: "replica set IDs do not match, ours: 63ce58d964a80e070f6c2ee1; remote node's: 63cfe728bc3c837fff6957a5",
syncSourceHost: '',
syncSourceId: -1,
infoMessage: '',
configVersion: -1,
configTerm: -1
}
],
ok: 1,
'$clusterTime': {
clusterTime: Timestamp({ t: 1676015917, i: 40 }),
signature: {
hash: Binary(Buffer.from("0000000000000000000000000000000000000000", "hex"), 0),
keyId: Long("0")
}
},
operationTime: Timestamp({ t: 1676015917, i: 40 })
}
When all 3 instances are running, our service returns a 404 error because the database is not synced across the instances. (Note: the lastHeartbeatMessage "replica set IDs do not match" in rs.status() indicates each node was initialized as its own separate replica set.)
How can we get the MongoDB databases on these 3 AWS instances to sync with each other?

Related

How to create this tsvector generated always as column with sequelize?

I see that sequelize has DataTypes.TSVECTOR for postgres dialect.
I have a column whose definition in raw SQL is as follows
tsvector GENERATED ALWAYS AS (((
setweight(to_tsvector('english'::regconfig, (COALESCE(title, ''::character varying))::text), 'A'::"char") ||
setweight(to_tsvector('english'::regconfig, COALESCE(summary, ''::text)), 'B'::"char")) ||
setweight(to_tsvector('english'::regconfig, (COALESCE(content, ''::character varying))::text), 'C'::"char")))
STORED
How can I define this in my sequelize model
// Sequelize model from the question: every column maps cleanly to a DataType
// except "searchable", which in raw SQL is a GENERATED ALWAYS ... STORED tsvector.
const FeedItem = sequelize.define(
'FeedItem', {
feedItemId: {
type: DataTypes.UUID,
primaryKey: true,
allowNull: false,
defaultValue: DataTypes.UUIDV4,
},
pubdate: {
type: DataTypes.DATE,
allowNull: false,
// Default comes from the database server's clock, not the application's.
defaultValue: sequelize.literal('CURRENT_TIMESTAMP'),
validate: {
isDate: true,
},
},
link: {
type: DataTypes.STRING,
allowNull: false,
validate: {
len: [0, 2047],
},
},
guid: {
type: DataTypes.STRING,
validate: {
len: [0, 2047],
},
},
title: {
type: DataTypes.TEXT,
allowNull: false,
validate: {
len: [0, 65535],
},
},
summary: {
type: DataTypes.TEXT,
validate: {
len: [0, 65535],
},
},
content: {
type: DataTypes.TEXT,
validate: {
len: [0, 1048575],
},
},
author: {
type: DataTypes.STRING,
validate: {
len: [0, 63],
},
},
tags: {
type: DataTypes.ARRAY(DataTypes.STRING),
defaultValue: [],
},
// How to do that generated always part here???
searchable: {
// NOTE(review): DataTypes.TSVECTOR alone creates a plain tsvector column,
// not a GENERATED ALWAYS column — this is exactly what the question asks about.
type: DataTypes.TSVECTOR
},
}, {
timestamps: false,
underscored: true,
indexes: [
{
// GIN index so full-text search on "searchable" is fast.
name: 'idx_feed_items_searchable',
fields: ['searchable'],
using: 'gin',
},
],
}
);
The model needs to be modified as follows to get this working
// Working model from the answer: the generated column is expressed by passing
// the raw Postgres DDL string as the attribute's "type".
const FeedItem = sequelize.define(
'FeedItem',
{
feedItemId: {
type: DataTypes.UUID,
primaryKey: true,
allowNull: false,
defaultValue: DataTypes.UUIDV4,
},
pubdate: {
type: DataTypes.DATE,
allowNull: false,
// Default comes from the database server's clock, not the application's.
defaultValue: sequelize.literal('CURRENT_TIMESTAMP'),
validate: {
isDate: true,
},
},
link: {
type: DataTypes.STRING,
allowNull: false,
validate: {
len: [0, 2047],
},
},
guid: {
type: DataTypes.STRING,
validate: {
len: [0, 2047],
},
},
title: {
type: DataTypes.TEXT,
allowNull: false,
validate: {
len: [0, 65535],
},
},
summary: {
type: DataTypes.TEXT,
validate: {
len: [0, 65535],
},
},
content: {
type: DataTypes.TEXT,
validate: {
len: [0, 1048575],
},
},
author: {
type: DataTypes.STRING,
validate: {
len: [0, 63],
},
},
tags: {
type: DataTypes.ARRAY(DataTypes.STRING),
defaultValue: [],
},
// https://stackoverflow.com/questions/67051281/use-postgres-generated-columns-in-sequelize-model
searchable: {
// Raw DDL string instead of DataTypes.TSVECTOR: Postgres itself computes the
// weighted tsvector (title=A, summary=B, content=C) and STOREs it on disk.
type: `tsvector GENERATED ALWAYS AS (((setweight(to_tsvector('english'::regconfig, (COALESCE(title, ''::character varying))::text), 'A'::"char") || setweight(to_tsvector('english'::regconfig, COALESCE(summary, ''::text)), 'B'::"char")) || setweight(to_tsvector('english'::regconfig, (COALESCE(content, ''::character varying))::text), 'C'::"char"))) STORED`,
set() {
// Generated columns cannot be written from the application side.
throw new Error('generatedValue is read-only');
},
},
},
{
timestamps: false,
underscored: true,
indexes: [
{
name: 'idx_feed_items_pubdate_feed_item_id_desc',
fields: [
{ attribute: 'pubdate', order: 'DESC' },
{ attribute: 'feed_item_id', order: 'DESC' },
],
},
{
name: 'idx_feed_items_tags',
fields: ['tags'],
using: 'gin',
},
{
// GIN index so full-text search on the generated column is fast.
name: 'idx_feed_items_searchable',
fields: ['searchable'],
using: 'gin',
},
],
}
);
Note: this does not work with sequelize.sync({ alter: true }); you have to use sync({ force: true }) or Sequelize migrations.

How to make MongoDB use indexes?

I am using a copy of the listingsAndReviews collection from the sample_airbnb sample database.
I have added an index on the address.country field, thus when performing
db.listingsAndReviews.getIndexes ()
I get
[
{ v: 2, key: { _id: 1 }, name: '_id_' },
{
v: 2,
key: { 'address.country': 1 },
name: 'address_country',
background: false,
collation: {
locale: 'en_US',
caseLevel: false,
caseFirst: 'off',
strength: 1,
numericOrdering: false,
alternate: 'non-ignorable',
maxVariable: 'punct',
normalization: false,
backwards: false,
version: '57.1'
}
}
]
Now, I am trying to use the following aggregate:
// Count listings per country; the $sort on "address.country" was expected
// to be satisfied by the address_country index.
db.listingsAndReviews.aggregate( [{ $sort: { "address.country": 1 } },
{ $project: { "address.country": 1, _id: 0 } },
{ $group: { _id: "$address.country", count: { $sum: 1 } } }],
{ cursor: { batchSize: 32 }, allowDiskUse: false })
Using explain("executionStats") I see that my index is not used!
{
stages: [
{
'$cursor': {
queryPlanner: {
plannerVersion: 1,
namespace: 'mytests.listingsAndReviews',
indexFilterSet: false,
parsedQuery: {},
queryHash: '6E8AB948',
planCacheKey: '6E8AB948',
winningPlan: {
stage: 'SORT',
sortPattern: { 'address.country': 1 },
memLimit: 104857600,
type: 'simple',
inputStage: {
stage: 'PROJECTION_DEFAULT',
transformBy: { address: { country: true }, _id: false },
inputStage: { stage: 'COLLSCAN', direction: 'forward' }
}
},
rejectedPlans: []
},
executionStats: {
executionSuccess: true,
nReturned: 5555,
executionTimeMillis: 17,
totalKeysExamined: 0,
totalDocsExamined: 5555,
executionStages: {
stage: 'SORT',
nReturned: 5555,
executionTimeMillisEstimate: 1,
works: 11113,
advanced: 5555,
needTime: 5557,
needYield: 0,
saveState: 12,
restoreState: 12,
isEOF: 1,
sortPattern: { 'address.country': 1 },
memLimit: 104857600,
type: 'simple',
totalDataSizeSorted: 318167,
usedDisk: false,
inputStage: {
stage: 'PROJECTION_DEFAULT',
nReturned: 5555,
executionTimeMillisEstimate: 1,
works: 5557,
advanced: 5555,
needTime: 1,
needYield: 0,
saveState: 12,
restoreState: 12,
isEOF: 1,
transformBy: { address: { country: true }, _id: false },
inputStage: {
stage: 'COLLSCAN',
nReturned: 5555,
executionTimeMillisEstimate: 0,
works: 5557,
advanced: 5555,
needTime: 1,
needYield: 0,
saveState: 12,
restoreState: 12,
isEOF: 1,
direction: 'forward',
docsExamined: 5555
}
}
}
}
},
nReturned: Long("5555"),
executionTimeMillisEstimate: Long("15")
},
{
'$group': { _id: '$address.country', count: { '$sum': { '$const': 1 } } },
nReturned: Long("9"),
executionTimeMillisEstimate: Long("15")
}
],
serverInfo: {
host: 'ldp-2103',
port: 27017,
version: '4.4.6',
gitVersion: '72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7'
},
ok: 1
}
Why is the index not used?
I've tried the same query using mongodb-compass and it clearly shows
Index Keys Examined:0

MongoDB – Slow text search query with multiple words on a large dataset

I have a M10 Atlas instance.
The issue happens in a DB with 2.5m of documents, but only 900k of { status: "active" } ones.
The search is quick when no sorting/projection is performed, but in order to show the most accurate results first, it needs to be done otherwise the results are a mess.
For some reason this text search is being very slow for medium/long phrases and I have no idea how to speed it up.
Having the following document:
{
_id: "FOO123",
track_date: { $date: "2019-03-09T05:49:22.000Z" },
category_id: "foo",
parent_category_id: "foo",
title: "Disco Ssd Solido Kingston 480g Macbook Pro Air iMac Martinez",
status: "active",
site_id: "foo",
seller_id: 9999,
price: 9999,
permalink:
"https://example.com",
secure_thumbnail:
"https://example.com/img.jpg",
images: [
"https://example.com/img.jpg",
],
fluctuation: 80,
last_update: { $date: "2020-12-11T14:00:24.715Z" },
original_price: null,
}
Performing the following query:
// Slow query from the question: text-search "active" articles for one site,
// ranked by relevance (textScore), paginated 10 at a time.
db.articles.aggregate([
{
"$match": {
"status": "active",
"site_id": "foo",
"$text": {
"$search": "Disco Ssd solido kingston 480g macbook pro air",
"$caseSensitive": false
}
}
},
{
// Blocking stage: every matching document must be scored before
// the first 10 can be returned.
"$sort": {
"score": {
"$meta": "textScore"
}
}
},
{
"$skip": 0
},
{
"$limit": 10
}
])
Takes around 35s to yield results.
These are the indexes that I have set up for the active and title fields (I also have compound indexes that mix these two and a few more properties, I can show them here if needed)
{
v: 2,
key: { _fts: "text", _ftsx: 1 },
name: "title_text",
language_override: "language",
weights: { title: 1 },
default_language: "english",
ns: "foo.articles",
textIndexVersion: 3,
},
{
v: 2,
key: { status: 1 },
name: "status_1",
ns: "foo.articles",
}
This is the result of explain("executionStats")
Query (as aggregation did not work with "executionStats":
// Same query rewritten with find(), because explain("executionStats")
// did not work with the aggregation form.
collection.find(
{
status: "active",
site_id: "foo",
$text: {
$search: "Disco Ssd solido kingston 480g macbook pro air",
$caseSensitive: false,
},
},
{ score: { $meta: "textScore" } }
)
.sort({ score: { $meta: "textScore" } })
.skip(0)
.limit(10)
.explain("executionStats")
Explain with execution stats:
{
queryPlanner: {
plannerVersion: 1,
namespace: "foo.articles",
indexFilterSet: false,
parsedQuery: {
$and: [
{ site_id: { $eq: "foo" } },
{ status: { $eq: "active" } },
{
$text: {
$search: "Disco Ssd solido kingston 480g macbook pro air",
$language: "english",
$caseSensitive: false,
$diacriticSensitive: false,
},
},
],
},
winningPlan: {
stage: "PROJECTION_DEFAULT",
transformBy: { score: { $meta: "textScore" } },
inputStage: {
stage: "SORT",
sortPattern: { score: { $meta: "textScore" } },
limitAmount: 10,
inputStage: {
stage: "SORT_KEY_GENERATOR",
inputStage: {
stage: "FETCH",
filter: {
$and: [
{ site_id: { $eq: "foo" } },
{ status: { $eq: "active" } },
],
},
inputStage: {
stage: "TEXT",
indexPrefix: {},
indexName: "title_text",
parsedTextQuery: {
terms: [
"480g",
"air",
"disco",
"kingston",
"macbook",
"pro",
"solido",
"ssd",
],
negatedTerms: [],
phrases: [],
negatedPhrases: [],
},
textIndexVersion: 3,
inputStage: {
stage: "TEXT_MATCH",
inputStage: {
stage: "TEXT_OR",
inputStages: [
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
{
stage: "IXSCAN",
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
},
],
},
},
},
},
},
},
},
rejectedPlans: [],
},
executionStats: {
executionSuccess: true,
nReturned: 10,
executionTimeMillis: 30458,
totalKeysExamined: 169654,
totalDocsExamined: 279454,
executionStages: {
stage: "PROJECTION_DEFAULT",
nReturned: 10,
executionTimeMillisEstimate: 27307,
works: 309403,
advanced: 10,
needTime: 309392,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
transformBy: { score: { $meta: "textScore" } },
inputStage: {
stage: "SORT",
nReturned: 10,
executionTimeMillisEstimate: 27299,
works: 309403,
advanced: 10,
needTime: 309392,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
sortPattern: { score: { $meta: "textScore" } },
memUsage: 9275,
memLimit: 33554432,
limitAmount: 10,
inputStage: {
stage: "SORT_KEY_GENERATOR",
nReturned: 31606,
executionTimeMillisEstimate: 27262,
works: 309392,
advanced: 31606,
needTime: 277785,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
inputStage: {
stage: "FETCH",
filter: {
$and: [
{ site_id: { $eq: "foo" } },
{ status: { $eq: "active" } },
],
},
nReturned: 31606,
executionTimeMillisEstimate: 27251,
works: 309391,
advanced: 31606,
needTime: 277784,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
docsExamined: 139727,
alreadyHasObj: 139727,
inputStage: {
stage: "TEXT",
nReturned: 139727,
executionTimeMillisEstimate: 27191,
works: 309391,
advanced: 139727,
needTime: 169663,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
indexPrefix: {},
indexName: "title_text",
parsedTextQuery: {
terms: [
"480g",
"air",
"disco",
"kingston",
"macbook",
"pro",
"solido",
"ssd",
],
negatedTerms: [],
phrases: [],
negatedPhrases: [],
},
textIndexVersion: 3,
inputStage: {
stage: "TEXT_MATCH",
nReturned: 139727,
executionTimeMillisEstimate: 27191,
works: 309391,
advanced: 139727,
needTime: 169663,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
docsRejected: 0,
inputStage: {
stage: "TEXT_OR",
nReturned: 139727,
executionTimeMillisEstimate: 27191,
works: 309391,
advanced: 139727,
needTime: 169663,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
docsExamined: 139727,
inputStages: [
{
stage: "IXSCAN",
nReturned: 291,
executionTimeMillisEstimate: 0,
works: 292,
advanced: 291,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 291,
seeks: 1,
dupsTested: 291,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 26973,
executionTimeMillisEstimate: 121,
works: 26974,
advanced: 26973,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 26973,
seeks: 1,
dupsTested: 26973,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 29687,
executionTimeMillisEstimate: 69,
works: 29688,
advanced: 29687,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 29687,
seeks: 1,
dupsTested: 29687,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 13595,
executionTimeMillisEstimate: 28,
works: 13596,
advanced: 13595,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 13595,
seeks: 1,
dupsTested: 13595,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 10730,
executionTimeMillisEstimate: 28,
works: 10731,
advanced: 10730,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 10730,
seeks: 1,
dupsTested: 10730,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 53358,
executionTimeMillisEstimate: 248,
works: 53359,
advanced: 53358,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 53358,
seeks: 1,
dupsTested: 53358,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 7847,
executionTimeMillisEstimate: 13,
works: 7848,
advanced: 7847,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 7847,
seeks: 1,
dupsTested: 7847,
dupsDropped: 0,
},
{
stage: "IXSCAN",
nReturned: 27173,
executionTimeMillisEstimate: 17,
works: 27174,
advanced: 27173,
needTime: 0,
needYield: 0,
saveState: 3211,
restoreState: 3211,
isEOF: 1,
keyPattern: { _fts: "text", _ftsx: 1 },
indexName: "title_text",
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: "backward",
indexBounds: {},
keysExamined: 27173,
seeks: 1,
dupsTested: 27173,
dupsDropped: 0,
},
],
},
},
},
},
},
},
},
},
serverInfo: {
host: "host",
port: "port",
version: "4.2.11",
gitVersion: "ea38428f0c6742c7c2c7f677e73d79e17a2aab96",
},
ok: 1,
$clusterTime: {
clusterTime: Timestamp(1, 1609902178),
signature: {
hash: BinData(0, "cunb+7FEjXmbwN22uz3F+nV0LKE="),
keyId: NumberLong("6911938552179720195"),
},
},
operationTime: Timestamp(1, 1609902178),
}
The issue is not related to the status field, as I've tried removing it and the issue persists.
I don't really mind a query that's a bit slow, but more than half a minute is too much for a search.
At first I was doing an AND query for the text search instead of an OR, but it was even slower for long phrases, so decided to give the score a try.
I've also tried removing the sort pipeline, but it still takes 30s~
I'll take any tip anyone can give me at this point.
EDIT: According to the explain("executionStats") bit, it seems that the sort/projection of score is the time sink, but I've no idea how to improve that in order to get the best matches first
Mongodb Atlas specific feature
Since you are using Atlas you may find https://docs.atlas.mongodb.com/atlas-search a better alternative. It uses Lucene engine similar to Elasticsearch, Solr, etc, instead of rather limited native text index which is available on self-hosted versions of the database.
Atlas search is way more flexible and in most cases faster than mongo text index. The downside is a noticeable lag (like very eventual consistency), and higher demand on disk space (especially for ngram tokenizers) but these are not critical in your case.
The search is available on clusters of v4.2+:
Give it a try, it's included in your subscription.
The highlighted parts of the question may be confusing and deserve some explanation:
The issue happens in a DB with 2.5m of documents, but only 900k of { status: "active" } ones.
It doesn't really matter how many active documents there are. There is no index intersection so the only text index is used. Filter by status and site_id is done only on the FETCH stage. In other words you search all 2.5m documents regardless of other filters.
The search is quick when no sorting/projection is performed
$sort is a blocking stage, means it requires previous stage to complete. You have $skip-$limit stages to return 10 documents only. Without $sort stage the pipeline returns 10 documents that were found first. With $sort stage the pipeline waits for all documents to be searched to find 10 with highest score.
For some reason this text search is being very slow for medium/long phrases
The reason is obvious — the more words in the phrase, the more searches Mongo needs to do. It performs one search per word, then calculates the score based on the results of all the searches.
Memory shouldn't be an issue for the sorting stage. The limit is 100 MB, then it starts using disk regardless of how much RAM you have.
Lack of memory might be a problem if all indexes and the working set don't fit into RAM. If it is the case it should be highlighted in the "Performance Advisor" tab.
db.collection.totalIndexSize() can also give you some idea of how much RAM you would need for optimal performance. It is not specific to the text index and affects performance of the whole cluster.

mongo aggregate how to choose ‘$graphLookup or $lookup’

I don't know how to choose between $graphLookup and $lookup for cases like this.
I am looking forward to more complete official MongoDB documentation on this topic.
example:
{parentId: 0, cid: 1, other: 'a'},
{parentId: 0, cid: 2, other: 'b'},
{parentId: 0, cid: 3, other: 'c'},
{parentId: 1, cid: 11, other: 'aa'},
{parentId: 2, cid: 12, other: 'ab'},
{parentId: 3, cid: 13, other: 'ac'},
result:
{
parentId: 0, cid: 1, other: 'a',
children: [
{parentId: 1, cid: 11, other: 'aa'},
]
},{
parentId: 0, cid: 2, other: 'b',
children: [
{parentId: 2, cid: 12, other: 'ab'},
]
},{
parentId: 0, cid: 3, other: 'c',
children: [
{parentId: 3, cid: 13, other: 'ac'},
]
}
},
How can I do this?
You need to use $graphLookup
db.collection.aggregate([
{
// Keep only the root documents (those whose parentId is 0).
$match: {
"parentId": {
$eq: 0
}
}
},
{
// Follow cid -> parentId links and collect the matches into "children".
// NOTE(review): without maxDepth, $graphLookup traverses every level of the
// hierarchy, not only direct children — confirm that is the intent.
$graphLookup: {
from: "collection",
startWith: "$cid",
connectFromField: "cid",
connectToField: "parentId",
as: "children"
}
}
])
$lookup joins a single level of a related collection; $graphLookup performs a recursive traversal.

mongodb runs slow on IXSCAN

I cannot figure out why some requests in MongoDB are slow. I created an index that matches all the fields in the request, but it didn't help.
Is the problem in the index? Which fields should I include in the index?
here is a part of slowlog
query: {
$query: {
moderation.blocked: false,
project_id: "9fd6db37-049f-4af3-bea4-8301345dc109",
dates.posted: { $gt: 1458720426.177019 },
parent_ids: "31cd1be3-adbd-4108-a05e-58c283176738",
moderation.visible: true },
$orderby: { dates.posted: 1 }
}
planSummary: IXSCAN {
parent_ids: 1,
dates.posted: 1,
owner_id: 1,
project_id: 1,
moderation.blocked: 1,
moderation.visible: -1
}
ntoreturn:0
ntoskip:0
nscanned:0
nscannedObjects:0
keyUpdates:0
writeConflicts:0
numYields:1
nreturned:0
reslen:20
locks:{ Global: { acquireCount: { r: 4 } }, Database: { acquireCount: { r: 2 } }, Collection: { acquireCount: { r: 2 } } } 235ms
explain() for same query
{
'queryPlanner': {
'plannerVersion': 1,
'parsedQuery': {
'$and': [{
'moderation.blocked': {
'$eq': False
}
}, {
'moderation.visible': {
'$eq': True
}
}, {
'parent_ids': {
'$eq': '31cd1be3-adbd-4108-a05e-58c283176738'
}
}, {
'project_id': {
'$eq': '9fd6db37-049f-4af3-bea4-8301345dc109'
}
}, {
'dates.posted': {
'$gt': 1458720426.177019
}
}]
},
'namespace': 'comments.comments',
'rejectedPlans': [],
'winningPlan': {
'filter': {
'moderation.visible': {
'$eq': True
}
},
'stage': 'FETCH',
'inputStage': {
'indexName': 'parent_ids_1_dates.posted_1_owner_id_1_project_id_1_moderation.blocked_1_moderation.visible_-1',
'keyPattern': {
'parent_ids': 1,
'moderation.blocked': 1,
'owner_id': 1,
'moderation.visible': -1,
'dates.posted': 1,
'project_id': 1
},
'isMultiKey': True,
'indexBounds': {
'parent_ids': ['["31cd1be3-adbd-4108-a05e-58c283176738", "31cd1be3-adbd-4108-a05e-58c283176738"]'],
'moderation.blocked': ['[false, false]'],
'owner_id': ['[MinKey, MaxKey]'],
'moderation.visible': ['[MaxKey, MinKey]'],
'dates.posted': ['(1458720426.177019, inf.0]'],
'project_id': ['["9fd6db37-049f-4af3-bea4-8301345dc109", "9fd6db37-049f-4af3-bea4-8301345dc109"]']
},
'stage': 'IXSCAN',
'direction': 'forward'
}
},
'indexFilterSet': False
},
'executionStats': {
'totalDocsExamined': 0,
'allPlansExecution': [],
'totalKeysExamined': 0,
'executionSuccess': True,
'nReturned': 0,
'executionStages': {
'invalidates': 0,
'filter': {
'moderation.visible': {
'$eq': True
}
},
'isEOF': 1,
'needTime': 0,
'alreadyHasObj': 0,
'advanced': 0,
'inputStage': {
'invalidates': 0,
'isEOF': 1,
'needTime': 0,
'dupsDropped': 0,
'dupsTested': 0,
'keyPattern': {
'parent_ids': 1,
'moderation.blocked': 1,
'owner_id': 1,
'moderation.visible': -1,
'dates.posted': 1,
'project_id': 1
},
'advanced': 0,
'seenInvalidated': 0,
'nReturned': 0,
'indexBounds': {
'parent_ids': ['["31cd1be3-adbd-4108-a05e-58c283176738", "31cd1be3-adbd-4108-a05e-58c283176738"]'],
'moderation.blocked': ['[false, false]'],
'owner_id': ['[MinKey, MaxKey]'],
'moderation.visible': ['[MaxKey, MinKey]'],
'dates.posted': ['(1458720426.177019, inf.0]'],
'project_id': ['["9fd6db37-049f-4af3-bea4-8301345dc109", "9fd6db37-049f-4af3-bea4-8301345dc109"]']
},
'needFetch': 0,
'saveState': 0,
'keysExamined': 0,
'matchTested': 0,
'executionTimeMillisEstimate': 0,
'indexName': 'parent_ids_1_dates.posted_1_owner_id_1_project_id_1_moderation.blocked_1_moderation.visible_-1',
'restoreState': 0,
'works': 1,
'direction': 'forward',
'stage': 'IXSCAN',
'isMultiKey': True
},
'stage': 'FETCH',
'saveState': 0,
'docsExamined': 0,
'nReturned': 0,
'restoreState': 0,
'works': 1,
'executionTimeMillisEstimate': 0,
'needFetch': 0
},
'executionTimeMillis': 0
}