Atlas Search works too slow when using facet - mongodb

I have a big collection (over 22M records, approx. 25GB) on an M10 cluster with MongoDB version 4.4.10. I set up an Atlas Search index on one field (address), and it works pretty fast when I query it through the search tester. However, when I try to paginate the results by adding a $facet stage, it becomes extremely slow compared with the same query without the facet. Is there a way to optimize the facet, or to replace it with something that works faster? Below are the plain query and the one with the facet:
db.getCollection("users").aggregate([{
$search: {
index: 'address',
text: {
query: '7148 BIG WOODS DR',
path: {
'wildcard': '*'
}
}
}
}]);
db.getCollection("users").aggregate([{
$search: {
index: 'address',
text: {
query: '7148 BIG WOODS DR',
path: {
'wildcard': '*'
}
}
}
}, {
$facet: {
paginatedResult: [
{
$limit: 50
},
{
$skip: 0
}
],
totalCount: [
{
$count: 'total'
}
]
}
}]);

The fast and recommended way is to use facet with the $searchMeta stage, which retrieves only the metadata results for the query:
"$searchMeta": {
"index":"search_index_with_facet_fields",
"facet":{
"operator":{
"compound":{
"must":[
{
"text":{
"query":"red shirt",
"path":{
"wildcard":"*"
}
}
},
{
"compound":{
"filter":[
{
"text":{
"query":["clothes"],
"path":"category"
}
},
{
"text":{
"query":[
"maroon",
"blackandred",
"blackred",
"crimson",
"burgandy",
"burgundy"
],
"path":"color"
}
}
]
}
}
]
}
},
"facets":{
"brand":{
"type":"string",
"path":"brand"
},
"size":{
"type":"string",
"path":"size"
},
"color":{
"type":"string",
"path":"color"
}
}
}
}
}
Here we are fetching three facets (brand, size, and color), which need to be defined as facet fields in your search index, for example:
{
"mappings": {
"dynamic": false,
"fields": {
"category": [
{
"type": "string"
}
],
"brand": [
{
"type": "string"
},
{
"type": "stringFacet"
}
],
"size": [
{
"type": "string"
},
{
"type": "stringFacet"
}
],
"color": [
{
"type": "string"
},
{
"type": "stringFacet"
}
]
}
}
}
category is defined only as string since we are not using it in facets but only as a filter field.
We can also replace the filter clause with must or should, depending on our requirements.
Finally, we will get the facet metadata (the total count and the buckets for each facet) as our result.
*P.S. I am also new to MongoDB and arrived at this solution after a lot of searching, so please upvote if you find it useful, and let me know if you notice any errors or possible improvements. Thanks!*

Related

MongoDB Atlas Search not working as expected with Must and Should?

I am using MongoDB Atlas Search for full-text search. When I write queries with must and should, the must query always runs much faster than the should query, and I don't know why.
As per my understanding of the documentation:
must works as an AND operator
should works as an OR operator
MongoDB Schema -
{
"_id": {
"$oid": "63876f3ad75881cafe41a3e9"
},
"articleid": "b89bfa05-70b3-11ed-b775-2c59e5044e7b",
"headline": "Innovative Lessons for Rest of the World",
"subtitle": "",
"fulltext": "While the world wants to indigenize high-tech, weuses simple, local technologies to solve most of the problems.",
"pubdate": "2022-12-01",
"article_type": "print",
"date": 2022-12-01T00:00:00.000+00:00
}
}
Now, whenever I search with must, it works really fast and returns results in 1-2 seconds from 2 million records.
[
{
"$search":{
"index":"fulltext",
"compound":{
"filter":[
{
"range":{
"path":"date",
"gte":"2023-01-30T00:00:00.000Z",
"lte":"2023-02-05T00:00:00.000Z"
}
}
],
"must":[
{
"text":{
"query":"indigenize",
"path":[
"headline",
"fulltext",
"subtitle"
]
}
},
{
"text":{
"query":"technologies",
"path":[
"headline",
"fulltext",
"subtitle"
]
}
}
]
}
}
}
]
And when I use the should operator for the search, it takes 10x longer to get the results. I don't understand why.
[
{
"$search":{
"index":"fulltext",
"compound":{
"filter":[
{
"range":{
"path":"date",
"gte":"2023-01-30T00:00:00.000Z",
"lte":"2023-02-05T00:00:00.000Z"
}
}
],
"should":[
{
"text":{
"query":"indigenize",
"path":[
"headline",
"fulltext",
"subtitle"
]
}
},
{
"text":{
"query":"technologies",
"path":[
"headline",
"fulltext",
"subtitle"
]
}
}
]
}
}
}
]
Am I missing anything? My goal is to search with both the must (AND) operator and the should (OR) operator using multiple words. How can I search efficiently in the above collection schema to get data within seconds?

MongoDB -Query documents where nested array fields is equal to some value

I have a JSON object:
{
"ownershipSheetB": {
"lrUnitShares": [{
"description": "blabla1",
"lrOwners": [{
"lrOwnerId": 35780527,
"name": "somename1",
"address": "someadress1",
"taxNumber": "12345678910"
}
],
"lrUnitShareId": 29181970,
"subSharesAndEntries": [],
"orderNumber": "1"
}, {
"description": "blabla2",
"lrOwners": [{
"lrOwnerId": 35780528,
"name": "somename2",
"address": "someadress2",
"taxNumber": "12345678911"
}
],
"lrUnitShareId": 29181971,
"subSharesAndEntries": [],
"orderNumber": "2"
}
],
"lrEntries": []
}
}
I would like to query all documents that have taxNumber field equal to some string (say "12345678911" from the example above).
I have tried this query:
{"ownershipSheetB.lrUnitShares": { "lrOwners": {"taxNumber": "12345678910"}}}
but it returns no documents.
Solution 1: With dot notation
db.collection.find({
"ownershipSheetB.lrUnitShares.lrOwners.taxNumber": "12345678910"
})
Demo Solution 1 # Mongo Playground
Solution 2: With $elemMatch
db.collection.find({
"ownershipSheetB.lrUnitShares": {
$elemMatch: {
"lrOwners": {
$elemMatch: {
"taxNumber": "12345678910"
}
}
}
}
})
Demo Solution 2 # Mongo Playground

MongoDB Atlas Search with diacritic-insensitive matching

I am using MongoDB Atlas Search to perform a search in a collection. For this, I created an Atlas Search index:
{
"mappings": {
"dynamic": false,
"fields": {
"caption": {
"type": "string"
}
}
}
}
Here is my aggregation:
[
{
"$search":{
"text":{
"path":"caption",
"query":"Ingocnitáá",
"fuzzy":{
}
},
"highlight":{
"path":"caption"
}
}
}
]
I have below document in my collection:
{caption:"Ct tyu test Ingocnitáá"}
Issue: when I search for Ingocnitaa, the aggregation returns 0 results.
Is there anything wrong with my search index? I want a diacritic-insensitive search with highlighting.
There are two things missing:
Include the index name (it is recommended not to use the default index; create a new one instead)
Always pass fuzzy:{}
Here is a working query:
[
{
"$search":{
"index": 'messageText',
"text":{
"path":"caption",
"query":"Ingocnitaa",
"fuzzy":{
}
},
"highlight":{
"path":"caption"
}
}
}
]
Where messageText is search index name.
Search index definition:
{
"mappings": {
"dynamic": false,
"fields": {
"caption": {
"type": "string"
}
}
}
}
Reference: Click here

MongoDb aggregation project onto collection

I have a problem with a huge MongoDB aggregation pipeline. I have many constraints and I have simplified the problem a lot, so please don't discuss the goal of this query.
I've a mongo aggregation that gives something similar to this:
[
{
"content": {
"processes": [
{
"id": "101a",
"title": "delivery"
},
{
"id": "101b",
"title": "feedback"
}
]
}
}
]
To this intermediate result I'm forced to apply a project operation in order to obtain something similar to this:
[
{
"results":
{
"titles": [
{
"id": "101a",
"value": "delivery"
},
{
"id": "101b",
"value": "feedback"
}
]
}
}
]
enter code here
But when I apply these projections:
"results.titles.id": "$content.processes.id",
"results.titles.value": "$content.processes.title"
I obtain this:
[
{
"results":
{
"titles": {
"id": ["101a", "101b"]
"value": ["delivery", "feedback"]
}
}
}
}
]
The arrays are created, but not in the proper position.
Is it possible to exploit some operator inside the project operation in order to tell mongo to create an array in a parent position?
Something like this:
"results.titles.$[x].value" : "$content.processes.value"
You can use the dot notation to project entire array:
db.col.aggregate([
{
$project: {
"results.titles": "$content.processes"
}
}
])
If you also need to rename title to value, you have to apply the $map operator:
db.col.aggregate([
{
$project: {
"results.titles": {
$map: {
input: "$content.processes",
as: "process",
in: {
id: "$$process.id",
value: "$$process.title"
}
}
}
}
}
])

Mongo create request for embedded subfields

I'm new to MongoDB and need help finding database entries created within a requested time range. In my example there are a lot of embedded fields, and I do not understand the syntax for the query:
{
"_id": "54e1a045e4b03f5930293da6",
"_version": 31867,
"_transId": "4ae4d0e6-d3df-4a24-9621-1cdb7f12362f-10489329",
"accountBalances": {
"BALANCE": {
"thresholds": {
},
"quotas": "ROLLOVER_QUOTA": {
"thresholds": {
},
"quotaCode": "ROLLOVER_QUOTA",
"credits": {
"_1HVa0dJoEeSUwbM1-xYKvg": {
"startDate": ISODate("2015-03-24T21:00:00Z"),
"creditAmount": "547194099151",
"endDate": ISODate("2020-03-24T21:00:00Z"),
"started": true,
"debits": {
"consolidated": {
"creationDate": ISODate("2015-04-17T18:00:01.469Z"),
"debitAmount": "547194090291",
"debitId": "consolidated"
}
},
"creditId": "_1HVa0dJoEeSUwbM1-xYKvg"
}
}
}
}
}
I need to search for entries which have a debit creation date with $gte: ISODate("2015-03-16T00:00:00.000Z") and $lte: ISODate("2015-03-16T04:00:00.000Z").
You can use the dot notation to access the fields of an embedded document:
db.collection.find(
{
"accountBalances.BALANCE.quotas.ROLLOVER_QUOTA.credits._1HVa0dJoEeSUwbM1-xYKvg.debits.creationDate": {
"$gte": ISODate("2015-03-16T00:00:00.000Z"),
"$lte": ISODate("2015-03-16T04:00:00.000Z")
}
}
);