Use regex in $expr in a MongoDB aggregation pipeline - mongodb

I have Country Table
{
"_id" : ObjectId("627cd43f48aea72fdc0d88e0"),
"county" : "india"
},
{
"_id" : ObjectId("627cd43f48aea72fdc0d88e1"),
"county" : "china"
}
And City Table
{
"_id" : ObjectId("627cd43f48aea72fdc0d88e0"),
"county_name" : "India"
},
{
"_id" : ObjectId("627cd43f48aea72fdc0d88e1"),
"county_name" : "China"
}
In the Country table the country name is in lower case, and in the City table the country name is capitalized. How can I use a LIKE-style (case-insensitive) condition to join both collections on the matching name?
Ex. Get Data of india Like India
// Join every country to the cities whose county_name matches the country's
// county value, ignoring letter case, and emit one document per matched city.
db.getCollection("countryTable").aggregate([
  {
    $lookup: {
      from: "cityTable",
      // Expose the country's value to the sub-pipeline as $$county.
      let: { county: "$county" },
      pipeline: [
        {
          $match: {
            $expr: {
              // The country's value is used as the pattern; "i" makes the match case-insensitive.
              $regexMatch: { input: "$county_name", regex: "$$county", options: "i" }
            }
          }
        }
      ],
      as: "citydetails"
    }
  },
  // Flatten the joined array: one output document per matched city.
  { $unwind: "$citydetails" }
])

With MongoDB v4.0, you can do the $match in subpipeline with $toLower
// Case-insensitive join: lower-case both names and compare them for equality.
db.countryTable.aggregate([
  {
    $lookup: {
      from: "cityTable",
      // Expose the country's value to the sub-pipeline as $$county.
      let: { county: "$county" },
      pipeline: [
        {
          $match: {
            $expr: {
              $eq: [{ $toLower: "$county_name" }, { $toLower: "$$county" }]
            }
          }
        }
      ],
      as: "citydetails"
    }
  },
  // Flatten the joined array: one output document per matched city.
  { $unwind: "$citydetails" }
])
Here is the Mongo playground for your reference.

Related

MongoDB - Aggregate (join two collection) with nested array as join key

I have two collections. (updated)
I need to join them and return only a few fields.
As I understand, I should use aggregate and projection features.
I try to find examples, but can't find the right, in my case foreign key is placed in the array in the nested field.
It is not easy for me, I’m new at this, and I couldn’t win against this MongoDB query.
In the code block below I wrote the simplified equivalent models (doc1, doc2) and expected results.
Does anyone have any ideas?
db.doc1.deleteMany({});
db.doc2.deleteMany({});
// two related collections: doc1 references doc2 by code
db.doc1.insert( [
{
"version" : 123456,
"doc" : {
"code":"A1",
"name":"some document A1",
"doc2CodeArray":[
{"code":"B01"},
{"code":"B02"},
{"code":"B03"},
{"code":"B04"},
{"code":"B05"},
{"code":"B06"}
]
}
},
{
"version" : 123457,
"doc" : {
"code":"A2",
"name":"some document A2",
"doc2CodeArray":[
{"code":"B07"},
{"code":"B08"},
{"code":"B09"},
{"code":"B10"},
{"code":"B11"},
{"code":"B12"}
]
}
},
{
"version" : 123457,
"doc" : {
"code":"A2",
"name":"some document A2",
"doc2CodeArray":null
}
}
]);
db.doc2.insert( [
{
"version" : 567890,
"doc" : {
"code":"B01",
"valueArray":[{"valueType":"int","valueData":"1"}],
"doc2Type":{"code":"C1"}
}
},
{"version" : 567890,"doc" : { "code":"B02", "valueArray":[{"valueType":"int","valueData":"2","isDefault":false}],"doc2Type":{"code":"C2","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B03", "valueArray":[{"valueType":"int","valueData":"3","isDefault":false}],"doc2Type":{"code":"C3","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B04", "valueArray":[{"valueType":"int","valueData":"4","isDefault":false}],"doc2Type":{"code":"C4","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B05", "valueArray":[{"valueType":"int","valueData":"5","isDefault":false}],"doc2Type":{"code":"C5","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B06", "valueArray":[{"valueType":"int","valueData":"6","isDefault":false},
{"valueType":"str","valueData":"F","isDefault":false}],"doc2Type":{"code":"C6","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B07", "valueArray":[{"valueType":"int","valueData":"1","isDefault":false}],"doc2Type":{"code":"C1","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B08", "valueArray":[{"valueType":"int","valueData":"2","isDefault":false}],"doc2Type":{"code":"C2","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B09", "valueArray":[{"valueType":"int","valueData":"3","isDefault":false}],"doc2Type":{"code":"C3","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B10", "valueArray":[{"valueType":"int","valueData":"4","isDefault":false}],"doc2Type":{"code":"C4","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B11", "valueArray":[{"valueType":"int","valueData":"5","isDefault":false}],"doc2Type":{"code":"C5","addInfo":"1234567890"}}},
{"version" : 567890,"doc" : { "code":"B12", "valueArray":[{"valueType":"int","valueData":"6","isDefault":false}],"doc2Type":{"code":"C6","addInfo":"1234567890"}}},
]);
The result that I want
I need join collections and select only few props
{
"doc":{code:"A1"},
"doc2Items":[
{"code":"C2", "value":{"value":"3"}},
{"code":"C3", "value":{"value":"4"}},
]
},
{
"doc":{code:"A2"},
"doc2Items":[
{"code":"C2", "value":{"value":"2"}},
{"code":"C3", "value":{"value":"3"}},
{"code":"C4", "value":{"value":"4"}},
]
}
https://mongoplayground.net/p/1-1SU8SgbTQ
db.doc1.aggregate([
  // Attach the matching doc2 documents inside the "doc" sub-document.
  {
    $lookup: {
      from: "doc2",
      localField: "doc.doc2CodeArray.code",
      foreignField: "doc.code",
      as: "doc.doc2Items"
    }
  },
  // Promote "doc" (which now carries doc2Items) to the top level.
  { $replaceRoot: { newRoot: "$doc" } },
  // Keep only the type code and the value array of every joined item.
  {
    $set: {
      doc2Items: {
        $map: {
          input: "$doc2Items",
          as: "item",
          in: {
            doc2TypeCode: "$$item.doc.doc2Type.code",
            doc2ValueArray: "$$item.doc.valueArray"
          }
        }
      }
    }
  },
  // The join-key array is not wanted in the output.
  { $unset: "doc2CodeArray" }
])
1. $lookup - Join doc1 with doc2.
Pipeline:
1.1. $match - Keep the doc2 documents whose doc.code is in the doc.doc2CodeArray.code array of the current doc1 document.
1.2. $project - Shape each matched document as it should be returned in doc2Items.
2. $project - Shape the final output document.
// Join doc1 with doc2 through the nested doc.doc2CodeArray[].code keys and
// return a trimmed-down view of both sides.
db.doc1.aggregate([
  {
    $lookup: {
      from: "doc2",
      let: {
        // $in requires an array as its second argument. A doc1 document whose
        // doc2CodeArray is null or missing would make the whole aggregation
        // fail, so fall back to an empty array (such a document simply gets
        // an empty doc2Items).
        doc2CodeArray: { $ifNull: ["$doc.doc2CodeArray.code", []] }
      },
      pipeline: [
        // Keep the doc2 documents referenced by the current doc1 document.
        { $match: { $expr: { $in: ["$doc.code", "$$doc2CodeArray"] } } },
        // Return only the fields needed from doc2.
        {
          $project: {
            _id: 0,
            doc2TypeCode: "$doc.doc2Type.code",
            doc2ValueArray: "$doc.valueArray"
          }
        }
      ],
      as: "doc2Items"
    }
  },
  // Shape the final output document.
  {
    $project: {
      _id: 0,
      doc1Code: "$doc.code",
      doc1Name: "$doc.name",
      doc2Items: "$doc2Items"
    }
  }
])
Sample Mongo Playground
Over the last few days I dived deep into reading,
and I have arrived at an acceptable result.
I updated my question with an example of more relevant data, and I give my solution in the example below.
// For every doc1 document whose code starts with "A", return its code plus the
// joined doc2 items of type C2, C3 or C4, each reduced to its type code and
// the value of its first valueArray entry.
db.doc1.aggregate([
  {
    $match: {
      // Filter by doc.code prefix. The pattern is anchored with ^ so that it
      // really is a prefix match; an unanchored 'A' matches the letter anywhere.
      "doc.code": { $regex: '^A', $options: 'i' },
      // Skip documents that have nothing to join on.
      "doc.doc2CodeArray": { $ne: null }
      //"doc.code": "A1"
    }
  },
  {
    // Join doc1 and doc2.
    $lookup: {
      from: "doc2",
      localField: "doc.doc2CodeArray.code",
      foreignField: "doc.code",
      as: "doc2Items"
    }
  },
  {
    $project: {
      _id: 0,
      "doc.code": 1,
      // Items from doc2.
      doc2Items: {
        // Not every doc2 property is needed, so each item is reshaped with $map.
        $map: {
          input: {
            // Only items with type code C2, C3 or C4 are wanted.
            $filter: {
              input: "$doc2Items",
              as: "d0",
              cond: { $in: ["$$d0.doc.doc2Type.code", ["C2", "C3", "C4"]] }
            }
          },
          as: "d",
          in: {
            code: "$$d.doc.doc2Type.code",
            value: {
              // Only the first record of the nested array is wanted ...
              $arrayElemAt: [
                {
                  // ... and only its valueData property.
                  $map: {
                    input: "$$d.doc.valueArray",
                    as: "d2",
                    in: { value: "$$d2.valueData" }
                  }
                },
                0
              ]
            }
          }
        }
      }
    }
  }
])
Now I read the needed data two times faster than before.
I think I am beginning to understand the MongoDB query philosophy.
Thanks, everyone, for the answers and tips! 'Yong Shun' and 'qtxo' helped me find the correct solution.

MongoDB aggregate to find, count and project unique documents

Below are the sample collection.
col1:
"_id" : ObjectId("5ec293782bc00b43b463b67c")
"status" : ["running"],
"name" : "name1 ",
"dcode" : "dc001",
"address" : "address1",
"city" : "city1"
col2:
"_id" : ObjectId("5ec296182bc00b43b463b68f"),
"scode" : ObjectId("5ec2933df6079743c0a2a1f8"),
"ycode" : ObjectId("5ec293782bc00b43b463b67c"),
"city" : "city1",
"lockedDate" : ISODate("2020-05-20T00:00:00Z"),
"_id" : ObjectId("5ec296182bc00b43b463688b"),
"scode" : ObjectId("5ec2933df6079743c0a2a1ff"),
"ycode" : ObjectId("5ec293782bc00b43b463b67c"),
"city" : "city1",
"lockedDate" : ISODate("2020-05-20T00:00:00Z"),
"_id" : ObjectId("5ec296182bc00b43b44fc6cb"),
"scode" :null,
"ycode" : ObjectId("5ec293782bc00b43b463b67c"),
"city" : "city1",
"lockedDate" : ISODate("2020-05-20T00:00:00Z"),
problemStatement:
I want to display name from col1 & count of documents from col2 according to ycode where scode is != null
Tried attempt:
// Attempt: join col1 with the col2 documents that share its _id (as ycode) and
// city, have a non-null scode and the given lockedDate, then count and project.
db.col1.aggregate([
  { $match: { city: "city1" } },
  {
    $lookup: {
      from: "col2",
      let: { ycode: "$_id", city: "$city" },
      pipeline: [
        {
          $match: {
            scode: { $ne: null },
            lockedDate: ISODate("2020-05-20T00:00:00Z"),
            $expr: {
              $and: [
                { $eq: ["$ycode", "$$ycode"] },
                { $eq: ["$city", "$$city"] }
              ]
            }
          }
        }
      ],
      as: "col2"
    }
  },
  // One document per joined col2 entry.
  { $unwind: "$col2" },
  // Replaces the stream with a single { ycode: <n> } document ...
  { $count: "ycode" },
  // ... so name and status are no longer available to project here.
  { $project: { name: 1, status: 1 } }
])
The problem with this query is that it either returns the count or projects the name and status, but not both. Run as written, it returns {}. If I remove {'$count':'ycode'}, it projects the values but gives no count; if I remove $project instead, I get the count ({ycode:2}) but no projected fields. I want both in the result. Any suggestions?
ORM: mongoose v>5, mongodb v 4.0
You can try below query :
// Return each matching col1 name together with the number of col2 documents
// that reference it, have a non-null scode and carry the given lockedDate.
db.col1.aggregate([
  { $match: { city: "city1" } },
  {
    $lookup: {
      from: "col2",
      // Variables are built from col1 fields (not col2) and are read inside
      // the sub-pipeline as $$id and $$city.
      let: { id: "$_id", city: "$city" },
      pipeline: [
        {
          $match: {
            scode: { $ne: null },
            lockedDate: ISODate("2020-05-20T00:00:00Z"),
            $expr: {
              $and: [
                { $eq: ["$ycode", "$$id"] },
                { $eq: ["$city", "$$city"] }
              ]
            }
          }
        },
        // Optional: only the number of matches is needed, so keeping just _id
        // reduces the size of the joined array.
        { $project: { _id: 1 } }
      ],
      // Always an array: empty when nothing matched, otherwise an array of objects.
      as: "col2"
    }
  },
  // Name from col1 plus the number of joined col2 documents.
  { $project: { _id: 0, name: 1, countOfCol2: { $size: "$col2" } } }
])
Test : mongoplayground

How to $lookup by avoiding null values in mongodb aggregate

Here I'm using $lookup to do a left join from other collections. The query works fine, but when some records are missing values it returns:
errmsg : $in requires an array as a second argument, found: null
Heres the querying document structure :
{
"no" : "2020921008981",
"sale" : {
"soldItems" : [
{
"itemId" : "5b55ac7f0550de00210a3b24",
},
{
"itemId" : "5b55ac7f0550de00215584re",
}
],
"bills" : [
{
"billNo" : "2020921053467",
"insurancePlanId" : "160",
},
{
"billNo" : "2020921053467",
"insurancePlanId" : "170",
}
],
"visitIds" : [
5b55ac7f0550de00210a3b24, 5b55ac7f0550de00210a3b24
]
}
}
the query :
// Join each case with its insurances (by coverage plan id) and its items
// (by item id converted to ObjectId).
db.case.aggregate([
  {
    $lookup: {
      from: "insurance",
      let: { ipids: "$sale.bill.insurancePlanId" },
      pipeline: [
        // One document per coverage plan so each plan id can be tested on its own.
        { $unwind: "$coveragePlans" },
        { $match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } } },
        { $project: { _id: 0, name: 1 } }
      ],
      as: "insurances"
    }
  },
  {
    $lookup: {
      from: "item",
      let: { iid: "$salesOrder.purchaseItems.itemRefId" },
      pipeline: [
        {
          $match: {
            $expr: {
              $in: [
                "$_id",
                // The referenced ids are converted to ObjectId before comparing with _id.
                { $map: { input: "$$iid", in: { $toObjectId: "$$this" } } }
              ]
            }
          }
        }
      ],
      as: "items"
    }
  }
])
insurance collection :
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "HIJKL"
"coveragePlans" : [
{
"_id" : "160",
"name" : "UVWZ",
},
{
"_id" : "161",
"name" : "LMNO",
}
]
},
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "WXYZ"
"coveragePlans" : [
{
"_id" : "169",
"name" : "5ABC",
},
{
"_id" : "170",
"name" : "4XYZ",
}
]
}
item collection :
{
"_id" : ObjectId("5b55ac7f0550de00210a3b24"),
"code" : "ABCDE"
},
{
"_id" : ObjectId("5b55ac7f0550de00215584re"),
"code" : "PQRST"
}
How can I avoid this and do null checks effectively before passing documents to the next stages? I tried { $match: { "fieldName": { $exists: true, $ne: null } } }, but it returns a Mongo error about the format. If that is the way to go, please mention the stage where I should put it. Thanks in advance.
You can use $ifNull operator
let: { ipids: {$ifNull:["$sale.bill.insurancePlanId", [] ]} },
EDIT: To skip empty "$salesOrder.purchaseItems.itemRefId" values
let: { iid: {$filter: {input:"$salesOrder.purchaseItems.itemRefId", cond:{$ne:["$$this", ""]}}} },
You can get around that by not using $in.
It looks like this $map is executed separately for every document in the items collection. If you were to run the map in an $addFields stage, you could use the simple form of $lookup to match the added field to _id, which would automatically handle missing, null, and array values.
Remove the added field with a $project stage if necessary.
// Join each case with its insurances and items without failing on cases that
// lack the referenced ids.
db.case.aggregate([
  {
    $lookup: {
      from: "insurance",
      // $in requires an array as its second argument; fall back to [] when the
      // case has no insurance plan ids so the stage does not throw.
      let: { ipids: { $ifNull: ["$sale.bill.insurancePlanId", []] } },
      pipeline: [
        { $unwind: "$coveragePlans" },
        { $match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } } },
        { $project: { _id: 0, name: 1 } }
      ],
      as: "insurances"
    }
  },
  {
    // Convert the referenced item ids once per case instead of once per item
    // document. The input is the case's own field: "let" variables such as
    // $$iid exist only inside a $lookup sub-pipeline.
    $addFields: {
      matchArray: {
        $map: {
          input: "$salesOrder.purchaseItems.itemRefId",
          // $convert with onError/onNull yields null for empty or malformed
          // ids instead of aborting the aggregation as $toObjectId would.
          in: { $convert: { input: "$$this", to: "objectId", onError: null, onNull: null } }
        }
      }
    }
  },
  {
    // The simple form of $lookup copes with a missing, null or array localField.
    $lookup: {
      from: "item",
      localField: "matchArray",
      foreignField: "_id",
      as: "items"
    }
  },
  // Remove the helper field added above.
  { $project: { matchArray: 0 } }
])

How to avoid possible null error scenarios in mongodb Aggregate

I've set up a fairly long mongo aggregate query to join several mongo collections together and shape up them into output of set of string fields. The query works fine as long as all the required values (ie : ids) exists but it breaks when it encounters null or empty values when doing the $lookup.
Following is the patientFile collection thats being queried :
{
"no" : "2020921008981",
"startDateTime" : ISODate("2020-04-01T05:19:02.263+0000")
"saleId" : "5e8424464475140d19c6941b",
"patientId" : "5e8424464475140d1955941b"
}
sale collection :
{
"_id" : ObjectId("5e8424464475140d19c6941b"),
"invoices" : [
{
"billNumber" : "2020921053467",
"type" : "CREDIT",
"insurancePlanId" : "160"
},
{
"billNumber" : "2020921053469",
"type" : "DEBIT",
"insurancePlanId" : "161"
}
],
"status" : "COMPLETE"
}
insurance collection :
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "HIJKL"
"plans" : [
{
"_id" : "160",
"name" : "UVWZ",
},
{
"_id" : "161",
"name" : "LMNO",
}
]
}
patient collection :
{
"_id" : ObjectId("5b55cc5c0550de00217ae0f3"),
"name" : "TAN NAI",
"userId" : {
"number" : "787333128H"
}
}
Heres the aggregate query :
// Report per patient file: start time, patient name, newline-separated invoice
// numbers and newline-separated insurance names.
db.getCollection("patientFile").aggregate(
  [
    // Files started in the first four months of 2020.
    {
      $match: {
        startDateTime: {
          $gte: ISODate("2020-01-01T00:00:00.000Z"),
          $lt: ISODate("2020-05-01T00:00:00.000Z")
        }
      }
    },
    // patientId is stored as a string and is converted to ObjectId to match patient._id.
    {
      $lookup: {
        from: "patient",
        let: { pid: "$patientId" },
        pipeline: [
          { $match: { $expr: { $eq: ["$_id", { $toObjectId: "$$pid" }] } } },
          { $project: { name: 1, "userId.number": 1, _id: 0 } }
        ],
        as: "patient"
      }
    },
    // Same conversion for saleId against sale._id.
    {
      $lookup: {
        from: "sale",
        let: { sid: "$saleId" },
        pipeline: [
          { $match: { $expr: { $eq: ["$_id", { $toObjectId: "$$sid" }] } } }
        ],
        as: "sale"
      }
    },
    { $unwind: "$sale" },
    { $unwind: "$patient" },
    // Insurances that own one of the sale's plan ids ([] when there are none).
    {
      $lookup: {
        from: "insurance",
        let: { pid: { $ifNull: ["$sale.bill.insurancePlanId", []] } },
        pipeline: [
          { $unwind: "$plans" },
          { $match: { $expr: { $in: ["$plans._id", "$$pid"] } } },
          { $project: { _id: 0, name: 1 } }
        ],
        as: "insurances"
      }
    },
    { $match: { "insurances.name": { $exists: true, $ne: null } } },
    // Fold each list into one string, putting "\n" between entries.
    {
      $addFields: {
        invoice: {
          $reduce: {
            input: { $ifNull: ["$sale.bill.billNumber", []] },
            initialValue: "",
            in: {
              $cond: {
                if: { $eq: ["$$value", ""] },
                then: "$$this",
                else: { $concat: ["$$value", "\n", "$$this"] }
              }
            }
          }
        },
        insurances: {
          $reduce: {
            input: { $ifNull: ["$insurances.name", []] },
            initialValue: "",
            in: {
              $cond: {
                if: { $eq: ["$$value", ""] },
                then: "$$this",
                else: { $concat: ["$$value", "\n", "$$this"] }
              }
            }
          }
        }
      }
    },
    {
      $project: {
        startDateTime: 1,
        patientName: "$patient.name",
        invoice: 1,
        insurances: 1
      }
    }
  ],
  { allowDiskUse: true }
)
Error :
Unable to execute the selected commands
Mongo Server error (MongoCommandException): Command failed with error 241 (ConversionFailure): 'Failed to parse objectId '' in $convert with no onError value: Invalid string length for parsing to OID, expected 24 but found 0' on server localhost:27017.
The full response is:
{
"ok" : 0.0,
"errmsg" : "Failed to parse objectId '' in $convert with no onError value: Invalid string length for parsing to OID, expected 24 but found 0",
"code" : NumberInt(241),
"codeName" : "ConversionFailure"
}
I tried $ifNull as a solution, but this error keeps coming up. What would be the best step to take in this scenario?
I see a couple of ways:
Instead of converting the string value to an ObjectId to test, convert the ObjectId to a string
$match: {
$expr: {
$eq: [{$toString: "$_id"}, "$$pid" ]
}
}
Instead of the $toObjectId helper, use $convert and provide onError and/or onNull values:
$match: {
$expr: {
$eq: ["$_id", { $convert: {
input: "$$pid",
to: "objectId",
onError: {error:true},
onNull: {isnull:true}
}}]
}
}

MongoDB aggregate: apply sort to $lookup results, and add an index number field

The aggregate was executed.
I got the results using lookup, but I need a sort.
In addition, I want to assign an index to the result value.
CollectionA :
{
"_id" : ObjectId("5a6cf47415621604942386cd"),
"contents" : [
ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA"),
ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
],
"name" : "jason"
}
CollectionB :
{
"_id" : ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA")
"title" : "a title",
"date" : 2018-01-02
},
{
"_id" : ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
"title" : "a title",
"date" : 2018-01-01
}
Query:
// Look up the B documents referenced by A.contents, then sort on item.date.
db.getCollection("A").aggregate([
  { $match: { _id: ObjectId("5a6cf47415621604942386cd") } },
  {
    $lookup: { from: "B", localField: "contents", foreignField: "_id", as: "item" }
  },
  { $sort: { "item.date": -1 } }
]);
Want Result:
{
"_id" : ObjectId("5a6cf47415621604942386cd"),
"contents" : [
{
"_id" : ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
"title" : "a title",
"date" : 2018-01-01,
"index" : 0
},
{
"_id" : ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA")
"title" : "a title",
"date" : 2018-01-02,
"index" : 1
}],
"name" : "jason"
}
The current problem is that the sort is not applied to the looked-up items.
I also don't know how to assign an index to each item.
The aggregation below may help you get your desired result.
// Replace CollectionA.contents (an array of ids) with the referenced
// CollectionB documents, sorted by date, each tagged with its position.
db.CollectionA.aggregate([
  { $match: { _id: ObjectId("5a6cf47415621604942386cd") } },
  {
    $lookup: {
      from: "CollectionB",
      let: { contents: "$contents" },
      pipeline: [
        { $match: { $expr: { $in: ["$_id", "$$contents"] } } },
        // Sorting inside the sub-pipeline fixes the order of the joined array.
        { $sort: { date: 1 } }
      ],
      as: "contents"
    }
  },
  {
    // $addFields rather than $project: only "contents" is rewritten, so the
    // other fields of the document (e.g. "name") stay in the result.
    $addFields: {
      contents: {
        $map: {
          // Iterate over the positions 0..size-1 so each position is available as the index.
          input: { $range: [0, { $size: "$contents" }, 1] },
          as: "element",
          in: {
            // The joined document comes first so "index" is appended as its last field.
            $mergeObjects: [
              { $arrayElemAt: ["$contents", "$$element"] },
              { index: "$$element" }
            ]
          }
        }
      }
    }
  }
])
One way to go about it would be to unwind the array, sort it and then group it back
// Sort the looked-up items by unwinding them, sorting the resulting documents
// and grouping them back into one document per original _id.
db.A.aggregate([
  { $match: { _id: ObjectId("5a6cf47415621604942386cd") } },
  {
    $lookup: { from: "B", localField: "contents", foreignField: "_id", as: "item" }
  },
  // One document per joined item so that $sort can order them.
  { $unwind: "$item" },
  // -1 is newest first; use 1 for oldest first.
  { $sort: { "item.date": -1 } },
  {
    $group: {
      _id: "$_id",
      // $group keeps only the fields listed here, so carry "name" over
      // explicitly; it is the same on every unwound copy of the document.
      name: { $first: "$name" },
      // Items are pushed in the order produced by the preceding $sort.
      contents: { $push: "$item" }
    }
  }
])
Another method is, (this is applicable only if the date field corresponds to the document creation date),
// Return the looked-up items ordered by their _id, newest first.
db.A.aggregate([
  { $match: { _id: ObjectId("5a6cf47415621604942386cd") } },
  {
    $lookup: {
      from: "B",
      // $in requires an array; fall back to [] when "contents" is absent.
      let: { contents: { $ifNull: ["$contents", []] } },
      pipeline: [
        { $match: { $expr: { $in: ["$_id", "$$contents"] } } },
        // A top-level $sort only orders whole documents and never rearranges
        // the elements of an array field, so the joined documents are sorted
        // here, inside the lookup, before they are collected into "item".
        { $sort: { _id: -1 } }
      ],
      as: "item"
    }
  }
])
Basically, this sorts on the basis of _id; since an ObjectId begins with its creation timestamp, the resulting order follows the documents' creation time.