Mongo Aggregation using $Max - mongodb

I have a collection that stores history, i.e. a new document is created every time a change is made to the data, I need to extract fields based on the max value of a date field, however my query keeps returning either all of the dates or requires me to push the fields into an array which make the data hard to analyze for an end-user.
Expected output as CSV:
MAX(DATE), docID, url, type
1579719200216, 12371, www.foodnetwork.com, food
1579719200216, 12371, www.cnn.com, news,
1579719200216, 12371, www.wikipedia.com, info
Sample Doc:
{
"document": {
"revenueGroup": "fn",
"metaDescription": "",
"metaData": {
"audit": {
"lastModified": 1312414124,
"clientId": ""
},
"entities": [],
"docId": 1313943,
"url": ""
},
"rootUrl": "",
"taggedImages": {
"totalSize": 1,
"list": [
{
"image": {
"objectId": "woman-reaching-for-basket",
"caption": "",
"url": "",
"height": 3840,
"width": 5760,
"owner": "Facebook",
"alt": "Woman reaching for basket"
},
"tags": {
"totalSize": 4,
"list": []
}
}
]
},
"title": "The 8 Best Food Items of 2020",
"socialTitle": "The 8 Best Food Items of 2020",
"primaryImage": {
"objectId": "woman-reaching-for-basket.jpg",
"caption": "",
"url": "",
"height": 3840,
"width": 5760,
"owner": "Hero Images / Getty Images",
"alt": "Woman reaching for basket in laundry room"
},
"subheading": "Reduce your footprint with these top-performing diets",
"citations": {
"list": []
},
"docId": 1313943,
"revisionId": "1313943_1579719200216",
"templateType": "LIST",
"documentState": {
"activeDate": 579719200166,
"state": "ACTIVE"
}
},
"url": "",
"items": {
"totalSize": "",
"list": [
{
"type": "recipe",
"data": {
"comInfo": {
"list": [
{
"type": "food",
"id": "https://www.foodnetwork.com"
}
]
},
"type": ""
},
"id": 4,
"uuid": "1313ida-qdad3-42c3-b41d-223q2eq2j"
},
{
"type": "recipe",
"data": {
"comInfo": {
"list": [
{
"type": "news",
"id": "https://www.cnn.com"
},
{
"type": "info",
"id": "https://www.wikipedia.com"
}
]
},
"type": "PRODUCT"
},
"id": 11,
"uuid": "318231jc-da12-4475-8994-283u130d32"
}
]
},
"vertical": "food"
}
Below query:
db.collection.aggregate([
{
$match: {
vertical: "food",
"document.documentState.state": "ACTIVE",
"document.templateType": "LIST"
}
},
{
$unwind: "$document.items"
},
{
$unwind: "$document.items.list"
},
{
$unwind: "$document.items.list.contents"
},
{
$unwind: "$document.items.list.contents.list"
},
{
$match: {
"document.items.list.contents.list.type": "recipe",
"document.revenueGroup": "fn"
}
},
{
$sort: {
"document.revisionId": -1
}
},
{
$group: {
_id: {
_id: {
docId: "$document.docId",
date: {$max: "$document.revisionId"}
},
url: "$document.items.list.contents.list.data.comInfo.list.id",
type: "$document.items.list.contents.list.data.comInfo.list.type"
}
}
},
{
$project: {
_id: 1
}
},
{
$sort: {
"document.items.list.contents.list.id": 1, "document.revisionId": -1
}
}
], {
allowDiskUse: true
})

First of all, you need to go through the documentation of the $group aggregation here.
you should be doing this instead:
{
$group: {
"_id": "$document.docId"
"date": {
$max: "$document.revisionId"
},
"url": {
$first: "$document.items.list.contents.list.data.comInfo.list.id"
},
"type": {
$first:"$document.items.list.contents.list.data.comInfo.list.type"
}
}
}
This will give you the required output.

Related

Adding a nested value as a field - MongDB aggregation

So I have a parent document with users, as well as an array that has users too. I want to add the DisplayName from the nested users array to the aggregation output. Any ideas?
Output I'm looking to achieve:
[
{
"user": {
"_id": "11",
"Name": "Dave",
"DocID": "1",
"DocDisplyName": "ABC"
},
{
"user": {
"_id": "33",
"Name": "Henry",
"DocID": "1",
"DocDisplyName": "ABC",
"BranchDisplayName:"BranchA"
}
}
]
And so on.. So an array of all users and for users that belong to a branch, add the branch display Name to the output.
// Doc 1
{
"_id": "1",
"DisplayName": "ABC",
"Users": [
{ "_id": "11", "Name": "Dave" },
{ "_id": "22", "Name": "Steve" }
],
"Branches": [
{
"_id": "111",
"DisplayName": "BranchA",
"Users": [
{ "_id": "33", "Name": "Henry" },
{ "_id": "44", "Name": "Josh" },
],
},
{
"_id": "222",
"DisplayName": "BranchB",
"Users": [
{ "_id": "55", "Name": "Mark" },
{ "_id": "66", "Name": "Anton" },
],
}
]
}
``Doc 2
{
"_id": "2",
"DisplayName": "DEF",
"Users": [
{ "_id": "77", "Name": "Josh" },
{ "_id": "88", "Name": "Steve" }
],
"Branches": [
{
"_id": "333",
"DisplayName": "BranchA",
"Users": [
{ "_id": "99", "Name": "Henry" },
{ "_id": "10", "Name": "Josh" },
],
},
{
"_id": "444",
"DisplayName": "BranchB",
"Users": [
{ "_id": "112", "Name": "Susan" },
{ "_id": "112", "Name": "Mary" },
],
}
]
}
Collection.aggregate([
{
$addFields: {
branchUsers: {
$reduce: {
input: "$Branches.Users",
initialValue: [],
in: {
$concatArrays: ["$$this", "$$value"],
},
},
},
},
},
{
$addFields: {
user: {
$concatArrays: ["$branchUsers", "$Users"],
},
},
},
{
$addFields: {
"user.DocID": "$_id","user.DocDisaplyName": "$DisplayName"
},
},
{
$unwind: "$user",
},
{
$project: {
_id: 0,
user: 1,
},
}
])
Thanks in advance!
OK I found a solution.
{
$addFields: {
"branchUsers.BranchDisplayName": {
$let: {
vars: {
first: {
$arrayElemAt: [ "$Branches", 0 ]
}
},
in: "$$first.DisplayName"
}
}
}
},
This creates the field only for the users that belong to the branch

How can I merge two documents, get rid of duplicates and keep certain data?

I have the following data, which describes who is going to do what work.
Basically I want to replace the "workId" and "userId" with objects that contain all the data from their respective documents and retain the "when" data.
I am starting with this data:
{
"schedule": {
"WorkId": "4e51dc1069c27c015ede4e3e",
"daily": [
{
"when": 1,
"U_W": [
{
"workId": "3a60dc1069c27c015ede1111",
"userId": "5f60c3b7f93d8e00a1cdf414"
},
{
"workId": "3a60dc1069c27c015ede1122",
"userId": "5f60c3b7f93d8e00a1cdf415"
}
]
}
]
}
}
Here is the user table
"userSchema": [
{
_id: "5f60c3b7f93d8e00a1cdf414",
Name: "Bob"
},
{
_id: "5f60c3b7f93d8e00a1cdf415",
Name: "Joe"
}
],
Here is the work table
"workSchema": [
{
_id: "3a60dc1069c27c015ede1111",
Name: "shovel"
},
{
_id: "3a60dc1069c27c015ede1122",
Name: "hammer"
}
]
what I want to end up with is this
{
"schedule": {
"WorkId": "4e51dc1069c27c015ede4e3e",
"daily": [
{
"when": 1,
"U_W": [
{
"work": {
"id": "3a60dc1069c27c015ede1111",
"name": "shovel"
},
"user": {
"id": "5f60c3b7f93d8e00a1cdf414",
"name": "bob"
}
},
{
"work": {
"id": "3a60dc1069c27c015ede1122",
"name": "hammer"
},
"user": {
"id": "5f60c3b7f93d8e00a1cdf415",
"name": "joe"
}
}
]
}
]
}
}
Here is my first attempt:
I have it joining the the two documents
How can I get rid of the duplicates ( bob:hammer and joe:shovel ) ?
and how do I include the "when" ?
Here is the playground that provides the following :
[
{
"_id": ObjectId("5a934e000102030405000000"),
"user_info": {
"Name": "Bob",
"_id": "5f60c3b7f93d8e00a1cdf414"
},
"work_role": {
"Name": "shovel",
"_id": "3a60dc1069c27c015ede1111"
}
},
{
"_id": ObjectId("5a934e000102030405000000"),
"user_info": {
"Name": "Bob",
"_id": "5f60c3b7f93d8e00a1cdf414"
},
"work_role": {
"Name": "hammer",
"_id": "3a60dc1069c27c015ede1122"
}
},
{
"_id": ObjectId("5a934e000102030405000000"),
"user_info": {
"Name": "Joe",
"_id": "5f60c3b7f93d8e00a1cdf415"
},
"work_role": {
"Name": "shovel",
"_id": "3a60dc1069c27c015ede1111"
}
},
{
"_id": ObjectId("5a934e000102030405000000"),
"user_info": {
"Name": "Joe",
"_id": "5f60c3b7f93d8e00a1cdf415"
},
"work_role": {
"Name": "hammer",
"_id": "3a60dc1069c27c015ede1122"
}
}
]
After beating my head against the wall for some time...
I found a pretty cool feature of mongo "references"
eg:
REF_work: { type: Schema.Types.ObjectId, required: true, ref: 'work' },
REF_person: { type: Schema.Types.ObjectId, required: true, ref: 'users' },
then when I call it from my get function I add a populate to the find
assignments.find(query).populate('daily.cp.REF_person').populate('daily.cp.REF_work');
I get exactly what I want:
[
{
"_id": ObjectId("5a934e000102030405000000"),
"REF_person": {
"Name": "Bob",
"_id": "5f60c3b7f93d8e00a1cdf414"
},
"REF_work": {
"Name": "shovel",
"_id": "3a60dc1069c27c015ede1111"
}
},
{
"_id": ObjectId("5a934e000102030405000000"),
"REF_person": {
"Name": "Joe",
"_id": "5f60c3b7f93d8e00a1cdf415"
},
"REF_work": {
"Name": "hammer",
"_id": "3a60dc1069c27c015ede1122"
}
}
]

mongodb aggregate lookup with a query

I have collections with following values:
reports
{
"_id": { "$oid": "5f05e1d13e0f6637739e215b" },
"testReport": [
{
"name": "Calcium",
"value": "87",
"slug": "ca",
"details": {
"description": "description....",
"recommendation": "recommendation....",
"isNormal": false
}
},
{
"name": "Magnesium",
"value": "-98",
"slug": "mg",
"details": {
"description": "description....",
"recommendation": "recommendation....",
"isNormal": false
}
}
],
"patientName": "Patient Name",
"clinicName": "Clinic",
"gender": "Male",
"bloodGroup": "A",
"createdAt": { "$date": "2020-07-08T15:10:09.612Z" },
"updatedAt": { "$date": "2020-07-08T15:10:09.612Z" }
},
setups
{
"_id": { "$oid": "5efcba7503f4693d164e651d" },
"code": "Ca",
"codeLower": "ca",
"name": "Calcium",
"valueFrom": -75,
"valueTo": -51,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7503f4693d164e651e" }, // <=== should find this for Calcium
"code": "Ca",
"codeLower": "ca",
"name": "Calcium",
"valueFrom": 76,
"valueTo": 100,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7603f4693d164e65bb" }, // <=== should find this for Magnesium
"code": "Mg",
"codeLower": "mg",
"name": "Magnesium",
"valueFrom": -100,
"valueTo": -76,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7503f4693d164e6550" },
"code": "Mg",
"codeLower": "mg",
"name": "Magnesium",
"valueFrom": 76,
"valueTo": 100,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
}
I want to search the value from reports collection and check whether the value is in range from the setups collection and return the _id and add the returned _ids in setupIds field on reports collection.
I tried with the following aggregation framework:
db.reports.aggegrate([
{
'$match': {
'_id': new ObjectId('5f05e1d13e0f6637739e215b')
}
}, {
'$lookup': {
'from': 'setups',
'let': {
'testValue': '$testReport.value',
'testName': '$testReport.name'
},
'pipeline': [
{
'$match': {
'$expr': {
{
'$and': [
{
'$eq': [
'$name', '$$testName'
]
}, {
'$gte': [
'$valueTo', '$$testValue'
]
}, {
'$lte': [
'$valueFrom', '$$testValue'
]
}
]
}
}
}
}
],
'as': 'setupIds'
}
}
])
This query didn't find the expected results.
This is the updated reports collection I want:
{
"_id": { "$oid": "5f05e1d13e0f6637739e215b" },
"setupIds": [{ "$oid": "5efcba7503f4693d164e651e" }, { "$oid": "5efcba7603f4693d164e65bb" }], // <=== Here, array of the ObjectId (ref: "Setups")
"patientName": "Patient Name",
"clinicName": "Clinic",
"gender": "Male",
"bloodGroup": "A",
"createdAt": { "$date": "2020-07-08T15:10:09.612Z" },
"updatedAt": { "$date": "2020-07-08T15:10:09.612Z" }
},
You can try like following
[{
$match: {
_id: ObjectId('5f05e1d13e0f6637739e215b')
}
}, {
$unwind: {
path: "$testReport"
}
}, {
$lookup: {
from: 'setup',
'let': {
testValue: {
$toInt: '$testReport.value'
},
testName: '$testReport.name'
},
pipeline: [{
$match: {
$expr: {
$and: [{
"$eq": [
"$name",
"$$testName"
]
},
{
"$gte": [
"$valueTo",
"$$testValue"
]
},
{
"$lte": [
"$valueFrom",
"$$testValue"
]
}
]
}
}
}],
as: 'setupIds'
}
}, {
$group: {
_id: "$_id",
patientName: {
$first: "$patientName"
},
clinicName: {
$first: "$clinicName"
},
gender: {
$first: "$gender"
},
bloodGroup: {
$first: "$bloodGroup"
},
createdAt: {
$first: "$createdAt"
},
updatedAt: {
$first: "$updatedAt"
},
setupIds: {
$addToSet: "$setupIds._id"
}
}
}, {
$addFields: {
setupIds: {
$reduce: {
input: "$setupIds",
initialValue: [],
in: {
$setUnion: ["$$this", "$$value"]
}
}
}
}
}]
Working Mongo playground

Need to return matched data from mongo db JSON

I have Json which have values like state_city details this contains information like which city belongs to which state -
Need to query it for particular state name which will gives me all cities that belongs to that state.
db.collection.find({
"count": 10,
"state.name": "MP"
})
[
{
"collection": "collection1",
"count": 10,
"state": [
{
"name": "MH",
"city": "Mumbai"
},
{
"name": "MH",
"city": "Pune"
},
{
"name": "UP",
"city": "Kanpur"
},
{
"name": "CG",
"city": "Raipur"
}
]
},
{
"collection": "collection2",
"count": 20,
"state": [
{
"name": "MP",
"city": "Indore"
},
{
"name": "MH",
"city": "Bhopal"
},
{
"name": "UP",
"city": "Kanpur"
},
{
"name": "CG",
"city": "Raipur"
}
]
}
]
You have to use aggregate query to get only matching elements in array :
db.collection.aggregate([{
$unwind: "$content.state"
},
{
$match: {
"content.state.name": "MH",
"count": 10
}
},
{
$group: {
_id: "$content.state.city",
}
},
{
$addFields: {
key: 1
}
},
{
$group: {
_id: "$key",
cities: {
$push: "$_id"
}
}
},
{
$project: {
_id: 0,
cities: 1
}
}
])
This query will return :
{
"cities": [
"Pune",
"Mumbai"
]
}
The following query would be the solution.
db.collection.find({ "count": 10, "state":{"name": "MP"}})
For more complex queries, $elemMatch is also available.

Aggregate nested arrays

I have multiple documents, and I'm trying to aggregate all documents with companyId = xxx and return one array with all the statuses.
So it will look like this:
[
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
The document look like this:
[
{
"companyId": "xxx",
"position": "",
"section": "",
"comment": "",
"items": [
{
"any": "111",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
},
{
"any": "222",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
}
]
},
{
"companyId": "xxx",
"position": "",
"section": "",
"comment": "",
"items": [
{
"any": "111",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
},
{
"any": "222",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
}
]
}
]
Any suggestion, how to implement this?
Then I want to loop over the array (in code) and count how many items in status created, and completed. maybe it could be done with the query?
Thanks in advance
You can use below aggregation:
db.col.aggregate([
{
$match: { companyId: "xxx" }
},
{
$unwind: "$items"
},
{
$unwind: "$items.status"
},
{
$replaceRoot: {
newRoot: "$items.status"
}
},
{
$group: {
_id: "$status",
count: { $sum: 1 }
}
}
])
Double $unwind will return single status per document and then you can use $replaceRoot to promote each status to root level of your document.
Additionally you can add $group stage to count documents by status.
In addition to the #mickl answer, you can add $project pipeline to get the result as a flat list of status and count.
db.collectionName.aggregate([
{
$match: { companyId: "xxx" }
},
{
$unwind: "$items"
},
{
$unwind: "$items.status"
},
{
$replaceRoot: {
newRoot: "$items.status"
}
},
{
$group: {
_id: "$status",
count: { $sum: 1 }
}
},
{
$project: {
"status":"$_id",
"count":1,
_id:0
}
}
])
If the number of documents on which you are executing the above query is too much then you should avoid using $unwind in the initial stage of aggregation pipeline.
Either you should use $project after $match to reduce the selection of fields or you can use below query:
db.col.aggregate([
{
$match: {
companyId: "xxx"
}
},
{
$project: {
_id: 0,
data: {
$reduce: {
input: "$items.status",
initialValue: [
],
in: {
$concatArrays: [
"$$this",
"$$value"
]
}
}
}
}
},
{
$unwind: "$data"
},
{
$replaceRoot: {
newRoot: "$data"
}
}
])