MongoDB/Mongoose: append related records to each aggregation result - mongodb

Given the following Mongo collection called "members"
{
{name: "Joe", hobby: "Food"}, {name: "Lyn", hobby: "Food"},
{name: "Rex", hobby: "Play"}, {name: "Rex", hobby: "Shop"},...
}
I have an aggregation query that returns a paged set of records along with metadata for the total records found:
db.members.aggregate([
{
$facet: {
pipe1: [{ $count: 'count' }],
pipe2: [{ $skip: 0 }, { $limit: 4 }],
},
},
{
$unwind: '$pipe1',
},
{
$project: {
count: '$pipe1.count',
results: '$pipe2',
},
},
])
This gives me:
{count: 454, results: [<First 4 records here>]}
I am now trying to add to each record, an array of all member names that have the same hobby. So for the collection above, something like:
{
count: 454,
results: [
{name: "Joe", hobby: "Food", fanClub: ["Joe", "Lyn", "Alfred"]},
{name: "Lyn", hobby: "Food", fanClub: ["Joe", "Lyn", "Alfred"]},
{name: "Rex", hobby: "Play", fanClub: ["Rex"]},
{name: "Rex", hobby: "Shop", fanClub: ["Rex", "Rita"]}
]
}
I can't figure out how to run the follow up query within the aggregate. I've tried:
db.members.aggregate([
{
$facet: {
pipe1: [{ $count: 'count' }],
pipe2: [
{ $skip: 0 },
{ $limit: 2 },
{
$lookup: {
from: 'members',
pipeline: [{ $match: { hobby: '$hobby' } }],
as: 'fanClub',
},
},
],
},
},
{
$unwind: '$pipe1',
},
{
$project: {
count: '$pipe1.count',
results: '$pipe2',
},
},
])
Alas, the fanClub array is always empty.
Update 1
If I hardcode the hobby, for instance replace
{ $match: { hobby: '$hobby' }
with
{ $match: { hobby: 'Food' }
Then I do get results and all the fanClub arrays contain the results for Joe, Lyn and Alfred. So I must not be referring to the value within the pipeline correctly

Please try this :
// Return one page of members plus the total count, and attach to every paged
// member the names of all members sharing the same hobby ("fanClub").
db.membersHobby.aggregate([
  {
    $facet: {
      // Total number of members, independent of paging.
      pipe1: [{ $count: 'count' }],
      pipe2: [
        // Page first: $lookup neither adds, removes nor reorders documents, so
        // the page is the same, but the self-join now runs only for the
        // documents on the page instead of for the whole collection.
        { $skip: 0 },
        { $limit: 4 },
        {
          $lookup: {
            from: "membersHobby",
            // Expose the outer document's hobby to the sub-pipeline as $$hobby.
            let: { hobby: "$hobby" },
            pipeline: [
              // A plain { hobby: '$hobby' } match compares against the literal
              // string '$hobby'; $expr is needed to compare against $$hobby.
              { $match: { $expr: { $eq: ["$hobby", "$$hobby"] } } },
              { $project: { name: 1, _id: 0 } }
            ],
            as: "fanClub"
          }
        }
      ]
    }
  },
  // pipe1 is an array; unwind it so its count can be projected as a scalar.
  {
    $unwind: '$pipe1'
  },
  {
    $project: {
      count: '$pipe1.count',
      results: '$pipe2'
    }
  }
])
Result :
/* 1 */
{
"count" : 4,
"results" : [
{
"_id" : ObjectId("5e20a63ed3c98f2a7100fd4a"),
"name" : "Joe",
"hobby" : "Food",
"fanClub" : [
{
"name" : "Joe"
},
{
"name" : "Lyn"
}
]
},
{
"_id" : ObjectId("5e20a63ed3c98f2a7100fd4b"),
"name" : "Lyn",
"hobby" : "Food",
"fanClub" : [
{
"name" : "Joe"
},
{
"name" : "Lyn"
}
]
},
{
"_id" : ObjectId("5e20a63ed3c98f2a7100fd4c"),
"name" : "Rex",
"hobby" : "Play",
"fanClub" : [
{
"name" : "Rex"
}
]
},
{
"_id" : ObjectId("5e20a63ed3c98f2a7100fd4d"),
"name" : "Rex",
"hobby" : "Shop",
"fanClub" : [
{
"name" : "Rex"
}
]
}
]
}

If @srinivasy's answer meets your requirements, please give the points to him :)
If you want to get such structure:
{
count: 454,
results: [
{name: "Joe", hobby: "Food", fanClub: ["Joe", "Lyn", "Alfred"]},
{name: "Lyn", hobby: "Food", fanClub: ["Joe", "Lyn", "Alfred"]},
{name: "Rex", hobby: "Play", fanClub: ["Rex"]},
{name: "Rex", hobby: "Shop", fanClub: ["Rex", "Rita"]}
]
}
Use this query ($reduce is used to fold the looked-up documents into a single value, in your case the fanClub array of names):
// Page the members, attach every member sharing the same hobby through a
// self-$lookup, then flatten each looked-up list into an array of plain names.
db.members.aggregate([
  {
    $facet: {
      // Total number of documents, independent of paging.
      pipe1: [{ $count: "count" }],
      pipe2: [
        { $skip: 0 },
        { $limit: 4 },
        {
          $lookup: {
            from: "members",
            let: { hobby: "$hobby" },
            // $expr lets the sub-pipeline compare its own hobby with the outer $$hobby.
            pipeline: [{ $match: { $expr: { $eq: ["$hobby", "$$hobby"] } } }],
            as: "fanClub"
          }
        }
      ]
    }
  },
  // pipe1 is an array; unwind it so its count can be read as a scalar below.
  { $unwind: "$pipe1" },
  {
    $project: {
      count: "$pipe1.count",
      results: {
        $map: {
          input: "$pipe2",
          as: "member",
          in: {
            _id: "$$member._id",
            hobby: "$$member.hobby",
            name: "$$member.name",
            // Fold the looked-up documents into an array holding just their names.
            fanClub: {
              $reduce: {
                input: "$$member.fanClub",
                initialValue: [],
                in: { $concatArrays: ["$$value", ["$$this.name"]] }
              }
            }
          }
        }
      }
    }
  }
])
MongoPlayground

Related

Compare integers stored as Strings in Mongodb

In the below collection, column "qty" holds the integer values but the datatype is string.
I want to compare the "qty" field with an integer in the aggregate and "warehouse" field with a string "A". ("qty" > 2 and "warehouse" = "A")
[Can't change the datatype in the collection to integer as huge dependency is present]
Edit : Need to retrieve all the columns and all the documents matching the criteria.
Query : getting improper results
db.runCommand(
{
aggregate: "products", pipeline: [
{
$match: {
instock: {
$elemMatch: {
warehouse: "A",
qty: { $gt: "2" }
}
}
}
},
{ $project: { _id: 0 } }],
cursor: { batchSize: 200 }
});
Result : not getting documents where item = journal though it satisfies the conditions
/* 1 */
{
"item" : "paper",
"instock" : [
{
"warehouse" : "A",
"qty" : "60"
},
{
"warehouse" : "B",
"qty" : "15"
}
]
},
/* 2 */
{
"item" : "planner",
"instock" : [
{
"warehouse" : "A",
"qty" : "22"
},
{
"warehouse" : "B",
"qty" : "5"
}
]
}
Products Collection
[
{
"item": "journal",
"instock": [
{
"warehouse": "A",
"qty": "11"
},
{
"warehouse": "C",
"qty": "15"
}
]
},
{
"item": "paper",
"instock": [
{
"warehouse": "A",
"qty": "60"
},
{
"warehouse": "B",
"qty": "15"
}
]
},
{
"item": "planner",
"instock": [
{
"warehouse": "A",
"qty": "22"
},
{
"warehouse": "B",
"qty": "5"
}
]
}
]
I am getting improper results because the greater-than operator compares the strings lexicographically here, whereas it should compare them as integers. I also tried converting the value to double, but then I get no results.
Query with $convert to double : no result
db.runCommand(
{
aggregate: "products", pipeline: [
//{ $match: { "item": { $in: ["planner", "paper","journal"] } } },
{
$match: {
instock: {
$elemMatch: {
warehouse: "A",
qty: {
$gt: [
{$convert:{ input: "$qty", to: "double" }}, 5]
}
}
}
}
},
{ $project: { _id: 0 } }],
cursor: { batchSize: 200 }
});
Try this:
// Select the warehouse "A" stock entries whose qty (stored as a string)
// exceeds 2 when read as an integer, then rebuild one document per product.
db.products.aggregate([
  // One document per instock entry so each entry can be tested on its own.
  { $unwind: "$instock" },
  {
    $match: {
      $expr: {
        $and: [
          { $eq: ["$instock.warehouse", "A"] },
          // $toInt makes the comparison numeric instead of lexicographic.
          { $gt: [{ $toInt: "$instock.qty" }, 2] }
        ]
      }
    }
  },
  // Reassemble each product from the entries that survived the match.
  {
    $group: {
      _id: "$_id",
      item: { $first: "$item" },
      instock: { $push: "$instock" }
    }
  },
  { $project: { _id: 0 } }
])
MongoPlayground
Try this; it uses $filter to keep only the array elements that meet the criteria:
// Keep the products that have at least one warehouse "A" entry whose string
// qty is greater than 2 as an integer; the full instock array is returned.
db.runCommand({
  aggregate: "products",
  pipeline: [
    // Cheap pre-filter: only products that have any warehouse "A" entry.
    { $match: { 'instock.warehouse': 'A' } },
    {
      $addFields: {
        // Temporary field holding only the entries that satisfy both conditions.
        instockCheck: {
          $filter: {
            input: '$instock',
            as: 'each',
            cond: {
              $and: [
                { $gt: [{ $toInt: '$$each.qty' }, 2] },
                { $eq: ['$$each.warehouse', 'A'] }
              ]
            }
          }
        }
      }
    },
    // Intended to drop products whose filtered array came back empty.
    { $match: { instockCheck: { $gt: [] } } },
    // Hide the helper field and _id from the output.
    { $project: { instockCheck: 0, _id: 0 } }
  ],
  cursor: { batchSize: 200 }
});
Test : MongoDB-Playground

MongoDB Aggregate how to pair relevant records for processing

I've got some event data captured in a MongoDB database, and some of these events occur in pairs.
Eg: DOOR_OPEN and DOOR_CLOSE are two events that occur in pairs
Events collection:
{ _id: 1, name: "DOOR_OPEN", userID: "user1", timestamp: t }
{ _id: 2, name: "DOOR_OPEN", userID: "user2", timestamp: t+5 }
{ _id: 3, name: "DOOR_CLOSE", userID: "user1", timestamp:t+10 }
{ _id: 4, name: "DOOR_OPEN", userID: "user1", timestamp:t+30 }
{ _id: 5, name: "SOME_OTHER_EVENT", userID: "user3", timestamp:t+35 }
{ _id: 6, name: "DOOR_CLOSE", userID: "user2", timestamp:t+40 }
...
Assuming the records are sorted on the timestamp, _id: 1 and _id: 3 are a "pair" for "user1", and _id: 2 and _id: 6 are a pair for "user2".
I'd like to take all these DOOR_OPEN & DOOR_CLOSE pairs per user and calculate the average duration etc. the door has been opened by each user.
Can this be achieved using the aggregate framework?
You can use $lookup and $group for achieving this.
// For every DOOR_OPEN event, find the same user's next DOOR_CLOSE event and
// average the open duration (in minutes) per user.
db.getCollection('TestColl').aggregate([
  // Only door events take part in the pairing.
  { $match: { "name": { $in: [ "DOOR_OPEN", "DOOR_CLOSE" ] } } },
  {
    $lookup: {
      from: "TestColl",
      let: { userID_lu: "$userID", name_lu: "$name", timestamp_lu: "$timestamp" },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                { $eq: [ "$userID", "$$userID_lu" ] },
                // Only DOOR_OPEN events look for a partner; DOOR_CLOSE events
                // therefore end up with an empty close_dates array.
                { $eq: [ "$$name_lu", "DOOR_OPEN" ] },
                { $eq: [ "$name", "DOOR_CLOSE" ] },
                { $gt: [ "$timestamp", "$$timestamp_lu" ] }
              ]
            }
          }
        },
        // Several later closes can match; keep only the earliest one so the
        // pairing does not depend on the collection's natural order.
        { $sort: { timestamp: 1 } },
        { $limit: 1 }
      ],
      as: "close_dates"
    }
  },
  // The single remaining element (if any) is the matching close event.
  { $addFields: { "close_time": { $arrayElemAt: [ "$close_dates.timestamp", 0 ] } } },
  // Subtracting two dates yields milliseconds; convert to minutes.
  { $addFields: { "time_diff": { $divide: [ { $subtract: [ "$close_time", "$timestamp" ] }, 1000 * 60 ] } } },
  {
    $group: {
      _id: "$userID",
      events: { $push: { "eventId": "$_id", "name": "$name", "timestamp": "$timestamp" } },
      // Average open duration in minutes (the field name is kept for compatibility).
      averageTimestamp: { $avg: "$time_diff" }
    }
  }
])
Sample Data:
[
{ _id: 1, name: "DOOR_OPEN", userID: "user1", timestamp: ISODate("2019-10-24T08:00:00Z") },
{ _id: 2, name: "DOOR_OPEN", userID: "user2", timestamp: ISODate("2019-10-24T08:05:00Z") },
{ _id: 3, name: "DOOR_CLOSE", userID: "user1", timestamp:ISODate("2019-10-24T08:10:00Z") },
{ _id: 4, name: "DOOR_OPEN", userID: "user1", timestamp:ISODate("2019-10-24T08:30:00Z") },
{ _id: 5, name: "SOME_OTHER_EVENT", userID: "user3", timestamp:ISODate("2019-10-24T08:35:00Z") },
{ _id: 6, name: "DOOR_CLOSE", userID: "user2", timestamp:ISODate("2019-10-24T08:40:00Z") },
{ _id: 7, name: "DOOR_CLOSE", userID: "user1", timestamp:ISODate("2019-10-24T08:50:00Z") },
{ _id: 8, name: "DOOR_OPEN", userID: "user2", timestamp:ISODate("2019-10-24T08:55:00Z") }
]
Result:
/* 1 */
{
"_id" : "user2",
"events" : [
{
"eventId" : 2.0,
"name" : "DOOR_OPEN",
"timestamp" : ISODate("2019-10-24T08:05:00.000Z")
},
{
"eventId" : 6.0,
"name" : "DOOR_CLOSE",
"timestamp" : ISODate("2019-10-24T08:40:00.000Z")
},
{
"eventId" : 8.0,
"name" : "DOOR_OPEN",
"timestamp" : ISODate("2019-10-24T08:55:00.000Z")
}
],
"averageTimestamp" : 35.0
}
/* 2 */
{
"_id" : "user1",
"events" : [
{
"eventId" : 1.0,
"name" : "DOOR_OPEN",
"timestamp" : ISODate("2019-10-24T08:00:00.000Z")
},
{
"eventId" : 3.0,
"name" : "DOOR_CLOSE",
"timestamp" : ISODate("2019-10-24T08:10:00.000Z")
},
{
"eventId" : 4.0,
"name" : "DOOR_OPEN",
"timestamp" : ISODate("2019-10-24T08:30:00.000Z")
},
{
"eventId" : 7.0,
"name" : "DOOR_CLOSE",
"timestamp" : ISODate("2019-10-24T08:50:00.000Z")
}
],
"averageTimestamp" : 15.0
}
You could use the $group operator of the aggregate framework to group by userID and calculate the averages:
// Group all events by user and average their timestamp field.
// Note: this averages the raw timestamp values; it does not pair
// DOOR_OPEN events with DOOR_CLOSE events.
db.events.aggregate([
  {
    $group: {
      _id: "$userID",
      averageTimestamp: { $avg: "$timestamp" }
    }
  }
]);
If you also want to discard every event other than DOOR_OPEN or DOOR_CLOSE, add a $match stage at the start of the aggregation pipeline:
// Same grouping as before, restricted to the two door events.
db.events.aggregate([
  // Keep only DOOR_OPEN and DOOR_CLOSE events.
  { $match: { name: { $in: ["DOOR_OPEN", "DOOR_CLOSE"] } } },
  {
    $group: {
      _id: "$userID",
      averageTimestamp: { $avg: "$timestamp" }
    }
  }
]);

mongodb aggregate get last object from sub document that match a query

We have the following set of documents stored in mongodb:
Conversation record:
{_id: "x", lang: "en", timestamp: "", ... }
Each conversation has many processes, each process has set of messages as child-document list.
Process record:
{_id: "y", conversationId: "x", name: "", timestamp: "", messages: [
{
"direction" : "out",
"text" : "How can I help you?",
"timestamp" : ISODate("2019-05-23T11:08:18.423Z"),
"_id" : 3
},
{
"direction" : "out",
"text" : "Hello",
"timestamp" : ISODate("2019-05-23T11:08:17.423Z"),
"_id" : 1
},
{
"direction" : "in",
"text" : "Hi",
"timestamp" : ISODate("2019-05-23T11:08:18.423Z"),
"_id" : 2
}
], completed: "true"}
I need to write an aggregate query that returns a list of conversations, where each conversation has its list of processes ordered by timestamp, and each process has only the last message (based on the _id field) for each of the "in" and "out" directions.
We need to get something like this:
[
{
conversationId: "x",
timestamp: "",
processes: [
{
_id: "y",
name: "",
timestamp: "",
lastInMessage: {
"direction": "in",
"text": "Hi",
"timestamp": ISODate("2019-05-23T11:08:18.423Z"),
"_id": 2
},
lastOutMessage: {
"direction": "out",
"text": "How can I help you?",
"timestamp": ISODate("2019-05-23T11:08:18.423Z"),
"_id": 3
}
},
{
_id: "y",
name: "",
....
}
]
}
]
The query I tried is:
conversation.aggregate([
{
$match: {
timestamp: query.timestamp
}
},
{
$project: {
_id: "$conversationId",
timestamp: "$timestamp",
conversationId: "$conversationId"
}
},
{
"$lookup":
{
"from": "processes",
"localField": "conversationId",
"foreignField": "conversationId",
"as": "process"
}
},
{
$project: {
_id: "$_id",
timestamp: "$timestamp",
// messages: "$process.messages",
processes: "$process"
}
},
// here I don't know what to do.
You can use $reduce to pick the latest message of each direction from the messages array, and $map to build that result for every process:
// Pipeline stage: rewrites every entry of the processes array so that it keeps
// only _id, name, timestamp and the latest "in" / "out" message.
// NOTE(review): "latest" is decided by comparing message timestamps here, while
// the question asks for the last message by its _id field; confirm which one is wanted.
{
$project: {
_id: "$_id",
timestamp: "$timestamp",
processes: {
$map: {
input: "$processes",
as: "process",
in: {
_id: "$$process._id",
name: "$$process.name",
timestamp: "$$process.timestamp",
lastInMessage: {
// Walk the messages, carrying the best "in" candidate found so far in $$value.
$reduce: {
input: "$$process.messages",
// null seed: stays null when the process has no message in this direction.
initialValue: null,
in: {
$cond: [
// Replace the candidate when this message is "in" and newer than the candidate.
{ $and: [ { $eq: [ "$$this.direction", "in" ] }, { $gt: [ "$$this.timestamp", "$$value.timestamp" ] } ] },
"$$this",
"$$value"
]
}
}
},
lastOutMessage: {
// Same fold as above, for the "out" direction.
$reduce: {
input: "$$process.messages",
initialValue: null,
in: {
$cond: [
{ $and: [ { $eq: [ "$$this.direction", "out" ] }, { $gt: [ "$$this.timestamp", "$$value.timestamp" ] } ] },
"$$this",
"$$value"
]
}
}
}
}
}
}
}
}
MongoDB Playground example

Query datevalue of a inner Array element

Need help with some MongoDB query:
The document I have is below and I am trying to search based on 2 conditions
The meta.tags.code = "ABC"
Its LastSyncDateTime (meta.extension.value) should satisfy either of:
meta.extension.value == "" (OR)
meta.extension.value is less than meta.lastUpdate
Data :
{
"meta" : {
"extension" : [
{
"url" : "LastSyncDateTime",
"value" : "20190206-00:49:25.694"
},
{
"url" : "RetryCount",
"value" : "0"
}
],
"lastUpdate" : "20190207-01:21:41.095",
"tags" : [
{
"code" : "ABC",
"system" : "type"
},
{
"code" : "XYZ",
"system" : "SourceSystem"
}
]
}
}
Query:
db.proc_patients_service.find({
"meta.tags.code": "ABC",
$or: [{
"meta.extension.value": ""
}, {
$expr: { "$lt": [{ "mgfunc": "ISODate", "params": [{ "$arrayElemAt": ["$meta.extension.value", 0] }] }, { "mgfunc": "ISODate", "params": ["$meta.lastUpdate"] }] }
}]
})
But it is only fetching ABC Patients whose LastSyncDateTime is empty and ignores the other condition.
Using MongoDB Aggregation, I have converted your string to date with operator $dateFromString and then compare the value as per your criteria.
// Find "ABC"-tagged documents and keep the extension entries whose value is
// "" or "0", or is a date earlier than meta.lastUpdate.
db.proc_patients_service.aggregate([
  { $match: { "meta.tags.code": "ABC" } },
  // One document per extension entry so each value can be compared on its own.
  { $unwind: "$meta.extension" },
  {
    $project: {
      'meta.tags': '$meta.tags',
      // Dates are stored as strings such as "20190207-01:21:41.095".
      'meta.lastUpdate': { '$dateFromString': { 'dateString': '$meta.lastUpdate', format: "%Y%m%d-%H:%M:%S.%L" } },
      'meta.extension.url': '$meta.extension.url',
      'meta.extension.value': {
        $cond: {
          // "0" and "" are not dates: map both to the sentinel 0 instead of
          // handing them to $dateFromString, which cannot parse them.
          if: {
            $and: [
              { $ne: ["$meta.extension.value", "0"] },
              { $ne: ["$meta.extension.value", ""] }
            ]
          },
          then: { '$dateFromString': { 'dateString': '$meta.extension.value', format: "%Y%m%d-%H:%M:%S.%L" } },
          else: 0
        }
      }
    }
  },
  {
    $match: {
      $or: [
        // Sentinel produced above for "" / "0" values.
        { "meta.extension.value": 0 },
        { $expr: { $lt: ["$meta.extension.value", "$meta.lastUpdate"] } }
      ]
    }
  },
  // Re-assemble each source document. The key must be the field path '$_id';
  // the literal string '_id' would merge every document into a single group.
  {
    $group: { _id: '$_id', 'extension': { $push: '$meta.extension' }, "lastUpdate": { $first: '$meta.lastUpdate' }, 'tags': { $first: '$meta.tags' } }
  },
  {
    $project: { meta: { 'extension': '$extension', lastUpdate: '$lastUpdate', 'tags': '$tags' } }
  }
])

Compare 2 count aggregations

I have a collection in MongoDB that looks something like the following:
{ "_id" : 1, "type" : "start", userid: "101", placementid: 1 }
{ "_id" : 2, "type" : "start", userid: "101", placementid: 2 }
{ "_id" : 3, "type" : "start", userid: "101", placementid: 3 }
{ "_id" : 4, "type" : "end", userid: "101", placementid: 1 }
{ "_id" : 5, "type" : "end", userid: "101", placementid: 2 }
and I want to group results by userid then placementid and then count the types of "start" and "end", but only when the two counts are different. In this particular example I would want to get placementid: 3 because when grouped and counted this is the only case where the counts don't match.
I've written a query that gets the 2 counts and the grouping but I can't do the filtering when counts don't match. This is my query:
db.getCollection('mycollection').aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
countStart: {$ne: "$countEnd"}
}
}
])
It seems like I'm using the match aggregation incorrectly because I'm seeing results where countStart and countEnd are the same.
{ "_id" : {"userid" : "101", "placementid" : "1"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "2"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "3"}, "countStart" : 1.0, "countEnd" : 0 }
Can anybody point into the right direction please?
To compare two fields inside a $match stage you need $expr, which is available since MongoDB 3.6:
// Count "start" and "end" events per (userid, placementid) and keep only the
// groups whose two counts differ.
db.myCollection.aggregate([
  {
    // Turn the type into two 0/1 flags that can be summed.
    $project: {
      userid: 1,
      placementid: 1,
      isStart: { $cond: { if: { $eq: ["$type", "start"] }, then: 1, else: 0 } },
      isEnd: { $cond: { if: { $eq: ["$type", "end"] }, then: 1, else: 0 } }
    }
  },
  {
    $group: {
      _id: { userid: "$userid", placementid: "$placementid" },
      countStart: { $sum: "$isStart" },
      countEnd: { $sum: "$isEnd" }
    }
  },
  // $expr is what allows one field to be compared with another inside $match.
  { $match: { $expr: { $ne: ["$countStart", "$countEnd"] } } }
])
If you're using older version of MongoDB you can use $redact:
// Same counting as the $expr variant, using $redact to drop the groups whose
// start and end counts are equal.
db.myCollection.aggregate([
  {
    // Turn the type into two 0/1 flags that can be summed.
    $project: {
      userid: 1,
      placementid: 1,
      isStart: { $cond: { if: { $eq: ["$type", "start"] }, then: 1, else: 0 } },
      isEnd: { $cond: { if: { $eq: ["$type", "end"] }, then: 1, else: 0 } }
    }
  },
  {
    $group: {
      _id: { userid: "$userid", placementid: "$placementid" },
      countStart: { $sum: "$isStart" },
      countEnd: { $sum: "$isEnd" }
    }
  },
  // Keep a group only when its two counts differ; prune it otherwise.
  { $redact: { $cond: [{ $ne: ["$countStart", "$countEnd"] }, "$$KEEP", "$$PRUNE"] } }
])
You can run the following pipeline to get this - no need to use $expr or $redact or anything special really:
// Net out starts against everything else per (userid, placementid) and return
// only the groups where the two do not cancel out.
// The stages are passed as one pipeline array, the documented form of
// aggregate(); a second positional argument is otherwise the options document.
db.mycollection.aggregate([
  {
    $group: {
      _id: {
        "userid": "$userid",
        "placementid": "$placementid"
      },
      "sum": {
        $sum: {
          $cond: {
            if: { $eq: [ "$type", "start" ] },
            then: 1, // +1 for start
            else: -1 // -1 for anything else
          }
        }
      }
    }
  },
  {
    $match: {
      "sum": { $ne: 0 } // only return the non matching-up ones
    }
  }
])