Get last and minimal values from grouped documents - mongodb

My document model looks like:
{
"model": "ABC123",
"date": "2018-12-24T23:00:00.000+0000",
"price": "2000" ,
}
I would like to retrive collection to get array of documents:
[
{ "_id" : "ABC123", "newestDate" : ISODate("2018-12-26T23:00:00Z"), "newestPrice" : 2801.33, "lowestPriceAtAll": 1300 },
{ "_id" : "ABC124", "newestDate" : ISODate("2018-12-26T23:00:00Z"), "newestPrice" : 2801.33, "lowestPriceAtAll": 990}
]
where _id is model field, newestPrice is price of newest document (grouped by model) and lowestPriceAtAll is lowest price in all documents with the same model.
I grilled two queries.
First is to find lowest price documents:
offers.aggregate([
{ $sort: { "model": 1, "price": 1 }},
{
$group: {
_id: "$model",
lowestPrice: { "$first": "$price" },
lowestPriceDate: { "$first": "$date"},
}
}
])
the second is to find newest documents:
offers.aggregate([
{ $sort: { "model": 1, "date": -1 }},
{
$group: {
_id: "$model",
newestDate: { "$first": "$date" },
newestPrice: { "$first": "$price"},
}
}
])
Is it possible to merge these two queries into one? (the most important thing is that documents have to be grouped by model field).

you can use $facet
db.offers.aggregate([
{$facet :{
lowest: [
{ $sort: { "model": 1, "price": 1 }},
{
$group: {
_id: "$model",
lowestPrice: { "$first": "$price" },
lowestPriceDate: { "$first": "$date"},
}
}
],
newest: [
{ $sort: { "model": 1, "date": -1 }},
{
$group: {
_id: "$model",
newestDate: { "$first": "$date" },
newestPrice: { "$first": "$price"},
}
}
]
}}
])

Related

How to use $match (multiple conditions) and $group in Mongodb

have list of records with the following fields - postBalance, agentId, createdAt, type. I want to filter by “type” and date. After this is done I want to get the $last postBalance for each agent based on the filter and sum up the postBalance. I have been struggling with this using this query
db.transaction.aggregate(
[{ $match: {
$and: [ {
createdAt: { $gte: ISODate('2022-09-15'), $lt:
('2022-09-16') } },
{ type: "CASH_OUT"}]}},
{
$group:
{
_id: {createdAt: {$last: "$createdAt"}},
totalAmount: { $sum: "$postBalance" },
}
}
]
)
An empty array is returned with this query and there are data in the collection.
Below are samples of the documents
{
"_id": {
"$oid": "6334cefd0048787d5535ff16"
},
"type": "CASH_OUT",
"postBalance": {
"$numberDecimal": "23287.625"
},
"createdAt": {
"$date": {
"$numberLong": "1664405245000"
}
},
}
{
"_id": {
"$oid": "6334d438c1ab8a577677cbf3"
},
"userID": {
"$oid": "62f27bc29f51747015fdb941"
},
"aggregatorID": "0000116",
"transactionFee": {
"$numberDecimal": "0.0"
},
"type": "AIRTIME_VTU",
"postBalance": {
"$numberDecimal": "2114.675"
},
"walletHistoryID": 613266,
"walletID": 1720,
"walletActionAt": {
"$date": {
"$numberLong": "1664406584000"
}
},
{
"type": "FUNDS_TRANSFER",
"postBalance": {
"$numberDecimal": "36566.39"
},
"createdAt": {
"$date": {
"$numberLong": "1664407090000"
}
}
}
This is the output I am expecting
{
"date" : 2022-10-09,
"CASHOUT ": 897663,088,
"FUNDS_TRANSFER": 8900877,
"AIRTIME_VTU": 8890000
}
How can my query be aggregated to get this? Thanks
It look like you want something like:
db.collection.aggregate([
{$match: {
createdAt: {
$gte: ISODate("2022-09-15T00:00:00.000Z"),
$lt: ISODate("2022-09-30T00:00:00.000Z")
}
}
},
{$group: {
_id: "$type",
createdAt: {$first: "$createdAt"},
totalAmount: {$sum: "$postBalance"}
}
},
{$group: {
_id: 0,
createdAt: {$first: "$createdAt"},
data: {$push: {k: "$_id", v: "$totalAmount"}}
}
},
{$project: {
data: {$arrayToObject: "$data"},
createdAt: 1,
_id: 0
}
},
{$set: {"data.date": "$createdAt"}},
{$replaceRoot: {newRoot: "$data"}}
])
See how it works on the playground example

MongoDB merge $facet results with corresponding items

Hello I have a collection like this
"_id" :"601bd0f4be72d839303adcd3",
"title":"Payment-1",
"initialBalance": {"$numberDecimal": "75"},
"paymentHistory":[
{"_id": "601bd1542df40f2ca8a769df","payment": {"$numberDecimal": "10"}},
{"_id": "601bd1542df40f2ca8a769de","payment": {"$numberDecimal": "20"}},
]
I want to calculate active balance (initialBalance - total of paymentHistory) for each payment.
I calculated total payments from paymentHistory for each document in collection.
this.aggregate([
{$match:{...}},
{
$facet:{
info:[
{$project:{_id:1,title:1,initialBalance:1}}
],
subPayments:[
{$unwind:"$paymentHistory"},
{$group:{_id:"$_id",total:{$sum:"$paymentHistory.payment"}}},
]
}
}
])
I get this result for above query.
"info": [
{
"_id": "601bd0f4be72d839303adcd3",
"title": "Payment-1",
"initialBalance": {"$numberDecimal": "580"},
},
...
],
"subPayments": [
{
"_id": "601bd0f4be72d839303adcd3",
"total": {"$numberDecimal": "80.75"}
},
...
]
I added following lines to aggregation.
{$facet:{...}},
{$project: {payments:{$setUnion:['$info','$subPayments']}}},
{$unwind: '$payments'},
{$replaceRoot: { newRoot: "$payments" }},
Now, I get this result
{
"_id": "601bd0f4be72d839303adcd3",
"total": {"$numberDecimal": "80.75"}
},
{
"_id": "601bd0f4be72d839303adcd3",
"title": "Payment-1",
"initialBalance": {"$numberDecimal": "580"},
},
{...}
I think if I group them via _id, then I calculate activeBalance in $project aggregation.
...
{
$group:{
_id:"$_id",
title:{$first:"$title"},
initialBalance:{$first:"$initialBalance"},
totalPayment:{$first: "$total"},
}
},
{
$set:{activeBalance:{$subtract:[{$ifNull:["$initialBalance",0]}, {$ifNull:["$totalPayment",0]}]}}
}
The problem is after $group aggregation fields return null.
{
"_id": "601bd0f4be72d839303adcd3",
"title": null,
"initialBalance": null,
"totalPayment": {
"$numberDecimal": "80.75"
},
"activeBalance": {
"$numberDecimal": "-80.75"
}
}
How can I solve this problem?
This is what #prasad_ suggested:
db.accounts.aggregate([
{
$addFields: {
activeBalance: {
$subtract: [
"$initialBalance",
{
$reduce: {
input: "$paymentHistory",
initialValue: 0,
in: { $sum: ["$$value", "$$this.payment"] }
}
}
]
}
}
}
]);

Aggregation at each document level mongodb

I have a list of documents like this
[{
"_id": "5dbc95f921d7625303fe2369",
"name": "John",
"itemsPurchased": [{
"offer": "o1",
"items": ["p1"]
},{
"offer": "o1",
"items": ["p1"]
},
{
"offer": "o1",
"items": ["p2"]
},
{
"offer": "o2",
"items": ["p1"]
}, {
"offer": "o7",
"items": ["p1"]
}
]
},
{
"_id": "zbc95f921d7625303fe2363",
"name": "Doe",
"itemsPurchased": [{
"offer": "o1",
"items": ["p11"]
},{
"offer": "o1",
"items": ["p11"]
},
{
"offer": "o2",
"items": ["p13"]
},
{
"offer": "o1",
"items": ["p22"]
},
{
"offer": "o2",
"items": ["p11"]
}, {
"offer": "o3",
"items": ["p11"]
}
]
}
]
And i am trying to compute unique offers on unique products by each customer, expecting the resultant to be like:
[
{
"_id": "5dbc95f921d7625303fe2369",
"name": "John",
"offersAndProducts": {
"o1":2,
"o2":2,
"o3":1
},
{
"_id": "zbc95f921d7625303fe2363",
"name": "Doe",
"offersAndProducts": {
"o1":2,
"o2":1,
"o7":1
}
]
I want to apply aggregations per document, After performing $unwind on itemsPurchased, applied $group on items and then on offer to eliminate the duplication:
{
"$group" : {
"_id" : {
"item" : {
"$arrayElemAt" : [
"$itemsPurchased.item",
0.0
]
},
"count" : {
"$sum" : 1.0
},
"offer" : "$itemsPurchased.offer"
}
}
}
then,
{
"$group" : {
"_id" : "$_id.offer",
"count" : {
"$sum" : 1.0
}
}
}
this gives the array of products and offers for all documents:
[
{o1:4,o2:3,o3:1,o7:1}
]
But i need it at document level.
tried $addFeild, but $unwind and $match operators gives invalid error.
Any other way of achieving this?
Generally speaking, it's an anti-pattern to $unwind an array and then to $group on the original _id since most operations can be done on the array directly, in a single stage. Here is what such a stage would look like:
{$addFields:{
offers:{$arrayToObject:{
$map:{
input:{$setUnion:"$itemsPurchased.offer"},
as:"o",
in:[
"$$o",
{$size:{$setUnion:{$let:{
vars:{items:{$filter:{
input:"$itemsPurchased",
cond:{$eq:["$$this.offer","$$o"]}
}}},
in:{$reduce:{
input:"$$items",
initialValue:[],
in:{$concatArrays:["$$value","$$items.items"]}
}}
}}}
}]
}
}}
}}
What this does is create an array where each element is a two element array (which is a syntax that $arrayToObject can convert to an object where first element is key name and second is value) and the input is a unique set of offers and for each we accumulate an array of products, get rid of duplicates (with $setUnion) and then get the size of the result. What this produces on your input is this:
"offers" : {
"o1" : 2,
"o2" : 2,
"o3" : 1
}
You need to run $unwind and $group twice. To count only unique items you can use $addToSet. To build your keys dynamically you need to use $arrayToObject:
db.collection.aggregate([
{
$unwind: "$itemsPurchased"
},
{
$unwind: "$itemsPurchased.items"
},
{
$group: {
_id: {
_id: "$_id",
offer: "$itemsPurchased.offer"
},
name: { $first: "$name" },
items: { $addToSet: "$itemsPurchased.items" }
}
},
{
$group: {
_id: "$_id._id",
name: { $first: "$name" },
offersAndProducts: { $push: { k: "$_id.offer", v: { $size: "$items" } } }
}
},
{
$project: {
_id: 1,
name: 1,
offersAndProducts: { $arrayToObject: "$offersAndProducts" }
}
}
])
Mongo Playground

Mongo aggregation $group conditional $eq for string field

I use Mongo v2.2.0.
I wrote the query but the main issue is $arrayElemAt. Standard replacement with $unwind-$first doesn't work for me and I suppose that better solution exists. I have a restriction to run this aggregation pipeline as a single operation instead of running a query for positive and negative data and later merge results in a code. I need to apply a $sort, $limit and $skip for the resulting query to restrict count of words to be used for filtering records from other collection and combine data from both collections in Java code.
Aggregation query:
[
{
$match: {
"merchantId": ObjectId("59520e6ccc7a701fbed31f94"),
"date": {
"$gte": NumberLong(1389644800000),
"$lt": NumberLong(1502409599999)
},
"isbn": "a123",
}
},
{
$project: {
"word": 1,
"sentence": 1,
"type": 1,
"date": 1
}
},
{
$sort: {
"date": -1
}
},
{
$group: {
"_id": {
"word": "$word",
"type": "$type"
},
"date": {
$max: "$date"
},
"sentence": {
$first: "$sentence"
},
"sentenceCount": {
"$sum": 1
}
},
},
{
$group: {
"_id": "$_id.word",
"word": { $first: "$_id.word"},
"positiveCount": {$sum: {$cond: [{$eq: ["$_id.type", "positive"]}, "$sentenceCount", 0]}},
"count": {$sum: "$sentenceCount"},
"positiveSentence": {
"$push": {
"$cond": [{$eq: ["$_id.type", "positive"]}, "$sentence", "$noval"]
}
},
"negativeSentence": {
"$push": {
"$cond": [{$eq: ["$_id.type", "negative"]}, "$sentence", "$noval"]
}
}
}
},
{
$project: {
"_id": 0,
"word": 1,
"sentimentPercentage": {$cond: [{$eq: ["$count", 0]}, 0, {$multiply: [{$divide: ["$positiveCount", "$count"]}, 100]}]},
"positiveSentence": {$arrayElemAt: ["$positiveSentence", 0]},
"negativeSentence": {$arrayElemAt: ["$negativeSentence", 0]},
}
},
{
$sort: {
sentimentPercentage: -1
}
},
{
$limit: 50
}
]
Collection document "schema":
{
"_id" : ObjectId("59887424e4b099e00724aa44"),
"merchantId" : ObjectId("59520e6ccc7a701fbed31f94"),
"isbn" : "a123",
"sentence" : "Great, friendly service.",
"word" : "service",
"type" : "positive",
"date" : NumberLong(1466809200000),
}
Expected output:
{
"word" : "expectations",
"sentimentPercentage" : 100.0,
"positiveSentence" : "The service exceeded our expectations."
},
{
"word" : "representative",
"sentimentPercentage" : 87.5,
"positiveSentence" : "Excellent local representative, met the flight and gave us all the relevant information to ensure a great holiday.",
"negativeSentence" : "The representative at resort was poor."
},
{
"word" : "seats",
"sentimentPercentage" : 0.0,
"negativeSentence" : "Long delay and pre booked seats were lost ."
}
Please, could you advise me how to replace $arrayElemAt operator or even better how to optimise this query to the desired output using just features of Mongo <=2.2.0?
This appears to give me reasonable results. I think it will not work properly, though, in cases where you have no positive or no negative sentence because of the $unwind stage which does not support the preserveNullAndEmptyArrays parameter in v2.2...
db.getCollection('test').aggregate([
{
$project: {
"word": 1,
"sentence": 1,
"type": 1,
"date": 1
}
},
{
$sort: {
"date": -1
}
},
{
$group: {
"_id": {
"word": "$word",
"type": "$type"
},
"date": {
$max: "$date"
},
"sentence": {
$first: "$sentence"
},
"sentenceCount": {
"$sum": 1
}
},
},
{
$group: {
"_id": "$_id.word",
"word": { $first: "$_id.word"},
"positiveCount": {$sum: {$cond: [{$eq: ["$_id.type", "positive"]}, "$sentenceCount", 0]}},
"count": {$sum: "$sentenceCount"},
"positiveSentence": {
"$push": {
"$cond": [{$eq: ["$_id.type", "positive"]}, "$sentence", "$noval"]
}
},
"negativeSentence": {
"$push": {
"$cond": [{$eq: ["$_id.type", "negative"]}, "$sentence", "$noval"]
}
}
}
},
{ $unwind: "$positiveSentence" },
{ $group:
{
"_id": "$_id",
"word": { $first: "$word" },
"count": { $first: "$count" },
"positiveCount": { $first: "$positiveCount" },
"positiveSentence": { $first: "$positiveSentence" },
"negativeSentence": { $first: "$negativeSentence" },
}
},
{ $unwind: "$negativeSentence" },
{ $group:
{
"_id": "$_id",
"word": { $first: "$word" },
"count": { $first: "$count" },
"positiveCount": { $first: "$positiveCount" },
"positiveSentence": { $first: "$positiveSentence" },
"negativeSentence": { $first: "$negativeSentence" },
}
},
{
$project: {
"_id": 0,
"word": 1,
"sentimentPercentage": {$cond: [{$eq: ["$count", 0]}, 0, {$multiply: [{$divide: ["$positiveCount", "$count"]}, 100]}]},
"positiveSentence": 1,
"negativeSentence": 1
}
}
])
You might be able to simplify this further, e.g. get rid of the first projection and grouping stage. I can perhaps look into that in a few hours if you'd like me to.

Using the aggregation framework to compare array element overlap

I have a collections with documents structured like below:
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-01T00:00:00Z"),
ISODate("2015-01-02T00:00:00Z"),
ISODate("2015-01-03T00:00:00Z")
]
}
I would like to search the collection to see if there are any documents with the same carrier and flightNumber that also have dates in the dates array that over lap. For example:
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-01T00:00:00Z"),
ISODate("2015-01-02T00:00:00Z"),
ISODate("2015-01-03T00:00:00Z")
]
},
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-03T00:00:00Z"),
ISODate("2015-01-04T00:00:00Z"),
ISODate("2015-01-05T00:00:00Z")
]
}
If the above records were present in the collection I would like to return them because they both have carrier: abc, flightNumber: 123 and they also have the date ISODate("2015-01-03T00:00:00Z") in the dates array. If this date were not present in the second document then neither should be returned.
Typically I would do this by grouping and counting like below:
db.flights.aggregate([
{
$group: {
_id: { carrier: "$carrier", flightNumber: "$flightNumber" },
uniqueIds: { $addToSet: "$_id" },
count: { $sum: 1 }
}
},
{
$match: {
count: { $gt: 1 }
}
}
])
But I'm not sure how I could modify this to look for array overlap. Can anyone suggest how to achieve this?
You $unwind the array if you want to look at the contents as "grouped" within them:
db.flights.aggregate([
{ "$unwind": "$dates" },
{ "$group": {
"_id": { "carrier": "$carrier", "flightnumber": "$flightnumber", "date": "$dates" },
"count": { "$sum": 1 },
"_ids": { "$addToSet": "$_id" }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$unwind": "$_ids" },
{ "$group": { "_id": "$_ids" } }
])
That does in fact tell you which documents where the "overlap" resides, because the "same dates" along with the other same grouping key values that you are concerned about have a "count" which occurs more than once. Indicating the overlap.
Anything after the $match is really just for "presentation" as there is no point reporting the same _id value for multiple overlaps if you just want to see the overlaps. In fact if you want to see them together it would probably be best to leave the "grouped set" alone.
Now you could add a $lookup to that if retrieving the actual documents was important to you:
db.flights.aggregate([
{ "$unwind": "$dates" },
{ "$group": {
"_id": { "carrier": "$carrier", "flightnumber": "$flightnumber", "date": "$dates" },
"count": { "$sum": 1 },
"_ids": { "$addToSet": "$_id" }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$unwind": "$_ids" },
{ "$group": { "_id": "$_ids" } },
}},
{ "$lookup": {
"from": "flights",
"localField": "_id",
"foreignField": "_id",
"as": "_ids"
}},
{ "$unwind": "$_ids" },
{ "$replaceRoot": {
"newRoot": "$_ids"
}}
])
And even do a $replaceRoot or $project to make it return the whole document. Or you could have even done $addToSet with $$ROOT if it was not a problem for size.
But the overall point is covered in the first three pipeline stages, or mostly in just the "first". If you want to work with arrays "across documents", then the primary operator is still $unwind.
Alternately for a more "reporting" like format:
db.flights.aggregate([
{ "$addFields": { "copy": "$$ROOT" } },
{ "$unwind": "$dates" },
{ "$group": {
"_id": {
"carrier": "$carrier",
"flightNumber": "$flightNumber",
"dates": "$dates"
},
"count": { "$sum": 1 },
"_docs": { "$addToSet": "$copy" }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$group": {
"_id": {
"carrier": "$_id.carrier",
"flightNumber": "$_id.flightNumber",
},
"overlaps": {
"$push": {
"date": "$_id.dates",
"_docs": "$_docs"
}
}
}}
])
Which would report the overlapped dates within each group and tell you which documents contained the overlap:
{
"_id" : {
"carrier" : "abc",
"flightNumber" : 123.0
},
"overlaps" : [
{
"date" : ISODate("2015-01-03T00:00:00.000Z"),
"_docs" : [
{
"_id" : ObjectId("5977f9187dcd6a5f6a9b4b97"),
"carrier" : "abc",
"flightNumber" : 123.0,
"dates" : [
ISODate("2015-01-03T00:00:00.000Z"),
ISODate("2015-01-04T00:00:00.000Z"),
ISODate("2015-01-05T00:00:00.000Z")
]
},
{
"_id" : ObjectId("5977f9187dcd6a5f6a9b4b96"),
"carrier" : "abc",
"flightNumber" : 123.0,
"dates" : [
ISODate("2015-01-01T00:00:00.000Z"),
ISODate("2015-01-02T00:00:00.000Z"),
ISODate("2015-01-03T00:00:00.000Z")
]
}
]
}
]
}