MongoDB Aggregation to get count and Y sample entries - mongodb

MongoDB version:4.2.17.
Trying out aggregation on data in a collection.
Example data:
{
"_id" : "244",
"pubName" : "p1",
"serviceIdRef" : "36e9c779-7865-4b74-a30b-e4d6a0cc5295",
"serviceName" : "my-service",
"subName" : "c1",
"pubState" : "INVITED"
}
I would like to:
Do a match by something (let’s say subName) and group by serviceIdRef and then limit to return X entries
Also return for each of the serviceIdRefs, the count of the documents in each of ACTIVE or INVITED states. And Y (for this example, say Y=3) documents that are in this state.
For example, the output would appear as (in brief):
[
{
serviceIdRef: "36e9c779-7865-4b74-a30b-e4d6a0cc5295",
serviceName:
state:[
{
pubState: "INVITED"
count: 200
sample: [ // Get those Y entries (here Y=3)
{
// sample1 like:
"_id" : "244",
"pubName" : "p1",
"serviceIdRef" : "36e9c779-7865-4b74-a30b-e4d6a0cc5295",
"serviceName" : "my-service",
"subName" : "c1",
"pubState" : "INVITED"
},
{
sample2
},
{
sample3
}
]
},
{
pubState: "ACTIVE", // For this state, repeat as we did for "INVITED" state above.
......
}
]
}
{
repeat for another service
}
]
So far I have written this but am not able to get those Y entries. Is there a (better) way?
This is what I have so far (not complete and not exactly outputs in the format above):
db.sub.aggregate(
[{
$match:
{
"subName": {
$in: ["c1", "c2"]
},
"$or": [
{
"pubState": "INVITED",
},
{
"pubState": "ACTIVE",
}
]
}
},
{
$group: {
_id: "$serviceIdRef",
subs: {
$push: "$$ROOT",
}
}
},
{
$sort: {
_id: -1,
}
},
{
$limit: 22
},
{
$facet:
{
facet1: [
{
$unwind: "$subs",
},
{
$group:
{
_id: {
"serviceName" : "$_id",
"pubState": "$subs.pubState",
"subState": "$subs.subsState"
},
count: {
$sum: 1
}
}
}
]
}
}
])

You have to do the second $group stage to manage nested structure,
$match your conditions
$sort by _id in descending order
$group by serviceIdRef and pubState, get first required fields and prepare the array for sample, and get count of documents
$group by only serviceIdRef and construct the state array
$slice for limit the document in sample
db.collection.aggregate([
{
$match: {
subName: { $in: ["c1", "c2"] },
pubState: { $in: ["INVITED", "ACTIVE"] }
}
},
{ $sort: { _id: -1 } },
{
$group: {
_id: {
serviceIdRef: "$serviceIdRef",
pubState: "$pubState"
},
serviceName: { $first: "$serviceName" },
sample: { $push: "$$ROOT" },
count: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.serviceIdRef",
serviceName: { $first: "$serviceName" },
state: {
$push: {
pubState: "$_id.pubState",
count: "$count",
sample: { $slice: ["$sample", 22] }
}
}
}
}
])
Playground

Related

Mongodb project results into single document

I have a collection like this:
{
"_id" : ObjectId("5f4e81f1da5ea3cb7c248a8f"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-08-24T11:10:43.219+0000")
}
{
"_id" : ObjectId("5f4e8206da5ea3cb7c248a90"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-09-24T11:10:43.219+0000")
}
{
"_id" : ObjectId("5f4e821fda5ea3cb7c248a91"),
"type" : "TYPE_2",
"updateTime" : ISODate("2020-09-25T11:10:43.219+0000")
}
I want to know how many documents there are of each type and also obtain the date of the last global modification. For now I can get these results like this:
db.getCollection("test").aggregate(
// Pipeline
[
// Stage 1
{
$group: {
_id : "$type",
count: { $sum: 1 },
lastUpdate: { "$max": "$updateTime" }
}
},
// Stage 2
{
$sort: {
lastUpdate : -1
}
},
]
);
With which I get the results this way:
{
"_id" : "TYPE_2",
"count" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
}
{
"_id" : "TYPE_1",
"count" : 2.0,
"lastUpdate" : ISODate("2020-09-24T11:10:43.219+0000")
}
So I have both the sum of each document and the last modification (thanks to the sort).
But I would like to project and get the results like this, in a single result document:
{
"type1" : 2.0,
"type2" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
}
#varman's answer is good, this is just in different way,
$group you have already done by your self
$group create types array to combine all documents
$replaceWith to replace root with field types to convert $arrayToObject
db.collection.aggregate([
{
$group: {
_id: "$type",
count: { $sum: 1 },
lastUpdate: { $max: "$updateTime" }
}
},
{
$group: {
_id: null,
types: {
$push: {
k: "$_id",
v: "$count"
}
},
lastUpdate: { $max: "$lastUpdate" }
}
},
{
$replaceWith: {
$mergeObjects: [
{ lastUpdate: "$lastUpdate" },
{ $arrayToObject: "$types" }
]
}
}
])
Playground
You can use following stages after your stage.
{
$group: {
_id: null,
data: {
$push: {
type: "$_id",
count: "$count"
}
},
lastUpdate: {
$first: "$lastUpdate"
}
}
},
{
$project: {
data: {
$arrayToObject: {
$map: {
input: "$data",
in: {
k: "$$this.type",
v: "$$this.count"
}
}
}
},
lastUpdate: 1
}
},
{
$addFields: {
"data.lastUpdate": "$lastUpdate"
}
},
{
"$replaceRoot": {
"newRoot": "$data"
}
}
Working Mongo playground

Mongo rank calculations based on count

I am trying the mongo rank calculation based on count and mentioned in below db schema. I am not getting the expecting results. Anyone help to resolve this?
Mongo Query:
db.company.aggregate([
{
"$group": {
"_id": {
"name1": "$name1",
"name2": "$name2",
},
"expanded": {
"$push": {
"name1": "$name1",
"name2": "$name2",
}
},
"count": { "$sum": 1 }
}
},
{ "$sort": { "count": -1 } },
{
$unwind: {
path: '$expanded',
includeArrayIndex: 'count'
}
}
]);
Expecting results like
Name|Count|Rank
Google|3|1
FB|2|2
Yahoo|1| 3
DB Schema :
{
"_id" : 1.0,
"name1" : "Yahoo",
"name2" : "Google",
"salary" : 1000.0
}
/* 2 */
{
"_id" : 2.0,
"name1" : "FB",
"name2" : "Google",
"salary" : 2000.0
}
/* 3 */
{
"_id" : 3.0,
"name1" : "Google",
"name2" : "FB",
"salary" : 1500.0
}
It seems like you should count name1 and name2 separately so you can create a temporary 2-element array and then run $unwind on that array. Additionally to get rank you have to $group by null to get single array of all groups, try:
db.collection.aggregate([
{
$project: {
key: [ "$name1", "$name2" ]
}
},
{
$unwind: "$key"
},
{
$group: {
_id: "$key",
count: { $sum: 1 }
}
},
{
$sort: {
count: -1
}
},
{
$group: {
_id: null,
groups: { $push: "$$ROOT" }
}
},
{
$unwind: {
path: '$groups',
includeArrayIndex: 'rank'
}
},
{
$project: {
_id: 0,
name: "$groups._id",
rank: { $add: [ "$rank", 1 ] },
count: "$groups.count"
}
}
])
Mongo Playground
try this
db.company.aggregate([
{
$group: {
_id:null,
names1: {$push: "$name1"},
names2: {$push:"$name2"},
}
},
{
$project: {
_id: 0,
names:{$concatArrays: ["$names1", "$names2"]}
}
},
{$unwind: "$names"},
{$sortByCount: "$names"},
{$addFields:{name: "$_id"}},
{
$group : {
_id: null,
records : { $push : {count : "$count", name : "$name"}}
}
},
{
$project: {
total_docs: {$size: "$records"},
records: 1
}
},
{$unwind: "$records"},
{
$project: {
_id: 0,
name: "$records.name",
count:"$records.count",
rank: {
$add:[
{
$subtract:["$total_docs", "$records.count"]
}, 1]
}
}
}])

Adding up values from array elements in MongoDB

I have done some aggregation to arrive at the below document structure for my given data:
{
"_id" : "test",
"NoOfQuestions" : 3.0,
"info" : [
{
"AnswerrCount" : 3
},
{
"AnswerrCount" : 3
},
{
"AnswerrCount" : 2
}
]
}
However, I am trying to add up all the values in the AnswerrCount column. So from the above example, I want another column that says TotalAnswers:8, (3+3+2) and then eventually have a from using the NoOfQuestions, FinalTotal:11, (8+3)
You can use $sum aggregation to add array values
db.collection.aggregate([
{ "$addFields": {
"TotalAnswers": {
"$sum": "$info.AnswerrCount"
},
"FinalTotal": {
"$add": [{ "$sum": "$info.AnswerrCount" }, "$NoOfQuestions"]
}
}}
])
db.collection.aggregate([{
$unwind: "$info"
}, {
$group: {
_id: null,
TotalAnswers: {
$sum: '$info.AnswerrCount'
},
doc: {
$first: '$$CURRENT'
}
}
}, {
$project: {
TotalAnswers: 1,
FinalTotal: {
'$add': ['$TotalAnswers', '$doc.NoOfQuestions']
},
_id: 0
}
}])

Matching in MongoDB returns unwanted results

I have a collection named flights. In that collection I have two fields: origin_country and the dest_country.
These fields just keep record of the origin country and the destination country of a particular flight.
I'm trying to write a query which will return:
All theinternational flights only (i.e - where the origin and destination countries are different).
The sum of each (i.e - # of occurences of that flight in the collection).
The problem with the query which I have already is that it's also returning the domestic flights when ran:
QUERY:
db.flights.aggregate([
{$match: {
checking_countries_dont_match: { $ne: ["origin_country", "dest_country"] }
} },
{$group: {
_id: {
origin_country: "$origin_country",
dest_country: "$dest_country"
},
"count": {$sum:1}
} },
{$sort: {"count":-1} }
])
DOCUMENT SAMPLES:
{
"_id" : "2675594611",
"origin_country" : "Germany",
"dest_country" : "United Arab Emirates",
"airline_name" : "etihad-airways-etd"
}
{
"_id" : "2661517182",
"origin_country" : "Thailand",
"dest_country" : "Thailand",
"airline_name" : "nok-air-nok",
}
UPDATE
I changed the query to the following, but I still get results where the origin and destination are the same:
db.flights.aggregate([
{ $project: {
dont_match: { $ne: ["origin_country", "dest_country"] },
origin_country: "$origin_country",
dest_country: "$dest_country",
airline_name: "$airline_name"
} },
{ $match: {
airline_name: "etihad-airways-etd",
dont_match: true
} },
{ $group: {
id: {
origin_country: "$origin_country",
dest_country: "$dest_country"
}
} }
]);
UPDATE 2
Wokring Query:
db.flights.aggregate([
{ $project: {
dont_match: { $ne: ["$origin_country", "$dest_country"] },
origin_country: "$origin_country",
dest_country: "$dest_country",
airline_name: "$airline_name"
} },
{ $match: {
airline_name: "etihad-airways-etd",
dont_match: true
} },
{ $group: {
_id: {
origin_country: "$origin_country",
dest_country: "$dest_country"
},
count: {$sum:1}
} },
{ $sort: {count:-1}}
]);
Thanks for the help everyone :)
All you need to do is to modify your query a little bit. Look at this example, should give you an idea:
db.flights.aggregate([
{
$project: {
dont_match: { $ne: ["$origin_country", "$dest_country"] },
...
}
},
{
$match: {
dont_match: true
}
},
...
]);
Can you please try below query whether it match your expectation or not:
db.flights.aggregate([
{ $project: {
dont_match: {$cmp: ['$origin_country', '$dest_country']},
origin_country: "$origin_country",
dest_country: "$dest_country"
} },
{ $match: { dont_match: {$ne: 0} } }
]);

MongoDB: elemMatch match the last element in an array

I have the data like below:
{
"order_id" : 1234567,
"order_pay_time" : 1437373297,
"pay_info" : [
{
"pay_type" : 0,
"pay_time" : 1437369046
},
{
"pay_type" : 0,
"pay_time" : 1437369123
},
{
"pay_type" : 0,
"pay_time" : 1437369348
}
]}
what I want to get is the last payment is of type 1, but $elemMatch just match the list where pay_type:1 exists, how can I match the orders which last payment is of "pay_type" : 1
You can use aggregation to get expected output. The query will be like following:
db.collection.aggregate({
$unwind: "$pay_info"
}, {
$match: {
"pay_info.pay_type": 1
}
}, {
$group: {
_id: "$_id",
"pay_info": {
$push: "$pay_info"
},
"order_id": {
$first: "$order_id"
},
"order_pay_time": {
$first: "$order_pay_time"
}
}
})
Moreover if you want latest pay_info.pay_time then you can sort it by descending order with limit 1, some what like following:
db.collection.aggregate({
$unwind: "$pay_info"
}, {
$match: {
"pay_info.pay_type": 1
}
}, {
$sort: {
"pay_info.pay_time": -1
}
}, {
$limit: 1
}, {
$group: {
_id: "$_id",
"pay_info": {
$push: "$pay_info"
},
"order_id": {
$first: "$order_id"
},
"order_pay_time": {
$first: "$order_pay_time"
}
}
})
Edit
Also you can use $redact to avoid $unwind like following:
db.collection.aggregate({
$match: {
"pay_info": {
$elemMatch: {
"pay_type": 1
}
}
}
}, {
$sort: {
"pay_info.pay_time": -1
}
}, {
$limit: 1
}, {
$redact: {
$cond: {
if: {
$eq: [{
"$ifNull": ["$pay_type", 1]
}, 1]
},
then: "$$DESCEND",
else: "$$PRUNE"
}
}
}).pretty()
Just found this thread for a similar problem I've had.
I ended up doing this, maybe that will be of interest to someone:
db.collection.find({
$where: function(){
return this.pay_info[this.pay_info.length-1].pay_type === 1
}
})