Aggregate Fields together - mongodb

I have the following structure as an input from which data needs to be aggregated:
I need to aggregate the data such that I end up with the following structure:
start: A {
tripdetails: [{
destination: B [{
duration: 10,
type: male
},
duration: 12,
type: female
},
duration: 9,
type: female
}]
]}
}
Basically I need to group "type" and "duration" together under the same destination.
I came up with the following query, but it produces a single "type" field for each "destination" rather than one for every "duration".
db.test.aggregate(
{
$group: {
_id: {"StationID": "$start", "EndStationID": "$destination"},
durations: {$addToSet: "$duration" },
usertypes: {$addToSet: "$type" }
}
},
{
$group: {
_id: "$_id.StationID",
Tripcount_out: {$sum: "durations"},
Trips_out: { $addToSet: { EndStationID: "$_id.EndStationID", Tripduration: "$durations", Usertype: "$usertypes"} }
}
}
)
My question is how I can achieve the structure described above.

You could try running the following aggregate pipeline:
// Two-pass grouping: first collect every trip per (start, destination) pair,
// then nest those per-destination lists under their start station.
db.test.aggregate([
  // Pass 1: one document per start/destination pair holding all of its trips.
  {
    $group: {
      _id: { StationID: "$start", EndStationID: "$destination" },
      details: { $push: { duration: "$duration", type: "$type" } }
    }
  },
  // Pass 2: one document per start station with its list of destinations.
  {
    $group: {
      _id: "$_id.StationID",
      tripdetails: {
        $push: { destination: "$_id.EndStationID", trips: "$details" }
      }
    }
  }
])
which yields:
{
"_id" : "A",
"tripdetails" : [
{
"destination" : "B",
"trips" : [
{
"duration" : 10,
"type" : "male"
},
{
"duration" : 9,
"type" : "female"
},
{
"duration" : 12,
"type" : "female"
}
]
}
]
}

Related

Need help to MongoDB aggregate $group state

I have a collection of 1000 documents like this:
{
"_id" : ObjectId("628b63d66a5951db6bb79905"),
"index" : 0,
"name" : "Aurelia Gonzales",
"isActive" : false,
"registered" : ISODate("2015-02-11T04:22:39.000+0000"),
"age" : 41,
"gender" : "female",
"eyeColor" : "green",
"favoriteFruit" : "banana",
"company" : {
"title" : "YURTURE",
"email" : "aureliagonzales#yurture.com",
"phone" : "+1 (940) 501-3963",
"location" : {
"country" : "USA",
"address" : "694 Hewes Street"
}
},
"tags" : [
"enim",
"id",
"velit",
"ad",
"consequat"
]
}
I want to group those by registration year and gender. For example, in 2014 there were 105 male and 131 female registrations. The final result should be documents like this:
{
_id:2014,
male:105,
female:131,
total:236
},
{
_id:2015,
male:136,
female:128,
total:264
}
I have tried till group by registered and gender like this:
db.persons.aggregate([
{ $group: { _id: { year: { $year: "$registered" }, gender: "$gender" }, total: { $sum: NumberInt(1) } } },
{ $sort: { "_id.year": 1,"_id.gender":1 } }
])
which returns documents like this:
{
"_id" : {
"year" : 2014,
"gender" : "female"
},
"total" : 131
}
{
"_id" : {
"year" : 2014,
"gender" : "male"
},
"total" : 105
}
Please guide me on how to get from this output to the desired result.
// Reads documents shaped like { _id: { year, gender }, total } and folds them
// into one document per year.
db.collection.aggregate([
  // Collapse to one document per year; each gender's count is kept as a
  // { k, v } pair so it can be converted to an object in the next stage.
  {
    $group: {
      _id: "$_id.year",
      gender: { $addToSet: { k: "$_id.gender", v: "$total" } },
      sum: { $sum: "$total" }
    }
  },
  // Turn the k/v pairs into an object keyed by gender.
  {
    $project: {
      g: { $arrayToObject: "$gender" },
      _id: 1,
      sum: 1
    }
  },
  // Keep only the female/male counts alongside the yearly sum.
  {
    $project: {
      _id: 1,
      "g.female": 1,
      "g.male": 1,
      sum: 1
    }
  }
])
Play
Just add one more group stage to your aggregation pipeline, like this:
// Counts registrations per year, split into male / female / total.
db.persons.aggregate([
  // Stage 1: count registrations per (year, gender) pair.
  {
    $group: {
      _id: { year: { $year: "$registered" }, gender: "$gender" },
      total: { $sum: NumberInt(1) }
    }
  },
  // Stage 2: fold the per-gender counts into one document per year.
  // Each $cond contributes the pair's count only when the gender matches.
  {
    $group: {
      _id: "$_id.year",
      male: {
        $sum: {
          $cond: {
            if: { $eq: ["$_id.gender", "male"] },
            then: "$total",
            else: 0
          }
        }
      },
      female: {
        $sum: {
          $cond: {
            if: { $eq: ["$_id.gender", "female"] },
            then: "$total",
            else: 0
          }
        }
      },
      total: { $sum: "$total" }
    }
  },
  // Stage 3: sort last. $group does not guarantee the order of its output
  // documents, so a $sort placed before the final $group does not order the
  // per-year results.
  { $sort: { _id: 1 } }
]);
Here's the working link. In the final $group stage we group by year and calculate the count for each gender conditionally; the total is simply the sum of the counts irrespective of gender.
Besides the solution with two $group stages that @Gibbs proposed in the comments,
You can achieve the result as below:
$group - Group by year of registered. Add gender value into genders array.
$sort - Order by _id.
$project - Decorate output documents.
3.1. male - Get the size of array from $filter the value of "male" in "genders" array.
3.2. female - Get the size of array from $filter the value of "female" in "genders" array.
3.3. total - Get the size of "genders" array.
Propose this method if you are expected to count and return the "male" and "female" gender fields.
db.collection.aggregate([
  // One document per registration year, carrying every person's gender.
  {
    $group: {
      _id: { $year: "$registered" },
      genders: { $push: "$gender" }
    }
  },
  // Ascending by year.
  { $sort: { _id: 1 } },
  // Derive the counts from the collected genders array.
  {
    $project: {
      _id: 1,
      // number of "male" entries
      male: {
        $size: {
          $filter: { input: "$genders", cond: { $eq: ["$$this", "male"] } }
        }
      },
      // number of "female" entries
      female: {
        $size: {
          $filter: { input: "$genders", cond: { $eq: ["$$this", "female"] } }
        }
      },
      // every entry, regardless of value
      total: { $size: "$genders" }
    }
  }
])
Sample Mongo Playground

mongodb: match, group by multiple fields, project and count

So I'm learning mongodb and I got a collection of writers to train.
Here I'm trying to count works, grouped by the country and gender of the author. This is what I have accomplished so far:
db.writers.aggregate([
{ "$match": { "gender": {"$ne": male}}},
{ "$group": {
"_id": {
"country_id": "$country_id",
"type": "$type"
},
}},
{ "$group": {
"_id": "$_id.country_id",
"literary_work": {
"$push": {
"type": "$_id.type",
"count": { "$sum": "$type" }
}
},
"total": { "$sum": "$type" }
}},
{ "$sort": { "country_id": 1 } },
{ "$project": {
"literary_work": { "$slice": [ "$literary_work", 3 ] },
"total": { "$sum": "$type" }
}}
])
Sadly, the output that I get is not the one I'm expecting:
"_id" : GREAT BRITAIN,
"literary_work" : [
{
"type" : "POEM",
"count" : 0
},
{
"type" : "NOVEL",
"count" : 0
},
{
"type" : "SHORT STORY",
"count" : 0
}
],
"total" : 0
Could anyone tell me where do I insert the count stage or what is my mistake?)
upd:
Data sample:
{
"_id" : ObjectId("5f115c5d5f62f9f482cd7a49"),
"author" : George Sand,
"gender" : female,
"country_id" : FRANCE,
"title": "Consuelo",
"type" : "NOVEL",
}
Expected result (NB! this is a result for both genders):
{
"_id" : FRANCE,
"count" : 59.0,
"literary_work" : [
{
"type" : "POEM",
"count" : 14.0
},
{
"type" : "NOVEL",
"count" : 34.0
},
{
"type" : "SHORT STORY",
"count" : 11.0
}
]
}
Your implementation is on the right track, but a few things are missing:
the count is missing from the first $group
the second $group can then sum the first group's count to get the total number of literary works
the $project stage in your query is not needed
Corrected query:
db.writers.aggregate([
  // Exclude authors whose gender is "male".
  { $match: { gender: { $ne: "male" } } },
  // Count works per (country, type) pair -- this count was missing.
  {
    $group: {
      _id: { country_id: "$country_id", type: "$type" },
      count: { $sum: 1 }
    }
  },
  // Roll the per-type counts up to one document per country.
  {
    $group: {
      _id: "$_id.country_id",
      // country-wide count is the sum of the per-type counts from the first group
      count: { $sum: "$count" },
      // each entry carries its own per-type count
      literary_work: { $push: { type: "$_id.type", count: "$count" } }
    }
  },
  // After grouping, the country lives in _id, so sort on _id (not country_id).
  { $sort: { "_id": 1 } }
])
Working Playground: https://mongoplayground.net/p/JWP7qdDY6cc

Use of something like $group inside $addFields

Below is one of my document from collection movies:
{
"_id" : 4,
"startYear" : 1892,
"title" : "Un bon bock",
"originalTitle" : "Un bon bock",
"rating" : 6.4,
"type" : "short",
"numVotes" : 105,
"genres" : [
"Short",
"Animation"
]
}
I would like every document to have a field called normalizedRating that is calculated as follows:
normalizedRating = (rating - min(rating)) / (max(rating) - min(rating))
So, I get document like:
{
"_id" : 4,
"startYear" : 1892,
"title" : "Un bon bock",
"originalTitle" : "Un bon bock",
"rating" : 6.4,
"type" : "short",
"numVotes" : 105,
"genres" : [
"Short",
"Animation"
],
"normalizedRating": 6.3
}
I am able to get the above result by using two different queries. I'm curious to know if it can be done using a single query.
If You wanted to do it in one query, then try either one of these two :
Query 1 :
// Adds normalizedRating = (rating - min) / (max - min) to every document.
// Note: $push of $$ROOT gathers the whole collection into a single group
// document, so this only suits collections that fit within the document and
// memory limits.
db.collection.aggregate([
  // Compute the global min/max rating while keeping every original document.
  {
    $group: {
      _id: null,
      maxRating: { $max: "$rating" },
      minRating: { $min: "$rating" },
      data: { $push: "$$ROOT" }
    }
  },
  // Back to one document per original, each still carrying minRating/maxRating.
  { $unwind: "$data" },
  {
    $addFields: {
      "data.normalizedRating": {
        $cond: [
          // If all ratings are equal the range is 0 and $divide would fail
          // with a divide-by-zero error; report 0 instead.
          { $eq: ["$maxRating", "$minRating"] },
          0,
          {
            $divide: [
              { $subtract: ["$data.rating", "$minRating"] },
              { $subtract: ["$maxRating", "$minRating"] }
            ]
          }
        ]
      }
    }
  },
  // Promote the enriched original document back to the top level.
  { $replaceRoot: { newRoot: "$data" } }
]);
Test : MongoDB-playground
Query 2 :
// Adds normalizedRating = (rating - min) / (max - min) to every document,
// computing min/max in a separate $facet branch.
db.collection.aggregate([
  {
    $facet: {
      // Branch 1: every document, unchanged.
      data: [{ $match: {} }],
      // Branch 2: a single document holding the global min/max rating.
      ratingValues: [
        {
          $group: {
            _id: null,
            maxRating: { $max: "$rating" },
            minRating: { $min: "$rating" }
          }
        }
      ]
    }
  },
  // One document per original, each paired with the single ratingValues entry.
  { $unwind: "$data" },
  { $unwind: "$ratingValues" },
  {
    $addFields: {
      "data.normalizedRating": {
        $cond: [
          // If all ratings are equal the range is 0 and $divide would fail
          // with a divide-by-zero error; report 0 instead.
          { $eq: ["$ratingValues.maxRating", "$ratingValues.minRating"] },
          0,
          {
            $divide: [
              { $subtract: ["$data.rating", "$ratingValues.minRating"] },
              { $subtract: ["$ratingValues.maxRating", "$ratingValues.minRating"] }
            ]
          }
        ]
      }
    }
  },
  // $replaceRoot keeps only `data`, which already discards ratingValues, so no
  // separate $project stage is needed to remove it.
  { $replaceRoot: { newRoot: "$data" } }
]);
Test : MongoDB-playground
At end of the day if your dataset is medium one then these can perform good, but on huge datasets these might or might not work well - I would say to split this task into two to do some work in code or multiple calls if really needed or try to implement the same task using mapReduce if aggregation is really slow.

Compare integers stored as Strings in Mongodb

In the below collection, column "qty" holds the integer values but the datatype is string.
I want to compare the "qty" field with an integer in the aggregate and "warehouse" field with a string "A". ("qty" > 2 and "warehouse" = "A")
[Can't change the datatype in the collection to integer as huge dependency is present]
Edit : Need to retrieve all the columns and all the documents matching the criteria.
Query : getting improper results
db.runCommand(
{
aggregate: "products", pipeline: [
{
$match: {
instock: {
$elemMatch: {
warehouse: "A",
qty: { $gt: "2" }
}
}
}
},
{ $project: { _id: 0 } }],
cursor: { batchSize: 200 }
});
Result : not getting documents where item = journal though it satisfies the conditions
/* 1 */
{
"item" : "paper",
"instock" : [
{
"warehouse" : "A",
"qty" : "60"
},
{
"warehouse" : "B",
"qty" : "15"
}
]
},
/* 2 */
{
"item" : "planner",
"instock" : [
{
"warehouse" : "A",
"qty" : "22"
},
{
"warehouse" : "B",
"qty" : "5"
}
]
}
Products Collection
[
{
"item": "journal",
"instock": [
{
"warehouse": "A",
"qty": "11"
},
{
"warehouse": "C",
"qty": "15"
}
]
},
{
"item": "paper",
"instock": [
{
"warehouse": "A",
"qty": "60"
},
{
"warehouse": "B",
"qty": "15"
}
]
},
{
"item": "planner",
"instock": [
{
"warehouse": "A",
"qty": "22"
},
{
"warehouse": "B",
"qty": "5"
}
]
}
]
Getting improper results as greater than operator in this case is working lexicographically but it should work like integers. Though I tried converting that to double but I am getting no results.
Query with $convert to double : no result
db.runCommand(
{
aggregate: "products", pipeline: [
//{ $match: { "item": { $in: ["planner", "paper","journal"] } } },
{
$match: {
instock: {
$elemMatch: {
warehouse: "A",
qty: {
$gt: [
{$convert:{ input: "$qty", to: "double" }}, 5]
}
}
}
}
},
{ $project: { _id: 0 } }],
cursor: { batchSize: 200 }
});
Try this:
db.products.aggregate([
  // One document per instock entry so each entry can be tested on its own.
  { $unwind: "$instock" },
  // Keep entries in warehouse "A" whose qty, converted to an integer, exceeds 2.
  {
    $match: {
      $expr: {
        $and: [
          { $eq: ["$instock.warehouse", "A"] },
          { $gt: [{ $toInt: "$instock.qty" }, 2] }
        ]
      }
    }
  },
  // Reassemble one document per product from the surviving entries.
  {
    $group: {
      _id: "$_id",
      item: { $first: "$item" },
      instock: { $push: "$instock" }
    }
  },
  // Hide _id in the output.
  { $project: { _id: 0 } }
])
MongoPlayground
Try this; it uses $filter to retain the array elements that match the criteria:
db.runCommand({
  aggregate: "products",
  pipeline: [
    // Cheap pre-filter: only products that have some entry in warehouse "A".
    { $match: { "instock.warehouse": "A" } },
    // instockCheck = the instock entries with integer qty > 2 in warehouse "A".
    {
      $addFields: {
        instockCheck: {
          $filter: {
            input: "$instock",
            as: "entry",
            cond: {
              $and: [
                { $gt: [{ $toInt: "$$entry.qty" }, 2] },
                { $eq: ["$$entry.warehouse", "A"] }
              ]
            }
          }
        }
      }
    },
    // Keep documents whose instockCheck compares greater than an empty array.
    { $match: { instockCheck: { $gt: [] } } },
    // Drop the helper field and _id from the output.
    { $project: { instockCheck: 0, _id: 0 } }
  ],
  cursor: { batchSize: 200 }
});
Test : MongoDB-Playground

count array occurrences across all documents with mongo

Im trying to pull data on a collection of documents which looks like:
[
{
name: 'john',
sex: 'male',
hobbies: ['football', 'tennis', 'swimming']
},
{
name: 'betty'
sex: 'female',
hobbies: ['football', 'tennis']
},
{
name: 'frank'
sex: 'male',
hobbies: ['football', 'tennis']
}
]
I am trying to use the aggregation framework to present the data, split by sex, counting the most common hobbies. The results should look something like.
{ _id: 'male',
total: 2,
hobbies: {
football: 2,
tennis: 2,
swimming: 1
}
},
{ _id: 'female',
total: 1,
hobbies: {
football: 1,
tennis: 1
}
}
So far I can get the total of each sex, but i'm not sure how I could possibly use unwind to get the totals of the hobbies array.
My code so far:
collection.aggregate([
{
$group: {
_id: '$sex',
total: { $sum: 1 }
}
}
])
Personally I am not a big fan of transforming "data" into the names of keys in a result. The aggregation framework principles tend to agree, as this sort of operation is not supported either.
So the personal preference is to maintain "data" as "data" and accept that the processed output is actually better and more logical to a consistent object design:
db.people.aggregate([
  // Per sex: count the people and stack their hobbies arrays (array of arrays).
  {
    $group: {
      _id: "$sex",
      hobbies: { $push: "$hobbies" },
      total: { $sum: 1 }
    }
  },
  // Two unwinds flatten the array of arrays down to single hobby values.
  { $unwind: "$hobbies" },
  { $unwind: "$hobbies" },
  // Count occurrences per (sex, hobby), carrying the per-sex total along.
  {
    $group: {
      _id: { sex: "$_id", hobby: "$hobbies" },
      total: { $first: "$total" },
      hobbyCount: { $sum: 1 }
    }
  },
  // Regroup to one document per sex with a list of { name, count } entries.
  {
    $group: {
      _id: "$_id.sex",
      total: { $first: "$total" },
      hobbies: { $push: { name: "$_id.hobby", count: "$hobbyCount" } }
    }
  }
])
Which produces a result like this:
[
{
"_id" : "female",
"total" : 1,
"hobbies" : [
{
"name" : "tennis",
"count" : 1
},
{
"name" : "football",
"count" : 1
}
]
},
{
"_id" : "male",
"total" : 2,
"hobbies" : [
{
"name" : "swimming",
"count" : 1
},
{
"name" : "tennis",
"count" : 2
},
{
"name" : "football",
"count" : 2
}
]
}
]
So the initial $group does the count per "sex" and stacks up the hobbies into an array of arrays. Then to de-normalize you $unwind twice to get singular items, $group to get the totals per hobby under each sex and finally regroup an array for each sex alone.
It's the same data, it has a consistent and organic structure that is easy to process, and MongoDB and the aggregation framework was quite happy in producing this output.
If you really must convert your data to names of keys (and I still recommend you do not, as it is not a good design pattern to follow), then doing such a transformation from the final state is fairly trivial for client code processing. As a basic JavaScript example suitable for the shell:
// Same pipeline as before, materialised into an array for client-side reshaping.
var out = db.people.aggregate([
  // Per sex: head count plus every person's hobbies array.
  {
    $group: {
      _id: "$sex",
      hobbies: { $push: "$hobbies" },
      total: { $sum: 1 }
    }
  },
  // Flatten the nested hobbies arrays (outer level, then inner level).
  { $unwind: "$hobbies" },
  { $unwind: "$hobbies" },
  // Occurrences of each hobby within each sex.
  {
    $group: {
      _id: { sex: "$_id", hobby: "$hobbies" },
      total: { $first: "$total" },
      hobbyCount: { $sum: 1 }
    }
  },
  // Back to one document per sex with its { name, count } list.
  {
    $group: {
      _id: "$_id.sex",
      total: { $first: "$total" },
      hobbies: { $push: { name: "$_id.hobby", count: "$hobbyCount" } }
    }
  }
]).toArray();
// Reshape each aggregated document so that `hobbies` becomes an object of
// { hobbyName: count }, with the most frequent hobbies inserted first.
out.forEach(function(doc) {
  var countsByName = {};
  // A sort comparator must return a negative, zero or positive number.
  // Returning the boolean (a.count < b.count) never yields a negative value,
  // so the resulting order depends on the engine's sort algorithm.
  doc.hobbies.sort(function(a, b) { return b.count - a.count; });
  doc.hobbies.forEach(function(hobby) {
    countsByName[hobby.name] = hobby.count;
  });
  doc.hobbies = countsByName;
  printjson(doc);
});
And then you are basically processing each cursor result into the desired output form, which really isn't an aggregation function that is really required on the server anyway:
{
"_id" : "female",
"total" : 1,
"hobbies" : {
"tennis" : 1,
"football" : 1
}
}
{
"_id" : "male",
"total" : 2,
"hobbies" : {
"tennis" : 2,
"football" : 2,
"swimming" : 1
}
}
It should also be fairly trivial to implement that sort of manipulation in stream processing of the cursor result to transform it as required, as it is basically the same logic.
On the other hand, you can always implement all the manipulation on the server using mapReduce instead:
db.people.mapReduce(
  // map: emit one partial result per person, keyed by sex. Each hobby starts
  // with a count of 1.
  function() {
    emit(
      this.sex,
      {
        "total": 1,
        "hobbies": this.hobbies.map(function(key) {
          return { "name": key, "count": 1 };
        })
      }
    );
  },
  // reduce: merge the partial results for one sex. The returned value has the
  // same { total, hobbies: [{ name, count }] } shape as the emitted values.
  function(key, values) {
    var obj = {},
        reduced = {
          "total": 0,
          "hobbies": []
        };
    values.forEach(function(value) {
      reduced.total += value.total;
      value.hobbies.forEach(function(hobby) {
        if ( !obj.hasOwnProperty(hobby.name) )
          obj[hobby.name] = 0;
        obj[hobby.name] += hobby.count;
      });
    });
    reduced.hobbies = Object.keys(obj).map(function(key) {
      return { "name": key, "count": obj[key] };
    }).sort(function(a, b) {
      // Most frequent first. The comparator must return a number; a boolean
      // (a.count < b.count) never yields a negative value and sorts unreliably.
      return b.count - a.count;
    });
    return reduced;
  },
  {
    "out": { "inline": 1 },
    // finalize: convert the hobbies list into an object of { name: count }.
    "finalize": function(key, value) {
      var obj = {};
      value.hobbies.forEach(function(hobby) {
        obj[hobby.name] = hobby.count;
      });
      value.hobbies = obj;
      return value;
    }
  }
)
Here mapReduce has its own distinct style of output, but the same principles are used in accumulation and manipulation, even if it is likely not as efficient as the aggregation framework:
"results" : [
{
"_id" : "female",
"value" : {
"total" : 1,
"hobbies" : {
"football" : 1,
"tennis" : 1
}
}
},
{
"_id" : "male",
"value" : {
"total" : 2,
"hobbies" : {
"football" : 2,
"tennis" : 2,
"swimming" : 1
}
}
}
]
At the end of the day, I still say that the first form of processing is the most efficient and provides to my mind the most natural and consistent working of the data output, without even attempting to convert the data points into the names of keys. It's probably best to consider following that pattern, but if you really must, then there are ways to manipulate results into a desired form in various approaches to processing.
Since mongoDB version 3.4 you can use $reduce to avoid the first grouping by sex, which means holding the entire collection in two documents. You can also avoid the need for code by using $arrayToObject
// Counts each hobby per sex and derives a per-sex head count from the set of
// contributing _id values.
// NOTE(review): `_id.hobbies` is later used as the key `k` passed to
// $arrayToObject, so this presumably expects `hobbies` to be a single value
// per input document at this point (e.g. after an $unwind) -- confirm against
// the playground.
db.collection.aggregate([
{
// Stage 1: per (sex, hobbies) pair, count documents and collect their _id values.
$group: {
_id: {sex: "$sex", hobbies: "$hobbies"},
count: {$sum: 1},
totalIds: {$addToSet: "$_id"}
}
},
{
// Stage 2: per sex, gather { k, v } pairs and the per-pair id arrays
// (totalIds becomes an array of arrays).
$group: {
_id: "$_id.sex",
hobbies: {$push: {k: "$_id.hobbies", v: "$count"}},
totalIds: {$push: "$totalIds"}
}
},
{
// Stage 3: turn the k/v pairs into an object and flatten totalIds into one array.
$set: {
hobbies: {$arrayToObject: "$hobbies"},
totalIds: {
$reduce: {
input: "$totalIds",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}}
}
}
},
{
// Stage 4: the per-sex total is emitted as `count`; the helper array is removed.
// NOTE(review): $setIntersection over the single flattened array presumably
// acts as a de-duplication step, so $size yields the number of distinct
// _id values -- confirm.
$set: {
count: {$size: {$setIntersection: "$totalIds"}},
totalIds: "$$REMOVE"
}
}
])
Which works if you have an ObjectId.
Playground example 3.4
Otherwise, you can start with $unwind and $group, or since mongoDB version 4.4 you can add an ObjectId with a stage:
{
$set: {
o: {
$function: {
"body": "function (x) {x._id=new ObjectId(); return x}",
"args": [{_id: 1}],
"lang": "js"
}
}
}
},
Playground example creating _id
Since mongoDB version 5.0 you can calculate the total using $setWindowFields:
db.collection.aggregate([
  // Attach the number of documents in each sex partition to every document.
  {
    $setWindowFields: {
      partitionBy: "$sex",
      output: { totalCount: { $count: {} } }
    }
  },
  // One document per individual hobby.
  { $unwind: "$hobbies" },
  // Count occurrences per (sex, hobby), carrying the partition total along.
  {
    $group: {
      _id: { sex: "$sex", hobbies: "$hobbies" },
      count: { $sum: 1 },
      totalCount: { $first: "$totalCount" }
    }
  },
  // One document per sex with its hobbies as { k, v } pairs.
  {
    $group: {
      _id: "$_id.sex",
      hobbies: { $push: { k: "$_id.hobbies", v: "$count" } },
      total: { $first: "$totalCount" }
    }
  },
  // Convert the k/v pairs into an object keyed by hobby name.
  { $set: { hobbies: { $arrayToObject: "$hobbies" } } }
])
Playground example 5.0