Is it possible to group by values across multiple columns?
Let's say I'm storing interactions between people by day, and keep track of from's and to's with a count as follows.
db.collection =
[
{ from : 'bob', to : 'mary', day : 1, count : 2 },
{ from : 'bob', to : 'steve', day : 2, count : 1 },
{ from : 'mary', to : 'bob', day : 1, count : 3 },
{ from : 'mary', to : 'steve', day : 3, count : 1 },
{ from : 'steve', to : 'bob', day : 2, count : 2 },
{ from : 'steve', to : 'mary', day : 1, count : 1 }
]
This allows me to get all interactions for, let's say, 'bob' with anyone by grouping on from: and summing count:.
Now I want to get all interactions for a user, so basically group by values across from: and to:. Essentially, sum up count: for each name, regardless of whether it was in from: or to:.
[UPDATE]
The desired output would be:
[
{ name : 'bob', count : 8 },
{ name : 'mary', count : 7 },
{ name : 'steve', count : 5 }
]
The easiest would be to create a new column names: and store from: and to: inside, then $unwind, but that seems wasteful.
Any hints?
Thanks
Is it possible to group by values across multiple columns?
Yes, it's possible in MongoDB to group values across different columns.
It's very straightforward to do it via MapReduce. But it's also possible to do it with the aggregation framework, even if you don't store an array of participants. (If you had an array of names with both participants, then it would just be an $unwind and a $group - quite simple, and I think more elegant than either MapReduce or the pipeline you'd have to use with the current schema.)
Pipeline that works with your schema as is:
// Sums `count` per person across both the `from` and the `to` column, using the
// schema as is. The result is one { name, count } document per person.
// NOTE: a name that occurs only in `from` or only in `to` never finds an equal
// pair in the final $project stage and is therefore dropped from the result.
db.collection.aggregate([
    // 1. Total sent per sender; keep every (recipient, count) pair for the second pass.
    {
        "$group": {
            "_id": "$from",
            "sum": { "$sum": "$count" },
            "tos": { "$push": { "to": "$to", "count": "$count" } }
        }
    },
    // 2. One document per (sender, recipient) pair again.
    { "$unwind": "$tos" },
    // 3. Park the sender's id and total under `prev` so they survive the regrouping.
    {
        "$project": {
            "prev": { "id": "$_id", "sum": "$sum" },
            "tos": 1
        }
    },
    // 4. Total received per recipient; collect the distinct sender totals seen so far.
    {
        "$group": {
            "_id": "$tos.to",
            "count": { "$sum": "$tos.count" },
            "prev": { "$addToSet": "$prev" }
        }
    },
    { "$unwind": "$prev" },
    // 5. Collapse everything into one document holding two sets:
    //    t = received totals per name, f = sent totals per name.
    {
        "$group": {
            "_id": "1",
            "t": { "$addToSet": { "id": "$_id", "c": "$count" } },
            "f": { "$addToSet": { "id": "$prev.id", "c": "$prev.sum" } }
        }
    },
    // 6. Cross product of the two sets ...
    { "$unwind": "$t" },
    { "$unwind": "$f" },
    // 7. ... of which only the pairs with the same name on both sides are meaningful;
    //    all other pairs are labelled "nobody" so they can be filtered out below.
    {
        "$project": {
            "name": {
                "$cond": [
                    { "$eq": [ "$t.id", "$f.id" ] },
                    "$t.id",
                    "nobody"
                ]
            },
            "count": { "$add": [ "$t.c", "$f.c" ] },
            "_id": 0
        }
    },
    // 8. Drop the mismatched pairs.
    { "$match": { "name": { "$ne": "nobody" } } }
]);
On your sample input the output is:
{
"result" : [
{
"name" : "bob",
"count" : 8
},
{
"name" : "mary",
"count" : 7
},
{
"name" : "steve",
"count" : 5
}
],
"ok" : 1
}
$unwind can be expensive. Wouldn't this be easier to query?
// Denormalized alternative (same illustrative notation as the question):
// every interaction is stored twice, once under each participant's `name`.
// The sender's copy carries `to`, the recipient's copy carries `from`, so the
// per-person total becomes a single $group on `name` summing `count`.
db.collection =
[
    // bob -> mary
    { name : 'bob',   to   : 'mary',  day : 1, count : 2 },
    { name : 'mary',  from : 'bob',   day : 1, count : 2 },
    // bob -> steve
    { name : 'bob',   to   : 'steve', day : 2, count : 1 },
    { name : 'steve', from : 'bob',   day : 2, count : 1 },
    // mary -> bob
    { name : 'mary',  to   : 'bob',   day : 1, count : 3 },
    { name : 'bob',   from : 'mary',  day : 1, count : 3 },
    // mary -> steve
    { name : 'mary',  to   : 'steve', day : 3, count : 1 },
    { name : 'steve', from : 'mary',  day : 3, count : 1 },
    // steve -> bob
    { name : 'steve', to   : 'bob',   day : 2, count : 2 },
    { name : 'bob',   from : 'steve', day : 2, count : 2 },
    // steve -> mary
    { name : 'steve', to   : 'mary',  day : 1, count : 1 },
    { name : 'mary',  from : 'steve', day : 1, count : 1 }
]
[Update]
With your existing structure, here's how you can do it with Map-Reduce, although this isn't really suited to real-time results. It will be slower overall, but it is likely more efficient than a massive $unwind operation in the aggregation framework:
// Recreate the sample collection from scratch.
db.so.drop();
db.so.insert([
    { from: 'bob', to: 'mary', day: 1, count: 2 },
    { from: 'bob', to: 'steve', day: 2, count: 1 },
    { from: 'mary', to: 'bob', day: 1, count: 3 },
    { from: 'mary', to: 'steve', day: 3, count: 1 },
    { from: 'steve', to: 'bob', day: 2, count: 2 },
    { from: 'steve', to: 'mary', day: 1, count: 1 }
]);

// Inline map-reduce over the whole collection: every document is emitted once
// under its sender and once under its recipient, and the reducer adds up the
// counts emitted for each name.
db.runCommand({
    "mapreduce": "so", // the collection populated above
    "map": function () {
        emit(this.from, { count: this.count });
        emit(this.to, { count: this.count });
    },
    "reduce": function (name, values) {
        var total = 0;
        for (var i = 0; i < values.length; i++) {
            total += values[i].count;
        }
        return { count: total };
    },
    query: {},          // no filter: use every document
    out: { inline: 1 }  // return the results in the command reply
});
which produces;
{
"results" : [
{
"_id" : "bob",
"value" : {
"count" : 8
}
},
{
"_id" : "mary",
"value" : {
"count" : 7
}
},
{
"_id" : "steve",
"value" : {
"count" : 5
}
}
],
"timeMillis" : 1,
"counts" : {
"input" : 6,
"emit" : 12,
"reduce" : 3,
"output" : 3
},
"ok" : 1
}
Related
The only thing I am trying to do is get the average of Emision_CO2 consumed at 10pm across all the days in location: 1. The collection, db.datos_sensores2, contains documents like:
{
"_id" : ObjectId("609c2c2d420a73728827e87f"),
"timestamp" : ISODate("2020-07-01T02:15:00Z"),
"sensor_id" : 1,
"location_id" : 1,
"medidas" : [
{
"tipo_medida" : "Temperatura",
"valor" : 14.03,
"unidad" : "ºC"
},
{
"tipo_medida" : "Humedad_relativa",
"valor" : 84.32,
"unidad" : "%"
}
]
}
{
"_id" : ObjectId("609c2c2d420a73728827e880"),
"timestamp" : ISODate("2020-07-01T02:15:00Z"),
"sensor_id" : 2,
"location_id" : 1,
"medidas" : [
{
"tipo_medida" : "Emision_CO2",
"valor" : 1.67,
"unidad" : "gCO2/m2"
},
{
"tipo_medida" : "Consumo_electrico",
"valor" : 0.00155,
"unidad" : "kWh/m2"
}
]
}
I wrote this:
// Attempt to average Emision_CO2 for one hour of the day at location_id 1.
// NOTE(review): this is the non-working version from the question; see the
// per-stage notes for the mismatches visible against the sample documents.
db.datos_sensores2.aggregate([
// The sliced array is written under the key "me-didas", while the later stages
// read "medidas". The sample documents show timestamp as an ISODate, whereas
// $dateFromString presumably expects a string - TODO confirm.
{$project:{timestamp:{$dateFromString:{dateString:'$timestamp'}},"_id":0, "me-didas":{$slice:["$medidas",-1]},"location_id":1}},
// Adds the hour of the timestamp as Hora.
{$addFields:{Hora:{$hour:"$timestamp"}}},
// Filters on hour 10; the sample documents have a timestamp at 02:15Z.
{$match:{'Hora':{$in:[10]},'medidas.tipo_medida':"Emision_CO2", "location_id":1}},
// Averages "medidas.valores"; the sample documents name this field "valor".
{$group:{ _id: null, Avg_Emision_CO2:{$avg: "$medidas.valores"}}}])
But nothing happens.
Please refer to https://mongoplayground.net/p/-LqswomHWsY
I noticed a few things. First of all, the hour in the example above comes out as 2, not 10. Second, the field names are not correct, so I have updated them.
[
    // Emit one document per element of the medidas array.
    { $unwind: { path: '$medidas' } },
    // Add the hour of the timestamp as Hora.
    { $addFields: { Hora: { $hour: '$timestamp' } } },
    // Keep only the Emision_CO2 measurements taken at hour 2 for location_id 1.
    {
        $match: {
            Hora: { $in: [2] },
            'medidas.tipo_medida': 'Emision_CO2',
            location_id: 1
        }
    },
    // Average the remaining values into a single result document.
    {
        $group: {
            _id: null,
            Avg_Emision_CO2: { $avg: '$medidas.valor' }
        }
    }
]
Pipeline stages:
unwind: since $medidas is an array, we unwind it so that it is easy to filter for only "Emision_CO2"
addFields: add the hour from timestamp
match: keep only the documents where "medidas.tipo_medida" is "Emision_CO2" (and the hour and location_id match)
group: compute the average
I am trying to filter my search results by id and version from a list of inputs. If the version is not provided, then get the latest for that ID. The query below works when I have both id and version. However, for id: 2, I need only the latest version to be returned.
// Look up several (id, version) pairs in one query by OR-ing one clause per pair.
// NOTE(review): this is the failing version from the question; the third clause
// is what the "unknown operator: $max" error reported below it refers to.
db.mycollection.find({
$or: [
{
id: 1,
version: 1
},
{
id : 1,
version: 2
},
{
id : 2,
// Intended meaning: no version supplied, so pick the highest one for this id.
version: {$max: "$version"} // get the latest version if not provided
}
]})
I get the below error
{
"message" : "unknown operator: $max",
"ok" : 0,
"code" : 2,
"codeName" : "BadValue",
"name" : "MongoError"}
The aggregation query using facets can get documents for the two different conditions:
// Runs both lookups in a single pass and merges their results:
//   with_version    - exact (id, version) pairs
//   without_version - the highest-version document for an id given without a version
db.myCollection.aggregate([
    {
        $facet: {
            with_version: [
                {
                    $match: {
                        $or: [
                            { id: 1, version: 1 },
                            { id: 3, version: 2 }
                        ]
                    }
                }
            ],
            without_version: [
                { $match: { id: 2 } },
                // Highest version first, then keep just that one document.
                { $sort: { version: -1 } },
                { $limit: 1 }
            ]
        }
    },
    // Merge the two facet arrays into one list ...
    {
        $project: {
            docs: { $concatArrays: ["$with_version", "$without_version"] }
        }
    },
    // ... and turn each entry back into a top-level document.
    { $unwind: "$docs" },
    {
        $project: {
            _id: "$docs._id",
            id: "$docs.id",
            version: "$docs.version"
        }
    }
])
Using the following documents as input:
{ "_id" : "a", "id" : 1, "version" : 1 }
{ "_id" : "b", "id" : 2, "version" : 1 }
{ "_id" : "c", "id" : 3, "version" : 2 }
{ "_id" : "d", "id" : 2, "version" : 2 }
{ "_id" : "e", "id" : 4, "version" : 2 }
{ "_id" : "f", "id" : 3, "version" : 1 }
{ "_id" : "g", "id" : 3, "version" : 0 }
{ "_id" : "h", "id" : 2, "version" : 0 }
The result shows the matching documents for { id: 1, version: 1 }, { id: 3, version: 2 }, and the maximum version number for id: 2.
{ "_id" : "a", "id" : 1, "version" : 1 }
{ "_id" : "c", "id" : 3, "version" : 2 }
{ "_id" : "d", "id" : 2, "version" : 2 }
Considering the following document in my mongo DB instance :
{
"_id": 1,
"people": [
{"id": 1, "name": "foo"},
{"id": 2, "name": "bar"},
/.../
],
"stats": [
{"peopleId": 1, "workHours": 24},
{"peopleId": 2, "workHours": 36},
/.../
]
}
Each element in my collection represents the work of every employee in my company for one week. As an important note, peopleId may change from one week to another!
I would like to get all weeks where foo worked more than 24 hours. As you can see, the format is rather awkward, since the people's names and the work hours are stored separately in my database. A simple $and is not enough.
I wonder whether I can achieve this query using $ and $elemMatch.
Can I use these to join the "people" entities with the "stats" entities?
Query to get the weeks in which foo worked more than 24 hours:
// Finds the documents (weeks) in which "foo" has a stats entry above 24 hours.
db.collection.aggregate([
    // Pair every people entry with every stats entry of the same document.
    { $unwind: { path: "$people" } },
    { $unwind: { path: "$stats" } },
    // Only the pairs whose person is foo are of interest.
    { $match: { "people.name": "foo" } },
    // Per document, collect foo's id when a pair refers to the same person and
    // exceeds 24 hours; every other pair contributes a marker string instead.
    {
        $group: {
            _id: "$_id",
            peopleIdMoreThan24: {
                $addToSet: {
                    $cond: {
                        if: {
                            $and: [
                                { "$eq": ["$people.id", "$stats.peopleId"] },
                                { "$gt": ["$stats.workHours", 24] }
                            ]
                        },
                        then: "$people.id",
                        else: "Not satisfying the condition"
                    }
                }
            }
        }
    },
    // Flatten the set and discard the marker entries.
    { $unwind: { path: "$peopleIdMoreThan24" } },
    { $match: { "peopleIdMoreThan24": { $nin: ["Not satisfying the condition"] } } }
]);
Data in collection:-
/* 1 */
{
"_id" : 1,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 24
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
/* 2 */
{
"_id" : 2,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 25
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
/* 3 */
{
"_id" : 3,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 25
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
Output:-
The output has the document id and the people id of foo for each document in which foo worked more than 24 hours.
/* 1 */
{
"_id" : 3,
"peopleIdMoreThan24" : 1
}
/* 2 */
{
"_id" : 2,
"peopleIdMoreThan24" : 1
}
I have been learning MongoDB, and while doing so I tried to use aggregation on my database collection. I filtered the employees with $match and grouped their details by age. My question is: is it possible to display the other key-value pairs once they pass the age criteria?
// Groups employees aged 23 or older by age, counting them and collecting
// their distinct names per age.
db.employee.aggregate([
    // Keep only the employees that pass the age criteria.
    { $match: { age: { $gte: 23 } } },
    {
        $group: {
            _id: '$age',                 // one result document per age
            total: { $sum: 1 },          // number of employees of that age
            name: { $addToSet: '$name' } // distinct names of that age
        }
    }
])
and the output was like this
{ "_id" : 27, "total" : 2, "name" : [ "indhu", "logesh" ] }
{ "_id" : 26, "total" : 1, "name" : [ "keerthana" ] }
{ "_id" : 25, "total" : 1, "name" : [ "sneha" ] }
{ "_id" : 24, "total" : 1, "name" : [ "dhiva" ] }
{ "_id" : 23, "total" : 1, "name" : [ "elango" ] }
where _id denotes their age.
The structure is the following:
{
"_id" : "79f00e2f-5ff6-42e9-a341-3d50410168de",
"bookings" : [
{
"name" : "name1",
"email" : "george_bush#gov.us",
"startDate" : ISODate("2013-12-31T22:00:00Z"),
"endDate" : ISODate("2014-01-09T22:00:00Z")
},
{
"name" : "name2",
"email" : "george_bush#gov.us",
"startDate" : ISODate("2014-01-19T22:00:00Z"),
"endDate" : ISODate("2014-01-24T22:00:00Z")
}
],
"name" : "Hotel0",
"price" : 0,
"rating" : 2
}
Now, I want to generate a report telling me how many bookings were made, grouped by booking month (assume that only booking start date matters) and also grouped by hotels rating.
I expect the answer to be like that:
{
{
rating: 0,
counts: {
month1: 10,
month2: 20,
...
month12: 7
}
}
{
rating: 1,
counts: {
month1: 5,
month2: 8,
...
month12: 9
}
}
...
{
rating: 6,
counts: {
month1: 22,
month2: 23,
...
month12: 24
}
}
}
I tried this with aggregation framework but I'm a little bit stuck.
The following query:
// Counts bookings per (hotel rating, month of booking start date).
db.book.aggregate([
    // One document per individual booking.
    { $unwind: '$bookings' },
    // Derive the month from the booking's start date.
    {
        $project: {
            bookings: 1,
            rating: 1,
            month: { $month: '$bookings.startDate' }
        }
    },
    // Count the bookings for every rating/month combination.
    {
        $group: {
            _id: { rating: '$rating', month: '$month' },
            count: { $sum: 1 }
        }
    }
]);
Will give you the result per rating/month, but it does not make a subdocument for months. In general, you can not convert a value (such as the month nr) to a key (such as month1)—this is something you can probably quite easily handle in your application though.
The above aggregation results in:
"result" : [
{
"_id" : {
"rating" : 2,
"month" : 1
},
"count" : 1
},
{
"_id" : {
"rating" : 2,
"month" : 12
},
"count" : 1
}
],
"ok" : 1