mongodb aggregation conditional adding field based on value in array - mongodb

Please excuse the title; I couldn't find a better description for what I am trying to do.
I have a collection of messages which stores the following information
code: a unique identification code of the message
from: phone number the message was sent from
to: phone number the message was sent to
message: the message text
readings: an array of ObjectIds. The ids reference documents in another collection named "users". If an ObjectId is present here, it means that this message has been read by that particular user.
Example Data
{
"_id" : ObjectId("59ba30c95869d32a803e4c4d"),
"code" : "SM54c9366e9b8544e89bdcf2ee841adea7",
"from" : "+49157xxxxxxx",
"to" : "+49160xxxxxxxx",
"message" : "xxxxxxxx",
"createdAt" : ISODate("2017-09-14T07:33:39.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:33:32.324Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"readings" : [
ObjectId("59c25751dcfdaf2944ee2fae"),
ObjectId("59c25751dcfdaf2944e32fae")
],
}
/* 2 */
{
"_id" : ObjectId("59ba3270f53b7f2fb4fa807f"),
"code" : "SM04585672d02644018e3ff466d73c571d",
"from" : "+49xxxxxxx",
"to" : "+49xxxxxxxx",
"message" : "xxxxxxx",
"createdAt" : ISODate("2017-09-14T07:40:42.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:40:34.338Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"readings" : [
ObjectId("59c25751dcfdaf2944ee2fae")
],
}
What I want to achieve is that a message gets an additional field "hasRead" if a specific user has read the message.
Here is the result i want to achieve
{
"_id" : ObjectId("59ba30c95869d32a803e4c4d"),
"code" : "SM54c9366e9b8544e89bdcf2ee841adea7",
"to" : "+491606983534",
"message" : "Schau mer mal",
"createdAt" : ISODate("2017-09-14T07:33:39.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:33:32.324Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"hasRead" : true
}
/* 2 */
{
"_id" : ObjectId("59ba3270f53b7f2fb4fa807f"),
"code" : "SM04585672d02644018e3ff466d73c571d",
"to" : "+491606983534",
"message" : "Schau mer mal",
"createdAt" : ISODate("2017-09-14T07:40:42.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:40:34.338Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"hasRead" : true
}
I constructed an aggregation with the following stages but it looks so BIG for such a simple task and i wonder if there is a more elegant, lighter way to do so ?
The stages are:
$addFields: Checks whether the readings array is empty. If it is, it substitutes an array holding a dummy ObjectId; otherwise it keeps the readings array as is
$unwind: Unwind the readings array
$addFields: adds a field "hasRead" upon checking if a specific ObjectId matches the "readings" field. True if equal else false
$group: Group by all fields except the "hasRead" field, "hasRead" based in $max hasRead
$project: Constructing the result to make it a flat object.
And here is my code:
// Five-stage pipeline that marks every message with whether the user
// 59c25751dcfdaf2944ee2fae appears in its "readings" array.
db.getCollection('sms').aggregate([
    // 1. Replace an empty "readings" array with a one-element placeholder
    //    array so the $unwind below still emits the message.
    { $addFields: {
        readings: {
            $cond: [
                { $gt: [ { $size: "$readings" }, 0 ] },
                "$readings",
                [ ObjectId("000000000000000000000000") ]
            ]
        }
    }},
    // 2. One document per entry of "readings".
    { $unwind: "$readings" },
    // 3. true when the unwound entry is the user being looked for.
    { $addFields: {
        hasRead: { $eq: [ "$readings", ObjectId("59c25751dcfdaf2944ee2fae") ] }
    }},
    // 4. Collapse back to one document per message; $max yields true as
    //    soon as any unwound entry matched.
    { $group: {
        _id: {
            _id: "$_id",
            code: "$code",
            from: "$from",
            to: "$to",
            message: "$message",
            createdAt: "$createdAt",
            lastModifiedAt: "$lastModifiedAt",
            room: "$room"
        },
        hasRead: { $max: "$hasRead" }
    }},
    // 5. Lift the members of the compound group key back to the top level.
    { $project: {
        _id: "$_id._id",
        code: "$_id.code",
        from: "$_id.from",
        to: "$_id.to",
        message: "$_id.message",
        createdAt: "$_id.createdAt",
        lastModifiedAt: "$_id.lastModifiedAt",
        room: "$_id.room",
        hasRead: "$hasRead"
    }}
])
After browsing through answers Neil (see comment) gave to another question, I could simplify the query to this:
// Two-stage version: filter "readings" down to the wanted user, then turn
// "is the filtered array non-empty?" into the boolean "hasRead".
db.getCollection('sms').aggregate([
    // Keep only the entries of "readings" equal to the user's ObjectId.
    // $ifNull substitutes [] when "readings" is absent so that the $size
    // in the next stage always receives an array.
    { "$addFields": {
        "hasRead": {
            "$filter": {
                "input": { "$ifNull": [ "$readings", [] ] },
                "as": "o",
                "cond": {
                    "$eq": [ "$$o", ObjectId("59c25751dcfdaf2944ee2fae") ]
                }
            }
        }
    }},
    { "$project": {
        "_id": 1,
        "code": 1,
        "to": 1,
        "message": 1,
        "createdAt": 1,
        "lastModifiedAt": 1,
        "status": 1,
        "room": 1,
        // Measure the FILTERED array ("$hasRead"), not "$readings":
        // testing "$readings" would report true for every message that
        // anybody has read, regardless of which user is being checked.
        "hasRead": { "$gt": [ { "$size": "$hasRead" }, 0 ] }
    }}
])

Way too late for this, but you can simply write:
// Single $project stage: "hasRead" is true when the user's ObjectId is a
// member of the message's "readings" array.
db.getCollection("sms").aggregate([
  {
    $project: {
      _id: 1,
      code: 1,
      to: 1,
      message: 1,
      createdAt: 1,
      lastModifiedAt: 1,
      status: 1,
      room: 1,
      hasRead: {
        // The aggregation $in operator errors when its second argument is
        // not an array, so fall back to [] for messages that have no
        // "readings" field; those messages then get hasRead: false.
        $in: [
          ObjectId("59c25751dcfdaf2944ee2fae"),
          { $ifNull: ["$readings", []] },
        ],
      },
    },
  },
]);
often the simplest solution is the correct one :)

Related

Counting results in aggregate selection

My MongoDB database have a structure
{
"_id" : ObjectId("5c1ccc20fc0f60769227d455"),
"type" : 0,
"id" : "hwJyzAHyfjXUlrGhblT7txWd",
"userowner" : 1.0,
"campid" : "9548",
"date" : 1545391136,
"useragent" : "mozilla/5.0 (windows nt 10.0; win64; x64; rv:65.0) gecko/20100101 firefox/65.0",
"domain" : "",
"referer" : "",
"country" : "en",
"language" : "en-US",
"languages" : [
"en-US",
"en"
],
"screenres" : [
"1920*1080"
],
"avscreenres" : [
"1080*1858"
],
"webgl" : "angle (nvidia geforce gtx 1060 6gb direct3d11 vs_5_0 ps_5_0)",
"hash" : 123,
"timezone" : -180,
"result" : true,
"resultreason" : "learning",
"remoteip" : "0.0.0.0"
}
Every document has a field "result" with a boolean value.
I make aggregation selection:
// Count the clicks of one owner per campaign within a date window.
db.getCollection('clicks').aggregate([
    {
        // Restrict to owner 1 and to the inclusive date range.
        $match: {
            userowner: 1,
            date: { $gte: 0, $lte: 9545392055 }
        }
    },
    {
        // One output document per campid, counting its input documents.
        $group: {
            _id: "$campid",
            number: { $sum: 1 }
        }
    }
])
and get a Result:
/* 1 */
{
"_id" : "4587",
"number" : 2.0
}
/* 2 */
{
"_id" : "9548",
"number" : 1346.0
}
How can I count the number of "true" and "false" values in the "result" field and get a result like this:
/* 1 */
{
"_id" : "4587",
"number" : 2.0,
"passed":100,
"blocked":120
}
/* 2 */
{
"_id" : "9548",
"number" : 1346.0,
"passed":100,
"blocked":120
}
I hope this works as per your requirement.
// Per campaign: total clicks plus how many had result true ("passed")
// and result false ("blocked").
db.getCollection('clicks').aggregate([
    { $match: { userowner: 1, date: { $gte: 0, $lte: 9545392055 } } },
    { $group: {
        _id: "$campid",
        // Each $cond contributes 1 for a matching document and 0 otherwise,
        // so the $sum is a conditional count.
        passed: {
            $sum: { $cond: { if: { $eq: ["$result", true] }, then: 1, else: 0 } }
        },
        blocked: {
            $sum: { $cond: { if: { $eq: ["$result", false] }, then: 1, else: 0 } }
        },
        number: { $sum: 1 }
    }},
    // Expose the group key as "campid" instead of "_id".
    { $project: {
        _id: 0,
        campid: "$_id",
        number: 1,
        passed: 1,
        blocked: 1
    }}
])
Output:-
{
"passed" : 3,
"blocked" : 2,
"number" : 5,
"campid" : "4587"
}
{
"passed" : 2,
"blocked" : 1,
"number" : 3,
"campid" : "9548"
}
Refer $group, $cond, and $eq for more info.
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator within a $replaceRoot pipeline to get the desired result.
You would need to group the documents initially by the campid and the result field, aggregate the sum and pass the results to yet another group pipeline stage. This group stage will prepare the documents in a way that the $arrayToObject operator will give you the desired object by creating a key-value array using $push.
The result from this is then fed to the $replaceRoot pipeline to bring the passed and blocked fields to the root of the document.
The following aggregate pipeline describes the above:
// Per campaign: total clicks plus "passed"/"blocked" counts, built from a
// key/value array with $arrayToObject (MongoDB 3.6+).
db.getCollection('clicks').aggregate([
    { "$match": { "userowner": 1, "date": { "$gte": 0, "$lte": 9545392055 } } },
    // First grouping: one document per (campaign, outcome label) with its count.
    { "$group": {
        "_id": {
            "campid": "$campid",
            "result": { "$cond": [ "$result", "passed", "blocked" ] }
        },
        "count": { "$sum": 1 }
    }},
    // Second grouping: one document per campaign, collecting the outcome
    // counts as { k, v } pairs for $arrayToObject.
    { "$group": {
        "_id": "$_id.campid",
        "number": { "$sum": "$count" },
        "counts": {
            "$push": {
                "k": "$_id.result",
                "v": "$count"
            }
        }
    }},
    { "$replaceRoot": {
        "newRoot": {
            "$mergeObjects": [
                // Defaults first: a campaign that has only one of the two
                // outcomes would otherwise lack the other key entirely.
                // Later operands of $mergeObjects override these values.
                { "$literal": { "passed": 0, "blocked": 0 } },
                { "$arrayToObject": "$counts" },
                "$$ROOT"
            ]
        }
    }},
    // The helper array is no longer needed once its pairs are top-level fields.
    { "$project": { "counts": 0 } }
])

Project embedded document key value, based on condition in mongoDB aggregation

I have a mongo collection called tickets and we are storing ticket details in similar structure documents like this:
[
{
"status": "PAUSED",
"lifecycle_dates": {
"OPEN": "d1",
"CLOSED": "d2",
"PAUSED": "d3"
}
},
{
"status": "OPEN",
"lifecycle_dates": {
"OPEN": "d1",
"PAUSED": "d3"
}
},
{
"status": "CLOSED",
"lifecycle_dates": {
"OPEN": "d1",
"CLOSED": "d2"
}
}
]
I need to fetch, for each ticket, its current status and the date on which it entered that status,
and I want to project data like :
[
{
"status": "PAUSED",
"lifecycle_date": "d3"
},
{
"status": "OPEN",
"lifecycle_date": "d1"
},
{
"status": "CLOSED",
"lifecycle_date": "d2"
}
]
How can I project single lifecycle date based on current status in mongo aggregation pipeline?
something like this:
{
$project : {
"status" : 1,
"lifecycle_date" : $lifecycle_dates[$status]
}
}
couldn't find any reference or similar problem in mongo reference document here
current mongo version : 3.2
Updated Answer :
Since you need to fetch the date as per the status, you can use this aggregate query :
// Project the lifecycle date that belongs to the ticket's current status,
// using a chain of $cond expressions; "-1" is returned for any other status.
db.test.aggregate([
    {
        $project: {
            _id: 0,
            status: 1,
            lifecycle_date: {
                $cond: {
                    if: { $eq: ["$status", "OPEN"] },
                    then: "$lifecycle_dates.OPEN",
                    else: {
                        $cond: {
                            if: { $eq: ["$status", "CLOSED"] },
                            then: "$lifecycle_dates.CLOSED",
                            else: {
                                $cond: {
                                    if: { $eq: ["$status", "PAUSED"] },
                                    then: "$lifecycle_dates.PAUSED",
                                    else: "-1"
                                }
                            }
                        }
                    }
                }
            }
        }
    }
])
This is compatible with Mongo 3.2 as well.
Output :
{ "status" : "PAUSED", "lifecycle_date" : "d3" }
{ "status" : "OPEN", "lifecycle_date" : "d1" }
{ "status" : "CLOSED", "lifecycle_date" : "d2" }
=========================================================================
This answer was for the previous question -
Use this aggregate :
// Always project the PAUSED date as "lifecycle_date", whatever the status.
db.test.aggregate([
    { $project: {
        _id: 0,
        status: 1,
        lifecycle_date: "$lifecycle_dates.PAUSED"
    }}
])
Output :
{ "status" : "PAUSED", "lifecycle_date" : "d3" }
You can try below aggregation
// Look the status up as a key of "lifecycle_dates" without naming each
// possible status in the pipeline.
db.collection.aggregate([
    // Turn the sub-document into { k, v } pairs, keep the pairs whose key
    // equals the current status, and take the first of them.
    { $project: {
        status: 1,
        lifecycle_date: {
            $arrayElemAt: [
                {
                    $filter: {
                        input: { $objectToArray: "$lifecycle_dates" },
                        as: "entry",
                        cond: { $eq: ["$$entry.k", "$status"] }
                    }
                },
                0
            ]
        }
    }},
    // Reduce the selected { k, v } pair to its value.
    { $project: {
        status: 1,
        lifecycle_date: "$lifecycle_date.v"
    }}
])
// Project each ticket's status together with the lifecycle date recorded
// for that status. $switch requires MongoDB 3.4 or newer.
db.tickets.aggregate(
    // Pipeline
    [
        // Stage 1
        {
            $project: {
                "status": 1,
                _id: 0,
                // Singular "lifecycle_date": the field holds one date, which
                // is the name the question asks for.
                "lifecycle_date": {
                    $switch: {
                        branches: [
                            {
                                case: { $eq: ["$status", "PAUSED"] },
                                then: "$lifecycle_dates.PAUSED"
                            },
                            {
                                case: { $eq: ["$status", "OPEN"] },
                                then: "$lifecycle_dates.OPEN"
                            },
                            {
                                // A closed ticket must report its CLOSED
                                // date, not the date it was opened.
                                case: { $eq: ["$status", "CLOSED"] },
                                then: "$lifecycle_dates.CLOSED"
                            }
                        ],
                        // Without a default, $switch raises an error for any
                        // status not covered by a branch.
                        default: null
                    }
                }
            }
        }
    ]
)

MongoDB aggregate count based on multiple query fields - (Multiple field count)

My collection will look this,
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "xxx",
"salary" : 10000,
"type" : "type1"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "aaa",
"salary" : 10000,
"type" : "type2"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "ccc",
"salary" : 10000,
"type" : "type2"
}
My query params will be coming as,
{salary=10000, type=type2}
so based on the query I need to fetch the count of above query params
The result should be something like this,
{ category: 'type1', count: 500 } { category: 'type2', count: 200 } { category: 'name', count: 100 }
Now I am getting count by hitting three different queries and constructing the result (or) server side iteration I can get the result.
Can anyone suggest or provide me good way to get above result
Your question is not very clearly presented, but what it seems you wanted to do here was count the occurrences of the data in the fields, optionally filtering those fields by the values that match the criteria.
Here the $cond operator allows you to transform a logical condition into a value:
// One result document holding three counts over the whole collection.
db.collection.aggregate([
    { $group: {
        _id: null,
        // Every document counts towards "name".
        name: { $sum: 1 },
        // Documents whose salary is at least 1000.
        salary: {
            $sum: {
                $cond: { if: { $gte: ["$salary", 1000] }, then: 1, else: 0 }
            }
        },
        // Documents whose type is "type2".
        type: {
            $sum: {
                $cond: { if: { $eq: ["$type", "type2"] }, then: 1, else: 0 }
            }
        }
    }}
])
All values are in the same document, and it does not really make any sense to split them up here as this is additional work in the pipeline.
{ "_id" : null, "name" : 3, "salary" : 3, "type" : 2 }
Otherwise in the long form, which is not very performant due to needing to make a copy of each document for every key looks like this:
// Long form: one output document per category name, each with its own
// conditional count.
db.collection.aggregate([
    // Attach the list of category names to every document ...
    { $project: {
        name: 1,
        salary: 1,
        type: 1,
        category: { $literal: ["name", "salary", "type"] }
    }},
    // ... and fan each document out into one copy per category.
    { $unwind: "$category" },
    { $group: {
        _id: "$category",
        count: {
            $sum: {
                // A copy counts 1 when the test belonging to its own
                // category passes, otherwise 0.
                $cond: {
                    if: {
                        $or: [
                            { $and: [
                                { $eq: ["$category", "name"] },
                                { $ifNull: ["$name", false] }
                            ]},
                            { $and: [
                                { $eq: ["$category", "salary"] },
                                { $gte: ["$salary", 1000] }
                            ]},
                            { $and: [
                                { $eq: ["$category", "type"] },
                                { $eq: ["$type", "type2"] }
                            ]}
                        ]
                    },
                    then: 1,
                    else: 0
                }
            }
        }
    }}
])
And its output:
{ "_id" : "type", "count" : 2 }
{ "_id" : "salary", "count" : 3 }
{ "_id" : "name", "count" : 3 }
If your documents do not have uniform key names or otherwise cannot specify each key in your pipeline condition, then apply with mapReduce instead:
// Conditional count per key for documents without a uniform set of keys.
db.collection.mapReduce(
    // map: for every key except _id emit 1, or 0 when the key is one of the
    // filtered fields and its value fails that field's condition.
    function() {
        var record = this;
        delete record._id;
        Object.keys(record).forEach(function(field) {
            var rejected =
                (field == "salary" && record[field] < 1000) ||
                (field == "type" && record[field] != "type2");
            emit(field, rejected ? 0 : 1);
        });
    },
    // reduce: add up the emitted 0/1 values of one key.
    function(field, emitted) {
        return Array.sum(emitted);
    },
    // Return the result inline instead of writing it to a collection.
    {
        "out": { "inline": 1 }
    }
);
And its output:
"results" : [
{
"_id" : "name",
"value" : 3
},
{
"_id" : "salary",
"value" : 3
},
{
"_id" : "type",
"value" : 2
}
]
Which is basically the same thing with a conditional count, except that you only specify the "reverse" of the conditions you want and only for the fields you want to filter conditions on. And of course this output format is simple to emit as separate documents.
The same approach applies: test whether the condition is met on the fields you want conditions for, and return 1 where it is met or 0 where it is not, so that summing those values gives the count.
You can use aggregation as following query:
// Count the documents matching the query parameters, grouped by "type".
// The stages are passed as one pipeline array and the call is closed with
// ")", which the original snippet was missing.
db.collection.aggregate([
    {
        $match: {
            salary: 10000
            // add any other condition here
        }
    },
    {
        // One output document per distinct type with its document count.
        $group: {
            _id: "$type",
            "count": { $sum: 1 }
        }
    },
    {
        // Rename the group key to "category" and drop "_id".
        $project: {
            "category": "$_id",
            "count": 1,
            _id: 0
        }
    }
])

MongoDB Aggregation group and count strings

I have a problem with counting different LogStatusses from my collection. I'd like the following result from a query:
Month | ImporterName | NrOfError | NrOfDebug | NrOfInfo | NrOfWarning
So this includes grouping by Month and ImporterName and counting the number of documents with the different statusses.
My MongoDB Collection:
{
"_id" : "8ec84cb7-5099-4a9d-be00-a40200a67c5a",
"Messages" : [
{
"LogStatus" : "Error",
"Message" : "My test message"
},
{
"LogStatus" : "Error",
"Message" : "My test message"
},
{
"LogStatus" : "Error",
"Message" : "My test message"
},
{
"LogStatus" : "Error",
"Message" : "My test message"
},
{
"LogStatus" : "Error",
"Message" : "My test message"
}
],
"StartTime" : new Date("2014-12-15T10:06:09.00Z"),
"EndTime" : new Date("2014-12-15T13:06:09.00Z"),
"HasErrors" : true,
"HasWarnings" : false,
"ImporterName" : "MyImporter"
}
I already have the following query's:
// Per session: start month, importer name and the list of its message statuses.
db.SessionLogItems.aggregate([
    { $project: {
        month: { $month: "$StartTime" },
        name: "$ImporterName",
        // Dotted path through the array: collects LogStatus of every message.
        status: "$Messages.LogStatus",
        _id: 0
    }}
])
result:
month: 12, "name" : "importername", status: ["Error", "Error", "Info"]
and
// Count identical message sub-documents across all sessions, most frequent first.
db.SessionLogItems.aggregate([
    // One document per element of "Messages".
    { $unwind: "$Messages" },
    // Group on the whole message sub-document (status and text together).
    { $group: { _id: "$Messages", Number: { $sum: 1 } } },
    { $sort: { Number: -1 } }
])
result:
"_id" : { "LogStatus" : "Warning", "Message" : "My test warning" }, "Number" :5
"_id" : { "LogStatus" : "Error", "Message" : "My test message" }, "Number" : 5
But I can't seem to figure out the correct query. Any help is appreciated!
EDIT:
My example above is just one out of many documents. I have several importers which have a startTime and EndTime. The importers have several logmessages and four possible LogStatusses: "Error", "Info", "Debug", "Warning". I'd like to have an overview per month and per importer how many errors, infos, debugs and warnings they produced.
Assuming there is no overlap in your "month" between StartTime and EndTime values then you can simply use the StartTime value as the basis for a grouping key. Most of the magic for your other "fields" comes from the $cond operator which decides whether to count the value or not:
// Per (month, importer): number of Error, Debug, Info and Warning messages.
db.SessionLogItems.aggregate([
    // De-normalize: one document per logged message.
    { $unwind: "$Messages" },
    // The start month and the importer name together form the grouping key;
    // each counter adds 1 only for messages carrying its own status.
    { $group: {
        _id: {
            month: { $month: "$StartTime" },
            ImporterName: "$ImporterName"
        },
        NrOfError: {
            $sum: {
                $cond: { if: { $eq: ["$Messages.LogStatus", "Error"] }, then: 1, else: 0 }
            }
        },
        NrOfDebug: {
            $sum: {
                $cond: { if: { $eq: ["$Messages.LogStatus", "Debug"] }, then: 1, else: 0 }
            }
        },
        NrOfInfo: {
            $sum: {
                $cond: { if: { $eq: ["$Messages.LogStatus", "Info"] }, then: 1, else: 0 }
            }
        },
        NrOfWarning: {
            $sum: {
                $cond: { if: { $eq: ["$Messages.LogStatus", "Warning"] }, then: 1, else: 0 }
            }
        }
    }}
])
So basically the "Status" value is tested and where it is matched or not then the appropriate count value is added to the appropriate field.

MongoDB aggregation on another aggregation suggestions

I have a Json file imported into MongoDB. Every line on it is a user, and I have a field product, with the name of it. I know the value of every product, they are just few.
But this information is not stored on the Json.
I was able to do aggregation to retrieve the number of time that a user bought a product, but I would like to do a query to get directly the amount of money that each user spent.
This is my query:
// Number of purchases per (user, product) for the listed products and apps,
// most frequent first.
db.source.aggregate([
    { "$match": {
        "$and": [
            // All six priced products. The list previously named "product2"
            // twice and left out "product1".
            { "productName": {
                "$in": [
                    "product1", "product2", "product3",
                    "product4", "product5", "product6"
                ]
            }},
            { "$or": [
                { "appID": "nameOfAPP" },
                { "appID": "NameOfAPP2" }
            ]}
        ]
    }},
    // One document per user/product pair with its number of purchases.
    { "$group": {
        "_id": {
            "id_user": "$id_user",
            "productName": "$productName"
        },
        "count": { "$sum": 1 }
    }},
    { "$sort": { "count": -1 } }
])
so the output is like that:
{ "_id" : { "id_user" : "user1", "productID" : "product2" }, "count" : 433 }
{ "_id" : { "id_user" : "user2", "productID" : "product1" }, "count" : 370 }
{ "_id" : { "id_user" : "user1", "productID" : "product3" }, "count" : 300 }
{ "_id" : { "id_user" : "user3", "productID" : "product6" }, "count" : 250 }
{ "_id" : { "id_user" : "user2", "productID" : "product5" }, "count" : 140 }
{ "_id" : { "id_user" : "user3", "productID" : "product4" }, "count" : 90 }
I know that product 1 costs 20$, product 2 costs 40$, product 3 costs 55$, product 4 costs -90$, product 5 costs 110$, product 6 costs 200$.
I would like to have an output like that:
{ "_id" : { "id_user" : "user1"}, "money_spent" : 600$ }
{ "_id" : { "id_user" : "user2"}, "money_spent" : 400$ }
etc
Can you help to get that result, I am new with MongoDB.
Thanks in advance.
If you cannot go to the original source data and are only working with an import then do this:
// Total amount per user, obtained by mapping each purchased product to its
// known price and summing those prices.
db.source.aggregate([
    { "$match": {
        "$and": [
            { "productName": {
                "$in": [
                    "product1", "product2", "product3",
                    "product4", "product5", "product6"
                ]
            }},
            { "$or": [
                { "appID": "nameOfAPP" },
                { "appID": "NameOfAPP2" }
            ]}
        ]
    }},
    { "$group": {
        "_id": "$id_user",
        "cost": {
            "$sum": {
                // Cascading $cond: each "else" moves on to the next product.
                // Every test reads "$productName" from the input document;
                // no "_id.productId" field exists at this stage, so testing
                // it would send product1 down to the final fallback price.
                "$cond": [
                    { "$eq": [ "$productName", "product1" ] },
                    20,
                    { "$cond": [
                        { "$eq": [ "$productName", "product2" ] },
                        40,
                        { "$cond": [
                            { "$eq": [ "$productName", "product3" ] },
                            55,
                            { "$cond": [
                                { "$eq": [ "$productName", "product4" ] },
                                -90,
                                { "$cond": [
                                    { "$eq": [ "$productName", "product5" ] },
                                    110,
                                    // Only product6 can remain after the $match.
                                    200
                                ]}
                            ]}
                        ]}
                    ]}
                ]
            }
        }
    }}
])
The $cond operator evaluates whether your field value matches the condition and yields the appropriate value, which $sum then simply adds up to get your result.
$cond provides a "ternary" operator or "if .. then .. else" that is used to evaluate the condition you provide in the first argument. You construct this to "cascade" where the condition evaluates to false in order to move on to the next condition to evaluate, otherwise return the value that matches your condition.
In this way your "known" values are applied as you aggregate for your expected total.