Change Values to Keys in MongoDB aggregation query - mongodb

I have analytics documents in MongoDB that look like this:
{
"_id" : ObjectId("id1"),
"userObjectId" : "abc",
"eventType" : "First Signup",
"date" : ISODate("2017-09-10T20:46:42.144Z")
}
{
"_id" : ObjectId("id2"),
"userObjectId" : "abc",
"eventType" : "First Launch",
"date" : ISODate("2017-09-10T20:46:31.291Z")
}
Now I have constructed a query to group the results by date and event type:
{
"collection": "Analytics",
"aggregate": [
{
"$project": {
"yearMonthDay": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$date"
}
},
"date": 1,
"userObjectId": 1,
"_id": 1,
"eventType": 1
}
},
{
"$group": {
"_id": { "ymd": "$yearMonthDay", "event": "$eventType" },
"num_in_group_count": {
"$sum": 1
},
"date": { "$last": "$yearMonthDay" },
"event": { "$last": "$eventType" }
}
}
]
}
This does generate data by date and type like so:
{
"_id" : {
"ymd" : "2017-09-10",
"event" : "First Signup"
},
"num_in_group_count" : 2.0,
"date" : "2017-09-10",
"event" : "First Signup"
}
{
"_id" : {
"ymd" : "2017-09-10",
"event" : "First End Onboarding"
},
"num_in_group_count" : 1.0,
"date" : "2017-09-10",
"event" : "First Launch"
}
However I would like to graph this in Redash, so would really like the data structured with the events changed to keys like so:
{
"_id" : "2017-09-10",
"First Signup" : 2.0,
"First Launch" : 1.0,
"date" : "2017-09-12"
}
How can I achieve this query?

you should group by date first
Then use this query to get result kindly check for null values for First Signup & Login
db.getCollection('heya').aggregate([
{
"$group": {
"_id": "$date",
"First Login": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$eventType",
"First Launch"
]
},
"then": 1,
"else": 0
}
}
},
"First Signup": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$eventType",
"First Signup"
]
},
"then": 1,
"else": 0
}
}
},
"date": {
"$addToSet": "$date"
}
}
},
{
"$unwind": "$date"
}
]);

Related

Combine results based on condition during group by

Mongo query generated out of java code:
{
"pipeline": [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},
{
"$group": {
"_id": "$result",
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}]
}
Field 'result' can have values like Approved, Rejected, null and "" (empty string). What I am trying to achieve is combining the count of both null and empty together.
So that the empty string Id will have the count of both null and "", which is equal to 4
I'm sure theres a more "proper" way but this is what i could quickly come up with:
[
{
"$group" : {
"_id" : "$result",
"id" : {
"$first" : "$result"
},
"labelKey" : {
"$first" : {
"$ifNull" : [
"$result",
"$result"
]
}
},
"value" : {
"$sum" : 1.0
}
}
},
{
"$group" : {
"_id" : {
"$cond" : [{
$or: [
{"$eq": ["$_id", "Approved"]},
{"$eq": ["$_id", "Rejected"]},
]}},
"$_id",
""
]
},
"temp" : {
"$push" : {
"_id" : "$_id",
"labelKey" : "$labelKey"
}
},
"count" : {
"$sum" : "$value"
}
}
},
{
"$unwind" : "$temp"
},
{
"$project" : {
"_id" : "$temp._id",
"labelKey": "$temp.labelKey",
"count" : "$count"
}
}
],
);
Due to the fact the second group is only on 4 documents tops i don't feel too bad about doing this.
I have used $facet.
The MongoDB stage $facet lets you run several independent pipelines within the stage of a pipeline, all using the same data. This means that you can run several aggregations with the same preliminary stages, and successive stages.
var queries = [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},{
$facet: {//
"empty": [
{
$match : {
result : { $in : ['',null]}
}
},{
"$group" : {
"_id" : null,
value : { $sum : 1}
}
}
],
"non_empty": [
{
$match : {
result : { $nin : ['',null]}
}
},{
"$group" : {
"_id" : '$result',
value : { $sum : 1}
}
}
]
}
},
{
$project: {
results: {
$concatArrays: [ "$empty", "$non_empty" ]
}
}
}];
Output :
{
"results": [{
"_id": null,
"value": 52 // count of both '' and null.
}, {
"_id": "Approved",
"value": 83
}, {
"_id": "Rejected",
"value": 3661
}]
}
Changing the group by like below solved the problem
{
"$group": {
"_id": {
"$ifNull": ["$result", ""]
},
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}

mongo aggregation framework group by quarter/half year/year

I have a database with this schema structure :
{
"name" : "Carl",
"city" : "paris",
"time" : "1-2018",
"notes" : [
"A",
"A",
"B",
"C",
"D"
]
}
And this query using the aggregation framework :
db.getCollection('collection').aggregate(
[{
"$match": {
"$and": [{
"$or": [ {
"time": "1-2018"
}, {
"time": "2-2018"
} ]
}, {
"name": "Carl"
}, {
"city": "paris"
}]
}
}, {
"$unwind": "$notes"
}, {
"$group": {
"_id": {
"notes": "$notes",
"time": "$time"
},
"count": {
"$sum": 1
}
}
}
, {
"$group": {
"_id": "$_id.time",
"count": {
"$sum": 1
}
}
}, {
"$project": {
"_id": 0,
"time": "$_id",
"count": 1
}
}])
It working correcly and i'm getting these results these results :
{
"count" : 4.0,
"time" : "2-2018"
}
{
"count" : 4.0,
"time" : "1-2018"
}
My issue is that i'd like to keep the same match stage and i'd like to group by quarter.
Here the result i'd like to have :
{
"count" : 8.0,
"time" : "1-2018" // here quarter 1
}
Thanks

MongoDB Group By count occurences of values and output as new field

I have a 3 Collections Assignments, Status, Assignee.
Assignments Fields : [_id, status, Assignee]
Assignee and Status Fields : [_id, name].
There can be many assignments associated with various Status and Assignee collections(linked via _id field), There is no nesting or complex data.
I need a query for all assignments ids where Assignees are the row, Status are the Columns, there combined cell is the count with Total counts at the end.
To help you visualize, I am attaching below image. I am new to complex Mongo DB Aggregate framework, kindly guide me to achieve query.
Note: Data in Status and Assignee collection will be dynamic. Nothing is predetermined in the Query. So, the Rows and Columns are going to grow dynamically in future, If the query is given pagination, then it would be of great help. I cannot write a query with hard coded status names like 'pending', 'completed' etc. As data shall grow and existing data may change like 'pending task', 'completed work'.
Below is my query
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
]);
Below is the output format:
{
"_id" : "John",
"statuses" : {
"statusId" : "Pending",
"count" : 3.0
},
"count" : 3.0
}
{
"_id" : "Katrina",
"statuses" : [{
"statusId" : "Pending",
"count" : 1.0
},
{
"statusId" : "Completed",
"count" : 1.0
},
{
"statusId" : "Assigned",
"count" : 1.0
}],
"count" : 3.0
}
{
"_id" : "Collins",
"statuses" : {
"statusId" : "Pending",
"count" : 4.0
},
"count" : 4.0
}
Expected Output is:
{
"_id" : "Katrina",
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
"totalCount" : 3.0
}
Any Idea on how to many various statusId for different assignee as keys and not values in single document.
You need another $group stage after $unwind to count number of status based on statusId string value:
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
The final aggregate command:
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
{ "$unwind": "$statuses" },
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
]);
Why not just keep statuses as an object so each status is a key/val pair. If that works you do the following
db.getCollection('Assignments').aggregate([
[
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
},
},
{
"$group" : {
"_id" : "$_id.assignee",
"statuses" : {
"$push" : {
"k" : "$_id.statusId", // <- "k" as key value important for $arrayToObject Function
"v" : "$statusCount" // <- "v" as key value important for $arrayToObject Function
}
},
"count" : {
"$sum" : "$statusCount"
}
}
},
{
"$project" : {
"_id" : 1.0,
"statuses" : {
"$arrayToObject" : "$statuses"
},
"totalCount" : "$count"
}
}
],
{
"allowDiskUse" : false
}
);
This gives you:
{
"_id" : "Katrina",
"statuses": {
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
},
"totalCount" : 3.0
}
A compromise having it one layer deeper but still the shape of statuses you wanted and dynamic with each new statusId added.

Rewind data of two nested array field after $unwind and $lookup and $filter on date range in $project

{
"_id" : ObjectId("590b12b6330e1567acd29e69"),
"name": "Foo",
"sales_history" : [
{
"_id" : ObjectId("593ce8e4cfaa652df543d9e3"),
"sold_at" : ISODate("2017-06-11T06:53:24.881Z"),
"sold_to" : ObjectId("593509e938792e046ba14a02"),
"sold_products" : [
{
"product_dp" : 100,
"quantity" : 1,
"product_id" : ObjectId("591068be1f4c6c79a442a788"),
"_id" : ObjectId("593ce8e4cfaa652df543d9e5")
},
{
"product_dp" : 100,
"quantity" : 1,
"product_id" : ObjectId("593a33dccfaa652df543d924"),
"_id" : ObjectId("593ce8e4cfaa652df543d9e4")
}
]
},
{
"_id" : ObjectId("5944cb7142a04740357020b9"),
"sold_at" : ISODate("2017-06-17T06:25:53.332Z"),
"sold_to" : ObjectId("5927d4a59e58ba0c61066f3b"),
"sold_products" : [
{
"product_dp" : 500,
"quantity" : 1,
"price" : 5650,
"product_id" : ObjectId("593191ed53a2741dd9bffeb5"),
"_id" : ObjectId("5944cb7142a04740357020ba")
}
]
}
]
}
I have User schema like this. I want detail of product_id reference, with a date range search criteria on sold_at date field.
My expected data like following when I searched in sold_at at: 2017-06-11
{
"_id" : ObjectId("590b12b6330e1567acd29e69"),
"name": "Foo",
"sales_history" : [
{
"_id" : ObjectId("593ce8e4cfaa652df543d9e3"),
"sold_at" : ISODate("2017-06-11T06:53:24.881Z"),
"sold_to" : ObjectId("593509e938792e046ba14a02"),
"sold_products" : [
{
"product_dp" : 100,
"quantity" : 1,
"product_id": {
_id:ObjectId("hsfgg123412yh3gy1u2g3"),
name: "Product1",
code: "FG0154"
},
}
]
}
]
}
Product detail need to be populate in product_id, sales_history array need to be filtered in date range.
You can try below aggregation query.
$filter sales history on date range followed by $unwinding sales history & sold_products.
$lookup sold_products to get the product details.
$group back sold_products & sales history
db.collection.aggregate([
{
"$project": {
"name": 1,
"sales_history": {
"$filter": {
"input": "$sales_history",
"as": "history",
"cond": {
"$and": [
{
"$gte": [
"$$history.sold_at",
ISODate("2017-06-11T00:00:00.000Z")
]
},
{
"$lt": [
"$$history.sold_at",
ISODate("2017-06-12T00:00:00.000Z")
]
}
]
}
}
}
}
},
{
"$unwind": "$sales_history"
},
{
"$unwind": "$sales_history.sold_products"
},
{
"$lookup": {
"from": lookupcollection,
"localField": "sales_history.sold_products.product_id",
"foreignField": "_id",
"as": "sales_history.sold_products.product_id"
}
},
{
"$group": {
"_id": {
"_id": "$_id",
"sales_history_id": "$sales_history._id"
},
"name": {
"$first": "$name"
},
"sold_at": {
"$first": "$sales_history.sold_at"
},
"sold_to": {
"$first": "$sales_history.sold_to"
},
"sold_products": {
"$push": "$sales_history.sold_products"
}
}
},
{
"$group": {
"_id": "$_id._id",
"name": {
"$first": "$name"
},
"sales_history": {
"$push": {
"_id": "$_id.sales_history_id",
"sold_at": "$sold_at",
"sold_to": "$sold_to",
"sold_products": "$sold_products"
}
}
}
}
]);

Group Multiple Values in Aggregation

I want to group the all field of a collection with unique total. Let's assume there is collection like this:
id country state operator
121 IN HR AIRTEL
212 IN MH AIRTEL
213 US LA AT&T
214 UK JK VODAFONE
Output should be like this:
{
"country": { "IN": 2, "US":1, "UK":1 },
"state": { "HR":1, "MH":1, "LA":1, "JK": 1 },
"operator": { "AIRTEL":2, "AT&T": 1, "VODAFONE": 1 }
}
I am trying to use mongo aggregation framework, but can't really think how to do this?
I find out some similar to your output using aggregation check below code
db.collectionName.aggregate({
"$group": {
"_id": null,
"countryOfIN": {
"$sum": {
"$cond": [{
$eq: ["$country", "IN"]
}, 1, 0]
}
},
"countryOfUK": {
"$sum": {
"$cond": [{
$eq: ["$country", "UK"]
}, 1, 0]
}
},
"countryOfUS": {
"$sum": {
"$cond": [{
$eq: ["$country", "US"]
}, 1, 0]
}
},
"stateOfHR": {
"$sum": {
"$cond": [{
$eq: ["$state", "HR"]
}, 1, 0]
}
},
"stateOfMH": {
"$sum": {
"$cond": [{
$eq: ["$state", "MH"]
}, 1, 0]
}
},
"stateOfLA": {
"$sum": {
"$cond": [{
$eq: ["$state", "LA"]
}, 1, 0]
}
},
"stateOfJK": {
"$sum": {
"$cond": [{
$eq: ["$state", "JK"]
}, 1, 0]
}
},
"operatorOfAIRTEL": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AIRTEL"]
}, 1, 0]
}
},
"operatorOfAT&T": {
"$sum": {
"$cond": [{
$eq: ["$operator", "AT&T"]
}, 1, 0]
}
},
"operatorOfVODAFONE": {
"$sum": {
"$cond": [{
$eq: ["$operator", "VODAFONE"]
}, 1, 0]
}
}
}
}, {
"$group": {
"_id": null,
"country": {
"$push": {
"IN": "$countryOfIN",
"UK": "$countryOfUK",
"US": "$countryOfUS"
}
},
"STATE": {
"$push": {
"HR": "$stateOfHR",
"MH": "$stateOfMH",
"LA": "$stateOfLA",
"JK": "$stateOfJK"
}
},
"operator": {
"$push": {
"AIRTEL": "$operatorOfAIRTEL",
"AT&T": "$operatorOfAT&T",
"VODAFONE": "$operatorOfVODAFONE"
}
}
}
}, {
"$project": {
"_id": 0,
"country": 1,
"STATE": 1,
"operator": 1
}
})
using $cond created groups of matched data and pushed them in second groups to combine.
An output format like you are looking for is not really suited to the aggregation framework since you are tranforming part of your data in to "key" names. The aggregation framework does not do this but rather sticks to database "best practice" as does not transform "data" to "key" names in any way.
You can perform a mapReduce operation instead with allows more flexibilty with the manipulation, but not as good performance due to the need to use JavaScript code to perform the manipulation:
db.collection.mapReduce(
function () {
var obj = {},
doc = this;
delete doc._id;
Object.keys(doc).forEach(function(key) {
obj[key] = {};
obj[key][doc[key]] = 1;
});
emit( null, obj );
},
function (key,values) {
var result = {};
values.forEach(function(value) {
Object.keys(value).forEach(function(outerKey) {
Object.keys(value[outerKey]).forEach(function(innerKey) {
if ( !result.hasOwnProperty(outerKey) ) {
result[outerKey] = {};
}
if ( result[outerKey].hasOwnProperty(innerKey) ) {
result[outerKey][innerKey] += value[outerKey][innerKey];
} else {
result[outerKey][innerKey] = value[outerKey][innerKey];
}
});
});
});
return result;
},
{ "out": { "inline": 1 } }
)
And in the stucture that applies to all mapReduce results:
{
"results" : [
{
"_id" : null,
"value" : {
"country" : {
"IN" : 2,
"US" : 1,
"UK" : 1
},
"state" : {
"HR" : 1,
"MH" : 1,
"LA" : 1,
"JK" : 1
},
"operator" : {
"AIRTEL" : 2,
"AT&T" : 1,
"VODAFONE" : 1
}
}
}
]
}
For the aggregation framework itself, it is better suited to producing aggregation results that are more consistently structured:
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
])
Which would output:
{ "_id" : { "type" : "state", "key" : "JK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "UK" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "US" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AT&T" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "LA" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "AIRTEL" }, "count" : 2 }
{ "_id" : { "type" : "state", "key" : "MH" }, "count" : 1 }
{ "_id" : { "type" : "state", "key" : "HR" }, "count" : 1 }
{ "_id" : { "type" : "operator", "key" : "VODAFONE" }, "count" : 1 }
{ "_id" : { "type" : "country", "key" : "IN" }, "count" : 2 }
But is fairly easy to transform in client code while iterating the results:
var result = {};
db.mapex.aggregate([
{ "$project": {
"country": 1,
"state": 1,
"operator": 1,
"type": { "$literal": ["country","state","operator"] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"type": "$type",
"key": { "$cond": {
"if": { "$eq": [ "$type", "country" ] },
"then": "$country",
"else": { "$cond": {
"if": { "$eq": [ "$type", "state" ] },
"then": "$state",
"else": "$operator"
}}
}}
},
"count": { "$sum": 1 }
}}
]).forEach(function(doc) {
if ( !result.hasOwnProperty(doc._id.type) )
result[doc._id.type] = {};
result[doc._id.type][doc._id.key] = doc.count;
})
Which gives the final structure in "result":
{
"state" : {
"JK" : 1,
"LA" : 1,
"MH" : 1,
"HR" : 1
},
"country" : {
"UK" : 1,
"US" : 1,
"IN" : 2
},
"operator" : {
"AT&T" : 1,
"AIRTEL" : 2,
"VODAFONE" : 1
}
}