How to do a pivot and conditional group in MongoDB? - mongodb

I have data like this:
[
{
"invoice_id": "GRS",
"segment": "D",
"metric": "2019",
"revenue": 100,
},
{
"invoice_id": "RET",
"segment": "D",
"metric": "2019",
"revenue": 100,
},
{
"invoice_id": "GRS",
"segment": "R",
"metric": "2020",
"revenue": 100,
},
{
"invoice_id": "RET",
"segment": "R",
"metric": "2021",
"revenue": 100,
},
{
"invoice_id": "GRS",
"segment": "R",
"metric": "2021",
"revenue": 100,
},
{
"invoice_id": "RET",
"segment": "D",
"metric": "2020",
"revenue": 100,
},
{
"invoice_id": "GRS",
"segment": "D",
"metric": "2021",
"revenue": 100,
}
]
After aggregation, I want the result to come in the following format:
[
{
"invoice_id": "GRS",
"segment": "D",
"2019": 100,
"2021": 100
},
{
"invoice_id": "RET",
"segment": "D",
"2019": 100,
"2020": 100
},
{
"invoice_id": "GRS",
"segment": "R",
"2019": 100,
"2021": 100
},
{
"invoice_id": "RET",
"segment": "R",
"2021": 100
},
]

$group by invoice_id, segment and metric and get total sum of revenue
$group by invoice_id and segment and construct array of years in key-value pair
$arrayToObject convert years array to object key-value format
$mergeObject to merge _id object and years object
$replaceRoot to replace above merge object to root
db.collection.aggregate([
{
$group: {
_id: {
invoice_id: "$invoice_id",
segment: "$segment",
metric: "$metric"
},
revenue: { $sum: "$revenue" }
}
},
{
$group: {
_id: {
invoice_id: "$_id.invoice_id",
segment: "$_id.segment"
},
years: {
$push: {
k: "$_id.metric",
v: "$revenue"
}
}
}
},
{
$replaceRoot: {
newRoot: {
$mergeObjects: [
"$_id",
{ $arrayToObject: "$years" }
]
}
}
}
])
Playground

Related

How to do mongodb inner join and grouping

// orders
[
{
"id": 1,
"orderName": "a",
"seqId": 100,
"etc": [],
"desc": [],
},
{
"id": 2,
"orderName": "b",
"seqId": 200,
"etc": [],
"desc": []
},
{
"id": 3,
"orderName": "c",
"seqId": 100,
},
]
// goods collection
[
{
"id": 1,
"title": "example1",
"items": [
{
"id": 10,
"details": [
{
"id": 100
},
{
"id": 101,
}
]
},
{
"id": 20,
"details": [
{
"id": 102,
},
{
"id": 103,
}
]
},
]
},
[
{
"id": 2,
"title": "example2",
"items": [
{
"id": 30,
"details": [
{
"id": 200
},
{
"id": 201
}
]
},
{
"id": 40,
"details": [
{
"id": 202
},
{
"id": 203
}
]
},
]
},
]
When the etc field and desc field arrays of the orders collection are empty, or the non-empty document's seqId field value and the goods collection's "goods.details.id field value are the same.
I want to express the sum operation based on the title of the product collection and the sum if it is not empty.
{example1: 1, total: 2}
{example2: 1, total: 1}
For example, "example1" and "example2" represent the sum of the cases where the etc and desc field arrays are empty (the title of the goods collection), and the total represents the total regardless of whether the array is empty or not.
If so, it should be marked aboveas:
Following our discussion here, we can remove the early filtering for the 2 empty arrays and move it to a conditional sum at the $group stage.
db.orders.aggregate([
{
"$lookup": {
"from": "goods",
"localField": "seqId",
"foreignField": "items.details.id",
"as": "goodsLookup"
}
},
{
"$unwind": "$goodsLookup"
},
{
$group: {
_id: "$goodsLookup.title",
emptySum: {
$sum: {
"$cond": {
"if": {
$and: [
{
$eq: [
"$desc",
[]
]
},
{
$eq: [
"$etc",
[]
]
}
]
},
"then": 1,
"else": 0
}
}
},
total: {
$sum: 1
}
}
}
])
Mongo Playground

How can I do an inner join of two collections in mongodb

// orders collection
[
{
"id": 1,
"orderName": "a",
"seqId": 100,
"etc": [],
"desc": [],
},
{
"id": 2,
"orderName": "b",
"seqId": 200,
"etc": [],
"desc": []
},
{
"id": 3,
"orderName": "c",
"seqId": 100,
"etc": [],
"desc": [],
},
]
// goods collection
[
{
"id": 1,
"title": "example1",
"items": [
{
"id": 10,
"details": [
{
"id": 100
},
{
"id": 101,
}
]
},
{
"id": 20,
"details": [
{
"id": 102,
},
{
"id": 103,
}
]
},
]
},
[
{
"id": 2,
"title": "example2",
"items": [
{
"id": 30,
"details": [
{
"id": 200
},
{
"id": 201
}
]
},
{
"id": 40,
"details": [
{
"id": 202
},
{
"id": 203
}
]
},
]
},
]
When the value of the seqId field of the document whose etc field and desc field arrays of the orders collection are empty and the value of the "goods.details.id field of the goods collection are the same, I want to get the following output. How can I do that?
[
{orderName: "a", title: "example1"},
{orderName: "b", title: "example2"},
{orderName: "c", title: "example1"},
]
Additionally, I would like to perform a sum operation based on the title of the goods
collection.
[
{"example1": 2},
{"example2": 1}
]
Simply perform a $lookup between orders.seqId and goods.items.details.id. Use $unwind to eliminate empty lookups(i.e. inner join behaviour). Finally, do a $group with $sum to get the count.
db.orders.aggregate([
{
"$match": {
"etc": [],
"desc": []
}
},
{
"$lookup": {
"from": "goods",
"localField": "seqId",
"foreignField": "items.details.id",
"pipeline": [
{
$project: {
_id: 0,
title: 1
}
}
],
"as": "goodsLookup"
}
},
{
"$unwind": "$goodsLookup"
},
{
$group: {
_id: "$goodsLookup.title",
cnt: {
$sum: 1
}
}
}
])
Mongo Playground

Calculate running total across for different groups by day

I'm trying to aggreate a collection of transactions into a running total of owners by day.
The initial collection looks like this:
[
{ "to": "A", "from": "0", "ts": 1 },
{ "to": "A", "from": "0", "ts": 1 },
{ "to": "B", "from": "0", "ts": 1 },
{ "to": "B", "from": "0", "ts": 2 },
{ "to": "C", "from": "0", "ts": 3 },
{ "to": "A", "from": "B", "ts": 4 }
]
What I would like to get is something like this:
[
{
"ts": 1,
"holdings": [
{ "owner": "0", "holdings": -3 },
{ "owner": "A", "holdings": 2 },
{ "owner": "B", "holdings": 1 }
]
},
{
"ts": 2,
"holdings": [
{ "owner": "0", "holdings": -4 },
{ "owner": "A", "holdings": 2 },
{ "owner": "B", "holdings": 2 }
]
},
{
"ts": 4,
"holdings": [
{ "owner": "0", "holdings": -5 },
{ "owner": "A", "holdings": 3 },
{ "owner": "B", "holdings": 1 },
{ "owner": "C", "holdings": 1 }
]
}
]
I've already understood how to generate this for a single ts that I'm setting, but I don't know how to do it across all ts.
The aggregation pipeline for a single ts looks like this:
db.collection.aggregate([
// start with: { "to": "A", "from": "0", "ts": 1 }
{
// create a doc with an array with subset of fields:
// { "_id": ObjectId("5a934e000102030405000000"),
// "data": [ { "change": 1, "owner": "A", "ts": "1" },
// { "change": -1, "owner": "0", "ts": "1" } ] }
$project: {
data: [
{
owner: '$to',
ts: '$ts',
change: 1,
},
{
owner: '$from',
ts: '$ts',
change: -1,
},
],
},
},
{
// unwind the array into 2 docs:
// { "_id": ObjectId("5a934e000102030405000000"), "data": { "change": 1, "owner": "A", "ts": "1" } },
// { "_id": ObjectId("5a934e000102030405000000"), "data": { "change": -1, "owner": "0", "ts": "1" } },
$unwind: '$data',
},
{
// use data as root:
// { "data": { "change": 1, "owner": "A", "ts": "1" } },
// { "data": { "change": -1, "owner": "0", "ts": "1" } }
$replaceRoot: {
newRoot: '$data',
},
},
{
// select day to calc totals
$match: {
ts: {
$lt: 6,
},
},
},
{
// sum totals, grouped by owner
$group: {
_id: '$owner',
//_id: null,
holdings: {
$sum: '$change',
},
},
},
])
This gives the correct result for a particular day (selected in the match stage). I don't understand how I can now generalize that to all days.
One way to do it is using $setWindowFields, which has a built-in accumulation:
db.collection.aggregate([
{
$project: {
ts: "$ts",
data: [{owner: "$to", change: 1}, {owner: "$from", change: -1}]
}
},
{$unwind: "$data"},
{
$group: {
_id: {ts: "$ts", owner: "$data.owner"},
holdings: {$sum: "$data.change"}
}
},
{
$setWindowFields: {
partitionBy: "$_id.owner",
sortBy: {"_id.ts": 1},
output: {
cumulativeHoldings: {
$sum: "$holdings",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$group: {
_id: "$_id.ts",
holdings: {$push: {owner: "$_id.owner", holdings: "$cumulativeHoldings"}}
}
}
])
Playground

MongoDB count value of attribute and group by month and year

I have documents in my mongodb and i try to count every same description i have and classify to month they created.
I want to return an array that include array of objects that includes month number with sub array of description value with the count.
I want to be able to choose which description value to count and choose by what year he will show me the data
for example this is my documents:
[
{
"username": "ron",
"skills": [
{
"rank": "high",
list: [
{
"subject": "Football"
},
{
"subject": "Swim"
}
]
},
{
"rank": "low",
list: [
{
"subject": "Baseball"
},
]
}
],
"duration": 0,
"date": ISODate("2021-07-24T12:05:27.127Z"),
"createdAt": ISODate("2021-07-24T12:05:49.985Z"),
"updatedAt": ISODate("2021-07-24T12:05:49.985Z"),
"__v": 0
},
{
"username": "john",
"skills": [
{
"rank": "low",
list: [
{
"subject": "Football"
},
]
}
],
"duration": 0,
"date": ISODate("2021-07-25T12:05:53.000Z"),
"createdAt": ISODate("2021-07-24T12:05:59.249Z"),
"updatedAt": ISODate("2021-07-24T12:05:59.249Z"),
"__v": 0
},
{
"username": "david",
"skills": [
{
"rank": "high",
list: [
{
"subject": "Football"
},
{
"subject": "Baseball"
}
]
},
{
"rank": "low",
list: [
{
"subject": "Swim"
},
]
}
],
"duration": 0,
"date": ISODate("2021-08-26T12:06:13.000Z"),
"createdAt": ISODate("2021-07-24T12:06:21.328Z"),
"updatedAt": ISODate("2021-07-24T12:06:21.328Z"),
"__v": 0
},
{
"username": "david",
"skills": [
{
"request": "high",
list: [
{
"subject": "Swim"
},
]
},
{
"request": "low",
list: [
{
"subject": "Football"
},
{
"subject": "Baseball"
},
]
}
],
"duration": 0,
"date": ISODate("2021-01-21T13:07:50.000Z"),
"createdAt": ISODate("2021-07-24T12:08:05.552Z"),
"updatedAt": ISODate("2021-07-24T12:14:51.285Z"),
"__v": 0
},
{
"username": "david",
"skills": [
{
"rank": "high",
list: [
{
"subject": "Football"
},
]
},
],
"duration": 0,
"date": ISODate("2022-01-21T13:07:50.000Z"),
"createdAt": ISODate("2022-07-24T12:08:05.552Z"),
"updatedAt": ISODate("2022-07-24T12:14:51.285Z"),
"__v": 0
}
]
The result I expect to get if i want to get match only the description that equal to "Football" or "Baseball":
{
[ {"month_number":7,"result":[{"description":'Football',"count":2},{"description":"Baseball","count":1}]},
{"month_number":8,"result":[{"description":'Football',"count":1}]}]
}
I'm new to mongodb ... so far I have been able to count how many there are of each value and display only the values I want but I do not know how to classify it into months depending on the year I choose.
I tried this:
db.exercises.aggregate([{$match:{ $and:[{description:{$in: ["Baseball","Football"]}}]}} ,{$group:{_id:"$description",count:{$sum:1}}}])
and this
db.exercises.aggregate(
[
{
$project:
{
_id: 0,
year: { $year: "$date" },
month: { $month: "$date" },
description:"$description"
}
},
{
$match:{$and:[{description:{$in: ["Baseball","Football","Swim"]},year:2021}]}
},
{$group:{_id:"$description" , count:{$sum:1}}}
]
)
You need add 1 more grouping i.e. by month.
Working playground
db.collection.aggregate([
{
$project: {
_id: 0,
year: {
$year: "$date"
},
month: {
$month: "$date"
},
description: "$description"
}
},
{
$match: {
$and: [
{
description: {
$in: [
"Baseball",
"Football",
"Swim"
]
},
year: 2021
}
]
}
},
{
$group: {
_id: {
description: "$description",
month: "$month"
},
count: {
$sum: 1
}
}
},
{
$group: {
_id: {
month_number: "$_id.month"
},
results: {
$push: {
description: "$_id.description",
count: "$count"
}
}
}
},
{
$project: {
_id: 0,
month_number: "$_id.month_number",
results: 1
}
}
])

Multiple Group By with condition in MongoDB

I have the fields, group_id, category, account, sku and revenue. I want to do an aggregation where the revenue is grouped by the group_id, category, sku and account separately and in sku there will be two divisions, one where there is a comma and one where there isn't. How do I do this?
For example, if my data is
[
{
"group_id": "ABC",
"category": "test",
"account": "abc1",
"sku": "grre,qrvf",
"revenue": 100,
},
{
"group_id": "ABC2",
"category": "test",
"account": "abc",
"sku": "grre,qrvf",
"revenue": 100,
},
{
"group_id": "ABC3",
"category": "test2",
"account": "abc2",
"sku": "qwe",
"revenue": 100,
},
{
"group_id": "ABC",
"category": "test2",
"account": "abc",
"sku": "qwe",
"revenue": 100,
},
{
"group_id": "ABC2",
"category": "test3",
"account": "abc2",
"sku": "asd,fgh",
"revenue": 100,
},
{
"group_id": "ABC",
"category": "test3",
"account": "abc",
"sku": "asd,fgh",
"revenue": 100,
},
{
"group_id": "ABC3",
"category": "test",
"account": "abc2",
"sku": "grre,qrvf",
"revenue": 100,
}
]
then the result should be like
[
{groups: [{group_id: "ABC", "revenue": 300}, {group_id: "ABC2", "revenue": 200}, {group_id: "ABC3", "revenue": 200}]},
{categories: [{category: "test", "revenue": 300}, {category: "test2", "revenue": 200}, {category: "test3", "revenue": 200}]},
{accounts: [{account: "abc", "revenue": 300}, {account: "abc2", "revenue": 100}, {account: "abc3", "revenue": 300}]},
{skus: [{single: [{sku: "qwe", revenue: 100}, {sku: "iou", revenue: 100}]}, {multi: [{sku: "grre,qrvf", revenue: 300}, {sku: "asd,fgh", revenue: 200}]}]},
]
You can use $facet to run multiple aggregations separately in one round-trip and $indexOfBytes to split skus:
db.collection.aggregate([
{
$facet: {
group: [ { $group: { _id: "$group_id", revenue: { $sum: "$revenue" } } }, { $project: { _id: 0, group: "$_id", revenue: 1 } } ],
categories: [ { $group: { _id: "$category", revenue: { $sum: "$revenue" } } }, { $project: { _id: 0, category: "$_id", revenue: 1 } } ],
accounts: [ { $group: { _id: "$account", revenue: { $sum: "$revenue" } } }, { $project: { _id: 0, account: "$_id", revenue: 1 } } ],
skus: [
{ $group: { _id: "$sku", revenue: { $sum: "$revenue" } } },
{ $group: { _id: null, aggregates: { $push: { sku: "$_id", revenue: "$revenue" } } } },
{ $project: {
_id: 0,
single: { $filter: { input: "$aggregates", cond: { $eq: [ { $indexOfBytes: [ "$$this.sku", "," ] }, -1 ] } } },
multi: { $filter: { input: "$aggregates", cond: { $ne: [ { $indexOfBytes: [ "$$this.sku", "," ] }, -1 ] } } },
},
}
],
}
}
])
Mongo Playground