MongoDB aggregation: count appearances of each value of a field per id - mongodb

Data example:
{ id: 1, field: a, .. }
{ id: 1, field: a, .. }
{ id: 1, field: b, .. }
{ id: 2, field: b, .. }
Desired result:
{ id: 1, countA: 2, countB: 1 }
{ id :2, countA:0, countB: 1 }
'field' is an enum, so I know all the values in advance and can give names to the counters.
I have a solution but it seems that there is a better one. My solution:
db.collection.aggregate([
{ $group: { _id: { id: "$id", field: "$field"}, count: { $sum : 1}}},
{ $project: {
_id: 1,
countA: { $cond: { if: { $eq: ["$_id.field", "a"] }, then: "$count", else: 0 }},
countB: { $cond: { if: { $eq: ["$_id.field", "b"] }, then: "$count", else: 0 }}
}
},
{ $group:
{_id: "$_id.id", countA: { $max: "$countA"}, countB: { $max :"$countB"}}
}
])
upd: I have a better solution - placing the project before grouping in some way and no need for 2 groupings, but it uses the same principle. But it still seems that there should be somehting more built-in for this purpose
Thanks!

I believe you just need one $group stage to acheive what you want.
Just use $sum to count the number of fields with value a and B with $cond.
Try this:
db.collection.aggregate([
{
$group: {
_id: "$id",
id: {
$first: "$id"
},
countA: {
$sum: {
$cond: {
if: { $eq: [ "$field","a"] },
then: 1,
else: 0
}
}
},
countB: {
$sum: {
$cond: {
if: { $eq: [ "$field", "b"] },
then: 1,
else: 0
}
}
}
}
}
])
Have a look at this Mongo Playground for working demo of the query.
I hope this is what you are looking for!

Related

Mongodb aggregation, get expected result on groupBy without hard-coding categories

My objective is to write an efficient query, that with the given input, gives me the expected output. I have some working solution, but all "types" are "manually" written, so I guess I'm looking for help to get the same output but in a different way.
input
reportId
type
weight
A
"fish"
4
A
"fish"
2
A
"cow"
0
B
"fish"
2
B
"tuna"
1
B
"bird"
Expected output
[
{
reportId: "A",
totalCount: 3,
totalWeight: 6,
fishCount: 2,
tunaCount: 0,
cowCount: 1,
birdCount: 0
},
{
reportId: "A",
totalCount: 3,
totalWeight: 2,
fishCount: 1,
tunaCount: 1,
cowCount: 0,
birdCount: 1
},
]
Partial "hard-coded" solution
What I have been doing so far is to create 2 group-by steps: It kind of get's the job done, but in my real use-case there are a lot of types, and therefore the group-stages are very long.
[
{
$group: {
_id: { reportId: "$reportId", type: $type },
count: { $sum: 1 },
totalWeight: { $sum: "$weight" }
}
},
{
$group: {
_id: "$_id.reportId",
totalCount: { $sum: "$totalCount" },
totalWeight: { $sum: "$totalWeight" },
fishCount: {
$sum: {
$cond: {
"if": { $eq: ["$_id.type", "fish"] },
then: "$count",
else: 0
}
}
},
tunaCount: {
$sum: {
$cond: {
"if": { $eq: ["$_id.type", "tuna"] },
then: "$count",
else: 0
}
}
},
// <== And here I have a count blog for each type. Can I get the same result in a better way?
}
}
]
I will focus to the second part, which is the difficult one. I don't know whether there is a shorter and better solution, but this one should work:
db.collection.aggregate([
{
$unset: "_id"
},
{
$set: {
data: {
"$objectToArray": "$$ROOT"
}
}
},
{
$group: {
_id: "$reportId",
data: {
$push: "$data"
}
}
},
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}
},
{
$set: {
data: {
$filter: {
input: "$data",
cond: {
$not: {
$in: [
"$$this.k",
[
"totalCount",
"totalWeight"
]
]
}
}
}
}
}
},
{
$unwind: "$data"
},
{
$group: {
_id: "$_id",
data: {
$push: "$data"
}
}
},
{
$replaceRoot: {
newRoot: {
$arrayToObject: "$data"
}
}
}
])
See Mongo playground

MongoDB to return formatted object when no results can be found

I have the following stage in my MongoDB aggregation pipeline that returns the qty and sum of sales, which works fine:
{
$lookup: {
from: 'sales',
let: { part: '$_id' },
pipeline: [
{ $match: { $and: [{ $expr: { $eq: ['$partner', '$$part'] } }] } },
{ $group: { _id: null, qty: { $sum: 1 }, soldFor: { $sum: '$soldFor' } } },
{ $project: { _id: 0, qty: 1, soldFor: 1 } }],
as: 'sales'}},
{ $unwind: { path: '$sales', preserveNullAndEmptyArrays: true } },
{ $project: { _id: 1, sales: 1 }
}
However, if there are no sales, then the $project projection returns an empty sales object, but what I'd really like is it to return a completed object, but with 0 - like this:
{
sales: {
qty: 0,
soldFor: 0
}
}
You can use $cond operator here
{
"$project": {
"_id": 1,
"sales": {
"$cond": [
{ "$eq": [{ "$size": "$sales" }, 0] },
{
"sales": {
"qty": 0,
"soldFor": 0
}
},
"$sales"
]
}
}
}

MongoDB count occurances with group and unwind

I have a MongoDB database with the following document structure:
{
"name": "ServiceA",
"areas": ["X", "Y", "Z"],
"tags": [
{
"name": "Financial",
"type": "A"
},
{
"name": "Consumer",
"type": "B"
}
]
}
There's many entries each with the same structure. Containing the same areas.
There's many predefined tag names, sorted into a few types.
The aim is to group by area and then count the number of occurrences of each tag. So an output like this:
{
"area": "X",
"count": 100, // Total entries with X as an area
"tagNameCount": {
"Financial": 20,
"Consumer": 10,
...
},
"tagTypeCount": {
"A": 70,,
"B: 40
}
}
I've been starting of using $unwind on areas, but it's the next steps from there I'm stuck on. I get that I need to use $group, but I can't work out how to count occurrences.
You may use $facet operator which allows perform several aggregation in one.
Walkthrough
1. We $unwind by area and tags
2. With $facet, we perform 3 parallel aggregations:
2.1 We count unique areas
2.2 We count unique tag names for each area
2.3 We count unique tag type for each area
3. We join 2 parallel arrays by flatten areas
4. We assemble desired output
db.collection.aggregate([
{
$unwind: "$areas"
},
{
$unwind: "$tags"
},
{
$facet: {
areas: [
{
$group: {
_id: "$areas",
count: {
$addToSet: "$_id"
}
}
},
{
$project: {
_id: 0,
area: "$_id",
count: {
$size: "$count"
}
}
}
],
tagNameCount: [
{
$group: {
_id: {
name: "$tags.name",
areas: "$areas"
},
count: {
$addToSet: "$_id"
}
}
},
{
$group: {
_id: "$_id.areas",
tagNameCount: {
$push: {
k: "$_id.name",
v: {
$size: "$count"
}
}
}
}
},
{
$addFields: {
tagNameCount: {
$arrayToObject: "$tagNameCount"
}
}
}
],
tagTypeCount: [
{
$group: {
_id: {
type: "$tags.type",
areas: "$areas"
},
count: {
$addToSet: "$_id"
}
}
},
{
$group: {
_id: "$_id.areas",
tagTypeCount: {
$push: {
k: "$_id.type",
v: {
$size: "$count"
}
}
}
}
},
{
$addFields: {
tagTypeCount: {
$arrayToObject: "$tagTypeCount"
}
}
}
]
}
},
{
$unwind: "$areas"
},
{
$addFields: {
"tagNameCount": {
$filter: {
input: "$tagNameCount",
cond: {
$eq: [
"$areas.area",
"$$this._id"
]
}
}
},
"tagTypeCount": {
$filter: {
input: "$tagTypeCount",
cond: {
$eq: [
"$areas.area",
"$$this._id"
]
}
}
}
}
},
{
$project: {
area: "$areas.area",
count: "$areas.count",
tagNameCount: {
$arrayElemAt: [
"$tagNameCount.tagNameCount",
0
]
},
tagTypeCount: {
$arrayElemAt: [
"$tagTypeCount.tagTypeCount",
0
]
}
}
},
{
$sort: {
area: 1
}
}
])
MongoPlayground
Here's one method:
unwind both areas and tags
for each area collect the applicable tags, and the unique names and types
count the names to get the total number of tags
for each unique name, count the matching values in the tags
do the same for each unique type
project out the unique fields
db.collection.aggregate([
{$unwind: "$areas"},
{$unwind: "$tags"},
{$group: {
_id: "$areas",
names: {$push: "$tags.name"},
uniqueNames: {$addToSet: "$tags.name"},
types: {$push: "$tags.type"},
uniqueTypes: {$addToSet: "$tags.type"}
}},
{$addFields: {
count: {$size: "$names"},
names: {
$arrayToObject: {
$map: {
input: "$uniqueNames",
as: "needle",
in: {
k: "$$needle",
v: {
$size: {
$filter: {
input: "$names",
cond: {$eq: ["$$this","$$needle"]}
}}}}}}},
types: {
$arrayToObject: {
$map: {
input: "$uniqueTypes",
as: "needle",
in: {
k: "$$needle",
v: {$size: {
$filter: {
input: "$types",
cond: { $eq: [ "$$this","$$needle"]}
}}}}}}}}},
{
$project: {
uniqueNames: 0,
uniqueTypes: 0
}}
])
Playground

MongoDB Aggregation based on userID and time period

I would like to achieve something like
{ _id: "A", count: 2 }
{ _id: "B", count: 1 }
from
{ userId: "A", timeStamp: "12:30PM" } <- start of 5 min interval A: 1
{ userId: "B", timeStamp: "12:30PM" } <- start of 5 min interval B: 1
{ userId: "B", timeStamp: "12:31PM" } <- ignored
{ userId: "A", timeStamp: "12:32PM" } <- ignored
{ userId: "B", timeStamp: "12:33PM" } <- ignored
{ userId: "A", timeStamp: "12:37PM" } <- start of next 5 min A : 2
where it groups based on userId and then after userId is group, the count is triggered every 5 mins.
For example: Within any 5 min period, starting at say midnight, an unlimited number of collections can have a timeStamp from 00:00 to 00:05 but would only be counted as 1 hit.
Hopefully I am explaining this clearly.
I'm able to group by userId and get the count in general but setting a condition of the count seems to be tricky.
You can try $bucket and $addToSet - the drawback is that you have to specify all the ranges manually:
db.col.aggregate([
{
$bucket: {
groupBy: "$timeStamp",
boundaries: [ "12:30PM", "12:35PM", "12:40PM", "12:45PM", "12:50PM", "12:55PM", "13:00PM" ],
output: {
"users" : { $addToSet: "$userId" }
}
}
},
{
$unwind: "$users"
},
{
$group: { _id: "$users", count: { $sum: 1 } }
}
])
Micki's solution is better if you have mongo 3.6.
If you have mongo 3.4 you can use $switch.
Obviously you would need to add all the cases in the day.
db.getCollection('user_timestamps').aggregate(
{
$group: {
_id: '$userId',
timeStamp: {$push: '$timeStamp'}
}
},
{
$project: {
timeStamps: {
$map: {
input: '$timeStamp',
as: 'timeStamp',
in: {
$switch: {
branches: [
{
case: {
$and: [
{$gte: ['$$timeStamp', '12:30PM']},
{$lt: ['$$timeStamp', '12:35PM']}
]
},
then: 1
},
{
case: {
$and: [
{$gte: ['$$timeStamp', '12:35PM']},
{$lt: ['$$timeStamp', '12:40PM']}
]
},
then: 2
}
],
default: 0
}
}
}
}
}
},
{
$unwind: '$timeStamps'
},
{
$group: {
_id: '$_id',
count: {
$addToSet: '$timeStamps'
}
}
},
{
$project: {
_id: true,
count: {$size: '$count'}
}
}
)
If you don't have mongo 3.4 you can replace the $switch with
cond: [
{
$and: [
{$gte: ['$$timeStamp', '12:30PM']},
{$lt: ['$$timeStamp', '12:35PM']}
]
},
1,
{
cond: [
{
$and: [
{$gte: ['$$timeStamp', '12:35PM']},
{$lt: ['$$timeStamp', '12:40PM']}
]
},
2,
0
]
}
]

Grouping and counting across documents?

I have a collection with documents similar to the following format:
{
departure:{name: "abe"},
arrival:{name: "tom"}
},
{
departure:{name: "bob"},
arrival:{name: "abe"}
}
And to get output like so:
{
name: "abe",
departureCount: 1,
arrivalCount: 1
},
{
name: "bob",
departureCount: 1,
arrivalCount: 0
},
{
name: "tom",
departureCount: 0,
arrivalCount: 1
}
I'm able to get the counts individually by doing a query for the specific data like so:
db.sched.aggregate([
{
"$group":{
_id: "$departure.name",
departureCount: {$sum: 1}
}
}
])
But I haven't figured out how to merge the arrival and departure name into one document along with counts for both. Any suggestions on how to accomplish this?
You should use a $map to split your doc into 2, then $unwind and $group..
[
{
$project: {
dep: '$departure.name',
arr: '$arrival.name'
}
},
{
$project: {
f: {
$map: {
input: {
$literal: ['dep', 'arr']
},
as: 'el',
in : {
type: '$$el',
name: {
$cond: [{
$eq: ['$$el', 'dep']
}, '$dep', '$arr']
}
}
}
}
}
},
{
$unwind: '$f'
}, {
$group: {
_id: {
'name': '$f.name'
},
departureCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'dep']
}, 1, 0]
}
},
arrivalCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'arr']
}, 1, 0]
}
}
}
}, {
$project: {
_id: 0,
name: '$_id.name',
departureCount: 1,
arrivalCount: 1
}
}
]