Use of something like $group inside $addFields - mongodb

Below is one of my document from collection movies:
{
"_id" : 4,
"startYear" : 1892,
"title" : "Un bon bock",
"originalTitle" : "Un bon bock",
"rating" : 6.4,
"type" : "short",
"numVotes" : 105,
"genres" : [
"Short",
"Animation"
]
}
I would like every document to have a field called normalizedRating that is calculated as follows:
normalizedRating = (rating - min(rating)) / (max(rating) - min(rating))
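So that I get a document like this (the normalizedRating value shown is only a placeholder; a real min–max normalized value always falls between 0 and 1):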
{
"_id" : 4,
"startYear" : 1892,
"title" : "Un bon bock",
"originalTitle" : "Un bon bock",
"rating" : 6.4,
"type" : "short",
"numVotes" : 105,
"genres" : [
"Short",
"Animation"
],
"normalizedRating": 6.3
}
I am able to get the above result by using two different queries. I'm curious to know if it can be done using a single query.

If you want to do it in a single query, try either of these two:
Query 1 :
// Adds `normalizedRating` = (rating - min) / (max - min) to every document,
// where min/max are taken over the whole collection.
db.collection.aggregate([
  // Collapse the collection into ONE document that carries the global
  // min/max rating plus every original document in `data`.
  // This keeps the whole collection inside a single group, so it is only
  // suitable for small/medium collections.
  {
    $group: {
      _id: null, // constant key: a single group over all documents
      maxRating: { $max: "$rating" },
      minRating: { $min: "$rating" },
      data: { $push: "$$ROOT" },
    },
  },
  // Back to one document per original document, each now carrying min/max.
  { $unwind: "$data" },
  {
    $addFields: {
      "data.normalizedRating": {
        // Guard: when every rating is identical (or there is only one
        // document) max - min is 0 and $divide would fail the whole
        // aggregation. Report 0 in that degenerate case.
        $cond: [
          { $eq: ["$maxRating", "$minRating"] },
          0,
          {
            $divide: [
              { $subtract: ["$data.rating", "$minRating"] },
              { $subtract: ["$maxRating", "$minRating"] },
            ],
          },
        ],
      },
    },
  },
  // Promote the enriched original document back to the top level,
  // dropping the helper min/max fields.
  { $replaceRoot: { newRoot: "$data" } },
]);
Test : MongoDB-playground
Query 2 :
// Same result as Query 1, but computes the global min/max in a separate
// $facet branch instead of pushing the documents through $group.
db.collection.aggregate([
  {
    $facet: {
      // Branch 1: every document, unchanged.
      data: [{ $match: {} }],
      // Branch 2: a single document holding the global min/max rating.
      ratingValues: [
        {
          $group: {
            _id: null, // constant key: a single group over all documents
            maxRating: { $max: "$rating" },
            minRating: { $min: "$rating" },
          },
        },
      ],
    },
  },
  // $facet emits one document with two arrays; flatten both so that each
  // original document sits next to the min/max values.
  { $unwind: "$data" },
  { $unwind: "$ratingValues" },
  {
    $addFields: {
      "data.normalizedRating": {
        // Guard: when max == min the denominator is 0 and $divide would
        // fail the whole aggregation. Report 0 in that degenerate case.
        $cond: [
          { $eq: ["$ratingValues.maxRating", "$ratingValues.minRating"] },
          0,
          {
            $divide: [
              { $subtract: ["$data.rating", "$ratingValues.minRating"] },
              { $subtract: ["$ratingValues.maxRating", "$ratingValues.minRating"] },
            ],
          },
        ],
      },
    },
  },
  // Replacing the root with `data` already discards `ratingValues`, so no
  // separate $project stage is needed to remove it.
  { $replaceRoot: { newRoot: "$data" } },
]);
Test : MongoDB-playground
In the end, if your dataset is medium-sized these queries can perform well, but on huge datasets they might not. In that case I would split the task in two (do part of the work in application code, or make multiple calls if really needed), or try implementing the same task with mapReduce if the aggregation is really slow.

Related

How to regroup (or merge) objects in aggregation pipeline in MongoDB?

I currently have an aggregation pipeline:
db.getCollection('forms').aggregate([
{ $unwind: //unwind },
{
$match: {
//some matches
}
},
{
$project: {
//some projections
}
},
{
//Finally, im grouping the results
$group: {
_id: {
year: { $year: '$createdAt' },
month: { $month: '$createdAt' },
raceEthnicity: '$demographic.raceEthnicity'
},
count: { $sum: 1 },
}
]
My current results are similar to:
[{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Asian"
},
"count" : 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Multiracial"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "White"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
"raceEthnicity" : "White"
},
"count" : 33.0
}]
Is there a way to add a new stage on the pipeline to "merge" results of the same year/month into a single object?
I want to achieve something like:
{
"_id" : {
"year" : 2020,
"month" : 11,
},
"Asian" : 1.0,
"Multiracial": 3.0,
"White": 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
},
"White": 33
}
Is it possible? How can I do that?
Add this one to your aggregation pipeline.
// Pivots the per-(year, month, raceEthnicity) counts into one document per
// (year, month) whose fields are named after the raceEthnicity values.
db.collection.aggregate([
  // Build a { k, v } pair per input document: key = ethnicity, value = count.
  {
    $set: {
      data: {
        k: "$_id.raceEthnicity",
        v: "$count"
      }
    }
  },
  // Collect the pairs of each year/month into one array.
  {
    $group: {
      _id: {
        year: "$_id.year",
        month: "$_id.month"
      },
      data: { $push: "$data" }
    }
  },
  // Turn the array of { k, v } pairs into a single object.
  {
    $set: {
      data: { $arrayToObject: "$data" }
    }
  },
  // Lift the generated fields to the top level, next to _id ...
  {
    $replaceRoot: {
      newRoot: { $mergeObjects: ["$$ROOT", "$data"] }
    }
  },
  // ... and drop the helper field.
  { $unset: "data" }
])
Unlike the solution from @wak786, you don't need to know all the ethnicities at design time; it works for arbitrary raceEthnicity values.
Add these stages to your pipeline.
// Pivots the per-(year, month, raceEthnicity) counts into one document per
// (year, month). The ethnicity names are hard-coded in the $group stage.
db.collection.aggregate([
  // Add a field named after the document's raceEthnicity, holding its count.
  {
    $replaceRoot: {
      newRoot: {
        $mergeObjects: [
          "$$ROOT",
          { $arrayToObject: [[{ k: "$_id.raceEthnicity", v: "$count" }]] }
        ]
      }
    }
  },
  // Merge the documents of the same year/month, summing each known field.
  {
    $group: {
      _id: { year: "$_id.year", month: "$_id.month" },
      Asian: { $sum: "$Asian" },
      Multiracial: { $sum: "$Multiracial" },
      White: { $sum: "$White" }
    }
  }
])
Below is the mongo playground link. I have taken the current result of your pipeline as input to my query.
Try it here

Combine results based on condition during group by

Mongo query generated out of java code:
{
"pipeline": [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},
{
"$group": {
"_id": "$result",
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}]
}
Field 'result' can have values like Approved, Rejected, null and "" (empty string). What I am trying to achieve is combining the count of both null and empty together.
So that the empty string Id will have the count of both null and "", which is equal to 4
I'm sure there's a more "proper" way, but this is what I could quickly come up with:
// Counts documents per `result`, then gives the groups that are neither
// "Approved" nor "Rejected" (i.e. null and "") one shared, combined count.
db.collection.aggregate([
  // First pass: one group per distinct `result` value.
  {
    $group: {
      _id: "$result",
      id: { $first: "$result" },
      // $ifNull with the same field on both sides is a no-op; it is kept
      // as in the question's generated query.
      labelKey: { $first: { $ifNull: ["$result", "$result"] } },
      value: { $sum: 1 },
    },
  },
  // Second pass: keep "Approved"/"Rejected" as their own groups and fold
  // everything else into the "" group, remembering the original keys.
  {
    $group: {
      _id: {
        $cond: [
          {
            $or: [
              { $eq: ["$_id", "Approved"] },
              { $eq: ["$_id", "Rejected"] },
            ],
          },
          "$_id",
          "",
        ],
      },
      temp: { $push: { _id: "$_id", labelKey: "$labelKey" } },
      count: { $sum: "$value" },
    },
  },
  // Restore one row per original key; folded keys share the combined count.
  { $unwind: "$temp" },
  {
    $project: {
      _id: "$temp._id",
      labelKey: "$temp.labelKey",
      count: "$count",
    },
  },
]);
Because the second $group runs on at most four documents, I don't feel too bad about doing this.
I have used $facet.
The MongoDB stage $facet lets you run several independent pipelines within the stage of a pipeline, all using the same data. This means that you can run several aggregations with the same preliminary stages, and successive stages.
// Pipeline that counts documents per `result`, counting '' and null together.
var queries = [
  // Restrict to the documents of one Id.
  { $match: { Id: '09cd9a5a-85c5-4948-808b-20a52d92381a' } },
  {
    $facet: {
      // Branch 1: '' and null results, counted as a single group.
      empty: [
        { $match: { result: { $in: ['', null] } } },
        { $group: { _id: null, value: { $sum: 1 } } }
      ],
      // Branch 2: every other result, one group per distinct value.
      non_empty: [
        { $match: { result: { $nin: ['', null] } } },
        { $group: { _id: '$result', value: { $sum: 1 } } }
      ]
    }
  },
  // Concatenate both branches into a single `results` array.
  {
    $project: {
      results: { $concatArrays: ['$empty', '$non_empty'] }
    }
  }
];
Output :
{
"results": [{
"_id": null,
"value": 52 // count of both '' and null.
}, {
"_id": "Approved",
"value": 83
}, {
"_id": "Rejected",
"value": 3661
}]
}
Changing the group by like below solved the problem
{
"$group": {
"_id": {
"$ifNull": ["$result", ""]
},
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}

Aggregation error: $arrayElemAt's first argument must be an array, but is object

I'm trying to aggregate a collection in mongo using the following pipeline:
const results = await Price.aggregate([
{ $match: { date: today } },
{ $unwind: '$points' },
{ $match: { 'points.time': { $gte: start, $lte: now } } },
{ $sort: { 'points.time': 1 } },
{ $project: {
'high': { $max: '$points.price' },
'low': { $min: '$points.price' },
'open': { $arrayElemAt: ['$points', 0] },
'close': { $arrayElemAt: ['$points', -1] }
} }
])
However, the $arrayElemAt operator isn't working, presumably because one of the preceding stages ($unwind, I believe) converts the array of points in my documents into an object. How can I fix this?
Example document:
{
"_id" : ObjectId("5c93ac3ab89045027259a23f"),
"date" : ISODate("2019-03-21T00:00:00Z"),
"symbol" : "CC6P",
"points" : [
{
"_id" : ObjectId("5c93ac3ab89045027259a244"),
"volume" : 553,
"time" : ISODate("2019-03-21T09:35:34.239Z"),
"price" : 71
},
{
"_id" : ObjectId("5c93ac3ab89045027259a243"),
"volume" : 1736,
"time" : ISODate("2019-03-21T09:57:34.239Z"),
"price" : 49
},
....
],
My expected result is an array of objects in which the points passed to the $project stage are only those within the range specified in the second $match. I tried combining the two $match stages and removing the $unwind stage; the error goes away, but the time range is no longer applied.
I believe you are missing a $group stage to rebuild your points array after the $unwind:
// Per price document of `today`: keep only the points between `start` and
// `now`, ordered by time, then derive high / low / open / close from them.
const results = await Price.aggregate([
  { $match: { date: today } },
  // One document per point, so that points can be filtered and sorted.
  { $unwind: '$points' },
  { $match: { 'points.time': { $gte: start, $lte: now } } },
  { $sort: { 'points.time': 1 } },
  // Rebuild the `points` array per original document; $arrayElemAt below
  // needs an array, which $unwind had turned into a single object.
  {
    $group: {
      _id: '$_id',
      points: { $push: '$points' },
      date: { $first: '$date' },
      symbol: { $first: '$symbol' }
    }
  },
  {
    $project: {
      high: { $max: '$points.price' },
      low: { $min: '$points.price' },
      open: { $arrayElemAt: ['$points', 0] },   // first point of the range
      close: { $arrayElemAt: ['$points', -1] }  // last point of the range
    }
  }
])

Aggregate Fields together

I have the following structure as an input from which data needs to be aggregated:
I need to aggregate the data such that I end up with the following structure:
start: A {
tripdetails: [{
destination: B [{
duration: 10,
type: male
},
duration: 12,
type: female
},
duration: 9,
type: female
}]
]}
}
Basically I need to group "type" and "duration" together under the same destination.
I came up with the following query, but it results in a single "type" field for each "destination" rather than one for every "duration".
db.test.aggregate(
{
$group: {
_id: {"StationID": "$start", "EndStationID": "$destination"},
durations: {$addToSet: "$duration" },
usertypes: {$addToSet: "$type" }
}
},
{
$group: {
_id: "$_id.StationID",
Tripcount_out: {$sum: "durations"},
Trips_out: { $addToSet: { EndStationID: "$_id.EndStationID", Tripduration: "$durations", Usertype: "$usertypes"} }
}
}
)
My question is how I can achieve the structure described above.
You could try running the following aggregate pipeline:
// Groups trips by start station, listing for each destination every
// (duration, type) pair.
db.test.aggregate([
  // One group per start/destination pair. $push (unlike $addToSet) keeps
  // each trip's duration and type together and keeps duplicates.
  {
    $group: {
      _id: { StationID: '$start', EndStationID: '$destination' },
      details: {
        $push: { duration: '$duration', type: '$type' }
      }
    }
  },
  // Regroup by start station, nesting each destination with its trips.
  {
    $group: {
      _id: '$_id.StationID',
      tripdetails: {
        $push: { destination: '$_id.EndStationID', trips: '$details' }
      }
    }
  }
])
which yields:
{
"_id" : "A",
"tripdetails" : [
{
"destination" : "B",
"trips" : [
{
"duration" : 10,
"type" : "male"
},
{
"duration" : 9,
"type" : "female"
},
{
"duration" : 12,
"type" : "female"
}
]
}
]
}

Aggregate query in MongoDB

I'm trying to create some daily stats from a MongoDB collection. Each document is a message that has a create date and a state (Warn, Error, Complete). I'd like to produce a query that results in one record per date with: Date, Count of Warn, Count of Error, Count of Complete. I'm a newbie with Mongo and just learning the query language. I've tried aggregation with mixed results:
// Counts messages per (day, status).
db.TransactionLogs.aggregate([
  {
    $group: {
      _id: {
        // startDate is stored as an ISO-8601 string; its first 10
        // characters are the "YYYY-MM-DD" day.
        category: { $substr: ["$startDate", 0, 10] },
        term: "$Status",
      },
      // Add 1 per message so that `total` is a count.
      total: { $sum: 1 },
    },
  },
])
results in multiple records per date by status:
"result" : [
{
"_id" : {
"category" : "2015-02-10",
"term" : "Completed",
},
"total" : 532
},
{
"_id" : {
"category" : "2015-02-10",
"term" : "Error",
},
"total" : 616
},
Message:
{ "_id" : "2ceda481-3dd3-480d-800d-95288edce6f2", "MID" : "02de5194-7a1d-4854-922c-934902840136", "Status" : "Completed", "firstName" : "Willy", "lastName" : "Wire", "allocation" : "100", "initEvent" : "Marriage", "system" : "Oracle", "startDate" : "2015-02-06T19:03:34.237Z", "stopDate" : "2015-02-06T19:23:34.237Z", "plan" : "445-A" }
I'm sure that it's a lack of understanding of aggregation on my part. Any help or direction is greatly appreciated!
I figured it out. I needed to look at how to "pivot" in Mongo. This works:
// Pivot: one result document per day with a separate count for each status.
db.TransactionLogs.aggregate([
  // Tag every message with its day and a 1/0 flag per status. The flag must
  // be 1 when the status matches and 0 otherwise, so that summing the flags
  // yields the number of messages in that status.
  {
    $project: {
      // startDate is an ISO-8601 string; the first 10 chars are the day.
      startdate: { $substr: ["$startDate", 0, 10] },
      cnt_e1: { $cond: [{ $eq: ["$Status", "Error"] }, 1, 0] },
      cnt_e2: { $cond: [{ $eq: ["$Status", "Warning"] }, 1, 0] },
      cnt_e3: { $cond: [{ $eq: ["$Status", "Completed"] }, 1, 0] },
    },
  },
  // Sum the flags per day.
  {
    $group: {
      _id: "$startdate",
      cnt_e1: { $sum: "$cnt_e1" },
      cnt_e2: { $sum: "$cnt_e2" },
      cnt_e3: { $sum: "$cnt_e3" },
    },
  },
  // Oldest day first.
  { $sort: { _id: 1 } },
])
Here's the code...
db.TransactionLogs.aggregate([ { $project: { startdate: {$substr:["$startDate",0,10]},
cnt_e1: { $cond: [ { $eq: [ "$Status", "Error" ] }, "$count", 1 ] },
cnt_e2: { $cond: [ { $eq: [ "$Status", "Warning" ] }, "$count", 1 ] },
cnt_e3: { $cond: [ { $eq: [ "$Status", "Completed" ] }, "$count", 1 ] },
} },
{ $group: { _id: "$startdate", cnt_e1: { $sum: "$cnt_e1" }, cnt_e2: { $sum: "$cnt_e2" }, cnt_e3: { $sum: "$cnt_e3" } } },
{ $sort: { _id: 1 } },