MongoDB select distinct and sort - mongodb

Suppose I have data like this:
| id | property | score | remark |
-------------------------------------------
| aaaa | alpha | 1 | alpha lowest |
| bbbb | beta | 2 | beta highest |
| cccc | alpha | 2 | alpha highest |
Given the data above, I want to select distinct documents by the field property and sort them by the highest score value, so the expected results will be:
| id | property | score | remark |
-------------------------------------------
| bbbb | beta | 2 | beta highest |
| cccc | alpha | 2 | alpha highest |
How to do this with mongodb?

You can easily do this by running the following aggregation pipeline operation:
db.collection.aggregate([
    // Order by score, highest first, so that $first in the next stage picks
    // the top-scoring document of each property.
    { "$sort": { "score": -1 } },
    // Collapse to one document per distinct property, keeping the fields of
    // the first (highest-scoring) document seen for that property.
    {
        "$group": {
            "_id": "$property",
            "id": { "$first": "$_id" },
            "score": { "$first": "$score" },
            "remark": { "$first": "$remark" }
        }
    },
    // Expose the group key as "property" and drop the synthetic _id.
    {
        "$project": {
            "_id": 0,
            "property": "$_id",
            "score": 1,
            "remark": 1,
            "id": 1
        }
    },
    // $group does not guarantee the order of its output documents, so sort
    // again to return the distinct rows by highest score.
    { "$sort": { "score": -1 } }
])
Sample Output
{ "id" : "bbbb", "score" : 2, "remark" : "beta highest ", "property" : "beta" }
{ "id" : "cccc", "score" : 2, "remark" : "alpha highest ", "property" : "alpha" }

MongoDB has a distinct operator. However, it only returns one "column". You'd need to do multiple queries.
The best option is to use MongoDB Aggregation Framework. It works using a pipeline of operations.
I don't know the data, but it could be something like:
db.collection.aggregate([
    // One output document per distinct property value, carrying the
    // highest score found for that property.
    {
        $group: {
            _id: "$property",
            score: { $max: "$score" }
        }
    }
])
You can use Robomongo client to experiment with this method.

Related

How do we calculate a monthly average value in MongoDB?

I have a list of documents in Mongo, each containing the value of a metric for a service for that day. The scheme is as follows:
{
_id: ObjectId("..."),
name: '<some_metric_name>',
service: '<service_name>',
timestamp_unix: Long("1635033600000"),
timestamp: '2021-10-24 03:00:00 +0300 EEST',
value: 99.9810785241248
},
I would like to have a dashboard in Redash where I can see the calculated average of these values per month, grouped by service.
e.g. over a span of 4 months it should look like this:
| service | date | avg
| my_svc | 01-01-2022 to 31-01-2022 | 99.500
| my_svc | 01-02-2022 to 28-02-2022 | 99.100
| my_svc | 01-03-2022 to 31-03-2022 | 99.400
| my_svc | 01-04-2022 to 30-04-2022 | 99.900
| my_svc_total | 01-01-2022 to 30-04-2022 | 99.475
| my_svc_2 | 01-01-2022 to 31-01-2022 | 99.150
.
.
.
So I need a query that is going to aggregate the documents by month, calculate the average and group them by month and service. So far I have this that can calculate the overall average:
{
  "collection": "metrics",
  "aggregate": [
    {
      "$group": {
        "_id": {
          "$dateToString": {
            "format": "%Y-%m-%d",
            "date": { "$toDate": "$timestamp_unix" }
          }
        },
        "total_average": { "$avg": "$value" }
      }
    }
  ]
}
So how would I go about implementing this?
I think you want your data grouped by month+year and service, so you should put both conditions in your group stage. Then you can get the average with the accumulator, as you did.
{
  "collection": "metrics",
  "aggregate": [
    {
      "$group": {
        "_id": {
          "service": "$service",
          "month": {
            "$dateToString": {
              "format": "%Y-%m",
              "date": { "$toDate": "$timestamp_unix" }
            }
          }
        },
        "average": { "$avg": "$value" }
      }
    },
    {
      "$project": {
        "service": "$_id.service",
        "year-month": "$_id.month",
        "average": 1,
        "_id": 0
      }
    },
    {
      "$sort": { "service": 1, "year-month": 1 }
    }
  ]
}
This should give you the data you want.
If you also want the total average per service, I would suggest using a $facet stage and running the exact same group stage but without the date condition.

MongoError: PlanExecutor error during aggregation

I have three records in MongoDB, but there could be many more. I'm getting shops by an ID coming from the frontend.
I need to get 20 records, group them by itemId and colorId, and get counts for every shop. The number of shops can be 1, 2, 3, ..., 10, etc.
This is the output I need:
+--------+----------+-------+-------+-------+
| itemId | colorId | shop1 | shop2 | shop3 |
+========+==========+=======+=======+=======+
| 1 | colorId1 | 5 | 0 | 3 |
+--------+----------+-------+-------+-------+
| 2 | colorId2 | 3 | 0 | 0 |
+--------+----------+-------+-------+-------+
| 3 | colorId2 | 0 | 3 | 0 |
+--------+----------+-------+-------+-------+
| 2 | colorId1 | 0 | 5 | 0 |
+--------+----------+-------+-------+-------+
| 3 | colorId1 | 0 | 0 | 5 |
+--------+----------+-------+-------+-------+
Here is my data and query - there shopId is a string and it works fine.
But when I use this query on my local machine, I'm getting this error:
MongoError: PlanExecutor error during aggregation :: caused by :: $arrayToObject requires an object with keys 'k' and 'v', where the value of 'k' must be of type string. Found type: objectId
That is, I get the error when I change shopId to an ObjectId.
ObjectId version
Per your request in the comments (if I got it right):
db.collection.aggregate([
    // Filter placeholder (matches everything as written). Narrowing the input
    // here is highly recommended because of the complexity of this query.
    { $match: {} },

    // Put every document into one group so that the complete list of shops
    // (each paired with a zero count) is available to the later stages.
    {
        $group: {
            _id: 0,
            data: { $push: { shopId: "$shopId", shopItems: "$shopItems" } },
            shopIds: { $push: { shopId: "$shopId", count: 0 } }
        }
    },

    // Back to one document per shop, then one document per shop item.
    { $unwind: "$data" },
    { $unwind: "$data.shopItems" },

    // Regroup by (itemId, colorId). `data` holds the real per-shop counts,
    // `existing` holds the same shops with count 0 so they can be subtracted
    // from the full shop list below.
    {
        $group: {
            _id: {
                itemId: "$data.shopItems.itemId",
                colorId: "$data.shopItems.colorId"
            },
            data: {
                $push: {
                    shopId: "$data.shopId",
                    count: "$data.shopItems.itemCount"
                }
            },
            existing: { $push: { shopId: "$data.shopId", count: 0 } },
            shopIds: { $first: "$shopIds" }
        }
    },

    // Shops that have no entry for this item/color: all shops minus the ones
    // already present (both sides are {shopId, count: 0} pairs).
    { $addFields: { missing: { $setDifference: ["$shopIds", "$existing"] } } },

    // Real counts followed by zero-count entries for the missing shops.
    { $project: { data: { $concatArrays: ["$data", "$missing"] } } },

    // Sort the per-shop entries by shopId, then collect them again per
    // item/color group.
    { $unwind: "$data" },
    { $sort: { "data.shopId": 1 } },
    {
        $group: {
            _id: "$_id",
            // The output key name `counts` can be changed freely.
            counts: { $push: "$data" },
            // Optional: total count across all shops for this item/color.
            totalCount: { $sum: "$data.count" }
        }
    }
])
After the first $match, we $group in order to get all shopIds in each document.
Next we $unwind and $group by the group you wanted: by colorId and itemId. Then we add all the shops with count 0 and remove the ones that already have an actual count. The last three steps are just for sorting, summing and formatting.
You can play with it here.

$group result as the key value pair mongo db

The pipeline stage before the $group stage produces documents like the ones below:
cycle_id | entity1 | entity 2
1 | 0 | 1
1 | 1 | 5
2 | 0 | 3
I am able to group them using the script below:
{
"$group" : {
"_id" : "$cycle_id",
"entity1" : {
"$sum" : "$entity1"
},
"entity2" : {
"$sum" : "$entity3"
},
"entity3" : {
"$sum" : "$entity3"
}
}
}
This generated output like below:
cycle_id | entity1 | entity 2
1 | 1 | 6
2 | 0 | 3
but what I am looking for is something where I can project it like an array of key-value pair
{
1:{
entity1: 1,
entity2: 6
},
2:{
entity1: 0,
entity2: 3
}
}
Is there a way I can achieve the above result? I tried looking into $arrayToObject but haven't been successful with it yet.
Thanks,
Nixit
Query :
db.collection.aggregate([
    // Single group over all input documents: each document is turned into a
    // one-key object ({ "<_id as string>": { entity1, entity2 } }) and all of
    // those objects are merged into one `data` object.
    {
        $group: {
            _id: "",
            data: {
                $mergeObjects: {
                    $arrayToObject: [
                        [
                            {
                                // $arrayToObject needs string keys, hence $toString.
                                k: { $toString: "$_id" },
                                v: { entity1: "$entity1", entity2: "$entity2" }
                            }
                        ]
                    ]
                }
            }
        }
    },
    // Promote the merged `data` object to be the whole result document.
    { $replaceRoot: { newRoot: "$data" } }
])
Test : mongoplayground
Ref : aggregation-pipeline

Aggregate counting logical values

Hello friends, I am not familiar with MongoDB aggregation. I have an array of objects that contain a subject and its score for each question, and I am using Node.js. If possible, I want to do the full calculation with a Mongo query: for each subject, the subject name with its total score and the counts of attempted and not-attempted questions. My JSON array looks like this:
{
"examId": ObjectId("597367af7d8d3219d88c4341"),
"questionId": ObjectId("597368207d8d3219d88c4342"),
"questionNo": 1,
"subject": "Reasoning Ability",
"yourChoice": "A",
"correctMark": "1",
"attempt": true,
"notAttempt": false,
}
Here each object has one field for the correct marks, and the subjects differ. I want an output like:
|Subject Name | Total attempts | total not attempts | total score |
| A | 5 | 3 | 10 |
| B | 10 | 5 | 25 |
I am trying to do this with aggregation but have not managed it yet. I have tried this query:
db.examscores.aggregate([
    // Per exam: add up correctMark and count the documents.
    {
        $group: {
            _id: "$examId",
            score: { $sum: "$correctMark" },
            count: { $sum: 1 }
        }
    }
])
Does anyone have an idea how to achieve this type of output?
If there is another way to achieve this using Node, that is also fine.
I have solved this; here is my query:
[
    // Restrict to a single subject.
    { $match: { subject: 'Reasoning Ability' } },
    // Per (exam, subject): count each boolean flag that is true, total the
    // marks, and count the documents.
    {
        $group: {
            _id: { id: "$examId", subject: "$subject" },
            // $cond yields 1 when the flag is truthy and 0 otherwise, so each
            // $sum is a count of documents with that flag set.
            totalAttempt: { $sum: { $cond: ["$attempt", 1, 0] } },
            totalNotAttempt: { $sum: { $cond: ["$notAttempt", 1, 0] } },
            markedForReview: { $sum: { $cond: ["$markedForReview", 1, 0] } },
            answerAndMarkedForReview: { $sum: { $cond: ["$answerAndMarkedForReview", 1, 0] } },
            // correctMark is stored as a string (e.g. "1") in the sample
            // document, and $sum ignores non-numeric values, so it has to be
            // converted first. Missing or unparsable marks count as 0.
            // $convert requires MongoDB 4.0 or newer.
            score: {
                $sum: {
                    $convert: { input: "$correctMark", to: "double", onError: 0, onNull: 0 }
                }
            },
            count: { $sum: 1 }
        }
    }
]

Group count with MongoDB using aggregation framework

Let's say my MongoDB schema looks like this:
{car_id: "...", owner_id: "..."}
This is a many-to-many relationship. For example, the data might look like this:
+-----+----------+--------+
| _id | owner_id | car_id |
+-----+----------+--------+
| 1 | 1 | 1 |
| 2 | 1 | 2 |
| 3 | 1 | 3 |
| 4 | 2 | 1 |
| 5 | 2 | 2 |
| 6 | 3 | 4 |
| 7 | 3 | 5 |
| 8 | 3 | 6 |
| 9 | 3 | 7 |
| 10 | 1 | 1 | <-- not unique
+-----+----------+--------+
I want to get the number of cars owned by each owner. In SQL, this might look like:
SELECT owner_id, COUNT(*) AS cars_owned
FROM (SELECT owner_id FROM car_owners GROUP BY owner_id, car_id) AS t
GROUP BY owner_id;
In this case, the result would look like this:
+----------+------------+
| owner_id | cars_owned |
+----------+------------+
| 1 | 3 |
| 2 | 2 |
| 3 | 4 |
+----------+------------+
How can I accomplish this same thing using MongoDB using the aggregation framework?
To accommodate the potential duplicates, you need to use two $group operations:
db.test.aggregate([
    // Stage 1: collapse duplicate (owner_id, car_id) pairs into one document
    // each, so a car listed twice for the same owner is counted once.
    { $group: {
        _id: { owner_id: '$owner_id', car_id: '$car_id' }
    }},
    // Stage 2: count the distinct pairs per owner.
    { $group: {
        _id: '$_id.owner_id',
        cars_owned: { $sum: 1 }
    }},
    // Stage 3: expose the group key as owner_id and drop _id.
    { $project: {
        _id: 0,
        owner_id: '$_id',
        cars_owned: 1
    }}]
    , function(err, result){
        // Report a failed aggregation instead of silently logging an
        // undefined result.
        if (err) {
            console.error(err);
            return;
        }
        console.log(result);
    }
);
Gives a result with a format of:
[ { cars_owned: 2, owner_id: 10 },
{ cars_owned: 1, owner_id: 11 } ]
$group is similar to the SQL GROUP BY command. In the example below, we aggregate companies by the year in which they were founded and calculate the average number of employees for each founding year.
db.companies.aggregate([
    // Group companies by founding year and average their employee counts.
    {
        $group: {
            _id: { founded_year: "$founded_year" },
            average_number_of_employees: { $avg: "$number_of_employees" }
        }
    },
    // Largest average first.
    { $sort: { average_number_of_employees: -1 } }
])
This aggregation pipeline has 2 stages
$group
$sort
Now, fundamental to the $group stage is the _id field that we specify as part of the document. That is the value of the $group operator itself, using a very strict interpretation of the aggregation framework syntax. _id is how we define, control, and tune what the group stage uses to organize the documents that it sees.
The query below finds the relationships of people with companies using the $sum operator:
db.companies.aggregate([
    // Keep only companies where relationships.person is not null.
    { $match: { "relationships.person": { $ne: null } } },
    // Only the relationships array is needed from here on.
    { $project: { relationships: 1, _id: 0 } },
    // One document per individual relationship entry.
    { $unwind: "$relationships" },
    // Count the relationship entries per person.
    {
        $group: {
            _id: "$relationships.person",
            count: { $sum: 1 }
        }
    },
    // Highest count first.
    { $sort: { count: -1 } }
])