MongoDB select distinct and count - mongodb

I have a product collection which looks like that:
products = [
{
"ref": "1",
"facets": [
{
"type":"category",
"val":"kitchen"
},
{
"type":"category",
"val":"bedroom"
},
{
"type":"material",
"val":"wood"
}
]
},
{
"ref": "2",
"facets": [
{
"type":"category",
"val":"kitchen"
},
{
"type":"category",
"val":"livingroom"
},
{
"type":"material",
"val":"plastic"
}
]
}
]
I would like to select the distinct categories and count the number of products that have each category (note that a product can have more than one category). Something like this:
[
{
"category": "kitchen",
"numberOfProducts": 2
},
{
"category": "bedroom",
"numberOfProducts": 1
},
{
"category": "livingroom",
"numberOfProducts": 1
}
]
And it would be better if I could get the same result for each different facet type, something like that:
[
{
"facetType": "category",
"distinctValues":
[
{
"val": "kitchen",
"numberOfProducts": 2
},
{
"val": "livingroom",
"numberOfProducts": 1
},
{
"val": "bedroom",
"numberOfProducts": 1
}
]
},
{
"facetType": "material",
"distinctValues":
[
{
"val": "wood",
"numberOfProducts": 1
},
{
"val": "plastic",
"numberOfProducts": 1
}
]
}
]
I have been experimenting with distinct, aggregate and mapReduce, but I can't achieve the results I need. Can anybody tell me the right way to do this?
UPDATE:
With aggregate, this gives me the different facet types that a product has, but not the values nor the count of distinct values:
db.products.aggregate([
{$match:{'content.facets.type':'category'}},
{$group:{ _id: '$content.facets.type'} }
]).pretty();

The following aggregation pipeline will give you the desired result. In the first pipeline step, you need to do an $unwind operation on the facets array so that it's deconstructed to output a document for each element. After the $unwind stage is the first of the $group operations, which groups the documents from the previous stage by facet type and value and calculates the number of products in each group using $sum. The $group operation in the next pipeline stage then groups by facet type alone and creates the array that holds the aggregated values by using the $addToSet operator. The final pipeline stage is the $project operation, which reshapes each document by exposing the group's _id as facetType and suppressing the _id field:
var pipeline = [
{ "$unwind": "$facets" },
{
"$group": {
"_id": {
"facetType": "$facets.type",
"value": "$facets.val"
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.facetType",
"distinctValues": {
"$addToSet": {
"val": "$_id.value",
"numberOfProducts": "$count"
}
}
}
},
{
"$project": {
"_id": 0,
"facetType": "$_id",
"distinctValues": 1
}
}
];
db.product.aggregate(pipeline);
Output
/* 0 */
{
"result" : [
{
"distinctValues" : [
{
"val" : "kitchen",
"numberOfProducts" : 2
},
{
"val" : "bedroom",
"numberOfProducts" : 1
},
{
"val" : "livingroom",
"numberOfProducts" : 1
}
],
"facetType" : "category"
},
{
"distinctValues" : [
{
"val" : "wood",
"numberOfProducts" : 1
},
{
"val" : "plastic",
"numberOfProducts" : 1
}
],
"facetType" : "material"
}
],
"ok" : 1
}

Related

mongodb update document from first element of array

Consider a collection client with the following documents:
[
{
"id": 1,
"Name": "Susie",
"ownership" : {
"ownershipContextCode" : "C1"
},
"clientIds": [
{
"clientClusterCode": "clientClusterCode_1",
"clientId": "11"
}
]
},
{
"id": 2,
"Name": "John",
"ownership" : {
"ownershipContextCode" : "C2"
},
"clientIds": [
{
"clientClusterCode": "clientClusterCode_2",
"clientId": "22"
}
]
}
]
I am attempting to set a field (ownershipClientCode) to the clientClusterCode of the first element of the clientIds array.
The result should be like that:
[
{
"id": 1,
"Name": "Susie",
"ownership" : {
"ownershipContextCode" : "C1",
"ownershipClientCode" : "clientClusterCode_1"
},
"clientIds": [
{
"clientClusterCode": "clientClusterCode_1",
"clientId": "11"
}
],
},
{
"id": 2,
"Name": "John",
"ownership" : {
"ownershipContextCode" : "C2",
"ownershipClientCode" : "clientClusterCode_2"
},
"clientIds": [
{
"clientClusterCode": "clientClusterCode_2",
"clientId": "22"
}
],
}
]
I'm using this query, but I can't get the sub-field from the first element of the array:
db.collection.aggregate([
{
$addFields: {
"Last Semester": {
"$arrayElemAt": [
"$clientIds",
0
]
}
}
}
])
This query adds the whole object, but I want only the field (clientClusterCode).
Something like this:
db.collection.aggregate([
{
$addFields: {
"Last Semester": {
"$arrayElemAt": [
"$clientIds",
0
].clientClusterCode
}
}
}
])
I'm using mongodb 4.0.0
You're very close: https://mongoplayground.net/p/HY1Pj0P4z12
db.collection.aggregate([
{
$addFields: {
"ownership.ownershipClientCode": {
"$arrayElemAt": [
"$clientIds.clientClusterCode",
0
]
}
}
}
])
You can use dot notation within $arrayElemAt as well as when defining the field name.
To directly set the field, do something like this (use aggregation in the update): https://mongoplayground.net/p/js-usEJSH_A
db.collection.update({},
[
{
$set: {
"ownership.ownershipClientCode": {
"$arrayElemAt": [
"$clientIds.clientClusterCode",
0
]
}
}
}
],
{
multi: true
})
Note: The second argument to update needs to be an array, so that it functions as a pipeline.

MongoDB $lookup on array of objects

Categories
{
"_id" : ObjectId("61740086893f048528d166b9"),
"name": "Category1",
"tracks" : [
"61c65353565a2d9a1cd3020d",
"61c74518962dc3efb96c3438",
"61c74775703176a6f72df444"
]
}
Tracks
{
"_id" : ObjectId("61c65353565a2d9a1cd3020d"),
"name" : "Track1",
"categoryId" : ObjectId("61740086893f048528d166b9"),
"creatorId" : ObjectId("61c6478304e98ed63e8ee7d3"),
"thumbnailId" : ObjectId("61c65353565a2d9a1cd3020c"),
"plays" : [],
"media" : {
"type" : "wav",
"url" : ""
},
"status" : "approved",
"downloads" : [],
"uploadedDate" : 1640387411
}
Assuming that I have 5 categories and each category has many track IDs, I want to get the last N tracks for each category, so I used the code below:
categories.aggregate([
{
$project: {
tracks: { $slice: ["$tracks", -2] },
},
},
]
And the response is
[
{
"_id": "61740086893f048528d166b9",
"tracks": [
"61c74518962dc3efb96c3438",
"61c74775703176a6f72df444"
]
},
{
"_id": "61740094893f048528d166c1",
"tracks": []
},
{
"_id": "617400a0893f048528d166cb",
"tracks": []
}
]
So far it's good, but the question is how can I replace each category's tracks from an array of IDs to an array of objects?
I tried $lookup, but I probably didn't implement the localField correctly.
Expected result
[
{
"_id": "61740086893f048528d166b9",
"tracks": [
{
"_id": ObjectId("61c74518962dc3efb96c3438")
...
},
{
"_id": ObjectId("61c74775703176a6f72df444")
...
}
]
},
{
"_id": "61740094893f048528d166c1",
"tracks": []
},
{
"_id": "617400a0893f048528d166cb",
"tracks": []
}
]
***** UPDATE *****
I'm trying to replace the creatorId by createdBy which is an object of the users from the users collection
Users
{
"_id": ObjectId("61c6478304e98ed63e8ee7cb"),
"email": "USER888#gmail.com",
"username": "USER999",
"tracks": [
ObjectId("61c65353565a2d9a1cd3020d"),
],
}
The expected result should be
[
{
"_id": "61740086893f048528d166b9",
"tracks": [
{
"_id": ObjectId("61c74518962dc3efb96c3438"),
"createdBy": {
"_id": "userId"
...
},
...
},
{
"_id": ObjectId("61c74775703176a6f72df444"),
"createdBy": {
"_id": "userId"
...
}
...
}
]
},
{
"_id": "61740094893f048528d166c1",
"tracks": []
},
{
"_id": "617400a0893f048528d166cb",
"tracks": []
}
]
In addition to the solution below by ray, I added the code here https://mongoplayground.net/p/8AjmnL-vhtz
The createdBy is at the top level but not under every track
$lookup is the correct way for you to find the corresponding objects in the Tracks collection. The reason your code does not work is that you are storing strings in the tracks array in the Categories collection, while the _id of the Tracks collection is an ObjectId. There will be no $lookup result because the data types do not match. What you can do is convert the strings to ObjectId by using $toObjectId in a $map, and then do the $lookup:
db.categories.aggregate([
{
$project: {
tracks: {
$slice: [
"$tracks",
-2
]
}
}
},
{
$project: {
tracks: {
"$map": {
"input": "$tracks",
"as": "t",
"in": {
"$toObjectId": "$$t"
}
}
}
}
},
{
"$lookup": {
"from": "tracks",
let: {
t: "$tracks"
},
pipeline: [
{
$match: {
$expr: {
"$in": [
"$_id",
"$$t"
]
}
}
}
],
"as": "tracks"
}
}
])
Here is the Mongo playground for your reference.

How to retrieve just the array values only of a nested field of MongoDB document? [duplicate]

This question already has answers here:
How to return just the nested documents of an array from all documents
(2 answers)
Closed 3 years ago.
I'm trying to deep query and retrieve specific fields from MongoDB, but unfortunately I haven't been able to figure out the correct solution.
Document data:
[ {
"_id": 39127198,
"name": "Mike",
"details": {
"age": 25,
"vehicles":[
{"brand":"Chevrolet","model":"Silverado","plate":"AB11"},
{"brand":"Jeep","model":"Cherokee","plate":"CG678"}
]
}
}, {
"_id": 39127198,
"name": "Taylor",
"details": {
"age": 25,
"vehicles": [
{"brand":"GMC","model":"Sierra","plate":"748397"}
]
}
} ]
My requirement: Return "vehicles" array alone for a specific player. Let's say for user "Mike" in this case.
Here is what I tried;
collection.find( {"name":"Mike"} )
.project( {"details.vehicles" : 1, "_id": 0, "name": 0} )
.toArray(function(err, result) { ... } )
collection.aggregate([
{ $match: { "name":"Mike" } },
{ $project: {"details.vehicles" : 1, "_id": 0, "name": 0} }
]).toArray(function(err, result) { ... } )
Here is what I get for the above code:
[
{
"details": {
"vehicles": [
{"brand":"Chevrolet","model":"Silverado","plate":"AB11"},
{"brand":"Jeep","model":"Cherokee","plate":"CG678"}
]
}
}
]
Expected:
[
{"brand":"Chevrolet","model":"Silverado","plate":"AB11"},
{"brand":"Jeep","model":"Cherokee","plate":"CG678"}
]
I am using MongoClient. MongoDB shell version v4.2.1
You can use $unwind and $replaceRoot stages to achieve this :
db.collection.aggregate([
{
$match: {
"name": "Mike"
}
},
{
$unwind: "$details.vehicles"
},
{
$replaceRoot: {
newRoot: "$details.vehicles"
}
}
])
Will output exactly what you need.
Hope it helps
The query:
db.vehi.aggregate( [
{ $match: { "name":"Mike" } },
{ $project: { "vehicles": "$details.vehicles", "_id": 0 } }
] ).next().vehicles
The exact output:
[
{
"brand" : "Chevrolet",
"model" : "Silverado",
"plate" : "AB11"
},
{
"brand" : "Jeep",
"model" : "Cherokee",
"plate" : "CG678"
}
]
- OR -
This also gets the same result:
db.vehi.find(
{ "name" : "Mike" },
{ "details.vehicles" : 1, _id : 0 }
).next().details.vehicles

MongoDB aggregate count based on multiple query fields - (Multiple field count)

My collection will look this,
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "xxx",
"salary" : 10000,
"type" : "type1"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "aaa",
"salary" : 10000,
"type" : "type2"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "ccc",
"salary" : 10000,
"type" : "type2"
}
My query params will be coming as,
{salary=10000, type=type2}
so based on the query I need to fetch the count of above query params
The result should be something like this,
{ category: 'type1', count: 500 } { category: 'type2', count: 200 } { category: 'name', count: 100 }
Currently I get the counts either by running three different queries and constructing the result, or by iterating over the documents on the server side.
Can anyone suggest a good way to get the above result?
Your question is not very clearly presented, but it seems that what you want to do here is count the occurrences of the data in the fields, optionally filtering those fields by the values that match the criteria.
Here the $cond operator allows you to transform a logical condition into a value:
db.collection.aggregate([
{ "$group": {
"_id": null,
"name": { "$sum": 1 },
"salary": {
"$sum": {
"$cond": [
{ "$gte": [ "$salary", 1000 ] },
1,
0
]
}
},
"type": {
"$sum": {
"$cond": [
{ "$eq": [ "$type", "type2" ] },
1,
0
]
}
}
}}
])
All values are in the same document, and it does not really make any sense to split them up here as this is additional work in the pipeline.
{ "_id" : null, "name" : 3, "salary" : 3, "type" : 2 }
Otherwise, the long form, which is not very performant because it needs to make a copy of each document for every key, looks like this:
db.collection.aggregate([
{ "$project": {
"name": 1,
"salary": 1,
"type": 1,
"category": { "$literal": ["name","salary","type"] }
}},
{ "$unwind": "$category" },
{ "$group": {
"_id": "$category",
"count": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$category", "name"] },
{ "$ifNull": [ "$name", false ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "salary" ] },
{ "$gte": [ "$salary", 1000 ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "type" ] },
{ "$eq": [ "$type", "type2" ] }
]},
1,
0
]}
]}
]
}
}
}}
])
And its output:
{ "_id" : "type", "count" : 2 }
{ "_id" : "salary", "count" : 3 }
{ "_id" : "name", "count" : 3 }
If your documents do not have uniform key names or otherwise cannot specify each key in your pipeline condition, then apply with mapReduce instead:
db.collection.mapReduce(
function() {
var doc = this;
delete doc._id;
Object.keys(this).forEach(function(key) {
var value = (( key == "salary") && ( doc[key] < 1000 ))
? 0
: (( key == "type" ) && ( doc[key] != "type2" ))
? 0
: 1;
emit(key,value);
});
},
function(key,values) {
return Array.sum(values);
},
{
"out": { "inline": 1 }
}
);
And its output:
"results" : [
{
"_id" : "name",
"value" : 3
},
{
"_id" : "salary",
"value" : 3
},
{
"_id" : "type",
"value" : 2
}
]
Which is basically the same thing with a conditional count, except that you only specify the "reverse" of the conditions you want and only for the fields you want to filter conditions on. And of course this output format is simple to emit as separate documents.
The same approach applies here: test whether the condition is met on the fields you want conditions for, and return 1 where the condition is met or 0 where it is not, so that summing the values yields the count.
You can use aggregation as following query:
db.collection.aggregate({
$match: {
salary: 10000,
//add any other condition here
}
}, {
$group: {
_id: "$type",
"count": {
$sum: 1
}
}
}, {
$project: {
"category": "$_id",
"count": 1,
_id: 0
}
}

Order by date in sub-document and then by document

I have a simple "Event" mongo schema. Two sample documents are below :
Event Document #1
{
"event_name": "Some nice event",
"venues": [
{
"venue_name": "venue #1",
"shows": [
{
"show_time": "2014-06-18T07:46:02.415Z",
"capacity": 20
},
{
"show_time": "2014-06-20T07:46:02.415Z",
"capacity": 40
}
]
},
{
"venue_name": "venue #2",
"shows": [
{
"show_time": "2014-06-17T07:46:02.415Z",
"capacity": 20
},
{
"show_time": "2014-06-24T07:46:02.415Z",
"capacity": 40
}
]
}
]
}
Event Document #2
{
"event_name": "Another nice event",
"venues": [
{
"venue_name": "venue #1",
"shows": [
{
"show_time": "2014-06-19T07:46:02.415Z",
"capacity": 20
},
{
"show_time": "2014-06-16T07:46:02.415Z",
"capacity": 40
}
]
}
]
}
I need to query this collection of event documents and fetch the events with the closest shows, with respect to a particular time.
So for e.g., if I had to find events happening on or after 16 Jun, I should get document #2 followed by document #1, with the venue sub-document order as [venue #2, venue #1].
On the other hand, if I wanted events happening on or after 18 Jun, I should get document #1, with [venue #1, venue #2], followed by document #2.
Essentially, I need to be able to sort by the show_time of the nested sub-document. And this sorting should work on multiple venue sub-documents.
According to mongo's documentation, this doesn't seem to be supported, so is there a way of using aggregation to achieve this?
Or is there a way to rejig the schema to support such queries?
Or is mongoDB the wrong use-case for such scenarios altogether?
Really good question. Hopefully your dates are real dates, but the lexical form should not really matter here. The following form should do it, as long as you take the dates into consideration:
db.event.aggregate([
// Match the "documents" that meet the condition
{ "$match": {
"venues.shows.show_time": { "$gte": new Date("2014-06-16") }
}},
// Unwind the arrays
{ "$unwind": "$venues" },
{ "$unwind": "$venues.shows" },
// Sort the entries just to float the nearest result
{ "$sort": { "venues.shows.show_time": 1 } },
// Find the "earliest" for the venue while grouping
{ "$group": {
"_id": {
"_id": "$_id",
"event_name": "$event_name",
"venue_name": "$venues.venue_name"
},
"shows": {
"$push": "$venues.shows"
},
"earliest": {
"$min": {
"$cond": [
{ "$gte": [
"$venues.shows.show_time",
new Date("2014-06-16")
]},
"$venues.shows.show_time",
null
]
}
}
}},
// Sort those because of the order you want
{ "$sort": { "earliest": 1 } },
// Group back and with the "earliest" document
{ "$group": {
"_id": "$_id._id",
"event_name": { "$first": "$_id.event_name" },
"venues": {
"$push": {
"venue_name": "$_id.venue_name",
"shows": "$shows"
}
},
"earliest": {
"$min": {
"$cond": [
{ "$gte": [
"$earliest",
new Date("2014-06-16")
]},
"$earliest",
null
]
}
}
}},
// Sort by the earliest document
{ "$sort": { "earliest": 1 } },
// Project the fields
{ "$project": {
"event_name": 1,
"venues": 1
}}
])
So most of this looks reasonably straightforward if you have some experience with the aggregation framework. If not, then there is some general explaining, plus there are some "funky" things happening as we evaluate further.
The first steps in aggregation are to $match just like any normal query and then to $unwind the arrays you want to process. The "unwind" statement effectively "de-normalizes" the documents contained in the array to be standard documents by themselves.
The next $sort ends up as a "prettying up" function as the "earliest" event in each "set" will be at the top as a result.
As there are "two" levels of arrays, you do the grouping in two stages via the $group pipeline stage.
The first $group "groups" by "document", "event_name" and "venue". All of the shows are put back into their original array form, but at this time we extract the $min value for the "show_time".
The value taken is not just the ordinary "minimal" value. Here we use the $cond operator to make sure that the value returned must be "greater than or equal to" the date that you were requesting in the query initially. This makes sure that any "earlier" values are not taken into consideration when "sorting".
The next thing to do is to $sort on that "earliest" date, to keep the entries for the "venues" in order. The following stages then do the same as above, but "grouping" back to the original documents this time, then finally "sorting" in the order of which "show_time" would be the "earliest".
The result from the dates shown as input would be your desired result for the 16th:
{
"_id" : ObjectId("53a95263a1923f45a6c2d3dd"),
"event_name" : "Another nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-16T07:46:02.415Z"),
"capacity" : 40
},
{
"show_time" : ISODate("2014-06-19T07:46:02.415Z"),
"capacity" : 20
}
]
}
]
}
{
"_id" : ObjectId("53a952b5a1923f45a6c2d3de"),
"event_name" : "Some nice event",
"venues" : [
{
"venue_name" : "venue #2",
"shows" : [
{
"show_time" : ISODate("2014-06-17T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-24T07:46:02.415Z"),
"capacity" : 40
}
]
},
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-18T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-20T07:46:02.415Z"),
"capacity" : 40
}
]
}
]
}
And by changing the input to the 18th you also get the desired result:
{
"_id" : ObjectId("53a952b5a1923f45a6c2d3de"),
"event_name" : "Some nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-18T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-20T07:46:02.415Z"),
"capacity" : 40
}
]
},
{
"venue_name" : "venue #2",
"shows" : [
{
"show_time" : ISODate("2014-06-17T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-24T07:46:02.415Z"),
"capacity" : 40
}
]
}
]
}
{
"_id" : ObjectId("53a95263a1923f45a6c2d3dd"),
"event_name" : "Another nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-16T07:46:02.415Z"),
"capacity" : 40
},
{
"show_time" : ISODate("2014-06-19T07:46:02.415Z"),
"capacity" : 20
}
]
}
]
}
Also if you want to go further with this, just add an additional $match stage, and that can filter out "events" that occur before the date that is requested in the query:
db.event.aggregate([
{ "$match": {
"venues.shows.show_time": { "$gte": new Date("2014-06-18") }
}},
{ "$unwind": "$venues" },
{ "$unwind": "$venues.shows" },
{ "$match": {
"venues.shows.show_time": { "$gte": new Date("2014-06-18") }
}},
{ "$sort": { "venues.shows.show_time": 1 } },
{ "$group": {
"_id": {
"_id": "$_id",
"event_name": "$event_name",
"venue_name": "$venues.venue_name"
},
"shows": {
"$push": "$venues.shows"
},
"earliest": {
"$min": {
"$cond": [
{ "$gte": [
"$venues.shows.show_time",
new Date("2014-06-18")
]},
"$venues.shows.show_time",
null
]
}
}
}},
{ "$sort": { "earliest": 1 } },
{ "$group": {
"_id": "$_id._id",
"event_name": { "$first": "$_id.event_name" },
"venues": {
"$push": {
"venue_name": "$_id.venue_name",
"shows": "$shows"
}
},
"earliest": {
"$min": {
"$cond": [
{ "$gte": [
"$earliest",
new Date("2014-06-18")
]},
"$earliest",
null
]
}
}
}},
{ "$sort": { "earliest": 1 } },
{ "$project": {
"event_name": 1,
"venues": 1
}}
])
With the result:
{
"_id" : ObjectId("53a952b5a1923f45a6c2d3de"),
"event_name" : "Some nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-18T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-20T07:46:02.415Z"),
"capacity" : 40
}
]
},
{
"venue_name" : "venue #2",
"shows" : [
{
"show_time" : ISODate("2014-06-24T07:46:02.415Z"),
"capacity" : 40
}
]
}
]
}
{
"_id" : ObjectId("53a95263a1923f45a6c2d3dd"),
"event_name" : "Another nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-19T07:46:02.415Z"),
"capacity" : 20
}
]
}
]
}