group by field and count mongodb [duplicate] - mongodb

I have following collection
{
"_id" : ObjectId("5b18d14cbc83fd271b6a157c"),
"status" : "pending",
"description" : "You have to complete the challenge...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5773"),
"status" : "completed",
"description" : "completed...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5775"),
"status" : "pending",
"description" : "pending...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5776"),
"status" : "inProgress",
"description" : "inProgress...",
}
I need to group by status and get all the keys dynamically which are in status
[
{
"completed": [
{
"_id": "5b18d31a27a37696ec8b5773",
"status": "completed",
"description": "completed..."
}
]
},
{
"pending": [
{
"_id": "5b18d14cbc83fd271b6a157c",
"status": "pending",
"description": "You have to complete the challenge..."
},
{
"_id": "5b18d31a27a37696ec8b5775",
"status": "pending",
"description": "pending..."
}
]
},
{
"inProgress": [
{
"_id": "5b18d31a27a37696ec8b5776",
"status": "inProgress",
"description": "inProgress..."
}
]
}
]

Not that I think it's a good idea, mostly because I don't see any "aggregation" here at all, but what you can do is this: after "grouping" to add the documents to an array, you similarly $push all that content into an array keyed by the "status" grouping key, and then convert it into keys of a document in a $replaceRoot with $arrayToObject:
// Collapse the collection into ONE document whose keys are the distinct
// "status" values and whose values are arrays of the matching documents.
db.collection.aggregate([
  // Stage 1: one group per status, collecting every full document ($$ROOT).
  { $group: { _id: "$status", data: { $push: "$$ROOT" } } },

  // Stage 2: fold all groups into a single array of { k, v } pairs,
  // which is the input shape $arrayToObject accepts.
  { $group: { _id: null, data: { $push: { k: "$_id", v: "$data" } } } },

  // Stage 3: convert the pairs into document keys and promote that document to the root.
  { $replaceRoot: { newRoot: { $arrayToObject: "$data" } } }
])
Returns:
{
"inProgress" : [
{
"_id" : ObjectId("5b18d31a27a37696ec8b5776"),
"status" : "inProgress",
"description" : "inProgress..."
}
],
"completed" : [
{
"_id" : ObjectId("5b18d31a27a37696ec8b5773"),
"status" : "completed",
"description" : "completed..."
}
],
"pending" : [
{
"_id" : ObjectId("5b18d14cbc83fd271b6a157c"),
"status" : "pending",
"description" : "You have to complete the challenge..."
},
{
"_id" : ObjectId("5b18d31a27a37696ec8b5775"),
"status" : "pending",
"description" : "pending..."
}
]
}
That might be okay IF you actually "aggregated" beforehand, but on any practically sized collection all that is doing is trying to force the whole collection into a single document, and that's likely to break the BSON limit of 16MB, so I just would not recommend even attempting this without "grouping" something else before this step.
Frankly, the following code does the same thing, without aggregation tricks and with no BSON limit problem:
// Group every document of the collection by its "status" value on the client.
var obj = {};
// forEach is used here only as a stand-in for "any" form of cursor iteration
db.collection.find().forEach(function (doc) {
  var key = doc.status;
  if (obj.hasOwnProperty(key)) {
    // status already seen: append to its bucket
    obj[key].push(doc);
  } else {
    // first document with this status: start its bucket
    obj[key] = [doc];
  }
});
printjson(obj);
Or a bit shorter:
// Same client-side grouping by "status", rebuilding each bucket as a new array.
var obj = {};
// forEach is used here only as a stand-in for "any" form of cursor iteration
db.collection.find().forEach(d => {
  var bucket = obj.hasOwnProperty(d.status) ? obj[d.status] : [];
  obj[d.status] = bucket.concat([d]);
});
printjson(obj);
Aggregations are used for "data reduction" and anything that is simply "reshaping results" without actually reducing the data returned from the server is usually better handled in client code anyway. You're still returning all data no matter what you do, and the client processing of the cursor has considerably less overhead. And NO restrictions.

Related

Get paginated records and total records in one mongodb aggregate query

I have my mongodb departments data structure as shown below
[{
category: "ABC",
sections: [
{
section_hod: "x111",
section_name: "SECTION A",
section_staff_count: "v11111",
section_id: "a1111",
:
},
{
section_hod: "x2222",
section_name: "SECTION B",
section_staff_count: "v2222",
section_id: "a2222",
:
}
]
}
:
:
]
I wrote a mongodb query like as shown below
// List the first page (10 entries) of "SECTION A" sections belonging to category "ABC".
db.getSiblingDB("departments").getCollection("DepartmentDetails").aggregate([
  // one output document per element of the sections array
  { $unwind: "$sections" },
  // both conditions must hold (implicit AND of the two fields)
  { $match: { "sections.section_name": "SECTION A", "category": "ABC" } },
  // flatten the section sub-document into top-level fields
  {
    $project: {
      "name": "$sections.section_name",
      "hod": "$sections.section_hod",
      "staff_count": "$sections.section_staff_count",
      "id": "$sections.section_id"
    }
  },
  // pagination window
  { $skip: 0 },
  { $limit: 10 }
]);
which gives me a list of section details as shown below which contains name, hod, staff_count, id etc
[
{
"name": "xxxxx",
"hod": "xxxxx",
"staff_count": "xxxxx",
"id": "xxxxx"
},
{
"name": "yyyyy",
"hod": "yyyyy",
"staff_count": "yyyyy",
"id": "yyyyy"
}
:
:
:
]
Everything looks good, but the problem is I have so many records in the list with which I am trying to build a pagination. For implementing pagination I know I can use the skip and limit function for iterating the pages, but for doing that I need to know the total counts of all the records.
I can do this in two ways, First way is I can execute two queries one which will be a count and then the aggregate query passing the skip and limit, second way is execute one query which return me the total counts and the documents in the order of first paginated page.
I am trying to implement the second way and bring the expected result is as shown below
{
"documents": [
{
"name": "xxxxx",
"hod": "xxxxx",
"staff_count": "xxxxx",
"id": "xxxxx"
},
{
"name": "yyyyy",
"hod": "yyyyy",
"staff_count": "yyyyy",
"id": "yyyyy"
}
:
:
:
],
"totalCount": 5444
}
Not sure if this is achievable. Can someone please help me on this. My default limit is 10
You can do it like this, it will give you total records and paginated results in one go,
// Return one document holding the requested page ("records") together with the
// total number of matching sections ("total"), using a single aggregation.
db.getSiblingDB("departments").getCollection("DepartmentDetails")
  .aggregate([
    // one output document per element of the sections array
    { $unwind: "$sections" },
    { $match: { $and: [{ "sections.section_name": "SECTION A" },
                       { $or: [{ "category": "ABC" }] }] } },
    {
      $project: {
        "name": "$sections.section_name",
        "hod": "$sections.section_hod",
        "staff_count": "$sections.section_staff_count",
        "id": "$sections.section_id"
      }
    },
    {
      // run the count and the page selection over the same matched documents
      $facet: {
        metaData: [{ $count: "total" }],
        records: [
          { $skip: 0 },
          { $limit: 10 }
        ]
      }
    },
    {
      $project: {
        records: 1,
        // metaData is a one-element array; pull its "total" out to the top level.
        // When nothing matched, $count emits no document and metaData is empty,
        // so fall back to 0 instead of leaving "total" out of the result.
        total: { $ifNull: [{ $arrayElemAt: ["$metaData.total", 0] }, 0] }
      }
    }
  ]);

MongoDB Nested Document query

im having trouble with mongodb querying
my object document:
[
{
"_id": "5a2ca2227c42ad67682731d4",
"name": "name",
"photos": [
{
"_id": "5a2ca22b7c42ad67682731d5",
"approved": false,
"text":"good"
},
{
"_id": "5a2ca72b0a1aa173aaae07da",
"approved": true,
"text":"bad"
}
]
},
{
"_id": "4v2ca2227cad676821731d4",
"name": "name"
}
]
I'm trying to query all documents. Some of these documents will have photos; if they exist, I would like to bring back the photos that have the attribute approved equal to true. But I have to bring back all documents, those that have photos and those that do not. I've tried using aggregation, project and unwind, but I haven't got any good result yet :(
i need the results be like :
[
{
"_id": "5a2ca2227c42ad67682731d4",
"name": "name",
"photos": [
{
"_id": "5a2ca22b7c42ad67682731d5",
"approved": false,
"text":"good"
}
]
},
{
"_id": "4v2ca2227cad676821731d4",
"name": "name"
}
]
thank you!
You might need to $unwind all including the non existing photos array for getting the results
db.ph.aggregate([
  // One output document per photo; documents whose photos array is missing,
  // null or empty are kept as well (preserveNullAndEmptyArrays).
  { $unwind: { path: "$photos", preserveNullAndEmptyArrays: true } },
  // Keep every unwound document whose photo is not approved; documents without
  // a photo pass too, because their "photos.approved" is not true.
  { $match: { "photos.approved": { $ne: true } } }
])
result
{
"_id" : "5a2ca2227c42ad67682731d4",
"name" : "name",
"photos" : {
"_id" : "5a2ca22b7c42ad67682731d5",
"approved" : false,
"text" : "good"
}
}
{ "_id" : "4v2ca2227cad676821731d4", "name" : "name" }
>

mongodb aggregation conditional adding field based on value in array

Please excuse the title; I couldn't find a better description for what I am trying to do.
I have a collection of messages which stores the following information
code: a unique identification code of the message
from: phone number the message was sent from
to: phone number the message was sent to
message: the message text
readings: an array of ObjectIds. The ids reference documents in another collection named "users". If an ObjectId is here, it means that this message has been read by that particular user.
Example Data
{
"_id" : ObjectId("59ba30c95869d32a803e4c4d"),
"code" : "SM54c9366e9b8544e89bdcf2ee841adea7",
"from" : "+49157xxxxxxx",
"to" : "+49160xxxxxxxx",
"message" : "xxxxxxxx",
"createdAt" : ISODate("2017-09-14T07:33:39.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:33:32.324Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"readings" : [
ObjectId("59c25751dcfdaf2944ee2fae"),
ObjectId("59c25751dcfdaf2944e32fae")
],
}
/* 2 */
{
"_id" : ObjectId("59ba3270f53b7f2fb4fa807f"),
"code" : "SM04585672d02644018e3ff466d73c571d",
"from" : "+49xxxxxxx",
"to" : "+49xxxxxxxx",
"message" : "xxxxxxx",
"createdAt" : ISODate("2017-09-14T07:40:42.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:40:34.338Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"readings" : [
ObjectId("59c25751dcfdaf2944ee2fae")
],
}
What I want to achieve is that a message gets an additional field "hasRead" if a specific user has read the message.
Here is the result i want to achieve
{
"_id" : ObjectId("59ba30c95869d32a803e4c4d"),
"code" : "SM54c9366e9b8544e89bdcf2ee841adea7",
"to" : "+491606983534",
"message" : "Schau mer mal",
"createdAt" : ISODate("2017-09-14T07:33:39.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:33:32.324Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"hasRead" : true
}
/* 2 */
{
"_id" : ObjectId("59ba3270f53b7f2fb4fa807f"),
"code" : "SM04585672d02644018e3ff466d73c571d",
"to" : "+491606983534",
"message" : "Schau mer mal",
"createdAt" : ISODate("2017-09-14T07:40:42.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:40:34.338Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"hasRead" : true
}
I constructed an aggregation with the following stages but it looks so BIG for such a simple task and i wonder if there is a more elegant, lighter way to do so ?
The stages are:
$addFields: Checks if the readings array is 0. if 0 it adds a dummy ObjectId, else it sets the readings array
$unwind: Unwind the readings array
$addFields: adds a field "hasRead" upon checking if a specific ObjectId matches the "readings" field. True if equal else false
$group: Group by all fields except the "hasRead" field, "hasRead" based in $max hasRead
$project: Constructing the result to make it a flat object.
And here is my code:
db.getCollection('sms').aggregate([
  // 1. $addFields: an empty readings array is replaced by a dummy ObjectId so
  //    the following $unwind still emits the message.
  { $addFields: {
      "readings": {
        $cond: [
          { $gt: [{ $size: "$readings" }, 0] },
          "$readings",
          [ObjectId("000000000000000000000000")]
        ]
      }
  }},
  // 2. $unwind: one document per reader id.
  { $unwind: "$readings" },
  // 3. $addFields: true when this reader id is the user being checked.
  { $addFields: {
      "hasRead": { $eq: ["$readings", ObjectId("59c25751dcfdaf2944ee2fae")] }
  }},
  // 4. $group: put the message back together; $max yields true if any
  //    unwound row was true.
  { $group: {
      _id: {
        _id: "$_id",
        code: "$code",
        from: "$from",
        to: "$to",
        message: "$message",
        createdAt: "$createdAt",
        lastModifiedAt: "$lastModifiedAt",
        room: "$room"
      },
      hasRead: { $max: "$hasRead" }
  }},
  // 5. $project: lift the grouped key fields back to a flat document.
  { $project: {
      "_id": "$_id._id",
      "code": "$_id.code",
      "from": "$_id.from",
      "to": "$_id.to",
      "message": "$_id.message",
      "createdAt": "$_id.createdAt",
      "lastModifiedAt": "$_id.lastModifiedAt",
      "room": "$_id.room",
      "hasRead": "$hasRead"
  }}
])
After browsing through the answers Neil (see comment) gave to another question, I could simplify the query to this:
// Flag each message with hasRead = true when the given user's id appears in
// its "readings" array.
db.getCollection('sms').aggregate([
  // Reduce "readings" to just the entries equal to the user's id.
  // $ifNull supplies an empty array when a message has no "readings" field.
  { "$addFields": {
      "hasRead": {
        "$filter": {
          "input": { "$ifNull": ["$readings", []] },
          "as": "o",
          "cond": { "$eq": ["$$o", ObjectId("59c25751dcfdaf2944ee2fae")] }
        }
      }
  }},
  { "$project": {
      "_id": 1,
      "code": 1,
      "to": 1,
      "message": 1,
      "createdAt": 1,
      "lastModifiedAt": 1,
      "status": 1,
      "room": 1,
      // Test the FILTERED array from the previous stage. Checking the size of
      // "readings" itself would report true as soon as any user had read it.
      "hasRead": { "$gt": [{ "$size": "$hasRead" }, 0] }
  }}
])
Way too late for this, but you can simply write:
// Project the message fields plus hasRead, which is true when the given
// user's ObjectId is an element of the "readings" array.
db.getCollection("sms").aggregate([
  {
    $project: {
      // fields passed through unchanged
      _id: 1, code: 1, to: 1, message: 1,
      createdAt: 1, lastModifiedAt: 1, status: 1, room: 1,
      // membership test: [ <value>, <array expression> ]
      hasRead: { $in: [ObjectId("59c25751dcfdaf2944ee2fae"), "$readings"] },
    },
  },
]);
often the simplest solution is the correct one :)

Project an array with MongoDB

I'm using MongoDB's aggregation pipeline, to get my documents in the form that I want. As the last step of aggregation, I use $project to put the documents into their final form.
But I'm having trouble projecting an array of sub-documents. Here is what I currently get from the aggregation:
{
"_id": "581c8c3df1325f68ffd23386",
"count": 14,
"authors": [
{
"author": {
"author": "57f246b9e01e6c6f08e1d99a",
"post": "581c8c3df1325f68ffd23386"
},
"count": 13
},
{
"author": {
"author": "5824382511f16d0f3fd5aaf2",
"post": "581c8c3df1325f68ffd23386"
},
"count": 1
}
]
}
I want to $project the authors array so that the return would be this:
{
"_id": "581c8c3df1325f68ffd23386",
"count": 14,
"authors": [
{
"_id": "57f246b9e01e6c6f08e1d99a",
"count": 13
},
{
"_id": "5824382511f16d0f3fd5aaf2",
"count": 1
}
]
}
How would I go about achieving that?
You can unwind the array and wind it up again after projecting.
Something like this:
db.collectionName.aggregate([
  // one document per element of the authors array
  { $unwind: '$authors' },
  // keep only the author's id and count from each unwound element
  { $project: {
      _id: 1,
      count: 1,
      'author.id': '$authors.author.author',
      'author.count': '$authors.count'
  }},
  // wind the array up again: group on the original _id/count pair and
  // push the reshaped authors back into one array
  { $group: {
      _id: { _id: '$_id', count: '$count' },
      author: { $push: { id: '$author.id', count: '$author.count' } }
  }},
  // lift _id and count out of the compound group key
  // (each key appears once; a repeated _id key in the literal would be silently overridden)
  { $project: { _id: '$_id._id', count: '$_id.count', author: 1 } }
])
the output for above will be:
{
"_id" : "581c8c3df1325f68ffd23386",
"author" : [
{
"id" : "57f246b9e01e6c6f08e1d99a",
"count" : 13.0
},
{
"id" : "5824382511f16d0f3fd5aaf2",
"count" : 1.0
}
],
"count" : 14.0
}
I have been having the same problem and just now found a simple and elegant solution that has not been mentioned anywhere, so i thought I'd share it here:
You can iterate the array using $map and project each author. With the given structure, the aggregation should look somewhat like this
// Reshape every element of "authors" in place with $map, without unwinding.
db.collectionName.aggregate([
  {
    $project: {
      _id: 1,
      count: 1,
      authors: {
        $map: {
          input: "$authors",
          as: "author",
          in: {
            // the author id sits one level down, in the nested "author" sub-document
            _id: "$$author.author.author",
            // "count" is a direct field of the array element
            count: "$$author.count"
          }
        }
      }
    }
  }
])
Hope this helps anyone who is looking, like me :)
Question:
"customFields" : [
{
"index" : "1",
"value" : "true",
"label" : "isOffline",
"dataType" : "check_box",
"placeholder" : "cf_isoffline",
"valueFormatted" : "true"
},
{
"index" : "2",
"value" : "false",
"label" : "tenure_extended",
"dataType" : "check_box",
"placeholder" : "cf_tenure_extended",
"valueFormatted" : "false"
}
],
Answer:
db.subscription.aggregate([
  // NOTE(review): the two dotted customFields conditions are separate array
  // conditions; confirm whether they are meant to hold for the same element.
  {
    $match: {
      "autoCollect": false,
      "remainingBillingCycles": -1,
      "customFields.value": "false",
      "customFields.label": "isOffline"
    }
  },
  // return the array element at index 1 (the second custom field) as "first"
  { $project: { first: { $arrayElemAt: ["$customFields", 1] } } }
])

basic mongodb nested sets search, only want to retrieve single value

I just started with document based datastores around 3-4 hours and I have a basic problem that I want to understand.
{
"_id": "5527e5ae06e55c02049bd114",
"owner": "John Doe",
"customers" : ["5527e3c806e55c01dad3a132", "5527e3c806e55c01dad3a133", "5527e3c806e55c01dad3a134"],
"location" : [
{
"address": "Wall St",
"location_id": "123123213",
"vendor" : [
{
"name" : "hello 123",
"price" : "3",
"serial" : "000000009730978e"
},
{
"name" : "hello abc",
"price" : "3.5",
"serial" : "0000000097308888"
}
]
},
{
"address" : "PCH 1",
"location_id": "987987",
"vendor" : [
{
"name" : "hello 456342",
"price" : "4",
"serial" : "000000009733452435"
},
{
"name" : "hello sdfsdg",
"price" : "4.5",
"serial" : "0000000095243532453"
}
]
}
]
}
So how can I find location.serial.price?
db.test.find_one( {"location.location_id" : "123123213" , "location.vendor.serial" : "000000009730978e" } )
would returns the entire object but I am just interested in location.serial.price where these conditions match.
Thanks a lot,
Ben
Usually you would use the positional-operator ($) to refer to array entries. But unfortunately this operator has a serious limitation: it does not work with nested arrays. So it does not help you in this case.
What you can do instead is use an aggregation pipeline which unwinds both arrays and then matches the serial.
db.test.aggregate([
    // create a stream of location-documents
    { $unwind: "$location" },
    // filter the stream by location_id
    { $match: { "location.location_id" : "123123213" } },
    // expand the remaining stream further to individual vendor-documents;
    // the vendor array lives inside each location, hence the dotted path
    { $unwind: "$location.vendor" },
    // filter the stream by serial
    { $match: { "location.vendor.serial": "000000009730978e" } }
]);
Keep in mind that aggregation can become quite slow. Each pipeline stage is also limited to 100MB of RAM (and every document it returns is still subject to the 16MB BSON document size limit). You can lift the per-stage memory limit with the allowDiskUse:true option, but that makes it even slower. So when you have lots of data and performance is a concern, you might want to reconsider your database schema.
Use MongoDB aggregation here; the query below will satisfy your criteria:
// Return only the serial and price of the vendor that matches both the
// location_id and the serial.
db.collectionName.aggregate([
  // one document per location
  { $unwind: "$location" },
  { $match: { "location.location_id": "123123213" } },
  // one document per vendor of the remaining location(s)
  { $unwind: "$location.vendor" },
  { $match: { "location.vendor.serial": "000000009730978e" } },
  // keep just the two requested values and suppress _id
  {
    $project: {
      serial: "$location.vendor.serial",
      price: "$location.vendor.price",
      _id: 0
    }
  }
]).pretty()