Is there a way to add a counter to mongodb query? - mongodb

I would like to add a counter to the documents that match my query. E.g., 1st document has counter = 1, 2nd document has counter = 2, and so on.
Here's a snippet of the data:
"_id": ObjectId("5d1b9aea5c1dd54e8c773f42")
"timestamp":
[
"systemTimestamp": 2019-07-02T17:56:53.765+00:00
"serverTimestamp": 0001-01-01T00:00:00.000+00:00
"systemTimeZone": "System.CurrentSystemTimeZone"
]
"urlData":
[0]:
"fullUrl":"https://imgur.com/gallery/EfaQnPY"
"UID":"00000-W3W6C42GWTRE960"
"safety": "safe"
My query (this is copied from the Compass UI):
$match:
{
$and: [{"UID": "00000-WVUCW3JW7OTHDVE"},
{"timestamp.serverTimestamp":
{
$gte:ISODate("2019-08-01T00:00"),
$lte:ISODate("2019-09-30T00:00")
}}]
}
$unwind:
{
path: "$urlData",
includeArrayIndex: 'index'
}
$match:
{
"index": 0
}
$project:
{
_id: 0,
date: { $dateToString: {
format: "%Y-%m-%d",
date: "$timestamp.serverTimestamp"}},
safety: "$safety",
url: "$urlData.fullUrl",
UID: "$UID"
}
Is there any way to add something to $project to include a counter?

The answer is there in your question itself. We can get the expected output if the output of the last pipeline is added into an array and again unwinded with the included index.
Let's say, I have the following data:
{
"_id" : ObjectId("5d81c3b7a832f81a9e02337b"),
"first" : "John",
"last" : "Smith"
}
{
"_id" : ObjectId("5d81c3b7a832f81a9e02337c"),
"first" : "Alice",
"last" : "Johnson"
}
{
"_id" : ObjectId("5d81c3b7a832f81a9e02337d"),
"first" : "Bob",
"last" : "Williams"
}
On running the following query:
db.collection.aggregate([
{
$group:{
"_id":null,
"data":{
$push:"$$ROOT"
}
}
},
{
$unwind:{
"path":"$data",
includeArrayIndex: 'counter'
}
},
{
$addFields:{
"data.counter":{
$sum:["$counter",1]
}
}
},
{
$replaceRoot:{
"newRoot":"$data"
}
}
]).pretty()
Output would be:
{
"_id" : ObjectId("5d81c3b7a832f81a9e02337b"),
"first" : "John",
"last" : "Smith",
"counter" : 1
}
{
"_id" : ObjectId("5d81c3b7a832f81a9e02337c"),
"first" : "Alice",
"last" : "Johnson",
"counter" : 2
}
{
"_id" : ObjectId("5d81c3b7a832f81a9e02337d"),
"first" : "Bob",
"last" : "Williams",
"counter" : 3
}

Related

Mongo Aggregate Group List / Filtered Results

Here my records for mongdb
{ "_id" : 1,"userId" : "x", "name" : "Central", "borough": "Manhattan"},
{ "_id" : 2,"userId" : "x", "name" : "Rock", "borough" : "Queens"},
{ "_id" : 3,"userId" : "y", "name" : "Empire", "borough" : "Brooklyn"},
{ "_id" : 4,"userId" : "y", "name" : "Stana", "borough" : "Manhattan"},
{ "_id" : 5,"userId" : "y", "name" : "Jane", "borough" :"Brooklyn"}
how can we take result with aggregate by userId field like this
[
{
x : [{"_id":1,"name":"Central"},{"_id":2,"name":"Rock"}],
y:[{ "_id" : 3,"name":"Empire"},{ "_id" : 4,"name":"Stana"},{ "_id" : 5,"name":"Jane"}]
}
]
$group by userId and construct docs array with required fields
$group by null and construct array of docs in key-value format
$arrayToObject convert docs array to object
$replaceRoot to replace above converted object to root
db.collection.aggregate([
{
$group: {
_id: "$userId",
docs: {
$push: {
_id: "$_id",
name: "$name"
}
}
}
},
{
$group: {
_id: null,
docs: {
$push: {
k: "$_id",
v: "$docs"
}
}
}
},
{ $replaceRoot: { newRoot: { $arrayToObject: "$docs" } } }
])
Playground

MongoDB - sum specific array element under conditions exclude duplicate

I have a bunch of docs that look like below:
{
"_id" : ObjectId("8f30b453c2ece001364dc04d"),
"SessionId" : "awkuTQjj53kgqAZ4J",
"StartDate" : ISODate("2020-02-24T11:51:36.918+0000"),
"EndDate" : ISODate("2020-02-24T11:51:36.918+0000"),
"List1" : "X",
"List2" : "Y",
"rating" : [
{
"ObjectId" : "5d09e98380c5d5eb89ac5069",
"List" : "List 2",
"Rate" : NumberInt(5),
"RatedDate" : ISODate("2020-02-24T11:55:47.774+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac5069",
"List" : "List 2",
"Rate" : NumberInt(4),
"RatedDate" : ISODate("2020-02-24T11:55:48.408+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac505b",
"List" : "List 2",
"Rate" : NumberInt(3),
"RatedDate" : ISODate("2020-02-24T11:55:49.520+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac505c",
"List" : "List 2",
"Rate" : NumberInt(3),
"RatedDate" : ISODate("2020-02-24T11:55:51.787+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac5057",
"List" : "List 1",
"Rate" : NumberInt(4),
"RatedDate" : ISODate("2020-02-24T11:55:53.865+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac5058",
"List" : "List 1",
"Rate" : NumberInt(4),
"RatedDate" : ISODate("2020-02-24T11:55:53.865+0000")
},
],
"Answers" : {
"SelectedList" : "1",
},
}
I need to sum up all the rating.Rate where rating.List:'List 1' and respectively sum up all rating.Rate where rating.List:'List 2', also exclude duplicate records (by rating.ObjectId) and count only the ones with latest rating.RatedDate. I suppose this is a group aggregation.
Also they should match the criteria
List1:'X' ,
Answers.selectedList:1
What I have written looks like below so far:
[
{
"$match" : {
"List1" : "X",
"Answers.SelectedList" : "1"
}
},
{
"$unwind" : {
"path" : "$rating"
}
},
{
"$group" : {
"_id" : null,
"sum" : {
"$sum" : "$Rate"
}
}
}
]
can you please help me?
I was a little confused around the List1/List2 however I think this will get you most of the way to your required aggregation query.
db.test.aggregate([
{
$match: {
"List1": "X",
"Answers.SelectedList": "1"
}
},
{
"$unwind" : "$rating"
},
{
$group:{
_id: {
id: "$rating.ObjectId",
list: "$rating.List"
},
maxRatedDate: { $max: "$rating.RatedDate" },
ratings: { $push: "$rating" }
}
},{
$addFields: {
ratings: {
$filter: {
input: "$ratings",
as: "item",
cond: { $eq: [ "$$item.RatedDate", "$maxRatedDate" ] }
}
}
}
},
{
$unwind: "$ratings"
},
{
$group:{
_id: "$ratings.List",
sum : {
$sum : "$ratings.Rate"
}
}
}
])
This will output the following
{ "_id" : "List 1", "sum" : 8 }
{ "_id" : "List 2", "sum" : 10 }
However, let's try to break it down.
To start with we've got a simple match, the same as yours in your question. this just limits the number of documents we pass back
$match: {
"List1": "X",
"Answers.SelectedList": "1"
}
Then we unwind all the array items so we get a document for each rating, this allows us to do some extra querying on the data.
{
"$unwind" : "$rating"
}
Next, we've got a group by, here we're a group on the ObjectId of the rating so we can later remove duplicates, we're also finding out in the group which rating we've group has the highest date so we can take that one later in a projection. we're then pushing all the rating back in the array for later.
$group:{
_id: {
id: "$rating.ObjectId",
list: "$rating.List"
},
maxRatedDate: { $max: "$rating.RatedDate" },
ratings: { $push: "$rating" }
}
Next we want to project the ratings array in to a single element in which it only contains the latest rating, for this we use a $filter on the array and filter them all out that don't match our max date we calculated in our previous step.
$addFields: {
ratings: {
$filter: {
input: "$ratings",
as: "item",
cond: { $eq: [ "$$item.RatedDate", "$maxRatedDate" ] }
}
}
}
The next two steps are fairly simple and are just unwinding the array again (we've only got one element, then grouping them to get the total sum for the lists.
{
$unwind: "$ratings"
},
{
$group:{
_id: "$ratings.List",
sum : {
$sum : "$ratings.Rate"
}
}
}
At this point you only need to provide the $group stage with the field that you're actually grouping on as the _id field and reference the fields properly as they are still inside of the rating array:
"$group" : {
"_id" : "$rating.List",
"sum" : {
"$sum" : "$rating.Rate"
}
}

How to find duplicate entries in collection using aggregation

{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":1, "name" : "foo"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":2, "name" : "bar"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":3, "name" : "baz"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":4, "name" : "foo"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":5, "name" : "bar"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":6, "name" : "bar"}
I want to find all the duplicated entries in this collection by the "name" field using aggregation. E.g. "foo" appears twice and "bar" appears 3 times.
You can use group stage in aggrgation
db.collection.aggregate([{
$group: {
_id: "$name",
count: { $sum: 1 },
name: { $first: "$name" }
}
}])
You can group by name and count. And then filter with a count greater that 1.
db.collection.aggregate([
{
$group: {
_id: "$name",
count: { $sum: 1 }
}
},
{
$match:{count:{$gt:1}}
}
])
Output:
{ "_id" : "foo", "count":2}
{ "_id" : "bar", "count":3}

How to use $unwind and $match with MongoDB?

I have a document of the following format:
{
"P": {
"Workspaces": [
{
"Key": "Value1",
"Size": 2.27,
"Status": 'something'
},
{
"Key": "Value2",
"Size": 3.27,
"Status": 'somethingelse'
}
]
}
}
The following query returns the average correctly.
db.collection.aggregate([
{ $unwind: "$P.Workspaces" },
{ $group: { _id: "$P.Workspaces.Key", average: { $avg: "$P.Workspaces.Size" } } }
])
I am trying to add a match to filter the status as shown below. However I am not getting no result even though there are documents with matching status. I am trying to filter the results before taking the average. Am I missing something here?
db.collection.aggregate([
{ $unwind: "$P.Workspaces" },
{ $match: { "P.Workspaces.Status":'something'}},
{ $group: { _id: "$P.Workspaces.Key", average: { $avg: "$P.Workspaces.Size" } } }
])
db.articles.aggregate(
[ { $match : { author : "dave" } } ]
);
The examples use a collection named articles with the following documents:
{ "_id" : ObjectId("512bc95fe835e68f199c8686"), "author" : "dave", "score" : 80, "views" : 100 } { "_id" : ObjectId("512bc962e835e68f199c8687"), "author" : "dave", "score" : 85, "views" : 521 } { "_id" : ObjectId("55f5a192d4bede9ac365b257"), "author" : "ahn", "score" : 60, "views" : 1000 } { "_id" : ObjectId("55f5a192d4bede9ac365b258"), "author" : "li", "score" : 55, "views" : 5000 } { "_id" : ObjectId("55f5a1d3d4bede9ac365b259"), "author" : "annT", "score" : 60, "views" : 50 }

Count of MongoDB aggregation match results

I'm working with a MongoDB collection that has a lot of duplicate keys. I regularly do aggregation queries to find out what those duplicates are, so that I can dig in and find out what is and isn't different about them.
Unfortunately the database is huge and duplicates are often intentional. What I'd like to do is to find the count of keys that have duplicates, instead of printing a result with thousands of lines of output. Is this possible?
(Side Note: I do all of my querying through the shell, so solutions that don't require external tools or a lot of code would be preferred, but I understand that's not always possible.)
Example Records:
{ "_id" : 1, "type" : "example", "key" : "111111", "value" : "abc" }
{ "_id" : 2, "type" : "example", "key" : "222222", "value" : "def" }
{ "_id" : 3, "type" : "example", "key" : "222222", "value" : "ghi" }
{ "_id" : 4, "type" : "example", "key" : "333333", "value" : "jkl" }
{ "_id" : 5, "type" : "example", "key" : "333333", "value" : "mno" }
{ "_id" : 6, "type" : "example", "key" : "333333", "value" : "pqr" }
{ "_id" : 7, "type" : "example", "key" : "444444", "value" : "stu" }
{ "_id" : 8, "type" : "example", "key" : "444444", "value" : "vwx" }
{ "_id" : 9, "type" : "example", "key" : "444444", "value" : "yz1" }
{ "_id" : 10, "type" : "example", "key" : "444444", "value" : "234" }
Here is the query that I've been using to find duplicates based on key:
db.collection.aggregate([
{
$match: {
type: "example"
}
},
{
$group: {
_id: "$key",
count: {
$sum: 1
}
}
},
{
$match: {
count: {
$gt: 1
}
}
}
])
Which gives me an output of:
{
"_id": "222222",
"count": 2
},
{
"_id": "333333",
"count": 3
},
{
"_id": "444444",
"count": 4
}
The result I want to get instead:
3
You are almost there, just missing the last $count:
db.collection.aggregate([
{
$match: {
type: "example"
}
},
{
$group: {
_id: "$key",
count: {
$sum: 1
}
}
},
{
$match: {
count: {
$gt: 1
}
}
},
{
$count: "count"
}
])
Akrion's answer seems to be correct, but I can't test it because we're on an older version of MongoDB. A coworker gave me an alternative solution that works on 3.2 (not sure about other versions).
Adding .toArray() will convert the results to an array, and you can then get the size of the array using .length.
db.collection.aggregate([
{
$match: {
type: "example"
}
},
{
$group: {
_id: "$key",
count: {
$sum: 1
}
}
},
{
$match: {
count: {
$gt: 1
}
}
}
]).toArray().length