Mongodb Aggregate calculate average and add it to the document - mongodb

I have websites which contains 2 documents:
{
"_id" : ObjectId("58503934034b512b419a6eab"),
"website" : "https://www.stackoverflow.com",
"name" : "Stack Exchange",
"keywords" : [
"helping",
"C#",
"PYTHON"
]
}
{
"_id" : ObjectId("58503934034b512b419a6eab"),
"website" : "https://www.google.com.com",
"name" : "Stack Exchange",
"keywords" : [
"search",
"engine",
]
}
I also have another seo_tracking which contains:
{
"_id" : ObjectId("587373d6f6325811c8a0b3ad"),
"position" : "2",
"real_url" : "https://www.stackoverflow.com",
"created_at" : ISODate("2017-01-09T11:28:22.104Z"),
"keyword" : "helping"
},
{
"_id" : ObjectId("587373d6f6325811c8a0b3ad"),
"position" : "4",
"real_url" : "https://www.stackoverflow.com",
"created_at" : ISODate("2017-01-09T11:28:22.104Z"),
"keyword" : "C#"
}
etc.. This contains around 100+ documents
What I want to do is is aggregate the seo_tracking with website on the specific URL (www.stackexchange (in websites) would match www.stackoverflow.com in (seo_tracking)) which I can do fine. However, I would like to return for each of the websites the following:
{
"_id" : ObjectId("587373d6f6325811c8a0b3ad"),
"website":"https://www.stackoverflow.com",
"avg_position" : "2"
}
Then for Google etc.. Even if the avg_position is 0 .. I have tried the following:
db.seo_tracking.aggregate([
{
$lookup:
{
from: "websites",
localField: "real_url",
foreignField: "website",
as: "post_websites"
},
},
{
"$group": {
_id:null,
avg_position:{$avg:"$position"}
}
}
])
However, this just produces:
{
"_id" : null,
"avg_position" : 2.0
}
What I need to do is have website and ideally also need the ID
Any ideas to where I'm going wrong here?

You can try something like this. You'll need to $unwind to access the fields from joined collection and change your grouping key to use the _id from joined collection to get average for each website:
db.seo_tracking.aggregate([{
$lookup: {
from: "website",
localField: "real_url",
foreignField: "website",
as: "post_websites"
},
}, {
$unwind: "$post_websites"
}, {
"$group": {
_id: "$post_websites._id",
avg_position: {
$avg: "$position"
},
website: {
$first: "$real_url"
}
}
}])

Related

Unable to aggregate two collections using lookup in MongoDB Atlas

I have an orders collection that looks like this:
{
"_id" : "wJNEiSYwBd5ozGtLX",
"orderId" : 52713,
"createdAt" : ISODate("2020-01-31T04:34:13.790Z"),
"status" : "closed",
"orders" : [
{
"_id" : "ziPzwLuZrz9MNkaRT",
"productId" : 10290,
"quantity" : 2
}
]
}
I have an products collection that looks like this
{
"_id" : "238cwwLkZa6gKNN86",
"productId" : 10290,
"title" : "Product Title",
"price" : 9.9
}
I am trying to merge the price information into the orders information.
Something like:
{
"_id" : "wJNEiSYwBd5ozGtLX",
"orderId" : 52713,
"createdAt" : ISODate("2020-01-31T04:34:13.790Z"),
"status" : "closed",
"orders" : [
{
"_id" : "ziPzwLuZrz9MNkaRT",
"productId" : 10290,
"quantity" : 2,
"price": 9.9
}
]
}
If I try a $lookup command on MongoDB Atlas Dashboard like this:
{
from: 'products',
localField: 'orders.productId',
foreignField: 'productId',
as: 'priceInfo'
}
The aggregated output is (not what I wanted):
{
"_id" : "wJNEiSYwBd5ozGtLX",
"orderId" : 52713,
"createdAt" : ISODate("2020-01-31T04:34:13.790Z"),
"status" : "closed",
"orders" : [
{
"_id" : "ziPzwLuZrz9MNkaRT",
"productId" : 10290,
}
],
"priceInfo": [
{
"_id" : "238cwwLkZa6gKNN86",
"productId" : 10290,
"title" : "Product Title",
"price" : 9.9
}
]
}
I do not need a separate priceInfo array. It will be best if I have the product details information merged into the "orders" array. What should be the aggregation lookup syntax to achieve the desired output?
Demo - https://mongoplayground.net/p/bLqcN7tauWU
Read - $lookup $unwind $first $set $push $group
db.orders.aggregate([
{ $unwind: "$orders" }, // break array of orders into individual documents
{
$lookup: { // join
"from": "products",
"localField": "orders.productId",
"foreignField": "productId",
"as": "products"
}
},
{
$set: {
"orders.price": { "$arrayElemAt": [ "$products.price", 0 ] } // set the price
}
},
{
$group: { // group records back
_id: "$_id",
createdAt: { $first: "$createdAt" },
status: { $first: "$status" },
orderId: { $first: "$orderId" },
orders: { $push: "$orders" }
}
}
])

How can I use MongoDB's aggregate with $lookup to replace an attribute that holds an id with the whole document?

I have two collections:
user:
{
"_id" : "9efb42e5-514d-44bd-a4b8-6f74e6313ec2",
"name" : "Haralt",
"age" : 21,
"bloodlineId" : "c59a2d02-f304-49a8-a52a-44018fc15fe6",
"villageId" : "foovillage"
}
bloodlines:
{
"_id" : "c59a2d02-f304-49a8-a52a-44018fc15fe6",
"name" : "Tevla",
"legacy" : 0
}
Now I'd like to do an aggregate to replace user.bloodlineId with the whole bloodline document.
This is what I tried to far:
db.getCollection('character').aggregate([
{
"$match": { _id: "9efb42e5-514d-44bd-a4b8-6f74e6313ec2" }
},
{
"$lookup": {
from: "bloodline",
localField: "bloodlineId",
foreignField: "_id",
as: "bloodline"
}
}])
The result is almost where I want it:
{
"_id" : "9efb42e5-514d-44bd-a4b8-6f74e6313ec2",
"name" : "Haralt",
"age" : 21,
"bloodlineId" : "c59a2d02-f304-49a8-a52a-44018fc15fe6",
"villageId" : "foovillage",
"bloodline" : [
{
"_id" : "c59a2d02-f304-49a8-a52a-44018fc15fe6",
"name" : "Tevla",
"legacy" : 0
}
]
}
Only two issues here. The first is that bloodlineId is still there and bloodline was just added to the result. I'd like to have bloodline replace the bloodlineId attribute.
The second problem is that bloodline is an array. I'd love to have it a single object.
I think this pipeline might do the trick:
[
{
"$match": {
_id: "9efb42e5-514d-44bd-a4b8-6f74e6313ec2"
}
},
{
"$lookup": {
from: "bloodlines",
localField: "bloodlineId",
foreignField: "_id",
as: "bloodline"
}
},
{
$project: {
"age": 1,
"bloodlineId": {
$arrayElemAt: [
"$bloodline",
0
]
},
"name": 1,
"villageId": 1
}
}
]
Mongo Playground
If there's anything I'm missing, please let me know!

Why "as" in $lookup is replacing the complete set?

Let me first introduce you to the 2 collections I am using :
Collection 1 : users
> db.users.find().pretty()
{
"_id" : ObjectId("5ee4e727d04e4b4ac1ef115b"),
"name" : "Ashutosh Tiwari",
"age" : 21,
"email" : "ashutosh#gmail.com"
}
{
"_id" : ObjectId("5ee4e727d04e4b4ac1ef115c"),
"name" : "Maximilian",
"age" : 32,
"email" : "max#yahoo.com"
}
Collection 2 : posts
> db.posts.find().pretty()
{
"_id" : ObjectId("5ee51b7ed9f661cad505fcc6"),
"title" : "First One",
"text" : "Hey this is the first Author",
"author" : ObjectId("5ee4e727d04e4b4ac1ef115c"),
"comments" : [
{
"user" : ObjectId("5ee4e727d04e4b4ac1ef115b"),
"comment" : "This is my comment"
}
]
}
{
"_id" : ObjectId("5ee5353cd9f661cad505fcc8"),
"title" : "First One",
"author" : ObjectId("5ee4e727d04e4b4ac1ef115c"),
"comments" : [
{
"user" : ObjectId("5ee4e727d04e4b4ac1ef115b"),
"comment" : "This is my comment"
}
]
}
I want to have the user inside comments array in 2nd Collection(posts) to be replaced by the user who has written that comment.
I have tried the query below but it is replacing the comments section !
> db.posts.aggregate([
{ $lookup:
{from: "users",
localField:"comments.user",
foreignField:"_id",
as:"comments.user"
}
} ]).pretty()
{
"_id" : ObjectId("5ee51b7ed9f661cad505fcc6"),
"title" : "First One",
"text" : "Hey this is the first Author",
"author" : ObjectId("5ee4e727d04e4b4ac1ef115c"),
"comments" : {
"user" : [
{
"_id" : ObjectId("5ee4e727d04e4b4ac1ef115b"),
"name" : "Ashutosh Tiwari",
"age" : 21,
"email" : "ashutosh#gmail.com"
}
]
}
}
{
"_id" : ObjectId("5ee5353cd9f661cad505fcc8"),
"title" : "First One",
"author" : ObjectId("5ee4e727d04e4b4ac1ef115c"),
"comments" : {
"user" : [
{
"_id" : ObjectId("5ee4e727d04e4b4ac1ef115b"),
"name" : "Ashutosh Tiwari",
"age" : 21,
"email" : "ashutosh#gmail.com"
}
]
}
}
So, here, whole comments section is now replaced whereas I wanted to have the details in comments.user section so I could see the comment and the user who has posted that comment.
you need to unwind the comments array first
your query may look something like this
db.posts.aggregate([
{
$unwind: "$comments" // unwind the comments array to get a stream of documents, each document has only one comment
},
{
$lookup: {
from: "users",
localField: "comments.user",
foreignField: "_id",
as: "comments.user"
}
},
{
$unwind: "$comments.user" // we know there is only one user inside a single comment, so we can unwind this user array to be an object too (as the lookup returns an array)
},
{
$group: { // then do a group by the document _id to get unique documents with comments array instead of the same document duplicated with different comments
_id: "$_id",
author: {
$first: "$author"
},
text: {
$first: "$text"
},
title: {
$first: "$title"
},
comments: {
$push: "$comments"
}
}
}
])
you can test it here
hope it helps
You can handle it in the projection.
db.posts.aggregate([
{ $lookup:
{from: "users",
localField:"comments.user",
foreignField:"_id",
as:"cu"
}
},
{$unwind:{path:"$cu"}},
{
$project:{
"title":1,
"text":1,
"author":1,
"comments":{
user: "$cu",
comment: { $arrayElemAt: [ "$comments.comment", 0 ] },
}
}
}
])

MongoDB aggregate two collections, return additional field as count

(See edit below)
I am trying to aggregate data from two separate collections within the same MongoDB database.
The "accounts" collection contains user information (cleansed):
{
_id: ObjectId("5c0d64a4224a2900108c005f"),
"username" : "mike22",
"email" : "mike22#<domain>.com",
"country" : GB,
"created" : ISODate("2018-11-26T23:37:49.051Z")
},
{
_id: ObjectId("5a0d64a4527h2880108c0445"),
"username" : "mike23",
"email" : "mike23#<domain>.com",
"country" : DE,
"created" : ISODate("2018-11-26T23:37:49.051Z")
},
{
_id: ObjectId("5a3334a45zzz2884448c0445"),
"username" : "mike24",
"email" : "mike24#<domain>.com",
"country" : DE,
"created" : ISODate("2018-11-26T23:37:49.051Z")
}
The "devices" collection contains device definitions for all users. A user is likely to have many devices defined in this collection and many users devices are in this collection.
A single device within this collection is defined as follows:
{
"_id" : ObjectId("5c10138c73bbe0001018e415"),
"capabilities" : [
"BrightnessController",
"PowerController"
],
"displayCategories" : [
"LIGHT"
],
"friendlyName" : "Test1",
"description" : "Test device 1",
"reportState" : true,
"username" : "mike22",
"endpointId" : 11,
"__v" : 0
},
{
"_id" : ObjectId("5c10138c73bbe0001018e415"),
"capabilities" : [
"PowerController"
],
"displayCategories" : [
"SWITCH"
],
"friendlyName" : "Test2",
"description" : "Test device 2",
"reportState" : true,
"username" : "mike23",
"endpointId" : 12,
"__v" : 0
},
{
"_id" : ObjectId("5c10138c73bbe0001018e415"),
"capabilities" : [
"PowerController"
],
"displayCategories" : [
"SMARTPLUG"
],
"friendlyName" : "Test3",
"description" : "Test device 3",
"reportState" : true,
"username" : "mike22",
"endpointId" : 13,
"__v" : 0
}
I'm able to use the aggregate below to show me a count of device per-user:
db.accounts.aggregate([
{
$lookup: {
from : "devices",
localField : "username",
foreignField : "username",
as : "userdevs"
},
},
{ $unwind:"$userdevs" },
{ $group : { _id : "$username", count : { $sum : 1 } } }
])
Example output from the data/ aggregate above:
{ "_id" : "mike22", "count" : 2 },
{ "_id" : "mike23", "count" : 1 }
(Note user with no devices is now missing/ should be there with a zero count?!)
However, I want to return all fields for each user plus a new field which shows me the count of devices they have in the "devices" collection. The output I am looking for is as below:
{
"_id" : ObjectId("5c0d64a4224a2900108c005f"),
"username" : "mike22",
"email" : "mike22#<domain>.com",
"country" : GB,
"created" : ISODate("2018-11-26T23:37:49.051Z"),
"countDevices": 2
},
{
"_id" : ObjectId("5a0d64a4527h2880108c0445"),
"username" : "mike23",
"email" : "mike23#<domain>.com",
"country" : DE,
"created" : ISODate("2018-11-26T23:37:49.051Z"),
"countDevices": 1
},
{
"_id" : ObjectId("5a0d64a4527h2880108c0445"),
"username" : "mike24",
"email" : "mike24#<domain>.com",
"country" : DE,
"created" : ISODate("2018-11-26T23:37:49.051Z"),
"countDevices": 0
}
Edit 16/12: So I am nearly there with the aggregate below. Zero-count users are missing though.
use users
db.accounts.aggregate([
{
$lookup: {
from : "devices",
localField : "username",
foreignField : "username",
as : "userdevs"
},
},
{ $unwind: "$userdevs"},
{ $group : { _id : {
_id: "$_id",
username: "$username",
email: "$email",
country: "$country",
region: "$region",
},
countDevices : { $sum : 1 } } }
])
2nd Edit 16/12:
I have found the aggregate needed below:
db.accounts.aggregate([
{ "$lookup": {
"from": "devices",
"let": { "username": "$username" },
"pipeline": [
{ "$match": {
"$expr": { "$eq": [ "$$username", "$username" ] }
}},
{ "$count": "count" }
],
"as": "deviceCount"
}},
{ "$addFields": {
"countDevices": { "$sum": "$deviceCount.count" }
}}
])
First of All, you can flatten the answer you have got with a projection like below:
{ $project : {
_id : '$_id._id',
username : '$_id.username',
email : '$_id.email',
country : '$_id.country',
region : '$_id.region',
countDevices: 1
}
}
add this after the $group in your pipeline, you will get your result as you wanted in the question.
About zero-count users, there is a way to handle this in database using mongoDB, as explained in detail here but I do not recommend it, its better that you handle this kind of problem client side.
As-per second edit, the aggregate I used is as below:
db.accounts.aggregate([
{ "$lookup": {
"from": "devices",
"let": { "username": "$username" },
"pipeline": [
{ "$match": {
"$expr": { "$eq": [ "$$username", "$username" ] }
}},
{ "$count": "count" }
],
"as": "deviceCount"
}},
{ "$addFields": {
"countDevices": { "$sum": "$deviceCount.count" }
}}
])

MongoDB - Find first and last in the embedded array for a single matching document

I have a collection of websites, which each contain a list of websites and their keywords that are being tracked. I also have another collection called "rankings" which for each of the keywords in the website contains a ranking. The collection so far looks like this:
{
"_id" : ObjectId("58503934034b512b419a6eab"),
"website" : "https://www.google.com",
"name" : "Google",
"keywords" : [
"Search",
"Websites",
],
"tracking" : [
{
"_id" : ObjectId("5874aa1df63258286528598d"),
"position" : 0,
"created_at" : ISODate("2017-01-1T09:32:13.831Z"),
"real_url" : "https://www.google.com",
"keyword" : "Search"
},
{
"_id" : ObjectId("5874aa1ff63258286528598e"),
"keyword" : "Search",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-2T09:32:15.832Z"),
"found_url" : "https://google.com/",
"position" : 3
},
{
"_id" : ObjectId("5874aa21f63258286528598f"),
"keyword" : "Search",
"real_url" : "https://www.foamymedia.com",
"created_at" : ISODate("2017-01-3T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 2
},
{
"_id" : ObjectId("5874aa21f63258286528532f"),
"keyword" : "Websites",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-1T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 1
},
{
"_id" : ObjectId("5874aa21f63258286528542f"),
"keyword" : "Websites",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-1T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 2
},
]
}
What I want to do is:
1) Group all of the keywords together by their keyword
2) Find the starting position (at the very start of the month)
3) Find the current position (as of today)
So in theory I want to be given an object like:
{
"_id" : ObjectId("58503934034b512b419a6eab"),
"website" : "https://www.google.com",
"tracking" : [
{
"_id" : ObjectId("5874aa1df63258286528598d"),
"keyword": "Search",
"start_position": 0,
"todays_position": 3,
},
{
"_id" : ObjectId("5874aa1df63258286528598d"),
"keyword": "Website",
"start_position": 0,
"todays_position": 2,
},
]
I am confused about how to do the grouping on another field, though. I have tried the following so far:
db.getCollection('websites').aggregate([
{
$lookup: {
from: "seo_tracking",
localField: "website",
foreignField: "real_url",
as: "tracking"
}
},
{
$match: {
"_id" : ObjectId("58503934034b512b419a6eab")
}
},
{
$group: {
"_id" : "$_id",
"keyword" : {
$first: "$tracking.keyword",
},
}
}
]);
But this is not grouping by the keyword, nor can I figure out how I would get the expected value.
You can try something like this. $unwind the tracking array followed by $sort on tracking.keyword and tracking.created_at. $group by tracking.keyword and $first to get starting position, $avg to get average position and $last to get the today's position. Final $group to roll up everything back to tracking array.
db.website.aggregate([{
$match: {
"_id": ObjectId("58503934034b512b419a6eab")
}
}, {
$lookup: {
from: "seo_tracking",
localField: "website",
foreignField: "real_url",
as: "tracking"
}
}, {
$unwind: "$tracking"
}, {
$sort: {
"tracking.keyword": 1,
"tracking.created_at": -1
}
}, {
$group: {
"_id": "$tracking.keyword",
"website": {
$first: "$website"
},
"website_id": {
$first: "$_id"
},
"avg_position": {
$avg: "$tracking.position"
},
"start_position": {
$first: "$tracking.position"
},
"todays_position": {
$last: "$tracking.position"
}
}
}, {
$group: {
"_id": "$website_id",
"website": {
$first: "$website"
},
"tracking": {
$push: {
"keyword": "$_id",
"avg_position":"$avg_position",
"start_position": "$start_position",
"todays_position": "$todays_position"
}
}
}
}]);