Get average of values over a period of time - mongodb

I have the following collection:
{
"_id" : ObjectId("58503934034b512b419a6eab"),
"website" : "https://www.google.com",
"name" : "Google",
"keywords" : [
"Search",
"Websites",
],
"tracking" : [
{
"_id" : ObjectId("5874aa1df63258286528598d"),
"position" : 0,
"created_at" : ISODate("2017-01-1T09:32:13.831Z"),
"real_url" : "https://www.google.com",
"keyword" : "Search"
},
{
"_id" : ObjectId("5874aa1ff63258286528598e"),
"keyword" : "Page",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-1T09:32:15.832Z"),
"found_url" : "https://google.com/",
"position" : 3
},
{
"_id" : ObjectId("5874aa21f63258286528598f"),
"keyword" : "Search",
"real_url" : "https://www.foamymedia.com",
"created_at" : ISODate("2017-01-2T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 2
},
{
"_id" : ObjectId("5874aa21f63258286528532f"),
"keyword" : "Search",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-2T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 1
},
]
}
What I want to do is group all of the keywords together and calculate the average for that particular day, over a certain period.
So let's say for example:
Between: 2017-01-01 to 2017-01-31 the following keywords was tracked:
2017-01-01:
'Seach' => 1,
'Page' => 3,
Average = 2
2017-01-02:
'Search' => 4,
'Page' => 6,
Average = 5
....
So in the end result, I would be finished with (in this case):
{
"_id" : ObjectId("5874dccb9cd90425e41b7c54"),
"website" : "www.google.com",
"averages" : [
"2",
"5"
]
}

You can try something like this.
$unwind the tracking array followed by $sort on tracking.keyword and tracking.created_at.$group by day to get average for day across all categories. Final $group to push all the day's average values into array for a website.
db.website.aggregate([{
$match: {
"_id": ObjectId("58503934034b512b419a6eab")
}
}, {
$lookup: {
from: "seo_tracking",
localField: "website",
foreignField: "real_url",
as: "tracking"
}
}, {
$unwind: "$tracking"
}, {
$sort: {
"tracking.keyword": 1,
"tracking.created_at": -1
}
}, {
$group: {
_id: {
$dayOfMonth: "$tracking.created_at"
},
"website": {
$first: "$website"
},
"website_id": {
$first: "$_id"
},
"averageByDay": {
$avg: "$tracking.position"
}
}
}, {
$group: {
"_id": "$website_id",
"website": {
$first: "$website"
},
"average": {
$push: "$averageByDay"
}
}
}]);

Related

Mongodb Aggregation get Data per user

Report table sample data
{
"_id" : ObjectId("614415f4a6566a001623b622"),
"record" : [
{
"dateTime" : ISODate("2021-09-17T04:13:39.465Z"),
"status" : "time-in",
"month" : 9,
"day" : 17,
"year" : 2021,
"time" : 1631852019465.0,
"date" : ISODate("2021-09-17T00:00:00.000Z"),
},
{
"dateTime" : ISODate("2021-09-17T04:14:01.182Z"),
"status" : "time-out",
"month" : 9,
"day" : 17,
"year" : 2021,
"time" : 1631852041182.0,
"date" : ISODate("2021-09-17T00:00:00.000Z"),
}
],
"uid" : ObjectId("614415b0a6566a001623b80b"),
"date" : ISODate("2021-09-17T00:00:00.000Z"),
"status" : "time-out",
"createdAt" : ISODate("2021-09-17T04:13:40.102Z"),
"updatedAt" : ISODate("2021-09-17T04:14:01.831Z"),
"__v" : 0
}
Users table sample data
{
"_id" : ObjectId("615c0f6db30aff375cd05ac1"),
"displayName" : "test test",
"firstName" : "test",
"lastName" : "test",
"email" : "test#gmail.com",
"brand" : "Jollibee",
"phone" : "+632312312312312",
"role" : 1,
"isVerified" : true,
"isArchived" : false,
"createdAt" : ISODate("2021-10-05T08:40:13.208Z"),
"updatedAt" : ISODate("2021-10-05T08:40:13.208Z"),
"__v" : 0
}
I have a data like this
db.getCollection('users').aggregate([
{
"$match": { brand: "Jollibee" }
},
{
$lookup: {
from: "orders",
let: { id: 'id' },
pipeline: [
{
$match: {
date: { $gte: ISODate("2020-11-01"), $lt: ISODate("2021-11-31") },
}
}
],
as: "orders",
},
},
{
$project: {
"_id": 1,
"name": 1,
"orders": 1
}
}
])
when I'm using this aggregation I'm getting all the data inserted per user.
What I want to happen is that. I will only get the data that belong to the user and not all the data of all users.
Added the sample documents for each collection
You are not comparing the userIds of both collections. You should add that on your $match. Playground
db.users.aggregate([
{
"$match": {
brand: "Jollibee"
}
},
{
$lookup: {
from: "orders",
let: {
id: "$_id"
},
pipeline: [
{
$match: {
date: {
$gte: ISODate("2020-11-01"),
$lt: ISODate("2021-11-30")
},
$expr: {
$eq: [
"$uid",
"$$id"
]
}
}
}
],
as: "orders",
},
},
{
$project: {
"_id": 1,
"name": 1,
"orders": 1
}
}
])

How to fetch all fields of parent document?

I have tried to fetch all fields of parent document using $group but it send only specific fields.
Another Question. How to fetch all fields of parent document using '$project'?
Collection "Opportunity"
Document #1
{
"_id" : ObjectId("5c42b737cd94891a57fbf33e"),
"accountId" : [
ObjectId("5c29f198b248d845931a1830"),
ObjectId("5c29f198b248d845931a1831"),
]
"name" : "OppNewTest01",
"value" : 10000,
"status" : "open",
}
Collection "Account"
Document #1
{
"_id" : ObjectId("5c29f198b248d845931a1830"),
"name" : "MyTestAcc",
"phone" : "7845124578",
}
My MongoDB Query,
con.collection('opportunity').aggregate([
{
$unwind: "account"
},
{
$lookup: {
from: 'account',
localField: 'accountId',
foreignField: '_id',
as: 'accounts',
},
},
{
$unwind: '$accounts'
},
{
$group: {
"_id": "$_id",
"account": { "$push": "$accounts" }
}
},
]).toArray((err, res) => {
cbFun(res, err);
});
Output of the above query,
{
"_id" : "5c42b737cd94891a57fbf33e",
"account": [
{
"_id" : ObjectId("5c29f198b248d845931a1830"),
"name" : "MyTestAcc",
"phone" : "7845124578",
},
{
"_id" : ObjectId("5c29f198b248d845931a1831"),
"name" : "MyTestAcc1",
"phone" : "7845124579",
},
]
}
I want to below output,
{
"_id" : "5c42b737cd94891a57fbf33e",
"accountId" : [
ObjectId("5c29f198b248d845931a1830"),
ObjectId("5c29f198b248d845931a1831"),
],
"name" : "OppNewTest01",
"value" : 10000,
"status" : "open",
"account": [
{
"_id" : ObjectId("5c29f198b248d845931a1830"),
"name" : "MyTestAcc",
"phone" : "7845124578",
},
{
"_id" : ObjectId("5c29f198b248d845931a1831"),
"name" : "MyTestAcc1",
"phone" : "7845124579",
},
]
}
I have tried to fetch above output but not getting.
You are just losing that data when you $group, save those fields in that stage to keep them.
{
$group: {
"_id": "$_id",
"account": { "$push": "$accounts" },
value: {$first: "$value"},
status: {$first: "$status},
name: {$first: "$name"},
accountId: {$push: "$accountId},
}
}
EDIT:
[
{
$group: {
"_id": "$_id",
"accounts": { "$push": "$accounts" },
"doc": {$first: "$$ROOT"},
}
},
{
$addFields: {
"doc.accounts" :"$accounts"
}
}
]

Get average per year and label?

I can use this query to get the average sqmPrice for a myArea
db.getCollection('sold').aggregate([
{$match:{}},
{$group: {_id: "$myArea", "sqmPrice": {$avg: "$sqmPrice"} }}
])
Output:
[
{
"_id" : "Yttre Aspudden",
"sqmPrice" : 48845.7777777778
},
{
"_id" : "Hägerstensåsen",
"sqmPrice" : 120
}
]
I would like to group this by year, ideally an object that looks like this:
{
"Yttre Aspudden": {
2008: 1232,
2009: 1244
...
}
...
}
but the formatting is not the most important.
Here is a sample object, I would like to use soldDate:
{
"_id" : ObjectId("5beca41c78f21248ab47f4a6"),
"location" : {
"address" : {
"streetAddress" : "Ljusstöparbacken 26C"
},
"position" : {
"latitude" : 59.31427884,
"longitude" : 18.00892421
},
"namedAreas" : [
"Hägersten-Liljeholmen"
],
"region" : {
"municipalityName" : "Stockholm",
"countyName" : "Stockholms län"
},
"distance" : {
"ocean" : 3777
}
},
"listPrice" : 1895000,
"rent" : 1959,
"floor" : 1,
"livingArea" : 38.5,
"source" : {
"name" : "Fastighetsbyrån",
"id" : 1573,
"type" : "Broker",
"url" : "http://www.fastighetsbyran.se/"
},
"rooms" : 1.5,
"published" : ISODate("2018-11-02T20:55:19.000Z"),
"constructionYear" : 1959,
"objectType" : "Lägenhet",
"booliId" : 3278478,
"soldDate" : ISODate("2018-11-14T00:00:00.000Z"),
"soldPrice" : 2620000,
"soldPriceSource" : "bid",
"url" : "https://www.booli.se/annons/3278478",
"publishedDays" : 1735,
"soldDays" : 1747,
"daysUp" : 160,
"street" : "Ljusstöparbacken",
"streetYear" : "Ljusstöparbacken Hägersten-Liljeholmen 1959",
"yearDay" : 318,
"yearWeek" : 46,
"roughSize" : 40,
"sqmPrice" : 49221,
"myArea" : "Gröndal",
"hotlist" : true
}
You need to generate your keys dynamically so you have to use $arrayToObject. To build an object which aggregates the data you need three $group stages and to create new root of your document you can use $replaceRoot, try:
db.sold.aggregate([
{ $group: {_id: { area: "$myArea", year: { $year: "$soldDate" } }, "sqmPrice": {$avg: "$sqmPrice"} }},
{ $group: { _id: "$_id.area", avgs: { $push: { k: { $toString: "$_id.year" }, v: "$sqmPrice" } } } },
{ $group: { _id: null, areas: { $push: { k: "$_id", v: { $arrayToObject: "$avgs" } } } } },
{ $replaceRoot: { newRoot: { $arrayToObject: "$areas" } } }
])

Match documents with their inner array element variables in MongoDB

I can't understand how to compare a document variable to another document variable. My goal is to match all Authors who have at least one book written in their mothertongue (native language).
However, after unwinding the books array, My $match: { mothertongue: "$bookLang"}} doesn't return return anything, eventhough they're the same in the $project stage.
Can you help me without javascript?
This is my current query:
db.author.aggregate([
{
$unwind: "$books"
},
{
$project: {
books: true,
mothertongue: true,
bookLang: "$books.lang"
}
},
{
$match: { mothertongue: "$bookLang"}
}
])
And here is a sample of the dataset
{
"_id" : ObjectId("5aa7b34a338571a7470be0eb"),
"fname" : "Minna",
"lname" : "Canth",
"mothertongue" : "Finnish",
"birthdate" : ISODate("1844-03-19T00:00:00Z"),
"deathdate" : ISODate("1897-05-12T00:00:00Z"),
"books" : [
{
"title" : "Anna Liisa",
"lang" : "Finnish",
"language" : "finnish",
"edition" : 1,
"cover" : "Hard",
"year" : 1895,
"categorytags" : [
"Finland"
],
"publisher" : [
{
"name" : "Tammi",
"pubId" : ObjectId("5aa7b34a338571a7470be0e4")
}
]
},
{
"title" : "The Burglary and The House of Roinila",
"lang" : "English (UK)",
"translator" : ObjectId("5aa7b34a338571a7470be0ee"),
"cover" : "Soft",
"year" : 2010,
"categorytags" : [
"Finland"
],
"publisher" : [
{
"name" : "Jonathan Cape",
"pubId" : ObjectId("5aa7b34a338571a7470be0e7")
}
]
},
{
"title" : "Anna Liisa 2 ed.",
"lang" : "Finnish",
"language" : "finnish",
"edition" : 2,
"cover" : "hard",
"year" : 1958,
"categorytags" : [
"Finland"
],
"publisher" : [
{
"name" : "Otava",
"pubId" : ObjectId("5aa7b34a338571a7470be0e9")
}
]
}
]
}
End goal. note I'm not interested in formatting just yet, just the filtering
{
"Author" : "Charles Bukowski",
"BooksInMothertongue" : [
"Love Is a Dog from Hell"
]
}
{
"Author" : "Minna Canth",
"BooksInMothertongue" : [
"Anna Liisa",
"Anna Liisa 2 ed."
]
}
...
Try this
db.author.aggregate([{
$match: {
books: {
$ne: []
}
}
},
{
$project: {
books: {
$filter: {
input: "$books",
as: "book",
cond: {
$eq: ["$$book.lang", "$mothertongue"]
}
}
},
fname: 1
}
}, {
$unwind: "$books"
},
{
$group: {
_id: "$_id",
Author: {
$first: '$fname'
},
BooksInMothertongue: {
$push: "$books.title"
}
}
}
])

MongoDB - Find first and last in the embedded array for a single matching document

I have a collection of websites, which each contain a list of websites and their keywords that are being tracked. I also have another collection called "rankings" which for each of the keywords in the website contains a ranking. The collection so far looks like this:
{
"_id" : ObjectId("58503934034b512b419a6eab"),
"website" : "https://www.google.com",
"name" : "Google",
"keywords" : [
"Search",
"Websites",
],
"tracking" : [
{
"_id" : ObjectId("5874aa1df63258286528598d"),
"position" : 0,
"created_at" : ISODate("2017-01-1T09:32:13.831Z"),
"real_url" : "https://www.google.com",
"keyword" : "Search"
},
{
"_id" : ObjectId("5874aa1ff63258286528598e"),
"keyword" : "Search",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-2T09:32:15.832Z"),
"found_url" : "https://google.com/",
"position" : 3
},
{
"_id" : ObjectId("5874aa21f63258286528598f"),
"keyword" : "Search",
"real_url" : "https://www.foamymedia.com",
"created_at" : ISODate("2017-01-3T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 2
},
{
"_id" : ObjectId("5874aa21f63258286528532f"),
"keyword" : "Websites",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-1T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 1
},
{
"_id" : ObjectId("5874aa21f63258286528542f"),
"keyword" : "Websites",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-1T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 2
},
]
}
What I want to do is:
1) Group all of the keywords together by their keyword
2) Find the starting position (at the very start of the month)
3) Find the current position (as of today)
So in theory I want to be given an object like:
{
"_id" : ObjectId("58503934034b512b419a6eab"),
"website" : "https://www.google.com",
"tracking" : [
{
"_id" : ObjectId("5874aa1df63258286528598d"),
"keyword": "Search",
"start_position": 0,
"todays_position": 3,
},
{
"_id" : ObjectId("5874aa1df63258286528598d"),
"keyword": "Website",
"start_position": 0,
"todays_position": 2,
},
]
I am confused about how to do the grouping on another field, though. I have tried the following so far:
db.getCollection('websites').aggregate([
{
$lookup: {
from: "seo_tracking",
localField: "website",
foreignField: "real_url",
as: "tracking"
}
},
{
$match: {
"_id" : ObjectId("58503934034b512b419a6eab")
}
},
{
$group: {
"_id" : "$_id",
"keyword" : {
$first: "$tracking.keyword",
},
}
}
]);
But this is not grouping by the keyword, nor can I figure out how I would get the expected value.
You can try something like this. $unwind the tracking array followed by $sort on tracking.keyword and tracking.created_at. $group by tracking.keyword and $first to get starting position, $avg to get average position and $last to get the today's position. Final $group to roll up everything back to tracking array.
db.website.aggregate([{
$match: {
"_id": ObjectId("58503934034b512b419a6eab")
}
}, {
$lookup: {
from: "seo_tracking",
localField: "website",
foreignField: "real_url",
as: "tracking"
}
}, {
$unwind: "$tracking"
}, {
$sort: {
"tracking.keyword": 1,
"tracking.created_at": -1
}
}, {
$group: {
"_id": "$tracking.keyword",
"website": {
$first: "$website"
},
"website_id": {
$first: "$_id"
},
"avg_position": {
$avg: "$tracking.position"
},
"start_position": {
$first: "$tracking.position"
},
"todays_position": {
$last: "$tracking.position"
}
}
}, {
$group: {
"_id": "$website_id",
"website": {
$first: "$website"
},
"tracking": {
$push: {
"keyword": "$_id",
"avg_position":"$avg_position",
"start_position": "$start_position",
"todays_position": "$todays_position"
}
}
}
}]);