Get average per year and label? - mongodb

I can use this query to get the average sqmPrice for a myArea
db.getCollection('sold').aggregate([
{$match:{}},
{$group: {_id: "$myArea", "sqmPrice": {$avg: "$sqmPrice"} }}
])
Output:
[
{
"_id" : "Yttre Aspudden",
"sqmPrice" : 48845.7777777778
},
{
"_id" : "Hägerstensåsen",
"sqmPrice" : 120
}
]
I would like to group this by year, ideally an object that looks like this:
{
"Yttre Aspudden": {
2008: 1232,
2009: 1244
...
}
...
}
but the formatting is not the most important.
Here is a sample object, I would like to use soldDate:
{
"_id" : ObjectId("5beca41c78f21248ab47f4a6"),
"location" : {
"address" : {
"streetAddress" : "Ljusstöparbacken 26C"
},
"position" : {
"latitude" : 59.31427884,
"longitude" : 18.00892421
},
"namedAreas" : [
"Hägersten-Liljeholmen"
],
"region" : {
"municipalityName" : "Stockholm",
"countyName" : "Stockholms län"
},
"distance" : {
"ocean" : 3777
}
},
"listPrice" : 1895000,
"rent" : 1959,
"floor" : 1,
"livingArea" : 38.5,
"source" : {
"name" : "Fastighetsbyrån",
"id" : 1573,
"type" : "Broker",
"url" : "http://www.fastighetsbyran.se/"
},
"rooms" : 1.5,
"published" : ISODate("2018-11-02T20:55:19.000Z"),
"constructionYear" : 1959,
"objectType" : "Lägenhet",
"booliId" : 3278478,
"soldDate" : ISODate("2018-11-14T00:00:00.000Z"),
"soldPrice" : 2620000,
"soldPriceSource" : "bid",
"url" : "https://www.booli.se/annons/3278478",
"publishedDays" : 1735,
"soldDays" : 1747,
"daysUp" : 160,
"street" : "Ljusstöparbacken",
"streetYear" : "Ljusstöparbacken Hägersten-Liljeholmen 1959",
"yearDay" : 318,
"yearWeek" : 46,
"roughSize" : 40,
"sqmPrice" : 49221,
"myArea" : "Gröndal",
"hotlist" : true
}

You need to generate your keys dynamically so you have to use $arrayToObject. To build an object which aggregates the data you need three $group stages and to create new root of your document you can use $replaceRoot, try:
db.sold.aggregate([
{ $group: {_id: { area: "$myArea", year: { $year: "$soldDate" } }, "sqmPrice": {$avg: "$sqmPrice"} }},
{ $group: { _id: "$_id.area", avgs: { $push: { k: { $toString: "$_id.year" }, v: "$sqmPrice" } } } },
{ $group: { _id: null, areas: { $push: { k: "$_id", v: { $arrayToObject: "$avgs" } } } } },
{ $replaceRoot: { newRoot: { $arrayToObject: "$areas" } } }
])

Related

Creating measures in a mongodb aggregation pipeline

I have a report that has been developed in PowerBI. It runs over a collection of jobs, and for a given month and year counts the number of jobs that were created, due or completed in that month using measures.
I am attempting to reproduce this report using a mongoDB aggregation pipeline. At first, I thought I could just use the $group stage to do this, but quickly realised that grouping by a specific date would exclude jobs.
Some sample documents are below (most fields excluded as they are not relevant):
{
"_id": <UUID>,
"createdOn": ISODate("2022-07-01T00:00"),
"dueOn": ISODate("2022-08-01T00:00"),
"completedOn": ISODate("2022-07-29T00:00")
},
{
"_id": <UUID>,
"createdOn": ISODate("2022-06-01T00:00"),
"dueOn": ISODate("2022-08-01T00:00"),
"completedOn": ISODate("2022-07-24T00:00")
}
For example, if I group by created date, the record for July 2022 would show 1 created job and only 1 completed job, but it should show 2.
How can I go about recreating this report? One idea was that I needed to determine the minimum and maximum of all the possible dates across those 3 date fields in my collection, but I don't know where to go from there
I ended up solving this by using a facet. I followed this process:
Each facet field grouped by a different date field from the source document, and then aggregated the relevant field (e.g. counts, or sums as required). I ensured each of these fields in the facet had a unique name.
I then did a project stage where I took each of the facet stage fields (arrays), and concat them into a single array
I unwound the array, and then replaced the root to make it simpler to work with
I then grouped again by the _id field which was set to the relevant date during the facet field, and then grabbed the relevant fields.
The relevant parts of the pipeline are below:
db.getCollection("jobs").aggregate(
// Pipeline
[
// Stage 3
{
$facet: {
//Facet 1, group by created date, count number of jobs created
//facet 2, group by completed date, count number of jobs completed
//facet 3, group by due date, count number of jobs due
"created" : [
{
$addFields : {
"monthStarting" : {
"$dateFromString" : {
"dateString" : {
"$dateToString" : {
"date" : {
"$dateTrunc" : {
"date" : "$createdAt",
"unit" : "month",
"binSize" : 1.0,
"timezone" : "$timezone",
"startOfWeek" : "mon"
}
},
"timezone" : "$timezone"
}
}
}
},
"yearStarting" : {
"$dateFromString" : {
"dateString" : {
"$dateToString" : {
"date" : {
"$dateTrunc" : {
"date" : "$createdAt",
"unit" : "year",
"binSize" : 1.0,
"timezone" : "$timezone"
}
},
"timezone" : "$timezone"
}
}
}
}
}
},
{
$group : {
"_id" : {
"year" : "$yearStarting",
"month" : "$monthStarting"
},
"monthStarting" : {
"$first" : "$monthStarting"
},
"yearStarting" : {
"$first" : "$yearStarting"
},
"createdCount": {$sum: 1}
}
}
],
"completed" : [
{
$addFields : {
"monthStarting" : {
"$dateFromString" : {
"dateString" : {
"$dateToString" : {
"date" : {
"$dateTrunc" : {
"date" : "$completedDate",
"unit" : "month",
"binSize" : 1.0,
"timezone" : "$timezone",
"startOfWeek" : "mon"
}
},
"timezone" : "$timezone"
}
}
}
},
"yearStarting" : {
"$dateFromString" : {
"dateString" : {
"$dateToString" : {
"date" : {
"$dateTrunc" : {
"date" : "$completedDate",
"unit" : "year",
"binSize" : 1.0,
"timezone" : "$timezone"
}
},
"timezone" : "$timezone"
}
}
}
}
}
},
{
$group : {
"_id" : {
"year" : "$yearStarting",
"month" : "$monthStarting"
},
"monthStarting" : {
"$first" : "$monthStarting"
},
"yearStarting" : {
"$first" : "$yearStarting"
},
"completedCount": {$sum: 1}
}
}
],
"due": [
{
$match: {
"dueDate": {$ne: null}
}
},
{
$addFields : {
"monthStarting" : {
"$dateFromString" : {
"dateString" : {
"$dateToString" : {
"date" : {
"$dateTrunc" : {
"date" : "$dueDate",
"unit" : "month",
"binSize" : 1.0,
"timezone" : "$timezone",
"startOfWeek" : "mon"
}
},
"timezone" : "$timezone"
}
}
}
},
"yearStarting" : {
"$dateFromString" : {
"dateString" : {
"$dateToString" : {
"date" : {
"$dateTrunc" : {
"date" : "$dueDate",
"unit" : "year",
"binSize" : 1.0,
"timezone" : "$timezone"
}
},
"timezone" : "$timezone"
}
}
}
}
}
},
{
$group : {
"_id" : {
"year" : "$yearStarting",
"month" : "$monthStarting"
},
"monthStarting" : {
"$first" : "$monthStarting"
},
"yearStarting" : {
"$first" : "$yearStarting"
},
"dueCount": {$sum: 1},
"salesRevenue": {$sum: "$totalSellPrice"},
"costGenerated": {$sum: "$totalBuyPrice"},
"profit": {$sum: "$profit"},
"avgValue": {$avg: "$totalSellPrice"},
"finalisedRevenue": {$sum: {
$cond: {
"if": {$in: ["$status",["Finalised","Closed"]]},
"then": "$totalSellPrice",
"else": 0
}
}}
}
}
]
}
},
// Stage 4
{
$project: {
"docs": {$concatArrays: ["$created","$completed","$due"]}
}
},
// Stage 5
{
$unwind: {
path: "$docs",
}
},
// Stage 6
{
$replaceRoot: {
// specifications
"newRoot": "$docs"
}
},
// Stage 7
{
$group: {
_id: "$_id",
"monthStarting" : {
"$first" : "$monthStarting"
},
"yearStarting" : {
"$first" : "$yearStarting"
},
"monthStarting" : {
"$first" : "$monthStarting"
},
"createdCountSum" : {
"$sum" : "$createdCount"
},
"completedCountSum" : {
"$sum" : "$completedCount"
},
"dueCountSum" : {
"$sum" : "$dueCount"
},
"salesRevenue" : {
"$sum" : "$salesRevenue"
},
"costGenerated" : {
"$sum" : "$costGenerated"
},
"profit" : {
"$sum" : "$profit"
},
"finalisedRevenue" : {
"$sum" : "$finalisedRevenue"
},
"avgJobValue": {
$sum: "$avgValue"
}
}
},
],
);

Mongodb - Array indexed by string in $addToSet operator

Suppose we have these two documents:
{
"_id" : ObjectId("5f3cdd1d0fefeba343ff3093"),
"country" : "C1",
"time" : "1994",
"value" : NumberInt(100),
"type" : "type1",
"origin" : "O1"
}
{
"_id" : ObjectId("5f3cdd1d0fefeba343ff3094"),
"country" : "C1",
"time" : "1994",
"value" : NumberInt(200),
"type" : "type1",
"origin" : "O2"
}
I want to retrieve the aggregation with the origin array indexed by strings (the value of "type"); expected output:
{
"_id" : {
"country" : "C1",
"time" : "1994"
},
"TOT" : NumberInt(300),
"count" : 2.0,
"origin" : [
"O1": NumberInt(100),
"O2": NumberInt(200)
]
}
Here we have the type of the "origin" array as {[key: string]: number}.
With the following query, the origin array is instead indexed by numbers:
use local;
db.getCollection("test_collection").aggregate(
[
{
"$match" : {
"type" : {
"$in" : [
"type1"
]
}
}
},
{
"$group" : {
"_id" : {
"country" : "$country",
"time" : "$time"
},
"TOT" : {
"$sum" : "$value"
},
"count" : {
"$sum" : 1.0
},
"origin" : {
"$addToSet" : "$value"
}
}
}
],
{
"allowDiskUse" : false
}
);
You can try $arrayToObject after adding in origin,
use local;
db.getCollection("test_collection").aggregate([
{ "$match": { "type": { "$in": ["type1"] } } },
{
"$group": {
"_id": {
"country": "$country",
"time": "$time"
},
"TOT": { "$sum": "$value" },
"count": { "$sum": 1.0 },
// add object like this
"origin": {
"$addToSet": {
k: "$origin",
v: "$value"
}
}
}
},
// add this
{ $addFields: { origin: { $arrayToObject: "$origin" } } }
],
{ "allowDiskUse": false }
)
Playground

How to get percentage total of data with group by date in MongoDB

How to get percentage total of data with group by date in MongoDB ?
Link example : https://mongoplayground.net/p/aNND4EPQhcb
I have some collection structure like this
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date" : "2019-05-03T10:39:53.108Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date" : "2019-05-03T10:39:53.133Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date" : "2019-05-03T10:39:53.180Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4e"),
"date" : "2019-05-03T10:39:53.218Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
And I have query in mongodb to get data of collection, how to get percentage of total data. in bellow example query to get data :
db.name_collection.aggregate(
[
{ "$match": {
"update_at": { "$gte": "2019-11-04T00:00:00.0Z", "$lt": "2019-11-06T00:00:00.0Z"},
"id": { "$in": [166] }
} },
{
"$group" : {
"_id": {
$substr: [ '$update_at', 0, 10 ]
},
"count" : {
"$sum" : 1
}
}
},
{
"$project" : {
"_id" : 0,
"date" : "$_id",
"count" : "$count"
}
},
{
"$sort" : {
"date" : 1
}
}
]
)
and this response :
{
"date" : "2019-11-04",
"count" : 39
},
{
"date" : "2019-11-05",
"count" : 135
}
how to get percentage data total from key count ? example response to this :
{
"date" : "2019-11-04",
"count" : 39,
"percentage" : "22%"
},
{
"date" : "2019-11-05",
"count" : 135,
"percentage" : "78%"
}
You have to group by null to get total count and then use $map to calculate the percentage. $round will be a useful operator in such case. Finally you can $unwind and $replaceRoot to get back the same number of documents:
db.collection.aggregate([
// previous aggregation steps
{
$group: {
_id: null,
total: { $sum: "$count" },
docs: { $push: "$$ROOT" }
}
},
{
$project: {
docs: {
$map: {
input: "$docs",
in: {
date: "$$this.date",
count: "$$this.count",
percentage: { $concat: [ { $toString: { $round: { $multiply: [ { $divide: [ "$$this.count", "$total" ] }, 100 ] } } }, '%' ] }
}
}
}
}
},
{
$unwind: "$docs"
},
{
$replaceRoot: { newRoot: "$docs" }
}
])
Mongo Playground

Mongodb aggregation json format by month result

Hello I am working with the reporting api which will going to use in highcharts and I am new to mongodb.
Below is my aggregation query (suggest me modification) :
db.product_sold.aggregate({
$group: {
_id: { year: { $year: "$solddate" }, month: { $month: "$solddate" }, productid: "$productid" },
totalQty: { $sum: "$qty" }
}
})
Output:
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2),
"productid" : "11"
},
"totalQty" : 6.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2),
"productid" : "14"
},
"totalQty" : 7.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(1),
"productid" : "13"
},
"totalQty" : 3.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2),
"productid" : "10"
},
"totalQty" : 6.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(2),
"productid" : "12"
},
"totalQty" : 5.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2),
"productid" : "15"
},
"totalQty" : 8.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(1),
"productid" : "11"
},
"totalQty" : 2.0
}
// ----------------------------------------------
What I want in output is something like :
status: 200,
msg: "SUCCESS"
data: [{
1:[
{
"productid": 11,
"totalQty": 3
},
{
"productid": 12,
"totalQty": 27
}
],
2:[
{
"productid": 11,
"totalQty": 64
},
{
"productid": 12,
"totalQty": 10
}
]
}]
For reference attaching the image of the collection
Is there any way to achieve it using aggregation or anything else or I will have to do it manually by code ?
You can append below aggreagation stages to your current pipeline:
db.product_sold.aggregate([
// your current $group stage
{
$group: {
_id: "$_id.month",
docs: { $push: { productid: "$_id.productid", totalQty: "$totalQty" } }
}
},
{
$project: {
_id: 0,
k: { $toString: "$_id" },
v: "$docs"
}
},
{
$group: {
_id: null,
data: { $push: "$$ROOT" }
}
},
{
$project: {
_id: 0,
data: { $arrayToObject: "$data" }
}
}
])
The idea here is that you can use $group with _id set to null to get all the data into single document and then use $arrayToObject to get month number as key and all the aggregates for that month as value.

MongoDb get distinct items after grouping

I'm using mongodb with the following collection sample
{
"_id" : ObjectId("5703750ca9c436386c4814c9"),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(1),
"created_date" : ISODate("2015-10-03T03:52:03.000Z")
},
{
"_id" : ObjectId("5703750ca9c436386c4814ca"),
"s_id" : NumberLong(132919),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2016-03-18T17:13:43.000Z")
},
{
"_id" : ObjectId("5703750ca9c436386c4814cb"),
"s_id" : NumberLong(215283),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2015-10-03T04:12:33.000Z")
}
,
{
"_id" : ObjectId("5703750ca9c436386c4814cc"),
"s_id" : NumberLong(360888),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2015-10-03T04:12:41.000Z")
}
This is my aggregation pipeline
db.activitylogs.aggregate([
{ $group: {
_id: {
user_id: "$user_id",
activitytype_id: "$activitytype_id"
},
activity_log_docs: {
$addToSet: {
s_id: "$s_id",
friend_id: "$friend_id",
playlist_id: "$playlist_id",
created_date:"$created_date"
}
}
}},
])
I need to get distinct s_id in activity_log_docs.
here is a screenshot for the result,
screen shot for the result
i need to avoid duplicated s_id in activity_log_docs array, so i will get distinct s_id
I think something like this should do :
db.activitylogs.aggregate([
{ $group: {
_id: {
user_id: "$user_id",
activitytype_id: "$activitytype_id" ,
s_id:"$s_id"
},
friend_id: {$first:"$friend_id"}}},
playlist_id: {$first:"$playlist_id"}}},
created_date: {$first:"$created_date"}}},
{ $group: {
_id: {
user_id: "$_id.user_id",
activitytype_id: "$_id.activitytype_id"
},
activity_log_docs: {
$addToSet: {
s_id: "$_id.s_id",
friend_id: "$friend_id",
playlist_id: "$playlist_id",
created_date:"$created_date"
}
}
}},
])
But please double check your own field's name.