I would like to aggregate data in meteor/MongoDB. I have a few thousand entries formatted like the following
{_id: sadsadjhsjdys7ad67as8d, t: 1464162907, prod: 123, sys: xyz}
I would like to sort them into their relative dates and aggregate the prod field.
I currently have the following
var project = {
"$project" : {
"_id" : 0,
"y" : {
"$year" : {
"$add" : [
new Date(0), {
"$multiply" : [1000, "$t"]
}
]
}
},
"m" : {
"$month" : {
"$add" : [
new Date(0), {
"$multiply" : [1000, "$t"]
}
]
}
},
"d" : {
"$dayOfMonth" : {
"$add" : [
new Date(0), {
"$multiply" : [1000, "$t"]
}
]
}
},
"prod" : "$prod",
"sys" : "$sys"
}
};
var group = {
"$group" : {
"_id" : {
"year" : "$y",
"month" : "$m",
"day" : "$d",
"test" : "$test"
},
sys : "sys",
prod : {
$sum : "$prod"
}
}
};
var result = power_stats.aggregate([match, project, group]);
However this makes my _id into [y: 2016, m: 5, d: 25] which is the form of aggregation that I want, however it will be completely useless if I want to do a conditional find with the date later on with this data.
How can I preserve a UNIX datestamp with MongoDBe aggregation?
to achieve that you can do:
Add t field in $project phase and keep populating it thru pipeline - so there will be always reference to oryginal timestamp
Create dateTime field 'time: new Date("$_id.year", "$_id.month", "_id.day");'
var group = {
"$group" : {
"_id" : {
"year" : "$y",
"month" : "$m",
"day" : "$d",
"test" : "$test"
},
prod : {
$sum : "$prod"
},
dateTime : {
$addToSet : new Date("$_id.year", "$_id.month", "$_id.day")
}
}
};
Related
The only thing I am trying to do is to get the average of Emision_C02 consumed at 10pm for all the days in location:1. The collection, db.datos_sensores2, has documents within like:
{
"_id" : ObjectId("609c2c2d420a73728827e87f"),
"timestamp" : ISODate("2020-07-01T02:15:00Z"),
"sensor_id" : 1,
"location_id" : 1,
"medidas" : [
{
"tipo_medida" : "Temperatura",
"valor" : 14.03,
"unidad" : "ºC"
},
{
"tipo_medida" : "Humedad_relativa",
"valor" : 84.32,
"unidad" : "%"
}
]
}
{
"_id" : ObjectId("609c2c2d420a73728827e880"),
"timestamp" : ISODate("2020-07-01T02:15:00Z"),
"sensor_id" : 2,
"location_id" : 1,
"medidas" : [
{
"tipo_medida" : "Emision_CO2",
"valor" : 1.67,
"unidad" : "gCO2/m2"
},
{
"tipo_medida" : "Consumo_electrico",
"valor" : 0.00155,
"unidad" : "kWh/m2"
}
]
}
I wrote this:
db.datos_sensores2.aggregate([
{$project:{timestamp:{$dateFromString:{dateString:'$timestamp'}},"_id":0, "me-didas":{$slice:["$medidas",-1]},"location_id":1}},
{$addFields:{Hora:{$hour:"$timestamp"}}},
{$match:{'Hora':{$in:[10]},'medidas.tipo_medida':"Emision_CO2", "location_id":1}},
{$group:{ _id: null, Avg_Emision_CO2:{$avg: "$medidas.valores"}}}])
But nothing happen....
pls refer to https://mongoplayground.net/p/-LqswomHWsY
I have noticed few things first of all hour comes to be 2 in above example and not 10. Second the variable/field names are not correct so i have updated it.
[{$unwind: {
path: '$medidas',
}}, {$addFields: {
Hora: {
$hour: "$timestamp"
}
} }, {$match: {
"Hora": {
$in: [2]
},
"medidas.tipo_medida": "Emision_CO2",
"location_id": 1
} }, {$group: {
_id: null,
Avg_Emision_CO2: {
$avg: "$medidas.valor"
}
}}]
Pipeline stages:
unwind: as $medidas is array we can unwind it so it will be easy to filter only "Emision_CO2",
addfield: add houre from timestamp
match: to match "medidas.tipo_medida": "Emision_CO2",
group: to get average
I have a MongoDB aggregation pipeline that has been frustrating me for a while now, because it never seems to be accurate or correct to my needs. The aim is to count the number of new unique users each day per chatbot, starting from the very beginning.
Here's what my pipeline looks like right now.
[
{
"$project" : {
"_id" : 0,
"bot_id" : 1,
"customer_id" : 1,
"timestamp" : {
"$ifNull" : [
'$incoming_log.created_at', '$outcome_log.created_at'
]
}
}
},
{
"$project" : {
"customer_id" : 1,
"bot_id" : 1,
"timestamp" : {
"$dateFromString" : {
"dateString" : {
"$substr" : [
"$timestamp", 0, 10
]
}
}
}
}
},
{
"$group" : {
"_id" : "$customer_id",
"timestamp" : {
"$first" : "$timestamp"
},
"bot_id" : {
"$addToSet" : "$bot_id"
}
}
},
{
"$unwind" : "$bot_id"
},
{
"$group" : {
"_id" : {
"bot_id" : "$bot_id",
"customer_id" : "$_id"
},
"timestamp" : {
"$first" : "$timestamp"
}
}
},
{
"$project" : {
"_id" : 0,
"timestamp" : 1,
"customer_id" : "$_id.customer_id",
"bot_id" : "$_id.bot_id"
}
},
{
"$group" : {
"_id": {
"timestamp" : "$timestamp",
"bot_id" : "$bot_id"
},
"new_users" : {
"$sum" : 1
}
}
},
{
"$project" : {
"_id" : 0,
"timestamp" : "$_id.timestamp",
"bot_id" : "$_id.bot_id",
"new_users" : 1
}
}
]
Some sample data for an idea of what the data looks like...
{
"mid" : "...",
"bot_id" : "...",
"bot_name" : "JOBBY",
"customer_id" : "U122...",
"incoming_log" : {
"created_at" : ISODate("2020-12-08T09:14:16.237Z"),
"event_payload" : "",
"event_type" : "text"
},
"outcome_log" : {
"created_at" : ISODate("2020-12-08T09:14:18.145Z"),
"distance" : 0.25,
"incoming_msg" : "🥺"
}
}
My expected outcome is something along the lines of:
{
"new_users" : 1187.0,
"timestamp" : ISODate("2021-01-27T00:00:00.000Z"),
"bot_id" : "5ffd......."
},
{
"new_users" : 1359.0,
"timestamp" : ISODate("2021-01-27T00:00:00.000Z"),
"bot_id" : "6def......."
}
Have I overcomplicated my pipeline somewhere? I seem to get a reasonable number of new users per bot each day, but for some reason my colleague tells me that the number is too high. I need some tips, please!
I have really no idea what you are looking for.
"The aim is to count the number of new unique users each day per chatbot, starting from the very beginning."
What is "new unique users"? What do you mean by "starting from the very beginning"? You ask for count per day but you use {"$group": {"_id": "$customer_id", "timestamp": { "$first": "$timestamp" } } }
For me your grouping does not make any sense. With only one single sample document, it is almost impossible to guess what you like to count.
Regarding group per day: I prefer to work always with Date values, rather than strings. It is less error prone. Maybe you have to consider time zones, because UTC midnight is not your local midnight. When you work with Dates then you have better control over it.
The $project stages are useless when you do $group afterwards. Typically you have only one $project stage at the end.
So, put something to start.
db.collection.aggregate([
{
$set: {
day: {
$dateToParts: {
date: { $ifNull: ["$incoming_log.created_at", "$outcome_log.created_at"] }
}
}
}
},
{
$group: {
_id: "$customer_id",
timestamp: {$min: { $dateFromParts: { year: "$day.year", month: "$day.month", day: "$day.day" } }}
}
}
]);
I'm trying to sum (spending by month/year) of a collection with nested amounts - with no luck.
This is the collection (extract):
[
{
"_id" : ObjectId("5faaf88d0657287993e541a5"),
"segment" : {
"l1" : "Segment A",
"l2" : "001"
},
"invoiceNo" : "2020.10283940",
"invoicePos" : 3,
"date" : ISODate("2019-09-06T00:00:00.000Z"),
"amount" : {
"document" : {
"amount" : NumberDecimal("125.000000000000"),
"currCode" : "USD"
},
"local" : {
"amount" : NumberDecimal("123.800000000000"),
"currCode" : "CHF"
},
"global" : {
"amount" : NumberDecimal("123.800000000000"),
"currCode" : "CHF"
}
}
},
...
]
I would like to sum up the aggregated invoice volume per month in "global" currency.
I tried this query on MongoDB:
db.invoices.aggregate(
{$project : {
month : {$month : "$date"},
year : {$year : "$date"},
amount : 1
}},
{$unwind: '$amount'},
{$group : {
_id : {month : "$month" ,year : "$year" },
total : {$sum : "$amount.global.amount"}
}})
I am getting as result this:
/* 1 */
{
"_id" : ObjectId("5faaf88d0657287993e541a5"),
"amount" : {
"document" : {
"amount" : NumberDecimal("125.000000000000"),
"currCode" : "USD"
},
"local" : {
"amount" : NumberDecimal("123.800000000000"),
"currCode" : "CHF"
},
"global" : {
"amount" : NumberDecimal("123.800000000000"),
"currCode" : "CHF"
}
},
"month" : 9,
"year" : 2019
}
/* 2 */
{
"_id" : ObjectId("5faaf88d0657287993e541ac"),
"amount" : {
"document" : {
"amount" : NumberDecimal("105.560000000000"),
"currCode" : "CHF"
},
"local" : {
"amount" : NumberDecimal("105.560000000000"),
"currCode" : "CHF"
},
"global" : {
"amount" : NumberDecimal("105.560000000000"),
"currCode" : "CHF"
}
},
"month" : 11,
"year" : 2020
}
This however does not sum up all invoices per month, but looks like single invoice lines - no aggregation.
I would like to get a result like this:
[
{
"month": 11,
"year": 2020,
"amount" : NumberDecimal("99999.99")
},
{
"month": 10,
"year": 2020,
"amount" : NumberDecimal("99999.99")
},
{
"month": 9,
"year": 2020,
"amount" : NumberDecimal("99999.99")
}
]
What is wrong with my query?
Would this be helpful?
db.invoices.aggregate([
{
$group: {
_id: {
month: {
$month: "$date"
},
year: {
$year: "$date"
}
},
total: {
$sum: "$amount.global.amount"
}
}
},
{$sort:{"_id.year":-1, "_id.month":-1}}
])
Playground
If you need any extra explanation let me know, but the code is pretty short and self-explanatory.
In principle your aggregation pipeline is fine, there a few mistakes:
An aggregation pipeline expects an array
$unwind is useless, because $amount is not an array. One element in -> one document out
You can use date function directly
So, short and simple:
db.invoices.aggregate([
{
$group: {
_id: { month: { $month: "$date" }, year: { $year: "$date" } },
total: { $sum: "$amount.global.amount" }
}
}
])
I have been learning MongoDB, while doing so I tried to implement the aggregation property for my database collection. I grouped the details of the employee based on their age and by using match function, my question is it possible to display the other key-value once they pass the age criteria?
db.employee.aggregate([
{ $match: { age: { $gte: 23 } } },
{
$group: {
_id:'$age',
total: { $sum: 1 },
name: { $addToSet: '$name' }
}
}
])
and the output was like this
{ "_id" : 27, "total" : 2, "name" : [ "indhu", "logesh" ] }
{ "_id" : 26, "total" : 1, "name" : [ "keerthana" ] }
{ "_id" : 25, "total" : 1, "name" : [ "sneha" ] }
{ "_id" : 24, "total" : 1, "name" : [ "dhiva" ] }
{ "_id" : 23, "total" : 1, "name" : [ "elango" ] }
where _id denotes their age.
I've the following structure of docs:
{
"_id" : ObjectId("5786458371d24d924d8b4575"),
"uniqueNumber" : "3899822714",
"lastUpdatedAt" : ISODate("2016-07-13T20:11:11.000Z"),
"new" : [
{
"price" : 8.4,
"created" : ISODate("2016-07-13T13:11:28.000Z")
},
{
"price" : 10.0,
"created" : ISODate("2016-07-13T14:50:56.000Z")
}
],
"used" : [
{
"price" : 10.99,
"created" : ISODate("2016-07-08T13:46:31.000Z")
},
{
"price" : 8.59,
"created" : ISODate("2016-07-13T13:11:28.000Z")
}
]
}
Now I need to get a list that gives me the lowest price of each array per date.
So, as example:
{
"uniqueNumber" : 1234,
"prices" : {
"created" : 2016-07-08,
"minNew" : 123,
"minUsed" : 22
}
}
By now I've built the following query
db.getCollection('col').aggregate([
{
$match : {
"uniqueNumber" : "3899822714"
}
},
{
$unwind : "$used"
},
{
$project : {
"uniqueNumber" : "$uniqueNumber",
"price" : "$used.price",
"ts" : "$used.created"
}
},
{
$sort : { "ts" : 1 }
},
{
$group : {_id: "$uniqueNumber", priceOfMaxTS : { $min: "$price" }, ts : { $last: "$ts" }}
}
]);
But this one will only give me the lowest price for the highest date. I couldn't really find anything that pushes me to the right direction to get the desired result.
UPDATE
I've found a way to get the lowest price of the used array grouped by day with this query:
db.getCollection('col').aggregate([
{
$match : {
"uniqueNumber" : "3899822714"
}
},
{
$unwind : "$used"
},
{
$project : {
"asin" : "$uniqueNumber",
"price" : "$used.price",
"ts" : "$used.created",
"y" : { "$year" : "$used.created" },
"m" : { "$month" : "$used.created" },
"d" : { "$dayOfMonth" : "$used.created" }
}
},
{
$group : { _id : { "year" : "$y", "month" : "$m", "day" : "$d" }, minPriceOfDay : { $min: "$price" }}
}
]);
No I only need to find a way to do this also to the new array in the same query.