Related
I'm not sure that my question is correct, but it seems so:
I have a set of rows in my Mongodb, like:
[{'_id': '5b4c9aa7ddc752c1f5844315',
'ccode': 'RU',
'date': '2018-07-16T00:00:00.000Z',
'rates': {'reg_emails_confirmed': 4,
'registered': 1,
'regs_age1': 1,
'regs_male': 1}},
{'_id': '5b4cad0dddc752c1f5844322',
'ccode': 'US',
'date': '2018-07-16T00:00:00.000Z',
'rates': {'reg_emails_confirmed': 4,
'registered': 2,
'regs_age1': 2,
'regs_male': 2}},
{'_id': '5bd88204af4c814883a414b2',
'ccode': 'US',
'date': '2018-10-30T00:00:00.000Z',
'rates': {'reg_emails_confirmed': 2,
'registered': 1,
'regs_age1': 1,
'regs_male': 1}},
{'_id': '5bd88204af4c814883a414b3',
'ccode': 'RU',
'date': '2018-10-30T00:00:00.000Z',
'rates': {'reg_emails_confirmed': 2,
'registered': 1,
'regs_age1': 1,
'regs_male': 1}}]
And I want to sort them by date and combine because for the same date there are multiple rows from different countries.
So the result should look something like ...
[{'2018-07-16T00:00:00.000Z': [{'_id': '5b4c9aa7ddc752c1f5844315',
'ccode': 'RU',
'date': '2018-07-16T00:00:00.000Z',
'rates': {'reg_emails_confirmed': 4,
'registered': 1,
'regs_age1': 1,
'regs_male': 1}},
{'_id': '5b4cad0dddc752c1f5844322',
'ccode': 'US',
'date': '2018-07-16T00:00:00.000Z',
'rates': {'reg_emails_confirmed': 4,
'registered': 2,
'regs_age1': 2,
'regs_male': 2}}]},
{'2018-10-30T00:00:00.000Z': [{'_id': '5bd88204af4c814883a414b2',
'ccode': 'US',
'date': '2018-10-30T00:00:00.000Z',
'rates': {'reg_emails_confirmed': 2,
'registered': 1,
'regs_age1': 1,
'regs_male': 1}},
{'_id': '5bd88204af4c814883a414b3',
'ccode': 'RU',
'date': '2018-10-30T00:00:00.000Z',
'rates': {'reg_emails_confirmed': 2,
'registered': 1,
'regs_age1': 1,
'regs_male': 1}}]}]
I tried:
db.getCollection('daily_stats').aggregate([
{'$match': some_condition},
{'$group': {'ccode': 1}}, # ccode or date?
{'$sort': {"date": 1}},
])
But got an error
The field * must be an accumulator object
I googled the error and its meaning is pretty clear, but it doesn't seem related to my case: I don't need any sum, avg, etc. functions.
Query
sort by date (ascending here; if you need descending, use -1)
group by date and collect the ROOT documents
replace the root so you have the date as key
*This assumes your dates are stored as strings, which is a bad idea. If you convert them to date objects you can still use this query, but change the key to
"k":{"$dateToString" : {"date" :"$_id"}}
Test code here
// Bucket the documents by date and emit one {<date>: [docs]} object per date,
// ordered by date ascending (use -1 in $sort for descending).
aggregate(
[
  // One group per distinct date; "docs" collects the full original documents.
  {"$group": {"_id": "$date", "docs": {"$push": "$$ROOT"}}},
  // Sort AFTER grouping: $group does not guarantee the order of its output,
  // so a $sort placed before it would not reliably order the date buckets.
  {"$sort": {"_id": 1}},
  // Turn each group into a single-key object: key = the date, value = its docs.
  {"$replaceRoot":
    {"newRoot": {"$arrayToObject": [[{"k": "$_id", "v": "$docs"}]]}}}
])
When using $group, you need an _id
From the docs
{
$group:
{
_id: <expression>, // Group By Expression
<field1>: { <accumulator1> : <expression1> },
...
}
}
In your case...
// Group the matched daily stats per country code and return them ordered by date.
db.getCollection('daily_stats').aggregate([
  {'$match': some_condition},
  // $group needs an _id (the grouping key); every other field must be built
  // with an accumulator such as $addToSet or $first.
  {'$group': {
    '_id': "$ccode",
    'rates': { $addToSet: '$rates' },
    'date': { $first: '$date' }
  }},
  {'$sort': {"date": 1}},
  // Expose the grouping key as "country" and hide the raw _id.
  {'$project': { "_id": 0, "country": "$_id", "rates": 1, "date": 1 }}
])
Playground: https://mongoplayground.net/p/B31XLS9p-6W
Say the collection store data in the below format. Every day a new entry is added in the collection. Dates are in ISO format.
|id|dt|data|
---
|1|2021-03-17|{key:"A", value:"B"}
...
|1|2021-03-14|{key:"A", value:"B"}
...
|1|2021-02-28|{key:"A", value:"B"}
|1|2021-02-27|{key:"A", value:"B"}
...
|1|2021-02-01|{key:"A", value:"B"}
|1|2021-01-31|{key:"A", value:"B"}
|1|2021-01-30|{key:"A", value:"B"}
...
|1|2021-01-01|{key:"A", value:"B"}
|1|2020-12-31|{key:"A", value:"B"}
...
|1|2020-11-30|{key:"A", value:"B"}
...
I need help with a query that gives me the last day of each month for a given period of time. Below is the query I was able to do which is not giving last day of the current month as I am sorting it by day, month and year.
db.getCollection('data').aggregate([
{
$match: {dt: {$gt: ISODate("2020-01-01")}
},
{
$project: {
dt: "$dt",
month: {
$month: "$dt"
},
day: {
$dayOfMonth: "$dt"
},
year: {
$year: "$dt"
},
data: "$data"
}
},
{
$sort: {day: -1, month: -1, year: -1}
},
{ $limit: 24},
{
$sort: {dt: -1}
},
])
The results I am after is:
|1|2021-03-17|{key:"A", value:"B"}
|1|2021-02-28|{key:"A", value:"B"}
|1|2021-01-31|{key:"A", value:"B"}
|1|2020-12-31|{key:"A", value:"B"}
|1|2020-11-30|{key:"A", value:"B"}
...
|1|2020-01-31|{key:"A", value:"B"}
Group the records by year and month, get the max date for that month.
// Return, for every month after 2020-01-01, the document with the latest date.
db.getCollection('data').aggregate([
  { $match: { dt: { $gt: ISODate("2020-01-01") } } },
  // Newest first, so that $first below picks the latest document of each month.
  // Without this sort $first would return an arbitrary document of the group.
  { $sort: { dt: -1 } },
  { $group: { // group by
      _id: { $substr: ['$dt', 0, 7] }, // get year and month eg 2020-01
      dt: { $max: "$dt" }, // the max date of the month (same as doc.dt after the sort)
      doc: { "$first": "$$ROOT" } } // the latest document of the month
  },
  { "$replaceRoot": { "newRoot": "$doc" } }, // project the document
  { $sort: { dt: -1 } }
]);
$substr
$group
$replaceRoot
$max
$first
I monkey patched a possible solution for you in Python, but without your DB, I can't be positive that this works.
First there's a function that takes in an integer representing a month and returns the last day of that month.
import datetime as dt
def last_day_of_month(month, year=2021):
    """Return midnight on the last day of *month* in *year*.

    Args:
        month: Month number. 1-12 address the months of *year*; values
            outside that range roll over into neighbouring years, so ``0``
            is December of the previous year (the behaviour callers that
            iterate from 0 rely on) and ``13`` is January of the next year.
        year: Calendar year the month belongs to. Defaults to 2021, the
            value that used to be hard-coded.

    Returns:
        A naive ``datetime`` at 00:00 on the last day of the requested month.
    """
    # The last day of a month is the day before the first day of the next
    # month. divmod wraps month 12 into January of the following year instead
    # of passing an invalid month 13 to datetime().
    year_offset, next_month_index = divmod(month, 12)
    first_of_next_month = dt.datetime(year + year_offset, next_month_index + 1, 1)
    return first_of_next_month - dt.timedelta(days=1)
Next, I built the query with a separate function.
def build_query(last_month):
    """Build one ``$and`` clause per month index in ``range(last_month)``.

    Each clause selects documents whose ``date`` falls within the single day
    returned by ``last_day_of_month`` for that index: from that midnight
    (inclusive) up to the following midnight (exclusive). The returned list
    is meant to be used as the operand of an ``$or``.
    """
    one_day = dt.timedelta(days=1)
    clauses = []
    for month_index in range(last_month):
        day_start = last_day_of_month(month_index)
        clauses.append(
            {
                "$and": [
                    {"date": {"$gte": day_start}},
                    {"date": {"$lt": day_start + one_day}},
                ]
            }
        )
    return clauses
Here's the output. It would be inside an $or operator in the $match stage.
{'$match': {'$or': [{'$and': [{'date': {'$gte': datetime.datetime(2020, 12, 31, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 1, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 1, 31, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 2, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 2, 28, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 3, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 3, 31, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 4, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 4, 30, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 5, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 5, 31, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 6, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 6, 30, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 7, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 7, 31, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 8, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 8, 31, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 9, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 9, 30, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 10, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 10, 31, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 11, 1, 0, 0)}}]},
{'$and': [{'date': {'$gte': datetime.datetime(2021, 11, 30, 0, 0)}},
{'date': {'$lt': datetime.datetime(2021, 12, 1, 0, 0)}}]}]}}
I work with MongoDB and the aggregation framework. I have the following data in the database, and I need to sort and group it by year:
[{
"_id": "5df537d615a0cd001759f5e4",
"timeDoneA": {
"year":2020,
"day": 5,
"month": 12
},
"timeDoneB": {
"year": 2020,
"day": 4,
"month": 2
},
"timeDoneC": {
"year": 2020,
"day": 4,
"month": 2
},
},
{
"_id": "5df6595dab96a000174e29d7",
"timeDoneA": {
"year": 2020,
"day": 12,
"month": 12
},
"timeDoneB": {
"year": 2019,
"day": 15,
"month": 12
},
"timeDoneC": {
"year": 2019,
"day": 15,
"month": 12
},
}...etc],
I want to get something like this to sort data by years:
2019: {
timeDoneA: 0,
timeDoneB: 1,
timeDoneC: 1
},
2020: {
timeDoneA: 2,
timeDoneB: 1,
timeDoneC: 1
}
How can I do that with aggregation query?
You could use a $facet stage to collect the year values and remove unnecessary fields, then $unwind both of these, and finally use $group to collate the values.
A quick and dirty example:
// Count, for every year that occurs in any timeDone* field, how many documents
// have that year in timeDoneA, timeDoneB and timeDoneC respectively.
db.aggtest.aggregate([
// $facet runs two sub-pipelines over the same input documents.
// NOTE(review): the $facet result is a single document, so a large collection
// may run into the BSON document size limit - confirm before using at scale.
{$facet:{
// "years": the set of distinct years found across the three fields.
years:[
{$group:{ _id:null,
listC:{$addToSet: "$timeDoneC.year"},
listB:{$addToSet:"$timeDoneB.year"},
listA:{$addToSet:"$timeDoneA.year"}}},
{$project:{ _id:0,
list:{$setUnion:["$listA","$listB","$listC"]}}}],
// "done": every input document reduced to its three year values.
done:[{$project:{ _id:0,
timeDoneA:"$timeDoneA.year",
timeDoneB:"$timeDoneB.year",
timeDoneC:"$timeDoneC.year"}}]}},
// Unwinding "done" and then "years.list" yields one row per (document, year) pair.
{$unwind:"$done"},
{$unwind:"$years"},
{$unwind:"$years.list"},
// Group by year; each $sum adds 1 when the document's field equals that year,
// so a year that never occurs in a field still gets a 0 for it.
{$group:{ _id:"$years.list",
timeDoneA:{$sum:{$cond:[{$eq:["$done.timeDoneA","$years.list"]},1,0]}},
timeDoneB:{$sum:{$cond:[{$eq:["$done.timeDoneB","$years.list"]},1,0]}},
timeDoneC:{$sum:{$cond:[{$eq:["$done.timeDoneC","$years.list"]},1,0]}}}}])
Testing this on your sample data gives:
{ "_id" : 2020, "timeDoneA" : 2, "timeDoneB" : 1, "timeDoneC" : 1 }
{ "_id" : 2019, "timeDoneA" : 0, "timeDoneB" : 1, "timeDoneC" : 1 }
Here's how my collection looks:
{"_id": 1, "price_history": [{date: 10-01-19, price: 10}, {date: 10-05-19, price: 15}...]...}
{"_id": 2, "price_history": [{date: 10-01-19, price: 12}, {date: 10-05-19, price: 14}...]...}
{"_id": 3, "price_history": [{date: 10-01-19, price: 17}, {date: 10-05-19, price: 25}...]...}
{"_id": 4, "price_history": [{date: 10-01-19, price: 10}, {date: 10-05-19, price: 16}...]...}
(The dates are all date objects, just wrote them this way to read easier)
So I'm able to get the max price from the "price_history" array, but I also want to get that date object that matches with that max price.
Here's what I have so far, I've removed a lot of irrelevant stuff to the question.
{
$group: {
'_id': 'stats',
'price_history_stats': {
$push: {
'_id': '$_id',
'highest': {
$max: '$price_history.price'
}
}
}
}
}
The output I am getting is:
{
'_id': 'stats',
'price_history_stats': [
{'_id': 1, 'highest': 15},
{'_id': 1, 'highest': 14},
{'_id': 1, 'highest': 25},
{'_id': 1, 'highest': 16}
]
}
But I'm looking for a way to achieve this with the dates:
{
'_id': 'stats',
'price_history_stats': [
{'_id': 1, 'highest': 15, date: 10-05-10},
{'_id': 1, 'highest': 14, date: 10-05-10},
{'_id': 1, 'highest': 25, date: 10-05-10},
{'_id': 1, 'highest': 16, date: 10-05-10}
]
}
(Excuse any typos, I reformatted a lot of stuff for the question)
Any help would be appreciated. Thanks
If the intent is to find the max document for the group based on price, a combination of $sort first on price then $group with $last will produce a similar output.
Query: Link
// For every document, return the price_history entry (date and price) that
// has the highest price.
db.collection.aggregate([
// One output row per price_history entry.
{
$unwind: "$price_history"
},
// Ascending by price, so the most expensive entry of each document comes last.
{
$sort: {
"price_history.price": 1
}
},
// Regroup by the original document _id; because of the sort above, $last is
// the entry with the maximum price.
{
$group: {
_id: "$_id",
max_price_doc: {
$last: "$price_history"
}
}
}
]);
Output:(Demo)
[
{
"_id": 1,
"max_price_doc": {
"date": "10 - 05 - 19",
"price": 15
}
},
{
"_id": 4,
"max_price_doc": {
"date": "10 - 05 - 19",
"price": 16
}
},
{
"_id": 3,
"max_price_doc": {
"date": "10 - 05 - 19",
"price": 25
}
},
{
"_id": 2,
"max_price_doc": {
"date": "10 - 05 - 19",
"price": 14
}
}
]
I have many documents in a MongoDB database which look like the following four documents (note the first 3 are Feb 2017 and the last one is March 2017):
{"_id": 0,
"date": ISODate("2017-02-01T00:00:00Z),
"item": "Basketball",
"category": "Sports"}
{"_id": 1,
"date": ISODate("2017-02-13T00:00:00Z),
"item": "Football",
"category": "Sports"}
{"_id": 2,
"date": ISODate("2017-02-22T00:00:00Z),
"item": "Massage",
"category": "Leisure"}
{"_id": 3,
"date": ISODate("2017-03-05T00:00:00Z),
"item": "Golf club",
"category": "Sports"}
I'm trying to group the items by MONTH/YEAR and within that, group the items by CATEGORY. So the aggregation pipeline should return something that looks like this for the four docs above:
{"_id": {
"month": 2,
"year": 2017
},
"data": [
{"category": "Sports",
"items": ["Basketball", "Football"]
},
{"category": "Leisure",
"items": ["Massage"]
}
]
},
{"_id": {
"month": 3,
"year": 2017
},
"data": [
{"category": "Sports",
"items": ["Golf Club"]
}
]
}
I also want the returned cursor to be in order with year as the primary sort and month as the secondary sort.
Figured it out. Here's the answer using the pymongo api:
from bson.son import SON
# Two-level grouping: first collect the items of every (year, month, category)
# combination, then fold the categories of each (year, month) into one "data"
# array. The result is ordered by year, then month.
cursor = db.collection.aggregate([
    {'$group': {
        '_id': {
            'month': {'$month': '$date'},
            'year': {'$year': '$date'},
            # Plain key name: field names inside _id must not start with "$".
            'category': '$category',
        },
        'items': {'$push': '$item'},
    }},
    {'$group': {
        # The "$" prefix makes these field paths into the first stage's _id;
        # without it every document would get the same literal string and
        # collapse into a single group.
        '_id': {
            'month': '$_id.month',
            'year': '$_id.year',
        },
        'data': {
            '$push': {
                'category': '$_id.category',
                'items': '$items',
            },
        },
    }},
    # SON preserves key order: year is the primary sort key, month the secondary.
    {'$sort': SON([('_id.year', 1), ('_id.month', 1)])},
])
my_data = list(cursor)