Find most recent data per day - mongodb

Hey guys I have a database that is updated every few hours but I'm having a hard time trying to query the most recent data from each day from a date range.
My database structure looks like this
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{}],
"user": "user10",
"date" : "2018-09-09 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-10 10:58:18"
}
I was able to get close to the behavior that I want by grouping by date and getting the last extracted date, like this:
{ "_id" : "2018-09-15 00:00:00", "extracted_date" : "2018-09-19 13:50:22" }
{ "_id" : "2018-09-16 00:00:00", "extracted_date" : "2018-09-19 13:47:26" }
{ "_id" : "2018-09-17 00:00:00", "extracted_date" : "2018-09-19 13:45:00" }
{ "_id" : "2018-09-11 00:00:00", "extracted_date" : "2018-09-12 10:09:17" }
{ "_id" : "2018-09-12 00:00:00", "extracted_date" : "2018-09-14 15:34:59" }
{ "_id" : "2018-09-14 00:00:00", "extracted_date" : "2018-09-19 13:54:34" }
{ "_id" : "2018-09-13 00:00:00", "extracted_date" : "2018-09-14 15:36:10" }
{ "_id" : "2018-09-18 00:00:00", "extracted_date" : "2018-09-19 13:42:23" }
But when I group the data I end up getting all the values from that day and I only need the last one. Here's an example of the query I used:
db.collection.aggregate({'$match': {'type': 'user', 'date': {'$gte': '2018-09-11 00:00:00', '$lte': '2018-09-18 00:00:00'}}}, {'$group': {'_id': {'type': '$type', 'user': '$user', 'date': '$date'}, 'extracted_date': {'$last': '$extracted_date'}, 'values': {'$push': '$values'}}})
If possible, I would like to retrieve the information in a structure as close as possible to the one used by the database.
Thank you very much for your help!
Edit: This is a case example that I need.
database objects:
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field1': 1, 'field2': 3}],
"user": "user10",
"date" : "2018-09-09 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-10 10:58:18"
}
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field1': 1, 'field2': 4}],
"user": "user10",
"date" : "2018-09-09 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-11 10:58:18"
}
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field11': 2, 'field2': 10}],
"user": "user11",
"date" : "2018-09-05 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-10 10:58:18"
}
Expected return:
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field1': 1, 'field2': 4}],
"user": "user10",
"date" : "2018-09-09 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-11 10:58:18"
}
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field11': 2, 'field2': 10}],
"user": "user11",
"date" : "2018-09-05 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-10 10:58:18"
}
Since there are 2 objects with the same date, it only returns the one with the most recent extracted_date.

If I understand you correctly you want to just get the last values array and not all of them for that day combined ...
So just do $last for the values like you did for extracted_date.
UPDATE:
Since you are looking for the most recent data for that range, you need to sort first, as per matthPen's suggestion, and then just pull the needed fields out in the $group stage and hide the _id:
// For every (type, user, date) combination in the date range, return the
// fields of the document with the greatest extracted_date.
db.collection.aggregate([
  {
    "$match": {
      "type": "patient",
      "date": {
        "$gte": "2018-09-05 00:00:00",
        "$lte": "2018-09-18 00:00:00"
      }
    }
  },
  // Ascending sort so that $last in the next stage is the most recent
  // extraction. The zero-padded "YYYY-MM-DD HH:MM:SS" strings used in the
  // sample data order chronologically when compared as strings.
  {
    "$sort": {
      "extracted_date": 1
    }
  },
  {
    "$group": {
      "_id": {
        "type": "$type",
        "user": "$user",
        "date": "$date"
      },
      // Original document _id, exposed as "id" because "_id" is the group key.
      "id": {
        "$last": "$_id"
      },
      // "user" has to be carried explicitly: the group key that also holds it
      // is removed by the $project stage below.
      "user": {
        "$last": "$user"
      },
      "date": {
        "$last": "$date"
      },
      "type": {
        "$last": "$type"
      },
      "extracted_date": {
        "$last": "$extracted_date"
      },
      "values": {
        "$last": "$values"
      }
    }
  },
  // Hide the compound grouping key; all of its parts are repeated above.
  {
    "$project": {
      "_id": 0
    }
  }
])
You can see it here

I think you are confusing 'the last per day' (the document that was extracted latest for each day) with $last (the last document coming from the previous stage of the pipeline)!
You need to add a sort stage before grouping, to ensure that $last is 'the last'.
// Same three stages as before, written as an explicit pipeline array:
// filter, order by day then extraction time, then keep the last entry of
// each (type, user, date) group.
db.collection.aggregate([
  {
    "$match": {
      "type": "user",
      "date": { "$gte": "2018-09-11 00:00:00", "$lte": "2018-09-18 00:00:00" }
    }
  },
  // Without this ordering, $last would just be whichever document happened
  // to arrive last from the previous stage.
  { "$sort": { "date": 1, "extracted_date": 1 } },
  {
    "$group": {
      "_id": { "type": "$type", "user": "$user", "date": "$date" },
      "extracted_date": { "$last": "$extracted_date" },
      "values": { "$last": "$values" }
    }
  }
])

Related

Mongodb how to reduce the array within the matching key and calculate avg

{
"_id" : {
"state" : "NY",
"st" : "value"
},
"List" : [
{
"id" : "21",
"score" : 18.75,
"name" : "PU"
},
{
"id" : "21",
"score" : 25.0,
"name" : "PU"
},
{
"id" : "23",
"score" : 25.0,
"name" : "CL"
},
{
"id" : "23",
"score" : 56.25,
"name" : "CL"
}
]
}
Desired result:
Match the key with id within the array and calculate avg of score.
{
"_id" : {
"state" : "New York",
"st" : "value"
},
"List" : [
{
"id" : "21",
"score" : 21.875,
"name" : "PU"
},
{
"id" : "23",
"score" : 40.625,
"name" : "CL"
}
]
}
Thank you in advance.
Query
(returns the expected result)
unwind List
group with including the id, and find avg
fix the structure to be similar with the document you want
group back to restore the document structure (reverse the unwind)
If two identical ids have different names (if that can happen),
the query will make them separate members in the array.
(Alternatively, it could make them the same member and pack the names into an array, but that would produce a different schema from the one you expect to see.)
Test code here
// Collapse List entries that share the same id (and name) into a single
// entry whose score is the average of the merged scores.
db.collection.aggregate([
  // One document per List element.
  {
    "$unwind": {
      "path": "$List"
    }
  },
  // Group by the original document key plus the element's id/name and
  // average the scores inside each group.
  {
    "$group": {
      "_id": {
        "state": "$_id.state",
        "st": "$_id.st",
        "id": "$List.id",
        "name": "$List.name"
      },
      "avg": {
        "$avg": "$List.score"
      }
    }
  },
  // Rebuild the original shape: restore the two-field _id and emit the
  // element with the same keys as the input ("id", "score", "name"), so the
  // average is published under "score" as in the desired result.
  {
    "$project": {
      "_id": {
        "state": "$_id.state",
        "st": "$_id.st"
      },
      "List": {
        "id": "$_id.id",
        "score": "$avg",
        "name": "$_id.name"
      }
    }
  },
  // Reverse the $unwind: gather the elements back into one List per document.
  {
    "$group": {
      "_id": "$_id",
      "List": {
        "$push": "$List"
      }
    }
  }
])

Mongodb aggregate with cond and query value

I'm new to MongoDB. I need to know how to set a field's value in an aggregation from the result of a query over other documents.
Data
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA"
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB"
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC"
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD"
}
]
mongoshell
Assume $check is false
db.getCollection('test').aggregate(
[
{
"$group": {
"_id": "$id",
//...,
"item": {
"$last": {
"$cond": [
{"$eq": ["$check", true]},
"YES",
* * ANSWER **,
}
]
}
},
}
]
)
So I need item in the result to be an array of strings containing all the names that share the same parent_id.
Expect result
[
{
"_id" : "11111",
"parent_id" : "99",
"name" : "AAAA",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11112",
"parent_id" : "99",
"name" : "BBBB",
"item" : ["AAAA","BBBB","DDDD"]
},
{
"_id" : "11113",
"parent_id" : "100",
"name" : "CCCC",
"item" : ["CCCC"]
},
{
"_id" : "11114",
"parent_id" : "99",
"name" : "DDDD",
"item" : ["AAAA","BBBB","DDDD"]
}
]
Try this..
Sample live demo
// Attach to every document the list of names that share its parent_id.
db.collection.aggregate([
// Stage 1: one group per parent_id, collecting all names ("item") and the
// per-document fields needed to rebuild each document later ("data").
{
"$group": {
"_id": "$parent_id",
"item": {
"$push": "$name"
},
"data": {
"$push": {
"_id": "$_id",
"name": "$name"
}
}
}
},
// Stage 2: back to one document per original document; "item" is repeated
// on each of them.
{
"$unwind": "$data"
},
// Stage 3: restore the original field layout. At this point "$_id" is the
// group key, i.e. the parent_id.
{
"$project": {
"_id": "$data._id",
"parent_id": "$_id",
"name": "$data.name",
"item": 1
}
}
])

How can I group by ID and Month?

How can I group by ID and Month in MongoDB?
My data looks like this:
{
"_id" : ObjectId("597225c62e7cbfc9a0b099f8"),
"LogId" : NumberInt(17351963),
"EntryId" : NumberInt(22),
"Date" : "2013-08-11 00:00:00",
"LogTypeId" : NumberInt(6),
"Count" : NumberInt(1),
"EntryType" : NumberInt(1)
}
{
"_id" : ObjectId("597225c62e7cbfc9a0b099f9"),
"LogId" : NumberInt(17352356),
"EntryId" : NumberInt(23),
"Date" : "2013-08-11 00:00:00",
"LogTypeId" : NumberInt(6),
"Count" : NumberInt(2),
"EntryType" : NumberInt(1)
}
{
"_id" : ObjectId("597225c62e7cbfc9a0b099fa"),
"LogId" : NumberInt(17360483),
"EntryId" : NumberInt(28),
"Date" : "2013-08-11 00:00:00",
"LogTypeId" : NumberInt(6),
"Count" : NumberInt(1),
"EntryType" : NumberInt(1)
}
My simplified aggregation query runs without errors, but it doesn't group:
db.log.aggregate([
{"$group":{"_id":"$EntryId", "Count":{"$sum":"$Count"}}},
{"$sort": {"EntryId": 1}}
])
Ultimately, I want to group by EntryID and the month of the date column.
First of all "convert your strings to date" with a very simple operation:
// Rewrite the string "Date" field of every document in db.log as a Date
// value, sending the updates in batches via bulkWrite.
let ops = [];
db.log.find().forEach(doc => {
ops.push({ "updateOne": {
"filter": { "_id": doc._id },
// Turn "YYYY-MM-DD HH:MM:SS" into "YYYY-MM-DDTHH:MM:SS" before parsing.
// NOTE(review): the string carries no timezone designator, so the resulting
// instant depends on how the shell's Date parser treats zone-less
// timestamps - confirm this matches the intended timezone.
"update": { "$set": { "Date": new Date(doc.Date.replace(" ","T")) } }
}});
// Flush once 500 operations have been queued.
if ( ops.length >= 500 ) {
db.log.bulkWrite(ops);
ops = [];
}
});
// Flush whatever is left over from the last partial batch.
if ( ops.length > 0 ) {
db.log.bulkWrite(ops);
ops = [];
};
Then run the new aggregate:
// Total the "Count" field per EntryId and calendar month.
// Requires "Date" to be stored as a real Date, since $year/$month do not
// accept plain strings.
db.log.aggregate([
  { "$group": {
    "_id": {
      "EntryId": "$EntryId",
      "year": { "$year": "$Date" },
      "month": { "$month": "$Date" }
    },
    // Sum the stored Count values, as the query in the question does;
    // { "$sum": 1 } would only count the number of log documents.
    "Count": { "$sum": "$Count" }
  }},
  // Sorting on the compound _id orders by EntryId, then year, then month.
  { "$sort": { "_id": 1 } }
])
Also note that even a "compound _id" like this one will sort correctly by its numeric values.

Merge Multiple Document from same collection MongoDB

I have JSON data like this, and I want to apply an aggregation to it that groups by the from date:
{
"series": [
{
"id": "1",
"element": "111",
"data": [
{
"timeFrame": {
"from": "2016-01-01T00:00:00Z",
"to": "2016-01-31T23:59:59Z"
},
"value": 1
},
{
"timeFrame": {
"from": "2016-02-01T00:00:00Z",
"to": "2016-02-29T23:59:59Z"
},
"value": 2
}
]
}
]
}
and I have achieved this with the following aggregation:
db.getCollection('col1').aggregate([
{$unwind: "$data"},
{$group :{
element: {$first:"$relatedElement"},
_id : {
day : {$dayOfMonth: "$values.timeFrame.from"},
month:{$month: "$values.timeFrame.from"},
year:{$year: "$values.timeFrame.from"}
},
fromDate : { $first : "$values.timeFrame.from" },
total : {$sum : "$values.value"},
count : {$sum : 1},
}
},
{
$project: {
_id : 0,
element:1,
fromDate : '$fromDate',
avgValue : { $divide: [ "$total", "$count" ] }
}
}])
OutPut:
{
"id" : "1",
"element" : "3",
"fromDate" : ISODate("2017-05-01T00:00:00.000Z"),
"avgValue" : 0.0378787878787879
}
{
"id" : "1",
"element" : "3",
"fromDate" : ISODate("2017-04-30T22:00:00.000Z"),
"avgValue" : 0.416666666666667
}
But I am getting two documents, and I want to merge them into a single document like this:
{
"id" : "1",
"element" : "3",
"average" : [
{
"fromDate" : ISODate("2017-05-01T00:00:00.000Z"),
"avgValue" : 0.0378787878787879
},
{
"fromDate" : ISODate("2017-04-30T22:00:00.000Z"),
"avgValue" : 0.416666666666667
}
]
}
Can anyone help me with this?
Add following $group at the end of your aggregate pipeline to merge current output documents into single document -
// Final stage: fold the per-day result documents into a single document
// whose "average" array holds one {fromDate, avgValue} entry per input.
// NOTE(review): the $project stage of the pipeline in the question sets
// `_id: 0`, so "$_id" is absent here and every document lands in the same
// group - confirm that a single merged document is what is wanted.
{$group:{
_id:"$_id",
element: {$first: "$element"},
average:{$push:{
"fromDate": "$fromDate",
"avgValue": "$avgValue"
}}
}}

How to get the maximum value of a field for each group with the array of corresponding documents?

I have a collection like
{
"_id" : ObjectId("5738cb363bb56eb8f76c2ba8"),
"records" : [
{
"Name" : "Joe",
"Salary" : 70000,
"Department" : "IT"
}
]
},
{
"_id" : ObjectId("5738cb363bb56eb8f76c2ba9"),
"records" : [
{
"Name" : "Henry",
"Salary" : 80000,
"Department" : "Sales"
},
{
"Name" : "Jake",
"Salary" : 40000,
"Department" : "Sales"
}
]
},
{
"_id" : ObjectId("5738cb363bb56eb8f76c2baa"),
"records" : [
{
"Name" : "Sam",
"Salary" : 90000,
"Department" : "IT"
},
{
"Name" : "Tom",
"Salary" : 50000,
"Department" : "Sales"
}
]
}
I want to have the results with the highest salary by each department
{"Name": "Sam", "Salary": 90000, "Department": "IT"}
{"Name": "Henry", "Salary": 80000, "Department": "Sales"}
I could get the highest salary. But I could not get the corresponding employee names.
db.HR.aggregate([
{ "$unwind": "$records" },
{ "$group":
{
"_id": "$records.Department",
"max_salary": { "$max": "$records.Salary" }
}
}
])
Could somebody help me?
You need to $sort your document after $unwind and use the $first operator in the $group stage. You can also use the $last operator in which case you will need to sort your documents in ascending order
// Highest-paid employee per department.
db.HR.aggregate([
// One document per employee record.
{ '$unwind': '$records' },
// Highest salaries first, so $first below picks the top earner of each group.
{ '$sort': { 'records.Salary': -1 } },
{ '$group': {
'_id': '$records.Department',
'Name': { '$first': '$records.Name' } ,
'Salary': { '$first': '$records.Salary' }
}}
])
which produces:
{ "_id" : "Sales", "Name" : "Henry", "Salary" : 80000 }
{ "_id" : "IT", "Name" : "Sam", "Salary" : 90000 }
To return the maximum salary and employees list for each department you need to use the $max in your group stage to return the maximum "Salary" for each group, then use the $push accumulator operator to return a list of "Name" and "Salary" for all employees in each group. From there you need to use the $map operator in your $project stage to return a list of names alongside the maximum salary. Of course the $cond here is used to compare each employee's salary to the maximum value. The $setDifference does its job, which is to filter out all the false values, and is fine as long as the data being filtered is "unique". In this case it "should" be fine, but if any two results contained the same "name" then it would skew results by considering the two to be one.
// Maximum salary per department together with the names of everyone who
// earns it.
db.HR.aggregate([
{ '$unwind': '$records' },
// Per department: the maximum salary and the full list of {Name, Salary}.
{ '$group': {
'_id': '$records.Department',
'maxSalary': { '$max': '$records.Salary' },
'persons': {
'$push': {
'Name': '$records.Name',
'Salary': '$records.Salary'
}
}
}},
{ '$project': {
'maxSalary': 1,
'persons': {
// $map yields the name for employees at the maximum salary and false for
// everyone else; $setDifference against [false] then drops the false
// entries. The result is a set, so duplicate names collapse into one.
'$setDifference': [
{ '$map': {
'input': '$persons',
'as': 'person',
'in': {
'$cond': [
{ '$eq': [ '$$person.Salary', '$maxSalary' ] },
'$$person.Name',
false
]
}
}},
[false]
]
}
}}
])
which yields:
{ "_id" : "Sales", "maxSalary" : 80000, "persons" : [ "Henry" ] }
{ "_id" : "IT", "maxSalary" : 90000, "persons" : [ "Sam" ] }
It's not the most intuitive thing, but instead of $max you should be using $sort and $first:
{ "$unwind": "$records" },
{ "$sort": { "$records.Salary": -1},
{ "$group" :
{
"_id": "$records.Department",
"max_salary": { "$first": "$records.Salary" },
"name": {$first: "$records.Name"}
}
}
Alternatively, I think this is doable using the $$ROOT operator (fair warning: I've not actually tried this) -
{ "$unwind": "$records" },
{ "$group":
{
"_id": "$records.Department",
"max_salary": { "$max": "$records.Salary" }
"name" : "$$ROOT.records.Name"
}
}
}
Another possible solution:
// Every employee whose salary equals the maximum of their department
// (ties produce one result document each).
db.HR.aggregate([
{"$unwind": "$records"},
// Per department: all {Name, Salary} pairs plus the maximum salary.
{"$group":{
"_id": "$records.Department",
"arr": {"$push": {"Name":"$records.Name", "Salary":"$records.Salary"}},
"maxSalary": {"$max":"$records.Salary"}
}},
// Back to one document per employee, each still carrying maxSalary.
{"$unwind": "$arr"},
// Flag the employees whose salary equals the department maximum.
{"$project": {
"_id":1,
"arr":1,
"isMax":{"$eq":["$arr.Salary", "$maxSalary"]}
}},
// Keep only the flagged employees.
{"$match":{
"isMax":true
}}
])
This solution takes advantage of the $eq operator to compare two fields in the $project stage.
Test case:
db.HR.insert({"records": [{"Name": "Joe", "Salary": 70000, "Department": "IT"}]})
db.HR.insert({"records": [{"Name": "Henry", "Salary": 80000, "Department": "Sales"}, {"Name": "Jake", "Salary": 40000, "Department": "Sales"}, {"Name": "Santa", "Salary": 90000, "Department": "IT"}]})
db.HR.insert({"records": [{"Name": "Sam", "Salary": 90000, "Department": "IT"}, {"Name": "Tom", "Salary": 50000, "Department": "Sales"}]})
Result:
{ "_id" : "Sales", "arr" : { "Name" : "Henry", "Salary" : 80000 }, "isMax" : true }
{ "_id" : "IT", "arr" : { "Name" : "Santa", "Salary" : 90000 }, "isMax" : true }
{ "_id" : "IT", "arr" : { "Name" : "Sam", "Salary" : 90000 }, "isMax" : true }