How can I group by ID and Month? - mongodb

How can I group by ID and Month in MongoDB?
My data looks like this:
{
"_id" : ObjectId("597225c62e7cbfc9a0b099f8"),
"LogId" : NumberInt(17351963),
"EntryId" : NumberInt(22),
"Date" : "2013-08-11 00:00:00",
"LogTypeId" : NumberInt(6),
"Count" : NumberInt(1),
"EntryType" : NumberInt(1)
}
{
"_id" : ObjectId("597225c62e7cbfc9a0b099f9"),
"LogId" : NumberInt(17352356),
"EntryId" : NumberInt(23),
"Date" : "2013-08-11 00:00:00",
"LogTypeId" : NumberInt(6),
"Count" : NumberInt(2),
"EntryType" : NumberInt(1)
}
{
"_id" : ObjectId("597225c62e7cbfc9a0b099fa"),
"LogId" : NumberInt(17360483),
"EntryId" : NumberInt(28),
"Date" : "2013-08-11 00:00:00",
"LogTypeId" : NumberInt(6),
"Count" : NumberInt(1),
"EntryType" : NumberInt(1)
}
My simplified aggregation query runs without errors, but it doesn't group:
db.log.aggregate([
{"$group":{"_id":"$EntryId", "Count":{"$sum":"$Count"}}},
{"$sort": {"EntryId": 1}}
])
Ultimately, I want to group by EntryID and the month of the date column.

First of all "convert your strings to date" with a very simple operation:
// One-off migration: rewrite every string "Date" ("YYYY-MM-DD HH:mm:ss") as a
// BSON Date so that date operators such as $year / $month can be applied to it.
let ops = [];

// Select only documents whose "Date" is still a string. This keeps the script
// safe to re-run: an already converted Date has no .replace() and would throw.
db.log.find({ "Date": { "$type": "string" } }).forEach(doc => {
  // "2013-08-11 00:00:00" -> "2013-08-11T00:00:00Z".
  // The explicit "Z" pins the value to UTC. Without an offset the string is
  // parsed in the shell's local time zone, and because $year / $month evaluate
  // in UTC a midnight value could slip into the previous day or month.
  const converted = new Date(doc.Date.replace(" ", "T") + "Z");

  ops.push({ "updateOne": {
    "filter": { "_id": doc._id },
    "update": { "$set": { "Date": converted } }
  }});

  // Flush in batches of 500 so the pending operation list stays bounded.
  if ( ops.length >= 500 ) {
    db.log.bulkWrite(ops);
    ops = [];
  }
});

// Write whatever remains from the last, partially filled batch.
if ( ops.length > 0 ) {
  db.log.bulkWrite(ops);
  ops = [];
}
Then run the new aggregate:
// Total "Count" per EntryId per calendar month.
// Requires "Date" to be stored as a BSON Date (not a string).
db.log.aggregate([
  { "$group": {
    // Compound key: one bucket per EntryId, year and month.
    "_id": {
      "EntryId": "$EntryId",
      "year": { "$year": "$Date" },
      "month": { "$month": "$Date" }
    },
    // Add up the stored "Count" values, as the question's query does.
    // { "$sum": 1 } would only count documents and under-report entries
    // whose Count is greater than 1.
    "Count": { "$sum": "$Count" }
  }},
  // Sorting on the compound _id orders by EntryId, then year, then month.
  { "$sort": { "_id": 1 } }
])
Also note that even a "compound _id" like this one will sort correctly by its numeric values.

Related

How to get percentage total of data with group by date in MongoDB

How to get percentage total of data with group by date in MongoDB ?
Link example : https://mongoplayground.net/p/aNND4EPQhcb
I have some collection structure like this
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date" : "2019-05-03T10:39:53.108Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date" : "2019-05-03T10:39:53.133Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date" : "2019-05-03T10:39:53.180Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4e"),
"date" : "2019-05-03T10:39:53.218Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
I have a query in MongoDB to get data from the collection. How can I get each count as a percentage of the total? Below is the example query that gets the data:
db.name_collection.aggregate(
[
{ "$match": {
"update_at": { "$gte": "2019-11-04T00:00:00.0Z", "$lt": "2019-11-06T00:00:00.0Z"},
"id": { "$in": [166] }
} },
{
"$group" : {
"_id": {
$substr: [ '$update_at', 0, 10 ]
},
"count" : {
"$sum" : 1
}
}
},
{
"$project" : {
"_id" : 0,
"date" : "$_id",
"count" : "$count"
}
},
{
"$sort" : {
"date" : 1
}
}
]
)
and this response :
{
"date" : "2019-11-04",
"count" : 39
},
{
"date" : "2019-11-05",
"count" : 135
}
How can I get the percentage of the total from the count key? Example of the desired response:
{
"date" : "2019-11-04",
"count" : 39,
"percentage" : "22%"
},
{
"date" : "2019-11-05",
"count" : 135,
"percentage" : "78%"
}
You have to group by null to get total count and then use $map to calculate the percentage. $round will be a useful operator in such case. Finally you can $unwind and $replaceRoot to get back the same number of documents:
db.collection.aggregate([
// previous aggregation steps
{
$group: {
_id: null,
total: { $sum: "$count" },
docs: { $push: "$$ROOT" }
}
},
{
$project: {
docs: {
$map: {
input: "$docs",
in: {
date: "$$this.date",
count: "$$this.count",
percentage: { $concat: [ { $toString: { $round: { $multiply: [ { $divide: [ "$$this.count", "$total" ] }, 100 ] } } }, '%' ] }
}
}
}
}
},
{
$unwind: "$docs"
},
{
$replaceRoot: { newRoot: "$docs" }
}
])
Mongo Playground

How to count occurrences of values in a nested array?

I've searched but could not find an answer to my problem. I need to count the occurrences of the field "nationalCode". I've got a collection with this sample structure in MongoDB:
{
"_id" : ObjectId("5d7519cc6c17d65d4983f048"),
"origin" : "Base1",
"topic" : [
{
"nationalTopic" : {
"nationalCode" : 26
},
"dateTime" : NumberLong(20120927000000)
},
{
"nationalTopic" : {
"nationalCode" : 132
},
"dateTime" : NumberLong(20120927000000)
},
{
"nationalTopic" : {
"nationalCode" : 26
},
"dateTime" : NumberLong(20120927000000)
},
{
"nationalTopic" : {
"nationalCode" : 26
},
"dateTime" : NumberLong(20121005000000)
}
]
}
I've used the following code (I tried many variations of it, but none of them got me the right results):
db.processos.aggregate(
[
{ "$unwind": "$topic" },
{"$match": {"origin": "Base1"}},
{"$group": { "_id": { nationalCode: "$topic.nationalTopic.nationalCode", "count": { "$sum": 1 }} } }
]
)
I'm expecting something like this:
{
"_id" : {
"nationalCode" : 26,
"count" : 3.0
}
}
/* 2 */
{
"_id" : {
"nationalCode" : 132,
"count" : 1.0
}
}
You should move the count accumulator out of the _id so that it is a separate field of the $group stage.
The following query worked for me.
db.data.aggregate(
[
{ "$unwind": "$topic" },
{"$match": {"origin": "Base1"}},
{"$group": { _id: { "nationalCode": "$topic.nationalTopic.nationalCode" },
"count": {$sum: 1} }
}
]
)
Just add a $project stage to change the output format.
Do it like this:
MongoDB Enterprise >
db.ggg.aggregate(
[
{$unwind:"$topic"},
{"$match": {"origin": "Base1"}},
{"$group": { "_id": { nationalCode: "$topic.nationalTopic.nationalCode"},
"count": { "$sum": 1 } }},
{$project :{"_id.nationalCode":1,"_id.count":"$count"}}
]
)
Here is the result:
{ "_id" : { "nationalCode" : 26, "count" : 3 } }
{ "_id" : { "nationalCode" : 132, "count" : 1 } }

MongoDB - Sum value in nested array

I have an object that looks like this:
{
"_id" : {
"import_type" : "MANUAL_UPLOAD",
"supplier" : "jabino.de",
"unit_price" : "0"
},
"statuses" : [
{
"status" : "DUPLICATED",
"count" : 14
},
{
"status" : "BLACKLISTED",
"count" : 2
},
{
"status" : "USABLE",
"count" : 2239
},
{
"status" : "INVALID_EMAIL_ADDRESS",
"count" : 1
},
{
"status" : "DUPLICATED",
"count" : 14
},
{
"status" : "BLACKLISTED",
"count" : 2
},
{
"status" : "USABLE",
"count" : 2239
},
{
"status" : "INVALID_EMAIL_ADDRESS",
"count" : 1
}
]
}
How can I sum all the counts in the statuses array that have the same status, without losing the key-value pairs in _id? E.g. in this case:
Duplicated: 28
Blacklisted: 4
Usable: 4478
Invalid email address: 2
You can use below aggregation
db.collection.aggregate([
{ "$unwind": "$statuses" },
{ "$group": {
"_id": {
"_id": "$_id",
"statuses": "$statuses.status"
},
"count": { "$sum": "$statuses.count" }
}},
{ "$group": {
"_id": "$_id._id",
"statuses": {
"$push": {
"status": "$_id.statuses",
"count": "$count"
}
}
}}
])

Find most recent data per day

Hey guys I have a database that is updated every few hours but I'm having a hard time trying to query the most recent data from each day from a date range.
My database structure looks like this
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{}],
"user": "user10",
"date" : "2018-09-09 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-10 10:58:18"
}
I was able to get close to the behavior that I want by grouping by date and getting the last extracted date, like this:
{ "_id" : "2018-09-15 00:00:00", "extracted_date" : "2018-09-19 13:50:22" }
{ "_id" : "2018-09-16 00:00:00", "extracted_date" : "2018-09-19 13:47:26" }
{ "_id" : "2018-09-17 00:00:00", "extracted_date" : "2018-09-19 13:45:00" }
{ "_id" : "2018-09-11 00:00:00", "extracted_date" : "2018-09-12 10:09:17" }
{ "_id" : "2018-09-12 00:00:00", "extracted_date" : "2018-09-14 15:34:59" }
{ "_id" : "2018-09-14 00:00:00", "extracted_date" : "2018-09-19 13:54:34" }
{ "_id" : "2018-09-13 00:00:00", "extracted_date" : "2018-09-14 15:36:10" }
{ "_id" : "2018-09-18 00:00:00", "extracted_date" : "2018-09-19 13:42:23" }
But when I group the data I end up getting all the values from that day and I only need the last one. Here's an example of the query I used:
db.collection.aggregate({'$match': {'type': 'user', 'date': {'$gte': '2018-09-11 00:00:00', '$lte': '2018-09-18 00:00:00'}}}, {'$group': {'_id': {'type': '$type', 'user': '$user', 'date': '$date'}, 'extracted_date': {'$last': '$extracted_date'}, 'values': {'$push': '$values'}}})
If possible, I would like to retrieve the information in a structure as close as possible to the one used by the database.
Thank you very much for your help!
Edit: This is a case example that I need.
database objects:
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field1': 1, 'field2': 3}],
"user": "user10",
"date" : "2018-09-09 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-10 10:58:18"
}
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field1': 1, 'field2': 4}],
"user": "user10",
"date" : "2018-09-09 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-11 10:58:18"
}
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field11': 2, 'field2': 10}],
"user": "user11",
"date" : "2018-09-05 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-10 10:58:18"
}
Expected return:
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field1': 1, 'field2': 4}],
"user": "user10",
"date" : "2018-09-09 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-11 10:58:18"
}
{
"_id" : ObjectId("5b96787ebe9d44528eaa18a0"),
"values" : [{'field11': 2, 'field2': 10}],
"user": "user11",
"date" : "2018-09-05 00:00:00",
"type" : "patient",
"extracted_date" : "2018-09-10 10:58:18"
}
Since there are 2 objects with the same date, it only returns the one with the most recent extracted_date.
If I understand you correctly you want to just get the last values array and not all of them for that day combined ...
So just do $last for the values like you did for extracted_date.
UPDATE:
Since you are looking for the most recent data for that range you need to use the sort as per matthPen suggestion and just get the needed fields out of the _id and then hide the _id:
// Return, for every (type, user, date) combination in the range, only the
// document with the most recent "extracted_date".
db.collection.aggregate([
  // Restrict to the requested type and date range. The dates are stored as
  // "YYYY-MM-DD HH:mm:ss" strings, which compare lexicographically in
  // chronological order, so plain string bounds work here.
  {
    "$match": {
      "type": "patient",
      "date": {
        "$gte": "2018-09-05 00:00:00",
        "$lte": "2018-09-18 00:00:00"
      }
    }
  },
  // Order by extraction time so that $last below really picks the newest
  // document of each group rather than an arbitrary one.
  {
    "$sort": {
      "extracted_date": 1
    }
  },
  {
    "$group": {
      "_id": {
        "type": "$type",
        "user": "$user",
        "date": "$date"
      },
      // Original document _id of the newest entry, exposed as "id" because
      // "_id" is taken by the group key.
      "id": {
        "$last": "$_id"
      },
      // "user" is part of the group key only; re-emit it as a top-level field,
      // otherwise it is lost when the _id is hidden in the $project stage.
      "user": {
        "$last": "$user"
      },
      "date": {
        "$last": "$date"
      },
      "type": {
        "$last": "$type"
      },
      "extracted_date": {
        "$last": "$extracted_date"
      },
      "values": {
        "$last": "$values"
      }
    }
  },
  // Hide the compound group key; all of its parts are available as top-level fields.
  {
    "$project": {
      "_id": 0
    }
  }
])
You can see it here
I think you are confusing 'the last per day' (the one that happened latest in each day) with $last (the last document coming from the previous stage in the pipeline)!
You need to add a sort stage before grouping, to ensure that $last is 'the last'.
db.collection.aggregate({
"$match": {
"type": "user",
"date": {
"$gte": "2018-09-11 00:00:00",
"$lte": "2018-09-18 00:00:00"
}
}
},
{$sort:{
date:1,
extracted_date:1
}
},
{
"$group": {
"_id": {
"type": "$type",
"user": "$user",
"date": "$date"
},
"extracted_date": {
"$last": "$extracted_date"
},
"values": {
"$last": "$values"
}
}
})

Get Individual results of Mongodb Average of Two columns

I have a dataset like this:
{
"_id" : ObjectId("5a867bae000e4f1c9c77d36d"),
"userid" : "5a20ee1acdacc7086ce7742d",
"sprice" : null,
"lprice" : 4.2,
"fruit" : "#Apple",
"createdate" : ISODate("2018-02-16T06:35:26.285Z"),
"__v" : 0
},
{
"_id" : ObjectId("5a867bae000e4f1c9c77d36e"),
"userid" : "5a20ee1acdacc7086ce7742e",
"sprice" : 3.5,
"lprice" : null,
"fruit" : "#Apple",
"createdate" : ISODate("2018-02-16T06:35:26.285Z"),
"__v" : 0
},
{
"_id" : ObjectId("5a867bae000e4f1c9c77d36e"),
"userid" : "5a20ee1acdacc7086ce7742e",
"sprice" : 8.6,
"lprice" : 2.2,
"fruit" : "#Apple",
"createdate" : ISODate("2018-02-16T06:35:26.285Z"),
"__v" : 0
}
For this I have to calculate the average sprice of '#Apple' and ignore those entries whose value is null.
My query for this, which returns exactly what I want, is:
db.Collection.aggregate([
{ "$match": {
"fruit": "#Apple",
"sprice": {$ne:null}
}},
{ "$group": {
"_id": null,
"sprice": { "$avg": "$sprice" }
}}
])
It gives me the result. Now my question is: if I want to get individual results for sprice and lprice, how should my query be modified?
Expected answer will be like this:
{ "_id" : null, "sprice" : 6.05 } // Already Get from this query
{ "_id" : null, "lprice" : 3.2 } //Desired Result.
Any Help is Appreciated
With $facet
db.Collection.aggregate([
{ "$match": { "fruit": "#Apple" } },
{
"$facet": {
"sprice": [
{ "$match": { "sprice": { "$ne": null } } },
{ "$group": {
"_id": null,
"sprice": { "$avg": "$sprice" }
}}
],
"lprice": [
{ "$match": { "lprice": { "$ne": null } } },
{ "$group": {
"_id": null,
"lprice": { "$avg": "$lprice" }
}}
]
}
}
])
Sample Output
[
{
"sprice": [ { "_id" : null, "sprice" : 6.05 } ],
"lprice": [ { "_id" : null, "lprice" : 3.2 } ]
}
]
$avg ignores non-numeric values (including null) by default, so no explicit null filter is required; the pipeline below will give you the desired results:
db.Collection.aggregate([
{ "$match": {
"fruit": "#Apple"
}},
{ "$group": {
"_id": null,
"sprice": { "$avg": "$sprice" },
"lprice": { "$avg": "$lprice" }
}}
])