Group by Date mongoDB - mongodb

I have a set of data in MongoDB that I have to sum up, grouped by $timestamp. This field contains a date, but it is stored as a String (see the example data below).
How should I proceed to convert $timestamp into a date so I can group them all together?
Next, I have to sum each scores_today for each date and iden, and the same with each scores_total.
Example data:
[
{
_id: "1442",
timestamp: "2016-03-15T22:24:02.000Z",
iden: "15",
scores_today: "0.000000",
scores_total: "52337.000000"
}
]
My code
// Asker's failing attempt: derive year/month/day from the string field
// `timestamp`, then group by iden + day and sum scores_today.
var project = {
"$project":{
"_id": 0,
"y": {
"$year": "$timestamp" // attempt 1: $year receives the stored String, which the server rejects (see the "can't convert from BSON type String to Date" error below)
},
"m": {
"$month": new Date("$timestamp") // attempt 2: new Date(...) is evaluated by JavaScript on the literal text "$timestamp" while this object is built, so it never sees the document's value
},
"d": {
"$dayOfMonth": new Date("$timestamp") // same client-side evaluation as "m"
},
"iden" : "$iden"
// NOTE(review): scores_today is not projected here, yet the $group stage below sums "$scores_today"
}
},
group = {
"$group": {
// One bucket per iden and calendar day, using the y/m/d fields produced by $project.
"_id": {
"iden" : "$iden",
"year": "$y",
"month": "$m",
"day": "$d"
},
"count" : { "$sum" : "$scores_today" }
}
};
mongoDB.collection('raw').aggregate([ project, group ]).toArray()....
This is the error logged by node.js service
Err: { [MongoError: exception: can't convert from BSON type String to
Date] name: 'MongoError', message: 'exception: can\'t convert from
BSON type String to Date', errmsg: 'exception: can\'t convert from
BSON type String to Date', code: 16006, ok: 0 }

You can construct Date object from string using ISODate($timestamp).
// Aggregation stages that bucket documents by iden and calendar day (UTC)
// and sum the scores.
//
// The conversions have to be expressed as aggregation operators so that the
// server applies them to each document. Calling ISODate("$timestamp") or
// new Date("$timestamp") runs in the client on the literal text "$timestamp"
// and never reads the field's value.
//
// Requires MongoDB 3.6+ for $dateFromString and 4.0+ for $toDouble.
var timestampAsDate = { "$dateFromString": { "dateString": "$timestamp" } };

var project = {
  "$project": {
    "_id": 0,
    "y": { "$year": timestampAsDate },
    "m": { "$month": timestampAsDate },       // 1-12
    "d": { "$dayOfMonth": timestampAsDate },  // 1-31
    "iden": "$iden",
    // The scores are stored as strings; $sum ignores non-numeric values,
    // so convert them before grouping.
    "scores_today": { "$toDouble": "$scores_today" },
    "scores_total": { "$toDouble": "$scores_total" }
  }
},
group = {
  "$group": {
    "_id": {
      "iden": "$iden",
      "year": "$y",
      "month": "$m",
      "day": "$d"
    },
    "count": { "$sum": "$scores_today" },
    "total": { "$sum": "$scores_total" }
  }
};
UPDATE
If you're not running MongoDb shell then you can't use ISODate directly. In this case try to invoke eval command.
// NOTE(review): eval executes arbitrary JavaScript on the server. It was
// deprecated in MongoDB 3.0 and removed in 4.2, and drivers have dropped the
// helper as well. Prefer sending the same pipeline through the driver's own
// aggregate() call; this form is kept only for servers that still allow eval.
//
// The function is passed as a function value instead of concatenated string
// pieces: the string form had unterminated literals, and its inline "//"
// comment would have commented out the rest of the single-line function text.
var aggregationResult = mongoDB.eval(function () {
  // Server-side conversion of the string field; ISODate("$timestamp") would
  // only parse the literal text "$timestamp", not the document's value.
  // Requires MongoDB 3.6+ ($dateFromString) and 4.0+ ($toDouble).
  var timestampAsDate = { "$dateFromString": { "dateString": "$timestamp" } };

  var project = {
    "$project": {
      "_id": 0,
      "y": { "$year": timestampAsDate },
      "m": { "$month": timestampAsDate },
      "d": { "$dayOfMonth": timestampAsDate },
      "iden": "$iden",
      // Stored as a string; $sum ignores non-numeric values.
      "scores_today": { "$toDouble": "$scores_today" }
    }
  };

  var group = {
    "$group": {
      "_id": {
        "iden": "$iden",
        "year": "$y",
        "month": "$m",
        "day": "$d"
      },
      "count": { "$sum": "$scores_today" }
    }
  };

  // Materialise the cursor so that plain documents are returned to the caller.
  return db.raw.aggregate([project, group]).toArray();
});

Related

NodeJS MongoDb $sort not working in aggregate query

I have created a GET api for results, with filters, pagination and sorting. But though the data is correct, the sorting is not happening correctly.
// Builds the filter, sort and pagination stages for the results listing and
// runs them as one aggregation.

// Parses a strictly positive integer from a query-string value; returns
// `fallback` when the value is missing, non-numeric, zero or negative, so
// that $skip / $limit never receive NaN or a negative number.
const toPositiveInt = (value, fallback) => {
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
};

const DATE_TIME_FORMAT = 'DD-MM-YYYY HH:mm:ss';

const findCond = { centerId: self.centersModel.getObjectId(userData.id) };

if (queryParams.result) {
  // NOTE(review): the pattern comes straight from user input; escape it if
  // callers are only expected to pass plain text rather than a regex.
  findCond['result'] = { "$regex": queryParams.result, "$options": 'i' };
}

// Inclusive created_at range: start of fromDate to end of toDate.
const createdAtRange = {};
if (queryParams.fromDate) {
  createdAtRange['$gte'] = moment(queryParams.fromDate + ' 00:00:00', DATE_TIME_FORMAT).toDate();
}
if (queryParams.toDate) {
  createdAtRange['$lte'] = moment(queryParams.toDate + ' 23:59:59', DATE_TIME_FORMAT).toDate();
}
if (Object.keys(createdAtRange).length > 0) {
  findCond['created_at'] = createdAtRange;
}

const limit = toPositiveInt(queryParams.perPage, 10);
const skip = (toPositiveInt(queryParams.page, 1) - 1) * limit;

const aggregate = [
  { "$match": findCond },
  // _id breaks ties between documents with the same created_at, so the
  // order (and therefore each page's content) is the same on every request.
  { "$sort": { "created_at": 1, "_id": 1 } },
  { "$skip": skip },
  { "$limit": limit }
];
console.log(aggregate);
self.resultsModel.aggregate(aggregate, function(err, results) {
  console.log(err, results);
});
Result:
{
"status": "ok",
"results": [
{
"id": "5db83e69fcee977a20b24260",
"result": "Pass",
"examDate": "29/10/2019 06:58 PM"
},
{
"id": "5db6b4d33ffd7d3ccde175d1",
"result": "Pass",
"examDate": "28/10/2019 08:38 PM"
},
{
"id": "5db83e9bfcee977a20b24262",
"result": "Pass",
"examDate": "29/10/2019 06:58 PM"
},
{
"id": "5db83ecafcee977a20b24264",
"result": "Pass",
"examDate": "29/10/2019 06:59 PM"
},
{
"id": "5db83f40fcee977a20b24266",
"result": "Fail",
"examDate": "29/10/2019 07:01 PM"
},
{
"id": "5db84395bb402b0f3de43ff7",
"result": "Pass",
"examDate": "29/10/2019 07:20 PM"
},
{
"id": "5db843c0bb402b0f3de43ff9",
"result": "Pass",
"examDate": "29/10/2019 07:20 PM"
}
]
}
One of the dates (28/10/2019) appears in between the 29/10/2019 entries. Please tell me the reason for this, or a solution.
I think, to reduce confusion, you should share the value of aggregate variable that you have logged, rather than the response you create after applying a transformation to it.

Mongo groupby date inside array

I have collection in my db as,
[
{
"groupName" : "testName",
"participants" : [
{
"participantEmail" : "test#test.com",
"lastClearedDate" : 12223213123
},
{
"participantEmail" : "test2#test.com",
"lastClearedDate" : 1234343243423
}
],
"messages" : [
{
"message":"sdasdasdasdasdasd",
"time":22312312312,
"sender":"test#test.com"
},
{
"message":"gfdfvd dssdfdsfs",
"time":2231231237789,
"sender":"test#test.com"
}
]
}
]
This is a collection of group which contains all the participants and messages in that group.
The time field inside the message is Timestamp.
I want get all the messages inside a group which are posted after the given date and grouped by date.
I wrote the following code,
ChatGroup.aggregate([
{ $match: { group_name: groupName } },
{ $unwind: "$messages" },
{ $match: { "messages.time": { $gte: messagesFrom } } },
{
$project: {
_id: 0,
y: {
$year: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
},
m: {
$month: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
},
d: {
$dayOfMonth: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
}
}
},
{
$group: {
_id: {
year: "$y",
month: "$m",
day: "$d"
},
messages: { $push: "$messages" },
count: { $sum: 1 }
}
}
]).then(
group => {
console.log("length of messages", group);
resolve(group);
},
err => {
console.log(err);
}
);
});
and I getting the following output,
[
{
"_id": {
"year": 50694,
"month": 9,
"day": 5
},
"messages": [],
"count": 3
},
{
"_id": {
"year": 50694,
"month": 8,
"day": 27
},
"messages": [],
"count": 1
},
{
"_id": {
"year": 50694,
"month": 8,
"day": 26
},
"messages": [],
"count": 10
}
]
I am not getting the messages but the count is correct.
Also the time which is displayed in the result is incorrect e.g. year, date and month.
Mongo version is 3.2.
I referred the groupby and push documentation from mongodb along with other stackoverflow questions on mongo group by.
What am I doing wrong?
Your timestamp is already in milliseconds, which is the unit $add uses when a number is added to a date. So you don't need to multiply it by 1000; that multiplication is why the year comes out as 50694.
So your final query should be something like this
// Server-side expression turning `messages.time` into a Date: $add treats the
// number as milliseconds counted from new Date(0).
const messageDate = { "$add": [new Date(0), "$messages.time"] };
// The same lower bound is applied twice: once to whole documents before
// $unwind, and once to the individual messages after it.
const postedSince = { "$gte": messagesFrom };

ChatGroup.aggregate([
  { "$match": { "group_name": groupName, "messages.time": postedSince } },
  { "$unwind": "$messages" },
  { "$match": { "messages.time": postedSince } },
  {
    "$group": {
      // One bucket per calendar day.
      "_id": {
        "year": { "$year": messageDate },
        "month": { "$month": messageDate },
        "day": { "$dayOfMonth": messageDate }
      },
      "messages": { "$push": "$messages" },
      "count": { "$sum": 1 }
    }
  }
])
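Also add messages to the $project stage. Any field that is not listed there is dropped before $group runs, which is why $push collected nothing: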
{
$project: {
_id: 0,
messages : 1,
.........
},
}

Aggregate by timestamp and Sum by float

I have a set of data in mongoDB that I have to sum up grouped by $timestamp. I succeeded in grouping them day by day, but now I need to sum them by another field.
Example data:
[
{
_id: "1442",
timestamp: "1458080642000",
iden: "15",
scores_today: "0.000000",
scores_total: "52337.000000"
}
]
My code
// Aggregation stages that bucket documents by iden and calendar day and sum
// both score fields.
//
// Fixes over the earlier attempt: the missing commas between properties, the
// field names (the data has `scores_total` and `iden`, not
// `scores_today_total` / `mac`), and the string-to-number conversions.
// $toLong / $toDouble require MongoDB 4.0+ and leave values that are already
// numeric unchanged.
var timestampAsDate = {
  // $add treats the number as milliseconds counted from new Date(0).
  "$add": [new Date(0), { "$toLong": "$timestamp" }]
};

var project = {
  "$project": {
    "_id": 0,
    "y": { "$year": timestampAsDate },
    "m": { "$month": timestampAsDate },
    "d": { "$dayOfMonth": timestampAsDate },
    "iden": "$iden",
    // $sum ignores non-numeric values, so the string scores must be converted.
    "totalTd": { "$toDouble": "$scores_today" },
    "total": { "$toDouble": "$scores_total" }
  }
},
group = {
  "$group": {
    "_id": {
      "iden": "$iden",
      "year": "$y",
      "month": "$m",
      "day": "$d"
    },
    count: { "$sum": "$total" },
    countOther: { "$sum": "$totalTd" }
  }
};
mongoDB.collection('raw').aggregate([ project, group ]).toArray....
I'm not able to sum them. What do I need to change?
I need to group them day by day (this works) and by iden (this works), and then sum up the different scores.

How could I remove the duplicated items(complex object) from array

In each document, records is an array containing many duplicated objects,
and each buy_items array also contains many duplicated items.
How can I remove the duplicated items?
Original documents:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"5210 ",
"5210 "
]
},
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"5210 ",
"5210 "
]
}
{
"DATE": new Date("2012-12-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"1234 ",
" "
]
}
]
}
Expected Output:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
{
"DATE": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 "
]
},
{
"DATE": new Date("2012-12-08T08:00:00+0800"),
"buy_items": [
"5210 ",
"1234 ",
" "
]
}
]
}
With Michael's solution, the output might look like this:
{
"_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records": [
"date": new Date("1996-02-08T08:00:00+0800"),
"buy_items": [
"5210 "
"1234 ",
" "
]
]
}
You can remove duplicated objects using the aggregation framework
// Flatten every record and every buy_item into its own document.
var unwindRecords = { $unwind: "$records" };
var unwindBuyItems = { $unwind: "$records.buy_items" };

// Regroup per (document, date); $addToSet keeps each buy_item once.
var groupByDocumentAndDate = {
  $group: {
    "_id": { id: "$_id", date: "$records.DATE" },
    buy_items: { $addToSet: "$records.buy_items" }
  }
};

// Rebuild one document per original _id with its deduplicated records.
var regroupByDocument = {
  $group: {
    "_id": "$_id.id",
    records: { $push: { "date": "$_id.date", "buy_items": "$buy_items" } }
  }
};

var sortByFirstRecordDate = { $sort: { "records.0.date": 1 } };

// $out writes the result into the collection named "collection".
var writeResult = { $out: "collection" };

db.collection.aggregate([
  unwindRecords,
  unwindBuyItems,
  groupByDocumentAndDate,
  regroupByDocument,
  sortByFirstRecordDate,
  writeResult
])
The $out operator lets you write the aggregation result to a specified collection, or replace your existing collection.
Even better using "Bulk" operations
// Recomputes each document's `records` array without duplicates and writes
// it back with ordered bulk updates, flushing every 500 queued operations.
// The rebuilt records carry the fields `date` and `buy_items`.
var bulk = db.collection.initializeOrderedBulkOp(),
    count = 0;

db.collection.aggregate([
  { "$unwind": "$records" },
  { "$project": {
    "date": "$records.DATE",
    "buy_items": { "$setIntersection": "$records.buy_items" }
  }},
  { "$unwind": "$buy_items" },
  // Merge records that share the same document and date, keeping each item once.
  { "$group": {
    "_id": { "id": "$_id", "date": "$date" },
    "buy_items": { "$addToSet": "$buy_items" }
  }},
  // Collect the merged records back under their original document _id.
  { "$group": {
    "_id": "$_id.id",
    "records": { "$push": {
      "date": "$_id.date",
      "buy_items": "$buy_items"
    }}
  }}
]).forEach(function(doc) {
  bulk.find({ "_id": doc._id }).updateOne({
    "$set": { "records": doc.records }
  });
  count++;
  if (count % 500 === 0) {
    // Send the full batch and start a fresh one; an executed bulk object
    // cannot be reused.
    bulk.execute();
    bulk = db.collection.initializeOrderedBulkOp();
  }
});

// Flush the final, partially filled batch (skipped when nothing is queued).
if (count % 500 !== 0) {
  bulk.execute();
}
Result:
{
"_id" : "0005d116qwwewdq82a1b84f148fa6027d429f3e",
"records" : [
{
"date" : ISODate("2012-12-08T00:00:00Z"),
"buy_items" : [
" ",
"1234 ",
"5210 "
]
},
{
"date" : ISODate("1996-02-08T00:00:00Z"),
"buy_items" : [
"5210 "
]
}
]
}
If you want to update the current collection in place, without creating a new collection and dropping the previous one, you can use distinct. I tried this, but note that it requires running two separate update commands.
First update records with distinct like this :
// NOTE(review): distinct('records') collects values across every document in the collection, and update() with an empty filter and no `multi` option modifies only the first matching document — confirm this is the intended scope.
db.collectionName.update({},{"$set":{"records":db.collectionName.distinct('records')}})
and second update for buy_items with distinct like this :
// NOTE(review): this overwrites buy_items of the first record only ("records.0") with the distinct buy_items gathered from the whole collection — confirm this is the intended result.
db.collectionName.update({},{"$set":{"records.0.buy_items":db.collectionName.distinct('records.buy_items')}})
If you want to avoid running two update queries, follow Michael's answer.
You could try using the forEach() method of the find() cursor to iterate over each document properties, check for uniqueness and filter distinct values as follows:
// For every document: remove repeated values inside each record's buy_items,
// then drop records that duplicate an earlier record of the same document.
// Two records count as duplicates when both their DATE and their
// deduplicated buy_items match, wherever they sit in the array.
db.collection.find().forEach(function (doc) {
  var seenRecords = {};
  var records = [];

  (doc.records || []).forEach(function (item) {
    // Keep only the first occurrence of each buy_item, preserving order.
    var uniqueBuyItems = item["buy_items"].filter(function (value, pos, all) {
      return all.indexOf(value) === pos;
    });

    // Dates serialise to their ISO string, so equal dates yield equal keys.
    var key = JSON.stringify([item["DATE"], uniqueBuyItems]);
    if (Object.prototype.hasOwnProperty.call(seenRecords, key)) {
      return;
    }
    seenRecords[key] = true;

    item["buy_items"] = uniqueBuyItems;
    records.push(item);
  });

  // Update only the records field rather than re-saving the whole document.
  db.collection.updateOne({ "_id": doc._id }, { "$set": { "records": records } });
});

MongoDB aggregate query

In MongoDB I have a collection:
Statistics
{
UserID: int //User id
Url: string //Url
Clicks: [DateTime] //A time array
}
Each time a user clicks a URL, the date of the click is appended to the Clicks array. My question is: how can I write an aggregate query that gets the number of clicks made from [date1] until [date2], grouped by UserID? And how can I output the result to a file?
Thanks!
Assuming you have data like this (see at the bottom how to generate this):
{ "_id": ObjectId("508ab0e27bb16229520c9561"), "userid": 0, "url": "", "clickDate": ISODate("20120101T12:01:00Z") }
{ "_id": ObjectId("508ab0e27bb16229520c9562"), "userid": 1, "url": "", "clickDate": ISODate("20120202T12:01:00Z") }
{ "_id": ObjectId("508ab0e27bb16229520c9563"), "userid": 2, "url": "", "clickDate": ISODate("20120303T12:01:00Z") }
{ "_id": ObjectId("508ab0e27bb16229520c9564"), "userid": 3, "url": "", "clickDate": ISODate("20120404T11:01:00Z") }
{ "_id": ObjectId("508ab0e27bb16229520c9565"), "userid": 4, "url": "", "clickDate": ISODate("20120505T11:01:00Z") }
Here is the aggregation function:
// Keep only clicks at or after the cut-off. JavaScript Date months are
// zero-based, so month index 8 is September.
var clicksSince = {
  $match: {
    clickDate: { $gte: new Date(2012, 8, 30, 12, 0, 0) }
  }
};

// Count the remaining clicks per user.
var countPerUser = {
  $group: {
    _id: "$userid",
    clicks: { $sum: 1 }
  }
};

db.test.aggregate([clicksSince, countPerUser]);
Make sure you have the $match before the $group. See early filtering.
Results:
{
"result": [
{ "_id": 8,
"clicks": 1
},
{ "_id": 7,
"clicks": 2
},
{ "_id": 6,
"clicks": 2
},
{ "_id": 3,
"clicks": 2
},
{ "_id": 2,
"clicks": 2
},
{ "_id": 1,
"clicks": 2
},
{ "_id": 4,
"clicks": 2
},
{ "_id": 0,
"clicks": 2
},
{ "_id": 5,
"clicks": 2
},
{ "_id": 9,
"clicks": 1
}
],
"ok": 1
}
The Data was generated with this loop:
// Seeds 100 click documents. The day cycles through 1..30, the month index
// through 0..10 (JavaScript Date months are zero-based) and the user id
// through 0..9.
for (var i = 0; i < 100; i++) {
  var day = (i % 30) + 1;
  var month = i % 11;
  var userId = i % 10;
  db.test.insert({userid: userId, url:"", clickDate: new Date(2012, month, day, 12, 1, 0)});
}