In MongoDB I have a collection:
Statistics
{
UserID: int //User id
Url: string //Url
Clicks: [DateTime] //A time array
}
When a user clicks a URL, the date of the click is added to the Clicks array. My question is: how can I write an aggregate query that gets the number of clicks made from [date1] till [date2], grouped by UserID? And how can I output the result to a file?
Thanks!
Assuming you have data like this (see at the bottom how to generate this):
{ "_id": ObjectId("508ab0e27bb16229520c9561"), "userid": 0, "url": "", "clickDate": ISODate("20120101T12:01:00Z") }
{ "_id": ObjectId("508ab0e27bb16229520c9562"), "userid": 1, "url": "", "clickDate": ISODate("20120202T12:01:00Z") }
{ "_id": ObjectId("508ab0e27bb16229520c9563"), "userid": 2, "url": "", "clickDate": ISODate("20120303T12:01:00Z") }
{ "_id": ObjectId("508ab0e27bb16229520c9564"), "userid": 3, "url": "", "clickDate": ISODate("20120404T11:01:00Z") }
{ "_id": ObjectId("508ab0e27bb16229520c9565"), "userid": 4, "url": "", "clickDate": ISODate("20120505T11:01:00Z") }
Here is the aggregation function:
// Count the clicks of every user inside the half-open date range [date1, date2).
// JavaScript Date months are zero-based: 8 is September, 0 is January.
db.test.aggregate([
    {
        // Filter first, so that $group only has to process clicks in the range.
        $match: {
            clickDate: {
                $gte: new Date(2012, 8, 30, 12, 0, 0), // date1 - inclusive lower bound
                $lt: new Date(2013, 0, 1, 0, 0, 0)     // date2 - exclusive upper bound
            }
        }
    },
    {
        // One output document per user; $sum: 1 adds one for every matching click.
        $group: {
            _id: "$userid",
            clicks: { $sum: 1 }
        }
    }
]);
Make sure you have the $match before the $group. See early filtering.
Results:
{
"result": [
{ "_id": 8,
"clicks": 1
},
{ "_id": 7,
"clicks": 2
},
{ "_id": 6,
"clicks": 2
},
{ "_id": 3,
"clicks": 2
},
{ "_id": 2,
"clicks": 2
},
{ "_id": 1,
"clicks": 2
},
{ "_id": 4,
"clicks": 2
},
{ "_id": 0,
"clicks": 2
},
{ "_id": 5,
"clicks": 2
},
{ "_id": 9,
"clicks": 1
}
],
"ok": 1
}
The Data was generated with this loop:
// Seed 100 sample click documents.
// The Date constructor takes a zero-based month index and a one-based day.
for (var n = 0; n < 100; n++) {
    var day = (n % 30) + 1; // cycles through 1..30
    var month = n % 11;     // cycles through 0..10
    var user = n % 10;      // cycles through 0..9
    db.test.insert({userid: user, url: "", clickDate: new Date(2012, month, day, 12, 1, 0)});
}
Related
I have the following MongoDB collection:
db={
"problems": [
{
"problemId": 1,
"title": "dummy 1",
},
{
"problemId": 2,
"title": "dummy 2",
}
],
"submissions": [
{
"submissionId": 1,
"status": "AC",
"problemId": 1,
},
{
"submissionId": 2,
"status": "AC",
"problemId": 1,
},
{
"submissionId": 3,
"status": "WA",
"problemId": 2,
},
{
"submissionId": 4,
"status": "WA",
"problemId": 1,
},
{
"submissionId": 5,
"status": "AC",
"problemId": 2,
},
{
"submissionId": 6,
"status": "WA",
"problemId": 2,
}
]
}
I want to show the total submission count and the number of solutions with status = 'AC'. I want the result to look like this:
[
{
"problemId": 1,
"title": "dummy 1",
"total_submissions": 3,
"accepted_submissions": 2
},
]
So far I have used the $lookup operator and did something like this:
// Join every problem with the submissions that reference it.
db.problems.aggregate([
    {
        $lookup: {
            from: "submissions",       // collection to join with
            localField: "problemId",   // key on the problems side
            foreignField: "problemId", // key on the submissions side
            as: "submission_docs"      // name of the resulting array field
        }
    }
])
But the result I get is: (I'm just showing 1 item in the list)
[
{
"_id": ObjectId("5a934e000102030405000000"),
"problemId": 1,
"submission_docs": [
{
"_id": ObjectId("5a934e000102030405000003"),
"problemId": 1,
"status": "AC",
"submissionId": 1
},
{
"_id": ObjectId("5a934e000102030405000004"),
"problemId": 1,
"status": "AC",
"submissionId": 2
},
{
"_id": ObjectId("5a934e000102030405000006"),
"problemId": 1,
"status": "WA",
"submissionId": 4
}
],
"title": "dummy 1"
}
]
The MongoDB playground can be found here: https://mongoplayground.net/p/YRdpyQN5f00
Use $size to count the submission_docs array length for total_submissions.
Filter the submission with status "AC" in submission_docs via $filter and next count the array length.
db.problems.aggregate([
    // Stage 1 - attach the matching submissions to each problem.
    {
        $lookup: {
            from: "submissions",
            localField: "problemId",
            foreignField: "problemId",
            as: "submission_docs"
        }
    },
    // Stage 2 - turn the joined array into the two requested counters.
    {
        $project: {
            _id: 0,
            problemId: 1,
            title: 1,
            // Every joined submission counts towards the total.
            total_submissions: { $size: "$submission_docs" },
            // Only the submissions whose status is "AC" count as accepted.
            accepted_submissions: {
                $size: {
                    $filter: {
                        input: "$submission_docs",
                        cond: { $eq: ["$$this.status", "AC"] }
                    }
                }
            }
        }
    }
])
Sample Mongo Playground
I have a collection from which I need one specific note (e.g. the one with block 2 and curse 5) returned as an object, not as an array:
{
"year":2020,
"grade":4,
"seccion":"A",
"id": 100,
"name": "pedro",
"notes":[{"curse":5,
"block":1,
"score":{ "a1": 5,"a2": 10, "a3": 15}
},{"curse":5,
"block":2,
"score":{ "b1": 10,"b2": 20, "b3": 30}
}
]
}
My query
// Select documents by grade / seccion / year and project a few top-level
// fields together with the single `notes` element for block 2, curse 5.
// NOTE(review): the sample document shown above has grade 4, which this
// filter (grade 1) would not select - confirm the intended value.
notas.find(
    {
        "$and": [
            { "grade": 1 },
            { "seccion": "A" },
            { "year": 2020 }
        ]
    },
    {
        "projection": {
            "grade": 1,
            "seccion": 1,
            "name": 1,
            "id": 1,
            "notes": { "$elemMatch": { "block": 2, "curse": 5 } },
            "notes.score": 1
        }
    }
)
It works, but it returns notes as an array:
{
"_id": "55",
"id": 100,
"grade": 5,
"name": "pedro",
"seccion": "A",
"notes": [
{"score": { "b1": 10,"b2": 20, "b3": 30} }
]
}
But I need it like this: score at the same level as the other fields and, if it doesn't exist, an empty "score":{}
{
"year":2020,
"grade":5,
"seccion":"A",
"id": 100,
"name": "pedro",
"score":{ "b1": 10,"b2": 20, "b3": 30}
}
Demo - https://mongoplayground.net/p/XlJqR2DYW1X
You can use aggregation query
db.collection.aggregate([
    // Stage 1 - keep only documents that own a note for block 2 / curse 5.
    {
        $match: {
            grade: 1,
            seccion: "A",
            year: 2020,
            notes: { $elemMatch: { block: 2, curse: 5 } }
        }
    },
    // Stage 2 - emit one document per element of `notes`.
    { $unwind: "$notes" },
    // Stage 3 - discard the unwound notes that are not the requested one.
    { $match: { "notes.block": 2, "notes.curse": 5 } },
    // Stage 4 - lift the note's score up to the top level of the result.
    {
        $project: {
            grade: 1,
            seccion: 1,
            name: 1,
            id: 1,
            score: "$notes.score"
        }
    }
])
Update
Demo - https://mongoplayground.net/p/mq5Kue3UG42
Use $filter
db.collection.aggregate([
    // Stage 1 - select the documents by their top-level fields only, so that
    // documents without a matching note are still returned.
    {
        $match: {
            grade: 1,
            seccion: "A",
            year: 2020
        }
    },
    // Stage 2 - store in `score` the notes that belong to block 2 / curse 5
    // (the note requested in the question). The result is an array, which is
    // empty when no note matches.
    {
        $set: {
            score: {
                $filter: {
                    input: "$notes",
                    as: "note",
                    cond: {
                        $and: [
                            { $eq: ["$$note.block", 2] },
                            { $eq: ["$$note.curse", 5] }
                        ]
                    }
                }
            }
        }
    },
    // Stage 3 - projection.
    {
        $project: {
            grade: 1,
            seccion: 1,
            name: 1,
            id: 1,
            // "$score.score" is the list of score objects of the filtered
            // notes; $first picks the score of the first matching note.
            score: { $first: "$score.score" }
        }
    }
])
If you want empty object for score when match not found you can do -
Demo - https://mongoplayground.net/p/dumax58kgrc
{
// Replaces the `score` array with a single object.
// NOTE(review): presumably placed after a stage that stores the filtered notes
// in `score` and before a projection that reads "$score.score" - confirm
// against the linked demo.
$set: {
score: {
$cond: [
{ $size: "$score" }, // condition: the array length; 0 (no match) counts as false
{ $first: "$score" }, // true branch: take the first element of the array
{ score: {} } // false branch: an object whose own `score` field is an empty object
]
}
}
},
Consider the following collection:
requests->
{
_id : ObjectId("573f28f49b0ffc283676f736"),
date : '2018-12-31',
userId: ObjectId("5c1239e93a7b9a72ef1c9197"),
serviceId: ObjectId("572e29b0116b5db3057bd821"),
status: 'completed'
}
I have an aggregate operation on requests collection which returns documents in the following format:
{
"grossRequests" : 2,
"grossData" : [
{
"date" : "2018-08-04",
"count" : 1,
"requests" : [
ObjectId("5b658147c73beb5ea3debc6e")
]
},
{
"date" : "2018-08-05",
"count" : 1,
"requests" : [
ObjectId("5b658160572faa5dd033fb48")
]
}
],
"netData" : [
{
"date" : "2018-08-05",
"count" : 1,
"requests" : [
ObjectId("5b658160572faa5dd033fb48")
]
}
],
"netRequests" : 1.0,
"userId" : ObjectId("5c1239e93a7b9a72ef1c9197"),
"serviceId" : "572e29b0116b5db3057bd821"
}
Above is the document to be inserted in cumulativeData
Now, I need to add all the documents returned by my aggregation operation, into a collection called cumulativeData.
The cumulativeData collection has one document per userId per serviceType.
I am running a query for specific date ranges and while inserting them into the collection, would like to "merge" the documents, rather than replacing them.
so, for example, if I am looping through all the documents returned by the aggregation operation using forEach, for each new document I get ({userId,serviceType} pair which is not present in the cumulativeData collection) I need to insert it as is and for every document which is already present in the collection, I need to update it as follows.
grossRequests -> add both values
grossData -> push the new values into the existing set
netData -> push the new values into the existing set
netRequests -> add both values
The operation would be as follows,
Existing doc in ** cumulativeData **
{
"grossRequests": 2,
"grossData": [{
"date": "2018-08-04",
"count": 1,
"requests": [
ObjectId("5b658147c73beb5ea3debc6e")
]
}, {
"date": "2018-08-05",
"count": 1,
"requests": [
ObjectId("5b658160572faa5dd033fb48")
]
}
],
"netData": [{
"date": "2018-08-05",
"count": 1,
"requests": [
ObjectId("5b658160572faa5dd033fb48")
]
}
],
"netRequests": 1,
"userId": ObjectId("5c1239e93a7b9a72ef1c9197"),
"serviceId": "572e29b0116b5db3057bd821"
}
New Document generated after aggregation on a new date range
{
"grossRequests": 2,
"grossData": [{
"date": "2018-08-04",
"count": 1,
"requests": [
ObjectId("5b658147c73beb5ea3debc8e")
]
}, {
"date": "2018-08-05",
"count": 1,
"requests": [
ObjectId("5b658160572faa5dd033fb4l")
]
}
],
"netData": [{
"date": "2018-08-05",
"count": 1,
"requests": [
ObjectId("5b658160572faa5dd033fb4l")
]
}
],
"netRequests": 1,
"userId": ObjectId("5c1239e93a7b9a72ef1c9197"),
"serviceId": "572e29b0116b5db3057bd821"
}
Final Result
{
"grossRequests": 4,
"grossData": [{
"date": "2018-08-04",
"count": 2,
"requests": [
ObjectId("5b658147c73beb5ea3debc6e") , ObjectId("5b658147c73beb5ea3debc8e")
]
}, {
"date": "2018-08-05",
"count": 2,
"requests": [
ObjectId("5b658160572faa5dd033fb48"), ObjectId("5b658160572faa5dd033fb4l")
]
}
],
"netData": [{
"date": "2018-08-05",
"count": 2,
"requests": [
ObjectId("5b658160572faa5dd033fb48"),ObjectId("5b658160572faa5dd033fb4l")
]
}
],
"netRequests": 2,
"userId": ObjectId("5c1239e93a7b9a72ef1c9197"),
"serviceId": "572e29b0116b5db3057bd821"
}
You can use below code to perform updates.
It involves two steps: when no record with the given userId/serviceId is found, the data is inserted; otherwise the existing record is updated.
Within the update step, first update the top-level counters (grossRequests and netRequests), then iterate over netData and grossData to perform the merge.
To perform the merge we use the write result returned by each array update to identify whether an existing array element was updated or a new array element has to be inserted.
You can adjust below query to meet your needs.
/**
 * Merge per-date entries into one array field of a cumulativeData document.
 *
 * For every entry, an array element with the same `date` gets its `count`
 * incremented and the entry's `requests` appended. When no element with that
 * date exists yet, the entry is pushed as a new array element.
 *
 * @param {Object} key     filter identifying the document: { userId, serviceId }
 * @param {string} field   name of the array field ("grossData" or "netData")
 * @param {Array}  entries objects of the form { date, count, requests }
 */
function mergeDailyEntries(key, field, entries) {
    var target = db.getCollection('cumulativeData');

    (entries || []).forEach(function (entry) {
        // Match the owning document AND the array element for this date, so
        // that the positional operator `$` addresses that element.
        var elementFilter = { userId: key.userId, serviceId: key.serviceId };
        elementFilter[field + ".date"] = entry.date;

        var increment = {};
        increment[field + ".$.count"] = entry.count;

        var append = {};
        append[field + ".$.requests"] = { $each: entry.requests };

        var result = target.updateOne(elementFilter, { $inc: increment, $push: append });

        // matchedCount is used instead of the legacy WriteResult.nModified:
        // it is part of the updateOne() result, and it stays correct when an
        // update matches an element but does not change it.
        if (result.matchedCount === 0) {
            var newElement = {};
            newElement[field] = {
                count: entry.count,
                requests: entry.requests,
                date: entry.date
            };
            target.updateOne(key, { $push: newElement });
        }
    });
}

// Merge every aggregated document into cumulativeData, keyed by (userId, serviceId).
// TODO: replace the empty pipeline below with your own aggregation stages.
db.getCollection('requests').aggregate([ /* your aggregation pipeline */ ]).forEach(function (doc) {
    var target = db.getCollection('cumulativeData');
    var key = { userId: doc.userId, serviceId: doc.serviceId };

    // First occurrence of this (userId, serviceId) pair: store the document as is.
    if (target.findOne(key) === null) {
        target.insertOne(doc);
        return;
    }

    // Existing document: add up the top-level counters ...
    target.updateOne(key, {
        $inc: {
            grossRequests: doc.grossRequests,
            netRequests: doc.netRequests
        }
    });

    // ... and merge both per-date arrays.
    mergeDailyEntries(key, "grossData", doc.grossData);
    mergeDailyEntries(key, "netData", doc.netData);
});
I'm trying to get a list that counts all the victories and battles grouped by player name out of this json that I obtain from an API:
[
{
"createdDate": 1541411260,
"players": [
{
"tag": "tag1234",
"name": "name1",
"battles": 2,
"wins": 1
},
{
"tag": "tag124567",
"name": "name2",
"battles": 1,
"wins": 0
},
{
"tag": "tag1234",
"name": "name3",
"battles": 3,
"wins": 3
}
]
},
{
"createdDate": 1541411460,
"players": [
{
"tag": "tag1234",
"name": "name1",
"battles": 1,
"wins": 1
},
{
"tag": "tag124567",
"name": "name2",
"battles": 1,
"wins": 1
},
{
"tag": "tag1234",
"name": "name3",
"battles": 0,
"wins": 0
},
{
"tag": "tag124567",
"name": "name4",
"battles": 1,
"wins": 0
}
]
},
{
"createdDate": 1541455260,
"players": [
{
"tag": "tag1234",
"name": "name1",
"battles": 0,
"wins": 0
},
{
"tag": "tag124567",
"name": "name2",
"battles": 4,
"wins": 4
},
{
"tag": "tag1234",
"name": "name3",
"battles": 6,
"wins": 6
}
]
}
]
The mongo query I'm using is the following but I can't get the names and battles/wins:
db.getCollection("logs").aggregate([
    // Emit one document per element of the `players` array.
    { $unwind : '$players' },
    {
        // Sum battles and wins per player name.
        $group: {
            _id: { name: '$players.name' },
            numBattles: { $sum: '$players.battles' },
            numWins: { $sum: '$players.wins' }
        }
    },
    {
        // NOTE: numBattles and numWins are read from `_id` here, but `_id`
        // only holds `name` - this is the problem the question is about.
        $project: {
            name: "$_id.name",
            numBattles: '$_id.numBattles',
            numWins: '$_id.numWins',
            _id: 0
        }
    } // closes the $project stage; this brace was missing in the original snippet
]
).pretty();
This gave me 0 results.
Also tried the following but it's returning a full group of players and their stats:
// Same pipeline as the first attempt, but without the $unwind stage, so
// `players` is still an array when $group evaluates '$players.name'.
db.getCollection("logs").aggregate([
    {
        $group: {
            _id: { name: '$players.name' },
            numBattles: { $sum: '$players.battles' },
            numWins: { $sum: '$players.wins' }
        }
    },
    {
        $project: {
            name: "$_id.name",
            numBattles: '$_id.numBattles',
            numWins: '$_id.numWins',
            _id: 0
        }
    }
]).pretty();
The idea is to get something like this:
name1 - 3 battles and 2 wins,
name2 - x battles and y wins,
...
Any ideas?
Thank you.
In your $project stage you're referring to _id, which contains only name; the other fields should be referenced directly. So you just need to change your $project to:
{
$project: {
name: "$_id.name",
numBattles: "$numBattles",
numWins: "$numWins",
_id: 0
}
}
I am new to mongo queries. Currently I have a collection like this, which is used to create a d3 force-directed graph.
{
"_id": "allesgute3",
"nodes": [{
"id": "bmw#gmail.com",
"count": 15,
"nodeUpdatetime": 1525341732
}, {
"id": "abc#gmail.com",
"count": 10,
"nodeUpdatetime": null
}, {
"id": "xyz#gmail.com",
"count": 8,
"nodeUpdatetime": 1525408742
}, {
"id": "wilson#gmail.com",
"count": 4,
"nodeUpdatetime": 1525423847
}, {
"id": "niv#gmail.com",
"count": 6,
"nodeUpdatetime": 1525447758
}, {
"id": "car#gmail.com",
"count": 9,
"nodeUpdatetime": 1525447763
},
{
"id": "jason#gmail.com",
"count": 1,
"nodeUpdatetime": 1525447783
}
],
"links": [{
"source": "bmw#gmail.com",
"target": "jason#gmail.com",
"timestamp": 1525312111
}, {
"source": "car#gmail.com",
"target": "jason#gmail.com",
"timestamp": 1525334013
}, {
"source": "bmw#gmail.com",
"target": "car#gmail.com",
"timestamp": 1525334118
}]
}
Using a mongo query, I would like to generate the output to something like this. Basically for the nested data under "nodes", add a new field called "topn" and rank them by count from 1 to 5. The remainder values are null. Can anyone help? Thank you!
{
"_id": "allesgute3",
"nodes": [{
"id": "bmw#gmail.com",
"count": 15,
"nodeUpdatetime": 1525341732,
"topn": 1
}, {
"id": "abc#gmail.com",
"count": 10,
"nodeUpdatetime": null,
"topn": 2
}, {
"id": "xyz#gmail.com",
"count": 8,
"nodeUpdatetime": 1525408742,
"topn": 4
}, {
"id": "wilson#gmail.com",
"count": 4,
"nodeUpdatetime": 1525423847,
"topn": null
}, {
"id": "niv#gmail.com",
"count": 6,
"nodeUpdatetime": 1525447758,
"topn": 5
}, {
"id": "car#gmail.com",
"count": 9,
"nodeUpdatetime": 1525447763,
"topn": 3
},
..............
The following should get you what you want:
db.collection.aggregate([
    // Flatten the "nodes" array: one document per node.
    { $unwind: "$nodes" },
    // Sort descending by "count", so the highest counts come first.
    { $sort: { "nodes.count": -1 } },
    {
        // Rebuild the original document, now with sorted array elements.
        $group: {
            _id: "$_id",
            nodes: { $push: "$nodes" },
            // Carry "links" through the group; every unwound copy holds the
            // same value, so the first one is enough.
            links: { $first: "$links" }
        }
    },
    {
        $addFields: {
            nodes: {
                // Pair every node with its rank: [ node, rank ].
                $zip: {
                    inputs: [
                        "$nodes", // the existing, now sorted, "nodes" array
                        {
                            // Ranks [1 .. min(5, number of nodes)]. Capping at
                            // the array size avoids phantom elements when a
                            // document has fewer than five nodes.
                            $range: [
                                1,
                                { $add: [ { $min: [ 5, { $size: "$nodes" } ] }, 1 ] }
                            ]
                        }
                    ],
                    // Do not stop after the last rank: the remaining nodes
                    // are paired with null.
                    useLongestLength: true
                }
            }
        }
    },
    {
        $addFields: {
            nodes: {
                // Turn every [ node, rank ] pair back into a single object.
                $map: {
                    input: "$nodes",
                    as: "pair",
                    in: {
                        $mergeObjects: [
                            { $arrayElemAt: [ "$$pair", 0 ] },          // the node itself
                            { topn: { $arrayElemAt: [ "$$pair", 1 ] } } // its rank, or null
                        ]
                    }
                }
            }
        }
    }
])