Mongodb incrementing merge - mongodb

How is it possible to merge the following "clicks" collection into "sessions" collection:
clicks:
{session_id: 2, time: 12},
{session_id: 2, time: 12.1},
{session_id: 3, time: 13},
sessions:
{session_id:1, start_time: 1, clicks: 1},
{session_id:2, start_time: 2, clicks: 1}
so that collection "sessions" becomes:
{session_id: 1, start_time: 1, clicks: 1}, // "old" sessions data remains as is
{session_id: 2, start_time: 2, clicks: 3}, // start_time is still 2, clicks have incremented
{session_id: 3, start_time: 13, clicks: 1} // a session with a new session_id is added
and so that it will be possible to repeat the process when more clicks are added to "clicks" collection filtering it on {time: { $gt: 13 }} (to avoid processing the same clicks again).

Assuming session_id is a unique field (enforced by a unique index on the sessions collection), you can first use $group to aggregate the click records by session_id, and then $merge the result into the sessions collection.
// Roll the click events up per session, then upsert the totals into "sessions".
// To process only new clicks on a later run, put a $match stage (for example
// { $match: { time: { $gt: 13 } } }) in front of the $group.
db.clicks.aggregate([
  // One document per session: earliest click time and number of clicks.
  {
    $group: {
      _id: "$session_id",
      start_time: { $min: "$time" },
      clicks: { $sum: 1 }
    }
  },
  // Expose the group key as session_id and drop _id from the merged document.
  {
    $project: { _id: 0, session_id: "$_id", start_time: 1, clicks: 1 }
  },
  {
    $merge: {
      into: "sessions",
      on: "session_id",
      // Make the incoming click count available to whenMatched as $$delta.
      let: { delta: "$clicks" },
      // Existing session: add the new clicks; start_time is left untouched.
      whenMatched: [
        { $set: { clicks: { $add: ["$clicks", "$$delta"] } } }
      ],
      // Unknown session_id: insert the grouped document as is.
      whenNotMatched: "insert"
    }
  }
])
Mongo Playground

Related

MongoDB - Get rank of the document based on frequency

[
{_id: 1, query: 'A', createdAt: 1660610671 },
{_id: 2, query: 'A', createdAt: 1660610672 },
{_id: 3, query: 'A', createdAt: 1660610673 },
{_id: 4, query: 'A', createdAt: 1660610674 },
{_id: 5, query: 'B', createdAt: 1660610675 },
{_id: 6, query: 'C', createdAt: 1660610676 },
{_id: 7, query: 'C', createdAt: 1660610677 },
{_id: 8, query: 'C', createdAt: 1660610678 },
{_id: 9, query: 'D', createdAt: 1660610680 },
{_id: 10, query: 'D', createdAt: 1660610681 },
]
I have the above database structure. I want to get rank from the frequency of the query value in a specific period.
Maybe it would be something like this.
Queries.getRank({ key: 'query', createdAt: {$gte: startUnix, $lt: endUnix } })
I expect the result as below.
Rank
[
{rank: 1, query: 'A', frequency: 4},
{rank: 2, query: 'C', frequency: 3},
{rank: 3, query: 'D', frequency: 2},
{rank: 4, query: 'B', frequency: 1}
]
Is there a way to achieve it? Thanks.
$match - Filter document within the range for createdAt field (if needed).
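$group - Group by query and count the documents in each group as frequency (using $sum: 1; the $count accumulator is an equivalent alternative on MongoDB 5.0+).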
$project - Decorate the output document(s).
$setWindowFields - With $rank to perform ranking by sorting frequency descending. Consider $denseRank instead if queries with the same frequency should share a rank without leaving gaps in the ranking.
db.collection.aggregate([
  // Optional: a $match stage on createdAt goes here to restrict the period.

  // Count how many documents share each query value.
  { $group: { _id: "$query", frequency: { $sum: 1 } } },

  // Reshape to { query, frequency }.
  { $project: { _id: 0, query: "$_id", frequency: "$frequency" } },

  // Rank all results in a single partition, highest frequency first.
  {
    $setWindowFields: {
      partitionBy: null,
      sortBy: { frequency: -1 },
      output: { rank: { $rank: {} } }
    }
  }
])
Demo @ Mongo Playground
You can write the following aggregation pipeline:
db.collection.aggregate([
  // Frequency of each query value.
  { $group: { _id: "$query", frequency: { $sum: 1 } } },
  { $project: { query: "$_id", frequency: 1, _id: 0 } },

  // Most frequent first.
  { $sort: { frequency: -1 } },

  // Collect everything into one array so that each element gets a position...
  { $group: { _id: null, array: { $push: "$$ROOT" } } },

  // ...and unwind it again, storing the zero-based position in "rank".
  { $unwind: { path: "$array", includeArrayIndex: "rank" } },

  // Turn the zero-based position into a one-based rank and flatten the output.
  {
    $project: {
      _id: 0,
      rank: { $add: ["$rank", 1] },
      frequency: "$array.frequency",
      query: "$array.query"
    }
  }
]);
Playground link.
In this, we first calculate the frequency for each query, then we sort it by the frequency, and finally, we push all documents in an array and calculate the rank, using array index.

mongoDB filter, sort, and rank result

I have a collection like this
{
id: 1,
category: "food",
score: 20
}
{
id: 2,
category: "drink",
score: 19
}
{
id: 3,
category: "food",
score: 50
}
{
id: 4,
category: "food",
score: 30
}
Note that id is not globally unique,
but it is unique within a category.
so it is possible to have
{id: 1, category: "food"}
{id: 1, category: "drink"}
but not
{id: 1, category: "food"}
{id: 1, category: "food"}
here's what I want to do
find all category == "food"
-> it will give id: 1, 3, 4
// I can add some other filter here before sort happen
// like id less than 100
sort them by score
-> it will give id: 3, 4, 1 // highest score must be the first entry
then what is the rank of id: [4, 1]
-> it should give me {id: 4, rank: 2}, {id: 1, rank: 3}
how can I achieve this?
please give me some snippets or idea
// Rank the documents of one category by score (highest first) and return the
// 1-based rank of selected ids, e.g. { id: 4, rank: 2 }, { id: 1, rank: 3 }.
db.collection.aggregate([
  // Filter conditions; further filters (e.g. on id) can be added here,
  // before the ranking is computed.
  { $match: { category: "food" } },

  // Highest score first.
  { $sort: { score: -1 } },

  // Group everything into a single array so each document gets a position.
  { $group: { _id: null, data: { $push: "$$ROOT" } } },

  // Unwind again, recording the zero-based array position in "index".
  { $unwind: { path: "$data", includeArrayIndex: "index" } },

  // Keep only the ids whose rank is requested.
  { $match: { "data.id": { $in: [4, 1] } } },

  // includeArrayIndex is zero-based, so add 1 to obtain the rank.
  { $project: { _id: 0, id: "$data.id", rank: { $add: ["$index", 1] } } }
])
Sample

Aggregation with mongodb

We are saving player stats for each match in MongoDb.
{idPlayer: 27, idTeam: 6, matchId: 1, score: 90},
{idPlayer:38, idTeam: 9, matchId:1, score: 6},
{idPlayer:5, idTeam:8, matchId:2, score: 20}
We want to know how many matches a team has played:
We want result as:
{idTeam, sumMatches}
{idTeam: 8, sumMatches: 6}
{idTeam: 9, sumMatches: 4}
We are trying to do this with aggregations, but we don't get this result.
Any idea how to approach this issue?
This should do it:
db.collection.aggregate([
  // Collect the distinct matchIds seen for every team; $addToSet ignores
  // repeats, so several player rows from the same match count only once.
  { $group: { _id: "$idTeam", matches: { $addToSet: "$matchId" } } },

  // The number of distinct matches is the size of that set.
  { $project: { _id: 0, idTeam: "$_id", sumMatches: { $size: "$matches" } } }
])

Duplicate elements in a mongo db collection

Is there a quick, efficient way to duplicate elements in a MongoDB collection based on a property? In the example below, I am trying to duplicate the elements based on a jobId.
I am using Spring boot, so any example using Spring boot API would be even more helpful.
Original Collection
{ _id: 1, jobId: 1, product: "A"},
{ _id: 2, jobId: 1, product: "B"},
{ _id: 3, jobId: 1, product: "C"},
After duplication
{ _id: 1, jobId: 1, product: "A"},
{ _id: 2, jobId: 1, product: "B"},
{ _id: 3, jobId: 1, product: "C"},
{ _id: 4, jobId: 2, product: "A"},
{ _id: 5, jobId: 2, product: "B"},
{ _id: 6, jobId: 2, product: "C"},
You can use following aggregation:
db.col.aggregate([
  // Gather every source document into a single array.
  { $group: { _id: null, values: { $push: "$$ROOT" } } },

  // size  = number of source documents
  // range = [0, 1, 2], i.e. one entry per copy to generate
  //         (offset 0 reproduces the original documents)
  {
    $addFields: {
      size: { $size: "$values" },
      range: { $range: [0, 3] }
    }
  },

  // Two unwinds produce every (offset, document) combination.
  { $unwind: "$range" },
  { $unwind: "$values" },

  // Shift _id by offset * size and jobId by offset for each copy.
  {
    $project: {
      _id: { $add: ["$values._id", { $multiply: ["$range", "$size"] }] },
      jobId: { $add: ["$values.jobId", "$range"] },
      product: "$values.product"
    }
  },

  { $sort: { _id: 1 } },

  // Write the result to the target collection.
  { $out: "outCollection" }
])
The algorithm is quite simple here: we want to iterate over two sets:
first one defined by all items from your source collection (that's why I'm grouping by null)
second one defined artificially by $range operator. It will define how many times we want to multiply our collection (3 times in this example)
The double $unwind generates as many documents as we need. The formula for each new _id is then: _id = _id + range * size. The last step just redirects the aggregation output to your collection.

MongoChef Aggregation: In one query find and show average score for max 3, 2 and 1 'project month' grouped data

Using MongoChef aggregation, if you have data such as:
{_id: 1, Mnt: 2016-05-01, Score: 85}
{_id: 2, Mnt: 2016-05-01, Score: 85}
{_id: 3, Mnt: 2016-03-01, Score: 80}
{_id: 4, Mnt: 2016-03-01, Score: 80}
{_id: 5, Mnt: 2016-03-01, Score: 80}
{_id: 6, Mnt: 2016-01-01, Score: 75}
and want to:
Calculate max month in the collection (i.e. M1 : May 2016),
Group by "Mnt" - which might not be sequential latest months, e.g. collection above latest/largest 3 months being: 2016-May, 2016-March, 2016-January,
Find the latest X month totals,
Calculate the Average of each,
e.g.
{M1 : 85, M2 : 82, M3 : 80.8}
I.e.
M1 is average of max month in collection,
M2 is average of max 2 project months in collection
M3 is average of max 3 project months in collection etc.
This is a quick-and-dirty solution, but it should give you an idea of how to start:
// Running totals over the months visited so far (newest month first).
var i = 1;
var elemSum = 0;
var elemCount = 0;

db.a.aggregate([
  // Per (year, month): average score, number of scores and sum of scores.
  {
    $group: {
      _id: { year: { $year: "$Mnt" }, month: { $month: "$Mnt" } },
      avg: { $avg: "$Score" },
      elemCount: { $sum: 1 },
      elemSum: { $sum: "$Score" }
    }
  },
  // Newest month first.
  { $sort: { "_id.year": -1, "_id.month": -1 } },
  // Only the first 3 records, i.e. the latest three months.
  { $limit: 3 }
]).forEach(function (doc) {
  // Accumulate, then print the average over all months seen up to now:
  // M1 = latest month, M2 = latest two months, M3 = latest three months.
  elemSum += doc.elemSum;
  elemCount += doc.elemCount;
  print("M" + i + ": " + (elemSum / elemCount));
  i += 1;
});
Note that I converted the Mnt field to ISODate values, so that $year and $month can be applied to it:
// Sample data: Mnt is stored as a real date value rather than a bare literal.
db.a.insert([
  { _id: 1, Mnt: ISODate("2016-05-01T15:44:00.255Z"), Score: 85 },
  { _id: 2, Mnt: ISODate("2016-05-01T15:44:00.255Z"), Score: 85 },
  { _id: 3, Mnt: ISODate("2016-03-01T15:44:00.255Z"), Score: 80 },
  { _id: 4, Mnt: ISODate("2016-03-01T15:44:00.255Z"), Score: 80 },
  { _id: 5, Mnt: ISODate("2016-03-01T15:44:00.255Z"), Score: 80 },
  { _id: 6, Mnt: ISODate("2016-01-01T15:44:00.255Z"), Score: 75 }
])
Code that works - calculate a running 12-month and current month Net Promoter Scores:
db.Collection.aggregate(
// Pipeline
// Stage 1
{
$project: {
ID: "$ID",
Mnt: "$Mnt",
CntryReg: "$CntryReg",
Prom: "$Prom",
}
},
// Stage 2
{
$group: {
_id: '$Mnt',
docs: {
$push: {
Mnt: "$Mnt",
CntryReg: "$CntryReg",
Prom: "$Prom"
}}
}
},
// Stage 3
{
$sort: {
_id: -1
}
},
// Stage 4
{
$limit: 12
},
// Stage 5
{
$group: {
"_id": null,
"values": { "$push": "$docs" }
}
},
// Stage 6
{
$unwind: {
"path": "$values", "includeArrayIndex": "rank"
}
},
// Stage 7
{
$unwind: "$values"
},
// Stage 8
{
$project: {
_id: 0,
Mnt: "$values.Mnt",
CntryReg: "$values.CntryReg",
Prom: "$values.Prom",
rank: "$rank"
}
},
// Stage 9
{
$group: {
_id: {CntryReg:"$CntryReg"} ,
AR12: { $sum: { $cond : [{ $eq : ["$Prom", "D"]}, 1, 0]} },
Ind12: { $sum: { $cond : [{ $eq : ["$Prom", "I"]}, 1, 0]} },
Loy12: { $sum: { $cond : [{ $eq : ["$Prom", "P"]}, 1, 0]} },
Sum12: {$sum: 1 },
AR1: { $sum: { $cond : [{ $and : [{ $eq : ["$Prom", "D"]} , {$eq : ["$rank", 0]} ]}, 1, 0]} },
Loy1: { $sum: { $cond : [{ $and : [{ $eq : ["$Prom", "P"]} , {$eq : ["$rank", 0]} ]}, 1, 0]} },
Ind1: { $sum: { $cond : [{ $and : [{ $eq : ["$Prom", "I"]} , {$eq : ["$rank", 0]} ]}, 1, 0]} },
Sum1: { $sum: { $cond : [ { $eq : ["$rank", 0]}, 1, 0]} },