MongoDB Delete records where group by count - mongodb

I have a lot of records that contain duplicate addresses. Basically, I want to delete the records whose address is duplicated more than 50 but fewer than 300 times.
This is how I get those records that I want to delete:
// Lists the addresses shared by more than 50 but fewer than 300 documents,
// together with how many documents share each one, largest count first.
db.directories.aggregate( [
// One output document per distinct address; total counts its members.
{ $group: { _id: { address: "$address" }, total: { $sum: 1 } } },
// Keep only groups with 50 < total < 300 (both bounds exclusive).
{ $match: { total: { $gt: 50, $lt: 300 } } },
// Highest counts first.
{ $sort: { total: -1 } }
], { allowDiskUse: true });

You can use the cursor method forEach() to iterate the cursor returned from the aggregate() method and delete the documents using the remove() method (or deleteMany(), its replacement in current shells), as in the following example:
// Deletes every document whose address occurs more than 50 but fewer than
// 300 times in the directories collection.
var pipeline = [
    // One group per distinct address, with the number of documents sharing it.
    { $group: { _id: { address: "$address" }, total: { $sum: 1 } } },
    // Keep only groups with 50 < total < 300. No $sort stage is needed:
    // the order in which the groups are deleted does not affect the result.
    { $match: { total: { $gt: 50, $lt: 300 } } }
];
var options = { allowDiskUse: true };
db.directories.aggregate(pipeline, options).forEach(function (doc) {
    // deleteMany() is the replacement for the deprecated remove(); it deletes
    // all documents that share this group's address.
    db.directories.deleteMany({ address: doc._id.address });
});

Related

Pagination of facet pipelines

I'm not very experienced with mongodb, so maybe someone can help me.
I have a collection kind of like this:
{ user: 1, type: 0 , ... },
{ user: 1, type: 1, ... },
{ user: 1, type: 1, ... },
...
Now I want for a specific user documents grouped by type, then sorted + offset & limit. ie. like:
[ { $match: { user: 1 } },
{ $facet: {
type0: [
{ $match: { type: 0 },
{ $sort: ... },
{ $skip: offset0 },
{ $limit: limit0 }
],
type1: [
{ $match: { type: 1 },
{ $sort: ... },
{ $skip: offset1 },
{ $limit: limit1 }
]
}
]
This works, but is there a way to also get the total count for every type? $facet within $facet is not allowed, but maybe there is some completely different way? Or will I have to make an aggregate call for every single type in the end?
Something like this:
// Builds the sub-pipeline for one facet: documents of a single type,
// sorted by type descending, then paginated with $skip / $limit.
function pageOfType(typeValue, skipCount, limitCount) {
    return [
        { $match: { type: typeValue } },
        { $sort: { type: -1 } },
        { $skip: skipCount },
        { $limit: limitCount }
    ];
}

// Sub-pipeline producing one { type, cnt } document per distinct type.
var countsByType = [
    { $group: { "_id": "$type", cnt: { $sum: 1 } } },
    { $project: { type: "$_id", cnt: 1, _id: 0 } }
];

// For user 1: a page of type-0 documents, a page of type-1 documents, and
// the per-type totals, all returned by a single $facet stage.
db.collection.aggregate([
    { $match: { user: 1 } },
    {
        $facet: {
            type0: pageOfType(0, 1, 2),
            type1: pageOfType(1, 1, 2),
            counts: countsByType
        }
    }
])
Explained:
You add one more sub-pipeline named "counts" to the $facet stage to compute the total count for each of the two types.
playground

Sum value when satisfy condition in MongoDB

I am trying to get sum of values when certain condition is satisfied in the document.
In the below query i want to get sum of currentValue only when componentId = "ABC"
// Sums currentValue per componentId. There is no filtering stage, so every
// componentId in the collection gets its own total. The two stages are
// passed as separate arguments rather than inside an array.
db.Pointnext_Activities.aggregate(
// Stage 1: keep only the two fields used by the grouping below.
{ $project: {
_id: 0,
componentId:1,
currentValue:1
}
},
// Stage 2: one output document per componentId with the summed currentValue.
{ $group:
{ _id: "$componentId",
total: { $sum: "$currentValue" }
}
}
)
Please try this :
// Total currentValue for componentId 'ABC', returned as { componentId, total }.
// Filter first so that only the matching documents reach the grouping stage.
var onlyAbc = { $match: { componentId: 'ABC' } };
// One group per componentId (a single group after the filter above).
var sumPerComponent = {
    $group: {
        _id: "$componentId",
        total: { $sum: "$currentValue" }
    }
};
// Expose the group key under the name componentId and drop _id.
var exposeComponentId = { $project: { 'componentId': '$_id', total: 1, _id: 0 } };
db.Pointnext_Activities.aggregate([onlyAbc, sumPerComponent, exposeComponentId])
If you just need the total value and don't care about componentId being returned, try this:
// Total currentValue for componentId 'ABC', returned as { total } only.
db.Pointnext_Activities.aggregate([
    { $match: { componentId: 'ABC' } },
    {
        $group: {
            // A constant _id puts every input document into one group;
            // null is the conventional constant for that.
            _id: null,
            total: { $sum: "$currentValue" }
        }
    },
    // Drop the group key so only the total is returned.
    { $project: { total: 1, _id: 0 } }
])
In aggregation it is ideal to always start with a filter operation, i.e. $match, as it passes only the needed documents on to the later stages.

Grouping complex filtered records

I only want to see how many persons I have with a specific block id. I tried grouping 'bot_survey' after filtering the array by block id, but I couldn't get the correct result. Here are my 2 records:
{
date: '2019-06-13',
blocks:[
{
block_id: '5caf27cfcb4b530e4d51bb72',
triggered_by:[
{
person_id: '2342'
},
{
person_id: '436'
}
]
}
]
},
{
date: '2019-06-14',
blocks:[
{
block_id: '5caf27cfcb4b530e4d51bb72',
triggered_by:[
{
person_id: '2342'
},
{
person_id: '965'
}
]
}
]
},
and this is my query
// Asker's attempt: select documents up to a date that contain the target
// block, narrow blocks down to that block, then group on a person field.
db.getCollection('analytics').aggregate([
// Documents dated on or before 2019-06-13 that contain the target block_id.
{$match: {date: {$lte: '2019-06-13'},"blocks.block_id": '5caf27cfcb4b530e4d51bb72'}},
{
$project: {
date: 1,
// bot_survey = the elements of blocks whose block_id equals the target id.
bot_survey: {
$filter: {
input: "$blocks",
as: "blocks",
cond: { $eq: [ "$$blocks.block_id", '5caf27cfcb4b530e4d51bb72' ] }
}
}
}
},
{
$group: {
// NOTE(review): the sample records use person_id, not person_web_id — confirm the field name.
_id: {date: "$bot_survey.triggered_by.person_web_id"},
}
}
])
But i need a result something like
{
person_id: '2342',
person_id: '436',
person_id: '965'
}
so i can get count of results. How can i do that?
Based on your Input data you can do aggregation like,
// Collects the distinct persons that triggered each block. Runs against the
// 'analytics' collection, the one queried in the question.
db.getCollection('analytics').aggregate([
    // One document per element of blocks.
    { $unwind: "$blocks" },
    // One document per element of that block's triggered_by array.
    { $unwind: "$blocks.triggered_by" },
    {
        $group: {
            _id: "$blocks.block_id",
            // $addToSet keeps each { person_id } value only once per block.
            triggered_by: {
                $addToSet: { person_id: "$blocks.triggered_by.person_id" }
            }
        }
    }
])
Output:
{"_id":"5caf27cfcb4b530e4d51bb72","triggered_by":[{"person_id":"2342"},{"person_id":"965"},{"person_id":"436"}]}

Mongo aggregation query to calculate multiple computed values

Given the below documents.
{
_id: 1,
ExpirationDate: ISODate("2017-05-02T09:29:46.006+0000")
}
{
_id: 2,
ExpirationDate: ISODate("2017-05-12T09:29:46.006+0000")
}
{
_id: 3,
ExpirationDate: ISODate("2017-05-23T09:29:46.006+0000")
}
How can I use the aggregation pipeline to compute the following output?
{
"NumberOfSubscriptionExpiringToday": 12,
"NumberOfSubscriptionExpiringWithInAWeek": 4
}
I am looking to accomplish this with just one query instead of two. Here is what I have so far...
// Query 1: counts documents whose ExpirationDate lies between 2017-05-02 and
// 2017-05-03 (both bounds inclusive) and reports it as "ExpiringToday".
// The collection receiver in front of .aggregate is omitted in this snippet.
.aggregate([
{
$match: {
"ExpirationDate": {
$gte: ISODate("2017-05-02T00:00:00.000+0000"),
$lte: ISODate("2017-05-03T00:00:00.000+0000")
}
}
},
{
$project: {
_id: 1
}
},
{
$count: "ExpiringToday"
}
]);
// Query 2: same shape with the upper bound moved to 2017-05-08; the result
// is reported as "ExpiringInSevenDays".
.aggregate([
{
$match: {
"ExpirationDate": {
$gte: ISODate("2017-05-02T00:00:00.000+0000"),
$lte: ISODate("2017-05-08T00:00:00.000+0000")
}
}
},
{
$project: {
_id: 1
}
},
{
$count: "ExpiringInSevenDays"
}
]);
You can do it in single aggregation query with $cond operator to check if each document expiration date falls into [today, tomorrow) range, or in [tomorrow, weekAfterToday) range:
// Boundaries of the two half-open windows:
// [today, tomorrow) and [tomorrow, weekAfterToday).
var today = ISODate("2017-05-04T00:00:00.000");
var tomorrow = ISODate("2017-05-05T00:00:00.000");
var weekAfterToday = ISODate("2017-05-11T00:00:00.000");

// 1 when ExpirationDate falls in [today, tomorrow), otherwise 0.
var expiresTodayFlag = {
    $cond: [
        {
            $and: [
                { $gte: ["$ExpirationDate", today] },
                { $lt: ["$ExpirationDate", tomorrow] }
            ]
        },
        1,
        0
    ]
};

// 1 when ExpirationDate is at or after tomorrow, otherwise 0. The upper
// bound (weekAfterToday) is already enforced by the $match stage.
var expiresLaterFlag = {
    $cond: [{ $gte: ["$ExpirationDate", tomorrow] }, 1, 0]
};

db.collection.aggregate([
    // Only documents expiring in [today, weekAfterToday) are considered.
    { $match: { "ExpirationDate": { $gte: today, $lt: weekAfterToday } } },
    // Tag every document with the two 0/1 flags.
    { $project: { ExpiringToday: expiresTodayFlag, ExpiringInAWeek: expiresLaterFlag } },
    // Constant _id: a single group whose sums are the two counts.
    {
        $group: {
            _id: 1,
            NumberOfSubscriptionExpiringToday: { $sum: "$ExpiringToday" },
            NumberOfSubscriptionExpiringWithInAWeek: { $sum: "$ExpiringInAWeek" }
        }
    },
    // Drop the constant group key from the result.
    { $project: { _id: 0 } }
]);
Also consider making two simple requests:
// The same two counts as two independent queries. today, tomorrow and
// weekAfterToday are the date boundaries declared for the aggregation
// example. countDocuments() replaces the deprecated count().
var numberOfSubscriptionExpiringToday = db.collection.countDocuments(
    { "ExpirationDate": { $gte: today, $lt: tomorrow } }
);
var numberOfSubscriptionExpiringWithInAWeek = db.collection.countDocuments(
    { "ExpirationDate": { $gte: tomorrow, $lt: weekAfterToday } }
);

Mongodb Aggregate - Count fields that equals value in array, but keep both arrays

I need to calculate the percentage of finalized/total items. The problem I have is calculating how many fields in the array equal to 'finished'. With my current solution I get finished items correctly, but total items are the same number as finished.
This is what I'm doing:
// Asker's attempt at a finished/total ratio per person.
Items.aggregate([
{
$match: {
status: {
// NOTE(review): $ne is given an array here; if the intent is "status is
// neither 'cancelled' nor 'pending'", $nin is probably what is meant — confirm.
$ne: ['cancelled','pending']
}
}
},
{
// Per person, push one { total, finished } entry for every item;
// finished holds the status only when it equals 'finished', else null.
$group: {
_id: '$person',
items: {
$push: {
total: '$status',
finished: {
$cond: [
{
$eq: ['$status', 'finished']
},
'$status',
null
]
}
}
}
}
},
{
$unwind: '$items'
},
{
// Drops every entry whose finished is null, so only finished items reach
// the next $group — which is why total ends up equal to finished.
$match: {
'items.finished': {
$ne: null
},
}
},
{
// Regroup per person into two parallel arrays.
$group: {
_id: '$_id',
success: {
$push : '$items.finished'
},
total: {
$push: '$items.total'
}
}
},
{
// Array lengths become the two counts.
$project: {
successCount: {
$size: '$success'
},
totalCount: {
$size: '$total'
}
}
},
{
// Ratio of finished items to total items.
$project: {
successScore: {
$divide: [ "$successCount", "$totalCount"]
}
}
}
]);
I also tried a simpler solution, but can't figure out how to keep the total count field in the pipeline after doing $unwind:
Items.aggregate([
{
$group: {
_id: '$_id',
totalCount: {$sum: 1},
finished: { $cond : [ {$eg: ['status', 'finished']}, $status, null] }
}
},
{ $unwind: '$finished'},
...
Then I can't access totalCount later