mongodb aggregation matching properties - mongodb

I have a user_status collection and its schema is like this
// Mongoose model registered under the name 'user_status'.
const userStatus = mongoose.model(
'user_status',
new mongoose.Schema(
{
// Required ObjectId reference to a document of the 'user' model.
user: {
type: mongoose.Schema.Types.ObjectId,
required: true,
ref: 'user'
},
// Activity flag; false unless set explicitly.
isActive: {
type: Boolean,
default: false
}
},
// Schema option that has Mongoose maintain createdAt / updatedAt on each document.
{ timestamps: true }
)
);
I need to get all the active users count for a given specific month (1, 2, etc..)
I tried the following, but this snippet does not give the expected output:
// get all active user count for a specific month
// POST /report: reads `month` from the request body and responds with the aggregation result.
router.post('/report', async (req, res) => {
const selectedMonth = req.body.month;
console.log('month', selectedMonth);
const usersStatus = await UserStatus.aggregate([
// NOTE(review): $project emits only _id and the computed month, so isActive and user
// are no longer present for the $match and $group stages below.
{ $project: { month: { $month: '$updatedAt' } } },
{
$match: { $and: [{ month: { $eq: selectedMonth } }, { isActive: true }] }
},
// One output document per distinct `user` value, counting the documents that reached this stage.
{ $group: { _id: '$user', count: { $sum: 1 } } }
]).exec();
res.status(200).send(usersStatus);
});
Could you please tell me where I'm wrong?

The first $project stage keeps only the computed month (plus _id), so the isActive and user fields are no longer available to the later $match and $group stages.
You can use below aggregation
// Count matching status documents per user for the selected month.
const usersStatus = await UserStatus.aggregate([
  // $addFields keeps every existing field and adds the month of updatedAt.
  {
    $addFields: {
      month: { $month: '$updatedAt' }
    }
  },
  // Both conditions must hold: the requested month and an active status.
  {
    $match: {
      month: selectedMonth,
      isActive: true
    }
  },
  // One result per user with the number of matching documents.
  {
    $group: {
      _id: '$user',
      count: { $sum: 1 }
    }
  }
])
Or, more conveniently, do the filtering in a single $match stage using $expr:
// Same report without a helper field: $expr lets $match evaluate aggregation expressions.
const usersStatus = await UserStatus.aggregate([
  {
    $match: {
      $expr: {
        $and: [
          // Month of updatedAt equals the requested month.
          { $eq: [{ $month: '$updatedAt' }, selectedMonth] },
          // Status document is flagged active.
          { $eq: ['$isActive', true] }
        ]
      }
    }
  },
  // One result per user with the number of matching documents.
  {
    $group: {
      _id: '$user',
      count: { $sum: 1 }
    }
  }
])

Related

How to improve MongoDB aggregation with group and accumulator stages?

The query below:
match a selection of docs with a match
group by day of month and then using an accumulator add to the returned dataset the URLs and the counts
The problem is that the query is pretty complex, and as the data grows in volume it doesn't seem very performant. Is there an easier way to achieve the same thing in MongoDB?
The output shape looks like:
{
"results": [
{
"_id": {
"month": 2,
"day": 2,
"year": 2021
},
"urls": [
{
"url": "https://shop.mydomain.com/product/golden-axe",
"count": 20
},
{
"url": "https://shop.mydomain.com/product/phantasy-star",
"count": 218
},
{
"url": "https://shop.mydomain.com/product/sega-arcades-retro",
"count": 30
}
],
"count": 268
},
{
"_id": {
"month": 2,
"day": 3,
"year": 2021
},
"urls": [
{
"url": "https://shop.mydomain.com/product/golden-axe",
"count": 109
},
{
"url": "https://shop.mydomain.com/product/phantasy-star",
"count": 416
},
{
"url": "https://shop.mydomain.com/product/sega-arcades-retro",
"count": 109
}
],
"count": 634
},
// Pipeline: daily totals for 'itemLineView' events plus a per-URL breakdown that is
// built by a server-side JavaScript $accumulator.
const aggregate = [
  {
    // Restrict to the requested source, date window and URL prefix.
    $match: {
      source: 'itemLineView',
      createdAt: {
        $gte: new Date(query.dateGT),
        $lte: new Date(query.dateLT)
      },
      // NOTE(review): query.url is interpolated into the pattern unescaped, so regex
      // metacharacters in it (".", "?", ...) are interpreted as regex syntax — confirm
      // whether the caller sanitizes it.
      url: { $regex: `^${query.url}` },
    }
  },
  {
    $group: {
      // One group per calendar day of createdAt.
      _id: {
        month: {
          $month: '$createdAt'
        },
        day: {
          $dayOfMonth: '$createdAt'
        },
        year: {
          $year: '$createdAt'
        }
      },
      urls: {
        // https://docs.mongodb.com/manual/reference/operator/aggregation/accumulator/#grp._S_accumulator
        $accumulator: {
          // Start every group with an empty list of { url, count } entries.
          init: function (): AccumulatorSourceStats {
            return {
              origins: []
            };
          },
          // initArgs: [], // Argument arr to pass to the init function
          // Increment the entry for this url, creating it on first sight.
          accumulate: function (state: AccumulatorSourceStats, url: string) {
            const index = state.origins.findIndex(function (origin) {
              return origin.url === url;
            });
            if (index === -1) {
              state.origins.push({
                url: url,
                count: 1
              });
            } else {
              ++state.origins[index].count;
            }
            return state;
          },
          accumulateArgs: ['$url'], // Argument(s) passed to the accumulate function
          // Combine two partial states. A url can be present in both, so its counts
          // are summed; plain concatenation would list the same url twice.
          merge: function (state1: AccumulatorSourceStats, state2: AccumulatorSourceStats) {
            state2.origins.forEach(function (incoming) {
              const index = state1.origins.findIndex(function (origin) {
                return origin.url === incoming.url;
              });
              if (index === -1) {
                state1.origins.push(incoming);
              } else {
                state1.origins[index].count += incoming.count;
              }
            });
            return state1;
          },
          finalize: function (state: AccumulatorSourceStats) { // Adjust the state to only return field we need
            // Return just the entries, ordered by url.
            const sortByUrl = function (a: AccumulatorSourceStatsOrigin, b: AccumulatorSourceStatsOrigin) {
              if (a.url < b.url) {
                return -1;
              }
              if (a.url > b.url) {
                return 1;
              }
              return 0;
            };
            return state.origins.sort(sortByUrl);
          },
          lang: 'js'
        }
      },
      // Total number of matched documents for the day.
      count: { $sum: 1 }
    }
  },
  // Order the daily groups by their _id document.
  { $sort: { _id: 1 } }
];
return this.model.aggregate(aggregate);
From the docs:
Executing JavaScript inside of an aggregation operator may decrease performance. Only use the $accumulator operator if the provided pipeline operators cannot fulfill your application’s needs.
It is considered good practice to avoid JavaScript code within a Mongo pipeline; it should be used only as a last resort. In this case we can avoid it by simply grouping twice with $group: once per URL per day, and then once per day, like so:
// Same report as the $accumulator version, using only built-in stages:
// group per (day, url) first, then roll the per-url counts up into one document per day.
db.collection.aggregate([
  {
    // Restrict to the requested source, date window and URL prefix.
    $match: {
      source: "itemLineView",
      createdAt: {
        $gte: new Date(query.dateGT),
        $lte: new Date(query.dateLT)
      },
      url: {
        $regex: `^${query.url}`
      },
    }
  },
  {
    // First pass: one document per day and url, counting its views.
    $group: {
      _id: {
        month: {
          $month: "$createdAt"
        },
        day: {
          $dayOfMonth: "$createdAt"
        },
        year: {
          $year: "$createdAt"
        },
        url: "$url"
      },
      count: {
        $sum: 1
      }
    }
  },
  // Feed the second $group in url order so the pushed `urls` entries come out
  // ordered by url, as the original finalize() step produced them.
  // NOTE(review): relies on $push preserving the order of incoming documents.
  {
    $sort: {
      "_id.url": 1
    }
  },
  {
    // Second pass: one document per day with the url breakdown and the day total.
    $group: {
      _id: {
        month: "$_id.month",
        day: "$_id.day",
        year: "$_id.year",
      },
      urls: {
        $push: {
          url: "$_id.url",
          count: "$count"
        }
      },
      count: {
        $sum: "$count"
      }
    }
  },
  // Keep the day ordering of the original pipeline.
  {
    $sort: {
      _id: 1
    }
  }
])
Mongo Playground

Mongoose subquery

I have a collection that looks like below:
[
{
"orderNum": "100",
"createdTime": ISODate("2020-12-01T21:00:00.000Z"),
"amount": 100,
"memo": "100memo",
"list": [
1
]
},
{
"orderNum": "200",
"createdTime": ISODate("2020-12-01T21:01:00.000Z"),
"amount": 200,
"memo": "200memo",
"list": [
1,
2
]
},
{
"orderNum": "300",
"createdTime": ISODate("2020-12-01T21:02:00.000Z"),
"amount": 300,
"memo": "300memo"
},
{
"orderNum": "400",
"createdTime": ISODate("2020-12-01T21:03:00.000Z"),
"amount": 400,
"memo": "400memo"
},
]
and I'm trying to get the total amount of orders that were created before order# 300 (so order#100 and #200, total amount is 300).
Does anyone know how to get it via Mongoose?
You can use this one:
db.collection.aggregate([
  // by default the order of documents in a collection is undetermined
  {
    $sort: { orderNum: 1 }
  },
  // put all documents into one document
  {
    $group: {
      _id: null,
      data: { $push: "$$ROOT" }
    }
  },
  // cut desired elements from array: keep everything in front of order "300"
  {
    $set: {
      data: {
        $slice: [
          "$data",
          { $indexOfArray: ["$data.orderNum", "300"] }
        ]
      }
    }
  },
  // transform back to documents
  {
    $unwind: "$data"
  },
  {
    $replaceRoot: { newRoot: "$data" }
  },
  // make summary
  {
    $group: {
      _id: null,
      total_amount: { $sum: "$amount" }
    }
  }
])
Actually it is not needed to $unwind and $group, so the shortcut would be this:
db.collection.aggregate([
  // Fix the document order before collecting.
  {
    $sort: { orderNum: 1 }
  },
  // Collect every document into a single `data` array.
  {
    $group: {
      _id: null,
      data: { $push: "$$ROOT" }
    }
  },
  // Keep only the entries positioned before order "300".
  {
    $set: {
      data: {
        $slice: [
          "$data",
          { $indexOfArray: ["$data.orderNum", "300"] }
        ]
      }
    }
  },
  // Sum the amounts directly on the array; no $unwind / $group needed.
  {
    $project: {
      total_amount: { $sum: "$data.amount" }
    }
  }
])
But the answer from @turivishal is even better.
Update for additional field
// Replacement for the $set stage above: slices `data` and also extracts the memo of order "300".
// NOTE(review): both expressions presumably read the incoming (unsliced) `data` array, since they
// are evaluated in the same stage — confirm.
{
$set: {
data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] },
memo: { $arrayElemAt: [ "$data.memo", { $indexOfArray: ["$data.orderNum", "300"] } ] }
}
}
or
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } },
{ $set: { memo: { $last: { "$data.memo" } } },
$match orderNum less than 300
$group by null and get totalAmount using $sum of amount
YourSchemaModel.aggregate([
  // Keep only orders whose orderNum compares below "300".
  {
    $match: {
      orderNum: { $lt: "300" }
    }
  },
  // Collapse everything into a single document carrying the summed amount.
  {
    $group: { _id: null, totalAmount: { $sum: "$amount" } }
  }
])
Playground

Do an aggregate with a populate

I'm having troubles with the following. I wonder if it's possible to do it with a single query.
So I have the following model :
// Schema stored in the 'analytics' collection: a createdAt date plus a loginTrack array of user references.
const Analytics = new Schema({
createdAt: {
type: Date,
// NOTE(review): this default expression is evaluated once, when the schema is defined,
// and yields a 'YYYY-MM-DD' string rather than a Date — confirm that is intended.
default: Moment(new Date()).format('YYYY-MM-DD')
},
// Each entry holds a user_id referencing the 'Users' model.
loginTrack: [
{
user_id: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Users',
}
}
]
}, { collection: 'analytics' });
And the user model :
// Schema stored in the 'users' collection.
const UserSchema = new mongoose.Schema(
{
// Required nickname, declared unique.
nickname: {
type: String,
required: true,
unique: true
},
// Optional reference to a document of the 'Instances' model; null by default.
instance: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Instances',
default: null
}}, {collection: 'users'});
I want to get the connected users for a specific instance at a specific date.
// First attempt: total number of loginTrack entries per createdAt value within a date range.
// NOTE(review): the snippet as posted stops before the closing `])` of the aggregate call.
AnalyticsModel.aggregate([
{
$match: {
// Range bounds are 'YYYY-MM-DD' strings produced by moment().format().
createdAt: { "$gte": moment(args.startDate).format('YYYY-MM-DD'), "$lt": moment(args.endDate).format('YYYY-MM-DD')}
}
},
{
"$project": {
// Number of loginTrack entries in this document.
users: { $size: "$loginTrack" },
"createdAt": 1,
"_id": 0
}
}, {
// Sum the per-document entry counts for each createdAt value.
"$group": {
"_id": "$createdAt",
"count": { "$sum": "$users" }
}
}
This gets me
[ { _id: '2019-02-11', count: 3 },
{ _id: '2019-02-08', count: 6 },
{ _id: '2019-02-07', count: 19 },
{ _id: '2019-02-06', count: 16 } ]
The expected results have the same shape, but I want to count only the users that belong to a specific instance.
Is it possible to do this with a single query, or do I need to do a populate before the aggregation?
UPDATE
I made some progress on it: I needed to add a $lookup, and I think it's OK now:
// Per createdAt value, count the login entries whose user belongs to args.instance_id.
AnalyticsModel.aggregate([
  // Filter on the date range first so that only the relevant documents are unwound and
  // joined; createdAt is a document-level field, so the result is unchanged.
  {
    $match: {
      createdAt: { $gte: new Date(args.startDate), $lt: new Date(args.endDate) }
    }
  },
  // One document per loginTrack entry.
  { $unwind: '$loginTrack' },
  // Attach the matching document(s) from the 'users' collection as `_users`.
  {
    $lookup: {
      from: 'users',
      localField: 'loginTrack.user_id',
      foreignField: '_id',
      as: '_users'
    }
  },
  {
    $project: {
      // Keep only the joined users that belong to the requested instance.
      _users: {
        $filter: {
          input: '$_users',
          as: 'item',
          cond: { $eq: ['$$item.instance', new ObjectId(args.instance_id)] }
        }
      },
      createdAt: 1,
      _id: 0
    }
  },
  // Add up the remaining users per createdAt value.
  {
    $group: {
      _id: '$createdAt',
      count: { $sum: { $size: '$_users' } }
    }
  }
]);
Also the dates were in string in the model.
The output is now :
[ { _id: 2019-02-11T00:00:00.000Z, count: 2 } ]

Returning a document with two fields from the same array in MongoDB

Given documents such as
{
_id: 'abcd',
userId: '12345',
activities: [
{ status: 'login', timestamp: '10000001' },
{ status: 'logout', timestamp: '10000002' },
{ status: 'login', timestamp: '10000003' },
{ status: 'logout', timestamp: '10000004' },
]
}
I am trying to create a pipeline that returns every user together with their latest login and logout activities recorded between two timestamps. For example, if the two timestamp values are 10000002 and 10000003, the expected document should be
{
_id: 'abcd',
userId: '12345',
login: '10000003',
logout: '10000002'
}
Or, if the two timestamp values are -1 and 10000001, the expected document should be:
{
_id: 'abcd',
userId: '12345',
login: '10000001',
logout: null
}
Etc.
I know it has to do with aggregations, and I need to $unwind, etc., but I'm not sure about the rest, namely evaluating two fields from the same document array
You can try below aggregation:
db.col.aggregate([
  // Flatten: one document per activity entry.
  { $unwind: "$activities" },
  // Keep only activities whose timestamp lies inside the requested window.
  {
    $match: {
      "activities.timestamp": { $gte: "10000001", $lte: "10000002" }
    }
  },
  // Newest first, so the first entry of each status after regrouping is the latest one.
  { $sort: { "activities.timestamp": -1 } },
  // Rebuild one document per original _id with its ordered activities.
  {
    $group: {
      _id: "$_id",
      userId: { $first: "$userId" },
      activities: { $push: "$activities" }
    }
  },
  // Pick the first (latest) login and logout entry, if any.
  {
    $addFields: {
      login: {
        $arrayElemAt: [
          {
            $filter: {
              input: "$activities",
              as: "entry",
              cond: { $eq: ["$$entry.status", "login"] }
            }
          },
          0
        ]
      },
      logout: {
        $arrayElemAt: [
          {
            $filter: {
              input: "$activities",
              as: "entry",
              cond: { $eq: ["$$entry.status", "logout"] }
            }
          },
          0
        ]
      }
    }
  },
  // Emit only the timestamps; $ifNull keeps the key with an explicit null when nothing matched.
  {
    $project: {
      _id: 1,
      userId: 1,
      login: { $ifNull: ["$login.timestamp", null] },
      logout: { $ifNull: ["$logout.timestamp", null] }
    }
  }
])
We need to use $unwind + $sort + $group to make sure that our activities will be sorted by timestamp. After $unwind you can use $match to apply filtering condition. Then you can use $filter with $arrayElemAt to get first (latest) value of filtered array. In the last $project you can explicitly use $ifNull (otherwise JSON key will be skipped if there's no value)
You can use below aggregation
Instead of $unwind, use $lte and $gte inside the $filter aggregation operator.
// Latest login / logout timestamp per user inside a window, without $unwind.
db.collection.aggregate([
  { "$project": {
    "userId": 1,
    // Greatest timestamp among the "login" activities inside the window.
    "login": {
      "$max": {
        // Reduce the matching activities to their timestamps so $max compares timestamps
        // rather than whole sub-documents.
        "$map": {
          "input": {
            "$filter": {
              "input": "$activities",
              "cond": {
                "$and": [
                  { "$gte": ["$$this.timestamp", "10000001"] },
                  { "$lte": ["$$this.timestamp", "10000004"] },
                  // The status must match exactly; $lte would also admit other statuses.
                  { "$eq": ["$$this.status", "login"] }
                ]
              }
            }
          },
          "in": "$$this.timestamp"
        }
      }
    },
    // Greatest timestamp among the "logout" activities inside the window.
    "logout": {
      "$max": {
        "$map": {
          "input": {
            "$filter": {
              "input": "$activities",
              "cond": {
                "$and": [
                  { "$gte": ["$$this.timestamp", "10000001"] },
                  { "$lte": ["$$this.timestamp", "10000004"] },
                  { "$eq": ["$$this.status", "logout"] }
                ]
              }
            }
          },
          "in": "$$this.timestamp"
        }
      }
    }
  }}
])

Exclude user in match MongoDB

I want to add a "not in" condition to my query, but I'm not getting the required result. I want to get the results where the user is not equal to userid, but I'm confused about how to add that. I tried multiple approaches but they all failed.
// GET /myFeedback: aggregates the "tweetsWithSentimentFeedback" collection per topic/group
// and sends the result; `userID` is read from the query string.
server.get('/myFeedback', (req, res) => {
var userid = req.query.userID;
//console.log(req.query);
db.collection("tweetsWithSentimentFeedback").aggregate( [
{
// First pass: count documents per (topic, group, type, user).
$group: {
_id: {
topic: "$topic",
group : "$group",
type : "$type",
user : "$userName"
},
count: { $sum: 1 }
}
},{ $group: {
// Second pass: roll up per (topic, group).
_id: {
topic: "$_id.topic",
group : "$_id.group",
},
typeAndCount: {
$addToSet: {
type: "$_id.type",
count: "$count"
}
},
// NOTE(review): the previous stage stores the user under `_id.user`, not `_id.userName`.
userName: {
$addToSet: {
user: "$_id.userName"
}
},
totalCount: {
$sum: "$count"
}
}
},
// NOTE(review): `userEqual` is not a field produced by the stages above — this is the
// condition the question asks about.
{ $match: { $and: [ { totalCount: { $gt: 0, $lt: 15 } }, {userEqual: { $ne: [ "$userName.user", userid ] }} ] } },
// Then sort
{ "$sort": { "totalCount": -1 } }
], (err, result) => {
// An error is only logged; the 200 response below is sent either way.
if (err) {
console.log(err);
}
res.status(200).send(result);
} );
});
You should add a $match as a first stage to filter the user.
// Placed first in the pipeline, this drops every document whose userName equals userid before any grouping.
{ $match: { userName: { $ne: userid } } }
Update:
// Per (topic, group): type counts, the set of contributing users and the total count,
// restricted to groups with 1..14 documents that `userid` did not contribute to.
db.collection("tweetsWithSentimentFeedback").aggregate([
  {
    // First pass: count documents per (topic, group, type, user).
    $group: {
      _id: {
        topic: "$topic",
        group: "$group",
        type: "$type",
        user: "$userName"
      },
      count: {
        $sum: 1
      }
    }
  },
  {
    // Second pass: roll up per (topic, group).
    $group: {
      _id: {
        topic: "$_id.topic",
        group: "$_id.group"
      },
      typeAndCount: {
        $addToSet: {
          type: "$_id.type",
          count: "$count"
        }
      },
      // The first stage stores the user under `_id.user`, so that is the path to collect.
      userName: {
        $addToSet: "$_id.user"
      },
      totalCount: {
        $sum: "$count"
      }
    }
  },
  {
    // Both conditions go into one filter document. On the `userName` array, $ne keeps
    // only the groups whose set does not contain userid.
    $match: {
      totalCount: {
        $gt: 0,
        $lt: 15
      },
      userName: {
        $ne: userid
      }
    }
  },
  // Largest groups first.
  {
    $sort: {
      totalCount: -1
    }
  }
])