MongoDB Group By query - mongodb

How can I implement this SQL query in MongoDB?
-- For each requested user, pick the track with the highest ID; return at most 100 rows.
SELECT TOP 100 *
FROM Tracks
WHERE ID IN (
    -- One value per user: that user's highest track ID.
    SELECT MAX(ID)
    FROM Tracks
    WHERE UserID IN ([UserIDs...])
    GROUP BY UserID
)
Tracks structure:
Tracks[{_id, userId, {lat, lon}, dateCreate, ...}, ...]
Thanks!

You'd want to use the aggregation framework for this:
// For each requested user, compute the highest _id among that user's tracks,
// then return the 100 users with the highest such value.
db.Tracks.aggregate( [
    // Keep only tracks that belong to the requested users.
    { $match: { 'UserID': { $in: [ UserIDs ] } } },
    // One result per user; `max` holds that user's highest _id.
    { $group: { _id: '$UserID', max: { $max: '$_id' } } },
    // Sort on the computed `max` field: a sort key is a field name, not an operator.
    { $sort: { max: -1 } },
    { $limit: 100 }
] );
First we match against the wanted UserIDs, then we group by UserID and store the maximum _id value in the new max field. Then we sort by max in descending order to get the highest max values first, and finally we limit the result to the top 100.

Related

Is there a way to write a nested query in mongoDB?

I am new to MongoDB and I am trying to write the MongoDB equivalent of the SQL query below:
-- Percentage of a processor's infodocs that are high priority, rounded.
SELECT ROUND(
           -- Numerator: high-priority infodocs for the processor.
           (SELECT COUNT(*) FROM INFODOCS WHERE ML_PRIORITY = HIGH AND PROCESSOR_ID = userid)
           -- Denominator: all infodocs for the same processor.
           / (SELECT COUNT(*) FROM INFODOCS WHERE PROCESSOR_ID = userid)
           * 100
       ) AS EFFORTS
FROM DUMMY;
EFFORTS = Total High Priority Infodocs / Total Infodocs for a given Processor
I tried to write an aggregation pipeline using $match, $group and $count, but once I get the output of one subquery I cannot find any way to compute the other subquery and then use the outputs of both to determine the final result.
The Mongo-y way would be not to execute two different queries to get the two counts, but to compute both sums dynamically in a single query.
You can achieve this in many different ways; here is an example of how to use $cond inside $group to do a conditional sum.
db.collection.aggregate([
  // Restrict the input to a single processor's documents.
  { $match: { PROCESSOR_ID: "1" } },
  // One group over everything that matched: overall tally and HIGH-priority tally.
  {
    $group: {
      _id: null,
      totalCount: { $sum: 1 },
      // Contributes 1 for each HIGH-priority document and 0 for any other.
      priorityHighCount: {
        $sum: { $cond: [ { $eq: [ "$ML_PRIORITY", "HIGH" ] }, 1, 0 ] }
      }
    }
  },
  // EFFORTS = round(priorityHighCount / totalCount * 100)
  {
    $project: {
      EFFORTS: {
        $round: {
          "$multiply": [ { $divide: [ "$priorityHighCount", "$totalCount" ] }, 100 ]
        }
      }
    }
  }
])

How to combine these queries into mongo db aggregation

I have two SQL queries. The results of these two queries should be combined within one MongoDB aggregation.
1st Query:
-- UTV disposition counts per dispositionBy, restricted to the given users,
-- taken from HRC_Education and HRC_Employment.
-- NOTE(review): as written, the UNION sits outside the FROM subquery and the outer
-- SELECT has no GROUP BY; if the intent is to sum over both collections, the second
-- SELECT probably belongs inside the subquery — confirm.
SELECT id, sum(DiscCount) AS UTVCount
FROM (
    SELECT edu.dispositionBy AS id, count(*) AS DiscCount
    FROM `HRC_Education` edu
    WHERE edu.`RecommendedDisposition` = 'Edu.Disposition.UTV'
      AND edu.dispositionBy IN "users"
      AND date(edu.DispositionDate) BETWEEN "2022-01-12T00:00:00.0Z" AND "2022-01-23T00:00:00.0Z"
    GROUP BY id
)
UNION
(
    SELECT emp.dispositionBy AS id, count(*) AS DiscCount
    FROM `HRC_Employment` emp
    WHERE emp.`RecommendedDisposition` = 'Emp.Disposition.UTV'
      AND emp.dispositionBy IN "users"
      AND date(emp.DispositionDate) BETWEEN "2022-01-12T00:00:00.0Z" AND "2022-01-23T00:00:00.0Z"
    GROUP BY id
)
2nd query:
-- UTV disposition counts per dispositionBy (no user filter),
-- taken from HRC_Education and HRC_Employment.
-- NOTE(review): as written, the UNION sits outside the FROM subquery and the outer
-- SELECT has no GROUP BY; if the intent is to sum over both collections, the second
-- SELECT probably belongs inside the subquery — confirm.
SELECT id, sum(DiscCount) AS UTVCount
FROM (
    SELECT edu.dispositionBy AS id, count(*) AS DiscCount
    FROM `HRC_Education` edu
    WHERE edu.`RecommendedDisposition` = 'Edu.Disposition.UTV'
      AND date(edu.DispositionDate) BETWEEN "2022-01-12T00:00:00.0Z" AND "2022-01-23T00:00:00.0Z"
    GROUP BY id
)
UNION
(
    SELECT emp.dispositionBy AS id, count(*) AS DiscCount
    FROM `HRC_Employment` emp
    WHERE emp.`RecommendedDisposition` = 'Emp.Disposition.UTV'
      AND date(emp.DispositionDate) BETWEEN "2022-01-12T00:00:00.0Z" AND "2022-01-23T00:00:00.0Z"
    GROUP BY id
)
My Mongo Query:
// Counts UTV dispositions per DispositionBy across HRC_Education and HRC_Employment.
const result = HRC_Education
  .aggregate([
    // Education documents inside the date window with the Education UTV disposition.
    {
      $match: {
        DispositionDate: { $gte: new Date("2022-01-12T00:00:00.0Z"), $lt: new Date("2022-01-23T00:00:00.0Z") },
        DispositionBy: 'users',
        RecommendedDisposition: 'Edu.Disposition.UTV'
      }
    },
    // Append the Employment documents selected by the equivalent filter.
    {
      $unionWith: {
        coll: "HRC_Employment",
        pipeline: [
          {
            $match: {
              DispositionDate: { $gte: new Date("2022-01-12T00:00:00.0Z"), $lt: new Date("2022-01-23T00:00:00.0Z") },
              DispositionBy: 'users',
              RecommendedDisposition: 'Emp.Disposition.UTV'
            }
          }
        ]
      }
    },
    // One bucket per DispositionBy value, counting the combined documents.
    { $group: { _id: { id: "$DispositionBy" }, total: { $sum: 1 } } },
    // Flatten the output to { id, UTVCount }.
    { $project: { _id: 0, id: "$_id.id", UTVCount: "$total" } }
  ])
Sample records for HRC_Education:
[{DispositionDate: 2022-01-14T00:00:00.0Z,DispositionBy:'users',RecommendedDisposition: 'Edu.Disposition.UTV' }]
Sample records for HRC_Employment:
[{DispositionDate: 2022-01-14T00:00:00.0Z,DispositionBy:'users',RecommendedDisposition: 'Emp.Disposition.UTV' }]
With my Mongo query, I am able to reproduce the result of either the 1st query or the 2nd query, but not both. Can anyone please help me combine both SQL queries into one Mongo aggregation?

How to count total customers using MongoDB?

Here is the example of the JSON. I want to count the total customers by unique "email_address" and filter by "transactions_status" equal to "S".
Expected output:
{
_id: null,
count: total amount of customers with an unique email address and filtered by status equal S
}
You can try this aggregations pipeline:
Filter by "transactions_status" equal to "S".
Group distinct emails.
Project the count field as the size of the emails set.
db.collection.aggregate([
  // Keep only transactions whose status is "S".
  { $match: { "transaction_info.transaction_status": "S" } },
  // Collect every distinct payer email address into a single set.
  { $group: { _id: null, emails: { $addToSet: "$payer_info.email_address" } } },
  // The customer count is the number of entries in that set.
  { $project: { count: { $size: "$emails" } } }
]);

SQL to Mongo Aggregation

Hi I want to change my sql query to mongo aggregation.
-- For each year, keep only the minor_category that occurs most often in that year
-- (ties resolved by minor_category order), together with its count.
SELECT c.year, c.minor_category, count(c.minor_category)
FROM Crime AS c
GROUP BY c.year, c.minor_category
HAVING c.minor_category = (
    -- Most frequent category within the outer row's year.
    SELECT cc.minor_category
    FROM Crime AS cc
    WHERE cc.year = c.year
    GROUP BY cc.minor_category
    ORDER BY count(*) DESC, cc.minor_category
    LIMIT 1
)
I tried do something like this:
// Unfinished attempt: group by year and minor_category, then filter the groups.
db.crimes.aggregate({
$group: {
"_id": {
year: "$year",
minor_category :"$minor_category",
// NOTE(review): this accumulator is nested inside "_id" rather than placed beside it — likely unintended; confirm.
count: {$sum: "$minor_category"}
}
},
},
{
$match : {
// Placeholder: the condition that belongs here is the open question.
minor_category: ?
}
})
But I am stuck at $match, which is the equivalent of HAVING; I don't know how to write subqueries in Mongo like the one in my SQL query.
Can anybody help me?
OK, based on the confirmation above, the query below should work.
// For each year, return the most frequent minor category and its count.
db.crime.aggregate([
    // Count documents per (year, minor_category) pair; the category is read from the
    // `minor_category` field used by the SQL query and exposed as `minor`.
    {"$group":{"_id":{"year":"$year","minor":"$minor_category"},"count":{"$sum":1}}},
    // Lift year/count/minor to the top level and keep the grouped document for later.
    {"$project":{"year":"$_id.year","count":"$count","minor":"$_id.minor","document":"$$ROOT"}},
    // Highest count first within each year; ties broken by category, as in the SQL ORDER BY.
    {"$sort":{"year":1,"count":-1,"minor":1}},
    // The first document per year is therefore that year's top category.
    {"$group":{"_id":{"year":"$year"},"orig":{"$first":"$document"}}},
    {"$project":{"_id":0,"year":"$orig._id.year","minor":"$orig._id.minor","count":"$orig.count"}}
])
This translates into the following MongoDB query:
// For each year, return the most frequent minor_category and its count.
db.crime.aggregate([
    {
        $group: { // group by year and minor_category
            _id: {
                "year": "$year",
                "minor_category": "$minor_category"
            },
            "count": { $sum: 1 } // count all documents per group
        }
    },
    {
        $sort: {
            "count": -1, // sort descending by count
            // and ascending by minor_category; after $group the value lives under _id,
            // so the key must be "_id.minor_category" for the tie-break to take effect
            "_id.minor_category": 1
        }
    },
    {
        $group: { // now we get the highest element per year
            _id: "$_id.year", // so group by year
            "minor_category": { $first: "$_id.minor_category" }, // and take the first (we've sorted the data) value
            "count": { $first: "$count" } // same here
        }
    },
    {
        $project: { // remove the _id field and add the others in the right order (if needed)
            "_id": 0,
            "year": "$_id",
            "minor_category": "$minor_category",
            "count": "$count"
        }
    }
])

Best usage for MongoDB Aggregate request

I would like to retrieve a limited list of document _ids, ranked in descending order of their timestamp, restricted to a given list of ObjectIds.
Corresponding to this:
db.collection.aggregate([
    // Keep only the documents whose _id is in the supplied list.
    { $match: { _id: { $in: [ObjectId("X"), ObjectId("Y")] } } },
    // Order by timestamp, highest first.
    { $sort: { timestamp: -1 } },
    // Reduce each document to its _id.
    { $group: { _id: "$_id" } },
    // Return the first 100 results.
    { $skip: 0 },
    { $limit: 100 }
])
Given that the list built by the loop may contain far more than 1000 ObjectIds (in the $in array), do you think my solution is viable? Isn't there a faster and less resource-intensive way?
Best Regards.