Mongo Db query to get distinct records - mongodb

I have below collections in DB around 1 million records. Hpw to get distinct eventID and eventName
from the collections in D for any particular date like 29-07-2020?
{
"_id" : 1814099,
"eventId" : "LAS012",
"eventName" : "CustomerTab",
"timeStamp" : ISODate("2018-12-31T20:09:09.820Z"),
"eventMethod" : "click",
"resourceName" : "CustomerTab",
"targetType" : "",
"resourseUrl" : "",
"operationName" : "",
"functionStatus" : "",
"results" : "",
"pageId" : "CustomerPage",
"ban" : "290824901",
"jobId" : "87377713",
"wrid" : "87377713",
"jobType" : "IBJ7FXXS",
"Uid" : "sc343x",
"techRegion" : "W",
"mgmtReportingFunction" : "N",
"recordPublishIndicator" : "Y",
"__v" : 0
}

You can use distinct, for example to fetch unique eventID:
let eventIds = await db.collection.distinct('eventID', {
"timeStamp": {
$gte: ISODate("2018-12-30T00:00:00.000Z"),
$lt: ISODate("2018-12-31T00:00:00.000Z")
}
})
If you want to retrieve both fields at the same time you'll have to use an aggregation:
db.collection.aggregate([
{
$match: {
"timeStamp": {
$gte: ISODate("2018-12-30T00:00:00.000Z"),
$lt: ISODate("2018-12-31T00:00:00.000Z")
}
}
},
{
$facet: {
eventIds: [
{
$group: {
_id: "$eventID"
}
}
],
eventName: [
{
$group: {
_id: "$eventName"
}
}
]
}
}
])
And if eventID and eventName are linked to one another:
db.collection.aggregate([
{
$match: {
"timeStamp": {
$gte: ISODate("2018-12-30T00:00:00.000Z"),
$lt: ISODate("2018-12-31T00:00:00.000Z")
}
}
},
{
$group: {
_id: {eventID: "$eventID", eventName: "$eventName"}
}
}
])

Related

Aggregate Query geting count of most recent element from nest subdocuments

I have a mongodb database with many users and one of the subdocuments I track is file uploads and their statuses through a review process. Every file upload will have an attachment status eventually. I want to be able to pull some metrics to get the total of the current statuses for each uploaded file. I started building an aggregate query that pulls the latest attachment subdocument status from each file uploaded and count them.
The data structure is as follows:
"userName": "johnDoe",
"email": "johnDoe#gmail.com",
"uploads" : [
{
"_id" : ObjectId("adh12451e0012ce9da0"),
"fileName" : "TestDoc.txt",
"fileType" : "text/plain",
"created" : ISODate("2021-01-06T15:26:14.166Z"),
"attachmentStatus" : [ ]
},
{
"_id" : ObjectId("5ff5d6c066cacc0012ed655a"),
"fileName" : "testerABC.txt",
"fileType" : "text/plain",
"created" : ISODate("2021-01-06T15:26:56.027Z"),
"attachmentStatus" : [
{
"_id" : ObjectId("60884f733f88bd00129b9ad4"),
"status" : "Uploaded",
"date" : ISODate("2021-04-22T02:23:00Z")
},
{
"_id" : ObjectId("60884f733f88bd00129b9ad5"),
"status" : "Processing",
"date" : ISODate("2021-04-26T04:54:00Z")
}
]
},
{
"_id" : ObjectId("6075c82a19fdcc0012f81907"),
"fileName" : "Test file.docx",
"fileType" : "application/word",
"created" : ISODate("2021-04-13T16:34:50.955Z"),
"attachmentStatus" : [
{
"_id" : ObjectId("72844f733f88bd11479b9ad7"),
"status" : "Uploaded",
"date" : ISODate("2021-04-23T03:42:00Z")
},
{
"_id" : ObjectId("724986d73f88bd00147c9wt8"),
"status" : "Completed",
"date" : ISODate("2021-04-24T01:37:00Z")
}
]
}
]
"userName": "janeDoe",
"email": "janeDoe#gmail.com",
"uploads" : [
{
"_id" : ObjectId("ej9784652h0012ce9da0"),
"fileName" : "myResume.txt",
"fileType" : "text/plain",
"created" : ISODate("2021-02-13T12:36:14.166Z"),
"attachmentStatus" : [
{
"_id" : ObjectId("15dhdf6f88bd00147c9wt8"),
"status" : "Completed",
"date" : ISODate("2021-04-24T01:37:00Z")
}
]
},
How can I pull the latest attachment status out for each file uploaded and then summarize the statuses?
I want something like this:
{ "status" : "Completed", "Count" : 2 }
{ "status" : "Processing", "Count" : 1 }
...
I get very close with this Aggregate query, but it will grab each and every status and not just the the single most current Status for each file. (one current status per file).
db.myDB.aggregate([
{
"$match" : {
"uploads.attachmentStatus": {
"$elemMatch": { "status": { "$exists": true } }
}
}
},
{ $unwind: "$uploads"},
{ $unwind: "$uploads.attachmentStatus"},
{
$sortByCount: "$uploads.attachmentStatus.status"
},
{
$project: {
_id:0,
status: "$_id",
Count: "$count"
}
}
]).pretty();
Any suggestions?
Demo - https://mongoplayground.net/p/zzOR9qhqny0
{ $sort: { "uploads.attachmentStatus.date": -1 } }, to get the latest 1st
{ $group: { _id: "$uploads._id", status: { $first: "$uploads.attachmentStatus.status" } } } Group the records by uploads._id and take the top status (which is the latest status after the sort by date).
Query
{ $sort: { "uploads.attachmentStatus.date": -1 } },
{ $group: { _id: "$uploads._id", status: { $first: "$uploads.attachmentStatus.status" } } },
Complete query
db.collection.aggregate([
{ $match: { "uploads.attachmentStatus": { "$elemMatch": { "status": { "$exists": true } } } } },
{ $unwind: "$uploads" },
{ $unwind: "$uploads.attachmentStatus" },
{ $sort: { "uploads.attachmentStatus.date": -1 } },
{ $group: { _id: "$uploads._id", status: { $first: "$uploads.attachmentStatus.status" } } },
{ $sortByCount: "$status" },
{ $project: { _id: 0, status: "$_id", Count: "$count" } }
])

I'm having trouble with a mongodb function that should be finding duplicates

I have the following function that is supposed to be returning the _id value of duplicates based on the email key. However, no matter what I've tried, I can't get the function to return anything other than any empty object. What am I missing here? Or is there a better approach I should be considering?
var duplicates = [];
db.medicallists
.aggregate([
{
$group: {
_id: {
email: "$email"
},
duplicate_ids: { $addToSet: "$_id" },
count: { $sum: 1 }
}
},
{
$match: {
count: { $gt: 1 }
}
},
], { allowDiskUse: true })
.forEach(function(doc) {
doc.duplicate_ids.shift(); // First element skipped for deleting
doc.duplicate_ids.forEach(function(dupId) {
duplicates.push(dupId); // Getting all duplicate ids
});
});
printjson(duplicates);
EDIT:
Here is a sample document:
{
_id : 5a2fed0c8023cf7ea2346067,
primary_spec : "Addiction Medicine",
first_name : "John",
last_name : "Sample",
city : "Las Vegas",
state : "NV",
phone : "1111111111",
fax : "1111111111",
email : "sample#aol.com"
}
I have tested your specific query on a similar data set and it works fine. printjson(duplicates); prints out all the duplicate ids.
Also this will, in fact, remove all the duplicate entries based on email:
db.collection.aggregate([
{
$group: {
_id: {
email: "$email"
},
duplicate_ids: {
$push: "$_id"
},
count: {
$sum: 1
}
}
},
{
$match: {
count: {
$gt: 1
}
}
},
]).forEach(function(doc){
doc.duplicate_ids.shift();
db.collection.remove({
_id: {
$in: doc.duplicate_ids
}
});
})
My starting set was:
{
"_id" : ObjectId("6014331de1ef9ab1f708ddd9"),
"item" : "card",
"email" : "zzz#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708ddda"),
"item" : "card",
"email" : "eee#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708dddb"),
"item" : "card",
"email" : "zzz#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708dddc"),
"item" : "card",
"email" : "aaa#yahoo.com"
}
After running the query, it turned to:
{
"_id" : ObjectId("6014331de1ef9ab1f708ddd9"),
"item" : "card",
"email" : "zzz#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708ddda"),
"item" : "card",
"email" : "eee#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708dddc"),
"item" : "card",
"email" : "aaa#yahoo.com"
}
Tested on MongoDB server version: 4.2.7

How to $setDifference in array & Object using Mongo DB

UserDetails
{
"_id" : "5c23536f807caa1bec00e79b",
"UID" : "1",
"name" : "A",
},
{
"_id" : "5c23536f807caa1bec00e78b",
"UID" : "2",
"name" : "B",
},
{
"_id" : "5c23536f807caa1bec00e90",
"UID" : "3",
"name" : "C"
}
UserProducts
{
"_id" : "5c23536f807caa1bec00e79c",
"UPID" : "100",
"UID" : "1",
"status" : "A"
},
{
"_id" : "5c23536f807caa1bec00e79c",
"UPID" : "200",
"UID" : "2",
"status" : "A"
},
{
"_id" : "5c23536f807caa1bec00e52c",
"UPID" : "300",
"UID" : "3",
"status" : "A"
}
Groups
{
"_id" : "5bb20d7556db6915846da55f",
"members" : {
"regularStudent" : [
"200" // UPID
],
}
},
{
"_id" : "5bb20d7556db69158468878",
"members" : {
"regularStudent" : {
"0" : "100" // UPID
}
}
}
Step 1
I have to take UID from UserDetails check with UserProducts then take UPID from UserProducts
Step 2
we have to check this UPID mapped to Groups collection or not ?.
members.regularStudent we are mapped UPID
Step 3
Suppose UPID not mapped means i want to print the UPID from from UserProducts
I have tried but couldn't complete this, kindly help me out on this.
Expected Output:
["300"]
Note: Expected Output is ["300"] , because UserProducts having UPID 100 & 200 but Groups collection mapped only 100& 200.
My Code
var queryResult = db.UserDetails.aggregate(
{
$lookup: {
from: "UserProducts",
localField: "UID",
foreignField: "UID",
as: "userProduct"
}
},
{ $unwind: "$userProduct" },
{ "$match": { "userProduct.status": "A" } },
{
"$project": { "_id" : 0, "userProduct.UPID" : 1 }
},
{
$group: {
_id: null,
userProductUPIDs: { $addToSet: "$userProduct.UPID" }
}
});
let userProductUPIDs = queryResult.toArray()[0].userProductUPIDs;
db.Groups.aggregate([
{
$unwind: "$members.regularStudent"
},
{
$group: {
_id: null,
UPIDs: { $addToSet: "$members.regularStudent" }
}
},
{
$project: {
members: {
$setDifference: [ userProductUPIDs , "$UPIDs" ]
},
_id : 0
}
}
])
My Output
{
"members" : [
"300",
"100"
]
}
You need to fix that second aggregation and get all UPIDs as an array. To achieve that you can use $cond and based on $type either return an array or use $objectToArray to run the conversion, try:
db.Groups.aggregate([
{
$project: {
students: {
$cond: [
{ $eq: [ { $type: "$members.regularStudent" }, "array" ] },
"$members.regularStudent",
{ $map: { input: { "$objectToArray": "$members.regularStudent" }, as: "x", in: "$$x.v" } }
]
}
}
},
{
$unwind: "$students"
},
{
$group: {
_id: null,
UPIDs: { $addToSet: "$students" }
}
},
{
$project: {
members: {
$setDifference: [ userProductUPIDs , "$UPIDs" ]
},
_id : 0
}
}
])

Get record of another field with aggregate

I am new in MongoDB world.
I've following data in my collection
{
"_id" : ObjectId("5735d8d4d147aa34e440988f"),
"DeviceLogId" : "26962",
"DeviceId" : "10",
"UserId" : "78",
"LogDateTime" : ISODate("2016-05-12T07:52:44.000+0000")
}
{
"_id" : ObjectId("5735d8d4d147aa34e4409890"),
"DeviceLogId" : "26963",
"DeviceId" : "10",
"UserId" : "342",
"LogDateTime" : ISODate("2016-05-12T07:54:09.000+0000")
}
{
"_id" : ObjectId("5735d8d4d147aa34e4409891"),
"DeviceLogId" : "26964",
"DeviceId" : "10",
"UserId" : "342",
"LogDateTime" : ISODate("2016-05-12T07:54:10.000+0000")
}
{
"_id" : ObjectId("5735d8d4d147aa34e4409892"),
"DeviceLogId" : "26965",
"DeviceId" : "10",
"UserId" : "78",
"LogDateTime" : ISODate("2016-05-12T07:54:27.000+0000")
}
I want to query DeviceId of each user with maximum LogDateTime using group by.
I've written group by query like below but have no idea how would I get DeviceLogId for each record.
collectionName.aggregate(
[{
$match: { LogDateTime: { $gt: todaysDateStart, $lt: todayDateEnd } }
}, {
$group: {
_id: "$UserId",
maxPunchTime: { $max: { $add: [ "$LogDateTime", 330*60000 ] } },
}
}])
In MSSQL, I could easily do it with nested query but I've no idea how would I achieve that in MongoDB.
Thanks in advance.
Use the $addToSet Group Accumulator:
collectionName.aggregate(
[{
$match: { LogDateTime: { $gt: todaysDateStart, $lt: todayDateEnd } }
}
, {
$group: {
_id: "$UserId",
maxPunchTime: { $max: { $add: [ "$LogDateTime", 330*60000 ] } },
deviceLogIds:{$addToSet: "$DeviceLogId"} //<----
}
} ,
{ $sort: {"maxPunchTime" : -1} } , {$limit : 1} //Sort Descending + Limit to 1
])
Add deviceid to an array in group phase,
Device:{$addToSet:deviceId}

Query in Array with dates

I'm trying to do a query(In MongoDB) in array("availability") that will return a only hotel that have the element("available") equals 1 and between the dates inside the availability.
But the query return all hotels when the correct return is "Mercato Hotel"
Query that i have used without success:
{city: "Boston", availability: { $elemMatch: {availability: 1, date: {$gte: ISODate("2015-05-02T00:00:00.000+0000")}, date: {$lte: ISODate("2015-05-04T00:00:00.000+0000")}}}}
Json in MongoDb:
{
"_id" : ObjectId("55b302ee8debdf1a908cdc85"),
"city" : "Boston",
"hotel" : "Mercatto Hotel",
"availability" : [
{
"date" : ISODate("2015-05-01T00:00:00.000+0000"),
"available" : NumberInt(0)
},
{
"date" : ISODate("2015-05-02T00:00:00.000+0000"),
"available" : NumberInt(0)
},
{
"date" : ISODate("2015-05-03T00:00:00.000+0000"),
"available" : NumberInt(0)
},
{
"date" : ISODate("2015-05-04T00:00:00.000+0000"),
"available" : NumberInt(1)
}
]
}
{
"_id" : ObjectId("55b302ee8debdf1a908cdc89"),
"city" : "Boston",
"hotel" : "Hostel Villa",
"availability" : [
{
"date" : ISODate("2015-05-01T00:00:00.000+0000"),
"available" : NumberInt(1)
},
{
"date" : ISODate("2015-05-02T00:00:00.000+0000"),
"available" : NumberInt(0)
},
{
"date" : ISODate("2015-05-03T00:00:00.000+0000"),
"available" : NumberInt(0)
},
{
"date: ISODate("2015-05-04T00:00:00.000+0000"),
"available" : NumberInt(0)
}
]
}
Someone can help me?
Thanks...
You have got a typo in your query, availability instead of available, should be this:
{
city: "Boston",
availability: {
$elemMatch: {
available: 1,
date: {
$gte: ISODate("2015-05-02T00:00:00.000+0000"),
$lte: ISODate("2015-05-04T00:00:00.000+0000")
}
}
}
}
UPDATE with Blakes Seven
If you want to get only the element of availability array that matches your query, add projection:
{
"city": 1,
"hotel": 1
"availability.$.date": 1
}
The query:
{
city: "Boston",
availability: {
$elemMatch: {
available: 1,
date: {
$gte: ISODate("2015-05-02T00:00:00.000+0000"),
$lte: ISODate("2015-05-04T00:00:00.000+0000")
}
}
}
}
Returned the same result. In other words, returned all hotels when the correct return is "Mercato Hotel".
You can use aggregation to get expected output as following:
db.collection.aggregate({
$unwind: "$availability"
}, {
$match: {
"city": "Boston",
"availability.available": 1,
"availability.date": {
$gte: ISODate("2015-05-02T00:00:00.000+0000"),
$lte: ISODate("2015-05-04T00:00:00.000+0000")
}
}
})
Edit
If there are multiple available=1 then use following query:
db.collection.aggregate({
$unwind: "$availability"
}, {
$match: {
"city": "Boston",
"availability.available": 1,
"availability.date": {
$gte: ISODate("2015-05-02T00:00:00.000+0000"),
$lte: ISODate("2015-05-04T00:00:00.000+0000")
}
}
}, {
$group: {
_id: "$hotel",
"city": {
$first: "$city"
},
"availability": {
$push: "$availability"
}
}
})