How to find distinct values of fields using facet operation in mongodb - mongodb

The filteredAccording part and the categorizedBy is working as expected using the query which I provided in the link but I am facing issues in the findDistinct part.
In mongodb I have the following data:
{
"_id": 10001,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "ACTIVE",
"isMarked": true
},
{
"_id": 10002,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "NON-ACTIVE",
"isMarked": true
}
I wanted the response to be:
"university": [
{
"name": "Literature",
"values": [
{
"_id": 10001,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "ACTIVE",
"isMarked": true
},
{
"_id": 10002,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "NON-ACTIVE",
"isMarked": true
}
]
}
],
"findDistinct": [
{​​​​​​​​
"name": "Courses",
"values": [
"English",
"French"
]
}​​​​​​​​,
{​​​​​​​​
"name": "Status",
"values": [
"ACTIVE",
"NON-ACTIVE"
]
}​​​​​​​​
]
I tried it using this link but the response is not coming as expected.
https://mongoplayground.net/p/XECZvRMmt3T
Right now, the response is coming like this
"universities": [
{
"name": "Literature",
"values": [
{
"_id": 10001,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "ACTIVE",
"isMarked": true
},
{
"_id": 10002,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "NON-ACTIVE",
"isMarked": true
}
]
}
],
"findDistinct": [
{​​​​​​​​
"Courses": [
"English",
"French"
]
}​​​​​​​​,
{​​​​​​​​
"status": [
"ACTIVE",
"NON-ACTIVE"
]
}​​​​​​​​
]
Any Help will be appreciated!!

Quick fixes in your query,
universities:
$addFields, remove $project and add only one operation for isMarked
$unwind deconstruct dept array
$group by dept and get values array of root
findDistinct:
$group by null and get unique courses array and status
$reduce to iterate loop of Courses nested array and get unique array using $setUnion
Make array of course and status in dest field
$unwind deconstruct dest array
$replaceRoot replace dest object to root
db.collection.aggregate([
{ $match: { university: "SPYU" }
},
{
$facet: {
universities: [
{ $addFields: { isMarked: { $in: ["French", "$Courses"] } } },
{ $unwind: "$dept" },
{
$group: {
_id: "$dept",
values: { $push: "$$ROOT" }
}
}
],
findDistinct: [
{
$group: {
_id: null,
Courses: { $addToSet: "$Courses" },
Status: { $addToSet: "$status" }
}
},
{
$project: {
_id: 0,
dist: [
{
name: "Courses",
values: {
$reduce: {
input: "$Courses",
initialValue: [],
in: { $setUnion: ["$$this", "$$value"] }
}
}
},
{
name: "Status",
values: "$Status"
}
]
}
},
{ $unwind: "$dist" },
{ $replaceRoot: { newRoot: "$dist" } }
]
}
}
])
Playground

Related

How to use update aggregation in MongoDb

my sample db:
"Employee": [ { "empeId": "e001",
"fName": "James",
"lName": "Bond",
"email": "jamesbond#hotmail.com",
"experience": [
"Database Design",
"SQL",
"Java" ]
},
{ "empeId": "e002",
"fName": "Harry",
"lName": "Potter",
"experience": [
"Data Warehouse",
"SQL",
"Spark Scala",
"Java Scripts" ]
} ],
"Project": [ { "projectId": "p001",
"projectTitle": "Install MongoDB" },
{ "projectId": "p002",
"projectTitle": "Install Oracle" },
{ "projectId": "p003",
"projectTitle": "Install Hadoop" } ],
"EmployeeProject": [ { "empeId": "e001",
"projectId": "p001",
"hoursWorked": 4 },
{ "empeId": "e001",
"projectId": "p003",
"hoursWorked": 2 },
{ "empeId": "e002",
"projectId": "p003",
"hoursWorked": 5 } ]
I want to update the array experience with 'test' of user with empeId: e001
desired output:
"Employee": [ { "empeId": "e001",
"fName": "James",
"lName": "Bond",
"email": "jamesbond#hotmail.com",
"experience": [
"Database Design",
"SQL",
"Java",
"test"
]
}]
I tried using
db.emp.updateOne([
{$unwind: "$Employee"},
{$match: {"Employee.emepId" : "e001" }}
],
{$push: {"Employee.experience" : "test"}})
and I get Syntax Error: Invalid property id #(shell):1:31
is this how the syntax for update with pipeline aggregation works?
The easiest and clean way(using arrayFilters):
db.collection.update({ Employee.empeId" : "e001"},
{
"$push": {
"Employee.$[x].experience": "test"
}
},
{
arrayFilters: [
{
"x.empeId": "e001"
}
]
})
Playground1
And via update/aggregation:
db.collection.update({ Employee.empeId" : "e001" },
[
{
$set: {
"Employee": {
$map: {
input: "$Employee",
as: "m",
in: {
$cond: [
{
$eq: [
"$$m.empeId",
"e001"
]
},
{
$mergeObjects: [
"$$m",
{
experience: {
$concatArrays: [
"$$m.experience",
[
"test"
]
]
}
}
]
},
"$$m"
]
}
}
}
}
}
])
Playground2

Adding a nested value as a field - MongDB aggregation

So I have a parent document with users, as well as an array that has users too. I want to add the DisplayName from the nested users array to the aggregation output. Any ideas?
Output I'm looking to achieve:
[
{
"user": {
"_id": "11",
"Name": "Dave",
"DocID": "1",
"DocDisplyName": "ABC"
},
{
"user": {
"_id": "33",
"Name": "Henry",
"DocID": "1",
"DocDisplyName": "ABC",
"BranchDisplayName:"BranchA"
}
}
]
And so on.. So an array of all users and for users that belong to a branch, add the branch display Name to the output.
// Doc 1
{
"_id": "1",
"DisplayName": "ABC",
"Users": [
{ "_id": "11", "Name": "Dave" },
{ "_id": "22", "Name": "Steve" }
],
"Branches": [
{
"_id": "111",
"DisplayName": "BranchA",
"Users": [
{ "_id": "33", "Name": "Henry" },
{ "_id": "44", "Name": "Josh" },
],
},
{
"_id": "222",
"DisplayName": "BranchB",
"Users": [
{ "_id": "55", "Name": "Mark" },
{ "_id": "66", "Name": "Anton" },
],
}
]
}
``Doc 2
{
"_id": "2",
"DisplayName": "DEF",
"Users": [
{ "_id": "77", "Name": "Josh" },
{ "_id": "88", "Name": "Steve" }
],
"Branches": [
{
"_id": "333",
"DisplayName": "BranchA",
"Users": [
{ "_id": "99", "Name": "Henry" },
{ "_id": "10", "Name": "Josh" },
],
},
{
"_id": "444",
"DisplayName": "BranchB",
"Users": [
{ "_id": "112", "Name": "Susan" },
{ "_id": "112", "Name": "Mary" },
],
}
]
}
Collection.aggregate([
{
$addFields: {
branchUsers: {
$reduce: {
input: "$Branches.Users",
initialValue: [],
in: {
$concatArrays: ["$$this", "$$value"],
},
},
},
},
},
{
$addFields: {
user: {
$concatArrays: ["$branchUsers", "$Users"],
},
},
},
{
$addFields: {
"user.DocID": "$_id","user.DocDisaplyName": "$DisplayName"
},
},
{
$unwind: "$user",
},
{
$project: {
_id: 0,
user: 1,
},
}
])
Thanks in advance!
OK I found a solution.
{
$addFields: {
"branchUsers.BranchDisplayName": {
$let: {
vars: {
first: {
$arrayElemAt: [ "$Branches", 0 ]
}
},
in: "$$first.DisplayName"
}
}
}
},
This creates the field only for the users that belong to the branch

Aggregation: Mongodb aggregation query example

Record is database:
[
{
"title": "title1",
"author": [
{
"name": "user1",
"register": true
},
{
"name": "user2",
"register": true
}
],
"tags": [
"tag1",
"tag2",
"tag3"
]
},
{
"title": "title2",
"author": [
{
"name": "user1",
"register": true
},
{
"name": "user2",
"register": true
}
],
"tags": [
"tag1",
"tag2",
"tag3"
]
},
{
"title": "title3",
"author": [
{
"name": "user1",
"register": true
},
{
"name": "user2",
"register": true
}
],
"tags": [
"tag1",
"tag2",
"tag3"
]
}
]
expected output:
{"tag":"tag1", "titles":["title1","title2","title3"], "size":3}
{"tag":"tag2", "titles":["title2","title4"], "size":2}
Can someone help with aggregate query?
You can use group
$unwind to deconstruct the array
$group to regroup the based on tags
$project to show the desired output
Here is the code,
db.collection.aggregate([
{ "$unwind": "$tags" },
{
"$group": {
"_id": "$tags",
"titles": { "$push": "$title" }
}
},
{
$project: {
tag: "$_id",
titles: 1,
size: { $size: "$titles" },
_id: "$$REMOVE"
}
}
])
Working Mongo playground

How to write an aggregate query to figure the count in Mongodb

I have the following JSON context and I have a requirement to aggregate the distinct keywords under conversion_token group and the count it is repeated; for instance:
"conversion_token": [
{
"keyword": "DBMS",
"count":4,
"classify":2
}
the keyword DBMS is used multiple times under different constructs in the json provided, the aggregate should display the
"conversion_token": [
{
"keyword": "DBMS",
"count":6,
"classify":2
}
{
"keyword": "NVL",
"count":2,`enter code here`
"classify":2
}
etc..
How can I do this?
{
"select_emp": {
"specification": {
"input": [
"p_empno"
],
"declare_stmt": {
"anchorvariable": [
"V_ENAME",
"V_HIREDATE",
"V_TITLE",
"V_REPORTSTO",
"V_DISP_DATE",
"V_INS_COUNT",
"CITY_FROM"
],
"tablename_variable": [
"EMPLOYEE.V_ENAME",
"EMPLOYEE.V_HIREDATE",
"EMPLOYEE.V_TITLE",
"EMPLOYEE.V_REPORTSTO",
"EMPLOYEE.V_DISP_DATE",
"EMPLOYEE.V_INS_COUNT",
"EMPLOYEE.CITY_FROM"
]
}
},
"body": {
"select_stmt1": {
"columns": [
"FIRSNAME",
"HIREDATE",
"TITLE",
"REPORTSTO"
],
"tablename": [
"EMPLOYEE"
],
"conversion_token": [
{
"keyword": "NVL",
"count": 1,
"classify": 2
}
]
},
"select_stmt2": {
"columns": [
"CITY"
],
"tablename": [
"EMPLOYEE"
],
"conversion_token": [
{
"keyword": "DECODE",
"count": 1,
"classify": 3
}
]
},
"dbms_stmt1": {
"dbms_putline": [
"P_EMPNO",
"V_ENAME",
"V_DISP_DATE",
"V_REPORTSTO"
],
"conversion_token": [
{
"keyword": "DBMS",
"count": 1,
"classify": 2
}
]
},
"forloop1": {
"select_stmt": {
"columns": [
"EMPLOYEEID",
"ROWID"
],
"tablename": [
"EMPLOYEE"
],
"conversion_token": [
{
"keyword": "DBMS",
"count": 1,
"classify": 2
}
]
}
},
"merge_stmt1": {
"merge_into": "EMPLOYEE",
"merge_using": {
"columns": [
"EMPLOYEEID",
"LASTNAME",
"TITLE",
"BIRTHDATE",
"HIREDATE",
"ADDRESS",
"CITY",
"STATE",
"COUNTRY",
"POSTALCODE",
"PHONE",
"FAX",
"EMAIL",
"BONUS"
],
"tablename": [
"EMPLOYEE"
]
},
"merge_update": {
"columns": [
"BONUS"
],
"tablename": [
"EMPLOYEE"
]
},
"merge_delete": {
"columns": [
"BONUS"
],
"tablename": [
"EMPLOYEE"
]
},
"merge_insert": {
"columns": [
"EMPLOYEEID",
"LASTNAME",
"FIRSTNAME",
"TITLE",
"BIRTHDATE",
"HIREDATE",
"ADDRESS",
"CITY",
"STATE",
"COUNTRY",
"POSTALCODE",
"PHONE",
"FAX",
"EMAIL",
"BONUS"
],
"tablename": [
"EMPLOYEE"
]
},
"conversion_token": [
{
"keyword": "Merge",
"count": 1,
"classify": 4
}
]
},
"exception_handling1": {
"dbms_putline": [
"P_EMPNO"
],
"conversion_token": [
{
"keyword": "DBMS",
"count": 1,
"classify": 2
}
]
}
}
}
}
First you need to concat conversion_token arrays into one using project and $concatArrays. Then using $unwind, you can separate each conversion_token object to make them ready for aggregation. After $unwind, you can group them by keyword using $group and you can take sum of count.
db.collection.aggregate([
{
$project: {
conversion_tokens: {
$concatArrays:
[
"$select_emp.body.select_stmt1.conversion_token",
"$select_emp.body.select_stmt2.conversion_token",
"$select_emp.body.dbms_stmt1.conversion_token",
"$select_emp.body.forloop1.conversion_token",
"$select_emp.body.merge_stmt1.conversion_token",
"$select_emp.body.exception_handling1.conversion_token"
]
}
}
}, {
$unwind: "$conversion_tokens"
}, {
$group: {
_id: "$conversion_tokens.keyword",
count: {
$sum: "$conversion_tokens.count"
}
}
}
])
This will produce an array like:
{
"_id": "DBMS",
"count": 3
}, {
"_id": "NVL",
"count": 1
}, {
"_id": "DECODE",
"count": 1
}
If you want to change the _id key with keyword, you can use $project again.

MongoDB aggregate results into nested array

I'm quite new to MongoDB and I am currently facing a situation. Below are 2 sample records from the whole database that I have :
{
"_id": 1,
"Record": 1,
"Link": [ "https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html" ],
"Location": [ "USA", "PAN", "USA", "USA", "PAN" ],
"Organization": [ "GN", "SOUTHCOM", "UCMJ", "PRC" ],
"Date": [ "2016" ],
"People": [ "P.Walter" ]
}
{
"_id": 2,
"Record": 2,
"Link": [ "https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html" ],
"Location": [ "NIC", "GTM", "JAM", "GTM", "PAN" ],
"Organization": [ "CENTAM", "Calibre Mining Corporation", "STRATFOR", "Alder Resources" ],
"Date": [ "2013" ],
"People": [ "Daniel Ortega", "Hugo Chavez", "Paulo Gregoire" ]
}
Basically, I'm trying to get an output like this :
{
"Country": "US",
"Years": [
{
"Year": "2016",
"Links": [ "https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html",
"https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html",
"https://wikileaks.org/gifiles/docs/90/9058_wax-12312008-csv-.html" ]
},
{
"Year": "2013",
"Links": [ ""https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html",
"https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html",
"https://wikileaks.org/gifiles/docs/90/9058_wax-12312008-csv-.html" ]
}
]
"Link_Count": 6
}
{
"Country": "UK",
"Years": [
{
"Year": "2009",
"Links": [ "https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html",
"https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html",
"https://wikileaks.org/gifiles/docs/90/9058_wax-12312008-csv-.html" ]
},
{
"Year": "2011",
"Links": [ ""https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html",
"https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html"]
}
]
"Link_Count": 5
}
I've tried to aggregate it, but I couldn't achieve what I want like I've given in the output. Here's my query :
db.test.aggregate([
{
"$unwind": "$Location"
},
{
"$group" : {
"_id": {
"Country": "$Location",
"Year": "$Date",
"Links": "$Link"
},
Loc: {
$addToSet: "$Location"
}
}
},
{
"$unwind": "$Loc"
},
{
"$group": {
"_id": "$Loc",
"Years": { "$push": {
"Year": "$_id.Year",
"Links": "$_id.Links"
}
}
}
}
]).toArray()
I used $unwind and $addToSet on $Location because there are duplicates found within $Location. I'm open to any suggestions or solution so please do tell! Thanks in advance!
You can use :
db.test.aggregate([{
"$unwind": "$Location"
}, {
"$unwind": "$Date"
}, {
"$unwind": "$Link"
}, {
"$group": {
"_id": {
"Country": "$Location",
"Year": "$Date"
},
Links: {
$addToSet: "$Link"
}
}
}, {
"$group": {
"_id": "$_id.Country",
Years: {
$push: {
"Year": "$_id.Year",
"Links": "$Links"
}
},
Link_Count: { $sum: { $size: "$Links" } }
}
}])
The idea is to $unwind all arrays to be able to $push link into a new array, and to count the grouped record with $size for the last $group stage.