I have the following JSON context and I have a requirement to aggregate the distinct keywords under conversion_token group and the count it is repeated; for instance:
"conversion_token": [
{
"keyword": "DBMS",
"count":4,
"classify":2
}
the keyword DBMS is used multiple times under different constructs in the json provided, the aggregate should display the
"conversion_token": [
{
"keyword": "DBMS",
"count":6,
"classify":2
}
{
"keyword": "NVL",
"count":2,`enter code here`
"classify":2
}
etc..
How can I do this?
{
"select_emp": {
"specification": {
"input": [
"p_empno"
],
"declare_stmt": {
"anchorvariable": [
"V_ENAME",
"V_HIREDATE",
"V_TITLE",
"V_REPORTSTO",
"V_DISP_DATE",
"V_INS_COUNT",
"CITY_FROM"
],
"tablename_variable": [
"EMPLOYEE.V_ENAME",
"EMPLOYEE.V_HIREDATE",
"EMPLOYEE.V_TITLE",
"EMPLOYEE.V_REPORTSTO",
"EMPLOYEE.V_DISP_DATE",
"EMPLOYEE.V_INS_COUNT",
"EMPLOYEE.CITY_FROM"
]
}
},
"body": {
"select_stmt1": {
"columns": [
"FIRSNAME",
"HIREDATE",
"TITLE",
"REPORTSTO"
],
"tablename": [
"EMPLOYEE"
],
"conversion_token": [
{
"keyword": "NVL",
"count": 1,
"classify": 2
}
]
},
"select_stmt2": {
"columns": [
"CITY"
],
"tablename": [
"EMPLOYEE"
],
"conversion_token": [
{
"keyword": "DECODE",
"count": 1,
"classify": 3
}
]
},
"dbms_stmt1": {
"dbms_putline": [
"P_EMPNO",
"V_ENAME",
"V_DISP_DATE",
"V_REPORTSTO"
],
"conversion_token": [
{
"keyword": "DBMS",
"count": 1,
"classify": 2
}
]
},
"forloop1": {
"select_stmt": {
"columns": [
"EMPLOYEEID",
"ROWID"
],
"tablename": [
"EMPLOYEE"
],
"conversion_token": [
{
"keyword": "DBMS",
"count": 1,
"classify": 2
}
]
}
},
"merge_stmt1": {
"merge_into": "EMPLOYEE",
"merge_using": {
"columns": [
"EMPLOYEEID",
"LASTNAME",
"TITLE",
"BIRTHDATE",
"HIREDATE",
"ADDRESS",
"CITY",
"STATE",
"COUNTRY",
"POSTALCODE",
"PHONE",
"FAX",
"EMAIL",
"BONUS"
],
"tablename": [
"EMPLOYEE"
]
},
"merge_update": {
"columns": [
"BONUS"
],
"tablename": [
"EMPLOYEE"
]
},
"merge_delete": {
"columns": [
"BONUS"
],
"tablename": [
"EMPLOYEE"
]
},
"merge_insert": {
"columns": [
"EMPLOYEEID",
"LASTNAME",
"FIRSTNAME",
"TITLE",
"BIRTHDATE",
"HIREDATE",
"ADDRESS",
"CITY",
"STATE",
"COUNTRY",
"POSTALCODE",
"PHONE",
"FAX",
"EMAIL",
"BONUS"
],
"tablename": [
"EMPLOYEE"
]
},
"conversion_token": [
{
"keyword": "Merge",
"count": 1,
"classify": 4
}
]
},
"exception_handling1": {
"dbms_putline": [
"P_EMPNO"
],
"conversion_token": [
{
"keyword": "DBMS",
"count": 1,
"classify": 2
}
]
}
}
}
}
First you need to concat conversion_token arrays into one using project and $concatArrays. Then using $unwind, you can separate each conversion_token object to make them ready for aggregation. After $unwind, you can group them by keyword using $group and you can take sum of count.
db.collection.aggregate([
{
$project: {
conversion_tokens: {
$concatArrays:
[
"$select_emp.body.select_stmt1.conversion_token",
"$select_emp.body.select_stmt2.conversion_token",
"$select_emp.body.dbms_stmt1.conversion_token",
"$select_emp.body.forloop1.conversion_token",
"$select_emp.body.merge_stmt1.conversion_token",
"$select_emp.body.exception_handling1.conversion_token"
]
}
}
}, {
$unwind: "$conversion_tokens"
}, {
$group: {
_id: "$conversion_tokens.keyword",
count: {
$sum: "$conversion_tokens.count"
}
}
}
])
This will produce an array like:
{
"_id": "DBMS",
"count": 3
}, {
"_id": "NVL",
"count": 1
}, {
"_id": "DECODE",
"count": 1
}
If you want to change the _id key with keyword, you can use $project again.
Related
my sample db:
"Employee": [ { "empeId": "e001",
"fName": "James",
"lName": "Bond",
"email": "jamesbond#hotmail.com",
"experience": [
"Database Design",
"SQL",
"Java" ]
},
{ "empeId": "e002",
"fName": "Harry",
"lName": "Potter",
"experience": [
"Data Warehouse",
"SQL",
"Spark Scala",
"Java Scripts" ]
} ],
"Project": [ { "projectId": "p001",
"projectTitle": "Install MongoDB" },
{ "projectId": "p002",
"projectTitle": "Install Oracle" },
{ "projectId": "p003",
"projectTitle": "Install Hadoop" } ],
"EmployeeProject": [ { "empeId": "e001",
"projectId": "p001",
"hoursWorked": 4 },
{ "empeId": "e001",
"projectId": "p003",
"hoursWorked": 2 },
{ "empeId": "e002",
"projectId": "p003",
"hoursWorked": 5 } ]
I want to update the array experience with 'test' of user with empeId: e001
desired output:
"Employee": [ { "empeId": "e001",
"fName": "James",
"lName": "Bond",
"email": "jamesbond#hotmail.com",
"experience": [
"Database Design",
"SQL",
"Java",
"test"
]
}]
I tried using
db.emp.updateOne([
{$unwind: "$Employee"},
{$match: {"Employee.emepId" : "e001" }}
],
{$push: {"Employee.experience" : "test"}})
and I get Syntax Error: Invalid property id #(shell):1:31
is this how the syntax for update with pipeline aggregation works?
The easiest and clean way(using arrayFilters):
db.collection.update({ Employee.empeId" : "e001"},
{
"$push": {
"Employee.$[x].experience": "test"
}
},
{
arrayFilters: [
{
"x.empeId": "e001"
}
]
})
Playground1
And via update/aggregation:
db.collection.update({ Employee.empeId" : "e001" },
[
{
$set: {
"Employee": {
$map: {
input: "$Employee",
as: "m",
in: {
$cond: [
{
$eq: [
"$$m.empeId",
"e001"
]
},
{
$mergeObjects: [
"$$m",
{
experience: {
$concatArrays: [
"$$m.experience",
[
"test"
]
]
}
}
]
},
"$$m"
]
}
}
}
}
}
])
Playground2
So I have a parent document with users, as well as an array that has users too. I want to add the DisplayName from the nested users array to the aggregation output. Any ideas?
Output I'm looking to achieve:
[
{
"user": {
"_id": "11",
"Name": "Dave",
"DocID": "1",
"DocDisplyName": "ABC"
},
{
"user": {
"_id": "33",
"Name": "Henry",
"DocID": "1",
"DocDisplyName": "ABC",
"BranchDisplayName:"BranchA"
}
}
]
And so on.. So an array of all users and for users that belong to a branch, add the branch display Name to the output.
// Doc 1
{
"_id": "1",
"DisplayName": "ABC",
"Users": [
{ "_id": "11", "Name": "Dave" },
{ "_id": "22", "Name": "Steve" }
],
"Branches": [
{
"_id": "111",
"DisplayName": "BranchA",
"Users": [
{ "_id": "33", "Name": "Henry" },
{ "_id": "44", "Name": "Josh" },
],
},
{
"_id": "222",
"DisplayName": "BranchB",
"Users": [
{ "_id": "55", "Name": "Mark" },
{ "_id": "66", "Name": "Anton" },
],
}
]
}
``Doc 2
{
"_id": "2",
"DisplayName": "DEF",
"Users": [
{ "_id": "77", "Name": "Josh" },
{ "_id": "88", "Name": "Steve" }
],
"Branches": [
{
"_id": "333",
"DisplayName": "BranchA",
"Users": [
{ "_id": "99", "Name": "Henry" },
{ "_id": "10", "Name": "Josh" },
],
},
{
"_id": "444",
"DisplayName": "BranchB",
"Users": [
{ "_id": "112", "Name": "Susan" },
{ "_id": "112", "Name": "Mary" },
],
}
]
}
Collection.aggregate([
{
$addFields: {
branchUsers: {
$reduce: {
input: "$Branches.Users",
initialValue: [],
in: {
$concatArrays: ["$$this", "$$value"],
},
},
},
},
},
{
$addFields: {
user: {
$concatArrays: ["$branchUsers", "$Users"],
},
},
},
{
$addFields: {
"user.DocID": "$_id","user.DocDisaplyName": "$DisplayName"
},
},
{
$unwind: "$user",
},
{
$project: {
_id: 0,
user: 1,
},
}
])
Thanks in advance!
OK I found a solution.
{
$addFields: {
"branchUsers.BranchDisplayName": {
$let: {
vars: {
first: {
$arrayElemAt: [ "$Branches", 0 ]
}
},
in: "$$first.DisplayName"
}
}
}
},
This creates the field only for the users that belong to the branch
Record is database:
[
{
"title": "title1",
"author": [
{
"name": "user1",
"register": true
},
{
"name": "user2",
"register": true
}
],
"tags": [
"tag1",
"tag2",
"tag3"
]
},
{
"title": "title2",
"author": [
{
"name": "user1",
"register": true
},
{
"name": "user2",
"register": true
}
],
"tags": [
"tag1",
"tag2",
"tag3"
]
},
{
"title": "title3",
"author": [
{
"name": "user1",
"register": true
},
{
"name": "user2",
"register": true
}
],
"tags": [
"tag1",
"tag2",
"tag3"
]
}
]
expected output:
{"tag":"tag1", "titles":["title1","title2","title3"], "size":3}
{"tag":"tag2", "titles":["title2","title4"], "size":2}
Can someone help with aggregate query?
You can use group
$unwind to deconstruct the array
$group to regroup the based on tags
$project to show the desired output
Here is the code,
db.collection.aggregate([
{ "$unwind": "$tags" },
{
"$group": {
"_id": "$tags",
"titles": { "$push": "$title" }
}
},
{
$project: {
tag: "$_id",
titles: 1,
size: { $size: "$titles" },
_id: "$$REMOVE"
}
}
])
Working Mongo playground
The filteredAccording part and the categorizedBy is working as expected using the query which I provided in the link but I am facing issues in the findDistinct part.
In mongodb I have the following data:
{
"_id": 10001,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "ACTIVE",
"isMarked": true
},
{
"_id": 10002,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "NON-ACTIVE",
"isMarked": true
}
I wanted the response to be:
"university": [
{
"name": "Literature",
"values": [
{
"_id": 10001,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "ACTIVE",
"isMarked": true
},
{
"_id": 10002,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "NON-ACTIVE",
"isMarked": true
}
]
}
],
"findDistinct": [
{
"name": "Courses",
"values": [
"English",
"French"
]
},
{
"name": "Status",
"values": [
"ACTIVE",
"NON-ACTIVE"
]
}
]
I tried it using this link but the response is not coming as expected.
https://mongoplayground.net/p/XECZvRMmt3T
Right now, the response is coming like this
"universities": [
{
"name": "Literature",
"values": [
{
"_id": 10001,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "ACTIVE",
"isMarked": true
},
{
"_id": 10002,
"university": "SPYU",
"Courses": [
"English",
"French"
],
"dept": [
"Literature"
],
"type": [
"Autonomous"
],
"status": "NON-ACTIVE",
"isMarked": true
}
]
}
],
"findDistinct": [
{
"Courses": [
"English",
"French"
]
},
{
"status": [
"ACTIVE",
"NON-ACTIVE"
]
}
]
Any Help will be appreciated!!
Quick fixes in your query,
universities:
$addFields, remove $project and add only one operation for isMarked
$unwind deconstruct dept array
$group by dept and get values array of root
findDistinct:
$group by null and get unique courses array and status
$reduce to iterate loop of Courses nested array and get unique array using $setUnion
Make array of course and status in dest field
$unwind deconstruct dest array
$replaceRoot replace dest object to root
db.collection.aggregate([
{ $match: { university: "SPYU" }
},
{
$facet: {
universities: [
{ $addFields: { isMarked: { $in: ["French", "$Courses"] } } },
{ $unwind: "$dept" },
{
$group: {
_id: "$dept",
values: { $push: "$$ROOT" }
}
}
],
findDistinct: [
{
$group: {
_id: null,
Courses: { $addToSet: "$Courses" },
Status: { $addToSet: "$status" }
}
},
{
$project: {
_id: 0,
dist: [
{
name: "Courses",
values: {
$reduce: {
input: "$Courses",
initialValue: [],
in: { $setUnion: ["$$this", "$$value"] }
}
}
},
{
name: "Status",
values: "$Status"
}
]
}
},
{ $unwind: "$dist" },
{ $replaceRoot: { newRoot: "$dist" } }
]
}
}
])
Playground
I have a jsonb data of following format, with nested arrays
{
"outerArray": [
{
"price": {
"amount": 108.95,
"currencyCode": "GBP"
},
"innerArray": [
{
"details": {
"field1": "val1",
"field2": "val2",
"field3": "val3"
},
"otherDetail": {
"date": "2016-07-23",
"time": "19:43:00"
},
"innerMostArray": [
{
"A1": "A1"
},
{
"B1": "B1"
}
]
}
],
"someField": "values"
},
{
"price": {
"amount": 108.95,
"currencyCode": "GBP"
},
"innerArray": [
{
"details": {
"field1": "val1",
"field2": "val2",
"field3": "val3"
},
"otherDetail": {
"date": "2016-07-23",
"time": "19:43:00"
},
"innerMostArray": [
{
"A1": "A1"
},
{
"B1": "B1"
}
]
}
],
"someField": "values"
}
]
}
I want to write a retrieve query on this, to maintain same json structure but hide fields "price", "details" , "otherDetail"and "someField"
The retrieved result should look like this
{
"outerArray": [
{
"innerArray": [
{
"innerMostArray": [
{
"A1": "A1"
},
{
"B1": "B1"
}
]
}
]
},
{
"innerArray": [
{
"innerMostArray": [
{
"A1": "A1"
},
{
"B1": "B1"
}
]
}
]
}
]
}
Can this be done?
Please always specify a version of PostgreSQL you are using. An example below should work fine for versions v9.5+.
I would approach this by building a JSONB object you need with jsonb_build_object() and jsonb_build_array() functions:
Sample query:
WITH test(data) AS ( VALUES
('{
"outerArray": [
{
"price": {
"amount": 108.95,
"currencyCode": "GBP"
},
"innerArray": [
{
"details": {
"field1": "val1",
"field2": "val2",
"field3": "val3"
},
"otherDetail": {
"date": "2016-07-23",
"time": "19:43:00"
},
"innerMostArray": [
{
"A1": "A1"
},
{
"B1": "B1"
}
]
}
],
"someField": "values"
},
{
"price": {
"amount": 108.95,
"currencyCode": "GBP"
},
"innerArray": [
{
"details": {
"field1": "val1",
"field2": "val2",
"field3": "val3"
},
"otherDetail": {
"date": "2016-07-23",
"time": "19:43:00"
},
"innerMostArray": [
{
"A1": "A1"
},
{
"B1": "B1"
}
]
}
],
"someField": "values"
}
]}'::JSONB)
)
SELECT
jsonb_build_object(
'outerArray',
array_agg(
jsonb_build_object(
'innerArray',
json_build_array(
json_build_object(
'innerMostArray',
innerArray->'innerMostArray')
)
)
)
) as result
FROM test t,
jsonb_array_elements(t.data->'outerArray') as outerElement,
jsonb_array_elements(outerElement->'innerArray') as innerArray;
Result:
result
----------------------------------------------------------------------------------------------------------------------------------------------------------
{"outerArray": [{"innerArray": [{"innerMostArray": [{"A1": "A1"}, {"B1": "B1"}]}]}, {"innerArray": [{"innerMostArray": [{"A1": "A1"}, {"B1": "B1"}]}]}]}
(1 row)