How to delete duplicates from 2nd level of array in Mongo DB

How to delete duplicates from 2nd level of array in Mongo DB - mongodb

I have duplicates in 2nd level of array.
In the 2nd-level array, all fields of the elements contain the same data except _id. In some cases _id is also the same.
Please suggest how to delete duplicates from 2nd level of array by keeping one element.
In the example below we can treat AssessmentName as a unique field in the Bhra array.
Mongo Version : 3.2.8
Example:
{
"_id" : ObjectId("592415c434810eeb63afe029"),
"Encounter" : [
{
"_id" : ObjectId("5846c6361489b54e402d76f6"),
"Bhra" : [
{
"_id" : "15445853419048538e0ba2cd",
"AssessmentId" : 1,
"AssessmentName" : "Major Depressive Episode",
"AssessmentOrder" : 1,
"IsContinue" : true,
"IsNotAssessed" : false,
"TotalScore" : 9,
"DepressionSeverity" : "Mild depression",
"AssessmentResult" : "Negative",
"Notes" : ""
},
{
"_id" : "15445853419048538e0ba2aa",
"AssessmentId" : 1,
"AssessmentName" : "Major Depressive Episode",
"AssessmentOrder" : 1,
"IsContinue" : true,
"IsNotAssessed" : false,
"TotalScore" : 9,
"DepressionSeverity" : "Mild depression",
"AssessmentResult" : "Negative",
"Notes" : ""
},
{
"_id" : "15445853419048538e0ba2aa",
"AssessmentId" : 1,
"AssessmentName" : "Major Depressive Episode",
"AssessmentOrder" : 1,
"IsContinue" : true,
"IsNotAssessed" : false,
"TotalScore" : 9,
"DepressionSeverity" : "Mild depression",
"AssessmentResult" : "Negative",
"Notes" : ""
}
]
}
]
}
Output should be as follows.
{
"_id" : ObjectId("592415c434810eeb63afe029"),
"Encounter" : [
{
"_id" : ObjectId("5846c6361489b54e402d76f6"),
"Bhra" : [
{
"_id" : "15445853419048538e0ba2cd",// any _id is fine
"AssessmentId" : 1,
"AssessmentName" : "Major Depressive Episode",
"AssessmentOrder" : 1,
"IsContinue" : true,
"IsNotAssessed" : false,
"TotalScore" : 9,
"DepressionSeverity" : "Mild depression",
"AssessmentResult" : "Negative",
"Notes" : ""
}
]
}
]
}
Thanks,
Rao

You can use aggregation. First, you need to $unwind the data twice to reach the 2nd-level array, and then use $group to keep only the distinct items of that array. Finally, $project the result into the required JSON format.
// De-duplicates the entries of Encounter.Bhra and writes the result to a new collection.
db.your_collection.aggregate([
// Flatten both array levels: one document per (Encounter, Bhra) pair.
{"$unwind":"$Encounter"},
{"$unwind":"$Encounter.Bhra"},
// The group key holds the document _id, the Encounter _id and every Bhra field
// except Bhra._id, so entries that differ only by Bhra._id fall into one group.
{"$group":{
_id: {
_id:"$_id",
"Encounter_id":"$Encounter._id",
"Encounter_Bhra_AssessmentId":"$Encounter.Bhra.AssessmentId",
"Encounter_Bhra_AssessmentName":"$Encounter.Bhra.AssessmentName",
"Encounter_Bhra_AssessmentOrder":"$Encounter.Bhra.AssessmentOrder",
"Encounter_Bhra_IsContinue":"$Encounter.Bhra.IsContinue",
"Encounter_Bhra_IsNotAssessed":"$Encounter.Bhra.IsNotAssessed",
"Encounter_Bhra_TotalScore":"$Encounter.Bhra.TotalScore",
"Encounter_Bhra_DepressionSeverity":"$Encounter.Bhra.DepressionSeverity",
"Encounter_Bhra_AssessmentResult":"$Encounter.Bhra.AssessmentResult",
"Encounter_Bhra_Notes":"$Encounter.Bhra.Notes"
},
// Keep one Bhra._id per group: the smallest one.
"Encounter_Bhra_id":{"$min":"$Encounter.Bhra._id"}
}
},
// Move the grouped values back to their original field names.
// NOTE: Encounter and Bhra are emitted here as embedded documents, not as
// arrays, and a document with several distinct Bhra entries yields several
// output documents sharing the same _id.
{"$project":{
_id:"$_id._id",
Encounter : {
_id:"$_id.Encounter_id",
Bhra : {
_id : "$Encounter_Bhra_id",
AssessmentId : "$_id.Encounter_Bhra_AssessmentId",
AssessmentName : "$_id.Encounter_Bhra_AssessmentName",
AssessmentOrder : "$_id.Encounter_Bhra_AssessmentOrder",
IsContinue : "$_id.Encounter_Bhra_IsContinue",
IsNotAssessed : "$_id.Encounter_Bhra_IsNotAssessed",
TotalScore : "$_id.Encounter_Bhra_TotalScore",
DepressionSeverity : "$_id.Encounter_Bhra_DepressionSeverity",
AssessmentResult : "$_id.Encounter_Bhra_AssessmentResult",
Notes : "$_id.Encounter_Bhra_Notes"
}
}
}
},
// Write the pipeline result into the collection named here.
{$out: "your_new_coll_name"}
])
Output:
{
"_id" : ObjectId("592415c434810eeb63afe029"),
"Encounter" : {
"_id" : ObjectId("5846c6361489b54e402d76f6"),
"Bhra" : {
"_id" : "15445853419048538e0ba2aa",
"AssessmentId" : 1,
"AssessmentName" : "Major Depressive Episode",
"AssessmentOrder" : 1,
"IsContinue" : true,
"IsNotAssessed" : false,
"TotalScore" : 9,
"DepressionSeverity" : "Mild depression",
"AssessmentResult" : "Negative",
"Notes" : ""
}
}
}

Related

preserving fields collapsed by $group

I want to summarize a set of documents by counting on a field named code. How can I summarize my data and preserve details from the original documents?
The pipeline input contains the documents below.
{
"_id" : ObjectId("5ff38e0eb09dec2cbce14760"),
"code" : "U",
"date" : ISODate("2021-04-09T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"student_id" : 9441
}
{
"_id" : ObjectId("5ff38e0eb09dec2cbce14807"),
"code" : "E",
"date" : ISODate("2020-11-02T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"student_id" : 9441
}
{
"_id" : ObjectId("5ff39854b09dec2cbce1494c"),
"code" : "E",
"date" : ISODate("2020-11-03T08:00:00.000+0000"),
"full_day" : true,
"remote" : false,
"student_id" : 9441
}
The desired output groups by code, promotes student_id to the root level, and nests the other details in a details array:
{
"code" : "U",
"student_id": 9441,
"count" : 1.0,
"details" : [
{
"date" : ISODate("2021-04-09T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
}
]
}
{
"code" : "E",
"student_id": 9441,
"count" : 2.0,
"details" : [
{
"date" : ISODate("2020-11-02T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
},
{
"date" : ISODate("2020-11-03T08:00:00.000+0000"),
"full_day" : true,
"remote" : false,
}
]
}
Combining $group and $push I've only been able to produce:
{
"_id" : "U",
"count" : 1.0,
"details" : [
{
"date" : ISODate("2021-04-09T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"student_id" : 9441
}
]
}
{
"_id" : "E",
"count" : 2.0,
"details" : [
{
"date" : ISODate("2020-11-02T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"student_id" : 9441
},
{
"date" : ISODate("2020-11-03T08:00:00.000+0000"),
"full_day" : true,
"remote" : false,
"student_id" : 9441.0
}
]
}
The results above were achieved with this pipeline:
[
{
"$match" : {
"student_id" : 9441.0
}
},
{
"$group" : {
"_id" : "$code",
"count" : {
"$sum" : 1.0
},
"details" : {
"$push" : {
"date" : "$date",
"full_day" : "$full_day",
"remote" : "$remote",
"student_id" : "$student_id"
}
}
}
},
{
"$addFields" : {
"student_id" : "$student_id"
}
}
]

If you expect all of the input documents to have the same value for a field, and want that field to be included in the $group output, use the $first accumulation operator:
// $group stage: one output document per distinct code value.
{
"$group" : {
"_id" : "$code",
// $first takes student_id from the first document of each group.
"student_id" : {$first: "$student_id"},
// Adds 1 per input document, i.e. counts the documents in the group.
"count" : {
"$sum" : 1.0
},
// Collects the per-document details; student_id is left out here because
// it is already promoted to the group level above.
"details" : {
"$push" : {
"date" : "$date",
"full_day" : "$full_day",
"remote" : "$remote"
}
}
}
}
If you need to rename _id back to code, use a $project stage after the group.

Mongo db query for multiple conditions

I have a Mongodb Json which look like this
{
"_id" : "5b862ebecebe455a1744",
"userId" : "111",
"courses" : [
{
"stateName" : "statge 1",
"courseId" : "1453",
"courseName" : "Program Training 1",
"duration" : 1,
"lag" : 0,
"courseType" : "1",
"transitionType" : "onComplete",
"scheduledStartDate" : ISODate("2018-07-27T16:23:14.000+05:30"),
"scheduledEndDate" : ISODate("2018-07-27T16:23:14.000+05:30"),
"courseProgress" : 0,
"ASD" : ISODate("2018-09-17T23:18:30.636+05:30"),
"score" : 0
},
{
"stateName" : "stage 2",
"courseId" : "1454",
"courseName" : "Program Assessment 1",
"duration" : 1,
"lag" : 0,
"courseType" : "2",
"transitionType" : "onComplete",
"scheduledStartDate" : ISODate("2018-07-28T16:23:14.000+05:30"),
"scheduledEndDate" : ISODate("2018-07-28T16:23:14.000+05:30"),
"courseProgress" : 0,
"score" : 0
},
{
"stateName" : "stage 3",
"courseId" : "911",
"courseName" : "Program Training 3",
"duration" : 1,
"lag" : 0,
"courseType" : "1",
"transitionType" : "onComplete",
"scheduledStartDate" : ISODate("2018-07-29T16:23:14.000+05:30"),
"scheduledEndDate" : ISODate("2018-07-29T16:23:14.000+05:30"),
"courseProgress" : 0,
"score" : 0
}
],
"userStatus" : 1,
"modified" : ISODate("2018-09-12T11:49:47.400+05:30"),
"created" : ISODate("2018-09-12T11:49:47.400+05:30"),
"completionStatus" : "IP",
"currentState" : {
"courseProgress" : 0,
"stateName" : "statge 1",
"courseId" : "1453",
"courseName" : "Program Training 1"
}
}
I want a query that matches the conditions below. Please help, as I am new to MongoDB.
courses.transitionType = oncomplete
(PROGRESS<100||(PROGRESS==100&&ASD exists false))
And print Result something like this which contain these below data
{
"_id" : "5b862ebecebe455a1744",
"courseData" : {
"userId" : "4688",
"courseId" : "1476",
"courseProgress" : 0
}
}

You will have to use an aggregation with a $match stage and a $project to format your result.
The tricky part of your request is that you want an answer by course, but 1 item of your collection contains many courses. So first, you can use the $unwind stage to separate every course
// Lists every course whose transitionType is onComplete and whose progress is
// either below 100, or exactly 100 with no ASD field present on the course.
// Replace collectionName with the name of your collection.
db.collectionName.aggregate([
  // One output document per element of the courses array.
  { $unwind: '$courses' },
  {
    $match: {
      'courses.transitionType': 'onComplete',
      $or: [
        { 'courses.courseProgress': { $lt: 100 } },
        {
          'courses.courseProgress': 100,
          // ASD is stored inside each course, so it needs the courses. prefix.
          'courses.ASD': { $exists: false }
        }
      ]
    }
  },
  {
    $project: {
      // Keep the document _id, as shown in the requested result.
      _id: 1,
      courseData: {
        // userId is a top-level field of the document, not part of the course.
        userId: '$userId',
        courseId: '$courses.courseId',
        courseProgress: '$courses.courseProgress'
      }
    }
  }
])

Mongodb update nested array by id

I have the following document and want to update state
Document ID: ObjectId("5a4e5a448b70d50e34d204a5")
Target ID: ObjectId("5a4e5a438b70d50e34d203ea")
I have no idea how to update the state to e.g. 4
{
"_id" : ObjectId("5a4e5a448b70d50e34d204a5"),
"name" : "Wirtschaftsdienst",
"date" : ISODate("2012-10-07T00:00:00.000Z"),
"comment" : null,
"tasks" : [
{
"name" : "Speisen und Getränke",
"sections" : [
{
"start" : 46800,
"end" : 72000,
"entirely" : true,
"assistants" : [
{
"assistant" : {
"_id" : ObjectId("5a4e5a438b70d50e34d203ea")
},
"state" : 3
},
{
"assistant" : {
"_id" : ObjectId("5a4e5a438b70d50e34d203f4")
},
"state" : 3
}
]
}
]
}
]
}

Use the all-positional operator $[] together with the filtered positional operator $[<identifier>] and arrayFilters (available since MongoDB 3.6) to get your job done!
Try this query:
// Sets state to 4 for the assistant entry with the given _id, wherever it sits
// inside tasks[].sections[].assistants[]. arrayFilters requires MongoDB 3.6+.
db.collection.update(
  { "_id": ObjectId("5a4e5a448b70d50e34d204a5") },
  // $[] visits every task and every section; $[element] touches only the
  // assistants entries accepted by the arrayFilters condition below.
  { $set: { "tasks.$[].sections.$[].assistants.$[element].state": 4 } },
  {
    // Dot notation matches on the _id alone. Comparing against the whole
    // {"_id": ...} sub-document would stop matching as soon as assistant
    // carries any additional field.
    arrayFilters: [
      { "element.assistant._id": ObjectId("5a4e5a438b70d50e34d203ea") }
    ],
    multi: true
  }
)
And the output is:
/* 1 */
{
"_id" : ObjectId("5a4e5a448b70d50e34d204a5"),
"name" : "Wirtschaftsdienst",
"date" : ISODate("2012-10-07T00:00:00.000Z"),
"comment" : null,
"tasks" : [
{
"name" : "Speisen und Getränke",
"sections" : [
{
"start" : 46800,
"end" : 72000,
"entirely" : true,
"assistants" : [
{
"assistant" : {
"_id" : ObjectId("5a4e5a438b70d50e34d203ea")
},
"state" : 4.0
},
{
"assistant" : {
"_id" : ObjectId("5a4e5a438b70d50e34d203f4")
},
"state" : 3.0
}
]
}
]
}
]
}

What index to be added in MongoDB to support $elemMatch query on embedded document

Suppose we have a following document
{
embedded:[
{
email:"abc#abc.com",
active:true
},
{
email:"def#abc.com",
active:false
}]
}
What indexing should be used to support $elemMatch query on email and active field of embedded doc.
Update on question :-
db.foo.aggregate([{"$match":{"embedded":{"$elemMatch":{"email":"abc#abc.com","active":true}}}},{"$group":{_id:null,"total":{"$sum":1}}}],{explain:true});
on querying this i am getting following output of explain on aggregate :-
{
"stages" : [
{
"$cursor" : {
"query" : {
"embedded" : {
"$elemMatch" : {
"email" : "abc#abc.com",
"active" : true
}
}
},
"fields" : {
"_id" : 0,
"$noFieldsNeeded" : 1
},
"planError" : "InternalError No plan available to provide stats"
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"total" : {
"$sum" : {
"$const" : 1
}
}
}
}
],
"ok" : 1
}
I think MongoDB is not using the index internally for this query.
Thanx in advance :)
Update on output of db.foo.stats()
db.foo.stats()
{
"ns" : "test.foo",
"count" : 2,
"size" : 480,
"avgObjSize" : 240,
"storageSize" : 8192,
"numExtents" : 1,
"nindexes" : 3,
"lastExtentSize" : 8192,
"paddingFactor" : 1,
"systemFlags" : 0,
"userFlags" : 1,
"totalIndexSize" : 24528,
"indexSizes" : {
"_id_" : 8176,
"embedded.email_1_embedded.active_1" : 8176,
"name_1" : 8176
},
"ok" : 1
}
db.foo.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.foo"
},
{
"v" : 1,
"key" : {
"embedded.email" : 1,
"embedded.active" : 1
},
"name" : "embedded.email_1_embedded.active_1",
"ns" : "test.foo"
},
{
"v" : 1,
"key" : {
"name" : 1
},
"name" : "name_1",
"ns" : "test.foo"
}
]

Should you decide to stick to that data model and your queries, here's how to create indexes that match the query:
You can simply index "embedded.email", or use a compound key of embedded indexes, i.e. something like
> db.foo.ensureIndex({"embedded.email" : 1 });
- or -
> db.foo.ensureIndex({"embedded.email" : 1, "embedded.active" : 1});
Indexing boolean fields is often not too useful, since their selectivity is low.

Mongodb pull data from subarray

Hi I have below mongodb collection
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(50),
"status" : NumberLong(3),
"bus_id" : NumberLong(8)
},
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
I want to pull the sub-array element where bus_id=8.
Final result i want to be like this
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
When i tried with below query
db.collectionname.update({},{$pull: {buses: {bus_id:8}}},{multi: true})
I got below error in console,
Cannot apply $pull/$pullAll modifier to non-array
Can any one please suggest me how to achieve this,and also need php mongodb query also.
Thanks in Advance

Worked fine for me for your sample document:
> db.bus.findOne()
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(50),
"status" : NumberLong(3),
"bus_id" : NumberLong(8)
},
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
> db.bus.update({}, { "$pull" : { "buses" : { "bus_id" : 8 } } }, { "multi" : true })
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })
> db.bus.findOne()
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
The cause of the problem is that in some documents the buses field is not an array. What does the query
> db.bus.find({ "buses.0" : { "$exists" : 0}, "buses" : { "$ne" : [] } })
return? This query finds documents where there is no 0th element of the array and the array is not empty, so it should return documents where buses is not an array.

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

How to delete duplicates from 2nd level of array in Mongo DB - mongodb

Related

preserving fields collapsed by $group

Mongo db query for multiple conditions

Mongodb update nested array by id

What index to be added in MongoDB to support $elemMatch query on embedded document

Mongodb pull data from subarray

Categories

Resources