I have documents in the format of
{
"_id": <some_id>,
"code": <some_code>,
"manually_updated": {
"code": <some_code>
}
}
I would like to find duplicates (group documents) by looking into root code value but also in manually_updated.code field. So the following three documents would be seen as duplicates (second document code was "overwritten" by adding the code to manually_updated with the same code as the first and third document):
{
{
"_id" : ObjectId("5d2dc168651ce400a327b408"),
"code": 'ABCD',
"manually_updated": {}
},
{
"_id" : ObjectId("5d40861411981f0068e22511"),
"code": 'EFGH',
"manually_updated": {
"code": "ABCD"
}
},
{
"_id" : ObjectId("5d41374311981f0163779b79"),
"code": 'ABCD',
"manually_updated": {}
}
}
Thank you.
Please try this :
db.getCollection('yourCollection').aggregate([{
$lookup:
{
from: "yourCollection",
let: { codeToBeCompared: "$code", manualCode: '$manually_updated.code' },
pipeline: [
{
$match:
{
$expr:
{
$or:
[
{ $eq: ["$code", "$$codeToBeCompared"] },
{ $eq: ["$manually_updated.code", "$$codeToBeCompared"] },
{ $and: [{ $gt: ['$manually_updated', {}] }, { $eq: ["$manually_updated.code", '$$manualCode'] }] }
]
}
}
}
],
as: "data"
}
}, { $group: { _id: '$code', manually_updated: { $push: '$manually_updated' }, finalData: { $first: '$$ROOT' } } }, { $match: { $expr: { $gt: [{ $size: "$finalData.data" }, 1] } } },
{ $project: { 'manually_updated': 1, 'data': '$finalData.data' } }])
Sample Docs :
/* 1 */
{
"_id" : ObjectId("5d2dc168651ce400a327b408"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 2 */
{
"_id" : ObjectId("5d40861411981f0068e22511"),
"code" : "EFGH",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 3 */
{
"_id" : ObjectId("5d41374311981f0163779b79"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 4 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd21"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 5 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd22"),
"code" : "APPPP",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 6 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd23"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 7 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd24"),
"code" : "deffffff",
"manually_updated" : {}
}
Output :
/* 1 */
{
"_id" : "APPPP",
"manually_updated" : [ // Preserving this to say we've passed thru these values
{},
{
"code": "ABCD"
},
{}
],
"data" : [
{
"_id": ObjectId("5d518a3ce8078d6134c4cd21"),
"code": "APPPP",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd23"),
"code": "APPPP",
"manually_updated": {}
}
]
}
/* 2 */
{
"_id" : "EFGH",
"manually_updated" : [
{
"code": "ABCD"
}
],
"data" : [
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
}
]
}
/* 3 */
{
"_id" : "ABCD",
"manually_updated" : [
{},
{}
],
"data" : [
{
"_id": ObjectId("5d2dc168651ce400a327b408"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d41374311981f0163779b79"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
}
]
}
Also this does scan on everything, you can have $match as first stage to filter documents based on a particular code.
Related
i have the following query:
GET index-*/_search
{
"size": 0,
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"bool": {
"should": [
{
"bool": {
"should": [
{
"match": {
"field1": "AP"
}
}
],
"minimum_should_match": 1
}
},
{
"bool": {
"should": [
{
"match": {
"field2": "SP"
}
}
],
"minimum_should_match": 1
}
}
]
}
},
{
"range": {
"cls_timestamp": {
"gte": "2022-09-01T00:00:00Z",
"lt": "2022-10-01T00:00:00Z",
"format": "strict_date_optional_time"
}
}
}
]
}
},
"aggs": {
"id_pratica": {
"terms": {
"field": "pratica_id.keyword",
"size": 10240
}
},
"TestCountBucket": {
"stats_bucket": {
"buckets_path": "id_pratica>_count"
}
}
}
}
With the above query I get the value of field "field1" (AP) and the value of field "field2" (SP).
The problem is that these value are into two separate documents.
The response is:
"aggregations" : {
"id_practice" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1582652d-8ddb-4795-9731-218b8373a709"
"doc_count" : 4
},
{
"key" : "23ef276e-58d7-41bb-b42f-fd5cd12015df",
"doc_count" : 4
},
{
"key" : "1540f170-44fa-451b-adae-3bd57e792b9c"
"doc_count" : 3
...
...
...
...
"TestCountBucket" : {
"count" : 59,
"min" : 1.0,
"max" : 4.0,
"avg" : 1.423728813559322,
"sum" : 84.0
where doc_count makes me count all practices that have string equal to AP and SP while I would need to filter in the buckets the practices that have at least one AP document and at least one SP document.
How can i do this?
thanks in advance.
EDIT:I solved it by using:
Scripted metric aggregation
Given the following document data in collection called 'blah'...
[
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"procedureCode" : "code1",
"description" : "Description 1",
"coding" : [
{
"system" : "ABC",
"code" : "L111"
},
{
"system" : "DEFG",
"code" : "S222"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"procedureCode" : "code2",
"description" : "Description 2",
"coding" : [
{
"system" : "ABC",
"code" : "L999"
},
{
"system" : "DEFG",
"code" : "X3333"
}
]
}
]
What I want to get is all of the coding elements where system is ABC for all parents, and an array of codes like so.
[
{ "code": "L111" },
{ "code": "L999" },
]
If I use db.getCollection('blah').find({"coding.system": "ABC"}) I get the parent document with any child in the coding array of ICD.
If I use...
db.getCollection("blah")
.find({ "coding.system": "ABC" })
.projection({ "coding.code": 1 })
I do get the parent documents which have a child with a system of "ABC", but the coding for "DEFG" seems to come along for the ride too.
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"coding" : [
{
"code" : "L989"
},
{
"code" : "S102"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"coding" : [
{
"code" : "L989"
},
{
"code" : "X382"
}
]
}
I have also tried experimenting with:
db.getCollection("blah").aggregate(
{ $unwind: "$coding" },
{ $match: { "system": "ICD" } }
);
.. as per this page: mongoDB query to find the document in nested array
... but go no where fast with that approach. i.e. no records at all.
What query do I need, please, to achieve something like this..?
[
{ "code": "L111" },
{ "code": "L999" },
...
]
or even better, this..?
[
"L111",
"L999",
...
]
db.collection.aggregate([
{
$match: { "coding.system": "ABC" }
},
{
$unwind: "$coding"
},
{
$match: { "coding.system": "ABC" }
},
{
$project: { code: "$coding.code" }
}
])
mongoplayground
db.collection.aggregate([
{
$match: { "coding.system": "ABC" }
},
{
$unwind: "$coding"
},
{
$match: { "coding.system": "ABC" }
},
{
$group: {
_id: null,
coding: { $push: "$coding.code" }
}
}
])
mongoplayground
Instead of $unwind, $match you can also use $filter:
db.collection.aggregate([
{ $match: { "coding.system": "ABC" } },
{
$project: {
coding: {
$filter: {
input: "$coding",
cond: { $eq: [ "$$this.system", "ABC" ] }
}
}
}
}
])
I have a document in MongoDB 3.4 with the following structure:
{
"_id" : ObjectId("5e3419e468d01013eadb83dc"),
"id_station" : "62",
"fiware_service" : null,
"fiware_servicepath" : null,
"id_fiware_name" : "CE_del_medio",
"attrName" : "15",
"attrType" : "float",
"attrValue" : 0.33,
"id_sensor_station_absolute" : "15_62",
"recvTimeTs" : 1580387045,
"recvTime" : "2020-01-30T12:24:05.00Z",
"id_fiware" : "15",
"sensor_type" : [
{
"name" : "id",
"type" : "String",
"value" : "15"
},
{
"name" : "img",
"type" : "String",
"value" : "assets/img/contrast.png"
},
{
"name" : "manufacturer",
"type" : "String",
"value" : "Hortisis"
},
{
"name" : "medida",
"type" : "String",
"value" : "mS/cm"
},
{
"name" : "name_comun",
"type" : "String",
"value" : "CE del medio"
},
{
"name" : "place",
"type" : "String",
"value" : "interior"
},
{
"name" : "timestamp",
"type" : "DateTime",
"value" : "2020-01-30T12:24:05.00Z"
},
{
"name" : "type",
"type" : "String",
"value" : "fertigation"
}
]
}
I need to convert the sensor_type field to an array with only one object, as follows:
{
"_id":"15_62",
"medidas":[
{
"_id":"5e3419e468d01013eadb83dc",
"marca":"Hortisis",
"modelo":"Estacion",
"fabricante":"Hortisis",
"id_station":"15",
"sensor_type":[
{
"name":"15",
"type":"fertigation",
"place":"interior",
"img":"assets/img/contrast.png",
"name_comun":"Temp. Suelo",
"medida":"mS/cm"
}
],
"attrName":"15",
"attrValue":0.33,
"recvTimeTs":1580387045,
"recvTime":"2020-01-30T12:24:05.00Z",
"id_sensor_station_absolute":"15_62"
}
]
}
As you can really see it is formatting the sensor_type field = name : value.
I'm working with NODEJS and mongoose.
This is my query: (first I search, sort, only show the first value and then with the project I give format, the problem is that I don't know how to tell the project to put that format if I put "sensor_type": "$latest.attributes.name") it only shows the names and I don't know how to put it in the mentioned format.
Datagreenhouse.aggregate([
{ "$match": { "id_sensor_station_absolute": { "$in": array3 } } }, // "id_station": { "$in": id_station },
{ "$sort": { "recvTime": -1 } },
{
"$group": {
"_id": "$id_sensor_station_absolute",
"latest": { "$first": "$$ROOT" },
}
},
{
"$project": {
"_id": 1,
"id_station": "$latest.id_station",
//"id_sensor_station_absolute": "$id_sensor_station_absolute",
"attrName": "$latest.attrName",
"attrValue": "$latest.attrValue",
"recvTimeTs": "$latest.recvTimeTs",
"recvTime": "$latest.recvTime",
"id_sensor_station_absolute": "$latest.id_sensor_station_absolute",
"sensor_type": "$latest.attributes",
"name": { $arrayElemAt: ["$latest.attributes", 0] },
"type": { $arrayElemAt: ["$latest.attributes", 1] },
"place": { $arrayElemAt: ["$latest.attributes", 2] },
"img": { $arrayElemAt: ["$latest.attributes", 1] },
"name_comun": { $arrayElemAt: ["$latest.attributes", 4] },
"medida": { $arrayElemAt: ["$latest.attributes", 3] },
"interfaz": { $arrayElemAt: ["$latest.attributes", 6] },
}
}
], (err, DatagreenhouseRecuperado) => {
if (err) return res.status(500).send({ message: 'Error al realizar la peticion' + err })
if (!DatagreenhouseRecuperado) return res.status(404).send({ message: 'Error el usuario no existe' })
res.status(200).send({ DatagreenhouseRecuperado })
})
Thank you for your help. Best regards.
Since version 3.4.4, MongoDB introduced a magnific operator: $arrayToObject
This operator allows us transmute array key:value pair into object.
Syntax
RAW DATA $map $arrayToObject
sensor_type : [ sensor_type : [ sensor_type : {
{ \ { \
"name" : "manufacturer", ----> k: "manufacturer", --->
"type" : "String", / v: "Hortisis" / "manufacturer" : "Hortisis"
"value" : "Hortisis"
} }
] ] }
db.datagreenhouses.aggregate([
{
"$match": {} // setup your match criteria
},
{
"$sort": {
"recvTime": -1
}
},
{
$group: {
_id: "$id_sensor_station_absolute",
medidas: {
$push: {
_id: "$_id",
"marca": "Hortisis", // don't know where you get this value
"modelo": "Estacion", // don't know where you get this value
"id_station": "$id_station",
"attrName": "$attrName",
"attrValue": "$attrValue",
"recvTimeTs": "$recvTimeTs",
"recvTime": "$recvTime",
"id_sensor_station_absolute": "$id_sensor_station_absolute",
"sensor_type": {
$arrayToObject: {
$map: {
input: "$sensor_type",
in: {
k: "$$this.name",
v: "$$this.value"
}
}
}
}
}
}
}
}
])
MongoPlayground
[
{
"_id": "15_62",
"medidas": [
{
"_id": ObjectId("5e3419e468d01013eadb83dc"),
"attrName": "15",
"attrValue": 0.33,
"id_sensor_station_absolute": "15_62",
"id_station": "62",
"marca": "Hortisis",
"modelo": "Estacion",
"recvTime": "2020-01-30T12:24:05.00Z",
"recvTimeTs": 1.580387045e+09,
"sensor_type": {
"id": "15",
"img": "assets/img/contrast.png",
"manufacturer": "Hortisis",
"medida": "mS/cm",
"name_comun": "CE del medio",
"place": "interior",
"timestamp": "2020-01-30T12:24:05.00Z",
"type": "fertigation"
}
}
]
}
]
All you need to do is transform data to the desired result with an easy to handle object ($unwind medidas field, transform and then $group again)
Note: If your MongoDB is earlier 3.4.4 version, follow update procedure:
Install MongoDB 3.4.4 or newer
Make mongodump with new version MongoBD
Stop old MongoBD
Remove /data directory (make backup)
Start new MongoDB and run mongorestore
This question already has an answer here:
Handling unwind for the non existing embedded document [duplicate]
(1 answer)
Closed 3 years ago.
If country doesn't have reference states and cities. $unwind removes country name from the collections.
Expected Output will be Mongodb should return country name even if the country doesn't any states and cities reference.
Country Collection:
[
{
"_id": "5d052c76df076d23a48d4a3b",
"name": "India"
},
{
"_id": "5d052c76df076d23a48d4b07",
"name": "Indonesia"
},
{
"_id": "5d052c76df076d23a48d22f4",
"name": "Iran"
}
]
State Collection:
[
{
"_id": "5d2236c37ed1112b3cc41397",
"name": "Andaman and Nicobar Islands",
"countryId": "5d052c76df076d23a48d4a3b"
},
{
"_id": "5d2236c37ed1112b3cc41398",
"name": "Andhra Pradesh",
"countryId": "5d052c76df076d23a48d4a3b"
}
]
City Collection:
[
{
"name": "Port Blair",
"stateId": "5d2236c37ed1112b3cc41397"
},
{
"name": "Adoni",
"stateId": "5d2236c37ed1112b3cc41398"
}
]
Query:
Country.aggregate([
{
$lookup:{
from: 'states',
localField:'_id',
foreignField:'countryId',
as:'states'
}
},
{
$unwind: {
path: "$states"
}
},
{
$lookup:{
from: 'cities',
localField:'states._id',
foreignField:'stateId',
as:'states.cities'
}
},
{
$group: {
_id: {
_id: '$_id',
name: '$name'
},
states: {
$push: '$states'
}
}
},
{
$project: {
_id: '$_id._id',
name: '$_id.name',
states: 1
}
}
])
Output:
[
{
"_id":"5d052c76df076d23a48d4a3b",
"name":"India",
"states":[
{
"_id":"5d2236c37ed1112b3cc41397",
"name":"Andaman and Nicobar Islands",
"countryId":"5d052c76df076d23a48d4a3b",
"cities":[
{
"name":"Port Blair",
"stateId":"5d2236c37ed1112b3cc41397"
}
]
},
{
"_id":"5d2236c37ed1112b3cc41398",
"name":"Andhra Pradesh",
"countryId":"5d052c76df076d23a48d4a3b",
"cities":[
{
"name":"Adoni",
"stateId":"5d2236c37ed1112b3cc41398"
}
]
}
]
}
]
Expected Output:
[
{
"_id":"5d052c76df076d23a48d4a3b",
"name":"India",
"states":[
{
"_id":"5d2236c37ed1112b3cc41397",
"name":"Andaman and Nicobar Islands",
"countryId":"5d052c76df076d23a48d4a3b",
"cities":[
{
"name":"Port Blair",
"stateId":"5d2236c37ed1112b3cc41397"
}
]
},
{
"_id":"5d2236c37ed1112b3cc41398",
"name":"Andhra Pradesh",
"countryId":"5d052c76df076d23a48d4a3b",
"cities":[
{
"name":"Adoni",
"stateId":"5d2236c37ed1112b3cc41398"
}
]
}
]
},
{
"_id":"5d052c76df076d23a48d4b07",
"name":"Indonesia",
"states":[
]
},
{
"_id":"5d052c76df076d23a48d22f4",
"name":"Iran",
"states":[
]
}
]
just add " preserveNullAndEmptyArrays: true " to $unwind
Country.aggregate([
{
$lookup:{
from: 'states',
localField:'_id',
foreignField:'countryId',
as:'states'
}
},
{
$unwind: {
path: "$states",
preserveNullAndEmptyArrays: true
}
},
{
$lookup:{
from: 'cities',
localField:'states._id',
foreignField:'stateId',
as:'states.cities'
}
},
{
$group: {
_id: {
_id: '$_id',
name: '$name'
},
states: {
$push: '$states'
}
}
},
{
$project: {
_id: '$_id._id',
name: '$_id.name',
states: 1
}
}
])
output
[
{
"states" : [
{
"cities" : []
}
],
"_id" : "5d052c76df076d23a48d22f4",
"name" : "Iran"
},
{
"states" : [
{
"cities" : []
}
],
"_id" : "5d052c76df076d23a48d4b07",
"name" : "Indonesia"
},
{
"states" : [
{
"_id" : "5d2236c37ed1112b3cc41397",
"name" : "Andaman and Nicobar Islands",
"countryId" : "5d052c76df076d23a48d4a3b",
"cities" : [
{
"_id" : ObjectId("5d38ccb6f9c5fa48bf099027"),
"name" : "Port Blair",
"stateId" : "5d2236c37ed1112b3cc41397"
}
]
},
{
"_id" : "5d2236c37ed1112b3cc41398",
"name" : "Andhra Pradesh",
"countryId" : "5d052c76df076d23a48d4a3b",
"cities" : [
{
"_id" : ObjectId("5d38ccbcf9c5fa48bf09902a"),
"name" : "Adoni",
"stateId" : "5d2236c37ed1112b3cc41398"
}
]
}
],
"_id" : "5d052c76df076d23a48d4a3b",
"name" : "India"
}
]
Json Structure:
"_id" : ObjectId("55d6cb28725f3019a5241781"),
"Number" : {
"value" : "1234567",
},
"DeviceID" : {
"value" : "01",
}
"type" : {
"value" : "ce06"}
Now i want to find only those keys document which start from /dev/.
i tried this script:
var cur = db.LIVEDATA.find({"ProductIMEIno.value":"359983007488004"});
cur.forEach(function(doc){
var keynames = Object.keys(doc);
print('the length is '+keynames.length);
for(var i=0;i<keynames.length;i++){
if(keynames[i].match(/Dev/)){
print("the name is "+keynames); }}} )
but this is not working properly.
Desired Output;
Only this document should show on the basis of key name search.
"DeviceID" : {
"value" : "01",
MongoDB isn't designed to find keys dynamically like this; it's much easier to use it to find values dynamically, so you could restructure your data structure to allow this:
"_id" : ObjectId("55d6cb28725f3019a5241781"),
"data" : [
{
"key" : "Number",
"value" : "1234567",
},
{
"key": "DeviceID",
"value" : "01",
},
{
"key" : "type",
"value" : "ce06"
}
]
Then you will be able to query it like this:
db.LIVEDATA.aggregate([
{$match: {"ProductIMEIno.value":"359983007488004"}},
{$unwind: "$data"},
{$match: {"data.key" : /^dev/i }}
]);
That will return data structured like this:
{
"_id" : ObjectId("55d6cb28725f3019a5241781"),
"data" : {
"key" : "DeviceID",
"value" : "01"
}
}
Suppose you have a data collection like this:
[
{
"Number": {
"value": "1234567"
},
"DeviceID": {
"value": "01"
},
"DeviceID2": {
"value": "01",
"name": "abc123"
},
"type": {
"value": "ce06"
}
},
{
"Number": {
"value": "1234568"
},
"DeviceID": {
"value": "02"
},
"type": {
"value": "ce07"
}
}
]
You can use following aggregation:
db.collection.aggregate([
{
"$match": {}
},
{
"$addFields": {
"root_key_value_list": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$unwind": "$root_key_value_list"
},
{
"$match": {
"root_key_value_list.k": {
"$regex": "^Dev"
}
}
},
{
"$group": {
"_id": "$_id",
"root_key_value_list": {
"$push": "$root_key_value_list"
}
}
},
{
"$project": {
"root": {
"$arrayToObject": "$root_key_value_list"
}
}
},
{
"$replaceRoot": {
"newRoot": "$root"
}
}
])
the result will be:
[
{
"DeviceID": {
"value": "01"
},
"DeviceID2": {
"name": "abc123",
"value": "01"
}
},
{
"DeviceID": {
"value": "02"
}
}
]
playground:
https://mongoplayground.net/p/z5EeHALCqzy