opensearch - how to update index transform - opensearch

We have an index transform job that needs to be updated between product versions.
We use the REST command below to create the transform job:
PUT _plugins/_transform/transform_test
{
"transform": {
"enabled": true,
"schedule": {
"interval": {
"period": 1,
"unit": "Minutes",
"start_time": 1602100553
}
},
"description": "create index device_usage_summary",
"source_index": "device_usage_raw-*",
"target_index": "transform_test",
"data_selection_query": {
"bool": {
"filter": [
{
"range": {
"message_header.occurred_message_display_time_utc_rfc3339": {
"gte": "now-90d/d"
}
}
}
]
}
},
"page_size": 1,
"groups": [
{
"terms": {
"source_field": "device_information.removable_instance_path",
"target_field": "usage_data.removable_instance_path"
}
},
{
"terms": {
"source_field": "message_header.global_tenant_id",
"target_field": "usage_data.global_tenant_id"
}
}
],
"aggregations": {
"usage_data.data_read_kb": {
"sum": {
"field": "usage_data.data_read_kb"
}
},
"usage_data.data_write_kb": {
"sum": {
"field": "usage_data.data_write_kb"
}
}
}
}
}
To update the transform job, we modify some of the fields and then run the SAME command again, as described in the OpenSearch documentation on updating a transform job (https://opensearch.org/docs/latest/im-plugin/index-transforms/transforms-apis/#update-a-transform-job).
However, we get the following error:
{
"error" : {
"root_cause" : [
{
"type" : "version_conflict_engine_exception",
"reason" : "[transform_test]: version conflict, document already exists (current version [1])",
"index" : ".opendistro-ism-config",
"shard" : "4",
"index_uuid" : "XYKR62x6QBmt8C9qcHONnQ"
}
],
"type" : "version_conflict_engine_exception",
"reason" : "[transform_test]: version conflict, document already exists (current version [1])",
"index" : ".opendistro-ism-config",
"shard" : "4",
"index_uuid" : "XYKR62x6QBmt8C9qcHONnQ"
},
"status" : 409
}
So our question is: how do we update a transform job once it already exists?

Related

Boolean Query with match by multiple fields (in two different document)

I have the following query:
GET index-*/_search
{
"size": 0,
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"bool": {
"should": [
{
"bool": {
"should": [
{
"match": {
"field1": "AP"
}
}
],
"minimum_should_match": 1
}
},
{
"bool": {
"should": [
{
"match": {
"field2": "SP"
}
}
],
"minimum_should_match": 1
}
}
]
}
},
{
"range": {
"cls_timestamp": {
"gte": "2022-09-01T00:00:00Z",
"lt": "2022-10-01T00:00:00Z",
"format": "strict_date_optional_time"
}
}
}
]
}
},
"aggs": {
"id_pratica": {
"terms": {
"field": "pratica_id.keyword",
"size": 10240
}
},
"TestCountBucket": {
"stats_bucket": {
"buckets_path": "id_pratica>_count"
}
}
}
}
With the above query I get the value of field "field1" (AP) and the value of field "field2" (SP).
The problem is that these values are in two separate documents.
The response is:
"aggregations" : {
"id_practice" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1582652d-8ddb-4795-9731-218b8373a709"
"doc_count" : 4
},
{
"key" : "23ef276e-58d7-41bb-b42f-fd5cd12015df",
"doc_count" : 4
},
{
"key" : "1540f170-44fa-451b-adae-3bd57e792b9c"
"doc_count" : 3
...
...
...
...
"TestCountBucket" : {
"count" : 59,
"min" : 1.0,
"max" : 4.0,
"avg" : 1.423728813559322,
"sum" : 84.0
where doc_count counts every document of a practice that matches AP or SP, whereas I need to keep only the buckets for practices that have at least one AP document and at least one SP document.
How can I do this?
Thanks in advance.
EDIT: I solved it by using a scripted metric aggregation.

Forming an array with aggregation in MongoDB

I have a document in MongoDB 3.4 with the following structure:
{
"_id" : ObjectId("5e3419e468d01013eadb83dc"),
"id_station" : "62",
"fiware_service" : null,
"fiware_servicepath" : null,
"id_fiware_name" : "CE_del_medio",
"attrName" : "15",
"attrType" : "float",
"attrValue" : 0.33,
"id_sensor_station_absolute" : "15_62",
"recvTimeTs" : 1580387045,
"recvTime" : "2020-01-30T12:24:05.00Z",
"id_fiware" : "15",
"sensor_type" : [
{
"name" : "id",
"type" : "String",
"value" : "15"
},
{
"name" : "img",
"type" : "String",
"value" : "assets/img/contrast.png"
},
{
"name" : "manufacturer",
"type" : "String",
"value" : "Hortisis"
},
{
"name" : "medida",
"type" : "String",
"value" : "mS/cm"
},
{
"name" : "name_comun",
"type" : "String",
"value" : "CE del medio"
},
{
"name" : "place",
"type" : "String",
"value" : "interior"
},
{
"name" : "timestamp",
"type" : "DateTime",
"value" : "2020-01-30T12:24:05.00Z"
},
{
"name" : "type",
"type" : "String",
"value" : "fertigation"
}
]
}
I need to convert the sensor_type field to an array with only one object, as follows:
{
"_id":"15_62",
"medidas":[
{
"_id":"5e3419e468d01013eadb83dc",
"marca":"Hortisis",
"modelo":"Estacion",
"fabricante":"Hortisis",
"id_station":"15",
"sensor_type":[
{
"name":"15",
"type":"fertigation",
"place":"interior",
"img":"assets/img/contrast.png",
"name_comun":"Temp. Suelo",
"medida":"mS/cm"
}
],
"attrName":"15",
"attrValue":0.33,
"recvTimeTs":1580387045,
"recvTime":"2020-01-30T12:24:05.00Z",
"id_sensor_station_absolute":"15_62"
}
]
}
As you can see, the sensor_type field is reformatted so that each entry's name becomes a key and its value becomes the corresponding value (name: value).
I'm working with Node.js and Mongoose.
This is my query. First I match, sort, and keep only the first (latest) document of each group, and then I use $project to shape the output. The problem is that I don't know how to make $project produce that format: if I put "sensor_type": "$latest.attributes.name", it only shows the names, and I don't know how to get the format mentioned above.
Datagreenhouse.aggregate([
{ "$match": { "id_sensor_station_absolute": { "$in": array3 } } }, // "id_station": { "$in": id_station },
{ "$sort": { "recvTime": -1 } },
{
"$group": {
"_id": "$id_sensor_station_absolute",
"latest": { "$first": "$$ROOT" },
}
},
{
"$project": {
"_id": 1,
"id_station": "$latest.id_station",
//"id_sensor_station_absolute": "$id_sensor_station_absolute",
"attrName": "$latest.attrName",
"attrValue": "$latest.attrValue",
"recvTimeTs": "$latest.recvTimeTs",
"recvTime": "$latest.recvTime",
"id_sensor_station_absolute": "$latest.id_sensor_station_absolute",
"sensor_type": "$latest.attributes",
"name": { $arrayElemAt: ["$latest.attributes", 0] },
"type": { $arrayElemAt: ["$latest.attributes", 1] },
"place": { $arrayElemAt: ["$latest.attributes", 2] },
"img": { $arrayElemAt: ["$latest.attributes", 1] },
"name_comun": { $arrayElemAt: ["$latest.attributes", 4] },
"medida": { $arrayElemAt: ["$latest.attributes", 3] },
"interfaz": { $arrayElemAt: ["$latest.attributes", 6] },
}
}
], (err, DatagreenhouseRecuperado) => {
if (err) return res.status(500).send({ message: 'Error al realizar la peticion' + err })
if (!DatagreenhouseRecuperado) return res.status(404).send({ message: 'Error el usuario no existe' })
res.status(200).send({ DatagreenhouseRecuperado })
})
Thank you for your help. Best regards.
In version 3.4.4, MongoDB introduced a very useful operator: $arrayToObject.
This operator lets us convert an array of key/value pairs into an object.
Syntax
RAW DATA                           $map                          $arrayToObject
sensor_type : [                    sensor_type : [               sensor_type : {
  {                          \       {                     \
    "name" : "manufacturer",  ---->    k: "manufacturer",   --->
    "type" : "String",       /         v: "Hortisis"       /       "manufacturer" : "Hortisis"
    "value" : "Hortisis"
  }                                  }
]                                  ]                             }
db.datagreenhouses.aggregate([
{
"$match": {} // setup your match criteria
},
{
"$sort": {
"recvTime": -1
}
},
{
$group: {
_id: "$id_sensor_station_absolute",
medidas: {
$push: {
_id: "$_id",
"marca": "Hortisis", // don't know where you get this value
"modelo": "Estacion", // don't know where you get this value
"id_station": "$id_station",
"attrName": "$attrName",
"attrValue": "$attrValue",
"recvTimeTs": "$recvTimeTs",
"recvTime": "$recvTime",
"id_sensor_station_absolute": "$id_sensor_station_absolute",
"sensor_type": {
$arrayToObject: {
$map: {
input: "$sensor_type",
in: {
k: "$$this.name",
v: "$$this.value"
}
}
}
}
}
}
}
}
])
MongoPlayground
[
{
"_id": "15_62",
"medidas": [
{
"_id": ObjectId("5e3419e468d01013eadb83dc"),
"attrName": "15",
"attrValue": 0.33,
"id_sensor_station_absolute": "15_62",
"id_station": "62",
"marca": "Hortisis",
"modelo": "Estacion",
"recvTime": "2020-01-30T12:24:05.00Z",
"recvTimeTs": 1.580387045e+09,
"sensor_type": {
"id": "15",
"img": "assets/img/contrast.png",
"manufacturer": "Hortisis",
"medida": "mS/cm",
"name_comun": "CE del medio",
"place": "interior",
"timestamp": "2020-01-30T12:24:05.00Z",
"type": "fertigation"
}
}
]
}
]
All that remains is to transform this easy-to-handle object into the desired result ($unwind the medidas field, transform it, and then $group again).
Note: If your MongoDB version is earlier than 3.4.4, follow this upgrade procedure:
Install MongoDB 3.4.4 or newer
Run mongodump using the new MongoDB version
Stop the old MongoDB
Remove the /data directory (make a backup first)
Start the new MongoDB and run mongorestore

Group documents by value from another field

I have documents in the format of
{
"_id": <some_id>,
"code": <some_code>,
"manually_updated": {
"code": <some_code>
}
}
I would like to find duplicates (group documents) by looking at the root code value as well as the manually_updated.code field. So the following three documents would be seen as duplicates (the second document's code was "overwritten" by adding to manually_updated the same code as the first and third documents):
{
{
"_id" : ObjectId("5d2dc168651ce400a327b408"),
"code": 'ABCD',
"manually_updated": {}
},
{
"_id" : ObjectId("5d40861411981f0068e22511"),
"code": 'EFGH',
"manually_updated": {
"code": "ABCD"
}
},
{
"_id" : ObjectId("5d41374311981f0163779b79"),
"code": 'ABCD',
"manually_updated": {}
}
}
Thank you.
Please try this:
db.getCollection('yourCollection').aggregate([{
$lookup:
{
from: "yourCollection",
let: { codeToBeCompared: "$code", manualCode: '$manually_updated.code' },
pipeline: [
{
$match:
{
$expr:
{
$or:
[
{ $eq: ["$code", "$$codeToBeCompared"] },
{ $eq: ["$manually_updated.code", "$$codeToBeCompared"] },
{ $and: [{ $gt: ['$manually_updated', {}] }, { $eq: ["$manually_updated.code", '$$manualCode'] }] }
]
}
}
}
],
as: "data"
}
}, { $group: { _id: '$code', manually_updated: { $push: '$manually_updated' }, finalData: { $first: '$$ROOT' } } }, { $match: { $expr: { $gt: [{ $size: "$finalData.data" }, 1] } } },
{ $project: { 'manually_updated': 1, 'data': '$finalData.data' } }])
Sample Docs :
/* 1 */
{
"_id" : ObjectId("5d2dc168651ce400a327b408"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 2 */
{
"_id" : ObjectId("5d40861411981f0068e22511"),
"code" : "EFGH",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 3 */
{
"_id" : ObjectId("5d41374311981f0163779b79"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 4 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd21"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 5 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd22"),
"code" : "APPPP",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 6 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd23"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 7 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd24"),
"code" : "deffffff",
"manually_updated" : {}
}
Output :
/* 1 */
{
"_id" : "APPPP",
"manually_updated" : [ // Preserving this to say we've passed thru these values
{},
{
"code": "ABCD"
},
{}
],
"data" : [
{
"_id": ObjectId("5d518a3ce8078d6134c4cd21"),
"code": "APPPP",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd23"),
"code": "APPPP",
"manually_updated": {}
}
]
}
/* 2 */
{
"_id" : "EFGH",
"manually_updated" : [
{
"code": "ABCD"
}
],
"data" : [
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
}
]
}
/* 3 */
{
"_id" : "ABCD",
"manually_updated" : [
{},
{}
],
"data" : [
{
"_id": ObjectId("5d2dc168651ce400a327b408"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d41374311981f0163779b79"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
}
]
}
Note that this scans the whole collection; you can add a $match as the first stage to restrict the documents to a particular code.

RESTHeart filtering and sorting by sub documents property

I'm working with MongoDB and RESTHeart.
In my NoSQL database I have a single document with this structure:
{
"_id": "docID",
"users": [
{
"userID": "12",
"elements": [
{
"elementID": "1492446877599",
"events": [
{
"id": 1,
"date": 356
},
{
"id": 2,
"date": 123
}
]
}
]
},
{
"userID": "11",
"elements": [
{
"elementID": "14924",
"events": [
{
"id": 1,
"date": 123
},
{
"id": 2,
"date": 356
}
]
},
{
"elementID": "14925",
"events": [
{
"id": 1,
"date": 12
},
{
"id": 2,
"date": 36
}
]
}
]
}
I need to select the user with userID = 11 and sort that user's events by ascending date.
I tried:
http://myhost:port/myCollection?keys={"users":{"$elemMatch":{"userID":"11"}}}&sort_by={"users.elements.events.date":-1}
but it doesn't work.
db.v.aggregate([
{ $unwind : '$users'},
{ $match : { 'users.userID' : '11' }} ,
{ $unwind : '$users.elements'},
{ $unwind : '$users.elements.events'},
{ $sort : {'users.elements.events.date': 1}},
{ $group : {
_id : '$_id',
elementID : { $first : '$users.elements.elementID' },
userID : { $first : '$users.userID' },
events : { $push : '$users.elements.events'}
}
},
{ $project : {
_id : 1,
userID : 1,
'elements.elementID' : '$elementID',
'elements.events' : '$events'
}
}
]);
This will give you the following:
{
"_id" : ObjectId("5911ba55f0d9c285c561ea33"),
"userID" : "11",
"elements" : {
"elementID" : "14924",
"events" : [
{
"id" : 1,
"date" : 123
},
{
"id" : 2,
"date" : 356
}
]
}
}

Match key name and show document in Mongodb?

JSON structure:
"_id" : ObjectId("55d6cb28725f3019a5241781"),
"Number" : {
"value" : "1234567",
},
"DeviceID" : {
"value" : "01",
}
"type" : {
"value" : "ce06"}
Now I want to find only those sub-documents whose key name starts with "Dev" (pattern /dev/).
I tried this script:
var cur = db.LIVEDATA.find({"ProductIMEIno.value":"359983007488004"});
cur.forEach(function(doc){
var keynames = Object.keys(doc);
print('the length is '+keynames.length);
for(var i=0;i<keynames.length;i++){
if(keynames[i].match(/Dev/)){
print("the name is "+keynames); }}} )
But this is not working properly.
Desired output:
Only this sub-document should be shown, based on the key name search.
"DeviceID" : {
"value" : "01",
MongoDB isn't designed to find keys dynamically like this; it's much easier to use it to find values dynamically, so you could restructure your data to allow this:
"_id" : ObjectId("55d6cb28725f3019a5241781"),
"data" : [
{
"key" : "Number",
"value" : "1234567",
},
{
"key": "DeviceID",
"value" : "01",
},
{
"key" : "type",
"value" : "ce06"
}
]
Then you will be able to query it like this:
db.LIVEDATA.aggregate([
{$match: {"ProductIMEIno.value":"359983007488004"}},
{$unwind: "$data"},
{$match: {"data.key" : /^dev/i }}
]);
That will return data structured like this:
{
"_id" : ObjectId("55d6cb28725f3019a5241781"),
"data" : {
"key" : "DeviceID",
"value" : "01"
}
}
Suppose you have a data collection like this:
[
{
"Number": {
"value": "1234567"
},
"DeviceID": {
"value": "01"
},
"DeviceID2": {
"value": "01",
"name": "abc123"
},
"type": {
"value": "ce06"
}
},
{
"Number": {
"value": "1234568"
},
"DeviceID": {
"value": "02"
},
"type": {
"value": "ce07"
}
}
]
You can use following aggregation:
db.collection.aggregate([
{
"$match": {}
},
{
"$addFields": {
"root_key_value_list": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$unwind": "$root_key_value_list"
},
{
"$match": {
"root_key_value_list.k": {
"$regex": "^Dev"
}
}
},
{
"$group": {
"_id": "$_id",
"root_key_value_list": {
"$push": "$root_key_value_list"
}
}
},
{
"$project": {
"root": {
"$arrayToObject": "$root_key_value_list"
}
}
},
{
"$replaceRoot": {
"newRoot": "$root"
}
}
])
the result will be:
[
{
"DeviceID": {
"value": "01"
},
"DeviceID2": {
"name": "abc123",
"value": "01"
}
},
{
"DeviceID": {
"value": "02"
}
}
]
playground:
https://mongoplayground.net/p/z5EeHALCqzy