Elasticsearch Ingest pipeline -epoch_millis to date format - date

I am using the reindex API in ES 5.4.1, and I need to convert a long field (which represents a date) to a date field. The source index looks like this:
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "twitter",
"_type": "tweet",
"_id": "1",
"_score": 1,
"_source": {
"temp": 1496938873065,
"message": "hello",
"user": "joan"
}
}
]
}
The temp field has to be converted to a date object.
I want to use a date processor in an ingest pipeline for this:
PUT _ingest/pipeline/p1
{
"processors": [
{
"date" : {
"field" : "temp",
"target_field" : "updatedOn",
"formats":["epoch_millis"],
"timezone" : "Europe/Amsterdam"
}
}
]
}
But when I try to create this processor, I get the following error:
{
"error": {
"root_cause": [
{
"type": "exception",
"reason": "java.lang.IllegalArgumentException: Illegal pattern component: p",
"header": {
"processor_type": "date"
}
}
],
"type": "exception",
"reason": "java.lang.IllegalArgumentException: Illegal pattern component: p",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "Illegal pattern component: p"
},
"header": {
"processor_type": "date"
}
},
"status": 500
}
Any ideas?

The formats parameter is wrong: you need to use UNIX_MS instead of epoch_millis, like this:
PUT _ingest/pipeline/p1
{
"processors": [
{
"date" : {
"field" : "temp",
"target_field" : "updatedOn",
"formats":["UNIX_MS"],
"timezone" : "Europe/Amsterdam"
}
}
]
}

Related

Elasticsearch search by time range

Trying to search based on date and time separately,
elastic-search document format is,
{
"id": "101",
"name": "Tom",
"customers": ["Jerry", "Nancy", "soli"],
"start_time": "2021-12-13T06:57:29.420198Z",
"end_time": "2021-12-13T07:00:23.511722Z",
}
I need to search based on date and time separately,
Ex:
{
"query": {
"bool" : {
"must" : [
{
"match" : { "customers" : "Jerry" }
},
{
"range": {
"start_time": {"gte" : "2021-12-13", "lte" : "2021-12-15" }}
}
]}
}
}
Output: I get the above document as the result, which is expected.
But when I use the query below, I get this error:
"failed to parse date field [6:57:29] with format [strict_date_optional_time||epoch_millis]: [failed to parse date field [6:57:29] with format [strict_date_optional_time||epoch_millis]]"
{
"query": {
"bool" : {
"must" : [
{
"match" : { "customers" : "Jerry" }
},
{
"range": {
"start_time": {"gte" : "6:57:29", "lte" : "6:59:35" }}
}
]}
}
}
Why am I not able to get results based on time?
Is there a way to search on both date and time using a single field?
Ex:
{
"query": {
"bool" : {
"must" : [
{
"match" : { "customers" : "Jerry" }
},
{
"range": {
"start_time": {"gte" : "2021-12-13", "lte" : "2021-12-15" }}
},
{
"range": {
"start_time": {"gte" : "6:57:29", "lte" : "6:59:35" }}
}
]}
}
}
I also tried to achieve this using regular expressions, but it didn't help me.
This is the mapping,
{
"settings": {
"number_of_shards": 2,
"number_of_replicas": 1
},
"mappings": {
"dynamic": "true",
"_source": {
"enabled": "true"
},
"runtime": {
"start_time": {
"type": "keyword",
"script": {
"source": "doc.start_time.start_time.getHourOfDay() >=
params.min && doc.start_time.start_time.getHourOfDay()
<= params.max"
}
}
},
"properties": {
"name": {
"type": "keyword"
},
"customers": {
"type": "text"
}
}
}
}
The above statement gives the error: "not a statement: result not used from boolean and operation [&&]"
This is the search query, which I will try once the index has been created:
{
"query": {
"bool" : {
"must" : [
{
"match" : { "customers" : "Jerry" }
},
{
"match" : { "name" : "Tom" }
},
{
"range": {
"start_time": {
"gte": "2015-11-01",
"lte": "2015-11-30"
}
}
},
{
"script": {
"source":
"doc.start_time.start_time.getHourOfDay()
>= params.min &&
doc.start_time.start_time.getHourOfDay() <= params.max",
"params": {
"min": 6,
"max": 7
}
}
}
]}
}
}

JSON conversion using JOLT

I am trying to convert JSON to a different format using JOLT (using the NiFi JoltTransformJson processor). For a single JSON record, the JOLT spec I am using works fine in the JOLT demo app, whereas if I run it with multiple JSON records I do not get the expected output in the JOLT demo app. Could anyone tell me what additional changes I need to make to the JOLT spec to handle multiple JSON records?
sample input json
[
{
"pool": {
"field": [
{
"name": "BillingDay",
"value": "12"
},
{
"name": "Custom1",
"value": "POOL_BASE_PLAN_3GB"
}
]
},
"usage": {
"version": "3",
"quota": {
"name": "POOL_TOP_UP_1GB_2",
"cid": "5764888998010953848"
}
},
"state": {
"version": "1",
"property": [
{
"name": "SMS_RO_TOP",
"value": "1"
},
{
"name": "BillingTimeStamp",
"value": "2020-06-12T01:00:05"
},
{
"name": "timereset",
"value": "2020-01-12T00:35:53"
}
]
}
},
{
"pool": {
"field": [
{
"name": "PoolID",
"value": "111100110000003505209"
},
{
"name": "BillingDay",
"value": "9"
}
]
},
"usage": {
"version": "3"
},
"state": {
"version": "1",
"property": [
{
"name": "BillingTimeStamp",
"value": "2020-06-09T01:00:05"
},
{
"name": "timereset",
"value": "2019-03-20T17:10:38"
}
]
}
}
]
JOLT using:
[
{
"operation": "modify-default-beta",
"spec": {
"state": {
"property": {
"name": "NOTAVAILABLE"
}
},
"usage": {
"quota": {
"name": "NOTAVAILABLE"
}
}
}
},
{
"operation": "shift",
"spec": {
"pool": {
"field": {
"*": {
"value": "pool_item.#(1,name)"
}
}
},
// remaining elements print as it is
"*": "&"
}
}
]
Expected output JSON:
[
{
"pool_item" : {
"BillingDay" : "12",
"Custom1" : "POOL_BASE_PLAN_3GB"
},
"usage" : {
"version" : "3",
"quota" : {
"name" : "POOL_TOP_UP_1GB_2",
"cid" : "5764888998010953848"
}
},
"state" : {
"version" : "1",
"property" : [ {
"name" : "SMS_RO_TOP",
"value" : "1"
}, {
"name" : "BillingTimeStamp",
"value" : "2020-06-12T01:00:05"
}, {
"name" : "timereset",
"value" : "2020-01-12T00:35:53"
} ]
}
},
{
"pool_item" : {
"BillingDay" : "9",
"PoolID" : "111100110000003505209"
},
"usage" : {
"version" : "3",
"quota" : {
"name" : "NOTAVAILABLE"
}
},
"state" : {
"version" : "1",
"property" : [ {
"name" : "SMS_RO_TOP",
"value" : "1"
}, {
"name" : "BillingTimeStamp",
"value" : "2020-06-12T01:00:05"
}, {
"name" : "timereset",
"value" : "2020-01-12T00:35:53"
} ]
}
}
]
The JOLT shift specification below will work for multiple JSON records in the input array.
[
{
"operation": "shift",
"spec": {
"*": {
"pool": {
"field": {
"*": {
"value": "[&4].pool_item.#(1,name)"
}
}
},
"usage": "[&1].usage",
"state": "[&1].state"
}
}
}
]

how to validate datatype in jolt transformation

I'm new to JOLT transformations. I was wondering if there is a way to validate a data type before proceeding.
I'm processing a JSON document to insert a record into HBase. From the source I'm getting the timestamp repeated for the same resource id, which I want to use for the row key.
So I just retrieve the first timestamp and concatenate it with the resource id to create the row key. But I have an issue when there is only one timestamp in the record, i.e. when it's not a list. I would appreciate it if someone could help me handle this situation.
input data
{ "resource": {
"id": "200629068",
"name": "resource_name_1)",
"parent": {
"id": 200053744,
"name": "parent_name"
},
"properties": {
"AP_ifSpeed": "0",
"DisplaySpeed": "0 (NotApplicable)",
"description": "description"
}
},
"data": [
{
"metric": {
"id": "2215",
"name": "metric_name 1"
},
"timestamp": 1535064595000,
"value": 0
},
{
"metric": {
"id": "2216",
"name": "metric_name_2"
},
"timestamp": 1535064595000,
"value": 1
}
]
}
Jolt transformation
[{
"operation": "shift",
"spec": {
"resource": {
// "id": "resource_&",
"name": "resource_&",
"id": "resource_&",
"parent": {
"id": "parent_&",
"name": "parent_&"
},
"properties": {
"*": "&"
}
},
"data": {
"*": {
"metric": {
"id": {
"*": {
"#(3,value)": "&1"
}
},
"name": {
"*": {
"#(3,value)": "&1"
}
}
},
"timestamp": "timestamp"
}
}
}
}, {
"operation": "shift",
"spec": {
"timestamp": {
// get first element from list
"0": "&1"
},
"*": "&"
}
},
{
"operation": "modify-default-beta",
"spec": {
"rowkey": "=concat(#(1,resource_id),'_',#(1,timestamp))"
}
}
]
Output I'm getting
{ "resource_name" : "resource_name_1)",
"resource_id" : "200629068",
"parent_id" : 200053744,
"parent_name" : "parent_name",
"AP_ifSpeed" : "0",
"DisplaySpeed" : "0 (NotApplicable)",
"description" : "description",
"2215" : 0,
"metric_name 1" : 0,
"timestamp" : 1535064595000,
"2216" : 1,
"metric_name_2" : 1,
"rowkey" : "200629068_1535064595000"
}
When there is only one timestamp, I get:
"rowkey" : "200629068_"
In your shift make the output "timestamp" always be an array, even if the incoming data array only has one element in it.
"timestamp": "timestamp[]"

Search key in Mongodb document

The following is my JSON document, which I have inserted into MongoDB. I need to find the record where the cardholders key is 200.
{
"_id": "11",
"cardholders": {
"100": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
],
"200": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
},
{
"message": "message3",
"time": "timestamp"
}
],
"300": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
]
}
}
Please advise. I have tried the following:
db.test3.find({"message1":{$eq:true}})
> db.test3.find({"100":{$eq:true}})
> db.test3.find({cardholders:{$eq:'100'}})
> db.test3.find({cardholders:{$eq:100}})
You essentially want to include a projection argument in your find() query so that the returned documents contain only the projected field and the _id field. In this case, since "200" is the key, you can project it and use the cursor's map() method to get the values as follows:
db.test3.find(
{},
{"cardholders.200": 1}
).map(function(doc){ return doc["cardholders"]["200"]})
Output:
/* 0 */
{
"0" : [
{
"message" : "message1",
"time" : "timestamp"
},
{
"message" : "message2",
"time" : "timestamp"
},
{
"message" : "message3",
"time" : "timestamp"
}
]
}
UPDATE
To make querying easier, I would recommend changing your schema so that the cardholders key becomes an array that holds embedded documents. Each embedded document would have a key field and a values field; the key field holds the previous dynamic key and the values field holds the array values:
{
"_id": "11",
"cardholders": [
{
"key": "100",
"values": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
]
},
{
"key": "200",
"values": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
]
},
{
"key": "300",
"values": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
]
}
]
}
You can then query the embedded documents using a combination of the dot notation and the $elemMatch projection as follows:
db.test3.find(
{"cardholders.key": "200"},
{
"cardholders": {
"$elemMatch": { "key": "200" }
}
}
);

Mongo returning an array element

I have the following JSON document in my MongoDB database, which I added with mongoimport.
I am trying to return a single element from the questions array where theQuestion equals "q1".
{
"questions": [
{
"questionEntry": {
"id": 1,
"info": {
"seasonNumber": 1,
"episodeNumber": 1,
"episodeName": "Days Gone Bye"
},
"questionItem": {
"theQuestion": "q1",
"attachedElement": {
"type": 1,
"value": ""
}
},
"options": [
{
"type": 1,
"value": "o1"
},
{
"type": 1,
"value": "o1"
}
],
"answer": {
"questionId": 1,
"answer": 1
},
"metaTags": [
"Season 1",
"Episode 1",
"Rick Grimmes"
]
}
},
{
"questionEntry": {
"id": 1,
"info": {
"seasonNumber": 1,
"episodeNumber": 1,
"episodeName": "Days Gone Bye"
},
"questionItem": {
"theQuestion": "q2",
"attachedElement": {
"type": 1,
"value": ""
}
},
"options": [
{
"type": 1,
"value": "o2"
},
{
"type": 1,
"value": "o2"
}
],
"answer": {
"questionId": 1,
"answer": 1
},
"metaTags": [
"Season 1",
"Episode 1",
"Rick Grimmes",
"Glenn Rhee"
]
}
}
]
}
I ran the query db.questions.find({"questions.questionEntry.questionItem.theQuestion" : "q1"}), but this returned the whole document (both questionEntry elements in the questions array)!
I have tried db.questions.find({"questions.questionEntry.questionItem.theQuestion" : "q1"}, _id:0," questions.questionItem": {$elemMatch : {theQuestion: "q1"}}})
But get the following error:
Error: error: {
"$err" : "Can't canonicalize query: BadValue Cannot use $elemMatch projection on a nested field.", "code" : 17287
Is there a way I could limit the result to just the array element which contains it?
Thanks
db.questions.find({},{"questions.questionEntry.questionItem.theQuestion" : "q1"});
or
db.questions.find({"questions.questionEntry.questionItem.theQuestion" : "q1"},{'questions.$':1});
please try these.
If you want to use $elemMatch the query should be:
db.questions.find(
{"questions.questionEntry.questionItem.theQuestion" : "q1"},
{
'_id':0,
"questions": {
$elemMatch : {"questionEntry.questionItem.theQuestion": "q1"}
}
}
)