I'm trying to search based on date and time separately.
The Elasticsearch document format is:
{
  "id": "101",
  "name": "Tom",
  "customers": ["Jerry", "Nancy", "soli"],
  "start_time": "2021-12-13T06:57:29.420198Z",
  "end_time": "2021-12-13T07:00:23.511722Z"
}
I need to search based on date and time separately. Example:
{
  "query": {
    "bool": {
      "must": [
        { "match": { "customers": "Jerry" } },
        {
          "range": {
            "start_time": { "gte": "2021-12-13", "lte": "2021-12-15" }
          }
        }
      ]
    }
  }
}
Output: I get the above document as the result, which is expected.
But when I use the query below, I get the following error:
"failed to parse date field [6:57:29] with format [strict_date_optional_time||epoch_millis]: [failed to parse date field [6:57:29] with format [strict_date_optional_time||epoch_millis]]"
{
  "query": {
    "bool": {
      "must": [
        { "match": { "customers": "Jerry" } },
        {
          "range": {
            "start_time": { "gte": "6:57:29", "lte": "6:59:35" }
          }
        }
      ]
    }
  }
}
Why am I not able to get results based on time?
Is there any way to achieve a search based on both date and time with a single field? Example:
{
  "query": {
    "bool": {
      "must": [
        { "match": { "customers": "Jerry" } },
        {
          "range": {
            "start_time": { "gte": "2021-12-13", "lte": "2021-12-15" }
          }
        },
        {
          "range": {
            "start_time": { "gte": "6:57:29", "lte": "6:59:35" }
          }
        }
      ]
    }
  }
}
I also tried to achieve this using regular expressions, but it didn't help.
This is the mapping:
{
  "settings": {
    "number_of_shards": 2,
    "number_of_replicas": 1
  },
  "mappings": {
    "dynamic": "true",
    "_source": {
      "enabled": "true"
    },
    "runtime": {
      "start_time": {
        "type": "keyword",
        "script": {
          "source": "doc.start_time.start_time.getHourOfDay() >= params.min && doc.start_time.start_time.getHourOfDay() <= params.max"
        }
      }
    },
    "properties": {
      "name": {
        "type": "keyword"
      },
      "customers": {
        "type": "text"
      }
    }
  }
}
The above mapping gives the error: "not a statement: result not used from boolean and operation [&&]"
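This error most likely appears because a runtime field script cannot just evaluate a bare boolean expression; it has to emit() a value, and a field used for hour comparisons is usually typed long rather than keyword. A minimal sketch of such a mapping, assuming Elasticsearch 7.12+ and a hypothetical start_hour runtime field derived from start_time:
{
  "mappings": {
    "properties": {
      "start_time": { "type": "date" },
      "name": { "type": "keyword" },
      "customers": { "type": "text" }
    },
    "runtime": {
      "start_hour": {
        "type": "long",
        "script": {
          "source": "emit(doc['start_time'].value.getHour())"
        }
      }
    }
  }
}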
This is the search query, which I'll try once the index is created:
{
  "query": {
    "bool": {
      "must": [
        { "match": { "customers": "Jerry" } },
        { "match": { "name": "Tom" } },
        {
          "range": {
            "start_time": {
              "gte": "2015-11-01",
              "lte": "2015-11-30"
            }
          }
        },
        {
          "script": {
            "source": "doc.start_time.start_time.getHourOfDay() >= params.min && doc.start_time.start_time.getHourOfDay() <= params.max",
            "params": {
              "min": 6,
              "max": 7
            }
          }
        }
      ]
    }
  }
}
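With a runtime field like the hypothetical start_hour sketched above, both the date and the time-of-day constraints can be expressed as plain range clauses in one query (a sketch under the same assumptions):
{
  "query": {
    "bool": {
      "must": [
        { "match": { "customers": "Jerry" } },
        { "range": { "start_time": { "gte": "2021-12-13", "lte": "2021-12-15" } } },
        { "range": { "start_hour": { "gte": 6, "lte": 7 } } }
      ]
    }
  }
}
If you keep the script-based approach instead, note that in recent Elasticsearch versions the doc value is a ZonedDateTime, so the accessor would be doc['start_time'].value.getHour() rather than doc.start_time.start_time.getHourOfDay().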
I am trying to convert JSON to a different format using JOLT (via the NiFi JoltTransformJson processor). For a single JSON record, the JOLT spec I'm using works fine in the JOLT demo app, whereas if I execute it with multiple JSON records I don't get the expected output. Could anyone tell me what additional changes I need to make in the JOLT spec to handle multiple JSON records?
Sample input JSON:
[
{
"pool": {
"field": [
{
"name": "BillingDay",
"value": "12"
},
{
"name": "Custom1",
"value": "POOL_BASE_PLAN_3GB"
}
]
},
"usage": {
"version": "3",
"quota": {
"name": "POOL_TOP_UP_1GB_2",
"cid": "5764888998010953848"
}
},
"state": {
"version": "1",
"property": [
{
"name": "SMS_RO_TOP",
"value": "1"
},
{
"name": "BillingTimeStamp",
"value": "2020-06-12T01:00:05"
},
{
"name": "timereset",
"value": "2020-01-12T00:35:53"
}
]
}
},
{
"pool": {
"field": [
{
"name": "PoolID",
"value": "111100110000003505209"
},
{
"name": "BillingDay",
"value": "9"
}
]
},
"usage": {
"version": "3"
},
"state": {
"version": "1",
"property": [
{
"name": "BillingTimeStamp",
"value": "2020-06-09T01:00:05"
},
{
"name": "timereset",
"value": "2019-03-20T17:10:38"
}
]
}
}
]
The JOLT spec I'm using:
[
{
"operation": "modify-default-beta",
"spec": {
"state": {
"property": {
"name": "NOTAVAILABLE"
}
},
"usage": {
"quota": {
"name": "NOTAVAILABLE"
}
}
}
},
{
"operation": "shift",
"spec": {
"pool": {
"field": {
"*": {
"value": "pool_item.#(1,name)"
}
}
},
// remaining elements print as it is
"*": "&"
}
}
]
Expected output JSON:
[
{
"pool_item" : {
"BillingDay" : "12",
"Custom1" : "POOL_BASE_PLAN_3GB"
},
"usage" : {
"version" : "3",
"quota" : {
"name" : "POOL_TOP_UP_1GB_2",
"cid" : "5764888998010953848"
}
},
"state" : {
"version" : "1",
"property" : [ {
"name" : "SMS_RO_TOP",
"value" : "1"
}, {
"name" : "BillingTimeStamp",
"value" : "2020-06-12T01:00:05"
}, {
"name" : "timereset",
"value" : "2020-01-12T00:35:53"
} ]
}
},
{
"pool_item" : {
"BillingDay" : "9",
"PoolID" : "111100110000003505209"
},
"usage" : {
"version" : "3",
"quota" : {
"name" : "NOTAVAILABLE"
}
},
"state" : {
"version" : "1",
"property" : [ {
"name" : "SMS_RO_TOP",
"value" : "1"
}, {
"name" : "BillingTimeStamp",
"value" : "2020-06-12T01:00:05"
}, {
"name" : "timereset",
"value" : "2020-01-12T00:35:53"
} ]
}
}
]
The below JOLT shift specification will work for the multiple JSONs in your input array:
[
{
"operation": "shift",
"spec": {
"*": {
"pool": {
"field": {
"*": {
"value": "[&4].pool_item.#(1,name)"
}
}
},
"usage": "[&1].usage",
"state": "[&1].state"
}
}
}
]
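If you also need the NOTAVAILABLE defaults from your original modify-default-beta step, that operation would presumably need the same top-level "*" wildcard so it applies to every element of the input array. A sketch of the full chain, showing just the quota default that appears in your expected output:
[
  {
    "operation": "modify-default-beta",
    "spec": {
      "*": {
        "usage": {
          "quota": {
            "name": "NOTAVAILABLE"
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "pool": {
          "field": {
            "*": {
              "value": "[&4].pool_item.#(1,name)"
            }
          }
        },
        "usage": "[&1].usage",
        "state": "[&1].state"
      }
    }
  }
]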
I have a JSON input for a JOLT transformation:
[
{
"System": {
"Provider": {
"Name": "Microsoft-Windows-Eventlog",
"Guid": "{sdada}"
}
},
"EventID": "3434",
"EventData": {
"SubjectUserSid": "3455",
"SubjectUserName": "abc",
"SubjectDomainName": "def",
"SubjectLogonId": "e4545",
"ObjectServer": "dggg",
"ObjectType": "eet"
}
},
{
"System": {
"Provider": {
"Name": "Microsoft-Windows-Eventlog",
"Guid": "{sdada1}"
},
"EventID": "3435"
}
}
]
As you can see, EventData is present in the first JSON object but not in the second object in the array.
My desired output is:
[
{
"winlog": {
"provider_name": "Microsoft-Windows-Eventlog",
"provider_guid": "{sdada}",
"EventID": "3434",
"event_data": {
"SubjectUserSid": "3455",
"SubjectUserName": "abc",
"SubjectDomainName": "def",
"SubjectLogonId": "e4545",
"ObjectServer": "dggg",
"ObjectType": "eet"
}
}
},
{
"winlog": {
"provider_name": "Microsoft-Windows-Eventlog",
"provider_guid": "{sdada1}",
"EventID": "3435",
"event_data": {
"SubjectUserSid": null,
"SubjectUserName": null,
"SubjectDomainName": null,
"SubjectLogonId": null,
"ObjectServer": null,
"ObjectType": null
}
}
}
]
I want the event_data key present in both objects, with null values for the second object. This is my spec:
[
{
"operation": "shift",
"spec": {
"*": {
"System": {
"Provider": {
"Name": "[&3].winlog.provider_name",
"Guid": "[&3].winlog.provider_guid"
}
},
"EventData": "[&1].winlog.event_data"
}
}
}]
My output is:
[ {
"winlog" : {
"provider_name" : "Microsoft-Windows-Eventlog",
"provider_guid" : "{sdada}",
"event_data" : {
"SubjectUserSid" : "3455",
"SubjectUserName" : "abc",
"SubjectDomainName" : "def",
"SubjectLogonId" : "e4545",
"ObjectServer" : "dggg",
"ObjectType" : "eet"
}
}
}, {
"winlog" : {
"provider_name" : "Microsoft-Windows-Eventlog",
"provider_guid" : "{sdada1}"
}
} ]
In short, how do I ensure the same set of keys appears for each JSON object in the array?
Firstly, you can put default values into the tree like this:
{
"operation": "modify-default-beta",
"spec": {
"*": {
"EventData": {
"SubjectUserSid": null,
"SubjectUserName": null,
"SubjectDomainName": null,
"SubjectLogonId": null,
"ObjectServer": null,
"ObjectType": null
}
}
}
}
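Presumably the chain then continues with the shift spec from the question, so the defaulted EventData gets moved under winlog.event_data together with the real values. A sketch of the combined spec, assuming the defaults step behaves as shown above:
[
  {
    "operation": "modify-default-beta",
    "spec": {
      "*": {
        "EventData": {
          "SubjectUserSid": null,
          "SubjectUserName": null,
          "SubjectDomainName": null,
          "SubjectLogonId": null,
          "ObjectServer": null,
          "ObjectType": null
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "System": {
          "Provider": {
            "Name": "[&3].winlog.provider_name",
            "Guid": "[&3].winlog.provider_guid"
          }
        },
        "EventData": "[&1].winlog.event_data"
      }
    }
  }
]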
This is my REST API call:
GET logstash-2017.12.29/_search
{
"_source": {
"includes": [ "IPV4_DST_ADDR","IPV4_SRC_ADDR","IN_BYTES","OUT_BYTES"]
},
"size" : 100,
"query": {
"bool": {
"should": [
{
"match_phrase":{"IPV4_DST_ADDR":"192.168.0.159"}
},
{
"match_phrase":{"IPV4_SRC_ADDR":"192.168.0.159"}
}
],
"must":
{
"range" : {
"LAST_SWITCHED" : {
"gte" : 1514543547
}
}
}
}
},
"aggs": {
"IN_PKTS": {
"sum": {
"field": "IN_PKTS"
}
},
"IN_BYTES": {
"sum": {
"field": "IN_BYTES"
}
},
"OUT_BYTES": {
"sum": {
"field": "OUT_BYTES"
}
},
"OUT_PKTS": {
"sum": {
"field": "OUT_PKTS"
}
},
"genres":{
"terms" : {
"field" : "L7_PROTO_NAME.keyword",
"order" : { "in_bytes" : "desc" }
},
"aggs":{
"in_bytes": {
"sum": { "field":"IN_BYTES"}
}
}
},
"download1" : {
"filter" : { "term": { "IPV4_DST_ADDR":"192.168.0.159"} },
"aggs" : {
"downlod_bytes" : { "sum" : { "field" : "IN_BYTES" } }
}
},
"download2" : {
"filter" : { "term": { "IPV4_SRC_ADDR":"192.168.0.159"} },
"aggs" : {
"downlod_bytes" : { "sum" : { "field" : "OUT_BYTES" } }
}
},"upload1" : {
"filter" : { "term": { "IPV4_DST_ADDR":"192.168.0.159"} },
"aggs" : {
"downlod_bytes" : { "sum" : { "field" : "OUT_BYTES" } }
}
},"upload2" : {
"filter" : { "term": { "IPV4_SRC_ADDR":"192.168.0.159"} },
"aggs" : {
"downlod_bytes" : { "sum" : { "field" : "IN_BYTES" } }
}
}
}
I found that some of the returned documents didn't meet my requirements:
{
"_index": "logstash-2017.12.29",
"_type": "ntopng-*",
"_id": "AWCh1jPtnZ2m3739FTU7",
"_score": 1,
"_source": {
"IPV4_SRC_ADDR": "192.168.0.109", // not in my expectation
"IN_BYTES": 132,
"IPV4_DST_ADDR": "224.0.0.252", // not in my expectation
"OUT_BYTES": 0
}
}
The returned document's IPV4_SRC_ADDR and IPV4_DST_ADDR are not "192.168.0.159".
It seems like a fuzzy search, but I want match_phrase to match 100%:
either IPV4_SRC_ADDR or IPV4_DST_ADDR should be "192.168.0.159".
How should I modify my REST API call?
Thank you in advance!
You should map your IP fields using the ip data type:
{
"mappings": {
"my_type": {
"properties": {
"IPV4_SRC_ADDR": {
"type": "ip"
},
"IPV4_DST_ADDR": {
"type": "ip"
}
}
}
}
}
Then you'll be able to match those addresses exactly using a simple term query:
"should": [
{
"term":{"IPV4_DST_ADDR":"192.168.0.159"}
},
{
"term":{"IPV4_SRC_ADDR":"192.168.0.159"}
}
],
UPDATE:
Given your existing mapping, you can also use the .keyword sub-field. Note that because the bool query also contains a must clause, the should clauses are optional by default, so "minimum_should_match": 1 is needed to force at least one of them to match:
{
"_source": {
"includes": [
"IPV4_DST_ADDR",
"IPV4_SRC_ADDR",
"IN_BYTES",
"OUT_BYTES"
]
},
"size": 100,
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"term": {
"IPV4_DST_ADDR.keyword": "192.168.0.159"
}
},
{
"term": {
"IPV4_SRC_ADDR.keyword": "192.168.0.159"
}
}
],
"must": {
"range": {
"LAST_SWITCHED": {
"gte": 1514543547
}
}
}
}
},
"aggs": {
"IN_PKTS": {
"sum": {
"field": "IN_PKTS"
}
},
"IN_BYTES": {
"sum": {
"field": "IN_BYTES"
}
},
"OUT_BYTES": {
"sum": {
"field": "OUT_BYTES"
}
},
"OUT_PKTS": {
"sum": {
"field": "OUT_PKTS"
}
},
"genres": {
"terms": {
"field": "L7_PROTO_NAME.keyword",
"order": {
"in_bytes": "desc"
}
},
"aggs": {
"in_bytes": {
"sum": {
"field": "IN_BYTES"
}
}
}
},
"download1": {
"filter": {
"term": {
"IPV4_DST_ADDR.keyword": "192.168.0.159"
}
},
"aggs": {
"download_bytes": {
"sum": {
"field": "IN_BYTES"
}
}
}
},
"download2": {
"filter": {
"term": {
"IPV4_SRC_ADDR.keyword": "192.168.0.159"
}
},
"aggs": {
"downlod_bytes": {
"sum": {
"field": "OUT_BYTES"
}
}
}
},
"upload1": {
"filter": {
"term": {
"IPV4_DST_ADDR.keyword": "192.168.0.159"
}
},
"aggs": {
"downlod_bytes": {
"sum": {
"field": "OUT_BYTES"
}
}
}
},
"upload2": {
"filter": {
"term": {
"IPV4_SRC_ADDR.keyword": "192.168.0.159"
}
},
"aggs": {
"downlod_bytes": {
"sum": {
"field": "IN_BYTES"
}
}
}
}
}
}
I am using the reindex API in ES 5.4.1, and I need to convert a long field (which represents a date) to a date field. The source index looks like:
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "twitter",
"_type": "tweet",
"_id": "1",
"_score": 1,
"_source": {
"temp": 1496938873065,
"message": "hello",
"user": "joan"
}
}
]
}
The temp field has to be converted to a date object. I want to use an ingest processor:
PUT _ingest/pipeline/p1
{
"processors": [
{
"date" : {
"field" : "temp",
"target_field" : "updatedOn",
"formats":["epoch_millis"],
"timezone" : "Europe/Amsterdam"
}
}
]
}
But when I try to create this processor, I get an error:
{
"error": {
"root_cause": [
{
"type": "exception",
"reason": "java.lang.IllegalArgumentException: Illegal pattern component: p",
"header": {
"processor_type": "date"
}
}
],
"type": "exception",
"reason": "java.lang.IllegalArgumentException: Illegal pattern component: p",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "Illegal pattern component: p"
},
"header": {
"processor_type": "date"
}
},
"status": 500
}
Any ideas?
The formats parameter is wrong; you need to use UNIX_MS instead of epoch_millis, like this:
PUT _ingest/pipeline/p1
{
"processors": [
{
"date" : {
"field" : "temp",
"target_field" : "updatedOn",
"formats":["UNIX_MS"],
"timezone" : "Europe/Amsterdam"
}
}
]
}
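Once the pipeline exists, it can be referenced from the reindex request via dest.pipeline so the conversion happens during the reindex. A sketch, assuming a hypothetical destination index named twitter-new:
POST _reindex
{
  "source": {
    "index": "twitter"
  },
  "dest": {
    "index": "twitter-new",
    "pipeline": "p1"
  }
}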
Following is the JSON I have inserted in MongoDB. I need to find the record where the cardholders value is "200".
{
"_id": "11",
"cardholders": {
"100": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
],
"200": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
},
{
"message": "message3",
"time": "timestamp"
}
],
"300": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
]
}
}
Please advise. I have tried the following:
db.test3.find({"message1":{$eq:true}})
> db.test3.find({"100":{$eq:true}})
> db.test3.find({cardholders:{$eq:'100'}})
> db.test3.find({cardholders:{$eq:100}})
You essentially want to include a projection argument in your find() query so that the returned documents contain only the projected field and the _id field. In this case, since "200" is the key, you can project it and use the map() cursor method to get the values as follows:
db.test3.find(
{},
{"cardholders.200": 1}
).map(function(doc){ return doc["cardholders"]["200"]})
Output:
/* 0 */
{
"0" : [
{
"message" : "message1",
"time" : "timestamp"
},
{
"message" : "message2",
"time" : "timestamp"
},
{
"message" : "message3",
"time" : "timestamp"
}
]
}
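If the goal is simply to match documents that contain a "200" key at all (keeping the original schema), an $exists check on the dynamic key path would likely do it; a minimal sketch:
db.test3.find({ "cardholders.200": { "$exists": true } })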
UPDATE
To make querying easier, I would recommend changing your schema so that the cardholders key becomes an array of embedded documents. These embedded documents would have key and values fields: the key field holds the previous dynamic keys and the values field holds the array values:
{
"_id": "11",
"cardholders": [
{
"key": "100",
"values": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
]
},
{
"key": "200",
"values": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
]
},
{
"key": "300",
"values": [
{
"message": "message1",
"time": "timestamp"
},
{
"message": "message2",
"time": "timestamp"
}
]
}
]
}
You can then query the embedded documents using a combination of the dot notation and the $elemMatch projection as follows:
db.test3.find(
{"cardholders.key": "200"},
{
"cardholders": {
"$elemMatch": { "key": "200" }
}
}
);