How to use query_string with a date in Elasticsearch

Does query_string support a greater-than (GT) operation on a date field?
{
  "_source": {
    "includes": [
      "id"
    ]
  },
  "size": 20,
  "sort": [
    {
      "published": {
        "order": "DESC"
      }
    }
  ],
  "query": {
    "query_string": {
      "query": "updated > (2014-11-01T00:00:00Z)"
    }
  }
}
This throws: Failed to parse query [updated > (2014-11-01T00:00:00Z)]
What is wrong with my query?
The mapping is:
"updated": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ssZ"
}

The query needs to be written in the Lucene query string syntax, i.e.:
"query": "updated:{2014-11-01 TO *}"
However, you'd be better off using a simple range query:
"range": {
"updated": {
"gt": "2014-11-01T00:00:00Z"
}
}
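For example, the full request from the question could be rewritten like this (a minimal sketch reusing the fields above; it assumes the date value is in a format the updated field's mapping can actually parse):
{
  "_source": {
    "includes": [
      "id"
    ]
  },
  "size": 20,
  "sort": [
    {
      "published": {
        "order": "desc"
      }
    }
  ],
  "query": {
    "range": {
      "updated": {
        "gt": "2014-11-01T00:00:00Z"
      }
    }
  }
}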

Related

Cloudant database search index

I have a Json document in cloudant as:
{
"createdAt": "2022-10-26T09:16:29.472Z",
"user_id": "4499c1c2-7507-4707-b0e4-ec83e2d2f34d",
"_id": "606a4d591031c14a8c48fcb4a9541ff0"
}
{
"createdAt": "2022-10-24T11:15:24.269Z",
"user_id": "c4bdcb54-3d0a-4b6a-a8a9-aa12e45345f3",
"_id": "fb24a15d8fb7cdf12feadac08e7c05dc"
}
{
"createdAt": "2022-10-24T11:08:24.269Z",
"user_id": "06d67681-e2c4-4ed4-b40a-5a2c5e7e6ed9",
"_id": "2d277ec3dd8c33da7642b72722aa93ed"
}
I have created a JSON index as:
{
"type": "json",
"partitioned": false,
"def": {
"fields": [
{
"createdAt": "asc"
},
{
"user_id": "asc"
}
]
}
}
I have created a text index as:
{
"type": "text",
"partitioned": false,
"def": {
"default_analyzer": "keyword",
"default_field": {},
"selector": {},
"fields": [
{
"_id": "string"
},
{
"createdAt": "string"
},
{
"user_id": "string"
}
],
"index_array_lengths": true
}
}
I have created a Cloudant query selector:
{
"selector": {
"$and": [
{
"createdAt": {
"$exists": true
}
},
{
"user_id": {
"$exists": true
}
}
]
},
"fields": [
"createdAt",
"user_id",
"_id"
],
"sort": [
{
"createdAt": "desc"
}
],
"limit": 10,
"skip": 0
}
This query works fine inside the Cloudant environment.
My problem is with the search index.
I created this index function, which works:
function (doc) {
  index("specialsearch", doc._id);
  if (doc.createdAt) {
    index("createdAt", doc.createdAt, {"store": true});
  }
  if (doc.user_id) {
    index("user_id", doc.user_id, {"store": true});
  }
}
Here is the result from this URL:
// https://[user]-bluemix.cloudant.com/[database]/_design/attributes/_search/by_all?q=*:*&counts=["createdAt"]&limit=2
{
"total_rows": 10,
"bookmark": "xxx",
"rows": [
{
"id": "fb24a15d8fb7cdf12feadac08e7c05dc",
"order": [
1.0,
0
],
"fields": {
"createdAt": "2022-10-24T11:15:24.269Z",
"user_id": "c4bdcb54-3d0a-4b6a-a8a9-aa12e45345f3"
}
},
{
"id": "dad431735986bbf41b1fa3b1cd30cd0f",
"order": [
1.0,
0
],
"fields": {
"createdAt": "2022-10-24T11:07:02.138Z",
"user_id": "76f03307-4497-4a19-a647-8097fa288e77"
}
},
{
"id": "2d277ec3dd8c33da7642b72722aa93ed",
"order": [
1.0,
0
],
"fields": {
"createdAt": "2022-10-24T11:08:24.269Z",
"user_id": "06d67681-e2c4-4ed4-b40a-5a2c5e7e6ed9"
}
}
]
}
but it doesn't return the ids sorted by date based on the createdAt and user_id keys.
What I would like is a search that returns results ordered by the createdAt and user_id keys without having to specify a value, i.e. a wildcard-type search.
Where am I wrong?
I have read several posts and guides but I did not understand how to do it.
Thanks for your help.
You say you want to return a list of id, createdAt and user_id, sorted by createdAt and user_id, and that you want all the documents returned.
If that is the case, all you need to do is create a MapReduce view of your data that emits the createdAt and user_id fields in that order, i.e.:
function (doc) {
  emit([doc.createdAt, doc.user_id], 1);
}
You don't need to include the document id because that comes for free.
You can then query the view by visiting the URL:
https://<URL>/<database>/_design/<ddoc_name>/_view/<view_name>
You will get all the docs like this:
{"total_rows":3,"offset":0,"rows":[
{"id":"2d277ec3dd8c33da7642b72722aa93ed","key":["2022-10-24T11:08:24.269Z","06d67681-e2c4-4ed4-b40a-5a2c5e7e6ed9"],"value":1},
{"id":"fb24a15d8fb7cdf12feadac08e7c05dc","key":["2022-10-24T11:15:24.269Z","c4bdcb54-3d0a-4b6a-a8a9-aa12e45345f3"],"value":1},
{"id":"606a4d591031c14a8c48fcb4a9541ff0","key":["2022-10-26T09:16:29.472Z","4499c1c2-7507-4707-b0e4-ec83e2d2f34d"],"value":1}
]}
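Since your original query sorted by createdAt descending, you can also read the view in reverse key order. A sketch, assuming the design document is named attributes and the view is named by_created (substitute your own names):
https://<URL>/<database>/_design/attributes/_view/by_created?descending=true&limit=10&include_docs=true
Here descending=true reverses the key order so the newest createdAt comes first, limit caps the number of rows, and include_docs=true returns each document body alongside its row.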

Configure monitor query with a limitation on an aggregation

I am trying to configure a monitor that looks at data logged by cron jobs.
I want to trigger an alert if a job stops logging data.
The query using SQL looks something like this:
POST _plugins/_sql/
{
"query" : "SELECT instance, job-id, count(*), max(#timestamp) as newest FROM job-statistics-* where #timestamp > '2022-09-28 00:00:00.000' group BY job-id, instance HAVING newest < '2022-09-28 08:45:00.000'"
}
Using explain, I converted this to a JSON query and made the timestamp dynamic:
{
"from": 0,
"size": 0,
"timeout": "1m",
"query": {
"range": {
"#timestamp": {
"from": "now-1h",
"to": null,
"include_lower": false,
"include_upper": true,
"boost": 1
}
}
},
"sort": [
{
"_doc": {
"order": "asc"
}
}
],
"aggregations": {
"composite_buckets": {
"composite": {
"size": 1000,
"sources": [
{
"job-id": {
"terms": {
"field": "job-id.keyword",
"missing_bucket": true,
"missing_order": "first",
"order": "asc"
}
}
},
{
"instance": {
"terms": {
"field": "instance.keyword",
"missing_bucket": true,
"missing_order": "first",
"order": "asc"
}
}
}
]
},
"aggregations": {
"count(*)": {
"value_count": {
"field": "_index"
}
},
"max(#timestamp)": {
"max": {
"field": "#timestamp"
}
}
}
}
}
}
In this query, the condition on the aggregation max(#timestamp) is missing.
In the explain response it appears here:
"name": "FilterOperator",
"description": {
"conditions": """<(max(#timestamp), cast_to_timestamp("2022-09-28 08:45:00.000"))"""
},
Ideally, this should be max(#timestamp) < now-30m.
My question:
How can I integrate this into the query or the monitor?
Is there another way to do this?
Thanks a lot
Marius
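One possible direction (an untested sketch): express the HAVING-style condition as a bucket_selector pipeline aggregation that keeps only the buckets whose newest timestamp is older than a threshold. Two caveats: some Elasticsearch/OpenSearch versions do not allow pipeline aggregations under a composite aggregation, in which case the grouping would need to be rebuilt with terms aggregations; and the threshold below is a hypothetical hard-coded epoch-millis stand-in for now-30m that the caller or monitor would have to compute. The max aggregation is renamed to newest so the buckets_path stays simple; these sub-aggregations would go inside composite_buckets in the query above, alongside or instead of the existing count(*) and max(#timestamp) blocks:
"aggregations": {
  "newest": {
    "max": {
      "field": "#timestamp"
    }
  },
  "stale_jobs_only": {
    "bucket_selector": {
      "buckets_path": {
        "newest": "newest"
      },
      "script": {
        "source": "params.newest < params.threshold_millis",
        "params": {
          "threshold_millis": 1664354700000
        }
      }
    }
  }
}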

Kibana: filter results by aggregated min and max dates from matched id

I want to pass an event_id to Kibana/Elasticsearch and find the min and max dates from the #timestamp field for this event_id. Then I want to set the date range to these dates and show all the results. I assume this is doable.
I can get the min and max with this aggregation:
GET /filebeat-*/_search
{
"query": {
"match": {
"event_id": 1234
}
},
"aggs" : {
"min_date": {"min": {"field": "#timestamp" }},
"max_date": {"max": {"field": "#timestamp" }}
}
}
and I can get the results by searching for the specific date range:
GET /filebeat-*/_search
{
"query": {
"bool": {
"filter": {
"range": {"#timestamp": {"gte": "2020-09-11T13:35:35.000Z", "lte": "2020-09-24T20:35:07.000Z"}}
}
}
}
}
How can I combine the two so that I can just change the event_id and get an automatic date-range feature?
EDIT:
I can do this:
GET /filebeat-*/_search
{
"query": {
"bool": {
"must": {
"match": {
"event_id": 1234
}
},
"filter": {
"range": {
"#timestamp": {
"lte": "2020-09-25",
"gte": "2020-09-24"
}
}
}
}
},
"aggs": {
"min_date": {
"min": {
"field": "#timestamp"
}
},
"max_date": {
"max": {
"field": "#timestamp"
}
}
}
}
But what I would like to do is something like:
GET /filebeat-*/_search
{
"query": {
"bool": {
"must": {
"match": {
"event_id": 1234
}
},
"filter": {
"range": {
"#timestamp": {
"lte": "max_date",
"gte": "min_date"
}
}
}
}
},
"aggs": {
"min_date": {
"min": {
"field": "#timestamp"
}
},
"max_date": {
"max": {
"field": "#timestamp"
}
}
}
}
But this causes the error: "failed to parse date field [min_date]"
Is it possible to use the aggregated min and max values to define the date range?
Since you have not provided any sample index data, I am applying a range query on a date-type field.
Below is a working example with index mapping, data, search query, and search result.
Index Mapping:
{
"mappings": {
"properties": {
"date": {
"type": "date"
}
}
}
}
Index Data:
{
"date": "2015-02-10",
"event_id":"1234"
}
{
"date": "2015-01-01",
"event_id":"1235"
}
{
"date": "2015-02-01",
"event_id":"1234"
}
{
"date": "2015-02-01",
"event_id":"1235"
}
{
"date": "2015-01-20",
"event_id":"1234"
}
Search Query:
{
"query": {
"bool": {
"must": {
"match": {
"event_id": 1234
}
},
"filter": {
"range": {
"date": {
"lte": "2015-02-15",
"gte": "2015-01-11"
}
}
}
}
},
"aggs": {
"min_date": {
"min": {
"field": "date"
}
},
"max_date": {
"max": {
"field": "date"
}
}
}
}
Search Result:
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0.44183272,
"hits": [
{
"_index": "stof_64127765",
"_type": "_doc",
"_id": "3",
"_score": 0.44183272,
"_source": {
"date": "2015-02-01",
"event_id": "1234"
}
},
{
"_index": "stof_64127765",
"_type": "_doc",
"_id": "1",
"_score": 0.44183272,
"_source": {
"date": "2015-02-10",
"event_id": "1234"
}
},
{
"_index": "stof_64127765",
"_type": "_doc",
"_id": "5",
"_score": 0.44183272,
"_source": {
"date": "2015-01-20",
"event_id": "1234"
}
}
]
},
"aggregations": {
"max_date": {
"value": 1.4235264E12,
"value_as_string": "2015-02-10T00:00:00.000Z"
},
"min_date": {
"value": 1.421712E12,
"value_as_string": "2015-01-20T00:00:00.000Z"
}
}
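Note that Elasticsearch cannot reference the min_date/max_date aggregation results inside the range filter of the same request; a common workaround (a rough sketch, not the only option) is to issue two requests, first fetching only the aggregations and then plugging the returned value_as_string dates into the range filter:
GET /filebeat-*/_search
{
  "size": 0,
  "query": {
    "match": {
      "event_id": 1234
    }
  },
  "aggs": {
    "min_date": { "min": { "field": "#timestamp" } },
    "max_date": { "max": { "field": "#timestamp" } }
  }
}
Then, using the two dates returned by the aggregations:
GET /filebeat-*/_search
{
  "query": {
    "bool": {
      "must": {
        "match": {
          "event_id": 1234
        }
      },
      "filter": {
        "range": {
          "#timestamp": {
            "gte": "2020-09-11T13:35:35.000Z",
            "lte": "2020-09-24T20:35:07.000Z"
          }
        }
      }
    }
  }
}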

Need JOLT spec file for transform of complex JSON

I have a complex JSON object (I've simplified it for this example) that I cannot figure out the JOLT transform JSON for. Does anybody have any ideas of what the JOLT spec file should be?
Original JSON
[
{
"date": {
"isoDate": "2019-03-22"
},
"application": {
"name": "SiebelProject"
},
"applicationResults": [
{
"reference": {
"name": "Number of Code Lines"
},
"result": {
"value": 44501
}
},
{
"reference": {
"name": "Transferability"
},
"result": {
"grade": 3.1889542208002064
}
}
]
},
{
"date": {
"isoDate": "2019-03-21"
},
"application": {
"name": "SiebelProject"
},
"applicationResults": [
{
"reference": {
"name": "Number of Code Lines"
},
"result": {
"value": 45000
}
},
{
"reference": {
"name": "Transferability"
},
"result": {
"grade": 3.8
}
}
]
}
]
Desired JSON after transformation and sorting by "Name" ASC, "Date" DESC
[
{
"Name": "SiebelProject",
"Date": "2019-03-22",
"Number of Code Lines": 44501,
"Transferability" : 3.1889542208002064
},
{
"Name": "SiebelProject",
"Date": "2019-03-21",
"Number of Code Lines": 45000,
"Transferability" : 3.8
}
]
I couldn't find a way to do the sort (I'm not even sure you can sort descending in JOLT) but here's a spec to do the transform:
[
{
"operation": "shift",
"spec": {
"*": {
"date": {
"isoDate": "[#3].Date"
},
"application": {
"name": "[#3].Name"
},
"applicationResults": {
"*": {
"reference": {
"name": {
"Number of Code Lines": {
"#(3,result.value)": "[#7].Number of Code Lines"
},
"Transferability": {
"#(3,result.grade)": "[#7].Transferability"
}
}
}
}
}
}
}
}
]
After that there are some tools (like jq I think) that could do the sort.
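For example, assuming the transformed output is saved to output.json, a jq one-liner along these lines could do the sort (Name ascending, then Date descending within each name; ISO dates compare correctly as plain strings):
jq 'group_by(.Name) | map(sort_by(.Date) | reverse) | add' output.json
group_by(.Name) sorts and groups the objects by Name, sort_by(.Date) | reverse puts each group newest first, and add concatenates the groups back into a single array.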

Elasticsearch - query dates without a specified timezone

I have an index with the following mappings, using the standard format for a date. In the 2nd record below, the time specified is actually a local time, but ES treats it as UTC.
Even though ES internally converts all parsed datetimes to UTC, it must obviously store the original string as well.
My question is whether (and how) it might be possible to query all records for which the scheduledDT value doesn't have the timezone explicitly specified.
{
"curator_v3": {
"mappings": {
"published": {
"analyzer": "classic",
"numeric_detection": true,
"properties": {
"Id": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"createDT": {
"type": "date",
"format": "dateOptionalTime",
"include_in_all": false
},
"scheduleDT": {
"type": "date",
"format": "dateOptionalTime",
"include_in_all": false
},
"title": {
"type": "string",
"fields": {
"english": {
"type": "string",
"analyzer": "english"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"shingle": {
"type": "string",
"analyzer": "shingle"
},
"spanish": {
"type": "string",
"analyzer": "spanish"
}
},
"include_in_all": false
}
}
}
}
}
}
We use .NET as our client to Elasticsearch and haven't been consistent in specifying a timezone for the scheduleDT field.
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 12,
"successful": 12,
"failed": 0
},
"hits": {
"total": 32,
"max_score": null,
"hits": [
{
"_index": "curator_v3",
"_type": "published",
"_id": "29651227",
"_score": null,
"fields": {
"Id": [
"29651227"
],
"scheduleDT": [
"2015-11-21T22:17:51.0946798-06:00"
],
"title": [
"97 Year-Old Woman Cries Tears Of Joy After Finally Getting Her High School Diploma"
],
"createDT": [
"2015-11-21T22:13:32.3597142-06:00"
]
},
"sort": [
1448165871094
]
},
{
"_index": "curator_v3",
"_type": "published",
"_id": "210466413",
"_score": null,
"fields": {
"Id": [
"210466413"
],
"scheduleDT": [
"2015-11-22T12:00:00"
],
"title": [
"6 KC treats to bring to Thanksgiving"
],
"createDT": [
"2015-11-20T15:08:25.4282-06:00"
]
},
"sort": [
1448193600000
]
}
]
},
"aggregations": {
"ScheduleDT": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 27,
"buckets": [
{
"key": 1448165871094,
"key_as_string": "2015-11-22T04:17:51.094Z",
"doc_count": 1
},
{
"key": 1448193600000,
"key_as_string": "2015-11-22T12:00:00.000Z",
"doc_count": 4
}
]
}
}
}
You can do this by querying for documents whose scheduleDT field value is shorter than 20 characters (e.g. 2015-11-22T12:00:00). All the date values with an explicitly specified time zone would be longer.
Something like this should do:
{
  "query": {
    "filtered": {
      "filter": {
        "script": {
          "script": "doc.scheduleDT.value.size() < 20"
        }
      }
    }
  }
}
Note, however, that in order to make your queries easier to create, you should always try to convert all your timestamps to UTC before indexing your documents.
Finally, also make sure that you have dynamic scripting enabled in order to run the above query.
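What that looks like depends on your Elasticsearch version; as a rough sketch for the 1.x/2.x era (which the filtered query above suggests), it is a node-level setting in elasticsearch.yml, e.g.:
script.disable_dynamic: false
on older 1.x releases, or
script.inline: true
on later releases. Double-check the scripting documentation for your exact version before relying on either name.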
UPDATE
Actually, if you use the _source directly in the script it will work because it will return the real value from the source as it was when the document was indexed:
{
  "query": {
    "filtered": {
      "filter": {
        "script": {
          "script": "_source.scheduleDT.size() < 20"
        }
      }
    }
  }
}