Elasticsearch: Date Aggregation Most Recent - date

I have query that works. It aggregates data based on Id and finds the MOST RECENT object based on the created field. The problem I have is that I would like to find the SECOND MOST RECENT instead of MOST RECENT. How would I go about this? I have been looking all through the docs and all I can find is range which doesn't help me to much. Thank you :)
{
"query":{
"match": {
"name": "Robert"
}
},
"aggs": {
"previous": {
"terms": {
"field": "Id",
"order": {"timeCreated": "desc"}
},
"aggs": {
"timeCreated": {
"max": {"field": "created"}
}
}
}
}
}

Top_hits is what you are looking for. Use this:
{
"query":{
"match": {
"name": "A"
}
},
"aggs": {
"previous": {
"terms": {
"field": "Id"
},
"aggs": {
"latestRecords": {
"top_hits": {
"sort": {
"created": {
"order": "desc"
}
},
"size" :2
}
}
}
}
}
}

Related

Configure monitor query with limitation on aggeration

I am trying to configure a monitor that looks at data logged by cron jobs.
I want to trigger an alert if a job does stop to log data.
The query using SQL looks something like this:
POST _plugins/_sql/
{
"query" : "SELECT instance, job-id, count(*), max(#timestamp) as newest FROM job-statistics-* where #timestamp > '2022-09-28 00:00:00.000' group BY job-id, instance HAVING newest < '2022-09-28 08:45:00.000'"
}
Using exlplain I converted this to a JSON Query and made the timestamp dynamic:
{
"from": 0,
"size": 0,
"timeout": "1m",
"query": {
"range": {
"#timestamp": {
"from": "now-1h",
"to": null,
"include_lower": false,
"include_upper": true,
"boost": 1
}
}
},
"sort": [
{
"_doc": {
"order": "asc"
}
}
],
"aggregations": {
"composite_buckets": {
"composite": {
"size": 1000,
"sources": [
{
"job-id": {
"terms": {
"field": "job-id.keyword",
"missing_bucket": true,
"missing_order": "first",
"order": "asc"
}
}
},
{
"instance": {
"terms": {
"field": "instance.keyword",
"missing_bucket": true,
"missing_order": "first",
"order": "asc"
}
}
}
]
},
"aggregations": {
"count(*)": {
"value_count": {
"field": "_index"
}
},
"max(#timestamp)": {
"max": {
"field": "#timestamp"
}
}
}
}
}
}
From this query, the limitation on the aggeration max(#timestmap) is missing.
In the explain response it is here:
"name": "FilterOperator",
"description": {
"conditions": """<(max(#timestamp), cast_to_timestamp("2022-09-28 08:45:00.000"))"""
},
Ideally, this should be max(#timestmap) < now-30m
My question:
How can I integrate this into the query or the monitor?
Is there another way to do this?
Thanks a lot
Marius

How to add an extra attribute in a sub document in MongoDB

How to update/add sub-document in sub-document in mongodb.
I have sample data like this:
{
"baselineParty": {
"AP": [
{
"party": {
"partyId": {
"value": "12345"
},
}
},
{
"party": {
"partyId": {
"value": "12346"
}
}
},
{
"party": {
"partyId": {
"value": "12347"
}
}
}
]
}
}
I want to add an extra field "baselineParty.AP.party.verifiedStatusYn" to the existing json
Expected result would be like this :
{
"baselineParty": {
"AP": [
{
"party": {
"partyId": {
"value": "12345"
}
},
"verifiedStatusYn": {
"by": "cdd",
"updated": "22",
"value": "yes"
}
},
{
"party": {
"partyId": {
"value": "12346"
}
},
"verifiedStatusYn": {
"by": "cdd",
"updated": "22",
"value": "yes"
}
},
{
"party": {
"partyId": {
"value": "12347"
}
},
"verifiedStatusYn": {
"by": "cdd",
"updated": "22",
"value": "yes"
}
}
]
}
}
I tried using $set but not getting expected result.
Do you guys have a solution for this. Thanks!
You will need $[] to update all entries in the array.
db.collection.update({},
{
$set: {
"baselineParty.AP.$[].party.verifiedStatusYn": {
"by": "cdd",
"updated": "22",
"value": "yes"
}
}
})
Here is the Mongo playground for your reference.

Kibana: filter results by aggregated min and max dates from matched id

I want to pass an event_id to Kibana/Elastic Search and find the min and max dates from the #timestamp field for this event_id. Then I want to set the date range to these dates and show all the results. I assume this is doable.
I can get the min and max with this aggregation:
GET /filebeat-*/_search
{
"query": {
"match": {
"event_id": 1234
}
},
"aggs" : {
"min_date": {"min": {"field": "#timestamp" }},
"max_date": {"max": {"field": "#timestamp" }}
}
}
and I can get the results by searching for the specific date range:
GET /filebeat-*/_search
{
"query": {
"bool": {
"filter": {
"range": {"#timestamp": {"gte": "2020-09-11T13:35:35.000Z", "lte": "2020-09-24T20:35:07.000Z"}}
}
}
}
}
how can I combine the two so that I can just change the event_id and have an auto date range type feature?
EDIT:
I can do this:
GET /filebeat-*/_search
{
"query": {
"bool": {
"must": {
"match": {
"event_id": 1234
}
},
"filter": {
"range": {
"#timestamp": {
"lte": "2020-09-25",
"gte": "2020-09-24"
}
}
}
}
},
"aggs": {
"min_date": {
"min": {
"field": "#timestamp"
}
},
"max_date": {
"max": {
"field": "#timestamp"
}
}
}
}
But what I would like to do is something like:
GET /filebeat-*/_search
{
"query": {
"bool": {
"must": {
"match": {
"event_id": 1234
}
},
"filter": {
"range": {
"#timestamp": {
"lte": "max_date",
"gte": "min_date"
}
}
}
}
},
"aggs": {
"min_date": {
"min": {
"field": "#timestamp"
}
},
"max_date": {
"max": {
"field": "#timestamp"
}
}
}
}
But this causes the error: "failed to parse date field [min_date]"
Is it possible to use the aggregated min and max values to define the date range?
Since you have not provided any sample index data, so applying range query on date type field
Adding a working example with index mapping, data, search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"date": {
"type": "date"
}
}
}
}
Index Data:
{
"date": "2015-02-10",
"event_id":"1234"
}
{
"date": "2015-01-01",
"event_id":"1235"
}
{
"date": "2015-02-01",
"event_id":"1234"
}
{
"date": "2015-02-01",
"event_id":"1235"
}
{
"date": "2015-01-20",
"event_id":"1234"
}
Search Query:
{
"query": {
"bool": {
"must": {
"match": {
"event_id": 1234
}
},
"filter": {
"range": {
"date": {
"lte": "2015-02-15",
"gte": "2015-01-11"
}
}
}
}
},
"aggs": {
"min_date": {
"min": {
"field": "date"
}
},
"max_date": {
"max": {
"field": "date"
}
}
}
}
Search Result:
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0.44183272,
"hits": [
{
"_index": "stof_64127765",
"_type": "_doc",
"_id": "3",
"_score": 0.44183272,
"_source": {
"date": "2015-02-01",
"event_id": "1234"
}
},
{
"_index": "stof_64127765",
"_type": "_doc",
"_id": "1",
"_score": 0.44183272,
"_source": {
"date": "2015-02-10",
"event_id": "1234"
}
},
{
"_index": "stof_64127765",
"_type": "_doc",
"_id": "5",
"_score": 0.44183272,
"_source": {
"date": "2015-01-20",
"event_id": "1234"
}
}
]
},
"aggregations": {
"max_date": {
"value": 1.4235264E12,
"value_as_string": "2015-02-10T00:00:00.000Z"
},
"min_date": {
"value": 1.421712E12,
"value_as_string": "2015-01-20T00:00:00.000Z"
}
}

Need JOLT spec file for transfer of complex JSON

I have a complex JSON object (I've simplified it for this example) that I cannot figure out the JOLT transform JSON for. Does anybody have any ideas of what the JOLT spec file should be?
Original JSON
[
{
"date": {
"isoDate": "2019-03-22"
},
"application": {
"name": "SiebelProject"
},
"applicationResults": [
{
"reference": {
"name": "Number of Code Lines"
},
"result": {
"value": 44501
}
},
{
"reference": {
"name": "Transferability"
},
"result": {
"grade": 3.1889542208002064
}
}
]
},
{
"date": {
"isoDate": "2019-03-21"
},
"application": {
"name": "SiebelProject"
},
"applicationResults": [
{
"reference": {
"name": "Number of Code Lines"
},
"result": {
"value": 45000
}
},
{
"reference": {
"name": "Transferability"
},
"result": {
"grade": 3.8
}
}
]
}
]
Desired JSON after transformation and sorting by "Name" ASC, "Date" DESC
[
{
"Name": "SiebelProject",
"Date": "2019-03-22",
"Number of Code Lines": 44501,
"Transferability" : 3.1889542208002064
},
{
"Name": "SiebelProject",
"Date": "2019-03-21",
"Number of Code Lines": 45000,
"Transferability" : 3.8
}
]
I couldn't find a way to do the sort (I'm not even sure you can sort descending in JOLT) but here's a spec to do the transform:
[
{
"operation": "shift",
"spec": {
"*": {
"date": {
"isoDate": "[#3].Date"
},
"application": {
"name": "[#3].Name"
},
"applicationResults": {
"*": {
"reference": {
"name": {
"Number of Code Lines": {
"#(3,result.value)": "[#7].Number of Code Lines"
},
"Transferability": {
"#(3,result.grade)": "[#7].Transferability"
}
}
}
}
}
}
}
}
]
After that there are some tools (like jq I think) that could do the sort.

How to sum two variable in REST API

I want to sum two variable in REST API,and order by it.
This is my REST API:
"aggs": {
"genres": {
"terms": {
"field": "L7_PROTO_NAME.keyword",
"order": {
"sum_bytes": "desc"
}
},
"aggs": {
"in_bytes": {
"sum": {
"field": "IN_BYTES"
}
},
"out_bytes": {
"sum": {
"field": "OUT_BYTES"
}
}
}
thank you in advance!
You need to create another sub-aggregation that sums the two fields and then order the terms aggregation by that sub-aggregation:
{
"query": {
"bool": {
"should": [
{
"term": {
"_index": "logstash-2018.01.02"
}
},
{
"term": {
"IPV4_DST_ADDR": "192.168.0.159"
}
},
{
"term": {
"IPV4_SRC_ADDR": "192.168.0.159"
}
}
]
}
},
"aggs": {
"genres": {
"terms": {
"field": "L7_PROTO_NAME.keyword",
"order": {
"sum_bytes": "desc"
}
},
"aggs": {
"in_bytes": {
"sum": {
"field": "IN_BYTES"
}
},
"out_bytes": {
"sum": {
"field": "OUT_BYTES"
}
},
"sum_bytes": {
"sum": {
"script": {
"source": "doc.IN_BYTES.value + doc.OUT_BYTES.value"
}
}
}
}
}
}
}
Since scripts are quite computation heavy, you should sum those two fields at indexing time and index the result as a new field that you can use directly in your aggregation, like this:
{
"query": {
"bool": {
"should": [
{
"term": {
"_index": "logstash-2018.01.02"
}
},
{
"term": {
"IPV4_DST_ADDR": "192.168.0.159"
}
},
{
"term": {
"IPV4_SRC_ADDR": "192.168.0.159"
}
}
]
}
},
"aggs": {
"genres": {
"terms": {
"field": "L7_PROTO_NAME.keyword",
"order": {
"sum_bytes": "desc"
}
},
"aggs": {
"in_bytes": {
"sum": {
"field": "IN_BYTES"
}
},
"out_bytes": {
"sum": {
"field": "OUT_BYTES"
}
},
"sum_bytes": {
"sum": {
"field": "SUM_BYTES"
}
}
}
}
}
}