Elasticsearch iterate over range query result in nested object - date

I have an Elasticsearch index with a nested object called availability, which has a date field and a boolean field. The mapping schema is as follows:
{
"hotel_nested" : {
"mappings" : {
"doc" : {
"properties" : {
"amenities" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"availability" : {
"type" : "nested",
"properties" : {
"date" : {
"type" : "date",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"format" : "MM/dd/yyyy HH:mm:ss||MM/dd/yyyy||epoch_millis"
},
"status" : {
"type" : "boolean"
}
}
},
"dailyRate" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"destination" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"hotelName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"location" : {
"properties" : {
"lat" : {
"type" : "float"
},
"lon" : {
"type" : "float"
}
}
},
"maxOccupancy" : {
"type" : "long"
},
"propertyType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"roomType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"starRating" : {
"type" : "float"
}
}
}
}
}
}
Here is one of the object
{
"_index" : "hotel_nested",
"_type" : "doc",
"_id" : "14",
"_score" : 1.0,
"_source" : {
"roomType" : "Executive",
"destination" : "Albuquerque",
"maxOccupancy" : 4,
"starRating" : 6.8,
"hotelName" : "Lotte Hotel Seoul",
"amenities" : [
"Spa",
"Internet",
"Free parking",
"Air conditioning",
"Laundry Service",
"Business Services"
],
"location" : {
"lat" : 56.76755,
"lon" : -110.75792
},
"propertyType" : "Hotel",
"dailyRate" : "$178.96",
"availability" : [
{
"date" : "10/1/2017",
"status" : true
},
{
"date" : "10/2/2017",
"status" : true
},
{
"date" : "10/3/2017",
"status" : true
},
{
"date" : "10/4/2017",
"status" : false
},
{
"date" : "10/5/2017",
"status" : false
},
{
"date" : "10/6/2017",
"status" : false
},
{
"date" : "10/7/2017",
"status" : true
},
{
"date" : "10/8/2017",
"status" : true
},
{
"date" : "10/9/2017",
"status" : false
},
{
"date" : "10/10/2017",
"status" : false
},
{
"date" : "10/11/2017",
"status" : true
},
{
"date" : "10/12/2017",
"status" : false
},
{
"date" : "10/13/2017",
"status" : false
},
{
"date" : "10/14/2017",
"status" : true
},
{
"date" : "10/15/2017",
"status" : true
},
{
"date" : "10/16/2017",
"status" : true
},
{
"date" : "10/17/2017",
"status" : false
},
{
"date" : "10/18/2017",
"status" : true
},
{
"date" : "10/19/2017",
"status" : false
},
{
"date" : "10/20/2017",
"status" : true
},
{
"date" : "10/21/2017",
"status" : false
},
{
"date" : "10/22/2017",
"status" : true
},
{
"date" : "10/23/2017",
"status" : true
},
{
"date" : "10/24/2017",
"status" : true
},
{
"date" : "10/25/2017",
"status" : false
},
{
"date" : "10/26/2017",
"status" : false
},
{
"date" : "10/27/2017",
"status" : false
},
{
"date" : "10/28/2017",
"status" : true
},
{
"date" : "10/29/2017",
"status" : true
},
{
"date" : "10/30/2017",
"status" : true
},
{
"date" : "10/31/2017",
"status" : true
},
{
"date" : "11/1/2017",
"status" : true
},
{
"date" : "11/2/2017",
"status" : false
},
{
"date" : "11/3/2017",
"status" : false
},
{
"date" : "11/4/2017",
"status" : false
},
{
"date" : "11/5/2017",
"status" : false
},
{
"date" : "11/6/2017",
"status" : true
},
{
"date" : "11/7/2017",
"status" : false
},
{
"date" : "11/8/2017",
"status" : false
},
{
"date" : "11/9/2017",
"status" : false
},
{
"date" : "11/10/2017",
"status" : false
},
{
"date" : "11/11/2017",
"status" : false
},
{
"date" : "11/12/2017",
"status" : false
},
{
"date" : "11/13/2017",
"status" : false
},
{
"date" : "11/14/2017",
"status" : true
},
{
"date" : "11/15/2017",
"status" : true
},
{
"date" : "11/16/2017",
"status" : false
},
{
"date" : "11/17/2017",
"status" : true
},
{
"date" : "11/18/2017",
"status" : false
},
{
"date" : "11/19/2017",
"status" : true
},
{
"date" : "11/20/2017",
"status" : true
},
{
"date" : "11/21/2017",
"status" : true
},
{
"date" : "11/22/2017",
"status" : true
},
{
"date" : "11/23/2017",
"status" : false
},
{
"date" : "11/24/2017",
"status" : false
},
{
"date" : "11/25/2017",
"status" : false
},
{
"date" : "11/26/2017",
"status" : true
},
{
"date" : "11/27/2017",
"status" : true
},
{
"date" : "11/28/2017",
"status" : false
},
{
"date" : "11/29/2017",
"status" : false
},
{
"date" : "11/30/2017",
"status" : false
},
{
"date" : "12/1/2017",
"status" : true
},
{
"date" : "12/2/2017",
"status" : true
},
{
"date" : "12/3/2017",
"status" : true
},
{
"date" : "12/4/2017",
"status" : true
},
{
"date" : "12/5/2017",
"status" : true
},
{
"date" : "12/6/2017",
"status" : true
},
{
"date" : "12/7/2017",
"status" : false
},
{
"date" : "12/8/2017",
"status" : false
},
{
"date" : "12/9/2017",
"status" : true
},
{
"date" : "12/10/2017",
"status" : false
},
{
"date" : "12/11/2017",
"status" : true
},
{
"date" : "12/12/2017",
"status" : false
},
{
"date" : "12/13/2017",
"status" : true
},
{
"date" : "12/14/2017",
"status" : true
},
{
"date" : "12/15/2017",
"status" : true
},
{
"date" : "12/16/2017",
"status" : false
},
{
"date" : "12/17/2017",
"status" : true
},
{
"date" : "12/18/2017",
"status" : true
},
{
"date" : "12/19/2017",
"status" : false
},
{
"date" : "12/20/2017",
"status" : true
},
{
"date" : "12/21/2017",
"status" : true
},
{
"date" : "12/22/2017",
"status" : false
},
{
"date" : "12/23/2017",
"status" : false
},
{
"date" : "12/24/2017",
"status" : false
},
{
"date" : "12/25/2017",
"status" : false
},
{
"date" : "12/26/2017",
"status" : false
},
{
"date" : "12/27/2017",
"status" : false
},
{
"date" : "12/28/2017",
"status" : false
},
{
"date" : "12/29/2017",
"status" : false
},
{
"date" : "12/30/2017",
"status" : true
},
{
"date" : "12/31/2017",
"status" : true
}
]
}
}
My issue is that I want to search between two dates and find out whether the hotel's availability status is true for all dates between them, while also matching the city name at a basic level. I have other search criteria as well, which I think I can handle, but finding a true status for every date between two dates is what is bothering me.
For example, the search parameters could be: city Albuquerque, start date 10/22/2017, and end date 10/24/2017.
I have created a query, but it gives undesired results. My query is:
{
"query": {
"bool": {
"must":[
{
"match": {
"destination": {
"query": "Albuquerque" ,
"operator": "and"
}
}
}
],
"filter": {
"bool": {
"must": [ {
"nested": {
"path": "availability",
"query": {
"bool": {
"must": [
{
"range": {
"availability.date":{ "gte": "10/22/2017",
"lte":"10/24/2017"}
}
},
{
"term": {
"availability.status": "true"
}
}
]
}
}
}
}
]
}
}
}
}
}
The first hit of this query is
{
"_index": "hotel_nested",
"_type": "doc",
"_id": "305",
"_score": 4.815987,
"_source": {
"roomType": "Executive",
"destination": "Albuquerque",
"maxOccupancy": 2,
"starRating": 4.2,
"hotelName": "Sheraton San Diego Hotel & Marina",
"amenities": [
"Kitchen",
"Air conditioning",
"Laundry Service",
"Business Services",
"Free parking",
"Spa"
],
"location": {
"lat": 54.92887,
"lon": -101.31256
},
"propertyType": "Hotel",
"dailyRate": "$462.59",
"availability": [
{
"date": "10/1/2017",
"status": false
},
{
"date": "10/2/2017",
"status": true
},
{
"date": "10/3/2017",
"status": false
},
{
"date": "10/4/2017",
"status": true
},
{
"date": "10/5/2017",
"status": true
},
{
"date": "10/6/2017",
"status": true
},
{
"date": "10/7/2017",
"status": true
},
{
"date": "10/8/2017",
"status": false
},
{
"date": "10/9/2017",
"status": false
},
{
"date": "10/10/2017",
"status": true
},
{
"date": "10/11/2017",
"status": true
},
{
"date": "10/12/2017",
"status": false
},
{
"date": "10/13/2017",
"status": true
},
{
"date": "10/14/2017",
"status": false
},
{
"date": "10/15/2017",
"status": true
},
{
"date": "10/16/2017",
"status": false
},
{
"date": "10/17/2017",
"status": true
},
{
"date": "10/18/2017",
"status": false
},
{
"date": "10/19/2017",
"status": false
},
{
"date": "10/20/2017",
"status": true
},
{
"date": "10/21/2017",
"status": true
},
{
"date": "10/22/2017",
"status": true
},
{
"date": "10/23/2017",
"status": false
},
{
"date": "10/24/2017",
"status": false
},
{
"date": "10/25/2017",
"status": false
},
{
"date": "10/26/2017",
"status": true
},
{
"date": "10/27/2017",
"status": true
},
{
"date": "10/28/2017",
"status": false
},
{
"date": "10/29/2017",
"status": true
},
{
"date": "10/30/2017",
"status": false
},
{
"date": "10/31/2017",
"status": false
},
{
"date": "11/1/2017",
"status": false
},
{
"date": "11/2/2017",
"status": false
},
{
"date": "11/3/2017",
"status": false
},
{
"date": "11/4/2017",
"status": false
},
{
"date": "11/5/2017",
"status": false
},
{
"date": "11/6/2017",
"status": true
},
{
"date": "11/7/2017",
"status": false
},
{
"date": "11/8/2017",
"status": true
},
{
"date": "11/9/2017",
"status": true
},
{
"date": "11/10/2017",
"status": true
},
{
"date": "11/11/2017",
"status": true
},
{
"date": "11/12/2017",
"status": false
},
{
"date": "11/13/2017",
"status": true
},
{
"date": "11/14/2017",
"status": false
},
{
"date": "11/15/2017",
"status": true
},
{
"date": "11/16/2017",
"status": true
},
{
"date": "11/17/2017",
"status": false
},
{
"date": "11/18/2017",
"status": true
},
{
"date": "11/19/2017",
"status": false
},
{
"date": "11/20/2017",
"status": true
},
{
"date": "11/21/2017",
"status": false
},
{
"date": "11/22/2017",
"status": false
},
{
"date": "11/23/2017",
"status": false
},
{
"date": "11/24/2017",
"status": false
},
{
"date": "11/25/2017",
"status": false
},
{
"date": "11/26/2017",
"status": false
},
{
"date": "11/27/2017",
"status": false
},
{
"date": "11/28/2017",
"status": false
},
{
"date": "11/29/2017",
"status": false
},
{
"date": "11/30/2017",
"status": true
},
{
"date": "12/1/2017",
"status": false
},
{
"date": "12/2/2017",
"status": false
},
{
"date": "12/3/2017",
"status": false
},
{
"date": "12/4/2017",
"status": true
},
{
"date": "12/5/2017",
"status": true
},
{
"date": "12/6/2017",
"status": false
},
{
"date": "12/7/2017",
"status": true
},
{
"date": "12/8/2017",
"status": false
},
{
"date": "12/9/2017",
"status": true
},
{
"date": "12/10/2017",
"status": true
},
{
"date": "12/11/2017",
"status": false
},
{
"date": "12/12/2017",
"status": true
},
{
"date": "12/13/2017",
"status": false
},
{
"date": "12/14/2017",
"status": true
},
{
"date": "12/15/2017",
"status": false
},
{
"date": "12/16/2017",
"status": true
},
{
"date": "12/17/2017",
"status": true
},
{
"date": "12/18/2017",
"status": false
},
{
"date": "12/19/2017",
"status": false
},
{
"date": "12/20/2017",
"status": false
},
{
"date": "12/21/2017",
"status": true
},
{
"date": "12/22/2017",
"status": true
},
{
"date": "12/23/2017",
"status": false
},
{
"date": "12/24/2017",
"status": false
},
{
"date": "12/25/2017",
"status": true
},
{
"date": "12/26/2017",
"status": false
},
{
"date": "12/27/2017",
"status": true
},
{
"date": "12/28/2017",
"status": false
},
{
"date": "12/29/2017",
"status": false
},
{
"date": "12/30/2017",
"status": true
},
{
"date": "12/31/2017",
"status": false
}
]
}
}
In this hit, the status for the dates 10/23/2017 and 10/24/2017 is false, so this hotel should not have matched.
Can someone point me in the right direction, or give an example search query that solves this problem?

You need to put your nested query under the must clause. Add the inner_hits key at the bottom of the nested query and restrict _source to specific fields at the top (because we don't care about the sub-documents that don't match the nested query), as follows:
{
"_source": [
"hotelName",
"dailyRate"
],
"query": {
"bool": {
"must": [
{
"match": {
"destination": {
"query": "Albuquerque",
"operator": "and"
}
}
},
{
"nested": {
"path": "availability",
"query": {
"bool": {
"must": [
{
"range": {
"availability.date": {
"gte": "10/22/2017",
"lte": "10/24/2017"
}
}
},
{
"term": {
"availability.status": "true"
}
}
]
}
},
"inner_hits": {
"size": 10
}
}
}
]
}
}
}
Note that you will now get only the nested documents that satisfy the conditions; e.g., for your example the result would be:
{
"_index": "hotel_nested",
"_type": "doc",
"_id": "305",
"_score": 4.815987,
"_source": {
"hotelName": "Sheraton San Diego Hotel & Marina",
"dailyRate": "$462.59"
},
"inner_hits": {
"availability": {
"hits": {
"total": 1,
"hits": [
{
"date": "10/22/2017",
"status": true
}
]
}
}
}
}
So now you will have to check whether all the days in the range October 22-24 are present.
One shortcut is to set "inner_hits":{"size":0} and simply check whether the total number of inner hits equals the number of days in the range October 22-24 (three days):
"inner_hits"."availability"."hits"."total" == 3

Related

MongoDB update and insert on an array

I have a mongo document as shown below :
{
"ITEMS": [
{
"BUKRS": "1000",
"GLACC": "476000",
"NETWR": 100
}
],
"EXCEPTIONS": [
{
"CURR": true,
"EVENT": "WEB",
"DATE_TIME": "2020-02-08T22:46:29.219Z"
},
{
"CURR": true,
"EVENT": "WEB",
"DATE_TIME": "2020-02-08T22:46:29.219Z"
},
{
"CURR": false,
"EVENT": "WEB",
"DATE_TIME": "2020-02-08T22:46:29.219Z"
}
]
}
What I need to achieve, in a single Mongo query, is to set EXCEPTIONS.$.CURR to false for all elements and insert a new object into the EXCEPTIONS array, usually with CURR: true.
You can do it in one DB call using .bulkWrite(), which is available in MongoDB versions >= 3.2 (note that the $[] all-positional operator used below requires MongoDB >= 3.6):
db.collection.bulkWrite([{
updateOne: {
"filter": {}, // If you've any filters to match a doc give it here
"update": { $set: { "EXCEPTIONS.$[].CURR": false } } // $[] operator updates all elements in an array.
}
}, {
updateOne: {
"filter": {},
"update": {
$push: {
"EXCEPTIONS": {
"CURR": true,
"EVENT": "WEB",
"DATE_TIME": "New_Time"
}
}
}
}
}])
Document in DB :
/* 1 */
{
"_id" : ObjectId("5e475da77f8bc30a7504f0b6"),
"ITEMS" : [
{
"BUKRS" : "1000",
"GLACC" : "476000",
"NETWR" : 100
}
],
"EXCEPTIONS" : [
{
"CURR" : true,
"EVENT" : "WEB",
"DATE_TIME" : "2020-02-08T22:46:29.219Z"
},
{
"CURR" : true,
"EVENT" : "WEB",
"DATE_TIME" : "2020-02-08T22:46:29.219Z"
},
{
"CURR" : false,
"EVENT" : "WEB",
"DATE_TIME" : "2020-02-08T22:46:29.219Z"
}
]
}
Response :
/* 1 */
{
"acknowledged" : true,
"deletedCount" : 0.0,
"insertedCount" : 0.0,
"matchedCount" : 2.0, // It's 2 as it modified array/doc twice
"upsertedCount" : 0.0,
"insertedIds" : {},
"upsertedIds" : {}
}
Result in DB :
/* 1 */
{
"_id" : ObjectId("5e475da77f8bc30a7504f0b6"),
"ITEMS" : [
{
"BUKRS" : "1000",
"GLACC" : "476000",
"NETWR" : 100
}
],
"EXCEPTIONS" : [
{
"CURR" : false,
"EVENT" : "WEB",
"DATE_TIME" : "2020-02-08T22:46:29.219Z"
},
{
"CURR" : false,
"EVENT" : "WEB",
"DATE_TIME" : "2020-02-08T22:46:29.219Z"
},
{
"CURR" : false,
"EVENT" : "WEB",
"DATE_TIME" : "2020-02-08T22:46:29.219Z"
},
{
"CURR" : true,
"EVENT" : "WEB",
"DATE_TIME" : "New_Time"
}
]
}

MongoDB Aggregation - Creating new arrays from a given array

I wish to create new arrays from a given array based on my aggregation results. The desired outcome for the following document is to split the array into new arrays, where each split point is the action "start":
{
"_id" : "5f851b06b08ab4b1f916c14841d4bbba",
"actions" : [
{
"action" : "start",
"datetime" : 1525692527345.0
},
{
"action" : "scrolled",
"datetime" : 1525692545966.0
},
{
"action" : "scrolled",
"datetime" : 1525692545983.0
},
{
"action" : "click",
"datetime" : 1525692545999.0
},
{
"action" : "start",
"datetime" : 1525693343877.0
},
{
"action" : "mousemove",
"datetime" : 1525693351075.0
},
{
"action" : "mousemove",
"datetime" : 1525693351117.0
},
{
"action" : "scrolled",
"datetime" : 1525693351212.0
},
{
"action": "scrolled",
"datetime": 1525693354026.0
},
{
"action": "scrolled",
"datetime": 1525693354126.0
}
]
}
So I should end up with a document that looks like this:
{
"_id": "5f851b06b08ab4b1f916c14841d4bbba",
"session1": [
{
"action": "start",
"datetime": 1525692527345.0
},
{
"action": "scrolled",
"datetime": 1525692545966.0
},
{
"action": "scrolled",
"datetime": 1525692545983.0
},
{
"action": "click",
"datetime": 1525692545999.0
}
],
"session2": [
{
"action": "start",
"datetime": 1525693343877.0
},
{
"action": "mousemove",
"datetime": 1525693351075.0
},
{
"action": "mousemove",
"datetime": 1525693351118.0
},
{
"action": "mousemove",
"datetime": 1525693351119.0
},
{
"action": "mousemove",
"datetime": 1525693351121.0
},
{
"action": "scrolled",
"datetime": 1525693351212.0
},
{
"action": "scrolled",
"datetime": 1525693354026.0
},
{
"action": "scrolled",
"datetime": 1525693354126.0
}
]
}
The session1 and session2 field names can be anything; they are just there to show the desired result.
Any ideas on how to add this to my existing aggregation pipeline?
db.collection.aggregate(
[
{
"$match" : {
"action" : {
"$exists" : true
},
"domain" : "domain.com"
}
},
{
"$unwind" : "$action"
},
{
"$sort" : {
"action.datetime" : 1.0
}
},
{
"$group" : {
"_id" : "$id",
"count" : {
"$sum" : 1.0
},
"actions" : {
"$addToSet" : {
"id" : "$id",
"action" : "$action.action",
"datetime" : "$action.datetime"
}
}
}
},
{
"$unwind" : "$actions"
},
{
"$sort" : {
"actions.datetime" : 1.0
}
},
{
"$group" : {
"_id" : "$actions.id",
"count" : {
"$sum" : 1.0
},
"actions" : {
"$push" : {
"action" : "$actions.action",
"datetime" : "$actions.datetime"
}
}
}
},
{
"$match" : {
"count" : {
"$gt" : 1.0
}
}
}
],
{
"allowDiskUse" : true
}
);
Thanks for looking!

MongoDB Aggregation Pipeline Optimization

I have an aggregation pipeline as mentioned below. Considering that there can be hundreds of thousands of documents on which I want to apply this pipeline, I have questions on how it can be optimized. Below are my questions:
1.) What is the optimal index to apply to the match query? Should I have one compound index on myArray.tags.tagKey, myArray.tags.tagValue, myArray.someRules.count, and myArray.entryTime, or should I create separate indexes for each of these fields? What is the best way to do this?
2.) If you look at my pipeline, I am applying the $match operator twice to filter the documents: first at the beginning of the pipeline, to get the benefit of my index, and then again after the $unwind operation, to filter the specific elements of the array "myArray". Is there any way I can combine them? In other words, can array elements be filtered using the $match aggregation operator?
Here is an example of my document:
{
"_id" : "8001409457639",
"myArray" : [
{
"entryID" : ObjectId("5101ab1116b12614fb083022"),
"requestID" : "adb0d2asdfbe10",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 0,
"somethingElse" : 11
},
"entryTime" : ISODate("2015-09-22T19:25:19.014Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
},
{
"entryID" : ObjectId("5101a111c6b12614fb083022"),
"requestID" : "fc057asdf16480",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 10,
"somethingElse" : 0
},
"entryTime" : ISODate("2015-09-22T19:44:26.558Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
},
{
"entryID" : ObjectId("5101b111c6b12614fb083011"),
"requestID" : "40c7a0ads2dd8c2",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 10,
"somethingElse" : 0
},
"entryTime" : ISODate("2015-09-22T20:24:15.347Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
}
],
"lockAcquiredBy" : "james",
"lockStartTime" : ISODate("2015-11-18T22:36:05.266Z")
}
This is my aggregation pipeline:
[{
"$match": {
"$and": [{
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "ABC"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "DEF"
}
}
}]
}, {
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "4"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "Retry"
}
}
}]
}],
"myArray.someRules.count": 0,
"myArray.entryTime": {
"$gte": {
"$date": "2016-01-05T01:59:07.763Z"
}
}
}
}, {
"$unwind": "$myArray"
}, {
"$match": {
"$and": [{
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "ABC"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "DEF"
}
}
}]
}, {
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "4"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "Retry"
}
}
}]
}],
"myArray.someRules.count": 0,
"myArray.entryTime": {
"$gte": {
"$date": "2016-01-05T01:59:07.763Z"
}
}
}
}
// More steps in the pipeline.
]

Elasticsearch index operation fails on complex object

I am indexing a data stream to Elasticsearch and I cannot figure out how to normalize incoming data to make it index without error. I have a mapping type "getdatavalues" which is a meta-data query. This meta-data query can return very different looking responses but I'm not seeing the difference. The error I get:
{"index":{"_index":"ens_event-2016.03.11","_type":"getdatavalues","_id":"865800029798177_2016_03_11_03_18_12_100037","status":400,"error":"MapperParsingException[object mapping for [getdatavalues] tried to parse field [output] as object, but got EOF, has a concrete value been provided to it?]"}}
when performing:
curl -XPUT 'http://192.168.99.100:80/es/ens_event-2016.03.11/getdatavalues/865800029798177_2016_03_11_03_18_12_100037' -d '{
"type": "getDataValues",
"input": {
"deviceID": {
"IMEI": "865800029798177",
"serial-number": "64180258"
},
"handle": 644,
"exprCode": "200000010300140000080001005f00a700000000000000",
"noRollHandle": "478669308-578452",
"transactionID": 290
},
"timestamp": "2016-03-11T03:18:12.000Z",
"handle": 644,
"output": {
"noRollPubSessHandle": "478669308-578740",
"publishSessHandle": 1195,
"status": true,
"matchFilter": {
"prefix": "publicExpr.operatorDefined.commercialIdentifier.FoodSvcs.Restaurant.\"A&C Kabul Curry\".\"Rooster Street\"",
"argValues": {
"event": "InternationalEvent",
"hasEvent": "anyEvent"
}
},
"transactionID": 290,
"validFor": 50
}
}'
Here's what Elasticsearch has for the mapping:
"getdatavalues" : {
"dynamic_templates" : [ {
"strings" : {
"mapping" : {
"index" : "not_analyzed",
"type" : "string"
},
"match_mapping_type" : "string"
}
} ],
"properties" : {
"handle" : {
"type" : "long"
},
"input" : {
"properties" : {
"deviceID" : {
"properties" : {
"IMEI" : {
"type" : "string",
"index" : "not_analyzed"
},
"serial-number" : {
"type" : "string",
"index" : "not_analyzed"
}
}
},
"exprCode" : {
"type" : "string",
"index" : "not_analyzed"
},
"handle" : {
"type" : "long"
},
"noRollHandle" : {
"type" : "string",
"index" : "not_analyzed"
},
"serviceVersion" : {
"type" : "string",
"index" : "not_analyzed"
},
"transactionID" : {
"type" : "long"
}
}
},
"output" : {
"properties" : {
"matchFilter" : {
"properties" : {
"argValues" : {
"properties" : {
"Interests" : {
"type" : "object"
},
"MerchantId" : {
"type" : "string",
"index" : "not_analyzed"
},
"Queue" : {
"type" : "string",
"index" : "not_analyzed"
},
"Vibe" : {
"type" : "string",
"index" : "not_analyzed"
},
"event" : {
"properties" : {
"event" : {
"type" : "string",
"index" : "not_analyzed"
},
"hasEvent" : {
"type" : "string",
"index" : "not_analyzed"
}
}
},
"hasEvent" : {
"type" : "string",
"index" : "not_analyzed"
},
"interests" : {
"type" : "string",
"index" : "not_analyzed"
}
}
},
"prefix" : {
"type" : "string",
"index" : "not_analyzed"
},
"transactionID" : {
"type" : "long"
},
"validFor" : {
"type" : "long"
}
}
},
"noRollPubSessHandle" : {
"type" : "string",
"index" : "not_analyzed"
},
"publishSessHandle" : {
"type" : "long"
},
"status" : {
"type" : "boolean"
},
"transactionID" : {
"type" : "long"
},
"validFor" : {
"type" : "long"
}
}
},
"timestamp" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"type" : {
"type" : "string",
"index" : "not_analyzed"
}
}
},
Looks like the argValues object doesn't quite agree with your mapping:
"argValues": {
"event": "InternationalEvent",
"hasEvent": "anyEvent"
}
Either this:
"argValues": {
"event": {
"event": "InternationalEvent"
},
"hasEvent": "anyEvent"
}
Or this:
"argValues": {
"event": {
"event": "InternationalEvent",
"hasEvent": "anyEvent"
}
}
Would both seem to be valid.

Unexpected results from Elasticsearch

I have some documents stored in ES (by Logstash), and the results, when querying ES, do not look right:
The first query (see the queries and the results below) is meant to return only documents that do not contain the region field.
Furthermore, based on the result of the first query, there is obviously a document that contains the region field; however, the second query, which should return (at least) a document with region=IN, returns no documents.
Is something wrong with my queries?
How can I investigate where the problem is? (The ES logs do not have anything related to these queries)
Here is the query:
curl -X GET 'http://localhost:9200/logstash*/_search?pretty' -d '{
"query" : {
"match_all" : {}
},
filter : {
"and" : [
{ "term" : { "type" : "xsys" } },
{ "missing" : { "field" : "region" } }
]
}, size: 2
}'
And the result:
{
"took" : 40,
"timed_out" : false,
"_shards" : {
"total" : 90,
"successful" : 90,
"failed" : 0
},
"hits" : {
"total" : 5747,
"max_score" : 1.0,
"hits" : [ {
"_index" : "logstash-2013.09.28",
"_type" : "logs",
"_id" : "UMrz9bwKQgCq__TwBT0WmQ",
"_score" : 1.0,
"_source" : {
.....
"type":"xsys",
....
"region":"IN",
}
}, { ....
} ]
}
}
Furthermore, the result for the following query:
curl -X GET 'http://localhost:9200/logstash*/_search?pretty' -d '{
"query" : { "match_all" : {} },
filter : { "term" : { "region" : "IN" } },
size: 1
}'
is:
{
"took" : 55,
"timed_out" : false,
"_shards" : {
"total" : 90,
"successful" : 90,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
The following mapping is used:
curl -XPUT http://localhost:9200/_template/logstash_per_index -d '
{
"template": "logstash*",
"settings": {
"index.query.default_field": "message",
"index.cache.field.type": "soft",
"index.store.compress.stored": true
},
"mappings": {
"_default_": {
"_all": { "enabled": false },
"properties": {
"message": { "type": "string", "index": "analyzed" },
"#version": { "type": "string", "index": "not_analyzed" },
"#timestamp": { "type": "date", "index": "not_analyzed" },
"type": { "type": "string", "index": "not_analyzed" },
....
"region": { "type": "string", "index": "not_analyzed" },
...
}
}
}
}'
Mapping (what ES has returned - curl -XGET 'http://localhost:9200/logstash-2013.09.28/_mapping'):
{
"logstash-2013.09.28":{
"logs":{
"_all":{
"enabled":false
},
"properties":{
"#timestamp":{
"type":"date",
"format":"dateOptionalTime"
},
"#version":{
"type":"string",
"index":"not_analyzed",
"omit_norms":true,
"index_options":"docs"
},
"message":{
"type":"string"
},
"region":{
"type":"string"
},
"type":{
"type":"string",
"index":"not_analyzed",
"omit_norms":true,
"index_options":"docs"
}
}
},
"_default_":{
"_all":{
"enabled":false
},
"properties":{
"#timestamp":{
"type":"date",
"format":"dateOptionalTime"
},
"#version":{
"type":"string",
"index":"not_analyzed",
"omit_norms":true,
"index_options":"docs"
},
"message":{
"type":"string"
},
"type":{
"type":"string",
"index":"not_analyzed",
"omit_norms":true,
"index_options":"docs"
}
}
}
}
}