Unexpected results from Elasticsearch - nosql

I have some documents stored in ES (by logstash), and the results I get when querying ES do not look right:
The first query (see the queries and the results below) is meant to return only documents that do not contain the region field.
Furthermore, the result of the first query shows that there obviously is a document containing the region field; however, the second query, which should return at least that document with region=IN, returns no documents.
Is something wrong with my queries?
How can I investigate where the problem is? (The ES logs do not have anything related to these queries)
Here is the query:
curl -X GET 'http://localhost:9200/logstash*/_search?pretty' -d '{
"query" : {
"match_all" : {}
},
filter : {
"and" : [
{ "term" : { "type" : "xsys" } },
{ "missing" : { "field" : "region" } }
]
}, size: 2
}'
And the result:
{
"took" : 40,
"timed_out" : false,
"_shards" : {
"total" : 90,
"successful" : 90,
"failed" : 0
},
"hits" : {
"total" : 5747,
"max_score" : 1.0,
"hits" : [ {
"_index" : "logstash-2013.09.28",
"_type" : "logs",
"_id" : "UMrz9bwKQgCq__TwBT0WmQ",
"_score" : 1.0,
"_source" : {
.....
"type":"xsys",
....
"region":"IN",
}
}, { ....
} ]
}
}
Furthermore, the result for the following query:
curl -X GET 'http://localhost:9200/logstash*/_search?pretty' -d '{
"query" : { "match_all" : {} },
filter : { "term" : { "region" : "IN" } },
size: 1
}'
is:
{
"took" : 55,
"timed_out" : false,
"_shards" : {
"total" : 90,
"successful" : 90,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
The following mapping is used:
curl -XPUT http://localhost:9200/_template/logstash_per_index -d '
{
"template": "logstash*",
"settings": {
"index.query.default_field": "message",
"index.cache.field.type": "soft",
"index.store.compress.stored": true
},
"mappings": {
"_default_": {
"_all": { "enabled": false },
"properties": {
"message": { "type": "string", "index": "analyzed" },
"#version": { "type": "string", "index": "not_analyzed" },
"#timestamp": { "type": "date", "index": "not_analyzed" },
"type": { "type": "string", "index": "not_analyzed" },
....
"region": { "type": "string", "index": "not_analyzed" },
...
}
}
}
}'
Mapping (what ES has returned - curl -XGET 'http://localhost:9200/logstash-2013.09.28/_mapping):
{
"logstash-2013.09.28":{
"logs":{
"_all":{
"enabled":false
},
"properties":{
"#timestamp":{
"type":"date",
"format":"dateOptionalTime"
},
"#version":{
"type":"string",
"index":"not_analyzed",
"omit_norms":true,
"index_options":"docs"
},
"message":{
"type":"string"
},
"region":{
"type":"string"
},
"type":{
"type":"string",
"index":"not_analyzed",
"omit_norms":true,
"index_options":"docs"
}
}
},
"_default_":{
"_all":{
"enabled":false
},
"properties":{
"#timestamp":{
"type":"date",
"format":"dateOptionalTime"
},
"#version":{
"type":"string",
"index":"not_analyzed",
"omit_norms":true,
"index_options":"docs"
},
"message":{
"type":"string"
},
"type":{
"type":"string",
"index":"not_analyzed",
"omit_norms":true,
"index_options":"docs"
}
}
}
}
}

Related

Elasticsearch iterate over range query result in nested object

I have an Elasticsearch index with a nested object called availability, which has a date field and a boolean field. The mapping schema is as follows:
{
"hotel_nested" : {
"mappings" : {
"doc" : {
"properties" : {
"amenities" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"availability" : {
"type" : "nested",
"properties" : {
"date" : {
"type" : "date",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"format" : "MM/dd/yyyy HH:mm:ss||MM/dd/yyyy||epoch_millis"
},
"status" : {
"type" : "boolean"
}
}
},
"dailyRate" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"destination" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"hotelName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"location" : {
"properties" : {
"lat" : {
"type" : "float"
},
"lon" : {
"type" : "float"
}
}
},
"maxOccupancy" : {
"type" : "long"
},
"propertyType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"roomType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"starRating" : {
"type" : "float"
}
}
}
}
}
}
Here is one of the documents:
{
"_index" : "hotel_nested",
"_type" : "doc",
"_id" : "14",
"_score" : 1.0,
"_source" : {
"roomType" : "Executive",
"destination" : "Albuquerque",
"maxOccupancy" : 4,
"starRating" : 6.8,
"hotelName" : "Lotte Hotel Seoul",
"amenities" : [
"Spa",
"Internet",
"Free parking",
"Air conditioning",
"Laundry Service",
"Business Services"
],
"location" : {
"lat" : 56.76755,
"lon" : -110.75792
},
"propertyType" : "Hotel",
"dailyRate" : "$178.96",
"availability" : [
{
"date" : "10/1/2017",
"status" : true
},
{
"date" : "10/2/2017",
"status" : true
},
{
"date" : "10/3/2017",
"status" : true
},
{
"date" : "10/4/2017",
"status" : false
},
{
"date" : "10/5/2017",
"status" : false
},
{
"date" : "10/6/2017",
"status" : false
},
{
"date" : "10/7/2017",
"status" : true
},
{
"date" : "10/8/2017",
"status" : true
},
{
"date" : "10/9/2017",
"status" : false
},
{
"date" : "10/10/2017",
"status" : false
},
{
"date" : "10/11/2017",
"status" : true
},
{
"date" : "10/12/2017",
"status" : false
},
{
"date" : "10/13/2017",
"status" : false
},
{
"date" : "10/14/2017",
"status" : true
},
{
"date" : "10/15/2017",
"status" : true
},
{
"date" : "10/16/2017",
"status" : true
},
{
"date" : "10/17/2017",
"status" : false
},
{
"date" : "10/18/2017",
"status" : true
},
{
"date" : "10/19/2017",
"status" : false
},
{
"date" : "10/20/2017",
"status" : true
},
{
"date" : "10/21/2017",
"status" : false
},
{
"date" : "10/22/2017",
"status" : true
},
{
"date" : "10/23/2017",
"status" : true
},
{
"date" : "10/24/2017",
"status" : true
},
{
"date" : "10/25/2017",
"status" : false
},
{
"date" : "10/26/2017",
"status" : false
},
{
"date" : "10/27/2017",
"status" : false
},
{
"date" : "10/28/2017",
"status" : true
},
{
"date" : "10/29/2017",
"status" : true
},
{
"date" : "10/30/2017",
"status" : true
},
{
"date" : "10/31/2017",
"status" : true
},
{
"date" : "11/1/2017",
"status" : true
},
{
"date" : "11/2/2017",
"status" : false
},
{
"date" : "11/3/2017",
"status" : false
},
{
"date" : "11/4/2017",
"status" : false
},
{
"date" : "11/5/2017",
"status" : false
},
{
"date" : "11/6/2017",
"status" : true
},
{
"date" : "11/7/2017",
"status" : false
},
{
"date" : "11/8/2017",
"status" : false
},
{
"date" : "11/9/2017",
"status" : false
},
{
"date" : "11/10/2017",
"status" : false
},
{
"date" : "11/11/2017",
"status" : false
},
{
"date" : "11/12/2017",
"status" : false
},
{
"date" : "11/13/2017",
"status" : false
},
{
"date" : "11/14/2017",
"status" : true
},
{
"date" : "11/15/2017",
"status" : true
},
{
"date" : "11/16/2017",
"status" : false
},
{
"date" : "11/17/2017",
"status" : true
},
{
"date" : "11/18/2017",
"status" : false
},
{
"date" : "11/19/2017",
"status" : true
},
{
"date" : "11/20/2017",
"status" : true
},
{
"date" : "11/21/2017",
"status" : true
},
{
"date" : "11/22/2017",
"status" : true
},
{
"date" : "11/23/2017",
"status" : false
},
{
"date" : "11/24/2017",
"status" : false
},
{
"date" : "11/25/2017",
"status" : false
},
{
"date" : "11/26/2017",
"status" : true
},
{
"date" : "11/27/2017",
"status" : true
},
{
"date" : "11/28/2017",
"status" : false
},
{
"date" : "11/29/2017",
"status" : false
},
{
"date" : "11/30/2017",
"status" : false
},
{
"date" : "12/1/2017",
"status" : true
},
{
"date" : "12/2/2017",
"status" : true
},
{
"date" : "12/3/2017",
"status" : true
},
{
"date" : "12/4/2017",
"status" : true
},
{
"date" : "12/5/2017",
"status" : true
},
{
"date" : "12/6/2017",
"status" : true
},
{
"date" : "12/7/2017",
"status" : false
},
{
"date" : "12/8/2017",
"status" : false
},
{
"date" : "12/9/2017",
"status" : true
},
{
"date" : "12/10/2017",
"status" : false
},
{
"date" : "12/11/2017",
"status" : true
},
{
"date" : "12/12/2017",
"status" : false
},
{
"date" : "12/13/2017",
"status" : true
},
{
"date" : "12/14/2017",
"status" : true
},
{
"date" : "12/15/2017",
"status" : true
},
{
"date" : "12/16/2017",
"status" : false
},
{
"date" : "12/17/2017",
"status" : true
},
{
"date" : "12/18/2017",
"status" : true
},
{
"date" : "12/19/2017",
"status" : false
},
{
"date" : "12/20/2017",
"status" : true
},
{
"date" : "12/21/2017",
"status" : true
},
{
"date" : "12/22/2017",
"status" : false
},
{
"date" : "12/23/2017",
"status" : false
},
{
"date" : "12/24/2017",
"status" : false
},
{
"date" : "12/25/2017",
"status" : false
},
{
"date" : "12/26/2017",
"status" : false
},
{
"date" : "12/27/2017",
"status" : false
},
{
"date" : "12/28/2017",
"status" : false
},
{
"date" : "12/29/2017",
"status" : false
},
{
"date" : "12/30/2017",
"status" : true
},
{
"date" : "12/31/2017",
"status" : true
}
]
}
}
My issue is that I want to search between two dates and find out whether the hotel's availability status is true for all dates between them, while also matching the city name at a basic level. I have other search criteria as well, which I think I can handle, but finding a true status for every date between two dates is what I'm stuck on.
For example, the search parameters could be: city Albuquerque, start date 10/22/2017, and end date 10/24/2017.
I have created a query, but it gives undesired results. My query is:
{
"query": {
"bool": {
"must":[
{
"match": {
"destination": {
"query": "Albuquerque" ,
"operator": "and"
}
}
}
],
"filter": {
"bool": {
"must": [ {
"nested": {
"path": "availability",
"query": {
"bool": {
"must": [
{
"range": {
"availability.date":{ "gte": "10/22/2017",
"lte":"10/24/2017"}
}
},
{
"term": {
"availability.status": "true"
}
}
]
}
}
}
}
]
}
}
}
}
}
The first hit of this query is
{
"_index": "hotel_nested",
"_type": "doc",
"_id": "305",
"_score": 4.815987,
"_source": {
"roomType": "Executive",
"destination": "Albuquerque",
"maxOccupancy": 2,
"starRating": 4.2,
"hotelName": "Sheraton San Diego Hotel & Marina",
"amenities": [
"Kitchen",
"Air conditioning",
"Laundry Service",
"Business Services",
"Free parking",
"Spa"
],
"location": {
"lat": 54.92887,
"lon": -101.31256
},
"propertyType": "Hotel",
"dailyRate": "$462.59",
"availability": [
{
"date": "10/1/2017",
"status": false
},
{
"date": "10/2/2017",
"status": true
},
{
"date": "10/3/2017",
"status": false
},
{
"date": "10/4/2017",
"status": true
},
{
"date": "10/5/2017",
"status": true
},
{
"date": "10/6/2017",
"status": true
},
{
"date": "10/7/2017",
"status": true
},
{
"date": "10/8/2017",
"status": false
},
{
"date": "10/9/2017",
"status": false
},
{
"date": "10/10/2017",
"status": true
},
{
"date": "10/11/2017",
"status": true
},
{
"date": "10/12/2017",
"status": false
},
{
"date": "10/13/2017",
"status": true
},
{
"date": "10/14/2017",
"status": false
},
{
"date": "10/15/2017",
"status": true
},
{
"date": "10/16/2017",
"status": false
},
{
"date": "10/17/2017",
"status": true
},
{
"date": "10/18/2017",
"status": false
},
{
"date": "10/19/2017",
"status": false
},
{
"date": "10/20/2017",
"status": true
},
{
"date": "10/21/2017",
"status": true
},
{
"date": "10/22/2017",
"status": true
},
{
"date": "10/23/2017",
"status": false
},
{
"date": "10/24/2017",
"status": false
},
{
"date": "10/25/2017",
"status": false
},
{
"date": "10/26/2017",
"status": true
},
{
"date": "10/27/2017",
"status": true
},
{
"date": "10/28/2017",
"status": false
},
{
"date": "10/29/2017",
"status": true
},
{
"date": "10/30/2017",
"status": false
},
{
"date": "10/31/2017",
"status": false
},
{
"date": "11/1/2017",
"status": false
},
{
"date": "11/2/2017",
"status": false
},
{
"date": "11/3/2017",
"status": false
},
{
"date": "11/4/2017",
"status": false
},
{
"date": "11/5/2017",
"status": false
},
{
"date": "11/6/2017",
"status": true
},
{
"date": "11/7/2017",
"status": false
},
{
"date": "11/8/2017",
"status": true
},
{
"date": "11/9/2017",
"status": true
},
{
"date": "11/10/2017",
"status": true
},
{
"date": "11/11/2017",
"status": true
},
{
"date": "11/12/2017",
"status": false
},
{
"date": "11/13/2017",
"status": true
},
{
"date": "11/14/2017",
"status": false
},
{
"date": "11/15/2017",
"status": true
},
{
"date": "11/16/2017",
"status": true
},
{
"date": "11/17/2017",
"status": false
},
{
"date": "11/18/2017",
"status": true
},
{
"date": "11/19/2017",
"status": false
},
{
"date": "11/20/2017",
"status": true
},
{
"date": "11/21/2017",
"status": false
},
{
"date": "11/22/2017",
"status": false
},
{
"date": "11/23/2017",
"status": false
},
{
"date": "11/24/2017",
"status": false
},
{
"date": "11/25/2017",
"status": false
},
{
"date": "11/26/2017",
"status": false
},
{
"date": "11/27/2017",
"status": false
},
{
"date": "11/28/2017",
"status": false
},
{
"date": "11/29/2017",
"status": false
},
{
"date": "11/30/2017",
"status": true
},
{
"date": "12/1/2017",
"status": false
},
{
"date": "12/2/2017",
"status": false
},
{
"date": "12/3/2017",
"status": false
},
{
"date": "12/4/2017",
"status": true
},
{
"date": "12/5/2017",
"status": true
},
{
"date": "12/6/2017",
"status": false
},
{
"date": "12/7/2017",
"status": true
},
{
"date": "12/8/2017",
"status": false
},
{
"date": "12/9/2017",
"status": true
},
{
"date": "12/10/2017",
"status": true
},
{
"date": "12/11/2017",
"status": false
},
{
"date": "12/12/2017",
"status": true
},
{
"date": "12/13/2017",
"status": false
},
{
"date": "12/14/2017",
"status": true
},
{
"date": "12/15/2017",
"status": false
},
{
"date": "12/16/2017",
"status": true
},
{
"date": "12/17/2017",
"status": true
},
{
"date": "12/18/2017",
"status": false
},
{
"date": "12/19/2017",
"status": false
},
{
"date": "12/20/2017",
"status": false
},
{
"date": "12/21/2017",
"status": true
},
{
"date": "12/22/2017",
"status": true
},
{
"date": "12/23/2017",
"status": false
},
{
"date": "12/24/2017",
"status": false
},
{
"date": "12/25/2017",
"status": true
},
{
"date": "12/26/2017",
"status": false
},
{
"date": "12/27/2017",
"status": true
},
{
"date": "12/28/2017",
"status": false
},
{
"date": "12/29/2017",
"status": false
},
{
"date": "12/30/2017",
"status": true
},
{
"date": "12/31/2017",
"status": false
}
]
}
}
In this hit, the status for dates 10/23/2017 and 10/24/2017 is false.
Can someone point me in the right direction, or maybe give an example search query that solves this problem?
You need to put your nested query under the must clause. Add the inner_hits key at the bottom of the nested query and restrict _source to specific fields at the top (because we don't care about the rest of the sub-documents that don't match the nested query), as follows:
{
"_source": [
"hotelName",
"dailyRate"
],
"query": {
"bool": {
"must": [
{
"match": {
"destination": {
"query": "Albuquerque",
"operator": "and"
}
}
},
{
"nested": {
"path": "availability",
"query": {
"bool": {
"must": [
{
"range": {
"availability.date": {
"gte": "10/22/2017",
"lte": "10/24/2017"
}
}
},
{
"term": {
"availability.status": "true"
}
}
]
}
},
"inner_hits": {
"size": 10
}
}
}
]
}
}
}
Note that you'll now get only the nested documents that satisfy the conditions; e.g., for your example the result would be:
{
"_index": "hotel_nested",
"_type": "doc",
"_id": "305",
"_score": 4.815987,
"_source": {
"hotelName": "Sheraton San Diego Hotel & Marina",
"dailyRate": "$462.59"
},
"inner_hits": {
"availability": {
"hits": {
"total": 1,
"hits": [
{
"date": "10/22/2017",
"status": true
}
]
}
}
}
}
So now you will have to check whether all the days in the range October 22-24 are present.
One shortcut is to set "inner_hits":{"size":0}, then just look at the result and check whether the number of inner hits equals the number of days in the range October 22-24:
"inner_hits"."availability"."hits"."total" == 3

Elasticsearch - Range query doesn't work

I have tried to get this working with both Elasticsearch 2.x and 5.x, but it doesn't work in either of them.
I have lots of logs saved in my Elasticsearch instance. They have a field called timestamp whose format is "YYYY-MM-dd HH-mm-ss.SSS" (for example, "2017-11-02 00:00:00.000"). I send the following query via Postman:
{
"query": {
"range": {
"timestamp": {
"gte": "2017-10-21 00:00:00.000",
"lte": "2017-10-27 00:00:00.000"
}
}
}
}
I receive nothing, even though there are more than 500 logs in that range. What am I doing wrong?
EDIT:
My index (loganalyzer):
{
"loganalyzer" : {
"aliases" : { },
"mappings" : {
"logs" : {
"properties" : {
"entireLog" : {
"type" : "string"
},
"formattedMessage" : {
"type" : "string"
},
"id" : {
"type" : "string"
},
"level" : {
"type" : "string"
},
"loggerName" : {
"type" : "string"
},
"testNo" : {
"type" : "string"
},
"threadName" : {
"type" : "string"
},
"timestamp" : {
"type" : "string"
}
}
}
},
"settings" : {
"index" : {
"refresh_interval" : "1s",
"number_of_shards" : "5",
"creation_date" : "1507415366223",
"store" : {
"type" : "fs"
},
"number_of_replicas" : "1",
"uuid" : "9w3QQQc0S0K0NcKtOERtTw",
"version" : {
"created" : "2040699"
}
}
},
"warmers" : { }
}
}
What I receive sending the request:
{
"took": 429,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
And status 200 (OK).
Your edit with the mappings indicates the problem. The reason you aren't getting any results is that the query is attempting to evaluate a "range" for the string you're providing against the values of the field in your index, which are also treated as strings.
"timestamp" : {
"type" : "string"
}
Here's the elastic documentation on that mapping type
You need to apply a date mapping to that field before indexing, or reindex to a new index that has that mapping applied prior to ingestion.
Here is what the mapping request could look like, conforming to your timestamp format:
PUT loganalyzer
{
"mappings": {
"logs": {
"properties": {
"timestamp": {
"type": "date",
"format": "YYYY-MM-dd HH-mm-ss.SSS"
}
}
}
}
}

Elasticsearch now function problems

I'm having problems trying to get a query working with the "now" function. My current query looks something like this:
{
"query": {
"bool" : {
"must" : [
{ "match": { "originCountry" : "GB" }},
{ "match": { "destinationCity" : "MIL" }}
]
}
},
"filter" : {
"and": {
"filters": [
{
"exists": {"field": "dateBack"}
} ,
{
"script" : {"script" : "doc['originRegion'].value == doc['destinationRegion'].value"}
},
{
"range": {
"dateOut": {
"gte": "now"
}
}
}
]
}
}
}
That's not returning any results. However if I change the range section to a string date like:
"range": {
"dateOut": {
"gte": "20150101"
}
}
It works perfectly. In my index mapping, all date fields use the "basic_date" format (YYYYMMDD).
Could this be creating issues for the now function? Does anyone know how the now function works? Does it convert the "now" date to whatever date format the field being compared uses? I've been unable to find any useful documentation about this.
Thanks
Check your date mapping - it should be YYYYMMdd instead of YYYYMMDD
When I set up the mapping:
curl -XPOST http://localhost:9200/index/testnow/_mapping -d '
{"testnow": {
"properties": {
"dateOut": {"type": "date","format" : "YYYYMMdd"},
"dateBack": {"type": "date","format" : "YYYYMMdd"}
}}}'
and post in a couple of docs:
curl -XPOST http://localhost:9200/index/testnow/ -d '
{
"originCountry": "GB",
"destinationCity": "MIL",
"dateBack" : "20140212",
"originRegion" : "X",
"destinationRegion" : "X",
"dateOut" : "20140201"
}'
curl -XPOST http://localhost:9200/index/testnow/ -d '
{
"originCountry": "GB",
"destinationCity": "MIL",
"dateBack" : "20150212",
"originRegion" : "X",
"destinationRegion" : "X",
"dateOut" : "20150201"
}'
and run the query:
curl -XGET http://localhost:9200/index/testnow/_search -d '
{
"query" : {
"filtered" : {
"query": {
"bool" : {
"must" : [
{ "match": { "originCountry" : "GB" }},
{ "match": { "destinationCity" : "MIL" }}
]
}
},
"filter" : {
"and" : [
{"exists": {"field": "dateBack"}},
{"script" : {"script" : "doc[\"originRegion\"].value == doc[\"destinationRegion\"].value"}},
{"range": {"dateOut": {"gte": "now"}}}
]} }}}'
I get back a single document as expected:
{
"took" : 11,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.4142135,
"hits" : [ {
"_index" : "index",
"_type" : "testnow",
"_id" : "AUqgq8u4aqAGLvfmRnfz",
"_score" : 1.4142135,
"_source":
{
"originCountry": "GB",
"destinationCity": "MIL",
"dateBack" : "20150212",
"originRegion" : "X",
"destinationRegion" : "X",
"dateOut" : "20150201"
}
} ]
}
}

Elasticsearch's keyword tokenizer and searching for emails does not really work

I have set up an index like this:
POST /testindex/ -d '
{
"settings": {
"analysis": {
"analyzer": {
"analyzer_keyword": {
"tokenizer": "keyword"
}
}
}
},
"mappings": {
"users": {
"properties": {
"email": {
"analyzer": "analyzer_keyword",
"type": "string"
}
}
}
}
}'
Now I have added some user documents to testindex, each of which contains an email address. If I search for a user document by specifying the email address as follows, it does not work as expected:
GET /testindex/users/_search
{
"query" : {
"term" : { "email" : "hello#host.com" }
}
}
This query returns 0 results. But if I say "email": "hello" or "email": "host.com" it returns the exact document. But what is wrong with the #? How can I search by the complete email address?
The elasticsearch documentation says:
A tokenizer of type keyword that emits the entire input as a single input. The entire input is hello#host.com.
I also tried uax_url_email tokenizer. Does not work either.
Seems to work fine to me:
curl -XDELETE "localhost:9200/testindex?pretty"
curl -XPOST "localhost:9200/testindex?pretty" -d '
{
"settings": {
"analysis": {
"analyzer": {
"analyzer_keyword": {
"tokenizer": "keyword"
}
}
}
},
"mappings": {
"users": {
"properties": {
"email": {
"analyzer": "analyzer_keyword",
"type": "string"
}
}
}
}
}'
curl -XPOST "localhost:9200/testindex/users?pretty&refresh" -d '{"email": "hello#host.com"}'
curl -XGET "localhost:9200/testindex/users/_search?pretty" -d '{
"query" : {
"term" : { "email" : "hello#host.com" }
}
}'
it returns:
{
"error" : "IndexMissingException[[testindex] missing]",
"status" : 404
}
{
"ok" : true,
"acknowledged" : true
}
{
"ok" : true,
"_index" : "testindex",
"_type" : "users",
"_id" : "GkPG9l83RGyeMyGM9x6ecQ",
"_version" : 1
}
{
"took" : 62,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.30685282,
"hits" : [ {
"_index" : "testindex",
"_type" : "users",
"_id" : "GkPG9l83RGyeMyGM9x6ecQ",
"_score" : 0.30685282, "_source" : {"email": "hello#host.com"}
} ]
}
}
on both 0.90.7 and current master. Did you try to delete the index before changing the mapping?

mapping in create index in elasticsearch through mongodb river is not taking effect

I am trying to index MongoDB data in Elasticsearch using mongodb-river with the following command, but the document mapping is not taking effect. It is still using the default analyzer (standard) for the field text.
Mongodb-river
The documentation describes how to create the index, but there is no documentation on how to provide a custom mapping. This is what I tried. Is there any other documentation where I can find out how to specify custom analyzers etc. when using mongodb-river?
curl -XPUT "localhost:9200/_river/autocompleteindex/_meta" -d '
{
"type": "mongodb",
"mongodb": {
"host": "rahulg-dc",
"port": "27017",
"db": "qna",
"collection": "autocomplete_questions"
},
"index": {
"name": "autocompleteindex",
"type": "autocomplete_questions",
"analysis" : {
"analyzer" : {
"str_search_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase"]
},
"str_index_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase", "ngram"]
}
},
"filter" : {
"ngram" : {
"type" : "ngram",
"min_gram" : 2,
"max_gram" : 20
}
}
}
},
"autocompleteindex": {
"_boost" : {
"name" : "po",
"null_value" : 1.0
},
"properties": {
"po": {
"type": "double"
},
"text": {
"type": "string",
"boost": 3.0,
"search_analyzer" : "str_search_analyzer",
"index_analyzer" : "str_index_analyzer"
}
}
}
}'
The query returns proper results if I search by full words, but it does not return any substring matches. Also, the boost factor is not showing any effect.
What am I doing wrong ??
You first have to create your index with your index settings (analyzer):
"analysis" : {
"analyzer" : {
"str_search_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase"]
},
"str_index_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase", "ngram"]
}
},
"filter" : {
"ngram" : {
"type" : "ngram",
"min_gram" : 2,
"max_gram" : 20
}
}
}
Then you can define a mapping for your type:
"autocomplete_questions": {
"_boost" : {
"name" : "po",
"null_value" : 1.0
},
"properties": {
"po": {
"type": "double"
},
"text": {
"type": "string",
"boost": 3.0,
"search_analyzer" : "str_search_analyzer",
"index_analyzer" : "str_index_analyzer"
}
}
}
And only then, you can create the river:
curl -XPUT "localhost:9200/_river/autocompleteindex/_meta" -d '
{
"type": "mongodb",
"mongodb": {
"host": "rahulg-dc",
"port": "27017",
"db": "qna",
"collection": "autocomplete_questions"
},
"index": {
"name": "autocompleteindex",
"type": "autocomplete_questions"} }
Does it help?