Elasticsearch - query dates without a specified timezone - date

I have an index with the following mappings - standard format for a date. In the 2nd record below the time specified is actually a local time - but ES treats it as UTC.
Even though ES is internally converting all parsed datetimes to UTC but it must obviously store the original string as well.
My question is whether (and how) it might be possible to query all records for which the scheduledDT value doesn't have the timezone explicitly specified.
{
"curator_v3": {
"mappings": {
"published": {
"analyzer": "classic",
"numeric_detection": true,
"properties": {
"Id": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"createDT": {
"type": "date",
"format": "dateOptionalTime",
"include_in_all": false
},
"scheduleDT": {
"type": "date",
"format": "dateOptionalTime",
"include_in_all": false
},
"title": {
"type": "string",
"fields": {
"english": {
"type": "string",
"analyzer": "english"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"shingle": {
"type": "string",
"analyzer": "shingle"
},
"spanish": {
"type": "string",
"analyzer": "spanish"
}
},
"include_in_all": false
}
}
}
}
}
}
We use .NET as our client to ElasticSearch and haven't been consistent in specifying a timezone for the scheduleDT field.
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 12,
"successful": 12,
"failed": 0
},
"hits": {
"total": 32,
"max_score": null,
"hits": [
{
"_index": "curator_v3",
"_type": "published",
"_id": "29651227",
"_score": null,
"fields": {
"Id": [
"29651227"
],
"scheduleDT": [
"2015-11-21T22:17:51.0946798-06:00"
],
"title": [
"97 Year-Old Woman Cries Tears Of Joy After Finally Getting Her High School Diploma"
],
"createDT": [
"2015-11-21T22:13:32.3597142-06:00"
]
},
"sort": [
1448165871094
]
},
{
"_index": "curator_v3",
"_type": "published",
"_id": "210466413",
"_score": null,
"fields": {
"Id": [
"210466413"
],
"scheduleDT": [
"2015-11-22T12:00:00"
],
"title": [
"6 KC treats to bring to Thanksgiving"
],
"createDT": [
"2015-11-20T15:08:25.4282-06:00"
]
},
"sort": [
1448193600000
]
}
]
},
"aggregations": {
"ScheduleDT": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 27,
"buckets": [
{
"key": 1448165871094,
"key_as_string": "2015-11-22T04:17:51.094Z",
"doc_count": 1
},
{
"key": 1448193600000,
"key_as_string": "2015-11-22T12:00:00.000Z",
"doc_count": 4
}
]
}
}
}

You can do this by querying the document having a scheduleDT whose field length is less than 20 characters (e.g. 2015-11-22T12:00:00). All the date fields with a specified time zone would be longer.
Something like this should do:
{
"query": {
"filtered": {
"filter": {
"script": {
"script": "doc.scheduleDT.value.size() < 20"
}
}
}
}
}
Note, however, that in order to make your queries easier to create you should always try to convert all your timestamps in UTC before indexing your documents.
Finally, also make sure that you have dynamic scripting enabled in order to run the above query.
UPDATE
Actually, if you use the _source directly in the script it will work because it will return the real value from the source as it was when the document was indexed:
{
"query": {
"filtered": {
"filter": {
"script": {
"script": "_source.scheduleDT.size() < 20"
}
}
}
}
}

Related

Domino 9.x calendar service create meeting

I have been following this guide to work on Domino 9.0.1
Domino Calendar services
I am using JSON and the POST command works but creates an appointment, what I want to do is create a meeting. I have tried setting other fields like event['x-lotus-appttype'].data or event.AppointmentType = 3 but I still get an appointment.
JSON I am sending
{
"events": [
{
"summary":"Meeting 1",
"location":"Location 1",
"start": {
"date":"2013-12-01",
"time":"13:00:00",
"utc":true
},
"end": {
"date":"2013-12-01",
"time":"14:00:00",
"utc":true
}
}
]
}
What is the correct JSON format to create a meeting ?
Take a look at the following documentation: Event with attendees represented in JSON format
EXAMPLE 4. Event with attendees and time zone array:
{
"x-lotus-charset": {
"data": "UTF-8"
},
"timezones": [
{
"tzid": "Eastern",
"standard": {
"start": {
"date": "1950-11-05",
"time": "02:00:00"
},
"offsetFrom": "-0400",
"offsetTo": "-0500",
"recurrenceRule": "FREQ=YEARLY;BYMONTH=11;BYDAY=1SU;BYHOUR=2;BYMINUTE=0"
},
"daylight": {
"start": {
"date": "1950-03-12",
"time": "02:00:00"
},
"offsetFrom": "-0500",
"offsetTo": "-0400",
"recurrenceRule": "FREQ=YEARLY;BYMONTH=3;BYDAY=2SU;BYHOUR=2;BYMINUTE=0"
}
}
],
"events": [
{
"href": "/mail/dlawson.nsf/api/calendar/events/DB7E0BAC21EC322A85257BD200756E26-Lotus_Notes_Generated",
"id": "DB7E0BAC21EC322A85257BD200756E26-Lotus_Notes_Generated",
"summary": "Staff meeting",
"location": "Ray's office",
"description": "Please email your status update 24 hours before the meeting.",
"start": {
"date": "2013-09-12",
"time": "09:00:00",
"tzid": "Eastern"
},
"end": {
"date": "2013-09-12",
"time": "10:00:00",
"tzid": "Eastern"
},
"class": "public",
"transparency": "opaque",
"sequence": 0,
"last-modified": "20130825T212457Z",
"attendees": [
{
"role": "chair",
"status": "accepted",
"rsvp": false,
"displayName": "Duke Lawson/Peaks",
"email": "DukeLawson#swg.usma.ibm.com"
},
{
"role": "req-participant",
"status": "needs-action",
"rsvp": true,
"displayName": "Dean Melnyk/Peaks",
"email": "DeanMelnyk#swg.usma.ibm.com"
},
{
"role": "req-participant",
"status": "needs-action",
"rsvp": true,
"displayName": "Raymond Chan/Peaks",
"email": "RaymondChan#swg.usma.ibm.com"
}
],
"organizer": {
"displayName": "Duke Lawson/Peaks",
"email": "DukeLawson#swg.usma.ibm.com"
},
"x-lotus-broadcast": {
"data": "FALSE"
},
"x-lotus-notesversion": {
"data": "2"
},
"x-lotus-appttype": {
"data": "3"
}
}
]
}
I hope this can help :)

How can I use CloudKit web services to query based on a reference field?

I've got two CloudKit data objects that look somewhat like this:
Parent Object:
{
"records": [
{
"recordName": "14102C0A-60F2-4457-AC1C-601BC628BF47-184-000000012D225C57",
"recordType": "ParentObject",
"fields": {
"fsYear": {
"value": "2015",
"type": "STRING"
},
"displayOrder": {
"value": 2015221153856287200,
"type": "INT64"
},
"fjpFSGuidForReference": {
"value": "14102C0A-60F2-4457-AC1C-601BC628BF47-184-000000012D225C57",
"type": "STRING"
},
"fsDateSearch": {
"value": "2015221153856287158",
"type": "STRING"
},
},
"recordChangeTag": "id4w7ivn",
"created": {
"timestamp": 1439149087571,
"userRecordName": "_0d26968032e31bbc72c213037b6cb35d",
"deviceID": "A19CD995FDA3093781096AF5D818033A241D65C1BFC3D32EC6C5D6B3B4A9AA6B"
},
"modified": {
"timestamp": 1439149087571,
"userRecordName": "_0d26968032e31bbc72c213037b6cb35d",
"deviceID": "A19CD995FDA3093781096AF5D818033A241D65C1BFC3D32EC6C5D6B3B4A9AA6B"
}
}
],
"total":
}
Child Object:
{
"records": [
{
"recordName": "2015221153856287168",
"recordType": "ChildObject",
"fields": {
"District": {
"value": "002",
"type": "STRING"
},
"ZipCode": {
"value": "12345",
"type": "STRING"
},
"InspecReference": {
"value": {
"recordName": "14102C0A-60F2-4457-AC1C-601BC628BF47-184-000000012D225C57",
"action": "NONE",
"zoneID": {
"zoneName": "_defaultZone"
}
},
"type": "REFERENCE"
},
},
"recordChangeTag": "id4w7lew",
"created": {
"timestamp": 1439149090856,
"userRecordName": "_0d26968032e31bbc72c213037b6cb35d",
"deviceID": "A19CD995FDA3093781096AF5D818033A241D65C1BFC3D32EC6C5D6B3B4A9AA6B"
},
"modified": {
"timestamp": 1439149090856,
"userRecordName": "_0d26968032e31bbc72c213037b6cb35d",
"deviceID": "A19CD995FDA3093781096AF5D818033A241D65C1BFC3D32EC6C5D6B3B4A9AA6B"
}
}
],
"total": 1
}
I'm trying to write a query to directly access the CloudKit web service and return the Child Object based on the reference of the parent object.
My test JSON looks something like this:
{"query":{"recordType":"ChildObject","filterBy":{"fieldName":"InspecReference","fieldValue":{ "value" : "14102C0A-60F2-4457-AC1C-601BC628BF47-184-000000012D225C57", "type" : "string" },"comparator":"EQUALS"}},"zoneID":{"zoneName":"_defaultZone"}}
However, I'm getting the following error from CloudKit:
{"uuid":"33db91f3-b768-4a68-9056-216ecc033e9e","serverErrorCode":"BAD_REQUEST","reason":"BadRequestException:
Unexpected input"}
I'm guessing I have the Record Field Dictionary in the query wrong. However, the documentation isn't clear on what this should look like on a reference object.
You have to re-create the actual object of the reference. In this particular case, the JSON looks like this:
{
"query": {
"recordType": "ChildObject",
"filterBy": {
"fieldName": "InspecReference",
"fieldValue": {
"value": {
"recordName": "14102C0A-60F2-4457-AC1C-601BC628BF47-184-000000012D225C57",
"action": "NONE"
},
"type": "REFERENCE"
},
"comparator": "EQUALS"
}
},
"zoneID": {
"zoneName": "_defaultZone"
}
}

Elastic Search: Any way to make space-separated words in a comma-separated list regarded as one term?

I don't know if this is possible, but I'm trying to search by locations with an "exact search" option. There are a couple fields that get searched, with the most important one being the "location_raw" field:
"match": {
"location.location_raw": {
"type": "boolean",
"operator": "AND",
"query": "[location query]",
"analyzer": "standard"
}
}
The location_raw field is a location string with a comma between each place, such as "Sudbury, Middlesex, Massachusetts" or "Leamington, Warwickshire, England". If someone searches for "Sudbury, Middlesex" it gets passed in as
"query": "Sudbury Middlesex"
and both of those terms must exist in the location_raw field. This part works.
The problem is that when the location_raw field contains multi-word location, like New York or Saint George, these get returned when someone searches for "York" or "George." If I do an exact search for "George," I do not want to get results for "Saint George." Is there any way to make Elastic consider "Saint George" one term in the string "Saint George, Stamford, Lincoln, England"?
Here's one way to do it, but you have to query in csv too, or use a terms filter.
I used a pattern analyzer with a simple pattern: ", ". I set up a simple index with a single document:
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"analysis": {
"analyzer": {
"csv": {
"type": "pattern",
"pattern": ", ",
"lowercase": false
}
}
}
},
"mappings": {
"doc": {
"properties": {
"location": {
"type": "string",
"index_analyzer": "csv",
"search_analyzer": "standard",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"location":"Saint George, Stamford, Lincoln, England"}
I can see the terms generated with a simple terms aggregation:
POST /test_index/_search?search_type=count
{
"aggs": {
"location_terms": {
"terms": {
"field": "location"
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": []
},
"aggregations": {
"location_terms": {
"buckets": [
{
"key": "England",
"doc_count": 1
},
{
"key": "Lincoln",
"doc_count": 1
},
{
"key": "Saint George",
"doc_count": 1
},
{
"key": "Stamford",
"doc_count": 1
}
]
}
}
}
And then if I query with the same csv syntax, the document isn't returned for "George, England":
POST /test_index/_search
{
"query": {
"match": {
"location": {
"type": "boolean",
"operator": "AND",
"query": "George, England",
"analyzer": "csv"
}
}
}
}
...
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
but is for "Saint George, England":
POST /test_index/_search
{
"query": {
"match": {
"location": {
"type": "boolean",
"operator": "AND",
"query": "Saint George, England",
"analyzer": "csv"
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.2169777,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.2169777,
"_source": {
"location": "Saint George, Stamford, Lincoln, England"
}
}
]
}
}
This query is equivalent, and probably more performant:
POST /test_index/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"terms": {
"location": [
"Saint George",
"England"
],
"execution": "and"
}
}
}
}
}
Here's the code I used to test it:
http://sense.qbox.io/gist/234ea93accb7b20ad8fd33e62fe92f1d450a51ab

Highlighting part of word in elasticsearch

I have made a auto-suggester in elastic search using n-gram tokenizer. Now I want to highlight the user entered character sequence in the auto suggest list. For this purpose I used the highlighter available in elastic search my code is as below but in the output the complete term is being highlighted where am I going wrong.
{
"query": {
"query_string": {
"query": "soft",
"default_field": "competency_display_name"
}
},
"highlight": {
"pre_tags": ["<b>"],
"post_tags": ["</b>"],
"fields": {
"competency_display_name": {}
}
}
}
and the result is
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "competency_auto_suggest",
"_type": "competency",
"_id": "4",
"_score": 1,
"_source": {
"review": null,
"competency_title": "Software Development",
"id": 4,
"competency_display_name": "Software Development"
},
"highlight": {
"competency_display_name": [
"<b>Software Development</b>"
]
}
}
]
}
}
mapping
"competency":{
"properties": {
"competency_display_name":{
"type":"string",
"index_analyzer": "index_ngram_analyzer",
"search_analyzer": "search_term_analyzer"
}
}
}
settings
"analysis": {
"filter": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "15",
"token_chars": [ "letter", "digit" ]
}
},
"analyzer": {
"index_ngram_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": [ "ngram_tokenizer", "lowercase" ]
},
"search_term_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
how to highlight Soft instead of Software Development.
You should use ngram tokenizer instead of ngram filter to highlight in this case.
with_positions_offsets is needed to help highlighting more faster.
Here's the workable settings & mapping :
"analysis": {
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "15",
"token_chars": [ "letter", "digit" ]
}
},
"analyzer": {
"index_ngram_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer",
"filter": [ "lowercase" ]
},
"search_term_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
mapping
"competency":{
"properties": {
"competency_display_name":{
"type":"string",
"index_analyzer": "index_ngram_analyzer",
"search_analyzer": "search_term_analyzer",
"term_vector":"with_positions_offsets"
}
}
}

Get the first document using $in with mongodb

how can get the first element using in in mongo ?
if i've a list like ['car', 'house', 'cat', dog'], and a collection which contains many documents these element, i'd like to find the first document which contain cat, and first which contains dog etc.
I've tried to use limit() but in fact it gives me only one document, which can be either car, or dog or cat etc.
is there a way to combine a limit with $in ?
Thanks
EDIT:
example of data i've:
{
"_id": {
"$oid": "51d53ace9e674607e837d62d"
},
"sensors": [{
"name": "os-hostname",
"value": "yahourt"
}, {
"name": "os-domain-name",
"value": ""
}, {
"name": "os-platform",
"value": "Win32NT"
}, {
"name": "os-fullname",
"value": "Microsoft Windows XP Professional"
}, {
"name": "os-version",
"value": "5.1.2600.131072"
}],
"type": "os",
"serial": "2_os_os-hostname_yahourt"
} {
"_id": {
"$oid": "51d53ace9e674607e837d62e"
},
"sensors": [{
"name": "cpu-id",
"value": "_Total"
}, {
"name": "cpu-usage",
"value": 37.2257042
}],
"type": "cpu",
"serial": "2_cpu_cpu-id_total"
} {
"_id": {
"$oid": "51d53ace9e674607e837d62f"
},
"sensors": [{
"name": "cpu-id",
"value": "0"
}, {
"name": "cpu-usage",
"value": 48.90282
}],
"type": "cpu",
"serial": "2_cpu_cpu-id_0"
} {
"_id": {
"$oid": "51d53ace9e674607e837d630"
},
"sensors": [{
"name": "cpu-id",
"value": "1"
}, {
"name": "cpu-usage",
"value": 25.54859
}],
"type": "cpu",
"serial": "2_cpu_cpu-id_1"
} {
"_id": {
"$oid": "51d53ace9e674607e837d631"
},
"sensors": [{
"name": "volume-name",
"value": "C:"
}, {
"name": "volume-label",
"value": ""
}, {
"name": "volume-total-size",
"value": "52427898880"
}, {
"name": "volume-total-free-space",
"value": "20305170432"
}, {
"name": "volume-percent-free-space",
"value": "38"
}, {
"name": "volume-reads-per-second",
"value": 0.0
}, {
"name": "volume-writes-per-second",
"value": 9.324152
}, {
"name": "volume-read-bytes-per-second",
"value": 0.0
}, {
"name": "volume-write-bytes-per-second",
"value": 194141.6
}, {
"name": "volume-queue-length",
"value": 0.0
}],
"type": "disk",
"serial": "2_disk_volume-name_c"
}
You cannot add a limit to $in but you could cheat by using the aggregation framework:
db.collection.aggregate([
{$match:{serial:{$in:[list_of_serials]}}},
{$sort:{_id:-1}},
{$group:{_id:'$serial',type:{$first:'$type'},sensors:{$first:'$sensors'},id:{$first:'$_id'}}}
]);
Would get a list of all first found of each type.
Edit
The update will get the last inserted according to the _id.