Why are some of my Google Vision API DOCUMENT_TEXT_DETECTION results' normalizedVertices greater than 1?

I have implemented an OCR invoice solution using the Google Vision API DOCUMENT_TEXT_DETECTION service. The software has been in production for a couple of months, and 99% of the time the results are quite accurate. But every once in a while I get results containing normalizedVertices with points greater than 1. According to the docs, a normalized vertex should be between 0 and 1. Does anyone know why this happens, and how these values should be interpreted?
The only pattern I have noticed is that the documents producing these results are often rotated, but I don't see why rotation would throw the normalization off.
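In case it helps, here is a minimal sketch (JavaScript, just for illustration) of what I currently do with these: convert back to pixels using the page dimensions and clamp anything outside [0, 1]. The clamping is purely my own assumption, since the docs do not say what values above 1 mean.
// Sketch of a workaround, not documented behavior: convert a
// normalizedVertex back to pixel coordinates using the page size reported
// alongside it (612x792 in the sample below), clamping out-of-range values.
// The `?? 0` handles vertices where a coordinate of 0 is omitted entirely.
function toPixels(vertex, page) {
  const clamp = (v) => Math.min(Math.max(v ?? 0, 0), 1);
  return { x: clamp(vertex.x) * page.width, y: clamp(vertex.y) * page.height };
}
// toPixels({ x: 1.2254902, y: 0.79545456 }, { width: 612, height: 792 })
// => { x: 612, y: 630 } -- x clamped from 1.2254902 down to 1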
Here's a sample of the response:
{
"fullTextAnnotation": {
"pages": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en",
"confidence": 0.84
},
{
"languageCode": "da",
"confidence": 0.01
}
]
},
"width": 612,
"height": 792,
"blocks": [
{
"boundingBox": {
"normalizedVertices": [
{
"x": 1.2254902,
"y": 0.79545456
},
{
"x": 1.2254902,
"y": 0.9166667
},
{
"x": 1.1960784,
"y": 0.9166667
},
{
"x": 1.1960784,
"y": 0.79545456
}
]
},
"paragraphs": [
{
"boundingBox": {
"normalizedVertices": [
{
"x": 1.2254902,
"y": 0.79545456
},
{
"x": 1.2254902,
"y": 0.9166667
},
{
"x": 1.1960784,
"y": 0.9166667
},
{
"x": 1.1960784,
"y": 0.79545456
}
]
},
"words": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"boundingBox": {
"normalizedVertices": [
{
"x": 1.2254902,
"y": 0.79545456
},
{
"x": 1.2254902,
"y": 0.9166667
},
{
"x": 1.1960784,
"y": 0.9166667
},
{
"x": 1.1960784,
"y": 0.79545456
}
]
},
"symbols": [
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"text": "I",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"text": "N",
"confidence": 1
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"text": "V",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"text": "O",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"text": "I",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
]
},
"text": "C",
"confidence": 0.99
},
{
"property": {
"detectedLanguages": [
{
"languageCode": "en"
}
],
"detectedBreak": {
"type": "LINE_BREAK"
}
},
"text": "E",
"confidence": 1
}
],
"confidence": 0.99
}
],
"confidence": 0.99
}
],
"blockType": "TEXT",
"confidence": 0.99
},...

Related

Can't store FHIR resource in MongoDB using Asymmetrik fhir-core-server

I'm running node-fhir-server-mongo (the Asymmetrik GitHub repo). When I store a resource using the PUT method, it lands in MongoDB and everything appears to work, but the data is only partially stored: when I try to read back what I stored, only a few fields come back.
Below is the data I want to store:
{
"resourceType": "Patient",
"id": "example3",
"text": {
"status": "generated",
},
"identifier": [
{
"use": "usual",
"type": {
"coding": [
{
"system": "http://terminology.hl7.org/CodeSystem/v2-0203",
"code": "MR"
}
]
},
"system": "urn:oid:1.2.36.146.595.217.0.1",
"value": "12345",
"period": {
"start": "2001-05-06"
},
"assigner": {
"display": "Acme Healthcare"
}
}
],
"active": true,
"name": [
{
"use": "official",
"family": "Chalmers",
"given": [
"Peter",
"James"
]
},
{
"use": "usual",
"given": [
"Jim"
]
},
{
"use": "maiden",
"family": "Windsor",
"given": [
"Peter",
"James"
],
"period": {
"end": "2002"
}
}
],
"telecom": [
{
"use": "home"
},
{
"system": "phone",
"value": "(03) 5555 6473",
"use": "work",
"rank": 1
},
{
"system": "phone",
"value": "(03) 3410 5613",
"use": "mobile",
"rank": 2
},
{
"system": "phone",
"value": "(03) 5555 8834",
"use": "old",
"period": {
"end": "2014"
}
}
],
"gender": "male",
"birthDate": "1974-12-25",
"_birthDate": {
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/patient-birthTime",
"valueDateTime": "1974-12-25T14:35:45-05:00"
}
]
},
"deceasedBoolean": false,
"address": [
{
"use": "home",
"type": "both",
"text": "534 Erewhon St PeasantVille, Rainbow, Vic 3999",
"line": [
"534 Erewhon St"
],
"city": "PleasantVille",
"district": "Rainbow",
"state": "Vic",
"postalCode": "3999",
"period": {
"start": "1974-12-25"
}
}
],
"contact": [
{
"relationship": [
{
"coding": [
{
"system": "http://terminology.hl7.org/CodeSystem/v2-0131",
"code": "N"
}
]
}
],
"name": {
"family": "du Marché",
"_family": {
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/humanname-own-prefix",
"valueString": "VV"
}
]
},
"given": [
"Bénédicte"
]
},
"telecom": [
{
"system": "phone",
"value": "+33 (237) 998327"
}
],
"address": {
"use": "home",
"type": "both",
"line": [
"534 Erewhon St"
],
"city": "PleasantVille",
"district": "Rainbow",
"state": "Vic",
"postalCode": "3999",
"period": {
"start": "1974-12-25"
}
},
"gender": "female",
"period": {
"start": "2012"
}
}
],
"managingOrganization": {
"reference": "Organization/1"
} }
And this is all that actually gets stored in MongoDB:
_id: "example3" id: "example3" meta: Object versionId: "1" lastUpdated: "2022-06-28T08:44:44+00:00" resourceType: "Patient"
If anyone knows the answer or how to solve this, please let me know. Thanks in advance!
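For reference, the update call looks roughly like this; the base URL and the /4_0_0 version prefix are assumptions based on a default node-fhir-server-core setup, not copied from my project:
// Hypothetical reproduction of the update (run inside an async context) --
// localhost URL and /4_0_0 prefix are assumptions; `patient` is the full
// Patient resource shown above.
fetch('http://localhost:3000/4_0_0/Patient/example3', {
  method: 'PUT',
  headers: { 'Content-Type': 'application/fhir+json' },
  body: JSON.stringify(patient),
}).then((res) => console.log(res.status)); // succeeds even though only a few fields persist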

Time Axis Format with Appsmith and a Custom Fusion Chart

I use a Custom Fusion Chart (scatter) and have loaded the data I want to display, sticking to the required x, y format. I would now like to scatter the data along a time axis (x). The import of the data works like a charm, but the axis looks like it just uses the x value as a string/integer. Does anyone have an idea how to properly format the x-axis, or the data, so it behaves as a time axis?
I also found that regular charts look a bit better, but the timestamps still do not scale properly across the x-axis (can be seen in the 2nd image).
The JSON for the custom chart:
{
"type": "scatter",
"dataSource": {
"chart": {
"caption": "Axes",
"subCaption": "Aggregated History",
"baseFont": "Helvetica Neue,Arial",
"xAxisName": "Date",
"yAxisName": "Price",
"theme": "fusion"
},
"categories": [],
"dataset": [
{
"seriesname": "Offer",
"showregressionline": "0",
"data": [
{
"x": "2022-01-20T13:50:00Z",
"y": 105.316
},
{
"x": "2022-02-01T11:16:00Z",
"y": 104.64
},
{
"x": "2022-02-01T11:16:00Z",
"y": 104.64
},
{
"x": "2022-02-01T12:18:00Z",
"y": 104.599
},
{
"x": "2022-02-01T12:18:00Z",
"y": 104.599
},
{
"x": "2022-02-01T12:19:00Z",
"y": 104.564
},
{
"x": "2022-02-01T12:49:00Z",
"y": 104.608
},
{
"x": "2022-02-01T12:49:00Z",
"y": 104.572
},
{
"x": "2022-02-01T13:03:00Z",
"y": 104.56
},
{
"x": "2022-02-01T13:19:00Z",
"y": 104.593
}
]
},
{
"seriesname": "Bid",
"showregressionline": "0",
"data": [
{
"x": "2022-02-14T13:47:00Z",
"y": 102.415
},
{
"x": "2022-02-14T13:47:00Z",
"y": 102.415
},
{
"x": "2022-02-14T13:47:00Z",
"y": 102.415
},
{
"x": "2022-02-14T14:17:00Z",
"y": 102.421
},
{
"x": "2022-02-14T14:17:00Z",
"y": 102.421
},
{
"x": "2022-02-14T14:17:00Z",
"y": 102.421
},
{
"x": "2022-02-14T14:47:00Z",
"y": 102.373
},
{
"x": "2022-02-14T14:47:00Z",
"y": 102.373
},
{
"x": "2022-02-14T14:47:00Z",
"y": 102.373
},
{
"x": "2022-02-14T15:17:00Z",
"y": 102.443
}
]
}
],
"vtrendlines": []
}
}
This worked for me. Basically, the scatter chart has a hard time understanding the date objects directly. To resolve that, we need a couple of configs from the Fusion chart, plus the moment lib, to get the right values.
Start by adding category objects inside the `categories` object:
...
"categories": [{
"category": [
{
"x": "{{moment('2022-01-20').unix()}}",
"label": "2022-01-20",
"showverticalline": "0"
}, ...
]
}]
...
Notice that the x key value corresponds to the values in your dataset.
Now we do the same for the dataset as well:
...
"dataset": [
{
"seriesname": "Offer",
"showregressionline": "0",
"data": [
{
"x": "{{moment('2022-01-20T13:50:00Z').unix()}}",
"y": 105.316
}, ...
]
}
], ...
The result of this is a properly scaled time axis.
My full JS object:
var x = {
"type": "scatter",
"dataSource": {
"chart": {
"caption": "Axes",
"subCaption": "Aggregated History",
"baseFont": "Helvetica Neue,Arial",
"xAxisName": "Date",
"yAxisName": "Price",
"theme": "fusion"
},
"categories": [{
"category": [
{
"x": "{{moment('2022-01-20').unix()}}",
"label": "2022-01-20",
"showverticalline": "0"
},
{
"x": "{{moment('2022-02-01').unix()}}",
"label": "2022-02-01",
"showverticalline": "1"
},
{
"x": "{{moment('2022-02-02').unix()}}",
"label": "2022-02-02",
"showverticalline": "1"
},
{
"x": "{{moment('2022-02-03').unix()}}",
"label": "2022-02-03",
"showverticalline": "1"
},
{
"x": "{{moment('2022-02-04').unix()}}",
"label": "2022-02-04",
"showverticalline": "1"
},
{
"x": "{{moment('2022-02-05').unix()}}",
"label": "2022-02-05",
"showverticalline": "1"
},
{
"x": "{{moment('2022-02-06').unix()}}",
"label": "2022-02-06",
"showverticalline": "1"
},
{
"x": "{{moment('2022-02-07').unix()}}",
"label": "2022-02-07",
"showverticalline": "1"
},
{
"x": "{{moment('2022-02-08').unix()}}",
"label": "2022-02-08",
"showverticalline": "1"
},
{
"x": "{{moment('2022-02-09').unix()}}",
"label": "2022-02-09",
"showverticalline": "1"
}
]
}],
"dataset": [
{
"seriesname": "Offer",
"showregressionline": "0",
"data": [
{
"x": "{{moment('2022-01-20T13:50:00Z').unix()}}",
"y": 105.316
},
{
"x": "{{moment('2022-02-01T11:16:00Z').unix()}}",
"y": 104.64
},
{
"x": "{{moment('2022-02-01T11:16:00Z').unix()}}",
"y": 104.64
},
{
"x": "{{moment('2022-02-01T12:18:00Z').unix()}}",
"y": 104.599
},
{
"x": "{{moment('2022-02-01T12:18:00Z').unix()}}",
"y": 104.599
},
{
"x": "{{moment('2022-02-01T12:19:00Z').unix()}}",
"y": 104.564
},
{
"x": "{{moment('2022-02-01T12:49:00Z').unix()}}",
"y": 104.608
},
{
"x": "{{moment('2022-02-01T12:49:00Z').unix()}}",
"y": 104.572
},
{
"x": "{{moment('2022-02-01T13:03:00Z').unix()}}",
"y": 104.56
},
{
"x": "{{moment('2022-02-01T13:19:00Z').unix()}}",
"y": 104.593
}
]
}
],
"vtrendlines": []
}
}
console.log(x)
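If you don't want to hand-write every point, here is a small sketch of generating the converted data array from raw rows (assuming the rows already look like the { x, y } objects in the question):
// Sketch: convert raw ISO-dated rows into the unix-second x values the
// scatter chart expects. moment(...).unix() returns seconds since epoch.
const rows = [
  { x: '2022-01-20T13:50:00Z', y: 105.316 },
  { x: '2022-02-01T11:16:00Z', y: 104.64 },
];
const data = rows.map((r) => ({ x: String(moment(r.x).unix()), y: r.y }));
// => [ { x: '1642686600', y: 105.316 }, { x: '1643714160', y: 104.64 } ]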

MongoDB Aggregation Pipeline help (Convert MySQL to Mongo)

I know questions like this have been asked a bunch, and I am brand new to MongoDB, which means I am struggling. I am using Compass and trying to figure out this aggregation pipeline. In short, I need to get the average time difference between the max and min timestamp, grouped by id.
The expected result would just be something like avg_time: 234.00.
The equivalent MySQL query looks like this:
select SEC_TO_TIME(AVG(TIME_TO_SEC(TIMEDIFF(a.maxDate,a.minDate)))) FROM (select id, min(timestamp) as minDate, max(timestamp) as maxDate from counterHistory group by id) as a
Here is what the dataset looks like:
[{
"_id": {
"$oid": "617dce992743dd52bed811a6"
},
"dateStart": {
"$date": "2021-10-30T23:00:41.056Z"
},
"dateEnd": {
"$date": "2021-10-30T23:00:52.404Z"
},
"areas": {
"c2acc5cc-9a7a-4406-8d91-79cb7f7ded70": {
"color": "yellow",
"type": "rightleft_bottomtop",
"location": {
"point1": {
"x": 1397,
"y": 702
},
"point2": {
"x": 1808,
"y": 645
},
"refResolution": {
"w": 1920,
"h": 969
}
},
"computed": {
"a": 0.15457277801631616,
"b": -332.7843438566361,
"lineBearings": [
81.21317228796154,
261.2131722879615
],
"point1": {
"x": 465.6666666666667,
"y": -260.80495356037153
},
"point2": {
"x": 602.6666666666666,
"y": -239.62848297213623
}
},
"name": "Right line start"
},
"56885eaf-9808-4b5e-b193-06b20e10c39d": {
"color": "turquoise",
"type": "rightleft_bottomtop",
"location": {
"point1": {
"x": 770,
"y": 411
},
"point2": {
"x": 1085,
"y": 360
},
"refResolution": {
"w": 1920,
"h": 969
}
},
"computed": {
"a": 0.18045112781954886,
"b": -199.0092879256966,
"lineBearings": [
79.77099171264048,
259.77099171264047
],
"point1": {
"x": 256.6666666666667,
"y": -152.6934984520124
},
"point2": {
"x": 361.6666666666667,
"y": -133.74613003095976
}
},
"name": "right lane end"
}
},
"videoResolution": {
"w": 640,
"h": 360
},
"filename": "demo.mp4",
"counterSummary": {
"56885eaf-9808-4b5e-b193-06b20e10c39d": {
"_total": 8,
"car": 8
},
"c2acc5cc-9a7a-4406-8d91-79cb7f7ded70": {
"_total": 8,
"car": 7,
"truck": 1
}
},
"trackerSummary": {
"totalItemsTracked": 64
},
"counterHistory": [
{
"frameId": 29,
"timestamp": {
"$date": "2021-10-30T23:00:42.694Z"
},
"area": "c2acc5cc-9a7a-4406-8d91-79cb7f7ded70",
"name": "car",
"id": 408,
"bearing": 291.8014094863518,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 30.588237198390278
},
{
"frameId": 43,
"timestamp": {
"$date": "2021-10-30T23:00:43.619Z"
},
"area": "c2acc5cc-9a7a-4406-8d91-79cb7f7ded70",
"name": "truck",
"id": 457,
"bearing": 293.1985905136482,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 31.985418225686644
},
{
"frameId": 50,
"timestamp": {
"$date": "2021-10-30T23:00:44.063Z"
},
"area": "56885eaf-9808-4b5e-b193-06b20e10c39d",
"name": "car",
"id": 421,
"bearing": 303.69006752597977,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 43.919075813339305
},
{
"frameId": 63,
"timestamp": {
"$date": "2021-10-30T23:00:44.927Z"
},
"area": "c2acc5cc-9a7a-4406-8d91-79cb7f7ded70",
"name": "car",
"id": 458,
"bearing": 293.6293777306568,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 32.41620544269528
},
{
"frameId": 65,
"timestamp": {
"$date": "2021-10-30T23:00:45.054Z"
},
"area": "56885eaf-9808-4b5e-b193-06b20e10c39d",
"name": "car",
"id": 464,
"bearing": 284.03624346792645,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 24.265251755286005
},
{
"frameId": 78,
"timestamp": {
"$date": "2021-10-30T23:00:45.888Z"
},
"area": "c2acc5cc-9a7a-4406-8d91-79cb7f7ded70",
"name": "car",
"id": 469,
"bearing": 303.69006752597977,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 42.47689523801825
},
{
"frameId": 86,
"timestamp": {
"$date": "2021-10-30T23:00:46.415Z"
},
"area": "56885eaf-9808-4b5e-b193-06b20e10c39d",
"name": "car",
"id": 427,
"bearing": 354.28940686250036,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 85.48158485014012
},
{
"frameId": 122,
"timestamp": {
"$date": "2021-10-30T23:00:48.757Z"
},
"area": "c2acc5cc-9a7a-4406-8d91-79cb7f7ded70",
"name": "car",
"id": 479,
"bearing": 296.565051177078,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 35.35187888911645
},
{
"frameId": 125,
"timestamp": {
"$date": "2021-10-30T23:00:48.946Z"
},
"area": "56885eaf-9808-4b5e-b193-06b20e10c39d",
"name": "car",
"id": 408,
"bearing": 323.13010235415595,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 63.359110641515514
},
{
"frameId": 143,
"timestamp": {
"$date": "2021-10-30T23:00:50.143Z"
},
"area": "56885eaf-9808-4b5e-b193-06b20e10c39d",
"name": "car",
"id": 473,
"bearing": 284.03624346792645,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 24.265251755286005
},
{
"frameId": 152,
"timestamp": {
"$date": "2021-10-30T23:00:50.716Z"
},
"area": "c2acc5cc-9a7a-4406-8d91-79cb7f7ded70",
"name": "car",
"id": 438,
"bearing": 296.565051177078,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 35.35187888911645
},
{
"frameId": 160,
"timestamp": {
"$date": "2021-10-30T23:00:51.242Z"
},
"area": "56885eaf-9808-4b5e-b193-06b20e10c39d",
"name": "car",
"id": 455,
"bearing": 45,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 34.77099171264047
},
{
"frameId": 174,
"timestamp": {
"$date": "2021-10-30T23:00:52.149Z"
},
"area": "c2acc5cc-9a7a-4406-8d91-79cb7f7ded70",
"name": "car",
"id": 492,
"bearing": 327.2647737278924,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 66.05160143993088
},
{
"frameId": 175,
"timestamp": {
"$date": "2021-10-30T23:00:52.212Z"
},
"area": "56885eaf-9808-4b5e-b193-06b20e10c39d",
"name": "car",
"id": 469,
"bearing": 281.30993247402023,
"countingDirection": "rightleft_bottomtop",
"angleWithCountingLine": 21.538940761379738
}
]
}]
Here is my atrocious pipeline, which at least gives me the result I am looking for. So I guess: how do you optimize this?
[
  // Work on the one run we care about
  { $match: { _id: ObjectId('617dce992743dd52bed811a6') } },
  // One document per counterHistory entry
  { $unwind: { path: "$counterHistory" } },
  // First and last sighting per tracked id
  { $group: {
      _id: "$counterHistory.id",
      maxDate: { $max: "$counterHistory.timestamp" },
      minDate: { $min: "$counterHistory.timestamp" }
  }},
  // Flag ids seen more than once (max != min)
  { $project: {
      _id: 1,
      minDate: 1,
      maxDate: 1,
      noMatchingDates: { $ne: ["$maxDate", "$minDate"] }
  }},
  { $match: { noMatchingDates: true } },
  // Average the per-id (max - min); $subtract on two dates yields milliseconds
  { $group: {
      _id: null,
      avg_time: {
        $avg: {
          $subtract: [
            { $ifNull: ["$maxDate", 0] },
            { $ifNull: ["$minDate", 0] }
          ]
        }
      }
  }},
  // The same average expressed in hours / minutes / seconds (not modular parts)
  { $project: {
      avg_time: 1,
      hours: { $divide: ["$avg_time", 3600000] },
      minutes: { $divide: ["$avg_time", 60000] },
      seconds: { $divide: ["$avg_time", 1000] }
  }}
]
So, as I put up above and confirmed by YuTing, the pipeline above works as-is.
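For anyone running this outside Compass, a hypothetical sketch with the Node.js driver (inside an async function; the collection name is made up):
// Hypothetical: `pipeline` is the stage array shown above, and 'runs' is a
// placeholder collection name.
const [result] = await db.collection('runs').aggregate(pipeline).toArray();
console.log(result.seconds); // average seconds between first and last count per id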

mongodb aggregate and filter data

I'm trying to filter some result data from MongoDB with Mongoose in JavaScript.
This is my JSON structure:
{
"name": "john",
"firstname": "doe",
"yearold": 22,
"recipes": [
{
"title": "cheesecake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": 6,
"unit": "piece"
},
{
"name": "oil",
"label": "Specific oil",
"unit": "oz",
"value": 0.2
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 450,
"unit": "gr"
}
]
},
{
"title": "cake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": 6,
"unit": "piece"
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 500,
"unit": "gr"
}
]
}
]
}
In some cases I need to return the JSON data with some values hidden. For example, I have a list that specifies all the values to hide:
hidekeys = ["egg"];
and I would like to get this:
{
"name": "john",
"firstname": "doe",
"yearold": 22,
"recipes": [
{
"title": "cheesecake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": #######,
"unit": "piece"
},
{
"name": "oil",
"label": "Specific oil",
"unit": "oz",
"value": 0.2
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 450,
"unit": "gr"
}
]
},
{
"title": "cake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": #######,
"unit": "piece"
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 500,
"unit": "gr"
}
]
}
]
}
For each recipe I need to hide the ingredient value if it is specified in hidekeys.
I tried something with $project and $cond but it doesn't work.
Here's a quick way to achieve this using $map:
const hidekeys = ["egg"];
db.collection.aggregate([
{
$addFields: {
recipes: {
$map: {
input: "$recipes",
as: "recipe",
in: {
$mergeObjects: [
"$$recipe",
{
data: {
$map: {
input: "$$recipe.data",
as: "datum",
in: {
"$mergeObjects": [
"$$datum",
{
$cond: [
{
"$setIsSubset": [
[
"$$datum.name"
],
hidekeys
]
},
{
value: "#####"
},
{
value: "$$datum.value"
}
]
}
]
}
}
}
}
]
}
}
}
}
}
])
Mongo Playground
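Since the question mentions Mongoose: if you'd rather skip the aggregation entirely, here is an alternative sketch that masks in plain JavaScript after fetching (run inside an async function; 'Person' is a hypothetical model name):
// Alternative to the aggregation above, not the same technique: fetch with
// Mongoose, then mask matching ingredient values in JavaScript.
const hidekeys = ['egg'];
const doc = await Person.findOne({ name: 'john' }).lean();
for (const recipe of doc.recipes) {
  for (const item of recipe.data) {
    if (hidekeys.includes(item.name)) item.value = '#####';
  }
}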

Highlighting part of a word in Elasticsearch

I have made an auto-suggester in Elasticsearch using the n-gram tokenizer. Now I want to highlight the user-entered character sequence in the auto-suggest list. For this purpose I used the highlighter available in Elasticsearch; my code is below, but in the output the complete term is being highlighted. Where am I going wrong?
{
"query": {
"query_string": {
"query": "soft",
"default_field": "competency_display_name"
}
},
"highlight": {
"pre_tags": ["<b>"],
"post_tags": ["</b>"],
"fields": {
"competency_display_name": {}
}
}
}
And the result is:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "competency_auto_suggest",
"_type": "competency",
"_id": "4",
"_score": 1,
"_source": {
"review": null,
"competency_title": "Software Development",
"id": 4,
"competency_display_name": "Software Development"
},
"highlight": {
"competency_display_name": [
"<b>Software Development</b>"
]
}
}
]
}
}
mapping
"competency":{
"properties": {
"competency_display_name":{
"type":"string",
"index_analyzer": "index_ngram_analyzer",
"search_analyzer": "search_term_analyzer"
}
}
}
settings
"analysis": {
"filter": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "15",
"token_chars": [ "letter", "digit" ]
}
},
"analyzer": {
"index_ngram_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": [ "ngram_tokenizer", "lowercase" ]
},
"search_term_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
How do I highlight just "Soft" instead of the whole of "Software Development"?
You should use an ngram tokenizer instead of an ngram filter for highlighting in this case.
with_positions_offsets is needed to make highlighting faster.
Here are the working settings and mapping:
"analysis": {
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "15",
"token_chars": [ "letter", "digit" ]
}
},
"analyzer": {
"index_ngram_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer",
"filter": [ "lowercase" ]
},
"search_term_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
mapping
"competency":{
"properties": {
"competency_display_name":{
"type":"string",
"index_analyzer": "index_ngram_analyzer",
"search_analyzer": "search_term_analyzer",
"term_vector":"with_positions_offsets"
}
}
}
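For completeness, a sketch of applying both when (re)creating the index; the index name comes from the question's output, and existing documents need to be reindexed for the new analyzer to take effect:
PUT /competency_auto_suggest
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "nGram",
          "min_gram": "1",
          "max_gram": "15",
          "token_chars": [ "letter", "digit" ]
        }
      },
      "analyzer": {
        "index_ngram_analyzer": {
          "type": "custom",
          "tokenizer": "ngram_tokenizer",
          "filter": [ "lowercase" ]
        },
        "search_term_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": "lowercase"
        }
      }
    }
  },
  "mappings": {
    "competency": {
      "properties": {
        "competency_display_name": {
          "type": "string",
          "index_analyzer": "index_ngram_analyzer",
          "search_analyzer": "search_term_analyzer",
          "term_vector": "with_positions_offsets"
        }
      }
    }
  }
}
With this in place, the same highlight query should return something like "<b>Soft</b>ware Development".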