Mapping parent/child with _id via the MongoDB river in Elasticsearch

I have 3 collections in my Mongo database: shows, venues, and dropdowns.
Shows are mapped like below:
"show": {
"properties" : {
"description": {
"type": "string"
},
"image": {
"type": "string"
},
"site": {
"type": "string"
},
"title" : {
"type" : "multi_field",
"fields" : {
"title" : {"type" : "string", "index" : "analyzed"},
"raw_title" : {"type" : "string", "index" : "not_analyzed", "store": "no"}
}
}
}
}
Venues like this:
"venues": {
"properties" : {
"name" : {
"type" : "multi_field",
"fields" : {
"name" : {"type" : "string", "index" : "analyzed"},
"raw_name" : {"type" : "string", "index" : "not_analyzed", "store": "no"}
}
},
"city" : {
"type" : "multi_field",
"fields" : {
"city" : {"type" : "string", "index" : "analyzed"},
"raw_city" : {"type" : "string", "index" : "not_analyzed", "store": "no"}
}
},
"region" : {
"type" : "multi_field",
"fields" : {
"region" : {"type" : "string", "index" : "analyzed"},
"raw_region" : {"type" : "string", "index" : "not_analyzed", "store": "no"}
}
},
"state" : {
"type": "boolean"
}
}
}
And I have this model in Mongo for dropdowns:
{
created: {
type: Date,
default: Date.now
},
analytics: {
type: String,
default: '',
trim: true
},
state: {
type: Boolean,
default: false,
index: true
},
show: {
type: Schema.ObjectId,
ref: 'Show'
},
venues:[{
venue:{
type: Schema.ObjectId,
ref: 'Venue',
index: true
},
site: {
type: String,
trim: true,
index: true
}
}]
}
I'd like to map dropdowns with a parent/child schema in my index, but I can't tell whether that is possible with ObjectId, because I've tried this mapping:
"dropdown": {
"properties" : {
"state": {
"type": "boolean"
},
"analytics": {
"type": "string"
},
"_parent":{
"type" : "show"
},
"venues" : {
"properties" : {
"venue" : {
"_parent": {
"type" : "venues"
}
}
},
"site" : {"type" : "string"}
}
}
}
But I received this error:
MapperParsingException[No type specified for property [show]]
Is there any way to set up my index correctly?

The issue is that you're specifying _parent incorrectly. You have to set it not inside the properties field, but next to it. See the documentation and this example from it:
PUT /company
{
"mappings": {
"branch": {},
"employee": {
"_parent": {
"type": "branch"
}
}
}
}
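For context, this is how documents are then indexed against such a mapping: the child document carries a parent query parameter holding its parent's _id, which Elasticsearch uses for routing. A minimal sketch (the IDs and field values here are made up):
PUT /company/branch/london
{
"name": "London Westminster"
}
PUT /company/employee/1?parent=london
{
"name": "Alice Smith"
}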
So following that logic, I've taken your mappings, simplified them a bit, and made them work:
PUT /test
{
"mappings": {
"show": {
"properties": {
"description": {
"type": "string"
},
"image": {
"type": "string"
},
"site": {
"type": "string"
},
"title": {
"type": "multi_field",
"fields": {
"title": {
"type": "string",
"index": "analyzed"
},
"raw_title": {
"type": "string",
"index": "not_analyzed",
"store": "no"
}
}
}
}
},
"venues": {
"properties": {
"name": {
"type": "multi_field",
"fields": {
"name": {
"type": "string",
"index": "analyzed"
},
"raw_name": {
"type": "string",
"index": "not_analyzed",
"store": "no"
}
}
},
"city": {
"type": "multi_field",
"fields": {
"city": {
"type": "string",
"index": "analyzed"
},
"raw_city": {
"type": "string",
"index": "not_analyzed",
"store": "no"
}
}
},
"region": {
"type": "multi_field",
"fields": {
"region": {
"type": "string",
"index": "analyzed"
},
"raw_region": {
"type": "string",
"index": "not_analyzed",
"store": "no"
}
}
},
"state": {
"type": "boolean"
}
}
},
"dropdown": {
"_parent": {
"type": "show"
},
"properties": {
"state": {
"type": "boolean"
},
"analytics": {
"type": "string"
},
"venues": {
"type": "object",
"_parent": {
"type": "venues"
},
"site": {
"type": "string"
}
}
}
}
}
}
I tried this myself on Elasticsearch 1.7.1 and it worked fine.
However, I'm not sure you can declare a _parent relationship inside nested documents the way you did for venues. My mapping request didn't throw an error and was accepted, but looking at how it got parsed in the head plugin, the _parent part was eliminated and only the object part remained.
If I tried to index it without specifying a type, this error was thrown:
"MapperParsingException[mapping [dropdown]]; nested:
MapperParsingException[No type specified for property [venues]];
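Assuming you keep _parent only at the top level of each type (dropdown as a child of show), a rough sketch of indexing and querying would look like this; the parent value and the analytics string are hypothetical stand-ins (the parent would be the show's Mongo ObjectId):
PUT /test/dropdown/1?parent=53fe234a8ffe1e2b0fabc123
{
"state": true,
"analytics": "ga-12345"
}
GET /test/show/_search
{
"query": {
"has_child": {
"type": "dropdown",
"query": { "term": { "state": true } }
}
}
}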

Related

JSON conversion using JOLT

I am trying to convert JSON to a different format using JOLT (via the NiFi JoltTransformJson processor). For a single JSON record, the JOLT spec I am using works fine in the JOLT app demo, but if I execute it with multiple JSON records I do not get the expected output. Could anyone tell me what additional changes I need to make in the JOLT spec to handle multiple JSON records?
sample input json
[
{
"pool": {
"field": [
{
"name": "BillingDay",
"value": "12"
},
{
"name": "Custom1",
"value": "POOL_BASE_PLAN_3GB"
}
]
},
"usage": {
"version": "3",
"quota": {
"name": "POOL_TOP_UP_1GB_2",
"cid": "5764888998010953848"
}
},
"state": {
"version": "1",
"property": [
{
"name": "SMS_RO_TOP",
"value": "1"
},
{
"name": "BillingTimeStamp",
"value": "2020-06-12T01:00:05"
},
{
"name": "timereset",
"value": "2020-01-12T00:35:53"
}
]
}
},
{
"pool": {
"field": [
{
"name": "PoolID",
"value": "111100110000003505209"
},
{
"name": "BillingDay",
"value": "9"
}
]
},
"usage": {
"version": "3"
},
"state": {
"version": "1",
"property": [
{
"name": "BillingTimeStamp",
"value": "2020-06-09T01:00:05"
},
{
"name": "timereset",
"value": "2019-03-20T17:10:38"
}
]
}
}
]
The JOLT spec I am using:
[
{
"operation": "modify-default-beta",
"spec": {
"state": {
"property": {
"name": "NOTAVAILABLE"
}
},
"usage": {
"quota": {
"name": "NOTAVAILABLE"
}
}
}
},
{
"operation": "shift",
"spec": {
"pool": {
"field": {
"*": {
"value": "pool_item.#(1,name)"
}
}
},
// remaining elements pass through as-is
"*": "&"
}
}
]
Expected output JSON:
[
{
"pool_item" : {
"BillingDay" : "12",
"Custom1" : "POOL_BASE_PLAN_3GB"
},
"usage" : {
"version" : "3",
"quota" : {
"name" : "POOL_TOP_UP_1GB_2",
"cid" : "5764888998010953848"
}
},
"state" : {
"version" : "1",
"property" : [ {
"name" : "SMS_RO_TOP",
"value" : "1"
}, {
"name" : "BillingTimeStamp",
"value" : "2020-06-12T01:00:05"
}, {
"name" : "timereset",
"value" : "2020-01-12T00:35:53"
} ]
}
},
{
"pool_item" : {
"BillingDay" : "9",
"PoolID" : "111100110000003505209"
},
"usage" : {
"version" : "3",
"quota" : {
"name" : "NOTAVAILABLE"
}
},
"state" : {
"version" : "1",
"property" : [ {
"name" : "SMS_RO_TOP",
"value" : "1"
}, {
"name" : "BillingTimeStamp",
"value" : "2020-06-12T01:00:05"
}, {
"name" : "timereset",
"value" : "2020-01-12T00:35:53"
} ]
}
}
]
The JOLT shift specification below will work for the multiple JSONs in your input array. The top-level "*" matches each index of the input array, and the "[&4]" and "[&1]" references walk back up the tree to that index, so each record's output lands at the same position in the output array.
[
{
"operation": "shift",
"spec": {
"*": {
"pool": {
"field": {
"*": {
"value": "[&4].pool_item.#(1,name)"
}
}
},
"usage": "[&1].usage",
"state": "[&1].state"
}
}
}
]

JOLT spec on adding default values based on a condition

If my input contains "WorkflowCategory" in "metadata", then the output should contain workflow.workflowInputProperties with the specified default values, some of which are duplicated (like the empty string "" and 3). If not, workflow.workflowInputProperties should not be added.
Input 1
{
"template": false,
"active": true,
"metadata": [
{
"value": "bank_",
"key": "AssetNamePrefix"
},
{
"value": "-BERG",
"key": "SuffixForPublicId"
},
{
"value": "false",
"key": "CORSEnabled"
},
{
"value": "Capture",
"key": "WorkflowCategory"
},
{
"value": "HD",
"key": "Features"
}
],
"description": "Template for working with PRI",
"name": "prof_name",
"type": "Live",
"id": "BNK056003413",
"version": 6
}
Input 2
{
"template": false,
"active": true,
"metadata": [
{
"value": "HD",
"key": "Features"
}
],
"description": "Live Template",
"name": "Live_HD",
"type": "Live",
"id": "BNK007596994",
"version": 1
}
For Input 1, the output should be:
{
"id" : "BNK056003413",
"name" : "prof_name",
"metadataSet" : {
"description" : "Template for working with PRI",
"type" : "Live"
},
"workflow" : {
"workflowInputProperties" : {
"assetNamePrefix" : "bank_",
"recordId" : "",
"sourceUri":"",
"processingUri": "",
"recorderType": "ABC",
"completionTimeout": 600
"loopBackTimer": 10,
"numberOfRetries": 3,
"numberOfRetriesForScheduling": 3,
"scheduleDelay" : 3600
}
}
}
For Input 2, the output should be as follows, without workflow.workflowInputProperties:
{
"id" : "BNK007596994",
"name" : "Live_HD",
"metadataSet" : {
"description" : "Live Template",
"type" : "Live"
},
"features" : "HD"
}
You should add the code below to the rest of your spec. The only issue is that I could not produce an empty string, so I changed it into a space. I'll try to figure that out.
[
{
"operation": "shift",
"spec": {
"metadata": {
"*": {
"key": {
"WorkflowCategory": {
"#bank_": "workflow.workflowInputProperties.assetNamePrefix",
"# ": [
"workflow.workflowInputProperties.recordId",
"workflow.workflowInputProperties.sourceUri",
"workflow.workflowInputProperties.processingUri"
],
"#ABC": "workflow.workflowInputProperties.recorderType",
"#600": "workflow.workflowInputProperties.completionTimeout",
"#10": "workflow.workflowInputProperties.loopBackTimer",
"#3": ["workflow.workflowInputProperties.numberOfRetries",
"workflow.workflowInputProperties.numberOfRetriesForScheduling"],
"#3600": "workflow.workflowInputProperties.scheduleDelay"
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"workflow": {
"workflowInputProperties": {
"completionTimeout": "=toInteger",
"loopBackTimer": "=toInteger",
"numberOfRetries": "=toInteger",
"numberOfRetriesForScheduling": "=toInteger",
"scheduleDelay": "=toInteger"
}
}
}
}
]
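To see why the modify-overwrite-beta stage is needed: in a shift spec, a left-hand-side key starting with # writes the literal text after the # instead of reading from the input, and such literals are always strings. After the shift stage alone, Input 1 should therefore produce roughly this (note the quoted numbers and the space placeholders):
{
"workflow": {
"workflowInputProperties": {
"assetNamePrefix": "bank_",
"recordId": " ",
"sourceUri": " ",
"processingUri": " ",
"recorderType": "ABC",
"completionTimeout": "600",
"loopBackTimer": "10",
"numberOfRetries": "3",
"numberOfRetriesForScheduling": "3",
"scheduleDelay": "3600"
}
}
}
The modify-overwrite-beta stage then casts the five numeric properties to real integers.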

Acumatica REST API to get sales order with line item detail

I was doing a demo and could easily create and retrieve orders, but was not able to get the expand parameter to work.
For example, I created order SO003615 with:
URI http://localhost/Acumatica6/entity/Default/6.00.001/SalesOrder
{
"OrderType": { value: "SO" },
"CustomerID" : { value : "ACTIVESTAF" } ,
"LocationID" : { value : "MAIN" },
"Description" : { value : "Sample Order"},
"Details" :
[
{
"InventoryID" : {value: "AACOMPUT01"},
"Quantity" : {value: 2},
"UOM" : {value: "EA"},
"UnitPrice" : {value: 1000.99}
},
{
"InventoryID" : {value: "AALEGO500"},
"Quantity" : {value: 1}
}
]
}
Then I tried to get the order with:
URI http://localhost/Acumatica6/entity/Default/6.00.001/SalesOrder/SO/SO003615?expand=Details
But the line items are not in the result. What am I missing?
{
"id": "37c15980-f71d-4496-882d-6e05e4a50061",
"rowNumber": 1,
"note": "",
"BillingAddressOverride": {
"value": false
},
"BillingContactOverride": {
"value": false
},
"CreditHold": {
"value": false
},
"Currency": {
"value": "USD"
},
"CustomerID": {
"value": "ACTIVESTAF"
},
"CustomerOrder": {},
"Date": {
"value": "2017-03-16T00:00:00-04:00"
},
"Description": {
"value": "Sample Order 6"
},
"DestinationWarehouseID": {},
"ExternalReference": {},
"Hold": {
"value": false
},
"IsTaxValid": {
"value": false
},
"LastModified": {
"value": "2017-03-17T01:05:56.74-04:00"
},
"LocationID": {
"value": "MAIN"
},
"NewCard": {
"value": false
},
"OrderedQty": {
"value": 3
},
"OrderNbr": {
"value": "SO003615"
},
"OrderTotal": {
"value": 2101.98
},
"OrderType": {
"value": "SO"
},
"PaymentCardIdentifier": {},
"PaymentMethod": {
"value": "CHECK"
},
"PaymentRef": {},
"PreferredWarehouseID": {},
"Project": {
"value": "X"
},
"RequestedOn": {
"value": "2017-03-16T00:00:00-04:00"
},
"ShippingAddressOverride": {
"value": false
},
"ShippingContactOverride": {
"value": false
},
"ShipVia": {},
"Status": {
"value": "Open"
},
"TaxTotal": {
"value": 0
},
"custom": {},
"files": []
}
It is $expand, not expand. Try using the following:
http://localhost/Acumatica6/entity/Default/6.00.001/SalesOrder/SO/003615?$expand=Details
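If you are testing from a shell rather than a REST client, note that $expand must be protected from shell variable expansion, e.g. with single quotes (the session cookie from the login endpoint is omitted here):
curl -X GET \
'http://localhost/Acumatica6/entity/Default/6.00.001/SalesOrder/SO/003615?$expand=Details' \
-H "Accept: application/json"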

ElasticSearch river from Mongo messing up field mappings

I'm using Mongo, Elasticsearch, and this river plugin: https://github.com/richardwilly98/elasticsearch-river-mongodb
I have successfully set everything up, in that the river keeps the ES data updated when Mongo is updated, but the river is copying all the properties from the Mongo documents into ES verbatim, and I only want a small subset of them. E.g. if a Mongo doc has 30 properties, all of them get put into ES instead of only the 5 that I want. I assume the issue is with the mappings; I've followed several docs and another Stack Overflow thread (curl -X POST -d #mapping.json + mapping not created), but it still is not working for me. Here is what I'm doing:
I'm creating my index with:
curl -XPOST "http://localhost:9200/mongoindex" -d #index.json
index.json:
{
"settings" : {
"number_of_shards" : 1
},
"analysis" : {
"analyzer" : {
"str_search_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase"]
},
"str_index_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase", "ngram"]
}
},
"filter" : {
"ngram" : {
"type" : "ngram",
"min_gram" : 2,
"max_gram" : 20
}
}
}
}
Then running:
curl -XPOST "http://localhost:9200/mongoindex/listing/_mapping" -d #mapping.json
With this data:
{
"listing":{
"properties":{
"_all": {
"enabled": false
},
"title": {
"type": "string",
"store": false,
"index": "not_analyzed"
},
"bathrooms": {
"type": "integer",
"store": true,
"index": "analyzed"
},
"bedrooms": {
"type": "integer",
"store": true,
"index": "analyzed"
},
"address": {
"type": "nested",
"include_in_parent": true,
"store": true,
"properties": {
"counrty": {
"type":"string"
},
"city": {
"type":"string"
},
"stateOrProvince": {
"type":"string"
},
"fullStreetAddress": {
"type":"string"
},
"postalCode": {
"type":"string"
}
}
},
"location": {
"type": "geo_point",
"full_name": "geometry.coordiantes",
"store": true
}
}
}
}
Then finally creating the river with:
curl -XPUT "http://localhost:9200/_river/mongoindex/_meta" -d #river.json
river.json:
{
"type": "mongodb",
"mongodb": {
"db": "blueprint",
"collection": "Listing",
"options": {
"secondary_read_preference": true,
"drop_collection": true
}
},
"index": {
"name": "mongoindex",
"type": "listing"
}
}
After all that, the river works in that ES is populated, but it's a verbatim copy of Mongo right now. I need to modify the mappings, but they just aren't taking effect. What am I missing?
After the river runs, my mapping looks nothing like what I want it to.
I would set dynamic mapping to false:
The dynamic creation of mappings for unmapped types can be completely
disabled by setting index.mapper.dynamic to false.
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-dynamic-mapping.html
Others have had similar issues to yours and it looks like the best solution so far has been to prevent the MongoDB River from dynamically mapping at all:
https://github.com/richardwilly98/elasticsearch-river-mongodb/issues/75
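A minimal sketch of applying that, assuming you drop and recreate the index (index-level settings like this are simplest to apply at creation time):
curl -XDELETE "http://localhost:9200/mongoindex"
curl -XPOST "http://localhost:9200/mongoindex" -d '{
"settings": {
"number_of_shards": 1,
"index.mapper.dynamic": false
}
}'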
It turns out the issue was that the dynamic property was left out of the mappings config. It needs to be in 2 places: in index.json as shown above, and in mappings.json:
{
"listing":{
"_source": {
"enabled": false
},
"dynamic": false, // <--- Need to add this
"properties":{
"_all": {
"enabled": false
},
"title": {
"type": "string",
"store": false,
"index": "str_index_analyzer"
},
"bathrooms": {
"type": "integer",
"store": true,
"index": "analyzed"
},
"bedrooms": {
"type": "integer",
"store": true,
"index": "analyzed"
},
"address": {
"type": "nested",
"include_in_parent": true,
"store": true,
"properties": {
"counrty": {
"type":"string",
"index": "str_index_analyzer"
},
"city": {
"type":"string",
"index": "str_index_analyzer"
},
"stateOrProvince": {
"type":"string",
"index": "str_index_analyzer"
},
"fullStreetAddress": {
"type":"string",
"index": "str_index_analyzer"
},
"postalCode": {
"type":"string"
}
}
},
"location": {
"type": "geo_point",
"full_name": "geometry.coordiantes",
"store": true
}
}
}
}
As for the 902 docs vs. 451: I think that's a bug in the Elasticsearch head plugin I'm using to browse documents. There are no duplicates, but a couple of spots show 902 docs as a summary of sorts.
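One way to sanity-check the real document count without relying on the head plugin is the count API:
curl -XGET "http://localhost:9200/mongoindex/listing/_count"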

ElasticSearch autocomplete returning 0 hits

I am trying to build an autocomplete feature for our database running on MongoDB. We need to provide autocomplete which lets users complete their queries by offering suggestions while they are typing in the search box.
I have a collection of articles from various sources, which has the following fields:
{
"title" : "Its the title of a random article",
"cont" : { "paragraphs" : [ .... ] },
and so on..
}
I went through a video by Clinton Gormley. From 37:00 through 42:00, Gormley describes an autocomplete built with edgeNGram. I also referred to this question, and recognized that the two approaches are almost the same thing; just the mappings differ.
Based on these experiences, I built nearly identical settings and mappings, and then restored the articles collection to ensure it gets indexed by Elasticsearch.
The indexing scheme is as follows:
POST /title_autocomplete/title
{
"settings": {
"analysis": {
"filter": {
"autocomplete": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 50
}
},
"analyzer": {
"title" : {
"type" : "standard",
"stopwords":[]
},
"autocomplete": {
"type" : "autocomplete",
"tokenizer": "standard",
"filter": ["lowercase", "autocomplete"]
}
}
}
},
"mappings": {
"title": {
"type": "multi_field",
"fields" : {
"title" : {
"type": "string",
"analyzer": "title"
},
"autocomplete" : {
"type": "string",
"index_analyzer": "autocomplete",
"search_analyzer" : "title"
}
}
}
}
}
But when I run the search query, I am unable to get any hits!
GET /title_autocomplete/title/_search
{
"query": {
"bool" : {
"must" : {
"match" : {
"title.autocomplete" : "Its the titl"
}
},
"should" : {
"match" : {
"title" : "Its the titl"
}
}
}
}
}
Can anybody please explain what's wrong with the mapping or settings? I have been reading the Elasticsearch docs for over 7 days now but can't seem to get beyond plain full-text search!
Elasticsearch version: 0.90.10
MongoDB version : v2.4.9
using _river
Ubuntu 12.04 64bit
UPDATE
I realised that the mapping is screwed up after applying the previous settings:
GET /title_autocomplete/_mapping
{
"title_autocomplete": {
"title": {
"properties": {
"analysis": {
"properties": {
"analyzer": {
"properties": {
"autocomplete": {
"properties": {
"filter": {
"type": "string"
},
"tokenizer": {
"type": "string"
},
"type": {
"type": "string"
}
}
},
"title": {
"properties": {
"type": {
"type": "string"
}
}
}
}
},
"filter": {
"properties": {
"autocomplete": {
"properties": {
"max_gram": {
"type": "long"
},
"min_gram": {
"type": "long"
},
"type": {
"type": "string"
}
}
}
}
}
}
},
"content": {
... paras and all ...
}
"title": {
"type": "string"
},
"url": {
"type": "string"
}
}
}
}
}
The analyzers and filters have actually been mapped into the document type after the settings were applied, whereas the original title field is not affected at all! Is this normal?
I guess this explains why the query is not matching. There is no title.autocomplete field or title.title field at all.
So how should I proceed now?
For those facing this problem, it's better to delete the index and start again instead of wasting time with the _river, just as DrTech pointed out in the comments.
This saves time but is not a real solution. (Therefore I'm not marking it as the answer.)
The key is to set up the mappings and index before you initiate the river.
We had an existing setup with a MongoDB river and an index called coresearch that we wanted to add autocomplete capability to; this is the set of commands we used to delete the existing index and river and start again.
Stack is:
ElasticSearch 1.1.1
MongoDB 2.4.9
ElasticSearchMapperAttachments v2.0.0
ElasticSearchRiverMongoDb/2.0.0
Ubuntu 12.04.2 LTS
curl -XDELETE "localhost:9200/_river/node"
curl -XDELETE "localhost:9200/coresearch"
curl -XPUT "localhost:9200/coresearch" -d '
{
"settings": {
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20
}
},
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
}
}
}'
curl -XPUT "localhost:9200/coresearch/_mapping/users" -d '{
"users": {
"properties": {
"firstname": {
"type": "string",
"search_analyzer": "standard",
"index_analyzer": "autocomplete"
},
"lastname": {
"type": "string",
"search_analyzer": "standard",
"index_analyzer": "autocomplete"
},
"username": {
"type": "string",
"search_analyzer": "standard",
"index_analyzer": "autocomplete"
},
"email": {
"type": "string",
"search_analyzer": "standard",
"index_analyzer": "autocomplete"
}
}
}
}'
curl -XPUT "localhost:9200/_river/node/_meta" -d '
{
"type": "mongodb",
"mongodb": {
"servers": [
{ "host": "127.0.0.1", "port": 27017 }
],
"options":{
"exclude_fields": ["time"]
},
"db": "users",
"gridfs": false,
"options": {
"import_all_collections": true
}
},
"index": {
"name": "coresearch",
"type": "documents"
}
}'
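Once the index exists, you can verify the analyzer behaves as expected before starting the river, e.g. with the _analyze API (the sample text is arbitrary):
curl -XGET "localhost:9200/coresearch/_analyze?analyzer=autocomplete&pretty" -d 'example'
You should see the edge n-grams (e, ex, exa, ...) come back as tokens.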