Cloudant JSON data into dashdb table joins - ibm-cloud

I have successfully imported some JSON data into cloudant, the JSON data has three levels. Then created the dashdb warehouse from cloudant to put the data into relational tables. It appears that dashdb has created three tables for each of the levels in the JSON data but has not provided me with a Key to join back to the top level. Is there a customisation that is done somewhere that tells dashdb how to join the tables.
A sample JSON doc is below:
{
"_id": "579b56388aa56fd03a4fd0a9",
"_rev": "1-698183d4326352785f213b823749b9f8",
"v": 0,
"startTime": "2016-07-29T12:48:04.204Z",
"endTime": "2016-07-29T13:11:48.962Z",
"userId": "Ranger1",
"uuid": "497568578283117a",
"modes": [
{
"startTime": "2016-07-29T12:54:22.565Z",
"endTime": "2016-07-29T12:54:49.894Z",
"name": "bicycle",
"_id": "579b56388aa56fd03a4fd0b1",
"locations": []
},
{
"startTime": "2016-07-29T12:48:02.477Z",
"endTime": "2016-07-29T12:53:28.503Z",
"name": "walk",
"_id": "579b56388aa56fd03a4fd0ad",
"locations": [
{
"at": "2016-07-29T12:49:05.716Z",
"_id": "579b56388aa56fd03a4fd0b0",
"location": {
"coords": {
"latitude": -34.0418308,
"longitude": 18.3503616,
"accuracy": 37.5,
"speed": 0,
"heading": 0,
"altitude": 0
},
"battery": {
"is_charging": true,
"level": 0.7799999713897705
}
}
},
{
"at": "2016-07-29T12:49:48.488Z",
"_id": "579b56388aa56fd03a4fd0af",
"location": {
"coords": {
"latitude": -34.0418718,
"longitude": 18.3503895,
"accuracy": 33,
"speed": 0,
"heading": 0,
"altitude": 0
},
"battery": {
"is_charging": true,
"level": 0.7799999713897705
}
}
},
{
"at": "2016-07-29T12:50:20.760Z",
"_id": "579b56388aa56fd03a4fd0ae",
"location": {
"coords": {
"latitude": -34.0418788,
"longitude": 18.3503887,
"accuracy": 33,
"speed": 0,
"heading": 0,
"altitude": 0
},
"battery": {
"is_charging": true,
"level": 0.7799999713897705
}
}
}
]
},
{
"startTime": "2016-07-29T12:53:37.137Z",
"endTime": "2016-07-29T12:54:18.505Z",
"name": "carshare",
"_id": "579b56388aa56fd03a4fd0ac",
"locations": []
},
{
"startTime": "2016-07-29T12:54:54.112Z",
"endTime": "2016-07-29T13:11:47.818Z",
"name": "bus",
"_id": "579b56388aa56fd03a4fd0aa",
"locations": [
{
"at": "2016-07-29T13:00:08.039Z",
"_id": "579b56388aa56fd03a4fd0ab",
"location": {
"coords": {
"latitude": -34.0418319,
"longitude": 18.3503623,
"accuracy": 36,
"speed": 0,
"heading": 0,
"altitude": 0
},
"battery": {
"is_charging": false,
"level": 0.800000011920929
}
}
}
]
}
]
}
SQL for the three tables created in dashdb showing all the fields in each table is here. Note there is no FK that I can see, the "_ID" fields are unique to each table.
SELECT ENDTIME,STARTTIME,USERID,UUID,V,"_ID","_REV"
FROM <schemaname>.RANGER_DATA
where "_ID" = '579b56388aa56fd03a4fd0a9'
SELECT ARRAY_INDEX,ENDTIME,NAME,STARTTIME,TOTALPAUSEDMS,"_ID"
FROM <schemaname>.RANGER_DATA_MODES
where "_ID" = '579b56388aa56fd03a4fd0b1'
SELECT ARRAY_INDEX,AT,LOCATION_BATTERY_IS_CHARGING,LOCATION_BATTERY_LEVEL,LOCATION_COORDS_ACCURACY,LOCATION_COORDS_ALTITUDE,LOCATION_COORDS_HEADING,LOCATION_COORDS_LATITUDE,LOCATION_COORDS_LONGITUDE,LOCATION_COORDS_SPEED,RANGER_DATA_MODES,"_ID"
FROM <schemaname>.RANGER_DATA_MODES_LOCATIONS
where "_ID" = '579b56388aa56fd03a4fd0b0'

Cloudant uses _id for its UID for each document. It seems that the warehousing task iterates over these documents and assumes that there is a new document every time it sees a new _id.
Because you're using _id in your modes and locations this will produce an undesired result in the SQL DB.
Renaming your _id in modes and locations to something else should fix the problem.

Related

Mongoose - Joining based on Object name

Currently, I have 3 schemas that I want to join. The first schema is the user schema.
{
"_id": {
"$oid": "6147f87ac51f060e8c1bc8c7"
},
"userID": "344431410360090625",
"currency": 72590,
"level": 10,
"exp": 78.5,
"sp": 338,
"location": {
"area": 2,
"floor": 3
},
"inv": {
"Rag Hood#363": {
"emote": "",
"description": "",
"rarity": "Common",
"type": "equipment",
"image": "",
"equipmentType": "helmet",
"level": 20,
"ascension": 0,
"exp": 0,
"quantity": 0,
"expToLevelUp": 0,
"equipped": false
},
"Jericho Jehammad": {
"emote": "<:Jericho:823551572029603840>",
"description": "Enhance your weapons with this mysterious item",
"rarity": "Common",
"type": "special",
"image": "",
"quantity": 7147,
"listed": 6964
},
},
"__v": 0,}
I want to be able to use the Object names of "Rag Hood#363" and "Jericho Jehammad". Firstly the equipment schema is shown below.
{
"_id": {
"$oid": "61474cb047a1b66f2cb1b6d8"
},
"itemName": "Rag Hood",
"stats": {
"defense": {
"flat": 2,
"multi": 0
}
},
"equipmentType": "helmet",
"ascensionRequirements": [],
"statsUpPerLvl": {
"defense": 0.5
}}
Next, the items schema is used to join Jericho Jehammad. Equipment in our database is named with #, with the item number after. While other items are identified with just the itemName.
{
"_id": {
"$oid": "60c5c5e6d2d78c794d33b7ae"
},
"itemName": "Jericho Jehammad",
"emote": "<:Jericho:823551572029603840>",
"description": "",
"rarity": "Common",
"type": "special",
"image": ""}
I want to return an object that overrides the value if the value is present in the equipment and items schemas. If the value is not present, I will use the value present in the user schema.

Find Object within an Array that is within an object also within an Array

I've been struggling with the following Mongo document:
{
"name": "Fire Name",
"address": "123 Somestreet Ave",
"city": "Boston",
"state": "MA",
"zip": "02109",
"dispatchReference": "123codefromdispatch",
"created": {
"$date": "2021-02-26T12:30:41Z"
},
"lastPar": {
"$date": "2021-02-26T22:30:41Z"
},
"latitude": "-83.691407",
"longitude": "141.338391",
"par": 3,
"vehicles": [
{
"_id": "60397691c09c2fd299c40420",
"hidden": false,
"vehicleName": "Updated",
"vehicleNumber": 20,
"vehicleStatus": "Enroute",
"latitude": "-83.691407",
"longitude": "141.338391",
"par": 6,
"lastPar": "2021-02-26T22:30:41+00:00",
"members": [
{
"_id": "60397b14f2a2b10978693a47", <---- find Member with this ID
"hidden": false, <----- update this property
"rank": "firefighter",
"dateOfRank": "2021-02-26T22:49:56+00:00",
"role": "hose",
"firstName": "Reyna",
"lastName": "Casey",
"dateOfBirth": "1992-02-26T22:49:56+00:00",
"sex": "male",
"age": 29,
"dateOfExperience": 5,
"departmentLocation": "975 Pine Street, Gasquet, Montana, 1843",
"lastIncidentID": "60397b14095d4f4313fbf716",
"bpm": 120,
"vomax": "33.1",
"temp": "104.69",
"latitude": "-81.393671",
"longitude": "-78.26867",
"scba": {
"maskStatus": false,
"oxygenLevel": "low"
}
I'm attempting to write a Mongo query that when given the member._id, will update that specific member's hidden property from false to true. So I need to tunnel down into my vehicles array of objects, and find the member within the members array with the matching _id.
I suspect it has something to do with the $[] operator but I'm not having any luck.
Try this:
db.testcollection.updateOne(
{
// Specify some condition related to "vehicles" array.
"vehicles.members._id": ObjectId("60397b14f2a2b10978693a47")
},
{
$set: {
"vehicles.$.members.$[obj].hidden": true
}
},
{
arrayFilters: [
{
"obj._id": ObjectId("60397b14f2a2b10978693a47")
}
]
}
);

MongoDB Aggregation Error Returning wrong result

I have my json object like this
{
"_id": "5c2e811154855c0012308f00",
"__pclass": "QXRzXFByb2plY3RcTW9kZWxcUHJvamVjdA==",
"id": 44328,
"name": "Test project via postman2//2",
"address": "some random address",
"area": null,
"bidDate": null,
"building": {
"name": "Health Care Facilities",
"type": "Dental Clinic"
},
"collaborators": [],
"createdBy": {
"user": {
"id": 7662036,
"name": "Someone Here"
},
"firm": {
"id": 2520967,
"type": "ATS"
}
},
"createdDate": "2019-01-03T21:39:29Z",
"customers": [],
"doneBy": null,
"file": null,
"firm": {
"id": 1,
"name": "MyFirm"
},
"leadSource": {
"name": "dontknow",
"number": "93794497"
},
"location": {
"id": null,
"city": {
"id": 567,
"name": "Bahamas"
},
"country": {
"id": 38,
"name": "Canada"
},
"province": {
"id": 7,
"name": "British Columbia"
}
},
"modifiedBy": null,
"modifiedDate": null,
"projectPhase": {
"id": 1,
"name": "pre-design"
},
"quotes": [{
"id": 19,
"opportunityValues": {
"Key1": 100,
"Key2 Key2": 100,
"Key3 Key3 Key3": 200,
}
}],
"specForecast": [],
"specIds": [],
"tags": [],
"valuation": "something"
}
I am trying to aggregate using this query in MongoDB. My aggregation key is 4 level deep and also contains spaces. On all online examples shows me the aggregation at the first level. Looking to the online codes, I tried to re-iterate the same with my 4th level deep key.
db.mydata.aggregate([
{$match: {"id": 44328 } } ,
{$group: { _id: "$quotes.id",
totalKey2:{ $sum: "$quotes.opportunityValues.Key2 Key2"},
totalKey3:{ $sum: "$quotes.opportunityValues.Key3 Key3 Key3"}
}
}
]);
This should return
_id totalKey2 totalKey3
0 19 100 300
But it is returning
_id totalKey2 totalKey3
0 19 0 0
What am I doing Wrong?
Although it's not recommended to use space in field names in Mongo, it works as expected.
The problem with your query is that "quotes" is an array and you should first unwind it before grouping it.
This works as expected:
db.mydata.aggregate([
{ $match: { "id": 44328 } } ,
{ $unwind: "$quotes" },
{ $group: { _id: "$quotes.id",
totalKey2:{ $sum: "$quotes.opportunityValues.Key2 Key2" },
totalKey3:{ $sum: "$quotes.opportunityValues.Key3 Key3 Key3" } }
}
]);

Custom API in Elastic search

Building my first RESTful api, and thought I'd try elasticsearch for a base. Is there a way customize the API in Elasticsearch to only return certain fields from results of a query. For instance if I have data with fname, lname, city, state, zip, email and I only want to return a list of fnames and cities for every query matching the city field. So something like this:
curl -XPOST "http://localhost:9200/custom_call/_search" -d'
{
"query": {
"query_string": {
"query": "Toronto",
"fields": ["city"]
}
}
}'
Would ideally return something like:
{"took": 52, "timed_out": false, "_shards": {
"total": 35,
"successful": 35,
"failed": 0
}, "hits": {
"total": 1,
"max_score": 0.375,
"hits": [
{
"_index": "persons",
"_type": "person",
"_id": "6",
"_score": 0.375,
"_source": {
"fname": "Bob",
"city": "Toronto",
}
},
{
"_index": "persons",
"_type": "person",
"_id": "13",
"_score": 0.375,
"_source": {
"fname": "Sue",
"city": "Toronto",
}
},
{
"_index": "persons",
"_type": "person",
"_id": "21",
"_score": 0.375,
"_source": {
"fname": "Jose",
"city": "Toronto",
}
}
]
}}
Not sure if Elasticsearch is set up to do this or even if you would want it to. My first foray into building a RESTful API. I figure if NPR StackOverflow like it, its worth a shot! Thanks for the help.
Yes you can, I think you haven't tried to find out on your own.
Here is how to do that,
POST localhost:9200/index/type/_search
{
"query": {
"query_string": {
"query": "Toronto",
"fields": ["city"]
}
},
"_source" :["fields_you_want_to_get"]
}
The term you are looking is source filtering.

Elasticsearch nested query

I'm new to elasticsearch, managed to set it up and import recordset from my mongodb collection using the river plugin. For a start, I want to query against the "desc" field but just can't manage to get the query .. not sure if the problem is driven by the way index was defined.. can anyone help please?
Sample recordset in elastic search looks like this
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 107209,
"max_score": 1,
"hits": [
{
"_index": "shiv",
"_type": "shiv",
"_id": "iG1eIzN7RGO7hFfxTlnLuA",
"_score": 1,
"_source": {
"_id": {
"$oid": "50901d7f485bf7bd1c000021"
},
"brand": "",
"category": {
"$ref": "categories",
"$id": {
"$oid": "4fbd2221758cb11d14000174"
}
},
"comments": [],
"count_comment": 0,
"count_fav": 2,
"count_hotness": 1.46,
"count_rekick": 0,
"count_share": 0,
"country": {
"$ref": "countries",
"$id": {
"$oid": "4fec98f7758cb18c6e0002c9"
}
},
"currency": "pound",
"desc": "A men's automatic watch, this Seamaster Bond model features a Co-Axial escapement and date function. Its blue dial is teamed with a stainless steel case and bracelet for a look that's sporty and refined.",
"gender": "male",
"ident": "omega-seamaster-diver-bond-men-s-automatic-watch---ernest-jones-1351622015",
"img_url": "http://s7ondemand4.scene7.com/is/image/Signet/5735793?$detail$",
"lifestyles": [
{
"$ref": "lifestyles",
"$id": {
"$oid": "508ff6ca485bf73112000060"
}
}
],
"location": "United Kingdom",
"owner": {
"$ref": "accounts",
"$id": {
"$oid": "50742fd8485bf74b7a00213f"
}
},
"price": 2400,
"store": "ernestjones.co.uk",
"tags": [
"ernest-jones",
"bond"
],
"timestamp_creation": 1351622015,
"timestamp_exp": 1356825600,
"timestamp_update": 1351622015,
"title": "Omega Seamaster Diver Bond men's automatic watch - Ernest Jones",
"url": "http%3A%2F%2Fwww.ernestjones.co.uk%2Fwebstore%2Fd%2F5735793%2Fomega%20seamaster%20diver%20bond%20men%27s%20automatic%20watch%2F%3Futm_source%3Dgooglebase%26utm_medium%3Dfeedmanager%26cm_mmc%3DFroogle-_-CKB-_-nurses_fobs-_-watches%26cm_mmca1%3Domega%26cm_mmca2%3Dmale%26cm_mmca3%3Dadult"
}
}
]
}
}
The mapping of the index "shiv" looks like
{
"shiv": {
"properties": {
"$oid": {
"type": "string"
}
}
}
}
Thanks again
There are lots of ways to query, have you tried a match query?
Using curl or a rest client of your choice...
http://[host]:9200/[index_name]/[doc_type]/_search
{
"query" : {
"match" : {
"desc" : "some value you want to find in desc"
}
}
}