OrientDB ETL results not in graph database

I have created this config file:
{
  "config": { "log": "debug" },
  "extractor": {
    "jdbc": {
      "driver": "com.ibm.db2.jcc.DB2Driver",
      "url": "jdbc:db2://DB2TVIPA:447/DB2TVIPA",
      "userName": "lidt9bl",
      "userPassword": "Wam52017",
      "query": "select * from DB2TST5.COMVTC"
    }
  },
  "transformers": [
    { "log": { "prefix": "MySQL -> " } },
    { "field": { "fieldName": "text", "value": "Medical Vendor Type Code" } },
    { "field": { "fieldName": "code", "expression": "COMVTC_C" } },
    { "field": { "fieldName": "COMVTC_C", "operation": "remove" } },
    { "field": { "fieldName": "display", "expression": "COMVTC_NM" } },
    { "field": { "fieldName": "COMVTC_NM", "operation": "remove" } },
    { "field": { "fieldName": "status", "expression": "COMVTC_STUS_C" } },
    { "field": { "fieldName": "COMVTC_STUS_C", "operation": "remove" } },
    { "field": { "fieldName": "effectiveDate", "expression": "COMVTC_EFF_DT" } },
    { "field": { "fieldName": "COMVTC_EFF_DT", "operation": "remove" } },
    { "field": { "fieldName": "terminationDate", "expression": "COMVTC_TRM_DT" } },
    { "field": { "fieldName": "COMVTC_TRM_DT", "operation": "remove" } },
    { "field": { "fieldName": "createTS", "expression": "COMVTC_REC_MTN_TS" } },
    { "field": { "fieldName": "COMVTC_REC_MTN_TS", "operation": "remove" } },
    { "field": { "fieldName": "userID", "expression": "COMVTC_REC_MTN_ID" } },
    { "field": { "fieldName": "COMVTC_REC_MTN_ID", "operation": "remove" } },
    { "field": { "fieldName": "system", "value": "1" } },
    { "field": { "fieldName": "version", "value": "1" } },
    { "field": { "fieldName": "userSelected", "value": false } },
    { "vertex": { "class": "codeableConcept" } }
  ],
  "loader": {
    "orientdb": {
      "dbURL": "plocal:localhost/p01/app/gdb/orientdb-community-2.2.17/databases/CorporateProviderData",
      "dbUser": "admin",
      "dbPassword": "admin",
      "dbAutoDropIfExists": false,
      "dbAutoCreate": false,
      "tx": false,
      "wal": true,
      "batchCommit": 2,
      "dbType": "graph",
      "dbAutoCreateProperties": true,
      "classes": [
        { "name": "codeableConcept", "extends": "coding" },
        { "name": "coding", "extends": "V" }
      ]
    }
  }
}
Class codeableConcept already exists with no records. The query reads a small table with two rows:
COMVTC_C  COMVTC_STUS_C  COMVTC_EFF_DT  COMVTC_TRM_DT  COMVTC_NM               COMVTC_REC_MTN_TS           COMVTC_REC_MTN_ID
--------  -------------  -------------  -------------  ----------------------  --------------------------  -----------------
AS        A              1937-01-01     NULL           Administrative Service  2007-01-15 02:06:29.396586  XXXXXX
MS        A              1937-01-01     NULL           Medical Service         2007-01-15 02:06:29.416709  XXXXXX
I am renaming the columns to match the following schema:
create class coding extends V;
create property coding.system string;
create property coding.version string;
create property coding.code string;
create property coding.display string;
create property coding.userSelected Boolean;
ALTER PROPERTY coding.system MANDATORY true;
ALTER PROPERTY coding.version MANDATORY true;
ALTER PROPERTY coding.code MANDATORY true;
CREATE INDEX Index_CodeSet_key ON coding (system,version,code) UNIQUE;
create class codeableConcept extends coding;
create property codeableConcept.text string;
Here is the end of the log:
[3:field] DEBUG Transformer output: {COMVTC_C:null,COMVTC_STUS_C:null,COMVTC_EFF_DT:null,COMVTC_TRM_DT:null,COMVTC_NM:null,COMVTC_REC_MTN_TS:null,COMVTC_REC_MTN_ID:null,text:Medical Vendor Type Code,code:MS,display:Medical Service ,status:A,effectiveDate:1937-01-01,terminationDate:null,createTS:2007-01-15 02:06:29.416709,userID:APOLIDB ,system:1,version:1,userSelected:false}
[3:vertex] DEBUG Transformer input: {COMVTC_C:null,COMVTC_STUS_C:null,COMVTC_EFF_DT:null,COMVTC_TRM_DT:null,COMVTC_NM:null,COMVTC_REC_MTN_TS:null,COMVTC_REC_MTN_ID:null,text:Medical Vendor Type Code,code:MS,display:Medical Service ,status:A,effectiveDate:1937-01-01,terminationDate:null,createTS:2007-01-15 02:06:29.416709,userID:APOLIDB ,system:1,version:1,userSelected:false}
[3:vertex] DEBUG Transformer output: v(codeableConcept)[#22:8]
[orientdb] INFO committing
Pipeline worker done without errors:: true
all items extracted
END ETL PROCESSOR
+ extracted 5 records (0 records/sec) - 5 records -> loaded 2 vertices (0 vertices/sec) Total time: 195ms [0 warnings, 0 errors]
It says it created codeableConcept RID #22:8, but it does not exist.
orientdb {db=CorporateProviderData}> info class codeableConcept
CLASS 'codeableConcept'
Records..............: 0
Super classes........: [coding]
Default cluster......: codeableconcept (id=25)
Supported clusters...: codeableconcept(25), codeableconcept_1(26), codeableconcept_2(27), codeableconcept_3(28)
Cluster selection....: round-robin
Oversize.............: 0.0
Any ideas?

I found the solution. It took another set of eyes, but the problem was within the dbURL:
"dbURL": "plocal:localhost/p01/app/gdb/orientdb-community-2.2.17/databases/CorporateProviderData"
I didn't realize that it was creating a new database in another directory because of the use of "localhost". Once I removed it, it worked.
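For reference, a plocal URL takes no host component and should point directly at the database directory, so the corrected URL (assuming the same install path as above) would look like:
"dbURL": "plocal:/p01/app/gdb/orientdb-community-2.2.17/databases/CorporateProviderData"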


Extract value from JSON using PowerShell

I have the below JSON:
{
  "clusterName": "IBDCluster",
  "defaultReplicaSet": {
    "name": "default",
    "primary": "X92SL224XXX2XX:3306",
    "ssl": "REQUIRED",
    "status": "OK_NO_TOLERANCE",
    "statusText": "Cluster is NOT tolerant to any failures. 1 member is not active.",
    "topology": {
      "X92SL224XXX1XX:3306": {
        "address": "X92SL224XXXXXXX:3306",
        "memberRole": "SECONDARY",
        "mode": "R/O",
        "readReplicas": {},
        "role": "HA",
        "status": "ONLINE",
        "version": "5.7.36"
      },
      "X92SL224XXX2XX:3306": {
        "address": "X92SL224XXX2XX:3306",
        "memberRole": "PRIMARY",
        "mode": "R/W",
        "readReplicas": {},
        "role": "HA",
        "status": "ONLINE",
        "version": "5.7.36"
      },
      "X92SL224XXXX3XX:3306": {
        "address": "X92SL224XXX3XX:3306",
        "instanceErrors": [
          "ERROR: group_replication has stopped with an error."
        ],
        "memberRole": "SECONDARY",
        "memberState": "ERROR",
        "mode": "R/O",
        "readReplicas": {},
        "role": "HA",
        "status": "(MISSING)",
        "version": "5.7.36"
      }
    },
    "topologyMode": "Single-Primary"
  },
  "groupInformationSourceMember": "X92SL224XXXXXXX:3306"
}
I need to extract values like memberRole and status from the topology section.
When I go to the topology part:
$ClusterDetails = $ClusterStatus.defaultReplicaSet.topology
$ClusterDetails has values like this (data is visible for only 2 servers, but all 3 servers are present):
PS C:\Windows\system32> $ClusterDetails
X92SL224XXXX1XX:3306 X92SL224XXXX2XX:3306
-------------------- --------------------
@{address=X92SL224XXXX1XX:3306; memberRole=SECONDARY; mode=R/O; readReplicas=; role=HA; status=ONLINE; version=5.7.36} @{address=X92SL224XXXX2XX:3306; memberRole=PRIM...
From the shell I am able to see the individual output if I select it like this:
PS C:\Windows\system32> $ClusterDetails.'X92SL224XXXX1XX:3306'
address : X92SL224XXXX1XX:3306
memberRole : PRIMARY
mode : R/W
readReplicas :
role : HA
status : ONLINE
version : 5.7.36
I need help fetching the data from $ClusterDetails for individual servers as above, but I can't work out how to do that dot-access part in a script. Please let me know how to do that.
Quite a long statement, but this should work:
$json.defaultReplicaSet.topology.PSObject.Properties.Value | Select-Object memberRole, status
# Results in:
memberRole status
---------- ------
SECONDARY ONLINE
PRIMARY ONLINE
SECONDARY (MISSING)
You can access the value of each property of the object in $json.defaultReplicaSet.topology through its PSObject properties.
It's worth noting that .PSObject.Properties.Value enumerates all of the property values at once thanks to member-access enumeration.
The same can be accomplished using a loop, for example:
foreach ($property in $json.defaultReplicaSet.topology.PSObject.Properties) {
    [pscustomobject]@{
        ThisProperty = $property.Name
        memberRole   = $property.Value.memberRole
        status       = $property.Value.status
    }
}
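Both snippets assume $json already holds the parsed object; if you are starting from raw JSON text, convert it first. A minimal sketch (the file name cluster-status.json is hypothetical):
# Parse the raw JSON text into an object before walking its properties
$json = Get-Content -Raw .\cluster-status.json | ConvertFrom-Json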

Ingesting a multi-valued dimension from a comma-separated string

I have event data from Kafka with the following structure that I want to ingest into Druid:
{
  "event": "some_event",
  "id": "1",
  "parameters": {
    "campaigns": "campaign1, campaign2",
    "other_stuff": "important_info"
  }
}
Specifically, I want to transform the dimension "campaigns" from a comma-separated string into an array / multi-valued dimension so that it can be nicely filtered and grouped by.
My ingestion spec so far looks as follows:
{
  "type": "kafka",
  "dataSchema": {
    "dataSource": "event-data",
    "parser": {
      "type": "string",
      "parseSpec": {
        "format": "json",
        "timestampSpec": {
          "column": "timestamp",
          "format": "posix"
        },
        "flattenSpec": {
          "fields": [
            {
              "type": "root",
              "name": "parameters"
            },
            {
              "type": "jq",
              "name": "campaigns",
              "expr": ".parameters.campaigns"
            }
          ]
        }
      },
      "dimensionSpec": {
        "dimensions": [
          "event",
          "id",
          "campaigns"
        ]
      }
    },
    "metricsSpec": [
      {
        "type": "count",
        "name": "count"
      }
    ],
    "granularitySpec": {
      "type": "uniform",
      ...
    }
  },
  "tuningConfig": {
    "type": "kafka",
    ...
  },
  "ioConfig": {
    "topic": "production-tracking",
    ...
  }
}
This, however, leads to campaigns being ingested as a string.
I could neither find a way to generate an array out of it with a jq expression in the flattenSpec, nor did I find something like a string-split expression that could be used in a transformSpec.
Any suggestions?
Try setting useFieldDiscovery: false in your ingestion spec. When this flag is set to true (the default), it interprets all fields with singular values (not a map or list) and flat lists (lists of singular values) at the root level as columns.
Here is a good example and reference link for using the flatten spec:
https://druid.apache.org/docs/latest/ingestion/flatten-json.html
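For context, the flag lives inside the flattenSpec; a minimal sketch of how it would sit in the spec above:
"flattenSpec": {
  "useFieldDiscovery": false,
  "fields": [
    { "type": "jq", "name": "campaigns", "expr": ".parameters.campaigns" }
  ]
}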
It looks like since Druid 0.17.0, Druid expressions support typed constructors for creating arrays, so the string_to_array expression should do the trick!
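A minimal sketch of such a transform, assuming it is added as a transformSpec in the dataSchema and that the delimiter is a comma followed by a space (as in the sample event):
"transformSpec": {
  "transforms": [
    {
      "type": "expression",
      "name": "campaigns",
      "expression": "string_to_array(\"campaigns\", ', ')"
    }
  ]
}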

Loopback indexes - how to specify different index types in model definition?

In Loopback (v3), when defining indexes in my model.json files, how do I specify different types of indexes (such as a BRIN)? Also, how do I specify index conditions (such as if I want to create a partial index)? I'm using postgres for the database, if that's relevant.
You can configure the index type via the type field:
{
  "name": "MyModel",
  "properties": {
    // ...
  },
  "indexes": {
    "myindex": {
      "columns": "name, email",
      "type": "BRIN",
      // ...
    }
  }
}
I am afraid LoopBack does not support index conditions (partial indexes) yet. Feel free to open a new issue at https://github.com/strongloop/loopback-connector-postgresql/issues.
I was trying to add this in LB4. It's pretty straightforward there (and should be the same for LB3 as well, I hope):
@model({
  name: 'tablename',
  settings: {
    indexes: {
      idx_tablename: {
        columnA: '',
        columnB: '',
        columnC: ''
      }
    }
  }
})
Once the build is done, the index named idx_tablename with 3 columns will get created.
In PostgreSQL and LoopBack 3 you can specify a multi-column index like this.
The following LoopBack JSON creates an index in Postgres where the fields message and type are unique together:
{
  "name": "notification",
  "base": "PersistedModel",
  "idInjection": true,
  "options": {
    "validateUpsert": true
  },
  "properties": {
    "message": {
      "type": "string",
      "required": true
    },
    "type": {
      "type": "string",
      "required": true
    },
    "seen": {
      "type": "boolean",
      "required": true,
      "default": false
    }
  },
  "validations": [],
  "relations": {},
  "acls": [],
  "methods": {},
  "indexes": {
    "message_type_index": {
      "keys": "message, type",
      "options": { "unique": true }
    }
  }
}
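For reference, with the PostgreSQL connector this should correspond roughly to a unique index like the following (a sketch; the exact quoting the connector emits may differ):
CREATE UNIQUE INDEX message_type_index ON notification (message, type);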

Cannot use resultSelector while developing an Azure DevOps extension

I am working on a custom extension for Azure DevOps which already contains a service endpoint:
"type": "ms.vss-endpoint.service-endpoint-type"
In addition, I would like to create a custom Release Artifact Source:
"type": "ms.vss-releaseartifact.release-artifact-type"
Following this documentation, my current struggle is in filling the fields under the Artifact Source using an external API. I tried many patterns in the 'resultSelector' and 'resultTemplate' below, but couldn't find one that worked for me.
In my example, I would like to take all the 'uri' values under 'builds' in the JSON response and present them in the 'definition' inputDescriptor of the Artifact Source. All my attempts resulted in an empty combo box, even though I can see the request reaching the required API.
The JSON I would like to parse into the combo box:
{
  "builds": [
    {
      "uri": "/build1",
      "lastStarted": "2018-11-07T13:12:42.547+0000"
    },
    {
      "uri": "/build2",
      "lastStarted": "2018-11-09T15:40:30.315+0000"
    },
    {
      "uri": "/build3",
      "lastStarted": "2018-11-12T17:46:24.805+0000"
    }
  ],
  "uri": "https://<server-address>/api/build"
}
Can you please help me create the Mustache pattern to retrieve the above "uri" values?
I tried:
$.builds[*].uri
which doesn't seem to work.
Here's some more information in case it helps.
Service endpoint's dataSources:
"dataSources": [
{
"name": "TestConnection",
"endpointUrl": "{{endpoint.url}}/api/plugins",
"resourceUrl": "",
"resultSelector": "jsonpath:$.values[*]",
"headers": [],
"authenticationScheme": null
},
{
"name": "BuildNames",
"endpointUrl": "{{endpoint.url}}/api/build",
"resultSelector": "jsonpath:$.builds[*].uri"
},
{
"name": "BuildNumbers",
"endpointUrl": "{{endpoint.url}}/api/builds/{{definition}}",
"resultSelector": "jsonpath:$.buildsNumbers[*].uri"
}
]
Artifact source:
"inputDescriptors": [
{
"id": "connection",
"name": "Artifactory service",
"inputMode": "combo",
"isConfidential": false,
"hasDynamicValueInformation": true,
"validation": {
"isRequired": true,
"dataType": "string",
"maxLength": 300
}
},
{
"id": "definition",
"name": "definition",
"description": "Name of the build.",
"inputMode": "combo",
"isConfidential": false,
"dependencyInputIds": [
"connection"
],
"validation": {
"isRequired": true,
"dataType": "string",
"maxLength": 300
}
},
{
"id": "buildNumber",
"name": "Build Number",
"description": "Number of the build.",
"inputMode": "combo",
"isConfidential": false,
"dependencyInputIds": [
"connection"
],
"validation": {
"isRequired": true,
"dataType": "string",
"maxLength": 300
}
}
],
"dataSourceBindings": [
{
"target": "definition",
"dataSourceName": "BuildNames",
"resultTemplate": "{ Value : \"{{uri}}\", DisplayValue : \"{{uri}}\" }"
},
{
"target": "versions",
"dataSourceName": "BuildNumbers",
"resultTemplate": "{ Value : \"{{uri}}\", DisplayValue : \"{{uri}}\" }"
},
{
"target": "latestVersion",
"dataSourceName": "BuildNumbers",
"resultTemplate": "{ Value : \"{{uri}}\", DisplayValue : \"{{uri}}\" }"
},
{
"target": "artifactDetails",
"resultTemplate": "{ Name: \"{{version}}\", downloadUrl : \"{{endpoint.url}}\" }"
},
{
"target": "buildNumber",
"dataSourceName": "BuildNumbers",
"resultTemplate": "{ Value : \"{{uri}}\", DisplayValue : \"{{uri}}\" }"
}
]
}
Any help provided will be highly appreciated.
The working combination for this case is below. Two details matter: the resultSelector now selects the whole build objects (so uri is available to the resultTemplate), and the triple braces in {{{uri}}} keep Mustache from HTML-escaping the leading slash in values like "/build1".
dataSources:
{
  "name": "BuildNames",
  "endpointUrl": "{{endpoint.url}}/api/build",
  "resultSelector": "jsonpath:$.builds[*]"
}
dataSourceBindings:
{
  "target": "definition",
  "dataSourceName": "BuildNames",
  "resultTemplate": "{ \"Value\" : \"{{{uri}}}\", \"DisplayValue\" : \"{{{uri}}}\" }"
}
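Applied to the sample response above, the binding should then render one combo-box entry per build, along the lines of:
{ "Value" : "/build1", "DisplayValue" : "/build1" }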

Utilizing OrientDB ETL to create 2 vertices and a connected edge for every line of a CSV

I'm utilizing the OrientDB ETL tool to import a large amount of data (GBs). The format of the CSV is as follows (I'm using OrientDB 2.2):
"101.186.130.130","527225725","233 djfnsdkj","0.119836317542"
"125.143.534.148","112212983","1227 sdfsdfds","0.0465215171983"
"103.149.957.752","112364761","1121 sdfsdfds","0.0938863016658"
"103.190.245.128","785804692","6138 sdfsdfsd","0.117767539364"
I'm required to create two vertices per line: one with the value in column 1 (the key being the value itself), and another vertex holding the values in columns 2 and 3 (its key is the two values concatenated, and both values are present as attributes on the second vertex type). The 4th column will be a property of the edge connecting these two vertices.
I used the code below and it works, with some errors. One problem is that all the values in each CSV row are stored as properties on the IpAddress vertex; is there any way to store only the IP address in it? Secondly, can you please let me know how to concatenate two values read from the CSV?
{
  "source": { "file": { "path": "/home/abcd/OrientDB/examples/ip_address.csv" } },
  "extractor": { "csv": { "columnsOnFirstLine": false, "columns": ["ip:string", "dpcb:string", "address:string", "prob:string"] } },
  "transformers": [
    { "merge": { "joinFieldName": "ip", "lookup": "IpAddress.ip" } },
    { "edge": {
        "class": "Located",
        "joinFieldName": "address",
        "lookup": "PhyLocation.loc",
        "direction": "out",
        "targetVertexFields": { "geo_address": "${input.address}", "dpcb_number": "${input.dpcb}" },
        "edgeFields": { "confidence": "${input.prob}" },
        "unresolvedLinkAction": "CREATE"
      }
    }
  ],
  "loader": {
    "orientdb": {
      "dbURL": "remote:/localhost/Bulk_Transfer_Test",
      "dbType": "graph",
      "dbUser": "root",
      "dbPassword": "tiger",
      "serverUser": "root",
      "serverPassword": "tiger",
      "classes": [
        { "name": "IpAddress", "extends": "V" },
        { "name": "PhyLocation", "extends": "V" },
        { "name": "Located", "extends": "E" }
      ],
      "indexes": [
        { "class": "IpAddress", "fields": ["ip:string"], "type": "UNIQUE" },
        { "class": "PhyLocation", "fields": ["loc:string"], "type": "UNIQUE" }
      ]
    }
  }
}
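On the concatenation question, one possible direction (a sketch only, not verified against ETL 2.2): the field transformer used earlier in this document can set a property from an OrientDB SQL expression, so the lookup key for PhyLocation could be derived before the edge transformer runs, e.g.:
{ "field": { "fieldName": "loc", "expression": "dpcb.append(address)" } }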