orientdb oetl error / I want to have pokec data - orientdb

I'm trying to load the Pokec profile data into OrientDB by running ./oetl.sh with the configuration below. However, the pipeline keeps exiting, and I don't know what the problem is. Please help. (The dbURL was set correctly in my real configuration; I have erased it here.) I've tried all the settings I could, but none of them work. With this configuration the data is loaded correctly, but the process does not shut down automatically.
{
"config": {
"parallel": true
},
"source": {
"file": {
"path": "/home/yuna/soc-pokec-profiles.txt",
"lock" : true ,
"encoding" : "UTF-8"
}
},
"extractor": { "row": {} },
"transformers": [
{ "csv": {"columns":["id","public","completion_percentage","gender","region","region2","last_login","registration","AGE","body","I_am_working_in_field","spoken_languages","hobbies","I_most_enjoy_good_food","pets","body_type","my_eyesight","eye_color","hair_color","hair_type","completed_level_of_education","favourite_color","relation_to_smoking","relation_to_alcohol","on_pokec_i_am_looking_for","love_is_for_me","relation_to_casual_sex","my_partner_should_be","marital_status","children","relation_to_children","I_like_movies","I_like_watching_movie","I_like_music","I_mostly_like_listening_to_music","the_idea_of_good_evening","I_like_specialties_from_kitchen","fun","I_am_going_to_concerts","my_active_sports","my_passive_sports","profession","I_like_books","life_style","music","cars","politics","relationships","art_culture","hobbies_interests","science_technologies","computers_internet","education","sport","movies","travelling","health","companies_brands","more"],"separator": "/t","nullValue": "NULL"} },
{ "vertex": { "class": "Profile"} },
{"field":
{ "fieldName" : "id",
"expression" : "id.prefix('P')"
}
},
{"field":
{ "fieldNames" :
["region2","body","I_am_working_in_field","spoken_languages","hobbies","I_most_enjoy_good_food","pets","body_type","my_eyesight","eye_color","hair_color","hair_type","completed_level_of_education","favourite_color","relation_to_smoking","relation_to_alcohol","on_pokec_i_am_looking_for","love_is_for_me","relation_to_casual_sex","my_partner_should_be","marital_status","children","relation_to_children","I_like_movies","I_like_watching_movie","I_like_music","I_mostly_like_listening_to_music","the_idea_of_good_evening","I_like_specialties_from_kitchen","fun","I_am_going_to_concerts","my_active_sports","my_passive_sports","profession","I_like_books","life_style","music","cars","politics","relationships","art_culture","hobbies_interests","science_technologies","computers_internet","education","sport","movies","travelling","health","companies_brands","more"],
"operation": "remove"
}
}
],
"loader": {
"orientdb": {
"dbURL": "",
"dbType": "graph",
"wal": false,
"batchCommit": 10000,
"useLightweightEdges" : true,
"dbAutoCreateProperties": true,
"classes": [
{"name": "Profile", "extends": "V", "clusters": 3},
{"name": "Relation", "extends": "E"}
], "indexes": [
{"class":"Profile", "fields":["id:string"], "type":"UNIQUE" }
],
"settings": {
}
}
}

Related

Inserting data in private ipfs network from outside of server throws timeout error

I installed a private IPFS network on my server and tested it by adding a file and retrieving it, which works perfectly. Now, when I follow the same steps from my local machine, getting data works with the URL http://{server's ip}:8080/ipfs/{hash}, but when I created an API in Node.js to insert data using the URL http://{server's ip}:5001, I get a timeout error. The same API works when it is deployed on the server and the server's IP is changed to localhost.
IPFS config is as follows -
{
"API": {
"HTTPHeaders": {
"Access-Control-Allow-Methods": [
"PUT",
"GET",
"POST"
],
"Access-Control-Allow-Origin": [
"*"
]
}
},
"Addresses": {
"API": "/ip4/0.0.0.0/tcp/5001",
"Announce": [],
"Gateway": "/ip4/0.0.0.0/tcp/8080",
"NoAnnounce": [],
"Swarm": [
"/ip4/0.0.0.0/tcp/4001",
"/ip6/::/tcp/4001"
]
},
"Bootstrap": [
"/ip4/{server's ip}/tcp/4001/ipfs/<peer identity hash of bootnode>"
],
"Datastore": {
"BloomFilterSize": 0,
"GCPeriod": "1h",
"HashOnRead": false,
"Spec": {
"mounts": [
{
"child": {
"path": "blocks",
"shardFunc": "/repo/flatfs/shard/v1/next-to-last/2",
"sync": true,
"type": "flatfs"
},
"mountpoint": "/blocks",
"prefix": "flatfs.datastore",
"type": "measure"
},
{
"child": {
"compression": "none",
"path": "datastore",
"type": "levelds"
},
"mountpoint": "/",
"prefix": "leveldb.datastore",
"type": "measure"
}
],
"type": "mount"
},
"StorageGCWatermark": 90,
"StorageMax": "10GB"
},
"Discovery": {
"MDNS": {
"Enabled": true,
"Interval": 10
}
},
"Experimental": {
"FilestoreEnabled": false,
"Libp2pStreamMounting": false,
"P2pHttpProxy": false,
"QUIC": false,
"ShardingEnabled": false,
"UrlstoreEnabled": false
},
"Gateway": {
"APICommands": [],
"HTTPHeaders": {
"Access-Control-Allow-Headers": [
"X-Requested-With",
"Range"
],
"Access-Control-Allow-Methods": [
"GET"
],
"Access-Control-Allow-Origin": [
"*"
]
},
"PathPrefixes": [],
"RootRedirect": "",
"Writable": false
},
"Identity": {
"PeerID": "<peer identity hash of bootnode>"
},
"Ipns": {
"RecordLifetime": "",
"RepublishPeriod": "",
"ResolveCacheSize": 128
},
"Mounts": {
"FuseAllowOther": false,
"IPFS": "/ipfs",
"IPNS": "/ipns"
},
"Pubsub": {
"DisableSigning": false,
"Router": "",
"StrictSignatureVerification": false
},
"Reprovider": {
"Interval": "12h",
"Strategy": "all"
},
"Routing": {
"Type": "dht"
},
"Swarm": {
"AddrFilters": null,
"ConnMgr": {
"GracePeriod": "20s",
"HighWater": 900,
"LowWater": 600,
"Type": "basic"
},
"DisableBandwidthMetrics": false,
"DisableNatPortMap": false,
"DisableRelay": false,
"EnableRelayHop": true
}
}

Google smart home action always failing on command execution

I am developing the FanSpeed and ColorSetting traits for Google Actions, but whenever I execute a command to set the fan speed or the light colour, it fails. Here is my JSON for the sync, query and execute steps:
sync
{
"requestId":"d25ty67q-98jui-581aa-j891-b1f6dhuas"
"payload":{
"devices": [
{
"id": "AA96A0#16",
"type": "action.devices.types.FAN",
"traits": [
"action.devices.traits.OnOff",
"action.devices.traits.FanSpeed"
],
"roomHint": "ROOM",
"deviceInfo": {
"manufacturer": "smart Homes"
},
"name": {
"defaultNames": [
"fan"
],
"name": "fan"
},
"willReportState": true,
"customData": {
},
"attributes": {
"commandOnlyOnOff": false,
"commandOnlyFanSpeed": false,
"reversible": false,
"availableFanSpeeds": {
"speeds": [
{"speed_name": "speed_low", "speed_values": [{"speed_synonym": ["low", "low speed", "slow"], "lang": "en"}]},
{"speed_name": "speed_medium", "speed_values": [{"speed_synonym": ["medium", "medium speed", "med"], "lang": "en"}]},
{"speed_name": "speed_high", "speed_values": [{"speed_synonym": [ "high speed", "high"], "lang": "en"}]},
{"speed_name": "speed_highest", "speed_values": [{"speed_synonym": [ "highest speed", "highest"], "lang": "en"}]}
],
"ordered": true
}
}
},
{
"id": "240A50#0",
"type": "action.devices.types.LIGHT",
"traits": [
"action.devices.traits.OnOff",
"action.devices.traits.ColorSetting"
],
"roomHint": "ROOM",
"deviceInfo": {
"manufacturer": "smart Homes"
},
"name": {
"defaultNames": [
"light"
],
"name": "light"
},
"willReportState": false,
"customData": {
},
"attributes": {
"commandOnlyColorSetting": true,
"colorModel": "hsv"
}
}
],
"agentUserId": "Home555"
}
}
Execute Fan:
{
"commands": [
{
"ids": [
"AA96A0#16"
],
"status": "SUCCESS",
"states": {
"online": true,
"on": true,
"currentFanSpeedSetting": "speed_highest"
}
}
]
}
Query Fan:
{
"requestId":"167278043664013971",
"payload":{
"devices": {
"AA96A0#16": {
"status": "SUCCESS",
"online": true,
"on": true,
"currentFanSpeedSetting": "speed_highest"
}
}
}
}
Execute color-light:
{
"commands": [
{
"ids": [
"240A50#0"
],
"status": "SUCCESS",
"states": {
"online": true,
"on": true,
"color": {
"spectrumHSV": {
"hue": 120,
"saturation": 1,
"value": 1
}
}
}
}
]
}
Query color-light:
{
"requestId":"167278043664013971",
"payload":{
"devices": {
"240A50#0": {
"status": "SUCCESS",
"online": true,
"on": true,
"color": {
"spectrumHSV": {
"hue": 0,
"saturation": 0,
"value": 1
}
}
}
}
}
}
Speaker response: Sorry, it looks like smart homes is unavailable right now.
However, my request is always executed and I can see the state change on my device. Can anyone point out why it is failing?
The JSON for execute, for both the fan and the bulb, is missing the requestId field. This might be causing the failures with the execution commands.

OrientDB ETL loading CSV with vertices in one file and edges in another

I have some data in 2 CSV files: one contains the vertices and the other contains the edges. I'm working out how to set this up using ETL and am close but not quite there yet--it mostly works, but my edges have properties and I'm not sure that they're loading right. This question was helpful but I'm still missing something...
Here's my data:
vertices.csv:
label,data,date
v01,0.1234,2015-01-01
v02,0.5678,2015-01-02
v03,0.9012,2015-01-03
edges.csv:
u,v,weight,date
v01,v02,12.4,2015-06-17
v02,v03,17.9,2015-09-14
I import my vertices using this:
commonVertices.json:
{
  "begin": [
    { "let": { "name": "$filePath",
               "expression": "$fileDirectory.append($fileName)"
             }
    }
  ],
  "config": { "log": "info" },
  "source": { "file": { "path": "$filePath" } },
  "extractor": { "csv": { "ignoreEmptyLines": true,
                          "nullValue": "N/A",
                          "dateFormat": "yyyy-mm-dd"
                        }
               },
  "transformers": [
    { "vertex": { "class": "myVertex" } },
    { "code": { "language": "Javascript",
                "code": "print(' Current record: ' + record); record;" }
    }
  ],
  "loader": { "orientdb": {
      "dbURL": "plocal:my_orientdb",
      "dbType": "graph",
      "batchCommit": 1000,
      "classes": [ { "name": "myVertex", "extends": "V" } ],
      "indexes": []
    }
  }
}
vertices.json:
{ "config": { "log": "info",
"fileDirectory": "./",
"fileName": "vertices.csv"
}
}
commonEdges.json:
{
  "begin": [
    { "let": { "name": "$filePath",
               "expression": "$fileDirectory.append($fileName )"
             }
    }
  ],
  "config": { "log": "info" },
  "source": { "file": { "path": "$filePath" } },
  "extractor": { "csv": { "ignoreEmptyLines": true,
                          "nullValue": "N/A",
                          "dateFormat": "yyyy-mm-dd"
                        }
               },
  "transformers": [
    { "merge": { "joinFieldName": "u", "lookup": "myVertex.label" } },
    { "edge": { "class": "myEdge",
                "joinFieldName": "v",
                "lookup": "myVertex.label",
                "direction": "out",
                "unresolvedLinkAction": "NOTHING"
              }
    },
    { "field": { "fieldNames": ["u", "v"], "operation": "remove" } }
  ],
  "loader": {
    "orientdb": {
      "dbURL": "plocal:my_orientdb",
      "dbType": "graph",
      "batchCommit": 1000,
      "useLightweightEdges": false,
      "classes": [
        { "name": "myEdge", "extends": "E" }
      ],
      "indexes": []
    }
  }
}
edges.json:
{
"config": {
"log": "info",
"fileDirectory": "./",
"fileName": "edges.csv"
}
}
I am running it with oetl.sh like this:
$ oetl.sh vertices.json commonVertices.json
$ oetl.sh edges.json commonEdges.json
Everything runs, but when I query the edges... I'm new to OrientDB, so maybe it is getting the properties in my edges, but when I query the edges I don't see the weight and date fields:
orientdb {db=my_orientdb}> SELECT FROM myEdge
+----+-----+------+-----+-----+
|# |#RID |#CLASS|out |in |
+----+-----+------+-----+-----+
|0 |#33:0|myEdge|#25:0|#26:0|
|1 |#34:0|myEdge|#26:0|#27:0|
+----+-----+------+-----+-----+
The vertex table contains the [weight] field from my edges.csv and the [date] field is getting clobbered in a weird way. The day of the month is getting overwritten with the day from the edges.csv file, which is undesirable, but it's odd to me that the month itself isn't also getting changed:
orientdb {db=my_orientdb}> SELECT FROM myVertex
+----+-----+--------+------+-------------------+-----+------+----------+---------+
|# |#RID |#CLASS |data |date |label|weight|out_myEdge|in_myEdge|
+----+-----+--------+------+-------------------+-----+------+----------+---------+
|0 |#25:0|myVertex|0.1234|2015-01-17 00:06:00|v01 |12.4 |[#33:0] | |
|1 |#26:0|myVertex|0.5678|2015-01-14 00:09:00|v02 |17.9 |[#34:0] |[#33:0] |
|2 |#27:0|myVertex|0.9012|2015-01-03 00:01:00|v03 | | |[#34:0] |
+----+-----+--------+------+-------------------+-----+------+----------+---------+
I'm sure it's probably a simple tweak, any help would be great!
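In the edge transformer, use edgeFields to bind properties to the edges. (As for the odd dates: in Java date patterns lowercase mm means minutes, so dateFormat should be yyyy-MM-dd; with yyyy-mm-dd the month value is parsed as minutes, which is why 06, 09 and 01 show up in the minutes position of your output while the month stays at 01.) Example: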
"transformers": [
{ "merge": { "joinFieldName": "u", "lookup": "myVertex.label" } },
{ "edge": { "class": "myEdge",
"joinFieldName": "v",
"lookup": "myVertex.label",
"edgeFields": { "weight": "${input.weight}", "date": "${input.date}" },
"direction": "out",
"unresolvedLinkAction": "NOTHING"
}
},
{ "field": { "fieldNames": ["u", "v"], "operation": "remove" } }
],
Hope it helps.

OrientDB ETL: how to skip a duplicate vertex but create the edge

I am creating a communication graph.
Each message has a msgid and each person has a userid.
I have already created the message vertices, now i want to create the user vertices and an edge connecting a message vertex to the user vertex.
A user can get multiple messages (obviously).
My file contains:
msgid, userid, (and some other info i will assign to the edge)
The issue that I am having is that in my file I have duplicate userids (because users can get multiple messages). I don't want to create another vertex with the same user id, so I use skipDuplicates. But if I do skip duplicates, the edge will not get created either. I do want multiple edges to the same user vertex, as each edge represents one message.
How do i keep the User vertex unique but create the edge?
My current ETL .json file that works fine with the exception of what i have detailed above.
{
"source": { "file": { "path": "msgs.txt" } },
"extractor": { "row": {} },
"transformers": [
{ "csv": {"separator": "\t"} },
{ "vertex": { "class": "user", "skipDuplicates": true } },
{ "edge": { "class": "sent_to", "joinFieldName": "msgid", "lookup":"message.id","direction": "in" } },
"edgeFields": { "n": "${input.n}" }
],
"loader": {
"orientdb": {
"dbURL": "remote:/localhost/databases/communication",
"dbType": "graph",
"classes": [
{"name": "user", "extends": "V"},
{"name": "message", "extends": "V"},
{"name": "sent_to", "extends": "E"}
], "indexes": [
{"class":"user", "fields":["id"], "type":"UNIQUE" }
]
}
}
}
Okay, here is what i did and it seemed to work.
First i created the message vertices (as stated above, in the q.).
Then i created the user vertices.
Then to create the edge in between them i ran the following ETL on a file that had {userid, msgid, ...}
{
"source": { "file": { "path": "msgs1.txt" } },
"extractor": { "row": {} },
"transformers": [
{ "csv": {"separator": "\t"} },
{ "merge": {"joinFieldName": "userid", "lookup": "user.id"} },
{ "vertex": { "class": "user", "skipDuplicates": true } },
{ "edge": { "class": "sent_to",
"joinFieldName": "msgid",
"lookup":"message.id",
"direction": "in",
"edgeFields": { "n": "${input.n}", "date": "${input.date}"}
}
}
],
"loader": {
"orientdb": {
"dbURL": "remote:/localhost/databases/communication",
"dbType": "graph",
"classes": [
{"name": "user", "extends": "V"},
{"name": "message", "extends": "V"},
{"name": "sent_to", "extends": "E"}
],
"indexes": [
]
}
}
}
This created all the edges, even if there was more than one edge pointing to a user.
Hopefully this will help someone

Handle UUID when importing from PostgreSQL to OrientDB using ETL

I have a graph in PostgreSQL with two tables:
vertex (id:uuid)
edge (from:uuid, to:uuid)
I use etl to transform it to orientdb for vertex:
{
"config": {
"log": "debug"
},
"extractor" : {
"jdbc": { "driver": "org.postgresql.Driver",
"url": "jdbc:postgresql://localhost/test",
"userName": "postgres",
"userPassword": "123456",
"query": "select id from vertex" }
},
"transformers" : [
{ "vertex": { "class": "vertex"} }
],
"loader" : {
"orientdb": {
"dbURL": "PLOCAL:../databases/test",
"dbUser": "admin",
"dbPassword": "admin",
"dbAutoCreate": true,
"dbAutoDropIfExists": false
}
}
}
When browsing the result in OrientDB, it seemed that the value I see in the id field is not consistent with the result I get when querying PostgreSQL.
Then I run etl for edge:
{
"config": {
"parallel" : true,
"log": "debug"
},
"extractor" : {
"jdbc": { "driver": "org.postgresql.Driver",
"url": "jdbc:postgresql://localhost/test",
"userName": "postgres",
"userPassword": "123456",
"query": "select from, to from edge"
}
},
"transformers": [
{ "merge": { "joinFieldName": "from", "lookup": "vertex.id" } },
{ "vertex": {"class": "vertex", "skipDuplicates": true} },
{ "edge": { "class": "has_edge", "joinFieldName": "to", "lookup": "Vertex.id", "direction": "in" } },
{ "field": { "fieldNames": ["from", "to"], "operation": "remove" } }
],
"loader" : {
"orientdb": {
"dbURL": "PLOCAL:../databases/test",
"dbUser": "admin",
"dbPassword": "admin",
"dbAutoCreate": true,
"dbAutoDropIfExists": false
}
}
}
The debug output printed to the console showed that there were some edges for which the ETL processor could not look up the references for from or to. These messages use the UUIDs from PostgreSQL. Since they are inconsistent with the values I can query in OrientDB, it is hard to figure out which edges are causing the problem.
My question is: how can I configure my ETL so that UUIDs from PostgreSQL are imported properly into OrientDB?