How to get last N connected edges - orientdb

I'm writing a large scale chat application and here is my db schema:
/* Vertex class for chat users; `name` is its only declared property. */
CREATE CLASS User EXTENDS V;
CREATE PROPERTY User.name STRING;
/* Vertex class for messages, with a text body and a creation timestamp. */
CREATE CLASS Message EXTENDS V;
CREATE PROPERTY Message.text STRING;
CREATE PROPERTY Message.createdAt DATETIME;
/* Non-unique index on the message creation timestamp. */
CREATE INDEX Message.createdAt ON Message(createdAt) NOTUNIQUE;
/* Edge class used to link a User to a Message (see the CREATE EDGE usage). */
CREATE CLASS Send EXTENDS E;
I'm using lightweight edges and I have 200,000 edges connected to #12:0, created like this:
/* Sample data: one user, one message, and a Send edge from the user to the message. */
CREATE VERTEX User SET name = 'john';
/* #12:0 */
/* Populate the declared Message.text property; the schema defines no `content` property. */
CREATE VERTEX Message SET text = 'Lorem ipsum dolor sit amet', createdAt = SYSDATE();
/* #20:0 */
CREATE EDGE Send FROM #12:0 TO #20:0;
I want to get the last 5 messages that are connected to #12:0. I tried these queries:
Query 1:
/* Expand every vertex reached over outgoing Send edges from #12:0,
   then sort by creation time (newest first) and keep the first five. */
SELECT
FROM (
    SELECT EXPAND(OUT('Send'))
    FROM #12:0
)
ORDER BY createdAt DESC
LIMIT 5
it took ~2s
"explain" result:
{
"result": [
{
"#type": "d",
"#version": 0,
"documentReads": 200000,
"current": "#19:66661",
"recordReads": 200000,
"fetchingFromTargetElapsed": 377,
"expandElapsed": 0,
"orderByElapsed": 5,
"evaluated": 200000,
"elapsed": 2416.7283,
"resultType": "collection",
"resultSize": 5,
"#fieldTypes": "documentReads=l,current=x,recordReads=l,fetchingFromTargetElapsed=l,expandElapsed=l,orderByElapsed=l,evaluated=l,elapsed=f"
}
],
"notification": "Query executed in 2.427 sec. Returned 1 record(s)"
}
Query 2:
/* Last five of the 200,000 connected vertices: zero-based items 199995..199999.
   NOTE(review): assumes the [from-to] range is inclusive on both ends - confirm
   against the OrientDB version in use. */
SELECT OUT('Send')[199995-199999] FROM #12:0
it took ~6s
"explain" result:
{
"result": [
{
"#type": "d",
"#version": 0,
"documentReads": 1,
"current": "#12:0",
"recordReads": 1,
"optimizationElapsed": 0,
"fetchingFromTargetElapsed": 8749,
"evaluated": 1,
"elapsed": 8749.445,
"resultType": "collection",
"resultSize": 1,
"#fieldTypes": "documentReads=l,current=x,recordReads=l,optimizationElapsed=l,fetchingFromTargetElapsed=l,evaluated=l,elapsed=f"
}
],
"notification": "Query executed in 8.759 sec. Returned 1 record(s)"
}
is there any faster way to do this?
please don't refer to the chat use case...
i'm using orientdb 2.2.7

Finally! I found a fast way to do this.
I've changed my db schema to:
/* Vertex class for chat users. */
CREATE CLASS User EXTENDS V;
CREATE PROPERTY User.name STRING;
/* Message is declared without EXTENDS V, i.e. not as a vertex class. */
CREATE CLASS Message;
CREATE PROPERTY Message.text STRING;
/* Link from each message to the User who wrote it; used instead of a Send edge. */
CREATE PROPERTY Message.writer LINK User;
CREATE PROPERTY Message.createdAt DATETIME;
/* Non-unique hash index for the `writer = <rid>` lookup. */
CREATE INDEX Message.writer ON Message(writer) NOTUNIQUE_HASH_INDEX;
/* Non-unique index on the creation timestamp, the ORDER BY field of the query. */
CREATE INDEX Message.createdAt ON Message(createdAt) NOTUNIQUE;
I'm using Message.writer to connect to the User instead of using edge.
Query:
/* Page through the messages written by #12:0, newest first.
   `createdAt < sysdate()` is a deliberate dummy condition on the ORDER BY field:
   with it, the Message.createdAt index is used for the sort (see the explain output). */
SELECT FROM Message
WHERE createdAt < sysdate() AND writer = #12:0
ORDER BY createdAt DESC
SKIP 10 LIMIT 5
This takes ~30ms on 1.5 million records!
IMPORTANT: Notice the createdAt < sysdate() in the WHERE clause.
You have to write a dummy condition on the ORDER BY field; this makes ORDER BY use the createdAt index (~700ms faster).
"explain" result (with createdAt < sysdate()):
{
"result": [
{
"#type": "d",
"#version": 0,
"documentReads": 252,
"fullySortedByIndex": true,
"documentAnalyzedCompatibleClass": 252,
"recordReads": 252,
"fetchingFromTargetElapsed": 6,
"indexIsUsedInOrderBy": true,
"compositeIndexUsed": 1,
"current": "#49:1493348",
"involvedIndexes": [
"Message.createdAt"
],
"limit": 5,
"evaluated": 252,
"elapsed": 6.447579,
"resultType": "collection",
"resultSize": 5,
"#fieldTypes": "documentReads=l,documentAnalyzedCompatibleClass=l,recordReads=l,fetchingFromTargetElapsed=l,compositeIndexUsed=l,current=x,involvedIndexes=e,evaluated=l,user=x,elapsed=f"
}
],
"notification": "Query executed in 0.032 sec. Returned 1 record(s)"
}
"explain" result (without createdAt < sysdate()):
{
"result": [
{
"#type": "d",
"#version": 0,
"documentReads": 48512,
"fullySortedByIndex": false,
"documentAnalyzedCompatibleClass": 48512,
"recordReads": 48512,
"fetchingFromTargetElapsed": 801,
"indexIsUsedInOrderBy": false,
"compositeIndexUsed": 1,
"current": "#49:1499971",
"involvedIndexes": [
"Message.writer"
],
"limit": 5,
"orderByElapsed": 49,
"evaluated": 48512,
"elapsed": 853.48004,
"resultType": "collection",
"resultSize": 5,
"#fieldTypes": "documentReads=l,documentAnalyzedCompatibleClass=l,recordReads=l,fetchingFromTargetElapsed=l,compositeIndexUsed=l,current=x,involvedIndexes=e,orderByElapsed=l,evaluated=l,user=x,elapsed=f"
}
],
"notification": "Query executed in 0.864 sec. Returned 1 record(s)"
}

Related

POSTGRES: How to get a nested value as a new column from the function jsonb_recordset()

I am using the jsonb_to_recordset function to convert the keys of a JSON object into record columns. I am able to do that for the top-level keys. How can I do the same for nested keys?
screener_domain_results
[
{
"index": 0,
"score": 24,
"domain_id": "f758dc46-5107-40b1-ac54-b2419961b721",
"is_concern": true,
"total_count": 8,
"total_score": 40,
"sub_domian_results": [
{
"index": 0,
"score": 15,
"is_concern": false,
"total_count": 5,
"total_score": 25,
"sub_domain_id": "04bea66c-781f-48d6-9bbf-a39961e8dc7c",
"sub_domain_result_range": {
"range_max": 16,
"range_min": 5,
"range_meta": [],
"range_name": "Excelling",
"range_color": "#FFF400",
"entity_ref_id": "04bea66c-781f-48d6-9bbf-a39961e8dc7c",
"range_entity_type": "SubDomain"
},
{
"index": 1,
"score": 10,
"is_concern": false,
"total_count": 5,
"total_score": 25,
"sub_domain_id": "04bea66c-781f-48d6-9bbf-a39961e8dc7d",
"sub_domain_result_range": {
"range_max": 16,
"range_min": 5,
"range_meta": [],
"range_name": "Excelling",
"range_color": "#FFF400",
"entity_ref_id": "04bea66c-781f-48d6-9bbf-a39961e8dc7c",
"range_entity_type": "SubDomain"
} ]
By running the query below I am able to get the 2 columns domain_id and score, but I also want sub_domain_id and score, which are in the nested object.
-- Expand each element of the screener_domain_results array into one row,
-- exposing its top-level domain_id and score keys as text columns.
SELECT *
FROM cte
CROSS JOIN LATERAL jsonb_to_recordset(cte.screener_domain_results)
    AS items(domain_id text, score text);
I have tried to get the nested keys like this:
-- Failing attempt: entries in the column definition list are plain "name type"
-- pairs, so the `->` path expressions below are rejected with a syntax error.
select *
from cte
,jsonb_to_recordset(cte.screener_domain_results)
as items(domain_id text, score text , sub_domian_results->subdomain_id text , sub_domian_results->score text);
But I am getting a syntax error. Kindly advise how to resolve this.

Querying MongoDB collection consisting of one document which in turn is a multi-level nested object with objects/arrays nested inside

DB collection seatsObj:
{
"product_id": 46539040,
"freeSeating": false,
"tempTransId": "1ecae165f2d86315fea19963d0ded41a",
"seatLayout": {
"colAreas": {
"Count": 2,
"intMaxSeatId": 43,
"intMinSeatId": 2,
"objArea": [
{
"AreaDesc": "EXECUTIVE",
"AreaCode": "0000000003",
"AreaNum": "1",
"HasCurrentOrder": true,
"objRow": [
{
"GridRowId": 1,
"PhyRowId": "A",
"objSeat": [
{
"GridSeatNum": 1,
"SeatStatus": "1",
"seatNumber": 1,
"seatPrice": 400,
"ID": 111
},
{
"GridSeatNum": 2,
"SeatStatus": "0",
"seatNumber": 2,
"seatPrice": 450,
"ID": 112
},
I was able to find ways to locate and update specific fields using:
// Filter on a seatPrice of 470 inside objArea[0].objRow[0].objSeat, then use the
// positional `$` operator to set the ID of the matched objSeat element to 888.
seatsObj.updateOne(
{"seatLayout.colAreas.objArea.0.objRow.0.objSeat.seatPrice": 470},
{$set: {"seatLayout.colAreas.objArea.0.objRow.0.objSeat.$.ID": 888}});
But I cannot find a simple way to return a specific field value from an objSeat array element based on search criteria (for example: get 400 as the result of querying seatPrice for the seat with ID = 111). Could anyone point me in the right direction? From my initial research I would have to go into crazy nested $unwind and $objectToArray stages, etc. Isn't there a simpler way? Thank you!!

Is there a magic function which can extract all selected keys/nested keys, including arrays, from jsonb

Given a jsonb value and a set of keys, how can I get a new jsonb containing only the required keys?
I've tried extracting the key-values, assigning them to a text[] and then using jsonb_object(text[]). It works well, but the problem comes when a key holds an array of JSON objects.
-- Single-column table holding one JSON document per row.
create table my_jsonb_table
(
data_col jsonb
);
-- Sample document: top-level scalars, a Domains array of nested objects,
-- and the nested finalPrice / MetaData objects.
insert into my_jsonb_table (data_col) Values ('{
"schemaVersion": "1",
"Id": "20180601550002",
"Domains": [
{
"UID": "29aa2923",
"quantity": 1,
"item": "book",
"DepartmentDomain": {
"type": "paper",
"departId": "10"
},
"PriceDomain": {
"Price": 79.00,
"taxA": 6.500,
"discount": 0
}
},
{
"UID": "bbaa2923",
"quantity": 2,
"item": "pencil",
"DepartmentDomain": {
"type": "wood",
"departId": "11"
},
"PriceDomain": {
"Price": 7.00,
"taxA": 1.5175,
"discount": 1
}
}
],
"finalPrice": {
"totalTax": 13.50,
"total": 85.0
},
"MetaData": {
"shopId": "1405596346",
"locId": "95014",
"countryId": "USA",
"regId": "255",
"Date": "20180601"
}
}
');
This is what I am trying to achieve :
-- Desired usage only: some_magic_fun is the function being asked for, not an existing one.
-- The second argument is a comma-separated list of dotted key paths to keep.
SELECT some_magic_fun(data_col,'Id,Domains.UID,Domains.DepartmentDomain.departId,finalPrice.total')::jsonb FROM my_jsonb_table;
I am trying to create that magic function, which extracts the given keys and returns them as jsonb. So far I am able to extract scalar items, put them in a text[] and use jsonb_object, but I don't know how to extract all the elements of an array.
expected output :
{
"Id": "20180601550002",
"Domains": [
{
"UID": "29aa2923",
"DepartmentDomain": {
"departId": "10"
}
},
{
"UID": "bbaa2923",
"DepartmentDomain": {
"departId": "11"
}
}
],
"finalPrice": {
"total": 85.0
}
}
I don't know of any magic. You have to rebuild it yourself.
-- Rebuild each document by hand, keeping only the requested keys.
SELECT jsonb_build_object(
    -- Top-level scalar, copied as is.
    'Id', data_col -> 'Id',
    'Domains', (
        -- Unnest the Domains array into rows, trim every element down to the
        -- wanted keys, then aggregate the rows back into a single array.
        SELECT jsonb_agg(
            jsonb_build_object(
                'UID', elem -> 'UID',
                'DepartmentDomain', jsonb_build_object(
                    'departId', elem -> 'DepartmentDomain' -> 'departId'
                )
            )
        )
        FROM jsonb_array_elements(data_col -> 'Domains') AS elems(elem)
    ),
    -- Nested object reduced to its `total` key.
    'finalPrice', jsonb_build_object(
        'total', data_col -> 'finalPrice' -> 'total'
    )
)
FROM my_jsonb_table;
This probably is not a good use of a JSON column. Your data is relational and would better fit traditional relational tables.

Query for latest version of a document by date in mongoDB

I am trying to find a mongoDB script which will look at a collection where there are multiple records of the same document and only provide me with the latest version of each document as a result set.
I cannot explain it in English any better than above but maybe this little SQL below might explain it further. I want each document by transaction_reference but only the latest dated version (object_creation_date).
-- For every transaction_reference, return only the row carrying the newest
-- object_creation_date.
select
    t.transaction_reference,
    t.transaction_date,
    t.object_creation_date,
    t.transaction_sale_value
from MyTable t
inner join (
    -- Newest object_creation_date per transaction_reference.
    select
        transaction_reference,
        max(object_creation_date) as MaxDate
    from MyTable
    group by transaction_reference
) tm
    on t.transaction_reference = tm.transaction_reference
    -- Must reference the MaxDate alias defined in the subquery above.
    and t.object_creation_date = tm.MaxDate;
The reason why there are multiple versions of the same document is because I want to store each iteration of a transaction. The first time I receive a document, it may be in transaction_status of UNPAID then I receive the same transaction again and this time the transaction_status is PAID.
Some analysis will be to SUM all unique transactions whereas some other analysis may be to measure the time distance between a document with status UNPAID and the next of PAID.
As per request, here are two documents:
{
"_id": {
"$oid": "579aa337f36d2808839a05e8"
},
"object_class": "Goods & Services Transaction",
"object_category": "Revenue",
"object_type": "Transaction",
"object_origin": "Sage One",
"object_origin_category": "Bookkeeping",
"object_creation_date": "2016-07-05T00:00:00.201Z",
"party_uuid": "dfa1e80a-5521-11e6-beb8-9e71128cae77",
"connection_uuid": "b945bd7c-7988-4d2a-92f5-8b50ab218e00",
"transaction_reference": "SI-1",
"transaction_status": "UNPAID",
"transaction_date": "2016-06-16T00:00:00.201Z",
"transaction_due_date": "2016-07-15T00:00:00.201Z",
"transaction_currency": "GBP",
"goods_and_services": [
{
"item_identifier": "PROD01",
"item_name": "Product One",
"item_quantity": 1,
"item_gross_unit_sale_value": 1800,
"item_revenue_category": "Sales Revenue",
"item_net_unit_cost_value": null,
"item_net_unit_sale_value": 1500,
"item_unit_tax_value": 300,
"item_net_total_sale_value": 1500,
"item_gross_total_sale_value": 1800,
"item_tax_value": 300
}
],
"transaction_gross_value": 1800,
"transaction_gross_curr_value": 1800,
"transaction_net_value": 1500,
"transaction_cost_value": null,
"transaction_payments_value": null,
"transaction_payment_extras_value": null,
"transaction_tax_value": 300,
"party": {
"customer": {
"customer_identifier": "11",
"customer_name": "KP"
}
}
}
and second version where it is paid now
{
"_id": {
"$oid": "579aa387f36d2808839a05ee"
},
"object_class": "Goods & Services Transaction",
"object_category": "Revenue",
"object_type": "Transaction",
"object_origin": "Sage One",
"object_origin_category": "Bookkeeping",
"object_creation_date": "2016-07-16T00:00:00.201Z",
"party_uuid": "dfa1e80a-5521-11e6-beb8-9e71128cae77",
"connection_uuid": "b945bd7c-7988-4d2a-92f5-8b50ab218e00",
"transaction_reference": "SI-1",
"transaction_status": "PAID",
"transaction_date": "2016-06-16T00:00:00.201Z",
"transaction_due_date": "2016-07-15T00:00:00.201Z",
"transaction_currency": "GBP",
"goods_and_services": [
{
"item_identifier": "PROD01",
"item_name": "Product One",
"item_quantity": 1,
"item_gross_unit_sale_value": 1800,
"item_revenue_category": "Sales Revenue",
"item_net_unit_cost_value": null,
"item_net_unit_sale_value": 1500,
"item_unit_tax_value": 300,
"item_net_total_sale_value": 1500,
"item_gross_total_sale_value": 1800,
"item_tax_value": 300
}
],
"transaction_gross_value": 1800,
"transaction_gross_curr_value": 1800,
"transaction_net_value": 1500,
"transaction_cost_value": null,
"transaction_payments_value": null,
"transaction_payment_extras_value": null,
"transaction_tax_value": 300,
"party": {
"customer": {
"customer_identifier": "11",
"customer_name": "KP"
}
}
}
Thanks for your support, Matt
If I understand the question correctly you could use something like this
db.getCollection('yourTransactionsCollection').aggregate([
    // Order the versions of each transaction_reference newest-first, so that
    // $first in the $group stage picks its values from the latest version.
    {
        $sort: {
            "transaction_reference": 1,
            "object_creation_date": -1
        }
    },
    // Collapse to one result document per transaction_reference.
    {
        $group: {
            _id: "$transaction_reference",
            "transaction_date": { $first: "$transaction_date" },
            // Read the creation date from its own field, not from $transaction_date,
            // so the result reports the latest version's object_creation_date.
            "object_creation_date": { $first: "$object_creation_date" },
            "transaction_sale_value": { $first: "$transaction_sale_value" }
        }
    }
])
which outputs a result like the following
{
"_id" : "SI-1",
"transaction_date" : "2016-06-16T00:00:00.201Z",
"object_creation_date" : "2016-06-16T00:00:00.201Z",
"transaction_sale_value" : null
}
Note that you can change the $sort to just include the object_creation_date but I included both transaction_reference and object_creation_date as I think it would make sense to create a composite index on both of them instead of just the creation date. Adjust that according to your indexes so that the $sort will hit one.
In addition there was no document field transaction_sale_value hence the null for it in the result. Maybe you missed that or it is just not in your sample documents but I think you get the idea and can adjust it to your needs.

Pulling out latest (multiple) entries from MongoDB

I am trying to retrieve information on how many attempts a user takes to solve a particular problem as a JSON from a mongodb database. If there are multiple attempts on the same problem, I would only like to pull out the last entry - for instance, right now, if I do a db.proficiencies.find() - I will pull out entries A, B, C, and D but I would like to only pull out entries B and D (latest entries for the problems maze and circle respectively).
Is there an easy way to do so?
Entry A
{
"problem": "maze",
"courseLesson": "elementary_one, 1",
"studentId": "51ed51d0fcb4cc3696000001",
"studentName": "Sarah",
"_id": "51ed51defcb4cc3696000011",
"__v": 0,
"date": "2013-07-22T15:38:06.259Z",
"numberOfAttemptsBeforeSolved": 1
}
Entry B
{
"problem": "maze",
"courseLesson": "elementary_one, 1",
"studentId": "51ed51d0fcb4cc3696000001",
"studentName": "Sarah",
"_id": "51ed51defcb4cc3696000011",
"__v": 0,
"date": "2013-07-27T15:38:06.259Z",
"numberOfAttemptsBeforeSolved": 1
}
Entry C
{
"problem": "circle",
"courseLesson": "elementary_one, 1",
"studentId": "51ed51d0fcb4cc3696000001",
"studentName": "Sarah",
"_id": "51ed51defcb4cc3696000011",
"__v": 0,
"date": "2013-07-22T15:38:06.259Z",
"numberOfAttemptsBeforeSolved": 2
}
Entry D
{
"problem": "circle",
"courseLesson": "elementary_one, 1",
"studentId": "51ed51d0fcb4cc3696000001",
"studentName": "Sarah",
"_id": "51ed51defcb4cc3696000011",
"__v": 0,
"date": "2013-07-27T15:38:06.259Z",
"numberOfAttemptsBeforeSolved": 4
}
// Schema for one proficiency record: a student's attempt count on a problem.
var ProficiencySchema = new Schema({
    problem: String,
    numberOfAttemptsBeforeSolved: { type: Number, default: 0 },
    // Identifies the lesson, e.g. "elementary_one, 2" is lesson 2 of elementary_one.
    courseLesson: String,
    date: { type: Date, default: Date.now },
    studentId: Schema.Types.ObjectId,
    studentName: String
});
The best way to do this would be to sort the results in descending date-time order (so the latest response is first) and then to limit the result set by one. This would look something like:
// YOUR QUERY is a placeholder for the filter document. Sorting by date descending
// and limiting to 1 returns only the single newest document matching that filter.
db.proficiencies.find(YOUR QUERY).sort({'date': -1}).limit(1)