Query jsonb array column - postgresql

I'm not having any luck querying a jsonb[] column (attributes). I couldn't find anything in the PostgreSQL docs about arrays of jsonb.
My data in the column is a bunch of objects in an array like this:
1 - [{ "trait_type": "color", "value": "blue" },{ "trait_type": "hair", "value": "short" }]
2 - [{ "trait_type": "color", "value": "red" },{ "trait_type": "hair", "value": "short" }]
3 - [{ "trait_type": "color", "value": "green" },{ "trait_type": "hair", "value": "long" }]
4 - [{ "trait_type": "color", "value": "blue" },{ "trait_type": "hair", "value": "med" }]
I tried this:
SELECT * FROM public.items WHERE attributes @> '[{ "trait_type": "color", "value": "blue" }]';
...but I get this error:
Failed to run sql query: malformed array literal: "[{ "trait_type": "color", "value": "blue" }]"
I also tried:
SELECT * FROM public.items WHERE attributes @> ARRAY['{ "trait_type": "color", "value": "blue" }'];
...but I get this error:
Failed to run sql query: operator does not exist: jsonb[] @> text[]
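A hedged sketch of one way to query such a column, assuming attributes really is declared as jsonb[] (a PostgreSQL array of jsonb values, one object per element, which is what the error messages suggest): unnest the array and apply jsonb containment to each element.
-- Minimal sketch, assuming attributes is jsonb[]: unnest each row's array
-- and test every element with the jsonb containment operator @>.
SELECT i.*
FROM public.items i
WHERE EXISTS (
  SELECT 1
  FROM unnest(i.attributes) AS attr
  WHERE attr @> '{"trait_type": "color", "value": "blue"}'
);
If the column were plain jsonb holding a JSON array rather than jsonb[], the first query above would work as written.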

Update field in array of objects in json postgres

I have a Postgres database with the following JSON in my table:
{"jobTitle":"Technical Writer","distance":60,"skills":[{"name":"Bootstrap (Front-End Framework)","isPrimary":true,"type":""}, {"name":"Javascript","isPrimary":false,"type":""}]}
I need to update the "skills" field depending on the "isPrimary" value of each item in it. If "isPrimary" is false, update it to 1, otherwise to 5.
I have the following query:
UPDATE t
SET body = jsonb_set(body::jsonb, '{skills}', s.skills_updated::jsonb)
FROM (
  SELECT jsonb_agg(
           jsonb_set(skills::jsonb, '{isPrimary}',
             CASE
               WHEN skills ->> 'isPrimary' = 'true' THEN '5'::jsonb
               ELSE '1'::jsonb
             END
           )
         ) as skills_updated
  FROM t,
       json_array_elements(body -> 'skills') as skills
) s;
This query works incorrectly: it accumulates all skills across all records in the table and then sets this whole array in each record. Here is my fiddle: https://dbfiddle.uk/m28W7tPy
For instance, if I have the following three records:
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": "true", "name": "Node.js" },{ "isPrimary": "false", "name": "Python" },{ "isPrimary": "false", "name": "Javascript" }]}
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": "true", "name": "Mysql" },{ "isPrimary": "false", "name": "Nest.js" }]}
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": "true", "name": "Postgres" },{ "isPrimary": "false", "name": "Typescript" }]}
I want to update just "isPrimary" for all of the above records and get the following result:
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": 5, "name": "Node.js" },{ "isPrimary": 1, "name": "Python" },{ "isPrimary": 1, "name": "Javascript" }]}
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": 5, "name": "Mysql" },{ "isPrimary": 1, "name": "Nest.js" }]}
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": 5, "name": "Postgres" },{ "isPrimary": 1, "name": "Typescript" }]}
whereas after running my query I get:
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": 5, "name": "Node.js" },{ "isPrimary": 1, "name": "Python" },{ "isPrimary": 1, "name": "Javascript" }, { "isPrimary": 5, "name": "Mysql" },{ "isPrimary": 1, "name": "Nest.js" }, { "isPrimary": 5, "name": "Postgres" },{ "isPrimary": 1, "name": "Typescript" }]}
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": 5, "name": "Node.js" },{ "isPrimary": 1, "name": "Python" },{ "isPrimary": 1, "name": "Javascript" }, { "isPrimary": 5, "name": "Mysql" },{ "isPrimary": 1, "name": "Nest.js" }, { "isPrimary": 5, "name": "Postgres" },{ "isPrimary": 1, "name": "Typescript" }]}
{"jobTitle":"Writer","distance":60, "skills": [{ "isPrimary": 5, "name": "Node.js" },{ "isPrimary": 1, "name": "Python" },{ "isPrimary": 1, "name": "Javascript" }, { "isPrimary": 5, "name": "Mysql" },{ "isPrimary": 1, "name": "Nest.js" }, { "isPrimary": 5, "name": "Postgres" },{ "isPrimary": 1, "name": "Typescript" }]}
Can anyone help?
The issue with your query is that you're not keeping track of each "id" value, nor are you matching on it inside the UPDATE statement. Adding the "id" selection inside the subquery, and a final WHERE condition to the UPDATE statement, solves the problem.
UPDATE t
SET body = jsonb_set(body::jsonb, '{skills}', s.skills_updated::jsonb)
FROM (
  SELECT id,
         jsonb_agg(
           jsonb_set(skills::jsonb, '{isPrimary}',
             CASE
               WHEN skills ->> 'isPrimary' = 'true' THEN '5'::jsonb
               ELSE '1'::jsonb
             END
           )
         ) as skills_updated
  FROM t,
       json_array_elements(body -> 'skills') as skills
  GROUP BY id
) s
WHERE t.id = s.id;
Check the demo here.
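A hedged alternative sketch of the same per-row rewrite, assuming the same t(id, body) table as above: a correlated scalar subquery in SET avoids the join and GROUP BY entirely, since each row only ever sees its own skills array.
-- Minimal sketch (assumes t(id, body) as in the question); each row rewrites
-- only its own skills array via a correlated subquery.
UPDATE t
SET body = jsonb_set(
  body::jsonb,
  '{skills}',
  (
    SELECT jsonb_agg(
             jsonb_set(skill::jsonb, '{isPrimary}',
               CASE
                 WHEN skill ->> 'isPrimary' = 'true' THEN '5'::jsonb
                 ELSE '1'::jsonb
               END
             )
           )
    FROM json_array_elements(body -> 'skills') AS skill
  )
);
-- Note: rows whose skills array is empty would end up with a NULL body here;
-- wrapping the subquery in COALESCE guards against that if such rows exist.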

How to select filtered postgresql jsonb field with performance prioritization?

A table:
CREATE TABLE events_holder(
  id serial primary key,
  version int not null,
  data jsonb not null
);
The data field can be very large (up to 100 MB) and looks like this:
{
"id": 5,
"name": "name5",
"events": [
{
"id": 255,
"name": "festival",
"start_date": "2022-04-15",
"end_date": "2023-04-15",
"values": [
{
"id": 654,
"type": "text",
"name": "importance",
"value": "high"
},
{
"id": 655,
"type": "boolean",
"name": "epic",
"value": "true"
}
]
},
{
"id": 256,
"name": "discovery",
"start_date": "2022-02-20",
"end_date": "2022-02-22",
"values": [
{
"id": 711,
"type": "text",
"name": "importance",
"value": "low"
},
{
"id": 712,
"type": "boolean",
"name": "specificAttribute",
"value": "false"
}
]
}
]
}
I want to select the data field by version, but filtered with an extra condition: only events whose end_date > '2022-03-15'. The output must look like this:
{
"id": 5,
"name": "name5",
"events": [
{
"id": 255,
"name": "festival",
"start_date": "2022-04-15",
"end_date": "2023-04-15",
"values": [
{
"id": 654,
"type": "text",
"name": "importance",
"value": "high"
},
{
"id": 655,
"type": "boolean",
"name": "epic",
"value": "true"
}
]
}
]
}
How can I do this with maximum performance? How should I index the data field?
My initial solution:
with cte as (
  select eh.id, eh.version, jsonb_agg(events) as filteredEvents
  from events_holder eh
  cross join jsonb_array_elements(eh.data #> '{events}') as events
  where version = 1 and (events ->> 'end_date')::timestamp >= '2022-03-15'::timestamp
  group by id, version
)
select jsonb_set(data, '{events}', cte.filteredEvents)
from events_holder, cte
where events_holder.id = cte.id;
But I don't think it's a good approach.
You can do this using a JSON path expression:
select eh.id, eh.version,
       jsonb_path_query_array(data,
         '$.events[*] ? (@.end_date.datetime() >= "2022-03-15".datetime())')
from events_holder eh
where eh.version = 1
  and eh.data @? '$.events[*] ? (@.end_date.datetime() >= "2022-03-15".datetime())'
Given your example JSON, this returns:
[
{
"id": 255,
"name": "festival",
"values": [
{
"id": 654,
"name": "importance",
"type": "text",
"value": "high"
},
{
"id": 655,
"name": "epic",
"type": "boolean",
"value": "true"
}
],
"end_date": "2023-04-15",
"start_date": "2022-04-15"
}
]
Depending on your data distribution, a GIN index on data or an index on version could help.
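For reference, a hedged sketch of what those two indexes could look like (index names are illustrative); a jsonb_path_ops GIN index supports the @? jsonpath exists operator used in the WHERE clause:
-- Plain b-tree index for the version filter (illustrative name).
CREATE INDEX events_holder_version_idx ON events_holder (version);
-- GIN index over the jsonb column; jsonb_path_ops supports @>, @? and @@
-- and is smaller than the default jsonb_ops when only those are needed.
CREATE INDEX events_holder_data_idx ON events_holder USING gin (data jsonb_path_ops);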
If you need to re-construct the whole JSON content but with just a filtered events array, you can do something like this:
select (data - 'events') ||
       jsonb_build_object('events',
         jsonb_path_query_array(data, '$.events[*] ? (@.end_date.datetime() >= "2022-03-15".datetime())'))
from events_holder eh
...
(data - 'events') removes the events key from the JSON. Then the result of the JSON path query is appended back to that (partial) object.
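Putting the two parts together, a hedged end-to-end sketch (same filter and version condition as above) might look like:
-- Sketch only: re-build each matching row's data with a filtered events array.
select (data - 'events') ||
       jsonb_build_object('events',
         jsonb_path_query_array(data,
           '$.events[*] ? (@.end_date.datetime() >= "2022-03-15".datetime())')) as data
from events_holder eh
where eh.version = 1
  and eh.data @? '$.events[*] ? (@.end_date.datetime() >= "2022-03-15".datetime())';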

How to create a GIN index on nested arrays in PostgreSQL

I have a table with column "data" of jsonb type. The structure looks like this (synthetic simplified example):
[
{
"object": "car",
"fields": [
{ "id": "price", "value": "200000" },
{ "id": "weight", "value": "600" },
{ "id": "color", "value": "green" }
]
},
{
"object": "wheel1",
"fields": [
{ "id": "diameter", "value": "16" },
{ "id": "height", "value": "20" }
]
},
{
"object": "wheel2",
"fields": [
{ "id": "diameter", "value": "17" },
{ "id": "height", "value": "30" }
]
}
]
The goal is to find rows that have objects with certain field properties, like so:
SELECT * FROM cars
WHERE data @> '[{"fields": [{"id":"diameter", "value":"16"}, {"id":"height", "value":"20"}]}]'
  AND data @> '[{"fields": [{"id":"price", "value":"200000"}, {"id":"weight", "value":"600"}]}]';
EXPLAIN of course shows that a Seq Scan is involved. I know how to create a GIN index on objects in an array:
CREATE INDEX cars_data_idx ON cars USING gin ((data->'some_array') jsonb_path_ops);
However, I'm unable to create a proper index on the "fields" arrays, which sit in objects inside the main array. Is it possible at all? What is the correct syntax for that?
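A hedged sketch of one possible approach: jsonb containment with @> matches recursively, so a GIN index over the whole data column can serve the two containment conditions above without indexing the nested "fields" arrays separately (index name is illustrative):
-- Sketch only: jsonb_path_ops GIN index over the whole column; it backs the
-- @> containment operator at any nesting depth, including the "fields" arrays.
CREATE INDEX cars_data_path_idx ON cars USING gin (data jsonb_path_ops);
Whether the planner actually uses it still depends on table size and selectivity, so it is worth re-checking EXPLAIN after creating the index.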

Transform nested JSON in data factory to sql

I'm new to Data Factory. I have a JSON file that I need to manipulate, but I can't figure out how to go about it. The file has a generic "Name" property, but its value should be used as the key name. How can I get the value as the key?
So far I've been getting complex JSON errors. This JSON is coming from a file store.
[
{
"Version": "1.1",
"Documents": [
{
"DocumentState": "Correct",
"DocumentData": {
"Name": "Name1",
"$type": "Document",
"Fields": [
{
"Name": "Form",
"$type": "Text",
"Value": "Birthday Form"
},
{
"Name": "Date",
"$type": "Text",
"Value": "12/1/1999"
},
{
"Name": "FirstName",
"$type": "Text",
"Value": "John"
},
{
"Name": "FirstName",
"$type": "Text",
"Value": "Smith"
}
]
}
}
]
},
{
"Version": "1.1",
"Documents": [
{
"DocumentState": "Correct",
"DocumentData": {
"Name": "Name2",
"$type": "Document",
"Fields": [
{
"Name": "Form",
"$type": "Text",
"Value": "Entry Form"
},
{
"Name": "Date",
"$type": "Text",
"Value": "4/3/2010"
},
{
"Name": "FirstName",
"$type": "Text",
"Value": "Jane"
},
{
"Name": "LastName",
"$type": "Text",
"Value": "Doe"
}
]
}
}
]
}
]
Expected output
DocumentData: [
{
"Form":"Birthday Form",
"Date": "12/1/1999",
"FirstName": "John",
"LastName": "Smith"
},
{
"Form":"Entry Form",
"Date": "4/3/2010",
"FirstName": "Jane",
"LastName": "Doe"
}
]
@jaimers,
I was able to achieve it by making use of the Data Flow activity.
Below is the complete Data Flow.
1) Source1
This step involves getting the data from the source. You will have to configure the source dataset.
The only change I made in the source was to convert Fields.Name, Fields.Type, and Fields.Value to string[] (from string).
This was required to create key-value pairs from the fields in the subsequent steps.
2) Flatten1
I made use of Flatten at the Document level and got the values of DocumentData.DocumentName and DocumentData.Fields.
Note: if you don't want DocumentData.DocumentName, you can safely ignore it.
3) DerivedColumn1
This is the actual step where I convert name:key1, value:value1 pairs into key1:value1.
To do that I made use of the below expression:
keyValues(Fields.Name,Fields.Value)
Note: the keyValues() function expects two array arguments. Hence, in the first step we changed the type of Fields.Name and Fields.Value to arrays.
4) Select
Just to select the columns that need to be sent as the output.
Output
You mentioned SQL in your title, so if you have access to a SQL database, e.g. Azure SQL DB, it is quite capable of manipulating JSON, e.g. using the OPENJSON and FOR JSON PATH methods. A simple example:
DECLARE @json VARCHAR(MAX) = '[
{
"Version": "1.1",
"Documents": [
{
"DocumentState": "Correct",
"DocumentData": {
"Name": "Name1",
"$type": "Document",
"Fields": [
{
"Name": "Form",
"$type": "Text",
"Value": "Birthday Form"
},
{
"Name": "Date",
"$type": "Text",
"Value": "12/1/1999"
},
{
"Name": "FirstName",
"$type": "Text",
"Value": "John"
},
{
"Name": "FirstName",
"$type": "Text",
"Value": "Smith"
}
]
}
}
]
},
{
"Version": "1.1",
"Documents": [
{
"DocumentState": "Correct",
"DocumentData": {
"Name": "Name2",
"$type": "Document",
"Fields": [
{
"Name": "Form",
"$type": "Text",
"Value": "Entry Form"
},
{
"Name": "Date",
"$type": "Text",
"Value": "4/3/2010"
},
{
"Name": "FirstName",
"$type": "Text",
"Value": "Jane"
},
{
"Name": "LastName",
"$type": "Text",
"Value": "Doe"
}
]
}
}
]
}
]'
-- Restructure the JSON and add a root
SELECT *
FROM OPENJSON ( @json )
WITH
(
Form VARCHAR(50) '$.Documents[0].DocumentData.Fields[0].Value',
[Date] DATE '$.Documents[0].DocumentData.Fields[1].Value',
FirstName VARCHAR(50) '$.Documents[0].DocumentData.Fields[2].Value',
LastName VARCHAR(50) '$.Documents[0].DocumentData.Fields[3].Value'
)
FOR JSON PATH, ROOT('DocumentData');
My results:
NB I've used the ROOT clause to add a root to the JSON document. You could make @json a stored proc parameter and use a Stored Proc task from the pipeline.
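A hedged sketch of that suggestion (procedure and parameter names are illustrative): wrap the query above in a stored procedure so the pipeline's Stored Proc task can pass the JSON in.
-- Sketch only: same OPENJSON query as above, parameterised.
CREATE OR ALTER PROCEDURE dbo.usp_TransformDocuments
    @json NVARCHAR(MAX)
AS
BEGIN
    SELECT *
    FROM OPENJSON ( @json )
    WITH
    (
        Form      VARCHAR(50) '$.Documents[0].DocumentData.Fields[0].Value',
        [Date]    DATE        '$.Documents[0].DocumentData.Fields[1].Value',
        FirstName VARCHAR(50) '$.Documents[0].DocumentData.Fields[2].Value',
        LastName  VARCHAR(50) '$.Documents[0].DocumentData.Fields[3].Value'
    )
    FOR JSON PATH, ROOT('DocumentData');
END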

mongodb - filter collection by string array contains ""

For the below documents, I want to write a MongoDB query to get the result.
[{
"id": "1",
"class": "class1",
"value": "xyz"
}, {
"id": "2",
"class": "class2",
"value": "abc"
}, {
"id": "3",
"class": "class3",
"value": "123"
}, {
"id": "4",
"class": "class4"
}, {
"id": "5",
"class": "class5",
"value": ""
}
]
The search parameter is an array of values - ["abc", "xyz", ""] - and it is matched against the "value" attribute.
The output should be as below; in this case, the third item in the search array ("") corresponds to the documents with "id" 4 and 5:
[{
"id": "1",
"class": "class1",
"value": "xyz"
}, {
"id": "2",
"class": "class2",
"value": "abc"
}, {
"id": "4",
"class": "class4"
}, {
"id": "5",
"class": "class5",
"value": ""
}
]
Please assist with a MongoDB query that gets a result like this.
Whenever you have a blank string in the search array, you can also add null to it; $in with null matches documents where the field is missing (like "id" 4) as well, like this:
db.collection.find({
  value: {
    $in: ["abc", "xyz", "", null]
  }
})