Postgres Return jsonb values as jsonb array? - postgresql

tl;dr -- is there some way to get values as a jsonb_array from a jsonb object in postgres?
Trying to use recursive cte in postgres to flatten an arbitrarily deep tree structure like this:
{
"type": "foo",
"properties": {...},
"children": [
"type": "bar",
"properties": {...},
"children": [
"type": "multivariate",
"variants": {
"arbitrary-name": {
properties: {...},
children: [...],
},
"some-other-name": {
properties: {...},
children: [...],
},
"another": {
properties: {...},
children: [...],
}
}
]
]
}
Generally following this post, however I'm stuck at processing the type: "multivariate" node where what I really want is essentially jsonb_agg(jsonb_object_values(json_object -> 'variants'))
Update:
Sorry, I obviously should have included the query I tried:
WITH RECURSIVE tree_nodes (id, json_element) AS (
-- non recursive term
SELECT
id, node
FROM trees
UNION
-- recursive term
SELECT
id,
CASE
WHEN jsonb_typeof(json_element) = 'array'
THEN jsonb_array_elements(json_element)
WHEN jsonb_exists(json_element, 'children')
THEN jsonb_array_elements(json_element -> 'children')
WHEN jsonb_exists(json_element, 'variants')
THEN (select jsonb_agg(element.value) from jsonb_each(json_element -> 'variants') as element)
END AS json_element
FROM
tree_nodes
WHERE
jsonb_typeof(json_element) = 'array' OR jsonb_typeof(json_element) = 'object'
)
select * from tree_nodes;
The schema is just an id & a jsonb node column
This query gives an error:
ERROR: set-returning functions are not allowed in CASE
LINE 16: THEN jsonb_array_elements(json_element -> 'children')
^
HINT: You might be able to move the set-returning function into a LATERAL FROM item.
I just want Object.values(json_element -> 'variants') 😫
Update 2:
After reading this all again, I realized this is a problem due to me using a recent version of PostgreSQL (10.3), which apparently no longer allows returning a set from a CASE statement, which was kind of the crux of getting this tree-flattening approach to work afaict. There's probably some way to achieve the same thing in recent versions of PostgreSQL but I have no idea how I'd go about doing it.

Use jsonb_each() in the FROM clause together with jsonb_agg(<jsonb_each_alias>.value) in the SELECT, for example:
select
id,
jsonb_agg(child.value)
from
(values
(101, '{"child":{"a":1,"b":2}}'::jsonb),
(102, '{"child":{"c":3,"d":4,"e":5}}'::jsonb
)) as t(id, json_object), -- example table, replace values block with actual tablespec
jsonb_each(t.json_object->'child') as child
group by t.id
You can always chain other jsonb functions which return setof jsonb (e.g. jsonb_array_elements) in the FROM if you need to iterate the higher level arrays before the jsonb_each; for example:
select
id,
jsonb_agg(sets.value)
from
(values
(101, '{"children":[{"a_key":{"a":1}},{"a_key":{"b":2}}]}'::jsonb),
(102, '{"children":[{"a_key":{"c":3,"d":4,"e":5}},{"a_key":{"f":6}}]}'::jsonb
)) as t(id, json_object), -- example table, replace values block with actual tablespec
jsonb_array_elements(t.json_object->'children') elem,
jsonb_each(elem->'a_key') as sets
group by t.id;
Update Answer
In answer to your comment and question edit about needing to walk the 'children' of each tree node and extract the 'variants'; I would achieve this by splitting the CTE into multiple stages:
with recursive
-- Constant table for demonstration purposes only; remove this and replace below references to "objects" with table name
objects(id, object) as (values
(101, '{"children":[{"children":[{"variants":{"aa":11}},{"variants":{"ab":12}}],"variants":{"a":1}},{"variants":{"b":2}}]}'::jsonb),
(102, '{"children":[{"children":[{"variants":{"cc":33,"cd":34,"ce":35}},{"variants":{"f":36}}],"variants":{"c":3,"d":4,"e":5}},{"variants":{"f":6}}]}'::jsonb)
),
tree_nodes as ( -- Flatten the tree by walking all 'children' and creating a separate record for each root
-- non-recursive term: get root element
select
o.id, o.object as value
from
objects o
union all
-- recursive term - get JSON object node for each child
select
n.id,
e.value
from
tree_nodes n,
jsonb_array_elements(n.value->'children') e
where
jsonb_typeof(n.value->'children') = 'array'
),
variants as (
select
n.id,
v.value
from
tree_nodes n,
jsonb_each(n.value->'variants') v -- expand variants
where
jsonb_typeof(n.value->'variants') = 'object'
)
select
id,
jsonb_agg(value)
from
variants
group by
id
;
This ability of breaking a query up into a "pipeline" of operations is one of my favourite things about CTEs - it makes the query much easier to understand, maintain and debug.

db<>fiddle
Expanded the test data with more children elements and deeper structure (more nested elements):
{
"type": "foo",
"children": [
{
"type" : "bar1",
"children" : [{
"type" : "blubb",
"children" : [{
"type" : "multivariate",
"variants" : {
"blubb_variant1": {
"properties" : {
"blubb_v1_a" : 100
},
"children" : ["z", "y"]
},
"blubb_variant2": {
"properties" : {
"blubb_v2_a" : 300,
"blubb_v2_b" : 4200
},
"children" : []
}
}
}]
}]
},
{
"type" : "bar2",
"children" : [{
"type" : "multivariate",
"variants" : {
"multivariate_variant1": {
"properties" : {
"multivariate_v1_a" : 1,
"multivariate_v1_b" : 2
},
"children" : [1,2,3]
},
"multivariate_variant2": {
"properties" : {
"multivariate_v2_a" : 3,
"multivariate_v2_b" : 42,
"multivariate_v2_d" : "fgh"
},
"children" : [4,5,6]
},
"multivariate_variant3": {
"properties" : {
"multivariate_v3_a" : "abc",
"multivariate_v3_b" : "def"
},
"children" : [7,8,9]
}
}
},
{
"type" : "blah",
"variants" : {
"blah_variant1": {
"properties" : {
"blah_v1_a" : 1,
"blah_v1_b" : 2
},
"children" : [{
"type" : "blah_sub1",
"variants" : {
"blah_sub1_variant1" : {
"properties" : {
"blah_s1_v1_a" : 12345,
"children" : ["a",1, "bn"]
}
}
}
}]
},
"blah_variant2": {
"properties" : {
"blah_v2_a" : 3,
"blah_v2_b" : 42,
"blah_v2_c" : "fgh"
},
"children" : [4,5,6]
}
}
}]
}
]
}
Result:
variants json
----------------------- ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
"multivariate_variant1" {"children": [1, 2, 3], "properties": {"multivariate_v1_a": 1, "multivariate_v1_b": 2}}
"multivariate_variant2" {"children": [4, 5, 6], "properties": {"multivariate_v2_a": 3, "multivariate_v2_b": 42, "multivariate_v2_d": "fgh"}}
"multivariate_variant3" {"children": [7, 8, 9], "properties": {"multivariate_v3_a": "abc", "multivariate_v3_b": "def"}}
"blah_variant1" {"children": [{"type": "blah_sub1", "variants": {"blah_sub1_variant1": {"properties": {"children": ["a", 1, "bn"], "blah_s1_v1_a": 12345}}}}], "properties": {"blah_v1_a": 1, "blah_v1_b": 2}}
"blah_variant2" {"children": [4, 5, 6], "properties": {"blah_v2_a": 3, "blah_v2_b": 42, "blah_v2_c": "fgh"}}
"blubb_variant1" {"children": ["z", "y"], "properties": {"blubb_v1_a": 100}}
"blubb_variant2" {"children": [], "properties": {"blubb_v2_a": 300, "blubb_v2_b": 4200}}
"blah_sub1_variant1" {"properties": {"children": ["a", 1, "bn"], "blah_s1_v1_a": 12345}}
The Query:
WITH RECURSIVE json_cte(variants, json) AS (
SELECT NULL::jsonb, json FROM (
SELECT '{/*FOR TEST DATA SEE ABOVE*/}'::jsonb as json
)s
UNION
SELECT
row_to_json(v)::jsonb -> 'key', -- D
CASE WHEN v IS NOT NULL THEN row_to_json(v)::jsonb -> 'value' ELSE c END -- C
FROM json_cte
LEFT JOIN LATERAL jsonb_array_elements(json -> 'children') as c ON TRUE -- A
LEFT JOIN LATERAL jsonb_each(json -> 'variants') as v ON TRUE -- B
)
SELECT * FROM json_cte WHERE variants IS NOT NULL
The WITH RECURSIVE structure checks elements in a recursive ways. The first UNION part is the starting point. The second part is the recursive part where the last calculation is taken for the next step.
A: if in the current JSON a children element exists all elements will be expanded into one row per child
B: if the current JSON has an element variants all elements will be expanded into a record. Note that in the example one JSON element can either contain a variants or a children element.
C: if there is a variants element then the expanded record will be converted back into a json. The resulting structure is {"key" : "name_of_variant", "value" : "json_of_variant"}. The values will be the JSON for the next recursion (the JSON of the variants can have own children elements. That's why it works). Otherwise the expanded children elements will be the next data
D: if there is a variants element then the key is printed

Related

How to get a specific key from jsonb in postgresql?

I have column in jsonb named "lines" with many object like this :
[
{
"a" : "1",
"b" : "2",
"c" : "3"
},
{
"a" : "4",
"b" : "5",
"c" : "6"
}
]
This is my query
SELECT *
FROM public.test
WHERE public.test.lines::jsonb ? '[{"c"}]'
In my query i want to get only rows which contain the "c" key in this array
But i have nothing after execution
A quick solution:
SELECT
'c',
*
FROM
jsonb_path_query('[{"a": "1", "b": "2", "c": "3"}, {"a": "4", "b": "5", "c": "6"}]', '$[*].c');
?column? | jsonb_path_query
----------+------------------
c | "3"
c | "6"
The ? operator only works with strings, not with json objects. If you want to test if any of the array elements contains a key with the value c you can use a JSON path predicate:
SELECT *
FROM test
WHERE lines::jsonb ## '$[*].c != null'

Querying an array of JSONB fields in PostgresQL

I have trouble figuring out how to query PostgreSQL JsonB datatype. I have a simple table with the structure:
CREATE TABLE myTable (id BIGINT PRIMARY KEY, answers JSONB)
All Json documents in the column "answers" are of the form:
[
{"R" : "aaa", "V" : 25},
{"R" : "aaa", "V" : 31},
{"R" : "bbb", "V" : 38}
...
]
There can be many elements in the list, but all elements will have a "R" and a "V" item.
I would like to retrieve a table using SQL, listing Ids, and a Json list of all the "V"s where "R" == "aaa" .
For the example above, I would get:
Id for the record
[25, 31] -- the two "V" values where "R" == "aaa"
Any thoughts? Any help appreciated I have spent some time on the JSon paths examples available on the web, but haven't found something similar.
Thanks in advance.
Note: Postgresql 12+ only
Using jsonpath:
WITH data(id, json_arr) AS (
VALUES (1, $$[
{ "R": "aaa", "V": 25 },
{ "R": "aaa", "V": 31 },
{ "R": "bbb", "V": 38 }
]$$::JSONB)
)
SELECT id,
-- $[*] : "inside the top level array"
-- ? (#.R == "aaa") : "keep only when the "R" key's value is "aaa""
-- .V : "select the "V" key of those elements"
jsonb_path_query_array(json_arr, '$[*] ? (#.R == "aaa").V')
FROM data
returns:
+--+----------------------+
|id|jsonb_path_query_array|
+--+----------------------+
|1 |[25, 31] |
+--+----------------------+
Note: you can also use
jsonb_path_query_array(
json_arr,
'$[*] ? (#.R == $r_value).V',
'{"r_value": "aaa"}' -- pass the 'r_value' to the query above
)
I would expand the elements out, filter by R, and then reaggregate. This results in an int[] array.
select m.id, array_agg((a.obj->>'V')::int) as vvals
from mytable m
cross join lateral jsonb_array_elements(answers) as a(obj)
where a.obj->>'R' = 'aaa';
all in pure JSONB:
SELECT id, jsonb_agg(ans->'V') FROM (
SELECT id, jsonb_array_elements(answers) AS ans
FROM myTable
) zz
WHERE ans->'R' = '"aaa"'
GROUP BY id;

MongoDB, how to use document as the smallest unit to search the document in array?

Sorry for the title, but I really do not know how to make it clear. But I can show you.
Here I have insert two document
> db.test.find().pretty()
{
"_id" : ObjectId("557faa461ec825d473b21422"),
"c" : [
{
"a" : 3,
"b" : 7
}
]
}
{
"_id" : ObjectId("557faa4c1ec825d473b21423"),
"c" : [
{
"a" : 1,
"b" : 3
},
{
"a" : 5,
"b" : 9
}
]
}
>
I only want to select the first document with a value which is greater than 'a' and smaller than 'b', like '4'.
But when i search, i cannot get the result i want
> db.test.find({'c.a': {$lte: 4}, 'c.b': {$gte: 4}})
{ "_id" : ObjectId("557faa461ec825d473b21422"), "c" : [ { "a" : 3, "b" : 7 } ] }
{ "_id" : ObjectId("557faa4c1ec825d473b21423"), "c" : [ { "a" : 1, "b" : 3 }, { "a" : 5, "b" : 9 } ] }
>
Because '4' is greater than the '"a" : 1' and smaller than '"b" : 9' in the second document even it is not in the same document in the array, so the second one selected.
But I only want the first one selected.
I found this http://docs.mongodb.org/manual/reference/operator/query/elemMatch/#op._S_elemMatch, but it seems the example is not suitable for my situation.
You would want to
db.test.findOne({ c: {$elemMatch: {a: {$lte: 4}, b: {$gte: 4} } } })
With your query, you are searching for documents that have an object in the 'c' array that has a key 'a' with a value <= 4, and a key 'b' with a value >= 4.
The second record is return because c[0].a is <= 4, and c[1].b is >= 4.
Since you specified you wanted to select only the first document, you would want to do a findOne() instead of a find().
Use $elemMatch as below :
db.test.find({"c":{"$elemMatch":{"a":{"$lte":4},"b":{"$gte":4}}}})
Or
db.test.find({"c":{"$elemMatch":{"a":{"$lte":4},"b":{"$gte":4}}}},{"c.$":1})

MongoDB - Update or Create an object in nested Array in Pymongo

This is my collection
{
"_id" : '50001',
"data" :
[
{
"name" : "ram",
"grade" : 'A'
},
{
"name" : "jango",
"grade" : 'B'
},
{
"name" : "remo",
"grade" : 'A'
}
]
}
Here I want to update the object corresponds to "name": "jango" and have to create a new entry to the Array if "jango" is absent.
I can create a new entry but failed in "create or update".
I tried this way in mongo interpreter
db.MyCollection.update({'_id': '50001', "data.name" :"jango"}, {'$set':{'data': {'data.$.grade':'A'}}}, upsert=true)
but showing
not okForStorage
Mongo nested update so you should know the position or $ of update values below may help
db.collecionName.update(
{'_id': '50001', "data.name" :"jango"},
{'$set':{'data.1.grade':'A'}}, upsert=true)
or
db.collecionName.update(
{'_id': '50001', "data.name" :"jango"},
{'$set':{'data.$.grade':'A'}}, upsert=true)
You almost there:
db.YourCollection.update(
{ '_id':'50001', <-- to find document
'data.name': 'jango' < -- to find element of the array
},
{ '$set': { "data.$.grade" : 'A' } } <-- with .$ you reference array element from first argument
)
Link to documentation

Is there a way to project the type of a field

Suppose we had something like the following document, but we wanted to return only the fields that had numeric information:
{
"_id" : ObjectId("52fac254f40ff600c10e56d4"),
"name" : "Mikey",
"list" : [ 1, 2, 3, 4, 5 ],
"people" : [ "Fred", "Barney", "Wilma", "Betty" ],
"status" : false,
"created" : ISODate("2014-02-12T00:37:40.534Z"),
"views" : 5
}
Now I know that we can query for fields that match a certain type by use of the $type operator. But I'm yet to stumble upon a way to $project this as a field value. So if we looked at the document in the "unwound" form you would see this:
{
"_id" : ObjectId("52fac254f40ff600c10e56d4"),
"name" : 2,
"list" : 16,
"people" : 2
"status" : 8,
"created" : 9,
"views" : 16
}
The final objective would be list only the fields that matched a certain type, let's say compare to get the numeric types and filter out the fields, after much document mangling, to produce a result as follows:
{
"_id" : ObjectId("52fac254f40ff600c10e56d4"),
"list" : [ 1, 2, 3, 4, 5 ],
"views" : 5
}
Does anyone have an approach to handle this.
There are a few issues that make this not practical:
Since the query is a distinctive parameter from the ability to do a projection, this isn't possible from a single query alone, as the projection cannot be influenced by the results of the query
As there's no way with the aggregation framework to iterate fields and check type, that's also not an option
That being said, there's a slightly whacky way of using a Map-Reduce that does get similar answers, albeit in a Map-Reduce style output that's not awesome:
map = function() {
function isNumber(n) {
return !isNaN(parseFloat(n)) && isFinite(n);
}
var numerics = [];
for(var fn in this) {
if (isNumber(this[fn])) {
numerics.push({f: fn, v: this[fn]});
}
if (Array.isArray(this[fn])) {
// example ... more complex logic needed
if(isNumber(this[fn][0])) {
numerics.push({f: fn, v: this[fn]});
}
}
}
emit(this._id, { n: numerics });
};
reduce = function(key, values) {
return values;
};
It's not complete, but the results are similar to what you wanted:
"_id" : ObjectId("52fac254f40ff600c10e56d4"),
"value" : {
"n" : [
{
"f" : "list",
"v" : [
1,
2,
3,
4,
5
]
},
{
"f" : "views",
"v" : 5
}
]
}
The map is just looking at each property and deciding whether it looks like a number ... and if so, adding to an array that will be stored as an object so that the map-reduce engine won't choke on array output. I've kept it simple in the example code -- you could improve the logic of numeric and array checking for sure. :)
Of course, it's not live like a find or aggregation, but as MongoDB wasn't designed with this in mind, this may have to do if you really wanted this functionality.