How to port ARRAY_AGG with multiple arguments in PostgreSQL to Snowflake - postgresql

Snowflake supports ARRAY_AGG, but it takes only one parameter, while PostgreSQL's version supports multiple.
I need to port the following PostgreSQL snippet to Snowflake:
ARRAY_AGG(state, city, zipcode)
where state, city and zipcode are fields in one of my tables.
Any workaround? I know I can create 3 separate fields but that's not desired.

Depending on whether you want an array of arrays or an array of objects:
WITH r AS (
SELECT column1 AS A, column2 AS B FROM (VALUES (1,'A'),(14,'B'),(35,'C'),(91,'D'),(105,'E'))
)
SELECT ARRAY_AGG(ARRAY_CONSTRUCT(a,b)) FROM r;
gives:
[ [ 1, "A" ], [ 14, "B" ], [ 35, "C" ], [ 91, "D" ], [ 105, "E" ] ]
or
WITH r AS (
SELECT column1 AS A, column2 AS B FROM (values (1,'A'),(14,'B'),(35,'C'),(91,'D'),(105,'E'))
)
SELECT ARRAY_AGG(OBJECT_CONSTRUCT('A',a,'B',b)) FROM r;
gives:
[ { "A": 1, "B": "A" }, { "A": 14, "B": "B" }, { "A": 35, "B": "C" }, { "A": 91, "B": "D" }, { "A": 105, "B": "E" } ]
https://docs.snowflake.net/manuals/sql-reference/functions/array_agg.html
https://docs.snowflake.net/manuals/sql-reference/functions/array_construct.html
https://docs.snowflake.net/manuals/sql-reference/functions/object_construct.html
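Applied to the columns from the question, the object variant would look something like this (a sketch; the table name addresses is just a placeholder for whatever table holds state, city and zipcode):
-- one object per input row, keyed by source column name; addresses is a hypothetical table name
SELECT ARRAY_AGG(OBJECT_CONSTRUCT('state', state, 'city', city, 'zipcode', zipcode)) AS locations
FROM addresses;
The array-of-arrays variant is the same query with ARRAY_CONSTRUCT(state, city, zipcode) in place of the OBJECT_CONSTRUCT call.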

Related

Get all paths matching a jsonpath filter

I'm using PostgreSQL 14.
I need to find all paths that match the jsonpath filter I give.
Input example
{
  "A": [
    {
      "B": [
        [
          {
            "name": "id",
            "conditions": [
              {
                "validator": "nonnull"
              }
            ]
          },
          {
            "name": "x",
            "conditions": [
              {
                "validator": "required"
              }
            ]
          },
          {
            "name": "y",
            "rules": []
          }
        ],
        [
          {
            "name": "z",
            "conditions": [
              {
                "validator": "required"
              }
            ]
          }
        ]
      ]
    }
  ]
}
JsonPath filter
Every A.B element that has a required validator condition:
$.A.B[*].conditions ? (@.validator == "required")
Expected output (Or something close)
{A,0,B,0,1,conditions,0}
{A,0,B,1,0,conditions,0}
Here is a relational solution; you can define the query as a view and filter on the validator to get the array indexes (a filtered version is sketched below the sample output).
select a.index-1 as a,
b.index-1 as b,
b2.index-1 as b2,
c.item ->> 'validator' as validator,
c.index-1 as conditions
from t cross join
jsonb_array_elements(jsn -> 'A') WITH ORDINALITY a(item, index) cross join
jsonb_array_elements(a.item -> 'B') WITH ORDINALITY b(item, index) cross join
jsonb_array_elements(b.item) WITH ORDINALITY b2(item, index) cross join
jsonb_array_elements(b2.item -> 'conditions') WITH ORDINALITY c(item, index);
a|b|b2|validator|conditions|
-+-+--+---------+----------+
0|0| 0|nonnull | 0|
0|0| 1|required | 0|
0|1| 0|required | 0|
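To keep only the required validators (something close to the expected path output), the same expansion can be wrapped and filtered; a sketch, still assuming a table t with a jsonb column jsn as above:
-- keep only the index combinations whose validator is 'required'
select a, b, b2, conditions
from (
    select a.index-1 as a,
           b.index-1 as b,
           b2.index-1 as b2,
           c.item ->> 'validator' as validator,
           c.index-1 as conditions
    from t cross join
         jsonb_array_elements(jsn -> 'A') WITH ORDINALITY a(item, index) cross join
         jsonb_array_elements(a.item -> 'B') WITH ORDINALITY b(item, index) cross join
         jsonb_array_elements(b.item) WITH ORDINALITY b2(item, index) cross join
         jsonb_array_elements(b2.item -> 'conditions') WITH ORDINALITY c(item, index)
) paths
where validator = 'required';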

DB2 Building json using JSON_ARRAY

I am using Db2 LUW 11.5.
I am building a JSON document from the tables below and want an output like the following.
I have tried many things but have not come up with a solution.
{
"ID": 1,
"NAME": "a",
"B_OBJECTS": [{
"ID": 1,
"SIZE": 5
}, {
"ID": 2,
"SIZE": 10
}, {
"ID": 3,
"SIZE": 15
}
],
"C_OBJECTS": [{
"ID": 1,
"SIZE": 100
}, {
"ID": 2,
"SIZE": 200
}
]
}
Table_A
ID | NAME
---+-----
 1 | a

Table_B
ID | ID_A | SIZE
---+------+-----
 1 |    1 |    5
 2 |    1 |   10
 3 |    1 |   15

Table_C
ID | ID_A | SIZE
---+------+-----
 1 |    1 |  100
 2 |    1 |  200
WITH
TABLE_A(ID,NAME) AS
(
VALUES (1, 'a')
)
, TABLE_B(ID, ID_A, SIZE) AS
(
VALUES (1, 1, 5), (2, 1, 10), (3, 1, 15)
), TABLE_C(ID, ID_A, SIZE) AS
(
VALUES (1, 1, 100), (2,1, 200)
)
, JSON_STEP_1 AS
(
SELECT A_ID, A_NAME, B_ID, C_ID
, JSON_OBJECT('ID' VALUE B_ID, 'SIZE' VALUE B_SIZE) B_JSON
, JSON_OBJECT('ID' VALUE C_ID, 'SIZE' VALUE C_SIZE) C_JSON
FROM
(
SELECT
A.ID AS A_ID, A.NAME AS A_NAME, B.ID AS B_ID, B.SIZE AS B_SIZE, C.ID AS C_ID, C.SIZE AS C_SIZE
FROM TABLE_A A
JOIN TABLE_B B ON B.ID_A = A.ID
JOIN TABLE_C C ON C.ID_A = A.ID
)
GROUP BY A_ID, A_NAME, B_ID, B_SIZE, B_ID, B_SIZE, C_ID, C_SIZE
)
, JSON_STEP_2 AS
(
SELECT
JSON_OBJECT
(
'ID' VALUE A_ID,
'NAME' VALUE A_NAME,
'B_OBJECTS' VALUE JSON_ARRAY (LISTAGG(B_JSON, ', ') WITHIN GROUP (ORDER BY B_ID) FORMAT JSON) FORMAT JSON,
'C_OBJECTS' VALUE JSON_ARRAY (LISTAGG(C_JSON, ', ') WITHIN GROUP (ORDER BY C_ID) FORMAT JSON) FORMAT JSON
) JSON_OBJS
FROM JSON_STEP_1
GROUP BY A_ID, A_NAME
)
SELECT * FROM JSON_STEP_2
I get a multiplication of the results:
{
"ID": 1,
"NAME": "a",
"B_OBJECTS": [{
"ID": 1,
"SIZE": 5
}, {
"ID": 1,
"SIZE": 5
}, {
"ID": 2,
"SIZE": 10
}, {
"ID": 2,
"SIZE": 10
}, {
"ID": 3,
"SIZE": 15
}, {
"ID": 3,
"SIZE": 15
}
],
"C_OBJECTS": [{
"ID": 1,
"SIZE": 100
}, {
"ID": 1,
"SIZE": 100
}, {
"ID": 1,
"SIZE": 100
}, {
"ID": 2,
"SIZE": 200
}, {
"ID": 2,
"SIZE": 200
}, {
"ID": 2,
"SIZE": 200
}
]
}
You have to build the arrays from tables B and C in separate queries: joining TABLE_B and TABLE_C to the same TABLE_A row produces a Cartesian product of the B and C rows, which is what multiplies your results.
By the way, since your version is 11.5, JSON_ARRAYAGG may already be (or soon become) available to you.
So you can write:
WITH
TABLE_A(ID,NAME) AS
(
VALUES (1, 'a')
)
, TABLE_B(ID, ID_A, SIZE) AS
(
VALUES (1, 1, 5), (2, 1, 10), (3, 1, 15)
), TABLE_C(ID, ID_A, SIZE) AS
(
VALUES (1, 1, 100), (2,1, 200)
)
select
JSON_OBJECT
(
'ID' VALUE A.ID,
'NAME' VALUE A.NAME,
'B_OBJECTS' VALUE (
select json_arrayagg(
JSON_OBJECT('ID' VALUE b.ID, 'SIZE' VALUE b.SIZE)
) from table_b b where b.id_a = a.id
) format json,
'C_OBJECTS' VALUE (
select json_arrayagg(
JSON_OBJECT('ID' VALUE C.ID, 'SIZE' VALUE C.SIZE)
) from table_c c where c.id_a = a.id) format json
absent on null
)
from table_a a
gives
{
"ID": 1,
"NAME": "a",
"B_OBJECTS": [{
"ID": 1,
"SIZE": 5
}, {
"ID": 2,
"SIZE": 10
}, {
"ID": 3,
"SIZE": 15
}
],
"C_OBJECTS": [{
"ID": 1,
"SIZE": 100
}, {
"ID": 2,
"SIZE": 200
}
]
}

Postgres jsonb to table

Is it possible to return jsonb array of arrays as a table object?
The size of the inner arrays will not always be the same.
Postgres version 9.6.6 is being used.
Example jsonb:
{
  "r": [
    {
      "n": "name",
      "rcs": [
        [
          {
            "v": "1",
            "vt": 9
          },
          {
            "v": "2",
            "vt": 9
          },
          {
            "v": "3",
            "vt": 9
          }
        ],
        [
          {
            "v": "4",
            "vt": 9
          },
          {
            "v": "5",
            "vt": 7
          }
        ]
      ]
    }
  ]
}
Expected table
+------+------+--------+
| Col1 | Col2 | Col3 |
+------+------+--------+
| 1 | 2 | 3 |
| 4 | 5 | Null |
+------+------+--------+
The width of the returned table will be determined by the max length of all rows.
Excess columns for shorter rows will have null values.
I am completely new to Postgres, and not even sure where to start.
This was as far as I got:
select c from someTable,
jsonb_array_elements(data -> 'r') r,
jsonb_array_elements(r -> 'rcs') c
WHERE r->> 'n' = 'name'
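PostgreSQL cannot return a variable number of columns from a static query, but if you fix a maximum width (three columns, as in the expected table) you can index into each inner array directly; out-of-range indexes yield NULL. A minimal sketch, assuming the table is called someTable and the jsonb lives in a column named data, as in the attempt above:
-- each element of 'rcs' is one output row; -> with an integer index
-- returns NULL when the inner array is shorter than the requested column
select c -> 0 ->> 'v' as col1,
       c -> 1 ->> 'v' as col2,
       c -> 2 ->> 'v' as col3
from someTable,
     jsonb_array_elements(data -> 'r') r,
     jsonb_array_elements(r -> 'rcs') c
where r ->> 'n' = 'name';
A truly dynamic column count would need dynamic SQL (e.g. a plpgsql function that builds the statement), since the result shape must be known at query time.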

Mongo 3.2 query timeseries value at specific time

I have some timeseries data stored in Mongo with one document per account, like so:
{
  "account_number": 123,
  "times": [
    datetime(2017, 1, 2, 12, 34, 56),
    datetime(2017, 3, 4, 17, 18, 19),
    datetime(2017, 3, 11, 0, 1, 11),
  ],
  "values": [
    1,
    10,
    9001,
  ]
}
So, to be clear: in the above representation, account 123 has a value of 1 from 2017-01-02 12:34:56 until it changes to 10 at 2017-03-04 17:18:19, and then to 9001 at 2017-03-11 00:01:11.
There are many accounts, and each account's data is different (the changes can be at different times, and there can be more or fewer value changes than for other accounts).
I'd like to query each user's value at a given time, e.g. "What was each user's value at 2017-01-30 02:03:04?" This would return 1 for the above account, as it was set to 1 before the given time and did not change until after the given time.
It looks like $zip would be useful, but that's only available in Mongo 3.4; I'm using 3.2 and have no plans to upgrade soon.
Edit:
I can get a small part of the way there using:
> db.account_data.aggregate([{$unwind: '$times'}, {$unwind: '$values'}])
which returns something like:
{"account_number": 123, "times": datetime(2017, 1, 2, 12, 34, 56), "values": 1},
{"account_number": 123, "times": datetime(2017, 1, 2, 12, 34, 56), "values": 10},
#...
which isn't quite right as it is returning the cross product of times/values
This is possible using only 3.2 features. I tested with the Mingo library:
var mingo = require('mingo')
var data = [{
"account_number": 123,
"times": [
new Date("2017-01-02T12:34:56"),
new Date("2017-03-04T17:18:19"),
new Date("2017-03-11T00:01:11")
],
"values": [1, 10, 9001]
}]
var maxDate = new Date("2017-01-30T02:03:04")
// 1. filter dates down to those less or equal to the maxDate
// 2. take the size of the filtered date array
// 3. subtract 1 from the size to get the index of the corresponding value
// 4. lookup the value by index in the "values" array into new "valueAtDate" field
// 5. project the extra fields
var result = mingo.aggregate(data, [{
$project: {
valueAtDate: {
$arrayElemAt: [
"$values",
{ $subtract: [ { $size: { $filter: { input: "$times", as: "time", cond: { $lte: [ "$$time", maxDate ] }} } }, 1 ] }
]
},
values: 1,
times: 1
}
}])
console.log(result)
// Outputs
[ { valueAtDate: 1,
values: [ 1, 10, 9001 ],
times:
[ 2017-01-02T12:34:56.000Z,
2017-03-04T17:18:19.000Z,
2017-03-11T00:01:11.000Z ] } ]
I'm not sure how to do the same with MongoDB 3.2; however, from 3.4 you can use the following query:
db.test.aggregate([
{
$project:
{
index: { $indexOfArray: [ "$times", ISODate("2017-03-11T00:01:11") ] },
values: true
}
},
{
$project: {
resultValue: { $arrayElemAt: [ "$values", "$index" ] }
}
}])
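The operators used in the Mingo example above ($filter, $size, $subtract and $arrayElemAt) also exist server-side in MongoDB 3.2, so essentially the same projection should run directly in the shell; a sketch, assuming the collection is named account_data as in the question's edit:
// index of the last time <= maxDate selects the corresponding value
var maxDate = new Date("2017-01-30T02:03:04")
db.account_data.aggregate([{
    $project: {
        account_number: 1,
        valueAtDate: {
            $arrayElemAt: [
                "$values",
                { $subtract: [
                    { $size: { $filter: { input: "$times", as: "time", cond: { $lte: [ "$$time", maxDate ] } } } },
                    1
                ] }
            ]
        }
    }
}])
Note that an account with no times at or before maxDate ends up with an index of -1, which $arrayElemAt treats as "last element", so such accounts may need to be filtered out separately.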

Retrieving a subset of data from MongoDB

If I have a collection similar to:
[
{ "test": [ { "a": 1, "b": 2 }, { "a": 10, "b": 1 } ] },
{ "test": [ { "a": 5, "b": 1 }, { "a": 14, "b": 2 } ] },
...
]
How do I obtain only a subset of data consisting of the a values when b is 2? In SQL, this would be something similar to:
SELECT test.a FROM collection WHERE test.b = 2
I do understand that I can limit what data I get with something like:
collection.find({ }, { "test.a": 1 })
But that returns all the a values. How can I limit it so that it returns only the values in which b is 2 (the WHERE test.b = 2 part of the SQL equivalent)?
You can do this by adding a selector object as the first parameter of your find call and using the $elemMatch projection operator:
collection.find({ 'test.b': 2 }, { test: { $elemMatch: { b: 2 } }, 'test.a': 1 })
But this will only return the first matching test array element per document where b is 2. You would need to use the aggregation framework if there can be multiple b: 2 elements in your test arrays.
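If multiple elements can match, one way is an aggregation with $unwind; a minimal sketch:
// flatten the array, keep only elements where b is 2, then return just their a values
collection.aggregate([
    { $unwind: "$test" },
    { $match: { "test.b": 2 } },
    { $project: { _id: 0, a: "$test.a" } }
])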