Is there a smarter method to create series with different intervals for counting within a query? - postgresql

I want to create different intervals:
0 to 10 in steps of 1
10 to 100 in steps of 10
100 to 1,000 in steps of 100
1,000 to 10,000 in steps of 1,000
to query a table and count the items in each range.
with "series" as (
(SELECT generate_series(0, 10, 1) AS r_from)
union
(select generate_series(10, 90, 10) as r_from)
union
(select generate_series(100, 900, 100) as r_from)
union
(select generate_series(1000, 9000, 1000) as r_from)
order by r_from
)
, "range" as ( select r_from
, case
when r_from < 10 then r_from + 1
when r_from < 100 then r_from + 10
when r_from < 1000 then r_from + 100
else r_from + 1000
end as r_to
from series)
select r_from, r_to,(SELECT count(*) FROM "my_table" WHERE "my_value" BETWEEN r_from AND r_to) as "Anz."
FROM "range";

I think generate_series is the right way, but there is another option: we can use simple math to calculate the numbers.
SELECT 0 as r_from,1 as r_to
UNION ALL
SELECT power(10, steps ) * v ,
power(10, steps ) * v + power(10, steps )
FROM generate_series(1, 9, 1) v
CROSS JOIN generate_series(0, 3, 1) steps
so the full query might look like this:
with "range" as
(
SELECT 0 as r_from,1 as r_to
UNION ALL
SELECT power(10, steps) * v ,
power(10, steps) * v + power(10, steps)
FROM generate_series(1, 9, 1) v
CROSS JOIN generate_series(0, 3, 1) steps
)
select r_from, r_to,(SELECT count(*) FROM "my_table" WHERE "my_value" BETWEEN r_from AND r_to) as "Anz."
FROM "range";

Rather than generate_series you could create defined integer ranges (int4range), then test whether your value is included in the range (see Range/Multirange Functions and Operators in the PostgreSQL docs). So:
with ranges (range_set) as
( values ( int4range(0,10,'[)') )
, ( int4range(10,100,'[)') )
, ( int4range(100,1000,'[)') )
, ( int4range(1000,10000,'[)') )
) --select * from ranges;
select lower(range_set) range_start
, upper(range_set) - 1 range_end
, count(my_value) cnt
from ranges r
left join my_table mt
on (mt.my_value <@ r.range_set)
group by r.range_set
order by lower(r.range_set);
Note the third parameter ('[)') when creating the ranges: it makes the lower bound inclusive and the upper bound exclusive, so a boundary value such as 10 falls into exactly one bucket (a BETWEEN test would count it in two).
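A quick sanity check of those bounds (plain int4range plus the element-in-range operator <@, nothing else assumed):
select 0  <@ int4range(0, 10, '[)');   -- true: lower bound is inclusive
select 10 <@ int4range(0, 10, '[)');   -- false: upper bound is exclusive
select 10 <@ int4range(10, 100, '[)'); -- true: 10 belongs to the next bucket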
Creating a CTE as above is good if your ranges are static; however, if dynamic ranges are required you can put the ranges into a table. Changing the ranges then becomes a matter of managing the table: not simpler, but it does not require code updates. The query then reduces to just the main part of the above:
select lower(range_set) range_start
, upper(range_set) - 1 range_end
, count(my_value) cnt
from range_tab r
left join my_table mt
on (mt.my_value <@ r.range_set)
group by r.range_set
order by lower(r.range_set);
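For completeness, a minimal sketch of such a table (range_tab here is just a name chosen to match the query above; add whatever keys or extra columns you need):
create table range_tab (range_set int4range primary key);
insert into range_tab values
  (int4range(0, 10, '[)'))
, (int4range(10, 100, '[)'))
, (int4range(100, 1000, '[)'))
, (int4range(1000, 10000, '[)'));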

Related

How to order output of arrays when using UNION

I have a query like this:
SELECT array_agg(candles) as candles FROM ( SELECT * FROM ... ) AS candles
UNION ALL
SELECT array_agg(trades) as trades FROM ( SELECT * FROM ... ) AS trades
UNION ALL
SELECT ...
This gets me rows that contain arrays, but the order of the rows doesn't necessarily match the query order.
For example, it is possible that the output will have the trades row before the candles row.
How can I get the rows in a predictable order?
Edit: I updated the query based on the answer, but now I'm getting an error:
SELECT a FROM
(
SELECT 1 as o, array_agg(candles) as a
FROM (
SELECT ts, open, high, low, close, midpoint, volume
FROM exchange.binance.candles
WHERE instrument = 'BTCUSDT' AND ts >= '2022-04-01 00:00:00' AND ts < '2022-04-01 01:00:00'
ORDER BY ts) AS candles
UNION ALL
SELECT 2 as o, array_agg(trades)
FROM (
SELECT ts, price, quantity, direction
FROM exchange.binance.trades
WHERE instrument = 'BTCUSDT' AND ts >= '2022-04-01 00:00:00' AND ts < '2022-04-01 01:00:00'
ORDER BY ts) AS trades
UNION ALL
SELECT 3 as o, array_agg(kvwap)
FROM (
SELECT ts, price, "interval"
FROM exchange.binance.kvwap
WHERE instrument = 'BTCUSDT' AND "interval" IN ('M5', 'H1', 'H4') AND ts >= '2022-04-01 00:00:00' AND ts < '2022-04-01 01:00:00'
ORDER BY ts) AS kvwap
)
ORDER BY o;
The error is:
[42601] ERROR: subquery in FROM must have an alias
  Hint: For example, FROM (SELECT ...) [AS] foo. Position: 15
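That error is independent of the ordering problem: PostgreSQL requires every subquery in FROM to have an alias, and the outermost one above has none. A minimal reproduction and fix:
select a from (select 1 as o, 2 as a);        -- same 42601 error: no alias
select a from (select 1 as o, 2 as a) as sub; -- ok
The answer below includes that alias (the x after the closing parenthesis).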
Add a column for ordering to each subquery, but don't include it in the output:
SELECT a FROM (
SELECT 1 as o, array_agg(candles) as a FROM ( SELECT * FROM ... ) c group by 1
UNION ALL
SELECT 2, array_agg(trades) FROM ( SELECT * FROM ... ) t group by 1
UNION ALL
SELECT ...
) x
ORDER BY o
Note that with UNION only the first subquery's column names are relevant: the entire union uses the column names from the first subquery, so don't bother providing aliases for the others.

How can I increment the numerical value in my WHERE clause using a loop?

I am currently using the UNION ALL workaround below to calculate old_eps_tfq regression slopes for each ticker based on its corresponding rownum value (see WHERE rownum < x). I want to know what old_eps_tfq is when rownum < 4, then increment 4 by 1 to find out what it is when rownum < 5, and so on (there are ~20 rownum values).
Could I use PL/pgSQL for this?
SELECT * FROM(
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
WHERE rownum < 4
GROUP BY ticker, current_period_end_date
ORDER BY ticker asc ) q
UNION ALL
SELECT * FROM(
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
WHERE rownum < 5
GROUP BY ticker, current_period_end_date
ORDER BY ticker asc ) q
Here is my table
The outer SELECT * FROM (...) q wrapper is useless. Instead, you can try this:
WITH regression_slope AS(
SELECT
ROW_NUMBER() OVER ( PARTITION BY ticker ORDER BY earnings_growths_ped) AS rownum,
*
FROM "ANALYTICS"."vEARNINGS_GROWTHS"
--WHERE ticker = 'ACN'
ORDER BY ticker )
SELECT
max,
ticker,
current_period_end_date,
max(earnings_growths_ped) AS max_earnings_growths_ped,
--max(rownum) AS max_rownum,
round(regr_slope(old_eps_tfq, rownum)::numeric, 2) AS slope,
round(regr_intercept(old_eps_tfq, rownum)::numeric, 2) AS y_intercept,
round(regr_r2(old_eps_tfq, rownum)::numeric, 3) AS r_squared
FROM regression_slope
INNER JOIN generate_series(4, 24) AS max -- the range 4 to 24 can be adjusted as needed
ON rownum < max
GROUP BY max, ticker, current_period_end_date
ORDER BY max asc, ticker asc
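As for the PL/pgSQL question: yes, a loop works, though the set-based join above is usually preferable. A rough sketch (the function name slopes_by_cutoff is hypothetical, the output is abbreviated to the slope, and the column types are guesses you may need to adjust to your view):
create or replace function slopes_by_cutoff(lo int, hi int)
returns table (cutoff int, ticker text, slope numeric)
language plpgsql as $$
declare
  m int;
begin
  for m in lo..hi loop  -- plays the role of generate_series(4, 24)
    return query
      with regression_slope as (
        select row_number() over (partition by v.ticker
                                  order by v.earnings_growths_ped) as rownum,
               v.*
        from "ANALYTICS"."vEARNINGS_GROWTHS" v
      )
      select m, rs.ticker,
             round(regr_slope(rs.old_eps_tfq, rs.rownum)::numeric, 2)
      from regression_slope rs
      where rs.rownum < m
      group by rs.ticker;
  end loop;
end $$;
-- usage: select * from slopes_by_cutoff(4, 24) order by cutoff, ticker;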

How can you generate a date list from a range in Amazon Redshift?

Getting date list in a range in PostgreSQL shows how to get a date range in PostgreSQL. However, Redshift does not support generate_series():
ans=> select (generate_series('2012-06-29', '2012-07-03', '1 day'::interval))::date;
ERROR: function generate_series("unknown", "unknown", interval) does not exist
HINT: No function matches the given name and argument types. You may need to add explicit type casts.
Is there way to replicate what generate_series() does in Redshift?
A hack, but it works: use a table with many, many rows and a window function to generate the series. This works as long as the series you are generating is smaller than the number of rows in the table you're using to generate it:
WITH x(dt) AS (SELECT '2016-01-01'::date)
SELECT
dateadd(
day,
COUNT(*) over(rows between unbounded preceding and current row) - 1,
dt)
FROM users, x
LIMIT 100
The initial date 2016-01-01 controls the start date, and the LIMIT controls the number of days in the generated series.
Update: this will only run on the leader node.
Redshift has partial support for the generate_series function, but unfortunately does not mention it in the documentation.
This works and is the shortest and most legible way of generating a series of dates as of this writing (2018-01-29):
SELECT ('2016-01-01'::date + x)::date
FROM generate_series(1, 100, 1) x
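One caveat grounded in the leader-node note above: because generate_series runs only on the leader node in Redshift, a standalone SELECT like this works, but you cannot join its output to ordinary (compute-node) tables; for that you still need one of the numbers-table workarounds below.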
One option if you don't want to rely on any existing tables is to pre-generate a series table filled with a range of numbers, one for each row.
create table numbers as (
select
p0.n
+ p1.n*2
+ p2.n * power(2,2)
+ p3.n * power(2,3)
+ p4.n * power(2,4)
+ p5.n * power(2,5)
+ p6.n * power(2,6)
+ p7.n * power(2,7)
+ p8.n * power(2,8)
+ p9.n * power(2,9)
+ p10.n * power(2,10)
as number
from
(select 0 as n union select 1) p0,
(select 0 as n union select 1) p1,
(select 0 as n union select 1) p2,
(select 0 as n union select 1) p3,
(select 0 as n union select 1) p4,
(select 0 as n union select 1) p5,
(select 0 as n union select 1) p6,
(select 0 as n union select 1) p7,
(select 0 as n union select 1) p8,
(select 0 as n union select 1) p9,
(select 0 as n union select 1) p10
order by 1
);
This will create a table with numbers from 0 to 2^11 - 1 (2047); if you need more numbers, just add more clauses :D
Once you have this table, you can join to it as a substitute for generate_series
with date_range as (select
'2012-06-29'::timestamp as start_date ,
'2012-07-03'::timestamp as end_date
)
select
dateadd(day, number::int, start_date)
from date_range
inner join numbers on number <= datediff(day, start_date, end_date)
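With the example range this returns five rows, 2012-06-29 through 2012-07-03: datediff(day, start_date, end_date) is 4, so numbers 0 through 4 qualify.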
@michael_erasmus Interesting! I made a change for possibly better performance, replacing power() with bit shifts:
CREATE OR REPLACE VIEW v_series_0_to_1024 AS SELECT
p0.n
| (p1.n << 1)
| (p2.n << 2)
| (p3.n << 3)
| (p4.n << 4)
| (p5.n << 5)
| (p6.n << 6)
| (p7.n << 7)
| (p8.n << 8)
| (p9.n << 9)
as number
from
(select 0 as n union select 1) p0,
(select 0 as n union select 1) p1,
(select 0 as n union select 1) p2,
(select 0 as n union select 1) p3,
(select 0 as n union select 1) p4,
(select 0 as n union select 1) p5,
(select 0 as n union select 1) p6,
(select 0 as n union select 1) p7,
(select 0 as n union select 1) p8,
(select 0 as n union select 1) p9
order by number
Last 30 days date series:
select dateadd(day, -number, current_date) as dt from v_series_0_to_1024 where number < 30

Postgres select 'at least' items

I want to select the comments on a post that are older than a particular commentId, BUT I want at least 5 comments in the result in any case.
So if fewer than 5 comments match SELECT * FROM comments WHERE id >= :comment_id, I have to make another select: SELECT * FROM comments LIMIT 5.
Is it possible to get the same logic in one request?
with c as (
select count(*) as c
from comments
where id >= :comment_id
)
select *
from comments
where id >= :comment_id
union all
(
select *
from comments
where id < :comment_id
order by id desc
limit greatest(5 - (select c from c), 0)
)
;
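The greatest(..., 0) guard matters: PostgreSQL rejects a negative LIMIT, and 5 - (select c from c) goes below zero as soon as more than five comments match the first branch.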
Try:
WITH x AS (
SELECT * FROM comments WHERE id >= :comment_id
),
y AS (
SELECT * FROM comments
LIMIT 5
)
SELECT * FROM x
WHERE 5 <= ( SELECT count(*) FROM x )
UNION ALL
SELECT * FROM y
WHERE 5 > ( SELECT count(*) FROM x )
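Note that y has no ORDER BY, so which five comments the fallback branch returns is not deterministic; add an ORDER BY inside y if it matters.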

How to do dead reckoning on a column of a table, postgresql

I have a table that looks like
x y
1 2
2 null
3 null
1 null
11 null
I want to fill the null values by applying the rolling recurrence y[i+1] = y[i] + x[i+1], with SQL as simple as possible (in place), so the expected result is
x y
1 2
2 4
3 7
1 8
11 19
implemented in PostgreSQL. I could encapsulate it in a custom window function, but implementing custom functions always seems complex.
WITH RECURSIVE t AS (
  select x, y, 1 as rank from my_table where y is not null
  UNION ALL
  select A.x, A.x + t.y as y, t.rank + 1 as rank
  from t
  inner join (select row_number() over () as rank, x, y from my_table) A
    on t.rank + 1 = A.rank
)
SELECT x, y FROM t;
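Careful with this one: row_number() over () with no ORDER BY numbers the rows in whatever order the scan happens to return them, and PostgreSQL gives no guarantee about physical order; that is exactly the ordering problem the next answer addresses.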
You can iterate over rows using a recursive CTE. But in order to do so, you need a way to jump from row to row. Here's an example using an ID column:
; with recursive cte as
(
select id
, y
from Table1
where id = 1
union all
select cur.id
, prev.y + cur.x
from Table1 cur
join cte prev
on cur.id = prev.id + 1
)
select *
from cte
;
If you don't have an ID column, but you do have another way to order the rows, you can use row_number() to get an ID:
; with recursive sorted as
(
-- Specify your ordering here. This example sorts by the dt column.
select row_number() over (order by dt) as id
, *
from Table1
)
, cte as
(
select id
, y
from sorted
where id = 1
union all
select cur.id
, prev.y + cur.x
from sorted cur
join cte prev
on cur.id = prev.id + 1
)
select *
from cte
;
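Incidentally, once you have something to order by, the recursion is not strictly necessary here: each y is the previous y plus the current x, so the filled column is just a running sum of x offset by the one known starting y. A sketch, assuming the id ordering column from above and that only the first row's y is non-null:
select x,
       first_value(y) over w - first_value(x) over w + sum(x) over w as y
from Table1
window w as (order by id rows between unbounded preceding and current row);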