Using aggregate functions on alias? - postgresql

I want to make a query where I am computing the difference between two columns. Something like:
SELECT a,
b,
a - b as "diff"
FROM ...
Now I would like to calculate the stddev of the "diff" column using postgresql built-in stddev aggregate function. How can I achieve this?
Thanks.
EDIT:
The actual query is this:
-- Tournament results for player 1: converted winnings, converted total buy-in,
-- their difference (net_amt_won) and the standard deviation of that difference.
-- NOTE(review): every column that feeds the stddev() argument is also listed in GROUP BY,
-- so the argument is constant inside each group. PostgreSQL's stddev is the sample standard
-- deviation and yields NULL for a one-row group, which would explain the blank
-- diff_std_dev column -- confirm against the data.
SELECT tr.date_start,
tr.date_end,
-- Winnings (prize plus bounties) in converted currency; 0.0 when the conversion rate is 0.
(((CASE when(tourney_summary.val_curr_conv != 0) THEN tourney_summary.val_curr_conv * (tr.amt_won + tr.cnt_bounty * tourney_summary.amt_bounty) ELSE 0.0 END))) AS "amt_won_curr_conv",
-- Total buy-in (buy-in, fee, rebuys, add-ons, bounty) in converted currency; 0.0 when the conversion rate is 0.
(((CASE when(tourney_summary.val_curr_conv != 0) THEN tourney_summary.val_curr_conv * (tourney_summary.amt_buyin + tourney_summary.amt_fee + tourney_summary.amt_rebuy * tr.cnt_rebuy + tourney_summary.amt_addon * tr.cnt_addon + tourney_summary.amt_bounty) ELSE 0.0 END))) AS "amt_buyin_ttl_curr_conv",
-- Same two expressions repeated and subtracted: winnings minus total buy-in.
((((CASE when(tourney_summary.val_curr_conv != 0) THEN tourney_summary.val_curr_conv * (tr.amt_won + tr.cnt_bounty * tourney_summary.amt_bounty) ELSE 0.0 END))) - (((CASE when(tourney_summary.val_curr_conv != 0) THEN tourney_summary.val_curr_conv * (tourney_summary.amt_buyin + tourney_summary.amt_fee + tourney_summary.amt_rebuy * tr.cnt_rebuy + tourney_summary.amt_addon * tr.cnt_addon + tourney_summary.amt_bounty) ELSE 0.0 END)))) as net_amt_won,
-- Aggregate over the same difference, evaluated once per GROUP BY group.
stddev((((CASE when(tourney_summary.val_curr_conv != 0) THEN tourney_summary.val_curr_conv * (tr.amt_won + tr.cnt_bounty * tourney_summary.amt_bounty) ELSE 0.0 END))) - (((CASE when(tourney_summary.val_curr_conv != 0) THEN tourney_summary.val_curr_conv * (tourney_summary.amt_buyin + tourney_summary.amt_fee + tourney_summary.amt_rebuy * tr.cnt_rebuy + tourney_summary.amt_addon * tr.cnt_addon + tourney_summary.amt_bounty) ELSE 0.0 END)))) as diff_std_dev
-- Implicit (comma) join; the join condition is the id_tourney equality in WHERE.
FROM tourney_summary,
tourney_results tr
WHERE
tr.id_player=1
AND tourney_summary.id_tourney = tr.id_tourney
AND ((tourney_summary.id_gametype = 1)
-- Only table types with val_seats = 2 ...
AND (((((((tourney_summary.id_table_type IN
(SELECT lttt.id_table_type
FROM tourney_table_type lttt
WHERE lttt.val_seats = 2))))))
-- ... whose val_speed contains 'S' or 'H'.
AND (((((tourney_summary.id_table_type IN
(SELECT lttt.id_table_type
FROM tourney_table_type lttt
WHERE position('S' IN lttt.val_speed) > 0))
OR (tourney_summary.id_table_type IN
(SELECT lttt.id_table_type
FROM tourney_table_type lttt
WHERE position('H' IN lttt.val_speed) > 0))))))))
AND ((tourney_summary.date_start >= '2013/08/15 23:00:00')))
-- Groups on both dates plus every input column of the CASE expressions above.
GROUP BY tr.date_start,
tr.date_end,
tourney_summary.val_curr_conv,
tr.amt_won,
tr.cnt_bounty,
tourney_summary.amt_bounty,
tourney_summary.amt_buyin,
tourney_summary.amt_fee,
tourney_summary.amt_rebuy,
tr.cnt_rebuy,
tourney_summary.amt_addon,
tr.cnt_addon
ORDER BY tr.date_end DESC;
The "a" and "b" expressions (the ones with CASE) are big. And I don't know how to avoid the copy/paste. In any case using stddev on the a-b expression returns a blank column. What am I doing wrong?
Thanks.

You pretty much answer it yourself. Calculate the standard deviation of the difference:
-- stddev() is an aggregate, so it cannot sit next to the ungrouped columns a and b
-- unless it is called as a window function. OVER () makes the window the whole
-- result set, so every row carries the same standard deviation of (a - b).
SELECT a,
       b,
       a - b AS "diff",
       stddev(a - b) OVER () AS "diff_stddev"
FROM ...
If a - b is a computationally expensive operation or is in fact a much more complex expression in reality, you can wrap it in a subquery:
-- The subquery computes the difference once; the outer query reuses it by name.
-- stddev(...) OVER () is evaluated as a window function over all rows, so it can be
-- selected alongside a, b and "diff" without a GROUP BY.
SELECT a, b, "diff", stddev("diff") OVER () AS diff_stddev
FROM (
    SELECT a, b, a - b
    FROM ...
) x (a, b, "diff")
x is just a throw-away alias for the subquery table.

It's also possible to do this with a CTE:
-- cte1 computes the difference once per row; the outer query reuses it by name.
-- stddev(diff) over () is a window call spanning all rows of cte1, so it is valid
-- next to the non-aggregated columns without a group by.
with cte1 as (
    select a, b, a - b as diff
    from ...
)
select
    a, b, diff, stddev(diff) over () as diff_stddev
from cte1

Related

Postgres table bloated without dead tuples

I have a table which has 0 dead tuples, but at the same time the bloat value is 1.7.
The wasted bytes is around 21GB.
Is it possible to have 0 dead tuples but the table being bloated?
If so, on what basis the wasted bytes is calculated?
EDIT: Below is the query I used to get the information.
This query was from AWS support.
-- Estimates table and index bloat from catalog statistics (pg_stats, pg_class, pg_index).
-- For each table/index pair it reports:
--   tbloat / ibloat            : actual pages divided by estimated pages (otta / iotta), rounded to 1 decimal
--   wastedbytes / wastedibytes : block size * (actual pages - estimated pages), 0 when actual < estimated
-- The estimated page counts are computed from average column widths and null fractions,
-- so the output is a heuristic rather than a measurement.
SELECT
current_database(),
schemaname,
tablename,
/*reltuples::bigint, relpages::bigint, otta,*/
-- Table bloat ratio; guarded against a zero page estimate.
ROUND((
CASE WHEN otta = 0 THEN
0.0
ELSE
sml.relpages::float / otta
END)::numeric, 1) AS tbloat,
-- Bytes in pages beyond the estimate.
CASE WHEN relpages < otta THEN
0
ELSE
bs * (sml.relpages - otta)::bigint
END AS wastedbytes,
iname,
/*ituples::bigint, ipages::bigint, iotta,*/
-- Index bloat ratio; guarded against zero estimated or actual pages.
ROUND((
CASE WHEN iotta = 0
OR ipages = 0 THEN
0.0
ELSE
ipages::float / iotta
END)::numeric, 1) AS ibloat,
CASE WHEN ipages < iotta THEN
0
ELSE
bs * (ipages - iotta)
END AS wastedibytes
-- sml: one row per table/index pair with actual (relpages, ipages) and estimated (otta, iotta) page counts.
FROM (
SELECT
schemaname,
tablename,
cc.reltuples,
cc.relpages,
bs,
-- otta = ceil(reltuples * estimated row size / (bs - 20)).
-- Row size = datahdr rounded up to a multiple of ma, plus nullhdr2, plus 4.
-- NOTE(review): the constants 20 and 4 presumably stand for per-page and per-row overhead -- confirm.
CEIL((cc.reltuples * ((datahdr + ma - (
CASE WHEN datahdr % ma = 0 THEN
ma
ELSE
datahdr % ma
END)) + nullhdr2 + 4)) / (bs - 20::float)) AS otta,
-- Index columns default to '?' / 0 when the table has no index (LEFT JOINs below).
COALESCE(c2.relname, '?') AS iname,
COALESCE(c2.reltuples, 0) AS ituples,
COALESCE(c2.relpages, 0) AS ipages,
COALESCE(CEIL((c2.reltuples * (datahdr - 12)) / (bs - 20::float)), 0) AS iotta -- very rough approximation, assumes all cols
-- rs: per-table row-size components.
FROM (
SELECT
ma,
bs,
schemaname,
tablename,
-- datahdr = datawidth + hdr rounded up to a multiple of ma.
(datawidth + (hdr + ma - (
CASE WHEN hdr % ma = 0 THEN
ma
ELSE
hdr % ma
END)))::numeric AS datahdr,
-- nullhdr2 = largest null fraction * nullhdr rounded up to a multiple of ma.
(maxfracsum * (nullhdr + ma - (
CASE WHEN nullhdr % ma = 0 THEN
ma
ELSE
nullhdr % ma
END))) AS nullhdr2
-- foo: per-table aggregates over pg_stats column rows.
FROM (
SELECT
schemaname,
tablename,
hdr,
ma,
bs,
-- Expected width of the non-null data: sum over columns of (1 - null_frac) * avg_width.
SUM((1 - null_frac) * avg_width) AS datawidth,
MAX(null_frac) AS maxfracsum,
-- hdr plus 1 + (number of columns with a non-zero null fraction) / 8;
-- presumably an estimate of the null bitmap size -- confirm.
hdr + (
SELECT
1 + COUNT(*) / 8
FROM
pg_stats s2
WHERE
null_frac <> 0
AND s2.schemaname = s.schemaname
AND s2.tablename = s.tablename) AS nullhdr
FROM
pg_stats s,
-- constants: a single row holding bs, hdr and ma, cross-joined to every pg_stats row.
(
SELECT
(
SELECT
current_setting('block_size')::numeric) AS bs,
-- hdr is 27 when characters 12-14 of version() are 8.0, 8.1 or 8.2, otherwise 23.
CASE WHEN SUBSTRING(v, 12, 3) IN ('8.0', '8.1', '8.2') THEN
27
ELSE
23
END AS hdr,
-- ma is 8 when version() matches 'mingw32', otherwise 4; it is the rounding multiple used above.
CASE WHEN v ~ 'mingw32' THEN
8
ELSE
4
END AS ma
FROM (
SELECT
version() AS v) AS foo) AS constants
-- Positional GROUP BY: schemaname, tablename, hdr, ma, bs.
GROUP BY 1, 2, 3, 4, 5) AS foo) AS rs
-- Match statistics rows to pg_class by relation name and schema name; information_schema is excluded.
JOIN pg_class cc ON cc.relname = rs.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid
AND nn.nspname = rs.schemaname
AND nn.nspname <> 'information_schema'
-- LEFT JOINs keep tables that have no index.
LEFT JOIN pg_index i ON indrelid = cc.oid
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid) AS sml ORDER BY wastedbytes DESC;
A table can be bloated with empty space even if there is not a single dead tuple. The query you show uses heuristics and has been known to get it wrong occasionally.
Use pgstattuple:
-- Install the pgstattuple extension in the current database unless it already exists.
CREATE EXTENSION IF NOT EXISTS pgstattuple;
-- Return the statistics row pgstattuple produces for the given relation
-- ('tablename' is presumably a placeholder for the real table name).
SELECT * FROM pgstattuple('tablename');
That will show you the actual bloat.
Your query considers any space not thought to be used by live tuples to be wasted. That includes space that is currently occupied by dead tuples, and space that used to be occupied by dead tuples but has since been vacuumed and is currently available for reuse. It also includes space that is reserved by fillfactor settings, and so is available for updates but not for inserts.

Create a column of increasing number in Postgresql

I want to create a CTE in PostgreSQL (Redshift) that contains only a single column of numbers increasing by 1, like 1, 2, 3, 4, ... up to 1000.
Here's one that goes to 1024. Add "TOP 1000" if you only want 1000.
-- Generates the integers 1..1024 without recursion: ten two-row derived tables
-- (one per bit, p0..p9) are cross-joined and each bit is weighted by its power of two.
-- Integer literals are used for the weights so the result stays an integer column
-- (POWER() returns a floating-point value), and UNION ALL skips the de-duplication
-- step that UNION would run on the two already-distinct literals 0 and 1.
SELECT
    1 + p0.n
      + p1.n * 2
      + p2.n * 4
      + p3.n * 8
      + p4.n * 16
      + p5.n * 32
      + p6.n * 64
      + p7.n * 128
      + p8.n * 256
      + p9.n * 512
    AS number
FROM
    (SELECT 0 AS n UNION ALL SELECT 1) p0
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p1
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p2
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p3
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p4
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p5
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p6
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p7
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p8
    CROSS JOIN (SELECT 0 AS n UNION ALL SELECT 1) p9
-- Sort by the output column name rather than its position.
ORDER BY number
Yes, this is possible with a CTE:
-- Recursive CTE that counts from 1 to 1000, one row per value.
WITH RECURSIVE numbers (nr) AS (
    -- Anchor member: the first value.
    SELECT 1
    UNION ALL
    -- Recursive member: add 1 to the previous value until 1000 has been produced.
    SELECT prev.nr + 1
    FROM numbers AS prev
    WHERE prev.nr < 1000
)
SELECT nr
FROM numbers;
Online example

multiply previous row's results for all rows

How can I dynamically multiply the previous row's results:
row A col_3 = if (row A col_1 <= 1 then 1), else (1 * col_2)
row B col_3 = if (row B col_1 <= 1 then 1), else (row A * col_2)
row C col_3 = if (row C col_1 <= 1 then 1), else (row B * col_2)
I've attempted this in Postgres. Here's my attempt:
sum (
case
when col_1 <= 1 then 1
else (lag(col_3) over (...)) * col_2 <-- I'm aware you cannot use a lag function within a sum/window function
end
) over (order by ...) as col_3
Note: I've asked a similar question here (thanks @Bergi!), but I'm not sure how to implement that answer for this purpose.
EDIT:
Logic:
if (previous_interval is null) previous_interval = 1
if (curr_repetition = 1) interval = 1
else if (curr_repetition = 2) interval = 6
else interval = previous_interval * easiness

PostgreSQL: Joining with Window Function / Partial Table

Issue
I'm working on a project with kernels in databases, and my PostgreSQL skills have hit a wall. I am joining a table with itself to compute the dot product of every pair of rows, i.e.
-- Pairs every row of data with every row of data and multiplies the a and b components.
SELECT d1.a * d2.a + d1.b * d2.b AS dot
FROM data AS d1
CROSS JOIN data AS d2
This gives me the dot product between all pairs of vectors. Given the following data in my table
a | b | c
---+---+---
1 | 1 | 1
2 | 2 | 2
3 | 3 | 3
The above command yields
dot
-----
2
4
6
...
If I want to compute the dot product between, say, row 2 with its preceding row and its following row, how would I do that efficiently?
Attempts
I have tried to use window functions, but failed to do so since they only compute aggregate functions. I want to join a row with its neighbouring rows (i.e. its window), but not compute any aggregate over these. Something along these lines:
SELECT a * a + b * b + c * c
OVER(rows between 1 preceding and 1 following) as value FROM data data;
I have also tried to use row_number() OVER() which works. But this seems clumsy and inefficient with nested subqueries.
-- Numbers the rows of data (no ORDER BY inside the window, so the numbering follows
-- whatever order the rows are read in), keeps rows 1 to 3, and pairs each of them
-- with every row of data to compute the three-component dot product.
WITH numbered AS (
    SELECT *, row_number() OVER () AS index
    FROM data
)
SELECT d1.a * d3.a + d1.b * d3.b + d1.c * d3.c
FROM data AS d1
CROSS JOIN numbered AS d3
WHERE d3.index >= 1
  AND d3.index <= 3;
Lastly, I tried to dig into LATERALs with no luck.
Any thoughts?
You can get the values of preceding/following rows by lag()/lead().
If the order of rows is determined by a, the query would be like:
-- For each row, taken in order of a, multiplies the previous row's components by the
-- next row's components and sums the three products. A missing neighbour contributes 0
-- through the third (default) argument of lag/lead.
SELECT
    a,
    (lag(a, 1, 0) OVER w) * (lead(a, 1, 0) OVER w)
  + (lag(b, 1, 0) OVER w) * (lead(b, 1, 0) OVER w)
  + (lag(c, 1, 0) OVER w) * (lead(c, 1, 0) OVER w) AS dot_preceding_and_following
FROM (
    VALUES
        (1, 1, 1),
        (2, 2, 2),
        (3, 3, 3)
) AS T (a, b, c)
-- One shared window definition instead of repeating the ORDER BY six times.
WINDOW w AS (ORDER BY a)
ORDER BY a;

PostgreSQL - Aliases column and HAVING

-- Attempt to filter on the SELECT-list alias PJZ inside HAVING.
-- NOTE(review): PostgreSQL does not accept output-column aliases in HAVING, which is why
-- this statement is rejected with the "column pjz does not exist" error quoted below.
SELECT
-- PJZ is SUM(X.Count)*3600, or '0' when that sum is NULL.
CASE WHEN SUM(X.Count)*3600 is null THEN '0'
ELSE
SUM(X.Count)*3600
END AS PJZ,
X.Mass
FROM X
WHERE X.Mass > 2000
HAVING ((X.Mass / PJZ * 100) - 100) >= 10;
Getting: ERROR: Column »pjz« doesn't exists.
How can I do something like this?
You can't use aliases in a HAVING clause, so you have to duplicate the expression there. Since you only want to check for null, you could do this:
-- PJZ is SUM(X.Count)*3600, defaulting to 0 when the sum is NULL. The alias cannot be
-- referenced in HAVING, so the aggregate is repeated there.
-- GROUP BY is required because X.Mass is selected next to an aggregate.
-- NULLIF turns a zero divisor into NULL, so such a group is filtered out instead of
-- raising a division-by-zero error.
SELECT coalesce(SUM(X.Count)*3600, 0) AS PJZ, X.Mass
FROM X
WHERE X.Mass > 2000
GROUP BY X.Mass
HAVING ((X.Mass / NULLIF(SUM(X.Count)*3600, 0) * 100) - 100) >= 10;
Another option is to wrap the query in a WITH statement, for example:
-- The CTE computes PJZ once per Mass so the outer query can filter on it by name.
-- It is named totals rather than x so that it does not shadow the base table X.
-- GROUP BY is required because X.Mass is selected next to an aggregate.
WITH totals AS (
    SELECT coalesce(SUM(X.Count)*3600, 0) AS PJZ, X.Mass
    FROM X
    WHERE X.Mass > 2000
    GROUP BY X.Mass
)
SELECT PJZ, Mass
FROM totals
-- Same percentage condition as the question's HAVING clause; NULLIF keeps a zero PJZ
-- from raising a division-by-zero error (the row is filtered out instead).
WHERE ((Mass / NULLIF(PJZ, 0) * 100) - 100) >= 10
It is far better than code duplication, in my opinion.
Wrap it into a derived table:
-- The derived table t computes PJZ once per mass; the outer query can then use it by name.
-- Rows with PJZ = 0 are kept and reported as 100; all other rows must satisfy the
-- percentage condition.
SELECT CASE
           WHEN t.PJZ = 0 THEN 100
           ELSE t.PJZ
       END AS PJZ,
       t.mass
FROM (
    SELECT coalesce(SUM(X.Count)*3600, 0) AS PJZ,
           X.Mass
    FROM X
    WHERE X.Mass > 2000
    GROUP BY X.mass
) t
-- Only the alias t is visible here, so mass is referenced through t, not through X.
-- NULLIF guards the division in case the second condition is evaluated for a zero PJZ;
-- such rows still pass through the first condition.
WHERE t.PJZ = 0
   OR ((t.mass / NULLIF(t.PJZ, 0) * 100) - 100) >= 10;
(Note that I added the missing group by as otherwise the query would not be valid)