Force Unique Value During Update, without a good join key - tsql

I have a table "MyTable" with 3 columns:
FilePath
FileName
Status
Example rows from MyTable:
FilePath | FileName | Status
c:\temp | Null | 30
c:\temp | Null | 30
c:\temp | Null | 30
c:\temp | Null | 30
c:\temp | a.csv | 40
c:\temp | b.csv | 40
c:\temp | c.csv | 40
c:\temp | d.csv | 40
I want to update the rows where FileName is null using the file names from the rows where FileName has a value.
The following query updates all of the null rows with the same value; I need to force the update to select a distinct value per row instead:
UPDATE t1
SET t1.FileName = t2.FileName
FROM MyTable t1
JOIN MyTable t2 ON t1.FilePath = t2.FilePath
The current update will give the following result:
FilePath | FileName | Status
c:\temp | a.csv | 30
c:\temp | a.csv | 30
c:\temp | a.csv | 30
c:\temp | a.csv | 30
c:\temp | a.csv | 40
c:\temp | b.csv | 40
c:\temp | c.csv | 40
c:\temp | d.csv | 40
And what I need is:
FilePath | FileName | Status
c:\temp | a.csv | 30
c:\temp | b.csv | 30
c:\temp | c.csv | 30
c:\temp | d.csv | 30
c:\temp | a.csv | 40
c:\temp | b.csv | 40
c:\temp | c.csv | 40
c:\temp | d.csv | 40

On the wild assumptions that you are using a reasonably modern version of SQL Server and that you would like the FileName pattern to start anew and repeat as needed for each status that has NULL FileName values:
-- Sample data.
declare @MyTable as Table ( Id Int Identity, FilePath VarChar(16), FileName VarChar(16), Status Int );
insert into @MyTable ( FilePath, FileName, Status ) values
    ( 'c:\temp', null, 30 ),
    ( 'c:\temp', null, 30 ),
    ( 'c:\temp', null, 30 ),
    ( 'c:\temp', null, 30 ),
    ( 'c:\temp', 'a.csv', 40 ),
    ( 'c:\temp', 'b.csv', 40 ),
    ( 'c:\temp', 'c.csv', 40 ),
    ( 'c:\temp', 'd.csv', 40 ),
    ( 'c:\temp', null, 50 ),
    ( 'c:\temp', null, 60 ),
    ( 'c:\temp', null, 60 ),
    ( 'c:\temp', null, 60 ),
    ( 'c:\temp', null, 60 ),
    ( 'c:\temp', null, 60 ),
    ( 'c:\temp', null, 60 ),
    ( 'c:\temp', null, 60 );
select * from @MyTable;
-- Update the null values.
with
    FileNames as (
        -- Assign a zero-based position to every non-null file name.
        select FileName, Row_Number() over ( order by FileName ) - 1 as RN
        from @MyTable
        where FileName is not NULL ),
    NullNames as (
        -- Zero-based position of each null row within its Status group.
        -- Ordering by Id keeps the assignment deterministic (FileName is all NULL here).
        select Id, Row_Number() over ( partition by Status order by Id ) - 1 as RN
        from @MyTable
        where FileName is NULL )
update MT
set FileName = FN.FileName
from @MyTable as MT inner join
    NullNames as NN on NN.Id = MT.Id inner join
    -- The modulo wraps around, so the names repeat when a Status has more nulls than names.
    FileNames as FN on FN.RN = NN.RN % ( select count(42) from FileNames );
select * from @MyTable;
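To see the wrap-around at work: Status 60 has seven null rows but only four distinct names, so the modulo join hands out a.csv through d.csv and then starts over. A quick sanity check against the sample data (a sketch, not part of the original answer):
-- Each Status's null rows should now hold a.csv..d.csv in a repeating cycle,
-- e.g. Status 60 gets all four names once and a.csv..c.csv a second time.
select Status, FileName, count(*) as NameCount
from @MyTable
group by Status, FileName
order by Status, FileName;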

Related

Leaderboards in PostgreSQL: get the 2 next and 2 previous rows

We use PostgreSQL 14.1.
I have sample data that contains over 50 million records.
base table:
+------+----------+--------+--------+--------+
| id | item_id | battles| wins | damage |
+------+----------+--------+--------+--------+
| 1 | 255 | 35 | 52.08 | 1245.2 |
| 2 | 255 | 35 | 52.08 | 1245.2 |
| 3 | 255 | 35 | 52.08 | 1245.3 |
| 4 | 255 | 35 | 52.08 | 1245.3 |
| 5 | 255 | 35 | 52.09 | 1245.4 |
| 6 | 255 | 35 | 52.08 | 1245.3 |
| 7 | 255 | 35 | 52.08 | 1245.3 |
| 8 | 255 | 35 | 52.08 | 1245.7 |
| 1 | 460 | 18 | 47.35 | 1010.1 |
| 2 | 460 | 27 | 49.18 | 1518.9 |
| 3 | 460 | 16 | 50.78 | 1171.2 |
+------+----------+--------+--------+--------+
We need to get the target row number and 2 next and 2 previous rows as quickly as possible.
Indexed columns:
id
item_id
Sorting:
damage (DESC)
wins (DESC)
battles (ASC)
id (ASC)
In the example, we need to find the row number and the ±2 rows where id = 4 and item_id = 255. The result table should be:
+------+----------+--------+--------+--------+------+
| id | item_id | battles| wins | damage | rank |
+------+----------+--------+--------+--------+------+
| 5 | 255 | 35 | 52.09 | 1245.4 | 2 |
| 3 | 255 | 35 | 52.08 | 1245.3 | 3 |
| 4 | 255 | 35 | 52.08 | 1245.3 | 4 |
| 6 | 255 | 35 | 52.08 | 1245.3 | 5 |
| 7 | 255 | 35 | 52.08 | 1245.3 | 6 |
+------+----------+--------+--------+--------+------+
How can I do this with the ROW_NUMBER window function?
Is there any way to optimize the query to make it faster, given that the other columns have no indexes?
CREATE OR REPLACE FUNCTION find_top(in_id integer, in_item_id integer) RETURNS TABLE (
    r_id int,
    r_item_id int,
    r_battles int,
    r_wins real,
    r_damage real,
    r_rank bigint
) AS $$
DECLARE
    center_place bigint;
BEGIN
    -- First pass: rank the item's rows and find the target row's place.
    SELECT place INTO center_place
    FROM (SELECT id, item_id,
                 ROW_NUMBER() OVER (ORDER BY damage DESC, wins DESC, battles, id) AS place
          FROM public.my_table
          WHERE item_id = in_item_id
            AND battles >= 20) AS s
    WHERE s.id = in_id;
    -- Second pass: rank again and keep the two rows on either side of the target.
    RETURN QUERY SELECT
        pt.id, pt.item_id, pt.battles, pt.wins, pt.damage, s.place
    FROM (SELECT *
          FROM (SELECT ROW_NUMBER() OVER (ORDER BY damage DESC, wins DESC, battles, id) AS place,
                       id, item_id
                FROM public.my_table
                WHERE item_id = in_item_id
                  AND battles >= 20) x
          WHERE x.place BETWEEN (center_place - 2) AND (center_place + 2)) s
    JOIN public.my_table pt ON pt.id = s.id AND pt.item_id = s.item_id;
END;
$$ LANGUAGE plpgsql;
An alternative ranks the table once, carries each row's five-row neighbourhood along as arrays, and then unnests the window centred on the target row:
CREATE OR REPLACE FUNCTION find_top(in_id integer, in_item_id integer) RETURNS TABLE (
    r_id int,
    r_item_id int,
    r_battles int,
    r_wins real,
    r_damage real,
    r_rank bigint
) AS $$
BEGIN
    RETURN QUERY
    SELECT c.*, b.ord - 3 AS row_number  -- position relative to the target row
    FROM (SELECT array_agg(id) OVER w AS id,
                 array_agg(item_id) OVER w AS item_id
          FROM public.my_table
          WINDOW w AS (ORDER BY damage DESC, wins DESC, battles, id
                       ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)) AS a
    CROSS JOIN LATERAL unnest(a.id, a.item_id) WITH ORDINALITY AS b(id, item_id, ord)
    INNER JOIN public.my_table AS c
            ON c.id = b.id
           AND c.item_id = b.item_id
    WHERE a.item_id[3] = in_item_id  -- element 3 is the centre of the 5-row window
      AND a.id[3] = in_id
    ORDER BY b.ord;
END;
$$ LANGUAGE plpgsql;
Test result in dbfiddle.
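Both functions still rank every qualifying row on each call. A composite index that matches the equality filter and the ranking order lets PostgreSQL read the rows already sorted instead of sorting millions of records per call. A sketch of such an index (an assumption, not something from the answers above; my_table_rank_idx is a hypothetical name):
-- Leading column serves the item_id equality filter; the rest mirror the ORDER BY.
CREATE INDEX my_table_rank_idx
    ON public.my_table (item_id, damage DESC, wins DESC, battles ASC, id ASC);
-- Adding WHERE battles >= 20 would turn it into a partial index tailored to these functions.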

PostgreSQL array unique aggregation

I have a large table with structure
CREATE TABLE t (
    id SERIAL PRIMARY KEY,
    a_list int[] NOT NULL,
    b_list int[] NOT NULL,
    c_list int[] NOT NULL,
    d_list int[] NOT NULL,
    type int NOT NULL
);
I want to query all of the unique values from a_list, b_list, c_list and d_list for a given type, like this:
select
    some_array_unique_agg_function(a_list),
    some_array_unique_agg_function(b_list),
    some_array_unique_agg_function(c_list),
    some_array_unique_agg_function(d_list),
    count(1)
from t
where type = 30
For example, for this data
+----+---------+--------+--------+---------+------+
| id | a_list | b_list | c_list | d_list | type |
+----+---------+--------+--------+---------+------+
| 1 | {1,3,4} | {2,4} | {1,1} | {2,4,5} | 30 |
| 2 | {1,2,4} | {2,4} | {4,1} | {2,4,5} | 30 |
| 3 | {1,3,5} | {2} | {} | {2,4,5} | 30 |
+----+---------+--------+--------+---------+------+
I want the following result:
+-------------+--------+--------+-----------+-------+
| a_list | b_list | c_list | d_list | count |
+-------------+--------+--------+-----------+-------+
| {1,2,3,4,5} | {2,4} | {1,4} | {2,4,5} | 3 |
+-------------+--------+--------+-----------+-------+
Is there some_array_unique_agg_function for my purposes?
Try this:
with cte as (
    select
        unnest( a_list ) as a_list,
        unnest( b_list ) as b_list,
        unnest( c_list ) as c_list,
        unnest( d_list ) as d_list,
        (select count(*) from t where type = 30) as type
    from t
    where type = 30
)
select array_agg(distinct a_list), array_agg(distinct b_list),
       array_agg(distinct c_list), array_agg(distinct d_list), type
from cte
group by type;
Result:
"{1,2,3,4,5}";"{2,4,NULL}";"{1,4,NULL}";"{2,4,5}";3

How to calculate a computed column's value based on its previous value in PostgreSQL

I'm trying to calculate an adjusted cost base based on a given table's data but can't figure out how to use the previous computed value in the current row.
CREATE TABLE transactions (
    datetime timestamp NOT NULL,
    type varchar(25) NOT NULL,
    amount INT NOT NULL,
    shares INT NOT NULL,
    symbol VARCHAR(20) NOT NULL
);
With data:
INSERT INTO transactions(amount, type, shares, datetime, symbol) VALUES (100, 'Buy', 10, now() - INTERVAL '14 days', 'XYZ');
INSERT INTO transactions(amount, type, shares, datetime, symbol) VALUES (330, 'Buy', 30, now() - INTERVAL '11 days', 'XYZ');
INSERT INTO transactions(amount, type, shares, datetime, symbol) VALUES (222, 'Buy', 22, now() - INTERVAL '10 days', 'XYZ');
INSERT INTO transactions(amount, type, shares, datetime, symbol) VALUES (245, 'Buy', 24, now() - INTERVAL '8 days', 'XYZ');
INSERT INTO transactions(amount, type, shares, datetime, symbol) VALUES (150, 'Sell', 15, now() - INTERVAL '7 days', 'XYZ');
INSERT INTO transactions(amount, type, shares, datetime, symbol) VALUES (210, 'Buy', 20, now() - INTERVAL '6 days', 'XYZ');
INSERT INTO transactions(amount, type, shares, datetime, symbol) VALUES (235, 'Buy', 22, now() - INTERVAL '5 days', 'XYZ');
INSERT INTO transactions(amount, type, shares, datetime, symbol) VALUES (110, 'Sell', 10, now() - INTERVAL '4 days', 'XYZ');
This is as far as I got:
WITH cte AS (
    WITH shares AS (
        SELECT transactions.*,
               sum(CASE WHEN transactions.type = 'Sell'
                        THEN transactions.shares * -1 -- reduction of shares
                        ELSE transactions.shares END)
                   OVER (PARTITION BY transactions.symbol
                         ORDER BY transactions.symbol, transactions.datetime
                         ROWS UNBOUNDED PRECEDING) AS total_shares
        FROM transactions)
    SELECT shares.*,
           coalesce(lag(shares.total_shares) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) AS previous_shares
    FROM shares)
SELECT cte.*,
       CASE WHEN cte.type = 'Buy' THEN
           -- [Previous total_acb] + cte.amount
       ELSE
           -- [Previous total_acb] * ((cte.previous_shares - shares) / cte.previous_shares)
       END AS total_acb
FROM cte
Expected result (total_acb is the value I'm trying to compute):
datetime | type | amount | shares | symbol | total_shares | previous_shares | total_acb
----------------------------+------+--------+--------+--------+--------------+-----------------+-----------
2018-01-10 14:09:38.882593 | Buy | 100 | 10 | XYZ | 10 | 0 | 100.00
2018-01-13 14:09:38.887738 | Buy | 330 | 30 | XYZ | 40 | 10 | 430.00
2018-01-14 14:09:38.890691 | Buy | 222 | 22 | XYZ | 62 | 40 | 552.00
2018-01-16 14:09:38.893328 | Buy | 245 | 24 | XYZ | 86 | 62 | 797.00
2018-01-17 14:09:38.905877 | Sell | 150 | 15 | XYZ | 71 | 86 | 657.98
2018-01-18 14:09:38.910944 | Buy | 210 | 20 | XYZ | 91 | 71 | 867.98
2018-01-19 14:09:38.915023 | Buy | 235 | 22 | XYZ | 113 | 91 | 1102.98
2018-01-20 14:09:38.917985 | Sell | 110 | 10 | XYZ | 103 | 113 | 1005.37
The easiest way to do this kind of recursive computation is a plpgsql function.
create or replace function calculate_totals()
returns table (
    datetime timestamp,
    type text,
    amount dec,
    shares dec,
    symbol text,
    total_shares dec,
    total_acb dec)
language plpgsql as $$
declare
    rec record;
    curr_symbol text = '';
begin
    for rec in
        select t.*
        from transactions t
        order by t.symbol, t.datetime  -- qualified to avoid clashes with the output columns
    loop
        -- Reset the running totals whenever a new symbol starts.
        if rec.symbol <> curr_symbol then
            curr_symbol = rec.symbol;
            total_acb = 0;
            total_shares = 0;
        end if;
        if rec.type = 'Buy' then
            total_acb = round(total_acb + rec.amount, 2);
            total_shares = total_shares + rec.shares;
        else
            -- A sale reduces the ACB proportionally to the shares sold.
            total_acb = round(total_acb * (total_shares - rec.shares) / total_shares, 2);
            total_shares = total_shares - rec.shares;
        end if;
        -- Copy the input row into the remaining output columns and emit the row.
        select rec.datetime, rec.type, rec.amount, rec.shares, rec.symbol
        into datetime, type, amount, shares, symbol;
        return next;
    end loop;
end $$;
The result is slightly different from the one given in the question (the question's expected output contains an arithmetic mistake):
select *
from calculate_totals();
datetime | type | amount | shares | symbol | total_shares | total_acb
---------------------------+------+--------+--------+--------+--------------+-----------
2018-01-10 23:28:56.66738 | Buy | 100 | 10 | XYZ | 10 | 100.00
2018-01-13 23:28:56.66738 | Buy | 330 | 30 | XYZ | 40 | 430.00
2018-01-14 23:28:56.66738 | Buy | 222 | 22 | XYZ | 62 | 652.00
2018-01-16 23:28:56.66738 | Buy | 245 | 24 | XYZ | 86 | 897.00
2018-01-17 23:28:56.66738 | Sell | 150 | 15 | XYZ | 71 | 740.55
2018-01-18 23:28:56.66738 | Buy | 210 | 20 | XYZ | 91 | 950.55
2018-01-19 23:28:56.66738 | Buy | 235 | 22 | XYZ | 113 | 1185.55
2018-01-20 23:28:56.66738 | Sell | 110 | 10 | XYZ | 103 | 1080.63
(8 rows)
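For reference, the same running calculation can be written without a loop as a recursive CTE that steps through each symbol's rows in datetime order. A sketch against the transactions table above (an alternative, not the answer's method):
with recursive ordered as (
    select t.*, row_number() over (partition by symbol order by datetime) as rn
    from transactions t
),
calc as (
    -- Anchor: the first transaction of each symbol.
    select o.*,
           round(case when o.type = 'Buy' then o.amount else 0 end::numeric, 2) as total_acb,
           case when o.type = 'Buy' then o.shares else -o.shares end as total_shares
    from ordered o
    where o.rn = 1
    union all
    -- Recursive step: fold the next row into the previous running totals.
    select o.*,
           round(case when o.type = 'Buy'
                      then c.total_acb + o.amount
                      else c.total_acb * (c.total_shares - o.shares) / c.total_shares
                 end, 2),
           c.total_shares + case when o.type = 'Buy' then o.shares else -o.shares end
    from calc c
    join ordered o on o.symbol = c.symbol and o.rn = c.rn + 1
)
select datetime, type, amount, shares, symbol, total_shares, total_acb
from calc
order by symbol, datetime;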

Select all columns from two tables

Let's say I have the following:
table_a
| id | date | order_id | sku | price |
--------------------------------------------
| 10 | 2016-08-18 | 111 | ABC | 10 |
table_b
| id | date | order_id | description | type | notes | valid |
-------------------------------------------------------------------
| 50 | 2016-08-18 | 111 | test | AA | | true |
I want to get all columns from both tables, so the resulting table looks like this:
| id | date | order_id | sku | price | description | type | notes | valid |
---------------------------------------------------------------------------------
| 10 | 2016-08-18 | 111 | ABC | 10 | | | | |
---------------------------------------------------------------------------------
| 50 | 2016-08-18 | 111 | | | test | AA | | true |
I tried union:
(
SELECT *
from table_a
where table_a.date > Date('today')
)
UNION
(
SELECT *
from table_b
where table_b.date > Date('today')
)
But I get a:
ERROR: each UNION query must have the same number of columns
How can this be fixed / is there another way to do this?
Easily :)
(
SELECT id, date, order_id, sku, price, NULL AS description, NULL AS type, NULL AS notes, NULL AS valid
from table_a
where table_a.date > Date('today')
)
UNION
(
SELECT id, date, order_id, NULL AS sku, NULL AS price, description, type, notes, valid
from table_b
where table_b.date > Date('today')
)
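Assuming the NULL padding keeps the two branches' rows distinct, UNION's duplicate-elimination pass is wasted work; UNION ALL returns the same rows without that sort (the only change is the set operator):
(
SELECT id, date, order_id, sku, price, NULL AS description, NULL AS type, NULL AS notes, NULL AS valid
from table_a
where table_a.date > Date('today')
)
UNION ALL
(
SELECT id, date, order_id, NULL AS sku, NULL AS price, description, type, notes, valid
from table_b
where table_b.date > Date('today')
)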
Alternatively, instead of UNION you can JOIN them; note that this pairs rows that share an id rather than stacking them:
SELECT *
FROM table_a A
JOIN table_b B USING ( id )
WHERE A.date > TIMESTAMP 'TODAY'
AND B.date > TIMESTAMP 'TODAY';
See more options: https://www.postgresql.org/docs/9.5/static/queries-table-expressions.html#QUERIES-JOIN

PostgreSQL - How can I replace NULL values with values from another column based on a common unique identifier in a PSQL VIEW

I have three foreign identifiers in my PSQL view. How could I replace the NULL second_id values with the third_id values based on their common first_id?
Currently:
first_id | second_id | third_id
----------+-----------+----------
1 | | 11
1 | | 11
1 | | 11
1 | 22 | 22
2 | 33 | 33
3 | 44 | 44
4 | 55 | 55
5 | 66 | 66
6 | | 77
6 | | 77
6 | | 77
6 | | 77
6 | 88 | 88
Should be:
first_id | second_id | third_id
----------+-----------+----------
1 | 22 | 11
1 | 22 | 11
1 | 22 | 11
1 | 22 | 22
2 | 33 | 33
3 | 44 | 44
4 | 55 | 55
5 | 66 | 66
6 | 88 | 77
6 | 88 | 77
6 | 88 | 77
6 | 88 | 77
6 | 88 | 88
How can I make this change?
The NULL values in the second_id column should be filled in; there shouldn't be blank cells.
If the second_id column shares a value with the third_id column, that value should fill the blank cells in the second_id column.
Both should be matched on their common first_id.
Thanks so much. I really appreciate it.
The second_id is really a CASE WHEN modification of the third_id. This modification is made in the view.
VIEW:
View "public.my_view"
Column | Type | Modifiers | Storage | Description
-----------------------------+-----------------------------+-----------+----------+-------------
row_number | bigint | | plain |
first_id | integer | | plain |
second_id | integer | | plain |
third_id | integer | | plain |
first_type | character varying(255) | | extended |
date_1 | timestamp without time zone | | plain |
date_2 | timestamp without time zone | | plain |
date_3 | timestamp without time zone | | plain |
View definition:
SELECT row_number() OVER (PARTITION BY t.first_id) AS row_number,
t.first_id,
CASE
WHEN t.localization_key::text = 'rq.bkd'::text THEN t.third_id
ELSE NULL::integer
END AS second_id,
t.third_id,
t.first_type,
CASE
WHEN t.localization_key::text = 'rq.bkd'::text THEN t.created_at
ELSE NULL::timestamp without time zone
END AS date_1,
CASE
WHEN t.localization_key::text = 'st.appt'::text THEN t.created_at
ELSE NULL::timestamp without time zone
END AS date_2,
CASE
WHEN t.localization_key::text = 'st.eta'::text THEN t.created_at
ELSE NULL::timestamp without time zone
END AS date_3
FROM my_table t
WHERE (t.localization_key::text = 'rq.bkd'::text OR t.localization_key::text = 'st.appt'::text OR t.localization_key::text = 'st.eta'::text) AND t.first_type::text = 'thing'::text
ORDER BY t.created_at DESC;
Here is a link to the table definition that the view is using (my_table).
https://gist.github.com/dankreiger/376f6545a0acff19536d
Thanks again for your help.
You can get it by:
select a.first_id, coalesce(a.second_id,b.second_id), a.third_id
from my_table a
left outer join
(
select first_id, second_id from my_table
where second_id is not null
) b
using (first_id)
So the update should be:
update my_table a set second_id = b.second_id
from
(
select first_id, second_id from my_table
where second_id is not null
) b
where b.first_id = a.first_id and a.second_id is null
You cannot UPDATE the underlying table my_table because it does not have a second_id column, so you should make the view display the data the way you want it. That is fairly straightforward with a CTE:
CREATE VIEW my_view AS
WITH second (first, id) AS (
SELECT first_id, third_id
FROM my_table
WHERE localization_key = 'rq.bkd')
SELECT
row_number() OVER (PARTITION BY t.first_id) AS row_number,
t.first_id,
s.id AS second_id,
t.third_id,
t.first_type,
CASE
WHEN t.localization_key = 'rq.bkd' THEN t.created_at
END AS date_1,
CASE
WHEN t.localization_key = 'st.appt' THEN t.created_at
END AS date_2,
CASE
WHEN t.localization_key = 'st.eta' THEN t.created_at
END AS date_3
FROM my_table t
JOIN second s ON s.first = t.first_id
WHERE (t.localization_key = 'rq.bkd'
OR t.localization_key = 'st.appt'
OR t.localization_key = 'st.eta')
AND t.first_type = 'thing'
ORDER BY t.created_at DESC;
This assumes that where my_table.localization_key = 'rq.bkd' you have exactly one third_id value per first_id; if not, add an appropriate qualifier such as ORDER BY first_id ASC NULLS LAST LIMIT 1 or some other suitable filter. Also note that the CTE is JOINed, not LEFT JOINed, on the assumption that there is always a valid (first_id, third_id) pair without NULLs.
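If changing the view itself is an option, a window function fills the gaps without any self-join. A sketch over the view's output, assuming at most one distinct non-NULL second_id per first_id (max() then simply picks it):
SELECT first_id,
       max(second_id) OVER (PARTITION BY first_id) AS second_id,
       third_id
FROM my_view;
Because max() ignores NULLs, every row in a first_id group receives that group's single non-NULL value.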