TSQL Select random rows WITH running total - tsql

TSQL, I am using SQL 2012 but will use anything that works from previous versions. I know how to select TOP X random rows from a table using NEWID(). Separately, I know how to select a running total using several methods, CTE, etc.
BUT, how would one combine these 2 results into one query? So I want to select say 3 random records (AND no less than 3), where the running total does not exceed 15. Can't wrap my head around this one...
Use this simple table and data:
-- Sample table for the question: an identity-numbered id plus an integer value.
CREATE TABLE TblTest
(
    id    INT NOT NULL IDENTITY(1, 1) PRIMARY KEY,
    value INT NOT NULL
);

-- Fourteen sample rows, one statement each, so the identity ids follow this order.
INSERT INTO TblTest (value) VALUES (4);
INSERT INTO TblTest (value) VALUES (3);
INSERT INTO TblTest (value) VALUES (5);
INSERT INTO TblTest (value) VALUES (6);
INSERT INTO TblTest (value) VALUES (6);
INSERT INTO TblTest (value) VALUES (5);
INSERT INTO TblTest (value) VALUES (6);
INSERT INTO TblTest (value) VALUES (5);
INSERT INTO TblTest (value) VALUES (4);
INSERT INTO TblTest (value) VALUES (7);
INSERT INTO TblTest (value) VALUES (7);
INSERT INTO TblTest (value) VALUES (6);
INSERT INTO TblTest (value) VALUES (5);
INSERT INTO TblTest (value) VALUES (4);
My attempt is below. I'm not sure it even makes sense to have NEWID() there; sometimes it returns only 2 rows, sometimes 0. I want it to always return exactly 3 rows and, if possible, a total as close to 15 as possible:
-- Asker's attempt: accumulate value in a random order and keep rows whose running total stays below 16.
-- top(3) has no ORDER BY, so which qualifying rows come back is not pinned down by the query.
select top(3) ourRandID,
id,
value,
running_total
from (
-- NOTE(review): this NEWID() call is separate from the one in the OVER clause below, so
-- ourRandID is presumably not the value the running total was ordered by - confirm.
select NEWID() as ourRandID,
id,
value,
sum(value) over (order by NEWID()) as running_total
from TblTest
) t
-- Only caps the total; nothing here requires that three rows qualify.
where running_total < 16

try this:
-- Return the three TblTest rows of one randomly chosen best triple: three
-- distinct rows whose values sum to at most 15, preferring the largest sum.
-- Returns no rows when no such triple exists.
SELECT t.*
FROM TblTest AS t
INNER JOIN
(
    SELECT TOP (1)
        t1.id AS id1,
        t2.id AS id2,
        t3.id AS id3
    FROM TblTest AS t1
    -- id1 < id2 < id3 lists every unordered triple exactly once instead of
    -- all six orderings, so the random tie-break stays uniform over triples.
    INNER JOIN TblTest AS t2
        ON t1.id < t2.id
    INNER JOIN TblTest AS t3
        ON t2.id < t3.id
    WHERE t1.value + t2.value + t3.value <= 15
    -- Largest sum first; NEWID() picks at random among triples with that sum.
    ORDER BY t1.value + t2.value + t3.value DESC, NEWID()
) AS a
    ON t.id IN (a.id1, a.id2, a.id3)

Related

recursive query to replicate/imitate dense_rank

BEGIN;

-- Session-local sample data: teacher names with their salaries.
CREATE TEMPORARY TABLE teacher (
    name   text,
    salary numeric
);

-- 'b1'/'b' and 'f1'/'f' deliberately share a salary.
INSERT INTO teacher (name, salary)
VALUES
    ('b1', 90000),
    ('f1', 87000),
    ('a', 65000),
    ('b', 90000),
    ('c', 40000),
    ('d', 95000),
    ('e', 60000),
    ('f', 87000);

COMMIT;
query
-- Walk the salaries from highest to lowest, one row per step, numbering the steps in rn.
WITH RECURSIVE cte AS (
    -- Anchor: a single row holding the highest salary.
    (
        SELECT name, salary, 1 AS rn
        FROM teacher
        ORDER BY salary DESC
        LIMIT 1
    )
    UNION ALL
    -- Step: a single row with the next salary strictly below the previous step's salary.
    SELECT nxt.*
    FROM cte AS prev
    CROSS JOIN LATERAL (
        SELECT t.name, t.salary, prev.rn + 1
        FROM teacher AS t
        WHERE t.salary < prev.salary
        ORDER BY t.salary DESC
        LIMIT 1
    ) AS nxt
)
SELECT *
FROM cte
ORDER BY salary DESC;
If all salaries are distinct, the query above can imitate rank/row_number.
I am wondering how to use recursive query to replicate/imitate dense_rank.
related post: https://dba.stackexchange.com/questions/286627/get-top-two-rows-per-group-efficiently

Is it possible to find duplicating records in two columns simultaneously in PostgreSQL?

I have the following database schema (oversimplified):
-- Sequence that supplies partners.partner_id.
CREATE SEQUENCE partners_partner_id_seq;

-- Simplified partners table; company_id and vat_id are the columns checked for duplicates.
CREATE TABLE partners (
    partner_id integer      DEFAULT nextval('partners_partner_id_seq'::regclass) NOT NULL PRIMARY KEY,
    name       varchar(255) DEFAULT NULL::character varying,
    company_id varchar(20)  DEFAULT NULL::character varying,
    vat_id     varchar(50)  DEFAULT NULL::character varying,
    is_deleted boolean      DEFAULT false NOT NULL
);

-- test1/test2 share both identifiers, test3 shares only the vat_id, test4 shares nothing.
INSERT INTO partners (name, company_id, vat_id)
VALUES
    ('test1', '1010109191191', 'BG1010109191192'),
    ('test2', '1010109191191', 'BG1010109191192'),
    ('test3', '3214567890102', 'BG1010109191192'),
    ('test4', '9999999999999', 'GE9999999999999');
I am trying to figure out how to return test1, test2 (because the company_id column value duplicates vertically) and test3 (because the vat_id column value duplicates vertically as well).
To put it in other words - I need to find duplicating company_id and vat_id records and group them together, so that test1, test2 and test3 would be together, because they duplicate by company_id and vat_id.
So far I have the following query:
-- Asker's attempt: number the rows inside each (company_id, vat_id) pair and report pairs with more than one row.
SELECT *
FROM (
-- nextrow is the row number of the following row; OVER () specifies no ordering of its own.
SELECT *, LEAD(row, 1) OVER () AS nextrow
FROM (
SELECT *, ROW_NUMBER() OVER (w) AS row
FROM partners
WHERE is_deleted = false
-- Keep partners that have at least one non-empty identifier.
AND ((company_id != '' AND company_id IS NOT null) OR (vat_id != '' AND vat_id IS NOT NULL))
-- Partitioning on both columns together: rows are numbered as duplicates only when company_id AND vat_id both match.
WINDOW w AS (PARTITION BY company_id, vat_id ORDER BY partner_id DESC)
) x
) y
-- row > 1 picks the later members of a partition; nextrow > 1 picks the row just before one.
WHERE (row > 1 OR nextrow > 1)
AND is_deleted = false
This successfully shows all company_id duplicates, but does not appear to show vat_id ones - test3 row is missing. Is this possible to be done within one query?
Here is a db-fiddle with the schema, data and predefined query reproducing my result.
You can do this with recursion, but depending on the size of your data you may want to iterate, instead.
The trick is to make the name just another match key instead of treating it differently than the company_id and vat_id:
-- Answer's version of the partners table, using an identity column for partner_id.
CREATE TABLE partners (
    partner_id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    name       text,
    company_id text,
    vat_id     text,
    is_deleted boolean DEFAULT false NOT NULL
);

-- test5 links to test3 through company_id and test6 links to test5 through vat_id.
INSERT INTO partners (name, company_id, vat_id)
VALUES
    ('test1', '1010109191191', 'BG1010109191192'),
    ('test2', '1010109191191', 'BG1010109191192'),
    ('test3', '3214567890102', 'BG1010109191192'),
    ('test4', '9999999999999', 'GE9999999999999'),
    ('test5', '3214567890102', 'BG8888888888888'),
    ('test6', '2983489023408', 'BG8888888888888');
I added a couple of test cases and left in the lone partner.
-- Group partners that are linked, directly or through a chain of other
-- partners, by a shared name, company_id or vat_id.
-- Array containment is written @> ("#>" is not an array operator).
-- sort(int[]) is not a core function; it is provided by the intarray extension.
with recursive keys as (
    -- One row per partner. The n_/c_/v_ prefixes keep a value in one column
    -- from matching the same text in a different column.
    select partner_id,
           array['n_'||name, 'c_'||company_id, 'v_'||vat_id] as matcher,
           array[partner_id] as matchlist,
           1 as size
    from partners
), matchers as (
    select *
    from keys
    union all
    -- Extend a chain with any partner that shares a key with the chain's most
    -- recently added member and is not already part of the chain.
    select p.partner_id, c.matcher,
           p.matchlist||c.partner_id as matchlist,
           p.size + 1
    from matchers p
    join keys c
      on c.matcher && p.matcher
     and not p.matchlist @> array[c.partner_id]
), largest as (
    -- Keep only chains that no longer chain contains; sorting the member list
    -- lets DISTINCT collapse chains that hold the same members in another order.
    select distinct sort(matchlist) as matchlist
    from matchers m
    where not exists (select 1
                      from matchers
                      where matchlist @> m.matchlist
                        and size > m.size)
    -- Optional filter that would hide partners with no match at all:
    -- and size > 1
)
select *
from largest
;
matchlist
{1,2,3,5,6}
{4}
fiddle
EDIT UPDATE
Since recursion did not perform, here is an iterative example in plpgsql that uses a temporary table:
-- Working table: one row per (partner, match key). Every partner starts in a group of its own.
CREATE TEMPORARY TABLE match1 (
    partner_id integer NOT NULL,
    group_id   integer NOT NULL,
    matchkey   uuid    NOT NULL
);

CREATE INDEX ON match1 (matchkey);
CREATE INDEX ON match1 (group_id);

-- Each key is the md5 of the prefixed column value, stored as a uuid.
INSERT INTO match1 (partner_id, group_id, matchkey)
SELECT p.partner_id, p.partner_id, md5('n_' || p.name)::uuid
FROM partners AS p
UNION ALL
SELECT p.partner_id, p.partner_id, md5('c_' || p.company_id)::uuid
FROM partners AS p
UNION ALL
SELECT p.partner_id, p.partner_id, md5('v_' || p.vat_id)::uuid
FROM partners AS p;
-- Repeatedly pull every group down to the smallest group_id reachable through
-- a shared matchkey; stop once a pass changes no rows.
do $$
declare
    _cnt bigint;
begin
    loop
        with by_key as (
            -- Smallest group_id among all rows sharing a matchkey.
            select group_id,
                   min(group_id) over (partition by matchkey) as new_group_id
            from match1
        ), by_group as (
            -- Smallest candidate per current group.
            select group_id, min(new_group_id) as new_group_id
            from by_key
            group by group_id
        ), moved as (
            update match1
               set group_id = g.new_group_id
              from by_group g
             where g.group_id = match1.group_id
               and g.new_group_id != match1.group_id
            returning 1
        )
        select count(*) into _cnt from moved;

        exit when _cnt = 0;
    end loop;
end;
$$;
updated fiddle

Delete duplicate rows with different values in columns

I couldn't find my case on the Internet. How can I delete duplicates when the matching values are in different columns?
I have a table with a lot of values, for example:
|Id1|Id2|
|89417980|89417978|
|89417980|89417979|
|89417978|89417980|
|89417979|89417980|
I need to exclude duplicates and leave in the answer only:
|Id1|Id2|
|89417980|89417978|
|89417980|89417979|
min/max does not work here, as the values may be different.
I tried to union/join tables on a table/exclude results with temporary tables, but in the end I come to the beginning.
Assuming id1 and id2 together form the primary key, you could try this:
-- Demo: remove mirrored duplicates, i.e. rows (a, b) for which (b, a) also exists.
-- A table variable takes the @ prefix; DECLARE with a # name is not valid.
DECLARE @tbl TABLE (id1 int, id2 int);

INSERT INTO @tbl (id1, id2)
VALUES
    (89417980, 89417978),
    (89417980, 89417979),
    (89417978, 89417980),
    (89417979, 89417980);

-- Rows before the clean-up.
SELECT id1, id2 FROM @tbl;

WITH ranked AS (
    SELECT
        id1,
        id2,
        -- Rows holding the same two ids in either order share a partition,
        -- keyed on (smaller id, larger id). Comparing the ids themselves
        -- avoids the collisions a CHECKSUM-based key could produce.
        ROW_NUMBER() OVER (
            PARTITION BY
                CASE WHEN id1 < id2 THEN id1 ELSE id2 END,
                CASE WHEN id1 < id2 THEN id2 ELSE id1 END
            -- Row 1, the one that is kept, has the larger id in id1.
            ORDER BY id1 DESC, id2 DESC
        ) AS rn
    FROM @tbl
)
-- Only surplus rows are removed; a pair without a mirror is left untouched.
DELETE FROM ranked
WHERE rn > 1;

-- Rows after the clean-up.
SELECT id1, id2 FROM @tbl;

PostgreSQL grouping

I would like to group values according to the values in other columns.
This is an example:
I would like to get the output:
{{-30,-50,20},{-20,30,60},{-30,NULL or other value, 20}}
I managed to arrive to:
-- Collect the val values of t_id 1 into one array per m_id.
SELECT array_agg(mt."val")
FROM my_table AS mt
WHERE mt."t_id" = 1
GROUP BY mt."m_id";
{{-30,-50,20},{-20,30,60},{-30,20}}
What would be the best approach?
-- Sample data: one val per (t_id, m_id, s_id); the (1, 3, 2) combination is deliberately absent.
CREATE TABLE my_table (
    t_id integer,
    m_id integer,
    s_id integer,
    val  integer
);

INSERT INTO my_table (t_id, m_id, s_id, val)
VALUES
    (1, 1, 1, -30),
    (1, 1, 2, -50),
    (1, 1, 3, 20),
    (1, 2, 1, -20),
    (1, 2, 2, 30),
    (1, 2, 3, 60),
    (1, 3, 1, -30),
    (1, 3, 3, 20);
-- Build every (t_id, m_id) x s_id combination first, then attach the stored
-- values, so a missing combination contributes a NULL to the array.
WITH grp AS (
    -- Every (t_id, m_id) pair present in the table.
    SELECT DISTINCT t_id, m_id
    FROM my_table
),
slot AS (
    -- Every s_id present in the table.
    SELECT DISTINCT s_id
    FROM my_table
)
SELECT array_agg(t.val ORDER BY s_id)
FROM grp
CROSS JOIN slot
LEFT JOIN my_table AS t USING (t_id, m_id, s_id)
WHERE t_id = 1
GROUP BY m_id
ORDER BY m_id
;
array_agg
---------------
{-30,-50,20}
{-20,30,60}
{-30,NULL,20}

PostgreSQL join to denormalize a table with generate_series

I've this table:
-- Sample table: each name together with the number of times it should be repeated.
CREATE TABLE mytable (
    name  text,
    count integer
);

INSERT INTO mytable (name, count)
VALUES
    ('john', 4),
    ('mark', 2),
    ('albert', 3);
and I would like to "denormalize" the rows in this way:
-- Repeat each name once per series value not exceeding its count; the upper bound 4 is hard-coded.
SELECT m.name
FROM mytable AS m
INNER JOIN generate_series(1, 4) AS tmp (a)
    ON tmp.a <= m.count
so that the number of rows for each name equals its count column: 4 rows for john, 2 for mark and 3 for albert.
But I can't use the generate_series() function this way if I don't know the highest count (in this case 4). Is there a way to do this without knowing MAX(count)?
-- generate_series in the select list yields one output row per series value,
-- so each name is repeated count times without knowing the largest count.
SELECT
    m.name,
    generate_series(1, m.count)
FROM mytable AS m;
Set returning functions can be used in the select list and will do a cross join with the row retrieved from the base table.
I think this is an undocumented behaviour that might go away in the future, but I'm not sure about that (I recall some discussion regarding this on the mailing list)
SQLFiddle example
-- Recreate the sample table. IF EXISTS keeps the script runnable on a fresh
-- database, where an unconditional DROP TABLE would raise an error.
DROP TABLE IF EXISTS ztable;

CREATE TABLE ztable (
    zname  varchar,
    zvalue integer NOT NULL
);

INSERT INTO ztable (zname, zvalue)
VALUES
    ('one', 1),
    ('two', 2),
    ('three', 3),
    ('four', 4);
-- Repeat each zname once per rank from 1 up to its zvalue.
WITH RECURSIVE zzz AS (
    -- Anchor: every name gets rank 1, whatever its zvalue.
    SELECT 1::integer AS rnk, t0.zname
    FROM ztable t0
    UNION
    -- Step: give a name its next rank while the current rank is below zvalue.
    -- Matching on zname makes each name advance only from its own previous
    -- rank, instead of pairing every frontier row with every table row and
    -- leaving UNION to discard the repeats. UNION still removes repeats when
    -- a zname occurs more than once in ztable.
    SELECT 1 + rr.rnk, t1.zname
    FROM ztable t1
    JOIN zzz rr
      ON rr.zname = t1.zname
     AND rr.rnk < t1.zvalue
),
expand AS (
    SELECT zzz.zname
    FROM zzz
)
SELECT x.*
FROM expand x
;