postgresql combine two queries - postgresql

Query 1
-- Products on planning lines for salesman 27 / customer 2977 whose
-- planning_date, shifted by +7 hours, falls in September 2022.
-- The WHERE filters on cp already discard lines without a parent planning
-- row, so the join is spelled INNER JOIN to state what actually happens.
SELECT cpl.product_id
FROM crm_planning_line AS cpl
INNER JOIN crm_planning AS cp
    ON cp.id = cpl.crm_planning_id
WHERE EXTRACT(MONTH FROM cp.planning_date + interval '7' HOUR) = 9
  AND EXTRACT(YEAR FROM cp.planning_date + interval '7' HOUR) = 2022
  AND cp.salesman_id = 27
  AND cp.customer_id = 2977
Result 1
product_id | view
7359 1
8192 1
7359 1
8192 1
7357 1
Query 2
-- Products on invoice lines for user 27 / partner 2977 invoiced in
-- September 2022.
-- The WHERE filters on inv already discard lines without a parent invoice,
-- so the join is spelled INNER JOIN to state what actually happens.
SELECT line.product_id
FROM account_invoice_line AS line
INNER JOIN account_invoice AS inv
    ON inv.id = line.invoice_id
WHERE EXTRACT(MONTH FROM inv.date_invoice) = 9
  AND EXTRACT(YEAR FROM inv.date_invoice) = 2022
  AND inv.user_id = 27
  AND inv.partner_id = 2977
Result 2
product_id | buy
7359 100
4970 200
4970 50
For the final result, I want to combine those two queries so that the result looks like this:
product_id | summery
7359 100,2
8192 2
7357 1
4970 250
Final result 2:
product_id | summery
7359 100,2
8192 0,2
7357 0,1
4970 250,0
Thank you for the help

Sample Data:
-- Fixture tables: one row per recorded view / purchase of a product.
CREATE TABLE public.table_view (
    product_id int4 NULL,
    "view"     int4 NULL
);
CREATE TABLE public.table_buy (
    product_id int4 NULL,
    buy        int4 NULL
);

-- Five view rows (7359 and 8192 twice each, 7357 once).
INSERT INTO table_view (product_id, "view")
VALUES
    (7359, 1),
    (8192, 1),
    (7359, 1),
    (8192, 1),
    (7357, 1);

-- Three purchase rows (7359 once, 4970 twice).
INSERT INTO table_buy (product_id, buy)
VALUES
    (7359, 100),
    (4970, 200),
    (4970, 50);
Sample Query:
-- One row per product with its total bought and total viewed, joined as
-- "buy,view" (a product present in only one table shows a single number).
-- src_order tags each UNION ALL branch so string_agg can sort explicitly;
-- without ORDER BY inside the aggregate the order of the two totals is
-- unspecified and "100,2" could just as well come back as "2,100".
select
    t1.product_id,
    string_agg(t1.a1, ',' order by t1.src_order)
from (
    select product_id, sum("buy")::text as a1, 1 as src_order
    from table_buy
    group by product_id
    union all
    select product_id, sum("view")::text as a1, 2 as src_order
    from table_view
    group by product_id
) as t1
group by t1.product_id

Related

SQL deduct some numbers from rows conditionally

I have a table (showing temp table)
-- Session-local temp table: one row per customer/date with a count of
-- operations. TempID is an auto-numbered primary key.
CREATE TABLE #TempTable
(
TempID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
CustID INT NOT NULL,
RODate DATE NULL,
Operation INT NULL
);
This table has this sample data:
-- Seed three rows for customer 10: two months ago, one month ago, today.
-- MONTH is the datepart name DATEADD accepts; "MONT" is rejected.
INSERT INTO #TempTable (CustID, RODate, Operation)
VALUES (10, DATEADD(MONTH, -2, GETDATE()), 2),
(10, DATEADD(MONTH, -1, GETDATE()), 3),
(10, GETDATE(), 5)
So table have below data
TempID CustID RODate Operation
-----------------------------------------------------------
1 10 2019-03-17 2
2 10 2019-04-17 3
3 10 2019-05-17 5
The requirement is that I will get one integer parameter, @noOfOperation; let's say its value is 10.
I will also get the number of months as a parameter; let's say it's 3.
I have to query the table to return data for the last 3 months only (excluding the current month), ordered by date ascending.
Then I have to deduct @noOfOperation from the table and update it.
Deduction will be based on availability in operation column.
For example: in this case first we will deduct from 2019-03-17
10 - 2 = 8 (operation column for this row becomes 0)
Next we will deduct from 2019-04-17
8 - 3 = 5 (operation column for this row becomes 0)
Similarly for 2019-05-17
5-5 = 0 (operation column for this row becomes 0)
I have to check whether @noOfOperation is less than or more than the number of operations of each individual month and then do the above accordingly.
Check this out, The idea is to use the accumulated operation and then subtracted the needed value as below:
-- Deducts @noOfOperation from the Operation column, oldest row first, over
-- the rows dated between @StartDate and @DateEnd (computed below).
-- Local variables and the table variable use the @ sigil: DECLARE does not
-- accept # names, which denote temp tables.
DECLARE @TempTable TABLE
(
    TempID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
    CustID INT NOT NULL,
    RODate DATE NULL,
    Operation INT NULL
);

INSERT INTO @TempTable (CustID, RODate, Operation)
VALUES (10, DATEADD(MONTH, -2, GETDATE()), 5),
       (10, DATEADD(MONTH, -1, GETDATE()), 6),
       (10, GETDATE(), 7);

SELECT * FROM @TempTable;

DECLARE @noOfOperation INT = 8;
DECLARE @noOfMonths INT = 3;
DECLARE @StartDate DATE, @DateEnd DATE, @avNoOfOperation INT;

--get the range you are working for:
--  @StartDate = first day of the month (@noOfMonths - 1) months before today
--  @DateEnd   = last day of the month before the current one
SELECT
    @StartDate = CAST(CAST(YEAR(DATEADD(MONTH, -@noOfMonths + 1, GETDATE())) AS VARCHAR(4)) + '-' + CAST(MONTH(DATEADD(MONTH, -@noOfMonths + 1, GETDATE())) AS VARCHAR(2)) + '-01' AS DATE),
    @DateEnd = DATEADD(DAY, -1, CAST(CAST(YEAR(GETDATE()) AS VARCHAR(4)) + '-' + CAST(MONTH(GETDATE()) AS VARCHAR(2)) + '-01' AS DATE));

--get the total of available operations, for validating before subtracting
SELECT @avNoOfOperation = SUM(t.Operation)
FROM @TempTable AS t
WHERE CAST(t.RODate AS DATE) BETWEEN @StartDate AND @DateEnd;

--review the variables if needed
--select @StartDate [@StartDate],@DateEnd [@DateEnd],@avNoOfOperation [@avNoOfOperation]

IF (@avNoOfOperation >= @noOfOperation AND @noOfOperation > 0)
BEGIN
    --only here we can start subtracting
    ;WITH DataIncluded AS (
        -- AcOp = running total of Operation, oldest row first. The explicit
        -- ROWS frame plus the TempID tiebreak gives each row its own total;
        -- the default RANGE frame would give rows sharing an RODate the same
        -- total and deduct too little from them.
        SELECT
            t.TempID,
            t.Operation,
            SUM(t.Operation) OVER (
                ORDER BY t.RODate, t.TempID
                ROWS UNBOUNDED PRECEDING
            ) AS AcOp
        FROM @TempTable AS t
        WHERE CAST(t.RODate AS DATE) BETWEEN @StartDate AND @DateEnd
    ),
    SubtractDataSet AS (
        -- UpOp = what is left on the row after the deduction:
        --   running total fully consumed -> 0
        --   partially consumed           -> the remainder
        --   not reached                  -> unchanged
        SELECT
            d.TempID,
            CASE
                WHEN (d.AcOp - @noOfOperation) <= 0 THEN 0
                WHEN (d.AcOp - @noOfOperation) < d.Operation THEN d.AcOp - @noOfOperation
                ELSE d.Operation
            END AS UpOp
        FROM DataIncluded AS d
    )
    -- Update through the alias so the target is unambiguous.
    UPDATE A
    SET A.Operation = B.UpOp
    FROM @TempTable AS A
    INNER JOIN SubtractDataSet AS B
        ON A.TempID = B.TempID;
END

SELECT * FROM @TempTable
Note: I'm not using the current month, so my output is different from the one you suggested. If the inputs were as follows:
TempID CustID RODate Operation
1 10 2019-03-17 5
2 10 2019-04-17 6
3 10 2019-05-17 7
The output would be :-
TempID CustID RODate Operation
1 10 2019-03-17 0
2 10 2019-04-17 3
3 10 2019-05-17 7
--Change the value of @OperationsToBeDeducted, to see different results
-- Walks the rows in TempID order and takes operations away from each row
-- until the requested amount has been deducted. #TempOperation2 is a
-- snapshot that is read; #TempOperation is the table that gets updated.
-- Local variables use the @ sigil; # names are temp tables only.
declare @OperationsToBeDeducted int
declare @OperationsRemaining int
declare @RODate date
set @OperationsToBeDeducted = 4
declare @TempID int
set @TempID = 1
DROP TABLE IF EXISTS #TempOperation
create table #TempOperation
(
TempID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
CustID INT NOT NULL,
RODate DATE NULL,
Operation INT NULL
);
insert into #TempOperation (CustID,RODate,Operation)
values
(10,DATEADD(month, -3, getdate()),2),
(10,DATEADD(month, -2, getdate()), 2),
(10,DATEADD(month, -1, getdate()),3)
DROP TABLE IF EXISTS #TempOperation2
create table #TempOperation2
(
TempID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
CustID INT NOT NULL,
RODate DATE NULL,
Operation INT NULL
);
-- The loop matches the two tables by TempID, so copy in TempID order to
-- keep the identity values of the snapshot aligned with the source.
insert into #TempOperation2 (CustID, RODate, Operation)
select CustID, RODate, Operation from #TempOperation order by TempID
select * from #TempOperation2 order by RODate asc
declare @maxID int;
select @maxID = max(TempID) from #TempOperation2
while (@TempID <= @maxID)
begin
-- Reset first: if no row has this TempID the SELECT assigns nothing and
-- the row is skipped because @OperationsRemaining stays 0.
set @OperationsRemaining = 0
select @OperationsRemaining = Operation, @RODate = RODate from #TempOperation2 where TempID = @TempID
if(@OperationsToBeDeducted is not null and @OperationsRemaining is not null and
@OperationsRemaining > 0 and @OperationsRemaining > @OperationsToBeDeducted)
begin
-- Row holds more than is still owed: take the rest from it and finish.
update #TempOperation set Operation = @OperationsRemaining - @OperationsToBeDeducted where TempID = @TempID
set @OperationsToBeDeducted = 0
end
else if(@OperationsToBeDeducted is not null and @OperationsRemaining is not null and
@OperationsRemaining > 0 and @OperationsRemaining <= @OperationsToBeDeducted)
begin
-- Row is fully consumed: zero it and carry the remainder to the next row.
set @OperationsToBeDeducted = @OperationsToBeDeducted - @OperationsRemaining
update #TempOperation set Operation = @OperationsRemaining - @OperationsRemaining where TempID = @TempID
end
SET @TempID = @TempID + 1
end
select * from #TempOperation order by RODate asc
DROP TABLE #TempOperation
DROP TABLE #TempOperation2

Join on multiple tables using distinct on

-- Employees.
CREATE TABLE emp
(
    emp_id     serial PRIMARY KEY,
    emp_no     integer,
    emp_ref_no character varying(15),
    emp_class  character varying(15)
);

-- Class assignments per employee, timestamped.
CREATE TABLE emp_detail
(
    emp_detail_id serial PRIMARY KEY,
    emp_id        integer,
    class_no      integer,
    created_at    timestamp without time zone,
    CONSTRAINT con_fk FOREIGN KEY (emp_id) REFERENCES emp (emp_id)
);

-- Versioned JSONB payload per class number.
CREATE TABLE class_detail
(
    class_id   serial PRIMARY KEY,
    emp_id     integer,
    class_no   integer,
    col1       JSONB,
    created_at timestamp without time zone DEFAULT now(),
    CONSTRAINT cd_fk FOREIGN KEY (emp_id) REFERENCES emp (emp_id)
);

-- Rows are listed in the original insertion order, so the serial keys
-- come out as 1, 2, 3, ... exactly as with one INSERT per row.
INSERT INTO emp (emp_no, emp_ref_no, emp_class)
VALUES
    ('548251', '2QcW', 'abc'),
    ('548251', '2FQx', 'abc'),
    ('548251', '2yz', 'abc');

INSERT INTO emp_detail (emp_id, class_no, created_at)
VALUES
    (1, 2, '2018-05-04 11:00:00'),
    (1, 1, '2018-04-04 11:00:00'),
    (2, 1, '2018-05-10 11:00:00'),
    (2, 2, '2018-02-01 11:00:00'),
    (3, 2, '2018-02-01 11:00:00');

INSERT INTO class_detail (emp_id, class_no, col1, created_at)
VALUES
    (1, 1, '{"Name":"Nik"}', '2018-02-01 10:00:00'),
    (1, 1, '{"Name":"Nik Anderson"}', '2018-03-01 10:00:00'),
    (1, 2, '{"Name":"James Anderson TST"}', '2018-03-15 10:00:00'),
    (1, 2, '{"Name":"Tim Paine ST"}', '2018-04-01 10:00:00');
I want to display corresponding emp_id, emp_no, emp_ref_no, class_no(the latest one from emp_detail table based on created at)along with all the columns of class_detail table. Class_detail table should show the latest corresponding record of the class no
The expected output which I would like to see is something like below :-
emp id | emp_no | emp_ref_no | class_no | class_id | class.col1 | class.created_at | class.created_by
1 | 548251 | 2QcW | 2 | 4 |{"Name":"Tim Paine ST"}|2018-04-01 10:00:00| NUlL
2 | 548251 | 2FQx | 1 | 2 |{"Name":"Nik Anderson"}|2018-03-01 10:00:00| NULL
3 | 548251 | 2yz | 2 | 4 |{"Name":"Tim Paine ST"}|2018-04-01 10:00:00| NULL
As I stated in the comments: It is exactly the same thing as in Inner join using distinct on. You simply have to add another join and another ORDER BY group (cd.created_at DESC)
demo:db<>fiddle
-- One row per employee: DISTINCT ON keeps the first row of each emp_id
-- group under the ORDER BY below, i.e. the newest emp_detail row and, for
-- its class_no, the newest class_detail row.
SELECT DISTINCT ON (det.emp_id)
    emp.emp_id,
    emp.emp_no,
    emp.emp_ref_no,
    det.class_no,
    cls.*
FROM emp_detail AS det
INNER JOIN emp
    ON emp.emp_id = det.emp_id
INNER JOIN class_detail AS cls
    ON cls.class_no = det.class_no
ORDER BY
    det.emp_id,
    det.created_at DESC,
    cls.created_at DESC
Note: I am not sure what the emp_id column in class_detail is for. It seems not well designed (this is also because it is always 1 in your example.) You should check whether you really need it.

Creating a Void Function in PostgreSQL

I am getting an error on this CREATE FUNCTION code in PostgreSQL. The error says it is happening around line 2 at DELETE, but it happens at WITH if I remove that line, so I think it is a problem with the format of my CREATE FUNCTION statement.
-- retention_data(shopId): rebuilds the retention rows of one shop.
--   shopId  shop whose rows in retention are deleted and re-inserted
--   returns nothing
-- A plpgsql body must be a block: the statements are wrapped in BEGIN ... END
-- (without it the parser fails on the first statement, here DELETE).
create or replace function retention_data(shopId integer) returns void as $$
begin
    -- Start from a clean slate for this shop.
    delete from retention where shop_id = shopId;

    -- ret_grid_step1: one row per order placed in the last 365 days, keeping
    -- the row ranked first by cust_name and by email within each order_id.
    with ret_grid_step1 as (
        select * from (
            select
                order_id as order_name,
                cust_name as cust_name,
                email as email,
                date(order_date) as created_at,
                count(*) as num_items_in_order,
                sum(total_price) as sales,
                rank() over (partition by order_id order by cust_name asc) as rnk_shipping_name,
                rank() over (partition by order_id order by email asc) as rnk_email
            from orders
            where shop_id = shopId
              and order_date is not null
              and order_date > now()::date - 365
              and order_date < now()::date + 1
            group by order_id, cust_name, email, date(order_date)
        ) x
        where rnk_shipping_name = 1 and rnk_email = 1
    )
    insert into retention(shop_id, cust_name, email, last_purchase_dt, total_sales, num_orders, days_since_last_order)
    select
        shopId as shop_id,
        coalesce(b.cust_name, 'null') as cust_name,
        a.email,
        a.last_purchase_dt,
        total_sales,
        num_orders,
        current_date - last_purchase_dt as days_since_last_order
    from (
        -- a: per-email totals and date of the latest order
        select
            email,
            max(created_at) as last_purchase_dt,
            count(*) as num_orders,
            sum(sales) as total_sales
        from ret_grid_step1
        group by email
    ) as a
    left join (
        -- b: customer names per email, ranked newest order first
        select
            email,
            cust_name,
            rank() over (partition by email order by created_at desc) as rnk
        from ret_grid_step1
        --where cust_name is not null
        group by email, cust_name, created_at
    ) as b
        on a.email = b.email
    where b.rnk = 1
      and a.email <> '';
end;
$$ language plpgsql;

Find exact FK matches

Have a very large table (over 200 million rows)
sID int, wordID int (PK sID, wordID)
Want to find the sID's that have the exact same wordID's (and no extras)
For a sID with over 100 wordID the chance of an exact match goes down so willing to limit it to 100
(but would like to go to 1000)
If this was school and sID were classes and wordID were students.
Then I want to find classes that have the exact same students.
sID, wordID
1, 1
1, 2
1, 3
2, 2
2, 3
3, 1
3, 4
5, 1
5, 2
6, 2
6, 3
7, 1
7, 2
8, 1
8, 1
sID 6 and 2 have the exact same wordID's
sID 5, 7, and 8 have the exact same wordID's
This is what I have so far
I would like to eliminate the two delete #temp3_sID1_sID2 and take care of that in the insert above
But I will try any ideas
It is not like you can easily create a table with 200 million rows to test with
-- Recreate the working temp tables. Each DROP is guarded so the script also
-- runs in a fresh session, where an unguarded DROP TABLE on a temp table
-- that does not exist yet raises an error.
if object_id('tempdb..#temp_sID_wordCount') is not null drop table #temp_sID_wordCount
if object_id('tempdb..#temp_count_wordID_sID') is not null drop table #temp_count_wordID_sID
if object_id('tempdb..#temp3_wordID_sID_forThatCount') is not null drop table #temp3_wordID_sID_forThatCount
if object_id('tempdb..#temp3_sID1_sID2') is not null drop table #temp3_sID1_sID2
if object_id('tempdb..#temp3_sID1_sID2_keep') is not null drop table #temp3_sID1_sID2_keep
-- number of words per sID
create table #temp_sID_wordCount (sID int primary key, ccount int not null)
-- (word count, wordID, sID) triples for the sIDs under consideration
create table #temp_count_wordID_sID (ccount int not null, wordID int not null, sID int not null, primary key (ccount, wordID, sID))
-- wordID/sID pairs for the single word count being processed
create table #temp3_wordID_sID_forThatCount (wordID int not null, sID int not null, primary key(wordID, sID))
-- accumulated result pairs across all word counts
create table #temp3_sID1_sID2_keep (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
-- candidate pairs for the single word count being processed
create table #temp3_sID1_sID2 (sID1 int not null, sID2 int not null, primary key(sID1, sID2))
-- Count the rows (words) of every sID in FTSindexWordOnce.
insert into #temp_sID_wordCount
select sID, count(*) as ccount
FROM [FTSindexWordOnce] with (nolock)
group by sID
order by sID;
-- Diagnostic: how many sIDs have at most 100 words.
select count(*) from #temp_sID_wordCount where ccount <= 100; -- 701,966
truncate table #temp_count_wordID_sID
-- Keep only sIDs with 1 to 10 words and attach each of their wordIDs.
insert into #temp_count_wordID_sID
select #temp_sID_wordCount.ccount, [FTSindexWordOnce].wordID, [FTSindexWordOnce].sID
from #temp_sID_wordCount
join [FTSindexWordOnce] with (nolock)
on [FTSindexWordOnce].sID = #temp_sID_wordCount.sID
and ccount >= 1 and ccount <= 10
order by #temp_sID_wordCount.ccount, [FTSindexWordOnce].wordID, [FTSindexWordOnce].sID;
-- NOTE(review): this counts #temp_sID_wordCount, not the table just filled
-- above; confirm which table the recorded figure refers to.
select count(*) from #temp_sID_wordCount; -- 34,860,090
truncate table #temp3_sID1_sID2_keep
-- For each of the ten smallest word counts, find the sIDs that share exactly
-- the same set of wordIDs and accumulate the pairs in #temp3_sID1_sID2_keep.
-- The loop variable and the fetch status use the @ / @@ sigils; # names are
-- temp tables and are not accepted by DECLARE or as a system function.
declare cur cursor for
select top 10 ccount from #temp_count_wordID_sID group by ccount order by ccount
open cur
declare @count int
fetch next from cur into @count
while (@@FETCH_STATUS = 0)
begin
--print (@count)
--select count(*), @count from #temp_sID_wordCount where #temp_sID_wordCount.ccount = @count
truncate table #temp3_wordID_sID_forThatCount
truncate table #temp3_sID1_sID2
-- wordID and sID for that unique word count
-- they can only be exact if they have the same word count
insert into #temp3_wordID_sID_forThatCount
select #temp_count_wordID_sID.wordID
, #temp_count_wordID_sID.sID
from #temp_count_wordID_sID
where #temp_count_wordID_sID.ccount = @count
order by #temp_count_wordID_sID.wordID, #temp_count_wordID_sID.sID
-- select count(*) from #temp3_wordID_sID_forThatCount
-- this has some duplicates
-- sID1 is the group
-- two sIDs match when the number of shared wordIDs equals the word count;
-- every sID also pairs with itself because of the <= comparison
insert into #temp3_sID1_sID2
select w1.sID, w2.sID
from #temp3_wordID_sID_forThatCount as w1 with (nolock)
join #temp3_wordID_sID_forThatCount as w2 with (nolock)
on w1.wordID = w2.wordID
and w1.sID <= w2.sID
group by w1.sID, w2.sID
having count(*) = @count
order by w1.sID, w2.sID
-- get rid of the groups of 1
delete #temp3_sID1_sID2
where sID1 in (select sID1 from #temp3_sID1_sID2 group by sID1 having count(*) = 1)
-- get rid of the double dips
delete #temp3_sID1_sID2
where #temp3_sID1_sID2.sID1 in
(select distinct s1del.sID1 -- these are the double dips
from #temp3_sID1_sID2 as s1base with (nolock)
join #temp3_sID1_sID2 as s1del with (nolock)
on s1del.sID1 > s1base.sID1
and s1Del.sID1 = s1base.sID2)
insert into #temp3_sID1_sID2_keep
select #temp3_sID1_sID2.sID1
, #temp3_sID1_sID2.sID2
from #temp3_sID1_sID2 with (nolock)
order by #temp3_sID1_sID2.sID1, #temp3_sID1_sID2.sID2
fetch next from cur into @count
end
close cur
deallocate cur
select *
FROM #temp3_sID1_sID2_keep with (nolock)
order by 1,2
So, as I see, the task is to find equal subsets.
First we can find pairs of equal subsets:
-- Pairs of sIDs whose wordID sets are identical: both have the same number
-- of words, and the number of wordIDs they share equals that number.
;with sid_sizes as (
    select sID, count(wordID) as cnt
    from [Table]
    group by sID
)
select hi.sID, lo.sID
from sid_sizes as hi
inner join sid_sizes as lo
    on lo.cnt = hi.cnt
   and lo.sID < hi.sID      -- report each pair once, larger sID first
cross apply (
    -- number of wordIDs the two sIDs have in common
    select count(1)
    from [Table] as l
    inner join [Table] as r on r.wordID = l.wordID
    where l.sID = hi.sID and r.sID = lo.sID
) as shared(cnt)
where shared.cnt = hi.cnt
Output is:
sID sID
----------- -----------
6 2
7 5
8 5
8 7
And then pairs can be combined into groups, if necessary:
sID gNum
----------- -----------
2 1
6 1
5 2
7 2
8 2
See details in SqlFiddle sample below.
SqlFiddle Sample
The other approach is to calculate hash function for every subset data:
-- One SHA-1 hash per sID, computed over its wordIDs concatenated in
-- ascending order as "id|id|...|"; equal hashes indicate equal wordID sets.
select ids.sID,
    hashbytes('sha1', (
        select cast(w.wordID as varchar(10)) + '|'
        from [Table] as w
        where w.sID = ids.sID
        order by w.wordID
        for xml path('')))
from (
    select distinct sID from [Table]
) as ids
Then subsets can be grouped based on hash value.
SqlFiddle Sample
The last one took less than a minute on my machine for a test data of about 10 million rows (20k sID values up to 1k wordID each). Also you can optimize it by excluding sIDs having no wordID count matches to any other.

Using CTE instead of Cursor

I have the following table structure.
I just want to update SubId to all the rows where it is null and where the RawLineNumber is ascending by 1 and also the SeqNumber ascending by 1.
RawlineNumber Claimid SubId SeqNumber
1 6000 A100 1
2 6000 NULL 2
3 6000 NULL 3
10 6000 A200 1
11 6000 NULL 2
25 6000 A300 1
26 6000 NULL 2
27 6000 NULL 3
I want to update
SubId of RawLineNumber 2 and 3 with A100,
SubId of RawLineNumber 11 with A200,
SubId of RawLineNumber 26 and 27 with A300.
I have a cursor which does the job but can I have a CTE to take care of it ?
-- Fill every NULL subid with the subid of the closest preceding row
-- (largest smaller rawLineNumber) that has one.
UPDATE tgt
SET subid = prev.subid
FROM mytable AS tgt
CROSS APPLY
(
    SELECT TOP 1 src.subid
    FROM mytable AS src
    WHERE src.subid IS NOT NULL
      AND src.rawLineNumber < tgt.rawLineNumber
    ORDER BY src.rawLineNumber DESC
) AS prev
WHERE tgt.subid IS NULL
Since a recursive solution was requested, I decided to write one. Also it works for gaps in Seqnumbers and RawlineNumber
-- Recursive fill of SubId: anchor rows are those with a SubId and
-- SeqNumber = 1; each step extends to the row whose RawlineNumber and
-- SeqNumber are both exactly one higher and whose SubId is still NULL.
-- The table variable is named @t: DECLARE does not accept a # name.
declare @t table (RawlineNumber int, Claimid int, SubId varchar(5), SeqNumber int)
insert @t (RawlineNumber, Claimid, SubId, SeqNumber)
values
    (1, 6000, 'A100', 1),
    (2, 6000, NULL, 2),
    (3, 6000, NULL, 3),
    (10, 6000, 'A200', 1),
    (11, 6000, NULL, 2),
    (25, 6000, 'A300', 1),
    (26, 6000, NULL, 2),
    (27, 6000, NULL, 3)
;with cte as
(
    -- anchor: first line of each group carries the SubId
    select Rawlinenumber, SeqNumber, SubId
    from @t
    where SubId is not null and SeqNumber = 1
    union all
    -- step: hand the SubId on to the directly following line
    select t.Rawlinenumber, t.SeqNumber, c.SubId
    from cte c
    join @t t
        on c.Rawlinenumber + 1 = t.Rawlinenumber
       and c.SeqNumber + 1 = t.SeqNumber
    where t.SubId is null and t.SeqNumber > 1
)
update t
set SubId = c.SubId
from @t t
join cte c
    on c.Rawlinenumber = t.Rawlinenumber
where t.SeqNumber > 1
select * from @t
A not-so simple SQL script should achieve what you want:
-- Fill every NULL subid with the subid of the last row before it that has
-- seqnumber = 1.
-- The inner subquery (t3) is correlated to the row being updated (t1), so
-- it yields the rawlinenumber of that single group-start row; the outer
-- subquery (t2) then returns exactly one subid.
update my_table t1
set subid = (
    select t2.subid
    from my_table t2
    where t2.seqnumber = 1
      and t2.rawlinenumber = (
          select max(t3.rawlinenumber)
          from my_table t3
          where t3.seqnumber = 1
            and t3.rawlinenumber < t1.rawlinenumber
      )
)
where t1.subid is null;
The inner subselect (T3) gives us the last row having seqnumber = 1 before the current line,
the outer subselect gives us the SubID for this row (using windowing functions would be more efficient, but since you didn't mention a specific RDBMS, I stick with this :-) )