Cumulative Sum of 2 Columns - postgresql

I am using Postgresql 11.
I have 2 tables - txn_table and summary_table
create table txn_table(id int, txn_date timestamp, amount decimal);
create table summary_table(id int, txn_date date, day_Total decimal, Cumulative_Total decimal);
Sample data for txn_table is
insert into txn_table values (1, '2020-05-28 10:05:05', 100.00);
insert into txn_table values(2, '2020-05-28 11:45:10', 200.00);
insert into txn_table values(3, '2020-05-29 10:05:05', 300.00);
insert into txn_table values(4, '2020-05-29 12:10:01', 400.00);
I want to insert the data in summary_table like below
day_total contains sum of particular day
Cumulative_total contains cumulative sum of day_total
| id | txn_date | day_total | cumulative_total |
| --- | ------------------------ | --------- | ---------------- |
| 1 | 2020-05-28 | 300 | 300 |
| 2 | 2020-05-29 | 700 | 1000 |

I guess this select could solve your problem:
-- Roll txn_table up to one row per calendar day and append a running total.
-- The named window "w" is shared by row_number() (surrogate id) and sum()
-- (cumulative total). txn_date is unique after the GROUP BY, so the default
-- window frame produces a true running sum.
-- The explicit column list keeps the insert independent of summary_table's
-- physical column order.
insert into summary_table (id, txn_date, day_total, cumulative_total)
select row_number() over w as id
     , txn_date
     , day_total
     , sum(day_total) over w as cumulative_total
from (
    -- daily totals: drop the time part so all transactions of a day share one group
    select txn_date::date as txn_date, sum(amount) as day_total
    from txn_table
    group by txn_date::date
) d
window w as (order by txn_date)
order by txn_date;
(I worked this out in my head and have not tested it. Consider preparing a db<>fiddle or a CTE with sample input to verify it.)

Do the following:
Add a unique constraint to the txn_date column of summary_table:
create table summary_table(id SERIAL PRIMARY KEY, txn_date date unique, day_total decimal, cumulative_total decimal);
If you want to ignore duplicates, use the query below:
-- Insert per-day totals together with their running total.
-- Days whose txn_date already exists in summary_table are left untouched
-- (relies on the unique constraint on summary_table.txn_date).
with daily as (
    -- one row per calendar day
    select txn_date::date as txn_date
         , sum(amount) as day_total
    from txn_table
    group by txn_date::date
)
insert into summary_table (txn_date, day_total, cumulative_total)
(
    select daily.txn_date
         , daily.day_total
         , sum(daily.day_total) over (order by daily.txn_date) as cumulative_total
    from daily
    order by daily.txn_date
)
on conflict (txn_date)
do nothing;
If you want to update the existing rows in summary_table instead, use the query below:
-- Insert per-day totals together with their running total.
-- When a row for the same txn_date already exists, overwrite its totals with
-- the freshly computed ones (EXCLUDED refers to the row proposed for insertion).
with daily as (
    -- one row per calendar day
    select txn_date::date as txn_date
         , sum(amount) as day_total
    from txn_table
    group by txn_date::date
)
insert into summary_table (txn_date, day_total, cumulative_total)
(
    select daily.txn_date
         , daily.day_total
         , sum(daily.day_total) over (order by daily.txn_date) as cumulative_total
    from daily
    order by daily.txn_date
)
on conflict (txn_date)
do update
   set day_total = EXCLUDED.day_total
     , cumulative_total = EXCLUDED.cumulative_total;

Related

Liquibase insert select where not exists

I want to insert into table1 multiple rows from table2. The problem is that I have a field of same name in table2 and table1 and I don't want to insert data if there's already a record with same value in this field. Now I have something like this:
insert into table1 (id, sameField, constantField, superFied)
select gen_random_uuid(), "sameField", 'constant', "anotherField"
from table2;
And I assume I need to do something like this:
insert into table1 (id, sameField, constantField, superFied)
select gen_random_uuid(), "sameField", 'constant', "anotherField"
from table2
where not exists ... ?
What do I need to write in place of the "..." if I want this logic: when selecting sameField from table2, skip the row if table1 already contains the same value in sameField? The DBMS is Postgres.
You can use a sub-query to see whether the record exists. You will need to define the column(s) which should be unique.
create table table2(
id varchar(100),
sameField varchar(25),
constant varchar(25),
superField varchar(25)
);
insert into table2 values
(gen_random_uuid(),'same1','constant1','super1'),
(gen_random_uuid(),'same2','constant2','super2')
✓
2 rows affected
create table table1(
id varchar(100),
sameField varchar(25),
constant varchar(25),
superField varchar(25)
);
insert into table1 values
(gen_random_uuid(),'same1','constant1','super1');
✓
1 rows affected
-- Copy rows from table2 whose sameField value is not yet present in table1.
-- NOT EXISTS is used instead of NOT IN: a single NULL in table1.sameField makes
-- "x NOT IN (subquery)" evaluate to unknown for every row, so nothing would be inserted.
-- Note: a table2 row whose sameField is NULL never matches and is therefore inserted.
insert into table1 (id, sameField, constant, superField)
select uuid_in(md5(random()::text || clock_timestamp()::text)::cstring),
       t2.sameField, 'constant', t2.superField
from table2 t2
where not exists (
    select 1
    from table1 t1
    where t1.sameField = t2.sameField
);
1 rows affected
select * from table1;
select * from table2;
id | samefield | constant | superfield
:----------------------------------- | :-------- | :-------- | :---------
4cf10b1c-7a3f-4323-9a16-cce681fcd6d8 | same1 | constant1 | super1
d8cf27a0-3f55-da50-c274-c4a76c697b84 | same2 | constant | super2
id | samefield | constant | superfield
:----------------------------------- | :-------- | :-------- | :---------
c8a83804-9f0b-4d97-8049-51c2c8c54665 | same1 | constant1 | super1
3a9cf8b5-8488-4278-a06a-fd75fa74e206 | same2 | constant2 | super2
db<>fiddle here

Join on multiple tables using distinct on

create table emp
(
emp_id serial primary key,
emp_no integer,
emp_ref_no character varying(15),
emp_class character varying(15)
);
create table emp_detail
(
emp_detail_id serial primary key,
emp_id integer,
class_no integer,
created_at timestamp without time zone,
constraint con_fk foreign key(emp_id) references emp(emp_id)
);
create table class_detail
(
class_id serial primary key,
emp_id integer,
class_no integer,
col1 JSONB,
created_at timestamp without time zone default now(),
constraint cd_fk foreign key(emp_id) references emp(emp_id)
);
INSERT INTO emp(
emp_no, emp_ref_no, emp_class)
VALUES ('548251', '2QcW', 'abc' );
INSERT INTO emp(
emp_no, emp_ref_no, emp_class)
VALUES ('548251', '2FQx', 'abc');
INSERT INTO emp(
emp_no, emp_ref_no, emp_class)
VALUES ('548251', '2yz', 'abc');
INSERT INTO emp_detail(
emp_id, class_no, created_at
)
VALUES ( 1, 2, '2018-05-04 11:00:00'
);
INSERT INTO emp_detail(
emp_id, class_no, created_at
)
VALUES ( 1, 1, '2018-04-04 11:00:00'
);
INSERT INTO emp_detail(
emp_id, class_no, created_at
)
VALUES ( 2, 1, '2018-05-10 11:00:00'
);
INSERT INTO emp_detail(
emp_id, class_no, created_at
)
VALUES ( 2, 2, '2018-02-01 11:00:00'
);
INSERT INTO emp_detail(
emp_id, class_no, created_at
)
VALUES ( 3, 2, '2018-02-01 11:00:00'
);
insert into class_detail(emp_id, class_no, col1, created_at) values(1,1,'{"Name":"Nik"}', '2018-02-01 10:00:00');
insert into class_detail(emp_id, class_no, col1, created_at) values(1,1,'{"Name":"Nik Anderson"}', '2018-03-01 10:00:00');
insert into class_detail(emp_id, class_no, col1, created_at) values(1,2,'{"Name":"James Anderson TST"}', '2018-03-15 10:00:00');
insert into class_detail(emp_id, class_no, col1, created_at) values(1,2,'{"Name":"Tim Paine ST"}', '2018-04-01 10:00:00');
I want to display the corresponding emp_id, emp_no, emp_ref_no and class_no (the latest one from the emp_detail table, based on created_at), along with all the columns of the class_detail table. The class_detail columns should show the latest record for that class_no.
The expected output which I would like to see is something like below :-
emp id | emp_no | emp_ref_no | class_no | class_id | class.col1 | class.created_at | class.created_by
1 | 548251 | 2QcW | 2 | 4 |{"Name":"Tim Paine ST"}|2018-04-01 10:00:00| NULL
2 | 548251 | 2FQx | 1 | 2 |{"Name":"Nik Anderson"}|2018-03-01 10:00:00| NULL
3 | 548251 | 2yz | 2 | 4 |{"Name":"Tim Paine ST"}|2018-04-01 10:00:00| NULL
As I stated in the comments: It is exactly the same thing as in Inner join using distinct on. You simply have to add another join and another ORDER BY group (cd.created_at DESC)
demo:db<>fiddle
-- One row per employee.
-- DISTINCT ON keeps the first row of each ed.emp_id group in ORDER BY order, so the
-- newest emp_detail row wins, and among the class_detail rows sharing its class_no
-- the newest one wins.
SELECT DISTINCT ON (ed.emp_id)
       e.emp_id
     , e.emp_no
     , e.emp_ref_no
     , ed.class_no
     , cd.*
FROM emp_detail AS ed
INNER JOIN emp AS e
        ON e.emp_id = ed.emp_id
INNER JOIN class_detail AS cd
        ON ed.class_no = cd.class_no
ORDER BY ed.emp_id
       , ed.created_at DESC
       , cd.created_at DESC
Note: I am not sure what the emp_id column in class_detail is for. It seems not well designed (this is also because it is always 1 in your example.) You should check whether you really need it.

Merging rows on SSRS

My raw data returned to SSRS.
IF OBJECT_ID('tempdb..#tmpElections') IS NOT NULL
DROP TABLE #tmpElections
create table #tmpElections
(
ClientId int,
MaterialType varchar(50),
QtyReq int,
QtySent int
)
insert into #tmpElections values (1,'MM1',100,50)
insert into #tmpElections values (2,'MM2',200,50)
insert into #tmpElections values (2,'MM2',200,25)
insert into #tmpElections values (3,'MM3',300,50)
insert into #tmpElections values (3,'MM3',300,150)
insert into #tmpElections values (3,'MM3',300,100)
insert into #tmpElections values (4,'MM4',400,300)
insert into #tmpElections values (4,'MM4',400,100)
select * from #tmpElections
On the report, status = partial, if QtySent < QtyReq, else full.
My ssrs report should display as below, merging/blanking the row cells,
having same Clientid,materialType and status = 'Full'.The column QtySent should be displayed.
Desired Report Sample
What is the best approach, and how can I achieve this result?
Should this be handled in T-SQL or in SSRS?
The yellow highlighted cells should be blank on the report within each group.
Sample Report
I'd use a sub-query to total up your QtySent for comparison, together with a CASE to assign the status text value. The rest is just SSRS formatting.
-- Return every detail row with a group-level status.
-- A (ClientId, MaterialType) group is 'Full' once the total quantity sent reaches
-- the required quantity, otherwise 'Partial'. ">=" is used rather than "=" so that an
-- over-shipped group is not reported as 'Partial' (the rule is: partial only if sent < required).
SELECT
    e.*
    ,CASE
        WHEN s.TotSent >= e.QtyReq THEN 'Full'
        ELSE 'Partial'
    END AS [Status]
FROM
    #tmpElections AS e
LEFT JOIN
    (
        -- total quantity sent per client / material type
        SELECT
            e2.ClientId
            ,e2.MaterialType
            ,SUM(e2.QtySent) AS TotSent
        FROM
            #tmpElections AS e2
        GROUP BY
            e2.ClientId
            ,e2.MaterialType
    ) AS s
    ON
        s.ClientId = e.ClientId
        AND s.MaterialType = e.MaterialType;
Result set:
+----------+--------------+--------+---------+---------+
| ClientId | MaterialType | QtyReq | QtySent | Status |
+----------+--------------+--------+---------+---------+
| 1 | MM1 | 100 | 50 | Partial |
| 2 | MM2 | 200 | 50 | Partial |
| 2 | MM2 | 200 | 25 | Partial |
| 3 | MM3 | 300 | 50 | Full |
| 3 | MM3 | 300 | 150 | Full |
| 3 | MM3 | 300 | 100 | Full |
| 4 | MM4 | 400 | 300 | Full |
| 4 | MM4 | 400 | 100 | Full |
+----------+--------------+--------+---------+---------+
You are almost there.. what I would do is add a case statement to determine the status :
-- One row per (ClientId, MaterialType): the largest QtyReq, the summed QtySent,
-- and a status that is 'Partial' while the summed QtySent is below that QtyReq, else 'Full'.
select ClientId,MaterialType, max(QtyReq) as qtyreq, sum(QtySent) as qtysent
, case when sum(QtySent)<max(QtyReq) then 'Partial' else 'Full' end as [status]
from #tmpElections
group by
ClientId
,MaterialType
Then in your report.. you just group on the first three columns that is shown in your image description... and then the rest as details
Thank you all for your comments and solutions. I was able to solve my problem as below.
-- Returns the sample election detail rows, each carrying its group's status (mStatus),
-- its position within the group (Rowno) and the group's highest position (MaxRow).
-- Takes no parameters; the sample data is created inside the procedure.
Create procedure dbo.TestRptSample
as
begin
-- Sample input: one row per shipment.
create table #tmpElections
(
ClientId int,
MaterialType varchar(50),
QtyReq int,
QtySent int,
SentDate datetime
)
insert into #tmpElections values (1,'MM1',100,50,'02/01/2018')
insert into #tmpElections values (2,'MM2',200,50,'02/01/2018')
insert into #tmpElections values (2,'MM2',200,25,'03/01/2018')
insert into #tmpElections values (3,'MM3',300,50,'02/01/2018')
insert into #tmpElections values (3,'MM3',300,150,'02/15/2018')
insert into #tmpElections values (3,'MM3',300,100,'03/01/2018')
insert into #tmpElections values (4,'MM4',400,300,'02/01/2018')
insert into #tmpElections values (4,'MM4',400,100,'03/01/2018')
-- Detail rows plus the status computed per group.
create table #tmpFinal
(
ClientId int,
MaterialType varchar(50),
QtyReq int,
QtySent int,
SentDate datetime,
mStatus varchar(100),
)
-- A: one row per (ClientId, MaterialType) with the summed QtySent and derived status.
-- B: the detail rows; the join gives every detail row the status of its group.
Insert into #tmpFinal
select b.*,a.status
from
(
select ClientId,MaterialType, max(QtyReq) as qtyreq, sum(QtySent) as qtysent
, case when sum(QtySent)<max(QtyReq) then 'Partial' else 'Full' end as [status]
from #tmpElections
group by
ClientId
,MaterialType
) A
inner join #tmpElections B on a.ClientId = b.ClientId and a.MaterialType = b.MaterialType;
-- x: number the rows of each (clientId, materialType, qtyReq) group by sentdate.
with x as
(
select *,
ROW_NUMBER() over (partition by clientId,materialType,qtyReq
order by sentdate) as Rowno
from #tmpFinal
)
-- MaxRow is the highest Rowno within the same group, so Rowno = MaxRow marks the group's last row.
select *
,max(rowno) over (partition by clientId,materialType,qtyReq) as MaxRow
from x
order by clientId ,sentdate
end
Used the procedure with row_number to generate row numbers within the group by sets.
On the report, in visibility expressions of the row text boxes, used the following expression to show or hide that column.
iif(Fields!mStatus.Value="Full" and Fields!Rowno.Value <> Fields!MaxRow.Value ,True,False)

Postgresql crosstab query with multiple "row name" columns

I have a table that is a "tall skinny" fact table:
-- "Tall skinny" fact table: one row per (source_id, symbol_id, data_type_id, eff_date).
CREATE TABLE facts(
    eff_date timestamp NOT NULL,
    update_date timestamp NOT NULL,
    symbol_id int4 NOT NULL,
    data_type_id int4 NOT NULL,
    source_id char(3) NOT NULL,
    fact decimal,
    /* Keys */
    CONSTRAINT fact_pk
        PRIMARY KEY (source_id, symbol_id, data_type_id, eff_date)
);
I'd like to "pivot" this for a report, so the header looks like this:
eff_date, symbol_id, source_id, datatypeValue1, ... DatatypeValueN
I.e., I'd like a row for each unique combination of eff_date, symbol_id, and source_id.
However, the PostgreSQL crosstab() function only allows one key column.
Any ideas?
crosstab() expects the following columns from its input query (1st parameter), in this order:
a row_name
(optional) extra columns
a category (matching values in 2nd crosstab parameter)
a value
You don't have a row_name. Add a surrogate row_name with the window function dense_rank().
Your question leaves room for interpretation. Let's add sample rows for demonstration:
INSERT INTO facts (eff_date, update_date, symbol_id, data_type_id, source_id)
VALUES
(now(), now(), 1, 5, 'foo')
, (now(), now(), 1, 6, 'foo')
, (now(), now(), 1, 7, 'foo')
, (now(), now(), 1, 6, 'bar')
, (now(), now(), 1, 7, 'bar')
, (now(), now(), 1, 23, 'bar')
, (now(), now(), 1, 5, 'baz')
, (now(), now(), 1, 23, 'baz'); -- only two rows for 'baz'
Interpretation #1: first N values
You want to list the first N values of data_type_id (the smallest, if there are more) for each distinct (source_id, symbol_id, eff_date).
For this, you also need a synthetic category, which can be synthesized with row_number(). The basic query to produce input to crosstab():
SELECT dense_rank() OVER (ORDER BY eff_date, symbol_id, source_id)::int AS row_name
, eff_date, symbol_id, source_id -- extra columns
, row_number() OVER (PARTITION BY eff_date, symbol_id, source_id
ORDER BY data_type_id)::int AS category
, data_type_id AS value
FROM facts
ORDER BY row_name, category;
Crosstab query:
-- Pivot the input query: one output row per row_name, with the values of
-- categories 1..3 spread into datatype_1..datatype_3.
-- The column definition list must name row_name, the extra columns, and one column per category.
SELECT *
FROM crosstab(
'SELECT dense_rank() OVER (ORDER BY eff_date, symbol_id, source_id)::int AS row_name
, eff_date, symbol_id, source_id -- extra columns
, row_number() OVER (PARTITION BY eff_date, symbol_id, source_id
ORDER BY data_type_id)::int AS category
, data_type_id AS value
FROM facts
ORDER BY row_name, category'
, 'VALUES (1), (2), (3)' -- category values, one per pivoted output column
) AS (row_name int, eff_date timestamp, symbol_id int, source_id char(3)
, datatype_1 int, datatype_2 int, datatype_3 int);
Results:
row_name | eff_date | symbol_id | source_id | datatype_1 | datatype_2 | datatype_3
-------: | :--------------| --------: | :-------- | ---------: | ---------: | ---------:
1 | 2017-04-10 ... | 1 | bar | 6 | 7 | 23
2 | 2017-04-10 ... | 1 | baz | 5 | 23 | null
3 | 2017-04-10 ... | 1 | foo | 5 | 6 | 7
Interpretation #2: actual values in column names
You want to append actual values of data_type_id to the column names datatypeValue1, ... DatatypeValueN. One or more of these:
SELECT DISTINCT data_type_id FROM facts ORDER BY 1;
5, 6, 7, 23 in the example. Then actual display values can be just boolean (or the redundant value?). Basic query:
SELECT dense_rank() OVER (ORDER BY eff_date, symbol_id, source_id)::int AS row_name
, eff_date, symbol_id, source_id -- extra columns
, data_type_id AS category
, TRUE AS value
FROM facts
ORDER BY row_name, category;
Crosstab query:
-- Pivot the input query: one output row per row_name, with one boolean column per
-- listed data_type_id. A column is TRUE where a fact row with that data_type_id exists
-- for the row, and NULL otherwise.
SELECT *
FROM crosstab(
'SELECT dense_rank() OVER (ORDER BY eff_date, symbol_id, source_id)::int AS row_name
, eff_date, symbol_id, source_id -- extra columns
, data_type_id AS category
, TRUE AS value
FROM facts
ORDER BY row_name, category'
, 'VALUES (5), (6), (7), (23)' -- actual values
) AS (row_name int, eff_date timestamp, symbol_id int, source_id char(3)
, datatype_5 bool, datatype_6 bool, datatype_7 bool, datatype_23 bool);
Result:
eff_date | symbol_id | source_id | datatype_5 | datatype_6 | datatype_7 | datatype_23
:--------------| --------: | :-------- | :--------- | :--------- | :--------- | :----------
2017-04-10 ... | 1 | bar | null | t | t | t
2017-04-10 ... | 1 | baz | t | null | null | t
2017-04-10 ... | 1 | foo | t | t | t | null
dbfiddle here
Related:
Crosstab function in Postgres returning a one row output when I expect multiple rows
Dynamic alternative to pivot with CASE and GROUP BY
Postgres - Transpose Rows to Columns

T-SQL One column in multiple columns select query

I have a simple problem that I have not been able to find a solution to and I'm hoping someone on StackOverflow can help.
I currently have an example query as shown below
SELECT ID
, ColumnName
FROM Table
If I run this query I get the following result:
==================
ID | ColumnName
------------------
1 | One_Two_Three
2 | Four_Five_Six
==================
The result I'm after is as follows:
========================
ID | Col1 | Col2 | Col3
------------------------
1 | One | Two | Three
2 | Four | Five | Six
========================
Your assistance is appreciated.
Have a look at this example
-- Sample data held in a table variable. Table variables are declared with "@";
-- "#" names temporary tables and is not valid after DECLARE.
DECLARE @Table1 TABLE
([ID] int, [ColumnName] varchar(13))
INSERT INTO @Table1
([ID], [ColumnName])
VALUES
(1, 'One_Two_Three'),
(2, 'Four_Five_Six')
-- Rewrite 'a_b_c' as <d>a</d><d>b</d><d>c</d> so each part becomes an XML element.
;WITH Vals AS (
SELECT *,
CAST('<d>' + REPLACE([ColumnName], '_', '</d><d>') + '</d>' AS XML) ColumnValue
FROM @Table1
)
-- One output row per <d> element, i.e. per underscore-separated part.
SELECT v.*,
A.B.value('.', 'varchar(max)') AS PartValue
FROM Vals v CROSS APPLY
ColumnValue.nodes('/d') A(B)
SQL Fiddle DEMO