T-SQL - Running total per project - tsql

I have this simplified table structure:
I need to generate an aggregate result, sum'ing the transactions per project/period - and have a running total per project. I added the column TransactionCountInPeriod
How can I write a query to get a result like this:
This is as far as I got with the query... I cannot figure out how to do the running total per ProjectId.
SELECT
prj.Id AS ProjectId,
p.Id AS PeriodId,
SUM(t.Amount) AS SumInPeriod--,
--SUM(t.Amount) OVER (PARTITION BY prj.Id) AS RunningTotal
--COUNT(t.*) AS TransactionCountInPeriod
FROM
Periods p
CROSS JOIN Projects prj
LEFT OUTER JOIN Transactions t ON p.Id = t.PeriodId
GROUP BY
prj.Id,
p.Id
ORDER BY
prj.Id,
p.Id
Schema and Testdata
CREATE TABLE [dbo].[Periods](
[Id] [int] NOT NULL
)
GO
CREATE TABLE [dbo].[Projects](
[Id] [int] IDENTITY(1,1) NOT NULL,
[Name] [varchar](50) NOT NULL
)
GO
CREATE TABLE [dbo].[Transactions](
[Id] [int] IDENTITY(1,1) NOT NULL,
[ProjectId] [int] NOT NULL,
[PeriodId] [int] NOT NULL,
[Amount] [decimal](18, 0) NOT NULL
)
GO
INSERT [dbo].[Periods] ([Id]) VALUES (201801)
GO
INSERT [dbo].[Periods] ([Id]) VALUES (201802)
GO
INSERT [dbo].[Periods] ([Id]) VALUES (201803)
GO
INSERT [dbo].[Periods] ([Id]) VALUES (201804)
GO
INSERT [dbo].[Periods] ([Id]) VALUES (201805)
GO
SET IDENTITY_INSERT [dbo].[Projects] ON
GO
INSERT [dbo].[Projects] ([Id], [Name]) VALUES (1, N'Alpha')
GO
INSERT [dbo].[Projects] ([Id], [Name]) VALUES (2, N'Beta')
GO
SET IDENTITY_INSERT [dbo].[Projects] OFF
GO
SET IDENTITY_INSERT [dbo].[Transactions] ON
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (1, 1, 201801, CAST(100 AS Decimal(18, 0)))
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (2, 1, 201801, CAST(100 AS Decimal(18, 0)))
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (3, 1, 201802, CAST(100 AS Decimal(18, 0)))
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (4, 1, 201803, CAST(100 AS Decimal(18, 0)))
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (5, 1, 201803, CAST(100 AS Decimal(18, 0)))
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (6, 1, 201804, CAST(100 AS Decimal(18, 0)))
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (7, 2, 201801, CAST(100 AS Decimal(18, 0)))
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (8, 2, 201801, CAST(100 AS Decimal(18, 0)))
GO
INSERT [dbo].[Transactions] ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (12, 2, 201804, CAST(100 AS Decimal(18, 0)))
GO
SET IDENTITY_INSERT [dbo].[Transactions] OFF
GO
(I would have created a sql fiddle, but it keeps crashing when I try...)

You can use the SUM window function to calculate the running total.
DECLARE #Periods TABLE(
[Id] [int] NOT NULL
)
DECLARE #Projects TABLE(
[Id] [int] NOT NULL,
[Name] [varchar](50) NOT NULL
)
DECLARE #Transactions TABLE(
[Id] [int] NOT NULL,
[ProjectId] [int] NOT NULL,
[PeriodId] [int] NOT NULL,
[Amount] [decimal](18, 0) NOT NULL
)
INSERT #Periods ([Id]) VALUES (201801)
INSERT #Periods ([Id]) VALUES (201802)
INSERT #Periods ([Id]) VALUES (201803)
INSERT #Periods ([Id]) VALUES (201804)
INSERT #Periods ([Id]) VALUES (201805)
INSERT #Projects ([Id], [Name]) VALUES (1, N'Alpha')
INSERT #Projects ([Id], [Name]) VALUES (2, N'Beta')
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (1, 1, 201801, CAST(100 AS Decimal(18, 0)))
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (2, 1, 201801, CAST(100 AS Decimal(18, 0)))
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (3, 1, 201802, CAST(100 AS Decimal(18, 0)))
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (4, 1, 201803, CAST(100 AS Decimal(18, 0)))
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (5, 1, 201803, CAST(100 AS Decimal(18, 0)))
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (6, 1, 201804, CAST(100 AS Decimal(18, 0)))
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (7, 2, 201801, CAST(100 AS Decimal(18, 0)))
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (8, 2, 201801, CAST(100 AS Decimal(18, 0)))
INSERT #Transactions ([Id], [ProjectId], [PeriodId], [Amount]) VALUES (12, 2, 201804, CAST(100 AS Decimal(18, 0)))
select
pr.Id as ProjectId
, pe.Id as PeriodId
, isnull(tr.SumAmount, 0) as SumInPeriod
, isnull(sum(tr.SumAmount) over(partition by pr.Id order by pe.Id), 0) as RunningTotal
, tr.TransactionsCount as TransactionCountInPeriod
from #Projects pr
cross join #Periods pe
outer apply (select sum(t.Amount) as SumAmount, count(*) as TransactionsCount from #Transactions t where t.ProjectId = pr.Id and t.PeriodId = pe.Id) tr
order by ProjectId, PeriodId

The simplest option would be to use sum...over.
You where very close - but you don't need the group by and you do need to add order by to the over clause to get a running total - like this:
SELECT pr.Id As ProjectId,
pe.Id As PeriodId,
SUM(Amount) OVER(PARTITION BY pe.Id, pr.Id) AS SumInPeriod,
SUM(Amount) OVER(PARTITION BY pr.Id ORDER BY pe.Id) AS RunningTotal,
COUNT(Amount) OVER(PARTITION BY pe.Id, pr.Id) TransactionCountInPeriod
FROM (
Periods pe
CROSS JOIN projects as pr
)
LEFT JOIN Transactions tr
ON pe.Id = tr.PeriodId
AND tr.ProjectId = pr.Id
ORDER BY pr.id, pe.id

Related

PostgreSQL WITH RECURSIVE order by in non recursive term

I am trying to create a recursive CTE and I wanted to fetch the row in the non recursive term from the table using ORDER BY but it seems impossible to do. Is there any workaround on this?
Example:
CREATE TABLE mytable (
id BIGSERIAL PRIMARY KEY,
ref_id BIGINT NOT NULL,
previous_id BIGINT REFERENCES mytable(id),
some_name TEXT NOT NULL,
created_at TIMESTAMP DEFAULT NOW()
);
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (1, NULL, 1, 'Barry');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (2, NULL, 1, 'Nick');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (3, 1, 2, 'Janet');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (4, 1, 1, 'John');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (5, 2, 7, 'Ron');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (6, 1, 1, 'Aaron');
INSERT INTO mytable (id, previous_id, ref_id, some_name) VALUES (7, 4, 1, 'Anna');
The query I am trying to construct
WITH RECURSIVE my_path AS (
SELECT * FROM mytable
WHERE ref_id = 1 AND some_name = 'Anna'
ORDER BY created_at DESC
LIMIT 1
UNION ALL
SELECT ph.* FROM my_path hp
INNER JOIN mytable ph ON hp.previous_id = ph.id
)
SELECT * FROM my_path;
SQLFIDDLE
Just move it into a starter CTE:
updated fiddle
WITH RECURSIVE base_record as (
SELECT * FROM mytable
WHERE ref_id = 1 AND some_name = 'Anna'
ORDER BY created_at DESC
LIMIT 1
), my_path AS (
SELECT * FROM base_record
UNION ALL
SELECT ph.* FROM my_path hp
INNER JOIN mytable ph ON hp.previous_id = ph.id
)
SELECT * FROM my_path;

Postgresql find by count, joined table

Given 3 tables. I need to build SQL query to find two actors who CAST TOGETHER THE MOST and list the titles of those movies. Sort alphabetically
https://www.db-fiddle.com/f/r2Y9CpH8n7MHTeBaqEHe9S/0
The data for reproducing below:
create table film_actor
(
actor_id integer,
film_id integer
)
;
create table film
(
film_id integer,
title varchar
)
;
create table actor
(
actor_id integer,
first_name varchar,
last_name varchar
)
;
INSERT INTO public.film_actor (actor_id, film_id) VALUES (1, 1);
INSERT INTO public.film_actor (actor_id, film_id) VALUES (1, 2);
INSERT INTO public.film_actor (actor_id, film_id) VALUES (1, 3);
INSERT INTO public.film_actor (actor_id, film_id) VALUES (2, 1);
INSERT INTO public.film_actor (actor_id, film_id) VALUES (2, 2);
INSERT INTO public.film_actor (actor_id, film_id) VALUES (2, 3);
INSERT INTO public.film_actor (actor_id, film_id) VALUES (3, 1);
INSERT INTO public.film (film_id, title) VALUES (1, 'First');
INSERT INTO public.film (film_id, title) VALUES (2, 'Second');
INSERT INTO public.film (film_id, title) VALUES (3, 'Third');
INSERT INTO public.film (film_id, title) VALUES (4, 'Fourth');
INSERT INTO public.actor (actor_id, first_name, last_name) VALUES (1, 'John', 'Snow');
INSERT INTO public.actor (actor_id, first_name, last_name) VALUES (2, 'Spider', 'Man');
INSERT INTO public.actor (actor_id, first_name, last_name) VALUES (3, 'Mike', 'Kameron');
Is this what you are looking for?
with acting_pairs as (
select a1.actor_id as a1_id, a2.actor_id as a2_id
from film_actor a1
join film_actor a2 on a1.film_id = a2.film_id
where a1.actor_id < a2.actor_id
)
select a1_id, a2_id, count(*) as total
from acting_pairs
group by (a1_id, a2_id)
order by total desc
limit 1
Giving us expected output for the example input would be nice.

Aggregation giving wrong result in T-SQL

I am getting wrong sum of values in more than two tables.
Please advise the correct qry for the below
my sample data is as follows.
IF OBJECT_ID('tempdb..#Departamentos') IS NOT NULL
DROP TABLE #Departamentos
GO
CREATE TABLE #Departamentos (ID INT IDENTITY(1,1) PRIMARY KEY,
Nome_Dep VARCHAR(200))
GO
INSERT INTO #Departamentos(Nome_Dep)
VALUES('Vendas'), ('TI'), ('Recursos Humanos')
GO
IF OBJECT_ID('tempdb..#Funcionarios') IS NOT NULL
DROP TABLE #Funcionarios
GO
CREATE TABLE #Funcionarios (ID INT IDENTITY(1,1) PRIMARY KEY,
ID_Dep INT,
Nome VARCHAR(200),
Salario Numeric(18,2))
GO
INSERT INTO #Funcionarios (ID_Dep, Nome, Salario)
VALUES(1, 'Fabiano', 2000), (1, 'Amorim', 2000), (1, 'Diego', 9000),
(2, 'Felipe', 2000), (2, 'Ferreira', 2500), (2, 'Nogare', 11999),
(3, 'Laerte', 5000), (3, 'Luciano', 23500), (3, 'Zavaschi', 13999)
GO
IF OBJECT_ID('tempdb..#deals') IS NOT NULL
DROP TABLE #deals
GO
CREATE TABLE #deals (ID INT IDENTITY(1,1) PRIMARY KEY,
ID_Deal INT,
Nome VARCHAR(200),
Revenue Numeric(18,2))
GO
INSERT INTO #deals (ID_Deal, Nome, Revenue)
VALUES(1, 'Fabiano', 50), (1, 'Amorim', 20), (1, 'Diego', 90),
(2, 'Felipe', 20), (2, 'Ferreira', 25), (2, 'Nogare', 119),
(3, 'Laerte', 50), (3, 'Luciano', 23), (3, 'Zavaschi', 13)
GO
My query is as follows to get the result:
select d.id,d.Nome_Dep,sum(salario)salario,sum(revenue)revenue from #Departamentos d left join #Funcionarios f on d.ID=f.ID_Dep
left join #deals dd on dd.ID_Deal=d.ID
group by d.id,d.Nome_Dep
But my desired result should be:
ID Nome_Dep salario revenue
1 Vendas 13000.00 160.00
2 TI 16499.00 164.00
3 Recursos Humanos 42499.00 86.00
Thanks
There is probably duplication happening because of the double table join. One way around this is to aggregate each table in separate subqueries, and then join to them:
SELECT
d.id,
d.Nome_Dep,
COALESCE(f.sum_salario, 0) AS sum_salario,
COALESCE(dd.sum_revenue, 0) AS sum_revenue
FROM #Departamentos d
LEFT JOIN
(
SELECT ID_Dep, SUM(salario) AS sum_salario
FROM #Funcionarios
GROUP BY ID_Dep
) f
ON d.ID = f.ID_Dep
LEFT JOIN
(
SELECT ID_Deal, SUM(revenue) AS sum_revenue
FROM #deals
GROUP BY ID_Deal
) dd
ON d.ID = dd.ID_Deal;
Demo

PostgreSQL: Iterate through a tables rows with for loop, retrieve column value based on current row

I have the following 2 tables
CREATE TABLE salesperson_t (
salespersonid numeric(4,0) NOT NULL,
salespersonname character varying(25),
salespersontelephone character varying(50),
salespersonfax character varying(50),
salespersonaddress character varying(30),
salespersoncity character varying(20),
salespersonstate character(2),
salespersonzip character varying(20),
salesterritoryid numeric(4,0),
CONSTRAINT salesperson_pk PRIMARY KEY (salespersonid)
);
INSERT INTO salesperson_t VALUES (1, 'Doug Henny', '8134445555', NULL, NULL, NULL, NULL, NULL, 2);
INSERT INTO salesperson_t VALUES (2, 'Robert Lewis', '8139264006', NULL, '124 Deerfield', 'Lutz', 'FL', '33549', 13);
INSERT INTO salesperson_t VALUES (3, 'William Strong', '3153821212', NULL, '787 Syracuse Lane', 'Syracuse', 'NY', '33240', 3);
INSERT INTO salesperson_t VALUES (4, 'Julie Dawson', '4355346677', NULL, NULL, NULL, NULL, NULL, 4);
INSERT INTO salesperson_t VALUES (5, 'Jacob Winslow', '2238973498', NULL, NULL, NULL, NULL, NULL, 5);
INSERT INTO salesperson_t VALUES (6, 'Pepe Lepue', NULL, NULL, NULL, 'Platsburg', 'NY', NULL, 13);
INSERT INTO salesperson_t VALUES (8, 'Fred Flinstone', NULL, NULL, '1 Rock Lane', 'Bedrock', 'Ca', '99999', 2);
INSERT INTO salesperson_t VALUES (9, 'Mary James', '3035555454', NULL, '9 Red Line', 'Denver', 'CO', '55555', 4);
INSERT INTO salesperson_t VALUES (10, 'Mary Smithson', '4075555555', NULL, '4585 Maple Dr', 'Orlando', 'FL', '32826', 15);
CREATE TABLE territory2_t (
territoryid numeric(4,0),
territoryname character varying(50),
total_sales_person integer,
CONSTRAINT territory2_t_pk PRIMARY KEY (territoryid)
);
INSERT INTO territory2_t VALUES (1, 'SouthEast', NULL);
INSERT INTO territory2_t VALUES (2, 'SouthWest', NULL);
INSERT INTO territory2_t VALUES (3, 'NorthEast', NULL);
INSERT INTO territory2_t VALUES (4, 'NorthWest', NULL);
INSERT INTO territory2_t VALUES (5, 'Central', NULL);
INSERT INTO territory2_t VALUES (6, 'Alaska', NULL);
INSERT INTO territory2_t VALUES (12, 'Hawaii', NULL);
INSERT INTO territory2_t VALUES (13, 'Colorado', NULL);
INSERT INTO territory2_t VALUES (15, 'Arizona', NULL);
I have the following pseudo code:
DO $$
DECLARE
-- currentRow [relevant datatype];
BEGIN
FOR counter IN 1..(SELECT count(*)FROM territory2_t) LOOP -- There are 13 total rows
-- **assign currentRow to counter**
RAISE NOTICE 'Counter: %', counter; -- debugging purposes
UPDATE terriory2_t
SET total_sales_person = ((SELECT count(*)
FROM salesperson_t
WHERE salesterritoryid = currentRow.territoryid)*1) -- *1 is for debuggin puporses
WHERE territoryid = currentRow.territoryid;
-- **increase currentRow by 1**
END LOOP;
END; $$
It's purpose is count how many rows in the table (salesperson) have the 'territoryid' of the the currentRows->'territory2.territoryid', and then assign that quantity to currentRows->territory2.total_sales_person.
You don't need a loop or even a function for this.
What you want to do can be done in a single update statement because the total count per territory can be calculated with a single aggregation:
SELECT salesterritoryid, count(*) as total_count
FROM salesperson_t
group by salesterritoryid
This can then be used as the source to update the territory table:
UPDATE territory2_t
SET total_sales_person = t.total_count
FROM (
SELECT salesterritoryid, count(*) as total_count
FROM salesperson_t
group by salesterritoryid
) t
WHERE territoryid = t.salesterritoryid;
An alternative that might be easier to understand but will be slower for larger tables is an update with a co-related sub-query
UPDATE territory2_t tg
SET total_sales_person = (select count(*)
from salesperson_t sp
where sp.salesterritoryid = tg.territoryid);
There is a slight difference between the first and second update: the second one will update the total_sales_person to 0 (zero) for those territories where there is no salesperson at all. The first one will only update the count for territories that are actually present in the salesperson table.
Unrelated, but: having a "type identifying" prefix or suffix for an identifier is usually useless and doesn't really help at all. See a related discussion on dba.stackexchange

List ranges and the total count based on condition

Table Schema
CREATE TABLE [dbo].[TblMaster](
[SID] [int] IDENTITY(1,1) NOT NULL Primary Key,
[VID] [int] NOT NULL,
[CreatedDate] [datetime] default (getdate()) NOT NULL,
[CharToAdd] [varchar](10) NULL,
[Start] [int] NOT NULL,
[End] [int] NOT NULL
)
GO
CREATE TABLE [dbo].[TblDetails](
[DetailsID] [int] IDENTITY(1,1) NOT NULL Primary Key,
[SID] [int] NOT NULL,
[Sno] [int] NOT NULL,
[ConcatenatedText] [varchar](20) NOT NULL,
[isIssued] [bit] default (0) NOT NULL,
[isUsed] [bit] default (0) NOT NULL
)
GO
Sample Data:
Insert into dbo.TblMaster Values (1,default, 'CA', 1, 5)
Insert into dbo.TblMaster Values (1,default, 'PA', 1, 5)
GO
Insert into dbo.TblDetails values(1, 1, 'CA1', 0,0)
Insert into dbo.TblDetails values(1, 2, 'CA2', 0,0)
Insert into dbo.TblDetails values(1, 3, 'CA3', 0,0)
Insert into dbo.TblDetails values(1, 4, 'CA4', 1,0)
Insert into dbo.TblDetails values(1, 5, 'CA5', 0,0)
Insert into dbo.TblDetails values(2, 1, 'PA1', 0,0)
Insert into dbo.TblDetails values(2, 2, 'PA2', 0,0)
Insert into dbo.TblDetails values(2, 3, 'PA3', 1,0)
Insert into dbo.TblDetails values(2, 4, 'PA4', 0,0)
Insert into dbo.TblDetails values(2, 5, 'PA5', 0,0)
Insert into dbo.TblDetails values(3, 1, '1', 0,0)
Insert into dbo.TblDetails values(3, 2, '2', 1,0)
Insert into dbo.TblDetails values(3, 3, '3', 1,0)
Insert into dbo.TblDetails values(3, 4, '4', 0,0)
Insert into dbo.TblDetails values(3, 5, '5', 0,0)
GO
Expected Output:
Query I have built as of now:
Declare #VID INT = 1
;WITH Tmp as
(
SELECT
TM.CharToAdd as Prefix,
sno,
sno - ROW_NUMBER() OVER(ORDER BY sno) as grp
FROM dbo.TblDetails TD
LEFT JOIN dbo.TblMaster TM on TM.[SID] = TD.[SID]
WHERE isIssued = 0 and isUsed = 0
AND TM.VID = #VID
)
SELECT Prefix,
MIN(sno) as RangeStart,
MAX(sno) as RangeEnd,
COUNT(*) as [Count]
FROM Tmp
GROUP BY grp, Prefix
In the TblDetails table want to find the range of available values and its total counts from all records whose bit columns are 0. If bit column is 1 then it means it is already used so I am trying to skip it and list rest as available records. Doubtful whether am I explaining the problem statement well so have provided the sample data and expected output for better understanding. I did try doing some recursive function but the result isn't matching the expected output. So looking for help to resolve this.
You were very close...
CODE
Declare #VID INT = 1
;with cte as(
select
m.CHarToAdd,
d.sno,
d.sno - ROW_NUMBER() OVER(partition by m.CharToAdd ORDER BY sno) as grp
from
TblMaster m
inner join
TblDetails d on
d.sid = m.sid
where
d.isIssued = 0 and d.isUsed = 0 and m.vid = #VID)
select
CharToAdd,
min(sno) as Start,
max(sno) as [End],
(max(sno) - min(sno) + 1) as [Count]
from cte
group by
CHarToAdd, grp
order by
CHarToAdd
RESULTS
CharToAdd Start End Count
CA 1 3 3
CA 5 5 1
PA 1 2 2
PA 4 5 2