This question already has answers here:
T-sql :get SUM of Columns
(3 answers)
Closed 5 years ago.
I have a table that looks something like the following :
W1 W2 w3
Gold 10 2 3
Silver 3 1 1
But I need this result (each week holding the running total so far):
W1 W2 w3
Gold 10 12 15
Silver 3 4 5
Is there any way I can get that result?
My sql query :
-- Total price per ISO week of 2016, spread into one column per week (weeks 1-3 only).
SELECT
    piv.[1] AS week1,
    piv.[2] AS week2,
    piv.[3] AS week3
FROM (
    -- Tag every 2016 row with its ISO week number; PIVOT turns these into columns.
    SELECT
        DATEPART(ISO_WEEK, ta.enddate) AS [week],
        ta.price
    FROM table1 AS ta
    WHERE ta.enddate BETWEEN '2016/01/01' AND '2016/12/31'
) AS src
PIVOT (
    SUM(price)
    FOR [week] IN ([1], [2], [3])
) AS piv
Calculate the running total before pivoting the data
-- Cumulative (running) price per element across ISO weeks 1-3 of 2016.
-- Prices are first collapsed to one total per element and week, and the running
-- total is then taken over those weekly totals. Accumulating per source row and
-- letting PIVOT SUM the results would add several running totals together
-- whenever an element has more than one row in the same week.
-- Note: an element with no rows in a given week gets NULL in that week's column.
SELECT element,
       week1 = [1], week2 = [2], week3 = [3]
FROM
(
    SELECT ta.element,
           [week] = DATEPART(ISO_WEEK, ta.enddate),
           -- Inner SUM = the week's total; outer windowed SUM = running total.
           -- Weeks are ordered by their earliest date rather than by week number,
           -- because the first days of January can fall in ISO week 52/53.
           -- ROWS (not the default RANGE) keeps the frame strictly row-by-row.
           price  = SUM(SUM(ta.price)) OVER (PARTITION BY ta.element
                                             ORDER BY MIN(ta.enddate)
                                             ROWS UNBOUNDED PRECEDING)
    FROM table1 ta
    WHERE ta.enddate BETWEEN '2016/01/01' AND '2016/12/31'
    GROUP BY ta.element, DATEPART(ISO_WEEK, ta.enddate)
) src
PIVOT
(
    -- Exactly one row per element and week reaches the pivot, so SUM passes it through.
    SUM(price) FOR [week] IN ([1], [2], [3])
) piv
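For older versions of SQL Server (before 2012), where SUM() OVER does not accept ORDER BY, compute the running total with CROSS APPLY instead: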
-- Cumulative (running) price per element across ISO weeks 1-3 of 2016, for
-- SQL Server versions whose windowed SUM does not accept ORDER BY: the running
-- total comes from a correlated CROSS APPLY over pre-aggregated weekly totals.
;WITH weekly AS
(
    -- One row per element and ISO week of 2016: the week's total and its earliest date.
    -- Aggregating here keeps the pivot from adding several running totals together
    -- when an element has more than one row in a week.
    SELECT ta.element,
           [week]    = DATEPART(ISO_WEEK, ta.enddate),
           weekstart = MIN(ta.enddate),
           price     = SUM(ta.price)
    FROM table1 ta
    WHERE ta.enddate BETWEEN '2016/01/01' AND '2016/12/31'
    GROUP BY ta.element, DATEPART(ISO_WEEK, ta.enddate)
)
SELECT element,
       week1 = [1], week2 = [2], week3 = [3]
FROM
(
    SELECT w.[week],
           cs.price,
           w.element
    FROM weekly w
    -- Running total = sum of this element's weekly totals up to and including this week.
    -- It reads from the date-filtered CTE, so rows outside 2016 are never included.
    CROSS APPLY (SELECT SUM(p.price)
                 FROM weekly p
                 WHERE p.element = w.element
                   AND p.weekstart <= w.weekstart) cs (price)
) src
PIVOT
(
    SUM(price) FOR [week] IN ([1], [2], [3])
) piv
Related
I have a data frame like this one given below. Essentially it is a time series derived data frame.
My issue is that the formula for Col C in the n-th row is:
$C_n = (A_n - A_{n-1}) + C_{n-1}$
Hence the calculation of Col C is self-referencing: it depends on the previous value of Col C. I am using Spark SQL; can someone please advise how to proceed with this? For the calculation involving Col A I am using the LAG function.
It seems colC is just colA minus colA in the first row.
e.g.
1 = 6-5,
0 = 5-5,
2 = 7-5,
3 = 8-5,
-2 = 3-5
So this query should work:
-- colC = colA minus the colA value of the first row (rows ordered by id).
-- NOTE(review): the snippet has no FROM clause; append the source table before running it.
SELECT
    colA,
    colA - FIRST(colA) OVER (ORDER BY id) AS colC
Your formula is a cumulative sum. Here is a complete example:
-- Running sum of the row-to-row differences of a, over an inline six-row sample.
WITH input_rows AS (
    SELECT 1 AS rowid, 5 AS a UNION ALL
    SELECT 2 AS rowid, 6 AS a UNION ALL
    SELECT 3 AS rowid, 5 AS a UNION ALL
    SELECT 4 AS rowid, 7 AS a UNION ALL
    SELECT 5 AS rowid, 8 AS a UNION ALL
    SELECT 6 AS rowid, 3 AS a
),
deltas AS (
    -- Change in a versus the previous row; NULL on the first row, which has no predecessor.
    SELECT
        rowid,
        a,
        a - LAG(a, 1) OVER (ORDER BY rowid) AS c0
    FROM input_rows
)
-- Accumulate the differences in rowid order.
SELECT
    rowid,
    a,
    SUM(c0) OVER (ORDER BY rowid) AS c
FROM deltas
I'm struggling a little to find a clean way to do this. Assume that I have the following records in my table named Records:
|Name| |InsertDate| |Size|
john 30.06.2015 1
john 10.01.2016 10
john 12.01.2016 100
john 05.03.2016 1000
doe 01.01.2016 1
How do I get the records for the year 2016 with a month less than or equal to 3, grouped by month (even if that month has no records, e.g. month 2 in this case), with the cumulative sum of Size up to and including that month? I want to get the following result:
|Name| |Month| |Size|
john 1 111
john 2 111
john 3 1111
doe 1 1
As other commenters have already stated, you simply need a table with dates in that you can join from to give you the dates that your source table does not have records for:
-- Cumulative Size per Name at the end of each month of the chosen year.
-- Build the source data table.
-- (T-SQL variables, including table variables, must be prefixed with @.)
declare @t table(Name nvarchar(10)
                ,InsertDate date
                ,Size int
                );
insert into @t values
 ('john','20150630',1   )
,('john','20160110',10  )
,('john','20160112',100 )
,('john','20160305',1000)
,('doe' ,'20160101',1   );
-- Specify the year you want to search for by storing the first day here.
declare @year date = '20160101';
-- This derived table builds a set of dates that you can join from.
-- LEFT JOINing from here is what gives you rows for months without records in your source data.
with Dates
as
(
    -- Anchor: the first month of the year (first and last day).
    select @year as MonthStart
          ,dateadd(day,-1,dateadd(month,1,@year)) as MonthEnd
    union all
    -- Step: advance one month at a time until the year is covered.
    select dateadd(month,1,MonthStart)
          ,dateadd(day,-1,dateadd(month,2,MonthStart))
    from Dates
    where dateadd(month,1,MonthStart) < dateadd(yyyy,1,@year)
)
select t.Name
      ,d.MonthStart
      ,sum(t.Size) as Size
from Dates d
    -- Every record dated on or before the month's last day counts towards that
    -- month, which is what makes the sum cumulative.
    left join @t t
        on(t.InsertDate <= d.MonthEnd)
where d.MonthStart <= '20160301' -- Without knowing what your logic is for specifying values only up to March, I have left this part for you to automate.
group by t.Name
        ,d.MonthStart
order by t.Name
        ,d.MonthStart;
If you have a static date reference table in your database, you don't need to do the derived table creation and can just do:
-- Template: start from the date reference table and LEFT JOIN the data to it,
-- so every date is returned whether or not the joined table has a row for it.
select
     d.DateValue
    ,<Other columns>
from DatesReferenceTable as d
left join <Other Tables> as o
    on d.DateValue = o.AnyDateColumn
etc
Here's another approach that utilizes a tally table (aka numbers table) to create the date table. Note my comments.
-- Cumulative Size per Name and month, using an inline tally (numbers) table
-- to generate every month between the earliest and latest InsertDate.
-- Build the source data table.
-- (T-SQL variables, including table variables, must be prefixed with @.)
declare @t table(Name nvarchar(10), InsertDate date, Size int);
insert into @t values
 ('john','20150630',1   )
,('john','20160110',10  )
,('john','20160112',100 )
,('john','20160305',1000)
,('doe' ,'20160101',1   );
-- A year is fine, don't need a date data type
declare @year smallint = 2016;
WITH -- dummy rows for a tally table:
E AS (SELECT E FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(e)),
dateRange(totalMonths, mn, mx) AS -- Get the range and number of months to create
(
  SELECT DATEDIFF(MONTH, MIN(InsertDate), MAX(InsertDate)), MIN(InsertDate), MAX(InsertDate)
  FROM @t
),
iTally(N) AS -- Tally Oh! Create an inline Tally (aka numbers) table starting with 0
(
  -- Four-way cross join of 10 rows = 10,000 sequential numbers (0-9999).
  SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1))-1
  FROM E a CROSS JOIN E b CROSS JOIN E c CROSS JOIN E d
),
RunningTotal AS -- perform a running total by year/month for each person (Name)
(
  SELECT
    yr = YEAR(DATEADD(MONTH, n, mn)),
    mo = MONTH(DATEADD(MONTH, n, mn)),
    Name,
    Size = SUM(Size) OVER
      (PARTITION BY Name ORDER BY YEAR(DATEADD(MONTH, n, mn)), MONTH(DATEADD(MONTH, n, mn)))
  FROM iTally
  CROSS JOIN dateRange
  -- Match on year AND month: matching on the month number alone would attach
  -- a record to the same month of every year once the range exceeds 12 months.
  LEFT JOIN @t ON MONTH(InsertDate) = MONTH(DATEADD(MONTH, n, mn))
              AND YEAR(InsertDate)  = YEAR(DATEADD(MONTH, n, mn))
  WHERE N <= totalMonths
) -- Final output will only return rows where the year matches @year:
SELECT
  -- Months without records have NULL Name/Size; borrow them from the preceding row.
  -- NOTE(review): when several names share a month, ORDER BY yr, mo does not fix
  -- which row is "preceding" - confirm this is acceptable for your data.
  name = ISNULL(name, LAG(Name, 1) OVER (ORDER BY yr, mo)),
  yr, mo,
  size = ISNULL(Size, LAG(Size, 1) OVER (ORDER BY yr, mo))
FROM RunningTotal
WHERE yr = @year
GROUP BY yr, mo, name, size;
Results:
name yr mo size
---------- ----------- ----------- -----------
doe 2016 1 1
john 2016 1 111
john 2016 2 111
john 2016 3 1111
I have a query that pulls information from two different tables at different levels of granularity. I was wondering whether it is possible to keep the value on the right from repeating (return only one row with 110811.67 and zero for the rest) while preserving all values on the left.
-- Policy-transaction amounts (one row per year, month and transaction code)
-- shown next to the earned premium of the same year and month, for June 2016.
SELECT
    pt.pt_year,
    pt.pt_month,
    pt.pt_amount,
    ep.fp_earnedprem
FROM (
    -- Transaction amounts summed per accounting year, month and transaction code.
    SELECT
        va.ACCT_YEAR        AS pt_year,
        va.ACCT_MONTHINYEAR AS pt_month,
        ptp.PTRANS_CODE     AS pt_code,
        SUM(a.amount)       AS pt_amount
    FROM fact_policytransaction AS a
    INNER JOIN VDIM_ACCOUNTINGDATE AS va
        ON a.ACCOUNTINGDATE_ID = va.ACCOUNTINGDATE_ID
    INNER JOIN DIM_POLICYTRANSACTIONTYPE AS ptp
        ON a.POLICYTRANSACTIONTYPE_ID = ptp.POLICYTRANSACTIONTYPE_ID
    GROUP BY
        va.ACCT_YEAR,
        va.ACCT_MONTHINYEAR,
        ptp.PTRANS_CODE
) AS pt
INNER JOIN (
    -- Earned premium summed per year and month (coarser grain than pt).
    SELECT
        dm.MON_YEAR             AS fp_year,
        dm.MON_MONTHINYEAR      AS fp_month,
        SUM(fp.EARNED_PREM_AMT) AS fp_earnedprem
    FROM fact_policycoverage AS fp
    INNER JOIN dim_month AS dm
        ON fp.MONTH_ID = dm.MONTH_ID
    GROUP BY
        dm.MON_YEAR,
        dm.MON_MONTHINYEAR
) AS ep
    ON  pt.pt_year  = ep.fp_year
    AND pt.pt_month = ep.fp_month
WHERE pt.pt_year = 2016
  AND pt.pt_month = 6
ORDER BY
    pt.pt_year,
    pt.pt_month
pt_year pt_month pt_amount fp_earnedprem
2016 6 4340.00 110811.67
2016 6 15569.00 110811.67
2016 6 30024.00 110811.67
pt_year pt_month pt_amount fp_earnedprem
2016 6 4340.00 110811.67
2016 6 15569.00 0
2016 6 30024.00 0
You can use ROW_NUMBER to keep the value on only the first row of each year/month:
-- Show each month's earned premium on one row only (the row with the smallest
-- pt_amount) and 0 on the month's remaining rows, keeping every pt_amount.
;with cte as (
select x.pt_year,
       x.pt_month,
       x.pt_amount,
       y.fp_earnedprem
from
(
    -- Transaction amounts per accounting year, month and transaction code.
    select sum(a.amount) as pt_amount
         , va.ACCT_YEAR as pt_year, va.ACCT_MONTHINYEAR as pt_month, ptp.PTRANS_CODE as pt_code
    from fact_policytransaction a
    join VDIM_ACCOUNTINGDATE va
        on a.ACCOUNTINGDATE_ID=va.ACCOUNTINGDATE_ID
    join DIM_POLICYTRANSACTIONTYPE ptp
        on a.POLICYTRANSACTIONTYPE_ID=ptp.POLICYTRANSACTIONTYPE_ID
    group by va.ACCT_YEAR, va.ACCT_MONTHINYEAR, ptp.PTRANS_CODE
) as x
join
(
    -- Earned premium per year and month.
    select sum(fp.EARNED_PREM_AMT) as fp_earnedprem
         , dm.MON_YEAR as fp_year, dm.MON_MONTHINYEAR as fp_month
    from fact_policycoverage fp
    join dim_month dm on fp.MONTH_ID=dm.MONTH_ID
    group by dm.MON_YEAR, dm.MON_MONTHINYEAR
) as y
    on x.pt_year = y.fp_year
    and x.pt_month = y.fp_month
where x.pt_year=2016 and x.pt_month=6
)
, Rn as (
    -- Number the rows inside each year/month. Ordering by pt_amount (instead of
    -- repeating the partition columns) makes the choice of row 1 repeatable.
    select *
         , ROW_NUMBER() over (partition by pt_year, pt_month order by pt_amount) as RoNum
    from cte
)
select pt_year, pt_month, pt_amount
     -- Only row 1 of each month keeps the premium; the column keeps its original name.
     , case when RoNum = 1 then fp_earnedprem else 0 end as fp_earnedprem
from Rn
order by pt_year, pt_month, RoNum
The main part is your own query; I only added the section below and one line at the top (`;with cte as (`), and moved the ORDER BY to the bottom.
)
, Rn as (
    -- Number the rows inside each year/month. Ordering by pt_amount (instead of
    -- repeating the partition columns) makes the choice of row 1 repeatable.
    select *
         , ROW_NUMBER() over (partition by pt_year, pt_month order by pt_amount) as RoNum
    from cte
)
select pt_year, pt_month, pt_amount
     -- Only row 1 of each month keeps the premium; the column keeps its original name.
     , case when RoNum = 1 then fp_earnedprem else 0 end as fp_earnedprem
from Rn
order by pt_year, pt_month, RoNum
I have a table that looks like this:
x y
1 2
2 null
3 null
1 null
11 null
I want to fill the null values by applying the rolling rule
$y_{i+1} = y_i + x_{i+1}$, using SQL that is as simple as possible (in place),
so the expected result
x y
1 2
2 4
3 7
1 8
11 19
This needs to be implemented in PostgreSQL. I could wrap it in a custom window function, but implementing a custom function always seems complex.
-- Fill y row by row: each new y is the previous y plus the current row's x.
WITH RECURSIVE t AS (
    -- Anchor: the row(s) whose y is already known, taken as position 1.
    SELECT x, y, 1 AS rank
    FROM my_table
    WHERE y IS NOT NULL

    UNION ALL

    -- Step: extend the result with the row at the next position.
    SELECT
        nxt.x,
        nxt.x + t.y AS y,
        t.rank + 1  AS rank
    FROM t
    INNER JOIN (
        -- Positions come from row_number() with no ORDER BY,
        -- i.e. whatever order the table scan happens to return.
        SELECT row_number() OVER () AS rank, x, y
        FROM my_table
    ) AS nxt
        ON t.rank + 1 = nxt.rank
)
SELECT x, y
FROM t;
You can iterate over rows using a recursive CTE. But in order to do so, you need a way to jump from row to row. Here's an example using an ID column:
-- Walk Table1 in id order, carrying y forward: y(id) = y(id - 1) + x(id).
; WITH RECURSIVE cte AS (
    -- Anchor: the first row supplies the starting y.
    SELECT id, y
    FROM Table1
    WHERE id = 1

    UNION ALL

    -- Step: the row whose id is one higher adds its x to the running y.
    SELECT
        step.id,
        acc.y + step.x
    FROM Table1 AS step
    INNER JOIN cte AS acc
        ON step.id = acc.id + 1
)
SELECT id, y
FROM cte
;
You can see the query at SQL Fiddle. If you don't have an ID column, but you do have another way to order the rows, you can use row_number() to get an ID:
-- Same walk as the id-based version, for tables without an id column:
-- row_number() supplies consecutive positions to step through.
; WITH RECURSIVE sorted AS (
    -- Specify your ordering here. This example sorts by the dt column.
    SELECT
        row_number() OVER (ORDER BY dt) AS id,
        *
    FROM Table1
),
cte AS (
    -- Anchor: the first row in the chosen order supplies the starting y.
    SELECT id, y
    FROM sorted
    WHERE id = 1

    UNION ALL

    -- Step: the next position adds its x to the running y.
    SELECT
        step.id,
        acc.y + step.x
    FROM sorted AS step
    INNER JOIN cte AS acc
        ON step.id = acc.id + 1
)
SELECT id, y
FROM cte
;
Here's the SQL Fiddle link.
Assume we have a table and we want to do a sum of the Expend column so that the summation only adds up values of the same Week_Name.
SN Week_Name Exp Sum
-- --------- --- ---
1 Week 1 10 0
2 Week 1 20 0
3 Week 1 30 60
4 Week 2 40 0
5 Week 2 50 90
6 Week 3 10 0
I assume we will need to `ORDER BY` Week_Name, then compare the previous row's Week_Name with the current row's Week_Name.
If both are the same, put zero in the SUM column.
If not the same, add all expenditure, where Week_Name = Week_Name(Previous row) and place in the Sum column. The final output should look like the table above.
Any help on how to achieve this in T-SQL is highly appreciated.
Okay, I was eventually able to resolve this issue, praise Jesus! If you want the exact table I gave above, you can use GilM's response below; it is perfect. If you want your table to have running cumulative totals, i.e. row 3 should have 60, row 5 should have 150, row 6 should have 160, etc., then you can use my code below:
-- Rebuilds the work table [tablebp] with a running cumulative expenditure per
-- source row, then reports that cumulative value on the last row of each week
-- (0 on all other rows).
USE CAPdb
-- Drop the work table if it is left over from a previous run.
IF OBJECT_ID ('dbo.[tablebp]') IS NOT NULL
DROP TABLE [tablebp]
GO
-- NOTE(review): numeric without precision/scale defaults to numeric(18,0) in
-- SQL Server, which would drop any fractional part - confirm that is intended.
CREATE TABLE [tablebp] (
tablebpcCol1 int PRIMARY KEY
,tabledatekey datetime
,tableweekname varchar(50)
,expenditure1 numeric
,expenditure_Cummulative numeric
)
-- Load one row per source key/date/week with its expenditure and the cumulative total.
INSERT INTO [tablebp](tablebpcCol1,tabledatekey,tableweekname,expenditure1,expenditure_Cummulative)
SELECT b.s_tablekey,d.PK_Date,d.Week_Name,
SUM(b.s_expenditure1) AS s_expenditure1,
-- Cumulative = this group's expenditure plus everything dated strictly earlier;
-- COALESCE turns the NULL for the earliest date (nothing before it) into 0.
SUM(b.s_expenditure1) + COALESCE((SELECT SUM(s_expenditure1)
FROM source_table bs JOIN dbo.Time dd ON bs.[DATE Key] = dd.[PK_Date]
WHERE dd.PK_Date < d.PK_Date),0)
FROM source_table b
INNER JOIN dbo.Time d ON b.[Date key] = d.PK_Date
GROUP BY d.[PK_Date],d.Week_Name,b.s_tablekey,b.s_expenditure1
ORDER BY d.[PK_Date]
-- Per week: the highest cumulative value and the highest key (the week's last row).
;WITH CTE AS (
SELECT tableweekname
,Max(expenditure_Cummulative) AS Week_expenditure_Cummulative
,MAX(tablebpcCol1) AS MaxSN
FROM [tablebp]
GROUP BY tableweekname
)
-- Show the weekly cumulative value only on each week's last row; 0 elsewhere.
SELECT [tablebp].*
,CASE WHEN [tablebp].tablebpcCol1 = CTE.MaxSN THEN Week_expenditure_Cummulative
ELSE 0 END AS [RunWeeklySum]
FROM [tablebp]
JOIN CTE on CTE.tableweekname = [tablebp].tableweekname
I'm not sure why your SN=6 line is 0 rather than 10. Do you really not want the sum for the last Week? If having the last week total is okay, then you might want something like:
-- Put each week's total expenditure on that week's last row (highest SN);
-- every other row of the week shows 0.
;WITH week_totals AS (
    SELECT
        Week_Name,
        SUM([Expend.]) AS SumExpend,
        MAX(SN)        AS MaxSN
    FROM T
    GROUP BY Week_Name
)
SELECT
    T.*,
    CASE
        WHEN T.SN = wt.MaxSN THEN wt.SumExpend
        ELSE 0
    END AS [Sum]
FROM T
INNER JOIN week_totals AS wt
    ON wt.Week_Name = T.Week_Name
Based on the request in the comments for a running total in the SUM column, you could try this:
-- On the last row of each week (highest SN) show the running total of Exp over
-- all rows up to and including that row; every other row shows 0.
;WITH week_last AS (
    SELECT
        Week_Name,
        MAX(SN) AS MaxSN
    FROM T
    GROUP BY Week_Name
)
SELECT
    T.SN,
    T.Week_Name,
    T.Exp,
    CASE
        WHEN T.SN = wl.MaxSN
            -- Running total: every row whose SN is at or before this one.
            THEN (SELECT SUM(T2.EXP)
                  FROM T AS T2
                  WHERE T2.SN <= T.SN)
        ELSE 0
    END AS [SUM]
FROM T
INNER JOIN week_last AS wl
    ON wl.Week_Name = T.Week_Name
ORDER BY SN