3 months rolling average for 3 columns

3 months rolling average for 3 columns - tsql

Need help
Input
Date A B C
2015-10-31 1.49 3.7 7.8
2015-11-30 1.45 3.6 7.6
2015-12-31 1.41 3.7 8.0
2016-01-31 1.33 3.7 8.3
2016-02-29 1.9 4.1 8.6
2016-03-31 1.46 4.4 9.7
-- Sample data for the rolling-average question: one row per month-end date
-- carrying three FLOAT measures.
CREATE TABLE dbo.ThreeMonth
(
    RDate DATE,
    A     FLOAT,
    b     FLOAT,
    C     FLOAT
);

-- Load all seven month-end rows with a single multi-row INSERT.
INSERT INTO dbo.threemonth (RDate, a, b, c)
VALUES
    ('2015-10-31', 1.49, 3.7, 7.8),
    ('2015-11-30', 1.45, 3.6, 7.6),
    ('2015-12-31', 1.41, 3.7, 8.0),
    ('2016-01-31', 1.33, 3.7, 8.3),
    ('2016-02-29', 1.9,  4.1, 8.6),
    ('2016-03-31', 1.46, 4.4, 9.7),
    ('2016-04-30', 1.35, 4.3, 9.4);

-- Show what was loaded.
SELECT * FROM threemonth;
--Tried the Following query
-- Rolling mean of A over the current row and the two rows before it,
-- with rows ordered by RDate.
SELECT
    rdate,
    AVG(A) OVER (
        ORDER BY Rdate
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    )
FROM threemonth
OutPut
I need to display the rolling 3-month average for 3 columns (a, b, c). When I add AVG(b) and AVG(c) it gives this error:
"Column 'threemonth.RDate' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause."
--Also I want the OUTPUT IN following format
2015-10-31 NULL -- Because 3 months are not available avg cannot be calculated
2015-11-30 NULL -- Because 3 months are not available avg cannot be calculated
2015-12-31 1.45
2016-01-31 1.39666666666667
2016-02-29 1.54666666666667
2016-03-31 1.56333333333333
2016-04-30 1.57
Can somebody advise how to solve the above problem, where I need to take the average of more than one column and display the output in the format shown above for all 3 columns?

Something like this
-- Number the rows by date so that each row can locate its two predecessors.
;WITH cteBase AS
(
    SELECT
        src.*,
        ROW_NUMBER() OVER (ORDER BY src.RDate) AS RowNr
    FROM threemonth AS src
)
SELECT
    cur.RDate,
    -- Mean of A over the current row and the two rows before it.
    -- The guard cur.RowNr > 2 leaves the inner query with no rows for the
    -- first two rows, so AVG returns NULL there.
    (
        SELECT AVG(win.A)
        FROM cteBase AS win
        WHERE cur.RowNr > 2
          AND win.RowNr BETWEEN cur.RowNr - 2 AND cur.RowNr
    ) AS [Avg]
FROM cteBase AS cur
Returns
RDate Avg
2015-10-31 NULL
2015-11-30 NULL
2015-12-31 1.45
2016-01-31 1.39666666666667
2016-02-29 1.54666666666667
2016-03-31 1.56333333333333
2016-04-30 1.57
For Cols A,B, and C
-- Three-row rolling averages of A, B and C, ordered by RDate.
-- Each average covers the current row and the two rows before it; the first
-- two rows return NULL because a full three-row window is not available yet.
--
-- Framed window aggregates replace the original per-row correlated
-- subqueries (one self-scan per output row and per column). They also remove
-- the outer alias "A", which shadowed column A.
;WITH cteBase AS
(
    SELECT
        RDate,
        A,
        B,
        C,
        ROW_NUMBER() OVER (ORDER BY RDate) AS RowNr
    FROM threemonth
)
SELECT
    RDate,
    -- The window is ordered by RowNr (not RDate) so the frame contains exactly
    -- the rows RowNr-2 .. RowNr, as in the subquery form.
    CASE WHEN RowNr > 2
         THEN AVG(A) OVER (ORDER BY RowNr ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
    END AS AvgA,
    CASE WHEN RowNr > 2
         THEN AVG(B) OVER (ORDER BY RowNr ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
    END AS AvgB,
    CASE WHEN RowNr > 2
         THEN AVG(C) OVER (ORDER BY RowNr ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
    END AS AvgC
FROM cteBase
-- Without an ORDER BY the result order is not guaranteed.
ORDER BY RowNr;

Related

How to find gap date and minimum date in the same query?

I have a table customer_history which log customer_id and modification_date.
When customer_id is not modified there is no entry in the table
I can find when a customer_id hasn't been modified (= last_date_with_no_modification) by looking for the dates that are missing (the Gaps and Islands problem).
But in the same query if no date is missing the value last_date_with_no_modification should
be DATEADD(DAY,-1,min(modification_date)) for the customer_id.
I don't know how to add this last condition in my SQL query?
I use following tables:
"Customer_history" table:
customer_id modification_date
1 2017-12-20
1 2017-12-19
1 2017-12-17
2 2017-12-20
2 2017-12-18
2 2017-12-17
2 2017-12-15
3 2017-12-20
3 2017-12-19
"#tmp_calendar" table:
date
2017-12-15
2017-12-16
2017-12-17
2017-12-18
2017-12-19
2017-12-20
Query used to get the gap date:
-- Pair every modification date with the previous one for the same customer.
WITH prev_mod AS
(
    SELECT
        ch.customer_id,
        LAG(ch.modification_date) OVER (
            PARTITION BY ch.customer_id
            ORDER BY ch.modification_date
        )                    AS GapStart,
        ch.modification_date AS GapEnd
    FROM customer_history AS ch
),
-- Number of calendar days strictly between the two modification dates.
CTE_GAP AS
(
    SELECT
        pm.customer_id,
        pm.GapStart,
        pm.GapEnd,
        DATEDIFF(DAY, pm.GapStart, pm.GapEnd) - 1 AS GapDays
    FROM prev_mod AS pm
)
-- For each customer that has at least one gap containing a calendar date,
-- return the day after the latest gap start.
SELECT
    gap.customer_id,
    DATEADD(DAY, 1, MAX(gap.GapStart)) AS last_date_with_no_modification
FROM CTE_GAP AS gap
INNER JOIN #tmp_calendar AS cal
    ON cal.date BETWEEN DATEADD(DAY, 1, gap.GapStart)
                    AND DATEADD(DAY, -1, gap.GapEnd)
WHERE gap.GapDays > 0
GROUP BY gap.customer_id
Result:
customer_id last_date_with_no_modification
1 2017-12-18
2 2017-12-19
3 2017-12-19 (Row missing)
How to get customer_id 3?

Something like this should work:
-- Last date with no modification, per customer.
--   * If the customer's history has a gap, use the day after the start of the
--     latest gap.
--   * Otherwise fall back to the day before the customer's first modification.
WITH CTE_GAP AS
(
    -- Pair each modification date with the previous one for the same customer.
    -- Reads customer_history, the same table as the other parts of this
    -- query (the original read #customer_history here only).
    SELECT
        ch.customer_id,
        LAG(ch.modification_date) OVER (
            PARTITION BY ch.customer_id
            ORDER BY ch.modification_date
        )                    AS GapStart,
        ch.modification_date AS GapEnd,
        DATEDIFF(
            DAY,
            LAG(ch.modification_date) OVER (
                PARTITION BY ch.customer_id
                ORDER BY ch.modification_date
            ),
            ch.modification_date
        ) - 1                AS GapDays
    FROM customer_history AS ch
),
LD AS
(
    -- Customers that have at least one gap covering a calendar date.
    SELECT
        cg.customer_id,
        DATEADD(DAY, 1, MAX(cg.GapStart)) AS last_date_with_no_modification
    FROM CTE_GAP AS cg
    CROSS JOIN #tmp_calendar AS c
    WHERE cg.GapDays > 0
      AND c.date BETWEEN DATEADD(DAY, 1, cg.GapStart)
                     AND DATEADD(DAY, -1, cg.GapEnd)
    GROUP BY cg.customer_id
),
LD_NO_GAP AS
(
    -- Fallback value; exactly one row per customer.
    SELECT
        customer_id,
        DATEADD(DAY, -1, MIN(modification_date)) AS last_date_with_no_modification
    FROM customer_history
    GROUP BY customer_id
)
-- LD_NO_GAP already holds one row per customer, so it drives the join and no
-- DISTINCT is needed to undo a per-history-row fan-out.
SELECT
    ng.customer_id,
    COALESCE(LD.last_date_with_no_modification,
             ng.last_date_with_no_modification) AS last_date_with_no_modification
FROM LD_NO_GAP AS ng
LEFT JOIN LD
    ON LD.customer_id = ng.customer_id

How does this Time Difference Calculation work?

I wanted to display the difference in HH:MM:SS between two datetime fields in SQL Server 2014.
I found a solution in this Stack Overflow post. And it works perfectly. But I want to understand the "why" of how this arrives at the correct answer.
T-SQL:
-- Day / hour / minute breakdown of createDate - HarvestDate for reports whose
-- HarvestDate is within the six months before GETDATE().
SELECT
    diffed.CustomerID,
    diffed.createDate,
    diffed.HarvestDate,
    diffed.DateDif,
    DATEDIFF(DAY, 0, diffed.DateDif)  AS [Days],
    DATEPART(HOUR, diffed.DateDif)    AS [Hours],
    DATEPART(MINUTE, diffed.DateDif)  AS [Minutes]
FROM
(
    SELECT
        chr.CustomerID,
        chr.createDate,
        chr.HarvestDate,
        chr.createDate - chr.HarvestDate AS [DateDif]
    FROM dbo.CustomerHarvestReports AS chr
    WHERE chr.HarvestDate >= DATEADD(MONTH, -6, GETDATE())
) AS diffed
ORDER BY [Days] DESC;
Results:
1239090 2017-11-07 08:51:03.870 2017-10-14 11:39:49.540 1900-01-24 21:11:14.330 23 21 11
1239090 2017-11-07 08:51:04.823 2017-10-19 11:17:48.320 1900-01-19 21:33:16.503 18 21 33
1843212 2017-10-27 19:14:02.070 2017-10-21 10:49:57.733 1900-01-07 08:24:04.337 6 8 24
1843212 2017-10-27 19:14:03.057 2017-10-21 10:49:57.733 1900-01-07 08:24:05.323 6 8 24
The first column is the customer ID; the second and third columns are the ones I wanted to calculate the time difference between. The fourth column is the difference between those two columns - and it is one of the points in the code that I do not understand.
If you subtract two datetime fields like this create date - harvestdate, why does it default to the year 1900?
And regarding DATEDIFF ( DAY, 0 , y.DateDiff) - what does the 0 mean? Does the 0 set the date as '01-01-1900'?
It works - for that I am grateful. I was hoping I could get an explanation as to why this behavior works?

I've added some comments that should explain it:
;WITH recent_reports AS
(
    -- Reports whose HarvestDate is within the six months before GETDATE().
    SELECT
        CustomerID,
        HarvestDate,
        createDate
    FROM dbo.CustomerHarvestReports
    WHERE HarvestDate >= DATEADD(MONTH, -6, GETDATE())
),
with_diff AS
(
    SELECT
        r.CustomerID,
        r.createDate,
        r.HarvestDate,
        -- Subtracting the two values yields the difference expressed as a
        -- datetime. 0 as a datetime is '1900-01-01 00:00:00.000', so the
        -- result is that base date plus the elapsed time.
        r.createDate - r.HarvestDate AS [DateDif]
    FROM recent_reports AS r
)
SELECT
    d.CustomerID,
    d.createDate,
    d.HarvestDate,
    d.DateDif,
    -- Whole days between 0 (the 1900-01-01 base) and the difference.
    DATEDIFF(DAY, 0, d.DateDif)  AS [Days],
    -- The elapsed hours are already encoded in [DateDif]; DATEPART only has
    -- to read that component back out.
    DATEPART(HOUR, d.DateDif)    AS [Hours],
    -- Same reasoning as [Hours].
    DATEPART(MINUTE, d.DateDif)  AS [Minutes]
FROM with_diff AS d
ORDER BY DATEDIFF(DAY, 0, d.DateDif) DESC;

Find date sequence in PostgreSQL

I'm trying to find the maximum sequence of consecutive days per customer in my data. I want to understand what the longest sequence of days is for a specific customer. If someone entered my app on 25/08/16, 26/08/16, 27/08/16, 01/09/16 and 02/09/16, the max sequence will be 3 days (25, 26, 27).
In the end (The output) I want to get two fields: custid | MaxDaySequence
I have the following fields in my data table: custid | orderdate (timestamp)
For example:
custid orderdate
1 25/08/2007
1 03/10/2007
1 13/10/2007
1 15/01/2008
1 16/03/2008
1 09/04/2008
2 18/09/2006
2 08/08/2007
2 28/11/2007
2 04/03/2008
3 27/11/2006
3 15/04/2007
3 13/05/2007
3 19/06/2007
3 22/09/2007
3 25/09/2007
3 28/01/2008
I'm using PostgreSQL 2014.
Thanks
Trying:
-- Longest run of consecutive calendar days per customer (gaps-and-islands).
--
-- Within one customer, consecutive days share the same value of
--     order_day - row_number
-- so grouping on that anchor date yields one group per run.
--
-- Changes from the first attempt:
--   * the anchor is derived from the order date, not from CURRENT_DATE;
--   * "date - integer" replaces the invalid INTERVAL rn || ' day' expression;
--   * the grouping key is selected instead of the ungrouped rn;
--   * days are de-duplicated first, so several orders on one day count once
--     and do not shift the row numbering.
SELECT
    custid,
    MAX(num_days) AS longest
FROM (
    SELECT
        custid,
        run_anchor,
        COUNT(*) AS num_days
    FROM (
        SELECT
            custid,
            order_day
              - CAST(ROW_NUMBER() OVER (PARTITION BY custid ORDER BY order_day) AS integer)
              AS run_anchor
        FROM (
            SELECT DISTINCT
                custid,
                CAST(orderdate AS date) AS order_day
            FROM table_
        ) AS d
    ) AS x
    GROUP BY custid, run_anchor
) AS y
GROUP BY custid

Try:
-- Longest run of consecutive order dates per customer, for a DATE column.
WITH flagged AS (
    -- starts_run = 1 when the row does not follow the previous order date of
    -- the same customer by at most one day (this includes the first row,
    -- where LAG is NULL); otherwise 0.
    SELECT
        t.custid,
        t.orderdate,
        CASE
            WHEN t.orderdate
                 - LAG(t.orderdate) OVER (PARTITION BY t.custid ORDER BY t.orderdate)
                 <= 1
            THEN 0
            ELSE 1
        END AS starts_run
    FROM mytable AS t
),
numbered AS (
    -- The running total of the flags gives every run its own number.
    SELECT
        f.custid,
        SUM(f.starts_run) OVER (PARTITION BY f.custid ORDER BY f.orderdate) AS run_no
    FROM flagged AS f
),
run_sizes AS (
    SELECT
        custid,
        run_no,
        COUNT(*) AS run_len
    FROM numbered
    GROUP BY custid, run_no
)
SELECT
    custid,
    MAX(run_len) AS max_sequence_of_days
FROM run_sizes
GROUP BY custid
Demo: http://sqlfiddle.com/#!15/00422/11
===== EDIT ===========
Got "operator does not exist: interval <= integer"
This means that orderdate column is of type timestamp, not date.
In this case you need to use <= interval '1' day condition instead of <= 1:
Please see this link: https://www.postgresql.org/docs/9.0/static/functions-datetime.html to learn more about date arithmetic in PostgreSQL
Please see this demo:
http://sqlfiddle.com/#!15/7c2200/2
-- Longest run of consecutive calendar days per customer, for a TIMESTAMP
-- orderdate column.
--
-- Timestamps are truncated to midnight and de-duplicated before comparing:
--   * comparing raw timestamps measures elapsed time, so orders at 08:00 on
--     one day and 20:00 on the next (36 hours apart) would wrongly break a run;
--   * several orders on the same day would otherwise each count as a day.
-- NOTE(review): assumes orderdate is "timestamp without time zone"; with
-- timestamptz, day differences across DST changes are not exactly 1 day.
WITH order_days AS (
    SELECT DISTINCT
        custid,
        date_trunc('day', orderdate) AS order_day
    FROM mytable
),
flagged AS (
    -- starts_run = 1 when the previous order day of the same customer is
    -- missing or more than one day earlier; otherwise 0.
    SELECT
        custid,
        order_day,
        CASE
            WHEN order_day
                 - LAG(order_day) OVER (PARTITION BY custid ORDER BY order_day)
                 <= interval '1' day
            THEN 0
            ELSE 1
        END AS starts_run
    FROM order_days
),
numbered AS (
    -- The running total of the flags gives every run its own number.
    SELECT
        custid,
        SUM(starts_run) OVER (PARTITION BY custid ORDER BY order_day) AS run_no
    FROM flagged
),
run_sizes AS (
    SELECT
        custid,
        run_no,
        COUNT(*) AS run_len
    FROM numbered
    GROUP BY custid, run_no
)
SELECT
    custid,
    MAX(run_len) AS max_sequence_of_days
FROM run_sizes
GROUP BY custid

Rolling sum per time interval per group

Table, data and task as follows.
See SQL-Fiddle-Link for demo-data and estimated results.
-- Demo table: balance changes per item, recorded only on the dates where a
-- change happened, together with the expected running total per item.
CREATE TABLE "data"
(
    "item"       INT,
    "timestamp"  DATE,
    "balance"    FLOAT,
    "rollingSum" FLOAT
);

INSERT INTO "data" ("item", "timestamp", "balance", "rollingSum")
VALUES
    (1, '2014-02-10', -10, -10),
    (1, '2014-02-15',   5,  -5),
    (1, '2014-02-20',   2,  -3),
    (1, '2014-02-25',  13,  10),
    (2, '2014-02-13',  15,  15),
    (2, '2014-02-16',  15,  30),
    (2, '2014-03-01',  15,  45);
I need to get all rows in a defined time interval. The above table doesn't hold a record per item for each possible date - only dates on which changes were applied are recorded (it is possible that there are n rows per timestamp per item).
If the given interval does not fit exactly on stored timestamps, the latest timestamp before startdate ( nearest smallest neighbour ) should be used as start-balance/rolling-sum.
estimated results ( time interval: startdate = '2014-02-13', enddate = '2014-02-20' )
"item", "timestamp" , "balance", "rollingSum"
1 , '2014-02-13' , -10 , -10
1 , '2014-02-15' , 5 , -5
1 , '2014-02-20' , 2 , -3
2 , '2014-02-13' , 15 , 15
2 , '2014-02-16' , 15 , 30
I checked questions like this and googled a lot, but haven't found a solution yet.
I don't think it's a good idea to extend the "data" table with one row per missing date per item, since the complete interval (smallest date <-----> latest date per item) may span several years.
Thanks in advance!

-- Total balance from the latest timestamp at or before the start date up to
-- the end date.
-- NOTE(review): table, 'startdate' and 'enddate' look like placeholders for
-- the real table name and interval bounds - substitute before running.
SELECT SUM(balance)
FROM table
WHERE timestamp <= 'enddate'
  AND timestamp >= (
        SELECT MAX(timestamp)
        FROM table
        WHERE timestamp <= 'startdate'
      )
Don't know what you mean by rolling-sum.

here is an attempt. Seems it gives the right result, not so beautiful. Would have been easier in sqlserver 2012+:
-- Rows per item inside [@from, @to] with a running balance. When an item has
-- no row exactly on @from, a synthetic row dated @from carries the running
-- total accumulated before the interval.
--
-- Fixes: T-SQL variables must start with @ (the original #from / #to do not
-- parse), and the final filter now uses the variables instead of repeating
-- the two dates as literals.
DECLARE @from date = '2014-02-13';
DECLARE @to   date = '2014-02-20';

WITH x AS
(
    -- Real rows plus one NULL-balance marker row per item dated @from,
    -- numbered per item in (timestamp, balance) order.
    SELECT
        z.item,
        z.timestamp,
        z.balance,
        ROW_NUMBER() OVER (PARTITION BY z.item ORDER BY z.timestamp, z.balance) AS rn
    FROM
    (
        SELECT item, timestamp, balance
        FROM data
        UNION ALL
        SELECT DISTINCT item, @from, NULL
        FROM data
    ) AS z
    WHERE z.timestamp <= @to
),
y AS
(
    SELECT
        d.item,
        d.timestamp,
        -- The marker row has no balance of its own; show the running total.
        COALESCE(d.balance, a.rollingsum) AS balance,
        a.rollingsum,
        d.rn
    FROM x AS d
    CROSS APPLY
    (
        -- Running total of the item's balances up to and including this row.
        SELECT SUM(p.balance) AS rollingsum
        FROM x AS p
        WHERE p.rn <= d.rn
          AND p.item = d.item
    ) AS a
    WHERE d.timestamp BETWEEN @from AND @to
)
SELECT
    item,
    timestamp,
    balance,
    rollingsum
FROM y
-- Drops marker rows that have no earlier balance to carry forward.
WHERE rollingsum IS NOT NULL
ORDER BY item, rn, timestamp;
Result:
item timestamp balance rollingsum
1 2014-02-13 -10,00 -10,00
1 2014-02-15 5,00 -5,00
1 2014-02-20 2,00 -3,00
2 2014-02-13 15,00 15,00
2 2014-02-16 15,00 30,00

Create Top 10 but with everything except Top 9 in Other

What I would like to do is have a Top 10, but where the 10th entry is called "Other" and contains the sum of everything bar the top 9, followed by a total. So basically it looks like this:
ReportingDate FundCode Currency Duration Contribution Percentage
31/10/2012 1111 Malaysian Ringgit 0.5 14.6
31/10/2012 1111 Turkish Lira 0.3 13.5
31/10/2012 1111 Russian Rouble 0.5 11.9
31/10/2012 1111 Indunesian Rupiah 0.6 11.7
31/10/2012 1111 Mexican Peso 0.6 11.7
31/10/2012 1111 Polish Zloty 0.3 10.2
31/10/2012 1111 Mexican Peso 0.4 10.1
31/10/2012 1111 Polish Zloty 0.3 9.9
31/10/2012 1111 South African Rand 0.2 5.8
31/10/2012 1111 Brazilian Real 0.3 2.0
31/10/2012 1111 Other 0.6 -1.4
31/10/2012 1111 Total 4.6 100.0
My code currently looks like this:
-- Aggregate per reporting date, portfolio and currency name, and rank the
-- currencies inside each portfolio by summed Percentage (highest first).
;WITH CTE AS
(
    SELECT
        WT.ReportingDate,
        WT.PortfolioID,
        DV.dmv_nme AS Currency,
        RANK() OVER (
            PARTITION BY WT.PortfolioID
            ORDER BY SUM(Percentage) DESC
        ) AS [Rank],
        ISNULL(CAST(SUM(DurationContribution) / 100.0 AS DECIMAL(22,1)), 0) AS [Duration Contribution],
        CAST(SUM(Percentage) AS DECIMAL(22,1)) AS [Weight]
    FROM #Worktable AS WT
    INNER JOIN dw_domain_value AS DV
        ON  DV.dmv_value    = WT.Currency
        AND DV.data_cls_num = 2
    GROUP BY
        WT.ReportingDate,
        WT.PortfolioID,
        DV.dmv_nme
)
-- Keep the rows ranked 10 or better.
SELECT
    ReportingDate,
    PortfolioID,
    Currency,
    [Rank],
    [Duration Contribution],
    [Weight]
FROM CTE
WHERE [Rank] <= 10
ORDER BY
    ReportingDate,
    PortfolioID,
    [Rank],
    [Weight] DESC
So this gives me the top 10 fine. How could I get it so that the final 10th line is "Other", with everything bar the top 9 summed within it, and also include a total at the end?

Here is one solution. But you may be able to move it into the CTE for better performance.
-- Top 9 currencies per portfolio, then an 'Other' line that sums everything
-- ranked below the top 9, then a 'Total' line per reporting date / portfolio.
--
-- Fixes over the first draft of this answer:
--   * the bucket threshold is 9 and the bucket is labelled 'Other';
--   * no column alias inside GROUP BY (a syntax error);
--   * ORDER BY only uses grouped or aggregated expressions;
--   * GROUPING SETS yields exactly the detail rows plus one total per
--     (ReportingDate, PortfolioID), without the extra subtotal levels that
--     WITH ROLLUP would add.
;WITH CTE AS
(
    SELECT
        WT.ReportingDate,
        WT.PortfolioID,
        DV.dmv_nme AS Currency,
        RANK() OVER (
            PARTITION BY WT.PortfolioID
            ORDER BY SUM(Percentage) DESC
        ) AS [Rank],
        ISNULL(CAST(SUM(DurationContribution) / 100.0 AS DECIMAL(22,1)), 0) AS [Duration Contribution],
        CAST(SUM(Percentage) AS DECIMAL(22,1)) AS [Weight]
    FROM #Worktable AS WT
    INNER JOIN dw_domain_value AS DV
        ON  DV.dmv_value    = WT.Currency
        AND DV.data_cls_num = 2
    GROUP BY
        WT.ReportingDate,
        WT.PortfolioID,
        DV.dmv_nme
),
Bucketed AS
(
    -- Rows ranked 1..9 keep their own name and rank; everything else falls
    -- into a single 'Other' bucket that sorts as rank 10.
    SELECT
        ReportingDate,
        PortfolioID,
        CASE WHEN [Rank] <= 9 THEN Currency ELSE 'Other' END AS Bucket,
        CASE WHEN [Rank] <= 9 THEN [Rank]   ELSE 10      END AS BucketRank,
        [Duration Contribution],
        [Weight]
    FROM CTE
)
SELECT
    ReportingDate,
    PortfolioID,
    -- GROUPING(Bucket) = 1 marks the per-portfolio total row.
    CASE WHEN GROUPING(Bucket) = 1 THEN 'Total' ELSE Bucket END AS Currency,
    SUM([Duration Contribution]) AS [Duration Contribution],
    SUM([Weight])                AS [Weight]
FROM Bucketed
GROUP BY GROUPING SETS
(
    (ReportingDate, PortfolioID, BucketRank, Bucket),
    (ReportingDate, PortfolioID)
)
ORDER BY
    ReportingDate,
    PortfolioID,
    GROUPING(Bucket),   -- detail rows first, total row last
    BucketRank,
    [Weight] DESC

For this I decided I couldn't use a CTE, so I ended up inserting the sections one by one into a temp table, like so:
/* Step 1: copy the rows ranked 9 or better, unchanged. */
INSERT INTO #FinalOutput
SELECT
    wg.ReportingDate,
    wg.PortfolioID,
    wg.PortfolioNme,
    wg.Currency,
    wg.[Rank],
    wg.DurationContribution,
    wg.[Weight]
FROM #WorktableGrouped AS wg
WHERE wg.[Rank] <= 9
ORDER BY
    wg.ReportingDate,
    wg.PortfolioID,
    wg.[Rank],
    wg.[Weight] DESC
/* Step 2: collapse every row ranked below the top 9 into a single 'Other'
   row (fixed rank 10) per reporting date and portfolio. */
INSERT INTO #FinalOutput
SELECT
    wg.ReportingDate,
    wg.PortfolioID,
    wg.PortfolioNme,
    'Other'                      AS Currency,
    10                           AS [Rank],
    SUM(wg.DurationContribution) AS DurationContribution,
    SUM(wg.[Weight])             AS [Weight]
FROM #WorktableGrouped AS wg
WHERE wg.[Rank] > 9
GROUP BY
    wg.ReportingDate,
    wg.PortfolioID,
    wg.PortfolioNme
ORDER BY
    wg.ReportingDate,
    wg.PortfolioID,
    [Rank],
    [Weight] DESC

/* Show what has been staged so far. */
SELECT * FROM #FinalOutput
/* Step 3: return the staged rows plus one 'Total' row (rank 11) per
   reporting date and portfolio. */
SELECT
    ReportingDate,
    PortfolioID,
    PortfolioNme,
    -- In the second grouping set Currency and [Rank] are aggregated away;
    -- that row is the per-portfolio total.
    CASE WHEN GROUPING(Currency) = 1 THEN 'Total' ELSE Currency END AS Currency,
    CASE WHEN GROUPING(Currency) = 1 THEN 11      ELSE [Rank]   END AS [Rank],
    ISNULL(CAST(SUM(DurationContribution) AS DECIMAL(22,1)), 0) AS [Duration Contribution],
    CAST(SUM([Weight]) AS DECIMAL(22,1))                        AS [Weight]
FROM #FinalOutput
GROUP BY GROUPING SETS
(
    -- detail level: the rows exactly as staged
    (ReportingDate, PortfolioID, PortfolioNme, Currency, [Rank]),
    -- total level: one row per reporting date / portfolio
    (ReportingDate, PortfolioID, PortfolioNme)
)
ORDER BY
    ReportingDate,
    PortfolioID,
    [Rank]

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

3 months rolling average for 3 columns - tsql

Related

How to find gap date and minimum date in the same query?

How does this Time Difference Calculation work?

Find date sequence in PostgreSQL

Rolling sum per time interval per group

Create Top 10 but with everything except Top 9 in Other

Categories

Resources