Need help
Input
Date A B C
2015-10-31 1.49 3.7 7.8
2015-11-30 1.45 3.6 7.6
2015-12-31 1.41 3.7 8.0
2016-01-31 1.33 3.7 8.3
2016-02-29 1.9 4.1 8.6
2016-03-31 1.46 4.4 9.7
-- Sample table: one row per month-end date with three measures.
CREATE TABLE dbo.ThreeMonth (
    RDate DATE,
    A     FLOAT,
    b     FLOAT,
    C     FLOAT
);

-- Load the seven month-end rows in a single multi-row INSERT.
INSERT INTO dbo.threemonth (RDate, a, b, c)
VALUES
    ('2015-10-31', 1.49, 3.7, 7.8),
    ('2015-11-30', 1.45, 3.6, 7.6),
    ('2015-12-31', 1.41, 3.7, 8.0),
    ('2016-01-31', 1.33, 3.7, 8.3),
    ('2016-02-29', 1.9,  4.1, 8.6),
    ('2016-03-31', 1.46, 4.4, 9.7),
    ('2016-04-30', 1.35, 4.3, 9.4);

-- Show what was loaded.
SELECT RDate, A, b, C
FROM threemonth;
-- Attempted query: moving average of A ordered by RDate.
-- The frame is the current row plus up to two preceding rows, so the first
-- two rows are averaged over fewer than three rows instead of returning NULL.
SELECT
    rdate,
    AVG(A) OVER (
        ORDER BY Rdate
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    )
FROM threemonth
OutPut
I need to display a rolling 3-month average for the three columns a, b and c. When I add AVG(b) and AVG(c), it gives this error:
"Column 'threemonth.RDate' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause."
--Also I want the OUTPUT IN following format
2015-10-31 NULL -- Because 3 months are not available avg cannot be calculated
2015-11-30 NULL -- Because 3 months are not available avg cannot be calculated
2015-12-31 1.45
2016-01-31 1.39666666666667
2016-02-29 1.54666666666667
2016-03-31 1.56333333333333
2016-04-30 1.57
Can somebody advise how to solve the above problem, where I need to take the average for more than one column and display the output in the format shown above for all three columns?
Something like this:
-- Rolling 3-row average of A, ordered by RDate, computed in a single pass.
-- COUNT(*) over the same frame tells us whether a full three-row window is
-- available; when it is not (the first two rows) the CASE yields NULL.
-- This replaces a correlated subquery that re-scanned the table for every row.
SELECT
    RDate,
    CASE
        WHEN COUNT(*) OVER (ORDER BY RDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) = 3
        THEN AVG(A) OVER (ORDER BY RDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
    END AS [Avg]
FROM threemonth
ORDER BY RDate;  -- explicit ordering so the output order is deterministic
Returns
RDate Avg
2015-10-31 NULL
2015-11-30 NULL
2015-12-31 1.45
2016-01-31 1.39666666666667
2016-02-29 1.54666666666667
2016-03-31 1.56333333333333
2016-04-30 1.57
For Cols A,B, and C
-- Rolling 3-row averages of A, B and C, ordered by RDate.
-- cteBase computes every windowed value in one pass over the table;
-- FrameRows is the number of rows actually present in the 3-row frame.
;WITH cteBase AS (
    SELECT
        RDate,
        COUNT(*) OVER (ORDER BY RDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS FrameRows,
        AVG(A)   OVER (ORDER BY RDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS RollA,
        AVG(B)   OVER (ORDER BY RDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS RollB,
        AVG(C)   OVER (ORDER BY RDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS RollC
    FROM threemonth
)
SELECT
    RDate,
    -- NULL until three rows are available, i.e. for the first two rows
    CASE WHEN FrameRows = 3 THEN RollA END AS AvgA,
    CASE WHEN FrameRows = 3 THEN RollB END AS AvgB,
    CASE WHEN FrameRows = 3 THEN RollC END AS AvgC
FROM cteBase
ORDER BY RDate;  -- explicit ordering so the output order is deterministic
Related
I have a table customer_history which logs customer_id and modification_date.
When a customer_id is not modified, there is no entry in the table.
I can find the last date on which a customer_id was not modified (= last_date_with_no_modification) by looking for the missing dates (a gaps-and-islands problem).
But in the same query, if no date is missing, the value of last_date_with_no_modification should
be DATEADD(DAY,-1,min(modification_date)) for the customer_id.
How can I add this last condition to my SQL query?
I use following tables:
"Customer_history" table:
customer_id modification_date
1 2017-12-20
1 2017-12-19
1 2017-12-17
2 2017-12-20
2 2017-12-18
2 2017-12-17
2 2017-12-15
3 2017-12-20
3 2017-12-19
"#tmp_calendar" table:
date
2017-12-15
2017-12-16
2017-12-17
2017-12-18
2017-12-19
2017-12-20
Query used to get the gap date:
-- CTE_GAP: pairs every modification date with the previous one for the same
-- customer. GapDays is the number of days strictly between the two dates.
WITH CTE_GAP AS (
    SELECT
        p.customer_id,
        p.GapStart,
        p.GapEnd,
        DATEDIFF(DAY, p.GapStart, p.GapEnd) - 1 AS GapDays
    FROM (
        SELECT
            ch.customer_id,
            LAG(ch.modification_date) OVER (
                PARTITION BY ch.customer_id
                ORDER BY ch.modification_date
            ) AS GapStart,
            ch.modification_date AS GapEnd
        FROM customer_history AS ch
    ) AS p
)
-- For each customer with at least one gap containing a calendar date, report
-- the day after the latest gap start.
SELECT
    g.customer_id,
    DATEADD(DAY, 1, MAX(g.GapStart)) AS last_date_with_no_modification
FROM CTE_GAP AS g
INNER JOIN #tmp_calendar AS cal
    ON cal.date BETWEEN DATEADD(DAY, 1, g.GapStart) AND DATEADD(DAY, -1, g.GapEnd)
WHERE g.GapDays > 0
GROUP BY g.customer_id
Result:
customer_id last_date_with_no_modification
1 2017-12-18
2 2017-12-19
3 2017-12-19 (Row missing)
How to get customer_id 3?
Something like this should work:
-- Last date with no modification per customer:
--   * if the customer's history has a gap, the day after the latest gap start;
--   * otherwise the day before the customer's first modification.
-- All three CTEs read customer_history (the original mixed in the temp-table
-- name #customer_history for the first one).
WITH CTE_GAP AS (
    -- Each modification date paired with the previous one for the same customer.
    SELECT
        ch.customer_id,
        LAG(ch.modification_date) OVER (PARTITION BY ch.customer_id ORDER BY ch.modification_date) AS GapStart,
        ch.modification_date AS GapEnd,
        DATEDIFF(DAY, LAG(ch.modification_date) OVER (PARTITION BY ch.customer_id ORDER BY ch.modification_date), ch.modification_date) - 1 AS GapDays
    FROM customer_history AS ch
),
LD AS (
    -- Customers with at least one gap that contains a calendar date.
    SELECT
        cg.customer_id,
        DATEADD(DAY, 1, MAX(cg.GapStart)) AS last_date_with_no_modification
    FROM CTE_GAP AS cg
    CROSS JOIN #tmp_calendar AS c
    WHERE cg.GapDays > 0
      AND c.date BETWEEN DATEADD(DAY, 1, cg.GapStart) AND DATEADD(DAY, -1, cg.GapEnd)
    GROUP BY cg.customer_id
),
LD_NO_GAP AS (
    -- Fallback value for every customer: the day before the first modification.
    SELECT
        customer_id,
        DATEADD(DAY, -1, MIN(modification_date)) AS last_date_with_no_modification
    FROM customer_history
    GROUP BY customer_id
)
-- LD_NO_GAP already has exactly one row per customer, so it drives the result;
-- no DISTINCT over the raw history rows is needed.
SELECT
    ng.customer_id,
    COALESCE(LD.last_date_with_no_modification, ng.last_date_with_no_modification) AS last_date_with_no_modification
FROM LD_NO_GAP AS ng
LEFT JOIN LD
    ON LD.customer_id = ng.customer_id
ORDER BY ng.customer_id;
I wanted to display the difference in HH:MM:SS between two datetime fields in SQL Server 2014.
I found a solution in this Stack Overflow post. And it works perfectly. But I want to understand the "why" of how this arrives at the correct answer.
T-SQL:
-- Breaks the elapsed time between HarvestDate and createDate into day, hour
-- and minute parts, for rows whose HarvestDate is within the last six months.
SELECT
    y.CustomerID,
    y.createDate,
    y.HarvestDate,
    y.DateDif,
    DATEDIFF(DAY, 0, y.DateDif) AS [Days],
    DATEPART(HOUR, y.DateDif) AS [Hours],
    DATEPART(MINUTE, y.DateDif) AS [Minutes]
FROM (
    -- y: the filtered rows plus the raw difference of the two date columns
    SELECT
        x.createDate - x.HarvestDate AS [DateDif],
        x.createDate,
        x.HarvestDate,
        x.CustomerID
    FROM (
        -- x: rows with a HarvestDate no older than six months
        SELECT
            CustomerID,
            HarvestDate,
            createDate
        FROM dbo.CustomerHarvestReports
        WHERE HarvestDate >= DATEADD(MONTH, -6, GETDATE())
    ) AS [x]
) AS [y]
ORDER BY DATEDIFF(DAY, 0, y.DateDif) DESC;
Results:
1239090 2017-11-07 08:51:03.870 2017-10-14 11:39:49.540 1900-01-24 21:11:14.330 23 21 11
1239090 2017-11-07 08:51:04.823 2017-10-19 11:17:48.320 1900-01-19 21:33:16.503 18 21 33
1843212 2017-10-27 19:14:02.070 2017-10-21 10:49:57.733 1900-01-07 08:24:04.337 6 8 24
1843212 2017-10-27 19:14:03.057 2017-10-21 10:49:57.733 1900-01-07 08:24:05.323 6 8 24
The first column is the Customer ID; the second and third columns are the ones I wanted to calculate the time difference between. The fourth column is the difference between those two columns, and it is one of the points in the code that I do not understand.
If you subtract two datetime fields like this create date - harvestdate, why does it default to the year 1900?
And regarding DATEDIFF ( DAY, 0, y.DateDif ) - what does the 0 mean? Does the 0 set the date as '01-01-1900'?
It works - for that I am grateful. I was hoping I could get an explanation as to why this behavior works?
I've added some comments that should explain it:
SELECT
    y.CustomerID,
    y.createDate,
    y.HarvestDate,
    y.DateDif,
    -- The integer 0 is implicitly converted to the datetime
    -- '1900-01-01 00:00:00.000', so this counts the day boundaries between
    -- that base date and DateDif, i.e. the whole days of the difference.
    DATEDIFF(DAY, 0, y.DateDif) AS [Days],
    -- DateDif already carries the elapsed time, so DATEPART only has to read
    -- its hour-of-day component: the hours left over after the whole days
    -- (0-23), not the total number of hours.
    DATEPART(HOUR, y.DateDif) AS [Hours],
    -- Same idea as [Hours]: the minute component of DateDif (0-59).
    DATEPART(MINUTE, y.DateDif) AS [Minutes]
FROM (
    SELECT
        -- Subtracting one datetime from another yields a datetime: the base
        -- value 0 ('1900-01-01 00:00:00.000') plus the elapsed time. That is
        -- why the result appears to be a date in the year 1900.
        -- NOTE(review): assumes both columns are datetime - confirm.
        x.createDate - x.HarvestDate AS [DateDif],
        x.createDate,
        x.HarvestDate,
        x.CustomerID
    FROM (
        -- Only rows whose HarvestDate is no older than six months.
        SELECT
            CustomerID,
            HarvestDate,
            createDate
        FROM dbo.CustomerHarvestReports
        WHERE HarvestDate >= DATEADD(MONTH, -6, GETDATE())
    ) AS [x]
) AS [y]
ORDER BY DATEDIFF(DAY, 0, y.DateDif) DESC;
I'm trying to find the maximum sequence of consecutive days per customer in my data, i.e. the longest run of consecutive days on which a specific customer was active. If someone uses my app on 25/08/16, 26/08/16, 27/08/16, 01/09/16 and 02/09/16, the max sequence will be 3 days (25, 26, 27).
In the end (The output) I want to get two fields: custid | MaxDaySequence
I have the following fields in my data table: custid | orderdate (timestamp)
For example:
custid orderdate
1 25/08/2007
1 03/10/2007
1 13/10/2007
1 15/01/2008
1 16/03/2008
1 09/04/2008
2 18/09/2006
2 08/08/2007
2 28/11/2007
2 04/03/2008
3 27/11/2006
3 15/04/2007
3 13/05/2007
3 19/06/2007
3 22/09/2007
3 25/09/2007
3 28/01/2008
I'm using PostgreSQL 2014.
Thanks
Trying:
-- Longest run of consecutive calendar days with at least one order, per customer.
-- Technique: number each customer's distinct order days in date order; within a
-- run of consecutive days, (day - row number) is constant, so it identifies the run.
select custid, max(num_days) as longest
from (
    select custid, grp, count(*) as num_days
    from (
        select custid,
               order_day,
               -- row_number() returns bigint; date minus integer yields a date
               order_day - cast(row_number() over (partition by custid order by order_day) as integer) as grp
        from (
            -- one row per customer per calendar day, so repeat orders on the
            -- same day do not break or inflate a run
            select distinct custid, date(orderdate) as order_day
            from table_
        ) d
    ) x
    group by custid, grp
) y
group by custid
Try:
-- Longest run of consecutive days per customer, for an orderdate column of type date.
--   x: flags each row with xx = 1 when it starts a new run (the first row of a
--      customer, or more than one day after the previous row), else 0
--   z: the running sum of xx labels every row with its run number yy
--   q: counts the days in each run
SELECT custid, max( abc ) AS max_sequence_of_days
FROM (
    -- count distinct days rather than rows, so several orders on the same
    -- day are not counted as extra days in the run
    SELECT custid, yy, count( DISTINCT orderdate ) AS abc
    FROM (
        SELECT * ,
               SUM( xx ) OVER ( PARTITION BY custid ORDER BY orderdate ) AS yy
        FROM (
            SELECT * ,
                   CASE WHEN
                       orderdate - lag( orderdate ) OVER ( PARTITION BY custid ORDER BY orderdate )
                       <= 1
                   THEN 0 ELSE 1 END AS xx
            FROM mytable
        ) x
    ) z
    GROUP BY custid, yy
) q
GROUP BY custid
Demo: http://sqlfiddle.com/#!15/00422/11
===== EDIT ===========
Got "operator does not exist: interval <= integer"
This means that orderdate column is of type timestamp, not date.
In this case you need to use <= interval '1' day condition instead of <= 1:
Please see this link: https://www.postgresql.org/docs/9.0/static/functions-datetime.html to learn more about date arithmetic in PostgreSQL
Please see this demo:
http://sqlfiddle.com/#!15/7c2200/2
-- Longest run of consecutive days per customer, for an orderdate column of type timestamp.
--   x: flags each row with xx = 1 when it starts a new run, else 0
--   z: the running sum of xx labels every row with its run number yy
--   q: counts the calendar days in each run
-- Both the comparison and the count work on the timestamp truncated to midnight:
-- comparing raw timestamps would treat e.g. the 25th at 08:00 and the 26th at
-- 20:00 (36 hours apart) as non-consecutive days.
-- NOTE(review): assumes timestamp without time zone; with timestamptz a
-- 25-hour DST day would exceed the one-day interval - confirm.
SELECT custid, max( abc ) AS max_sequence_of_days
FROM (
    -- distinct calendar days, so several orders on one day count once
    SELECT custid, yy, count( DISTINCT date_trunc( 'day', orderdate ) ) AS abc
    FROM (
        SELECT * ,
               SUM( xx ) OVER ( PARTITION BY custid ORDER BY orderdate ) AS yy
        FROM (
            SELECT * ,
                   CASE WHEN
                       date_trunc( 'day', orderdate )
                       - lag( date_trunc( 'day', orderdate ) ) OVER ( PARTITION BY custid ORDER BY orderdate )
                       <= interval '1' day
                   THEN 0 ELSE 1 END AS xx
            FROM mytable
        ) x
    ) z
    GROUP BY custid, yy
) q
GROUP BY custid
Table, data and task as follows.
See SQL-Fiddle-Link for demo-data and estimated results.
-- Demo table: one row per recorded balance change of an item, together with
-- the sample's rolling sum per item.
CREATE TABLE "data" (
    "item"       INT,
    "timestamp"  DATE,
    "balance"    FLOAT,
    "rollingSum" FLOAT
);

-- Sample rows for items 1 and 2.
INSERT INTO "data" ("item", "timestamp", "balance", "rollingSum")
VALUES
    (1, '2014-02-10', -10, -10),
    (1, '2014-02-15',   5,  -5),
    (1, '2014-02-20',   2,  -3),
    (1, '2014-02-25',  13,  10),
    (2, '2014-02-13',  15,  15),
    (2, '2014-02-16',  15,  30),
    (2, '2014-03-01',  15,  45);
I need to get all rows in a defined time interval. The above table doesn't hold a record per item for each possible date - only dates on which changes were applied are recorded (it is possible that there are n rows per timestamp per item).
If the given interval does not fit exactly on stored timestamps, the latest timestamp before the start date (nearest smaller neighbour) should be used as the start balance / rolling sum.
estimated results ( time interval: startdate = '2014-02-13', enddate = '2014-02-20' )
"item", "timestamp" , "balance", "rollingSum"
1 , '2014-02-13' , -10 , -10
1 , '2014-02-15' , 5 , -5
1 , '2014-02-20' , 2 , -3
2 , '2014-02-13' , 15 , 15
2 , '2014-02-16' , 15 , 30
I checked questions like this and googled a lot, but didn't find a solution yet.
I don't think it's a good idea to extend the "data" table with one row per missing date per item, since the complete interval (smallest date <-----> latest date per item) may span several years.
Thanks in advance!
-- Sum of balance per item over an interval whose lower bound is snapped back
-- to the item's latest recorded timestamp at or before the start date.
-- The original sketch used the reserved word `table` and the literal strings
-- 'startdate'/'enddate'; it is written here against the "data" table with
-- real date variables so that it can run.
DECLARE @startdate date = '2014-02-13';
DECLARE @enddate   date = '2014-02-20';

SELECT
    d.item,
    SUM(d.balance) AS balance_sum
FROM "data" AS d
WHERE d."timestamp" >= COALESCE(
          -- nearest earlier-or-equal timestamp for this item, not for the
          -- whole table, since each item has its own change dates
          (SELECT MAX(p."timestamp")
           FROM "data" AS p
           WHERE p.item = d.item
             AND p."timestamp" <= @startdate),
          -- the item has no row at or before the start date
          @startdate)
  AND d."timestamp" <= @enddate
GROUP BY d.item;
Don't know what you mean by rolling-sum.
here is an attempt. Seems it gives the right result, not so beautiful. Would have been easier in sqlserver 2012+:
-- Rows per item inside [@from, @to] with a running sum of balance. If an item
-- has no row exactly on @from, a synthetic row dated @from carries the balance
-- accumulated up to that date.
-- T-SQL variables must start with @ (the original used #from/#to, which does
-- not parse).
DECLARE @from date = '2014-02-13';
DECLARE @to   date = '2014-02-20';

WITH x AS (
    -- All real rows up to @to plus one placeholder row (NULL balance) per item
    -- dated @from, numbered per item in date order. The row number ordering
    -- puts the NULL placeholder ahead of a real row on the same date.
    SELECT
        z.item,
        z.timestamp,
        z.balance,
        ROW_NUMBER() OVER (PARTITION BY z.item ORDER BY z.timestamp, z.balance) AS rn
    FROM (
        SELECT item, timestamp, balance
        FROM data
        UNION ALL
        SELECT DISTINCT item, @from, NULL
        FROM data
    ) AS z
    WHERE z.timestamp <= @to
),
y AS (
    SELECT
        d.item,
        d.timestamp,
        -- the placeholder row has no balance of its own: show the running sum
        COALESCE(d.balance, a.rollingsum) AS balance,
        a.rollingsum,
        d.rn
    FROM x AS d
    CROSS APPLY (
        -- running sum of this item's balances up to and including row d
        SELECT SUM(p.balance) AS rollingsum
        FROM x AS p
        WHERE p.rn <= d.rn
          AND p.item = d.item
    ) AS a
    -- use the declared interval rather than repeating the literal dates
    WHERE d.timestamp BETWEEN @from AND @to
)
SELECT item, timestamp, balance, rollingsum
FROM y
-- drops a placeholder row that has nothing accumulated before it
WHERE rollingsum IS NOT NULL
ORDER BY item, rn, timestamp
Result:
item timestamp balance rollingsum
1 2014-02-13 -10,00 -10,00
1 2014-02-15 5,00 -5,00
1 2014-02-20 2,00 -3,00
2 2014-02-13 15,00 15,00
2 2014-02-16 15,00 30,00
What i would like to do is have a Top 10, but the 10th entry is called "Other" with the sum of everything bar the top 9 within it and has a total. So basically it looks like this:
ReportingDate FundCode Currency Duration Contribution Percentage
31/10/2012 1111 Malaysian Ringgit 0.5 14.6
31/10/2012 1111 Turkish Lira 0.3 13.5
31/10/2012 1111 Russian Rouble 0.5 11.9
31/10/2012 1111 Indonesian Rupiah 0.6 11.7
31/10/2012 1111 Mexican Peso 0.6 11.7
31/10/2012 1111 Polish Zloty 0.3 10.2
31/10/2012 1111 Mexican Peso 0.4 10.1
31/10/2012 1111 Polish Zloty 0.3 9.9
31/10/2012 1111 South African Rand 0.2 5.8
31/10/2012 1111 Brazilian Real 0.3 2.0
31/10/2012 1111 Other 0.6 -1.4
31/10/2012 1111 Total 4.6 100.0
My code currently looks like this:
-- CTE: one row per reporting date, portfolio and currency name, with summed
-- weight and duration contribution, ranked by summed Percentage (highest first)
-- within each PortfolioID.
;WITH CTE AS (
    SELECT
        ReportingDate,
        PortfolioID,
        DV.dmv_nme AS Currency,
        RANK() OVER (PARTITION BY PortfolioID ORDER BY SUM(Percentage) DESC) AS [Rank],
        ISNULL(CAST(SUM(DurationContribution) / 100.0 AS DECIMAL(22,1)), 0) AS [Duration Contribution],
        CAST(SUM(Percentage) AS DECIMAL(22,1)) AS [Weight]
    FROM #Worktable AS WT
    INNER JOIN dw_domain_value AS DV
        ON DV.dmv_value = WT.Currency
        AND DV.data_cls_num = 2
    GROUP BY
        WT.ReportingDate,
        WT.PortfolioID,
        DV.dmv_nme
)
-- Keep only ranks 1 to 10.
SELECT
    ReportingDate,
    PortfolioID,
    Currency,
    [Rank],
    [Duration Contribution],
    [Weight]
FROM CTE
WHERE [Rank] <= 10
ORDER BY
    ReportingDate,
    PortfolioID,
    [Rank],
    [Weight] DESC
So this gives me the top 10 fine. So how could i get it so that the final 10th line is "Other" with everything bar the top 9 summed within it, and also include a total at the end?
Here is one solution. But you may be able to move it into the CTE for better performance.
-- Top 9 currencies per reporting date and portfolio, then an "Other" row that
-- sums everything outside the top 9, then a "Total" row.
;WITH CTE AS (
    -- One row per reporting date, portfolio and currency name, ranked by
    -- summed Percentage within each PortfolioID.
    SELECT
        ReportingDate,
        PortfolioID,
        DV.dmv_nme AS Currency,
        RANK() OVER (PARTITION BY PortfolioID ORDER BY SUM(Percentage) DESC) AS [Rank],
        ISNULL(CAST(SUM(DurationContribution) / 100.0 AS DECIMAL(22,1)), 0) AS [Duration Contribution],
        CAST(SUM(Percentage) AS DECIMAL(22,1)) AS [Weight]
    FROM #Worktable AS WT
    INNER JOIN dw_domain_value AS DV
        ON DV.dmv_value = WT.Currency
        AND DV.data_cls_num = 2
    GROUP BY
        WT.ReportingDate,
        WT.PortfolioID,
        DV.dmv_nme
),
Bucketed AS (
    -- Ranks 1-9 keep their own name and rank; everything else is relabelled
    -- 'Other' with rank 10 so that it collapses into a single row below.
    SELECT
        ReportingDate,
        PortfolioID,
        CASE WHEN [Rank] <= 9 THEN Currency ELSE 'Other' END AS Currency,
        CASE WHEN [Rank] <= 9 THEN [Rank] ELSE 10 END AS [Rank],
        [Duration Contribution],
        [Weight]
    FROM CTE
)
SELECT
    ReportingDate,
    PortfolioID,
    -- GROUPING([Rank]) = 1 marks the per-portfolio total row
    CASE WHEN GROUPING([Rank]) = 1 THEN 'Total' ELSE Currency END AS Currency,
    SUM([Duration Contribution]) AS [Duration Contribution],
    SUM([Weight]) AS [Weight]
FROM Bucketed
-- Exactly two grouping levels: the detail rows and one total per reporting
-- date and portfolio. WITH ROLLUP would also emit per-date and grand-total rows.
GROUP BY GROUPING SETS (
    (ReportingDate, PortfolioID, Currency, [Rank]),
    (ReportingDate, PortfolioID)
)
ORDER BY
    ReportingDate,
    PortfolioID,
    GROUPING([Rank]),  -- detail rows first, total row last
    [Rank],
    [Weight] DESC
For this i decided i couldn't use CTE, so ended up having to insert these section by section into a temp table as so:
/* Step 1: copy the rows ranked 1 to 9 into the output table unchanged. */
INSERT INTO #FinalOutput
SELECT
    ReportingDate,
    PortfolioID,
    PortfolioNme,
    Currency,
    [Rank],
    DurationContribution,
    [Weight]
FROM #WorktableGrouped
WHERE [Rank] <= 9
ORDER BY
    ReportingDate,
    PortfolioID,
    [Rank],
    [Weight] DESC
/* Step 2: collapse every row ranked below 9th into a single 'Other' row per
   reporting date and portfolio, stored with rank 10. */
INSERT INTO #FinalOutput
SELECT
    ReportingDate,
    PortfolioID,
    PortfolioNme,
    'Other' AS Currency,
    10 AS [Rank],
    SUM(DurationContribution) AS DurationContribution,
    SUM([Weight]) AS [Weight]
FROM #WorktableGrouped
WHERE [Rank] > 9
GROUP BY
    ReportingDate,
    PortfolioID,
    PortfolioNme
ORDER BY
    ReportingDate,
    PortfolioID,
    [Rank],
    [Weight] DESC
-- Show the staged rows (top 9 plus 'Other') before the total is added.
SELECT * FROM #FinalOutput

/* Step 3: final result with a 'Total' row per portfolio.
   GROUPING_ID over the five grouping columns is 0 for detail rows and 3 when
   only Currency and [Rank] (the last two arguments) have been rolled up, i.e.
   for the per-portfolio total. HAVING discards every other rollup level. */
SELECT
    ReportingDate,
    PortfolioID,
    PortfolioNme,
    CASE
        WHEN GROUPING_ID(ReportingDate, PortfolioID, PortfolioNme, Currency, [Rank]) = 3 THEN 'Total'
        ELSE Currency
    END AS Currency,
    -- the total row gets rank 11 so that it sorts after 'Other' (rank 10)
    CASE
        WHEN GROUPING_ID(ReportingDate, PortfolioID, PortfolioNme, Currency, [Rank]) = 3 THEN 11
        ELSE [Rank]
    END AS [Rank],
    ISNULL(CAST(SUM(DurationContribution) AS DECIMAL(22,1)), 0) AS [Duration Contribution],
    CAST(SUM([Weight]) AS DECIMAL(22,1)) AS [Weight]
FROM #FinalOutput
GROUP BY ROLLUP (
    ReportingDate,
    PortfolioID,
    PortfolioNme,
    Currency,
    [Rank]
)
HAVING GROUPING_ID(ReportingDate, PortfolioID, PortfolioNme, Currency, [Rank]) IN (0, 3)
ORDER BY
    ReportingDate,
    PortfolioID,
    [Rank]