SQL 2008 cumulative count - sql-server-2008-r2

I have a query returning the number of rows grouped by date :
-- Number of script results per calendar day for a single work list.
-- @WLID: ID of the work list to report on (T-SQL variables start with @, not #).
SELECT
    CONVERT(date, run.TimeStamp) AS TimeStamp,
    COUNT(*) AS ItemCount  -- explicit alias so the column has a name in the result set
FROM ScriptResult AS res
INNER JOIN ScriptRun AS run
    ON run.ScriptRunID = res.ScriptRunID
INNER JOIN WorkListItems AS wli
    ON wli.WorkListItemID = res.WorklistItemID
INNER JOIN WorkList AS wl
    ON wl.WorkListID = wli.WorkListID
WHERE wli.WorkListID = @WLID
GROUP BY CONVERT(date, run.TimeStamp)
ORDER BY CONVERT(date, run.TimeStamp);
This produces a result set like this :
TimeStamp (ItemCount)
2015-03-10 5364
2015-03-11 22027
2015-03-12 18037
Now what I want, is to cumulatively summarize the itemcount, like this :
TimeStamp ItemCount TotalCount
2015-03-10 5364 5364
2015-03-11 22027 27391
2015-03-12 18037 45428
The query needs to be compatible with 2008R2.
I have played with [count ... over ... partition by] in several variations, but the problem is that the window function boundary has to change from row to row, and I cannot use ROWS or RANGE.
Any ideas please ?
Thanks in advance.

Try with correlated subquery:
-- Running total of the daily counts using a correlated subquery.
-- Works on SQL Server 2008 R2 because it needs no ROWS/RANGE window frame.
-- @WLID: ID of the work list to report on.
;WITH cte AS (
    -- one row per calendar day with that day's row count (S)
    SELECT
        CONVERT(date, run.TimeStamp) AS TimeStamp,
        COUNT(*) AS S
    FROM ScriptResult AS res
    INNER JOIN ScriptRun AS run
        ON run.ScriptRunID = res.ScriptRunID
    INNER JOIN WorkListItems AS wli
        ON wli.WorkListItemID = res.WorklistItemID
    INNER JOIN WorkList AS wl
        ON wl.WorkListID = wli.WorkListID
    WHERE wli.WorkListID = @WLID
    GROUP BY CONVERT(date, run.TimeStamp)
)
SELECT
    t1.TimeStamp,
    t1.S,
    -- TS: sum of this day's count and the counts of all earlier days
    (SELECT SUM(t2.S) FROM cte AS t2 WHERE t2.TimeStamp <= t1.TimeStamp) AS TS
FROM cte AS t1
ORDER BY t1.TimeStamp;  -- without this the row order is not guaranteed

You could try creating a temp table to hold the first query results that you can further aggregate to return the cumulative sum on the ItemCount field:
-- Running total via a temp table and a triangular self-join (SQL Server 2008 R2 compatible).
-- @WLID: ID of the work list to report on.

-- Make the script re-runnable within the same session.
IF OBJECT_ID('tempdb..#TempTable') IS NOT NULL
    DROP TABLE #TempTable;

CREATE TABLE #TempTable (
    [SeqNo]     int  NULL,
    [TimeStamp] date NULL,
    [ItemCount] int  NULL
);

-- INSERT ... SELECT is required here: SELECT ... INTO would try to create
-- #TempTable a second time and fail because it already exists.
INSERT INTO #TempTable ([SeqNo], [TimeStamp], [ItemCount])
SELECT
    -- Number the days in date order. The window may only use the grouped
    -- expression, not ungrouped columns such as res.ScriptRunID.
    ROW_NUMBER() OVER (ORDER BY CONVERT(date, run.TimeStamp)) AS SeqNo,
    CONVERT(date, run.TimeStamp) AS [TimeStamp],
    COUNT(*) AS ItemCount
FROM ScriptResult AS res
INNER JOIN ScriptRun AS run
    ON run.ScriptRunID = res.ScriptRunID
INNER JOIN WorkListItems AS wli
    ON wli.WorkListItemID = res.WorklistItemID
INNER JOIN WorkList AS wl
    ON wl.WorkListID = wli.WorkListID
WHERE wli.WorkListID = @WLID
GROUP BY CONVERT(date, run.TimeStamp);

-- Each day (t1) is joined to itself and every earlier day (t2); summing t2
-- yields the cumulative count.
SELECT
    t1.[TimeStamp],
    t1.ItemCount,
    SUM(t2.ItemCount) AS TotalCount
FROM #TempTable AS t1
INNER JOIN #TempTable AS t2
    ON t2.SeqNo <= t1.SeqNo
GROUP BY t1.[TimeStamp], t1.ItemCount
ORDER BY t1.[TimeStamp];
SQL Fiddle Example
Note: This links to a Microsoft SQL Server 2014 database version SQL fiddle which should work with SQL Server 2008 as well.

Related

Postgres: Date grouping in Subquery from timestamp

I am trying find out how many leads are generated per listing per day.
I have this query:
-- Attempt to count leads per listing per day with a correlated scalar subquery.
-- PostgreSQL rejects this query: the subquery references vl.created_at, but the
-- outer query only groups by vl.created_at::date, so the raw column is ungrouped.
SELECT
vl.listing_id,
vl.created_at::date as dt,
(
SELECT count(*)
FROM voice_leads vl2
WHERE vl2.listing_id = vl.listing_id
AND vl.created_at::date = vl2.created_at::date
) as cnt
FROM voice_leads vl
GROUP BY listing_id, vl.created_at::date
ORDER BY listing_id
but when executing I get "ERROR: subquery uses ungrouped column "vl.created_at" from outer query LINE 8: AND vl.created_at::date = vl2.created_at::date"
Any idea on what I could do to fix it?
-- Leads per listing per day using LATERAL.
-- The distinct (listing, day) pairs are built first and the lateral subquery
-- counts the matching leads once per pair. Joining every lead row to its
-- matching rows and then grouping would count n * n rows for a group of n leads.
SELECT
    d.listing_id,
    d.dt,
    per_day.cnt
FROM (
    SELECT DISTINCT
        vl.listing_id,
        vl.created_at::date AS dt
    FROM voice_leads AS vl
) AS d
CROSS JOIN LATERAL (
    SELECT count(*) AS cnt
    FROM voice_leads AS vl2
    WHERE vl2.listing_id = d.listing_id
      AND vl2.created_at::date = d.dt
) AS per_day
ORDER BY d.listing_id
You don't need the subquery:
-- One row per (listing, day) with the number of leads created that day.
SELECT
    vl.listing_id,
    CAST(vl.created_at AS date) AS dt,
    count(vl.listing_id) AS cnt
FROM voice_leads AS vl
GROUP BY
    vl.listing_id,
    CAST(vl.created_at AS date)
ORDER BY vl.listing_id
should do the same.
count(field) counts the rows in each group where field is not NULL.
count(*) counts all rows in each group, including rows where field is NULL.

postgres JOIN with left table null

my query is:
-- Message counts per group for one user since the start of the current week,
-- joined to the group's metadata and the user's language.
-- %s is a driver-side placeholder for the user id.
SELECT main.group_id, s_ref.title, s_ref.username, main.m_per_group, main.pos, u.lang
FROM (
-- messages per (group, user); pos numbers the users inside each group by message count, highest first
SELECT user_id, group_id, COUNT(user_id) AS m_per_group,
ROW_NUMBER() OVER (
PARTITION BY group_id
ORDER BY COUNT(group_id) DESC
) AS pos
FROM messages
WHERE message_date > date_trunc('week', now())
GROUP BY group_id, user_id
) AS main
LEFT OUTER JOIN supergroups_ref AS s_ref
USING (group_id)
RIGHT JOIN users AS u
ON u.user_id = main.user_id
-- NOTE(review): this filter is on the outer-joined side, so the NULL-extended
-- rows produced by the RIGHT JOIN are discarded and no row is returned when
-- main has no rows for the user.
WHERE main.user_id = %s
ORDER BY m_per_group DESC
The problem is that when main returns no rows, I don't even get the user's language from the users join; the result is exactly [].
I would instead like to get [(None, None, None, None, 'en')], which is why I used a right join. How can I get the result I want?
Move this condition:
WHERE main.user_id = %s
To the main subquery:
-- Inside the subquery the alias "main" is not in scope yet, so the column is
-- referenced directly. The outer query should then filter on u.user_id = %s
-- so that only this user's row is returned by the RIGHT JOIN.
WHERE message_date > date_trunc('week', now()) AND user_id = %s
The way it is now it is turning an outer join into an inner join.

Faster left join with last non-empty

Table1:
Shop
Manager
Date
Table2:
Shop
Date
Sales
I need to get Table2 with Manager field from Table1. I did the following trick:
-- For every t1 row, attach the manager from the latest t2 row of the same shop
-- that is dated strictly before the t1 row.
select
    t1.[Shop],
    t1.[Date],
    t1.[Sum],
    t2.[Manager]
from t1
left join t2
    on t2.[Shop] = t1.[Shop]
    and t2.[Date] = (
        -- latest earlier date for this shop; "prev" is a separate scan of t2
        select max(prev.[Date])
        from t2 as prev
        where prev.[Shop] = t1.[Shop]
          and prev.[Date] < t1.[Date]
    )
It works, but subquerying is very slow, so I wonder if there is more elegant and fast way to do so?
Some sample data to play around: http://pastebin.com/uLN6x5JE
This may seem like a roundabout way, but a join on a single condition is typically faster:
-- Branch 1: t1 rows that have at least one earlier t2 row for the same shop;
-- keep only the latest such t2 row per t1 row.
-- Assumes (Shop, Date) identifies a t1 row.
select t12.[Shop], t12.[Date], t12.[Sum]
, t12.[Manager]
from
( select t1.[Shop], t1.[Date], t1.[Sum]
  , t2.[Manager]
  -- number the candidate t2 rows per t1 row, newest first
  , row_number() over (partition by t1.[Shop], t1.[Date] order by t2.[Date] desc) as rn
  from t1
  inner join t2
    on t2.[Shop] = t1.[Shop]
   and t2.[Date] < t1.[Date]   -- compare t2 to t1; t1 against itself is never true
) as t12
where t12.rn = 1
-- The two branches are disjoint, so no duplicate elimination is needed.
union all
-- Branch 2: t1 rows with no earlier t2 row get a NULL manager.
select t1.[Shop], t1.[Date], t1.[Sum]
, null as [Manager]
from t1
where not exists
( select 1
  from t2
  where t2.[Shop] = t1.[Shop]
    and t2.[Date] < t1.[Date]
)
You may get much better performance by adding a covering index on t2 if you don't already have one:
-- Index keyed on (Shop, Date) that also carries Manager, so the lookups above
-- can read Manager from the index itself.
create index T2ShopDate on t2 ([Shop], [Date]) include ([Manager])
Here is a version that uses a CTE to find all maximum manager dates first and then join back to t2 to get the manager:
-- Step 1 (MaxDates): for every t1 row find the latest earlier t2 date of the same shop.
-- Step 2: join back to t2 on shop AND date to fetch that row's manager.
;with MaxDates ([Shop], [Date], [Sum], [MaxMgrDate]) as
(
    select
        t1.[Shop]
        ,t1.[Date]
        ,t1.[Sum]
        ,max(t2.[Date])   -- NULL when the shop has no earlier t2 row
    from t1
    left join t2
        on t2.[Shop] = t1.[Shop]
        and t2.[Date] < t1.[Date]
    group by
        t1.[Shop]
        ,t1.[Date]
        ,t1.[Sum]
)
select
    MaxDates.[Shop]
    ,MaxDates.[Date]
    ,MaxDates.[Sum]
    ,t2.[Manager]
from MaxDates
-- left join keeps t1 rows without any earlier manager; matching on Shop as well
-- prevents picking up another shop's manager that happens to share the date
left join t2
    on t2.[Shop] = MaxDates.[Shop]
    and t2.[Date] = MaxDates.[MaxMgrDate]
You might be able to remove the second join back to t2 by using row_number():
-- Single pass over the join: rank the earlier t2 rows of each t1 row, newest
-- first, and keep the top one.
-- Assumes (Shop, Date) identifies a t1 row.
;with MaxDates ([Shop], [Date], [Sum], [Manager], [RowNum]) as
(
    select
        t1.[Shop]
        ,t1.[Date]
        ,t1.[Sum]
        ,t2.[Manager]
        -- partition per t1 row (shop AND date); partitioning by shop alone
        -- would keep only one row per shop
        ,row_number() over (partition by t1.[Shop], t1.[Date] order by t2.[Date] desc)
    from t1
    left join t2
        on t2.[Shop] = t1.[Shop]
        and t2.[Date] < t1.[Date]
)
select
    [Shop]
    ,[Date]
    ,[Sum]
    ,[Manager]
from MaxDates
where RowNum = 1

How to display rollup data in new column?

I have the following query which returns the number of android questions per each day on StackOverflow in the year of 2011. I want to get the sum of all the questions asked during the year 2011. For this I am using ROLLUP.
-- Android questions per day since 2011-01-01; WITH ROLLUP adds subtotal rows
-- (per month, per year and grand total) in the same count column.
-- Invisible zero-width characters that broke parsing have been removed.
select
    year(p.CreationDate) as [Year],
    month(p.CreationDate) as [Month],
    day(p.CreationDate) as [Day],
    count(*) as [QuestionsAskedToday]
from Posts p
inner join PostTags pt on p.id = pt.postid
inner join Tags t on t.id = pt.tagid
where
    t.tagname = 'android' and
    p.CreationDate > '2011-01-01 00:00:00'
group by year(p.CreationDate), month(p.CreationDate), day(p.CreationDate)
with rollup
order by year(p.CreationDate), month(p.CreationDate) desc, day(p.CreationDate) desc
This is the output:
The sum of all questions asked on each day in 2011 is being displayed in the QuestionsAskedToday column itself.
Is there a way to display the rollup in a new column with an alias?
Link to the query
To show this as a column rather than a row you can use SUM(COUNT(*)) OVER () instead of ROLLUP. (Online Demo)
-- Daily android question counts with the overall total repeated on every row.
select
    year(p.CreationDate)  as [Year],
    month(p.CreationDate) as [Month],
    day(p.CreationDate)   as [Day],
    count(*)              as [QuestionsAskedToday],
    -- window over the whole grouped result: adds up the per-day counts
    sum(count(*)) over () as [Total]
from Posts as p
inner join PostTags as pt on pt.postid = p.id
inner join Tags as t on pt.tagid = t.id
where p.CreationDate > '2011-01-01 00:00:00'
  and t.tagname = 'android'
group by year(p.CreationDate), month(p.CreationDate), day(p.CreationDate)
order by year(p.CreationDate), month(p.CreationDate) desc, day(p.CreationDate) desc
You could take an approach like this: Example
-- Daily android question counts with ROLLUP subtotals moved into their own column.
-- GROUPING(expr) returns 1 on rows where expr has been rolled up, 0 on detail rows.
SELECT
    YEAR(p.CreationDate) AS [Year]
    , CASE
        WHEN GROUPING(MONTH(p.CreationDate)) = 0
            THEN CAST(MONTH(p.CreationDate) AS VARCHAR(2))
        ELSE 'Totals:'
      END AS [Month]
    , CASE
        WHEN GROUPING(DAY(p.CreationDate)) = 0
            THEN CAST(DAY(p.CreationDate) AS VARCHAR(2))
        ELSE 'Totals:'
      END AS [Day]
    -- detail rows only: the count for that single day
    , CASE
        WHEN GROUPING(MONTH(p.CreationDate)) = 0
         AND GROUPING(DAY(p.CreationDate)) = 0
            THEN COUNT(1)
      END AS [QuestionsAskedToday]
    -- subtotal rows only: the rolled-up count
    , CASE
        WHEN GROUPING(MONTH(p.CreationDate)) = 1
          OR GROUPING(DAY(p.CreationDate)) = 1
            THEN COUNT(1)
      END AS [Totals]
FROM Posts AS p
INNER JOIN PostTags AS pt ON p.id = pt.postid
INNER JOIN Tags AS t ON t.id = pt.tagid
WHERE t.tagname = 'android'
  AND p.CreationDate >= '2011-01-01'
GROUP BY ROLLUP(YEAR(p.CreationDate)
    , MONTH(p.CreationDate)
    , DAY(p.CreationDate))
ORDER BY YEAR(p.CreationDate)
    , MONTH(p.CreationDate) DESC
    , DAY(p.CreationDate) DESC
If this is what you wanted, the same technique can be applied to Years as well to total them in the new column, or their own column, if you want to query for multiple years and aggregate them.

Select last value in a month for all given IDs

I have 2 tables, one containing meter IDs, and another containing measurements for some of the meters in the first table. This is the table structure:
MeterConfig:
MeterID (int)
MeterNumber (char[16])
Type (char[25])
Readings:
MeterID (int)
Date (datetime)
Value (numeric(18,6))
I need to get the last reading (and its date) from a given period for each meter, as well as the meter number. I managed to do this in T-SQL, although I'm not particularly pleased with the way I did it using this query:
-- Last reading (date and value) per meter within the period @startdate..@endDate.
-- T-SQL variables start with @; the explicit INNER JOIN replaces the comma join.
select distinct
    cfg.MeterNumber,
    -- most recent reading date of this meter inside the period
    (select top 1 r.Date from Readings as r where r.Date between @startdate and @endDate and r.MeterID = cfg.MeterID order by r.Date desc) as Date,
    -- value of that same most recent reading
    (select top 1 r.Value from Readings as r where r.Date between @startdate and @endDate and r.MeterID = cfg.MeterID order by r.Date desc) as Value
from MeterConfig as cfg
inner join Readings as r1
    on r1.MeterID = cfg.MeterID
-- only meters with at least one reading in the period are returned
where r1.Date between @startdate and @endDate;
How can I do this more efficiently?
-- Latest reading per meter inside the period: number each meter's readings
-- newest first and keep the first one.
WITH LatestReading AS (
    SELECT
        mc.MeterID,
        mc.MeterNumber,
        rd.[Date],
        rd.Value,
        ROW_NUMBER() OVER (PARTITION BY mc.MeterID ORDER BY rd.[Date] DESC) AS rn
    FROM MeterConfig AS mc
    INNER JOIN Readings AS rd
        ON rd.MeterID = mc.MeterID
    WHERE rd.[Date] BETWEEN @startdate AND @endDate
)
SELECT
    MeterID,
    MeterNumber,   -- the question asks for the meter number as well
    [Date],
    Value
FROM LatestReading
WHERE rn = 1;
Assuming the dates in Readings are unique (i.e. they include a timestamp), the following should be equivalent to your query.
-- Latest reading per meter inside the period, found by joining each meter's
-- readings to that meter's own maximum date.
SELECT DISTINCT
    cfg.MeterNumber
    , r1.[Date]
    , r1.Value
FROM MeterConfig AS cfg
INNER JOIN Readings AS r1
    ON r1.MeterID = cfg.MeterID
INNER JOIN (
    -- The maximum must be taken per meter. A single global MAX would return
    -- only the meters that have a reading at the overall latest timestamp.
    SELECT
        r.MeterID
        , MAX(r.[Date]) AS LastDate
    FROM Readings AS r
    WHERE r.[Date] BETWEEN @StartDate AND @EndDate
    GROUP BY r.MeterID
) AS r2
    ON r2.MeterID = r1.MeterID
    AND r2.LastDate = r1.[Date]