tsql distinct count subquery - tsql

I am using SSMS 2008 and I need to use a subquery to return the count of unique records / client. How do I do this? Currently I am returning all unique records over the whole dataset and not per client. Here is my pseudocode currently:
-- Query exactly as posted in the question; it is the subject of the answer that follows.
-- NOTE(review): #START_DATE / #SERVICE look like T-SQL variables (@START_DATE / @SERVICE)
-- whose sigil was altered in this copy -- confirm against the original post.
-- NOTE(review): there is no comma between A.Program3 and the parenthesised subquery below.
SELECT A.Program, A.PEOPLE_ID, K.EVENT_NAME, A.Program2, A.Program3
-- The subquery re-declares aliases A and K and has no predicate referencing the outer row,
-- so it is evaluated over the whole data set rather than per client.
(SELECT COUNT(DISTINCT K.EVENT_NAME)
FROM #TEMP1 A, evolv_cs.dbo.facility_view F, evolv_cs.dbo.people_x N, event_view K WITH (NOLOCK)
WHERE F.group_profile_id = A.group_profile_id AND
K.event_definition_id = a.event_definition_id AND
A.people_id = N.people_id
-- Grouping by the same column that is being distinct-counted yields one row per event name.
GROUP BY K.EVENT_NAME) as DistinctEvent
FROM #TEMP1 A
JOIN event_view K WITH (NOLOCK) on K.event_definition_id = A.event_definition_id
-- Keeps rows whose 14-day window starting at Enrolled_Date contains the start-date value.
WHERE #START_DATE BETWEEN A.Enrolled_Date AND DATEADD(D, 14, A.Enrolled_Date)
-- A NULL service value disables the event-name filter.
AND (#SERVICE IS NULL OR #SERVICE = K.event_name)
GROUP BY
A.Program, A.PEOPLE_ID, K.EVENT_NAME, A.Program2, A.Program3
OK, I edited the above query now. I still want event_name per client.

What you want to use here is the GROUP BY clause for your distinct counts, instead of the joined subqueries. The following two queries should give you what you want.
-- Number of distinct event names per client (one row per PEOPLE_ID).
-- @START_DATE and @SERVICE are parameters; the '#' sigil in the earlier copy made
-- them parse as column names. A NULL @SERVICE disables the event-name filter.
SELECT
    A.PEOPLE_ID,
    COUNT(DISTINCT K.EVENT_NAME) AS DistinctEvents
FROM #TEMP1 AS A
INNER JOIN event_view AS K WITH (NOLOCK)
    ON K.event_definition_id = A.event_definition_id
-- Keep enrolments whose 14-day window starting at Enrolled_Date contains @START_DATE.
WHERE @START_DATE BETWEEN A.Enrolled_Date AND DATEADD(D, 14, A.Enrolled_Date)
  AND (@SERVICE IS NULL OR @SERVICE = K.event_name)
GROUP BY A.PEOPLE_ID;
-- Number of distinct clients per event name (one row per EVENT_NAME).
-- @START_DATE and @SERVICE are parameters; the '#' sigil in the earlier copy made
-- them parse as column names. A NULL @SERVICE disables the event-name filter.
SELECT
    K.EVENT_NAME,
    COUNT(DISTINCT A.PEOPLE_ID) AS DistinctClients
FROM #TEMP1 AS A
INNER JOIN event_view AS K WITH (NOLOCK)
    ON K.event_definition_id = A.event_definition_id
-- Keep enrolments whose 14-day window starting at Enrolled_Date contains @START_DATE.
WHERE @START_DATE BETWEEN A.Enrolled_Date AND DATEADD(D, 14, A.Enrolled_Date)
  AND (@SERVICE IS NULL OR @SERVICE = K.event_name)
GROUP BY K.EVENT_NAME;
If you need to combine the results of the queries into a single result set, you should be able to do so with a UNION.

Related

Convert query from MySQL to PostgreSQL: unlucky employees

I am trying to convert this query for a database question (Unlucky Employees) to Postgres. I am having a hard time getting it right.
-- MySQL procedure (the source to be ported to PostgreSQL).
-- Lists departments with fewer than 6 joined rows, ordered by total salary (desc),
-- row count (desc) and department id, then keeps every other row starting with the first.
-- The row counter is the MySQL user variable @rn; written with '#' it would start a
-- comment in MySQL and break every statement that uses it.
CREATE PROCEDURE unluckyEmployees()
BEGIN
    SET @rn = 0;

    SELECT dep_name, emp_number, total_salary
    FROM (
        -- Number the ordered departments 1, 2, 3, ... via the user variable.
        SELECT dep_name, emp_number, total_salary, (@rn := @rn + 1) AS seqnum
        FROM (
            -- A department with no employees still produces one LEFT JOIN row,
            -- hence the explicit 0 when e.id IS NULL.
            SELECT
                name AS dep_name,
                IF(e.id IS NULL, 0, COUNT(*)) AS emp_number,
                IFNULL(SUM(salary), 0) AS total_salary
            FROM Department d
            LEFT JOIN Employee e ON e.Department = d.id
            GROUP BY d.id
            HAVING COUNT(*) < 6
            ORDER BY SUM(salary) DESC, COUNT(*) DESC, d.id
        ) t
    ) tt
    -- Odd sequence numbers only: 1st, 3rd, 5th, ...
    WHERE MOD(seqnum, 2) = 1;
END
I am hoping to get the query in postgres. Tried setting #rn to row_number() but still not working.

SQL Server: How to get previous data

I know that the LEAD & LAG functions exist to get the previous & next data row. My question is how to achieve the same goal in older SQL Server versions where LEAD & LAG are not present. Please suggest a good approach to retrieve the previous & next data row without the LEAD & LAG functions. Thanks
Here i am sharing one example which is working fine.
-- Demo fixture: five reporting periods keyed by an auto-incrementing ID.
CREATE TABLE #test
(
    ID      INT IDENTITY(1,1),
    Quarter NVARCHAR(20)
);

-- ID is generated by the identity column, so only Quarter is supplied.
INSERT INTO #test (Quarter)
VALUES
    ('1Q 2010'),
    ('2Q 2010'),
    ('3Q 2010'),
    ('4Q 2010'),
    ('FY 2010');

SELECT ID, Quarter
FROM #test;

-- Pair every row with the row that precedes it in ID order, then show the pair for ID 4.
SELECT PrevID, PrevQuarter, CurrID, CurrQuarter
FROM
(
    SELECT
        LAG(ID, 1)      OVER (ORDER BY ID) AS PrevID,
        LAG(Quarter, 1) OVER (ORDER BY ID) AS PrevQuarter,
        ID                                 AS CurrID,
        Quarter                            AS CurrQuarter
    FROM #test
) AS lagged
WHERE CurrID = 4;
You could use correlated TOP subqueries in lieu of LEAD and LAG:
-- Substitute for LAG: for each row, look up the row with the greatest smaller ID
-- using one correlated TOP 1 subquery per output column.
SELECT PrevID, PrevQuarter, CurrID, CurrQuarter
FROM
(
    SELECT
        (
            SELECT TOP 1 prev.ID
            FROM #test AS prev
            WHERE prev.ID < cur.ID
            ORDER BY prev.ID DESC
        ) AS PrevID,
        (
            SELECT TOP 1 prev.Quarter
            FROM #test AS prev
            WHERE prev.ID < cur.ID
            ORDER BY prev.ID DESC
        ) AS PrevQuarter,
        cur.ID      AS CurrID,
        cur.Quarter AS CurrQuarter
    FROM #test AS cur
) AS paired
WHERE CurrID = 4;
Here is a working demo.
there are many ways to do it. Here are 2
Self Join:
If ID is not continuous, use row_number() to generate one
-- Self join: the previous row is the one whose ID is exactly one less than the
-- current ID, so this only works when the IDs have no gaps.
SELECT
    prev.ID      AS PrevID,
    prev.Quarter AS PrevQuarter,
    cur.ID       AS CurrID,
    cur.Quarter  AS CurrQuarter
FROM #test AS cur
LEFT JOIN #test AS prev
    ON prev.ID + 1 = cur.ID
WHERE cur.ID = 4
using APPLY():
-- OUTER APPLY: fetch the single row with the greatest smaller ID for each current row;
-- the current row is kept (with NULLs) when no earlier row exists.
SELECT
    prev.ID      AS PrevID,
    prev.Quarter AS PrevQuarter,
    cur.ID       AS CurrID,
    cur.Quarter  AS CurrQuarter
FROM #test AS cur
OUTER APPLY
(
    SELECT TOP 1 earlier.ID, earlier.Quarter
    FROM #test AS earlier
    WHERE earlier.ID < cur.ID
    ORDER BY earlier.ID DESC
) AS prev
WHERE cur.ID = 4

Faster left join with last non-empty

Table1:
Shop
Manager
Date
Table2:
Shop
Date
Sales
I need to get Table2 with Manager field from Table1. I did the following trick:
-- For every t1 row, attach the manager from the latest t2 row of the same shop
-- dated strictly before the t1 row (NULL manager when there is none).
SELECT
    t1.[Shop],
    t1.[Date],
    t1.[Sum],
    t2.[Manager]
FROM t1
LEFT JOIN t2
    ON t2.[Shop] = t1.[Shop]
   AND t2.[Date] =
       (
           -- Inner alias kept distinct from the outer t2 to avoid shadowing.
           SELECT MAX(m.[Date])
           FROM t2 AS m
           WHERE m.[Shop] = t1.[Shop]
             AND m.[Date] < t1.[Date]
       )
It works, but subquerying is very slow, so I wonder if there is more elegant and fast way to do so?
Some sample data to play around: http://pastebin.com/uLN6x5JE
may seem like a round about way but join on a single condition is typically faster
-- Latest manager strictly before each t1 row, built from two disjoint branches:
--   1) t1 rows that have at least one earlier t2 row for the same shop (rn = 1 picks the latest);
--   2) t1 rows that have none, returned with a NULL manager.
-- Fixes vs. the posted version:
--   * the date predicate compared t1.[Date] with itself (always false); it must compare t2 to t1;
--   * row_number was partitioned by t2.[Shop], which keeps one row per shop instead of one per t1 row;
--   * HAVING COUNT(*) = 1 also matched rows with exactly one earlier manager; counting a t2
--     column yields 0 only when the LEFT JOIN found nothing.
SELECT ranked.[Shop], ranked.[Date], ranked.[Sum], ranked.[Manager]
FROM
(
    SELECT
        t1.[Shop],
        t1.[Date],
        t1.[Sum],
        t2.[Manager],
        ROW_NUMBER() OVER (
            PARTITION BY t1.[Shop], t1.[Date], t1.[Sum]
            ORDER BY t2.[Date] DESC
        ) AS rn
    FROM t1
    INNER JOIN t2
        ON t2.[Shop] = t1.[Shop]
       AND t2.[Date] < t1.[Date]
) AS ranked
WHERE ranked.rn = 1
-- The branches cannot overlap and each is already de-duplicated, so UNION ALL is sufficient.
UNION ALL
SELECT t1.[Shop], t1.[Date], t1.[Sum], NULL AS [Manager]
FROM t1
LEFT JOIN t2
    ON t2.[Shop] = t1.[Shop]
   AND t2.[Date] < t1.[Date]
GROUP BY t1.[Shop], t1.[Date], t1.[Sum]
HAVING COUNT(t2.[Shop]) = 0
You may get much better performance by adding a covering index on t2 if you don't already have one:
-- Index on t2 keyed by (Shop, Date) that also carries Manager, so the lookups above
-- can be answered from the index alone.
CREATE INDEX T2ShopDate
    ON t2 ([Shop], [Date])
    INCLUDE ([Manager])
Here is a version that uses a CTE to find all maximum manager dates first and then join back to t2 to get the manager:
-- Step 1 (MaxDates): for every t1 row, the latest t2 date of the same shop strictly before it
--                    (NULL when the shop has no earlier t2 row).
-- Step 2: join back to t2 to fetch the manager recorded on that date.
-- Fixes vs. the posted version: the join back must also match on Shop (otherwise another
-- shop's row with the same date matches), and it is a LEFT JOIN so t1 rows without an
-- earlier manager are kept, as in the question's query.
;WITH MaxDates ([Shop], [Date], [Sum], [MaxMgrDate]) AS
(
    SELECT
        t1.[Shop],
        t1.[Date],
        t1.[Sum],
        MAX(t2.[Date])
    FROM t1
    LEFT JOIN t2
        ON t2.[Shop] = t1.[Shop]
       AND t2.[Date] < t1.[Date]
    GROUP BY
        t1.[Shop],
        t1.[Date],
        t1.[Sum]
)
SELECT
    MaxDates.[Shop],
    MaxDates.[Date],
    MaxDates.[Sum],
    t2.[Manager]
FROM MaxDates
LEFT JOIN t2
    ON t2.[Shop] = MaxDates.[Shop]
   AND t2.[Date] = MaxDates.[MaxMgrDate]
You might be able to remove the second join back to t2 by using row_number():
-- Single pass: rank the earlier t2 rows of each t1 row by date (latest first) and keep rank 1.
-- Fix vs. the posted version: the partition must identify the t1 row (Shop, Date, Sum);
-- partitioning by Shop alone returns only one row per shop.
-- A t1 row with no earlier t2 row still appears once, with a NULL manager.
;WITH MaxDates ([Shop], [Date], [Sum], [Manager], [RowNum]) AS
(
    SELECT
        t1.[Shop],
        t1.[Date],
        t1.[Sum],
        t2.[Manager],
        ROW_NUMBER() OVER (
            PARTITION BY t1.[Shop], t1.[Date], t1.[Sum]
            ORDER BY t2.[Date] DESC
        )
    FROM t1
    LEFT JOIN t2
        ON t2.[Shop] = t1.[Shop]
       AND t2.[Date] < t1.[Date]
)
SELECT [Shop], [Date], [Sum], [Manager], [RowNum]
FROM MaxDates
WHERE RowNum = 1

SQL 2008 cumulative count

I have a query returning the number of rows grouped by date :
-- Number of script results per calendar day for one work list.
-- @WLID is the work-list parameter (the '#' sigil in the earlier copy made it parse
-- as a column name). The count column is named to match the result set shown below.
SELECT
    CONVERT(date, run.TimeStamp) AS TimeStamp,
    COUNT(*)                     AS ItemCount
FROM ScriptResult AS res
INNER JOIN ScriptRun AS run
    ON run.ScriptRunID = res.ScriptRunID
INNER JOIN WorkListItems AS wli
    ON wli.WorkListItemID = res.WorklistItemID
INNER JOIN WorkList AS wl
    ON wl.WorkListID = wli.WorkListID
WHERE wli.WorkListID = @WLID
GROUP BY CONVERT(date, run.TimeStamp)
ORDER BY CONVERT(date, run.TimeStamp);
This produces a result set like this :
TimeStamp (ItemCount)
2015-03-10 5364
2015-03-11 22027
2015-03-12 18037
Now what I want, is to cumulatively summarize the itemcount, like this :
TimeStamp ItemCount TotalCount
2015-03-10 5364 5364
2015-03-11 22027 27391
2015-03-12 18037 45428
The query needs to be compatible with 2008R2.
I have played with [count ...over..partition by] in several variations, but the problem is that the window function boundary should change, and I cannot use ROWS or RANGE.
Any ideas please ?
Thanks in advance.
Try with correlated subquery:
-- Running total without window frames.
-- cte: one row per calendar day with that day's count (S).
-- TS:  for each day, the sum of S over all days up to and including it.
-- @WLID is the work-list parameter (the '#' sigil in the earlier copy made it parse
-- as a column name).
;WITH cte AS
(
    SELECT
        CONVERT(date, run.TimeStamp) AS TimeStamp,
        COUNT(*)                     AS S
    FROM ScriptResult AS res
    INNER JOIN ScriptRun AS run
        ON run.ScriptRunID = res.ScriptRunID
    INNER JOIN WorkListItems AS wli
        ON wli.WorkListItemID = res.WorklistItemID
    INNER JOIN WorkList AS wl
        ON wl.WorkListID = wli.WorkListID
    WHERE wli.WorkListID = @WLID
    GROUP BY CONVERT(date, run.TimeStamp)
)
SELECT
    t1.TimeStamp,
    t1.S,
    (
        SELECT SUM(t2.S)
        FROM cte AS t2
        WHERE t2.TimeStamp <= t1.TimeStamp
    ) AS TS
FROM cte AS t1
-- Explicit ordering so the running total reads top to bottom.
ORDER BY t1.TimeStamp;
You could try creating a temp table to hold the first query results that you can further aggregate to return the cumulative sum on the ItemCount field:
-- Running total via a temp table and a triangular self join.
-- Fixes vs. the posted version:
--   * SELECT ... INTO #TempTable fails because the table was already created; use INSERT ... SELECT;
--   * ROW_NUMBER referenced res.ScriptRunID / run.TimeStamp, which are not in the GROUP BY,
--     and partitioning by ScriptRunID would not number the days 1..n; number the grouped days instead;
--   * @WLID is a parameter (the '#' sigil in the earlier copy made it parse as a column name).
CREATE TABLE #TempTable(
    [SeqNo] [int] NULL,
    [TimeStamp] [Date] NULL,
    [ItemCount] [int] NULL
) ON [PRIMARY];

-- One row per calendar day, numbered in date order.
INSERT INTO #TempTable ([SeqNo], [TimeStamp], [ItemCount])
SELECT
    ROW_NUMBER() OVER (ORDER BY CONVERT(Date, run.TimeStamp)) AS SeqNo,
    CONVERT(Date, run.TimeStamp) AS TimeStamp,
    COUNT(*) AS ItemCount
FROM ScriptResult AS res
INNER JOIN ScriptRun AS run
    ON run.ScriptRunID = res.ScriptRunID
INNER JOIN WorkListItems AS wli
    ON wli.WorkListItemID = res.WorklistItemID
INNER JOIN WorkList AS wl
    ON wl.WorkListID = wli.WorkListID
WHERE (wli.WorkListID = @WLID)
GROUP BY CONVERT(Date, run.TimeStamp);

-- Each day (t1) is joined to itself and every earlier day (t2); summing t2 gives the running total.
SELECT
    t1.TimeStamp,
    t1.ItemCount,
    SUM(t2.ItemCount) AS TotalCount
FROM #TempTable AS t1
INNER JOIN #TempTable AS t2
    ON t1.SeqNo >= t2.SeqNo
GROUP BY t1.TimeStamp, t1.ItemCount
ORDER BY t1.TimeStamp;
SQL Fiddle Example
Note: This links to a Microsoft SQL Server 2014 database version SQL fiddle which should work with SQL Server 2008 as well.

tsql distinct count subquery2

I am using SSMS 2008 and I need to use a subquery to return the count of unique records / client. How do I do this? Currently I am getting the error:
Msg 512, Level 16, State 1, Line 58
Subquery returned more than 1 value. This is not permitted when the subquery follows =, !=, <, <= , >, >= or when the subquery is used as an expression.
Here is my pseudocode currently:
-- Query exactly as posted in the question; it is the one that raises the Msg 512 error quoted above.
-- NOTE(review): #START_DATE / #SERVICE look like T-SQL variables (@START_DATE / @SERVICE)
-- whose sigil was altered in this copy -- confirm against the original post.
-- NOTE(review): there is no comma between A.Program3 and the parenthesised subquery below.
SELECT A.Program, A.PEOPLE_ID, K.EVENT_NAME, A.Program2, A.Program3
-- The subquery is used as a single column value, re-declares aliases A and K, and has no
-- predicate referencing the outer row.
(SELECT COUNT(DISTINCT K.EVENT_NAME)
FROM #TEMP1 A, evolv_cs.dbo.facility_view F, evolv_cs.dbo.people_x N, event_view K WITH (NOLOCK)
WHERE F.group_profile_id = A.group_profile_id AND
K.event_definition_id = a.event_definition_id AND
A.people_id = N.people_id
-- GROUP BY produces one row per event name, i.e. more than one value whenever several
-- event names exist, which is consistent with the reported error.
GROUP BY K.EVENT_NAME) as DistinctEvent
FROM #TEMP1 A
JOIN event_view K WITH (NOLOCK) on K.event_definition_id = A.event_definition_id
-- Keeps rows whose 14-day window starting at Enrolled_Date contains the start-date value.
WHERE #START_DATE BETWEEN A.Enrolled_Date AND DATEADD(D, 14, A.Enrolled_Date)
-- A NULL service value disables the event-name filter.
AND (#SERVICE IS NULL OR #SERVICE = K.event_name)
GROUP BY
A.Program, A.PEOPLE_ID, K.EVENT_NAME, A.Program2, A.Program3
This should work and run more efficiently.
-- Detail rows (program / client / event) with the number of distinct event names
-- for that client attached as DistinctEvent.
-- Fixes vs. the posted version:
--   * COUNT(DISTINCT EVENT_NAME) grouped by EVENT_NAME is always 1; the count is now grouped
--     per client (PEOPLE_ID), which is what the question asks for;
--   * the join back used A.EVENT_NAME, but the question's query takes EVENT_NAME from event_view
--     via event_definition_id, so the outer query joins event_view the same way
--     (NOTE(review): confirm #TEMP1 has no EVENT_NAME column of its own);
--   * @START_DATE / @SERVICE are parameters (the '#' sigil in the earlier copy made them
--     parse as column names).
SELECT
    A.Program,
    A.PEOPLE_ID,
    K.EVENT_NAME,
    A.Program2,
    A.Program3,
    sub.DistinctEvent
FROM #TEMP1 AS A
INNER JOIN event_view AS K WITH (NOLOCK)
    ON K.event_definition_id = A.event_definition_id
INNER JOIN
(
    -- One row per client: how many different event names fall inside the filter.
    -- The facility_view / people_x joins are kept from the posted answer because, as inner
    -- joins, they restrict which #TEMP1 rows are counted.
    SELECT
        A2.PEOPLE_ID,
        COUNT(DISTINCT K2.EVENT_NAME) AS DistinctEvent
    FROM #TEMP1 AS A2
    INNER JOIN evolv_cs.dbo.facility_view AS F
        ON F.group_profile_id = A2.group_profile_id
    INNER JOIN evolv_cs.dbo.people_x AS N
        ON N.people_id = A2.people_id
    INNER JOIN event_view AS K2 WITH (NOLOCK)
        ON K2.event_definition_id = A2.event_definition_id
    WHERE @START_DATE BETWEEN A2.Enrolled_Date AND DATEADD(D, 14, A2.Enrolled_Date)
      AND (@SERVICE IS NULL OR @SERVICE = K2.event_name)
    GROUP BY A2.PEOPLE_ID
) AS sub
    ON sub.PEOPLE_ID = A.PEOPLE_ID
-- Same filter as inside the derived table so detail rows and counts cover the same events.
WHERE @START_DATE BETWEEN A.Enrolled_Date AND DATEADD(D, 14, A.Enrolled_Date)
  AND (@SERVICE IS NULL OR @SERVICE = K.event_name)
GROUP BY
    A.Program,
    A.PEOPLE_ID,
    K.EVENT_NAME,
    A.Program2,
    A.Program3,
    sub.DistinctEvent;