Problem counting item frequency on T-SQL

Problem counting item frequency on T-SQL - tsql

I'm trying to count the frequency of numbers from 1 to 100 on different fields of a table.
Let's say I have the table "Results" with the following data:
LottoId Winner Second Third
--------- --------- --------- ---------
1 1 2 3
2 1 2 3
I'd like to be able to get the frequency per numbers. For that I'm using the following code:
--Creating numbers temp table
CREATE TABLE #Numbers(
Number int)
--Inserting the numbers into the temp table
declare #counter int
set #counter = 0
while #counter < 100
begin
set #counter = #counter + 1
INSERT INTO #Numbers(Number) VALUES(#counter)
end
--
SELECT #Numbers.Number, Count(Results.Winner) as Winner,Count(Results.Second) as Second, Count(Results.Third) as Third FROM #Numbers
LEFT JOIN Results ON
#Numbers.Number = Results.Winner OR #Numbers.Number = Results.Second OR #Numbers.Number = Results.Third
GROUP BY #Numbers.Number
The problem is that the counts are repeating the same values for each number. In this particular case I'm getting the following result:
Number Winner Second Third
--------- --------- --------- ---------
1 2 2 2
2 2 2 2
3 2 2 2
...
When I should get this:
Number Winner Second Third
--------- --------- --------- ---------
1 2 0 0
2 0 2 0
3 0 0 2
...
What am I missing?

If you are using SQL Server 2005+
With
WinnerCounts As
(
Select #Numbers.Number, Count(Results.Winner) As Results
FROM #Numbers
JOIN Results
On #Numbers.Number = Results.Winner
)
, SecondCounts As
(
Select #Numbers.Number, Count(Results.Second) As Results
FROM #Numbers
JOIN Results
On #Numbers.Number = Results.Second
)
, ThirdCounts As
(
Select #Numbers.Number, Count(Results.Third) As Results
FROM #Numbers
JOIN Results
On #Numbers.Number = Results.Third
)
Select Numbers.Number, Coalesce(WinnerCounts.Results,0) As Winner, Coalesce(SecondCounts.Result,0) As Second, Coalesce(ThirdCounts.Result,0) As Third
From #Numbers
Left Join WinnerCounts
On WinnerCounts.Results = #Numbers.Number
Left Join SecondCounts
On SecondCounts.Results = #Numbers.Number
Left Join ThirdCounts
On ThirdCounts.Results = #Numbers.Number
Another possible solution which will work in older versions of SQL Server:
Select #Numbers.Number
, SUM( Case When Winners.Winner Is Not Null Then 1 Else 0 End ) As WinnerCount
, SUM( Case When Seconds.Second Is Not Null Then 1 Else 0 End ) As SecondCount
, SUM( Case When Thirds.Third Is Not Null Then 1 Else 0 End ) As ThirdCount
From #Numbers
Left Join Results As Winners
On Winners.Winner = #Numbers.Number
Left Join Results As Seconds
On Seconds.Second = #Numbers.Number
Left Join Results As Thirds
On Thirds.Third = #Numbers.Number
Group By #Numbers.Number

You can use PIVOT and UNPIVOT.
SELECT Number, Winner, Second, Third
FROM (SELECT LottoID, Ranking, Number
FROM Lotto UNPIVOT (Number FOR Ranking
IN ([Winner], [Second], [Third])) AS unpvt) flat
PIVOT (COUNT(LottoId) FOR Ranking
IN ([Winner], [Second], [Third])) crosstab

Related

Label groups in SQL records based on consecutive matching records

Given the below dataset
CREATE TABLE #temp
( A NUMERIC,
B NUMERIC )
INSERT INTO #temp VALUES (243184, 0);
INSERT INTO #temp VALUES (240719, 0);
INSERT INTO #temp VALUES (236482, 1);
INSERT INTO #temp VALUES (230777, 0);
INSERT INTO #temp VALUES (226023, 0);
INSERT INTO #temp VALUES (222522, 0);
INSERT INTO #temp VALUES (214977, 1);
SELECT *
FROM #temp
ORDER BY A DESC
A B
--------------------------------------- ---------------------------------------
243184 0
240719 0
236482 1
230777 0
226023 0
222522 0
214977 1
How can I obtain the following output?
A B C
--------------------------------------- --------------------------------------- ----
243184 0 1
240719 0 1
236482 1 2
230777 0 3
226023 0 3
222522 0 3
214977 1 4
I want to group consecutive instances of [B] records into groups based on the value of 'A' when sorted in descending value.
So that each time [B] flips from 0 to 1 a new group starts
Any ideas?

Please try this:
SELECT s.A, s.B
,SUM(s.IsChange) OVER (ORDER BY s.A DESC ROWS UNBOUNDED PRECEDING) +1 AS [C]
FROM (
SELECT t.A,t.B
,CASE WHEN t.B <> LEAD(t.B)OVER(ORDER BY t.A) THEN 1 ELSE 0 END AS [IsChange]
FROM #temp t
) s
ORDER BY s.A DESC
;

TSQL: Inserting missing records into table

I am stuck at this T-SQL query.
I have table below
Age SectioName Cost
---------------------
1 Section1 100
2 Section1 200
1 Section2 500
3 Section2 100
4 Section2 200
Lets say for each section I can have maximum 5 Age. In above table there are some missing Ages. How do I insert missing Ages for each section. (Possibly without using cursor). The cost would be zero for missing Ages
So after the insertion the table should look like
Age SectioName Cost
---------------------
1 Section1 100
2 Section1 200
3 Section1 0
4 Section1 0
5 Section1 0
1 Section2 500
2 Section2 0
3 Section2 100
4 Section2 200
5 Section2 0
EDIT1
I should have been more clear with my question. The maximum age is dynamic value. It could be 5,6,10 or someother value but it will be always less than 25.

I think I got it
;WITH tally AS
(
SELECT 1 AS r
UNION ALL
SELECT r + 1 AS r
FROM tally
WHERE r < 5 -- this value could be dynamic now
)
select n.r, t.SectionName, 0 as Cost
from (select distinct SectionName from TempFormsSectionValues) t
cross join
(select ta.r FROM tally ta) n
where not exists
(select * from TempFormsSectionValues where YearsAgo = n.r and SectionName = t.SectionName)
order by t.SectionName, n.r

You can use this query to select missing value:
select n.num, t.SectioName, 0 as Cost
from (select distinct SectioName from table1) t
cross join
(select 1 as num union select 2 union select 3 union select 4 union select 5) n
where not exists
(select * from table1 where table1.age = n.num and table1.SectioName = t.SectioName)
It creates a Cartesian product of sections and numbers 1 to 5 and then selects those that doesn't exist yet. You can then use this query for the source of insert into your table.
SQL Fiddle (it has order by added to check the results easier but it's not necessary for inserting).

Use below query to generate missing rows
SELECT t1.Age,t1.Section,ISNULL(t2.Cost,0) as Cost
FROM
(
SELECT 1 as Age,'Section1' as Section,0 as Cost
UNION
SELECT 2,'Section1',0
UNION
SELECT 3,'Section1',0
UNION
SELECT 4,'Section1',0
UNION
SELECT 5,'Section1',0
UNION
SELECT 1,'Section2',0
UNION
SELECT 2,'Section2',0
UNION
SELECT 3,'Section2',0
UNION
SELECT 4,'Section2',0
UNION
SELECT 5,'Section2',0
) as t1
LEFT JOIN test t2
ON t1.Age=t2.Age AND t1.Section=t2.Section
ORDER BY Section,Age
SQL Fiddle
You can utilize above result set for inserting missing rows by using EXCEPT operator to exclude already existing rows in table -
INSERT INTO test
SELECT t1.Age,t1.Section,ISNULL(t2.Cost,0) as Cost
FROM
(
SELECT 1 as Age,'Section1' as Section,0 as Cost
UNION
SELECT 2,'Section1',0
UNION
SELECT 3,'Section1',0
UNION
SELECT 4,'Section1',0
UNION
SELECT 5,'Section1',0
UNION
SELECT 1,'Section2',0
UNION
SELECT 2,'Section2',0
UNION
SELECT 3,'Section2',0
UNION
SELECT 4,'Section2',0
UNION
SELECT 5,'Section2',0
) as t1
LEFT JOIN test t2
ON t1.Age=t2.Age AND t1.Section=t2.Section
EXCEPT
SELECT Age,Section,Cost
FROM test
SELECT * FROM test
ORDER BY Section,Age
http://www.sqlfiddle.com/#!3/d9035/11

Check for equal amounts of negative numbers as positive numbers

I have a table with two columns: intGroupID, decAmount
I want to have a query that can basically return the intGroupID as a result if for every positive(+) decAmount, there is an equal and opposite negative(-) decAmount.
So a table of (id=1,amount=1.0),(1,2.0),(1,-1.0),(1,-2.0) would return back the intGroupID of 1, because for each positive number there exists a negative number to match.
What I know so far is that there must be an equal number of decAmounts (so I enforce a count(*) % 2 = 0) and the sum of all rows must = 0.0. However, some cases that get by that logic are:
ID | Amount
1 | 1.0
1 | -1.0
1 | 2.0
1 | -2.0
1 | 3.0
1 | 2.0
1 | -4.0
1 | -1.0
This has a sum of 0.0 and has an even number of rows, but there is not a 1-for-1 relationship of positives to negatives. I need a query that can basically tell me if there is a negative amount for each positive amount, without reusing any of the rows.
I tried counting the distinct absolute values of the numbers and enforcing that it is less than the count of all rows, but it's not catching everything.
The code I have so far:
DECLARE #tblTest TABLE(
intGroupID INT
,decAmount DECIMAL(19,2)
);
INSERT INTO #tblTest (intGroupID ,decAmount)
VALUES (1,-1.0),(1,1.0),(1,2.0),(1,-2.0),(1,3.0),(1,2.0),(1,-4.0),(1,-1.0);
DECLARE #intABSCount INT = 0
,#intFullCount INT = 0;
SELECT #intFullCount = COUNT(*) FROM #tblTest;
SELECT #intABSCount = COUNT(*) FROM (
SELECT DISTINCT ABS(decAmount) AS absCount FROM #tblTest GROUP BY ABS(decAmount)
) AS absCount
SELECT t1.intGroupID
FROM #tblTest AS t1
/* Make Sure Even Number Of Rows */
INNER JOIN
(SELECT COUNT(*) AS intCount FROM #tblTest
)
AS t2 ON t2.intCount % 2 = 0
/* Make Sure Sum = 0.0 */
INNER JOIN
(SELECT SUM(decAmount) AS decSum FROM #tblTest)
AS t3 ON decSum = 0.0
/* Make Sure Count of Absolute Values < Count of Values */
WHERE
#intABSCount < #intFullCount
GROUP BY t1.intGroupID
I think there is probably a better way to check this table, possibly by finding pairs and removing them from the table and seeing if there's anything left in the table once there are no more positive/negative matches, but I'd rather not have to use recursion/cursors.

Create TABLE #tblTest (
intA INT
,decA DECIMAL(19,2)
);
INSERT INTO #tblTest (intA,decA)
VALUES (1,-1.0),(1,1.0),(1,2.0),(1,-2.0),(1,3.0),(1,2.0),(1,-4.0),(1,-1.0), (5,-5.0),(5,5.0) ;
SELECT * FROM #tblTest;
SELECT
intA
, MIN(Result) as IsBalanced
FROM
(
SELECT intA, X,Result =
CASE
WHEN count(*)%2 = 0 THEN 1
ELSE 0
END
FROM
(
---- Start thinking here --- inside-out
SELECT
intA
, x =
CASE
WHEN decA < 0 THEN
-1 * decA
ELSE
decA
END
FROM #tblTest
) t1
Group by intA, X
)t2
GROUP BY intA

Not tested but I think you can get the idea
This returns the id that do not conform
The not is easier to test / debug
select pos.*, neg.*
from
( select id, amount, count(*) as ccount
from tbl
where amount > 0
group by id, amount ) pos
full outer join
( select id, amount, count(*) as ccount
from tbl
where amount < 0
group by id, amount ) neg
on pos.id = neg.id
and pos.amount = -neg.amount
and pos.ccount = neg.ccount
where pos.id is null
or neg.id is null
I think this will return a list of id that do conform
select distinct(id) from tbl
except
select distinct(isnull(pos.id, neg.id))
from
( select id, amount, count(*) as ccount
from tbl
where amount > 0
group by id, amount ) pos
full outer join
( select id, amount, count(*) as ccount
from tbl
where amount < 0
group by id, amount ) neg
on pos.id = neg.id
and pos.amount = -neg.amount
and pos.ccount = neg.ccount
where pos.id is null
or neg.id is null

Boy, I found a simpler way to do this than my previous answers. I hope all my crazy edits are saved for posterity.
This works by grouping all numbers for an id by their absolute value (1, -1 grouped by 1).
The sum of the group determines if there are an equal number of pairs. If it is 0 then it is equal, any other value for the sum means there is an imbalance.
The detection of evenness by the COUNT aggregate is only necessary to detect an even number of zeros. I assumed that 0's could exist and they should occur an even number of times. Remove it if this isn't a concern, as 0 will always pass the first test.
I rewrote the query a bunch of different ways to get the best execution plan. The final result below only has one big heap sort which was unavoidable given the lack of an index.
Query
WITH tt AS (
SELECT intGroupID,
CASE WHEN SUM(decAmount) > 0 OR COUNT(*) % 2 = 1 THEN 1 ELSE 0 END unequal
FROM #tblTest
GROUP BY intGroupID, ABS(decAmount)
)
SELECT tt.intGroupID,
CASE WHEN SUM(unequal) != 0 THEN 'not equal' ELSE 'equals' END [pair]
FROM tt
GROUP BY intGroupID;
Tested Values
(1,-1.0),(1,1.0),(1,2),(1,-2), -- should work
(2,-1.0),(2,1.0),(2,2),(2,2), -- fail, two positive twos
(3,1.0),(3,1.0),(3,-1.0), -- fail two 1's , one -1
(4,1),(4,2),(4,-.5),(4,-2.5), -- fail: adds up the same sum, but different values
(5,1),(5,-1),(5,0),(5,0), -- work, test zeros
(6,1),(6,-1),(6,0), -- fail, test zeros
(7,1),(7,-1),(7,-1),(7,1),(7,1) -- fail, 3 x 1
Results
A pairs
_ _____
1 equal
2 not equal
3 not equal
4 not equal
5 equal
6 not equal
7 not equal

The following should return "disbalanced" groups:
;with pos as (
select intGroupID, ABS(decAmount) m
from TableName
where decAmount > 0
), neg as (
select intGroupID, ABS(decAmount) m
from TableName
where decAmount < 0
)
select distinct IsNull(p.intGroupID, n.intGroupID) as intGroupID
from pos p
full join neg n on n.id = p.id and abs(n.m - p.m) < 1e-8
where p.m is NULL or n.m is NULL
to get unpaired elements, select satement can be changed to following:
select IsNull(p.intGroupID, n.intGroupID) as intGroupID, IsNull(p.m, -n.m) as decAmount
from pos p
full join neg n on n.id = p.id and abs(n.m - p.m) < 1e-8
where p.m is NULL or n.m is NULL

Does this help?
-- Expected result - group 1 and 3
declare #matches table (groupid int, value decimal(5,2))
insert into #matches select 1, 1.0
insert into #matches select 1, -1.0
insert into #matches select 2, 2.0
insert into #matches select 2, -2.0
insert into #matches select 2, -2.0
insert into #matches select 3, 3.0
insert into #matches select 3, 3.5
insert into #matches select 3, -3.0
insert into #matches select 3, -3.5
insert into #matches select 4, 4.0
insert into #matches select 4, 4.0
insert into #matches select 4, -4.0
-- Get groups where we have matching positive/negatives, with the same number of each
select mat.groupid, min(case when pos.PositiveCount = neg.NegativeCount then 1 else 0 end) as 'Match'
from #matches mat
LEFT JOIN (select groupid, SUM(1) as 'PositiveCount', Value
from #matches where value > 0 group by groupid, value) pos
on pos.groupid = mat.groupid and pos.value = ABS(mat.value)
LEFT JOIN (select groupid, SUM(1) as 'NegativeCount', Value
from #matches where value < 0 group by groupid, value) neg
on neg.groupid = mat.groupid and neg.value = case when mat.value < 0 then mat.value else mat.value * -1 end
group by mat.groupid
-- If at least one pair within a group don't match, reject
having min(case when pos.PositiveCount = neg.NegativeCount then 1 else 0 end) = 1

You can compare your values this way:
declare #t table(id int, amount decimal(4,1))
insert #t values(1,1.0),(1,-1.0),(1,2.0),(1,-2.0),(1,3.0),(1,2.0),(1,-4.0),(1,-1.0),(2,-1.0),(2,1.0)
;with a as
(
select count(*) cnt, id, amount
from #t
group by id, amount
)
select id from #t
except
select b.id from a
full join a b
on a.cnt = b.cnt and a.amount = -b.amount
where a.id is null
For some reason i can't write comments, however Daniels comment is not correct, and my solution does accept (6,1),(6,-1),(6,0) which can be correct. 0 is not specified in the question and since it is a 0 value it can be handled eather way. My answer does NOT accept (3,1.0),(3,1.0),(3,-1.0)
To Blam: No I am not missing
or b.id is null
My solution is like yours, but not exactly identical

TSQL - COUNT number of rows in a different state than current row

It's kind of hard to explain, but from this example it should be clear.
Table TABLE:
Name State Time
--------------------
A 1 1/4/2012
B 0 1/3/2012
C 0 1/2/2012
D 1 1/1/2012
Would like to
select * from TABLE where state=1 order by Time desc
plus an additional column 'Skipped' containing the number of rows after one where state=1 in state 0, in other words the output should look like this:
Name State Time Skipped
A 1 1/4/2012 2 -- 2 rows after A where State != 1
D 1 1/1/2012 0 -- 0 rows after D where State != 1
0 should also be reported in case of 2 consecutive rows are in state = 1, i.e. there is nothing between these rows in a state other than 1.
It seems like CTE are must here, but can't figure out how to count rows where state != 1.
Any help will be appreciated.
(MS Sql Server 2008)

I've used a CTE to establish RowNo, so that you're not dependent on consecutive dates:
WITH CTE_Rows as
(
select name,state,time,
rowno = ROW_NUMBER() over (order by [time])
from MyTable
)
select name,state,time,
gap = isnull(r.rowno - x.rowno - 1,0)
from
CTE_Rows r
outer apply (
select top 1 rowno
from CTE_Rows sub
where sub.rowno < r.rowno and sub.state = 1
order by sub.rowno desc) x
where r.state = 1
If you just want to do it by date, then its simpler - just need an outer apply:
select name,state,r.time,
gap = convert(int,isnull(r.time - x.time - 1,0))
from
MyTable r
outer apply (
select top 1 time
from MyTable sub
where sub.time < r.time and sub.state = 1
order by sub.time desc) x
where r.state = 1
FYI the test data is used was created as follows:
create table MyTable
(Name char(1), [state] tinyint, [Time] datetime)
insert MyTable
values
('E',1,'2012-01-05'),
('A',1,'2012-01-04'),
('B',0,'2012-01-03'),
('C',0,'2012-01-02'),
('D',1,'2012-01-01')

Okay, here you go (it gets a little messy):
SELECT U.CurrentTime,
(SELECT COUNT(*)
FROM StateTable AS T3
WHERE T3.State=0
AND T3.Time BETWEEN U.LastTime AND U.CurrentTime) AS Skipped
FROM (SELECT T1.Time AS CurrentTime,
(SELECT TOP 1 T2.Time
FROM StateTable AS T2
WHERE T2.Time < T1.Time AND T2.State=1
ORDER BY T2.Time DESC) AS LastTime
FROM StateTable AS T1 WHERE T1.State = 1) AS U

Summing From Consecutive Rows

Assume we have a table and we want to do a sum of the Expend column so that the summation only adds up values of the same Week_Name.
SN Week_Name Exp Sum
-- --------- --- ---
1 Week 1 10 0
2 Week 1 20 0
3 Week 1 30 60
4 Week 2 40 0
5 Week 2 50 90
6 Week 3 10 0
I will assume we will need to `Order By' Week_Name, then compare the previous Week_Name(previous row) with the current row Week_name(Current row).
If both are the same, put zero in the SUM column.
If not the same, add all expenditure, where Week_Name = Week_Name(Previous row) and place in the Sum column. The final output should look like the table above.
Any help on how to achieve this in T-SQL is highly appreciated.

Okay, I was eventually able to resolve this issue, praise Jesus! If you want the exact table I gave above, you can use GilM's response below, it is perfect. If you want your table to have running Cumulatives, i.e. Rows 3 shoud have 60, Row 5, should have 150, Row 6 160 etc. Then, you can use my code below:
USE CAPdb
IF OBJECT_ID ('dbo.[tablebp]') IS NOT NULL
DROP TABLE [tablebp]
GO
CREATE TABLE [tablebp] (
tablebpcCol1 int PRIMARY KEY
,tabledatekey datetime
,tableweekname varchar(50)
,expenditure1 numeric
,expenditure_Cummulative numeric
)
INSERT INTO [tablebp](tablebpcCol1,tabledatekey,tableweekname,expenditure1,expenditure_Cummulative)
SELECT b.s_tablekey,d.PK_Date,d.Week_Name,
SUM(b.s_expenditure1) AS s_expenditure1,
SUM(b.s_expenditure1) + COALESCE((SELECT SUM(s_expenditure1)
FROM source_table bs JOIN dbo.Time dd ON bs.[DATE Key] = dd.[PK_Date]
WHERE dd.PK_Date < d.PK_Date),0)
FROM source_table b
INNER JOIN dbo.Time d ON b.[Date key] = d.PK_Date
GROUP BY d.[PK_Date],d.Week_Name,b.s_tablekey,b.s_expenditure1
ORDER BY d.[PK_Date]
;WITH CTE AS (
SELECT tableweekname
,Max(expenditure_Cummulative) AS Week_expenditure_Cummulative
,MAX(tablebpcCol1) AS MaxSN
FROM [tablebp]
GROUP BY tableweekname
)
SELECT [tablebp].*
,CASE WHEN [tablebp].tablebpcCol1 = CTE.MaxSN THEN Week_expenditure_Cummulative
ELSE 0 END AS [RunWeeklySum]
FROM [tablebp]
JOIN CTE on CTE.tableweekname = [tablebp].tableweekname

I'm not sure why your SN=6 line is 0 rather than 10. Do you really not want the sum for the last Week? If having the last week total is okay, then you might want something like:
;WITH CTE AS (
SELECT Week_Name,SUM([Expend.]) as SumExpend
,MAX(SN) AS MaxSN
FROM T
GROUP BY Week_Name
)
SELECT T.*,CASE WHEN T.SN = CTE.MaxSN THEN SumExpend
ELSE 0 END AS [Sum]
FROM T
JOIN CTE on CTE.Week_Name = T.Week_Name
Based on the requst in the comment wanting a running total in SUM you could try this:
;WITH CTE AS (
SELECT Week_Name, MAX(SN) AS MaxSN
FROM T
GROUP BY Week_Name
)
SELECT T.SN, T.Week_Name,T.Exp,
CASE WHEN T.SN = CTE.MaxSN THEN
(SELECT SUM(EXP) FROM T T2
WHERE T2.SN <= T.SN) ELSE 0 END AS [SUM]
FROM T
JOIN CTE ON CTE.Week_Name = T.Week_Name
ORDER BY SN

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

Problem counting item frequency on T-SQL - tsql

You can use PIVOT and UNPIVOT. SELECT Number, Winner, Second, Third FROM (SELECT LottoID, Ranking, Number FROM Lotto UNPIVOT (Number FOR Ranking IN ([Winner], [Second], [Third])) AS unpvt) flat PIVOT (COUNT(LottoId) FOR Ranking IN ([Winner], [Second], [Third])) crosstab

Related

Label groups in SQL records based on consecutive matching records

TSQL: Inserting missing records into table

Check for equal amounts of negative numbers as positive numbers

TSQL - COUNT number of rows in a different state than current row

Summing From Consecutive Rows

Categories

Resources