Duplicates removing [duplicate] - tsql

This question already has answers here:
Closed 11 years ago.
Possible Duplicate:
Delete duplicate records from a SQL table without a primary key
I have data:
-- Sample rows for the question, built inline as derived table t (a, b).
SELECT
    t.a,
    t.b
FROM (
    VALUES
        (1, 30),
        (2, 50),
        (3, 50),
        (4, 50),
        (5, 60)
) AS t (a, b)
I need the following output: ordered by a, only the first row for each value of b is kept, and the subsequent duplicate records are excluded from the result set:
a b
----------- -----------
1 30
2 50
3 50 -- should be excluded
4 50 -- should be excluded
5 60

-- Keep one row per b value: the one carrying the smallest a.
SELECT
    MIN(t.a) AS a,
    t.b
FROM (
    VALUES
        (1, 30),
        (2, 50),
        (3, 50),
        (4, 50),
        (5, 60)
) AS t (a, b)
GROUP BY t.b
ORDER BY a

In Oracle I was able to do this with a GROUP BY clause; you should be able to do something similar.
-- Oracle: one row per b, paired with the lowest a seen for that b.
-- The sample rows are built from DUAL in a named subquery.
with sample_rows as (
    select 1 a, 30 b from dual
    union all
    select 2 a, 50 b from dual
    union all
    select 3 a, 50 b from dual
    union all
    select 4 a, 50 b from dual
    union all
    select 5 a, 60 b from dual
)
select min(a), b
from sample_rows
group by b;
edit: looks like someone else came up with a MS sql solution, I'll leave this here for posterity though.

The easiest way to do this is with a simple GROUP BY:
-- Stage the sample rows in temp table #tmp.
SELECT
    t.a,
    t.b
INTO #tmp
FROM (
    SELECT a = 1, b = 30
    UNION ALL
    SELECT a = 2, b = 50
    UNION ALL
    SELECT a = 3, b = 50
    UNION ALL
    SELECT a = 4, b = 50
    UNION ALL
    SELECT a = 5, b = 60
) AS t;

-- One row per b, carrying the smallest a for that b.
-- GROUP BY b already produces exactly one row per b, so no DISTINCT is needed.
SELECT
    MIN(a) AS a,
    b
FROM #tmp
GROUP BY b
ORDER BY a;

Related

Spark use self reference in calculation for column

I have a data frame like this one given below. Essentially it is a time series derived data frame.
My issue is that the formula for Col C in the n-th row is:
$C_n = (A_n - A_{n-1}) + C_{n-1}$, where $A_n$ and $C_n$ are the values of Col A and Col C in the n-th row.
Hence Calculation of Col C is self referencing a previous value of Col C. I am using spark sql, can some one please advise how to proceed with this? For the calculation of Col A I am using LAG function
It seems colC is just colA minus colA in the first row.
e.g.
1 = 6-5,
0 = 5-5,
2 = 7-5,
3 = 8-5,
-2 = 3-5
So this query should work:
-- colC is colA minus the colA value of the first row in id order.
-- NOTE: fragment only; no FROM clause (source table) is given here.
SELECT colA, colA - FIRST(colA) OVER (ORDER BY id) AS colC
Your formula is a cumulative sum. Here is a complete example:
-- Running total of the row-to-row change in a, i.e. C(n) = (A(n) - A(n-1)) + C(n-1).
SELECT
    rowid,
    a,
    -- Explicit ROWS frame: accumulate strictly row by row in rowid order.
    SUM(c0) OVER (
        ORDER BY rowid
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS c
FROM (
    -- c0 is the change in a relative to the previous row.
    -- The first row has no predecessor, so LAG returns NULL there; COALESCE
    -- turns that delta into 0 so the running total starts at 0 instead of NULL.
    SELECT
        rowid,
        a,
        COALESCE(a - LAG(a, 1) OVER (ORDER BY rowid), 0) AS c0
    FROM (
        SELECT 1 AS rowid, 5 AS a UNION ALL
        SELECT 2 AS rowid, 6 AS a UNION ALL
        SELECT 3 AS rowid, 5 AS a UNION ALL
        SELECT 4 AS rowid, 7 AS a UNION ALL
        SELECT 5 AS rowid, 8 AS a UNION ALL
        SELECT 6 AS rowid, 3 AS a
    ) AS src
) AS deltas

Capture First Character of Last Group of 1s in a Binary Series Part II: Multiple IDs

I have data something like this:
ID 1 1 1 1 1 1 1 1 1 1 1 1
Month J F M A M J J A S O N D
Status 1 0 0 1 0 1 0 0 1 1 1 1
ID 2 2 2 2 2 2 2 2 2 2 2 2
Month J F M A M J J A S O N D
Status 1 0 1 0 1 0 1 0 1 0 1 1
ID 3 3 3 3 3 3 3 3 3 3 3 3
Month J F M A M J J A S O N D
Status 0 0 0 0 0 0 0 0 0 0 0 1
Using t-SQL, I am trying to capture the month corresponding to the first STATUS = 1 in the last group of 1s for each ID, i.e., September, November and December in this example.
Here is the code I'm using:
-- Asker's attempt, reproduced as posted; the question reports that it returns the wrong month.
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL DROP TABLE #Temp1
-- Step 1: number each row (rn1) within its partition, ordered by Year_Month.
;WITH PARTITIONED1 AS
(SELECT t0.ID
, t0.Year_Month
-- NOTE(review): the STATUS alias is assigned LAST_VALUE of Year_Month, not a status column -- confirm intent.
, LAST_VALUE(t0.Year_Month) OVER (PARTITION BY t0.Account_Number ORDER BY t0.Year_Month) AS STATUS
, ROW_NUMBER() OVER (PARTITION BY t0.Account_Number ORDER BY t0.Year_Month) AS rn1
FROM #Temp0 t0
)
SELECT *
INTO #Temp1
FROM PARTITIONED1 p1
-- NOTE(review): ORDER BY uses alias t0, but the FROM above names its source p1 -- verify.
ORDER BY t0.ID
, t0.Year_Month
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL DROP TABLE #Temp
-- Step 2: keep rows whose rn1 is one past the highest rn1 that has STATUS = 0.
-- The subquery is not correlated on ID, so the same rn1 value is applied to every ID.
SELECT *
INTO #Temp
FROM #Temp1 t1
WHERE t1.rn1 = (SELECT MAX(b.rn1) + 1 FROM #Temp1 b WHERE b.STATUS = 0)
GROUP BY t1.ID
, t1.Year_Month
, t1.rn1
However, this just returns the last instance where STATUS = 1 is achieved overall as the first 1 of the last group of 1s, in this case January.
I've tried using CASE statements and grouping in various combinations (hence the intermediate step reading the data into #Temp1), but have not been able to get results for all three IDs; is anyone able to assist?
Thanks in advance!
Assuming Ju for June and Jl for July:
--Sample Data
-- Year_Month holds a month code. Single letters are ambiguous (J = January/June/July,
-- M = March/May, A = April/August), so the later clashing months use two-letter codes:
-- My = May, Ju = June, Jl = July, Au = August. The column is VARCHAR(2) so they fit.
IF OBJECT_ID('tempdb..#Temp0') IS NOT NULL DROP TABLE #Temp0
CREATE TABLE #Temp0 (ID INT, Year_Month VARCHAR(2), Status INT)
INSERT INTO #Temp0 (ID, Year_Month, Status)
VALUES
    (1,'J',1),(1,'F',0),(1,'M',0),(1,'A',1),(1,'My',0),(1,'Ju',1),(1,'Jl',0),(1,'Au',0),(1,'S',1),(1,'O',1),(1,'N',1),(1,'D',1),
    (2,'J',1),(2,'F',0),(2,'M',1),(2,'A',0),(2,'My',1),(2,'Ju',0),(2,'Jl',1),(2,'Au',0),(2,'S',1),(2,'O',0),(2,'N',1),(2,'D',1),
    (3,'J',0),(3,'F',0),(3,'M',0),(3,'A',0),(3,'My',0),(3,'Ju',0),(3,'Jl',0),(3,'Au',0),(3,'S',0),(3,'O',0),(3,'N',0),(3,'D',1);
--Query
-- A: the sample rows plus a numeric month (1-12) so months can be ordered.
-- Every WHEN value is distinct; a simple CASE returns the first matching branch,
-- so a repeated code would make the later branch unreachable.
WITH A AS (
    SELECT
        ID,
        Year_Month,
        Status,
        CASE Year_Month
            WHEN 'J'  THEN 1
            WHEN 'F'  THEN 2
            WHEN 'M'  THEN 3
            WHEN 'A'  THEN 4
            WHEN 'My' THEN 5
            WHEN 'Ju' THEN 6
            WHEN 'Jl' THEN 7
            WHEN 'Au' THEN 8
            WHEN 'S'  THEN 9
            WHEN 'O'  THEN 10
            WHEN 'N'  THEN 11
            WHEN 'D'  THEN 12
        END AS MonthNumber
    FROM #Temp0
),
-- StartingPoints: rows for which no row of the same ID has Status one lower.
-- With Status limited to 0/1 (as in the sample data) that is every Status = 0 row,
-- plus the Status = 1 rows of any ID that has no Status = 0 row.
StartingPoints AS (
    SELECT
        cur.ID,
        cur.Year_Month,
        cur.MonthNumber,
        cur.Status
    FROM A AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM A AS other
        WHERE other.ID = cur.ID
          AND other.Status = cur.Status - 1
    )
),
-- MonthRanking: for each ID, the months at or after (latest starting point + 1),
-- numbered in month order so that rownum = 1 is the first month of the last group.
MonthRanking AS (
    SELECT
        A.ID,
        A.Year_Month,
        A.Status,
        A.MonthNumber,
        ROW_NUMBER() OVER (PARTITION BY A.ID ORDER BY A.MonthNumber) AS rownum
    FROM A
    INNER JOIN (
        SELECT
            ID,
            MAX(MonthNumber) + 1 AS StartOfLastGroup
        FROM StartingPoints
        GROUP BY ID
    ) AS B
        ON A.ID = B.ID
       AND A.MonthNumber >= B.StartOfLastGroup
)
SELECT
    ID,
    Year_Month,
    Status,
    MonthNumber,
    rownum
FROM MonthRanking
WHERE rownum = 1;
Results:
If Month Names are recorded in Full as in July, June then this would work as well:
-- Variant for full month names (e.g. 'June', 'July'): the month number is taken
-- from MONTH() applied to the string '01-<name>-2010'.
WITH StartingPoints AS (
    -- Rows for which no row of the same ID has Status one lower.
    SELECT
        cur.ID,
        cur.Year_Month,
        MONTH('01-' + cur.Year_Month + '-2010') AS MonthNUmber,
        cur.Status
    FROM #Temp0 AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM #Temp0 AS B
        WHERE B.ID = cur.ID
          AND B.Status = cur.Status - 1
    )
),
LastGroupStart AS (
    -- Per ID: one past the highest month number among the starting points.
    SELECT
        ID,
        MAX(MonthNumber) + 1 AS StartOfLastGroup
    FROM StartingPoints
    GROUP BY ID
),
MonthRanking AS (
    -- Months at or after the start of the last group, numbered in month order.
    SELECT
        A.*,
        ROW_NUMBER() OVER (
            PARTITION BY A.ID
            ORDER BY MONTH('01-' + A.Year_Month + '-2010')
        ) AS rownum
    FROM #Temp0 AS A
    INNER JOIN LastGroupStart AS B
        ON A.ID = B.ID
       AND MONTH('01-' + A.Year_Month + '-2010') >= B.StartOfLastGroup
)
SELECT *
FROM MonthRanking
WHERE rownum = 1;
Results:
And if we assume that the data is stored the way Iamdave assumes, then the query is simply:
-- Variant where Year_Month is used directly for ordering and arithmetic.
WITH StartingPoints AS (
    -- Rows for which no row of the same ID has Status one lower.
    SELECT
        cur.ID,
        cur.Year_Month,
        cur.Status
    FROM #Temp0 AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM #Temp0 AS B
        WHERE B.ID = cur.ID
          AND B.Status = cur.Status - 1
    )
),
LastGroupStart AS (
    -- Per ID: one past the highest Year_Month among the starting points.
    SELECT
        ID,
        MAX(Year_Month) + 1 AS StartOfLastGroup
    FROM StartingPoints
    GROUP BY ID
),
MonthRanking AS (
    -- Rows at or after the start of the last group, numbered in Year_Month order.
    SELECT
        A.*,
        ROW_NUMBER() OVER (PARTITION BY A.ID ORDER BY Year_Month) AS rownum
    FROM #Temp0 AS A
    INNER JOIN LastGroupStart AS B
        ON A.ID = B.ID
       AND A.Year_Month >= B.StartOfLastGroup
)
SELECT *
FROM MonthRanking
WHERE rownum = 1;
Results:
You can do this with a couple derived tables that stack two window functions on top of one another (which can't be done in the same select). I have assumed that your data is slightly different to the table you have provided, based on the column names in your query. If they are not as I have them below, I strongly recommend having a look at how you store your data:
-- Sample data in a table variable (table variables are named with @, not #):
-- one row per ID and month, with YearMonth stored as an integer in YYYYMM form.
declare @t table(ID int, YearMonth int, StatusValue bit);
insert into @t values (1,201501,1),(1,201502,0),(1,201503,0),(1,201504,1),(1,201505,0),(1,201506,1),(1,201507,0),(1,201508,0),(1,201509,1),(1,201510,1),(1,201511,1),(1,201512,1),(2,201601,1),(2,201602,0),(2,201603,1),(2,201604,0),(2,201605,1),(2,201606,0),(2,201607,1),(2,201608,0),(2,201609,1),(2,201610,0),(2,201611,1),(2,201612,1),(3,201701,0),(3,201702,0),(3,201703,0),(3,201704,0),(3,201705,0),(3,201706,0),(3,201707,0),(3,201708,0),(3,201709,0),(3,201710,0),(3,201711,0),(3,201712,1);
with group_starts as
(
    -- Flag a row when its status is 1 and the previous month's status is 0.
    -- The window is ordered by YearMonth descending, so lead() reads the previous month;
    -- its default of 1 applies when an ID has no earlier month.
    select ID
          ,YearMonth
          ,StatusValue
          ,case when StatusValue = 1
                 and lead(StatusValue,1,1) over (partition by ID
                                                 order by YearMonth desc) = 0
                then 1
                else 0
           end as is_group_start
    from @t
), ranked_starts as
(
    -- Count the flagged rows per ID from the latest month backwards;
    -- the row counted as 1 is the start of the last group of 1s.
    select ID
          ,YearMonth
          ,StatusValue
          ,sum(is_group_start) over (partition by ID order by YearMonth desc) as start_rank
    from group_starts
    where is_group_start = 1
)
select ID
      ,YearMonth
      ,StatusValue
from ranked_starts
where start_rank = 1
order by ID;
Output:
+----+-----------+-------------+
| ID | YearMonth | StatusValue |
+----+-----------+-------------+
| 1 | 201509 | 1 |
| 2 | 201611 | 1 |
| 3 | 201712 | 1 |
+----+-----------+-------------+

Returns the records even when the condition is not to pull the record

I have two tables e and o1
Where table e has
Onumber edt_image
1 AA
1 AB
1 AC
1 AA
1 AB
2 AB
3 AB
3 AA
And table o1 has
Onumber Obill
1 ABCD
2 ABCD
3 ABCD
I want the SQL to return only the Onumber values that have no 'AA' row at all and whose Obill is 'ABCD'; that is, I expect to get
Onumber edt_image
2 AB
But right now it pulls me
Onumber Edt_image
1 AB
1 AC
1 AB
2 AB
3 AB
The sql I used
-- Asker's attempt, with the string literals quoted correctly.
-- The <> 'AA' filter removes only the individual 'AA' rows; it does not remove
-- the other rows of an Onumber that has an 'AA' row, hence the extra rows returned.
SELECT
    e.onumber,
    o1.onumber
FROM e
INNER JOIN o1
    ON o1.onumber = e.onumber
WHERE e.edt_image <> 'AA'
  AND o1.obill = 'ABCD'
Use EXISTS and NOT EXISTS:
-- Rows of e for every Onumber that is billed 'ABCD' in o1
-- and has no row in e with edt_image = 'AA'.
SELECT e1.*
FROM e AS e1
WHERE EXISTS (
          SELECT 1
          FROM o1 AS bill
          WHERE bill.Onumber = e1.Onumber
            AND bill.Obill = 'ABCD'
      )
  AND NOT EXISTS (
          SELECT 1
          FROM e AS aa
          WHERE aa.Onumber = e1.Onumber
            AND aa.edt_image = 'AA'
      );
Try this. Hope it helps.
If you are running the query on larger result set, please consider adding indexes on Joining keys.
Word of caution, please look at the larger impact on the server before adding any indexes
-- Inline copies of tables e and o1 so the query can be run as-is.
;WITH cte_e (Onumber, edt_image) AS
(
    SELECT 1, 'AA' UNION ALL
    SELECT 1, 'AB' UNION ALL
    SELECT 1, 'AC' UNION ALL
    SELECT 1, 'AA' UNION ALL
    SELECT 1, 'AB' UNION ALL
    SELECT 2, 'AB' UNION ALL
    SELECT 3, 'AB' UNION ALL
    SELECT 3, 'AA'
), cte_O1 (Onumber, Obill) AS
(
    SELECT 1, 'ABCD' UNION ALL
    SELECT 2, 'ABCD' UNION ALL
    SELECT 3, 'ABCD'
)
-- Rows of e whose Onumber is billed 'ABCD' and has no 'AA' image at all.
SELECT
    e1.Onumber,
    e1.edt_image
FROM cte_e AS e1
INNER JOIN cte_O1 AS o1
    ON o1.Onumber = e1.Onumber
-- The question asks only for Onumbers with Obill 'ABCD', so filter on it explicitly.
WHERE o1.Obill = 'ABCD'
  AND NOT EXISTS (
          SELECT 1
          FROM cte_e AS e2
          WHERE e2.Onumber = e1.Onumber
            AND e2.edt_image = 'AA'
      )

difficult (for me) postgres sql query

Here are the tables I have:
AB tuple table
C table which has entries with A.id, B.id, C.units
D table which has entries with C.id
I want to count all the entries in D table which have a C.id that has the same A.id and B.id and subtract that count from the sum of all C.units that have the same A.id and B.id as a new column "difference"
So I want the query to return the "difference", the common A.id and the common B.id in a single line
It should also return an entry when the count is 0, in which case the "difference" is simply equal to sum(C.units).
For example
D table
D.id = 1, open=true, D.CID = 2
D.id = 2, open=true, D.CID = 3
D.id = 3, open=true, D.CID = 3
D.id = 4, open=true, D.CID = 4
C table
C.id = 2, A.id = 3, B.id = 5, units =4
C.id = 3, A.id = 3, B.id = 5, units = 6
C.id = 4, A.id = 4, B.id = 6, units = 8
C.id = 5, A.id = 4, B.id = 6, units = 10
Because the first 3 entries in D have CIDs whose C rows share the same A.id and B.id, they are counted in the same entry. The C entries that have the same A.id and B.id also have their units summed, even when a C entry has no associated D entry. Therefore, the query should return the following 2 entries:
1. difference = (6+4)-3 = 7 A.id = 3 B.id = 5
2. difference = (10+8)-1 = 17 A.id = 4 B.id = 6
Setup (which you really should include in your question):
-- c: one row per C entry, referencing an (aid, bid) pair and carrying a unit count.
CREATE TABLE c (
    id    int NOT NULL PRIMARY KEY,
    aid   int NOT NULL,
    bid   int NOT NULL,
    units int NOT NULL
);

-- d: one row per D entry, pointing at a C entry through cid.
CREATE TABLE d (
    id   int     NOT NULL PRIMARY KEY,
    open boolean NOT NULL,
    cid  int     NOT NULL
);

INSERT INTO c (id, aid, bid, units)
VALUES
    (2, 3, 5, 4),
    (3, 3, 5, 6),
    (4, 4, 6, 8),
    (5, 4, 6, 10),
    (6, 7, 8, 9);

INSERT INTO d (id, open, cid)
VALUES
    (1, true, 2),
    (2, true, 3),
    (3, true, 3),
    (4, true, 4);
It's a little hard to understand the question, but I think you might be looking for something like this:
-- d_per_pair: number of d rows attached to each (aid, bid) pair through c.
WITH d_per_pair AS (
    SELECT
        c.aid,
        c.bid,
        count(*) AS cnt
    FROM c
    INNER JOIN d
        ON d.cid = c.id
    GROUP BY c.aid, c.bid
)
-- Per (aid, bid): total units minus the number of attached d rows
-- (0 when the pair has no d rows).
SELECT
    aid,
    bid,
    sum(c.units) - COALESCE(dp.cnt, 0) AS difference
FROM c
LEFT JOIN d_per_pair AS dp USING (aid, bid)
GROUP BY aid, bid, dp.cnt
ORDER BY aid, bid;
I get these results:
aid | bid | difference
-----+-----+------------
3 | 5 | 7
4 | 6 | 17
7 | 8 | 9
(3 rows)

SQL select from a group

Suppose we have the following table data:
ID parent stage submitted
1 1 1 1
2 1 2 1
3 1 3 0
4 1 4 0
5 5 1 1
6 5 2 1
7 5 3 1
8 5 4 1
As you can see, we have 2 groups (rows that share the same parent). For each group I want to select the latest stage that is submitted. In the above example I want to select the IDs 2 and 8. I am completely lost, so any help would be appreciated a lot. :)
-- For each parent, return the row whose stage equals the highest submitted stage.
SELECT
    T.ID,
    T.PARENT,
    T.STAGE
FROM T
INNER JOIN (
    -- Highest stage per parent among the submitted rows.
    SELECT
        PARENT,
        MAX(STAGE) AS MAX_STAGE
    FROM T
    WHERE SUBMITTED = 1
    GROUP BY PARENT
) M
    ON M.PARENT = T.PARENT
   AND M.MAX_STAGE = T.STAGE
Explanation:
First, isolate the max stage for each group with submitted = 1 (the inner select).
Then, join the result with the real table, to filter out the records with no max stage.
-- For each parent, the highest Id among the rows at that parent's latest submitted stage.
-- Parent exists in both t and submitted, so every reference to it is qualified;
-- the inner copy of tbl has its own alias so it does not shadow the outer t.
Select
    t.Parent,
    max(t.Id) as Id
From tbl t
Inner Join
(
    -- Latest submitted stage per parent.
    Select s.Parent, max(s.Stage) as Stage
    From tbl s
    Where s.Submitted = 1
    Group by s.Parent
) submitted
    on t.Parent = submitted.Parent
   and t.Stage = submitted.Stage
Group by t.Parent
This should do it:
-- Submitted rows for which no later submitted stage exists under the same parent.
-- The outer join looks for such a later row; rows with no match have later.id NULL.
SELECT
    cur.id,
    cur.parent,
    cur.stage,
    cur.submitted
FROM Some_Table AS cur
LEFT JOIN Some_Table AS later
    ON later.parent = cur.parent
   AND later.stage > cur.stage
   AND later.submitted = 1
WHERE later.id IS NULL
  AND cur.submitted = 1
-- Fetch the two rows directly by their ID values.
SELECT *
FROM Table
WHERE ID IN (2, 8)
Is this what you want?