SQL Union exclude row if value already exists in first table - tsql

I have two tables which i wish to combine. However, there is a field in both tables that should have the same value in the second table the second tables record should be excluded.
These are a MSSQL 2012 tables.
The only way i can think of is something nasty like this.
Select A, B
from Tab1
Union
Select C, D
from Tab2
where Tab2.c not in (Select A from Tab1)
It looks relatively clean in my example but the selects for Tab1 and tab2 have long and complex where clauses and i would need to duplicated that in the "not in" select statement.
I've seen other solutions but not in MSSQL. Any one out there have a better example ?
Thanks

well in t-sql i use this kind of code
USE tempdb
GO
CREATE TABLE StudentDetails
(
StudentID INTEGER PRIMARY KEY,
StudentName VARCHAR(15)
)
GO
INSERT INTO StudentDetails
VALUES(1,'SMITH')
INSERT INTO StudentDetails
VALUES(2,'ALLEN')
INSERT INTO StudentDetails
VALUES(3,'JONES')
INSERT INTO StudentDetails
VALUES(4,'MARTIN')
INSERT INTO StudentDetails
VALUES(5,'JAMES')
GO
CREATE TABLE StudentTotalMarks
(
StudentID INTEGER REFERENCES StudentDetails,
StudentMarks INTEGER
)
GO
INSERT INTO StudentTotalMarks
VALUES(1,230)
INSERT INTO StudentTotalMarks
VALUES(2,255)
INSERT INTO StudentTotalMarks
VALUES(3,200)
GO
-- Select from Table
SELECT *
FROM StudentDetails
GO
SELECT *
FROM StudentTotalMarks
GO
-- Merge Statement
MERGE StudentTotalMarks AS stm
USING (SELECT StudentID,StudentName FROM StudentDetails) AS sd
ON stm.StudentID = sd.StudentID
WHEN MATCHED AND stm.StudentMarks > 250 THEN DELETE
WHEN MATCHED THEN UPDATE SET stm.StudentMarks = stm.StudentMarks + 25
WHEN NOT MATCHED THEN
INSERT(StudentID,StudentMarks)
VALUES(sd.StudentID,25);
GO
-- Select from Table
SELECT *
FROM StudentDetails
GO
SELECT *
FROM StudentTotalMarks
GO
-- Clean up
DROP TABLE StudentDetails
GO
DROP TABLE StudentTotalMarks
GO
The Merge Join performs very well and the following result is obtained.
http://www.pinaldave.com/bimg/MergeStatement.gif
Hope this helps

Related

Invalid column name of temp table

I want to create a procedure in which I insert data into several tables. I need to get the inserted ID's so I create temp table in which I catch them. The problem is that I receive an error "Invalid column name 'app_guid'" and "Invalid column name 'app_nazwa_pliku'" but I create temp tables with such columns. Do you happen to know what's wrong with my code?
create procedure p_paseczek_przenies
as
declare #new_nr_sprawy varchar(50)
if object_id('tempdb..##paseczki') is not null drop table ##paseczki
select
top 1 with ties
s.sp_numer as SprawaGlowna_sp_numer,
s.sp_id as SprawaGlowna_sp_id
,Paseczek.max_ak_id as Paseczek_max_ak_id
,apisp_data_przyjscia
,app_guid
,app_nazwa_pliku
into ##paseczki
from sprawa as s
join akcja as a on a.ak_sp_id=s.sp_id and ak_akt_id=111
join sprawa_powiazania as sp on s.sp_id=sp.sp_id and rodzaj_powiazania='SPRAWY POLUBOWNE'
join (select max(ak_id) max_ak_id,ak_sp_id from akcja
where ak_akt_id=1089
group by ak_sp_id) as Paseczek on Paseczek.ak_sp_id=sp.sp_id_powiazana
join akcja_pismo on apis_ak_id=max_ak_id
join akcja_pismo_przychodzace on apis_apisp_id=apisp_id
join akcja_pismo_plik on app_apis_id=apis_id
where s.sp_numer=#new_nr_sprawy
order by ROW_NUMBER() over (partition by s.sp_id order by paseczek.max_ak_id desc)
if exists (select * from ##paseczki)
begin
if object_id('tempdb..##akcja') is not null drop table ##akcja
create table ##akcja (
ak_id int
,apisp_data_przyjscia datetime
,app_guid varchar(max)
,app_nazwa_pliku varchar(max)
)
merge akcja as target using (
select * from ##paseczki) as source on 1=0
when not matched then insert
(ak_akt_id, ak_sp_id, ak_kolejnosc, ak_interwal, ak_zakonczono, ak_pr_id, ak_publiczna)
values (1089,SprawaGlowna_sp_id,1,1,getdate(),5,1)
output inserted.ak_id,source.apisp_data_przyjscia,source.app_guid,source.app_nazwa_pliku
into ##akcja;
insert into rezultat
(re_ak_id, re_ret_id, re_data_planowana, re_us_id_planujacy, re_data_wykonania, re_us_id_wykonujacy, re_konczy)
select ak_id,309,getdate(),5,getdate(),5,1 from ##akcja
if object_id('tempdb..##akcja_pismo_przychodzace') is not null drop table ##akcja_pismo_przychodzace
create table ##akcja_pismo_przychodzace (
apisp_id int
,ak_id int
,app_guid varchar(max)
,app_nazwa_pliku varchar(max)
)
merge akcja_pismo_przychodzace as target using (
select * from ##akcja) as source on 1=0
when not matched then insert
(apisp_data_przyjscia)
values (apisp_data_przyjscia)
output inserted.apisp_id,source.ak_id,source.app_guid,source.app_nazwa_pliku
into ##akcja_pismo_przychodzace;
if object_id('tempdb..##akcja_pismo') is not null drop table ##akcja_pismo
create table ##akcja_pismo (
apis_id int
,app_guid varchar(max)
,app_nazwa_pliku varchar(max)
)
merge akcja_pismo as target using (
select * from ##akcja_pismo_przychodzace) as source on 1=0
when not matched then insert
(apis_ak_id, apis_apisp_id, apis_data_stworzenia,[apis_us_id_tworzacy])
values (ak_id,apisp_id,getdate(),5)
output inserted.apis_id,source.app_guid,source.app_nazwa_pliku
into ##akcja_pismo;
alter table [dm_data_bps].[dbo].[akcja_pismo_plik] disable trigger [tr_akcja_pismo_plik_ins]
insert into akcja_pismo_plik
([app_guid],[app_apis_id],[app_nazwa_pliku])
select [app_guid],[apis_id],[app_nazwa_pliku] from ##akcja_pismo
alter table [dm_data_bps].[dbo].[akcja_pismo_plik] enable trigger [tr_akcja_pismo_plik_ins]
end
SQL Server compiles the procedure at creation and when it is first executed, verifying the entire procedure based on the context at that time.
For example, try the following query:
CREATE PROCEDURE P
AS
IF OBJECT_ID('tempdb..#T') IS NOT NULL DROP TABLE #T
SELECT 1 Y INTO #T
SELECT Y FROM #T
GO
CREATE TABLE #T (X INT)
GO
EXEC P
You will get an error ("Invalid column name 'Y'."), because when the procedure is compiled the table #T has only the column X.
To avoid this problem, you should make sure that the table #T either does not exist or has the right columns, before the procedure is executed.
One way would be to have another stored procedure (a wrapper):
CREATE PROCEDURE P1
AS
SELECT 1 Y INTO #T
SELECT Y FROM #T
GO
CREATE PROCEDURE P2
AS
IF OBJECT_ID('tempdb..#T') IS NOT NULL DROP TABLE #T
EXEC P1
GO
CREATE TABLE #T (X INT)
GO
EXEC P2
GO
DROP PROCEDURE P1, P2
--DROP TABLE #T
Another way would be to use dynamic SQL, because that code is compiled separately, as if it would be another stored procedure.
A better way would be to make sure that temp tables are uniquely named in each stored procedure, unless sharing data between them is desired. For the later case, you can read http://www.sommarskog.se/share_data.html#temptables for more insights.
This error is also encountered when a stored procedure creates a #temp table and then fires a trigger which creates a #temp table with the same name. The SP #temp table is referenced by the trigger when the column names are explicit, (like SELECT id FROM #temp;), but the local trigger #temp table is referenced when SELECT * FROM #temp; is used.
Microsoft, if you are listening, could you kindly attend to it and retrofit existing supported versions with a maintenance update?

IF Exists doesn't seem to work for a Table Drop if already exists

Was getting this error each and every time tried to execute a DROP Table if exists
Step 1: Created a Table
CREATE TABLE Work_Tables.dbo.Drop_Table_Test (RowID INT IDENTITY(1,1), Data VARCHAR(50))
INSERT INTO Work_Tables.dbo.Drop_Table_Test
SELECT 'Test' UNION
SELECT 'Test1' UNION
SELECT 'Test2' UNION
SELECT 'Test3'
Step 2: Wrote a IF Exists block to check if the Table exists.
IF EXISTS (SELECT 1 FROM Work_Tables.dbo.SysObjects WHERE NAME LIKE 'Drop_Table_Test' AND XType = 'U')
BEGIN
PRINT 'IN'
DROP TABLE Work_Tables.dbo.Drop_Table_Test
END
CREATE TABLE Work_Tables.dbo.Drop_Table_Test (RowID INT IDENTITY(1,1), Data VARCHAR(50), NAME VARCHAR(20), PreCheck INT)
INSERT INTO Work_Tables.dbo.Drop_Table_Test (Data, Name, PreCheck)
SELECT 'Test','SRK',1 UNION
SELECT 'Test1','Daya',2 UNION
SELECT 'Test2','Dinesh',3 UNION
SELECT 'Test3','Suresh',4
On running the Step 2 Code its obvious the Table has to be Dropped and recreated with the same name but it didn't even enter the Begin End block.
I feel that its because have added few more columns in the second try, but still not clear why it should have problems as we are to DROP the table.
You can not drop and create the same table in the same batch in SQL Server.
Break your code up into separate batches so the table can be dropped before you try and recreate it. Add GO after END in your BEGIN / END statement.
IF EXISTS (SELECT 1 FROM Work_Tables.dbo.SysObjects WHERE NAME LIKE 'Drop_Table_Test' AND XType = 'U')
BEGIN
PRINT 'IN'
DROP TABLE Work_Tables.dbo.Drop_Table_Test
END
GO --Add this...
....
Straight from Microsoft's Documentation:
DROP TABLE and CREATE TABLE should not be executed on the same table in the same batch. Otherwise an unexpected error may occur.
You can try to use this syntax:
IF OBJECT_ID('dbo.Drop_Table_Test', 'U') IS NOT NULL
DROP TABLE dbo.Drop_Table_Test;
IF EXISTS will drop the table only when your table Drop_Table_Test does not contain any row. In case if it contains the data then it will not drop the table.

TSQL query to delete all duplicate records but one [duplicate]

This question already has answers here:
Closed 10 years ago.
Possible Duplicate:
delete duplicate records in SQL Server
I have a table in which unique records are denoted by a composite key, such as (COL_A, COL_B).
I have checked and confirmed that I have duplicate rows in my table by using the following query:
select COL_A, COL_B, COUNT(*)
from MY_TABLE
group by COL_A, COL_B
having count(*) > 1
order by count(*) desc
Now, I would like to remove all duplicate records but keep only one.
Could someone please shed some light on how to achieve this with 2 columns?
EDIT:
Assume the table only has COL_A and COL_B
1st solution,
It is flexible, because you can add more columns than COL_A and COL_B :
-- create table with identity filed
-- using idenity we can decide which row we can delete
create table MY_TABLE_COPY
(
id int identity,
COL_A varchar(30),
COL_B varchar(30)
/*
other columns
*/
)
go
-- copy data
insert into MY_TABLE_COPY (COL_A,COL_B/*other columns*/)
select COL_A, COL_B /*other columns*/
from MY_TABLE
group by COL_A, COL_B
having count(*) > 1
-- delete data from MY_TABLE
-- only duplicates (!)
delete MY_TABLE
from MY_TABLE_COPY c, MY_TABLE t
where c.COL_A=t.COL_A
and c.COL_B=t.COL_B
go
-- copy data without duplicates
insert into MY_TABLE (COL_A, COL_B /*other columns*/)
select t.COL_A, t.COL_B /*other columns*/
from MY_TABLE_COPY t
where t.id = (
select max(id)
from MY_TABLE_COPY c
where t.COL_A = c.COL_A
and t.COL_B = c.COL_B
)
go
2nd solution
If you have really two columns in MY_TABLE you can use:
-- create table and copy data
select distinct COL_A, COL_B
into MY_TABLE_COPY
from MY_TABLE
-- delete data from MY_TABLE
-- only duplicates (!)
delete MY_TABLE
from MY_TABLE_COPY c, MY_TABLE t
where c.COL_A=t.COL_A
and c.COL_B=t.COL_B
go
-- copy data without duplicates
insert into MY_TABLE
select t.COL_A, t.COL_B
from MY_TABLE_COPY t
go
Try:
-- Copy Current Table
SELECT * INTO #MY_TABLE_COPY FROM MY_TABLE
-- Delte all rows from current able
DELETE FROM MY_TABLE
-- Insert only unique values, removing your duplicates
INSERT INTO MY_TABLE
SELECT DISTINCT * FROM #MY_TABLE_COPY
-- Remove Temp Table
DROP TABLE #MY_TABLE_COPY
That should work as long as you don't break any foreign keys when deleting rows from MY_TABLE.

T-SQL Delete Inserted Records

I know the title may seem strange but this is what I want to do:
I have table with many records.
I want to get some of this records and insert them in other table. Something like this:
INSERT INTO TableNew SELECT * FROM TableOld WHERE ...
The tricky part is that I want this rows that I have inserted to be deleted form the origin table as well.
Is there a easy way to do this, because the only think that I have managed to do is to use a temporary table for saving the selected records and then to put them in the second table and delete rows that match with them from the first table. It is a solution, but with so many records (over 3 millions and half) I am looking for some other idea...
In 2005+ use OUTPUT clause like this:
DELETE FROM TableOld
OUTPUT DELETED.* INTO TableNew
WHERE YourCondition
It will be performed in single transaction and either completed or roll back simultaneously
You can use the insert ... output clause to store the ID's of the copied rows in a temporary table. Then you can delete the rows from the original table based on the temporary table.
declare #Table1 table (id int, name varchar(50))
declare #Table2 table (id int, name varchar(50))
insert #Table1 (id,name)
select 1, 'Mitt'
union all select 2, 'Newt'
union all select 3, 'Rick'
union all select 4, 'Ron'
declare #copied table (id int)
insert #Table2
(id, name)
output inserted.id
into #copied
select id
, name
from #Table1
where name <> 'Mitt'
delete #Table1
where id in
(
select id
from #copied
)
select *
from #Table1
Working example at Data Explorer.
You should do some thing like this:
INSERT INTO "table1" ("column1", "column2", ...)
SELECT "column3", "column4", ...
FROM "table2"
WHERE ...
DELETE FROM "table1"
WHERE ...

SELECT or INSERT a row in one command

I'm using PostgreSQL 9.0 and I have a table with just an artificial key (auto-incrementing sequence) and another unique key. (Yes, there is a reason for this table. :)) I want to look up an ID by the other key or, if it doesn't exist, insert it:
SELECT id
FROM mytable
WHERE other_key = 'SOMETHING'
Then, if no match:
INSERT INTO mytable (other_key)
VALUES ('SOMETHING')
RETURNING id
The question: is it possible to save a round-trip to the DB by doing both of these in one statement? I can insert the row if it doesn't exist like this:
INSERT INTO mytable (other_key)
SELECT 'SOMETHING'
WHERE NOT EXISTS (SELECT * FROM mytable WHERE other_key = 'SOMETHING')
RETURNING id
... but that doesn't give the ID of an existing row. Any ideas? There is a unique constraint on other_key, if that helps.
Have you tried to union it?
Edit - this requires Postgres 9.1:
create table mytable (id serial primary key, other_key varchar not null unique);
WITH new_row AS (
INSERT INTO mytable (other_key)
SELECT 'SOMETHING'
WHERE NOT EXISTS (SELECT * FROM mytable WHERE other_key = 'SOMETHING')
RETURNING *
)
SELECT * FROM new_row
UNION
SELECT * FROM mytable WHERE other_key = 'SOMETHING';
results in:
id | other_key
----+-----------
1 | SOMETHING
(1 row)
No, there is no special SQL syntax that allows you to do select or insert. You can do what Ilia mentions and create a sproc, which means it will not do a round trip fromt he client to server, but it will still result in two queries (three actually, if you count the sproc itself).
using 9.5 i successfully tried this
based on Denis de Bernardy's answer
only 1 parameter
no union
no stored procedure
atomic, thus no concurrency problems (i think...)
The Query:
WITH neworexisting AS (
INSERT INTO mytable(other_key) VALUES('hello 2')
ON CONFLICT(other_key) DO UPDATE SET existed=true -- need some update to return sth
RETURNING *
)
SELECT * FROM neworexisting
first call:
id|other_key|created |existed|
--|---------|-------------------|-------|
6|hello 1 |2019-09-11 11:39:29|false |
second call:
id|other_key|created |existed|
--|---------|-------------------|-------|
6|hello 1 |2019-09-11 11:39:29|true |
First create your table ;-)
CREATE TABLE mytable (
id serial NOT NULL,
other_key text NOT NULL,
created timestamptz NOT NULL DEFAULT now(),
existed bool NOT NULL DEFAULT false,
CONSTRAINT mytable_pk PRIMARY KEY (id),
CONSTRAINT mytable_uniq UNIQUE (other_key) --needed for on conflict
);
you can use a stored procedure
IF (SELECT id FROM mytable WHERE other_key = 'SOMETHING' LIMIT 1) < 0 THEN
INSERT INTO mytable (other_key) VALUES ('SOMETHING')
END IF
I have an alternative to Denis answer, that I think is less database-intensive, although a bit more complex:
create table mytable (id serial primary key, other_key varchar not null unique);
WITH table_sel AS (
SELECT id
FROM mytable
WHERE other_key = 'test'
UNION
SELECT NULL AS id
ORDER BY id NULLS LAST
LIMIT 1
), table_ins AS (
INSERT INTO mytable (id, other_key)
SELECT
COALESCE(id, NEXTVAL('mytable_id_seq'::REGCLASS)),
'test'
FROM table_sel
ON CONFLICT (id) DO NOTHING
RETURNING id
)
SELECT * FROM table_ins
UNION ALL
SELECT * FROM table_sel
WHERE id IS NOT NULL;
In table_sel CTE I'm looking for the right row. If I don't find it, I assure that table_sel returns at least one row, with a union with a SELECT NULL.
In table_ins CTE I try to insert the same row I was looking for earlier. COALESCE(id, NEXTVAL('mytable_id_seq'::REGCLASS)) is saying: id could be defined, if so, use it; whereas if id is null, increment the sequence on id and use this new value to insert a row. The ON CONFLICT clause assure
that if id is already in mytable I don't insert anything.
At the end I put everything together with a UNION between table_ins and table_sel, so that I'm sure to take my sweet id value and execute both CTE.
This query needs to search for the value other_key only once, and is a "search this value" not a "check if this value not exists in the table", that is very heavy; in Denis alternative you use other_key in both types of searches. In my query you "check if a value not exists" only on id that is a integer primary key, that, for construction, is fast.
Minor tweak a decade late to Denis's excellent answer:
-- Create the table with a unique constraint
CREATE TABLE mytable (
id serial PRIMARY KEY
, other_key varchar NOT NULL UNIQUE
);
WITH new_row AS (
-- Only insert when we don't find anything, avoiding a table lock if
-- possible.
INSERT INTO mytable ( other_key )
SELECT 'SOMETHING'
WHERE NOT EXISTS (
SELECT *
FROM mytable
WHERE other_key = 'SOMETHING'
)
RETURNING *
)
(
-- This comes first in the UNION ALL since it'll almost certainly be
-- in the query cache. Marginally slower for the insert case, but also
-- marginally faster for the much more common read-only case.
SELECT *
FROM mytable
WHERE other_key = 'SOMETHING'
-- Don't check for duplicates to be removed
UNION ALL
-- If we reach this point in iteration, we needed to do the INSERT and
-- lock after all.
SELECT *
FROM new_row
) LIMIT 1 -- Just return whatever comes first in the results and allow
-- the query engine to cut processing short for the INSERT
-- calculation.
;
The UNION ALL tells the planner it doesn't have to collect results for de-duplication. The LIMIT 1 at the end allows the planner to short-circuit further processing/iteration once it knows there's an answer available.
NOTE: There is a race condition present here and in the original answer. If the entry does not already exist, the INSERT will fail with a unique constraint violation. The error can be suppressed with ON CONFLICT DO NOTHING, but the query will return an empty set instead of the new row. This is a difficult problem because getting that info from another transaction would violate the I in ACID.