Is it possible in Snowflake to automate a merge?

Currently I have a script that merges between my source and target table by updating and inserting. Both of these tables update daily through a task created on Snowflake. I would like to perform this merge daily too. Is it possible to automate this merge through either a task or something else on Snowflake?
Thanks

If your script contains only SQL commands (or commands that can be written in JavaScript), you can create a stored procedure to call them, and then create a task to run this procedure every day.
https://docs.snowflake.com/en/sql-reference/stored-procedures-usage.html
https://docs.snowflake.com/en/user-guide/tasks-intro.html
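For example, a minimal sketch of such a task (the task, warehouse, schedule, and procedure names here are placeholders to adapt to your environment):
CREATE OR REPLACE TASK DAILY_MERGE_TASK
  WAREHOUSE = MY_WH
  SCHEDULE = 'USING CRON 0 6 * * * UTC' -- every day at 06:00 UTC
AS
  CALL MY_MERGE_PROC();
-- Tasks are created suspended; resume the task to start the schedule.
ALTER TASK DAILY_MERGE_TASK RESUME;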

--- Here are the prerequisites for running the auto-merge procedure pasted below ---
1 --Create Log Table:
--EDWH_DEV.WS_EA_DNATA_DEV.GEN_LOG definition
create or replace TABLE GEN_LOG (
LOG_ID NUMBER(38,0) autoincrement,
"number of rows inserted" NUMBER(38,0),
"number of rows updated" NUMBER(38,0),
PROC_NAME VARCHAR(100),
FINISHED TIMESTAMP_NTZ(9),
USER_NAME VARCHAR(100),
USER_ROLE VARCHAR(100),
STATUS VARCHAR(50),
MESSAGE VARCHAR(2000)
);
2 --Data is loaded based on an existing table structure, which must match the source file's column count.
--Example:
--EDWH_DEV.WS_EA_DNATA_DEV.AIRLINES definition
create or replace TABLE AIRLINES (
CONSOLIDATED_AIRLINE_CODE VARCHAR(80),
POSSIBLE_CUSTOMER_NAME VARCHAR(100),
CUSTOMER_TYPE VARCHAR(70),
CONSOLIDATED_AIRLINE_NAME VARCHAR(90),
constraint CONSOLIDATED_AIRLINE_CODE unique (CONSOLIDATED_AIRLINE_CODE),
constraint CUSTOMER_TYPE unique (CUSTOMER_TYPE)
);
3 --The file in the stage, AIRLINES.csv, has the same number of columns in the same order; it does not need the same headers, as columns are aliased automatically to the table column names above.
4 --Make sure you have the required file format set, or use the default ones (refer to the Snowflake documentation).
--ALTER FILE FORMAT "EDWH_DEV"."WS_EA_DNATA_DEV".CSV SET COMPRESSION = 'AUTO' FIELD_DELIMITER = ',' RECORD_DELIMITER = '\n' SKIP_HEADER = 1 FIELD_OPTIONALLY_ENCLOSED_BY = '\042' TRIM_SPACE = FALSE ERROR_ON_COLUMN_COUNT_MISMATCH = TRUE ESCAPE = 'NONE' ESCAPE_UNENCLOSED_FIELD = '\134' DATE_FORMAT = 'AUTO' TIMESTAMP_FORMAT = 'AUTO' NULL_IF = ('\\N');
5 --Tables must have constraints added, which are then used for the ON clause in the MERGE statement. The constraint name must match the column name.
ALTER TABLE AIRLINES ADD CONSTRAINT CONSOLIDATED_AIRLINE_CODE UNIQUE (CONSOLIDATED_AIRLINE_CODE);
ALTER TABLE AIRLINES ADD CONSTRAINT CUSTOMER_TYPE UNIQUE (CUSTOMER_TYPE);
6 --You have a stage set up and you can view the files in it.
list @my_stage;
7 --This view is used to pull the unique fields for the ON clause in the merge.
CREATE OR REPLACE VIEW CONSTRAINS_VW AS
SELECT
tbl.table_schema,
tbl.table_name,
con.constraint_name,
col.data_type
FROM EDWH_DEV.information_schema.table_constraints con
INNER JOIN EDWH_DEV.information_schema.tables tbl
ON con.table_name = tbl.table_name
AND con.constraint_schema = tbl.table_schema
INNER JOIN EDWH_DEV.information_schema.columns col
ON tbl.table_name = col.table_name
AND con.constraint_name = col.column_name
AND con.constraint_schema = col.table_schema
WHERE con.constraint_type in ('PRIMARY KEY', 'UNIQUE');
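For the AIRLINES example above, the view should return something like this (note that Snowflake reports VARCHAR columns as TEXT in INFORMATION_SCHEMA):
TABLE_SCHEMA     TABLE_NAME  CONSTRAINT_NAME            DATA_TYPE
WS_EA_DNATA_DEV  AIRLINES    CONSOLIDATED_AIRLINE_CODE  TEXT
WS_EA_DNATA_DEV  AIRLINES    CUSTOMER_TYPE              TEXT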
------ the general procedure code: compile once, use many times :) ---
CREATE OR REPLACE PROCEDURE "MERGER_BUILDER_GEN"("TABLE_NAME" VARCHAR(200), "SCHEMA_NAME" VARCHAR(200), "STAGE_NAME" VARCHAR(200))
RETURNS VARCHAR(32000)
LANGUAGE JAVASCRIPT
EXECUTE AS CALLER
AS $$
var result;
snowflake.execute( {sqlText: "begin transaction;"});
var my_sql_command = `SELECT
0 AS "number of rows inserted"
, 0 as "number of rows updated"
,'` + TABLE_NAME + `' AS proc_name
,CURRENT_TIMESTAMP() AS FINISHED
,CURRENT_USER() AS USER_NAME
,CURRENT_ROLE() USER_ROLE
,'Failed' as status`;
var statement1 = snowflake.createStatement( {sqlText: my_sql_command} );
var result_set1 = statement1.execute();
result_set1.next();
var column1 = result_set1.getColumnValue(1);
var column2 = result_set1.getColumnValue(2);
var column3 = result_set1.getColumnValue(3);
var column4 = result_set1.getColumnValue(4);
var column5 = result_set1.getColumnValue(5);
var column6 = result_set1.getColumnValue(6);
var column7 = result_set1.getColumnValue(7);
try {
var v_sql_stmt = `CREATE OR REPLACE temporary TABLE vars_of_merger_dyn00 AS
SELECT
COL_NAMES_SELECT
,REPLACE(listagg (distinct' nvl(tgt."'||cons.constraint_name||'",'
||CASE WHEN cons.data_type ='FLOAT' THEN '0'
WHEN cons.data_type ='NUMBER' THEN '0'
WHEN cons.data_type ='DATE' THEN '''1900-12-01'''
WHEN cons.data_type ='TIMESTAMP_NTZ' THEN '''1900-12-01 00:00:00'''
ELSE '-999999' END||') = nvl(src."'
||cons.constraint_name ||'",'
||CASE WHEN cons.data_type ='FLOAT' THEN '0'
WHEN cons.data_type ='NUMBER' THEN '0'
WHEN cons.data_type ='DATE' THEN '''1900-12-01'''
WHEN cons.data_type ='TIMESTAMP_NTZ' THEN '''1900-12-01 00:00:00'''
ELSE '-999999' END ,') and \n') ||')','-999999','''''') AS dd
,REPLACE(COL_NAMES_WHEN,'-999999','''''') AS COL_NAMES_WHEN
,COL_NAMES_SET
,COL_NAMES_INS
,COL_NAMES_INS1
FROM (
SELECT
InTab.TABLE_NAME
,listagg (' cast($' ||InTab.ORDINAL_POSITION || ' as ' || intab.DATA_TYPE || ') as "' ||InTab.COLUMN_NAME,'", \n') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ||'"' AS Col_Names_select
,listagg (' nvl(tgt."' || CASE WHEN intab.CM IS NULL THEN InTab.COLUMN_NAME ELSE NULL end || '", '
||CASE WHEN intab.data_type ='FLOAT' THEN '0'
WHEN intab.data_type ='NUMBER' THEN '0'
WHEN intab.data_type ='DATE' THEN '''1900-12-01'''
WHEN intab.data_type ='TIMESTAMP_NTZ' THEN '''1900-12-01 00:00:00''' ELSE '-999999' END
||') != nvl(src."' ||InTab.COLUMN_NAME||'",'||
CASE WHEN intab.data_type ='FLOAT' THEN '0'
WHEN intab.data_type ='NUMBER' THEN '0'
WHEN intab.data_type ='DATE' THEN '''1900-12-01'''
WHEN intab.data_type ='TIMESTAMP_NTZ' THEN '''1900-12-01 00:00:00''' ELSE '-999999' END
,') OR\n') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ||')' AS Col_Names_when
,listagg (' tgt."' ||CASE WHEN intab.CM IS NULL THEN InTab.COLUMN_NAME ELSE NULL end || '"= src."' ||InTab.COLUMN_NAME , '",\n') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ||'"' AS Col_Names_set
,listagg ( '"'||InTab.COLUMN_NAME,'",\n') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ||'"' AS Col_Names_ins
,listagg ( ' src."' ||InTab.COLUMN_NAME,'",\n') WITHIN GROUP ( ORDER BY InTab.ORDINAL_POSITION asc ) ||'"' AS Col_Names_ins1
,listagg (ORDINAL_POSITION,',') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ORDINAL_POSITION
FROM (
SELECT
InTab.TABLE_NAME
,InTab.COLUMN_NAME
,InTab.ORDINAL_POSITION
,intab.DATA_TYPE
,cons.CONSTRAINT_NAME AS CM
FROM INFORMATION_SCHEMA.COLUMNS InTab
LEFT JOIN constrains_vw cons ON cons.table_name = intab.table_name AND InTab.COLUMN_NAME = cons.CONSTRAINT_NAME
where intab.TABLE_SCHEMA = '`+ SCHEMA_NAME +`'
AND intab.TABLE_NAME = '`+ TABLE_NAME +`'
GROUP BY
InTab.TABLE_NAME
,InTab.COLUMN_NAME
,InTab.COLUMN_NAME
,InTab.ORDINAL_POSITION
,intab.DATA_TYPE
,CONSTRAINT_NAME
ORDER BY InTab.TABLE_NAME,InTab.ORDINAL_POSITION ) InTab
GROUP BY TABLE_NAME
ORDER BY TABLE_NAME,ORDINAL_POSITION
) tt
LEFT JOIN constrains_vw cons ON cons.table_name = tt.table_name
GROUP BY
COL_NAMES_SELECT
,COL_NAMES_WHEN
,COL_NAMES_SET
,COL_NAMES_INS
,COL_NAMES_INS1;` ;
var rs_clip_name = snowflake.execute ({sqlText: v_sql_stmt});
var my_sql_command1 = `SELECT Col_Names_select,dd,Col_Names_when,Col_Names_set,Col_Names_ins,Col_Names_ins1 FROM vars_of_merger_dyn00;`;
var statement2 = snowflake.createStatement( {sqlText: my_sql_command1} );
var result_set = statement2.execute();
result_set.next();
var Col_Names_select = result_set.getColumnValue(1);
var dd = result_set.getColumnValue(2);
var Col_Names_when = result_set.getColumnValue(3);
var Col_Names_set = result_set.getColumnValue(4);
var Col_Names_ins = result_set.getColumnValue(5);
var Col_Names_ins1 = result_set.getColumnValue(6);
if (Col_Names_set == '"')
{
var my_sql_command2 = `MERGE INTO EDWH_DEV.`+ SCHEMA_NAME +`.`+ TABLE_NAME +` AS tgt
USING
( select
`+ Col_Names_select +`
from
@` + STAGE_NAME + `/` + TABLE_NAME + `.csv (file_format => 'CSV') )
AS src
ON ( `+ dd +`
)
WHEN NOT MATCHED
THEN INSERT ( `+ Col_Names_ins +`)
VALUES
(`+ Col_Names_ins1 +`); `;
var rs_clip_name2 = snowflake.execute ({sqlText: my_sql_command2});
snowflake.createStatement( { sqlText: `INSERT INTO GEN_LOG
("number of rows inserted", "number of rows updated", proc_name , FINISHED, USER_NAME, USER_ROLE, STATUS, MESSAGE)
SELECT "number of rows inserted", 0 as "number of rows updated", '` + TABLE_NAME + `' AS proc_name , sysdate(), CURRENT_USER() ,CURRENT_ROLE(),'done' as status ,'' AS message
FROM TABLE (RESULT_SCAN(LAST_QUERY_ID()));`} ).execute();
}
else
{
var my_sql_command2 = `MERGE INTO EDWH_DEV.`+ SCHEMA_NAME +`.`+ TABLE_NAME +` AS tgt
USING
( select
`+ Col_Names_select +`
from
@` + STAGE_NAME + `/` + TABLE_NAME + `.csv (file_format => 'CSV') )
AS src
ON ( `+ dd +`
)
WHEN MATCHED
AND `+ Col_Names_when +`
THEN UPDATE SET
`+ Col_Names_set +`
WHEN NOT MATCHED
THEN INSERT ( `+ Col_Names_ins +`)
VALUES
(`+ Col_Names_ins1 +`); `;
var rs_clip_name2 = snowflake.execute ({sqlText: my_sql_command2});
snowflake.createStatement( { sqlText: `INSERT INTO GEN_LOG
("number of rows inserted", "number of rows updated", proc_name , FINISHED, USER_NAME, USER_ROLE, STATUS, MESSAGE)
SELECT "number of rows inserted","number of rows updated", '` + TABLE_NAME + `' AS proc_name , sysdate(), CURRENT_USER() ,CURRENT_ROLE(),'done' as status ,'' AS message
FROM TABLE (RESULT_SCAN(LAST_QUERY_ID()));`} ).execute();
}
snowflake.execute( {sqlText: "commit;"} );
result = "Succeeded" + my_sql_command2 ;
} catch (err) {
snowflake.execute({
sqlText: `insert into GEN_LOG VALUES (DEFAULT,?,?,?,?,?,?,?,?)`
,binds: [column1, column2, column3 ,column4 , column5 , column6 ,column7 , err.code + " | State: " + err.state + "\n Message: " + err.message + "\nStack Trace:\n" + err.stackTraceTxt ]
});
snowflake.execute( {sqlText: "commit;"} );
return 'Failed.' + my_sql_command2 ;
}
return result;
$$;
Now you can stop here and use the proc as: CALL MERGER_BUILDER_GEN('MY_TABLE','MY_SCHEMA','MY_STAGE'); -- all arguments are case sensitive.
So, in a nutshell, it writes a proper MERGE statement for any table DDL that you created in the schema and fed to the proc. It looks up the file and dynamically creates the SELECT for the merge, then the other little bits: the ON clause, the "WHEN MATCHED AND NVL(everything)" clause, and the "WHEN NOT MATCHED THEN INSERT" clause. It also casts to the different data types on the fly, kind of like what COPY INTO does, but in my humble opinion MERGE is better for imperfect deltas. So if you don't want a data lake with files partitioned over dates, stitched together via external tables or, god forbid, a UNION view, then give this a shot.
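To illustrate, for the AIRLINES table above the procedure would generate a merge roughly like this (a sketch, not the verbatim output; the exact casts and ordering depend on what INFORMATION_SCHEMA returns):
MERGE INTO EDWH_DEV.WS_EA_DNATA_DEV.AIRLINES AS tgt
USING
( select
 cast($1 as VARCHAR) as "CONSOLIDATED_AIRLINE_CODE",
 cast($2 as VARCHAR) as "POSSIBLE_CUSTOMER_NAME",
 cast($3 as VARCHAR) as "CUSTOMER_TYPE",
 cast($4 as VARCHAR) as "CONSOLIDATED_AIRLINE_NAME"
from @MY_STAGE/AIRLINES.csv (file_format => 'CSV') )
AS src
ON ( nvl(tgt."CONSOLIDATED_AIRLINE_CODE",'') = nvl(src."CONSOLIDATED_AIRLINE_CODE",'')
 and nvl(tgt."CUSTOMER_TYPE",'') = nvl(src."CUSTOMER_TYPE",'') )
WHEN MATCHED
AND ( nvl(tgt."POSSIBLE_CUSTOMER_NAME",'') != nvl(src."POSSIBLE_CUSTOMER_NAME",'')
 OR nvl(tgt."CONSOLIDATED_AIRLINE_NAME",'') != nvl(src."CONSOLIDATED_AIRLINE_NAME",'') )
THEN UPDATE SET
 tgt."POSSIBLE_CUSTOMER_NAME" = src."POSSIBLE_CUSTOMER_NAME",
 tgt."CONSOLIDATED_AIRLINE_NAME" = src."CONSOLIDATED_AIRLINE_NAME"
WHEN NOT MATCHED
THEN INSERT ("CONSOLIDATED_AIRLINE_CODE","POSSIBLE_CUSTOMER_NAME","CUSTOMER_TYPE","CONSOLIDATED_AIRLINE_NAME")
VALUES (src."CONSOLIDATED_AIRLINE_CODE",src."POSSIBLE_CUSTOMER_NAME",src."CUSTOMER_TYPE",src."CONSOLIDATED_AIRLINE_NAME");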
Also, you can use this little setup to run as many tables as you like with the auto-merge, one by one:
create or replace TABLE PROC_LIST (
PROC_PRIORIT_ID NUMBER(38,0) autoincrement,
PROC_NAME VARCHAR(150)
);
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE1'); -- with 50 columns
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE2');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE3');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE4');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE5'); -- with 500 columns
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE6');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE7');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE8'); -- the dynamic SQL limit is 32,000 chars, go crazy
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE9');
--Created a nice list of tables to be loaded one by one using the auto merge!
CREATE OR REPLACE VIEW PROC_LOAD_CONTROL AS
select
metadata$filename
,REPLACE(REPLACE(metadata$filename,'.csv',''),'path/to/your_table_ifnot_inmain_stage_location/','') AS file_name
,pl.PROC_NAME AS table_name
,'MY_SCHEMA' as schema_name
,'MY_STAGE' AS stage_name
from @MY_STAGE
inner JOIN PROC_LIST pl ON pl.PROC_NAME = REPLACE(REPLACE(metadata$filename,'.csv',''),'path/to/your_table_ifnot_inmain_stage_location/','')
GROUP BY metadata$filename,pl.proc_name
ORDER BY REPLACE(REPLACE(metadata$filename,'.csv',''),'path/to/your_table_ifnot_inmain_stage_location/','') asc;
--This will make sure that your table names match the actual files in your stage; please see the prerequisites above to make this work smoothly.
CREATE OR REPLACE PROCEDURE "PROJECT_REFRESH_MRG"()
RETURNS VARCHAR(1000)
LANGUAGE JAVASCRIPT
EXECUTE AS OWNER
AS $$
try {
var v_sql_stmt = `SELECT
table_name
,schema_name
,stage_name
FROM PROC_LOAD_CONTROL;`;
var rs_proc_name = snowflake.execute ({sqlText: v_sql_stmt});
var v_table_name = '';
var v_schema_name = '';
var v_stage_name = '';
//loop through all the tables and run the auto-merge for each
while (rs_proc_name.next()) {
v_table_name = rs_proc_name.getColumnValue(1);
v_schema_name = rs_proc_name.getColumnValue(2);
v_stage_name = rs_proc_name.getColumnValue(3);
//run the auto-merge for this table
v_sql_stmt = `call MERGER_BUILDER_GEN('`+v_table_name+`','`+v_schema_name+`','`+v_stage_name+`')`;
snowflake.execute ({sqlText: v_sql_stmt});
}
return "Success: " + v_sql_stmt;
}
catch (err)
{
//error log here
return "Failed" + err; // Return a success/error indicator
}
$$;
--- So this will create a list of tables with stage and schema vars and pass them in a while loop to the generic merger builder.

Related

pivot or reshape sql [duplicate]

I've been tasked with coming up with a means of translating the following data:
date       category  amount
1/1/2012   ABC       1000.00
2/1/2012   DEF       500.00
2/1/2012   GHI       800.00
2/10/2012  DEF       700.00
3/1/2012   ABC       1100.00
into the following:
date       ABC      DEF     GHI
1/1/2012   1000.00
2/1/2012            500.00
2/1/2012                    800.00
2/10/2012           700.00
3/1/2012   1100.00
The blank spots can be NULLs or blanks, either is fine, and the categories would need to be dynamic. Another possible caveat to this is that we'll be running the query in a limited capacity, which means temp tables are out. I've tried to research and have landed on PIVOT but as I've never used that before I really don't understand it, despite my best efforts to figure it out. Can anyone point me in the right direction?
Dynamic SQL PIVOT:
create table temp
(
date datetime,
category varchar(3),
amount money
)
insert into temp values ('1/1/2012', 'ABC', 1000.00)
insert into temp values ('2/1/2012', 'DEF', 500.00)
insert into temp values ('2/1/2012', 'GHI', 800.00)
insert into temp values ('2/10/2012', 'DEF', 700.00)
insert into temp values ('3/1/2012', 'ABC', 1100.00)
DECLARE @cols AS NVARCHAR(MAX),
@query AS NVARCHAR(MAX);
SET @cols = STUFF((SELECT distinct ',' + QUOTENAME(c.category)
FROM temp c
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set @query = 'SELECT date, ' + @cols + ' from
(
select date
, amount
, category
from temp
) x
pivot
(
max(amount)
for category in (' + @cols + ')
) p '
execute(@query)
drop table temp
Results:
Date ABC DEF GHI
2012-01-01 00:00:00.000 1000.00 NULL NULL
2012-02-01 00:00:00.000 NULL 500.00 800.00
2012-02-10 00:00:00.000 NULL 700.00 NULL
2012-03-01 00:00:00.000 1100.00 NULL NULL
Dynamic SQL PIVOT
A different approach for creating the columns string:
create table #temp
(
date datetime,
category varchar(3),
amount money
)
insert into #temp values ('1/1/2012', 'ABC', 1000.00)
insert into #temp values ('2/1/2012', 'DEF', 500.00)
insert into #temp values ('2/1/2012', 'GHI', 800.00)
insert into #temp values ('2/10/2012', 'DEF', 700.00)
insert into #temp values ('3/1/2012', 'ABC', 1100.00)
DECLARE @cols AS NVARCHAR(MAX)='';
DECLARE @query AS NVARCHAR(MAX)='';
SELECT @cols = @cols + QUOTENAME(category) + ',' FROM (select distinct category from #temp ) as tmp
select @cols = substring(@cols, 0, len(@cols)) --trim "," at end
set @query =
'SELECT * from
(
select date, amount, category from #temp
) src
pivot
(
max(amount) for category in (' + @cols + ')
) piv'
execute(@query)
drop table #temp
Result
date ABC DEF GHI
2012-01-01 00:00:00.000 1000.00 NULL NULL
2012-02-01 00:00:00.000 NULL 500.00 800.00
2012-02-10 00:00:00.000 NULL 700.00 NULL
2012-03-01 00:00:00.000 1100.00 NULL NULL
I know this question is older, but I was looking through the answers and thought that I might be able to expand on the "dynamic" portion of the problem and possibly help someone out.
First and foremost, I built this solution to solve a problem a couple of coworkers were having with inconsistent and large data sets needing to be pivoted quickly.
This solution requires the creation of a stored procedure, so if that is out of the question for your needs, please stop reading now.
This procedure is going to take in the key variables of a pivot statement to dynamically create pivot statements for varying tables, column names, and aggregates. The static column is used as the group by / identity column for the pivot (this can be stripped out of the code if not necessary, but it is pretty common in pivot statements and was needed to solve the original issue). The pivot column is where the end resultant column names will be generated from, and the value column is what the aggregate will be applied to. The table parameter is the name of the table including the schema (schema.tablename); this portion of the code could use some love, because it is not as clean as I would like it to be. It worked for me because my usage was not publicly facing and SQL injection was not a concern. The aggregate parameter will accept any standard SQL aggregate ('AVG', 'SUM', 'MAX', etc.). The code also defaults to MAX as an aggregate; this is not necessary, but the audience this was originally built for did not understand pivots and typically used MAX as an aggregate.
Let's start with the code to create the stored procedure. This code should work in all versions of SQL Server 2005 and above, though I have not tested it on 2005 or 2016; I cannot see why it would not work.
create PROCEDURE [dbo].[USP_DYNAMIC_PIVOT]
(
@STATIC_COLUMN VARCHAR(255),
@PIVOT_COLUMN VARCHAR(255),
@VALUE_COLUMN VARCHAR(255),
@TABLE VARCHAR(255),
@AGGREGATE VARCHAR(20) = null
)
AS
BEGIN
SET NOCOUNT ON;
declare @AVAIABLE_TO_PIVOT NVARCHAR(MAX),
@SQLSTRING NVARCHAR(MAX),
@PIVOT_SQL_STRING NVARCHAR(MAX),
@TEMPVARCOLUMNS NVARCHAR(MAX),
@TABLESQL NVARCHAR(MAX)
if isnull(@AGGREGATE,'') = ''
begin
SET @AGGREGATE = 'MAX'
end
SET @PIVOT_SQL_STRING = 'SELECT top 1 STUFF((SELECT distinct '', '' + CAST(''[''+CONVERT(VARCHAR,'+ @PIVOT_COLUMN+')+'']'' AS VARCHAR(50)) [text()]
FROM '+@TABLE+'
WHERE ISNULL('+@PIVOT_COLUMN+','''') <> ''''
FOR XML PATH(''''), TYPE)
.value(''.'',''NVARCHAR(MAX)''),1,2,'' '') as PIVOT_VALUES
from '+@TABLE+' ma
ORDER BY ' + @PIVOT_COLUMN + ''
declare @TAB AS TABLE(COL NVARCHAR(MAX) )
INSERT INTO @TAB EXEC SP_EXECUTESQL @PIVOT_SQL_STRING, @AVAIABLE_TO_PIVOT
SET @AVAIABLE_TO_PIVOT = (SELECT * FROM @TAB)
SET @TEMPVARCOLUMNS = (SELECT replace(@AVAIABLE_TO_PIVOT,',',' nvarchar(255) null,') + ' nvarchar(255) null')
SET @SQLSTRING = 'DECLARE @RETURN_TABLE TABLE ('+@STATIC_COLUMN+' NVARCHAR(255) NULL,'+@TEMPVARCOLUMNS+')
INSERT INTO @RETURN_TABLE('+@STATIC_COLUMN+','+@AVAIABLE_TO_PIVOT+')
select * from (
SELECT ' + @STATIC_COLUMN + ' , ' + @PIVOT_COLUMN + ', ' + @VALUE_COLUMN + ' FROM '+@TABLE+' ) a
PIVOT
(
'+@AGGREGATE+'('+@VALUE_COLUMN+')
FOR '+@PIVOT_COLUMN+' IN ('+@AVAIABLE_TO_PIVOT+')
) piv
SELECT * FROM @RETURN_TABLE'
EXEC SP_EXECUTESQL @SQLSTRING
END
Next we will get our data ready for the example. I have taken the data example from the accepted answer with the addition of a couple of data elements to use in this proof of concept to show the varied outputs of the aggregate change.
create table temp
(
date datetime,
category varchar(3),
amount money
)
insert into temp values ('1/1/2012', 'ABC', 1000.00)
insert into temp values ('1/1/2012', 'ABC', 2000.00) -- added
insert into temp values ('2/1/2012', 'DEF', 500.00)
insert into temp values ('2/1/2012', 'DEF', 1500.00) -- added
insert into temp values ('2/1/2012', 'GHI', 800.00)
insert into temp values ('2/10/2012', 'DEF', 700.00)
insert into temp values ('2/10/2012', 'DEF', 800.00) -- added
insert into temp values ('3/1/2012', 'ABC', 1100.00)
The following examples show the varied execution statements with the varied aggregates, as a simple example. I did not opt to change the static, pivot, and value columns, to keep the example simple. You should be able to just copy and paste the code to start messing with it yourself.
exec [dbo].[USP_DYNAMIC_PIVOT] 'date','category','amount','dbo.temp','sum'
exec [dbo].[USP_DYNAMIC_PIVOT] 'date','category','amount','dbo.temp','max'
exec [dbo].[USP_DYNAMIC_PIVOT] 'date','category','amount','dbo.temp','avg'
exec [dbo].[USP_DYNAMIC_PIVOT] 'date','category','amount','dbo.temp','min'
Each execution returns its corresponding result set.
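For instance, the 'sum' call works out to the following from the data above (the original post showed all four result sets as screenshots):
date                     ABC      DEF      GHI
2012-01-01 00:00:00.000  3000.00  NULL     NULL
2012-02-01 00:00:00.000  NULL     2000.00  800.00
2012-02-10 00:00:00.000  NULL     1500.00  NULL
2012-03-01 00:00:00.000  1100.00  NULL     NULL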
Updated version for SQL Server 2017 using STRING_AGG function to construct the pivot column list:
create table temp
(
date datetime,
category varchar(3),
amount money
);
insert into temp values ('20120101', 'ABC', 1000.00);
insert into temp values ('20120201', 'DEF', 500.00);
insert into temp values ('20120201', 'GHI', 800.00);
insert into temp values ('20120210', 'DEF', 700.00);
insert into temp values ('20120301', 'ABC', 1100.00);
DECLARE @cols AS NVARCHAR(MAX),
@query AS NVARCHAR(MAX);
SET @cols = (SELECT STRING_AGG(category,',') FROM (SELECT DISTINCT category FROM temp WHERE category IS NOT NULL)t);
set @query = 'SELECT date, ' + @cols + ' from
(
select date
, amount
, category
from temp
) x
pivot
(
max(amount)
for category in (' + @cols + ')
) p ';
execute(@query);
drop table temp;
Here's my solution, cleaning up the unnecessary NULL values:
DECLARE @cols AS NVARCHAR(MAX),
@maxcols AS NVARCHAR(MAX),
@query AS NVARCHAR(MAX)
select @cols = STUFF((SELECT ',' + QUOTENAME(CodigoFormaPago)
from PO_FormasPago
order by CodigoFormaPago
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
select @maxcols = STUFF((SELECT ',MAX(' + QUOTENAME(CodigoFormaPago) + ') as ' + QUOTENAME(CodigoFormaPago)
from PO_FormasPago
order by CodigoFormaPago
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set @query = 'SELECT CodigoProducto, DenominacionProducto, ' + @maxcols + '
FROM
(
SELECT
CodigoProducto, DenominacionProducto,
' + @cols + ' from
(
SELECT
p.CodigoProducto as CodigoProducto,
p.DenominacionProducto as DenominacionProducto,
fpp.CantidadCuotas as CantidadCuotas,
fpp.IdFormaPago as IdFormaPago,
fp.CodigoFormaPago as CodigoFormaPago
FROM
PR_Producto p
LEFT JOIN PR_FormasPagoProducto fpp
ON fpp.IdProducto = p.IdProducto
LEFT JOIN PO_FormasPago fp
ON fpp.IdFormaPago = fp.IdFormaPago
) xp
pivot
(
MAX(CantidadCuotas)
for CodigoFormaPago in (' + @cols + ')
) p
) xx
GROUP BY CodigoProducto, DenominacionProducto'
print @query;
execute(@query);
The code below provides the same results but replaces NULL with zero in the output.
Table creation and data insertion:
create table test_table
(
date nvarchar(10),
category char(3),
amount money
)
insert into test_table values ('1/1/2012','ABC',1000.00)
insert into test_table values ('2/1/2012','DEF',500.00)
insert into test_table values ('2/1/2012','GHI',800.00)
insert into test_table values ('2/10/2012','DEF',700.00)
insert into test_table values ('3/1/2012','ABC',1100.00)
Query to generate the exact results which also replaces NULL with zeros:
DECLARE @DynamicPivotQuery AS NVARCHAR(MAX),
@PivotColumnNames AS NVARCHAR(MAX),
@PivotSelectColumnNames AS NVARCHAR(MAX)
--Get distinct values of the PIVOT Column
SELECT @PivotColumnNames= ISNULL(@PivotColumnNames + ',','')
+ QUOTENAME(category)
FROM (SELECT DISTINCT category FROM test_table) AS cat
--Get distinct values of the PIVOT Column with isnull
SELECT @PivotSelectColumnNames
= ISNULL(@PivotSelectColumnNames + ',','')
+ 'ISNULL(' + QUOTENAME(category) + ', 0) AS '
+ QUOTENAME(category)
FROM (SELECT DISTINCT category FROM test_table) AS cat
--Prepare the PIVOT query using the dynamic column list
SET @DynamicPivotQuery =
N'SELECT date, ' + @PivotSelectColumnNames + '
FROM test_table
pivot(sum(amount) for category in (' + @PivotColumnNames + ')) as pvt';
--Execute the Dynamic Pivot Query
EXEC sp_executesql @DynamicPivotQuery
A version of Taryn's answer with performance improvements:
Data
CREATE TABLE dbo.Temp
(
[date] datetime NOT NULL,
category nchar(3) NOT NULL,
amount money NOT NULL,
INDEX [CX dbo.Temp date] CLUSTERED ([date]),
INDEX [IX dbo.Temp category] NONCLUSTERED (category)
);
INSERT dbo.Temp
([date], category, amount)
VALUES
({D '2012-01-01'}, N'ABC', $1000.00),
({D '2012-01-02'}, N'DEF', $500.00),
({D '2012-01-02'}, N'GHI', $800.00),
({D '2012-02-10'}, N'DEF', $700.00),
({D '2012-03-01'}, N'ABC', $1100.00);
Dynamic pivot
DECLARE
@Delimiter nvarchar(4000) = N',',
@DelimiterLength bigint,
@Columns nvarchar(max),
@Query nvarchar(max);
SET @DelimiterLength = LEN(REPLACE(@Delimiter, SPACE(1), N'#'));
-- Before SQL Server 2017
SET @Columns =
STUFF
(
(
SELECT
[text()] = @Delimiter,
[text()] = QUOTENAME(T.category)
FROM dbo.Temp AS T
WHERE T.category IS NOT NULL
GROUP BY T.category
ORDER BY T.category
FOR XML PATH (''), TYPE
)
.value(N'text()[1]', N'nvarchar(max)'),
1, @DelimiterLength, SPACE(0)
);
-- Alternative for SQL Server 2017+ and database compatibility level 110+
SELECT @Columns =
STRING_AGG(CONVERT(nvarchar(max), QUOTENAME(T.category)), N',')
WITHIN GROUP (ORDER BY T.category)
FROM
(
SELECT T2.category
FROM dbo.Temp AS T2
WHERE T2.category IS NOT NULL
GROUP BY T2.category
) AS T;
IF @Columns IS NOT NULL
BEGIN
SET @Query =
N'SELECT [date], ' +
@Columns +
N'
FROM
(
SELECT [date], amount, category
FROM dbo.Temp
) AS S
PIVOT
(
MAX(amount)
FOR category IN (' +
@Columns +
N')
) AS P;';
EXECUTE sys.sp_executesql @Query;
END;
Results
date                     ABC      DEF     GHI
2012-01-01 00:00:00.000  1000.00  NULL    NULL
2012-01-02 00:00:00.000  NULL     500.00  800.00
2012-02-10 00:00:00.000  NULL     700.00  NULL
2012-03-01 00:00:00.000  1100.00  NULL    NULL
CREATE TABLE #PivotExample(
[ID] [nvarchar](50) NULL,
[Description] [nvarchar](50) NULL,
[ClientId] [smallint] NOT NULL,
)
GO
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI1','ACI1Desc1',1008)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI1','ACI1Desc2',2000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI1','ACI1Desc3',3000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI1','ACI1Desc4',4000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI2','ACI2Desc1',5000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI2','ACI2Desc2',6000)
INSERT #PivotExample ([ID],[Description], [ClientId]) VALUES ('ACI2','ACI2Desc3', 7000)
SELECT * FROM #PivotExample
--Declare necessary variables
DECLARE @SQLQuery AS NVARCHAR(MAX)
DECLARE @PivotColumns AS NVARCHAR(MAX)
--Get unique values of pivot column
SELECT @PivotColumns= COALESCE(@PivotColumns + ',','') + QUOTENAME([Description])
FROM (SELECT DISTINCT [Description] FROM [dbo].#PivotExample) AS PivotExample
--SELECT @PivotColumns
--Create the dynamic query with all the values for
--pivot column at runtime
SET @SQLQuery =
N' -- Your pivoted result comes here
SELECT ID, ' + @PivotColumns + '
FROM
(
-- Source table should be in an inner query
SELECT ID,[Description],[ClientId]
FROM #PivotExample
)AS P
PIVOT
(
-- Select the values from derived table P
SUM(ClientId)
FOR [Description] IN (' + @PivotColumns + ')
)AS PVTTable'
--SELECT @SQLQuery
--Execute dynamic query
EXEC sp_executesql @SQLQuery
Drop table #PivotExample
A fully generic way that will work in non-traditional MS SQL environments (e.g. Azure Synapse Analytics serverless SQL pools) - it's in a sproc, but there's no need to use it as such...
-- DROP PROCEDURE IF EXISTS
if object_id('dbo.usp_generic_pivot') is not null
DROP PROCEDURE dbo.usp_generic_pivot
GO
CREATE PROCEDURE dbo.usp_generic_pivot (
@source NVARCHAR (100), -- table or view object name
@pivotCol NVARCHAR (100), -- the column to pivot
@pivotAggCol NVARCHAR (100), -- the column with the values for the pivot
@pivotAggFunc NVARCHAR (20), -- the aggregate function to apply to those values
@leadCols NVARCHAR (100) -- comma-separated list of other columns to keep and order by
)
AS
BEGIN
DECLARE @pivotedColumns NVARCHAR(MAX)
DECLARE @tsql NVARCHAR(MAX)
SET @tsql = CONCAT('SELECT @pivotedColumns = STRING_AGG(qname, '','') FROM (SELECT DISTINCT QUOTENAME(', @pivotCol,') AS qname FROM ',@source, ') AS qnames')
EXEC sp_executesql @tsql, N'@pivotedColumns nvarchar(max) out', @pivotedColumns out
SET @tsql = CONCAT ( 'SELECT ', @leadCols, ',', @pivotedColumns,' FROM ',' ( SELECT ',@leadCols,',',
@pivotAggCol,',', @pivotCol, ' FROM ', @source, ') as t ',
' PIVOT (', @pivotAggFunc, '(', @pivotAggCol, ')',' FOR ', @pivotCol,
' IN (', @pivotedColumns,')) as pvt ',' ORDER BY ', @leadCols)
EXEC (@tsql)
END
GO
-- TEST EXAMPLE
EXEC dbo.usp_generic_pivot
@source = '[your_db].[dbo].[form_answers]',
@pivotCol = 'question',
@pivotAggCol = 'answer',
@pivotAggFunc = 'MAX',
@leadCols = 'candidate_id, candidate_name'
GO

t-sql select column names from all tables where there is at least 1 null value

Context: I am exploring a new database (in MS SQL server), and I want to know for each table, all columns that have null values.
I.e. result would look something like this:
table  column  nulls
Tbl1   Col1    8
I have found this code here on Stack Overflow that makes a table of table-column names - without the WHERE statement, which is my addition.
I tried to filter for nulls in the WHERE statement, but then the table ends up empty, and I see why - I am checking whether the column name is null, not its contents. But I can't figure out how to proceed.
select schema_name(tab.schema_id) as schema_name,
tab.name as table_name,
col.name as column_name
from sys.tables as tab
inner join sys.columns as col
on tab.object_id = col.object_id
left join sys.types as t
on col.user_type_id = t.user_type_id
-- in this where statement, I am trying to filter for nulls, but i get an empty result. and i know there are nulls
where col.name is null
order by schema_name, table_name, column_id
I also tried this (see 4th line):
select schema_name(tab.schema_id) as schema_name,
tab.name as table_name,
col.name as column_name
,(select count(*) from tab.name where col.name is null) as countnulls
from sys.tables as tab
inner join sys.columns as col
on tab.object_id = col.object_id
left join sys.types as t
on col.user_type_id = t.user_type_id
order by schema_name, table_name, column_id
the last one returns an error "Invalid object name 'tab.name'."
column name can't be null but if you mean nullable column (column that accept null value) that has null value at least so you can use following statement:
declare @schema varchar(255), @table varchar(255), @col varchar(255), @cmd varchar(max)
DECLARE getinfo cursor for
SELECT schema_name(tab.schema_id) as schema_name,tab.name , col.name from sys.tables as tab
inner join sys.columns as col on tab.object_id = col.object_id
where col.is_nullable =1
order by schema_name(tab.schema_id),tab.name,col.name
OPEN getinfo
FETCH NEXT FROM getinfo into @schema,@table,@col
WHILE @@FETCH_STATUS = 0
BEGIN
set @schema = QUOTENAME(@schema)
set @table = QUOTENAME(@table)
set @col = QUOTENAME(@col)
SELECT @cmd = 'IF EXISTS (SELECT 1 FROM '+ @schema +'.'+ @table +' WHERE ' + @col + ' IS NULL) BEGIN SELECT '''+@schema+''' as schemaName, '''+@table+''' as tablename, '''+@col+''' as columnName, * FROM '+ @schema +'.'+ @table +' WHERE ' + @col + ' IS NULL end'
EXEC(@cmd)
FETCH NEXT FROM getinfo into @schema,@table,@col
END
CLOSE getinfo
DEALLOCATE getinfo
This uses a cursor over all nullable columns in every table in the database, then checks whether each column has at least one null value; if yes, it selects the schema name, table name, column name, and all records that have a null value in that column.
But if you want to get only the count of nulls, you can use the following statement:
declare @schema varchar(255), @table varchar(255), @col varchar(255), @cmd varchar(max)
DECLARE getinfo cursor for
SELECT schema_name(tab.schema_id) as schema_name,tab.name , col.name from sys.tables as tab
inner join sys.columns as col on tab.object_id = col.object_id
where col.is_nullable =1
order by schema_name(tab.schema_id),tab.name,col.name
OPEN getinfo
FETCH NEXT FROM getinfo into @schema,@table,@col
WHILE @@FETCH_STATUS = 0
BEGIN
set @schema = QUOTENAME(@schema)
set @table = QUOTENAME(@table)
set @col = QUOTENAME(@col)
SELECT @cmd = 'IF EXISTS (SELECT 1 FROM '+ @schema +'.'+ @table +' WHERE ' + @col + ' IS NULL) BEGIN SELECT '''+@schema+''' as schemaName, '''+@table+''' as tablename, '''+@col+''' as columnName, count(*) as nulls FROM '+ @schema +'.'+ @table +' WHERE ' + @col + ' IS NULL end'
EXEC(@cmd)
FETCH NEXT FROM getinfo into @schema,@table,@col
END
CLOSE getinfo
DEALLOCATE getinfo
This uses a cursor over all nullable columns in every table in the database, then checks whether each column has at least one null value; if yes, it selects the schema name, table name, column name, and the count of records that have a null value in that column.
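If you would rather get the single result set shaped like the example in the question (table / column / nulls), the same cursor can collect the counts into a temp table first. A sketch of that variant (#null_counts is just an illustrative name):
declare @schema varchar(255), @table varchar(255), @col varchar(255), @cmd varchar(max)
create table #null_counts
(
schemaName varchar(255),
tableName varchar(255),
columnName varchar(255),
nulls int
)
DECLARE getinfo cursor for
SELECT schema_name(tab.schema_id), tab.name, col.name from sys.tables as tab
inner join sys.columns as col on tab.object_id = col.object_id
where col.is_nullable = 1
OPEN getinfo
FETCH NEXT FROM getinfo into @schema,@table,@col
WHILE @@FETCH_STATUS = 0
BEGIN
-- count the nulls for this column; only keep columns that actually have nulls
SELECT @cmd = 'INSERT INTO #null_counts SELECT ''' + @schema + ''',''' + @table + ''',''' + @col + ''', COUNT(*) FROM ' + QUOTENAME(@schema) + '.' + QUOTENAME(@table) + ' WHERE ' + QUOTENAME(@col) + ' IS NULL HAVING COUNT(*) > 0'
EXEC(@cmd)
FETCH NEXT FROM getinfo into @schema,@table,@col
END
CLOSE getinfo
DEALLOCATE getinfo
SELECT * FROM #null_counts order by schemaName, tableName, columnName
drop table #null_counts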

Dynamic sql to select a specific value from a column using Joins

I am attempting to use dynamic SQL to select a value based on a field. I have a table of field references I am using for the column names. What I am having trouble with is, of course, the dynamic SQL. My returned result is (SELECT ecoa_code FROM CRA_METRO2_BASE WHERE id = 568470), for example. But I really want it to run that select statement. Executing only returns the last row.
DECLARE @BaseCol VARCHAR(250)
SELECT
@BaseCol = '(SELECT ' + FR_base.field_name + ' FROM CRA_METRO2_BASE WHERE id = ' + CONVERT(VARCHAR(15), B.id) + ')'
FROM CRA_INNOVIS_AUDIT_ERROR_FIELDS E
LEFT JOIN CRA_METRO2_BASE B
ON B.id = E.base_id
LEFT JOIN CRA_METRO2_FIELD_REF FR_base
ON FR_base.id = E.base_field_ref
WHERE E.audit_id = @audit_id
EXEC(@BaseCol)
I am not sure I understand your premise correctly, and without a mock-up... so please take this answer with a grain of salt :)
DECLARE @sqlstring VARCHAR(MAX)
SELECT @sqlstring = 'SELECT ' + a.column_name + ' FROM ' + a.[Schema] + '.' + a.table_name
from (
SELECT TOP 1 T.object_id,OBJECT_SCHEMA_NAME(T.[object_id],DB_ID()) AS [Schema],
T.[name] AS [table_name], AC.[name] AS [column_name]
--,TY.[name] AS system_data_type
, AC.[max_length],
AC.[precision], AC.[scale], AC.[is_nullable], AC.[is_ansi_padded]
,AC.column_id
FROM sys.tables AS T
INNER JOIN sys.[all_columns] AC ON T.[object_id] = AC.[object_id]
) a
SELECT @sqlstring
EXEC(@sqlstring)
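As a side note: if the goal is to get a single value back into a variable, sp_executesql with an OUTPUT parameter avoids building the nested SELECT string. A sketch (the column name ecoa_code and id 568470 come from the question; the rest is illustrative):
DECLARE @value VARCHAR(250), @sql NVARCHAR(MAX), @fieldName SYSNAME = 'ecoa_code'
SET @sql = N'SELECT @v = ' + QUOTENAME(@fieldName) + N' FROM CRA_METRO2_BASE WHERE id = @id'
EXEC sp_executesql @sql, N'@v VARCHAR(250) OUTPUT, @id INT', @v = @value OUTPUT, @id = 568470
SELECT @value AS ecoa_code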
So I used my above query, and now I am using a CTE to build my basic result list. In the CTE I create update statements, which are all put into a temp table.
I extract the update statements and execute them against the temp table. And voilà, I have my results!
IF(OBJECT_ID('tempdb..#Temp') IS NOT NULL)
BEGIN
DROP TABLE #Temp
END
CREATE TABLE #Temp
(
usb_data VARCHAR(500),
cra_data VARCHAR(500)
);
WITH ErrorFieldsCTE(id, field, usb_data, cra_data, AUD, SOR, acceptable_variance, is_variance_known, is_reviewed)
AS(
SELECT
+ 'UPDATE #TEMP SET usb_data = (SELECT ' + FR_base.field_name +' FROM CRA_METRO2_BASE WHERE id = '+ CONVERT(VARCHAR(25), B.id) +' ) WHERE id = ' + CONVERT(VARCHAR(15), E.id) + ' ' [usb_data],
+ 'UPDATE #TEMP SET cra_data = (SELECT ' + FR_audit.field_name +' FROM CRA_INNOVIS_INBOUND_AUDIT_INFORMATION WHERE id = '+ CONVERT(VARCHAR(25), A.id) +') WHERE id = ' + CONVERT(VARCHAR(15), E.id) + ' ' [cra_data]
FROM CRA_INNOVIS_AUDIT_ERROR_FIELDS E
LEFT JOIN CRA_METRO2_BASE B
ON B.id = E.base_id
LEFT JOIN CRA_INNOVIS_INBOUND_AUDIT_INFORMATION A
ON A.id = E.audit_id
LEFT JOIN CRA_METRO2_FIELD_REF FR_audit
ON FR_audit.id = E.audit_field_ref
LEFT JOIN CRA_METRO2_FIELD_REF FR_base
ON FR_base.id = E.base_field_ref
WHERE E.audit_id = @audit_id
)
INSERT INTO #Temp
SELECT
id, field, usb_data, cra_data, AUD, SOR, acceptable_variance, is_variance_known, is_reviewed
FROM ErrorFieldsCTE
DECLARE @usb_data VARCHAR(MAX) = '', @cra_data VARCHAR(MAX) = '' -- needed for the += concatenation below
SELECT -- extract query
@usb_data += usb_data + '',
@cra_data += cra_data + ''
FROM #Temp
EXEC(@usb_data) -- updating temp table, selects usb-data
EXEC(@cra_data) -- updating temp table, selects cra-data
SELECT -- return to web
id, field, usb_data, cra_data, AUD, SOR, acceptable_variance, is_variance_known, is_reviewed
FROM #Temp
IF(OBJECT_ID('tempdb..#Temp') IS NOT NULL)
Begin
Drop Table #Temp
End

Postgresql, select a "fake" row

In Postgres 8.4 or higher, what is the most efficient way to get a row of data populated by defaults without actually creating the row. Eg, as a transaction (pseudocode):
create table "mytable"
(
id serial PRIMARY KEY NOT NULL,
parent_id integer NOT NULL DEFAULT 1,
random_id integer NOT NULL DEFAULT random(),
)
begin transaction
fake_row = insert into mytable (id) values (0) returning *;
delete from mytable where id=0;
return fake_row;
end transaction
Basically I'd expect a query with a single row where parent_id is 1 and random_id is a random number (or another function's return value), but I don't want this record to persist in the table or to impact the primary key sequence serial_id_seq.
My options seem to be using a transaction like above or creating views which are copies of the table with the fake row added but I don't know all the pros and cons of each or whether a better way exists.
I'm looking for an answer that assumes no prior knowledge of the datatypes or default values of any column except id or the number or ordering of the columns. Only the table name will be known and that a record with id 0 should not exist in the table.
In the past I created the fake record 0 as a permanent record but I've come to consider this record a type of pollution (since I typically have to filter it out of future queries).
You can copy the table definition and defaults to the temp table with:
CREATE TEMP TABLE table_name_rt (LIKE table_name INCLUDING DEFAULTS);
And use this temp table to generate dummy rows. Such a table will be dropped at the end of the session (or transaction) and will only be visible to the current session.
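For the mytable example, a minimal sketch of that idea (inserting an explicit id means the copied serial default is never evaluated, so the original sequence stays untouched; ON COMMIT DROP cleans up in either case):
begin;
create temp table mytable_rt (like mytable including defaults) on commit drop;
insert into mytable_rt (id) values (0) returning *;
rollback;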
You can query the catalog and build a dynamic query
Say we have this table:
create table test10(
id serial primary key,
first_name varchar( 100 ),
last_name varchar( 100 ) default 'Tom',
age int not null default 38,
salary float default 100.22
);
When you run following query:
SELECT string_agg( txt, ' ' order by id )
FROM (
select 1 id, 'SELECT ' txt
union all
select 2, -9999 || ' as id '
union all
select 3, ', '
|| coalesce( column_default, 'null'||'::'||c.data_type )
|| ' as ' || c.column_name
from information_schema.columns c
where table_schema = 'public'
and table_name = 'test10'
and ordinal_position > 1
) xx
;
you will get this string as a result:
"SELECT -9999 as id , null::character varying as first_name ,
'Tom'::character varying as last_name , 38 as age , 100.22 as salary"
then execute this query and you will get the "phantom row".
We can build a function that builds and executes the query and returns our row as a result:
CREATE OR REPLACE FUNCTION get_phantom_rec (p_i test10.id%type )
returns test10 as $$
DECLARE
v_sql text;
myrow test10%rowtype;
begin
SELECT string_agg( txt, ' ' order by id )
INTO v_sql
FROM (
select 1 id, 'SELECT ' txt
union all
select 2, p_i || ' as id '
union all
select 3, ', '
|| coalesce( column_default, 'null'||'::'||c.data_type )
|| ' as ' || c.column_name
from information_schema.columns c
where table_schema = 'public'
and table_name = 'test10'
and ordinal_position > 1
) xx
;
EXECUTE v_sql INTO myrow;
RETURN myrow;
END$$ LANGUAGE plpgsql ;
and then this simple query gives you what you want:
select * from get_phantom_rec ( -9999 );
id | first_name | last_name | age | salary
-------+------------+-----------+-----+--------
-9999 | | Tom | 38 | 100.22
I would just select the fake values as literals:
select 1 id, 1 parent_id, 1 user_id
The returned row will be (virtually) indistinguishable from a real row.
To get the values from the catalog:
select
0 as id, -- special case for serial type, just return 0
(select column_default::int -- Cast to int, because we know the column is int
from INFORMATION_SCHEMA.COLUMNS
where table_name = 'mytable'
and column_name = 'parent_id') as parent_id,
(select column_default::int -- Cast to int, because we know the column is int
from INFORMATION_SCHEMA.COLUMNS
where table_name = 'mytable'
and column_name = 'user_id') as user_id;
Note that you must know what the columns are and their type, but this is reasonable. If you change the table schema (except default value), you would need to tweak the query.
See the above as a SQLFiddle.

How to add a column in TSQL after a specific column?

I have a table:
MyTable
ID
FieldA
FieldB
I want to alter the table and add a column so it looks like:
MyTable
ID
NewField
FieldA
FieldB
In MySQL I would so a:
ALTER TABLE MyTable ADD COLUMN NewField int NULL AFTER ID;
One line, nice, simple, works great. How do I do this in Microsoft's world?
Unfortunately you can't.
If you really want them in that order you'll have to create a new table with the columns in that order and copy data. Or rename columns etc. There is no easy way.
Solution:
This will work for tables where there are no dependencies on the changing table which would trigger cascading events. First make sure you can drop the table you want to restructure without any disastrous repercussions. Take a note of all the dependencies and column constraints associated with your table (i.e. triggers, indexes, etc.). You may need to put them back in when you are done.
STEP 1: create the temp table to hold all the records from the table you want to restructure. Do not forget to include the new column.
CREATE TABLE #tmp_myTable
( [new_column] [int] NOT NULL, -- new column has been inserted here!
[idx] [bigint] NOT NULL,
[name] [nvarchar](30) NOT NULL,
[active] [bit] NOT NULL
)
STEP 2: Make sure all records have been copied over and that the column structure looks the way you want.
SELECT TOP 10 * FROM #tmp_myTable ORDER BY 1 DESC
-- you can do COUNT(*) or anything to make sure you copied all the records
STEP 3: DROP the original table:
DROP TABLE myTable
If you are paranoid that bad things could happen, just rename the original table (instead of dropping it). This way it can always be brought back.
EXEC sp_rename myTable, myTable_Copy
STEP 4: Recreate the table myTable the way you want (it should match the #tmp_myTable table structure)
CREATE TABLE myTable
( [new_column] [int] NOT NULL,
[idx] [bigint] NOT NULL,
[name] [nvarchar](30) NOT NULL,
[active] [bit] NOT NULL
)
-- do not forget any constraints you may need
STEP 5: Copy the all the records from the temp #tmp_myTable table into the new (improved) table myTable.
INSERT INTO myTable ([new_column],[idx],[name],[active])
SELECT [new_column],[idx],[name],[active]
FROM #tmp_myTable
STEP 6: Check if all the data is back in your new, improved table myTable. If yes, clean up after yourself and DROP the temp table #tmp_myTable and the myTable_Copy table if you chose to rename it instead of dropping it.
You should be able to do this if you create the column using the GUI in Management Studio. I believe Management Studio is actually completely recreating the table, which is why this appears to be possible.
As others have mentioned, the order of columns in a table doesn't matter, and if it does there is something wrong with your code.
In SQL Server Management Studio, open up your table, add the column where you want it, and then -- instead of saving the change -- generate the change script. You can see how it's done in SQL.
In short, what others have said is right. SQL Management studio pulls all your data into a temp table, drops the table, recreates it with columns in the right order, and puts the temp table data back in there. There is no simple syntax for adding a column in a specific position.
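For reference, the script it generates follows roughly this pattern (a simplified sketch; the types of FieldA/FieldB are assumptions here, and the real script also carries over constraints, indexes, and permissions):
BEGIN TRANSACTION
CREATE TABLE dbo.Tmp_MyTable
(
ID int NOT NULL,
NewField int NULL,
FieldA int NULL,
FieldB int NULL
)
IF EXISTS (SELECT * FROM dbo.MyTable)
EXEC ('INSERT INTO dbo.Tmp_MyTable (ID, FieldA, FieldB)
SELECT ID, FieldA, FieldB FROM dbo.MyTable WITH (HOLDLOCK TABLOCKX)')
DROP TABLE dbo.MyTable
EXECUTE sp_rename N'dbo.Tmp_MyTable', N'MyTable', 'OBJECT'
COMMIT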
/*
Script to change the column order of a table
Note this will create a new table to replace the original table.
WARNING : Original Table could be dropped.
HOWEVER it doesn't copy the triggers or other table properties - just the data
*/
Generate a new table with the columns in the order that you require
Select Column2, Column1, Column3 Into NewTable from OldTable
Delete the original table
Drop Table OldTable;
Rename the new table
EXEC sp_rename 'NewTable', 'OldTable';
In Microsoft SQL Server Management Studio (the admin tool for MSSQL) just go into "design" on a table and drag the column to the new position. Not command line but you can do it.
This is absolutely possible. Although you shouldn't do it unless you know what you are dealing with.
Took me about 2 days to figure it out.
Here is a stored procedure where I enter:
---database name
(schema name is "_" for readability)
---table name
---column
---column data type
(column added is always null, otherwise you won't be able to insert)
---the position of the new column.
Since I'm working with tables from the SAM toolkit (and some of them have > 80 columns), a typical variable won't be able to contain the query. That forces the need for an external file. Now be careful where you store that file and who has access on the NTFS and network level.
Cheers!
USE [master]
GO
/****** Object: StoredProcedure [SP_Set].[TrasferDataAtColumnLevel] Script Date: 8/27/2014 2:59:30 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [SP_Set].[TrasferDataAtColumnLevel]
(
@database varchar(100),
@table varchar(100),
@column varchar(100),
@position int,
@datatype varchar(20)
)
AS
BEGIN
set nocount on
exec ('
declare @oldC varchar(200), @oldCDataType varchar(200), @oldCLen int,@oldCPos int
create table Test ( dummy int)
declare @columns varchar(max) = ''''
declare @columnVars varchar(max) = ''''
declare @columnsDecl varchar(max) = ''''
declare @printVars varchar(max) = ''''
DECLARE MY_CURSOR CURSOR LOCAL STATIC READ_ONLY FORWARD_ONLY FOR
select column_name, data_type, character_maximum_length, ORDINAL_POSITION from ' + @database + '.INFORMATION_SCHEMA.COLUMNS where table_name = ''' + @table + '''
OPEN MY_CURSOR FETCH NEXT FROM MY_CURSOR INTO @oldC, @oldCDataType, @oldCLen, @oldCPos WHILE @@FETCH_STATUS = 0 BEGIN
if(@oldCPos = ' + @position + ')
begin
exec(''alter table Test add [' + @column + '] ' + @datatype + ' null'')
end
if(@oldCDataType != ''timestamp'')
begin
set @columns += @oldC + '' , ''
set @columnVars += ''@'' + @oldC + '' , ''
if(@oldCLen is null)
begin
if(@oldCDataType != ''uniqueidentifier'')
begin
set @printVars += '' print convert('' + @oldCDataType + '',@'' + @oldC + '')''
set @columnsDecl += ''@'' + @oldC + '' '' + @oldCDataType + '', ''
exec(''alter table Test add ['' + @oldC + ''] '' + @oldCDataType + '' null'')
end
else
begin
set @printVars += '' print convert(varchar(50),@'' + @oldC + '')''
set @columnsDecl += ''@'' + @oldC + '' '' + @oldCDataType + '', ''
exec(''alter table Test add ['' + @oldC + ''] '' + @oldCDataType + '' null'')
end
end
else
begin
if(@oldCLen < 0)
begin
set @oldCLen = 4000
end
set @printVars += '' print @'' + @oldC
set @columnsDecl += ''@'' + @oldC + '' '' + @oldCDataType + ''('' + convert(character,@oldCLen) + '') , ''
exec(''alter table Test add ['' + @oldC + ''] '' + @oldCDataType + ''('' + @oldCLen + '') null'')
end
end
if exists (select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = ''Test'' and column_name = ''dummy'')
begin
alter table Test drop column dummy
end
FETCH NEXT FROM MY_CURSOR INTO @oldC, @oldCDataType, @oldCLen, @oldCPos END CLOSE MY_CURSOR DEALLOCATE MY_CURSOR
set @columns = reverse(substring(reverse(@columns), charindex('','',reverse(@columns)) +1, len(@columns)))
set @columnVars = reverse(substring(reverse(@columnVars), charindex('','',reverse(@columnVars)) +1, len(@columnVars)))
set @columnsDecl = reverse(substring(reverse(@columnsDecl), charindex('','',reverse(@columnsDecl)) +1, len(@columnsDecl)))
set @columns = replace(replace(REPLACE(@columns, '' '', ''''), char(9) + char(9),'' ''), char(9), '''')
set @columnVars = replace(replace(REPLACE(@columnVars, '' '', ''''), char(9) + char(9),'' ''), char(9), '''')
set @columnsDecl = replace(replace(REPLACE(@columnsDecl, '' '', ''''), char(9) + char(9),'' ''),char(9), '''')
set @printVars = REVERSE(substring(reverse(@printVars), charindex(''+'',reverse(@printVars))+1, len(@printVars)))
create table query (id int identity(1,1), string varchar(max))
insert into query values (''declare '' + @columnsDecl + ''
DECLARE MY_CURSOR CURSOR LOCAL STATIC READ_ONLY FORWARD_ONLY FOR '')
insert into query values (''select '' + @columns + '' from ' + @database + '._.' + @table + ''')
insert into query values (''OPEN MY_CURSOR FETCH NEXT FROM MY_CURSOR INTO '' + @columnVars + '' WHILE @@FETCH_STATUS = 0 BEGIN '')
insert into query values (@printVars )
insert into query values ( '' insert into Test ('')
insert into query values (@columns)
insert into query values ( '') values ( '' + @columnVars + '')'')
insert into query values (''FETCH NEXT FROM MY_CURSOR INTO '' + @columnVars + '' END CLOSE MY_CURSOR DEALLOCATE MY_CURSOR'')
declare @path varchar(100) = ''C:\query.sql''
declare @query varchar(500) = ''bcp "select string from query order by id" queryout '' + @path + '' -t, -c -S '' + @@servername + '' -T''
exec master..xp_cmdshell @query
set @query = ''sqlcmd -S '' + @@servername + '' -i '' + @path
EXEC xp_cmdshell @query
set @query = ''del '' + @path
exec xp_cmdshell @query
drop table ' + @database + '._.' + @table + '
select * into ' + @database + '._.' + @table + ' from Test
drop table query
drop table Test ')
END
Even though the question is old, a more accurate answer about Management Studio is warranted.
You can create the column manually or with Management Studio. But Management Studio will require the table to be recreated, and will time out if you already have too much data in it, so avoid that unless the table is light.
To change the order of the columns you simply need to move them around in Management Studio. This should not require (exceptions most likely exist) Management Studio to recreate the table, since it most likely just changes the ordinal position of the columns in the table definition.
I've done it this way on numerous occasions with tables that I could not add columns to with the GUI because of the data in them. Then I moved the columns around with the GUI of Management Studio and simply saved them.
You will go from an assured timeout to a few seconds of waiting.
If you are using the GUI to do this, you must deselect the option "Prevent saving changes that require table re-creation" (under Tools > Options > Designers), allowing the table to be dropped.
1. Create a new table script with the new column added, e.g. [DBName].[dbo].[TableName]_NEW
2. Copy the old table data to the new table: INSERT INTO newTable (col1, col2, ...) SELECT col1, col2, ... FROM oldTable
3. Check that the old and new record counts are the same
4. DROP the old table
5. Rename newTable to oldTable
6. Re-run your SP to add the new column values
-- 1. Create New Add new Column Table Script
CREATE TABLE newTable
( [new_column] [int] NOT NULL, -- new column has been inserted here!
[idx] [bigint] NOT NULL,
[name] [nvarchar](30) NOT NULL,
[active] [bit] NOT NULL
)
-- 2. COPY old table data to new table:
INSERT INTO newTable ([new_column],[idx],[name],[active])
SELECT [new_column],[idx],[name],[active]
FROM oldTable
-- 3. Check records old and new are the same:
select sum(cnt) FROM (
SELECT 'table_1' AS table_name, COUNT(*) cnt FROM newTable
UNION
SELECT 'table_2' AS table_name, -COUNT(*) cnt FROM oldTable
) AS cnt_sum
-- 4. DROP old table
DROP TABLE oldTable
-- 5. rename newtable to oldtable
USE [DB_NAME]
EXEC sp_rename newTable, oldTable
You have to rebuild the table. Luckily, the order of the columns doesn't matter at all!
Watch as I magically reorder your columns:
SELECT ID, Newfield, FieldA, FieldB FROM MyTable
Also this has been asked about a bazillion times before.