How to eliminate NULLs and insert with a split delimiter - T-SQL

-- Repro script: loads four delimited lists into a single temp table using one
-- INSERT per column. Every INSERT adds new rows, so each row ends up with a
-- value in exactly one column and NULL in the other three.

-- Drop the temp table only when an earlier run left it behind; an
-- unconditional DROP raises an error the first time the script is executed.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp;

CREATE TABLE #Temp
(
    col1 VARCHAR(20),
    col2 VARCHAR(20),
    Col3 VARCHAR(50),
    col4 VARCHAR(20)
);

-- Table is still empty here.
SELECT col1, col2, Col3, col4
FROM #Temp;

-- Item is the single column returned by SplitDelimiterString.
INSERT INTO #Temp (col1)
SELECT Item FROM SplitDelimiterString('123,456', ',');

INSERT INTO #Temp (col2)
SELECT Item FROM SplitDelimiterString('abc,def', ',');

INSERT INTO #Temp (Col3)
SELECT Item FROM SplitDelimiterString('fff,ggg', ',');

INSERT INTO #Temp (col4)
SELECT Item FROM SplitDelimiterString('520002,520003', ',');

SELECT col1, col2, Col3, col4
FROM #Temp;
FYI, SplitDelimiterString is a function.
-- Code for SplitDelimiterString
-- Splits @StringWithDelimiter on @Delimiter and returns one row per non-empty item.
--
-- Parameters:
--   @StringWithDelimiter  the delimited text to split (NULL or empty returns no rows)
--   @Delimiter            the separator, up to 8 characters
-- Returns:
--   @ItemTable(Item)      one row for each item whose LEN() is greater than zero
CREATE FUNCTION [dbo].[SplitDelimiterString] (@StringWithDelimiter VARCHAR(8000), @Delimiter VARCHAR(8))
RETURNS @ItemTable TABLE (Item VARCHAR(8000))
AS
BEGIN
    DECLARE @StartingPosition INT;
    DECLARE @ItemInString VARCHAR(8000);
    DECLARE @DelimiterLength INT;

    SET @StartingPosition = 1;

    -- LEN() ignores trailing spaces and would report 0 for a ' ' delimiter,
    -- which made the remainder calculation keep the delimiter and drop the
    -- last character. DATALENGTH() counts every byte of the VARCHAR value.
    SET @DelimiterLength = DATALENGTH(@Delimiter);

    -- Return if string is null or empty
    IF @StringWithDelimiter IS NULL OR LEN(@StringWithDelimiter) = 0 RETURN;

    WHILE @StartingPosition > 0
    BEGIN
        -- Position of the next delimiter; 0 when the string no longer contains one
        SET @StartingPosition = CHARINDEX(@Delimiter, @StringWithDelimiter);

        -- Take the text before the delimiter, or the whole remainder if none is left
        IF @StartingPosition > 0
            SET @ItemInString = SUBSTRING(@StringWithDelimiter, 1, @StartingPosition - 1)
        ELSE
            SET @ItemInString = @StringWithDelimiter;

        -- Only non-empty items are added to the return table
        IF (LEN(@ItemInString) > 0)
            INSERT INTO @ItemTable (Item) VALUES (@ItemInString);

        -- Drop the consumed item and its delimiter; SUBSTRING clamps the
        -- requested length to the end of the string.
        SET @StringWithDelimiter = SUBSTRING(@StringWithDelimiter,
                                             @StartingPosition + @DelimiterLength,
                                             LEN(@StringWithDelimiter));

        -- Break loop if nothing is left to split
        IF LEN(@StringWithDelimiter) = 0 BREAK;
    END

    RETURN
END
-- The result set is
Col1 Col2 Col3 Col4
123 NULL NULL NULL
456 NULL NULL NULL
NULL abc NULL NULL
NULL def NULL NULL
NULL NULL fff NULL
NULL NULL ggg NULL
NULL NULL NULL 520002
NULL NULL NULL 520003
-- I need a result set like
-- The result set is
col1 col2 col3 col4
123 abc fff 520002
456 def ggg 520003
Please help.

--- Figured it out myself. Thanks to @liebs19 for the logic.
-- Loads each delimited list into its own temp table with an IDENTITY RowID,
-- then joins the four tables on RowID so the n-th items line up in one row.
-- Everything runs inside a transaction that is rolled back at the end.
-- NOTE(review): alignment assumes the IDENTITY values follow the order of the
-- items in each list - confirm for the split function in use.
BEGIN TRAN

CREATE TABLE #Temp1
(
    RowID INT NOT NULL IDENTITY(1,1) PRIMARY KEY,
    col1  VARCHAR(20)
)

CREATE TABLE #Temp2
(
    RowID INT NOT NULL IDENTITY(1,1) PRIMARY KEY,
    col2  VARCHAR(20)
)

CREATE TABLE #Temp3
(
    RowID INT NOT NULL IDENTITY(1,1) PRIMARY KEY,
    col3  VARCHAR(20)
)

CREATE TABLE #Temp4
(
    RowID INT NOT NULL IDENTITY(1,1) PRIMARY KEY,
    col4  VARCHAR(20)
)

-- Item is the only column the split function returns.
INSERT INTO #Temp1 (col1)
SELECT Item FROM SplitDelimiterString('123,456', ',')

INSERT INTO #Temp2 (col2)
SELECT Item FROM SplitDelimiterString('abc,def', ',')

INSERT INTO #Temp3 (Col3)
SELECT Item FROM SplitDelimiterString('fff,ggg', ',')

INSERT INTO #Temp4 (col4)
SELECT Item FROM SplitDelimiterString('520002,520003', ',')

-- Inner joins: a RowID must exist in all four tables to appear in the output.
SELECT
    first_list.Col1,
    second_list.col2,
    third_list.Col3,
    fourth_list.Col4
FROM #Temp1 AS first_list
INNER JOIN #Temp2 AS second_list
    ON first_list.RowID = second_list.RowID
INNER JOIN #Temp3 AS third_list
    ON first_list.RowID = third_list.RowID
INNER JOIN #Temp4 AS fourth_list
    ON first_list.RowID = fourth_list.RowID

ROLLBACK TRAN
-- This is the output finally I am looking for.
col1 col2 col3 col4
123 abc fff 520002
456 def ggg 520003
.

Related

Is it possible to find duplicating records in two columns simultaneously in PostgreSQL?

I have the following database schema (oversimplified):
-- Simplified partners schema with four sample rows.
-- test1/test2 share both company_id and vat_id; test3 shares only vat_id with
-- them; test4 shares nothing.
CREATE SEQUENCE partners_partner_id_seq;

CREATE TABLE partners
(
    partner_id INTEGER      DEFAULT nextval('partners_partner_id_seq'::regclass) NOT NULL PRIMARY KEY,
    name       VARCHAR(255) DEFAULT NULL::character varying,
    company_id VARCHAR(20)  DEFAULT NULL::character varying,
    vat_id     VARCHAR(50)  DEFAULT NULL::character varying,
    is_deleted BOOLEAN      DEFAULT false NOT NULL
);

INSERT INTO partners (name, company_id, vat_id)
VALUES
    ('test1', '1010109191191', 'BG1010109191192'),
    ('test2', '1010109191191', 'BG1010109191192'),
    ('test3', '3214567890102', 'BG1010109191192'),
    ('test4', '9999999999999', 'GE9999999999999');
I am trying to figure out how to return test1, test2 (because the company_id column value duplicates vertically) and test3 (because the vat_id column value duplicates vertically as well).
To put it in other words - I need to find duplicating company_id and vat_id records and group them together, so that test1, test2 and test3 would be together, because they duplicate by company_id and vat_id.
So far I have the following query:
-- Asker's attempt at listing duplicated partners.
-- Rows are numbered inside each (company_id, vat_id) pair, so two rows only
-- count as duplicates of each other when BOTH columns match.
SELECT *
FROM (
-- nextrow is the row number of the following row; the window has no ORDER BY,
-- so "following" is whatever order the inner query happens to produce.
SELECT *, LEAD(row, 1) OVER () AS nextrow
FROM (
SELECT *, ROW_NUMBER() OVER (w) AS row
FROM partners
WHERE is_deleted = false
-- keep rows that have at least one non-empty key
AND ((company_id != '' AND company_id IS NOT null) OR (vat_id != '' AND vat_id IS NOT NULL))
WINDOW w AS (PARTITION BY company_id, vat_id ORDER BY partner_id DESC)
) x
) y
-- keep a row if it is a 2nd+ member of its partition or the next row is one
WHERE (row > 1 OR nextrow > 1)
AND is_deleted = false
This successfully shows all company_id duplicates, but does not appear to show vat_id ones - test3 row is missing. Is this possible to be done within one query?
Here is a db-fiddle with the schema, data and predefined query reproducing my result.
You can do this with recursion, but depending on the size of your data you may want to iterate, instead.
The trick is to make the name just another match key instead of treating it differently than the company_id and vat_id:
-- Answer's version of the partners table, with two extra rows (test5, test6)
-- that link further partners through shared company_id / vat_id values.
CREATE TABLE partners (
    partner_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    name       TEXT,
    company_id TEXT,
    vat_id     TEXT,
    is_deleted BOOLEAN NOT NULL DEFAULT false
);

INSERT INTO partners (name, company_id, vat_id)
VALUES
    ('test1', '1010109191191', 'BG1010109191192'),
    ('test2', '1010109191191', 'BG1010109191192'),
    ('test3', '3214567890102', 'BG1010109191192'),
    ('test4', '9999999999999', 'GE9999999999999'),
    ('test5', '3214567890102', 'BG8888888888888'),
    ('test6', '2983489023408', 'BG8888888888888');
I added a couple of test cases and left in the lone partner.
-- Groups partners that are linked, directly or transitively, by a shared
-- name, company_id or vat_id, and returns each maximal group as a sorted
-- array of partner_ids.
-- The containment tests use the array operator @> ("contains").
-- NOTE(review): sort(int[]) is not a core function; it is expected to come
-- from the intarray extension - confirm it is installed.
with recursive keys as (
    -- one row per partner: its prefixed match keys and a single-element group
    select partner_id,
           array['n_'||name, 'c_'||company_id, 'v_'||vat_id] as matcher,
           array[partner_id] as matchlist,
           1 as size
    from partners
), matchers as (
    select *
    from keys
    union all
    -- extend a group with any partner that shares a key with the row reached
    -- so far (&& = arrays overlap) and is not already in the group
    select p.partner_id, c.matcher,
           p.matchlist||c.partner_id as matchlist,
           p.size + 1
    from matchers p
    join keys c
      on c.matcher && p.matcher
     and not p.matchlist @> array[c.partner_id]
), largest as (
    -- keep only groups that no strictly larger group contains
    select distinct sort(matchlist) as matchlist
    from matchers m
    where not exists (select 1
                      from matchers
                      where matchlist @> m.matchlist
                        and size > m.size)
    -- and size > 1   -- enable to hide partners that match nobody
)
select *
from largest
;
matchlist
{1,2,3,5,6}
{4}
fiddle
EDIT UPDATE
Since recursion did not perform, here is an iterative example in plpgsql that uses a temporary table:
-- Working table for the iterative grouping: three rows per partner, one for
-- each match key (name, company_id, vat_id). The prefixed key is hashed with
-- md5 and stored as a uuid. Every partner starts in its own group
-- (group_id = partner_id).
CREATE TEMPORARY TABLE match1 (
    partner_id INT  NOT NULL,
    group_id   INT  NOT NULL,
    matchkey   UUID NOT NULL
);

CREATE INDEX ON match1 (matchkey);
CREATE INDEX ON match1 (group_id);

INSERT INTO match1 (partner_id, group_id, matchkey)
SELECT partner_id, partner_id, md5('n_' || name)::uuid
FROM partners
UNION ALL
SELECT partner_id, partner_id, md5('c_' || company_id)::uuid
FROM partners
UNION ALL
SELECT partner_id, partner_id, md5('v_' || vat_id)::uuid
FROM partners;
-- Repeatedly merges groups in match1 until a pass changes no rows.
-- Each pass moves every group to the smallest group_id found among rows
-- sharing any of its match keys.
do $$
declare _cnt bigint;
begin
loop
-- consolidate: for each row, the smallest group_id among rows with the same matchkey
with consolidate as (
select group_id,
min(group_id) over (partition by matchkey) as new_group_id
from match1
-- minimize: the smallest candidate per current group
), minimize as (
select group_id, min(new_group_id) as new_group_id
from consolidate
group by group_id
-- doupdate: relabel only the rows whose group actually changes
), doupdate as (
update match1
set group_id = m.new_group_id
from minimize m
where m.group_id = match1.group_id
and m.new_group_id != match1.group_id
returning *
)
select count(*) into _cnt from doupdate;
-- no rows updated in this pass: the grouping is stable, stop
if _cnt = 0 then
exit;
end if;
end loop;
end;
$$;
updated fiddle

PostgreSQL grouping

I would like to group values according to the values in other columns.
This is an example:
I would like to get the output:
{{-30,-50,20},{-20,30,60},{-30,NULL or other value, 20}}
I managed to arrive to:
-- One array of val per m_id for t_id = 1; a missing s_id simply leaves the
-- array shorter.
SELECT array_agg(t."val")
FROM my_table AS t
WHERE t."t_id" = 1
GROUP BY t."m_id";
{{-30,-50,20},{-20,30,60},{-30,20}}
What would be the best approach?
-- Sample data: m_id 3 has no row for s_id 2.
CREATE TABLE my_table (
    t_id INT,
    m_id INT,
    s_id INT,
    val  INT
);

INSERT INTO my_table (t_id, m_id, s_id, val)
VALUES
    (1, 1, 1, -30),
    (1, 1, 2, -50),
    (1, 1, 3,  20),
    (1, 2, 1, -20),
    (1, 2, 2,  30),
    (1, 2, 3,  60),
    (1, 3, 1, -30),
    (1, 3, 3,  20);
-- Builds every (t_id, m_id) x s_id combination, then left-joins the real rows
-- onto that grid so a missing s_id shows up as NULL inside the array.
WITH slots AS (
    SELECT groups.t_id, groups.m_id, positions.s_id
    FROM (
        SELECT DISTINCT t_id, m_id
        FROM my_table
    ) AS groups
    CROSS JOIN (
        SELECT DISTINCT s_id
        FROM my_table
    ) AS positions
)
SELECT array_agg(t.val ORDER BY slots.s_id)
FROM slots
LEFT JOIN my_table AS t USING (t_id, m_id, s_id)
WHERE slots.t_id = 1
GROUP BY slots.m_id
ORDER BY slots.m_id
;
array_agg
---------------
{-30,-50,20}
{-20,30,60}
{-30,NULL,20}

How to write One query for multiple groupings?

In the queries below, how can I make just one query that will give me the results, instead of making copies with diff groupings and unioning them?
If possible.
Thanks in advance!!
-- Sample data plus the asker's four-way UNION: the same SUM at successively
-- finer grouping levels, with the unused grouping columns returned as NULL.
create table #temp1 (col1 varchar(50), col2 varchar(50), col3 varchar(50), col4 varchar(50), col5 varchar(50), sumit int)

insert into #temp1 (col1, col2, col3, col4, col5, sumit) values ('AEAMS','CE Europe', 'Belarus', 'Govt', 'Int Fed Gvt', 1)
insert into #temp1 (col1, col2, col3, col4, col5, sumit) values ('AEAMS','CE Europe', 'Belarus', 'Govt', 'Public Lib', 1)
insert into #temp1 (col1, col2, col3, col4, col5, sumit) values ('AEDS','Japan', 'Japan C', 'Acad', 'CollUnive', 1)
insert into #temp1 (col1, col2, col3, col4, col5, sumit) values ('AEDS','Japan', 'Japan F', 'Acad', 'Med', 1)
insert into #temp1 (col1, col2, col3, col4, col5, sumit) values ('A- Regular Databases','UK and Ireland', 'Ireland', 'School', 'HIGH SCHOOL', 1)

-- Level 1: per col1
Select col1 CC, null GM, null Terr, null Mkt, null Seg, sum(sumit) SS
from #temp1
group by col1
Union
-- Level 2: per col1, col2
Select col1 CC, col2 GM, null Terr, null Mkt, null Seg, sum(sumit) SS
from #temp1
group by col1, col2
Union
-- Level 3: per col1, col2, col3
Select col1 CC, col2 GM, col3 Terr, null Mkt, null Seg, sum(sumit) SS
from #temp1
group by col1, col2, col3
Union
-- Level 4: grouped down to col5, although Seg is still returned as NULL
Select col1 CC, col2 GM, col3 Terr, col4 Mkt, null Seg, sum(sumit) SS
from #temp1
group by col1, col2, col3, col4, col5
Try using WITH ROLLUP:
-- One pass with ROLLUP: produces the sum at every prefix of the grouping list
-- (col1; col1,col2; ... ; col1..col5) plus a grand-total row.
-- col5 is returned as Seg so the most detailed rows are distinguishable from
-- the col4-level subtotals; rolled-up columns appear as NULL.
Select col1 CC, col2 GM, col3 Terr, col4 Mkt, col5 Seg, sum(sumit) SS
from #temp1
group by col1, col2, col3, col4, col5
with rollup
SQL Fiddle Example

in T-SQL, is it possible to find names of columns containing NULL in a given row (without knowing all column names)?

Is it possible in T-SQL to write a proper query reflecting this pseudo-code:
SELECT {primary_key}, {column_name}
FROM {table}
WHERE {any column_name value} is NULL
i.e. without referencing each column-name explicitly.
Sounds simple enough but I've searched pretty extensively and found nothing.
You have to use dynamic sql to solve that problem. I have demonstrated how it could be done.
With this SQL you can pick a table and check the row with id = 1 for NULL columns and primary key columns. I included a test table at the bottom of the script. The code will not display anything if there are no primary keys and no NULL columns.
-- Lists, for one chosen row of a table, which columns are NULL and which
-- columns are key columns, without naming the columns in advance.
-- Step 1 builds one SELECT per non-key column that yields a row when that
--        column is NULL for the chosen row.
-- Step 2 prepends a SELECT over KEY_COLUMN_USAGE for the key columns.
-- Step 3 runs the generated UNION ALL, concatenates its output into a select
--        list and executes that.
-- NOTE(review): @table_name and @chosencolumn are concatenated straight into
-- dynamic SQL - only use with trusted values.
-- The working variables are sized generously; the generated statements quickly
-- outgrow 100 characters on tables with more than a few columns.
DECLARE @table_name VARCHAR(128)
DECLARE @chosencolumn VARCHAR(200)
DECLARE @sqlstring VARCHAR(MAX)
DECLARE @sqlstring2 VARCHAR(MAX)
DECLARE @text VARCHAR(MAX)
DECLARE @t TABLE (col1 VARCHAR(200), dummy INT)
SET @table_name = 'test_table' -- replace with your tablename if you want
SET @chosencolumn = 'ID=1' -- replace with criteria for selected row
-- Step 1: one "UNION ALL SELECT ..." fragment per column that is not in KEY_COLUMN_USAGE
SELECT @sqlstring = COALESCE(@sqlstring, '') + 'UNION ALL SELECT '',''''NULL '''' '' + '''+t1.column_name+''', 1000 ordinal_position FROM ['+@table_name+'] WHERE [' +t1.column_name+ '] is null and ' +@chosencolumn+ ' '
FROM INFORMATION_SCHEMA.COLUMNS t1
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE t2
ON t1.column_name = t2.column_name
AND t1.table_name = t2.table_name
AND t1.table_schema = t2.table_schema
WHERE t1.table_name = @table_name
AND t2.column_name is null
-- Step 2: STUFF removes the leading 'UNION ALL ' (10 characters) from the combined statement
SET @sqlstring = stuff('UNION ALL SELECT '',''''PRIMARY KEY'''' ''+ column_name + '' '' col1, ordinal_position
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
WHERE table_name = ''' + @table_name+ '''' + @sqlstring, 1, 10, '') + 'order by 2'
-- Step 3: collect the generated select-list entries, then run them as one SELECT
INSERT @t
EXEC( @sqlstring)
SELECT @text = COALESCE(@text, '') + col1
FROM @t
-- STUFF drops the leading comma of the first entry
SET @sqlstring2 ='select '+stuff(@text,1,1,'')
EXEC( @sqlstring2)
Result:
id host_id date col1
PRIMARY KEY PRIMARY KEY PRIMARY KEY NULL
Test table
-- Test table with a three-column clustered primary key and two nullable
-- columns, plus one row whose col1 is NULL.
CREATE TABLE [dbo].[test_table]
(
    [id]      INT          NOT NULL,
    [host_id] INT          NOT NULL,
    [date]    DATETIME     NOT NULL,
    [col1]    VARCHAR(20)  NULL,
    [col2]    VARCHAR(20)  NULL,
    CONSTRAINT [PK_test_table] PRIMARY KEY CLUSTERED
    (
        [id] ASC,
        [host_id] ASC,
        [date] ASC
    )
)

INSERT INTO test_table ([id], [host_id], [date], [col1], [col2])
VALUES (1, 1, getdate(), null, 'somevalue')

SQL Server 2008 T-SQL UDF Split() Tailoring

I'm using SQL Server 2008 and have a large table with only one column of data. The data is a random string with very little consistency. Example: Name Account 445566 0010020056893010445478008 AFD 369. I've been working with a split function that a Stack Overflow user suggested. It works great, but the function puts the split values into one column. I need a row of individual columns. The present result is one column with the values Name, Account, 445566,... in it, but the result I'm looking for is col1 Name, col2 Account, col3 445566,...
If anyone could provide some insight on how to tailor this script or its usage to get the desired result it would be much appreciated.
-- Splits @String on a single-character @Delimiter.
--
-- Parameters:
--   @String     text to split; NULL is treated as ''
--   @Delimiter  one-character separator
-- Returns:
--   @Results(Ordinal, StringValue): one row per piece, numbered from 1 in the
--   order the pieces occur. Pieces between adjacent delimiters are returned as
--   empty strings; a trailing remainder equal to '' is not returned.
CREATE FUNCTION [dbo].[Split]
(
    @String varchar(max)
   ,@Delimiter char
)
RETURNS @Results table
(
    Ordinal int
   ,StringValue varchar(max)
)
as
begin
    set @String = isnull(@String,'')
    set @Delimiter = isnull(@Delimiter,'')

    declare
        @TempString varchar(max) = @String
       ,@Ordinal int = 0
       ,@CharIndex int = 0

    set @CharIndex = charindex(@Delimiter, @TempString)

    -- Peel off one piece per delimiter found
    while @CharIndex != 0 begin
        set @Ordinal += 1
        insert @Results values
        (
            @Ordinal
            -- start 0 with length @CharIndex yields the text before the delimiter
           ,substring(@TempString, 0, @CharIndex)
        )
        -- continue with the text after the delimiter
        set @TempString = substring(@TempString, @CharIndex + 1, len(@TempString) - @CharIndex)
        set @CharIndex = charindex(@Delimiter, @TempString)
    end

    -- Whatever is left after the last delimiter is the final piece
    if @TempString != '' begin
        set @Ordinal += 1
        insert @Results values
        (
            @Ordinal
           ,@TempString
        )
    end

    return
end
--The usage:
-- Splits TheColumn of every row in mytable on spaces and keeps only the
-- pieces that are not blank.
SELECT *
FROM mytable AS M
CROSS APPLY [dbo].[Split](M.TheColumn, ' ') AS S
WHERE rtrim(S.StringValue) != ''
If you know that you have 6 columns in the string you can use a split functions that looks like this and of course modify the function to whatever number of columns you want. A function can not return a dynamic number of columns.
-- Inline table-valued function that splits @String into exactly six columns.
--
-- Parameters:
--   @String     text to split
--   @Delimiter  one-character separator
-- Returns:
--   a single row (Col1..Col6). Six delimiters are appended to the input so all
--   six positions are always found; columns beyond the available pieces come
--   back as empty strings.
create function dbo.Split6(@String varchar(max), @Delimiter char(1))
returns table as return
(
    select
        substring(T.Col, 1, S1.Pos-1) as Col1,
        substring(T.Col, S1.Pos+1, S2.Pos-S1.Pos-1) as Col2,
        substring(T.Col, S2.Pos+1, S3.Pos-S2.Pos-1) as Col3,
        substring(T.Col, S3.Pos+1, S4.Pos-S3.Pos-1) as Col4,
        substring(T.Col, S4.Pos+1, S5.Pos-S4.Pos-1) as Col5,
        substring(T.Col, S5.Pos+1, S6.Pos-S5.Pos-1) as Col6
    from (select @String+replicate(@Delimiter, 6)) as T(Col)
    -- each S<n>.Pos is the position of the n-th delimiter, searched from just
    -- past the previous one
    cross apply (select charindex(@Delimiter, T.Col, 1)) as S1(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S1.Pos+1)) as S2(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S2.Pos+1)) as S3(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S3.Pos+1)) as S4(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S4.Pos+1)) as S5(Pos)
    cross apply (select charindex(@Delimiter, T.Col, S5.Pos+1)) as S6(Pos)
)
Test:
-- Test data for dbo.Split6: a full six-piece string, an empty string, and two
-- short strings. @T is a table variable.
declare @T table (Col varchar(100))

insert into @T (Col) values
    ('Name Account 445566 0010020056893010445478008 AFD 369'),
    (''),
    ('1 2'),
    ('1 3')

select S.Col1, S.Col2, S.Col3, S.Col4, S.Col5, S.Col6
from @T as T
cross apply dbo.Split6(T.Col, ' ') as S
Result:
Col1 Col2 Col3 Col4 Col5 Col6
---- ------- ------ ------------------------- ---- ----
Name Account 445566 0010020056893010445478008 AFD 369
1 2
1 3
You might try using a PIVOT.
http://msdn.microsoft.com/en-us/library/ms177410.aspx