TSQL - Problem converting and updating a float column - tsql

I have tried many ways but could not find the answer. My problem is:
There is a table ORG_DATA_AS_VARCHAR with a column FLOATNMBRS (varchar):
FLOATNMBRS
--------------------------
0
0
*0,25 /*Yeah, there is a star in data ... bad data quality ....*/
*0,31
0
Now, my aim is to convert these strings to floats and write the new float values into an existing table CONVERTED_DATA:
FLOATNMBRS (float)
--------------------------
0
0
0.25
0.31
0
...
What I have tried:
-- Attempted update. Note that the derived table b is not correlated with
-- CONVERTED_DATA: no join condition or WHERE clause ties a row of b to the
-- row being updated.
UPDATE CONVERTED_DATA
SET
FLOATNMBRS = b.newValue
FROM
(
-- Strip the '*' marker, replace the decimal comma with a point, then cast to float.
Select convert (float, replace(replace(FLOATNMBRS, '*', ''),',','.')) as newValue from
ORG_DATA_AS_VARCHAR
) b
or
Replacing and converting it and create a #Temp Table with the new Values and Update CONVERTED_DATA with values from #Temp.
but every time I ended up with:
FLOATNMBRS (float)
--------------------------
0
0
0
0
0
All values were updated as 0.
When I tried:
-- Stand-alone conversion: strip '*', replace ',' with '.', then cast to float.
Select convert (float, replace(replace(FLOATNMBRS, '*', ''),',','.')) as newValue from
ORG_DATA_AS_VARCHAR
the result is correct. Even when I copy the value to #Temp. All values are correct.
Does anyone know what I'm doing wrong?

You are possibly not matching the records in the 2 tables. It is not clear from your example how the rows are identified (what's the key).
Assuming you have the same ID in ORG_DATA_AS_VARCHAR and CONVERTED_DATA tables, this works:
-- Repro script: convert varchar values such as '*0,25' to float and copy them
-- into a second table, matching source and target rows on ID.
create table #ORG_DATA_AS_VARCHAR (ID int, floatnmbrs_varchar varchar(128))
create table #CONVERTED_DATA (ID int, floatnmbrs float)
go
-- Sample data as in the question: a stray '*' prefix and a decimal comma.
insert into #ORG_DATA_AS_VARCHAR (ID, floatnmbrs_varchar)
values (1, '0'), (2, '0'), (3, '*0,25'), (4, '*0,31'), (5, '0')
insert into #CONVERTED_DATA (ID, floatnmbrs)
values (1, 0), (2, 0), (3, 0), (4, 0), (5, 0)
go
-- The join on ID is what makes each target row receive the value converted
-- from its own source row.
update c
set floatnmbrs = convert(float, replace(replace(o.floatnmbrs_varchar, '*', ''), ',', '.'))
from #CONVERTED_DATA as c
inner join #ORG_DATA_AS_VARCHAR as o
    on o.ID = c.ID
-- Explicit columns and ordering keep the output deterministic.
select ID, floatnmbrs
from #CONVERTED_DATA
order by ID
go
drop table #ORG_DATA_AS_VARCHAR
drop table #CONVERTED_DATA
go

Related

Divide table raw into chunks in Postgres with st_dwithin limit

I have a table of linestrings that I want to divide into chunks. Each chunk should hold a list of ids no longer than a given number, and should contain only lines that are within a certain distance of each other.
For example, I got a table with 14 rows
-- Sample table of linestrings, keyed by id, with a GiST index on the geometry.
create table lines (
    id   integer primary key,
    geom geometry(linestring)
);

insert into lines (id, geom)
values
    ( 1, 'LINESTRING(0 0, 0 1)'),
    ( 2, 'LINESTRING(0 1, 1 1)'),
    ( 3, 'LINESTRING(1 1, 1 2)'),
    ( 4, 'LINESTRING(1 2, 2 2)'),
    (11, 'LINESTRING(2 2, 2 3)'),
    (12, 'LINESTRING(2 3, 3 3)'),
    (13, 'LINESTRING(3 3, 3 4)'),
    (14, 'LINESTRING(3 4, 4 4)');

create index lines_gix on lines using gist (geom);
I want to split it into chunks with 3 ids for each chunk with lines that are within 2 meters from each other or the first one.
The result I am trying to get from this example is:
| Chunk No.| Id chunk list |
|----------|----------------|
| 1 | 1, 2, 3 |
| 2 | 4, 5, 6 |
| 3 | 7, 8, 9 |
| 4 | 10, 11, 12 |
| 5 | 13, 14 |
I tried to use st_clusterwithin but when lines are close to each other it will return all of them not split into chunks.
I also tried to use some with recursive magic like the one from the answer provided by Paul Ramsey here. But I don't know how to modify the query to return limited grouped id list.
I am not sure if it is the best possible answer so if anyone has a better method or know how to improve provided answer feel free to update it. With a little modification of Paul answer, I've managed to create following queries that are doing what I asked for.
-- Create function for easier interaction.
--
-- find_connected(start_id, max_distance, max_ids, excluded_ids)
--   $1  id of the line the walk starts from
--   $2  distance passed to ST_DWithin when looking for neighbouring lines
--   $3  maximum number of ids in the returned list
--   $4  ids that must not be added while walking
-- Returns one of the longest id lists (at most $3 entries) reachable from $1,
-- or NULL when no list qualifies.
CREATE OR REPLACE FUNCTION find_connected(integer, double precision, integer, integer[])
RETURNS integer[] AS
$$
WITH RECURSIVE lines_r AS (
    -- Anchor: the starting line on its own.
    SELECT ARRAY[id] AS idlist,
           geom,
           id
    FROM lines
    WHERE id = $1
    UNION ALL
    -- Step: extend every partial list by one neighbouring line.
    SELECT array_append(lines_r.idlist, candidate.id) AS idlist, -- grow the id list
           candidate.geom AS geom,                               -- geometry of the newest line
           candidate.id AS id                                    -- id of the newest line
    FROM lines_r
    INNER JOIN lines AS candidate
        ON ST_DWithin(candidate.geom, lines_r.geom, $2)          -- neighbour within distance $2
    WHERE NOT $4 @> ARRAY[candidate.id]                          -- not in the excluded ids
      AND NOT lines_r.idlist @> ARRAY[candidate.id]              -- not already on this path
      -- Stop extending once a list holds $3 ids: longer lists would be
      -- discarded by the final filter anyway, so they are never built.
      AND array_length(lines_r.idlist, 1) < $3
)
SELECT idlist
FROM lines_r
WHERE array_length(idlist, 1) <= $3
ORDER BY array_length(idlist, 1) DESC
LIMIT 1;
$$
LANGUAGE sql;
-- Create id chunks.
-- groups_r.idlist accumulates every id assigned so far; grouplist is the chunk
-- produced in the current step. Each recursive step picks one line that is not
-- yet in idlist (LIMIT 1) and asks find_connected for a chunk starting there,
-- passing idlist as the ids to exclude.
WITH RECURSIVE groups_r AS (
    (
        SELECT find_connected(id, 2, 3, ARRAY[id]) AS idlist,
               find_connected(id, 2, 3, ARRAY[id]) AS grouplist,
               id
        FROM lines
        WHERE id = 1
    )
    UNION ALL
    (
        SELECT array_cat(groups_r.idlist, find_connected(lines.id, 2, 3, groups_r.idlist)) AS idlist,
               find_connected(lines.id, 2, 3, groups_r.idlist) AS grouplist,
               lines.id
        FROM lines
        CROSS JOIN groups_r
        WHERE NOT groups_r.idlist @> ARRAY[lines.id] -- line not assigned to any chunk yet
        LIMIT 1
    )
)
SELECT
    -- (SELECT array_agg(DISTINCT x) FROM unnest(idlist) t (x)) idlist, -- left for better understanding what is happening
    row_number() OVER () AS chunk_id,
    (SELECT array_agg(DISTINCT x) FROM unnest(grouplist) AS t (x)) AS grouplist,
    id AS input_line_id
FROM groups_r;
The only problem is that performance is quite poor when the number of ids per chunk increases. For a table with 300 rows and 20 ids per chunk, execution time is around 15 minutes, even with indexes on the geometry and id columns.

Best way to avoid duplicates in table?

I've been given a task that requires writing a script to mass change items in a table(ProductArea):
ProductID int
SalesareaID int
One ProductID can only exist once in each SalesareaID so there can't be any duplicates in this table. But one ProductID can be sold in multiple SalesareaID.
So an example would look something like:
ProductID SalesareaID
1 1
1 2
1 3
2 2
3 1
Now, some areas have merged. So, if I try to run a straight-forward UPDATE to fix this like:
-- Naive merge of sales areas 2 and 3 into area 4.
UPDATE ProductArea SET SalesareaID = 4 where SalesareaID IN (2, 3)
it will find (1, 2) and change that to (1, 4). Then it will find (1, 3) and try to change that to (1, 4). But that already exist so it will crash with a "Cannot insert duplicate key..."-error.
Is there a best/recommended way to tell my UPDATE to only update if the resulting (ProductID, SalesareaID) doesn't already exist?
This should work
It uses a window function
-- Demo: merge sales areas 2 and 3 into area 4 without violating the
-- (prodID, salesID) primary key.
declare @T table (prodID int, salesID int, primary key (prodID, salesID));
insert into @T (prodID, salesID) values
  (1, 1)
, (1, 2)
, (1, 3)
, (2, 2)
, (3, 1);
-- Rank each product's rows across the merged areas AND the target area.
-- A row already in area 4 sorts first, so it is the one kept; every other
-- ranked row for that product is removed before the update runs.
with cte as
( select t.prodID
       , t.salesID
       , row_number() over (partition by t.prodID
                            order by case when t.salesID = 4 then 0 else 1 end
                                   , t.salesID) as rn
    from @T t
   where t.salesID in (2, 3, 4)
)
delete from cte where rn > 1;
-- At most one row per product is left in areas 2/3/4, so this cannot collide.
update @T set salesID = 4 where salesID in (2, 3);
select prodID, salesID from @T order by prodID, salesID;
If you are creating a new merged region from existing regions then I think the easiest thing to do would be to treat the merge as two separate operations.
First you insert entries for the new area based on the existing areas.
-- Add one row in the new area (4) for every product sold in a merged area
-- (2 or 3). Products that already have a row in area 4 are skipped so the
-- insert cannot raise a duplicate-key error.
INSERT INTO ProductArea (ProductID, SalesareaID)
SELECT DISTINCT src.ProductID, 4
FROM ProductArea AS src
WHERE src.SalesareaID IN (2, 3)
  AND NOT EXISTS (
        SELECT 1
        FROM ProductArea AS existing
        WHERE existing.ProductID = src.ProductID
          AND existing.SalesareaID = 4
      );
Then you remove the entries for the existing areas.
-- Remove the rows that still point at the old areas.
DELETE FROM ProductArea
WHERE SalesareaID = 2
   OR SalesareaID = 3;
The SalesareaID of 4 would need to be replaced by the id of the new Salesarea. The 2 and 3 would also need to be replaced by the ids of the areas you are merging to create the new Salesarea.

Summarizing Only Rows with given criteria

all!
Given the following table structure
-- Table variable holding one row per pallet-layer entry.
DECLARE @TempTable TABLE
(
    idProduct INT,
    Layers INT,
    LayersOnPallet INT,
    id INT IDENTITY(1, 1) NOT NULL,       -- generated row id
    Summarized BIT NOT NULL DEFAULT(0)    -- defaults to 0 for newly inserted rows
)
and the following insert statement which generates test data
-- Test data: three rows for product 1 (layers 2, 2, 1) and one row for product 2.
INSERT INTO @TempTable (idProduct, Layers, LayersOnPallet)
VALUES
    (1, 2, 4),
    (1, 2, 4),
    (1, 1, 4),
    (2, 2, 4)
I would like to summarize only those rows (by the Layers only) with the same idProduct and which will have the sum of layers equal to LayersOnPallet.
A picture is worth a thousand words:
From the picture above, you can see that only the first two rows were summarized, because both have the same idProduct and their sum(Layers) equals LayersOnPallet.
How can I achieve this? Is there any way to do this using only SELECT statements (without a WHILE loop)?
Thank you!
Perhaps this will do the trick. Note my comments:
-- your sample data
DECLARE @TempTable TABLE
(
    idProduct INT,
    Layers INT,
    LayersOnPallet INT,
    id INT IDENTITY(1, 1) NOT NULL,
    Summarized BIT NOT NULL DEFAULT(0)
)
INSERT INTO @TempTable (idProduct, Layers, LayersOnPallet)
SELECT 1, 2, 4 UNION ALL
SELECT 1, 2, 4 UNION ALL
SELECT 1, 1, 4 UNION ALL
SELECT 2, 2, 4;

-- an intermediate temp table used for processing
IF OBJECT_ID('tempdb..#processing') IS NOT NULL DROP TABLE #processing;

-- Populate #processing with the duplicated (idProduct, Layers, LayersOnPallet)
-- combinations. A group only qualifies when its layers add up to exactly
-- LayersOnPallet, which is the condition the question asks for.
SELECT
    idProduct,
    Layers,
    LayersOnPallet,
    COUNT(*) AS rCount
INTO #processing
FROM @TempTable
GROUP BY
    idProduct,
    Layers,
    LayersOnPallet
HAVING COUNT(*) > 1
   AND SUM(Layers) = LayersOnPallet;

-- Remove the rows that are about to be replaced by their summary
DELETE t
FROM @TempTable AS t
INNER JOIN #processing AS p
    ON p.idProduct = t.idProduct
   AND p.Layers = t.Layers
   AND p.LayersOnPallet = t.LayersOnPallet;

-- Add one summarized row per group (Summarized = 1)
INSERT INTO @TempTable (idProduct, Layers, LayersOnPallet, Summarized)
SELECT
    idProduct,
    Layers * rCount,
    LayersOnPallet,
    1
FROM #processing;

DROP TABLE #processing; -- cleanup

-- Final output
SELECT idProduct, Layers, LayersOnPallet, Summarized
FROM @TempTable;
Results:
idProduct Layers LayersOnPallet Summarized
----------- ----------- -------------- ----------
1 4 4 1
1 1 4 0
2 2 4 0

PostgreSQL Get holes in index column

I suppose it is not easy to query a table for data that doesn't exist, but maybe there is some trick to find the holes in an integer column (rowindex).
Here is small table for illustrating concrete situation:
-- Rebuild the sample table; rowindex deliberately has gaps (2, 4, 8, 9, ...).
DROP TABLE IF EXISTS examtable1;

CREATE TABLE examtable1 (
    rowindex integer PRIMARY KEY,
    mydate   timestamp,
    num1     integer
);

INSERT INTO examtable1 (rowindex, mydate, num1)
VALUES
    ( 1, '2015-03-09 07:12:45', 1),
    ( 3, '2015-03-09 07:17:12', 4),
    ( 5, '2015-03-09 07:22:43', 1),
    ( 6, '2015-03-09 07:25:15', 3),
    ( 7, '2015-03-09 07:41:46', 2),
    (10, '2015-03-09 07:42:05', 1),
    (11, '2015-03-09 07:45:16', 4),
    (14, '2015-03-09 07:48:38', 5),
    (15, '2015-03-09 08:15:44', 2);

-- List the indexes currently in use.
SELECT rowindex
FROM examtable1;
With the query shown I get a list of all the indexes in use.
But I would like to get (say) the first five indexes that are missing, so I can use them to insert new data at the desired rowindex.
In this concrete example the result would be 2, 4, 8, 9, 12, which are the indexes that are not used.
Is there any trick to build a query that returns the first n missing indexes?
In reality, such a table may contain many rows and the "holes" can be anywhere.
You can do this by generating a list of all numbers using generate_series() and then check which numbers don't exist in your table.
This can either be done using an outer join:
-- Missing rowindex values via an outer join: generate every number from 1 to
-- the highest rowindex, then keep those with no matching row in examtable1.
-- ORDER BY + LIMIT return the first five gaps, lowest first.
select nr.i as missing_index
from generate_series(1, (select max(rowindex) from examtable1)) as nr (i)
left join examtable1 t1
    on t1.rowindex = nr.i
where t1.rowindex is null
order by nr.i
limit 5;
or with a NOT EXISTS query:
-- Missing rowindex values via NOT EXISTS: keep each generated number for which
-- examtable1 has no row. ORDER BY + LIMIT return the first five gaps.
select g.i
from generate_series(1, (select max(rowindex) from examtable1)) as g (i)
where not exists (select 1
                  from examtable1 t1
                  where t1.rowindex = g.i)
order by g.i
limit 5;
I have used a hardcoded lower bound for generate_series() so that you would also detect a missing rowindex that is smaller than the lowest number.

Subnet (or CIDR) IP control at T-SQL

I don't know how to exactly explain, but there is a problem about selecting and comparing query IP subnet. For example, there is a list for IP address and I have another CIDR/subnet mask list (X.X.X.0/24 etc). How can I learn that each IP address in first list is in CIDR/subnet mask list via T-SQL?
For example:
IP: 172.28.112.23 -> false
IP: 172.28.111.33 -> true
IP List Output:
SubNet Output:
You want to do exactly what a computer would do to determine if an ip address is in a subnet- ie:
1) convert the network address, subnet mask and test address to binary.
2) Check if (Network Address & Subnet Mask) = (Test Address & Subnet mask)
(& represents bitwise AND)
If this comparison is true the test address is within the subnet
The key to understanding this is to realise that IP addresses (and subnet masks) are just 32 bit numbers.
A bitwise and between 2 32 bit numbers creates a new 32 bit number with a 1 in the position where there was a 1 in both of the 2 numbers being compared, and a 0 otherwise.
EG: 1010 & 1100 = 1000 because the first digit is 1 in both numbers (yielding a 1 in the result for the first digit), but the 2nd 3rd and 4th digits are not (so give 0 in the result for the 2nd 3rd and 4th digits).
Unfortunately, SQL Server cannot do a bitwise AND between two binary values, but it works fine between their integer representations (i.e. when converted to the BIGINT data type).
Therefore I'd propose you create a function that converts your IP addresses to BIGINT datatype firstly
-- Converts a dotted-quad IPv4 address to its 32-bit value held in a BIGINT:
-- octet1 * 2^24 + octet2 * 2^16 + octet3 * 2^8 + octet4.
CREATE FUNCTION dbo.fnIPtoBigInt
(
    @Ipaddress NVARCHAR(15) -- should be in the form '123.123.123.123'
)
RETURNS BIGINT
AS
BEGIN
    DECLARE @part1 AS NVARCHAR(3)
    DECLARE @part2 AS NVARCHAR(3)
    DECLARE @part3 AS NVARCHAR(3)
    DECLARE @part4 AS NVARCHAR(3)

    -- Peel the octets off left to right: take everything before the first dot,
    -- then drop that octet and its dot from @Ipaddress and repeat.
    SELECT @part1 = LEFT(@Ipaddress, CHARINDEX('.', @Ipaddress) - 1)
    SELECT @Ipaddress = SUBSTRING(@Ipaddress, LEN(@part1) + 2, 15)
    SELECT @part2 = LEFT(@Ipaddress, CHARINDEX('.', @Ipaddress) - 1)
    SELECT @Ipaddress = SUBSTRING(@Ipaddress, LEN(@part2) + 2, 15)
    SELECT @part3 = LEFT(@Ipaddress, CHARINDEX('.', @Ipaddress) - 1)
    SELECT @part4 = SUBSTRING(@Ipaddress, LEN(@part3) + 2, 15)

    DECLARE @ipAsBigInt AS BIGINT
    SELECT @ipAsBigInt =
          (16777216 * (CAST(@part1 AS BIGINT)))
        + (65536 * (CAST(@part2 AS BIGINT)))
        + (256 * (CAST(@part3 AS BIGINT)))
        + (CAST(@part4 AS BIGINT))
    RETURN @ipAsBigInt
END
GO
Then you can easily implement a function to test if an address is in a subnet:
-- Returns 1 when @testAddress lies in the subnet given by @networkAddress and
-- @subnetMask, i.e. when both addresses are equal after a bitwise AND with the
-- mask; otherwise returns 0.
CREATE FUNCTION dbo.fnIsIpaddressInSubnet
(
    @networkAddress NVARCHAR(15), -- 'eg: '192.168.0.0'
    @subnetMask NVARCHAR(15), -- 'eg: '255.255.255.0' for '/24'
    @testAddress NVARCHAR(15) -- 'eg: '192.168.0.1'
)
RETURNS BIT AS
BEGIN
    -- Convert the mask once; it is applied to both addresses.
    DECLARE @mask BIGINT = dbo.fnIPtoBigInt(@subnetMask)

    RETURN CASE WHEN (dbo.fnIPtoBigInt(@networkAddress) & @mask)
                   = (dbo.fnIPtoBigInt(@testAddress) & @mask)
                THEN 1 ELSE 0 END
END
GO
To make this a bit easier for you you'll probably want a function that can convert '/24' to a BigInt too.
'/24' is a shorthand way of writing 255.255.255.0 - ie a 32bit number with the first 24bits set to 1 (and the remaining 8 bits set to 0)
-- Converts a prefix length (the '24' in '/24') to the subnet mask as a BIGINT:
-- a 32-bit number whose first @SubnetBits bits are 1 and whose remaining bits are 0.
CREATE FUNCTION dbo.fnSubnetBitstoBigInt
(
    @SubnetBits TINYINT -- max = 32
)
RETURNS BIGINT
AS
BEGIN
    DECLARE @multiplier AS BIGINT = 2147483648 -- 2^31, the most significant of the 32 bits
    DECLARE @ipAsBigInt AS BIGINT = 0
    DECLARE @bitIndex TINYINT = 1

    -- Add one bit per iteration, walking from the most significant bit down.
    WHILE @bitIndex <= @SubnetBits
    BEGIN
        SELECT @ipAsBigInt = @ipAsBigInt + @multiplier
        SELECT @multiplier = @multiplier / 2
        SELECT @bitIndex = @bitIndex + 1
    END

    RETURN @ipAsBigInt
END
GO
If you create the following additional function the conversion becomes easy
-- Returns 1 when @testAddress lies in the subnet written in CIDR shorthand
-- (network address, '/', prefix length); otherwise returns 0.
CREATE FUNCTION dbo.fnIsIpaddressInSubnetShortHand
(
    @network NVARCHAR(18), -- 'eg: '192.168.0.0/24'
    @testAddress NVARCHAR(15) -- 'eg: '192.168.0.1'
)
RETURNS BIT AS
BEGIN
    DECLARE @networkAddress NVARCHAR(15)
    DECLARE @subnetBits TINYINT

    -- Split '<address>/<bits>' at the slash.
    SELECT @networkAddress = LEFT(@network, CHARINDEX('/', @network) - 1)
    SELECT @subnetBits = CAST(SUBSTRING(@network, LEN(@networkAddress) + 2, 2) AS TINYINT)

    -- Build the mask once; it is applied to both addresses.
    DECLARE @mask BIGINT = dbo.fnSubnetBitstoBigInt(@subnetBits)

    RETURN CASE WHEN (dbo.fnIPtoBigInt(@networkAddress) & @mask)
                   = (dbo.fnIPtoBigInt(@testAddress) & @mask)
                THEN 1 ELSE 0 END
END
GO
i.e.
SELECT dbo.fnIsIpaddressInSubnetShorthand('192.168.2.0/24','192.168.3.91') -- returns 0: the third octet (3) differs from the network's (2)
SELECT dbo.fnIsIpaddressInSubnetShorthand('192.168.2.0/24','192.168.2.91') -- returns 1: the first 24 bits match the network's
This is not an answer in itself, but a way to make one of the functions in James S answer easier to read and possibly more efficient.
SQL Server has a function to handle getting parts from database object names. Those names are 4 parts [Server].[Database].[Schema].[Object]. So the following allows you to get the schema name. The index works from the right
-- Part 2, counting from the right, is the schema part of the four-part name.
SELECT PARSENAME('[myServer].[master].[sys].[objects]', 2)
There's nothing to say you can't use that for an IP address. And as it's so fundamental to how SQL operates I assume it has been hella optimised.
-- Converts a dotted-quad IPv4 address to its 32-bit value held in a BIGINT.
-- PARSENAME splits the string on '.' and numbers the parts from the right,
-- so part 4 is the first octet and part 1 is the last.
CREATE FUNCTION dbo.fnIPtoBigInt
(
    @Ipaddress NVARCHAR(15) -- should be in the form '123.123.123.123'
)
RETURNS BIGINT
AS
BEGIN
    DECLARE @ipAsBigInt AS BIGINT
    SELECT @ipAsBigInt =
          (16777216 * (CAST(PARSENAME(@Ipaddress, 4) AS BIGINT)))
        + (65536 * (CAST(PARSENAME(@Ipaddress, 3) AS BIGINT)))
        + (256 * (CAST(PARSENAME(@Ipaddress, 2) AS BIGINT)))
        + (CAST(PARSENAME(@Ipaddress, 1) AS BIGINT))
    RETURN @ipAsBigInt
END
GO
Complete solution
-- Returns 1 when @ip lies inside the IPv4 subnet described by @net and @mask,
-- otherwise 0.
--   @net   network address in dotted-quad form, e.g. '192.168.64.0'
--   @mask  either a dotted-quad mask ('255.255.192.0') or a prefix length
--          of one or two characters ('18')
--   @ip    address to test, in dotted-quad form
CREATE OR ALTER FUNCTION dbo.IPv4SubnetContainsIPAddress (
    @net AS VARCHAR(15),
    @mask AS VARCHAR(15),
    @ip AS VARCHAR(15)
)
RETURNS tinyint
AS
BEGIN
    DECLARE @result AS tinyint

    -- A mask of at most two characters is taken as a prefix length and is
    -- translated to the corresponding dotted-quad mask.
    IF LEN(@mask) <= 2
        SELECT @mask = o.m
        FROM (
            VALUES
                (0, '0.0.0.0'),
                (1, '128.0.0.0'),
                (2, '192.0.0.0'),
                (3, '224.0.0.0'),
                (4, '240.0.0.0'),
                (5, '248.0.0.0'),
                (6, '252.0.0.0'),
                (7, '254.0.0.0'),
                (8, '255.0.0.0'),
                (9, '255.128.0.0'),
                (10, '255.192.0.0'),
                (11, '255.224.0.0'),
                (12, '255.240.0.0'),
                (13, '255.248.0.0'),
                (14, '255.252.0.0'),
                (15, '255.254.0.0'),
                (16, '255.255.0.0'),
                (17, '255.255.128.0'),
                (18, '255.255.192.0'),
                (19, '255.255.224.0'),
                (20, '255.255.240.0'),
                (21, '255.255.248.0'),
                (22, '255.255.252.0'),
                (23, '255.255.254.0'),
                (24, '255.255.255.0'),
                (25, '255.255.255.128'),
                (26, '255.255.255.192'),
                (27, '255.255.255.224'),
                (28, '255.255.255.240'),
                (29, '255.255.255.248'),
                (30, '255.255.255.252'),
                (31, '255.255.255.254'),
                (32, '255.255.255.255')
        ) AS o (i, m)
        WHERE o.i = @mask

    -- One row per octet (PARSENAME numbers the parts from the right, so part 4
    -- is the first octet). An octet passes when the tested value lies between
    -- that octet's subnet value (net AND mask) and its broadcast value
    -- (net OR inverted mask). The address is inside the subnet only when all
    -- four octets pass.
    SELECT @result = IIF(COUNT(*) = 4, 1, 0)
    FROM (
        VALUES
            (CAST(PARSENAME(@net, 4) AS INTEGER), CAST(PARSENAME(@mask, 4) AS INTEGER), CAST(PARSENAME(@ip, 4) AS INTEGER)),
            (CAST(PARSENAME(@net, 3) AS INTEGER), CAST(PARSENAME(@mask, 3) AS INTEGER), CAST(PARSENAME(@ip, 3) AS INTEGER)),
            (CAST(PARSENAME(@net, 2) AS INTEGER), CAST(PARSENAME(@mask, 2) AS INTEGER), CAST(PARSENAME(@ip, 2) AS INTEGER)),
            (CAST(PARSENAME(@net, 1) AS INTEGER), CAST(PARSENAME(@mask, 1) AS INTEGER), CAST(PARSENAME(@ip, 1) AS INTEGER))
    ) AS octet (o_net, o_mask, o_ip)
    WHERE octet.o_ip BETWEEN (octet.o_net & octet.o_mask)
                         AND (octet.o_net | (255 - octet.o_mask))

    RETURN @result
END
GO
-- Usage examples: the same two subnets, each given once as a dotted mask and once as a prefix length.
SELECT dbo.IPv4SubnetContainsIPAddress('192.168.64.0', '255.255.224.0', '192.168.40.1'); -- returns 0
SELECT dbo.IPv4SubnetContainsIPAddress('192.168.64.0', '19', '192.168.40.1'); -- returns 0
SELECT dbo.IPv4SubnetContainsIPAddress('192.168.64.0', '255.255.192.0', '192.168.80.1'); -- returns 1
SELECT dbo.IPv4SubnetContainsIPAddress('192.168.64.0', '18', '192.168.80.1'); -- returns 1