Compare varchar string to produce missing items list - tsql

I have a table with a column. The column stores locations using varchar as the datatype. The locations use the format -2,7 -25,30 etc. I am trying to produce a list of missing locations i.e. where we don't have any customers.
The locations go from -30,-30 to 30,30. I can't find a way to setup a loop to run though all the options. Is there a way to do this?

Microsoft SQL Server 2017
;WITH cte as (
select -30 as n --anchor member
UNION ALL
select n + 1 --recursive member
from cte
where n < 31
)
select z.*
from (
select CONCAT(y.n,',',x.n) as locations
from cte as x CROSS JOIN cte y
) as z
LEFT OUTER JOIN dbo.Client as cli ON cli.client_location = z.locations
where cli.client_location IS NULL
order by z.locations asc

Generate all combinations.
Then match the generated against the existing combinations.
WITH DIGITS AS
(
SELECT n FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS val(n)
),
NUMS AS
(
SELECT (tens.n * 10 + ones.n)-50 AS n
FROM DIGITS ones
CROSS JOIN DIGITS tens
),
LOCATIONS AS
(
SELECT CONCAT(n1.n,',',n2.n) AS location, n1.n as n1, n2.n as n2
FROM NUMS n1
JOIN NUMS n2 ON n2.n BETWEEN -30 AND 30
WHERE n1.n BETWEEN -30 AND 30
)
SELECT loc.location
FROM LOCATIONS loc
LEFT JOIN
(
SELECT Client_Location, COUNT(*) Cnt
FROM dbo.Client
GROUP BY Client_Location
) cl ON cl.Client_Location = loc.location
WHERE cl.Client_Location IS NULL
ORDER BY loc.n1, loc.n2

I would go with a recursive CTE. This is a slight variation of SNR's approach:
with cte as (
select -30 as n --anchor member
union all
select n + 1 --recursive member
from cte
where n < 30
)
select cte.x, cte.y,
concat(cte_x.n, ',', cte_y.n) as missing_location
from cte cte_x cross join
cte cte_y left join
dbo.client c
on c.client_location = concat(cte_x.n, ',', cte_y.n)
where c.client_location is null;
Or to avoid the concat() twice:
select cte.x, cte.y, v.location as missing_location
from cte cte_x cross join
cte cte_y cross apply
(values (concat(cte_x.n, ',', cte_y.n))
) v(location) left join
dbo.client c
on c.client_location = v.location
where c.client_location is null;

Related

TSQL - in a string, replace a character with a fixed one every 2 characters

I can't replace every 2 characters of a string with a '.'
select STUFF('abcdefghi', 3, 1, '.') c3,STUFF('abcdefghi', 5, 1,
'.') c5,STUFF('abcdefghi', 7, 1, '.') c7,STUFF('abcdefghi', 9, 1, '.')
c9
if I use STUFF I should subsequently overlap the strings c3, c5, c7 and c9. but I can't find a method
can you help me?
initial string:
abcdefghi
the result I would like is
ab.de.gh.
the string can be up to 50 characters
Create a numbers / tally / digits table, if you don't have one already, then you can use this to target each character position:
with digits as ( /* This would be a real table, here it's just to test */
select n from (values(1),(2),(3),(4),(5),(6),(7),(8),(9),(10))x(n)
), t as (
select 'abcdefghi' as s
)
select String_Agg( case when d.n%3 = 0 then '.' else Substring(t.s, d.n, 1) end, '')
from t
cross apply digits d
where d.n <Len(t.s)
Using for xml with existing table
with digits as (
select n from (values(1),(2),(3),(4),(5),(6),(7),(8),(9),(10))x(n)
),
r as (
select t.id, case when d.n%3=0 then '.' else Substring(t.s, d.n, 1) end ch
from t
cross apply digits d
where d.n <Len(t.s)
)
select result=(select '' + ch
from r r2
where r2.id=r.id
for xml path('')
)
from r
group by r.id
You can try it like this:
Easiest might be a quirky update ike here:
DECLARE #string VARCHAR(100)='abcdefghijklmnopqrstuvwxyz';
SELECT #string = STUFF(#string,3*A.pos,1,'.')
FROM (SELECT TOP(LEN(#string)/3) ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
FROM master..spt_values) A(pos);
SELECT #string;
Better/Cleaner/Prettier was a recursive CTE:
We use a declared table to have some tabular sample data
DECLARE #tbl TABLE(ID INT IDENTITY, SomeString VARCHAR(200));
INSERT INTO #tbl VALUES('')
,('a')
,('ab')
,('abc')
,('abcd')
,('abcde')
,('abcdefghijklmnopqrstuvwxyz');
--the query
WITH recCTE AS
(
SELECT ID
,SomeString
,(LEN(SomeString)+1)/3 AS CountDots
,1 AS OccuranceOfDot
,SUBSTRING(SomeString,4,LEN(SomeString)) AS RestString
,CAST(LEFT(SomeString,2) AS VARCHAR(MAX)) AS Growing
FROM #tbl
UNION ALL
SELECT t.ID
,r.SomeString
,r.CountDots
,r.OccuranceOfDot+2
,SUBSTRING(RestString,4,LEN(RestString))
,CONCAT(Growing,'.',LEFT(r.RestString,2))
FROM #tbl t
INNER JOIN recCTE r ON t.ID=r.ID
WHERE r.OccuranceOfDot/2<r.CountDots-1
)
SELECT TOP 1 WITH TIES ID,Growing
FROM recCTE
ORDER BY ROW_NUMBER() OVER(PARTITION BY ID ORDER BY OccuranceOfDot DESC);
--the result
1
2 a
3 ab
4 ab
5 ab
6 ab.de
7 ab.de.gh.jk.mn.pq.st.vw.yz
The idea in short
We use a recursive CTE to walk along the string
we add the needed portion together with a dot
We stop, when the remaining length is to short to continue
a little magic is the ORDER BY ROW_NUMBER() OVER() together with TOP 1 WITH TIES. This will allow all first rows (frist per ID) to appear.

Include unmatched records when joining with non merge-joinable and non hash-joinable conditions

If I have data representing points and areas:
CREATE TABLE test_areas AS (
SELECT id area_id, ST_Buffer(ST_SetSRID(ST_MakePoint(x,y), 32632), 2) geom
FROM ( VALUES (1,2,2),(2,4,6) ) s(id,x,y) );
CREATE TABLE test_points AS (
SELECT id point_id, ST_SetSRID(ST_MakePoint(x,y), 32632) geom
FROM ( VALUES (0,1,1),(1,1,3),(2,1,5),(3,3,6),(4,5,7),(5,7,8) ) s(id,x,y) );
To apply the id of the containing area for each point I would write a query:
SELECT area_id, point_id
FROM test_points p JOIN test_areas a ON ST_Intersects(p.geom, a.geom)
To include points that are not within any area (point_id = 2, 5) under a separate category (e.g. 0) I would normally use FULL OUTER JOIN and COALESCE but ST_Intersects does not support this.
I can do this in with a workaround:
WITH c AS (
SELECT area_id, point_id
FROM test_points p JOIN test_areas a ON ST_Intersects(p.geom, a.geom)
)
SELECT 0 area_id, point_id
FROM ( SELECT point_id FROM test_points EXCEPT SELECT point_id FROM c) s
UNION
SELECT * FROM c
Is there a better way to include unmatched records without using FULL OUTER?
Well I found a better solution:
SELECT COALESCE(area_id, 0) area_id, point_id
FROM test_points
FULL OUTER JOIN (
SELECT area_id, point_id
FROM test_points p JOIN test_areas a ON ST_Intersects(p.geom, a.geom)
) s USING (point_id)
You join the points again with the result, though I'm not sure about the performance.
If you only need the area and point ids (no other value columns from points) you can shorten the query to:
SELECT COALESCE(area_id, 0) area_id, point_id
FROM test_points p1
JOIN test_areas a ON ST_Intersects(p1.geom, a.geom)
FULL OUTER JOIN test_points USING (point_id);

Grouping consecutive dates in PostgreSQL

I have two tables which I need to combine as sometimes some dates are found in table A and not in table B and vice versa. My desired result is that for those overlaps on consecutive days be combined.
I'm using PostgreSQL.
Table A
id startdate enddate
--------------------------
101 12/28/2013 12/31/2013
Table B
id startdate enddate
--------------------------
101 12/15/2013 12/15/2013
101 12/16/2013 12/16/2013
101 12/28/2013 12/28/2013
101 12/29/2013 12/31/2013
Desired Result
id startdate enddate
-------------------------
101 12/15/2013 12/16/2013
101 12/28/2013 12/31/2013
Right. I have a query that I think works. It certainly works on the sample records you provided. It uses a recursive CTE.
First, you need to merge the two tables. Next, use a recursive CTE to get the sequences of overlapping dates. Finally, get the start and end dates, and join back to the "merged" table to get the id.
with recursive allrecords as -- this merges the input tables. Add a unique row identifier
(
select *, row_number() over (ORDER BY startdate) as rowid from
(select * from table1
UNION
select * from table2) a
),
path as ( -- the recursive CTE. This gets the sequences
select rowid as parent,rowid,startdate,enddate from allrecords a
union
select p.parent,b.rowid,b.startdate,b.enddate from allrecords b join path p on (p.enddate + interval '1 day')>=b.startdate and p.startdate <= b.startdate
)
SELECT id,g.startdate,g.enddate FROM -- outer query to get the id
-- inner query to get the start and end of each sequence
(select parent,min(startdate) as startdate, max(enddate) as enddate from
(
select *, row_number() OVER (partition by rowid order by parent,startdate) as row_number from path
) a
where row_number = 1 -- We only want the first occurrence of each record
group by parent)g
INNER JOIN allrecords a on a.rowid = parent
The below fragment does what you intend. (but it will probably be very slow) The problem is that detecteng (non)overlapping dateranges is impossible with standard range operators, since a range could be split into two parts.
So, my code does the following:
split the dateranges from table_A into atomic records, with one date per record
[the same for table_b]
cross join these two tables (we are only interested in A_not_in_B, and B_not_in_A) , remembering which of the L/R outer join wings it came from.
re-aggregate the resulting records into date ranges.
-- EXPLAIN ANALYZE
--
WITH RECURSIVE ranges AS (
-- Chop up the a-table into atomic date units
WITH ar AS (
SELECT generate_series(a.startdate,a.enddate , '1day'::interval)::date AS thedate
, 'A'::text AS which
, a.id
FROM a
)
-- Same for the b-table
, br AS (
SELECT generate_series(b.startdate,b.enddate, '1day'::interval)::date AS thedate
, 'B'::text AS which
, b.id
FROM b
)
-- combine the two sets, retaining a_not_in_b plus b_not_in_a
, moments AS (
SELECT COALESCE(ar.id,br.id) AS id
, COALESCE(ar.which, br.which) AS which
, COALESCE(ar.thedate, br.thedate) AS thedate
FROM ar
FULL JOIN br ON br.id = ar.id AND br.thedate = ar.thedate
WHERE ar.id IS NULL OR br.id IS NULL
)
-- use a recursive CTE to re-aggregate the atomic moments into ranges
SELECT m0.id, m0.which
, m0.thedate AS startdate
, m0.thedate AS enddate
FROM moments m0
WHERE NOT EXISTS ( SELECT * FROM moments nx WHERE nx.id = m0.id AND nx.which = m0.which
AND nx.thedate = m0.thedate -1
)
UNION ALL
SELECT rr.id, rr.which
, rr.startdate AS startdate
, m1.thedate AS enddate
FROM ranges rr
JOIN moments m1 ON m1.id = rr.id AND m1.which = rr.which AND m1.thedate = rr.enddate +1
)
SELECT * FROM ranges ra
WHERE NOT EXISTS (SELECT * FROM ranges nx
-- suppress partial subassemblies
WHERE nx.id = ra.id AND nx.which = ra.which
AND nx.startdate = ra.startdate
AND nx.enddate > ra.enddate
)
;

how to do dead reckoning on column of table, postgresql

I have a table looks like,
x y
1 2
2 null
3 null
1 null
11 null
I want to fill the null value by conducting a rolling
function to apply y_{i+1}=y_{i}+x_{i+1} with sql as simple as possible (inplace)
so the expected result
x y
1 2
2 4
3 7
1 8
11 19
implement in postgresql. I may encapsulate it in a window function, but the implementation of custom function seems always complex
WITH RECURSIVE t AS (
select x, y, 1 as rank from my_table where y is not null
UNION ALL
SELECT A.x, A.x+ t.y y , t.rank + 1 rank FROM t
inner join
(select row_number() over () rank, x, y from my_table ) A
on t.rank+1 = A.rank
)
SELECT x,y FROM t;
You can iterate over rows using a recursive CTE. But in order to do so, you need a way to jump from row to row. Here's an example using an ID column:
; with recursive cte as
(
select id
, y
from Table1
where id = 1
union all
select cur.id
, prev.y + cur.x
from Table1 cur
join cte prev
on cur.id = prev.id + 1
)
select *
from cte
;
You can see the query at SQL Fiddle. If you don't have an ID column, but you do have another way to order the rows, you can use row_number() to get an ID:
; with recursive sorted as
(
-- Specify your ordering here. This example sorts by the dt column.
select row_number() over (order by dt) as id
, *
from Table1
)
, cte as
(
select id
, y
from sorted
where id = 1
union all
select cur.id
, prev.y + cur.x
from sorted cur
join cte prev
on cur.id = prev.id + 1
)
select *
from cte
;
Here's the SQL Fiddle link.

Dealing with periods and dates without using cursors

I would like to solve this issue avoiding to use cursors (FETCH).
Here comes the problem...
1st Table/quantity
------------------
periodid periodstart periodend quantity
1 2010/10/01 2010/10/15 5
2st Table/sold items
-----------------------
periodid periodstart periodend solditems
14343 2010/10/05 2010/10/06 2
Now I would like to get the following view or just query result
Table Table/stock
-----------------------
periodstart periodend itemsinstock
2010/10/01 2010/10/04 5
2010/10/05 2010/10/06 3
2010/10/07 2010/10/15 5
It seems impossible to solve this problem without using cursors, or without using single dates instead of periods.
I would appreciate any help.
Thanks
DECLARE #t1 TABLE (periodid INT,periodstart DATE,periodend DATE,quantity INT)
DECLARE #t2 TABLE (periodid INT,periodstart DATE,periodend DATE,solditems INT)
INSERT INTO #t1 VALUES(1,'2010-10-01T00:00:00.000','2010-10-15T00:00:00.000',5)
INSERT INTO #t2 VALUES(14343,'2010-10-05T00:00:00.000','2010-10-06T00:00:00.000',2)
DECLARE #D1 DATE
SELECT #D1 = MIN(P) FROM (SELECT MIN(periodstart) P FROM #t1
UNION ALL
SELECT MIN(periodstart) FROM #t2) D
DECLARE #D2 DATE
SELECT #D2 = MAX(P) FROM (SELECT MAX(periodend) P FROM #t1
UNION ALL
SELECT MAX(periodend) FROM #t2) D
;WITH
L0 AS (SELECT 1 AS c UNION ALL SELECT 1),
L1 AS (SELECT 1 AS c FROM L0 A CROSS JOIN L0 B),
L2 AS (SELECT 1 AS c FROM L1 A CROSS JOIN L1 B),
L3 AS (SELECT 1 AS c FROM L2 A CROSS JOIN L2 B),
L4 AS (SELECT 1 AS c FROM L3 A CROSS JOIN L3 B),
Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS i FROM L4),
Dates AS(SELECT DATEADD(DAY,i-1,#D1) AS D FROM Nums where i <= 1+DATEDIFF(DAY,#D1,#D2)) ,
Stock As (
SELECT D ,t1.quantity - ISNULL(t2.solditems,0) AS itemsinstock
FROM Dates
LEFT OUTER JOIN #t1 t1 ON t1.periodend >= D and t1.periodstart <= D
LEFT OUTER JOIN #t2 t2 ON t2.periodend >= D and t2.periodstart <= D ),
NStock As (
select D,itemsinstock, ROW_NUMBER() over (order by D) - ROW_NUMBER() over (partition by itemsinstock order by D) AS G
from Stock)
SELECT MIN(D) AS periodstart, MAX(D) AS periodend, itemsinstock
FROM NStock
GROUP BY G, itemsinstock
ORDER BY periodstart
Hopefully a little easier to read than Martin's. I used different tables and sample data, hopefully extrapolating the right info:
CREATE TABLE [dbo].[Quantity](
[PeriodStart] [date] NOT NULL,
[PeriodEnd] [date] NOT NULL,
[Quantity] [int] NOT NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[SoldItems](
[PeriodStart] [date] NOT NULL,
[PeriodEnd] [date] NOT NULL,
[SoldItems] [int] NOT NULL
) ON [PRIMARY]
INSERT INTO Quantity (PeriodStart,PeriodEnd,Quantity)
SELECT '20100101','20100115',5
INSERT INTO SoldItems (PeriodStart,PeriodEnd,SoldItems)
SELECT '20100105','20100107',2 union all
SELECT '20100106','20100108',1
The actual query is now:
;WITH Dates as (
select PeriodStart as DateVal from SoldItems union select PeriodEnd from SoldItems union select PeriodStart from Quantity union select PeriodEnd from Quantity
), Periods as (
select d1.DateVal as StartDate, d2.DateVal as EndDate
from Dates d1 inner join Dates d2 on d1.DateVal < d2.DateVal left join Dates d3 on d1.DateVal < d3.DateVal and d3.DateVal < d2.DateVal where d3.DateVal is null
), QuantitiesSold as (
select StartDate,EndDate,COALESCE(SUM(si.SoldItems),0) as Quantity
from Periods p left join SoldItems si on p.StartDate < si.PeriodEnd and si.PeriodStart < p.EndDate
group by StartDate,EndDate
)
select StartDate,EndDate,q.Quantity - qs.Quantity
from QuantitiesSold qs inner join Quantity q on qs.StartDate < q.PeriodEnd and q.PeriodStart < qs.EndDate
And the result is:
StartDate EndDate (No column name)
2010-01-01 2010-01-05 5
2010-01-05 2010-01-06 3
2010-01-06 2010-01-07 2
2010-01-07 2010-01-08 4
2010-01-08 2010-01-15 5
Explanation: I'm using three Common Table Expressions. The first (Dates) is gathering all of the dates that we're talking about, from the two tables involved. The second (Periods) selects consecutive values from the Dates CTE. And the third (QuantitiesSold) then finds items in the SoldItems table that overlap these periods, and adds their totals together. All that remains in the outer select is to subtract these quantities from the total quantity stored in the Quantity Table
John, what you could do is a WHILE loop. Declare and initialise 2 variables before your loop, one being the start date and the other being end date. Your loop would then look like this:
WHILE(#StartEnd <= #EndDate)
BEGIN
--processing goes here
SET #StartEnd = #StartEnd + 1
END
You would need to store your period definitions in another table, so you could retrieve those and output rows when required to a temporary table.
Let me know if you need any more detailed examples, or if I've got the wrong end of the stick!
Damien,
I am trying to fully understand your solution and test it on a large scale of data, but I receive following errors for your code.
Msg 102, Level 15, State 1, Line 20
Incorrect syntax near 'Dates'.
Msg 102, Level 15, State 1, Line 22
Incorrect syntax near ','.
Msg 102, Level 15, State 1, Line 25
Incorrect syntax near ','.
Damien,
Based on your solution I also wanted to get a neat display for StockItems without overlapping dates. How about this solution?
CREATE TABLE [dbo].[SoldItems](
[PeriodStart] [datetime] NOT NULL,
[PeriodEnd] [datetime] NOT NULL,
[SoldItems] [int] NOT NULL
) ON [PRIMARY]
INSERT INTO SoldItems (PeriodStart,PeriodEnd,SoldItems)
SELECT '20100105','20100106',2 union all
SELECT '20100105','20100108',3 union all
SELECT '20100115','20100116',1 union all
SELECT '20100101','20100120',10
;WITH Dates as (
select PeriodStart as DateVal from SoldItems
union
select PeriodEnd from SoldItems
union
select PeriodStart from Quantity
union
select PeriodEnd from Quantity
), Periods as (
select d1.DateVal as StartDate, d2.DateVal as EndDate
from Dates d1
inner join Dates d2 on d1.DateVal < d2.DateVal
left join Dates d3 on d1.DateVal < d3.DateVal and
d3.DateVal < d2.DateVal where d3.DateVal is null
), QuantitiesSold as (
select StartDate,EndDate,SUM(si.SoldItems) as Quantity
from Periods p left join SoldItems si on p.StartDate < si.PeriodEnd and si.PeriodStart < p.EndDate
group by StartDate,EndDate
)
select StartDate,EndDate, qs.Quantity
from QuantitiesSold qs
where qs.quantity is not null