To split the column in sql server 2008 - tsql

I have a table:
servername description ObjectState
VM1 SP LA - W IN S V R S #P19 99 9999 999999 999 QTY 1 0
VM2 S PL A - V R STD #P29-9 9 99 QTY 2 : SPLVRENT #P3 999999 9 QTY 3 1
VM3 S PL A - WI NS V R S TD #P39-9 999 QTY 3 0
VM4 SPLA - WI NS V R STD #P59- 9 9 99 QTY 2 : S P LA - W IN SV RENT #P39-9999 QTY 3 : SPL A - WIN S VR SMB # P 3 9- 999 99 QTY 5 1
VM6 SPLA - W I NS VRSQ LSE R VE RS TD #P 6 9-9 9 9 9 QTY 6 0
VM5 SPLA - W IN S VRS TD #P59-9999 QTY 5 : SPLA - SQLSERVER-08 #P59-9999 QTY 5 0
VM7 SPLA - W I NSV R ST D #P79-9999 QTY 6 0
VM8 SPLA - W INS VR STD #P8 9-9 9 99 QTY 1 1
I wrote query like this
to split the description column like
SELECT
T.servername,
A.x.value('(/e/text())[1]', 'varchar(MAX)') [description],
A.x.value('(/e/text())[2]', 'varchar(25)') [ponumber],
A.x.value('(/e/text())[3]', 'varchar(2)') [qty],
A.x.value('(/e/text())[4]', 'char(1)') [ObjectState]
FROM
T1 AS T
CROSS APPLY
(SELECT T.[description] AS [text()] FOR XML PATH('')) AS R(s)
CROSS APPLY
(SELECT T.[ObjectState] as [text()] FOR XML PATH('')) AS B(t)
CROSS APPLY
(SELECT CAST('<e>' + REPLACE(R.s,':', '</e><e>') + '</e>' AS xml)) AS S(x)
CROSS APPLY
S.x.nodes('/e') AS N1(e)
CROSS APPLY
(SELECT CAST('<e>' + REPLACE(REPLACE(N1.e.value('text()[1]','varchar(MAX)'), '#', '</e><e>'), 'QTY', '</e><e>') + '</e>' AS xml)) AS A(x)
and I am getting output like this
servername description ponumber qty ObjectStat
VM1 SP LA - W IN S V R S P19 99 9999 999999 999 1 NULL
VM2 S PL A - V R STD P29-9 9 99 2 NULL
VM2 SPLVRENT P3 999999 9 3 NULL
I want the output in the colum Objectstate for
vm1 SP LA - W IN S V R S P19 99 9999 999999 999 1 0
vm2 S PL A - V R STD P29-9 9 99 2 1
vm2 SPLVRENT P3 999999 9 3 1
like this - How would I modify the above code to get the output I require?

In your query it looks like ObjectState is a field of its own. So...
SELECT
T.servername,
A.x.value('(/e/text())[1]', 'varchar(MAX)') [description],
A.x.value('(/e/text())[2]', 'varchar(25)') [ponumber],
A.x.value('(/e/text())[3]', 'varchar(2)') [qty],
T.ObjectState
FROM
#T AS T
CROSS APPLY
(SELECT T.[description] AS [text()] FOR XML PATH('')) AS R(s)
CROSS APPLY
(SELECT CAST('<e>' + REPLACE(R.s,':', '</e><e>') + '</e>' AS xml)) AS S(x)
CROSS APPLY
S.x.nodes('/e') AS N1(e)
CROSS APPLY
(SELECT CAST('<e>' + REPLACE(REPLACE(N1.e.value('text()[1]','varchar(MAX)'), '#', '</e><e>'), 'QTY', '</e><e>') + '</e>' AS xml)) AS A(x)

Related

PostgreSQL cummulative sum max condition

I have the following table:
account
size id name
100 1 John
200 2 Mary
300 3 Jane
400 4 Anne
100 5 Mike
600 6 Joanne
I want to partition the rows in groups, where the sum of size <= 600.
Expected result:
account
group size id name
1 100 1 John
1 200 2 Mary
1 300 3 Jane
2 400 4 Anne
2 100 5 Mike
3 600 6 Joanne
I don't know how to do the partition and add the condition.
I cannot think of how to do this without recursion. I left the running_total in the result to make it easier to follow:
with recursive rns as ( -- Assign row numbers as rn in case of gaps in id
select *, row_number() over (order by id) as rn
from account
), sumgrp as ( -- Start with first row
select size, id, name, rn, 1 as grp, size as running_total
from rns
where rn = 1
union all
select n.size, n.id, n.name, n.rn,
case -- Increment the grp when running_total exceeds 600
when p.running_total + n.size > 600 then p.grp + 1
else p.grp
end as grp,
case -- Reset the running_total when it exceeds 600
when p.running_total + n.size > 600 then n.size
else p.running_total + n.size
end as running_total
from sumgrp p
join rns n on n.rn = p.rn + 1
)
select grp, size, id, name, running_total
from sumgrp
order by id;
db<>fiddle here

Pivot Table in SQL (using Groupby)

I have a table structured as below
Customer_ID Sequence Comment_Code Comment
1 10 0 a
1 11 1 b
1 12 1 c
1 13 1 d
2 20 0 x
2 21 1 y
3 100 0 m
3 101 1 n
3 102 1 o
1 52 0 t
1 53 1 y
1 54 1 u
Sequence number is the unique number in the table
I want the output in SQL as below
Customer_ID Sequence
1 abcd
2 xy
3 mno
1 tyu
Can someone please help me with this. I can provide more details if required.
enter image description here
This looks like a simple gaps/islands problem.
-- Sample Data
DECLARE #table TABLE
(
Customer_ID INT,
[Sequence] INT,
Comment_Code INT,
Comment CHAR(1)
);
INSERT #table
(
Customer_ID,
[Sequence],
Comment_Code,
Comment
)
VALUES (1,10 ,0,'a'),(1,11 ,1,'b'),(1,12 ,1,'c'),(1,13 ,1,'d'),(2,20 ,0,'x'),(2,21 ,1,'y'),
(3,100,0,'m'),(3,101,1,'n'),(3,102,1,'o'),(1,52 ,0,'t'),(1,53 ,1,'y'),(1,54 ,1,'u');
-- Solution
WITH groups AS
(
SELECT
t.Customer_ID,
Grouper = [Sequence] - DENSE_RANK() OVER (ORDER BY [Sequence]),
t.Comment
FROM #table AS t
)
SELECT
g.Customer_ID,
[Sequence] =
(
SELECT g2.Comment+''
FROM groups AS g2
WHERE g.Customer_ID = g2.Customer_ID AND g.Grouper = g2.Grouper
FOR XML PATH('')
)
FROM groups AS g
GROUP BY g.Customer_ID, g.Grouper;
Returns:
Customer_ID Sequence
----------- ----------
1 abcd
1 tyu
2 xy
3 mno

PostgreSQL window function & difference between dates

Suppose I have data formatted in the following way (FYI, total row count is over 30K):
customer_id order_date order_rank
A 2017-02-19 1
A 2017-02-24 2
A 2017-03-31 3
A 2017-07-03 4
A 2017-08-10 5
B 2016-04-24 1
B 2016-04-30 2
C 2016-07-18 1
C 2016-09-01 2
C 2016-09-13 3
I need a 4th column, let's call it days_since_last_order which, in the case where order_rank = 1 then 0 else calculate the number of days since the previous order (with rank n-1).
So, the above would return:
customer_id order_date order_rank days_since_last_order
A 2017-02-19 1 0
A 2017-02-24 2 5
A 2017-03-31 3 35
A 2017-07-03 4 94
A 2017-08-10 5 38
B 2016-04-24 1 0
B 2016-04-30 2 6
C 2016-07-18 1 79
C 2016-09-01 2 45
C 2016-09-13 3 12
Is there an easier way to calculate the above with a window function (or similar) rather than join the entire dataset against itself (eg. on A.order_rank = B.order_rank - 1) and doing the calc?
Thanks!
use the lag window function
SELECT
customer_id
, order_date
, order_rank
, COALESCE(
DATE(order_date)
- DATE(LAG(order_date) OVER (PARTITION BY customer_id ORDER BY order_date))
, 0)
FROM <table_name>

Refer to current row in window function

Is it possible to refer to the current row in a window partition? I want to do something like the following:
SELECT min(ABS(variable - CURRENT.variable)) over (order by criterion RANGE UNBOUNDED PRECEDING)
That is, i want to find in the given partition the variable which is closest to the current value. Is is possible to do something like that?
As an example, from:
criterion | variable
1 2
2 4
3 2
4 7
5 6
We would obtain:
null
2
0
3
1
Thanks
As far as I know, this cannot be done with window functions.
But it can be done with a self join:
SELECT a.id,
a.variable,
min(abs(a.variable - b.variable))
FROM mydata a
LEFT JOIN mydata b
ON (b.criterion < a.criterion)
GROUP BY a.id, a.variable
ORDER BY a.id;
If I understand correctly:
with t (v) as (values (-5),(-2),(0),(1),(3),(10))
select v,
least(
v - lag(v) over (order by v),
lead(v) over (order by v) - v
) as closest
from t
;
v | closest
----+---------
-5 | 3
-2 | 2
0 | 1
1 | 1
3 | 2
10 | 7
Hope this could help you (pay attention for performance problems).
I tried this in MSSQL (at bottom you'll find POSTGRESQL version):
CREATE TABLE TX (CRITERION INT, VARIABILE INT);
INSERT INTO TX VALUES (1,2), (2,4),(3,2),(4,7), (5,6);
SELECT CRITERION, MIN_DELTA FROM
(
SELECT TX.CRITERION
, MIN(ABS(B.TX2_VAR - TX.VARIABILE)) OVER (PARTITION BY TX.CRITERION) AS MIN_DELTA
, RANK() OVER (PARTITION BY TX.CRITERION ORDER BY ABS(B.TX2_VAR - TX.VARIABILE) ) AS MIN_RANK
FROM TX
CROSS APPLY (SELECT TX2.CRITERION AS TX2_CRIT, TX2.VARIABILE AS TX2_VAR FROM TX TX2 WHERE TX2.CRITERION < TX.CRITERION) B
) C
WHERE MIN_RANK=1
ORDER BY CRITERION
;
Output:
CRITERION MIN_DELTA
----------- -----------
2 2
3 0
4 3
5 1
POSTGRESQL Version (tested on Rextester http://rextester.com/VMGJ87600):
CREATE TABLE TX (CRITERION INT, VARIABILE INT);
INSERT INTO TX VALUES (1,2), (2,4),(3,2),(4,7), (5,6);
SELECT * FROM TX;
SELECT CRITERION, MIN_DELTA FROM
(
SELECT TX.CRITERION
, MIN(ABS(B.TX2_VAR - TX.VARIABILE)) OVER (PARTITION BY TX.CRITERION) AS MIN_DELTA
, RANK() OVER (PARTITION BY TX.CRITERION ORDER BY ABS(B.TX2_VAR - TX.VARIABILE) ) AS MIN_RANK
FROM TX
LEFT JOIN LATERAL (SELECT TX2.CRITERION AS TX2_CRIT, TX2.VARIABILE AS TX2_VAR FROM TX TX2 WHERE TX2.CRITERION < TX.CRITERION) B ON TRUE
) C
WHERE MIN_RANK=1
ORDER BY CRITERION
;
DROP TABLE TX;
Output:
criterion variabile
1 1 2
2 2 4
3 3 2
4 4 7
5 5 6
criterion min_delta
1 1 NULL
2 2 2
3 3 0
4 4 3
5 5 1

postgres detect repeating patterns of zeros

Is there a way to detect subseries of zeros of length at least 3 within a time series in Postgres?
year value
--------------
1 0
2 0
3 0
4 33
5 72
6 0
7 0
8 0
9 0
10 25
11 0
12 56
13 37
So in this example I'd like to return years 1-3 and 6-9, but not year 11.
This one will do it:
WITH d(y,v) AS (VALUES
(1,0),(2,0),(3,0),(4,33),(5,72),
(6,0),(7,0),(8,0),(9,0),(10,25),
(11,0),(12,56),(13,37)
)
SELECT grp, numrange(min(y),max(y),'[]') as ys, count(*) as len
FROM (
/* group identifiers via running total */
SELECT y, v, g, sum(g) OVER (ORDER BY y) grp
FROM (
/* group boundaries */
SELECT y, v, CASE WHEN
v IS DISTINCT FROM lag(v) OVER (ORDER BY y) THEN 1
END g
FROM d) s
WHERE v=0) s
GROUP BY grp
HAVING count(*) >= 3;