Extracting a string using substring and instring - oracle-sqldeveloper

I have a problem, I used many methods like instring and substring to get the output, but was missing something
Below is the data
V V1
4 hurdle:longrun:runner:rod:robi
4 hurdle:longrun:runner:rod:robi
3 longrun:jog:rod:robi
2 longrun: sprint :runner:rod:robi
I want output like
V V1
4 longrun
4 longrun
3 null
2 sprint
Actually, I want the output to be the value that comes immediately before “runner”. Can someone please help me?

Could you try this?
-- Return the token that immediately precedes "runner" in the colon-separated
-- list v1, or NULL when no such token exists. SUBSTR with a negative length
-- returns NULL in Oracle, so the token is read from capture group 1 of the
-- pattern (the sixth REGEXP_SUBSTR argument) and then trimmed of spaces.
select v, trim(regexp_substr(v1, '([^:]*):runner(:|$)', 1, 1, null, 1)) from TABLE_NAME;
(Caveat: I'm afraid I haven't got an Oracle DB to hand, so I haven't tested this myself)

Here's my solution,
-- Sample rows: a one-character value V, a space, then the colon-separated list.
WITH x AS (
    SELECT '4 hurdle:longrun:runner:rod:robi' AS v_string FROM dual UNION ALL
    SELECT '4 hurdle:longrun:runner:rod:robi' AS v_string FROM dual UNION ALL
    SELECT '3 longrun:jog:rod:robi' AS v_string FROM dual UNION ALL
    SELECT '2 longrun: sprint :runner:rod:robi' AS v_string FROM dual
),
-- Text between the two-character "V " prefix and ":runner". It is NULL when
-- ":runner" is absent, because SUBSTR returns NULL for a length below 1.
before_runner AS (
    SELECT
        v_string,
        SUBSTR(v_string, 3, INSTR(v_string, ':runner') - 3) AS leading_tokens
    FROM x
)
SELECT
    SUBSTR(v_string, 1, 2)
    -- INSTR with position -1 searches backwards from the end, so only the text
    -- after the last colon is kept: the token directly in front of "runner".
    -- TRIM removes the spaces around tokens such as ' sprint '.
    || TRIM(SUBSTR(leading_tokens, INSTR(leading_tokens, ':', -1) + 1)) AS v_and_token
FROM before_runner;
OR
-- Sample rows: a one-character value V, a space, then the colon-separated list.
WITH x AS (
    SELECT '4 hurdle:longrun:runner:rod:robi' AS v_string FROM dual UNION ALL
    SELECT '4 hurdle:longrun:runner:rod:robi' AS v_string FROM dual UNION ALL
    SELECT '3 longrun:jog:rod:robi' AS v_string FROM dual UNION ALL
    SELECT '2 longrun: sprint :runner:rod:robi' AS v_string FROM dual
),
-- Everything in front of ":runner"; NULL when ":runner" does not occur.
runner_prefix AS (
    SELECT SUBSTR(v_string, 1, INSTR(v_string, ':runner') - 1) AS head
    FROM x
)
-- Remove the first list token (from position 3 through the first colon) from
-- the prefix, leaving "V " followed by whatever sat between that token and
-- ":runner". A NULL prefix yields a NULL result.
SELECT REPLACE(head, SUBSTR(head, 3, INSTR(head, ':', 1) - 2))
FROM runner_prefix;

Related

Repeat Row Values

I have the following data:
-- Sample account rows, supplied through a table value constructor.
with source (Account, AccountNumber, Indentation) as
(
    select v.Account, v.AccountNumber, v.Indentation
    from (values
        ('INCOME STATEMENT', 1000, 0),
        ('REVENUE', 1100, 0),
        ('Revenue - Aircon', 1110, 1),
        ('Revenue - Consumer Goods', 1120, 1),
        ('Revenue - Spares', 1130, 1),
        ('Revenue - Accessories', 1140, 1),
        ('Revenue - Sub Stock', 1150, 1),
        ('Revenue - Services', 1160, 1),
        ('Revenue - Other', 1170, 1),
        ('Revenue - Intercompany', 1180, 1),
        ('Revenue - Delivery Charges', 1400, 1),
        ('COST OF SALES', 1500, 0),
        ('COGS - Aircon', 1510, 1),
        ('COGS - Consumer Goods', 1520, 1),
        ('COGS - Spares', 1530, 1),
        ('COGS - Accessories', 1540, 1),
        ('COGS - Sub Stock', 1550, 1),
        ('COGS - Services', 1560, 1),
        ('COGS - Other', 1570, 1),
        ('COGS - Intercompany', 1580, 1),
        ('COS - Sub Stock Stock Adjustments', 1610, 1),
        ('COS - Sub Stock Repairs', 1620, 1),
        ('COS - Consumables & Packing Materials', 1810, 1),
        ('COS - Freight & Delivery', 1820, 1),
        ('COS - Inventory Adj - Stock Count', 1910, 1),
        ('COS - Inv. Adj - Stock Write up / Write down', 1920, 1),
        ('COS - Provision for Obsolete Stock (IS)', 1930, 1),
        ('COS - Inventory Adj - System A/c', 1996, 1),
        ('COS - Purch & Dir. Cost Appl A/c - System A/c', 1997, 1),
        ('GROSS MARGIN', 1999, 0),
        ('OTHER INCOME', 2000, 0),
        ('Admin Fees Received', 2100, 1),
        ('Bad Debt Recovered', 2110, 1),
        ('Discount Received', 2120, 1),
        ('Dividends Received', 2130, 1),
        ('Fixed Assets - NBV on Disposal', 2140, 1),
        ('Fixed Assets - Proceeds on Disposal', 2145, 1),
        ('Rebates Received', 2150, 1),
        ('Rental Income', 2160, 1),
        ('Sundry Income', 2170, 1),
        ('Warranty Income', 2180, 1),
        ('INTEREST RECEIVED', 2200, 0),
        ('Interest Received - Banks', 2210, 1)
    ) as v (Account, AccountNumber, Indentation)
)
select
    src.Account
  , src.AccountNumber
  , src.Indentation
from source as src;
Using the following script:
-- Split each account row into a Header column (Indentation = 0) and a
-- SubHeader1 column (Indentation = 1). Rows whose Account starts with 'Total'
-- get NULL in both columns.
with s as (
    select
        case when Account not like 'Total%' and Indentation = 0 then Account end as Header
      , case when Account not like 'Total%' and Indentation = 1 then Account end as SubHeader1
      , Account
      , AccountNumber
      , Indentation
    from Source
)
select
    Header
    -- Earlier LAG() attempt, kept for reference (it does not fill the header down):
    --, case lag(Header) over (order by AccountNumber) when Header then isnull(Header,lag(Header) over (order by AccountNumber)) else Header end
  , SubHeader1
    -- The source CTE names this column AccountNumber; [Account Number] does not exist.
  , AccountNumber
  , Indentation
from s
I'm able to split the columns like this:
I need to be able to report the Header Column to look like this:
I tried doing it using LAG(), but it doesn't work, how would I script this?
This is one option. I created a group for each header and then used it to grab the first row in that group that had Indentation = 0. Tack this on to your source CTE:
-- Meant to be appended to the source CTE. H1 is a running count of
-- Indentation = 0 rows in AccountNumber order, so a header row and the rows
-- that follow it (up to the next header) share the same H1 value.
,CTE2 AS
(
    select
        case when Account not like 'Total%' and Indentation = 0 then Account end as Header
      , case when Account not like 'Total%' and Indentation = 1 then Account end as SubHeader1
      , SUM(CASE WHEN Indentation = 0 THEN 1 ELSE 0 END) OVER (ORDER BY AccountNumber) as H1
      , Account
      , AccountNumber
      , Indentation
    from Source
)
-- Header is non-NULL only on Indentation = 0 rows, so the per-group MAX repeats
-- that header on every row of its group without a correlated self-join.
SELECT
    MAX(Header) OVER (PARTITION BY H1) AS Header
  , SubHeader1
  , AccountNumber
  , Indentation
FROM CTE2
ORDER BY AccountNumber

Checking Slowly Changing Dimension 2

I have a table that looks like this:
A slowly changing dimension type 2, according to Kimball.
Key is just a surrogate key, a key to make rows unique.
As you can see there are three rows for product A.
Timelines for this product are ok. During time the description of the product changes.
From 1-1-2020 up until 4-1-2020 the description of this product was ProdA1.
From 5-1-2020 up until 12-2-2020 the description of this product was ProdA2 etc.
If you look at product B, you see there are gaps in the timeline.
We use DB2 V12 z/Os. How can I check if there are gaps in the timelines for each and every product?
Tried this, but doesn't work
-- Products with more than one period; only these can have a gap between periods.
-- The CTE column is named product so the join below can reference B.product.
with selectie (product, tel) as
(
    select product, count(*)
    from PROD_TAB
    group by product
    having count(*) > 1
)
-- Report every period that is not directly followed by another period of the
-- same product although a later period exists. Without the second condition
-- the last period of each product would always be reported.
select A.*, B.tel
from PROD_TAB A
inner join selectie B
    on A.product = B.product
where not exists
      (select 1
       from PROD_TAB C
       where C.product = A.product
         and C.START_DATE = A.END_DATE + 1 DAY
      )
  and exists
      (select 1
       from PROD_TAB D
       where D.product = A.product
         and D.START_DATE > A.END_DATE
      )
Does anyone know the answer?
The following query returns all gaps for all products.
The idea is to enumerate (RN column) all periods inside each product by START_DATE and join each record with its next period record.
-- List the gaps between consecutive periods of each product.
-- The commented-out MYTAB CTE holds sample data; uncomment it to run the
-- query without a real MYTAB table.
WITH
/*
MYTAB (PRODUCT, DESCRIPTION, START_DATE, END_DATE) AS
(
SELECT 'A', 'ProdA1', DATE('2020-01-01'), DATE('2020-01-04') FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A', 'ProdA2', DATE('2020-01-05'), DATE('2020-02-12') FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A', 'ProdA3', DATE('2020-02-13'), DATE('2020-12-31') FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B', 'ProdB1', DATE('2020-01-05'), DATE('2020-01-09') FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B', 'ProdB2', DATE('2020-01-12'), DATE('2020-03-14') FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B', 'ProdB3', DATE('2020-03-15'), DATE('2020-04-18') FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B', 'ProdB4', DATE('2020-04-16'), DATE('2020-05-03') FROM SYSIBM.SYSDUMMY1
)
,
*/
-- Number the periods of each product in START_DATE order.
MYTAB_ENUM AS
(
    SELECT
        T.*
      , ROW_NUMBER() OVER (PARTITION BY PRODUCT ORDER BY START_DATE) RN
    FROM MYTAB T
)
-- Pair every period (A) with the next period of the same product (B).
-- Labeled durations (1 DAY) are used for the date arithmetic.
SELECT A.PRODUCT, A.END_DATE + 1 DAY START_DT, B.START_DATE - 1 DAY END_DT
FROM MYTAB_ENUM A
JOIN MYTAB_ENUM B ON B.PRODUCT = A.PRODUCT AND B.RN = A.RN + 1
-- A gap exists only when the next period starts later than the day after A
-- ends; adjacent and overlapping periods are excluded.
WHERE A.END_DATE + 1 DAY < B.START_DATE;
The result is:
|PRODUCT|START_DT |END_DT |
|-------|----------|----------|
|B |2020-01-10|2020-01-11|
A possibly more efficient way:
-- List the gaps between consecutive periods of each product, in a single
-- pass over MYTAB.
WITH MYTAB2 AS
(
    SELECT
        T.*
        -- END_DATE of the previous period of the same product; NULL for the first one.
      , LAG(END_DATE) OVER (PARTITION BY PRODUCT ORDER BY START_DATE) END_DATE_PREV
    FROM MYTAB T
)
-- Labeled durations (1 DAY) are used for the date arithmetic.
SELECT PRODUCT, END_DATE_PREV + 1 DAY START_DATE, START_DATE - 1 DAY END_DATE
FROM MYTAB2
-- A gap exists only when this period starts later than the day after the
-- previous one ended. First periods drop out because END_DATE_PREV is NULL.
WHERE END_DATE_PREV + 1 DAY < START_DATE;
Thnx Mark, will try this one of these days.
Never heard of LAG in DB2 V12 for z/Os
Will read about it
Thnx

Oracle to Redshift query

I am trying to run below query and getting the error.
ERROR: This type of correlated subquery pattern is not supported due to internal error. How can I rewrite the subquery without altering the result? The correlated subquery that computes cum_count is causing the issue.
-- Distribution of users over PROJECTED_30_DAY_LOGINS bins.
-- The correlated scalar subquery is replaced by an inequality join plus
-- GROUP BY, and every derived table is a named CTE.

-- Upper bound of each bin. The values are distinct, so UNION ALL is sufficient.
WITH bins AS (
    SELECT 1 AS bin_max UNION ALL
    SELECT 2 AS bin_max UNION ALL
    SELECT 3 AS bin_max UNION ALL
    SELECT 4 AS bin_max UNION ALL
    SELECT 5 AS bin_max UNION ALL
    SELECT 10 AS bin_max UNION ALL
    SELECT 15 AS bin_max UNION ALL
    SELECT 20 AS bin_max UNION ALL
    SELECT 30 AS bin_max UNION ALL
    SELECT 40 AS bin_max UNION ALL
    SELECT 60 AS bin_max UNION ALL
    SELECT 80 AS bin_max UNION ALL
    SELECT 99999999 AS bin_max
),
-- Number of users (first login more than 30 days ago) at or below each bound.
-- LEFT JOIN keeps bins without any matching user; COUNT over the joined column
-- then yields 0 for them, as COUNT(*) did in the scalar subquery.
cumulative AS (
    SELECT
        b.bin_max,
        COUNT(ulbu.PROJECTED_30_DAY_LOGINS) AS cum_count
    FROM bins AS b
    LEFT JOIN ndw_owner.MBP_USER_LOGINS_BY_USER AS ulbu
        ON ulbu.DAYS_SINCE_FIRST_LOGIN > 30
       AND ulbu.PROJECTED_30_DAY_LOGINS <= b.bin_max
    GROUP BY b.bin_max
),
-- Users per bin = cumulative count minus the previous bin's cumulative count.
-- The lowest bin has no previous row, so its bin_count is NULL (unchanged
-- from the original query).
per_bin AS (
    SELECT
        bin_max,
        cum_count - LAG(cum_count, 1) OVER (ORDER BY bin_max) AS bin_count
    FROM cumulative
)
SELECT
    bin_max,
    bin_count,
    ROUND(RATIO_TO_REPORT(bin_count) OVER (), 5) AS bin_percent
FROM per_bin;
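Change the subquery into an inequality join between b and ulbu (joining on ulbu.PROJECTED_30_DAY_LOGINS <= b.bin_max, with the DAYS_SINCE_FIRST_LOGIN filter in the join condition) and count the joined rows grouped by bin_max. This produces the cumulative counts that the outer query needs.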

TSQL Fuzzy address matching grouping, 2019 Edition

I have this situation where people asked to group on bad addresses. And I need to work on the tools/env I have, I don't have choice for Google API or 3rd party Data Science tools. I also did my HW, see posts several years old, so still want to check all if any updates available.
In my scenario people want to group IDs 1-6 into single, rest I added for neg test.
-- Test data for the address matching below.
-- Inspect with: select * from #t      Clean up with: drop table #t
SELECT a.Id, a.Adr
INTO #t
FROM (VALUES
    (1,  '1 CROLANA HEIGHTS'),        -- A vs O
    (2,  '1 CROLONA HEIGHTS'),
    (3,  '1 CROLONA HEIGHT DRIVE'),
    (4,  '1 CROLONA HEIGHTS DR'),
    (5,  '1 CROLONA HGHTS DR'),
    (6,  '1 CROLONA HTS DR'),
    -- the remaining rows should not match
    (7,  '1 CORWING DR'),
    (8,  '1 SUNNYHILL DRIVE'),
    (9,  '1 CROWN HILL DR'),
    (10, '1 ADDISON DRv')
) AS a (Id, Adr)
------------------- and below is my fuzzy working script which can be improved)
-- Addresses whose SOUNDEX code equals that of '1 CROLANA HEIGHTS', each shown
-- next to the following address (in adr order) and its DIFFERENCE score.
-- The filter is applied inside the derived table, so LEAD only sees the
-- matching rows. Grouping by SOUNDEX(adr) with COUNT(*) is an alternative
-- view of the same data.
SELECT
    m.id,
    m.adr,
    m.adr_lead,
    SOUNDEX(m.adr) AS Sdx,
    DIFFERENCE(m.adr, m.adr_lead) AS diff
FROM (
    SELECT
        id,
        adr,
        LEAD(adr, 1) OVER (ORDER BY adr) AS adr_lead
    FROM #t
    WHERE SOUNDEX(adr) = SOUNDEX('1 CROLANA HEIGHTS')
) AS m
There are suggestions, which I gladly take. I'm using an intelligent replace at the end of the string and on standalone words to improve the data.
-- Expand a trailing ' La' into ' Lane'; any other value passes through
-- unchanged. T-SQL local variables must start with @ (a leading # denotes a
-- temporary object), so the variable is @st.
DECLARE @st VARCHAR(100) = 'La_Beg_10 La_midleMacy La' -- replace at the end of the string
SELECT
    'ryba',
    @st,
    '-->' f,
    CASE
        WHEN @st LIKE '%' + ' La'
            -- drop the final 'La' and append the full word
            THEN SUBSTRING(@st, 1, LEN(@st) - LEN('La')) + 'Lane'
        ELSE @st
    END N

Filtering data based on dynamic date

-- IDs to report on.
CREATE TABLE #TEMP
(
    ID INT
)
-- Letters sent per ID.
CREATE TABLE #TEMP1
(
    ID INT,
    Letter_Type VARCHAR(100),
    Letter_Sent_Date DATE
)
-- Explicit column lists keep the inserts valid if the tables gain columns.
INSERT INTO #TEMP (ID) VALUES (1),(2),(3),(4)
GO
-- NOTE(review): these date strings are converted according to the session's
-- DATEFORMAT/language setting, so '01/10/2018' can be read as 10 January or
-- 1 October. Confirm the intended order; unseparated 'YYYYMMDD' literals
-- avoid the ambiguity.
INSERT INTO #TEMP1 (ID, Letter_Type, Letter_Sent_Date) VALUES
(1,'A','01/01/2017'),
(1,'B','01/02/2017'),
(1,'C','01/03/2018'),
(1,'D','01/04/2018'),
(2,'A','01/01/2017'),
(2,'B','01/02/2017'),
(2,'C','01/10/2018'),
(2,'D','01/12/2018')
I'm trying to achieve below results - data should be based on date.
Suppose I want to know any letter sent after '01/05/2018' for letter type C.
For ID 1 there is no letter C - in that case, we need to print null value.
I'm trying to do it in single statement as query I currently have is super big due to couple of joins used.
Any help is much appreciated. Thanks!
OUTPUT
1,NULL,NULL
2,C,'01/10/2018'
This is rather clumsy, but I think that it returns what you want (written in Oracle).
A CTE (the WITH factoring clause) is used to prepare test data; you don't need that, as your data is already in those two tables. Useful code begins at line 15; have a look at the comments.
SQL> with
2 temp (id) as -- test data: ids 1 through 4
3 (select level from dual connect by level <= 4),
4 temp1 (id, letter_type, letter_sent_date) as -- test data: letters per id
5 (select 1, 'A', date '2017-01-01' from dual union all
6 select 1, 'B', date '2017-01-02' from dual union all
7 select 1, 'C', date '2018-01-03' from dual union all
8 select 1, 'D', date '2018-01-04' from dual union all
9 -- letters for id 2
10 select 2, 'A', date '2017-01-01' from dual union all
11 select 2, 'B', date '2017-01-02' from dual union all
12 select 2, 'C', date '2018-01-10' from dual union all
13 select 2, 'D', date '2018-01-12' from dual
14 ),
15 -- letters of type 'C' that were sent after 2018-01-05
16 satisfy_condition as
17 (select t.id, t1.letter_type, t1.letter_sent_date
18 from temp t join temp1 t1 on t1.id = t.id
19 where t1.letter_type = 'C'
20 and t1.letter_sent_date > date '2018-01-05'
21 )
22 -- Finally: rows that satisfy the condition ...
23 select i.id, i.letter_type, i.letter_sent_date
24 from satisfy_condition i
25 union
26 -- ... plus ids that have a letter of type 'C' but none sent after 2018-01-05; these get NULLs
27 select t.id, null, null
28 from temp1 t
29 where t.letter_type = 'C'
30 and t.id not in (select i1.id from satisfy_condition i1) -- NOT IN returns no rows if the subquery yields a NULL id; ids here come from LEVEL and are never NULL
31 order by id, letter_type, letter_sent_date;
ID LETTER_TYPE LETTER_SENT_DATE
---------- ------------ --------------------
1
2 C 2018-01-10
SQL>
To avoid a NOT IN / NOT EXISTS solution, maybe this can help you?
-- For every id, return its 'C' letters sent after 2018-05-01, or a single row
-- with NULLs when the id has no such letter.
-- The sample row for id 1 / 'C' / 2018-03-01 is included: it is a 'C' letter
-- sent before the cutoff, so id 1 must still be reported with NULLs.
WITH TEMP (ID, LETTER_TYPE, LETTER_SENT_DATE) AS
(SELECT 1, 'A', DATE '2017-01-01' FROM DUAL UNION ALL
 SELECT 1, 'B', DATE '2017-02-01' FROM DUAL UNION ALL
 SELECT 1, 'C', DATE '2018-03-01' FROM DUAL UNION ALL
 SELECT 1, 'D', DATE '2018-04-01' FROM DUAL UNION ALL
 SELECT 2, 'A', DATE '2017-01-01' FROM DUAL UNION ALL
 SELECT 2, 'B', DATE '2017-02-01' FROM DUAL UNION ALL
 SELECT 2, 'C', DATE '2018-10-01' FROM DUAL UNION ALL
 SELECT 2, 'D', DATE '2018-12-01' FROM DUAL
)
-- Letters that satisfy the condition.
SELECT ID, LETTER_TYPE, LETTER_SENT_DATE
FROM TEMP
WHERE LETTER_TYPE = 'C'
  AND LETTER_SENT_DATE > DATE '2018-05-01'
UNION
-- Ids without any qualifying letter. Counting qualifying letters, rather than
-- 'C' letters of any date, keeps ids whose only 'C' letter predates the cutoff.
SELECT ID, NULL AS LETTER_TYPE, NULL AS LETTER_SENT_DATE
FROM TEMP
GROUP BY ID
HAVING SUM(CASE
               WHEN LETTER_TYPE = 'C' AND LETTER_SENT_DATE > DATE '2018-05-01' THEN 1
               ELSE 0
           END) = 0