REGEX for special character (Decode Unicode)

REGEX for special character (Decode Unicode) - unicode

I have try this code to decode, unicode all works fine if the comment start with any special character, symbol, space and etc shows error.
CREATE TEMP FUNCTION DecodeUnicode(s STRING) AS (
IF(s NOT LIKE '%\\u%', s,
(SELECT CODE_POINTS_TO_STRING(ARRAY_AGG(CAST(CONCAT('0x', x) AS INT64)))
FROM UNNEST(SPLIT(s, '\\u')) AS x
WHERE x != ''))
);
SELECT
original,
DecodeUnicode(original) AS decoded
FROM (
SELECT trim(r'$-\u6599\u91d1\u304c\u9ad8\u3059\u304e\uff01\uff01\uff01') AS original UNION ALL
SELECT trim(r'abcd')
);

Below is for BigQuery Standard SQL
#standardSQL
CREATE TEMP FUNCTION DecodeUnicode(s STRING) AS (
(SELECT CODE_POINTS_TO_STRING(ARRAY_AGG(CAST(CONCAT('0x', x) AS INT64)))
FROM UNNEST(SPLIT(s, '\\u')) AS x
WHERE x != ''
)
);
WITH `yourTable` AS (
SELECT r'$-\u6599\u91d1\u304c\u9ad8\u3059\u304e\uff01\uff01\uff01' AS original UNION ALL
SELECT r'abcd'
), uchars AS (
SELECT DISTINCT
c,
DecodeUnicode(c) uchar
FROM `yourTable`,
UNNEST(REGEXP_EXTRACT_ALL(original, r'(\\u[abcdef0-9]{4})')) c
)
SELECT
original,
STRING_AGG(IFNULL(uchar, x), '' ORDER BY pos) decoded
FROM (
SELECT
original,
pos,
SUBSTR(original,
SUM(CASE char WHEN '' THEN 1 ELSE 6 END)
OVER(PARTITION BY original ORDER BY pos) - CASE char WHEN '' THEN 0 ELSE 5 END,
CASE char WHEN '' THEN 1 ELSE 6 END) x,
uchar
FROM `yourTable`,
UNNEST(REGEXP_EXTRACT_ALL(original, r'(\\u[abcdef0-9]{4})|.')) char WITH OFFSET AS pos
LEFT JOIN uchars u ON u.c = char
)
GROUP BY original
-- ORDER BY original
What it does - it extracts all unicode characters and decode them and replace them in original string leaving non-unicodes stay as they are, so the output will be as below
original decoded
$-\u6599\u91d1\u304c\u9ad8\u3059\u304e\uff01\uff01\uff01 $-料金が高すぎ！！！
abcd abcd

Related

TSQL - in a string, replace a character with a fixed one every 2 characters

I can't replace every 2 characters of a string with a '.'
select STUFF('abcdefghi', 3, 1, '.') c3,STUFF('abcdefghi', 5, 1,
'.') c5,STUFF('abcdefghi', 7, 1, '.') c7,STUFF('abcdefghi', 9, 1, '.')
c9
if I use STUFF I should subsequently overlap the strings c3, c5, c7 and c9. but I can't find a method
can you help me?
initial string:
abcdefghi
the result I would like is
ab.de.gh.
the string can be up to 50 characters

Create a numbers / tally / digits table, if you don't have one already, then you can use this to target each character position:
with digits as ( /* This would be a real table, here it's just to test */
select n from (values(1),(2),(3),(4),(5),(6),(7),(8),(9),(10))x(n)
), t as (
select 'abcdefghi' as s
)
select String_Agg( case when d.n%3 = 0 then '.' else Substring(t.s, d.n, 1) end, '')
from t
cross apply digits d
where d.n <Len(t.s)
Using for xml with existing table
with digits as (
select n from (values(1),(2),(3),(4),(5),(6),(7),(8),(9),(10))x(n)
),
r as (
select t.id, case when d.n%3=0 then '.' else Substring(t.s, d.n, 1) end ch
from t
cross apply digits d
where d.n <Len(t.s)
)
select result=(select '' + ch
from r r2
where r2.id=r.id
for xml path('')
)
from r
group by r.id

You can try it like this:
Easiest might be a quirky update ike here:
DECLARE #string VARCHAR(100)='abcdefghijklmnopqrstuvwxyz';
SELECT #string = STUFF(#string,3*A.pos,1,'.')
FROM (SELECT TOP(LEN(#string)/3) ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
FROM master..spt_values) A(pos);
SELECT #string;
Better/Cleaner/Prettier was a recursive CTE:
We use a declared table to have some tabular sample data
DECLARE #tbl TABLE(ID INT IDENTITY, SomeString VARCHAR(200));
INSERT INTO #tbl VALUES('')
,('a')
,('ab')
,('abc')
,('abcd')
,('abcde')
,('abcdefghijklmnopqrstuvwxyz');
--the query
WITH recCTE AS
(
SELECT ID
,SomeString
,(LEN(SomeString)+1)/3 AS CountDots
,1 AS OccuranceOfDot
,SUBSTRING(SomeString,4,LEN(SomeString)) AS RestString
,CAST(LEFT(SomeString,2) AS VARCHAR(MAX)) AS Growing
FROM #tbl
UNION ALL
SELECT t.ID
,r.SomeString
,r.CountDots
,r.OccuranceOfDot+2
,SUBSTRING(RestString,4,LEN(RestString))
,CONCAT(Growing,'.',LEFT(r.RestString,2))
FROM #tbl t
INNER JOIN recCTE r ON t.ID=r.ID
WHERE r.OccuranceOfDot/2<r.CountDots-1
)
SELECT TOP 1 WITH TIES ID,Growing
FROM recCTE
ORDER BY ROW_NUMBER() OVER(PARTITION BY ID ORDER BY OccuranceOfDot DESC);
--the result
1
2 a
3 ab
4 ab
5 ab
6 ab.de
7 ab.de.gh.jk.mn.pq.st.vw.yz
The idea in short
We use a recursive CTE to walk along the string
we add the needed portion together with a dot
We stop, when the remaining length is to short to continue
a little magic is the ORDER BY ROW_NUMBER() OVER() together with TOP 1 WITH TIES. This will allow all first rows (frist per ID) to appear.

Find and Replace numbers in a string

If I input a string as given below, I should be able to convert as mentioned below.
Ex 1: String - 5AB89C should be converted as 0000000005AB0000000089C
Ex 2: String GH1HJ should be converted as GH0000000001HJ
Ex 3: String N99K7H45 should be B0000000099K0000000007H0000000045
Each number should be complimented with 10 leading zeros including the number. In Ex:1, number 5 is complemented with 9 leading zeros making 10 digits, same way 89 is complimented with 8 leading zeros making total of 10 digits. Alphabets and any special characters should be untouched.

Once you get a copy of PatternSplitCM This is easy as pie.
Here's how we do it with one value:
DECLARE #string VARCHAR(8000) = '5AB89C'
SELECT CASE f.[matched] WHEN 1 THEN '00000000'+'' ELSE '' END + f.item
FROM dbo.patternsplitCM(#String,'[0-9]') AS f
ORDER BY f.ItemNumber
FOR XML PATH('');
Returns: 000000005AB0000000089C
Now against a table:
-- sample data
DECLARE #table TABLE (StringId INT IDENTITY, String VARCHAR(8000));
INSERT #table(String)
VALUES('5AB89C'),('GH1HJ'),('N99K7H45');
SELECT t.StringId, oldstring = t.String, newstring = f.padded
FROM #table AS t
CROSS APPLY
(
SELECT CASE f.[matched] WHEN 1 THEN '00000000'+'' ELSE '' END + f.item
FROM dbo.patternsplitCM(t.String,'[0-9]') AS f
ORDER BY f.ItemNumber
FOR XML PATH('')
) AS f(padded);
Returns:
StringId oldstring newstring
----------- ----------------- --------------------------------------
1 5AB89C 000000005AB0000000089C
2 GH1HJ GH000000001HJ
3 N99K7H45 N0000000099K000000007H0000000045
... and that's it. The code to create PatternSplitCM is below.
PatternSplitCM Code:
CREATE FUNCTION dbo.PatternSplitCM
(
#List VARCHAR(8000) = NULL
,#Pattern VARCHAR(50)
) RETURNS TABLE WITH SCHEMABINDING
AS
RETURN
WITH numbers AS (
SELECT TOP(ISNULL(DATALENGTH(#List), 0))
n = ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
FROM
(VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n),
(VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) e (n),
(VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) f (n),
(VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) g (n))
SELECT
ItemNumber = ROW_NUMBER() OVER(ORDER BY MIN(n)),
Item = SUBSTRING(#List,MIN(n),1+MAX(n)-MIN(n)),
Matched
FROM (
SELECT n, y.Matched, Grouper = n - ROW_NUMBER() OVER(ORDER BY y.Matched,n)
FROM numbers
CROSS APPLY (
SELECT Matched = CASE WHEN SUBSTRING(#List,n,1) LIKE #Pattern THEN 1 ELSE 0 END
) y
) d
GROUP BY Matched, Grouper

Get characters before underscore and separated by comma from a string in SQL Server 2008

I tried this query
DECLARE #AdvancedSearchSelectedDropdownName TABLE (
SelectedIds VARCHAR(2048),
AdvanceSearchOptionTypeId INT
)
INSERT INTO #AdvancedSearchSelectedDropdownName
VALUES ('4_0,5_1,6_2,7_3', 23),
('62_3', 21), ('2_4', 23)
DECLARE #selectedIds VARCHAR(MAX) = '';
SELECT #selectedIds +=
CASE WHEN SelectedIds IS NULL
THEN #selectedIds + ISNULL(SelectedIds + ',', '')
WHEN SelectedIds IS NOT NULL
THEN SUBSTRING(SelectedIds, 0, CHARINDEX('_', SelectedIds, 0)) + ','
END
FROM #AdvancedSearchSelectedDropdownName WHERE advanceSearchOptionTypeId = 23
SELECT #selectedIds
Current output: 4,2
Required output: 4,5,6,7,2
We may have n number of comma separated values in the SelectedIds column.

You might go this route:
WITH Casted AS
(
SELECT *
,CAST('<x><y>' + REPLACE(REPLACE(SelectedIds,'_','</y><y>'),',','</y></x><x><y>') + '</y></x>' AS XML) SplittedToXml
FROM #AdvancedSearchSelectedDropdownName
)
SELECT *
FROM Casted;
This will return your data in this form:
<x>
<y>4</y>
<y>0</y>
</x>
<x>
<y>5</y>
<y>1</y>
</x>
<x>
<y>6</y>
<y>2</y>
</x>
<x>
<y>7</y>
<y>3</y>
</x>
Now we can grab all the x and just the first y:
WITH Casted AS
(
SELECT *
,CAST('<x><y>' + REPLACE(REPLACE(SelectedIds,'_','</y><y>'),',','</y></x><x><y>') + '</y></x>' AS XML) SplittedToXml
FROM #AdvancedSearchSelectedDropdownName
)
SELECT Casted.AdvanceSearchOptionTypeId AS TypeId
,x.value('y[1]/text()[1]','int') AS IdValue
FROM Casted
CROSS APPLY SplittedToXml.nodes('/x') A(x);
The result:
TypeId IdValue
23 4
23 5
23 6
23 7
21 62
23 2
Hint: Do not store comma delimited values!
It is a very bad idea to store your data in this format. You can use a generic format like my XML to store this or a structure of related side tables. But such construction tend to turn out as a real pain in the neck...

After a little re-think. Perhaps something a little more straightforward.
Now, if you have a limited number of _N
Example
;with cte as (
Select *
,RN = Row_Number() over(Order by (Select NULL))
From #AdvancedSearchSelectedDropdownName A
)
Select AdvanceSearchOptionTypeId
,IDs = replace(
replace(
replace(
replace(
replace(
stuff((Select ',' +SelectedIds From cte Where AdvanceSearchOptionTypeId=A.AdvanceSearchOptionTypeId Order by RN For XML Path ('')),1,1,'')
,'_0','')
,'_1','')
,'_2','')
,'_3','')
,'_4','')
From cte A
Group By AdvanceSearchOptionTypeId
Returns
AdvanceSearchOptionTypeId IDs
21 62
23 4,5,6,7,2

If interested in a helper function.
Tired of extracting strings (left, right, charindex, patindex, ...) I modified s split/parse function to accept TWO non-like delimiters. In this case a , and _.
Example
;with cte as (
Select A.AdvanceSearchOptionTypeId
,B.*
,RN = Row_Number() over(Order by (Select NULL))
From #AdvancedSearchSelectedDropdownName A
Cross Apply [dbo].[tvf-Str-Extract](','+A.SelectedIds,',','_') B
)
Select AdvanceSearchOptionTypeId
,IDs = stuff((Select ',' +RetVal From cte Where AdvanceSearchOptionTypeId=A.AdvanceSearchOptionTypeId Order by RN,RetVal For XML Path ('')),1,1,'')
From cte A
Group By AdvanceSearchOptionTypeId
Returns
AdvanceSearchOptionTypeId IDs
21 62
23 4,5,6,7,2
The TVF if Interested
CREATE FUNCTION [dbo].[tvf-Str-Extract] (#String varchar(max),#Delimiter1 varchar(100),#Delimiter2 varchar(100))
Returns Table
As
Return (
with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (IsNull(DataLength(#String),0)) Row_Number() over (Order By (Select NULL)) From (Select N=1 From cte1 N1,cte1 N2,cte1 N3,cte1 N4,cte1 N5,cte1 N6) A ),
cte3(N) As (Select 1 Union All Select t.N+DataLength(#Delimiter1) From cte2 t Where Substring(#String,t.N,DataLength(#Delimiter1)) = #Delimiter1),
cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(#Delimiter1,#String,s.N),0)-S.N,8000) From cte3 S)
Select RetSeq = Row_Number() over (Order By N)
,RetPos = N
,RetVal = left(RetVal,charindex(#Delimiter2,RetVal)-1)
From (
Select *,RetVal = Substring(#String, N, L)
From cte4
) A
Where charindex(#Delimiter2,RetVal)>1
)
/*
Max Length of String 1MM characters
Declare #String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[tvf-Str-Extract] (#String,'[[',']]')
*/

Disclaimer.As per first Normal form, you should not store multiple values in a single cell. I would suggest you to avoid storing this way.
Still the approach would be: Create a UDF function which separates comma separated list into a table valued variable. Below code I have not tested. but, it gives idea on how to approach this problem.
Refer to CSV to table approaches
Declare #selectedIds varchar(max) = '';
SET #selectedIds = SELECT STUFF
(SELECT ','+ (SUBSTRING(c.value, 0, CHARINDEX('_', c.value, 0))
FROM #AdvancedSearchSelectedDropdownName AS tv
CROSS APPLY dbo.udfForCSVToList(SelectedIds) AS c
WHERE advanceSearchOptionTypeId = 23
FOR XML PATH('')),1,2,'');
SELECT #selectedIds

Get the text between two special characters in oracle

I have a requirement wherein I have to take in the data between two special characters only. The first one and the second character.
Ex: KVN REG#HENDRI#AEP: 6256765058812#KERG00101258#875303069817#THT914000
Here I need the data between first # and the second one. I gathered data from different sources and wrote a query. I just want to know a simpler form of the query, wherein I need not to have to put the rownum function.
Query:
select b.name as v_custname
from ( select a.*, rownum rnum
from (SELECT regexp_substr(token, '[^:]+', 1, 1) name
FROM (
SELECT regexp_substr(s, '[^\#]+', 1, lvl) token, lvl
FROM (
SELECT s, LEVEL lvl FROM (select 'KVN REG#HENDRI#AEP: 6256765058812#KERG00101258#875303069817#THT914000' s from dual)
CONNECT BY LEVEL < LENGTH(s) - LENGTH(REPLACE(s, '#'))
)
) ) a
where rownum <= 2 ) b
where rnum >= 2;

You can do this with just instr and substr():
with sample_data as (
select 'KVN REG#HENDRI#AEP: 6256765058812#KERG00101258#875303069817#THT914000' as token
from dual
)
select substr(token,
instr(token, '#') + 1,
instr(token, '#', 1, 2) - instr(token, '#') - 1
)
from sample_data
instr(token, '#') + 1 finds the first occurrence of #
instr(token, '#', 1, 2) finds the second occurrence of #
substr() takes the first position plus a length to be extracted. The length you need is the second position minus the first position.

Split a string in characters SQL

How can I split a string in characters and add a new line after each character in PostgreSQL
For example
num desc
1 Hello
2 Bye
num desc
1 H
e
l
l
o
2 B
y
e

select num, regexp_split_to_table(descr,'')
from the_table
order by num;
SQLFiddle: http://sqlfiddle.com/#!15/13c00/4
The order of the characters is however not guaranteed and achieving that is a bit complicated.
Building on Erwin's answer regarding this problem:
select case
when row_number() over (partition by id order by rn) = 1 then id
else null
end as id_display,
ch_arr[rn]
from (
select *,
generate_subscripts(ch_arr, 1) AS rn
from (
select id,
regexp_split_to_array(descr,'') as ch_arr
from data
) t1
) t2
order by id, rn;
Edit:
If you just want a single string for each id, where the characters are separated by a newline, you can use this:
select id,
array_to_string(regexp_split_to_array(descr,''), chr(10))
from data
order by id

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

REGEX for special character (Decode Unicode) - unicode

Related

TSQL - in a string, replace a character with a fixed one every 2 characters

Find and Replace numbers in a string

Get characters before underscore and separated by comma from a string in SQL Server 2008

Get the text between two special characters in oracle

Split a string in characters SQL

Categories

Resources