RedShift: troubles with regexp_substr

RedShift: troubles with regexp_substr - amazon-redshift

I have this JSON at RedShift: {"skippable": true, "unit": true}
I want to extract only the quoted JSON keys, for example "skippable", "unit", etc.
I use this QUERY:
SELECT regexp_substr(REPLACE(REPLACE(attributes, '{', ''), '}', '')::VARCHAR, '\S+:') AS regexp, JSON_PARSE(attributes) AS attributes_super
FROM source.table
WHERE prompttype != 'input'.
But the "regexp" column comes back empty.

Solution is:
-- Build a temp table "numbers" holding the integers 1 .. (max comma count + 1).
-- The next query uses it to index the comma-separated pieces of "attributes".
-- "table" is the post's placeholder for the real source table.
SELECT
    seq.n::int AS n
INTO TEMP numbers
FROM (
    -- Row numbers over an arbitrary ordering; at most 30 are produced.
    SELECT row_number() OVER (ORDER BY true) AS n
    FROM table
    LIMIT 30
) AS seq
CROSS JOIN (
    -- Largest number of commas found in any "attributes" value.
    SELECT max(regexp_count(attributes, '[,]')) AS max_num
    FROM table
    LIMIT 30
) AS bounds
WHERE seq.n <= bounds.max_num + 1;
-- For the newest row whose prompttype is not 'input', expose selected JSON
-- attributes as columns and collect the quoted key found in each
-- comma-separated piece of the raw "attributes" text.
-- Requires the temp table "numbers" (column n) to exist.
-- NOTE(review): splitting on ',' assumes no attribute value itself contains a
-- comma — confirm against real data.
WITH all_values AS (
    SELECT
        c.id,
        c.attributes,
        c.attributes_super.prompt,
        c.attributes_super.description,
        c.attributes_super.topic,
        c.attributes_super.context,
        c.attributes_super.use_case,
        c.attributes_super.subtitle,
        c.attributes_super.txValues,
        c.attributes_super.flashmode,
        c.attributes_super.skippable,
        c.attributes_super.videoMaxDuration,
        c.attributes_super.defaultCameraFacing,
        c.attributes_super.locationRequired
    FROM (
        SELECT *, JSON_PARSE(attributes) AS attributes_super
        FROM table
        WHERE prompttype != 'input'
    ) AS c
    ORDER BY created DESC
    LIMIT 1
), list_of_attr AS (
    -- One row per non-empty comma-separated piece; others_attrs is the first
    -- quoted token in that piece. The underscore is part of the character
    -- class so that keys such as "use_case" are matched.
    SELECT
        *,
        regexp_substr(split_part(attributes, ',', n), '\"[0-9a-zA-Z_]+\"') AS others_attrs
    FROM all_values
    CROSS JOIN numbers
    WHERE split_part(attributes, ',', n) IS NOT NULL
      AND split_part(attributes, ',', n) != ''
), combine_attrs AS (
    -- Collapse back to one row, joining the collected tokens with commas.
    SELECT
        id, attributes, prompt, description,
        topic, context, use_case, subtitle, txvalues, flashmode,
        skippable, videomaxduration, defaultcamerafacing, locationrequired,
        LISTAGG(others_attrs, ',') AS others_attrs
    FROM list_of_attr
    GROUP BY
        id, attributes, prompt, description, topic,
        context, use_case, subtitle, txvalues, flashmode,
        skippable, videomaxduration, defaultcamerafacing, locationrequired
)
-- The original stopped after the last CTE; a WITH clause needs a main query.
SELECT *
FROM combine_attrs;

Related

Function that will read the data from a column and generate the missing rows dynamically

I have a query that returns the missing documentno values from a table, but the length of the left part of the documentno column is passed in manually, and the right part has some ambiguities.
Now I want a query that reads the data from the column (documentno) and returns the missing documentno values dynamically.
my query :
-- Sample table and rows used by the question.
CREATE TABLE c_order (
"order_id" VARCHAR(22),
"documentno" VARCHAR(20)
);
INSERT INTO c_order
("order_id", "documentno")
VALUES
('100001120', 'AGB/2021/02050'),
('100001124' ,'AGB/2021/02055'),
('100001120', 'PROFS/2021/02056'),
('100001124' ,'PROF/2021/02060'),
('100001125' ,'PROF/2021/02065'),
('100001120', 'PROFS/2020/02050_A'),
('100001124' ,'PROFS/2020/02055_A'),
('100001120', 'PROFS/2021/02056'),
('100001124' ,'PROFSS/2021/0206010'),
('100001125' ,'PROFSS/2021/0206020');
-- Asker's current attempt: for each prefix, list the numbers between the
-- smallest and largest numeric suffix that do not occur in c_order.
with cte as (
-- lpart: the first 13 characters of documentno (a hand-picked fixed width)
select left(documentno,13) lpart,
-- num: third '/'-separated segment with every non-digit removed, cast to int
regexp_replace(split_part(documentno, '/', 3), '[^0-9]', '', 'g')::int as num
from c_order
), minmax as (
-- smallest and largest num per prefix
select lpart, min(num) minpart, max(num) maxpart
from cte
group by lpart
)
select lpart||t.doc_no as missing_doc_no
from minmax m
-- one candidate row per integer in [minpart, maxpart]
cross join generate_series(minpart, maxpart) as t(doc_no)
-- keep a candidate only if no row has that number; the comparison looks at
-- the number alone and does not take the prefix into account
where not exists (select *
from c_order c
where regexp_replace(split_part(c.documentno, '/', 3), '[^0-9]', '', 'g')::int = t.doc_no)
my fiddle : https://dbfiddle.uk/?rdbms=postgres_12&fiddle=0d3a6a647c6ef2a9063c7f3289446998
How can I do that?

I didn't write out the whole solution since you already have most of it right, but here is how to get the trailing numeric part dynamically using a regular expression:
-- Per prefix ("<word>/<digits>/"), the largest and smallest leading number
-- of the remainder of documentno.
-- The cast to int happens inside the aggregate so MIN/MAX compare numbers;
-- aggregating the text first compares strings, where '9' sorts after '10'.
-- NULLIF skips rows whose remainder does not start with a digit instead of
-- failing on ''::int.
select
    substring(documentno, '\w*\/\d*\/') as constantpart,
    max(nullif(substring(regexp_replace(documentno, '\w*\/\d*\/', '') from '\d*'), '')::int) as maxnumericpart,
    min(nullif(substring(regexp_replace(documentno, '\w*\/\d*\/', '') from '\d*'), '')::int) as minnumericpart
from c_order co
group by constantpart
OK, here is the full solution:
-- List every document number missing between the smallest and largest
-- numeric suffix of each prefix ("<word>/<digits>/"), zero-padded to the
-- width used by that prefix.
select tt.constantpart || lpad(t.doc_no::text, tt.ll, '0') as missingdocumentno
from (
    select
        substring(documentno, '\w*\/\d*\/') as constantpart,
        -- Cast inside the aggregates so MIN/MAX are numeric, not lexicographic.
        max(nullif(substring(regexp_replace(documentno, '\w*\/\d*\/', '') from '\d*'), '')::int) as maxnumericpart,
        min(nullif(substring(regexp_replace(documentno, '\w*\/\d*\/', '') from '\d*'), '')::int) as minnumericpart,
        -- Pad to the widest suffix in the group so lpad never truncates a number.
        max(length(substring(regexp_replace(documentno, '\w*\/\d*\/', '') from '\d*'))) as ll
    from c_order co
    group by constantpart
) tt
cross join generate_series(tt.minnumericpart, tt.maxnumericpart) as t(doc_no)
where not exists (
    select 1
    from c_order co2
    where substring(co2.documentno, '\w*\/\d*\/\d*') = tt.constantpart || lpad(t.doc_no::text, tt.ll, '0')
)
order by missingdocumentno;
db<>fiddle here

Select specific lines in data according to last update [duplicate]

Name Value AnotherColumn
-----------
Pump 1 8000.0 Something1
Pump 1 10000.0 Something2
Pump 1 10000.0 Something3
Pump 2 3043 Something4
Pump 2 4594 Something5
Pump 2 6165 Something6
My table looks something like this. I would like to know how to select max value for each pump.
-- Asker's attempt: comma-join each row to a per-group maximum of value.
-- NOTE(review): as posted this does not parse — "where group by" and
-- "where and" each leave a WHERE without a predicate — and posnumber is not
-- one of the columns in the sample table shown above.
select a.name, value from out_pumptable as a,
(select name, max(value) as value from out_pumptable where group by posnumber)g where and g.value = value
this code does the job, but i get two entries of Pump 1 since it has two entries with same value.

-- Highest value recorded for each pump name.
SELECT
    name,
    MAX(value)
FROM out_pumptable
GROUP BY name

-- One row per name: the row ranked first when ordering by value descending.
WITH ranked AS (
    SELECT
        name,
        value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM out_pumptable
)
SELECT
    name,
    value
FROM ranked
WHERE rn = 1

-- One row per pump name: its maximum value and, among the rows holding that
-- maximum, the greatest AnotherColumn.
-- The outer table now carries the alias "b" that the select list and join
-- condition refer to, and the table name is spelled as in the question.
SELECT
    b.name,
    MAX(b.value) AS MaxValue,
    MAX(b.AnotherColumn) AS AnotherColumn
FROM out_pumptable AS b
INNER JOIN (
    -- Maximum value per name.
    SELECT
        name,
        MAX(value) AS MaxValue
    FROM out_pumptable
    GROUP BY name
) AS a
    ON a.name = b.name
    AND a.MaxValue = b.value
GROUP BY b.name
Note this would be far easier if you had a primary key. Here is an Example
-- Variant for a table with a primary key column PK: return the full row
-- with the highest PK among each name's maximum-value rows.
SELECT *
FROM out_pumptable AS c
WHERE c.PK IN (
    SELECT MAX(b.PK) AS MaxPK
    FROM out_pumptable AS b
    INNER JOIN (
        -- Maximum value per name.
        SELECT
            name,
            MAX(value) AS MaxValue
        FROM out_pumptable
        GROUP BY name
    ) AS a
        ON a.name = b.name
        AND a.MaxValue = b.value
    -- Without this grouping MAX(PK) collapses to a single key for the whole
    -- table, so only one pump would be returned.
    GROUP BY b.name
)

-- Rows whose Value equals the maximum Value for their Name; the GROUP BY
-- removes exact duplicate (Name, Value, AnotherColumn) rows.
SELECT
    p.Name,
    p.Value,
    p.AnotherColumn
FROM out_pumptable AS p
WHERE p.Value = (
    SELECT MAX(f.Value)
    FROM out_pumptable AS f
    WHERE f.Name = p.Name
)
GROUP BY
    p.Name,
    p.Value,
    p.AnotherColumn
Try like this, It works.

-- Sorts the rows by value descending in a derived table, then groups by name.
-- NOTE(review): selecting * while grouping only by v.name is not standard SQL;
-- presumably this targets an engine that permits it — confirm which row per
-- group is actually returned before relying on this.
select * from (select * from table order by value desc limit 999999999) v group by v.name

Using analytic function is the easy way to find max value of every group.
Documentation : https://learn.microsoft.com/en-us/sql/t-sql/functions/row-number-transact-sql?view=sql-server-ver15
-- One row per name: the row with the highest value. Ties on value are
-- broken arbitrarily because ROW_NUMBER() numbers them in unspecified order.
-- Reads the question's table (the original selected from an unrelated
-- "students" table) and names the derived table, which T-SQL requires.
SELECT
    name,
    value,
    AnotherColumn
FROM (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn,
        *
    FROM out_pumptable
) AS ranked
WHERE rn = 1

-- Joins each row of mytable to the maximum Value of its name (t2), then
-- groups by name.
-- NOTE(review): "<anything you would like>" is a placeholder, so this is a
-- template rather than a runnable statement. t1.Value and t1.AnotherColumn
-- are selected without being aggregated or grouped — confirm the target
-- engine accepts that.
SELECT t1.name, t1.Value, t1.AnotherColumn
FROM mytable t1
JOIN (SELECT name AS nameMax, MAX(Value) as valueMax
FROM mytable
GROUP BY name) AS t2
ON t2.nameMax = t1.name AND t2.valueMax = t1.Value
WHERE 1 OR <anything you would like>
GROUP BY t1.name;

-- Distinct (ProdId, Quantity) pairs where Quantity is the product's maximum.
SELECT DISTINCT
    t1.ProdId,
    t1.Quantity
FROM Dummy AS t1
INNER JOIN (
    SELECT
        ProdId,
        MAX(Quantity) AS MaxQuantity
    FROM Dummy
    GROUP BY ProdId
) AS t2
    ON t1.ProdId = t2.ProdId
    AND t1.Quantity = t2.MaxQuantity
ORDER BY t1.ProdId
this will give you the idea.

Parse Numeric Ranges in PostgreSQL

I would like to produce a string containing some parsed numeric ranges.
I have a table with some data
b_id,s_id
1,50
1,51
1,53
1,61
1,62
1,63
2,91
2,95
2,96
2,97
Using only SQL in PostgreSQL, how could I produce this output:
b_id,s_seqs
1,"50-51,53,61-63"
2,"91,95-97"
How on earth do I do that?

-- Collapse runs of consecutive s_id values per b_id into "first-last"
-- ranges (a run of one value prints as that value) and join the ranges
-- with commas, e.g. 50,51,53,61,62,63 -> "50-51,53,61-63".
with marked as (
    -- mark = 1 where a row starts a new run: its s_id is not previous + 1.
    -- lag() defaults to s_id itself, so the first row always starts a run.
    select
        b_id,
        s_id,
        (s_id - 1 <> lag(s_id, 1, s_id) over w)::int as mark
    from my_table
    window w as (partition by b_id order by s_id)
),
numbered as (
    -- Running sum of the marks gives every run its own number within b_id.
    select
        b_id,
        s_id,
        sum(mark) over (partition by b_id order by s_id) as seq_no
    from marked
),
ranges as (
    -- Format each run from its min and max. Unlike string_agg without an
    -- ORDER BY plus a regexp, this does not depend on aggregate input order.
    select
        b_id,
        seq_no,
        case
            when min(s_id) = max(s_id) then min(s_id)::text
            else min(s_id)::text || '-' || max(s_id)::text
        end as seq
    from numbered
    group by b_id, seq_no
)
select
    b_id,
    string_agg(seq, ',' order by seq_no) as s_seqs
from ranges
group by b_id;
Here you can find a step-by-step analyse from the innermost query towards the outside.

T-SQL split string by - and space

I'm having a difficult time with T-SQL and I was wondering if somebody could point me in the right direction.
I have the following variable called @input
-- Input string to split: "<id> - <first name> [<middle name>] <last name>".
-- T-SQL local variables start with "@".
DECLARE @input nvarchar(100);
SET @input= '27364 - John Smith';
-- SET @input= '27364 - John Andrew Smith';
I need to split this string into 3 parts (ID, FirstName and LastName), or 4 if the string contains a MiddleName. For security reasons I cannot use functions.
My approach was to use SUBSTRING and CHARINDEX.
-- Asker's attempt, kept as posted apart from restoring the "@" variable
-- sigil; the remaining problems are what the question is about.
-- NOTE(review): @Id, @FirstName, @LastName and @PartyCode are not declared in
-- the posted code, and the @LastName branch searches for '0' rather than ' '.
SET @Id = SUBSTRING(@input, 1, CASE CHARINDEX('-', @input)
WHEN 0
THEN LEN(@input)
ELSE
CHARINDEX('-', @input) - 2
END);
SET @FirstName = SUBSTRING(@input, CASE CHARINDEX(' ', @input)
WHEN 0
THEN LEN(@input) + 1
ELSE
CHARINDEX(' ', @input) + 1
END, 1000);
SET @LastName = SUBSTRING(@input, CASE CHARINDEX(' ', @input)
WHEN 0
THEN LEN(@input) + 1
ELSE
CHARINDEX('0', @input) + 1
END, 1000);
Select @PartyCode,@FirstName,@LastName
I am stuck because I don't know how to proceed, and the code also has to be smart enough to add a fourth part if a MiddleName exists.
Any thoughts?
Thanks in advance

Hopefully this is part of a normalization project. This data is breaking 1NF and one really should avoid that...
Try it like this
The advantages
typesafe values
ad-hoc SQL
set based
If you want you might use a CASE WHEN to check if the last part is NULL and place Part2 into Part3 in this case...
-- Split each test string on spaces by turning it into <x>..</x> XML
-- elements and reading the elements by position.
DECLARE @input table(teststring nvarchar(100));
INSERT INTO @input VALUES
(N'27364 - John Smith'),(N'27364 - John Andrew Smith');
WITH Splitted AS
(
-- ' - ' becomes a plain space so the id and the names share one delimiter.
-- &, < and > are replaced by their XML entities before the cast so that
-- they cannot break the generated markup.
SELECT CAST(N'<x>' + REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(teststring,N' - ',N' '),N'&',N'&amp;'),N'<',N'&lt;'),N'>',N'&gt;'),N' ',N'</x><x>') + N'</x>' AS XML) testXML
FROM @input
)
-- A position with no element yields NULL (Part3 for a two-word name).
SELECT testXML.value('/x[1]','int') AS Number
,testXML.value('/x[2]','nvarchar(max)') AS Part1
,testXML.value('/x[3]','nvarchar(max)') AS Part2
,testXML.value('/x[4]','nvarchar(max)') AS Part3
FROM Splitted
The result
Number Part1 Part2 Part3
27364 John Smith NULL
27364 John Andrew Smith

SQL Server 2016 has a new built-in function called STRING_SPLIT().
Assuming you are allowed to create user-defined functions built on it, but CLR functions are not allowed:
-- Inline table-valued function: splits @String into words on spaces, tabs,
-- carriage returns and line feeds.
-- Returns one row per non-empty word: [value] (trimmed) and [ordinal].
-- NOTE(review): [ordinal] relies on STRING_SPLIT returning pieces in input
-- order while ROW_NUMBER orders by a constant — confirm that this ordering
-- is guaranteed on the target server version.
CREATE FUNCTION dbo.WORD_SPLIT
(
    @String AS nvarchar(4000)
)
RETURNS TABLE
AS
RETURN
(
    WITH Spaces AS
    (
        -- Pass 1: split on spaces.
        SELECT Spaced.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY (SELECT 1)) AS ordinal
        FROM STRING_SPLIT(@String, ' ') AS Spaced
    )
    , Tabs AS
    (
        -- Pass 2: split on tabs. The original split on a space again, which
        -- made this pass a no-op.
        SELECT Tabbed.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY s.ordinal, (SELECT 1)) AS ordinal
        FROM Spaces AS s
        CROSS APPLY STRING_SPLIT(s.[value], CHAR(9)) AS Tabbed
    )
    , NewLines1 AS
    (
        -- Pass 3: split on carriage returns.
        SELECT NewLined1.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY t.ordinal, (SELECT 1)) AS ordinal
        FROM Tabs AS t
        CROSS APPLY STRING_SPLIT(t.[value], CHAR(13)) AS NewLined1
    )
    , NewLines2 AS
    (
        -- Pass 4: split on line feeds.
        SELECT NewLined2.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY nl1.ordinal, (SELECT 1)) AS ordinal
        FROM NewLines1 AS nl1
        CROSS APPLY STRING_SPLIT(nl1.[value], CHAR(10)) AS NewLined2
    )
    -- Drop empty pieces and renumber the remaining words.
    SELECT LTRIM(RTRIM(nl2.[value])) AS [value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY nl2.ordinal, (SELECT 1)) AS ordinal
    FROM NewLines2 AS nl2
    WHERE LTRIM(RTRIM(nl2.[value])) <> ''
)
GO
Usage:
-- Not normalized: one output row per word, next to its source row.
SELECT i.*, split.[value], split.[ordinal]
FROM @input AS i
CROSS APPLY dbo.WORD_SPLIT(i.teststring) AS split
-- Normalized: words pivoted into columns part1..part4.
-- NOTE(review): no source-row key is carried into the pivot, so several
-- input rows collapse into a single output row — fine for one input string.
;WITH Splitted AS
(
SELECT split.[value], split.[ordinal]
FROM @input AS i
CROSS APPLY dbo.WORD_SPLIT(i.teststring) AS split
)
SELECT *
FROM (SELECT [value], 'part' + CONVERT(nvarchar(20), [ordinal]) AS [parts] FROM Splitted) AS s
-- PIVOT needs its closing parenthesis and a table alias.
PIVOT (MAX([value]) FOR [parts] IN ([part1], [part2], [part3], [part4])) AS p
Or assuming that, per-security, you are not allowed to make schema changes:
-- Same word split as dbo.WORD_SPLIT, written as one statement so that no
-- function has to be created. Words are pivoted into part1..part4.
-- NOTE(review): ordinals are numbered across all rows of @input, so this is
-- meant for a single input string.
WITH Splitting AS
(
SELECT teststring AS [value]
FROM @input
)
-- Later CTEs are chained with a comma; a second WITH keyword is a syntax error.
, Spaces AS
(
-- Pass 1: split on spaces.
SELECT Spaced.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY (SELECT 1)) AS ordinal
FROM Splitting AS sp
CROSS APPLY STRING_SPLIT(sp.[value], ' ') AS Spaced
)
, Tabs AS
(
-- Pass 2: split on tabs (the original split on a space a second time).
SELECT Tabbed.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY s.ordinal, (SELECT 1)) AS ordinal
FROM Spaces AS s
CROSS APPLY STRING_SPLIT(s.[value], CHAR(9)) AS Tabbed
)
, NewLines1 AS
(
-- Pass 3: split on carriage returns.
SELECT NewLined1.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY t.ordinal, (SELECT 1)) AS ordinal
FROM Tabs AS t
CROSS APPLY STRING_SPLIT(t.[value], CHAR(13)) AS NewLined1
)
, NewLines2 AS
(
-- Pass 4: split on line feeds.
SELECT NewLined2.[value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY nl1.ordinal, (SELECT 1)) AS ordinal
FROM NewLines1 AS nl1
CROSS APPLY STRING_SPLIT(nl1.[value], CHAR(10)) AS NewLined2
)
, Splitted AS
(
-- Drop empty pieces and renumber. The column is exposed as [value] because
-- the final SELECT reads [value]; the original named it [teststring].
SELECT LTRIM(RTRIM(nl2.[value])) AS [value], ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY nl2.ordinal, (SELECT 1)) AS ordinal
FROM NewLines2 AS nl2
WHERE LTRIM(RTRIM(nl2.[value])) <> ''
)
SELECT *
FROM (SELECT [value], 'part' + CONVERT(nvarchar(20), [ordinal]) AS [parts] FROM Splitted) AS s
-- PIVOT needs its closing parenthesis and a table alias.
PIVOT (MAX([value]) FOR [parts] IN ([part1], [part2], [part3], [part4])) AS p
Hopefully helpful!

ERROR: function coalerse(bigint, integer) does not exist

I have this query, where I want to return zero values instead of null ones.
create view ct as
select userid, coalerse(count(tweets), 0) as nooftweets, coalerse(count(distinct mention), 0) as mention
from (
select t.user_id as userid, t.id as tweets, m.mentionedusers_id as mention, row_number() over (partition by m.tweet_id order by m.mentionedusers_id
) rn
from "tweet_mentUsers" m right join tweet t on m.tweet_id = t.id where text like '#%') a where rn <= 2 group by 1
However I get this error message:
ERROR: function coalerse(bigint, integer) does not exist
LINE 2: select userid, coalerse(nooftweets, 0), coalerse(mention, 0)...
^
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
Do you have any idea?

I think the COALESCE function will do what you want.
-- View ct: per user, the number of tweets and the number of distinct
-- mentioned users, counting at most two mention rows per tweet.
create view ct as
select
    a.userid,
    coalesce(count(a.tweets), 0) as nooftweets,
    coalesce(count(distinct a.mention), 0) as mention
from (
    -- Every tweet whose text matches '#%', with its mentions where present;
    -- rn numbers a tweet's mentions in mentionedusers_id order.
    select
        t.user_id as userid,
        t.id as tweets,
        m.mentionedusers_id as mention,
        row_number() over (partition by m.tweet_id order by m.mentionedusers_id) as rn
    from "tweet_mentUsers" as m
    right join tweet as t
        on m.tweet_id = t.id
    where text like '#%'
) as a
where a.rn <= 2
group by a.userid

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

RedShift: troubles with regexp_substr - amazon-redshift

Related

Function that will read the data from a column and generate the missing rows dynamically

Select specific lines in data according to last update [duplicate]

Parse Numeric Ranges in PostgreSQL

T-SQL split string by - and space

ERROR: function coalerse(bigint, integer) does not exist

Categories

Resources