String replacement in Postgresql originating an array of additional strings - postgresql

Suppose you have two substitution tables, which MUST be kept as they are, and a third table containing a list of names. How can I generate every name that can be obtained by applying the substitutions?
Substitution Table
--------------------------------------
word subs_list
MOUNTAIN MOUNTAIN, MOUNT, MT, MTN
HOUSE HAUS, HOUSE
VIEW VU, VIEW
Synonyms table
-------------------------------------------------
EDUCATION SCHOOL, UNIVERSITY, COLLEGE, TRAINING
FOOD STORE, FOOD, CAFE
STORE FOOD, STORE, MARKET
REFRIGERATION FOODLOCKER, FREEZE, FRIDGE
names table
------------------------------------------------
MOUNT VU FOOD USA
MOUNTAIN VU STORE CA
Note: I know that it would be desirable to have just one substitution table, but both substitution tables must remain because they serve additional purposes beyond the one explained above, and those tables are already in use. In addition, the list of replacements in both tables is just a varchar holding a comma-separated string.
Considering the previous, the problem is to generate possible names derived by substitution. For instance, the name MOUNT VU FOOD USA should be decomposed to MOUNTAIN VIEW FOOD USA and MOUNTAIN VIEW STORE USA, the same fashion would apply for the second.
I have been able to apply the replacements, but in the wrong order and all together inside one function. Is there a way
to get an array as output with the different names generated after replacement? So far I have created this function for replacement:
-- replace_companies_array(i_sentence)
-- Rewrites each word of i_sentence that matches a token listed in
-- subs01.subs_list or syns01.syn_list with the upper-cased "word" of the
-- matching row, then returns the rewritten sentence wrapped in a VARCHAR[].
-- NOTE(review): exactly one sentence is appended to the result, so the array
-- always has a single element; when an input word matches several rows the
-- replacements are applied one after another to the same string rather than
-- producing separate variants.
create or replace function replace_companies_array(i_sentence IN VARCHAR) returns VARCHAR[] AS $p_replaced$
DECLARE
p_replaced VARCHAR[];
subs RECORD;
-- NOTE(review): flag is declared but never referenced in the body.
flag boolean:= True;
-- Cursor yielding one row per (input word, matching canonical word). Words
-- without any match are kept as-is through coalesce(). "counter" is the number
-- of rows produced for the same input word; rows are fetched in ascending
-- counter order.
cur_s CURSOR(i_sentence VARCHAR)
FOR SELECT w.input, coalesce(x.word, w.input) as word, count(*) OVER (PARTITION BY w.input) as counter
-- the sentence is split on single whitespace characters
FROM regexp_split_to_table(trim(i_sentence), '\s') as w(input)
LEFT JOIN (
-- every comma-separated entry of subs01.subs_list, trimmed, next to its word
select s.word, trim(s1.token) as token
from subs01 s
cross join unnest(string_to_array(s.subs_list, ',')) s1(token)
-- UNION (not UNION ALL): identical (word, token) pairs from both tables collapse
union
-- same expansion for syns01.syn_list
select sy.word, trim(s2.token) as token
from syns01 sy
cross join unnest(string_to_array(sy.syn_list, ',')) s2(token)
-- tokens are compared case-insensitively
) as x on lower(trim(w.input)) = lower(x.token)
order by counter;
BEGIN
OPEN cur_s(i_sentence);
LOOP
--fetch row into the substitutions
FETCH cur_s INTO subs;
--Exit when no more rows to fetch
EXIT WHEN NOT FOUND;
-- Replace the input word where it is delimited by start/end of string or by a
-- character outside [a-z0-9]; the result overwrites i_sentence, so every
-- iteration works on the output of the previous one.
-- NOTE(review): the class [^a-z0-9] lists lower-case letters only and no 'i'
-- flag is passed, so upper-case letters also count as delimiters; subs.input
-- is concatenated into the pattern without escaping. Confirm both are intended.
SELECT REGEXP_REPLACE(i_sentence,'(^|[^a-z0-9])' || subs.input || '($|[^a-z0-9])','\1' || UPPER(subs.word) || '\2','g')
INTO i_sentence;
END LOOP;
-- NOTE(review): cur_s is not closed explicitly before returning.
p_replaced:=array_append(p_replaced, i_sentence);
RETURN p_replaced;
END;
$p_replaced$ LANGUAGE plpgsql;
Thank you so much for your contributions

I didn't manage to get the exact final result, but I'm quite close to it!
From sentence: MOUNT VU FOOD USA, I obtain {"MOUNTAIN VIEW MARKET USA","MOUNTAIN VIEW STORE USA","MOUNTAIN VIEW CAFE USA","MOUNTAIN VIEW FOOD USA"}
Here is my complete script to recreate the synonym and substitution tables:
-- Rebuild the two lookup tables from scratch so the script can be re-run.
DROP TABLE IF EXISTS subs01;
DROP TABLE IF EXISTS syns01;

-- subs01: canonical word -> comma-separated list of spellings/abbreviations.
CREATE TABLE subs01 (
    word      VARCHAR(20),
    subs_list VARCHAR(200)
);

-- syns01: word -> comma-separated list of synonyms.
CREATE TABLE syns01 (
    word     VARCHAR(20),
    syn_list VARCHAR(200)
);

INSERT INTO subs01 (word, subs_list)
VALUES
    ('MOUNTAIN', 'MOUNTAIN, MOUNT, MT, MTN'),
    ('HOUSE', 'HAUS, HOUSE'),
    ('VIEW', 'VU, VIEW');

INSERT INTO syns01 (word, syn_list)
VALUES
    ('EDUCATION', 'SCHOOL, UNIVERSITY, COLLEGE, TRAINING'),
    ('FOOD', 'STORE, FOOD, CAFE'),
    ('STORE', 'FOOD, STORE, MARKET'),
    ('REFRIGERATION', 'FOODLOCKER, FREEZE, FRIDGE');
I decided to split the job into 2 phases:
Substitute the words:
-- substitute_words(i_sentence)
-- Phase 1: normalises a sentence by replacing every whole word that appears
-- as a token in subs01.subs_list with the canonical subs01.word.
--
-- Parameters:
--   i_sentence  sentence to normalise (NULL yields NULL).
-- Returns:
--   the sentence with all known tokens replaced; matching is case-sensitive.
--
-- Tokens are matched as WHOLE WORDS only. A plain substring replace() would
-- rewrite the MOUNT inside MOUNTAIN (giving MOUNTAINAIN) or the MT inside
-- AMTRAK. Tokens that start or end with a non-word character cannot satisfy
-- the \m / \M word-boundary constraints and are therefore never matched.
CREATE OR REPLACE FUNCTION substitute_words (i_sentence IN VARCHAR) RETURNS VARCHAR AS $p_substituted$
DECLARE
    subs_record   record;
    token_pattern text;
BEGIN
    FOR subs_record IN
        SELECT su.word, trim(s2.token) AS token
        FROM subs01 AS su
        CROSS JOIN unnest(string_to_array(su.subs_list, ',')) AS s2(token)
    LOOP
        RAISE NOTICE 'INFO : TOKEN (%) ', subs_record.token;

        -- An empty entry (e.g. a trailing comma in subs_list) can never match.
        CONTINUE WHEN subs_record.token IS NULL OR subs_record.token = '';

        -- Backslash-escape every non-word character so the token is taken
        -- literally, then anchor it on word boundaries.
        token_pattern := '\m'
            || regexp_replace(subs_record.token, '(\W)', '\\\1', 'g')
            || '\M';

        IF i_sentence ~ token_pattern THEN
            RAISE NOTICE '-- FOUND : TOKEN (%) ', subs_record.token;
            -- Backslashes in the replacement text are doubled so they are not
            -- read as back-references by regexp_replace().
            i_sentence := regexp_replace(
                i_sentence,
                token_pattern,
                replace(subs_record.word, '\', '\\'),
                'g');
        END IF;
    END LOOP;

    RETURN i_sentence;
END
$p_substituted$ LANGUAGE plpgsql;
Replace known words with their synonyms:
-- synonymize_sentence(i_sentence)
-- Phase 2: expands a sentence into the set of sentences obtained by replacing
-- each whole word found in syns01.word with every entry of its syn_list.
--
-- Parameters:
--   i_sentence  sentence to expand.
-- Returns:
--   one row per distinct sentence, the input sentence included.
--
-- Variants produced for one syns01 entry are themselves candidates for the
-- entries processed after it, so chains such as FOOD -> STORE -> MARKET are
-- followed. syns01 is read without ORDER BY, so which chains are reached
-- depends on the order in which its rows are returned.
--
-- The variants are accumulated in an array instead of a temporary table:
-- CREATE TEMPORARY TABLE ... ON COMMIT DROP fails with "relation already
-- exists" when the function is called twice in one transaction, and a
-- VARCHAR(200) column would cap the sentence length. Words are matched on
-- word boundaries so that FOOD is not rewritten inside FOODLOCKER.
CREATE OR REPLACE FUNCTION synonymize_sentence (i_sentence IN VARCHAR) RETURNS TABLE (sentence_result VARCHAR) AS $p_syn$
DECLARE
    syn_record   record;
    variants     varchar[] := ARRAY[i_sentence];
    word_pattern text;
    replacement  text;
BEGIN
    FOR syn_record IN
        SELECT su.word, trim(s2.token) AS token
        FROM syns01 AS su
        CROSS JOIN unnest(string_to_array(su.syn_list, ',')) AS s2(token)
    LOOP
        RAISE NOTICE 'INFO : WORD (%) ', syn_record.word;

        -- Nothing to look for / nothing to substitute.
        CONTINUE WHEN syn_record.word IS NULL OR syn_record.word = ''
                   OR syn_record.token IS NULL;

        -- Literal, whole-word pattern for the word being replaced.
        word_pattern := '\m'
            || regexp_replace(syn_record.word, '(\W)', '\\\1', 'g')
            || '\M';
        -- Double backslashes so the synonym is inserted verbatim.
        replacement := replace(syn_record.token, '\', '\\');

        -- Append only variants not seen before; the final result is a set, so
        -- skipping duplicates early keeps the array small without changing it.
        variants := variants || ARRAY(
            SELECT DISTINCT c.candidate
            FROM (
                SELECT regexp_replace(v.sentence, word_pattern, replacement, 'g')::varchar AS candidate
                FROM unnest(variants) AS v(sentence)
                WHERE v.sentence ~ word_pattern
            ) AS c
            WHERE c.candidate <> ALL (variants)
        );
    END LOOP;

    RETURN QUERY
        SELECT DISTINCT v.sentence
        FROM unnest(variants) AS v(sentence);
END;
$p_syn$ LANGUAGE plpgsql;
Then, to generate the result array, I perform this statement:
SELECT ARRAY(SELECT synonymize_sentence (substitute_words ('MOUNT VU FOOD USA')));

Related

how to use a loop with the function pgr_dijkstra

I want to use a loop to calculate the distance travelled between two nodes, 152 and 17720 (using the function pgr_dijkstra), removing one cell at a time. A cell contains several road links. The grid_edges_routard table contains the road links and the cell each one belongs to.
I want to obtain, for each blocked cell, the distance travelled between the two nodes.
I must use pgr_dijkstra because, in a second step, I want to display the links that were travelled.
-- get_dist_grid()
-- For every distinct cell (grid_edges_routard.cellule), computes the cost of
-- the shortest path from node 152 to node 17720 over
-- road_routard.edges_vulnerabilite with all edges of that cell removed.
--
-- Returns one row per cell:
--   celref_blocked  the cell whose edges were excluded
--   dist            total path cost rounded to an integer, or NULL when
--                   pgr_dijkstra returns no path for that cell
--
-- The inner edge query is handed to pgr_dijkstra as TEXT, so PL/pgSQL
-- variables are not visible inside it; the cell value is therefore
-- interpolated with format(%L), which also quotes it safely. The row with
-- edge = -1 is the last row of the path and carries the accumulated cost in
-- agg_cost.
CREATE OR REPLACE FUNCTION get_dist_grid()
RETURNS TABLE (
    celref_blocked INT,
    dist INT
) AS $$
DECLARE
    var_r record;
BEGIN
    FOR var_r IN
        SELECT DISTINCT cellule AS cel
        FROM grid_edges_routard
    LOOP
        celref_blocked := var_r.cel;

        -- SELECT ... INTO sets dist to NULL when no row (no path) is returned.
        SELECT round(d.agg_cost)::int
        INTO dist
        FROM pgr_dijkstra(
                 format(
                     'SELECT id, source, target, cost
                      FROM road_routard.edges_vulnerabilite
                      WHERE id NOT IN (SELECT edge_id
                                       FROM grid_edges_routard
                                       WHERE cellule = %L)',
                     var_r.cel),
                 152, 17720, FALSE) AS d
        WHERE d.edge = -1;

        RETURN NEXT;
    END LOOP;
END; $$
LANGUAGE plpgsql;

SELECT * FROM get_dist_grid();
I have an error message: ERROR: column « var_r » does not exist.
I use postgresql 9.5.
Define a new variable (var_q) of type record, then execute your SELECT as dynamic SQL into that variable. The inner query is passed to pgr_dijkstra as a string, so the loop variable is not visible inside it; concatenate the cell value (var_r.cel, not the whole record var_r) into the text, like this: EXECUTE 'SELECT * FROM pgr_dijkstra(''SELECT id, source, target, cost FROM road_routard.edges_vulnerabilite where id not in (select edge_id from grid_edges_routard where cellule=' || quote_literal(var_r.cel) || ')'', 152, 17720, FALSE) where edge=-1' INTO var_q
Note that every single quote belonging to the inner query has to be doubled because it is nested inside the outer string. You can then use the columns of var_q in the same way as you used celref_blocked := var_r.cel.

Function to return dynamic set of columns for given table

I have a fields table to store column information for other tables:
-- Catalogue of column metadata for other tables: one row per described column.
CREATE TABLE public.fields (
    schema_name    VARCHAR(100),                          -- schema of the described table
    table_name     VARCHAR(100),                          -- described table
    column_text    VARCHAR(100),
    column_name    VARCHAR(100),                          -- described column
    column_type    VARCHAR(100) DEFAULT 'varchar(100)',
    column_visible BOOLEAN
);
And I'd like to create a function to fetch data for a specific table.
Just tried sth like this:
-- public.get_table(schema_name, table_name, active)
-- Returns every row of the given table as SETOF record; because the row type
-- is not known in advance, the caller must supply a column definition list.
--
-- Parameters:
--   schema_name  schema of the table to read
--   table_name   table to read
--   active       accepted for interface compatibility; not used by the body
--
-- Both names are interpolated with format(%I), i.e. as quoted identifiers, so
-- a value such as 'users; drop table x' cannot inject SQL. As a consequence
-- the names are matched exactly as given (no implicit folding to lower case).
create or replace function public.get_table(schema_name text,
                                            table_name text,
                                            active boolean default true)
returns setof record as $$
begin
    return query execute format('select * from %I.%I', schema_name, table_name);
end
$$
language plpgsql;
With this function I have to specify columns when I call it!
select * from public.get_table('public', 'users') as dept(id int, uname text);
I want to pass schema_name and table_name as parameters to function and get record list, according to column_visible field in public.fields table.
Solution for the simple case
As explained in the referenced answers below, you can use registered (row) types, and thus implicitly declare the return type of a polymorphic function:
-- public.get_table(_tbl_type)
-- Polymorphic variant: the return row type is taken from the type of the
-- argument, so the caller passes a typed NULL (e.g. NULL::public.users) and
-- does not need a column definition list.
-- Only the TYPE of _tbl_type is used; its value is never read.
CREATE OR REPLACE FUNCTION public.get_table(_tbl_type anyelement)
RETURNS SETOF anyelement AS
$func$
BEGIN
-- pg_typeof() gives the argument's type, which is interpolated as the relation
-- to read; "TABLE x" is shorthand for "SELECT * FROM x".
RETURN QUERY EXECUTE format('TABLE %s', pg_typeof(_tbl_type));
END
$func$ LANGUAGE plpgsql;
Call:
SELECT * FROM public.get_table(NULL::public.users); -- note the syntax!
Returns the complete table (with all user columns).
Wait! How?
Detailed explanation in this related answer, chapter
"Various complete table types":
Refactor a PL/pgSQL function to return the output of various SELECT queries
TABLE foo is just short for SELECT * FROM foo:
Is there a shortcut for SELECT * FROM?
2 steps for completely dynamic return type
But what you are trying to do is strictly impossible in a single SQL command.
I want to pass schema_name and table_name as parameters to function and get record list, according to column_visible field in
public.fields table.
There is no direct way to return an arbitrary selection of columns (return type not known at call time) from a function - or any SQL command. SQL demands to know number, names and types of resulting columns at call time. More in the 2nd chapter of this related answer:
How do I generate a pivoted CROSS JOIN where the resulting table definition is unknown?
There are various workarounds. You could wrap the result in one of the standard document types (json, jsonb, hstore, xml).
Or you generate the query with one function call and execute the result with the next:
-- public.generate_get_table(_schema_name, _table_name)
-- Builds (but does not run) the text of a SELECT statement that lists the
-- columns flagged column_visible in "fields" for the requested table.
-- Returns no row when the table has no visible column registered.
-- Column names and the schema/table names are emitted as quoted identifiers.
CREATE OR REPLACE FUNCTION public.generate_get_table(_schema_name text, _table_name text)
RETURNS text AS
$func$
    SELECT format(
               'SELECT %s FROM %I.%I',
               string_agg(quote_ident(f.column_name), ', '),
               f.schema_name,
               f.table_name
           )
    FROM fields AS f
    WHERE f.column_visible
      AND f.schema_name = _schema_name
      AND f.table_name = _table_name
    GROUP BY f.schema_name, f.table_name
    ORDER BY f.schema_name, f.table_name;
$func$ LANGUAGE sql;
Call:
SELECT public.generate_get_table('public', 'users');
This creates a query of the form:
SELECT usr_id, usr FROM public.users;
Execute it in the 2nd step. (You might want to add column numbers and order columns.)
Or append \gexec in psql to execute the return value immediately. See:
How to force evaluation of subquery before joining / pushing down to foreign server
Be sure to defend against SQL injection:
INSERT with dynamic table name in trigger function
Define table and column names as arguments in a plpgsql function?
Asides
varchar(100) does not make much sense for identifiers, which are limited to 63 characters in standard Postgres:
Maximum characters in labels (table names, columns etc)
If you understand how the object identifier type regclass works, you might replace schema and table name with a single regclass column.
I think you just need another query to get the list of columns you want.
Maybe something like (this is untested):
-- public.get_table(_schema_name, _table_name, active)
-- Returns the rows of the given table restricted to the columns registered as
-- visible in public.fields. The result is SETOF record, so the caller must
-- supply a column definition list matching those columns.
--
-- Parameters:
--   _schema_name  schema of the table to read
--   _table_name   table to read
--   active        accepted for interface compatibility; not used by the body
-- Raises:
--   an exception when public.fields has no visible column for the table
--   (otherwise a NULL command string would be executed).
--
-- Column, schema and table names are emitted as quoted identifiers.
-- NOTE(review): public.fields stores no column position, so the order of the
-- selected columns is not defined; add an ordering column if callers depend
-- on it.
create or replace function public.get_table(_schema_name text, _table_name text, active boolean default true) returns setof record as $$
declare
    column_list text;
begin
    -- Get the list of visible columns
    select string_agg(quote_ident(f.column_name), ', ')
    into column_list
    from public.fields as f
    where f.schema_name = _schema_name
      and f.table_name = _table_name
      and f.column_visible;

    if column_list is null then
        raise exception 'no visible columns registered in public.fields for %.%',
            _schema_name, _table_name;
    end if;

    -- Return rows from the specified table
    return query execute format('select %s from %I.%I', column_list, _schema_name, _table_name);
end
$$
language plpgsql;
Keep in mind that column/table references may need to be surrounded by double quotes if they have certain characters in them.

DB2. Select all columns in a table that contains a value

I would like to find all column names in a table that contains a value in any given record.
I.e All columns that contains a string in the record value.
'%ABC%' or '%QAW%' or '%IGH%'
If possible give me all the tables and columns in a DB schema, so I do not have to query ever table manually
2016-06-15
So I got a little further, I can now get all the values from each column in each row in each table. Now I need to see if that value ( v_value ) exist in a list of airport codes. i.e ['LAS','LAX','BIL']
I have all the airports in a table that I want to read into and array.
I am having trouble with creating that array and getting the data into it.
Here is what I have so far.
Look at the TODO's
-- CMSDB.TEST1 (DB2 SQL PL, no parameters)
-- 1. Reads every IDX value from CMSDB.AIRPORTS and prints it with
--    DBMS_OUTPUT.PUT_LINE.
-- 2. For each VARCHAR column of CMSDB.ACCTGROUP (except the columns listed in
--    the NOT IN clause) builds "select <column> from <schema>.<table>",
--    prepares it and fetches every value into v_value.
-- The comparison of v_value against the airport list is still a TODO, so the
-- second part currently produces no output.
CREATE OR REPLACE PROCEDURE "CMSDB"."TEST1"
()
LANGUAGE SQL
SPECIFIC SQL3
P1: BEGIN
DECLARE v_tabschema VARCHAR(255);
DECLARE v_tabname VARCHAR(255);
DECLARE v_colname VARCHAR(255);
DECLARE v_airport VARCHAR(255);
DECLARE v_stmt VARCHAR(3000);
-- NOTE(review): V_SQL is declared but not referenced anywhere in the body.
DECLARE V_SQL VARCHAR(3000);
DECLARE v_value VARCHAR(255);
-- SQLSTATE is tested after each FETCH to drive the loops below.
DECLARE SQLSTATE CHAR(5) DEFAULT '00000';
-- v_stmt2 is re-prepared for every column; v_value_cursor iterates its result.
DECLARE v_stmt2 STATEMENT;
DECLARE v_value_cursor CURSOR FOR v_stmt2;
DECLARE v_airport_cursor CURSOR FOR select IDX from CMSDB.AIRPORTS;
DECLARE syscat_cursor CURSOR FOR select trim(tabschema), tabname, colname from cmsdb.syscat.columns where tabname = 'ACCTGROUP' and tabschema = 'CMSDB' and TYPENAME = 'VARCHAR' and colname not in ('CHGDATE','CHGPAGE','CHGPROG','CHGTYPE','CHGUSER','CREATEDATETIME','CREATEDBYID','REC_ID');
-- Part 1: walk the airport codes. The loop ends as soon as SQLSTATE is
-- anything other than '00000', i.e. on "no data" but also on any warning.
open v_airport_cursor;
FETCH FROM v_airport_cursor INTO v_airport;
WHILE (SQLSTATE = '00000') DO
call DBMS_OUTPUT.PUT_LINE(v_airport);
-- TODO Add each value to a list, arryalist that can be used to check if the v_value is in the list.
FETCH FROM v_airport_cursor INTO v_airport;
END WHILE;
close v_airport_cursor;
-- Part 2: outer loop over the selected catalog columns.
OPEN syscat_cursor;
FETCH FROM syscat_cursor INTO v_tabschema, v_tabname, v_colname;
WHILE (SQLSTATE = '00000') DO
--call DBMS_OUTPUT.PUT_LINE(v_tabschema || ' ' || v_tabname || ' ' || v_colname);
-- NOTE(review): identifiers are concatenated unquoted; fine for the catalog
-- names selected above, but verify if the filter is ever widened.
SET v_stmt = 'select ' || v_colname || ' from ' || v_tabschema || '.' || v_tabname;
--call DBMS_OUTPUT.PUT_LINE(v_stmt);
PREPARE v_stmt2 FROM v_stmt;
-- Inner loop over every value of the current column.
OPEN v_value_cursor;
FETCH FROM v_value_cursor INTO v_value;
WHILE (SQLSTATE = '00000') DO
-- TODO
--IF ( airportList contains v_value) THEN
--call DBMS_OUTPUT.PUT_LINE(v_value);
--END IF;
FETCH FROM v_value_cursor INTO v_value;
END WHILE;
CLOSE v_value_cursor;
-- This FETCH also resets SQLSTATE after the inner loop ended with a
-- non-'00000' state, so the outer loop can continue.
FETCH FROM syscat_cursor INTO v_tabschema, v_tabname, v_colname;
END WHILE;
close syscat_cursor;
END P1
You can use sysibm.syscolumns:
-- Lists the columns of table 'XX' whose NAME contains one of the fragments.
-- In SYSIBM.SYSCOLUMNS the column-name column is NAME (COLNAME belongs to
-- SYSCAT.COLUMNS), so it is aliased to keep the output heading.
select name as colname
from sysibm.syscolumns
where tbname = 'XX'
  and (name like '%ABC%' or name like '%QAW%' or name like '%IGH%');
You'll need to create a cursor over SYSTABLES that returns all the tables in the system. Then have another cursor that returns all the column names in a given table. Once you have those, you can build a dynamic statement that checks all the columns in a given table for the value you are looking for. Fetch the next table name and do it all over again.
Obviously, if you can narrow down your search to a particular schema or even limit the search to tables/columns with a particular naming pattern; you'd be better off.
Another technique, depending on your platform and version of DB2: you might be able to do some sort of bulk export to a set of text files, and then use a tool that searches the contents of those text files.

Postgres - Trigger with matched key

I have several tables. A table, cexp, is a table that has attributes cid and total. Cid is grouped and total is the sum of quantity * price for that cid (matched on cid)
The cexp table was populated with the results of the following code:
-- Total spent per customer: sum of quantity * price over all order lines of
-- all orders belonging to the customer.
SELECT
    c.cid,
    SUM(ol.quantity * b.price) AS total
FROM customers AS c
INNER JOIN orders AS o
    ON c.cid = o.cid
INNER JOIN orderlist AS ol
    ON o.ordernum = ol.ordernum
INNER JOIN books AS b
    ON b.isbn = ol.isbn
GROUP BY c.cid
My task is to create a trigger that, when rows are inserted into orders and orderlist, finds the matching customer (cid) in cexp and increments the existing total by the product of the new quantity (from orderlist) and the price (from books). If there is no match, it inserts a new row into cexp.
Tables are as follows:
Customers-cid,name pk-cid
Books - isbn,title,price pk-isbn
Orders - ordernum,cid pk-ordernum
Orderlist - ordernum,isbn, quantity - pk-(ordernum,isbn)
cexp - cid,total - pk-cid
I am getting syntax errors. Can anyone correct this code?
-- cexpupd(): trigger function intended to add quantity * price of a newly
-- inserted orderlist row to the customer's total in cexp.
-- This is the version that fails with syntax errors; review notes below.
CREATE OR REPLACE FUNCTION cexpupd()
RETURNS trigger as
$cexpupd$
BEGIN
UPDATE cexp
-- NOTE(review): the SET target must be a column of cexp ("total"), not
-- "new.total", and a scalar subquery has to be wrapped in parentheses.
SET new.total=total+Select (b.price*new.quantity) FROM customers c
join orders o on c.cid=o.cid
join orderlist ol on o.ordernum=ol.ordernum
join books b on b.isbn=ol.isbn
-- NOTE(review): this condition filters the sub-select only; the UPDATE itself
-- has no WHERE clause restricting it to one cexp row.
where b.isbn=new.isbn;
--INSERT CODE WHEN ABOVE LINE DOES NOT OCCUR -INSERTS NEW ROW INTO CEXP
-- NOTE(review): a trigger function must end with RETURN NEW (or RETURN NULL);
-- no RETURN statement is present.
END;
$cexpupd$
LANGUAGE plpgsql
I would say that your UPDATE statement is an unsalvageable birds-nest. Fortunately, there's an easier way to achieve the same result.
Keep in mind that a trigger function is procedural code, so there's no need to concurrently load the gun, pull the trigger, scream, and shoot yourself in the foot in a single statement. You have access to all the procedural goodies like local variables and flow control statements.
The following should give you a good headstart on what you want:
-- cexpupd(): row-level trigger function for INSERT on orderlist.
-- Adds NEW.quantity * books.price to the running total of the customer who
-- owns the order (cexp.total); creates the cexp row when the customer has
-- none yet.
--
-- Returns NEW so that the statement which fired the trigger proceeds.
CREATE OR REPLACE FUNCTION cexpupd()
RETURNS trigger AS
$cexpupd$
DECLARE
    bookprice books.price%TYPE;
    ext_total cexp.total%TYPE;
    custid    orders.cid%TYPE;
BEGIN
    -- Customer owning the order of the new order line.
    SELECT o.cid
    INTO custid
    FROM orders AS o
    WHERE o.ordernum = NEW.ordernum;

    -- Unit price of the ordered book.
    SELECT b.price
    INTO bookprice
    FROM books AS b
    WHERE b.isbn = NEW.isbn;

    ext_total := bookprice * NEW.quantity;

    -- Unknown book or NULL price/quantity: there is nothing to add, and
    -- adding NULL would wipe the existing total.
    IF ext_total IS NULL THEN
        RETURN NEW;
    END IF;

    -- Increment (not overwrite) the existing total.
    UPDATE cexp
    SET total = COALESCE(total, 0) + ext_total
    WHERE cid = custid;

    IF NOT FOUND THEN
        -- First order line for this customer: create the cexp row.
        INSERT INTO cexp (cid, total)
        VALUES (custid, ext_total);
    END IF;

    RETURN NEW;
END;
$cexpupd$
LANGUAGE plpgsql;
Note the statement near the end, RETURN NEW; This line is crucial, as it is how a PostgreSQL trigger function tells the database to go ahead and finish executing the statement that fired the trigger.
If you need any clarification, please don't hesitate to ask. Please note I have not tried executing this function, but I did create the necessary tables to compile it successfully.

Massive insertions from one big table to other related tables

Intro:
Currently I have scraped all the data into one PostgreSQL table, 'Bigtable' (about 1.2M rows). Now I need to split the design into separate tables which all depend on Bigtable. Some of the tables might have sub-tables. The model looks pretty much like a snowflake schema.
Problem:
What would be the best option for inserting the data into the tables? I thought of doing the insertion with functions written in SQL or PL/pgSQL, but the problem remains with the auto-generated IDs.
Also if you know what tools might make this problem solving easier then post!
//Edit i have added example, this not the real case just for illustration
1.2 M rows is not too much. The best tool is sql script executed from console "psql". If you have a some newer version of Pg, then you can use inline functions (DO statement) when it is necessary. But probably the most useful command is INSERT INTO SELECT statement.
-- file conversion.sql
-- Recreate target table f1 and fill it from "data" with a single
-- INSERT ... SELECT.
DROP TABLE IF EXISTS f1 CASCADE;

CREATE TABLE f1 (
    a int,
    b int
);

INSERT INTO f1 (a, b)
SELECT
    x1,
    y1
FROM data
WHERE x1 = 10;
...
-- end file
psql mydb -f conversion.sql
If I understand your question, you can use a psql function like this:
-- migration(): walks bigtable row by row and creates the missing rows in the
-- related tables, reusing the generated keys.
--
-- For each bigtable row:
--   * looks up the product by name and inserts it when absent, keeping the
--     (existing or new) productid in currentProductId;
--   * does the same for the user, identified by first_name + last_name.
-- Returns 1 when the loop completes.
--
-- Values are passed to static INSERT ... RETURNING statements instead of
-- being concatenated into EXECUTE strings, so names containing a quote
-- (O'Brien) neither break the statement nor allow SQL injection. The table
-- named user must be written "user" because USER is a reserved word.
-- NOTE(review): a NULL product_name / first_name / last_name never matches
-- the "=" lookups, so such rows insert a new row every time.
-- The [...] lines are the author's placeholders for the remaining tables.
CREATE OR REPLACE FUNCTION migration() RETURNS integer AS
$BODY$
DECLARE
    currentProductId INTEGER;
    currentUserId INTEGER;
    currentReg RECORD;
BEGIN
    FOR currentReg IN
        SELECT * FROM bigtable
    LOOP
        -- Product
        SELECT p.productid INTO currentProductId
        FROM product AS p
        WHERE p.name = currentReg.product_name;
        IF currentProductId IS NULL THEN
            INSERT INTO product (name)
            VALUES (currentReg.product_name)
            RETURNING productid INTO currentProductId;
        END IF;
        -- User
        SELECT u.userid INTO currentUserId
        FROM "user" AS u
        WHERE u.first_name = currentReg.first_name
          AND u.last_name = currentReg.last_name;
        IF currentUserId IS NULL THEN
            INSERT INTO "user" (first_name, last_name)
            VALUES (currentReg.first_name, currentReg.last_name)
            RETURNING userid INTO currentUserId;
            -- Insert into userAdded too with: currentUserId and currentProductId
            [...]
        END IF;
        -- Rest of tables
        [...]
    END LOOP;
    RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
select * from migration();
In this case it's assumed that each table runs its own primary key sequence and I have reduced the number of fields in the tables to simplify.
I hope this has been helpful.
No need to use a function for this (unless I misunderstood your problem)
If your id columns are all defined as serial column (i.e. they automatically generate the values), then this can be done with simple INSERT statements. This assumes that the target tables are all empty.
-- Set-based split of bigtable into its lookup tables. Assumes the target
-- tables are empty and their id columns are generated automatically.

-- One row per distinct person.
INSERT INTO users (firstname, lastname)
SELECT DISTINCT firstname, lastname
FROM bigtable;

-- One row per distinct category.
INSERT INTO category (name)
SELECT DISTINCT category_name
FROM bigtable;

-- the following assumes a column categoryid in the product table
-- which is not visible from your screenshot
-- The category is looked up through category.name, the column filled by the
-- INSERT above (the join previously referenced c.category_name, a column the
-- INSERT never populates).
INSERT INTO product (product_name, description, categoryid)
SELECT DISTINCT b.product_name, b.description, c.categoryid
FROM bigtable AS b
INNER JOIN category AS c
    ON c.name = b.category_name;
INSERT INTO product_added (product_productid, user_userid)
SELECT p.productid, u.userid
FROM bigtable b
JOIN product p ON p.product_name = b.product_name
JOIN users u ON u.firstname = b.firstname AND u.lastname = b.lastname