Mysql Unique column pairs as A,B or B,A where C is higher - mysqli

I have this table
id|id_user_send|id_user_receive|date
-------------------------------------
0 | 1 | 2 | 2017-04-06
0 | 2 | 1 | 2017-04-07
0 | 1 | 4 | 2017-04-07
0 | 1 | 4 | 2017-04-08
0 | 4 | 1 | 2017-04-09
0 | 1 | 2 | 2017-04-10
I want one row per unique pair of users, treating (id_user_send, id_user_receive) and (id_user_receive, id_user_send) as the same pair, and keeping only the row with the latest date.
I don't see how to get this with GROUP BY or SELECT DISTINCT; I tried a number of combinations but none of them work. Can you show me the approach? I want to understand it, not just get the answer. Thank you very much to anyone who helps.
Edit: for me the pairs 1,4 and 4,1 are the same; I just want the one with the latest date.

Here is the query that will help you to get the max C from the all rows that have A,B or B,A.
Here columns used are ..
A = sender (id_user_send)
B = receiver (id_user_receive)
C = day (date)
Explanation:
Query consists of UNION of 2 queries
In First Query, tables T1 and T2 are rows with sender, receiver, concatenated string (sender,receiver) and their max(day). WHERE clause matches the concatenated string (sr) with reverse of the same string, this will get all the rows where sender and receiver combination is same i.e. (1-4 and 4-1). Using CASE..WHEN..END we can get the row that has max date.
The second query covers the corner cases, i.e. pairs with only a one-way interaction between sender and receiver, and rows where the sender and the receiver are the same user.
Sample Data Preparation
-- Sample data (Oracle syntax): one row per message, identified by sender, receiver and day.
CREATE TABLE stack (sender int, receiver int, day date);
-- rows with sender:receiver as 1-4 or 4-1 (TWO_WAY INTERACTION)
INSERT INTO stack (sender, receiver, day) VALUES (1, 4, TO_DATE('2017-04-06', 'yyyy-mm-dd'));
INSERT INTO stack (sender, receiver, day) VALUES (1, 4, TO_DATE('2017-04-08', 'yyyy-mm-dd'));
INSERT INTO stack (sender, receiver, day) VALUES (4, 1, TO_DATE('2017-02-06', 'yyyy-mm-dd'));
INSERT INTO stack (sender, receiver, day) VALUES (4, 1, TO_DATE('2017-01-06', 'yyyy-mm-dd'));
-- rows with sender:receiver as 1-2 or 2-1 (TWO_WAY INTERACTION)
INSERT INTO stack (sender, receiver, day) VALUES (1, 2, TO_DATE('2017-06-06', 'yyyy-mm-dd'));
INSERT INTO stack (sender, receiver, day) VALUES (1, 2, TO_DATE('2017-04-06', 'yyyy-mm-dd'));
INSERT INTO stack (sender, receiver, day) VALUES (2, 1, TO_DATE('2017-09-06', 'yyyy-mm-dd'));
INSERT INTO stack (sender, receiver, day) VALUES (2, 1, TO_DATE('2017-01-06', 'yyyy-mm-dd'));
-- rows with sender:receiver as 3-6 (CORNER SCENARIO : ONE_WAY INTERACTION)
INSERT INTO stack (sender, receiver, day) VALUES (3, 6, TO_DATE('2017-09-06', 'yyyy-mm-dd'));
INSERT INTO stack (sender, receiver, day) VALUES (3, 6, TO_DATE('2017-01-06', 'yyyy-mm-dd'));
-- rows with sender:receiver as 7-7 (CORNER SCENARIO : SELF INTERACTION)
INSERT INTO stack (sender, receiver, day) VALUES (7, 7, TO_DATE('2017-09-06', 'yyyy-mm-dd'));
INSERT INTO stack (sender, receiver, day) VALUES (7, 7, TO_DATE('2017-01-06', 'yyyy-mm-dd'));
Sample Data View
Query
-- Latest interaction per unordered (sender, receiver) pair.
--
-- Branch 1 (before UNION): pairs that exist in both directions, plus self pairs.
--   T1 and T2 are the same "latest day per direction" rollup; a T1 row is matched with
--   the T2 row of the opposite direction and CASE keeps the direction with the later day.
-- Branch 2 (after UNION): pairs that exist in one direction only.
--
-- sr is the key "sender,receiver" and rs is the swapped key "receiver,sender".
-- The match is sr = rs instead of sr = REVERSE(sr): reversing the characters breaks as
-- soon as an id has more than one digit ('12,3' reversed is '3,21', i.e. the pair 3-21).
--
-- When both directions share the same latest day, the direction with
-- sender <= receiver is kept, so every pair yields exactly one row.
SELECT DISTINCT
    CASE
        WHEN T1.maxDay > T2.maxDay
          OR (T1.maxDay = T2.maxDay AND T1.sender <= T1.receiver)
        THEN T1.sender ELSE T2.sender
    END AS sender,
    CASE
        WHEN T1.maxDay > T2.maxDay
          OR (T1.maxDay = T2.maxDay AND T1.sender <= T1.receiver)
        THEN T1.receiver ELSE T2.receiver
    END AS receiver,
    CASE
        WHEN T1.maxDay >= T2.maxDay THEN T1.maxDay ELSE T2.maxDay
    END AS maxDay
FROM
    (SELECT sender, receiver,
            (sender || ',' || receiver) AS sr,
            MAX(day) AS maxDay
     FROM stack
     GROUP BY sender, receiver) T1
INNER JOIN
    (SELECT sender, receiver,
            (receiver || ',' || sender) AS rs,
            MAX(day) AS maxDay
     FROM stack
     GROUP BY sender, receiver) T2
    ON T1.sr = T2.rs
UNION
SELECT T1.sender, T1.receiver, T1.maxDay
FROM
    (SELECT sender, receiver,
            (sender || ',' || receiver) AS sr,
            MAX(day) AS maxDay
     FROM stack
     GROUP BY sender, receiver) T1
WHERE NOT EXISTS
    (SELECT 1
     FROM stack s
     WHERE (s.receiver || ',' || s.sender) = T1.sr)
Result
Let me know if you need more explanation for query.
I have provided SQL Fiddle also for you to play around with the query so you can understand it by executing it part by part.
http://sqlfiddle.com/#!4/2d6f7/3/0

Related

If there is only one zero value then group by supplier and show zero, if there is no zero, then avg all values

I will give you example of table that I have:
Supplier | Value
sup1 | 4
sup2 | 1
sup1 | 0
sup1 | 3
sup2 | 5
I need a result that will do average by supplier, but if there is value 0 for a supplier, do not average, but return 0 instead
It should look like this:
Supplier | Value
sup1 | 0
sup2 | 3
This is a little trick but it should work :
-- Average per supplier, replaced by 0 as soon as the supplier has a zero value.
SELECT
    Supplier,
    CASE MIN(ABS(Value))
        WHEN 0 THEN 0        -- smallest magnitude is 0, so at least one value is exactly 0
        ELSE AVG(Value)
    END
FROM TableTest
GROUP BY Supplier
EDIT: Using the ABS() function lets you avoid problems with negative values.
-- Sample rows held in a T-SQL table variable (table variables are named @name).
DECLARE @TAB TABLE (SUPPLIER VARCHAR(50), VALUE INTEGER)
INSERT INTO @TAB (SUPPLIER, VALUE)
SELECT 'sup1', 4
UNION ALL
SELECT 'sup2', 1
UNION ALL
SELECT 'sup1', 0
UNION ALL
SELECT 'sup1', 3
UNION ALL
SELECT 'sup2', 5

-- Show the input rows.
SELECT SUPPLIER, VALUE FROM @TAB

-- Average per supplier; a supplier that has any row with VALUE = 0 reports 0 instead.
SELECT T1.SUPPLIER,
       CASE WHEN EXISTS (SELECT 1
                         FROM @TAB T
                         WHERE T.SUPPLIER = T1.SUPPLIER
                           AND T.VALUE = 0)
            THEN 0
            ELSE AVG(T1.VALUE)
       END AS VALUE
FROM @TAB T1
GROUP BY T1.SUPPLIER
Result
SUPPLIER VALUE
sup1 0
sup2 3
The following query is one way to do it.
First I push the supplier which has the Value = 0, then based on the result, I will do the remaining calculation and finally using UNION to get the expected result:
-- Step 1: collect each supplier that has at least one Value = 0 (once per supplier).
DECLARE @ZeroValue TABLE (Supplier VARCHAR (20));
INSERT INTO @ZeroValue (Supplier)
SELECT DISTINCT Supplier FROM TestTable WHERE Value = 0;

-- Step 2: those suppliers report 0; every other supplier reports its average.
-- NOT EXISTS is used instead of joining on Z.Supplier != T.Supplier: the inequality join
-- returns no averages at all when @ZeroValue is empty, and with two or more zero
-- suppliers it pairs each of them with the others and averages them as well.
SELECT Supplier, 0 AS Value FROM @ZeroValue
UNION ALL
SELECT T.Supplier, AVG(T.Value) AS Value
FROM TestTable T
WHERE NOT EXISTS (SELECT 1 FROM @ZeroValue Z WHERE Z.Supplier = T.Supplier)
GROUP BY T.Supplier
Schema used for the sample:
-- Sample schema and rows for the supplier example.
CREATE TABLE TestTable (
    Supplier VARCHAR (20),
    Value    INT
);
INSERT INTO TestTable (Supplier, Value)
VALUES
    ('sup1', 4),
    ('sup2', 1),
    ('sup1', 0),
    ('sup1', 3),
    ('sup2', 5);
Please find the working demo on db<>fiddle

Divide table raw into chunks in Postgres with st_dwithin limit

I got a table with linestrings that I want to divide into chunks that have a list of id not higher than provided number for each and store only lines that are within certain distance.
For example, I got a table with 14 rows
-- Linestring table for the chunking example, with a GiST index on the geometry.
create table lines (
    id   integer primary key,
    geom geometry(linestring)
);
insert into lines (id, geom)
values
    ( 1, 'LINESTRING(0 0, 0 1)'),
    ( 2, 'LINESTRING(0 1, 1 1)'),
    ( 3, 'LINESTRING(1 1, 1 2)'),
    ( 4, 'LINESTRING(1 2, 2 2)'),
    (11, 'LINESTRING(2 2, 2 3)'),
    (12, 'LINESTRING(2 3, 3 3)'),
    (13, 'LINESTRING(3 3, 3 4)'),
    (14, 'LINESTRING(3 4, 4 4)');
create index lines_gix on lines using gist(geom);
I want to split it into chunks with 3 ids for each chunk with lines that are within 2 meters from each other or the first one.
The result I am trying to get from this example is:
| Chunk No.| Id chunk list |
|----------|----------------|
| 1 | 1, 2, 3 |
| 2 | 4, 5, 6 |
| 3 | 7, 8, 9 |
| 4 | 10, 11, 12 |
| 5 | 13, 14 |
I tried to use st_clusterwithin but when lines are close to each other it will return all of them not split into chunks.
I also tried to use some with recursive magic like the one from the answer provided by Paul Ramsey here. But I don't know how to modify the query to return limited grouped id list.
I am not sure if it is the best possible answer so if anyone has a better method or know how to improve provided answer feel free to update it. With a little modification of Paul answer, I've managed to create following queries that are doing what I asked for.
-- Create function for easier interaction
-- find_connected($1, $2, $3, $4)
--   $1  id of the line the walk starts from
--   $2  distance handed to ST_DWithin
--   $3  maximum number of ids in the returned list
--   $4  ids that must not be appended while walking
-- Returns the longest id list (at most $3 entries) that can be built by repeatedly
-- appending a line lying within $2 of the line appended last.
-- The containment tests use the array operator @> ("contains").
CREATE OR REPLACE FUNCTION find_connected(integer, double precision, integer, integer[])
RETURNS integer[] AS
$$
WITH RECURSIVE lines_r AS (
    -- anchor: the starting line on its own
    SELECT ARRAY[id] AS idlist,
           geom,
           id
    FROM lines
    WHERE id = $1
    UNION ALL
    -- step: extend every list built so far by one nearby, not yet used line
    SELECT array_append(lines_r.idlist, lines.id) AS idlist, -- append the new id to the list
           lines.geom AS geom,                               -- geometry of the line just appended
           lines.id AS id                                    -- id of the line just appended
    FROM (SELECT * FROM lines WHERE NOT ($4 @> ARRAY[id])) lines, -- candidates outside the excluded ids
         lines_r
    WHERE ST_DWithin(lines.geom, lines_r.geom, $2)           -- within the requested distance
      AND NOT (lines_r.idlist @> ARRAY[lines.id])            -- not already part of this list
      AND array_length(idlist, 1) <= $3                      -- stop growing once the list is full
)
SELECT idlist
FROM lines_r
WHERE array_length(idlist, 1) <= $3  -- the step above can overshoot by one element
ORDER BY array_length(idlist, 1) DESC
LIMIT 1;
$$
LANGUAGE sql;
-- Create id chunks
-- groups_r carries three things per step:
--   idlist     every id assigned to a chunk so far
--   grouplist  the chunk produced by this step
--   id         the line the chunk was started from
-- The containment test uses the array operator @> ("contains").
WITH RECURSIVE groups_r AS (
    -- first chunk: start from line 1
    (SELECT find_connected(id, 2, 3, ARRAY[id]) AS idlist,
            find_connected(id, 2, 3, ARRAY[id]) AS grouplist,
            id
     FROM lines
     WHERE id = 1)
    UNION ALL
    -- next chunk: start from one line that is not assigned yet
    (SELECT array_cat(groups_r.idlist, find_connected(lines.id, 2, 3, groups_r.idlist)) AS idlist,
            find_connected(lines.id, 2, 3, groups_r.idlist) AS grouplist,
            lines.id
     FROM lines,
          groups_r
     WHERE NOT (groups_r.idlist @> ARRAY[lines.id])
     LIMIT 1))
SELECT
    -- (SELECT array_agg(DISTINCT x) FROM unnest(idlist) t (x)) idlist, -- left for better understanding what is happening
    row_number() OVER () chunk_id,
    (SELECT array_agg(DISTINCT x) FROM unnest(grouplist) t (x)) grouplist,
    id input_line_id
FROM groups_r;
The only problem is that performance is quite poor when the number of ids per chunk increases. For a table with 300 rows and 20 ids per chunk, execution time is around 15 minutes, even with indexes on the geometry and id columns.

Interleaving array_agg in postgres

I have a postgres query in which I want to interleave my array_agg statements :
-- One row per client: a single composite made of three parallel arrays
-- (questions, question ids, answers).
-- NOTE(review): the join compares client_intake_question.id with client_user_id;
-- confirm that this is the intended key.
SELECT
    client_user_id,
    (
        array_agg(question),
        array_agg(client_intake_question_id),
        array_agg(answer)
    ) AS answer
FROM client_intake_answer
LEFT JOIN client_intake_question
    ON client_intake_question.id = client_user_id
GROUP BY client_user_id
Gives me the following:
5 | ("{""Have you ever received counselling?"",""Have you ever received counselling or mental health support in the past?""}","{1,2}","{yes,no}")
I would like the results to be:
5 | ("{""Have you ever received counselling?", 1, "yes"",""Have you ever received counselling or mental health support in the past?", 2, "no""}"
How do I do this?
I've set up a small example similar to yours:
-- Minimal schema: each answer row points at a question.
create table answers(user_id int, question_id int, answer varchar(20));
create table questions(question_id int, question varchar(20));
insert into questions (question_id, question) values
(1, 'question 1'),
(2, 'question 2');
insert into answers (user_id, question_id, answer) values
(1, 1, 'yes'),
(1, 2, 'no'),
(2, 1, 'no'),
(2, 2, 'yes');
-- One array per user; each element is "question,question_id,answer".
-- The ORDER BY inside array_agg fixes the element order, which is otherwise unspecified.
select answers.user_id,
       array_agg(concat(questions.question, ',', questions.question_id::text, ',', answers.answer)
                 order by questions.question_id)
from questions
inner join answers
on questions.question_id = answers.question_id
group by answers.user_id
order by answers.user_id
user_id | array_agg
------: | :-------------------------------------
1 | {"question 1,1,yes","question 2,2,no"}
2 | {"question 1,1,no","question 2,2,yes"}
dbfiddle here
To interleave or splice together multiple array_agg's you can do the following:
-- One array per client; each element is "[question_id,question,answer]".
-- A ',' now separates the id from the question text; without it the two run together.
-- NOTE(review): the join compares client_intake_question.id with client_user_id, as in
-- the question; confirm that this is the intended key.
-- NOTE: || yields NULL when any operand is NULL, so an element is NULL when this
-- LEFT JOIN finds no matching question.
SELECT client_user_id,
       array_agg('[' || client_intake_question_id || ',' || question || ',' || answer || ']') AS answer
FROM client_intake_answer
LEFT OUTER JOIN
    client_intake_question ON client_intake_question.id = client_user_id
GROUP BY client_user_id

Copy value from one row to another row in PostgreSQL

I have a table like this:
id product amount
1 A 6
1 A 8
1 A
1 B 1
1 B
2 C 2
2 C
2 C 4
2 C
2 C
and I need to make it like this:
id product amount
1 A 6
1 A 8
1 A 8
1 B 1
1 B 1
2 C 2
2 C 2
2 C 4
2 C 4
2 C 4
Copy amount by previous non-missing value.
I tried to use the lag() function. However, lag() is a window function, and window functions are not allowed in UPDATE.
-- Attempt from the question; it fails because lag() is not allowed in UPDATE.
update tableA set amount = lag(amount);
What can I do using PostgreSQL?
You can SELECT what you want to UPDATE, but there is no (easy) way to actually do the UPDATE, because the table fox does not have a primary key (yet).
-- Demo table; created without a primary key on purpose.
CREATE TABLE fox (
    id      INTEGER NOT NULL,
    product TEXT    NOT NULL,
    amount  INTEGER             -- nullable: NULL is the missing amount to be filled in
);
To populate the fox with some data.
-- Rows from the question plus one extra product that has no missing amount.
INSERT INTO fox (id, product, amount)
VALUES
    (1, 'A', 6),
    (1, 'A', 8),
    (1, 'A', NULL),
    (1, 'B', 1),
    (1, 'B', NULL),
    (2, 'C', 2),
    (2, 'C', NULL),
    (2, 'C', 4),
    (2, 'C', NULL),
    (2, 'C', NULL),
    (3, 'What does the fox say?', 5);
The query.
-- For every row with a NULL amount, look up the closest earlier non-NULL amount of the
-- same (id, product).
-- rn numbers the rows in the order the scan returns them; the table has no column that
-- defines an order, so this is only as stable as that scan order.
WITH ranks (rn, id, product, amount) AS (
    SELECT ROW_NUMBER() OVER (), id, product, amount FROM fox
)
SELECT r.id, r.product,
       (SELECT p.amount FROM ranks p
         WHERE p.id = r.id AND p.product = r.product
           AND p.rn < r.rn AND p.amount IS NOT NULL
         ORDER BY p.rn DESC LIMIT 1   -- nearest earlier row, not the largest amount
       ) AS amount
FROM ranks r
WHERE r.amount IS NULL
ORDER BY r.id, r.product, amount;
Yields the rows which previously had a NULL and now have the appropriate amount.
id | product | amount
----+---------+--------
1 | A | 8
1 | B | 1
2 | C | 2
2 | C | 4
2 | C | 4
But you cannot use this data to update, because rows are still not uniquely identified by (id, product) - which means you cannot write a WHERE condition identifying your rows uniquely. How would the WHERE clause know whether to change the amount to 2 or 4 in the UPDATE? The multiple rows with (id, product) = (2, 'C') are indistinguishable in the WHERE of the UPDATE.
Let's give the fox a primary key.
-- Add a serial column (if it is not there yet) and make it the primary key.
ALTER TABLE fox
    ADD COLUMN IF NOT EXISTS pkey serial,
    ADD PRIMARY KEY (pkey);
Now we can identify the rows by the PRIMARY KEY pkey.
-- Preview: each row with a NULL amount next to the amount it would receive, i.e. the
-- non-NULL amount of the same (id, product) with the highest smaller pkey.
SELECT n.pkey,
       n.id,
       n.product,       -- id and product are informational only: pkey is UNIQUE
       (SELECT prev.amount
          FROM fox AS prev
         WHERE prev.id = n.id
           AND prev.product = n.product
           AND prev.pkey < n.pkey
           AND prev.amount IS NOT NULL
         ORDER BY prev.pkey DESC
         LIMIT 1) AS amount
  FROM fox AS n
 WHERE n.amount IS NULL
 ORDER BY n.pkey, n.id, n.product, amount;
to display the changes to be made
pkey | id | product | amount
------+----+---------+--------
3 | 1 | A | 8
5 | 1 | B | 1
7 | 2 | C | 2
9 | 2 | C | 4
10 | 2 | C | 4
And we can use pkey in the UPDATE.
-- Open a serializable transaction and fill every NULL amount from the nearest earlier
-- non-NULL row (by pkey) of the same (id, product). The transaction is left open so the
-- result can be inspected before it is committed or rolled back.
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
UPDATE fox AS f
   SET amount = c.amount
  FROM (
        SELECT n.pkey,
               (SELECT prev.amount
                  FROM fox AS prev
                 WHERE prev.id = n.id
                   AND prev.product = n.product
                   AND prev.pkey < n.pkey
                   AND prev.amount IS NOT NULL
                 ORDER BY prev.pkey DESC
                 LIMIT 1) AS amount
          FROM fox AS n
         WHERE n.amount IS NULL
       ) AS c
 WHERE f.pkey = c.pkey;
Check the result is okay:
-- Columns in table order: id, product, amount, then the pkey column added afterwards.
SELECT id, product, amount, pkey
FROM fox
ORDER BY id, product, amount, pkey;
And accept using COMMIT or ROLLBACK accordingly.
Alternative to adding a PRIMARY KEY
Every table should always have a primary key.
If you insist on not having one, you could instead compute the rows with their filled-in (no longer NULL) amounts and, rather than UPDATEing them, INSERT them into your table and then run DELETE FROM fox WHERE amount IS NULL to remove the rows which had no amount. This way you get around adding a unique primary key. Of course the INSERT and DELETE must be packaged into one TRANSACTION so that they do not interfere with other transactions running concurrently. For example, another transaction might add rows with a NULL amount AFTER you have calculated the data to be INSERTed using SELECT and BEFORE you DELETE all NULL amounts. In that case the concurrently added row would be deleted without ever having been filled in (data loss due to concurrency; think ACID).
But a missing primary key will probably bite you later on, anyway.
Without knowing what defines the "previous row", everything is a guess. But you can use an anonymous block to do what you want; just adapt it to your needs:
-- Scratch copy of the question's data.
CREATE TEMPORARY TABLE test_lag AS
SELECT v.id, v.product, v.amount
FROM (
    VALUES (1, 'A', 6),
           (1, 'A', 8),
           (1, 'A', NULL),
           (1, 'B', 1),
           (1, 'B', NULL),
           (2, 'C', 2),
           (2, 'C', NULL),
           (2, 'C', 4),
           (2, 'C', NULL),
           (2, 'C', NULL)
) AS v (id, product, amount);
DO $$
BEGIN
    -- Loop until no NULL amount can be filled any more.
    -- A loop is needed because PostgreSQL does not support the IGNORE NULLS clause on
    -- lag(): one pass only copies from the directly preceding row, so a run of several
    -- NULLs needs several passes.
    LOOP
        WITH tmp AS (
            SELECT ctid,
                   -- PARTITION BY keeps values from leaking into another (id, product).
                   -- You MUST add ORDER BY <column that defines the previous row> inside
                   -- OVER (...); an ORDER BY on the outer query does not order lag(), and
                   -- without one the row order within a group is whatever the scan returns.
                   lag(amount) OVER (PARTITION BY id, product) AS last_amount
            FROM test_lag
        )
        UPDATE test_lag
           SET amount = tmp.last_amount
          FROM tmp
         WHERE test_lag.ctid = tmp.ctid
           AND test_lag.amount IS NULL
           AND tmp.last_amount IS NOT NULL;  -- never "fill" with NULL: such an update still
                                             -- counts as FOUND and the loop would never end
        EXIT WHEN NOT FOUND;
    END LOOP;
END $$;
SELECT id, product, amount FROM test_lag ORDER BY id, product, amount;

SQL Running Subtraction

A brief description of the business scenario: the table records goods receipts. The first few rows are the expected lines, each with a purchase order (PO). We then physically receive goods against each expected line, and the received quantity may differ from the expected one, for example because some units are damaged or missing. We therefore keep a status for each received line (e.g. OK, DAMAGE), and we also have to calculate the short quantity from the total expected quantity of each item and the total of its received lines.
-- Drop the work table when it already exists, then create it empty.
IF OBJECT_ID('DEV..Temp','U') IS NOT NULL
    DROP TABLE Temp

CREATE TABLE Temp
(
    ID          INT IDENTITY(1,1) PRIMARY KEY CLUSTERED,
    Item        VARCHAR(32),
    PO          VARCHAR(32) NULL,   -- filled on expected lines only
    ExpectedQty INT         NULL,   -- filled on expected lines only
    ReceivedQty INT         NULL,   -- filled on received lines only
    [STATUS]    VARCHAR(32) NULL,   -- filled on received lines only
    BoxName     VARCHAR(32) NULL    -- filled on received lines only
)
Note that the first few rows, the ones with PO data, are the expected lines,
and the remaining rows are the received lines.
-- Expected lines first (PO + ExpectedQty), then received lines (ReceivedQty + status + box).
-- Quantities are written as numbers so nothing relies on implicit string-to-INT conversion.
-- One SELECT per row with UNION ALL, because SQL Server 2000 has no multi-row VALUES.
INSERT INTO TEMP (Item,PO,ExpectedQty,ReceivedQty,[STATUS],BoxName)
SELECT 'ITEM01','PO-01',30,NULL,NULL,NULL UNION ALL
SELECT 'ITEM01','PO-02',20,NULL,NULL,NULL UNION ALL
SELECT 'ITEM02','PO-01',40,NULL,NULL,NULL UNION ALL
SELECT 'ITEM03','PO-01',50,NULL,NULL,NULL UNION ALL
SELECT 'ITEM03','PO-02',30,NULL,NULL,NULL UNION ALL
SELECT 'ITEM03','PO-03',20,NULL,NULL,NULL UNION ALL
SELECT 'ITEM04','PO-01',30,NULL,NULL,NULL UNION ALL
SELECT 'ITEM01',NULL,NULL,20,'OK','box01' UNION ALL
SELECT 'ITEM01',NULL,NULL,25,'OK','box02' UNION ALL
SELECT 'ITEM01',NULL,NULL,5,'DAMAGE','box03' UNION ALL
SELECT 'ITEM02',NULL,NULL,38,'OK','box04' UNION ALL
SELECT 'ITEM02',NULL,NULL,2,'DAMAGE','box05' UNION ALL
SELECT 'ITEM03',NULL,NULL,30,'OK','box06' UNION ALL
SELECT 'ITEM03',NULL,NULL,30,'OK','box07' UNION ALL
SELECT 'ITEM03',NULL,NULL,30,'OK','box08' UNION ALL
SELECT 'ITEM03',NULL,NULL,10,'DAMAGE','box09' UNION ALL
SELECT 'ITEM04',NULL,NULL,25,'OK','box10'
Below Table is my expected result based on above data.
I need to show those data following way.
So I appreciate if you can give me an appropriate query for it.
Note: first row is blank and it is actually my table header. :)
-- Expected report, written out as literal rows; the first SELECT carries only empty
-- strings and exists to supply the column headers.
SELECT '' as 'ITEM', '' as 'PO#', '' as 'ExpectedQty',
'' as 'ReceivedQty','' as 'DamageQty' ,'' as 'ShortQty' UNION ALL
SELECT 'ITEM01','PO-01','30','30','0' ,'0' UNION ALL
SELECT 'ITEM01','PO-02','20','15','5' ,'0' UNION ALL
SELECT 'ITEM02','PO-01','40','38','2' ,'0' UNION ALL
SELECT 'ITEM03','PO-01','50','50','0' ,'0' UNION ALL
SELECT 'ITEM03','PO-02','30','30','0' ,'0' UNION ALL
SELECT 'ITEM03','PO-03','20','10','10','0' UNION ALL
SELECT 'ITEM04','PO-01','30','25','0' ,'5'
Note : we don't received more than expected.
solution should be based on SQL 2000
You should reconsider how you store this data. Separate Expected and Received+Damaged in different tables (you have many unused (null) cells). This way any query should become more readable.
I think what you try to do can be achieved more easily with a stored procedure.
Anyway, try this query:
-- Allocates each item's received quantities to its PO lines in PO order:
-- good ("OK") units are applied first, then damaged units; whatever is still open on a
-- line is short. Only derived tables and CASE are used, so it runs on SQL Server 2000.
--
-- For one PO line, with P = ExpectedQty of all earlier POs of the item:
--   [rec-consumed]        = OK total            - (P + ExpectedQty)
--   [rec_damage-consumed] = OK + damaged totals - (P + ExpectedQty)
--   ReceivedQty = (OK total - P)            clamped to 0..ExpectedQty
--   FilledQty   = (OK + damaged totals - P) clamped to 0..ExpectedQty
--   DamageQty   = FilledQty - ReceivedQty
--   ShortQty    = ExpectedQty - FilledQty
-- Deriving damage and short from the clamped values keeps them within the line, so units
-- already assigned to an earlier PO are not counted again on a later one.
--
-- NOTE: POs are ordered by string comparison (t1.PO > t2.PO), so PO numbers must sort
-- correctly as text (e.g. zero-padded: PO-01, PO-02, ...).
SELECT Item, PO, ExpectedQty,
       ReceivedQty,
       FilledQty - ReceivedQty AS DamageQty,
       ExpectedQty - FilledQty AS ShortQty
FROM (
    SELECT Item, PO, ExpectedQty,
           CASE WHEN [rec-consumed] >= 0 THEN ExpectedQty
                WHEN [rec-consumed] + ExpectedQty > 0 THEN [rec-consumed] + ExpectedQty
                ELSE 0
           END AS ReceivedQty,
           CASE WHEN [rec_damage-consumed] >= 0 THEN ExpectedQty
                WHEN [rec_damage-consumed] + ExpectedQty > 0 THEN [rec_damage-consumed] + ExpectedQty
                ELSE 0
           END AS FilledQty
    FROM (
        select t1.Item,
               t1.PO,
               t1.ExpectedQty,
               COALESCE(st.sum_ReceivedQty_OK, 0)
                 - (sum(COALESCE(t2.ExpectedQty,0))
                    + t1.ExpectedQty)
                 [rec-consumed],
               COALESCE(st.sum_ReceivedQty_OK, 0) + COALESCE(st.sum_ReceivedQty_DAMAGE, 0)
                 - (sum(COALESCE(t2.ExpectedQty,0))
                    + t1.ExpectedQty)
                 [rec_damage-consumed]
        from #tt t1
        -- earlier PO lines of the same item (their ExpectedQty is summed into P)
        left join #tt t2 on t1.Item = t2.Item
                        and t1.PO > t2.PO
                        and t2.PO is not null
        -- per-item received totals; LEFT JOIN keeps items with no received line at all
        left join (select Item
                        , sum(CASE WHEN status = 'OK' THEN ReceivedQty ELSE 0 END)
                            sum_ReceivedQty_OK
                        , sum(CASE WHEN status != 'OK' THEN ReceivedQty ELSE 0 END)
                            sum_ReceivedQty_DAMAGE
                   from #tt where PO is null
                   group by Item) st on t1.Item = st.Item
        where t1.PO is not null
        group by t1.Item, t1.PO, t1.ExpectedQty,
                 st.sum_ReceivedQty_OK,
                 st.sum_ReceivedQty_DAMAGE
    ) a
) b
order by Item, PO