Optimising T-SQL reporting performance

I have the table below. I need to delete opposite rows, in pairs, between two dates, based on the PerCode value.
In other words, within the date range we delete rows that have the same PerCode and equal-and-opposite values.
The problem is that the begin date and end date are provided by users as report parameters, and the query takes too much time if I try to delete these rows at runtime.
Example:
Begin date = 01/01/2018
End date = 31/12/2018
I should delete rows 3 and 4.
Do you have any idea how to do that while keeping performance acceptable? (The table has 200 million rows.)
+----+------------+---------+-------+-----------+
| Id | Date       | PerCode | Value | IsDeleted |
+----+------------+---------+-------+-----------+
| 1  | 01/10/2017 | C1      |    10 |           |
| 2  | 01/01/2018 | C1      |   -10 |           |
| 3  | 15/02/2018 | C2      |    20 |         1 |
| 4  | 10/03/2018 | C2      |   -20 |         1 |
| 5  | 01/12/2018 | C3      |    15 |           |
| 6  | 01/02/2019 | C3      |   -15 |           |
+----+------------+---------+-------+-----------+

I had a quick go at this, using a table variable so I could knock together a query with your test data. However, this might not perform well when used over 200 million rows.
DECLARE @table TABLE (id INT, [date] DATE, percode CHAR(2), [value] INT, isdeleted BIT);

INSERT INTO @table
SELECT 1, '20171001', 'C1', 10, NULL
UNION ALL
SELECT 2, '20180101', 'C1', -10, NULL
UNION ALL
SELECT 3, '20180215', 'C2', 20, NULL
UNION ALL
SELECT 4, '20180310', 'C2', -20, NULL
UNION ALL
SELECT 5, '20181201', 'C3', 15, NULL
UNION ALL
SELECT 6, '20190201', 'C3', -15, NULL;

DECLARE @date_from DATE = '20180101';
DECLARE @date_to DATE = '20181231';

WITH ordered AS (
    SELECT
        id,
        percode,
        [value],
        ROW_NUMBER() OVER (PARTITION BY percode, [value] ORDER BY [value]) AS order_id
    FROM
        @table
    WHERE
        [date] BETWEEN @date_from AND @date_to
        AND ISNULL(isdeleted, 0) != 1),
matches AS (
    SELECT
        m1.id AS match_1_id,
        m2.id AS match_2_id
    FROM
        ordered m1
        INNER JOIN ordered m2 ON m1.percode = m2.percode AND m1.[value] = m2.[value] * -1 AND m1.order_id = m2.order_id)
UPDATE
    t
SET
    isdeleted = 1
FROM
    @table t
    INNER JOIN matches m ON m.match_1_id = t.id OR m.match_2_id = t.id;

SELECT * FROM @table;
Results:
id date percode value isdeleted
1 2017-10-01 C1 10 NULL
2 2018-01-01 C1 -10 NULL
3 2018-02-15 C2 20 1
4 2018-03-10 C2 -20 1
5 2018-12-01 C3 15 NULL
6 2019-02-01 C3 -15 NULL
How does it work? Well I broke the task down into steps:
make a list of all rows in the date period specified, where they aren't already deleted;
for each row of data assign it a running count number, grouped by the percode and the value. So the first C1 10 would be number #1, then the second C1 10 would be number #2, etc.;
to find matches it's simply a case of finding any value that has the same percode, the equal and opposite value to another value group, and the same running count number;
where there's a match set the isdeleted flag to 1.
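If this needs to run against the full 200-million-row table, one additional idea (not part of the answer above, just a hedged sketch) is to materialise the matched ids once and then flag them in small batches, so each transaction stays short. The table name dbo.MasterTable and its column names are assumptions based on the example data; @date_from and @date_to are the same parameters declared earlier.
-- Sketch only: collect the ids of all matched pairs in one pass ...
IF OBJECT_ID('tempdb..#to_flag') IS NOT NULL DROP TABLE #to_flag;

WITH ordered AS (
    SELECT id,
           PerCode,
           [Value],
           ROW_NUMBER() OVER (PARTITION BY PerCode, [Value] ORDER BY [Date]) AS order_id
    FROM dbo.MasterTable
    WHERE [Date] BETWEEN @date_from AND @date_to
      AND ISNULL(IsDeleted, 0) != 1
)
SELECT m1.id
INTO #to_flag
FROM ordered m1
JOIN ordered m2
  ON m1.PerCode = m2.PerCode
 AND m1.[Value] = m2.[Value] * -1
 AND m1.order_id = m2.order_id;   -- the join is symmetric, so both rows of each pair land here

-- ... then update in batches so locks and log growth stay manageable.
DECLARE @rows INT = 1;
WHILE @rows > 0
BEGIN
    UPDATE TOP (50000) t
    SET t.IsDeleted = 1
    FROM dbo.MasterTable t
    JOIN #to_flag f ON f.id = t.id
    WHERE ISNULL(t.IsDeleted, 0) != 1;

    SET @rows = @@ROWCOUNT;
END;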

Here is my code, but it is not performant over 200 million rows at runtime.
In real life, PerCode is a concatenation of 5 columns (a date, a varchar(13), a varchar(2), a varchar(1) and a varchar(50)) and Value is 4 numeric columns.
I am searching for other ideas. (One idea along those lines is sketched after the code below.)
--DECLARE @table TABLE (id INT, [date] DATE, percode CHAR(2), [value] INT, isdeleted BIT);
SELECT * INTO #MasterTable
FROM
(
    SELECT 1 id, '20171001' [date], 'C1' percode, 10 [value], NULL isdeleted
    UNION ALL
    SELECT 2, '20180101', 'C1', -10, NULL
    UNION ALL
    SELECT 3, '20180215', 'C2', 20, NULL
    UNION ALL
    SELECT 4, '20180310', 'C2', -20, NULL
    UNION ALL
    SELECT 5, '20181201', 'C3', 15, NULL
    UNION ALL
    SELECT 6, '20190201', 'C3', -15, NULL
) T;

DECLARE @date_from DATE = '20180101';
DECLARE @date_to DATE = '20181231';

SELECT F.id
INTO #TmpTable
FROM
(
    SELECT Id, PerCode, [Value],
           ROW_NUMBER() OVER (PARTITION BY PerCode, [Value] ORDER BY (SELECT 0)) Rn2
    FROM #MasterTable
) F
INNER JOIN
(
    SELECT PerCode, Rn1
    FROM
    (
        SELECT PerCode,
               [Value],
               ROW_NUMBER() OVER (PARTITION BY PerCode, [Value] ORDER BY (SELECT 0)) Rn1
        FROM #MasterTable
        WHERE [date] BETWEEN @date_from AND @date_to
    ) A
    GROUP BY PerCode, Rn1
    HAVING SUM([Value]) = 0 AND COUNT(*) > 1
) B ON F.PerCode = B.PerCode
   AND F.Rn2 = B.Rn1;

UPDATE R
SET IsDeleted = 1
FROM #MasterTable R
INNER JOIN #TmpTable P ON R.id = P.id;

SELECT * FROM #MasterTable;

DROP TABLE #MasterTable;
DROP TABLE #TmpTable;
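Since the real PerCode is a concatenation of five columns and Value is the sum of four numeric columns, another idea (again only a sketch; the names Col1..Col5 and Amt1..Amt4 are placeholders for the real columns, and dbo.MasterTable is an assumed table name) is to skip the concatenation entirely: index the raw columns and let the window function partition on them directly.
-- Sketch only: a filtered, covering index for the date-range pairing query.
CREATE INDEX IX_MasterTable_Pairing
    ON dbo.MasterTable ([Date], Col1, Col2, Col3, Col4, Col5)
    INCLUDE (Amt1, Amt2, Amt3, Amt4, Id)
    WHERE IsDeleted IS NULL;

-- Pair rows without building a PerCode string at all.
WITH ordered AS (
    SELECT Id,
           Col1, Col2, Col3, Col4, Col5,
           Amt1 + Amt2 + Amt3 + Amt4 AS NetValue,
           ROW_NUMBER() OVER (PARTITION BY Col1, Col2, Col3, Col4, Col5,
                                           Amt1 + Amt2 + Amt3 + Amt4
                              ORDER BY [Date]) AS order_id
    FROM dbo.MasterTable
    WHERE [Date] BETWEEN @date_from AND @date_to
      AND IsDeleted IS NULL
)
SELECT m1.Id
FROM ordered m1
JOIN ordered m2
  ON  m1.Col1 = m2.Col1 AND m1.Col2 = m2.Col2 AND m1.Col3 = m2.Col3
  AND m1.Col4 = m2.Col4 AND m1.Col5 = m2.Col5
  AND m1.NetValue = -m2.NetValue
  AND m1.order_id = m2.order_id;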

Related

How to add a condition in a select clause, based on the result value of another select clause

I need a Postgres query to get "A value", "A value date", "B value" and "B value date".
The B value and date should be the ones that fall between 95 and 100 days after the "A value date".
I have the query to get "A value" and "A value date", but I don't know how to get the B value and date by using that result (A value).
select u.id,
(select activity
from Sol_pro
where user_id = u.id
and uni_par = 'weight_m'
order by created_at asc
limit 1) as A_value
from users u;
I need the B_value and B_date from the same sol_pro table, 95-100 days after the A_value date (if there are several rows between 95 and 100 days, I need only the most recent one).
Expected output: id = 112233, A_Value = "weight = 210.25", A_value_date = 12-12-2020, B_value = "weight = 220.25", B_value_date = 12-12-2020
Well, without a table definition I developed it from the output columns and your original query. Further, I had to make up the data, but the following should be close enough for you to see the technique involved. It is actually a simple join operation, just that it is a self-join on the table sol_pro (i.e. it is joined to itself). Notice the comments indicated by --<<<
select distinct on (a.id)
a.id
, a.user_id --<<< assumption needed
, a.activity "A_value"
, a.created_at::date "A_date"
, b.activity "B_value"
, b.created_at::date
from sol_pro a
join sol_pro b
on ( b.user_id = a.user_id --<<< assumption
and b.uni_par = a.uni_par --<<< assumption
)
where a.id = 112233 --<<< given Orig query
and a.uni_par = 'weight_m' --<<< given Orig query, but not needed if id is PK
and b.created_at between a.created_at + interval '95 days' --<<< date range inclusive of 95-100
and a.created_at + interval '100 days'
order by a.id, b.created_at desc;
See here for an example run. The example contains a column you will not have, "belayer_note"; this is just a note-to-self I sometimes use for initial testing.
Suppose that you have tables users and measures:
# create table users (id integer);
# create table measures (user_id integer, value decimal, created_at date);
They are filled with test data:
INSERT INTO users VALUES (1), (2), (3);
INSERT INTO measures VALUES (1, 100, '9/10/2020'), (1, 103, '9/15/2020'), (1, 104, '10/2/2020');
INSERT INTO measures VALUES (2, 200, '9/11/2020'), (2, 207, '9/21/2020'), (2, 204, '10/1/2020');
INSERT INTO measures VALUES (3, 300, '9/12/2020'), (3, 301, '10/1/2020'), (3, 318, '10/12/2020');
Query:
WITH M AS (
SELECT
A.user_id,
A.value AS A_value, A.created_at AS A_date,
B.value AS B_value, B.created_at AS B_date
FROM measures A
LEFT JOIN measures B ON
A.user_id = B.user_id AND
B.created_at >= A.created_at + INTERVAL '5 days' AND
B.created_at <= A.created_at + INTERVAL '10 days'
ORDER BY
user_id, A_date, B_date
)
SELECT DISTINCT ON (user_id) * FROM M;
will select for each user:
the first available measurement (A)
the next measurement (B) which is made between 5-10 days from (A).
Result:
user_id | a_value | a_date | b_value | b_date
---------+---------+------------+---------+------------
1 | 100 | 2020-09-10 | 103 | 2020-09-15
2 | 200 | 2020-09-11 | 207 | 2020-09-21
3 | 300 | 2020-09-12 | [NULL] | [NULL]
(3 rows)
P.S. You must sort table rows carefully with ORDER BY when using the DISTINCT ON () clause, because PostgreSQL keeps only the first record of each group and discards the others.
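As a small illustration of that point (not part of the answer above, just a sketch against the measures table defined earlier): the row kept per user_id is the first one in the sort order, so flipping the direction of created_at changes which measurement survives.
-- Keeps the earliest measurement per user ...
SELECT DISTINCT ON (user_id) user_id, value, created_at
FROM measures
ORDER BY user_id, created_at ASC;

-- ... while DESC keeps the latest one instead.
SELECT DISTINCT ON (user_id) user_id, value, created_at
FROM measures
ORDER BY user_id, created_at DESC;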

If there is only one zero value then group by supplier and show zero, if there is no zero, then avg all values

I will give you an example of the table that I have:
Supplier | Value
sup1 | 4
sup2 | 1
sup1 | 0
sup1 | 3
sup2 | 5
I need a result that averages Value by supplier, but if there is a 0 value for a supplier, do not average; return 0 instead.
It should look like this:
Supplier | Value
sup1 | 0
sup2 | 3
This is a little trick, but it should work:
SELECT Supplier,
CASE WHEN MIN(ABS(Value)) = 0 THEN 0 ELSE AVG(Value) END
FROM TableTest
GROUP BY Supplier
EDIT: Using the ABS() function lets you avoid problems with negative values.
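To see why the ABS() matters, here is a quick sketch (the @Demo table and its negative row are made up, not part of the question's data): without ABS(), a supplier that has both a 0 and a negative value slips past the MIN(Value) = 0 check.
DECLARE @Demo TABLE (Supplier VARCHAR(50), Value INT);
INSERT INTO @Demo VALUES ('sup3', -3), ('sup3', 0), ('sup3', 4);

SELECT Supplier,
       CASE WHEN MIN(Value)      = 0 THEN 0 ELSE AVG(Value) END AS WithoutAbs, -- misses the 0, because MIN(Value) is -3
       CASE WHEN MIN(ABS(Value)) = 0 THEN 0 ELSE AVG(Value) END AS WithAbs     -- correctly returns 0
FROM @Demo
GROUP BY Supplier;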
DECLARE @TAB TABLE (SUPPLIER VARCHAR(50), VALUE INTEGER)

INSERT INTO @TAB
SELECT 'sup1', 4
UNION ALL
SELECT 'sup2', 1
UNION ALL
SELECT 'sup1', 0
UNION ALL
SELECT 'sup1', 3
UNION ALL
SELECT 'sup2', 5

SELECT * FROM @TAB

SELECT T1.SUPPLIER,
       CASE WHEN EXISTS (SELECT 1 FROM @TAB T WHERE T.SUPPLIER = T1.SUPPLIER AND T.VALUE = 0)
            THEN 0
            ELSE AVG(T1.VALUE)
       END AS VALUE
FROM @TAB T1
GROUP BY T1.SUPPLIER
Result
SUPPLIER VALUE
sup1 0
sup2 3
Using the following query is one way to do it.
First I capture the suppliers that have Value = 0, then based on that result I do the remaining calculation, and finally I use UNION to get the expected result:
DECLARE @ZeroValue TABLE (Supplier VARCHAR (20));
INSERT INTO @ZeroValue (Supplier)
SELECT Supplier FROM TestTable WHERE Value = 0

SELECT Supplier, 0 AS Value FROM @ZeroValue
UNION
SELECT T.Supplier, AVG(T.Value) AS Value
FROM TestTable T
JOIN @ZeroValue Z ON Z.Supplier != T.Supplier
GROUP BY T.Supplier
Schema used for the sample:
CREATE TABLE TestTable (Supplier VARCHAR (20), Value INT);
INSERT INTO TestTable (Supplier, Value) VALUES
('sup1', 4), ('sup2', 1), ('sup1', 0), ('sup1', 3), ('sup2', 5);
Please find the working demo on db<>fiddle

Filter Out a Specific result from a complicated SQL statement and clean up my SQL

I am combining the following tables to determine in and out times, total hours worked for the day, workcenter and the associated pay rates. However, when PunchType = '303' there are two rows: one for the initial workcenter as a '10' punch, and then the '303'. When the '303' punch exists I need to use that as the in punch instead. The query and sample results below show that I have narrowed my query down to return both the '10' punch (in) to '12' punch (out) result and, if it exists, the '303' punch (in) to '12' punch (out) result. In my final result I only want the '303' - '12' match if it exists, or I will have duplicate records. All of this is being dumped into Report Builder 3.0 to calculate total hours worked and dollars paid out per day (no need for help there, just trying to give some context).
I have included RowNumber in the final query results because I was thinking of using it to filter the results as needed. My thinking: if COUNT(RowNumber) = 2, return where RowNumber = 2; if COUNT(RowNumber) = 1, return where RowNumber = 1; if COUNT(RowNumber) = 4, return where RowNumber IN (3, 4). I know the syntax of that phrasing is wrong, but I am just using it to illustrate what I am trying to do. I am sure there is an easier way to write the query (though I am OK with it if not), as long as I can figure out how to filter the results down to only what I need. Any help is appreciated. Thanks! (One possible way to express this rule is sketched after the results below.)
Sample Data:
Timecard

| TimeCardID | StoreID | EmpID | CardDate | PunchType | WorkCenter | BreakIndex | ShadowTimeCardForID |
| B6B839AD-D8DF-E611-A3E5-0019170149B6 | 32365 | 4171 | 2017-01-21 07:54:00.500 | 303 | 4 | 0 | 00000000-0000-0000-0000-000000000000 |

EmployeeRate

| EmployeeRateID | EmployeeID | RateIndex | WorkCenter | OvertimeRate | RegularRate |
| C3325A54-E7A9-E611-A16D-0019178089A7 | 27139B5C-7A74-E611-969E-3417EBD1A8D1 | 4 | 4 | 2250 | 1500 |
Query:
DECLARE @datetime datetime = '2017-01-22 04:00:00.000'
SELECT
z.EmpID,
z.RegularRate,
z.OvertimeRate,
z.WorkCenter,
z.in_punch,
z.out_punch,
z.HoursWorked,
z.RowNumber
FROM
(SELECT
y.EmpID,
y.RegularRate,
y.OvertimeRate,
y.WorkCenter,
y.in_punch,
y.out_punch,
y.HoursWorked,
row_number() OVER(PARTITION BY EmpID ORDER BY EmpID) AS RowNumber
FROM
(SELECT
f.EmpID,
f.RegularRate,
f.OvertimeRate,
f.WorkCenter,
f.in_punch,
f.out_punch,
f.HoursWorked
FROM
(SELECT
tc.EmpID,
er.RegularRate,
er.OvertimeRate,
tc.WorkCenter,
tc.in_punch,
tc.out_punch,
CONVERT(varchar(3),DATEDIFF(MINUTE,in_punch,out_punch)/60) + ':' +
RIGHT('0' + CONVERT(varchar(2),DATEDIFF(MINUTE,in_punch,out_punch)%60),2)
AS HoursWorked,
row_number() OVER(PARTITION BY tc.EmpID ORDER BY tc.EmpID) AS RowNumber
FROM
(SELECT
e.EmpID,
e.WorkCenter,
e.CardDate AS in_punch,
e2.CardDate AS out_punch
FROM
(SELECT EmpID, CardDate, WorkCenter FROM TimeCard where PunchType = '10'
AND CardDate BETWEEN DATEADD(DAY, -1, @datetime) AND @datetime) e
INNER JOIN
(SELECT EmpID, CardDate, WorkCenter
FROM TimeCard where PunchType = '12' AND CardDate BETWEEN DATEADD(DAY, -1,
@datetime) AND @datetime) e2
ON
e.EmpID = e2.EmpID
) tc
INNER JOIN
[dbo].[Employee] em
ON tc.EmpID = em.EmpID
INNER JOIN
[dbo].[EmployeeRate] er
ON em.[EmployeeID] = er.[EmployeeID] AND tc.[Workcenter] = er.[WorkCenter]
WHERE tc.in_punch <= tc.out_punch
GROUP BY tc.EmpID,
er.RegularRate,
er.OvertimeRate,
tc.WorkCenter,
tc.in_punch,
tc.out_punch
) f
WHERE f.[RowNumber] <> '2'
UNION
SELECT
f.EmpID,
f.RegularRate,
f.OvertimeRate,
f.WorkCenter,
f.in_punch,
f.out_punch,
f.HoursWorked
FROM
(SELECT
tc.EmpID,
er.RegularRate,
er.OvertimeRate,
tc.WorkCenter,
tc.in_punch,
tc.out_punch,
CONVERT(varchar(3),DATEDIFF(MINUTE,in_punch,out_punch)/60) + ':' +
RIGHT('0' +
CONVERT(varchar(2),DATEDIFF(MINUTE,in_punch,out_punch)%60),2) AS
HoursWorked,
row_number() OVER(PARTITION BY tc.EmpID ORDER BY tc.EmpID) AS RowNumber
FROM
(SELECT
e.EmpID,
e.WorkCenter,
e.CardDate AS in_punch,
e2.CardDate AS out_punch
FROM
(SELECT EmpID, CardDate, WorkCenter
FROM TimeCard where PunchType = '303' AND CardDate BETWEEN DATEADD(DAY, -1,
@datetime) AND @datetime) e
INNER JOIN
(SELECT EmpID, CardDate, WorkCenter
FROM TimeCard where PunchType = '12' AND CardDate BETWEEN DATEADD(DAY, -1,
@datetime) AND @datetime) e2
ON
e.EmpID = e2.EmpID
) tc
INNER JOIN
[dbo].[Employee] em
ON tc.EmpID = em.EmpID
INNER JOIN
[dbo].[EmployeeRate] er
ON em.[EmployeeID] = er.[EmployeeID] AND tc.[Workcenter] = er.[WorkCenter]
WHERE tc.in_punch <= tc.out_punch
GROUP BY tc.EmpID,
er.RegularRate,
er.OvertimeRate,
tc.WorkCenter,
tc.in_punch,
tc.out_punch
) f
WHERE f.[RowNumber] <> '2'
) y
) z
GROUP BY
z.EmpID,
z.RegularRate,
z.OvertimeRate,
z.WorkCenter,
z.in_punch,
z.out_punch,
z.HoursWorked,
z.RowNumber
ORDER BY COUNT(RowNumber) OVER (PARTITION BY EmpID)
Results:
EmpID,RegularRate,OvertimeRate,WorkCenter,in_punch,out_punch,HoursWorked,RowNumber
9267,1150,1725,9,2017-01-21 16:59:27.940,2017-01-22 01:16:16.200,8:17, 1
9438,550,825,3,2017-01-21 09:55:34.500,2017-01-21 15:37:51.770,5:42,1
9471,223,335,1,2017-01-21 10:32:08.060,2017-01-21 14:18:23.430,3:46,1
9471,223,335,1,2017-01-21 15:54:29.570,2017-01-21 23:00:00.000,7:06,2
4171,223,335,1,2017-01-21 07:54:00.490,2017-01-21 15:17:31.740,7:23,1
4171,1500,2250,4,2017-01-21 07:54:00.500,2017-01-21 15:17:31.740,7:23,2
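As a follow-up to the filtering idea above: the rule "use the '303' punch as the in punch when it exists, otherwise the '10' punch" can also be expressed directly with a single ROW_NUMBER(), which avoids the UNION and the row-count juggling. This is only a sketch against the column names shown above (and, like the original join, it assumes at most one shift per employee in the window), not a tested rewrite of the full query:
-- Sketch only: rank each employee's candidate in punches, preferring 303 over 10.
WITH in_punches AS (
    SELECT EmpID, CardDate, WorkCenter,
           ROW_NUMBER() OVER (
               PARTITION BY EmpID
               ORDER BY CASE WHEN PunchType = '303' THEN 0 ELSE 1 END, CardDate
           ) AS rn
    FROM TimeCard
    WHERE PunchType IN ('10', '303')
      AND CardDate BETWEEN DATEADD(DAY, -1, @datetime) AND @datetime
)
SELECT i.EmpID,
       i.WorkCenter,
       i.CardDate AS in_punch,
       o.CardDate AS out_punch
FROM in_punches i
JOIN TimeCard o
  ON o.EmpID = i.EmpID
 AND o.PunchType = '12'
 AND o.CardDate BETWEEN DATEADD(DAY, -1, @datetime) AND @datetime
WHERE i.rn = 1              -- the preferred in punch only
  AND i.CardDate <= o.CardDate;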

TSQL query to return values from a table where there are multiple rows with same ID into a single row but each unique value in a different column

I'm trying to return values from a table so that I get 1 row per PurchaseID, with multiple columns for the buyers' first and last names.
E.g. I have a table with the following data:
| PurchaseID | FirstName | LastName |
| 1          | Joe       | Smith    |
| 1          | Peter     | Pan      |
| 2          | Max       | Power    |
| 2          | Jack      | Frost    |
I'm trying to write a query that returns the values like so:
| PurchaseID | Buyer1FirstName | Buyer1LastName | Buyer2FirstName | Buyer2LastName |
| 1          | Joe             | Smith          | Peter           | Pan            |
| 2          | Max             | Power          | Jack            | Frost          |
I've been looking online, but because I'm not sure how to explain in words what I want to do, I'm not having much luck. I'm hoping that with a more visual explanation someone could point me in the right direction.
Any help would be awesome.
You can use ROW_NUMBER, as below:
DECLARE @Tbl TABLE (PurchaseID INT, FirstName VARCHAR(50), LastName VARCHAR(50))
INSERT INTO @Tbl
VALUES
(1, 'Joe', 'Smith'),
(1, 'Peter', 'Pan'),
(2, 'Max', 'Power'),
(2, 'Jack', 'Frost'),
(2, 'Opss', 'Sspo')
;WITH CTE
AS
(
SELECT
*, ROW_NUMBER() OVER (PARTITION BY PurchaseID ORDER BY PurchaseID) RowId
FROM @Tbl
)
SELECT
A.PurchaseID,
MIN(CASE WHEN A.RowId = 1 THEN A.FirstName END) AS Buyer1FirstName,
MIN(CASE WHEN A.RowId = 1 THEN A.LastName  END) AS Buyer1LastName,
MIN(CASE WHEN A.RowId = 2 THEN A.FirstName END) AS Buyer2FirstName,
MIN(CASE WHEN A.RowId = 2 THEN A.LastName  END) AS Buyer2LastName,
MIN(CASE WHEN A.RowId = 3 THEN A.FirstName END) AS Buyer3FirstName,
MIN(CASE WHEN A.RowId = 3 THEN A.LastName  END) AS Buyer3LastName,
MIN(CASE WHEN A.RowId = 4 THEN A.FirstName END) AS Buyer4FirstName,
MIN(CASE WHEN A.RowId = 4 THEN A.LastName  END) AS Buyer4LastName
FROM
CTE A
GROUP BY
A.PurchaseID
Result:
PurchaseID Buyer1FirstName Buyer1LastName Buyer2FirstName Buyer2LastName Buyer3FirstName Buyer3LastName Buyer4FirstName Buyer4LastName
----------- ------------------- -------------------- -------------------- ------------------ ------------------- ----------------- ------------------- --------------
1 Joe Smith Peter Pan NULL NULL NULL NULL
2 Max Power Jack Frost Opss Sspo NULL NULL
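If the number of buyers per purchase is not known in advance, the same conditional-aggregation pattern can be generated dynamically. This is only a sketch: the table name dbo.PurchaseBuyers is an assumption, and STRING_AGG requires SQL Server 2017 or later (use FOR XML PATH on older versions).
DECLARE @maxBuyers int, @cols nvarchar(max), @sql nvarchar(max);

-- How many Buyer<N> column pairs do we need?
SELECT @maxBuyers = MAX(cnt)
FROM (SELECT COUNT(*) AS cnt FROM dbo.PurchaseBuyers GROUP BY PurchaseID) c;

-- Build "MIN(CASE WHEN RowId = n ...) AS Buyer<n>FirstName/LastName" for n = 1..@maxBuyers.
WITH n AS (
    SELECT TOP (@maxBuyers) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS i
    FROM sys.all_objects
)
SELECT @cols = STRING_AGG(CAST(
         'MIN(CASE WHEN RowId = ' + CAST(i AS varchar(10)) + ' THEN FirstName END) AS Buyer' + CAST(i AS varchar(10)) + 'FirstName, '
       + 'MIN(CASE WHEN RowId = ' + CAST(i AS varchar(10)) + ' THEN LastName END) AS Buyer'  + CAST(i AS varchar(10)) + 'LastName'
       AS nvarchar(max)), ', ') WITHIN GROUP (ORDER BY i)
FROM n;

SET @sql = N'
WITH CTE AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY PurchaseID ORDER BY PurchaseID) AS RowId
    FROM dbo.PurchaseBuyers
)
SELECT PurchaseID, ' + @cols + N'
FROM CTE
GROUP BY PurchaseID;';

EXEC sys.sp_executesql @sql;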

PostgreSQL: select nearest rows according to sort order

I have a table like this:
a | user_id
----------+-------------
0.1133 | 2312882332
4.3293 | 7876123213
3.1133 | 2312332332
1.3293 | 7876543213
0.0033 | 2312222332
5.3293 | 5344343213
3.2133 | 4122331112
2.3293 | 9999942333
And I want to locate a particular row - 1.3293 | 7876543213 for example - and select the nearest 4 rows. 2 above, 2 below if possible.
Sort order is ORDER BY a ASC.
In this case I will get:
0.0033 | 2312222332
0.1133 | 2312882332
2.3293 | 9999942333
3.1133 | 2312332332
How can I achieve this using PostgreSQL? (BTW, I'm using PHP.)
P.S.: For the last or first row the nearest rows would be 4 above or 4 below.
Test case:
CREATE TEMP TABLE tbl(a float, user_id bigint);
INSERT INTO tbl VALUES
(0.1133, 2312882332)
,(4.3293, 7876123213)
,(3.1133, 2312332332)
,(1.3293, 7876543213)
,(0.0033, 2312222332)
,(5.3293, 5344343213)
,(3.2133, 4122331112)
,(2.3293, 9999942333);
Query:
WITH x AS (
SELECT a
,user_id
,row_number() OVER (ORDER BY a, user_id) AS rn
FROM tbl
), y AS (
SELECT rn, LEAST(rn - 3, (SELECT max(rn) - 5 FROM x)) AS min_rn
FROM x
WHERE (a, user_id) = (1.3293, 7876543213)
)
SELECT *
FROM x, y
WHERE x.rn > y.min_rn
AND x.rn <> y.rn
ORDER BY x.a, x.user_id
LIMIT 4;
Returns the result as depicted in the question, assuming that (a, user_id) is unique.
It is not clear whether a is supposed to be unique. That's why I sort by user_id additionally, to break ties. That's also why I use the window function row_number(), and not rank(), for this. row_number() is the correct tool in any case: we want 4 rows, and rank() would give an undefined number of rows if there were peers in the sort order.
This always returns 4 rows as long as there are at least 5 rows in the table. Close to the first / last row, the first / last 4 rows are returned; the two rows before / after in all other cases. The criteria row itself is excluded.
Improved performance
This is an improved version of what @Tim Landscheidt posted. Vote for his answer if you like the idea with the index. Don't bother for small tables, but it will boost performance for big tables, provided you have a fitting index in place. The best choice would be a multicolumn index on (a, user_id).
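For example (just a sketch, assuming the table keeps the name tbl from the test case above):
-- Multicolumn index matching the ORDER BY / WHERE of both halves of the query.
CREATE INDEX tbl_a_user_id_idx ON tbl (a, user_id);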
WITH params(_a, _user_id) AS (SELECT 5.3293, 5344343213) -- enter params once
,x AS (
(
SELECT a
,user_id
,row_number() OVER (ORDER BY a DESC, user_id DESC) AS rn
FROM tbl, params p
WHERE a < p._a
OR a = p._a AND user_id < p._user_id -- a is not defined unique
ORDER BY a DESC, user_id DESC
LIMIT 5 -- 4 + 1: including central row
)
UNION ALL -- UNION right away, trim one query level
(
SELECT a
,user_id
,row_number() OVER (ORDER BY a ASC, user_id ASC) AS rn
FROM tbl, params p
WHERE a > p._a
OR a = p._a AND user_id > p._user_id
ORDER BY a ASC, user_id ASC
LIMIT 5
)
)
, y AS (
SELECT a, user_id
FROM x, params p
WHERE (a, user_id) <> (p._a, p._user_id) -- exclude central row
ORDER BY rn -- no need to ORDER BY a
LIMIT 4
)
SELECT *
FROM y
ORDER BY a, user_id -- ORDER result as requested
Major differences to @Tim's version:
According to the question, (a, user_id) form the search criteria, not just a. That changes the window frame, ORDER BY and WHERE clause in subtly different ways.
UNION right away, no need for an extra query level. You need parentheses around the two UNION queries to allow for individual ORDER BY.
Sort result as requested. Requires another query level (at hardly any cost).
As parameters are used in multiple places I centralized the input in a leading CTE.
For repeated use you can wrap this query almost 'as is' into an SQL or plpgsql function.
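A minimal sketch of such a wrapper (the function name nearest_rows is made up here; table and column names are taken from the test case above):
CREATE OR REPLACE FUNCTION nearest_rows(_a float, _user_id bigint)
  RETURNS TABLE (a float, user_id bigint)
  LANGUAGE sql STABLE AS
$func$
WITH x AS (
    (
    SELECT t.a, t.user_id,
           row_number() OVER (ORDER BY t.a DESC, t.user_id DESC) AS rn
    FROM   tbl t
    WHERE  t.a < _a
       OR (t.a = _a AND t.user_id < _user_id)
    ORDER  BY t.a DESC, t.user_id DESC
    LIMIT  4                                   -- at most 4 neighbours from below
    )
    UNION ALL
    (
    SELECT t.a, t.user_id,
           row_number() OVER (ORDER BY t.a ASC, t.user_id ASC) AS rn
    FROM   tbl t
    WHERE  t.a > _a
       OR (t.a = _a AND t.user_id > _user_id)
    ORDER  BY t.a ASC, t.user_id ASC
    LIMIT  4                                   -- at most 4 neighbours from above
    )
)
SELECT y.a, y.user_id
FROM (
    SELECT x.a, x.user_id
    FROM   x
    ORDER  BY x.rn                             -- nearest neighbours first
    LIMIT  4
) y
ORDER BY y.a, y.user_id;                       -- order result as requested
$func$;

-- Example call:
-- SELECT * FROM nearest_rows(5.3293, 5344343213);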
And another one:
WITH prec_rows AS
(SELECT a,
user_id,
ROW_NUMBER() OVER (ORDER BY a DESC) AS rn
FROM tbl
WHERE a < 1.3293
ORDER BY a DESC LIMIT 4),
succ_rows AS
(SELECT a,
user_id,
ROW_NUMBER() OVER (ORDER BY a ASC) AS rn
FROM tbl
WHERE a > 1.3293
ORDER BY a ASC LIMIT 4)
SELECT a, user_id
FROM
(SELECT a,
user_id,
rn
FROM prec_rows
UNION ALL SELECT a,
user_id,
rn
FROM succ_rows) AS s
ORDER BY rn, a LIMIT 4;
AFAIR WITH will instantiate a memory table, so the focus of this solution is to limit its size as much as possible (in this case eight rows).
set search_path='tmp';
DROP TABLE lutser;
CREATE TABLE lutser
( val float
, num bigint
);
INSERT INTO lutser(val, num)
VALUES ( 0.1133 , 2312882332 )
,( 4.3293 , 7876123213 )
,( 3.1133 , 2312332332 )
,( 1.3293 , 7876543213 )
,( 0.0033 , 2312222332 )
,( 5.3293 , 5344343213 )
,( 3.2133 , 4122331112 )
,( 2.3293 , 9999942333 )
;
WITH ranked_lutsers AS (
SELECT val, num
,rank() OVER (ORDER BY val) AS rnk
FROM lutser
)
SELECT that.val, that.num
, (that.rnk-this.rnk) AS relrnk
FROM ranked_lutsers that
JOIN ranked_lutsers this ON (that.rnk BETWEEN this.rnk-2 AND this.rnk+2)
WHERE this.val = 1.3293
;
Results:
DROP TABLE
CREATE TABLE
INSERT 0 8
val | num | relrnk
--------+------------+--------
0.0033 | 2312222332 | -2
0.1133 | 2312882332 | -1
1.3293 | 7876543213 | 0
2.3293 | 9999942333 | 1
3.1133 | 2312332332 | 2
(5 rows)
As Erwin pointed out, the center row is not wanted in the output. Also, row_number() should be used instead of rank().
WITH ranked_lutsers AS (
SELECT val, num
-- ,rank() OVER (ORDER BY val) AS rnk
, row_number() OVER (ORDER BY val, num) AS rnk
FROM lutser
) SELECT that.val, that.num
, (that.rnk-this.rnk) AS relrnk
FROM ranked_lutsers that
JOIN ranked_lutsers this ON (that.rnk BETWEEN this.rnk-2 AND this.rnk+2 )
WHERE this.val = 1.3293
AND that.rnk <> this.rnk
;
Result2:
val | num | relrnk
--------+------------+--------
0.0033 | 2312222332 | -2
0.1133 | 2312882332 | -1
2.3293 | 9999942333 | 1
3.1133 | 2312332332 | 2
(4 rows)
UPDATE2: to always select four, even if we are at the top or bottom of the list. This makes the query a bit uglier. (but not as ugly as Erwin's ;-)
WITH ranked_lutsers AS (
SELECT val, num
-- ,rank() OVER (ORDER BY val) AS rnk
, row_number() OVER (ORDER BY val, num) AS rnk
FROM lutser
) SELECT that.val, that.num
, ABS(that.rnk-this.rnk) AS srtrnk
, (that.rnk-this.rnk) AS relrnk
FROM ranked_lutsers that
JOIN ranked_lutsers this ON (that.rnk BETWEEN this.rnk-4 AND this.rnk+4 )
-- WHERE this.val = 1.3293
WHERE this.val = 0.1133
AND that.rnk <> this.rnk
ORDER BY srtrnk ASC
LIMIT 4
;
Output:
val | num | srtrnk | relrnk
--------+------------+--------+--------
0.0033 | 2312222332 | 1 | -1
1.3293 | 7876543213 | 1 | 1
2.3293 | 9999942333 | 2 | 2
3.1133 | 2312332332 | 3 | 3
(4 rows)
UPDATE: A version with a nested CTE (featuring outer join!!!). For convenience, I added a primary key to the table, which sounds like a good idea anyway IMHO.
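The ALTER for that primary key is not shown; something like this would do (a sketch, adding the id column implied by the query below):
-- Adds a serial id column, backfills existing rows and makes it the primary key.
ALTER TABLE lutser ADD COLUMN id serial PRIMARY KEY;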
WITH distance AS (
WITH ranked_lutsers AS (
SELECT id
, row_number() OVER (ORDER BY val, num) AS rnk
FROM lutser
) SELECT l0.id AS one
,l1.id AS two
, ABS(l1.rnk-l0.rnk) AS dist
-- Warning: Cartesian product below
FROM ranked_lutsers l0
, ranked_lutsers l1 WHERE l0.id <> l1.id
)
SELECT lu.*
FROM lutser lu
JOIN distance di
ON lu.id = di.two
WHERE di.one= 1
ORDER by di.dist
LIMIT 4
;