Say I have a dataset of survey responses like this:
survey_id
user_id
question
answer
1
1
fav_colour
red
1
2
fav_colour
blue
1
2
fav_fruit
orange
2
3
fav_sport
hockey
I want to be able to use a single query with a survey ID to query the rows by survey ID like so:
...where survey_id = 1
survey_id
user_id
fav_color
fav_fruit
1
1
red
NULL
1
2
blue
orange
...where survey_id = 2
survey_id
user_id
fav_sport
2
3
hockey
How can I perform a query to get each question for a survey as a column, listing each response under it?
Table:
-- Survey responses in long form: one row per (survey, user, question) answer.
CREATE TABLE survey (
    survey_id INT,
    user_id   INT,
    question  VARCHAR(20),
    answer    VARCHAR(20)
);

-- Explicit column list so the seed data does not depend on column order.
INSERT INTO survey (survey_id, user_id, question, answer)
VALUES
    (1, 1, 'fav_colour', 'red'),
    (1, 2, 'fav_colour', 'blue'),
    (1, 2, 'fav_fruit',  'orange'),
    (2, 3, 'fav_sport',  'hockey');
SQL for Full Display:
-- Pivot the long-form survey table into one row per (survey_id, user_id),
-- with one column per known question.
--
-- Conditional aggregation reads survey once and always yields exactly one row
-- per respondent. The earlier three-CTE / FULL OUTER JOIN chain scanned the
-- table three times and multiplied rows whenever a user had more than one
-- answer to the same question. If such duplicates exist, MAX() keeps the
-- greatest answer.
SELECT
    survey_id,
    user_id,
    MAX(CASE WHEN question = 'fav_colour' THEN answer END) AS fav_color,
    MAX(CASE WHEN question = 'fav_sport'  THEN answer END) AS fav_sport,
    MAX(CASE WHEN question = 'fav_fruit'  THEN answer END) AS fav_fruit
FROM survey
-- As before, only respondents with at least one non-NULL answer to one of the
-- pivoted questions appear in the result.
WHERE question IN ('fav_colour', 'fav_sport', 'fav_fruit')
  AND answer IS NOT NULL
GROUP BY
    survey_id,
    user_id
-- Deterministic output order.
ORDER BY
    survey_id,
    user_id;
Output:
survey_id | user_id | fav_color | fav_sport | fav_fruit
-----------+---------+-----------+-----------+-----------
1 | 1 | red | |
1 | 2 | blue | | orange
2 | 3 | | hockey |
(3 rows)
SQL for a particular ID, say survey_id = 1 (the earlier SQL with a filter on survey_id):
-- Pivot the answers of survey 1 into one row per user, one column per known
-- question.
--
-- The survey_id filter is applied before aggregation, so only that survey's
-- rows are read and pivoted (the earlier version pivoted the whole table with
-- FULL OUTER JOINs and filtered afterwards). MAX() keeps the greatest answer
-- if a user answered the same question more than once.
SELECT
    survey_id,
    user_id,
    MAX(CASE WHEN question = 'fav_colour' THEN answer END) AS fav_color,
    MAX(CASE WHEN question = 'fav_sport'  THEN answer END) AS fav_sport,
    MAX(CASE WHEN question = 'fav_fruit'  THEN answer END) AS fav_fruit
FROM survey
WHERE survey_id = 1
  -- Only respondents with at least one non-NULL answer to a pivoted question.
  AND question IN ('fav_colour', 'fav_sport', 'fav_fruit')
  AND answer IS NOT NULL
GROUP BY
    survey_id,
    user_id
ORDER BY
    user_id;
Output:
survey_id | user_id | fav_color | fav_sport | fav_fruit
-----------+---------+-----------+-----------+-----------
1 | 1 | red | |
1 | 2 | blue | | orange
(2 rows)
For Survey_id=2:
-- Pivot the answers of survey 2 into one row per user, one column per known
-- question.
--
-- The survey_id filter is applied before aggregation, so only that survey's
-- rows are read and pivoted (the earlier version pivoted the whole table with
-- FULL OUTER JOINs and filtered afterwards). MAX() keeps the greatest answer
-- if a user answered the same question more than once.
SELECT
    survey_id,
    user_id,
    MAX(CASE WHEN question = 'fav_colour' THEN answer END) AS fav_color,
    MAX(CASE WHEN question = 'fav_sport'  THEN answer END) AS fav_sport,
    MAX(CASE WHEN question = 'fav_fruit'  THEN answer END) AS fav_fruit
FROM survey
WHERE survey_id = 2
  -- Only respondents with at least one non-NULL answer to a pivoted question.
  AND question IN ('fav_colour', 'fav_sport', 'fav_fruit')
  AND answer IS NOT NULL
GROUP BY
    survey_id,
    user_id
ORDER BY
    user_id;
Output:
survey_id | user_id | fav_color | fav_sport | fav_fruit
-----------+---------+-----------+-----------+-----------
2 | 3 | | hockey |
(1 row)
The tablefunc module contains a function (crosstab) to do that:
https://www.postgresql.org/docs/13/tablefunc.html
Related
Here is my data and query. I want row data show in columns... Can someone help me to modify the query? I am using PostgreSQL queries.
-- Lists every recipe row with its service name, product name and quantity,
-- ordered by service name.
SELECT
    svc.name,
    prod.product_name,
    recipe.quantity
FROM services_servicerecipe AS recipe
INNER JOIN services_service AS svc
    ON svc.id = recipe.service_id
INNER JOIN inventory_product AS prod
    ON prod.id = recipe.product_id
ORDER BY svc.name
Output:
Service_name | Product_name | Quantity
-------------+------------------+-----------
Balayage | 7.3-revlon | 2
Balayage | 701-revlon | 1
I want it to look like this
Service_name | Product_name | Quantity | Product_name | Quantity
-------------+-------------------+--------------+------------------+----------
Balayage | 7.3-revlon | 2 | 701-revlon | 1
Here is a pivot option, using ROW_NUMBER:
-- Number each service's products alphabetically, then spread the first two
-- products (and their quantities) across columns of a single row per service.
WITH numbered AS (
    SELECT
        svc.name,
        prod.product_name,
        recipe.quantity,
        ROW_NUMBER() OVER (
            PARTITION BY svc.name
            ORDER BY prod.product_name
        ) AS rn
    FROM services_servicerecipe AS recipe
    INNER JOIN services_service AS svc
        ON svc.id = recipe.service_id
    INNER JOIN inventory_product AS prod
        ON prod.id = recipe.product_id
)
SELECT
    name,
    -- Each FILTER picks the single row with that position; MAX just unwraps it.
    MAX(product_name) FILTER (WHERE rn = 1) AS product1,
    MAX(quantity)     FILTER (WHERE rn = 1) AS quantity1,
    MAX(product_name) FILTER (WHERE rn = 2) AS product2,
    MAX(quantity)     FILTER (WHERE rn = 2) AS quantity2
FROM numbered
GROUP BY name;
I am combining the following tables to determine the in and out time, total hours worked for the day, work center and associated pay rates. However, when PunchType = '303' there are two rows: one for the initial work center as a '10' punch, and then the '303' punch. When the '303' punch exists, I need to use it as the in punch instead. The query and sample results below show that I have narrowed my query down to return both the result of using the '10' punch (in) with the '12' punch (out) and the result of using the '303' punch (in), if it exists, with the '12' punch (out). In my final result I only want the '303' - '12' match if it exists; otherwise I will have duplicate records. All of this is being loaded into Report Builder 3.0 to calculate total hours worked and dollars paid out per day (no need for help there, just trying to give some context).
I have included the RowNumber in the final query results because I was thinking of using it to filter the results as needed. My thinking: if Count(RowNumber) = 2, return where RowNumber = '2'; if Count(RowNumber) = 1, return where RowNumber = '1'; if Count(RowNumber) = 4, return where RowNumber IN ('3', '4'). I know the syntax in this phrasing is wrong, but I am just using it to illustrate what I am trying to do. I am sure there is an easier way to write the query (though I am OK if not), as long as I can figure out how to filter the results down to only what I need. Any help is appreciated. Thanks!
Sample Data:
Timecard
| TimeCardID | StoreID | EmpID | CardDate
| PunchType | WorkCenter | BreakIndex |ShadowTimeCardForID
B6B839AD-D8DF-E611-A3E5-0019170149B6 | 32365 | 4171 |2017-01-21 07:54:00.500
| 303 | 4 |0 | 00000000-0000-0000-0000-000000000000
EmployeeRate
| EmployeeRateID | EmployeeID
| RateIndex | WorkCenter | OvertimeRate | RegularRate
| C3325A54-E7A9-E611-A16D-0019178089A7 | 27139B5C-7A74-E611-969E-3417EBD1A8D1
| 4 | 4 | 2250 |1500
Query:
-- End of the report window; the window is the 24 hours up to and including
-- this instant. T-SQL local variables must be prefixed with '@' ('#' is the
-- temp-table prefix and does not parse here).
DECLARE @datetime datetime = '2017-01-22 04:00:00.000';

-- Pairs each employee's in-punch with an out-punch (PunchType '12') inside the
-- report window, attaches the pay rates of the in-punch's work center, and
-- numbers the resulting rows per employee. Two variants are unioned:
--   1. in-punch taken from PunchType '10'
--   2. in-punch taken from PunchType '303'
SELECT
    z.EmpID,
    z.RegularRate,
    z.OvertimeRate,
    z.WorkCenter,
    z.in_punch,
    z.out_punch,
    z.HoursWorked,
    z.RowNumber
FROM
    (SELECT
        y.EmpID,
        y.RegularRate,
        y.OvertimeRate,
        y.WorkCenter,
        y.in_punch,
        y.out_punch,
        y.HoursWorked,
        -- NOTE(review): ordering by the partition key itself leaves the
        -- numbering within one employee arbitrary.
        row_number() OVER(PARTITION BY EmpID ORDER BY EmpiD) AS RowNumber
    FROM
        (
        -- Variant 1: '10' in-punch matched to '12' out-punch.
        SELECT
            f.EmpID,
            f.RegularRate,
            f.OvertimeRate,
            f.WorkCenter,
            f.in_punch,
            f.out_punch,
            f.HoursWorked
        FROM
            (SELECT
                tc.EmpID,
                er.RegularRate,
                er.OvertimeRate,
                tc.WorkCenter,
                tc.in_punch,
                tc.out_punch,
                -- Elapsed time formatted as H:MM.
                CONVERT(varchar(3),DATEDIFF(MINUTE,in_punch,out_punch)/60) + ':' +
                RIGHT('0' + CONVERT(varchar(2),DATEDIFF(MINUTE,in_punch,out_punch)%60),2)
                AS HoursWorked,
                row_number() OVER(PARTITION BY tc.EmpID ORDER BY tc.EmpiD) AS RowNumber
            FROM
                (SELECT
                    e.EmpID,
                    e.WorkCenter,
                    e.CardDate AS in_punch,
                    e2.CardDate AS out_punch
                FROM
                    (SELECT EmpID, CardDate, WorkCenter
                     FROM TimeCard
                     where PunchType = '10'
                       AND CardDate BETWEEN DATEADD(DAY, -1, @datetime) AND @datetime) e
                INNER JOIN
                    (SELECT EmpID, CardDate, WorkCenter
                     FROM TimeCard
                     where PunchType = '12'
                       AND CardDate BETWEEN DATEADD(DAY, -1, @datetime) AND @datetime) e2
                ON
                    e.EmpID = e2.EmpID
                ) tc
            INNER JOIN
                [dbo].[Employee] em
                ON tc.EmpID = em.EmpID
            INNER JOIN
                [dbo].[EmployeeRate] er
                ON em.[EmployeeID] = er.[EmployeeID] AND tc.[Workcenter] = er.[WorkCenter]
            -- Every in-punch is joined to every out-punch of the employee;
            -- keep only pairs where the out-punch is not before the in-punch.
            WHERE tc.in_punch <= tc.out_punch
            GROUP BY tc.EmpID,
                er.RegularRate,
                er.OvertimeRate,
                tc.WorkCenter,
                tc.in_punch,
                tc.out_punch
            ) f
        -- RowNumber is an integer; compare with a number, not the string '2'.
        WHERE f.[RowNumber] <> 2
        UNION
        -- Variant 2: '303' in-punch matched to '12' out-punch.
        SELECT
            f.EmpID,
            f.RegularRate,
            f.OvertimeRate,
            f.WorkCenter,
            f.in_punch,
            f.out_punch,
            f.HoursWorked
        FROM
            (SELECT
                tc.EmpID,
                er.RegularRate,
                er.OvertimeRate,
                tc.WorkCenter,
                tc.in_punch,
                tc.out_punch,
                -- Elapsed time formatted as H:MM.
                CONVERT(varchar(3),DATEDIFF(MINUTE,in_punch,out_punch)/60) + ':' +
                RIGHT('0' +
                CONVERT(varchar(2),DATEDIFF(MINUTE,in_punch,out_punch)%60),2) AS
                HoursWorked,
                row_number() OVER(PARTITION BY tc.EmpID ORDER BY tc.EmpiD) AS RowNumber
            FROM
                (SELECT
                    e.EmpID,
                    e.WorkCenter,
                    e.CardDate AS in_punch,
                    e2.CardDate AS out_punch
                FROM
                    (SELECT EmpID, CardDate, WorkCenter
                     FROM TimeCard
                     where PunchType = '303'
                       AND CardDate BETWEEN DATEADD(DAY, -1, @datetime) AND @datetime) e
                INNER JOIN
                    (SELECT EmpID, CardDate, WorkCenter
                     FROM TimeCard
                     where PunchType = '12'
                       AND CardDate BETWEEN DATEADD(DAY, -1, @datetime) AND @datetime) e2
                ON
                    e.EmpID = e2.EmpID
                ) tc
            INNER JOIN
                [dbo].[Employee] em
                ON tc.EmpID = em.EmpID
            INNER JOIN
                [dbo].[EmployeeRate] er
                ON em.[EmployeeID] = er.[EmployeeID] AND tc.[Workcenter] = er.[WorkCenter]
            WHERE tc.in_punch <= tc.out_punch
            GROUP BY tc.EmpID,
                er.RegularRate,
                er.OvertimeRate,
                tc.WorkCenter,
                tc.in_punch,
                tc.out_punch
            ) f
        WHERE f.[RowNumber] <> 2
        ) y
    ) z
-- Grouping by every selected column only removes exact duplicate rows.
GROUP BY
    z.EmpID,
    z.RegularRate,
    z.OvertimeRate,
    z.WorkCenter,
    z.in_punch,
    z.out_punch,
    z.HoursWorked,
    z.RowNumber
-- Employees with fewer result rows sort first.
ORDER BY COUNT(RowNumber)OVER(PARTITION BY EmpID)
Results:
EmpID,RegularRate,OvertimeRate,WorkCenter,in_punch,out_punch,HoursWorked,RowNumber
9267,1150,1725,9,2017-01-21 16:59:27.940,2017-01-22 01:16:16.200,8:17, 1
9438,550,825,3,2017-01-21 09:55:34.500,2017-01-21 15:37:51.770,5:42,1
9471,223,335,1,2017-01-21 10:32:08.060,2017-01-21 14:18:23.430,3:46,1
9471,223,335,1,2017-01-21 15:54:29.570,2017-01-21 23:00:00.000,7:06,2
4171,223,335,1,2017-01-21 07:54:00.490,2017-01-21 15:17:31.740,7:23,1
4171,1500,2250,4,2017-01-21 07:54:00.500,2017-01-21 15:17:31.740,7:23,2
I have 3 tables which are Accounts, Payments, Statements. Table Accounts have all the accounts, table Payments have all the payments made to the account, and table Statements have all the statement data for the accounts.
Accounts
AccountID | DateOfDeath |
1001 | 2014-03-10 |
Payments
AccountID | PaidAmount | PaymentDate
1001 | 80.27 | 2014-07-09
1001 | 80.27 | 2014-06-10
1001 | 80.27 | 2014-05-12
1001 | 80.27 | 2014-04-13
1001 | 80.27 | 2014-03-15
1001 | 80.27 | 2014-02-14
Statements
AccountID | Balance | StatementDate
1001 | 0.00 | 2014-03-28
1001 | 1909.31 | 2014-02-25
I need to know the sum of PaidAmount (table Payments) in Payments table which is between the StatementDate (table Statements) of 2014-03-28 and 2014-02-25. The sum of the PaidAmount should have been 80.27 but I am getting 321.08. Can anyone tell me what I am doing wrong or how can I write the query in a better way?
here is what I have so far
-- Step 1: for every account, keep the first statement dated on or after the
-- date of death.
create table #temp1
(
      AccountID     Numeric(9, 0)
    , DateOfDeath   date
    , StatementDate date
    , Balance       numeric(17,2)
)

insert into #temp1
(
    AccountID, DateOfDeath, StatementDate, Balance
)
select a.AccountID
     , a.DateOfDeath
     , b.StatementDate
     , b.Balance
from Accounts a
inner join Statements b on a.AccountID = b.AccountID
where b.StatementDate = (select top 1 s.StatementDate
                         from Statements s
                         where s.AccountID = a.AccountID
                           and s.StatementDate >= a.DateOfDeath
                         order by s.StatementDate)

-- Step 2: sum the payments made after the last statement before death and up
-- to (and including) the first statement on/after death.
create table #temp2
(
      AccountId   Numeric(9,0)
    , PaidAmount  Numeric(10, 2)
    , PaymentDate date
)

-- The result is now actually stored in #temp2; previously the SELECT ran on
-- its own and #temp2 stayed empty.
insert into #temp2
(
    AccountId, PaidAmount, PaymentDate
)
select a.AccountID
     , sum(a.PaidAmount)
     , max(a.PaymentDate)
from tblCreditDefenseInceptionToDateBenefit a
inner join #temp1 b on a.AccountID = b.AccountID
-- Upper bound: the statement already selected in step 1. The earlier subquery
-- ordered the statements DESC and therefore picked the LATEST statement after
-- death, so every payment up to the newest statement was summed.
where a.PaymentDate <= b.StatementDate
  -- Lower bound: the most recent statement before the date of death.
  and a.PaymentDate > (select top 1 s.StatementDate
                       from Statements s
                       where s.AccountID = a.AccountID
                         and s.StatementDate < b.DateOfDeath
                       order by s.StatementDate desc)
group by a.AccountID

select AccountId, PaidAmount, PaymentDate
from #temp2
order by AccountId desc

drop table #temp1
drop table #temp2
you can go about it a few ways
-- Fixture tables mirroring the question's sample data.
-- Dates are written as ISO 'YYYY-MM-DD' literals so they do not depend on the
-- session's language / DATEFORMAT setting; numeric values are unquoted.
Create table #accounts
(AccountID int, Date_Death date)

insert into #accounts
(accountID, Date_death)
values
(1001, '2014-03-10')

Create Table #payments
(AccountID int, paidamt decimal(6,2), paymentdt date)

insert into #payments
(AccountID , paidamt, paymentdt)
values
(1001, 80.27, '2014-07-09'),
(1001, 80.27, '2014-06-10'),
(1001, 80.27, '2014-05-12'),
(1001, 80.27, '2014-04-13'),
(1001, 80.27, '2014-03-15'),
(1001, 80.27, '2014-02-14')
;

-- Sum only the payments that fall inside the statement window. Payments
-- outside the window contribute NULL, which SUM ignores, so an account with
-- no payment in the window still appears (with a NULL total).
with cte as (
    select
        Accountid,
        case when paymentdt between '2014-02-25' and '2014-03-28' then paidamt else null end as paidamt
    from
        #payments
)
Select
    accountid,
    SUM(paidamt) as paidamt
from cte
group by
    AccountID
or
put it in the WHERE clause instead of using a CASE expression; it really depends on your style
-- Sum each account's payments dated inside the statement window (inclusive).
-- Fixes relative to the earlier draft: the column is paymentdt (as declared on
-- #payments), the end date literal was malformed, and selecting accountid next
-- to an aggregate requires GROUP BY.
select
    accountid,
    sum(paidamt) as paidamt
from
    #payments
where paymentdt >= '2014-02-25'
  and paymentdt <= '2014-03-28'
group by
    accountid
or
if you want to use the statement table dates as parameters
-- Same conditional sum, but the date window comes from each account's
-- earliest and latest statement dates instead of hard-coded literals.
with statement_window as
(
    -- One row per account: first and last statement date.
    select
        accountid,
        MIN(statementdt) as min_dt,
        MAX(statementdt) as max_dt
    from #statement
    group by accountid
),
windowed_payments as
(
    -- Payments outside the window contribute NULL, which SUM ignores.
    select
        p.AccountID,
        case
            when p.paymentdt >= w.min_dt and p.paymentdt <= w.max_dt then p.paidamt
        end as pdamt
    from #payments as p
    inner join statement_window as w
        on w.accountid = p.AccountID
)
select
    AccountID,
    SUM(pdamt) as [Paid Amount]
from windowed_payments
group by
    AccountID
Again, the condition could be moved into the WHERE clause if you don't want to use CASE expressions.
I have the following query in T-SQL:
-- One output row per matching table1 row: the start time is filled when
-- ACTIONID = 1, the stop time when ACTIONID = 2, the other one stays NULL.
SELECT
    t2.device,
    t2.CREATETIME AS create_time,
    CASE t1.ACTIONID WHEN 1 THEN t1.startstop END AS start_time,
    CASE t1.ACTIONID WHEN 2 THEN t1.startstop END AS stop_time,
    t2.collect_time
FROM dbo.table2 AS t2
INNER JOIN dbo.table1 AS t1
    ON t1.CREATETIME = t2.CREATETIME;
...which gives me a result table with several rows, each a duplicate with once the start- once the end- time (sql time - shortened for simplicity) - the other being NULL - e.g.:
device | create_time | start_time | stop_time | collect_time
1 | 0000001 | 0000001 | NULL | 0000001
1 | 0000001 | NULL | 0000002 | 0000001
I want to group these two rows (with create_time as ID) so I get them into one.... Thanks!
You can aggregate these columns so that the two rows collapse into one:
-- Collapse the start row and the stop row of each create_time into one row.
-- MAX is used instead of SUM: per the question startstop holds a time value,
-- and SUM only accepts numeric operands. Without an ELSE branch the
-- non-matching row contributes NULL, which MAX ignores, so a missing start or
-- stop stays NULL instead of turning into 0.
SELECT
    dbo.table2.device,
    dbo.table2.CREATETIME AS create_time,
    MAX(CASE WHEN dbo.table1.ACTIONID = 1
             THEN dbo.table1.startstop
        END) AS start_time,
    MAX(CASE WHEN dbo.table1.ACTIONID = 2
             THEN dbo.table1.startstop
        END) AS stop_time,
    dbo.table2.collect_time
FROM dbo.table2
INNER JOIN dbo.table1
    ON dbo.table1.CREATETIME = dbo.table2.CREATETIME
GROUP BY
    dbo.table2.device,
    dbo.table2.CREATETIME,
    dbo.table2.collect_time;
Also I suppose using subquery would work out as well
-- Same result via a derived table: X produces the start-only / stop-only
-- rows, the outer query collapses them per create_time.
-- MAX replaces SUM because startstop holds a time value (SUM only accepts
-- numeric operands). create_time is part of the grouping so that rows of
-- different create_time values are never merged, and every output column is
-- named.
SELECT
     X.device
    ,X.create_time
    ,MAX(X.start_time) AS start_time
    ,MAX(X.stop_time)  AS stop_time
    ,X.collect_time
FROM (
    SELECT dbo.table2.device,
           dbo.table2.CREATETIME AS create_time,
           CASE WHEN dbo.table1.ACTIONID = 1
                THEN dbo.table1.startstop
           END AS start_time,
           CASE WHEN dbo.table1.ACTIONID = 2
                THEN dbo.table1.startstop
           END AS stop_time,
           dbo.table2.collect_time
    FROM dbo.table2
    INNER JOIN dbo.table1
        ON dbo.table1.CREATETIME = dbo.table2.CREATETIME
) AS X
GROUP BY
    X.device, X.create_time, X.collect_time
I have a table like this:
a | user_id
----------+-------------
0.1133 | 2312882332
4.3293 | 7876123213
3.1133 | 2312332332
1.3293 | 7876543213
0.0033 | 2312222332
5.3293 | 5344343213
3.2133 | 4122331112
2.3293 | 9999942333
And I want to locate a particular row - 1.3293 | 7876543213 for example - and select the nearest 4 rows. 2 above, 2 below if possible.
Sort order is ORDER BY a ASC.
In this case I will get:
0.0033 | 2312222332
0.1133 | 2312882332
2.3293 | 9999942333
3.1133 | 2312332332
How can I achieve this using PostgreSQL? (BTW, I'm using PHP.)
P.S.: For the last or first row the nearest rows would be 4 above or 4 below.
Test case:
-- Session-local fixture matching the question's sample data.
CREATE TEMP TABLE tbl (
    a       float,
    user_id bigint
);

-- Explicit column list so the seed data does not depend on column order.
INSERT INTO tbl (a, user_id)
VALUES
    (0.1133, 2312882332),
    (4.3293, 7876123213),
    (3.1133, 2312332332),
    (1.3293, 7876543213),
    (0.0033, 2312222332),
    (5.3293, 5344343213),
    (3.2133, 4122331112),
    (2.3293, 9999942333);
Query:
-- Number all rows in (a, user_id) order, locate the target row, then return
-- the four rows closest to it in that numbering (the target itself excluded).
WITH ranked AS (
    SELECT
        a,
        user_id,
        row_number() OVER (ORDER BY a, user_id) AS rn
    FROM tbl
),
target AS (
    -- min_rn is an exclusive lower bound: normally two positions before the
    -- target, but capped at max(rn) - 5 so that four rows remain available
    -- when the target sits near the end.
    SELECT
        rn,
        LEAST(rn - 3, (SELECT max(rn) - 5 FROM ranked)) AS min_rn
    FROM ranked
    WHERE a = 1.3293
      AND user_id = 7876543213
)
SELECT *
FROM ranked
CROSS JOIN target            -- target is expected to hold a single row
WHERE ranked.rn > target.min_rn
  AND ranked.rn <> target.rn
ORDER BY ranked.a, ranked.user_id
LIMIT 4;
Returns result as depicted in the question. Assuming that (a, user_id) is unique.
It is not clear whether a is supposed to be unique. That's why I additionally sort by user_id to break ties. That's also why I use the window function row_number(), and not rank(), for this. row_number() is the correct tool in any case. We want 4 rows. rank() would give an undefined number of rows if there were peers in the sort order.
This always returns 4 rows as long as there are at least 5 rows in the table. Close to first / last row, the first / last 4 rows are returned. The two rows before / after in all other cases. The criteria row itself is excluded.
Improved performance
This is an improved version of what @Tim Landscheidt posted. Vote for his answer if you like the idea with the index. Don't bother with small tables. But it will boost performance for big tables - provided you have a fitting index in place. The best choice would be a multicolumn index on (a, user_id).
-- Search key, entered once and reused by every CTE below.
WITH params(_a, _user_id) AS (
    SELECT 5.3293, 5344343213
),
candidates AS (
    -- Up to five rows immediately before the key, numbered outwards from it.
    (
        SELECT
            t.a,
            t.user_id,
            row_number() OVER (ORDER BY t.a DESC, t.user_id DESC) AS rn
        FROM tbl AS t
        CROSS JOIN params AS p
        WHERE t.a < p._a
           OR (t.a = p._a AND t.user_id < p._user_id)   -- a alone is not unique
        ORDER BY t.a DESC, t.user_id DESC
        LIMIT 5
    )
    UNION ALL
    -- Up to five rows immediately after the key, numbered outwards from it.
    -- Each branch is parenthesised so it can carry its own ORDER BY / LIMIT.
    (
        SELECT
            t.a,
            t.user_id,
            row_number() OVER (ORDER BY t.a ASC, t.user_id ASC) AS rn
        FROM tbl AS t
        CROSS JOIN params AS p
        WHERE t.a > p._a
           OR (t.a = p._a AND t.user_id > p._user_id)
        ORDER BY t.a ASC, t.user_id ASC
        LIMIT 5
    )
),
nearest AS (
    -- Smallest rn first = closest to the key on either side; keep four.
    SELECT c.a, c.user_id
    FROM candidates AS c
    CROSS JOIN params AS p
    WHERE (c.a, c.user_id) <> (p._a, p._user_id)   -- never return the key row
    ORDER BY c.rn
    LIMIT 4
)
-- Final level only restores the requested sort order.
SELECT a, user_id
FROM nearest
ORDER BY a, user_id
Major differences to @Tim's version:
According to the question (a, user_id) form the search criteria, not just a. That changes window frame, ORDER BY and WHERE clause in subtly different ways.
UNION right away, no need for an extra query level. You need parentheses around the two UNION-queries to allow for individual ORDER BY.
Sort result as requested. Requires another query level (at hardly any cost).
As parameters are used in multiple places I centralized the input in a leading CTE.
For repeated use you can wrap this query almost 'as is' into an SQL or plpgsql function.
And another one:
-- Up to four rows directly below the pivot value, numbered outwards from it.
WITH prec_rows AS (
    SELECT
        a,
        user_id,
        ROW_NUMBER() OVER (ORDER BY a DESC) AS rn
    FROM tbl
    WHERE a < 1.3293
    ORDER BY a DESC
    LIMIT 4
),
-- Up to four rows directly above the pivot value, numbered outwards from it.
succ_rows AS (
    SELECT
        a,
        user_id,
        ROW_NUMBER() OVER (ORDER BY a ASC) AS rn
    FROM tbl
    WHERE a > 1.3293
    ORDER BY a ASC
    LIMIT 4
),
-- The four candidates closest to the pivot (smallest rn on either side).
nearest AS (
    SELECT a, user_id
    FROM (
        SELECT a, user_id, rn FROM prec_rows
        UNION ALL
        SELECT a, user_id, rn FROM succ_rows
    ) AS s
    ORDER BY rn, a
    LIMIT 4
)
-- The question asks for the result in ORDER BY a ASC; ordering by rn alone
-- interleaved the rows from below and above the pivot.
SELECT a, user_id
FROM nearest
ORDER BY a;
AFAIR WITH will instantiate a memory table, so the focus of this solution is to limit its size as much as possible (in this case eight rows).
-- Fixture for the neighbour queries; everything lives in schema tmp.
set search_path='tmp';

-- IF EXISTS keeps the script re-runnable: a plain DROP fails on the first run
-- when the table is not there yet.
DROP TABLE IF EXISTS lutser;
CREATE TABLE lutser
    ( val float
    , num bigint
    );

INSERT INTO lutser(val, num)
VALUES ( 0.1133 , 2312882332 )
     ,( 4.3293 , 7876123213 )
     ,( 3.1133 , 2312332332 )
     ,( 1.3293 , 7876543213 )
     ,( 0.0033 , 2312222332 )
     ,( 5.3293 , 5344343213 )
     ,( 3.2133 , 4122331112 )
     ,( 2.3293 , 9999942333 )
     ;
-- Rank all rows by val, then self-join the ranking to fetch every row whose
-- rank is within two positions of the pivot row (the pivot itself included).
WITH ranked_lutsers AS (
    SELECT
        val,
        num,
        rank() OVER (ORDER BY val) AS rnk
    FROM lutser
)
SELECT
    neighbour.val,
    neighbour.num,
    (neighbour.rnk - pivot.rnk) AS relrnk   -- signed distance from the pivot
FROM ranked_lutsers AS neighbour
INNER JOIN ranked_lutsers AS pivot
    ON  neighbour.rnk >= pivot.rnk - 2
    AND neighbour.rnk <= pivot.rnk + 2
WHERE pivot.val = 1.3293
;
Results:
DROP TABLE
CREATE TABLE
INSERT 0 8
val | num | relrnk
--------+------------+--------
0.0033 | 2312222332 | -2
0.1133 | 2312882332 | -1
1.3293 | 7876543213 | 0
2.3293 | 9999942333 | 1
3.1133 | 2312332332 | 2
(5 rows)
As Erwin pointed out, the center row is not wanted in the output. Also, the row_number() should be used instead of rank().
-- Number all rows by (val, num) with row_number(), so ties on val still get
-- distinct positions, then return the rows within two positions of the pivot
-- row, leaving out the pivot itself.
WITH ranked_lutsers AS (
    SELECT
        val,
        num,
        row_number() OVER (ORDER BY val, num) AS rnk
    FROM lutser
)
SELECT
    neighbour.val,
    neighbour.num,
    (neighbour.rnk - pivot.rnk) AS relrnk   -- signed distance from the pivot
FROM ranked_lutsers AS neighbour
INNER JOIN ranked_lutsers AS pivot
    ON  neighbour.rnk >= pivot.rnk - 2
    AND neighbour.rnk <= pivot.rnk + 2
WHERE pivot.val = 1.3293
  AND neighbour.rnk <> pivot.rnk
;
Result2:
val | num | relrnk
--------+------------+--------
0.0033 | 2312222332 | -2
0.1133 | 2312882332 | -1
2.3293 | 9999942333 | 1
3.1133 | 2312332332 | 2
(4 rows)
UPDATE2: to always select four, even if we are at the top or bottom of the list. This makes the query a bit uglier. (but not as ugly as Erwin's ;-)
-- Always return four neighbours, even when the pivot row sits at the top or
-- bottom of the list: look four positions in both directions, then keep the
-- four rows with the smallest absolute distance.
WITH ranked_lutsers AS (
    SELECT
        val,
        num,
        row_number() OVER (ORDER BY val, num) AS rnk
    FROM lutser
)
SELECT
    neighbour.val,
    neighbour.num,
    ABS(neighbour.rnk - pivot.rnk) AS srtrnk,   -- absolute distance, sort key
    (neighbour.rnk - pivot.rnk)    AS relrnk    -- signed distance
FROM ranked_lutsers AS neighbour
INNER JOIN ranked_lutsers AS pivot
    ON  neighbour.rnk >= pivot.rnk - 4
    AND neighbour.rnk <= pivot.rnk + 4
-- Pivot near the top of the list, to exercise the edge case.
WHERE pivot.val = 0.1133
  AND neighbour.rnk <> pivot.rnk
ORDER BY srtrnk ASC
LIMIT 4
;
Output:
val | num | srtrnk | relrnk
--------+------------+--------+--------
0.0033 | 2312222332 | 1 | -1
1.3293 | 7876543213 | 1 | 1
2.3293 | 9999942333 | 2 | 2
3.1133 | 2312332332 | 3 | 3
(4 rows)
UPDATE: A version with a nested CTE (featuring outer join!!!). For convenience, I added a primary key to the table, which sounds like a good idea anyway IMHO.
-- Position of every row in (val, num) order.
WITH ranked_lutsers AS (
    SELECT
        id,
        row_number() OVER (ORDER BY val, num) AS rnk
    FROM lutser
),
-- Positional distance between every ordered pair of distinct rows.
-- Warning: this is a (near-)Cartesian product of the table with itself.
distance AS (
    SELECT
        l0.id AS one,
        l1.id AS two,
        ABS(l1.rnk - l0.rnk) AS dist
    FROM ranked_lutsers AS l0
    INNER JOIN ranked_lutsers AS l1
        ON l0.id <> l1.id
)
-- The four rows closest to the row with id = 1.
SELECT lu.*
FROM lutser AS lu
INNER JOIN distance AS di
    ON lu.id = di.two
WHERE di.one = 1
ORDER BY di.dist
LIMIT 4
;