How to factor this postgis query with subquery? - postgresql

I have a query to find POIs around a trace:
-- POIs (ptb) close to a stretch of one trace. The stretch is cut out of the
-- trace path between two fractions, each measured from the point where the
-- anchor POI (pta) projects onto the path.
-- NOTE(review): the `AS pois` label that followed `ptb.*` was dropped; a label
-- cannot rename an expanded star — confirm nothing relied on it.
SELECT ptb.*
FROM traces AS tr
CROSS JOIN pois AS pta
CROSS JOIN pois AS ptb
WHERE tr.id = #{trace.id}
  AND pta.id = #{poi.id}
  AND ST_DWithin(
        ST_LineSubstring(
            tr.path,
            ST_LineLocatePoint(tr.path, pta.lonlat::geometry) + (#{dist} * 1000) / ST_Length(tr.path, false),
            ST_LineLocatePoint(tr.path, pta.lonlat::geometry) + (#{end_point} * 1000) / ST_Length(tr.path, false)
        )::geography,
        ptb.lonlat::geography,
        4000
      )
How can I use a subquery for:
ST_LineLocatePoint(tr.path, pta.lonlat::geometry)
ST_Length(tr.path, false)
EDIT :
-- Factor the two repeated path expressions into one CTE:
--   anchor_fraction : position of the anchor POI along the path
--                     (ST_LineLocatePoint)
--   path_length     : ST_Length of the path
-- The CTE is not self-referencing, so plain WITH is enough (no RECURSIVE), and
-- the main query no longer has to re-join traces and pois to reach the path.
-- Assumes traces.id and pois.id each identify a single row — TODO confirm.
WITH trace_anchor AS (
    SELECT
        tr.path,
        ST_LineLocatePoint(tr.path, pta.lonlat::geometry) AS anchor_fraction,
        ST_Length(tr.path, false) AS path_length
    FROM traces AS tr
    CROSS JOIN pois AS pta
    WHERE tr.id = 2
      AND pta.id = 2
)
SELECT ptb.*
FROM trace_anchor AS ta
INNER JOIN pois AS ptb
    ON ST_DWithin(
        -- NOTE(review): ST_LineSubstring expects fractions between 0 and 1;
        -- confirm the offsets cannot push either bound past the end of the path.
        ST_LineSubstring(
            ta.path,
            ta.anchor_fraction + (25 * 1000) / ta.path_length,
            ta.anchor_fraction + (250 * 1000) / ta.path_length
        )::geography,
        ptb.lonlat::geography,
        4000
    )
SQL

This should do the same (but I do not understand the logic, TBH):
-- Returns every POI (ptb) for which the given trace / anchor POI pair yields a
-- path stretch that ST_DWithin places within 4000 of that POI.
-- The EXISTS subquery is correlated on ptb and is now closed before the
-- statement terminator (the closing parenthesis was missing).
SELECT
    ptb.* -- AS pois
FROM pois AS ptb
WHERE EXISTS (
    SELECT *
    FROM traces AS tr
    INNER JOIN pois AS pta
        ON ST_DWithin(
            ST_LineSubstring(
                tr.path
                , ST_LineLocatePoint(tr.path, pta.lonlat::geometry) + (#{dist} * 1000) / ST_Length(tr.path, false)
                , ST_LineLocatePoint(tr.path, pta.lonlat::geometry) + (#{end_point} * 1000) / ST_Length(tr.path, false)
            )::geography
            , ptb.lonlat::geography
            , 4000
        )
    WHERE tr.id = #{trace.id}
      AND pta.id = #{poi.id}
)
;

Related

Postgres json_agg order by

I have multiple tables in a json_agg for psql:
SELECT json_agg(t) FROM (SELECT *,
( SELECT row_to_json(b) FROM ( SELECT * from (SELECT *, ( (3959 * acos( cos( radians(6.414478) ) * cos( radians( lat ) ) * cos( radians( lng ) - radians(12.466646) ) + sin( radians(6.414478) ) * sin( radians( lat ) ) ) ) * 1.609344 ) AS distance
from farm_location ) al WHERE farm_location_id=supply_forecast.farm_location_id and distance < 100 ) b) as farm_location,
( SELECT json_agg(c) FROM ( SELECT *
FROM supply_forecast_status_history WHERE supply_forecast_id=supply_forecast.supply_forecast_id) c) as supply_forecast_status
FROM supply_forecast WHERE delete = B'0' ORDER BY farm_location.distance desc) t;
-- NOTE: in the SELECT that carries the ORDER BY, farm_location is only an output
-- column alias (the row_to_json value), not an entry in its FROM clause; that is
-- why ORDER BY farm_location.distance is rejected. The distance value is computed
-- inside the scalar subquery and is visible outside it only as a key of that JSON.
-- (Comments are placed after the statement so the reported error position is unchanged.)
I'm trying to calculate the distance from a farm location. The issue is I'm trying to sort the data by "distance" but it's throwing an error:
ERROR: missing FROM-clause entry for table "farm_location"
LINE 6: ...FROM supply_forecast WHERE delete = B'0' ORDER BY farm_locat...
^
********** Error **********
ERROR: missing FROM-clause entry for table "farm_location"
SQL state: 42P01
Character: 642
If I remove the ORDER BY farm_location.distance desc the query works but the data is not sorted by distance. Any idea on how to go about this?
The json_agg function accepts an ORDER BY clause inside its argument list:
-- Aggregates every non-deleted supply_forecast row (plus its nested
-- farm_location object and status history array) into one JSON array,
-- farthest farm first.
--
-- The derived table t has no column named distance: the value lives only
-- inside the farm_location JSON object, so the aggregate orders by that
-- extracted field. Rows whose farm is not within 100 have a NULL farm_location
-- and are placed last.
SELECT
    json_agg(
        t
        ORDER BY (t.farm_location ->> 'distance')::double precision DESC NULLS LAST
    )
FROM (
    SELECT
        supply_forecast.*,
        (
            SELECT row_to_json(b)
            FROM (
                SELECT *
                FROM (
                    SELECT
                        *,
                        -- spherical law of cosines from the fixed point
                        -- (6.414478, 12.466646); 3959 * 1.609344 presumably is
                        -- the Earth radius in miles converted to kilometres
                        ((3959 * acos(cos(radians(6.414478)) * cos(radians(lat)) * cos(radians(lng) - radians(12.466646)) + sin(radians(6.414478)) * sin(radians(lat)))) * 1.609344) AS distance
                    FROM farm_location
                ) al
                WHERE farm_location_id = supply_forecast.farm_location_id
                  AND distance < 100
            ) b
        ) AS farm_location,
        (
            SELECT json_agg(c)
            FROM (
                SELECT *
                FROM supply_forecast_status_history
                WHERE supply_forecast_id = supply_forecast.supply_forecast_id
            ) c
        ) AS supply_forecast_status
    FROM supply_forecast
    WHERE delete = B'0'
) t;
I applied ORDER BY inside the json_agg() function and it works OK:
-- One output row per distinct combination of the app51 columns listed in the
-- GROUP BY, with the related app51_tomas rows folded into a JSONB array
-- (app51_tomas) ordered by toma id, highest id first.
select
-- Each array element is a single JSON object built by concatenating (||)
-- several jsonb_build_object() results.
jsonb_agg(jsonb_build_object('toma_parada', app51.app51_tomas__toma_parada)|| jsonb_build_object('lado', app51.app51_tomas__lado)|| jsonb_build_object('existen_tomas_paradas', app51.app51_tomas__existen_tomas_paradas)|| jsonb_build_object('b', app51.app51_tomas__b)|| jsonb_build_object('a', app51.app51_tomas__a)|| jsonb_build_object('m', app51.app51_tomas__m)|| jsonb_build_object('nombre', app51.app51_tomas__nombre, 'id_publico', app51.app51_tomas__id_publico , 'id', app51.app51_tomas__id )
order by app51.app51_tomas__id desc) as app51_tomas,
app51.fecha_modificacion,
app51.fecha_creacion,
app51.id_cliente,
app51.id_rumas,
app51.id_usuario,
app51.id,
app51.id_publico,
app51.habilitado,
app51.borrado,
app51.produccion_fin,
app51.produccion_inicio
from
(
-- Inner query: app51 joined to app51_tomas through app51_app51_tomas, with
-- individual attributes pulled out of the atributos column as text (->>).
-- The derived table is aliased app51, so outer references to app51.* mean
-- this subquery, not the base table.
select
distinct app51.id_cliente,
app51.id_rumas,
app51.id_usuario,
app51.id,
app51.id_publico,
app51.habilitado,
app51.borrado,
app51.atributos ->> 'produccion_fin' as produccion_fin,
app51.atributos ->> 'produccion_inicio' as produccion_inicio,
app51_tomas.atributos ->> 'toma_parada' as app51_tomas__toma_parada,
app51_tomas.atributos ->> 'lado' as app51_tomas__lado,
app51_tomas.atributos ->> 'existen_tomas_paradas' as app51_tomas__existen_tomas_paradas,
app51_tomas.atributos ->> 'b' as app51_tomas__b,
app51_tomas.atributos ->> 'a' as app51_tomas__a,
app51_tomas.atributos ->> 'm' as app51_tomas__m,
app51_tomas.atributos ->> 'nombre' as app51_tomas__nombre,
app51_tomas.id as app51_tomas__id ,
app51_tomas.id_publico as app51_tomas__id_publico ,
-- Empty JSON objects; these two columns are not read by the outer query.
jsonb_build_object() as id_usuario_creacion,
jsonb_build_object() as id_usuario_modificacion,
-- Timestamps rendered as text in GMT.
-- NOTE(review): the 'SS.ss0' part of the format looks like it is meant to
-- produce fractional seconds — confirm it does not simply repeat the seconds
-- field followed by a literal 0.
(
select
TO_CHAR(app51.fecha_creacion::timestamptz at time zone 'GMT' , 'YYYY-MM-DD"T"HH24:MI:SS.ss0"Z"')) as fecha_creacion ,
(
select
TO_CHAR(app51.fecha_modificacion::timestamptz at time zone 'GMT' , 'YYYY-MM-DD"T"HH24:MI:SS.ss0"Z"')) as fecha_modificacion
from
app51
left join app51_app51_tomas on
app51_app51_tomas.id_app51 = app51.id
left join app51_tomas on
app51_app51_tomas.id_app51_tomas = app51_tomas.id
-- Only enabled, non-deleted tomas are joined.
-- NOTE(review): inside the ON clause the `or app51_tomas is null` branch looks
-- redundant — confirm whether it is needed.
and ((app51_tomas.habilitado = true
and app51_tomas.borrado = false)
or app51_tomas is null )
where
(app51.habilitado = true
and app51.borrado = false)
-- NOTE(review): the order of the aggregated array is set by the ORDER BY inside
-- jsonb_agg above; confirm whether this inner ORDER BY is still required.
order by
app51_tomas.id desc) as app51
group by
app51.fecha_modificacion,
app51.fecha_creacion,
app51.id_cliente,
app51.id_rumas,
app51.id_usuario,
app51.id,
app51.id_publico,
app51.habilitado,
app51.borrado,
app51.produccion_fin,
app51.produccion_inicio

Show Disk Space Used for all Tables - Azure SQL Data Warehouse

We are transitioning across to Azure SQL Data Warehouse - and an issue that's been highlighted is the need to change some smaller tables from Round-Robin / Hash-distributed to Replicated to improve performance.
MS Design Guidance (See Here) suggests that one criterion for this decision is whether a table takes up less than 2 GB of disk space; i.e. such tables could be made into Replicated tables. They suggest using DBCC PDW_SHOWSPACEUSED to determine this.
I can run this against the whole DB, or one specific table, but i'd really like to get a list of all tables and the space used (preferably in MB) - but it's beyond me.
A lot of google searching either gives me the two basic commands I already know (against the whole DB / against 1 table) or give me SQL Server queries that don't run against Azure DW - e.g. using sys.allocation_units - which is not supported in Azure DW.
I was just directed to this Microsoft article that provides a pretty solid solution to this problem.
In particular, create a view:
-- dbo.vTableSizes: exposes row counts and reserved / unused / data / index
-- space for user tables, read from sys.dm_pdw_nodes_db_partition_stats and
-- restricted to COMPUTE nodes. Rows are at the grain of the joined partition
-- stats, so callers aggregate (SUM ... GROUP BY table) to get per-table totals.
CREATE VIEW dbo.vTableSizes
AS
-- base: joins the catalog views to the per-node physical tables and their
-- partition statistics, and derives the page-count columns.
WITH base
AS (
SELECT
GETDATE() AS [execution_time],
DB_NAME() AS [database_name],
s.name AS [schema_name],
t.name AS [table_name],
QUOTENAME(s.name) + '.' + QUOTENAME(t.name) AS [two_part_name],
nt.[name] AS [node_table_name],
-- Sequence number within each physical node table name; ORDER BY (SELECT NULL)
-- means no particular ordering is requested.
ROW_NUMBER() OVER (PARTITION BY
nt.[name]
ORDER BY
(
SELECT
NULL
)
) AS [node_table_name_seq],
tp.[distribution_policy_desc] AS [distribution_policy_name],
c.[name] AS [distribution_column],
nt.[distribution_id] AS [distribution_id],
i.[type] AS [index_type],
i.[type_desc] AS [index_type_desc],
nt.[pdw_node_id] AS [pdw_node_id],
pn.[type] AS [pdw_node_type],
pn.[name] AS [pdw_node_name],
di.name AS [dist_name],
di.position AS [dist_position],
nps.[partition_number] AS [partition_nmbr],
nps.[reserved_page_count] AS [reserved_space_page_count],
nps.[reserved_page_count] - nps.[used_page_count] AS [unused_space_page_count],
nps.[in_row_data_page_count] + nps.[row_overflow_used_page_count] + nps.[lob_used_page_count] AS [data_space_page_count],
-- index pages = reserved - unused - data, i.e. used pages that are not data pages
nps.[reserved_page_count] - (nps.[reserved_page_count] - nps.[used_page_count])
- ([in_row_data_page_count] + [row_overflow_used_page_count] + [lob_used_page_count]) AS [index_space_page_count],
nps.[row_count] AS [row_count]
FROM
sys.schemas s
INNER JOIN
sys.tables t
ON s.[schema_id] = t.[schema_id]
-- index_id <= 1 keeps only index_id 0 or 1 for each table
-- (presumably the heap or the clustered index — confirm against the sys.indexes docs)
INNER JOIN
sys.indexes i
ON t.[object_id] = i.[object_id]
AND i.[index_id] <= 1
INNER JOIN
sys.pdw_table_distribution_properties tp
ON t.[object_id] = tp.[object_id]
-- Map the logical table to its physical per-node tables by physical name.
INNER JOIN
sys.pdw_table_mappings tm
ON t.[object_id] = tm.[object_id]
INNER JOIN
sys.pdw_nodes_tables nt
ON tm.[physical_name] = nt.[name]
INNER JOIN
sys.dm_pdw_nodes pn
ON nt.[pdw_node_id] = pn.[pdw_node_id]
INNER JOIN
sys.pdw_distributions di
ON nt.[distribution_id] = di.[distribution_id]
INNER JOIN
sys.dm_pdw_nodes_db_partition_stats nps
ON nt.[object_id] = nps.[object_id]
AND nt.[pdw_node_id] = nps.[pdw_node_id]
AND nt.[distribution_id] = nps.[distribution_id]
AND i.[index_id] = nps.[index_id]
-- Distribution column, when one exists: outer joins keep tables that have no
-- row with distribution_ordinal = 1, leaving distribution_column NULL for them.
LEFT OUTER JOIN
(
SELECT
*
FROM
sys.pdw_column_distribution_properties
WHERE
distribution_ordinal = 1
) cdp
ON t.[object_id] = cdp.[object_id]
LEFT OUTER JOIN
sys.columns c
ON cdp.[object_id] = c.[object_id]
AND cdp.[column_id] = c.[column_id]
WHERE
pn.[type] = 'COMPUTE'),
-- size: passes every base column through and converts each page count to
-- KB / MB / GB / TB.
size
AS ( SELECT
[execution_time],
[database_name],
[schema_name],
[table_name],
[two_part_name],
[node_table_name],
[node_table_name_seq],
[distribution_policy_name],
[distribution_column],
[distribution_id],
[index_type],
[index_type_desc],
[pdw_node_id],
[pdw_node_type],
[pdw_node_name],
[dist_name],
[dist_position],
[partition_nmbr],
[reserved_space_page_count],
[unused_space_page_count],
[data_space_page_count],
[index_space_page_count],
[row_count],
-- page count * 8.0 yields the *_KB columns.
-- NOTE(review): the larger units divide by powers of 1000 rather than 1024, so
-- MB/GB/TB are decimal approximations — confirm this is acceptable before
-- comparing against a binary threshold such as "2 GB".
([reserved_space_page_count] * 8.0) AS [reserved_space_KB],
([reserved_space_page_count] * 8.0) / 1000 AS [reserved_space_MB],
([reserved_space_page_count] * 8.0) / 1000000 AS [reserved_space_GB],
([reserved_space_page_count] * 8.0) / 1000000000 AS [reserved_space_TB],
([unused_space_page_count] * 8.0) AS [unused_space_KB],
([unused_space_page_count] * 8.0) / 1000 AS [unused_space_MB],
([unused_space_page_count] * 8.0) / 1000000 AS [unused_space_GB],
([unused_space_page_count] * 8.0) / 1000000000 AS [unused_space_TB],
([data_space_page_count] * 8.0) AS [data_space_KB],
([data_space_page_count] * 8.0) / 1000 AS [data_space_MB],
([data_space_page_count] * 8.0) / 1000000 AS [data_space_GB],
([data_space_page_count] * 8.0) / 1000000000 AS [data_space_TB],
([index_space_page_count] * 8.0) AS [index_space_KB],
([index_space_page_count] * 8.0) / 1000 AS [index_space_MB],
([index_space_page_count] * 8.0) / 1000000 AS [index_space_GB],
([index_space_page_count] * 8.0) / 1000000000 AS [index_space_TB]
FROM
base)
SELECT
*
FROM
size;
Then, this "Table space summary" query provides a list of tables and how much space each is currently using (among other information):
-- Table space summary: one row per table (and distribution / index type),
-- with partition count, total rows and total space in GB, largest reserved
-- space first.
SELECT
    vts.database_name,
    vts.schema_name,
    vts.table_name,
    vts.distribution_policy_name,
    vts.distribution_column,
    vts.index_type_desc,
    COUNT(DISTINCT vts.partition_nmbr) AS nbr_partitions,
    SUM(vts.row_count)                 AS table_row_count,
    SUM(vts.reserved_space_GB)         AS table_reserved_space_GB,
    SUM(vts.data_space_GB)             AS table_data_space_GB,
    SUM(vts.index_space_GB)            AS table_index_space_GB,
    SUM(vts.unused_space_GB)           AS table_unused_space_GB
FROM dbo.vTableSizes AS vts
GROUP BY
    vts.database_name,
    vts.schema_name,
    vts.table_name,
    vts.distribution_policy_name,
    vts.distribution_column,
    vts.index_type_desc
ORDER BY
    SUM(vts.reserved_space_GB) DESC;
Here's a sample execution against one of my databases showing the table names sorted by space used.

ELO Formula in SQL Query

This isn't my original query; it was posted by a gentleman at http://elliot.land/. It is the script for the ELO rating system. My problem is that I've tried and tried, but I don't know how to convert this PostgreSQL query (WITH RECURSIVE) to T-SQL (CTE). Can anyone help me with the conversion?
This is the data:
-- One row per game played between two players.
CREATE TABLE plays (
    game_number INT          NOT NULL, -- must be consecutive and start at 1!
    p1name      VARCHAR(255) NOT NULL, -- first player
    p1score     FLOAT        NOT NULL, -- 0 or 1
    p2name      VARCHAR(255) NOT NULL, -- second player
    p2score     FLOAT        NOT NULL  -- 0 or 1
);

-- Sample games. The column list is spelled out so the load does not depend on
-- the physical column order of the table.
INSERT INTO plays (game_number, p1name, p1score, p2name, p2score)
VALUES
    (1, 'Elliot', 0, 'Brendan', 1),
    (2, 'Bob',    1, 'Brendan', 0),
    (3, 'Bob',    1, 'Elliot',  0),
    (4, 'Jane',   1, 'Bob',     0),
    (5, 'Bob',    0, 'Brendan', 1),
    (6, 'Jane',   1, 'Elliot',  0);
Here is the Formula where I need help with the conversion:
-- ELO ratings computed game by game with a recursive CTE.
-- p holds one row per (game number, player): the rating before and after that
-- game. The column list names only the first column (current_game_number);
-- the others keep the names from the first SELECT
-- (player_name, previous_elo, new_elo).
WITH RECURSIVE p(current_game_number) AS (
-- Every distinct player name appearing as either player 1 or player 2.
WITH players AS (
SELECT DISTINCT p1name AS player_name
FROM plays
UNION
SELECT DISTINCT p2name
FROM plays
)
-- Seed: before any game (game 0) every player is rated 1000.
SELECT
0 AS game_number,
player_name,
1000.0 :: FLOAT AS previous_elo,
1000.0 :: FLOAT AS new_elo
FROM players
UNION ALL
(
-- Recursive step: wraps the recursive reference p in a local CTE.
-- NOTE(review): presumably done so the previous ratings can be read more than
-- once below (join plus the two scalar subqueries) — confirm.
WITH previous_elos AS (
SELECT *
FROM p
)
SELECT
plays.game_number,
player_name,
previous_elos.new_elo AS previous_elo,
-- Players not in this game keep their rating; the two participants get
-- rating + 32 * (actual score - expected score), where the expected score is
-- r / (r1 + r2). The result is rounded.
round(CASE WHEN player_name NOT IN (p1name, p2name)
THEN previous_elos.new_elo
WHEN player_name = p1name
THEN previous_elos.new_elo + 32.0 * (p1score - (r1 / (r1 + r2)))
ELSE previous_elos.new_elo + 32.0 * (p2score - (r2 / (r1 + r2))) END)
FROM plays
-- Pair game N with every player's row from game N - 1; this is why
-- game_number has to be consecutive.
JOIN previous_elos
ON current_game_number = plays.game_number - 1
-- r1 / r2: 10 ^ (previous rating / 400) for player 1 and player 2 of this game.
JOIN LATERAL (
SELECT
pow(10.0, (SELECT new_elo
FROM previous_elos
WHERE current_game_number = plays.game_number - 1 AND player_name = p1name) / 400.0) AS r1,
pow(10.0, (SELECT new_elo
FROM previous_elos
WHERE current_game_number = plays.game_number - 1 AND player_name = p2name) / 400.0) AS r2
) r
ON TRUE
)
)
-- Final report: one row per player with the latest rating and the number of
-- games in which the rating went up (wins) or down (losses).
SELECT
player_name,
(
-- rating after the highest game number recorded for this player
SELECT new_elo
FROM p
WHERE t.player_name = p.player_name
ORDER BY current_game_number DESC
LIMIT 1
) AS elo,
count(CASE WHEN previous_elo < new_elo
THEN 1
ELSE NULL END) AS wins,
count(CASE WHEN previous_elo > new_elo
THEN 1
ELSE NULL END) AS losses
FROM
(
-- only rows where the rating actually changed
SELECT *
FROM p
WHERE previous_elo <> new_elo
ORDER BY current_game_number, player_name
) t
GROUP BY player_name
ORDER BY elo DESC;

PostgreSQL Where clause using calculated distance

I have a PostgreSQL query that looks like this:
-- As posted in the question: this form fails with
--   column "distance" does not exist
-- because the WHERE clause refers to the select-list alias distance.
-- Without the distance filter the query runs, and ORDER BY distance works.
SELECT *,
2 * 3961 * asin(sqrt((sin(radians((latitude - 40.2817993164062) / 2))) ^ 2 + cos(radians(40.2817993164062)) * cos(radians(latitude)) * (sin(radians((longitude - -111.720901489258) / 2))) ^ 2)) as distance,
(SELECT json_agg(deals.*) FROM deals WHERE vendors.id = deals.vendorid) as deals FROM vendors
WHERE ( category = 'Food' )
AND (distance < 80)
AND (nationwide IS FALSE OR nationwide is NULL)
ORDER BY featured ASC, created DESC, distance ASC
I'm getting the distance in miles using the second select part.
The problem is the `AND (distance < 80)` condition, which fails with the error: column "distance" does not exist. The odd thing is that if I remove `AND (distance < 80)`, the query works, sorts correctly by distance, and the output includes the distance column. So the distance is being computed correctly, but for some reason I can't use it as a filter in the WHERE clause, and I can't figure out why.
distance is just an alias. You could try something like:
-- Compute the distance (in miles, per the question) once inside a CTE so the
-- outer query can filter and sort on it by name.
WITH food_vendors AS (
    SELECT
        vendors.*,
        2 * 3961 * asin(sqrt((sin(radians((latitude - 40.2817993164062) / 2))) ^ 2 + cos(radians(40.2817993164062)) * cos(radians(latitude)) * (sin(radians((longitude - -111.720901489258) / 2))) ^ 2)) AS distance
    FROM vendors
    WHERE category = 'Food'
      AND nationwide IS NOT TRUE  -- i.e. FALSE or NULL
)
SELECT
    fv.*,
    (
        SELECT json_agg(deals.*)
        FROM deals
        WHERE deals.vendorid = fv.id
    ) AS deals
FROM food_vendors AS fv
WHERE fv.distance < 80
ORDER BY
    fv.featured ASC,
    fv.created DESC,
    fv.distance ASC

Postgres DISTINCT Query issue

-- As posted in the question: rejected with
--   for SELECT DISTINCT, ORDER BY expressions must appear in select list
-- because the ORDER BY expression is not one of the selected columns.
-- NOTE(review): the \' sequences are presumably quote escapes left over from
-- the host-language string that embeds this query.
SELECT DISTINCT "Users"."id" , "Users".name,
"Users"."surname", "Users"."gender",
"Users"."dob", "Searches"."start_date"
FROM "Users"
LEFT JOIN "Searches" ON "Users"."id" = "Searches"."user_id"
WHERE (SQRT( POW(69.1 * ("Users"."latitude" - 45.465454), 2) + POW(69.1 * (9.186515999999983 - "Users"."longitude") * COS("Users"."latitude" / 57.3), 2))) < 20
AND "Users"."status" = true
AND "Users"."id" != 18
AND "Searches"."activity" = \'clubbing\'
AND "Users"."gender" = \'m\'
AND "Users"."age" BETWEEN 18 AND 30
ORDER BY ABS( "Searches"."start_date" - date \'2016-07-07\' )
For some reasons the above query returns the following error:
for SELECT DISTINCT, ORDER BY expressions must appear in select list
I only want to return unique users but I really don't know what's wrong with it.
Thanks for your help
Just doing what the error message says I would include the expression ABS( "Searches"."start_date" - date '2016-07-07' ) in the SELECT list. No need to change your query logic.
absdiffdate can be discarded later when processing the result.
-- Same filter as the question's query, with the ORDER BY expression added to
-- the select list as absdiffdate so that SELECT DISTINCT accepts the ordering.
SELECT DISTINCT
    u."id",
    u.name,
    u."surname",
    u."gender",
    u."dob",
    s."start_date",
    ABS(s."start_date" - DATE '2016-07-07') AS absdiffdate
FROM "Users" AS u
LEFT JOIN "Searches" AS s
    ON u."id" = s."user_id"
WHERE SQRT(POW(69.1 * (u."latitude" - 45.465454), 2) + POW(69.1 * (9.186515999999983 - u."longitude") * COS(u."latitude" / 57.3), 2)) < 20
  AND u."status" = true
  AND u."id" != 18
  AND s."activity" = 'clubbing'
  AND u."gender" = 'm'
  AND u."age" BETWEEN 18 AND 30
ORDER BY absdiffdate
Will this new column results in possibly more records when DISTINCT is applied?
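I don't think so: start_date is already in the select list, and the new column is computed only from start_date and a constant, so rows with the same start_date always get the same absdiffdate and no additional distinct rows can appear.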
In Postgres, you can use DISTINCT ON to get one row per user id:
-- One row per user: DISTINCT ON (u."id") keeps, for each user id, the first
-- row in ORDER BY order, i.e. the search whose start_date is closest to
-- 2016-07-07.
--
-- Fixes relative to the posted version:
--   * "Searches" is aliased as s, so it must be referenced as s (not "Searches");
--   * the leading ORDER BY key must match the DISTINCT ON expression and use
--     the alias u (there is no table or alias named users in the query);
--   * string and date literals use plain quotes instead of \' escapes.
-- NOTE(review): the filter on s."activity" in WHERE discards the NULL-extended
-- rows of the LEFT JOIN, so users without a matching search are not returned —
-- confirm that is intended.
SELECT DISTINCT ON (u."id")
    u."id",
    u.name,
    u."surname",
    u."gender",
    u."dob",
    s."start_date"
FROM "Users" AS u
LEFT JOIN "Searches" AS s
    ON u."id" = s."user_id"
WHERE SQRT(POW(69.1 * (u."latitude" - 45.465454), 2) + POW(69.1 * (9.186515999999983 - u."longitude") * COS(u."latitude" / 57.3), 2)) < 20
  AND u."status" = true
  AND u."id" != 18
  AND s."activity" = 'clubbing'
  AND u."gender" = 'm'
  AND u."age" BETWEEN 18 AND 30
ORDER BY
    u."id",
    ABS(s."start_date" - DATE '2016-07-07');
Notice how table aliases make the query easier to write and to read.