How to workaround unsupported percentile_cont in Postgres/Citus? - postgresql

I have a query similar to this:
select
coalesce(s.import_date, r.import_date) as import_date,
coalesce(s.bedrooms, r.bedrooms) as bedrooms,
coalesce(s.ptype, r.ptype) as property_type,
s.s_price,
s.s_transactions,
....
r.r_rent,
....
from
(
select
sc.import_date,
sc.bedrooms,
sc.ptype,
percentile_cont(array[0.25,0.5,0.75,0.9]) within group (order by sc.asking_price) filter(where sc.price > 0) as s_price,
sum(1) filter(where sc.sold_price > 0) as s_transactions,
......
from prices sc
where sc.ptype = 'F' and sc.bedrooms = 2 and st_Intersects('010300002.....'::geometry,sc.geom)
and sc.import_date between '2012-01-01' and '2019-01-01'
group by sc.import_date, sc.bedrooms, sc.property_type
) s
full join
(
select
rc.import_date,
rc.bedrooms,
rc.ptype,
percentile_cont(array[0.25,0.5,0.75,0.9]) within group (order by rc.rent) filter(where rc.rent > 0) as r_rent,
.....
from rents rc
where rc.ptype = 'F' and rc.bedrooms = 2 and st_Intersects('010300002....'::geometry,rc.geom)
and rc.import_date between '2012-01-01' and '2019-01-01'
group by rc.import_date, rc.bedrooms, rc.property_type
) r
on r.import_date = s.import_date;
When I run it against my distributed tables on Citus/Postgres-11 I get:
ERROR: unsupported aggregate function percentile_cont
Is there any way to workaround this limitation?

AFAIK there is no easy workaround for this.
You can always pull all the data to the coordinator and calculate percentiles there. It is not advisable to do this in the same query though.
SELECT percentile_cont(array[0.25,0.5,0.75,0.9]) within group (order by r.order_col)
FROM
(
SELECT order_col, ...
FROM rents
WHERE ...
) r
GROUP BY ...
This query will pull all the data returned by the inner subquery to the coordinator and calculate the percentiles in the coordinator.

Related

How to repeat some data points in query results?

I am trying to get the max date by account from 3 different tables and view those dates side by side. I created a separate query for each table, merged the results with UNION ALL, and then wrapped all that in a PIVOT.
The first 2 sections in the link/pic below show what I have been able to accomplish and the 3rd section is what I would like to do.
Query results by step
How can I get the results from 2 of the tables to repeat? Is that possible?
--define var_ent_type = 'ACOM'
--define var_ent_id = '52766'
--define var_dict_id = 113
SELECT
*
FROM
(
SELECT
E.ENTITY_TYPE,
E.ENTITY_ID,
'PERF_SUMMARY' as "TableName",
PS.DICTIONARY_ID,
to_char(MAX(PS.END_EFFECTIVE_DATE), 'YYYY-MM-DD') as "MaxDate"
FROM
RULESDBO.ENTITY E
INNER JOIN PERFORMDBO.PERF_SUMMARY PS ON (PS.ENTITY_ID = E.ENTITY_ID)
WHERE
1=1
-- AND E.ENTITY_TYPE = '&var_ent_type'
-- AND E.ENTITY_ID = '&var_ent_id'
AND PS.DICTIONARY_ID >= 100
AND (E.ACTIVE_STATUS <> 'N' )--and E.TERMINATION_DATE is null )
GROUP BY
E.ENTITY_TYPE,
E.ENTITY_ID,
'PERF_SUMMARY',
PS.DICTIONARY_ID
union all
SELECT
E.ENTITY_TYPE,
E.ENTITY_ID,
'POSITION' as "TableName",
0 as DICTIONARY_ID,
to_char(MAX(H.EFFECTIVE_DATE), 'YYYY-MM-DD') as "MaxDate"
FROM
RULESDBO.ENTITY E
INNER JOIN HOLDINGDBO.POSITION H ON (H.ENTITY_ID = E.ENTITY_ID)
WHERE
1=1
-- AND E.ENTITY_TYPE = '&var_ent_type'
-- AND E.ENTITY_ID = '&var_ent_id'
AND (E.ACTIVE_STATUS <> 'N' )--and E.TERMINATION_DATE is null )
GROUP BY
E.ENTITY_TYPE,
E.ENTITY_ID,
'POSITION',
1
union all
SELECT
E.ENTITY_TYPE,
E.ENTITY_ID,
'CASH_ACTIVITY' as "TableName",
0 as DICTIONARY_ID,
to_char(MAX(C.EFFECTIVE_DATE), 'YYYY-MM-DD') as "MaxDate"
FROM
RULESDBO.ENTITY E
INNER JOIN CASHDBO.CASH_ACTIVITY C ON (C.ENTITY_ID = E.ENTITY_ID)
WHERE
1=1
-- AND E.ENTITY_TYPE = '&var_ent_type'
-- AND E.ENTITY_ID = '&var_ent_id'
AND (E.ACTIVE_STATUS <> 'N' )--and E.TERMINATION_DATE is null )
GROUP BY
E.ENTITY_TYPE,
E.ENTITY_ID,
'CASH_ACTIVITY',
1
--ORDER BY
-- 2,3, 4
)
PIVOT
(
MAX("MaxDate")
FOR "TableName"
IN ('CASH_ACTIVITY', 'PERF_SUMMARY','POSITION')
)
Everything is possible. You only need a window function to make the value repeat across rows w/o data.
--Assuming current query is QC
With QC as (
...
)
select code, account, grouping,
--cash,
first_value(cash) over (partition by code, account order by grouping asc rows unbounded preceding) as cash_repeat,
perf,
--pos,
first_value(pos) over (partition by code, account order by grouping asc rows unbounded preceding) as pos_repeat
from QC
;
See first_value() help here: https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/FIRST_VALUE.html#GUID-D454EC3F-370C-4C64-9B11-33FCB10D95EC

SQL Server - Select with Group By together Raw_Number

I'm using SQL Server 2000 (80). So, it's not possible to use the LAG function.
I have a code a data set with four columns:
Purchase_Date
Facility_no
Seller_id
Sale_id
I need to identify missing Sale_ids. So every sale_id is a 100% sequential, so the should not be any gaps in order.
This code works for a specific date and store if specified. But i need to work on entire data set looping looping through every facility_id and every seller_id for ever purchase_date
declare #MAXCOUNT int
set #MAXCOUNT =
(
select MAX(Sale_Id)
from #table
where
Facility_no in (124) and
Purchase_date = '2/7/2020'
and Seller_id = 1
)
;WITH TRX_COUNT AS
(
SELECT 1 AS Number
union all
select Number + 1 from TRX_COUNT
where Number < #MAXCOUNT
)
select * from TRX_COUNT
where
Number NOT IN
(
select Sale_Id
from #table
where
Facility_no in (124)
and Purchase_Date = '2/7/2020'
and seller_id = 1
)
order by Number
OPTION (maxrecursion 0)
My Dataset
This column:
case when
Sale_Id=0 or 1=Sale_Id-LAG(Sale_Id) over (partition by Facility_no, Purchase_Date, Seller_id)
then 'OK' else 'Previous Missing' end
will tell you which Seller_Ids have some sale missing. If you want to go a step further and have exactly your desired output, then filter out and distinct the 'Previous Missing' ones, and join with a tally table on not exists.
Edit: OP mentions in comments they can't use LAG(). My suggestion, then, would be:
Make a temp table that that has the max(sale_id) group by facility/seller_id
Then you can get your missing results by this pseudocode query:
Select ...
from temptable t
inner join tally N on t.maxsale <=N.num
where not exists( select ... from sourcetable s where s.facility=t.facility and s.seller=t.seller and s.sale=N.num)
> because the only way to "construct" nonexisting combinations is to construct them all and just remove the existing ones.
This one worked out
; WITH cte_Rn AS (
SELECT *, ROW_NUMBER() OVER(PARTITION BY Facility_no, Purchase_Date, Seller_id ORDER BY Purchase_Date) AS [Rn_Num]
FROM (
SELECT
Facility_no,
Purchase_Date,
Seller_id,
Sale_id
FROM MyTable WITH (NOLOCK)
) a
)
, cte_Rn_0 as (
SELECT
Facility_no,
Purchase_Date,
Seller_id,
Sale_id,
-- [Rn_Num] AS 'Skipped Sale'
-- , case when Sale_id = 0 Then [Rn_Num] - 1 Else [Rn_Num] End AS 'Skipped Sale for 0'
, [Rn_Num] - 1 AS 'Skipped Sale for 0'
FROM cte_Rn a
)
SELECT
Facility_no,
Purchase_Date,
Seller_id,
Sale_id,
-- [Skipped Sale],
[Skipped Sale for 0]
FROM cte_Rn_0 a
WHERE NOT EXISTS
(
select * from cte_Rn_0 b
where b.Sale_id = a.[Skipped Sale for 0]
and a.Facility_no = b.Facility_no
and a.Purchase_Date = b.Purchase_Date
and a.Seller_id = b.Seller_id
)
--ORDER BY Purchase_Date ASC

Select not null column in full join postgresql

I have 3 tables:
with current_exclusive as(
select id_station, area_type,
count(*) as total_entries
from c1169.data_cashier
where id_station IN(2439,2441,2443,2445,2447,2449) and date >= '2017-10-30' and date <= '2017-12-30'
group by id_station, area_type
), current_table as(
select id_station, area_type,
sum(total_time) filter (where previous_status = 1) as total_time
from c1169.data_table
where id_station IN(2439,2441,2443,2445,2447,2449) and date >= '2017-10-30' and date < '2017-12-30'
group by id_station, area_type
), current_cashier as(
select id_station, area_type,
sum(1) as total_transactions
from c1169.data_cashier
where id_station IN(2439,2441,2443,2445,2447,2449) and date >= '2017-10-30' and date < '2017-12-30'
group by id_station, area_type
)
select *
from current_exclusive
full join current_table on current_exclusive.id_station = current_table.id_station and current_exclusive.area_type = current_table.area_type
full join current_cashier on current_exclusive.id_station = current_cashier.id_station and current_exclusive.area_type = current_cashier.area_type
and the result is:
but my expected result is:
Are there any way to select * and show the expected result? Because when I do full join then id_station and area_type can be null in some tables, so it very hard to choose which column is not null.
Like: select case id_station is not null then id_station else id_station1 end, but I have up to 10 tables so can not do in select case
Use USING, per the documentation:
USING ( join_column [, ...] )
A clause of the form USING ( a, b, ... ) is shorthand for ON left_table.a = right_table.a AND left_table.b = right_table.b .... Also, USING implies that only one of each pair of equivalent columns will be included in the join output, not both.
select *
from current_exclusive
full join current_table using (id_station, area_type)
full join current_cashier using (id_station, area_type)
You cannot accomplish anything if you insist on using select *, since you are getting the values from different tables.
The option you have is to include a COALESCE block which gives you the first non-null value from the list of columns.
So, you could use.
select COALESCE( current_exclusive.id_station, current_table.id_station, current_cashier.id_station ) as id_station ,
COALESCE( current_exclusive.area_type , current_table.area_type, current_cashier.area_type ) as area_type ,.....
...
from current_exclusive
full join current_table..
...

Use an Alias in Where Clause Subquery in Oracle

i need to show some field from another table in oracle here is my query
SELECT
ANGGARAN.SIMPEG_PEGAWAI.ID_PEGAWAI AS KODE,
ANGGARAN.SIMPEG_PEGAWAI.NAMA,
ANGGARAN.SIMPEG_PEGAWAI.NIP,
ANGGARAN.SIMPEG_ESELON_JABATAN.JABATAN,
ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.GOLONGAN,
ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.PANGKAT,
(SELECT *
FROM (SELECT CONCAT(TO_CHAR(abs(sysdate - TO_DATE(TMT_JABATAN))/360,'9,999,999.9'),' TAHUN')
FROM SIMPEG_JABATAN where ID_PEGAWAI=KODE ORDER BY TMT_JABATAN desc)
WHERE ROWNUM = 1) AS MASA_KERJA
FROM
ANGGARAN.SIMPEG_PEGAWAI
INNER JOIN ANGGARAN.SIMPEG_ESELON_JABATAN
ON ANGGARAN.SIMPEG_PEGAWAI.ESELON_JABATAN = ANGGARAN.SIMPEG_ESELON_JABATAN.ID_ESELON_JABATAN
INNER JOIN ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT
ON ANGGARAN.SIMPEG_PEGAWAI.PANGKAT = ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.ID_GOLONGAN_PANGKAT
WHERE
ANGGARAN.SIMPEG_PEGAWAI.ST_AKTIF = 1 AND
ANGGARAN.SIMPEG_PEGAWAI.ESELON2 <> 1 AND
ANGGARAN.SIMPEG_PEGAWAI.PANGKAT >= 12 AND
ANGGARAN.SIMPEG_ESELON_JABATAN.STATUS = 1 AND
ANGGARAN.SIMPEG_ESELON_JABATAN.ID_ESELON2=2
ORDER BY
ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.SORT DESC
result i got
[Err] ORA-00904: "KODE": invalid identifier
the KODE come from query ANGGARAN.SIMPEG_PEGAWAI.ID_PEGAWAI AS KODE, and used for this query
(SELECT *
FROM (SELECT CONCAT(TO_CHAR(abs(sysdate - TO_DATE(TMT_JABATAN))/360,'9,999,999.9'),' TAHUN')
FROM SIMPEG_JABATAN where ID_PEGAWAI=KODE ORDER BY TMT_JABATAN desc)
WHERE ROWNUM = 1) AS MASA_KERJA
that i miss something ? or that could be worogn using an alias in subquery where clause in oracle database ?
You can use an identifier defined in an external query in only one level deep queries. You have to rethink your strategy. My suggestion is to remove the subquery from the select list and put it in the FROM clause. And add another rownumber column like this:
(SELECT
ID_PEGAWAI,
CONCAT(TO_CHAR(abs(sysdate - TO_DATE(TMT_JABATAN))/360,'9,999,999.9'),' TAHUN') MASA_KERJA,
ROW_NUMBER() OVER (PARTITION BY ID_PEGAWAI ORDER BY TMT_JABATAN DESC) rownumber
FROM SIMPEG_JABATAN) xxx
And join like:
ON ANGGARAN.SIMPEG_PEGAWAI = xxx.ID_PEGAWAI
Then in the where clause you can do simply:
WHERE
....
AND xxx.rownumber = 1
Complete query:
SELECT
ANGGARAN.SIMPEG_PEGAWAI.ID_PEGAWAI AS KODE,
ANGGARAN.SIMPEG_PEGAWAI.NAMA,
ANGGARAN.SIMPEG_PEGAWAI.NIP,
ANGGARAN.SIMPEG_ESELON_JABATAN.JABATAN,
ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.GOLONGAN,
ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.PANGKAT
FROM
ANGGARAN.SIMPEG_PEGAWAI
INNER JOIN ANGGARAN.SIMPEG_ESELON_JABATAN
ON ANGGARAN.SIMPEG_PEGAWAI.ESELON_JABATAN = ANGGARAN.SIMPEG_ESELON_JABATAN.ID_ESELON_JABATAN
INNER JOIN ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT
ON ANGGARAN.SIMPEG_PEGAWAI.PANGKAT = ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.ID_GOLONGAN_PANGKAT
INNER JOIN (
SELECT
ID_PEGAWAI,
CONCAT(TO_CHAR(abs(sysdate - TO_DATE(TMT_JABATAN))/360,'9,999,999.9'),' TAHUN') MASA_KERJA,
ROW_NUMBER() OVER (PARTITION BY ID_PEGAWAI ORDER BY TMT_JABATAN DESC) rownumber
FROM SIMPEG_JABATAN
) xxx
ON ANGGARAN.SIMPEG_PEGAWAI.ID_PEGAWAI = xxx.ID_PEGAWAI
WHERE
ANGGARAN.SIMPEG_PEGAWAI.ST_AKTIF = 1 AND
ANGGARAN.SIMPEG_PEGAWAI.ESELON2 <> 1 AND
ANGGARAN.SIMPEG_PEGAWAI.PANGKAT >= 12 AND
ANGGARAN.SIMPEG_ESELON_JABATAN.STATUS = 1 AND
ANGGARAN.SIMPEG_ESELON_JABATAN.ID_ESELON2=2 AND
xxx.rownumber = 1
ORDER BY ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.SORT DESC
Oracle does not support columns aliases in WHERE clauses (or similar situations like here). You have to name the column again (by its original name).
select dummy as kode from dual where kode = 'X'
> ORA-00904: "KODE": invalid identifier
You need to assign an alias in the level below to use it in a query (I haven't checked the syntax and workability of your query, just changed the part which is essential to answer your question):
SELECT
TMP.KODE,
TMP.NAMA,
TMP.NIP,
ANGGARAN.SIMPEG_ESELON_JABATAN.JABATAN,
ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.GOLONGAN,
ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.PANGKAT,
(SELECT *
FROM (SELECT CONCAT(TO_CHAR(abs(sysdate - TO_DATE(TMT_JABATAN))/360,'9,999,999.9'),' TAHUN')
FROM SIMPEG_JABATAN where ID_PEGAWAI=TMP.KODE ORDER BY TMT_JABATAN desc)
WHERE ROWNUM = 1) AS MASA_KERJA
FROM
(SELECT ANGGARAN.SIMPEG_PEGAWAI.ID_PEGAWAI AS KODE, ANGGARAN.SIMPEG_PEGAWAI.* FROM ANGGARAN.SIMPEG_PEGAWAI) TMP
INNER JOIN ANGGARAN.SIMPEG_ESELON_JABATAN
ON TMP.ESELON_JABATAN = ANGGARAN.SIMPEG_ESELON_JABATAN.ID_ESELON_JABATAN
INNER JOIN ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT
ON TMP.PANGKAT = ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.ID_GOLONGAN_PANGKAT
WHERE
TMP.ST_AKTIF = 1 AND
TMP.ESELON2 <> 1 AND
TMP.PANGKAT >= 12 AND
ANGGARAN.SIMPEG_ESELON_JABATAN.STATUS = 1 AND
ANGGARAN.SIMPEG_ESELON_JABATAN.ID_ESELON2=2
ORDER BY
ANGGARAN.SIMPEG_KODE_GOLONGAN_PANGKAT.SORT DESC

TSQL Compare 2 select's result and return result with most recent date

Wonder if someone could give me a quick hand. I have 2 select queries (as shown below) and I want to compare the results of both and only return the result that has the most recent date.
So say I have the following 2 results from the queries:-
--------- ---------- ----------------------- --------------- ------ --
COMPANY A EMPLOYEE A 2007-10-16 17:10:21.000 E-mail 6D29D6D5 SYSTEM 1
COMPANY A EMPLOYEE A 2007-10-15 17:10:21.000 E-mail 6D29D6D5 SYSTEM 1
I only want to return the result with the latest date (so the first one). I thought about putting the results into a temporary table and then querying that but just wondering if there's a simpler, more efficient way?
SELECT * FROM (
SELECT fc.accountidname, fc.owneridname, fap.actualend, fap.activitytypecodename, fap.createdby, fap.createdbyname,
ROW_NUMBER() OVER (PARTITION BY fc.accountidname ORDER BY fap.actualend DESC) AS RN
FROM FilteredContact fc
INNER JOIN FilteredActivityPointer fap ON fc.parentcustomerid = fap.regardingobjectid
WHERE fc.statecodename = 'Active'
AND fap.ownerid = '0F995BDC'
AND fap.createdon < getdate()
) tmp WHERE RN = 1
SELECT * FROM (
SELECT fa.name, fa.owneridname, fa.new_technicalaccountmanageridname, fa.new_customerid, fa.new_riskstatusname,
fa.new_numberofopencases, fa.new_numberofurgentopencases, fap.actualend, fap.activitytypecodename, fap.createdby, fap.createdbyname,
ROW_NUMBER() OVER (PARTITION BY fa.name ORDER BY fap.actualend DESC) AS RN
FROM FilteredAccount fa
INNER JOIN FilteredActivityPointer fap ON fa.accountid = fap.regardingobjectid
WHERE fa.statecodename = 'Active'
AND fap.ownerid = '0F995BDC'
AND fap.createdon < getdate()
) tmp2 WHERE RN = 1
if the tables have the same structure (column count and column types to match), then you could just union the results of the two queries, then order by the date desc and then select the top 1.
select top 1 * from
(
-- your first query
union all
-- your second query.
) T
order by YourDateColumn1 desc
You should GROUP BY and use MAX(createdon)