Postgres distinct union only for specific columns - postgresql

I have two sets of data, one of which is dynamically generated.
If I leave off the `state` column, the UNION works perfectly, because that column doesn't really exist in the second (generated) dataset. My question is: how can I make the UNION ignore a column when de-duplicating, so that it actually combines the two datasets? As written, it behaves the same as UNION ALL. In other words, I prefer the first table and want any row from the second dataset to be dropped if it already exists in the first one.
-- Logged occurrences within the next three weeks, UNIONed with generated
-- 'draft' occurrences (one row per event per weekly slot).
SELECT
    event_id,
    start_at,
    state
FROM event_logs
WHERE start_at BETWEEN current_date AND current_date + interval '3 weeks'
UNION
SELECT
    id AS event_id,
    -- Weekly slots: start of the current week, shifted to the event's ISO
    -- weekday and time of day, repeated until three weeks from today.
    generate_series(
        date_trunc('week', current_date)::date
            + (extract(isodow FROM start_at)::int - 1)
            + start_at::time,
        current_date + interval '3 weeks',
        interval '1 week'
    ) AS start_at,
    'draft' AS state
FROM events
Update, also tried:
-- Candidate draft occurrences: every event repeated weekly, from the current
-- week up to three weeks ahead.
WITH future_logs AS (
    SELECT
        id AS event_id,
        generate_series(
            date_trunc('week', current_date)::date
                + (extract(isodow FROM start_at)::int - 1)
                + start_at::time,
            current_date + interval '3 weeks',
            interval '1 week'
        ) AS start_at,
        'draft' AS state
    FROM events
)
SELECT
    fl.event_id,
    fl.start_at,
    fl.state
FROM future_logs AS fl
LEFT JOIN event_logs AS el
    ON fl.event_id = el.event_id
    AND fl.start_at = el.start_at
-- This predicate is on the right-hand table, so only drafts that found a
-- matching log row inside the window survive it.
WHERE el.start_at BETWEEN current_date AND current_date + interval '3 weeks'
But got too few results 77 vs ~1000 expected.

Just add NOT EXISTS() to the second leg, and you can use UNION ALL to avoid sort/merging.
-- Real log rows for the next three weeks, plus a generated 'draft' row for
-- every weekly slot of every event that has no log row at that exact slot.
SELECT event_id, start_at, state
FROM event_logs
WHERE start_at BETWEEN current_date AND current_date + interval '3 weeks'
-- UNION ALL is enough: the anti-join below already removes the overlap,
-- so no sort/merge de-duplication pass is needed.
UNION ALL
SELECT ev.id AS event_id
     , slot.start_at
     , 'draft' AS state
FROM events ev
-- The series is generated in FROM (not in the select list) so that each
-- generated start_at is visible to the NOT EXISTS test. Matching on event_id
-- alone would drop ALL draft slots of an event as soon as one slot is logged.
CROSS JOIN LATERAL generate_series(
           date_trunc('week', current_date)::date
               + (extract(isodow FROM ev.start_at)::int - 1)
               + ev.start_at::time
         , current_date + interval '3 weeks'
         , interval '1 week'
     ) AS slot(start_at)
WHERE NOT EXISTS (
    SELECT 1
    FROM event_logs nx
    WHERE nx.event_id = ev.id
      AND nx.start_at = slot.start_at
      -- Only log rows that the first leg actually returns may suppress a draft.
      AND nx.start_at BETWEEN current_date AND current_date + interval '3 weeks'
)
;

-- One row per calendar day of the last two weeks: the highest state recorded
-- for device_event id 49 on that day, or NULL when nothing was recorded.
SELECT DISTINCT ON (date_day)
    date_day,
    state
FROM (
    -- Calendar spine: one placeholder row per day with no state.
    SELECT day::date AS date_day, NULL AS state
    FROM generate_series(now() - interval '2 week'
                       , now()
                       , interval '1 day') AS day
    UNION ALL
    -- Actual data, collapsed to one row per day.
    -- (The table alias is des; the original referenced an undefined alias e.)
    SELECT date_trunc('day', des.updated_at)::date AS date_day,
           max(des.state) AS state
    FROM device_event AS des
    WHERE des.id = 49
      AND des.updated_at >= now() - interval '2 week'
    GROUP BY date_trunc('day', des.updated_at)::date
) AS dba
-- DISTINCT ON keeps the first row per date_day; NULLS LAST makes the real
-- state win over the placeholder deterministically.
ORDER BY date_day, state NULLS LAST

I would add one other column taborder into your UNION query to ensure simple ordering of the rows and use window function row_number() over(...) in following way:
-- Prefer rows from original_table; fall back to draft_table only for
-- (event_id, start_at) pairs that the original table does not have.
SELECT
    event_id,
    start_at,
    state
FROM (
    SELECT
        event_id,
        start_at,
        state,
        -- taborder 1 sorts before 2, so whenever an original row exists for a
        -- pair it is the one that receives rownum = 1.
        row_number() OVER (PARTITION BY event_id, start_at ORDER BY taborder) AS rownum
    FROM (
        SELECT
            event_id,
            start_at,
            state,
            1 AS taborder
        FROM original_table
        -- UNION ALL: the two legs always differ in taborder, so UNION could
        -- never merge rows across them; the rownum = 1 filter does the
        -- de-duplication and the extra sort is avoided.
        UNION ALL
        SELECT
            event_id,
            start_at,
            state,
            2 AS taborder
        FROM draft_table
    ) AS src0
) AS src1
WHERE rownum = 1
ORDER BY event_id, start_at, state

Related

Dynamic value passing in Postgres

Here is a complex query to which I need to pass some dates dynamically. As of now I have hardcoded the two dates '2021-08-01' and '2022-07-31'.
I need to pass these dates dynamically so that the window always covers the trailing 12 months: for month 2022-06, the dates passed should be '2021-07-01' and '2022-06-30'.
If we take 2022-05, the dates passed should be '2021-06-01' and '2022-05-31'.
How can we achieve this? Any suggestions or help would be much appreciated.
below is the query for reference
-- Orders placed inside the reporting window (fixed literal dates).
WITH base AS (
    SELECT
        created_at AS period,
        order_number,
        TRIM(email) AS email,
        is_first_order
    FROM orders
    WHERE created_at::DATE BETWEEN '2021-08-01' AND '2022-07-31'
),
-- Per 'YYYY-MM' month: distinct e-mails on first orders and distinct order
-- numbers on non-first orders.
base_agg AS (
    SELECT
        TO_CHAR(period, 'YYYY-MM') AS period,
        COUNT(DISTINCT email) FILTER (WHERE is_first_order IS TRUE) AS new_users,
        COUNT(DISTINCT order_number) FILTER (WHERE is_first_order IS FALSE) AS returning_orders
    FROM base
    GROUP BY TO_CHAR(period, 'YYYY-MM')
),
-- Running total of new users in month order; rno = 1 marks the latest month.
base_cumulative AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY period DESC) AS rno,
        period,
        new_users,
        returning_orders,
        SUM(new_users) OVER (
            ORDER BY period ASC
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS cumulative_total
    FROM base_agg
)
-- Single summary row: latest month, its cumulative customer count, total
-- returning orders over the window, and their ratio (NULL when the count is 0).
SELECT
    (SELECT period FROM base_cumulative WHERE rno = 1) AS period,
    (SELECT cumulative_total FROM base_cumulative WHERE rno = 1) AS cumulated_customers,
    SUM(returning_orders) AS returning_orders,
    SUM(returning_orders)
        / NULLIF((SELECT cumulative_total FROM base_cumulative WHERE rno = 1), 0) AS rate
FROM base_cumulative
You can calculate the end of current month based on NOW() and some logic, the same can be applied with the rest of the calculation
-- Bounds of the trailing 12-month window that ends with the current month.
-- Both bounds are derived from the FIRST day of the current month, so the
-- lower bound is always a first-of-month. Subtracting a year from the month
-- END and then adding a day lands on Feb 29 when run in the February that
-- follows a leap year (e.g. 2025-02-28 - 1 year + 1 day = 2024-02-29).
select date_trunc('month', now())::date + interval '1 month - 1 day' AS end_of_this_month,
       date_trunc('month', now())::date - interval '11 months' AS first_day_of_prev_year_month
;
Result
end_of_this_month | first_day_of_prev_year_month
---------------------+------------------------------
2022-08-31 00:00:00 | 2021-09-01 00:00:00
(1 row)

Speed up postgres read window query on overlapping date ranges

I have a table (simplified) that contains readings like follows
meter_id read_date value
1 2017-01-01 10
1 2017-01-15 15
1 2017-02-05 20
1 2017-04-15 22
2 2016-12-14 120
2 2016-03-02 200
This table contains millions of readings.
And I have a view (or query) that goes something like
-- Pairs every reading with the next reading of the same meter (by read_date).
-- read_end_date is the next read_date; for a meter's last reading it falls
-- back to a date derived from the month of read_date.
select meter_id,
       read_date as start_read_date,
       value as start_value,
       CASE
           WHEN lead(read_date) OVER read_wdw IS NULL
               -- NOTE(review): the original had an unmatched ')' here. The
               -- parentheses are rebalanced with every term kept; confirm
               -- whether the second "+ 1 mon - 1 day" is intended.
               THEN (date_trunc('month'::text, read_date + '1 day'::interval) + '1 mon'::interval - '1 day'::interval) + '1 mon'::interval - '1 day'::interval
           ELSE (lead(read_date) OVER read_wdw)::date
       END::date AS read_end_date,
       lead(value) OVER read_wdw AS end_value
from reads_table
WINDOW read_wdw AS (PARTITION BY meter_id ORDER BY read_date);
I need to be able to query dates within a certain month. So start_read_date, end_read_date between e.g. '2017-01-01' and '2017-01-31'
So e.g.
-- Rows whose [start, end] reading interval overlaps January 2017 (both bounds
-- inclusive). The view exposes the end column as read_end_date.
select * from my_view where daterange(start_read_date, read_end_date, '[]') && daterange('2017-01-01', '2017-01-31', '[]')
Which with the above table would return
meter_id start_read_date start_value end_read_date end_value
1 2017-01-01 10 2017-01-15 15
1 2017-01-15 15 2017-02-05 20
2 2016-12-14 120 2016-03-02 200
Is there a way to do a similar query on this table without having to build the whole view first to get my desired result?
Something like (which doesn't work)
-- Attempted variant that filters inside the base query instead of on the view.
-- Kept exactly as posted; it is presented as NOT working.
select meter_id, read_date as start_read_date, value as start_value,
CASE
-- As posted, the THEN expression below contains an unmatched ')'.
WHEN lead(read_date) OVER read_wdw IS NULL THEN date_trunc('month'::text, read_date + '1 day'::interval) + '1 mon'::interval - '1 day'::interval) + '1 mon'::interval - '1 day'::interval
ELSE lead(.read_date) OVER read_wdw::date
END::date AS read_end_date,
lead(value) OVER read_wdw AS end_value,
from reads_table
where read_date between '2017-01-01' and '2017-01-31'
-- PostgreSQL does not allow window functions in WHERE. The name read_window
-- also differs from the window declared below (read_wdw).
or lead(read_date) over read_window between '2017-01-01' and '2017-01-31'
WINDOW read_wdw AS (PARTITION BY meter_id ORDER BY read_date);
Actually wrapping it in another select seems to resolve...
-- Compute the start/end pair for every reading in a subquery, then filter on
-- the derived columns in the outer query (window functions cannot be used
-- directly in WHERE).
select *
from (
    select meter_id,
           read_date as start_read_date,
           value as start_value,
           CASE
               WHEN lead(read_date) OVER read_wdw IS NULL
                   -- NOTE(review): the original had an unmatched ')' here. The
                   -- parentheses are rebalanced with every term kept; confirm
                   -- whether the second "+ 1 mon - 1 day" is intended.
                   THEN (date_trunc('month'::text, read_date + '1 day'::interval) + '1 mon'::interval - '1 day'::interval) + '1 mon'::interval - '1 day'::interval
               ELSE (lead(read_date) OVER read_wdw)::date
           END::date AS read_end_date,
           lead(value) OVER read_wdw AS end_value
    from reads_table
    WINDOW read_wdw AS (PARTITION BY meter_id ORDER BY read_date)
) sub
-- Column names must match the aliases defined above (start_read_date and
-- read_end_date). The bounds are the example month used in the question.
where start_read_date between '2017-01-01' and '2017-01-31'
   or read_end_date between '2017-01-01' and '2017-01-31'

Generating series Postgres

I want to be able to generate groups of row by days, weeks, month or depending on the interval I set
Following this solution, it works when granularity is by month. But trying the interval of 1 week, no records are being returned.
This is the rows on my table
This is the current query I have for per month interval, which works perfectly.
-- Monthly report: one row per month of the series, with profit and revenue
-- summed over one order_item row per order (NULL for months without orders).
SELECT *
FROM (
    -- Calendar spine: first day of each month in the range.
    SELECT day::date
    FROM generate_series(timestamp '2018-09-01'
                       , timestamp '2018-12-01'
                       , interval '1 month') day
) d
LEFT JOIN (
    SELECT date_trunc('month', created_date)::date AS day
         , SUM(escrow_amount) AS profit
         , sum(total_amount) AS revenue
    FROM (
        -- created_date has to be projected here because the outer aggregate
        -- buckets on it (the original projected create_time instead).
        -- DISTINCT ON without ORDER BY keeps an arbitrary row per order_id.
        select distinct on (order_id) order_id, escrow_amount, total_amount, created_date
        from order_item
        WHERE created_date >= date '2018-09-01'
          AND created_date <= date '2018-12-01'
          -- AND ... more conditions
    ) t2
    GROUP BY date_trunc('month', created_date)::date
) t USING (day)
ORDER BY day;
Result from this query
And this is the per week interval query. I will reduce the range to two months for brevity.
-- Weekly variant of the report, kept exactly as posted in the question.
SELECT *
FROM (
SELECT day::date
-- The series starts on the literal 2018-09-01 and advances in 7-day steps...
FROM generate_series(timestamp '2018-09-01'
, timestamp '2018-11-01'
, interval '1 week') day
) d
LEFT JOIN (
-- ...while date_trunc('week', ...) yields the start of the week for each order.
SELECT date_trunc('week', created_date)::date AS day
, SUM(escrow_amount) AS profit, sum(total_amount) as revenue
FROM (
select distinct on (order_id) order_id, escrow_amount, total_amount, create_time from order_item
WHERE created_date >= date '2018-09-01'
AND created_date <= date '2018-11-01'
-- AND ... more conditions
) t2 GROUP BY 1
-- The join needs both sides to produce the same day value to pair up.
) t USING (day)
ORDER BY day;
Take note that I have records from October, but the result here doesn't show anything for October dates.
Any idea what I am missing here?
The dates produced by generate_series in your first subquery are not truncated to the beginning of the week, whereas the dates in the joined subquery are.
-- date_trunc takes the field name first and the value second
date_trunc('week', '2018-09-01'::date)::date
is equal to
'2018-08-27'::date
so your join using day is not working
'2018-09-01'::date <> '2018-08-27'::date
Your query should look more like that:
-- Weekly report: both sides of the join are aligned to the start of the week,
-- so every order bucket has a matching row in the calendar spine.
SELECT *
FROM (
    SELECT day::date
    -- Start the series at the beginning of the week containing 2018-09-01, so
    -- the generated days land on the same day as date_trunc('week', ...) below.
    FROM generate_series(date_trunc('week', timestamp '2018-09-01')
                       , timestamp '2018-11-01'
                       , interval '1 week') day
) d
LEFT JOIN (
    SELECT date_trunc('week', created_date::date)::date AS day
         , SUM(escrow_amount) AS profit
         , sum(total_amount) AS revenue
    FROM (
        -- created_date has to be projected here because the outer aggregate
        -- buckets on it (the original projected create_time instead).
        select distinct on (order_id) order_id, escrow_amount, total_amount, created_date
        from order_item
        -- Half-open range on the bare column: same rows as comparing
        -- created_date::date to 2018-09-01 .. 2018-11-01, but an index on
        -- created_date stays usable.
        WHERE created_date >= date '2018-09-01'
          AND created_date <  date '2018-11-02'
          -- AND ... more conditions
    ) t2
    GROUP BY date_trunc('week', created_date::date)::date
) t USING (day)
-- Skip the week bucket that starts before the requested range (the part of the
-- first week that precedes 2018-09-01).
WHERE day >= '2018-09-01'
ORDER BY day;

Unify select sql. Postgres

Can I combine the two SELECT statements below into a single query, so that the first column returns the result of the first query and the second column returns the result of the second?
-- Two independent counts over rrhh.empleado, one statement each.
-- (SQL line comments start with "--"; "//" is not valid comment syntax.)
select count(*) from rrhh.empleado where fecha_contratado > current_date - interval '100 days'; -- select1
select count(*) from rrhh.empleado where fecha_fin_contrato > current_date - interval '100 days'; -- select2
Thank you
try:
-- Flag every employee row once per condition (1 = matches, 0 = does not),
-- then total the flags in a single pass.
WITH flags AS (
    SELECT
        CASE
            WHEN fecha_contratado > current_date - interval '100 days' THEN 1
            ELSE 0
        END AS contratado_flag,
        CASE
            WHEN fecha_fin_contrato > current_date - interval '100 days' THEN 1
            ELSE 0
        END AS fin_contrato_flag
    FROM rrhh.empleado
)
SELECT
    sum(contratado_flag),
    sum(fin_contrato_flag)
FROM flags
;
This is a typical case for conditional aggregation:
-- Conditional aggregation: one scan of rrhh.empleado, each count only sees
-- the rows that pass its own FILTER condition.
SELECT
    count(*) FILTER (WHERE fecha_contratado > current_date - interval '100 days'),
    count(*) FILTER (WHERE fecha_fin_contrato > current_date - interval '100 days')
FROM rrhh.empleado
For versions earlier than 9.4, you can use a CASE expression instead (relying on the fact that most aggregates ignore NULL values):
-- Same result without FILTER: the CASE yields NULL for non-matching rows and
-- count(expression) skips NULLs.
SELECT
    count(CASE WHEN fecha_contratado > current_date - interval '100 days' THEN 1 END),
    count(CASE WHEN fecha_fin_contrato > current_date - interval '100 days' THEN 1 END)
FROM rrhh.empleado
Note: these queries will scan the whole table, while your original queries could make use of indexes on fecha_contratado and fecha_fin_contrato. If performance matters to you, you could append a filter to these queries too:
-- greatest(), not least(): a row must be kept when EITHER date is recent.
-- least(...) > x is true only when BOTH dates are recent, which would drop
-- rows that one of the two counts still needs. greatest() ignores NULL
-- arguments, so a missing contract end date does not exclude the row.
where greatest(fecha_contratado, fecha_fin_contrato) > current_date - interval '100 days'
and you could index the expression: greatest(fecha_contratado, fecha_fin_contrato).

Compare current data with data from the same date a year ago (Postgres)

I am trying to return the data of the orders of the current date and the orders of the same date a year ago.
My idea was to create two similar tables and merge the date by adding WHERE clauses. But it seems to not work.
Could you have a look at my code and see if you identify something wrong?
My outcome of this is totally blank.
Thanks a lot!
-- Compares weekly order totals per channel with the same week one year
-- earlier, for two report lines: SEO acquisition orders and all CRM orders.
WITH
-- Current period: rows dated after yesterday 00:00, bucketed by week start.
orders_channels AS (
    SELECT
        'BLH' AS brand,
        date_trunc('week', date)::date AS date,
        channel,
        order_type,
        CASE
            WHEN (date_trunc('week', date)::date = current_date - interval '1 day') THEN 'current'
            ELSE 'previous'
        END AS week_type,
        sum(orders) AS orders
    FROM de_data.orders_daily_channel_attribution_dashboard
    WHERE date > date_trunc('day', current_date) - interval '1 day'
    -- Positional on purpose: the output alias "date" collides with the input
    -- column "date", and GROUP BY date would pick the raw input column.
    GROUP BY 1, 2, 3, 4
),
-- Comparison period: everything since one year before the start of the
-- current week, bucketed the same way.
wow_orders_channels AS (
    SELECT
        'BLH' AS brand,
        date_trunc('week', date)::date AS date,
        channel,
        order_type,
        CASE
            WHEN (date_trunc('week', date)::date = current_date - interval '1 day') THEN 'current'
            ELSE 'previous'
        END AS week_type,
        sum(orders) AS orders
    FROM de_data.orders_daily_channel_attribution_dashboard
    WHERE date >= date_trunc('week', current_date) - INTERVAL '1 year'
    GROUP BY 1, 2, 3, 4
)
SELECT
    *
FROM (
    SELECT
        o.brand,
        date_trunc('week', o.date)::date AS week,
        'SEO_ACQ' AS name,
        o.orders,
        wow.orders AS wow_orders
    FROM orders_channels o
    JOIN wow_orders_channels wow
        -- Both date columns are week starts. A week start minus one calendar
        -- year (365/366 days) is never itself a week start, so joining on
        -- "o.date - interval '1 year'" matches nothing. 52 weeks (364 days)
        -- keeps the weekday aligned.
        ON wow.date = o.date - 364
        -- Compare like with like: without the channel match every current row
        -- pairs with the prior-year rows of all channels.
        AND wow.channel = o.channel
        AND wow.order_type = o.order_type
    WHERE o.channel = 'SEO'
      AND o.order_type = 'ACQUISITION'
    UNION ALL
    SELECT
        o.brand,
        date_trunc('week', o.date) AS week,
        'CRM_ORDERS' AS name,
        SUM(o.orders),
        sum(wow.orders) AS wow_orders
    FROM orders_channels o
    JOIN wow_orders_channels wow
        ON wow.date = o.date - 364
        AND wow.channel = o.channel
        AND wow.order_type = o.order_type
    WHERE o.channel = 'CRM'
    GROUP BY 1, 2, 3
) x
ORDER BY name, week