Postgresql select date range between two tables - postgresql

I have two tables that have date fields in them. I want to select data from table 1 where the date is +/- 1 day from any date in table 2.

Try something like this:
-- Return the rows of table1 whose date lies within one day (either side) of
-- at least one date in table2. EXISTS keeps each table1 row at most once and
-- returns only table1 columns; a cross join of the two tables would repeat a
-- table1 row for every table2 date that falls inside the window.
SELECT t1.*
FROM table1 AS t1
WHERE EXISTS (
    SELECT 1
    FROM table2 AS t2
    WHERE t1.date BETWEEN t2.date - INTERVAL '1 day'
                      AND t2.date + INTERVAL '1 day'
)
-- AND ...  (append any further filters on table1 here)

If only +/- 1 day, you could use a workaround like this:
-- Select the table1 rows whose date_col is equal to a table2 date_col value,
-- to that value minus one day, or to that value plus one day. Each branch of
-- the subquery contributes one of those three candidate lists.
SELECT col1, col2, ...
FROM table1
WHERE date_col IN (
    SELECT DISTINCT date_col
    FROM table2
    UNION ALL
    SELECT DISTINCT (date_col - INTERVAL '1 day')
    FROM table2
    UNION ALL
    SELECT DISTINCT (date_col + INTERVAL '1 day')
    FROM table2
);
This has quite good performance because the subquery is calculated only once and its result is cached for the comparison.

Related

Create additional columns based on other column values in PostgreSQL

I have following data in a PostgreSQL table:
trial start_date end_date
1 20_12_2001 20_01_2005
The expected output is below:
trial start_date end_date Date[(start_end_date)] marker_start_end
1 20_12_2001 20_01_2005 20_12_2001 start
1 20_12_2001 20_01_2005 20_01_2005 end
Is there a way to calculate the two additional columns (Date[(start_end_date)], marker_start_end) without a join, using a CASE expression instead?
You can use a lateral join to turn two columns into two rows:
-- Turn each source row into two output rows: one carrying start_date tagged
-- 'start' and one carrying end_date tagged 'end'.
SELECT *
FROM the_table AS t
CROSS JOIN LATERAL (
    VALUES
        (t.start_date, 'start'),
        (t.end_date, 'end')
) AS x (start_end_date, marker);
The UNION ALL solution might be faster though.
UNION ALL
-- First branch: every table1 row with its start_date, labelled 'start'.
SELECT
    trial,
    start_date,
    end_date,
    start_date AS date,
    'start' AS marker_start_end
FROM table1
UNION ALL
-- Second branch: every table1 row again with its end_date, labelled 'end'.
SELECT
    trial,
    start_date,
    end_date,
    end_date AS date,
    'end' AS marker_start_end
FROM table1
UNNEST with CASE
-- The inner query repeats every table1 row twice (branch = 1 and branch = 2);
-- the CASE expressions then pick the start or end value for each copy.
SELECT
    src.trial,
    src.start_date,
    src.end_date,
    CASE WHEN src.branch = 1 THEN src.start_date ELSE src.end_date END AS date,
    CASE WHEN src.branch = 1 THEN 'start' ELSE 'end' END AS marker_start_end
FROM (
    SELECT
        trial,
        start_date,
        end_date,
        unnest(ARRAY[1, 2]) AS branch
    FROM table1
) AS src
Hidden JOIN (but still join)
-- Pair every table1 row with both rows of a two-row constant table; num
-- selects which date to show and marker_start_end carries the label.
SELECT
    t.trial,
    t.start_date,
    t.end_date,
    CASE WHEN m.num = 1 THEN t.start_date ELSE t.end_date END AS date,
    m.marker_start_end
FROM table1 AS t
CROSS JOIN (
    VALUES
        (1, 'start'),
        (2, 'end')
) AS m (num, marker_start_end)
Db fiddle

Use min function without grouping

How can I retrieve the min of a date without group by?
-- Sample data for the question. DECLARE only accepts @-prefixed variable
-- names, so the #-prefixed objects are created as local temporary tables;
-- statements that follow can keep referring to #table and #dimdate.
CREATE TABLE #table
(
    SaleDate date
);

-- yyyymmdd literals are read the same way under every session language and
-- DATEFORMAT setting, unlike m/d/yyyy strings.
INSERT INTO #table (SaleDate)
VALUES
    ('20210708'),
    ('20210721');

-- Calendar rows with the week number each date belongs to.
CREATE TABLE #dimdate
(
    fulldate date,
    WeekNumberOfYear int
);

INSERT INTO #dimdate (fulldate, WeekNumberOfYear)
VALUES
    ('20210704', 28),
    ('20210705', 28),
    ('20210706', 28),
    ('20210707', 28),
    ('20210708', 28),
    ('20210709', 28),
    ('20210710', 28),
    ('20210711', 29),
    ('20210718', 30),
    ('20210719', 30),
    ('20210720', 30),
    ('20210721', 30),
    ('20210722', 30),
    ('20210723', 30),
    ('20210724', 30);
-- For each sale, match the calendar rows that share the sale's week number
-- and report the earliest calendar date of that week.
SELECT
    DATEPART(week, t.saledate) AS wk,
    MIN(d.fulldate) AS Beginning_Week
FROM #table AS t
INNER JOIN #dimdate AS d
    ON DATEPART(week, t.saledate) = d.WeekNumberOfYear
GROUP BY
    DATEPART(week, t.saledate),
    d.WeekNumberOfYear
How can I retrieve the same result as above without a group by?
Do you mean something like this?
-- Attach the overall minimum WeekNumberOfYear from DimDate to every row of
-- #table. The aggregate has no GROUP BY, so the joined derived table always
-- holds exactly one row.
SELECT
    t.dt,
    m.minWeekNumberOfYear
FROM #table AS t
CROSS JOIN (
    SELECT MIN(WeekNumberOfYear) AS minWeekNumberOfYear
    FROM DimDate
) AS m
Just use a windowed aggregate:
-- The empty OVER () makes the whole result set one window, so every row
-- carries the overall minimum dt next to its own dt.
SELECT
    t.dt,
    MIN(t.dt) OVER () AS MinDt
FROM #table AS t;
You can use MIN() window function if you partition by WeekNumberOfYear and use DISTINCT in the SELECT statement so that there are no duplicates:
-- The window MIN is computed per week number on every joined row; DISTINCT
-- then collapses the repeated (week, first date) pairs to one row each.
SELECT DISTINCT
    d.WeekNumberOfYear AS wk,
    MIN(d.fulldate) OVER (PARTITION BY d.WeekNumberOfYear) AS Beginning_Week
FROM #table AS t
INNER JOIN #dimdate AS d
    ON DATEPART(week, t.saledate) = d.WeekNumberOfYear;
See the demo.

PostgreSQL Time Dimension (By Hours and Days) Error

I am building a Time Dimension table in PostgreSQL with DATE_ID and DATE_DESC.
My T-SQL (works perfectly) script is:
-- Session setting: weeks start on Monday.
SET DATEFIRST 1;

-- Recursive CTE that starts at 2019-01-01 00:00 and keeps adding one hour
-- while the next value is not later than 2019-12-31 00:00.
WITH DATES AS (
    SELECT CAST('2019-01-01 00:00:00.000' AS datetime) AS [DATE]
    UNION ALL
    SELECT DATEADD(HH, 1, [DATE])
    FROM DATES
    WHERE DATEADD(HH, 1, [DATE]) <= CAST('2019-12-31' AS datetime)
)
SELECT
    -- CONVERT style 112 renders the date as yyyymmdd, which is then cast to int.
    CONVERT(int, CONVERT(char(8), [DATE], 112)) AS DATE_ID,
    [DATE] AS DATE_DESC
FROM DATES
ORDER BY DATE_ID
-- Lift the default cap on recursion depth; the CTE recurses once per hour.
OPTION (MAXRECURSION 0)
At the moment I'm trying to convert this code to a PostgreSQL-readable one, and it does not work.
Here is mine at the moment:
-- Attempted PostgreSQL port of the T-SQL time-dimension script. The asker
-- reports that it does not work; the notes below mark the suspect parts.
-- NOTE(review): SET is given an expression here rather than a setting name;
-- presumably a stand-in for T-SQL's SET DATEFIRST - confirm and drop.
set EXTRACT(DOW FROM TIMESTAMP '2019-01-01 00:00:00.000')+1
-- NOTE(review): DATES refers to itself below but is not declared WITH RECURSIVE.
;WITH DATES AS (
SELECT CAST('2019-01-01 00:00:00.000' AS timestamp) AS DATE
UNION ALL
-- The recursive branch adds one hour to CURRENT_DATE, not to the DATE value
-- of the previous row, so it never advances from the seed row.
SELECT CURRENT_DATE + INTERVAL '1 hour'
FROM DATES
WHERE CURRENT_DATE + INTERVAL '1 hour' <= CAST('2019-12-31' AS timestamp)
)
SELECT DATE_ID, DATE_DESC from
-- NOTE(review): 'hh' in a to_char template is the 12-hour field; confirm
-- whether a 24-hour value was intended for DATE_ID.
(SELECT cast(to_char((DATE)::TIMESTAMP,'yyyymmddhhmiss') as BIGInt) AS DATE_ID,
DATE AS DATE_DESC
FROM
DATES)a
order by 1
-- OPTION (MAXRECURSION 0) is a SQL Server query hint carried over from the
-- T-SQL original. NOTE(review): PostgreSQL has no such clause - confirm and remove.
OPTION (MAXRECURSION 0)
I need all the hours (24h) between 2019-01-01 and 2019-12-31 . At the moment I think OPTION (MAXRECURSION 0) and set EXTRACT(DOW FROM TIMESTAMP '2019-01-01 00:00:00.000')+1 is not working properly.
It's a problem with the recursive CTE. In PostgreSQL, your desired query will look like the one below:
-- Generate one row per hour, starting at 2019-01-01 00:00 and ending with
-- 2019-12-31 00:00 (the same bound as the T-SQL original; raise it if the
-- remaining hours of 31 December are needed).
WITH RECURSIVE DATES AS (
    -- Seed row: the first hour of the range.
    SELECT CAST('2019-01-01 00:00:00.000' AS timestamp) AS date_
    UNION ALL
    -- Each step adds one hour to the previous row until the bound is reached.
    SELECT date_ + INTERVAL '1 hour'
    FROM DATES
    WHERE date_ + INTERVAL '1 hour' <= CAST('2019-12-31' AS timestamp)
)
SELECT
    -- HH24 rather than HH: HH is the 12-hour field, so 01:00 and 13:00 would
    -- produce the same DATE_ID and the ids would neither be unique nor sort
    -- chronologically.
    CAST(to_char(date_, 'YYYYMMDDHH24MISS') AS bigint) AS DATE_ID,
    date_ AS DATE_DESC
FROM DATES
ORDER BY DATE_ID
DEMO

How to "loop" through dates in PostgreSQL

Say I have a query with a nested query inside of a where condition.
-- Counts ids whose create_date equals the first instant of the current month
-- and whose id is not in the list returned by the subquery on some_table.
SELECT COUNT(id)
-- NOTE(review): "table" is a reserved word; presumably a placeholder for the
-- real table name - confirm.
FROM table
WHERE create_date = date_trunc('month', current_timestamp)
and id NOT IN (
SELECT DISTINCT id
FROM some_table
-- NOTE(review): this predicate has no comparison; presumably a date column
-- of some_table was meant to be compared with the month start - confirm.
WHERE date_trunc('month', current_timestamp)
)
This query gets the metric for this month. However, what if I want it for all months?
I tried this query, although it doesn't seem to run/takes a very long time:
-- Per calendar month of t.create_date: count the ids of t that do not appear
-- among the some_table ids created in that same month.
SELECT date_trunc('month', t.create_date), COUNT(id)
-- NOTE(review): "table" is a reserved word; presumably a placeholder for the
-- real table name - confirm.
FROM table t
-- The subquery is correlated on t.create_date, so its id list depends on the
-- outer row being tested.
-- NOTE(review): if some_table.id can be NULL, NOT IN is never true for that
-- month; a NOT EXISTS correlated on both id and month avoids that and is
-- likely to plan as an anti-join - verify with EXPLAIN.
WHERE id NOT IN (
SELECT DISTINCT id
FROM some_table tt
WHERE date_trunc('month', tt.create_date)= date_trunc('month', t.create_date)
)
GROUP BY date_trunc('month', t.create_date)
I would like to execute this command via Postgres CLI (from the command line).
Any guidance to make this query more efficient or logical appreciated!

SQL Server SUM() for DISTINCT records

I have a field called "Users", and I want to run SUM() on that field that returns the sum of all DISTINCT records. I thought that this would work:
-- Add up each different users value once.
SELECT SUM(DISTINCT t.users)
FROM table_name AS t
But it's not selecting DISTINCT records, it's just running as if I had run SUM(table_name.users).
What would I have to do to add only the distinct records from this field?
Use count()
-- Number of different non-NULL users values (a count, not a sum).
SELECT COUNT(DISTINCT t.users)
FROM table_name AS t
SQLFiddle demo
This code seems to indicate sum(distinct ) and sum() return different values.
-- Four sample rows for column a; the value 1 appears twice, so the DISTINCT
-- aggregates and the plain aggregates differ.
WITH t AS (
    SELECT 1 AS a
    UNION ALL
    SELECT '1'
    UNION ALL
    SELECT '2'
    UNION ALL
    SELECT '4'
)
SELECT
    SUM(DISTINCT a) AS DistinctSum,
    SUM(a) AS allSum,
    COUNT(DISTINCT a) AS distinctCount,
    COUNT(a) AS allCount
FROM t
Do you actually have non-distinct values?
-- List every users value that occurs on more than one row, with its row count.
SELECT
    COUNT(*),
    users
FROM table_name
GROUP BY users
HAVING COUNT(*) > 1
If not, the sums will be identical.
You can see for yourself that distinct works with the following example. Here I create a subquery with duplicate values, then I do a sum distinct on those values.
-- Inline sample with duplicated values (1, 1, 2, 2) to compare a distinct sum
-- with a regular sum.
SELECT
    SUM(DISTINCT src.x) AS DistinctSum,
    SUM(src.x) AS RegularSum
FROM (
    VALUES (1), (1), (2), (2)
) AS src (x)
You can see that the distinct sum column returns 3 and the regular sum returns 6 in this example.
You can use a sub-query:
-- Deduplicate users first, then add the remaining values. The derived table
-- carries an alias because SQL Server rejects a subquery in FROM without one.
SELECT SUM(d.users)
FROM (
    SELECT DISTINCT users
    FROM table_name
) AS d;
SUM(DISTINCTROW table_name.something)
It worked for me (innodb).
Description - "DISTINCTROW omits data based on entire duplicate records, not just duplicate fields." http://office.microsoft.com/en-001/access-help/all-distinct-distinctrow-top-predicates-HA001231351.aspx
-- rn numbers the rows inside each group of equal users values, so keeping
-- rn = 1 leaves exactly one row per distinct value to be summed.
;WITH cte AS (
    SELECT
        table_name.users,
        ROW_NUMBER() OVER (PARTITION BY users ORDER BY users) AS rn
    FROM table_name
)
SELECT SUM(users)
FROM cte
WHERE rn = 1
SQL Fiddle
Try here yourself
TEST
-- Self-contained demonstration. A table variable must have an @-prefixed
-- name; DECLARE does not accept a #-prefixed one.
DECLARE @table_name TABLE (Users INT);

INSERT INTO @table_name (Users)
VALUES (1), (1), (1), (3), (3), (5), (5);

-- rn numbers the rows inside each group of equal users values, so keeping
-- rn = 1 leaves one row per distinct value (1, 3 and 5).
WITH cte AS (
    SELECT
        users,
        ROW_NUMBER() OVER (PARTITION BY users ORDER BY users) AS rn
    FROM @table_name
)
SELECT SUM(users) AS DisSum
FROM cte
WHERE rn = 1;
Result
DisSum
9
If circumstances make it difficult to weave a "distinct" into the sum clause, it will usually be possible to add an extra "where" clause to the entire query - something like:
-- Sum ColToSum over only those rows for which no other row has the same
-- ColToSum and a smaller ID, i.e. one row per distinct value.
SELECT SUM(t.ColToSum)
FROM SomeTable AS t
WHERE NOT EXISTS (
    SELECT 1
    FROM SomeTable AS earlier
    WHERE earlier.ColToSum = t.ColToSum
      AND earlier.ID < t.ID
)
May be a duplicate of
Trying to sum distinct values SQL
As per Declan_K's answer:
Get the distinct list first...
-- Reduce YourTable to its distinct (tracking number, ship cost) pairs, then
-- total the cost of those pairs.
SELECT SUM(pairs.COST)
FROM (
    SELECT DISTINCT
        [Tracking #] AS TRACK,
        [Ship Cost] AS COST
    FROM YourTable
) AS pairs