Generate random values from table - PostgreSQL

I'd like to generate random values to fill a table.
First, I have a city table:
CREATE TABLE t_city_ci (
ci_id SERIAL PRIMARY KEY,
ci_name VARCHAR(100) NOT NULL
);
So I insert random values like this:
INSERT INTO t_city_ci ("ci_name")
SELECT DISTINCT d.str
FROM (
    SELECT (
        SELECT string_agg(x, '') AS str
        FROM (
            SELECT chr(ascii('A') + (random() * 25)::integer)
            -- reference 'b' so the subquery is correlated and re-evaluated per row
            FROM generate_series(1, 10 + b * 0)
        ) AS y(x)
    )
    FROM generate_series(1, 10000) AS a(b)
) AS d;
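A simpler alternative, if the names just need to look distinct rather than letter-based, is to hash a random value (a minimal sketch; the 'City_' prefix is only for readability):
INSERT INTO t_city_ci (ci_name)
SELECT 'City_' || md5(random()::text)
FROM generate_series(1, 10000);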
Now, I have a temperature table that looks like this:
CREATE TABLE dw_core.t_temperatures_te (
te_id SERIAL PRIMARY KEY,
ci_id INTEGER,
te_temperature FLOAT NOT NULL,
te_date TIMESTAMP NOT NULL DEFAULT NOW()
);
How can I fill the temperature table with:
a random date from last year,
a random temperature between -30 and 50,
and random ci_id values from the t_city_ci table?
I tried this, but the date never changes:
INSERT INTO dw_core.t_temperatures_te ("ci_id","te_temperature","te_date")
SELECT *
FROM (
    SELECT (random() * (SELECT MAX(ci_id) FROM dw_core.t_city_ci) + 1)::integer
    -- reference 'b' so it is correlated and re-evaluated
    FROM generate_series(1, 100000)
) AS y
, (SELECT random() * -60 + 45 FROM generate_series(1, 1005)) d(f)
, (SELECT timestamp '2014-01-10 20:00:00' +
          random() * (timestamp '2014-01-20 20:00:00' -
                      timestamp '2016-01-10 10:00:00')) dza(b)
LIMIT 1000000;
Thanks a lot

Something like this?
select *
from (
    select (random() * 100000)::integer as ci_id,
           -30 + (random() * 80) as temp,
           '2014-01-01'::date + (random() * 365 * '1 day'::interval) as time_2014
    from generate_series(1, 1000000) s
) foo
inner join t_city_ci c on c.ci_id = foo.ci_id;
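The reason te_date never changed in the original attempt is that each extra item in the FROM list is a subquery producing a single row: random() inside it is evaluated once, and that one value is cross-joined to every generated row. Keeping all the random() calls in the select list of the row-generating subquery, as above, re-evaluates them per row. As an actual INSERT this could look like the following (a sketch assuming the table names from the question, with ci_id values densely numbered from 1):
INSERT INTO dw_core.t_temperatures_te (ci_id, te_temperature, te_date)
SELECT foo.ci_id, foo.temp, foo.time_2014
FROM (
    select (random() * 100000)::integer as ci_id,
           -30 + (random() * 80) as temp,
           '2014-01-01'::date + (random() * 365 * '1 day'::interval) as time_2014
    from generate_series(1, 1000000) s
) foo
inner join t_city_ci c on c.ci_id = foo.ci_id;
Note that the join doubles as a filter: generated ids with no match in t_city_ci are dropped, so the insert ends up with somewhat fewer than 1,000,000 rows.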
Here's a sample of the generated data:
select (random() * 100000)::integer as ci_id,
       -30 + (random() * 80) as temp,
       '2014-01-01'::date + (random() * 365 * '1 day'::interval) as time_2014
from generate_series(1, 10);
ci_id | temp | time_2014
-------+-------------------+----------------------------
84742 | 31.6278865475337 | 2014-10-16 21:36:45.371176
16390 | 10.665458049935 | 2014-11-13 19:59:54.148177
87067 | 43.2082599369847 | 2014-06-01 16:14:43.021094
25718 | -7.78245567240867 | 2014-07-23 05:53:10.036914
99538 | -5.82924078024423 | 2014-06-08 06:44:02.081918
71720 | 22.3102275898262 | 2014-06-15 08:24:00.327841
24740 | 4.65809369210996 | 2014-05-19 02:20:58.804213
56861 | -20.750980894431 | 2014-10-01 06:09:54.117367
47929 | -24.4018202994027 | 2014-11-24 13:39:54.096337
30772 | 46.7239395141247 | 2014-08-27 04:50:46.785239
(10 rows)

Related

postgres, group by date, and bucketize per hour

I would like to create a result object that can be used with Grafana for a heatmap. In order to display the data correctly, I need the output to be like:
| date | 00:00 | 01:00 | 02:00 | 03:00 | ...etc |
| 2023-01-01 | 1 | 2 | 0 | 1 | ... |
| 2023-01-02 | 0 | 0 | 1 | 1 | ... |
| 2023-01-03 | 4 | 0 | 2 | 0 | ... |
my data table structure:
trades
-----
id
closed_at
asset
So far, I know that I need to use generate_series with an interval to return the hours, but I need my query to plot these hours as columns; I've not been able to do that, as it's getting a bit too advanced.
So far I have the following query:
SELECT
    closed_at::DATE,
    COUNT(id)
FROM trades
GROUP BY 1
ORDER BY 1
It now shows the number of rows grouped by day; I want to aggregate the data further so it outputs the count per hour, as shown above.
Thanks for your help!
You can add more columns following the same pattern; here I only added 0:00 to 5:00.
filter usage: https://www.postgresql.org/docs/current/sql-expressions.html#SYNTAX-AGGREGATES
date_trunc usage: https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TRUNC
BEGIN;
CREATE temp TABLE trades (
    id bigint GENERATED BY DEFAULT AS IDENTITY,
    closed_at timestamp,
    asset text
) ON COMMIT DROP;
INSERT INTO trades (closed_at)
SELECT date '2023-01-01' + interval '10 min' * (random() * i * 10)::int
FROM generate_series(1, 10) g (i);
INSERT INTO trades (closed_at)
SELECT date '2023-01-02' + interval '10 min' * (random() * i * 10)::int
FROM generate_series(1, 10) g (i);
SELECT
    closed_at::date
    ,COUNT(id) FILTER (WHERE date_trunc('hour', closed_at) = closed_at::date) AS "0:00"
    ,COUNT(id) FILTER (WHERE date_trunc('hour', closed_at) = closed_at::date + interval '1 hour') AS "1:00"
    ,COUNT(id) FILTER (WHERE date_trunc('hour', closed_at) = closed_at::date + interval '2 hour') AS "2:00"
    ,COUNT(id) FILTER (WHERE date_trunc('hour', closed_at) = closed_at::date + interval '3 hour') AS "3:00"
    ,COUNT(id) FILTER (WHERE date_trunc('hour', closed_at) = closed_at::date + interval '4 hour') AS "4:00"
    ,COUNT(id) FILTER (WHERE date_trunc('hour', closed_at) = closed_at::date + interval '5 hour') AS "5:00"
FROM
    trades
GROUP BY
    1;
END;
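If the consumer can do the pivoting itself (Grafana heatmap panels can often work with long-format data), a simpler alternative is to return one row per (day, hour) bucket instead of one column per hour; a sketch against the same table:
SELECT closed_at::date AS day,
       extract(hour FROM closed_at)::int AS hour,
       count(*) AS trades
FROM trades
GROUP BY 1, 2
ORDER BY 1, 2;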

How to use date_part function to split value per month to each day and country

DB-Fiddle
CREATE TABLE sales (
id SERIAL PRIMARY KEY,
country VARCHAR(255),
sales_date DATE,
sales_volume DECIMAL,
fix_costs DECIMAL
);
INSERT INTO sales
(country, sales_date, sales_volume, fix_costs
)
VALUES
('DE', '2020-01-03', '500', '2000'),
('FR', '2020-01-03', '350', '2000'),
('None', '2020-01-31', '0', '2000'),
('DE', '2020-02-15', '0', '5000'),
('FR', '2020-02-15', '0', '5000'),
('None', '2020-02-29', '0', '5000'),
('DE', '2020-03-27', '180', '4000'),
('FR', '2020-03-27', '970', '4000'),
('None', '2020-03-31', '0', '4000');
Expected Result:
sales_date | country | sales_volume | fix_costs
-------------|--------------|------------------|------------------------------------------
2020-01-03 | DE | 500 | 37.95 (= 2000/31 = 64.5 x 0.59)
2020-01-03 | FR | 350 | 26.57 (= 2000/31 = 64.5 x 0.41)
-------------|--------------|------------------|------------------------------------------
2020-02-15 | DE | 0 | 86.21 (= 5000/29 = 172.4 x 0.50)
2020-02-15 | FR | 0 | 86.21 (= 5000/29 = 172.4 x 0.50)
-------------|--------------|------------------|------------------------------------------
2020-03-27 | DE | 180 | 20.20 (= 4000/31 = 129.0 x 0.16)
2020-03-27 | FR | 970 | 108.84 (= 4000/31 = 129.0 x 0.84)
-------------|--------------|------------------|-------------------------------------------
The column fix_costs in the expected result is calculated as follows:
Step 1) Get the daily rate of the fix_costs per month. (2000/31 = 64.5; 5000/29 = 172.4; 4000/31 = 129.0)
Step 2) Split the daily value between the countries DE and FR based on their share of the sales_volume. (500/850 = 0.59; 350/850 = 0.41; 180/1150 = 0.16; 970/1150 = 0.84)
Step 3) In case the sales_volume is 0, the daily rate gets split 50/50 between DE and FR, as you can see for 2020-02-15.
In MariaDB I was able to do this with the below query:
SELECT
s.sales_date,
s.country,
s.sales_volume,
(CASE WHEN SUM(sales_volume) OVER (PARTITION BY sales_date) > 0
THEN ((s.fix_costs/ DAY(LAST_DAY(sales_date))) *
sales_volume / NULLIF(SUM(sales_volume) OVER (PARTITION BY sales_date), 0)
)
ELSE (s.fix_costs / DAY(LAST_DAY(sales_date))) * 1 / SUM(country <> 'None') OVER (PARTITION by sales_date)
END) AS imputed_fix_costs
FROM sales s
WHERE country <> 'None'
GROUP BY 1,2,3
ORDER BY 1;
However, in PostgreSQL I get an error on DAY(LAST_DAY(sales_date)).
I tried to replace this part with (date_part('DAY', ((date_trunc('MONTH', s.sales_date) + INTERVAL '1 MONTH - 1 DAY')::date)))
However, this is causing another error.
How do I need to modify the query to get the expected result?
The Postgresql equivalent of DAY(LAST_DAY(sales_date)) would be:
extract(day from (date_trunc('month', sales_date + interval '1 month') - interval '1 day'))
The expression SUM(country <> 'None') also needs to be fixed as
SUM(case when country <> 'None' then 1 else 0 end)
It might be a good idea to define this compatibility function:
create function last_day(d date) returns date as
$$
select (date_trunc('month', d + interval '1 month') - interval '1 day')::date;
$$ language sql immutable;
(The explicit ::date cast keeps the body's result in line with the declared return type; the expression itself yields a timestamp, which older PostgreSQL versions will not coerce automatically in a SQL function body.)
Then the first expression becomes simply
extract(day from last_day(sales_date))
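A quick sanity check against a leap year (2020), assuming the function above has been created:
select last_day(date '2020-02-15');                   -- 2020-02-29
select extract(day from last_day(date '2020-02-15')); -- 29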
I would create a function to return the last day (number) for a given date - which is actually the "length" of the month.
create function month_length(p_input date)
returns integer
as
$$
select extract(day from (date_trunc('month', p_input) + interval '1 month - 1 day'))::int;
$$
language sql
immutable;
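The same leap-year check for this variant:
select month_length(date '2020-02-15'); -- 29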
Then the query can be written as:
select sales_date, country,
       sum(sales_volume),
       sum(fix_costs_per_day * cost_factor)
from (
    select id, country, sales_date, sales_volume, fix_costs,
           fix_costs / month_length(sales_date) as fix_costs_per_day,
           case
               when sum(sales_volume) over (partition by sales_date) > 0
               then sales_volume::numeric / sum(sales_volume) over (partition by sales_date)
               -- nothing sold that day: split the costs evenly across the countries
               else 1.0 / count(*) over (partition by sales_date)
           end as cost_factor
    from sales
    where country <> 'None'
) t
group by sales_date, country
order by sales_date, country;

PostgreSQL: Find percentages of total_films_rented

The code below gives me the following results
Early: 7738
Late: 6586
On Time: 1720
How would I take this a step further and add a third column that finds the percentages?
Here is a link to the ERD and database set-up: https://www.postgresqltutorial.com/postgresql-sample-database/
WITH t1 AS (
    SELECT *, DATE_PART('day', return_date - rental_date) AS days_rented
    FROM rental
),
t2 AS (
    SELECT rental_duration, days_rented,
           CASE WHEN rental_duration > days_rented THEN 'Early'
                WHEN rental_duration = days_rented THEN 'On Time'
                ELSE 'Late'
           END AS rental_return_status
    FROM film f, inventory i, t1
    WHERE f.film_id = i.film_id AND t1.inventory_id = i.inventory_id
)
SELECT rental_return_status, COUNT(*) AS total_films_rented
FROM t2
GROUP BY 1
ORDER BY 2 DESC;
You can use a window function with one CTE table (instead of 2):
WITH raw_status AS (
    SELECT rental_duration - DATE_PART('day', return_date - rental_date) AS days_remaining
    FROM rental r
    JOIN inventory i ON r.inventory_id = i.inventory_id
    JOIN film f ON f.film_id = i.film_id
)
SELECT CASE WHEN days_remaining > 0 THEN 'Early'
            WHEN days_remaining = 0 THEN 'On Time'
            ELSE 'Late' END AS rental_status,
       count(*),
       (100 * count(*)) / sum(count(*)) OVER () AS percentage
FROM raw_status
GROUP BY 1;
rental_status | count | percentage
---------------+-------+---------------------
Early | 7738 | 48.2298678633757168
On Time | 1720 | 10.7205185739217153
Late | 6586 | 41.0496135627025679
(3 rows)
Disclosure: I work for EnterpriseDB (EDB)
Use a window function to get the sum of the count column (sum(count(*)) over ()), then just divide the count by that (count(*)/sum(count(*)) over ()). Multiply by 100 to make it a percentage.
psql (12.1 (Debian 12.1-1))
Type "help" for help.
testdb=# CREATE TABLE faket2 AS (
SELECT 'early' AS rental_return_status UNION ALL
SELECT 'early' UNION ALL
SELECT 'ontime' UNION ALL
SELECT 'late');
SELECT 4
testdb=# SELECT
rental_return_status,
COUNT(*) as total_films_rented,
(100*count(*))/sum(count(*)) over () AS percentage
FROM faket2
GROUP BY 1
ORDER BY 2 DESC;
rental_return_status | total_films_rented | percentage
----------------------+--------------------+---------------------
early | 2 | 50.0000000000000000
late | 1 | 25.0000000000000000
ontime | 1 | 25.0000000000000000
(3 rows)
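The long fractional strings come from numeric division; if two decimal places are enough, the same expression can be wrapped in round(), for example against the toy table above:
SELECT rental_return_status,
       COUNT(*) AS total_films_rented,
       round(100 * count(*) / sum(count(*)) OVER (), 2) AS percentage
FROM faket2
GROUP BY 1
ORDER BY 2 DESC;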

Fixed range of timestamps for every uuid in SQL

I would like to generate a table with the last n weeks' timestamps (in this case, n = 3) and all the data, even if it is null.
I am using the following piece of code:
with raw_weekly_data as (
    SELECT d.uuid,
           date_trunc('week', a.start_timestamp) as tstamp,
           avg(price) as price
    FROM a
    JOIN d ON a.uuid = d.uuid
    WHERE start_timestamp BETWEEN date_trunc('week', now()) - interval '3 week'
                              AND date_trunc('week', now())
    GROUP BY 1, 2
    ORDER BY 1
),
tstamp as (
    SELECT DISTINCT tstamp
    FROM raw_weekly_data
)
SELECT t.tstamp, r.*
FROM raw_weekly_data r
RIGHT JOIN tstamp t ON r.tstamp = t.tstamp
ORDER BY uuid
I would like to have something like this:
week | uuid | price
w1 | 1 | 10
w2 | 1 | 2
w3 | 1 |
w1 | 2 | 20
w2 | 2 |
w3 | 2 |
w1 | 3 | 10
w2 | 3 | 10
w3 | 3 | 20
But instead, the rows with null results are not shown at all. What is the best approach here?
week | uuid | price
w1 | 1 | 10
w2 | 1 | 2
w1 | 2 | 20
w1 | 3 | 10
w2 | 3 | 10
w3 | 3 | 20
Form a Cartesian product of all weeks and UUIDs, then LEFT JOIN to the actual average prices per (week, uuid). Like:
SELECT *
FROM generate_series(date_trunc('week', now() - interval '3 week')
                   , now() - interval '1 week'
                   , interval '1 week') tstamp
CROSS JOIN (SELECT DISTINCT uuid FROM a) a
LEFT JOIN (
    SELECT d.uuid
         , date_trunc('week', a.start_timestamp) AS tstamp
         , avg(price) AS price  -- d.price?
    FROM a
    JOIN d USING (uuid)
    WHERE a.start_timestamp >= date_trunc('week', now()) - interval '3 week'
    AND   a.start_timestamp <  date_trunc('week', now())
    GROUP BY 1, 2
) ad USING (uuid, tstamp)
ORDER BY 1, 2;
This way you get all combinations of the last three weeks and UUIDs, extended by the average price - if one should exist for the combination.
Based on some educated guesses to fill in the missing information.
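To inspect the week scaffold on its own before joining (a quick check, reusing the same bounds as the query above):
SELECT generate_series(date_trunc('week', now() - interval '3 week')
                     , now() - interval '1 week'
                     , interval '1 week') AS tstamp;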

how to get this year opening from last year closing with condition?

I have a table of data as below and need to use T-SQL to generate the result.
Year | Id | Entitle | Use | Max
-----------------------------------
2016 | 0001 | 15 | 5 | 20
2017 | 0001 | 15 | 2 | 20
2018 | 0001 | 15 | 4 | 20
I need to get an opening and a closing value for each year. This year's opening is last year's (Opening + Entitle - Use), but it cannot exceed Max; if it would exceed Max, then Max becomes the opening.
This is the result I expect:
year | Id | Opening | Entitle | Use | Max | Closing
-----------------------------------------------------
2016 | 0001 | 0 | 15 | 5 | 20 | 10
2017 | 0001 | 10 | 15 | 2 | 20 | 23
2018 | 0001 | 20 | 15 | 4 | 20 | 31
Here's another option: a recursive CTE will get you there.
DECLARE @TestData TABLE
(
[Year] INT
, [Id] NVARCHAR(10)
, [Entitle] INT
, [Use] INT
, [Max] INT
);
INSERT INTO @TestData (
[Year]
, [Id]
, [Entitle]
, [Use]
, [Max]
)
VALUES ( 2016, '0001', 15, 5, 20 )
, ( 2017, '0001', 15, 2, 20 )
, ( 2018, '0001', 15, 4, 20 );
INSERT INTO @TestData (
[Year]
, [Id]
, [Entitle]
, [Use]
, [Max]
)
VALUES ( 2015, '0002', 20, 7, 20 )
, ( 2016, '0002', 20, 7, 20 )
, ( 2017, '0002', 20, 4, 20 )
, ( 2018, '0002', 20, 13, 20 );
WITH [cte]
AS ( SELECT [a].[Year]
, [a].[Id]
, 0 AS [Opening]
, [a].[Entitle]
, [a].[Use]
, [a].[Entitle] - [a].[Use] AS [Closing]
FROM @TestData [a]
--Cross apply here to get our first record, earliest year for each Id for our anchor
CROSS APPLY (
SELECT [aa].[Id]
, MIN([aa].[Year]) AS [Year]
FROM @TestData [aa]
WHERE [aa].[Id] = [a].[Id]
GROUP BY [aa].[Id]
) [aaa]
WHERE [a].[Year] = [aaa].[Year]
AND [a].[Id] = [aaa].[Id]
UNION ALL
SELECT [c].[Year]
, [c].[Id]
, CASE WHEN [b].[Closing] > [c].[Max] THEN [c].[Max]
ELSE [b].[Closing]
END
, [c].[Entitle]
, [c].[Use]
, CASE WHEN [b].[Closing] > [c].[Max] THEN [c].[Max]
ELSE [b].[Closing]
END + [c].[Entitle] - [c].[Use] AS [Closing]
FROM [cte] [b]
INNER JOIN @TestData [c]
ON [c].[Id] = [b].[Id]
AND [c].[Year] = [b].[Year] + 1 )
SELECT *
FROM [cte]
ORDER BY [cte].[Id]
, [cte].[Year];
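One caveat with the recursive approach: SQL Server stops after 100 recursion levels by default, so if an Id ever spans more than about 100 years the final SELECT would need a MAXRECURSION query hint (0 means unlimited); for example:
SELECT *
FROM [cte]
ORDER BY [cte].[Id]
       , [cte].[Year]
OPTION (MAXRECURSION 0);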
Simple SQL will not be enough here. You need to go through each row and calculate the closing and opening values based on the previous year.
The idea is to loop through each row, store the results, and add them into a temp table. (This assumes the source table, here called MyTable, has a numeric row Id to iterate on, with the employee identifier in _ID.)
I have written the code below. Please note that I used SSMS to implement it.
DECLARE @TempTable table (_ID varchar(255), Year int, Opening int, Entitle int, Used int, Max int, Closing int)
DECLARE @idColumn INT
DECLARE @ID varchar(255)
DECLARE @entitle INT
DECLARE @used INT
DECLARE @max INT
DECLARE @opening INT
DECLARE @closing INT
DECLARE @year INT
SELECT @idColumn = min( Id ) FROM MyTable
WHILE @idColumn is not null
BEGIN
    SET @year = (SELECT Year FROM MyTable WHERE Id = @idColumn)
    SET @ID = (SELECT [_ID] FROM MyTable WHERE Id = @idColumn)
    IF @idColumn = 1
    BEGIN
        SET @entitle = (SELECT Entitle FROM MyTable WHERE Id = @idColumn);
        SET @used = (SELECT Used FROM MyTable WHERE Id = @idColumn);
        SET @opening = 0;
        SET @closing = @opening + @entitle - @used;
        SET @max = (SELECT Max FROM MyTable WHERE Id = @idColumn);
    END
    ELSE
    BEGIN
        SET @opening = @opening + @entitle - @used;
        IF @opening > @max
        BEGIN
            SET @opening = @max;
        END
        SET @entitle = (SELECT Entitle FROM MyTable WHERE Id = @idColumn);
        SET @used = (SELECT Used FROM MyTable WHERE Id = @idColumn);
        SET @max = (SELECT Max FROM MyTable WHERE Id = @idColumn);
        SET @closing = @opening + @entitle - @used;
    END
    INSERT INTO @TempTable (_ID, Year, Opening, Entitle, Used, Max, Closing)
    VALUES (@ID, @year, @opening, @entitle, @used, @max, @closing);
    SELECT @idColumn = min( Id ) FROM MyTable WHERE Id > @idColumn
END
SELECT * FROM @TempTable