IBM DB2 retrieve resultset from Compound SQL - db2

I am new to DB2, so please excuse if this seems too novice to you.
I am trying to read the output of the following code in a Crystal Report
I get 'No Data to Retrieve' error when I try to map it to the report. Is there a 'RETURN' dataset command I am missing here?
BEGIN Atomic
DECLARE m INT DEFAULT 6; --
WHILE m > 0 DO
Select Year(Current date - m Month) || '-' ||Month(Current date - m Month) AS Rpt_Month,Count(Ch.Caseid) As Outcome_Value from Case Ch Inner Join Status Cs on Ch.Caseid = Cs.CaseId Where Cs.Startdate <= (Select LAST_DAY(CURRENT_DATE - m MONTH) FROM SYSIBM.SYSDUMMY1) and (Cs.Enddate is null or Cs.Enddate > (Select LAST_DAY(CURRENT_DATE - m MONTH) FROM SYSIBM.SYSDUMMY1)); --
SET m = m - 1;--
END WHILE; -- END;

Related

In PostgreSQL, how can I optimize a query with which I obtain the differences between the current column and the immediately previous one?

I have this audit table
User
date
text
text 2
u1
2023-01-01
hi
yes
u1
2022-12-20
hi
no
u1
2022-12-01
hello
maybe
And I need as a result, something like this:
User
date
text
text 2
u1
2023-01-01
null
x
u1
2022-12-20
x
x
u1
2022-12-01
null
null
So I can know which column changed from the last time.
Something like this is working, but I think may be a way to optimize it? or at least generate a "more easy to look" query? (i need the information for almost 20 columns, not only 3)
SELECT
ta.audit_date,
ta.audit_user,
CASE
WHEN ta.audit_operation = 'I' THEN 'Insert'
WHEN ta.audit_operation = 'U' THEN 'Update'
END AS action,
CASE WHEN ta.column1 <> (SELECT column1
FROM audit_table ta1
WHERE ta1.id = 9207 AND ta1.audit_date < ta.audit_date
ORDER BY ta1.audit_date DESC
LIMIT 1)
THEN 'X' ELSE null END column1,
CASE WHEN ta.column2 <> (SELECT column2
FROM audit_table ta1
WHERE ta1.id = 9207 AND ta1.audit_date < ta.audit_date
ORDER BY ta1.audit_date DESC
LIMIT 1)
THEN 'X' ELSE null END column2,
CASE WHEN ta.column3 <> (SELECT column3
FROM audit_table ta1
WHERE ta1.id = 9207 AND ta1.audit_date < ta.audit_date
ORDER BY ta1.audit_date DESC
LIMIT 1)
THEN 'X' ELSE null END column3
FROM
audit_table ta
WHERE
ta.id = 9207
ORDER BY
audit_date DESC
Thank you!
I think you can just use the LAG() analytic function here. If I understand correctly:
SELECT *, CASE WHEN text != LAG(text) OVER (ORDER BY date) THEN 'x' END AS text_label,
CASE WHEN text2 != LAG(text) OVER (ORDER BY date) THEN 'x' END AS text2_label
FROM yourTable
ORDER BY date;

Postgres query won't finish in function but if run separately it works

This is simplified version of my function with the query in it (therefore any variables are now useless) and this function won't finished but if I run the same query separately it finishes in under a second.
function that never finishes
select * from test_function_difference(1);
CREATE OR REPLACE FUNCTION test_function_difference (
p_does_nothing int
)
RETURNS TABLE(
t_datum date,
t_capacity numeric,
t_used numeric,
t_category int,
t_category_name text,
t_used_p numeric,
t_unused_p numeric
)
VOLATILE
AS $dbvis$
declare
p_sql text := '';
p_execute text := '';
rec record;
begin
p_sql :=
'
with
vytizeni as (
select
date_trunc(''day'',mcz.datum)::date as datum ,
sum(zd.v_vytizeni)/3600.0 used
from v_ui_cdc_s5_misto_cas_zdroj_aggregace mcz
left join (select * , pul_den as den_noc from v_ui_cdc_s5_misto_cas_zdroj_aggregace_zdrobneni) zd on mcz.id = zd.id
where
datum between ''2018-12-31'' and ''2018-12-31''
and ( zahranicni = 0 or zahranicni is null )
and den_noc = -1
group by
date_trunc(''day'',mcz.datum)::date
)
,kapacita as (
select
date_trunc(''day'',datum)::date as datum ,
sum(obsazeni_g)/3600.0 capacity
from v_ui_cdc_s5_misto_cas_zdroj_aggregace
where
datum between ''2018-12-31'' and ''2018-12-31''
group by
date_trunc(''day'',datum)::date
)
,zdroj as (
select
k.datum,
k.capacity,
v.used,
-1 category
from kapacita k
join vytizeni v on k.datum = v.datum
)
select
c.* ,
kc.nazev::text categeroy_name,
case when sum(capacity)over(partition by datum) = 0 then 1 else used/sum(capacity)over(partition by datum) end as used_p,
greatest(1 - case when sum(capacity)over(partition by datum) = 0 then 1 else sum(used)over(partition by datum)/sum(capacity)over(partition by datum) end,0) as unused_p
from zdroj c
left join v_ui_cdc_s5_kategorie_cinnosti kc on kc.id = c.category
order by c.datum
';
raise notice '% ' , p_sql;
RETURN QUERY
execute p_sql;
END;
$dbvis$ LANGUAGE plpgsql
and the query I run separately (finished in 533 ms)
with
vytizeni as (
select
date_trunc('day',mcz.datum)::date as datum ,
sum(zd.v_vytizeni)/3600.0 used
from v_ui_cdc_s5_misto_cas_zdroj_aggregace mcz
left join (select * , pul_den as den_noc from v_ui_cdc_s5_misto_cas_zdroj_aggregace_zdrobneni) zd on mcz.id = zd.id
where
datum between '2018-12-31' and '2018-12-31'
and ( zahranicni = 0 or zahranicni is null )
and den_noc = -1
group by
date_trunc('day',mcz.datum)::date
)
,kapacita as (
select
date_trunc('day',datum)::date as datum ,
sum(obsazeni_g)/3600.0 capacity
from v_ui_cdc_s5_misto_cas_zdroj_aggregace
where
datum between '2018-12-31' and '2018-12-31'
group by
date_trunc('day',datum)::date
)
,zdroj as (
select
k.datum,
k.capacity,
v.used,
-1 category
from kapacita k
join vytizeni v on k.datum = v.datum
)
select
c.* ,
kc.nazev::text categeroy_name,
case when sum(capacity)over(partition by datum) = 0 then 1 else used/sum(capacity)over(partition by datum) end as used_p,
greatest(1 - case when sum(capacity)over(partition by datum) = 0 then 1 else sum(used)over(partition by datum)/sum(capacity)over(partition by datum) end,0) as unused_p
from zdroj c
left join v_ui_cdc_s5_kategorie_cinnosti kc on kc.id = c.category
order by c.datum
Edit: I was able to get results from the function after almost 28 minutes (also I tried it on Sunday night, means I had the resources of the whole server, because during normal load function have not finished even after one hour) and right after that I ran query standalone and get results after 2.1 sec Here are the explain analyze
function: 28 minutes
https://explain.depesz.com/s/v9xJ
standalone query: 2.1 sec
https://explain.depesz.com/s/aBri
second run stand alone 430ms
https://explain.depesz.com/s/ENva
Interesting note: if I edit start date for the interval to '2018-12-30' or any other date the function finishes as well
which means
start date = '2018-12-31'
query => finishes under 1 second
function => won't finish
start date = '2018-12-30'
query => finishes under 1 second
function => finishes under 1 second
Version details: PostgreSQL 10.7 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-36), 64-bit
The reason for the difference in the performance is that the execution inside the function does not use parallel queries, and parallel execution by chance picks a better execution plan.
Is the function not marked as parallel safe? That may make all the difference.
The core problem, however, is the gross mies-estimate of the number of result rows for the scan on ui_cdc_s5_misto_cas_zdroj_aggregace, which estimates 1 instead of 2243 rows.
You should ANALYZE that table to get a better estimate. If that alone doesn't improve the estimate, try to raise default_statistics_target before the ANALYZE.
If you need to raise default_statistics_target for a better estimate, persist that change with
ALTER TABLE ui_cdc_s5_misto_cas_zdroj_aggregace
ALTER datum SET STATISTICS <whatever proved useful>

Looping SQL query - PostgreSQL

I'm trying to get a query to loop through a set of pre-defined integers:
I've made the query very simple for this question.. This is pseudo code as well obviously!
my_id = 0
WHILE my_id < 10
SELECT * from table where id = :my_id`
my_id += 1
END
I know that for this query I could just do something like where id < 10.. But the actual query I'm performing is about 60 lines long, with quite a few window statements all referring to the variable in question.
It works, and gets me the results I want when I have the variable set to a single figure.. I just need to be able to re-run the query 10 times with different variables hopefully ending up with one single set of results.
So far I have this:
CREATE OR REPLACE FUNCTION stay_prices ( a_product_id int ) RETURNS TABLE (
pid int,
pp_price int
) AS $$
DECLARE
nights int;
nights_arr INT[] := ARRAY[1,2,3,4];
j int;
BEGIN
j := 1;
FOREACH nights IN ARRAY nights_arr LOOP
-- query here..
END LOOP;
RETURN;
END;
$$ LANGUAGE plpgsql;
But I'm getting this back:
ERROR: query has no destination for result data
HINT: If you want to discard the results of a SELECT, use PERFORM instead.
So do I need to get my query to SELECT ... INTO the returning table somehow? Or is there something else I can do?
EDIT: this is an example of the actual query I'm running:
\x auto
\set nights 7
WITH x AS (
SELECT
product_id, night,
LAG(night, (:nights - 1)) OVER (
PARTITION BY product_id
ORDER BY night
) AS night_start,
SUM(price_pp_gbp) OVER (
PARTITION BY product_id
ORDER BY night
ROWS BETWEEN (:nights - 1) PRECEDING
AND CURRENT ROW
) AS pp_price,
MIN(spaces_available) OVER (
PARTITION BY product_id
ORDER BY night
ROWS BETWEEN (:nights - 1) PRECEDING
AND CURRENT ROW
) AS min_spaces_available,
MIN(period_date_from) OVER (
PARTITION BY product_id
ORDER BY night
ROWS BETWEEN (:nights - 1) PRECEDING
AND CURRENT ROW
) AS min_period_date_from,
MAX(period_date_to) OVER (
PARTITION BY product_id
ORDER BY night
ROWS BETWEEN (:nights - 1) PRECEDING
AND CURRENT ROW
) AS max_period_date_to
FROM products_nightlypriceperiod pnpp
WHERE
spaces_available >= 1
AND min_group_size <= 1
AND night >= '2016-01-01'::date
AND night <= '2017-01-01'::date
)
SELECT
product_id as pid,
CASE WHEN x.pp_price > 0 THEN x.pp_price::int ELSE null END as pp_price,
night_start as from_date,
night as to_date,
(night-night_start)+1 as duration,
min_spaces_available as spaces
FROM x
WHERE
night_start = night - (:nights - 1)
AND min_period_date_from = night_start
AND max_period_date_to = night;
That will get me all the nights night periods available for all my products in 2016 along with the price for the period and the max number of spaces I could fill in that period.
I'd like to be able to run this query to get all the periods available between 2 and 30 days for all my products.
This is likely to produce a table with millions of rows. The plan is to re-create this table periodically to enable a very quick look up of what's available for a particular date. The products_nightlypriceperiod represents a night of availability of a product - e.g. Product X has 3 spaces left for Jan 1st 2016, and costs £100 for the night.
Why use a loop? You can do something like this (using your first query):
with params as (
select generate_series(1, 10) as id
)
select t.*
from params cross join
table t
where t.id = params.id;
You can modify params to have the values you really want. Then just use cross join and let the database "do the looping."

Replace Subselect for something more efficient

I have this query which takes a long time, partly because the number of records in the table excedd 500 000 records, but the join I have to use slows it down quite a lot, at least to my beliefs
SELECT TOP (10) PERCENT H1.DateCompteur, CASE WHEN (h1.cSortie - h2.cSortie > 0)
THEN h1.cSortie - h2.cSortie ELSE 0 END AS Compte, H1.IdMachine
FROM dbo.T_HistoriqueCompteur AS H1 INNER JOIN
dbo.T_HistoriqueCompteur AS H2 ON H1.IdMachine = H2.IdMachine AND H2.DateCompteur =
(SELECT MAX(DateCompteur) AS Expr1
FROM dbo.T_HistoriqueCompteur AS HS
WHERE (DateCompteur < H1.DateCompteur) AND (H1.IdMachine = IdMachine))
ORDER BY H1.DateCompteur DESC
The order by is important since I need only the most recent informations. I tried using the ID field in my sub select since they are ordred by date but could not detect any significant improvement.
SELECT TOP (10) PERCENT H1.DateCompteur, CASE WHEN (h1.cSortie - h2.cSortie > 0)
THEN h1.cSortie - h2.cSortie ELSE 0 END AS Compte, H1.IdMachine
FROM dbo.T_HistoriqueCompteur AS H1 INNER JOIN
dbo.T_HistoriqueCompteur AS H2 ON H1.IdMachine = H2.IdMachine AND H2.ID =
(SELECT MAX(ID) AS Expr1
FROM dbo.T_HistoriqueCompteur AS HS
WHERE (ID < H1.ID) AND (H1.IdMachine = IdMachine))
ORDER BY H1.DateCompteur DESC
the table I use look a little like this (I got much more columns but they are unused in this query).
ID bigint
IdMachine bigint
cSortie bigint
DateCompteur datetime
I think that if I could get rid of the sub select, my query would run much faster but I can't really find a way to do so. What I really want to do is to find the previous row with the same IdMachine so that I can calculate the difference between the two cSortie values. The case in the query is because something it's reseted to 0 and in this case, I want to return 0 instead of a negative value.
So my question is this : Can I do better than what I already have ??? I plan to put this in a view if that makes a difference.
Try this query
WITH T as
(
SELECT TOP (10) PERCENT H1.DateCompteur, H1.cSortie as cSortie1, H1.IdMachine,
(
SELECT TOP 1 H2.cSortie
FROM dbo.T_HistoriqueCompteur H2
WHERE (H2.DateCompteur < H1.DateCompteur) AND (H1.IdMachine = H2.IdMachine)
ORDER BY H2.DateCompteur DESC
) as cSortie2
FROM dbo.T_HistoriqueCompteur AS H1
ORDER BY H1.DateCompteur DESC
)
select DateCompteur,
CASE WHEN (cSortie1 - cSortie2 > 0)
THEN cSortie1 - cSortie2
ELSE 0 END
AS Compte,
IdMachine
FROM T
You could also try CTE's (common table expressions) with windowing functions (ROW_NUMBER):
;WITH CTE AS
(
SELECT ID,IdMachine,cSortie,ROW_NUMBER() OVER(PARTITION BY h.IdMachine ORDER BY ID ASC) AS [ROW]
FROM T_HistoriqueCompteur h
)
SELECT
TOP (10) PERCENT
H1.DateCompteur,
CASE WHEN (h1.cSortie - h2.cSortie > 0) THEN h1.cSortie - h2.cSortie
ELSE 0
END AS Compte,
H1.IdMachine
FROM dbo.T_HistoriqueCompteur AS H1
INNER JOIN CTE cte on cte.idmachine = h1.idmachine and cte.id = h1.id
INNER JOIN CTE h2 on h2.idmachine = cte.idmachine and h2.row + 1 = cte.row
ORDER BY H1.DateCompteur DESC

Getting the minimum of two values in SQL

I have two variables, one is called PaidThisMonth, and the other is called OwedPast. They are both results of some subqueries in SQL. How can I select the smaller of the two and return it as a value titled PaidForPast?
The MIN function works on columns, not variables.
SQL Server 2012 and 2014 supports IIF(cont,true,false) function. Thus for minimal selection you can use it like
SELECT IIF(first>second, second, first) the_minimal FROM table
While IIF is just a shorthand for writing CASE...WHEN...ELSE, it's easier to write.
The solutions using CASE, IIF, and UDF are adequate, but impractical when extending the problem to the general case using more than 2 comparison values. The generalized
solution in SQL Server 2008+ utilizes a strange application of the VALUES clause:
SELECT
PaidForPast=(SELECT MIN(x) FROM (VALUES (PaidThisMonth),(OwedPast)) AS value(x))
Credit due to this website:
http://sqlblog.com/blogs/jamie_thomson/archive/2012/01/20/use-values-clause-to-get-the-maximum-value-from-some-columns-sql-server-t-sql.aspx
Use Case:
Select Case When #PaidThisMonth < #OwedPast
Then #PaidThisMonth Else #OwedPast End PaidForPast
As Inline table valued UDF
CREATE FUNCTION Minimum
(#Param1 Integer, #Param2 Integer)
Returns Table As
Return(Select Case When #Param1 < #Param2
Then #Param1 Else #Param2 End MinValue)
Usage:
Select MinValue as PaidforPast
From dbo.Minimum(#PaidThisMonth, #OwedPast)
ADDENDUM:
This is probably best for when addressing only two possible values, if there are more than two, consider Craig's answer using Values clause.
For SQL Server 2022+ (or MySQL or PostgreSQL 9.3+), a better way is to use the LEAST and GREATEST functions.
SELECT GREATEST(A.date0, B.date0) AS date0,
LEAST(A.date1, B.date1, B.date2) AS date1
FROM A, B
WHERE B.x = A.x
With:
GREATEST(value [, ...]) : Returns the largest (maximum-valued) argument from values provided
LEAST(value [, ...]) Returns the smallest (minimum-valued) argument from values provided
Documentation links :
MySQL http://dev.mysql.com/doc/refman/5.0/en/comparison-operators.html
Postgres https://www.postgresql.org/docs/current/functions-conditional.html
SQL Server https://learn.microsoft.com/en-us/sql/t-sql/functions/logical-functions-least-transact-sql
I just had a situation where I had to find the max of 4 complex selects within an update.
With this approach you can have as many as you like!
You can also replace the numbers with aditional selects
select max(x)
from (
select 1 as 'x' union
select 4 as 'x' union
select 3 as 'x' union
select 2 as 'x'
) a
More complex usage
#answer = select Max(x)
from (
select #NumberA as 'x' union
select #NumberB as 'x' union
select #NumberC as 'x' union
select (
Select Max(score) from TopScores
) as 'x'
) a
I'm sure a UDF has better performance.
Here is a trick if you want to calculate maximum(field, 0):
SELECT (ABS(field) + field)/2 FROM Table
returns 0 if field is negative, else, return field.
Use a CASE statement.
Example B in this page should be close to what you're trying to do:
http://msdn.microsoft.com/en-us/library/ms181765.aspx
Here's the code from the page:
USE AdventureWorks;
GO
SELECT ProductNumber, Name, 'Price Range' =
CASE
WHEN ListPrice = 0 THEN 'Mfg item - not for resale'
WHEN ListPrice < 50 THEN 'Under $50'
WHEN ListPrice >= 50 and ListPrice < 250 THEN 'Under $250'
WHEN ListPrice >= 250 and ListPrice < 1000 THEN 'Under $1000'
ELSE 'Over $1000'
END
FROM Production.Product
ORDER BY ProductNumber ;
GO
This works for up to 5 dates and handles nulls. Just couldn't get it to work as an Inline function.
CREATE FUNCTION dbo.MinDate(#Date1 datetime = Null,
#Date2 datetime = Null,
#Date3 datetime = Null,
#Date4 datetime = Null,
#Date5 datetime = Null)
RETURNS Datetime AS
BEGIN
--USAGE select dbo.MinDate('20120405',null,null,'20110305',null)
DECLARE #Output datetime;
WITH Datelist_CTE(DT)
AS (
SELECT #Date1 AS DT WHERE #Date1 is not NULL UNION
SELECT #Date2 AS DT WHERE #Date2 is not NULL UNION
SELECT #Date3 AS DT WHERE #Date3 is not NULL UNION
SELECT #Date4 AS DT WHERE #Date4 is not NULL UNION
SELECT #Date5 AS DT WHERE #Date5 is not NULL
)
Select #Output=Min(DT) FROM Datelist_CTE;
RETURN #Output;
END;
Building on the brilliant logic / code from mathematix and scottyc, I submit:
DECLARE #a INT, #b INT, #c INT = 0;
WHILE #c < 100
BEGIN
SET #c += 1;
SET #a = ROUND(RAND()*100,0)-50;
SET #b = ROUND(RAND()*100,0)-50;
SELECT #a AS a, #b AS b,
#a - ( ABS(#a-#b) + (#a-#b) ) / 2 AS MINab,
#a + ( ABS(#b-#a) + (#b-#a) ) / 2 AS MAXab,
CASE WHEN (#a <= #b AND #a = #a - ( ABS(#a-#b) + (#a-#b) ) / 2)
OR (#a >= #b AND #a = #a + ( ABS(#b-#a) + (#b-#a) ) / 2)
THEN 'Success' ELSE 'Failure' END AS Status;
END;
Although the jump from scottyc's MIN function to the MAX function should have been obvious to me, it wasn't, so I've solved for it and included it here: SELECT #a + ( ABS(#b-#a) + (#b-#a) ) / 2. The randomly generated numbers, while not proof, should at least convince skeptics that both formulae are correct.
Use a temp table to insert the range of values, then select the min/max of the temp table from within a stored procedure or UDF. This is a basic construct, so feel free to revise as needed.
For example:
CREATE PROCEDURE GetMinSpeed() AS
BEGIN
CREATE TABLE #speed (Driver NVARCHAR(10), SPEED INT);
'
' Insert any number of data you need to sort and pull from
'
INSERT INTO #speed (N'Petty', 165)
INSERT INTO #speed (N'Earnhardt', 172)
INSERT INTO #speed (N'Patrick', 174)
SELECT MIN(SPEED) FROM #speed
DROP TABLE #speed
END
Select MIN(T.V) FROM (Select 1 as V UNION Select 2 as V) T
SELECT (WHEN first > second THEN second ELSE first END) the_minimal FROM table