How can I increment the numerical value in my WHERE clause using a loop? - postgresql

I am currently using the UNION ALL workaround below to calculate old_eps_tfq regression slopes of each ticker based off its corresponding rownum value (see WHERE rownum < x). I am interested to know what the old_eps_tfq is when rownum < 4 then increment 4 by 1 to find out what old_eps_tfq is when rownum < 5, and so on (there are ~20 rownum)
Could I use PL/pgSQL for this?
-- Workaround: one UNION ALL branch per row-number cutoff. Each branch numbers
-- every ticker's rows by earnings_growths_ped, keeps the rows below its cutoff,
-- and fits old_eps_tfq against that row number per ticker and period end date.

-- Branch 1: rows numbered below 4.
SELECT below_four.*
FROM (
    WITH numbered_growths AS (
        SELECT
            ROW_NUMBER() OVER (
                PARTITION BY eg.ticker
                ORDER BY eg.earnings_growths_ped
            ) AS rownum,
            eg.*
        FROM "ANALYTICS"."vEARNINGS_GROWTHS" AS eg
        -- WHERE eg.ticker = 'ACN'
        ORDER BY eg.ticker
    )
    SELECT
        ng.ticker,
        ng.current_period_end_date,
        max(ng.earnings_growths_ped) AS max_earnings_growths_ped,
        -- max(ng.rownum) AS max_rownum,
        round(regr_slope(ng.old_eps_tfq, ng.rownum)::numeric, 2) AS slope,
        round(regr_intercept(ng.old_eps_tfq, ng.rownum)::numeric, 2) AS y_intercept,
        round(regr_r2(ng.old_eps_tfq, ng.rownum)::numeric, 3) AS r_squared
    FROM numbered_growths AS ng
    WHERE ng.rownum < 4
    GROUP BY
        ng.ticker,
        ng.current_period_end_date
    ORDER BY ng.ticker ASC
) AS below_four

UNION ALL

-- Branch 2: identical except that the cutoff is 5.
SELECT below_five.*
FROM (
    WITH numbered_growths AS (
        SELECT
            ROW_NUMBER() OVER (
                PARTITION BY eg.ticker
                ORDER BY eg.earnings_growths_ped
            ) AS rownum,
            eg.*
        FROM "ANALYTICS"."vEARNINGS_GROWTHS" AS eg
        -- WHERE eg.ticker = 'ACN'
        ORDER BY eg.ticker
    )
    SELECT
        ng.ticker,
        ng.current_period_end_date,
        max(ng.earnings_growths_ped) AS max_earnings_growths_ped,
        -- max(ng.rownum) AS max_rownum,
        round(regr_slope(ng.old_eps_tfq, ng.rownum)::numeric, 2) AS slope,
        round(regr_intercept(ng.old_eps_tfq, ng.rownum)::numeric, 2) AS y_intercept,
        round(regr_r2(ng.old_eps_tfq, ng.rownum)::numeric, 3) AS r_squared
    FROM numbered_growths AS ng
    WHERE ng.rownum < 5
    GROUP BY
        ng.ticker,
        ng.current_period_end_date
    ORDER BY ng.ticker ASC
) AS below_five
Here is my table

The outer query SELECT * FROM (...) q looks redundant.
Then you can try this :
-- Replaces the per-cutoff UNION ALL branches with a single pass: every numbered
-- row is joined to each cutoff it falls below, and the regression is then
-- computed once per (cutoff, ticker, current_period_end_date).
WITH numbered_growths AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY eg.ticker
            ORDER BY eg.earnings_growths_ped
        ) AS rownum,
        eg.*
    FROM "ANALYTICS"."vEARNINGS_GROWTHS" AS eg
    -- WHERE eg.ticker = 'ACN'
    ORDER BY eg.ticker
)
SELECT
    cutoff.upper_bound AS max,  -- exclusive row-number cutoff used for this result row
    ng.ticker,
    ng.current_period_end_date,
    max(ng.earnings_growths_ped) AS max_earnings_growths_ped,
    -- max(ng.rownum) AS max_rownum,
    round(regr_slope(ng.old_eps_tfq, ng.rownum)::numeric, 2) AS slope,
    round(regr_intercept(ng.old_eps_tfq, ng.rownum)::numeric, 2) AS y_intercept,
    round(regr_r2(ng.old_eps_tfq, ng.rownum)::numeric, 3) AS r_squared
FROM numbered_growths AS ng
-- the range 4 to 24 can be adjusted to the need
INNER JOIN generate_series(4, 24) AS cutoff (upper_bound)
    ON ng.rownum < cutoff.upper_bound
GROUP BY
    cutoff.upper_bound,
    ng.ticker,
    ng.current_period_end_date
ORDER BY
    cutoff.upper_bound ASC,
    ng.ticker ASC

Related

how to order output of arrays when using union

I have a query like this:
-- Sketch only ("..." stands for the real subqueries). Each branch collapses its
-- subquery into a single row holding one array; the branches are then stacked.
-- Nothing here fixes the order in which the stacked rows come back.
SELECT array_agg(candles) as candles FROM ( SELECT * FROM ... ) AS candles
UNION ALL
SELECT array_agg(trades) as trades FROM ( SELECT * FROM ... ) AS trades
UNION ALL
SELECT ...
But then I'll get rows that contain arrays, but the order of the rows doesn't necessarily match the query order.
For example, it is possible that the output will have the trades row before the candles row.
How can I get the rows in a predictable order?
Edit:
updated the query based on the answer but getting an error:
-- Stacks three one-row array aggregates (candles, trades, kvwap) for BTCUSDT over
-- the same one-hour window and sorts them by the literal column o (1, 2, 3),
-- returning only the array column a.
-- As written this statement is rejected: the outer derived table opened right
-- after "SELECT a FROM" is closed without an alias, which is what the
-- "subquery in FROM must have an alias" error refers to.
SELECT a FROM
(
-- o = 1: candles, pre-sorted by ts inside the subquery
SELECT 1 as o, array_agg(candles) as a
FROM (
SELECT ts, open, high, low, close, midpoint, volume
FROM exchange.binance.candles
WHERE instrument = 'BTCUSDT' AND ts >= '2022-04-01 00:00:00' AND ts < '2022-04-01 01:00:00'
ORDER BY ts) AS candles
UNION ALL
-- o = 2: trades
SELECT 2 as o, array_agg(trades)
FROM (
SELECT ts, price, quantity, direction
FROM exchange.binance.trades
WHERE instrument = 'BTCUSDT' AND ts >= '2022-04-01 00:00:00' AND ts < '2022-04-01 01:00:00'
ORDER BY ts) AS trades
UNION ALL
-- o = 3: kvwap rows restricted to the M5, H1 and H4 intervals
SELECT 3 as o, array_agg(kvwap)
FROM (
SELECT ts, price, "interval"
FROM exchange.binance.kvwap
WHERE instrument = 'BTCUSDT' AND "interval" IN ('M5', 'H1', 'H4') AND ts >= '2022-04-01 00:00:00' AND ts < '2022-04-01 01:00:00'
ORDER BY ts) AS kvwap
-- the derived table ends here with no alias following the parenthesis
)
ORDER BY o;
the error is:
[42601] ERROR: subquery in FROM must have an alias Hint: For example, FROM (SELECT ...) [AS] foo. Position: 15
Add a column for ordering to each subquery, but don't include it in the output:
-- Sketch only ("..." stands for the real subqueries). Each branch carries a
-- literal sort key o next to its array; the outer query sorts on o but returns
-- only a. The derived table is aliased x, and only the first branch names its
-- columns because the union takes its column names from that branch.
SELECT a FROM (
SELECT 1 as o, array_agg(candles) as a FROM ( SELECT * FROM ... ) c group by 1
UNION ALL
SELECT 2, array_agg(trades) FROM ( SELECT * FROM ... ) t group by 1
UNION ALL
SELECT ...
) x
ORDER BY o
Note that with UNION only the first subquery's column names are relevant - the entire union uses column names from the first subquery - so don't bother providing aliases for the others.

How to group by when using a modulo

For each company, I want to sum the revenue for the 4 most recent quarters, then the 4 subsequent ones, and so on (see screenshot attached for details). How can I do that?
SQL query and result - 1st attempt (failed)
https://i.stack.imgur.com/wWhhb.png
-- First attempt: number each ticker's quarters from newest to oldest, then fold
-- that number onto a repeating 1..4 cycle. This only labels rows; no revenue is
-- summed here.
SELECT
    numbered.ticker,
    numbered.period_end_date,
    numbered.revenue,
    ((numbered.quarter_rank - 1) % 4) + 1 AS test
FROM (
    SELECT
        qm.ticker,
        qm.period_end_date,
        qm.revenue,
        ROW_NUMBER() OVER (
            PARTITION BY qm.ticker
            ORDER BY qm.period_end_date DESC
        ) AS quarter_rank
    FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED" AS qm
    -- WHERE qm.ticker = 'ACN'
    ORDER BY qm.ticker
) AS numbered
EDIT: the following code meets my needs. The 'revenue' is summed using the most recent quarter and the 3 quarters thereafter.
-- For every quarter of every ticker, total the revenue of that quarter and the
-- three rows that follow it when the ticker's quarters are sorted newest first.
SELECT
    qm.ticker,
    qm.period_end_date,
    SUM(qm.revenue) OVER four_quarters AS total_revenue
FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED" AS qm
-- WHERE qm.ticker = 'ACN'
WINDOW four_quarters AS (
    PARTITION BY qm.ticker
    ORDER BY qm.period_end_date DESC
    ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING
)
ORDER BY qm.ticker
You can try this :
-- Computes the same rolling four-row revenue total per quarter, then keeps only
-- the quarters whose calendar month equals the month of the ticker's most
-- recent period_end_date.
WITH rolling_revenue AS (
    SELECT
        qm.ticker,
        qm.period_end_date,
        SUM(qm.revenue) OVER (
            PARTITION BY qm.ticker
            ORDER BY qm.period_end_date DESC
            ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING
        ) AS total_revenue,
        max(qm.period_end_date) OVER (PARTITION BY qm.ticker) AS period_end_date_max
    FROM "ANALYTICS"."vQUARTERLY_MASTER_MATERIALIZED" AS qm
    -- WHERE qm.ticker = 'ACN'
)
SELECT
    rr.ticker,
    rr.period_end_date,
    rr.total_revenue
FROM rolling_revenue AS rr
WHERE EXTRACT(MONTH FROM rr.period_end_date) = EXTRACT(MONTH FROM rr.period_end_date_max)
ORDER BY
    rr.ticker,
    rr.period_end_date ASC

How to PIVOT this query and display only TOP 10 records filtered by SUM(NetWrittenPremium) DESC

In this query I can't work out the proper syntax to PIVOT it by month and also display only the top 10 records based on SUM(NetWrittenPremium).
-- cte_TopClasses: one row per calendar month and class (ClassCode + Description)
-- with the summed Premium for that month, covering last year's months after the
-- current month number plus this year's months up to the current one.
-- The final SELECT ranks those individual month/class rows by NetWrittenPremium;
-- it does not total a class across months.
;with cte_TopClasses
AS (
select
b.YearNum,
b.MonthNum,
REPLACE(ClassCode,'+','') + ' - '+ QLL.Description as Description,
SUM( Premium) as NetWrittenPremium
FROM tblCalendar b
-- Production rows are matched to their calendar month; the date range and the
-- company filter sit in the ON clause, so calendar rows without a match are kept.
LEFT JOIN ProductionReportMetrics prm ON b.MonthNum=Month(prm.EffectiveDate) AND b.YearNum = YEAR(EffectiveDate)
AND prm.EffectiveDate >=DateAdd(yy, -1, DATEADD(d, 1, EOMONTH(GETDATE()))) AND prm.EffectiveDate <= EOMONTH(GETDATE()) AND CompanyLine = 'Ironshore Insurance Company'
LEFT JOIN NetRate_Quote_Insur_Quote Q ON prm.NetRate_QuoteID = Q.QuoteID
LEFT JOIN NetRate_Quote_Insur_Quote_Locat QL ON Q.QuoteID = QL.QuoteID
-- QLL: for each LocationID, the liability row(s) carrying that location's MAX(ClassCode).
LEFT JOIN (SELECT * FROM NetRate_Quote_Insur_Quote_Locat_Liabi nqI
JOIN ( SELECT LocationID as LocID, MAX(ClassCode) as ClCode
FROM NetRate_Quote_Insur_Quote_Locat_Liabi GROUP BY LocationID ) nqA
ON nqA.LocID = nqI.LocationID AND nqA.ClCode = nqI.ClassCode ) QLL
ON QLL.LocationID = QL.LocationID
WHERE ( b.YearNum = YEAR(GETDATE())-1 and b.MonthNum >= MONTH(GETDATE())+1 ) OR
( b.YearNum = YEAR(GETDATE()) and b.MonthNum <= MONTH(GETDATE()) )
GROUP BY b.YearNum,b.MonthNum,ClassCode, QLL.Description
)
SELECT
--TOP 10
RANK() OVER (ORDER BY NetWrittenPremium DESC) AS Rank, *
FROM cte_TopClasses
-- NOTE(review): presumably this drops month rows with no matching class, since the
-- concatenated Description is NULL when either part is NULL - confirm.
WHERE Description IS NOT NULL
ORDER BY NetWrittenPremium DESC,YearNum,MonthNum
The result should look something like that:
If I use the query below and then use a matrix in SSRS to PIVOT it, then after grouping by Description it displays only 2 Descriptions.
-- Same cte_TopClasses as the earlier attempt: summed Premium per calendar month
-- and class for last year's months after the current month number plus this
-- year's months up to the current one.
-- The final SELECT keeps the 10 highest-ranked month/class rows. The rank is over
-- single monthly rows, so several of the 10 can belong to the same Description.
;with cte_TopClasses
AS (
select
b.YearNum,
b.MonthNum,
REPLACE(ClassCode,'+','') + ' - '+ QLL.Description as Description,
SUM( Premium) as NetWrittenPremium
FROM tblCalendar b
-- Date range and company filter are part of the join condition, not the WHERE clause.
LEFT JOIN ProductionReportMetrics prm ON b.MonthNum=Month(prm.EffectiveDate) AND b.YearNum = YEAR(EffectiveDate)
AND prm.EffectiveDate >=DateAdd(yy, -1, DATEADD(d, 1, EOMONTH(GETDATE()))) AND prm.EffectiveDate <= EOMONTH(GETDATE()) AND CompanyLine = 'Ironshore Insurance Company'
LEFT JOIN NetRate_Quote_Insur_Quote Q ON prm.NetRate_QuoteID = Q.QuoteID
LEFT JOIN NetRate_Quote_Insur_Quote_Locat QL ON Q.QuoteID = QL.QuoteID
-- QLL: for each LocationID, the liability row(s) carrying that location's MAX(ClassCode).
LEFT JOIN (SELECT * FROM NetRate_Quote_Insur_Quote_Locat_Liabi nqI
JOIN ( SELECT LocationID as LocID, MAX(ClassCode) as ClCode
FROM NetRate_Quote_Insur_Quote_Locat_Liabi GROUP BY LocationID ) nqA
ON nqA.LocID = nqI.LocationID AND nqA.ClCode = nqI.ClassCode ) QLL
ON QLL.LocationID = QL.LocationID
WHERE ( b.YearNum = YEAR(GETDATE())-1 and b.MonthNum >= MONTH(GETDATE())+1 ) OR
( b.YearNum = YEAR(GETDATE()) and b.MonthNum <= MONTH(GETDATE()) )
GROUP BY b.YearNum,b.MonthNum,ClassCode, QLL.Description
)
SELECT *
FROM (SELECT RANK() OVER (ORDER BY NetWrittenPremium DESC) AS Rank, *
FROM cte_TopClasses
WHERE Description IS NOT NULL) AA
-- Filters on the per-row rank computed in AA.
WHERE AA.Rank <= 10
ORDER BY AA.NetWrittenPremium DESC, AA.YearNum, AA.MonthNum
And the result of it in the SSRS matrix:
You could try something like this at the end of the query, rather than what is there now:
-- Final SELECT to place after the cte_TopClasses definition.
-- The innermost query totals NetWrittenPremium per Description and ranks those
-- totals; the outer query then returns every monthly row belonging to the ten
-- best-ranked Descriptions, ordered by month for pivoting in the report.
SELECT monthly_rows.*
FROM (
    SELECT
        RANK() OVER (ORDER BY tc.[Description] DESC) AS [Rank],
        tc.*
    FROM cte_TopClasses AS tc
    WHERE tc.[Description] IN (
        SELECT description_totals.[Description]
        FROM (
            SELECT
                RANK() OVER (ORDER BY SUM(NetWrittenPremium) DESC) AS [Rank],
                [Description],
                SUM(NetWrittenPremium) AS total
            FROM cte_TopClasses
            WHERE [Description] IS NOT NULL
            GROUP BY [Description]
        ) AS description_totals
        WHERE description_totals.[Rank] <= 10
    )
) AS monthly_rows
ORDER BY
    monthly_rows.YearNum,
    monthly_rows.MonthNum
This wraps the query in a SELECT, and filters the ranked results to the 10 you want.
Then use a matrix in the report to pivot the results.

Is T-SQL (2005) RANK OVER(PARTITION BY) the answer?

I have a stored procedure that does paging for the front end and is working fine. I now need to modify that procedure to group by four columns of the 20 returned and then only return the row within each group that contains the lowest priority. So when resort_id, bedrooms, kitchen and checkin (date) all match then only return the row that has the min priority. I have to still maintain the paging functionality. The @startIndex and @upperBound are parameters passed into the procedure from the front end for paging. I’m thinking that RANK OVER (PARTITION BY) is the answer; I just can’t quite figure out how to put it all together.
-- Returns one page of Inventory rows in check-in order.
-- @startIndex is the first row number of the page (inclusive) and @upperBound
-- the row number just past it (exclusive); both are parameters of the enclosing
-- procedure. T-SQL variables must start with "@" - a "#" prefix names a
-- temporary object and does not parse in a comparison.
SELECT I.id,
       I.resort_id,
       I.[bedrooms],
       I.[kitchen],
       I.[checkin],
       I.[priority],
       I.col_1,
       I.col_2 /* ..... (more cols) */
FROM (
    -- id is a tie-breaker so rows sharing a checkin value get the same number on
    -- every call (assumes id is unique - confirm against the table definition).
    SELECT ROW_NUMBER() OVER (ORDER BY checkin, id) AS rowNumber,
           *
    FROM Inventory
) AS I
WHERE I.rowNumber >= @startIndex
  AND I.rowNumber < @upperBound
ORDER BY I.rowNumber
Example 2 after fix:
-- Paged Inventory listing that returns one row per
-- (resort_id, bedrooms, kitchen, checkin) group: the row holding the group's
-- lowest priority (lowest id if several rows share that priority).
-- @startIndex (inclusive) and @upperBound (exclusive) are the paging parameters
-- of the enclosing procedure; T-SQL variables must start with "@".
SELECT i.resort_id,
       i.[bedrooms],
       i.[kitchen],
       i.[checkin],
       i.[priority],
       i.col_1,
       i.col_2 /* ..... (more cols) */
FROM Inventory AS i
INNER JOIN
(
    -- One numbered row per group. MIN(h.id) picks a single representative and
    -- also acts as tie-breaker so groups sharing a checkin value are numbered
    -- the same way on every call.
    SELECT ROW_NUMBER() OVER (ORDER BY h.checkin, MIN(h.id)) AS rowNumber,
           MIN(h.id) AS id
    FROM Inventory AS h
    INNER JOIN (
        -- Lowest priority per group. id must NOT be selected or grouped here:
        -- grouping by id makes every row its own group, so MIN(priority) would
        -- be that row's own priority and nothing would be filtered out.
        SELECT resort_id, bedrooms, kitchen, checkin, MIN(priority) AS priority
        FROM Inventory
        GROUP BY resort_id, bedrooms, kitchen, checkin
    ) AS h2
        ON  h.resort_id = h2.resort_id
        AND h.bedrooms  = h2.bedrooms
        AND h.kitchen   = h2.kitchen
        AND h.checkin   = h2.checkin
        AND h.priority  = h2.priority
    GROUP BY h.resort_id, h.bedrooms, h.kitchen, h.checkin, h.priority
) AS i2
    ON i.id = i2.id
WHERE i2.rowNumber >= @startIndex
  AND i2.rowNumber < @upperBound
ORDER BY i2.rowNumber
I would accomplish it this way.
-- Paged Inventory listing that returns, for each
-- (resort_id, bedrooms, kitchen, checkin) group, the row with the lowest
-- priority (lowest id if several rows share that priority).
-- @startIndex (inclusive) and @upperBound (exclusive) are the paging parameters
-- of the enclosing procedure; T-SQL variables must start with "@".
SELECT i.resort_id,
       i.[bedrooms],
       i.[kitchen],
       i.[checkin],
       i.[priority],
       i.col_1,
       i.col_2 /* ..... (more cols) */
FROM Inventory AS i
INNER JOIN
(
    -- ROW_NUMBER takes no arguments; its ordering belongs in the OVER clause.
    -- checkin is qualified because both h and h2 expose a column of that name.
    SELECT ROW_NUMBER() OVER (ORDER BY h.checkin, MIN(h.id)) AS rowNumber,
           MIN(h.id) AS id
    FROM Inventory AS h
    INNER JOIN (
        -- Lowest priority per group; the select list matches the GROUP BY
        -- columns exactly (no id column here).
        SELECT resort_id, bedrooms, kitchen, checkin, MIN(priority) AS priority
        FROM Inventory
        GROUP BY resort_id, bedrooms, kitchen, checkin
    ) AS h2
        ON  h.resort_id = h2.resort_id
        AND h.bedrooms  = h2.bedrooms
        AND h.kitchen   = h2.kitchen
        AND h.checkin   = h2.checkin
        AND h.priority  = h2.priority
    GROUP BY h.resort_id, h.bedrooms, h.kitchen, h.checkin, h.priority
) AS i2
    ON i.id = i2.id
WHERE i2.rowNumber >= @startIndex
  AND i2.rowNumber < @upperBound
ORDER BY i2.rowNumber

Need to retrieve n-rows that are not at the beginning or in the end of the selected list

I have written this SQL statement:
-- Signals report (Oracle). Structure, outermost first:
--   * The outer query keeps rows of the ordered inner result while rownum < 300,
--     i.e. at most 299 rows.
--   * The inner query joins signals to signal_custom_fields, counts the joined
--     rows per group (NumberofSignals) and sorts the groups by the event column.
--     Groups are formed by the listed signal attributes, several get_name(...)
--     lookups, the forming time truncated to the hour and the half-hour
--     (minute / 30) within that hour.
--   * signals.id is restricted to ids that have BOTH a non-null 'event' field and
--     a non-null 'packetnumber' field (INTERSECT) and whose signal_forming_time
--     lies between 2011/5/10 14:34:44 and 2011/5/10 15:34:44 inclusive.
--   * The id subquery also selects "rownum num" and sorts by id desc, but no
--     predicate refers to num, so it does not limit the id set.
-- NOTE(review): get_name is defined elsewhere; presumably it returns the named
-- custom-field value for a signal - confirm.
select * from (
select count(*) as NumberofSignals,signals.transmitter_account,signals.class,signals.type,signals.signal_mode,
signals.area_id,signals.sector_id,signals.region_info_id,signals.zone_info_id,signals.user_id,signals.device_id,
signals.panel_name,signals.panel_id,signals.sector_name,signals.region_code,signals.area_name,signals.zone_code,
signals.description,signals.transmitter_name,signals.transmitter_id,signals.color,'event' as Event,get_name(signals.id,'event') as event_value,
'packetnumber' as packetnumber,get_name(signals.id,'packetnumber') as packetnumber_value,wm_concat(distinct get_name(signals.id,'repeater')) as repeater,
round(avg(get_name(signals.id,'signallevel'))) as avg_signallevel,min(to_char(signals.signal_forming_time, 'yyyy/mm/dd hh24:mi:ss')) as formingtime,
get_name(signals.id,'address') as address,get_name(signals.id,'username') as username,get_name(signals.id,'chaneltype') as channeltype,
get_name(signals.id,'code') as code,get_name(signals.id,'account') as account
from signals,signal_custom_fields where signals.id = signal_custom_fields.signal_id and
signals.id in (select id from (select id,rownum num from((select signals.id
from signals,signal_custom_fields where signal_custom_fields.field_name = 'event'
and signal_custom_fields.field_value is not null and signals.id = signal_custom_fields.signal_id
and signals.signal_forming_time >= to_date('2011/5/10 14:34:44', 'yyyy/mm/dd hh24:mi:ss')
AND signals.signal_forming_time <= to_date('2011/5/10 15:34:44', 'yyyy/mm/dd hh24:mi:ss'))
intersect (select distinct signals.id from signals,signal_custom_fields
where signal_custom_fields.field_name = 'packetnumber' and signal_custom_fields.field_value is not null
and signals.id = signal_custom_fields.signal_id
and signals.signal_forming_time >= to_date('2011/5/10 14:34:44', 'yyyy/mm/dd hh24:mi:ss')
AND signals.signal_forming_time <= to_date('2011/5/10 15:34:44', 'yyyy/mm/dd hh24:mi:ss')))
order by id desc)) group by 'event',signals.transmitter_account,signals.class,
signals.type,signals.signal_mode,signals.area_id,signals.sector_id,signals.region_info_id,signals.zone_info_id,
signals.user_id,signals.device_id,signals.panel_name,signals.panel_id,signals.sector_name,signals.region_code,
signals.area_name,signals.zone_code,signals.description,signals.transmitter_name,signals.transmitter_id,
signals.color, get_name(signals.id,'event'), 'packetnumber',get_name(signals.id,'username'),
get_name(signals.id,'chaneltype'),
get_name(signals.id,'code'),
get_name(signals.id,'account'), get_name(signals.id,'packetnumber'),get_name(signals.id,'address'),
TO_CHAR(signals.signal_forming_time ,'dd/mm/yyyy hh24'),
TRUNC(to_number(to_char(signals.signal_forming_time ,'mi'))/(30))
order by event)where rownum < 300
Here I get the first 300 rows, but how do I need to rewrite this statement to retrieve the second 300 rows?
Your query doesn't have the rownum listed in the first nested table. Add a rownum column in the first nested table then you can do a between function in the where clause at the top level:
-- Demonstrates ROWNUM-based paging in Oracle on a throwaway table.
-- ROWNUM is assigned as rows are produced, before ORDER BY in the same query
-- block is applied, so the sort has to happen in an inner query and ROWNUM is
-- taken in the query wrapped around it. Page bounds are inclusive and must not
-- overlap: 1-300, then 301-600.

--create a demo table
DROP TABLE paging_test;
CREATE TABLE paging_test AS
(SELECT rownum x FROM user_tables
);

--count how many records exist (in my case there is 821)
SELECT COUNT(*)
FROM paging_test;

--get the first 300 rows
SELECT pt.rn, pt.x
FROM (
    SELECT ROWNUM AS rn, sorted.x
    FROM (
        SELECT x FROM paging_test ORDER BY x
    ) sorted
) pt
WHERE pt.rn BETWEEN 1 AND 300;

--get the next 300 rows (starts at 301 so row 300 is not returned twice)
SELECT pt.rn, pt.x
FROM (
    SELECT ROWNUM AS rn, sorted.x
    FROM (
        SELECT x FROM paging_test ORDER BY x
    ) sorted
) pt
WHERE pt.rn BETWEEN 301 AND 600;
You might also be interested in my reference:
References:
http://asktom.oracle.com/pls/asktom/f?p=100:11:0::::P11_QUESTION_ID:948366252775