Count the number of instances where the time is above the average time - PostgreSQL

Here is my code:
-- Earliest routes.time_created per (uc_id, cluster_id, cluster_centroid, imei, day).
-- NOTE(review): routes is referenced below but does not appear in this FROM clause;
-- presumably the join was dropped from the excerpt -- confirm.
arrival_cluster_raw as (
SELECT
routes.uc_id ,
cg.cluster_id ,
cg.cluster_centroid ,
routes.imei ,
routes.time_created::date as campaign_date,
min(routes.time_created) as m_per_imei_cluster
FROM cluster_groups as cg
group by 1,2,3,4,5
)
,
-- One output row per (uc_id, cluster_id, cluster_centroid, campaign_date).
-- The two scalar subqueries below average over ALL rows of arrival_cluster_raw,
-- not over the rows of the current group, so every group is compared against
-- the same overall average.
arrival_cluster_final as
(
select uc_id, campaign_date, cluster_id, cluster_centroid , date_trunc('second', AVG(m_per_imei_cluster::TIME)) as avg_arrival_time,
count(case when m_per_imei_cluster::TIME < (select AVG(m_per_imei_cluster::TIME) from arrival_cluster_raw) then 1 else null END) as "num_of_arrival_teams_before_avg_time"
,count(case when m_per_imei_cluster::TIME > (select AVG(m_per_imei_cluster::TIME) from arrival_cluster_raw) then 1 else null END) as "num_of_arrival_teams_after_avg_time"
FROM arrival_cluster_raw
group by uc_id,cluster_id, cluster_centroid ,campaign_date
)
The problem is that in "arrival_cluster_final" each row is compared against the average over the entire arrival_cluster_raw set,
whereas I want to compare against the average computed for each combination of uc_id, cluster_id, cluster_centroid, campaign_date.

--can you try this one.
-- For every (uc_id, cluster_id, cluster_centroid, campaign_date) group: the average
-- first-arrival time of day, and how many devices arrived before / after that
-- group's own average.
WITH arrival_cluster_raw AS (
    -- Earliest routes.time_created per device (imei), cluster and calendar day.
    SELECT
        routes.uc_id,
        cg.cluster_id,
        cg.cluster_centroid,
        routes.imei,
        routes.time_created::date AS campaign_date,
        min(routes.time_created) AS m_per_imei_cluster
    FROM
        cluster_groups AS cg
        JOIN routes ON routes.uc_id = cg.id -- assumed join condition; adjust to the real key
    GROUP BY
        1, 2, 3, 4, 5
),
arrival_with_group_avg AS (
    -- Attach the group average to every row BEFORE aggregating: a window function
    -- call may not appear inside an aggregate call, so the two steps are separated.
    -- avg() accepts interval but not time/timestamp, hence the ::time::interval cast
    -- (time of day expressed as an offset from midnight).
    SELECT
        uc_id,
        cluster_id,
        cluster_centroid,
        campaign_date,
        m_per_imei_cluster::time::interval AS arrival_offset,
        avg(m_per_imei_cluster::time::interval) OVER w AS avg_arrival_offset
    FROM
        arrival_cluster_raw
    WINDOW w AS (PARTITION BY uc_id, cluster_id, cluster_centroid, campaign_date)
),
arrival_cluster_final AS (
    SELECT
        uc_id,
        cluster_id,
        cluster_centroid,
        campaign_date,
        date_trunc('second', avg_arrival_offset)::time AS avg_arrival_time,
        -- "before" means the device's arrival is earlier than the group average
        count(CASE WHEN arrival_offset < avg_arrival_offset THEN 1 END)
            AS num_of_arrival_teams_before_avg_time,
        count(CASE WHEN arrival_offset > avg_arrival_offset THEN 1 END)
            AS num_of_arrival_teams_after_avg_time
    FROM
        arrival_with_group_avg
    GROUP BY
        -- avg_arrival_offset is constant within a group; listing it here only
        -- makes it selectable without wrapping it in another aggregate.
        uc_id, cluster_id, cluster_centroid, campaign_date, avg_arrival_offset
)
SELECT * FROM arrival_cluster_final ORDER BY 1;

Related

PostgreSQL: set ORDER BY to DESC or ASC depending on a variable passed into a function

I have a function that takes product pricing data from today and yesterday and works out the difference, orders it by price_delta_percentage and then limits to 5. Now currently I order by price_delta_percentage DESC which returns the top 5 products that have increased in price since yesterday.
I would like to pass in a variable - sort - to make the function sort either DESC or ASC. I have tried IF statements, which give syntax errors, and CASE expressions, which fail with an error stating that price_delta_percentage doesn't exist.
Script:
-- Returns the 5 products with the largest day-over-day percentage price increase.
-- The body references set_code and days, which are not columns of any table in the
-- query -- presumably parameters declared in the function header (not shown here).
RETURNS TABLE(
    product_id varchar,
    name varchar,
    price_today numeric,
    price_yesterday numeric,
    price_delta numeric,
    price_delta_percentage numeric
)
LANGUAGE 'sql'
COST 100
STABLE STRICT PARALLEL SAFE
AS $BODY$
WITH cte AS (
    -- Pivot the two most recent price rows per product (rank 1 = latest,
    -- rank 2 = the one before) into one row with today/yesterday columns.
    SELECT
        product_id,
        name,
        SUM(CASE WHEN rank = 1 THEN trend_price ELSE NULL END) price_today,
        SUM(CASE WHEN rank = 2 THEN trend_price ELSE NULL END) price_yesterday,
        SUM(CASE WHEN rank = 1 THEN trend_price ELSE 0 END) - SUM(CASE WHEN rank = 2 THEN trend_price ELSE 0 END) as price_delta,
        ROUND(((SUM(CASE WHEN rank = 1 THEN trend_price ELSE NULL END) / SUM(CASE WHEN rank = 2 THEN trend_price ELSE NULL END) - 1) * 100), 2) as price_delta_percentage
    FROM (
        SELECT
            magic_sets_cards.name,
            pricing.product_id,
            pricing.trend_price,
            pricing.date,
            RANK() OVER (PARTITION BY product_id ORDER BY date DESC) AS rank
        FROM pricing
        JOIN magic_sets_cards_identifiers ON magic_sets_cards_identifiers.mcm_id = pricing.product_id
        JOIN magic_sets_cards ON magic_sets_cards.id = magic_sets_cards_identifiers.card_id
        JOIN magic_sets ON magic_sets.id = magic_sets_cards.set_id
        WHERE date BETWEEN CURRENT_DATE - days AND CURRENT_DATE
        AND magic_sets.code = set_code
        AND pricing.trend_price > 0.25) p
    WHERE rank IN (1,2)
    GROUP BY product_id, name
)
SELECT *
FROM cte
-- keep only products that have both a latest and a previous price
WHERE price_today IS NOT NULL
  AND price_yesterday IS NOT NULL
-- The sort lives on the outer query: an ORDER BY inside the CTE does not bind the
-- row order seen by LIMIT. Here price_delta_percentage is a real input column, so
-- it may also be used inside an expression (e.g. multiplied by +1 / -1 to flip
-- the direction), which an output-column alias in the inner ORDER BY could not be.
ORDER BY price_delta_percentage DESC
LIMIT 5;
$BODY$;
CASE Statement:
ORDER BY CASE WHEN sort = 'DESC' THEN price_delta_percentage END DESC, CASE WHEN sort = 'ASC' THEN price_delta_percentage END ASC)
Error:
ERROR: column "price_delta_percentage" does not exist
LINE 42: ORDER BY CASE WHEN sort = 'DESC' THEN price_delta_percenta...
You can't use CASE to decide between ASC and DESC like that. Those keywords are not data; they are part of the SQL grammar. To switch them you would need to build the query text as a string and execute it as a dynamic query, which means using PL/pgSQL rather than plain SQL.
But since your column is numeric, you could just order by the product of the column and an indicator value that is either 1 or -1.

how to select top 10 without duplicates

Using SQL Server 2012
I need to select the TOP 10 producers based on ProducerCode. But the data is messy: users entered the same producer with different spellings under the same ProducerCode.
I only need the TOP 10, so if a ProducerCode repeats, I just want to pick the first producer name in the list for it.
How can I achieve that?
Sample of my data
-- cte_TopWP: premium totals from ProductionReportMetrics for rows with EffectiveDate
-- in 2016, TransactionType 'Policy' and the given CompanyLine, split by PolicyType.
-- NOTE(review): grouping on Producer as well as ProducerCode yields one row per
-- distinct (ProducerCode, Producer) pair, not one row per ProducerCode.
;WITH cte_TopWP --T
AS
(
SELECT distinct ProducerCode, Producer,SUM(premium) as NetWrittenPremium,
SUM(CASE WHEN PolicyType = 'New Business' THEN Premium ELSE 0 END) as NewBusiness1,
SUM(CASE WHEN PolicyType = 'Renewal' THEN Premium ELSE 0 END) as Renewal1,
SUM(CASE WHEN PolicyType = 'Rewrite' THEN Premium ELSE 0 END) as Rewrite1
FROM ProductionReportMetrics
WHERE YEAR(EffectiveDate) = 2016 AND TransactionType = 'Policy' AND CompanyLine = 'Arch Insurance Company'--AND ProducerType = 'Wholesaler'
GROUP BY ProducerCode,Producer
)
,
-- cte_Counts: submission/quote/status counts from ClearanceReportMetrics for the same
-- year and CompanyLine, again grouped per (ProducerCode, ProducerName) pair.
cte_Counts --C
AS
(
SELECT distinct ProducerCode, ProducerName, COUNT (distinct ControlNo) as Submissions2,
SUM(CASE WHEN QuotedPremium IS NOT NULL THEN 1 ELSE 0 END) as Quoted2,
SUM(CASE WHEN Type = 'New Business' AND Status IN ('Bound','Cancelled','Notice of Cancellation') THEN 1 ELSE 0 END ) as NewBusiness2,
SUM(CASE WHEN Type = 'Renewal' AND Status IN ('Bound','Cancelled','Notice of Cancellation') THEN 1 ELSE 0 END ) as Renewal2,
SUM(CASE WHEN Type = 'Rewrite' AND Status IN ('Bound','Cancelled','Notice of Cancellation') THEN 1 ELSE 0 END ) as Rewrite2,
SUM(CASE WHEN Status = 'Declined' THEN 1 ELSE 0 END ) as Declined2
FROM ClearanceReportMetrics
WHERE YEAR(EffectiveDate)=2016 AND CompanyLine = 'Arch Insurance Company'
GROUP BY ProducerCode,ProducerName
)
-- Final result: premium figures joined to the counts.
-- NOTE(review): no ORDER BY is visible on this statement, so which 10 rows TOP
-- returns is not tied to the Rank column -- confirm against the full query.
SELECT top 10 RANK() OVER (ORDER BY NetWrittenPremium desc) as Rank,
t.ProducerCode,
c.ProducerName as 'Producer',
NetWrittenPremium,
t.NewBusiness1,
t.Renewal1,
t.Rewrite1,
c.[NewBusiness2]+c.[Renewal2]+c.[Rewrite2] as PolicyCount,
c.Submissions2,
c.Quoted2,
c.[NewBusiness2],
c.Renewal2,
c.Rewrite2,
c.Declined2
FROM cte_TopWP t --LEFT OUTER JOIN tblProducers p on t.ProducerCode=p.ProducerCode
-- The join matches on ProducerCode only, so each cte_TopWP row pairs with every
-- cte_Counts row that shares its code (one per ProducerName spelling).
LEFT OUTER JOIN cte_Counts c ON t.ProducerCode=c.ProducerCode
You should use ROW_NUMBER to fix your issue.
https://msdn.microsoft.com/en-us/library/ms186734.aspx
A good example of this is the following answer:
https://dba.stackexchange.com/a/22198
Here's the code example from the answer.
-- Keep a single row per your_distinct_column: number the rows inside each
-- partition, then retain only the row numbered 1.
SELECT *
FROM (
    SELECT
        al.ID                  AS acss_lookupID,
        ROW_NUMBER() OVER (
            PARTITION BY your_distinct_column
            ORDER BY any_column_you_think_is_appropriate
        )                      AS num,
        al.product_lookupID    AS acssproduct_lookupID,
        al.region_lookupID     AS acssregion_lookupID,
        al.document_lookupID   AS acssdocument_lookupID,
        p.ID                   AS product_ID,
        p.parent_productID     AS productparent_product_ID,
        p.label                AS product_label,
        p.displayheading       AS product_displayheading,
        p.displayorder         AS product_displayorder,
        p.display              AS product_display,
        p.ignorenewupdate      AS product_ignorenewupdate,
        p.directlink           AS product_directlink,
        p.directlinkURL        AS product_directlinkURL,
        p.shortdescription     AS product_shortdescription,
        p.logo                 AS product_logo,
        p.thumbnail            AS product_thumbnail,
        p.content              AS product_content,
        p.pdf                  AS product_pdf,
        p.language_lookupID    AS product_language_lookupID,
        d.ID                   AS document_ID,
        d.shortdescription     AS document_shortdescription,
        d.language_lookupID    AS document_language_lookupID,
        d.document_note        AS document_document_note,
        d.displayheading       AS document_displayheading
    FROM acss_lookup AS al
    INNER JOIN product AS p
        ON al.product_lookupID = p.ID
    INNER JOIN document AS d
        ON al.document_lookupID = d.ID
) ranked
WHERE ranked.num = 1
ORDER BY product_displayheading ASC;
You could do this:
SELECT ProducerCode, MIN(Producer) AS Producer, ...
GROUP BY ProducerCode

Performance tune tSQL Query count(*) & subqueries

I know that there's a better way to do what I'm trying to accomplish here. Though the query works, I fear its performance will suffer as the datasets it is applied to grow.
I don't even necessarily need someone to rewrite what I have; if someone would just point me in the direction of the topic I should study, I would greatly appreciate it.
What I'm trying to return with this query is a count of the number of records at or above a certain status.
Thanks in advance for your help!
-- One scalar subquery per status bucket; every bucket skips rows whose U_KEY3
-- starts with 'z'. Thresholds compare the first two characters of U_KEY1 as text.
SELECT
    (
        SELECT COUNT(*)
        FROM TABLE1 AS t
        WHERE t.U_KEY3 NOT LIKE 'z%'
          AND (t.U_KEY1 IS NULL OR t.U_KEY1 = '')
    ) AS 'STATUS is EMPTY',
    (
        SELECT COUNT(*)
        FROM TABLE1 AS t
        WHERE t.U_KEY3 NOT LIKE 'z%'
          AND LEFT(t.U_KEY1, 2) >= '70'
    ) AS 'STATUS > 70',
    (
        SELECT COUNT(*)
        FROM TABLE1 AS t
        WHERE t.U_KEY3 NOT LIKE 'z%'
          AND LEFT(t.U_KEY1, 2) >= '50'
    ) AS 'STATUS > 50',
    (
        SELECT COUNT(*)
        FROM TABLE1 AS t
        WHERE t.U_KEY3 NOT LIKE 'z%'
          AND LEFT(t.U_KEY1, 2) >= '30'
    ) AS 'STATUS > 30',
    (
        SELECT COUNT(*)
        FROM TABLE1 AS t
        WHERE t.U_KEY3 NOT LIKE 'z%'
          AND LEFT(t.U_KEY1, 2) >= '10'
    ) AS 'STATUS > 10'
You could roll all the subqueries into a single query using a CASE statement:
-- Single pass over TABLE1 producing every status bucket at once.
-- COUNT(CASE ... THEN 1 END) is used instead of SUM(CASE ... ELSE 0 END) so that an
-- empty input still yields 0 (SUM over zero rows is NULL), matching what separate
-- COUNT(*) subqueries would return.
SELECT
    COUNT(CASE WHEN c1.U_KEY1 = '' OR c1.U_KEY1 IS NULL THEN 1 END) AS 'STATUS IS EMPTY',
    COUNT(CASE WHEN LEFT(c1.U_KEY1,2) >= '70' THEN 1 END) AS 'STATUS > 70',
    COUNT(CASE WHEN LEFT(c1.U_KEY1,2) >= '50' THEN 1 END) AS 'STATUS > 50',
    COUNT(CASE WHEN LEFT(c1.U_KEY1,2) >= '30' THEN 1 END) AS 'STATUS > 30',
    COUNT(CASE WHEN LEFT(c1.U_KEY1,2) >= '10' THEN 1 END) AS 'STATUS > 10'
FROM TABLE1 c1
-- shared row filter, applied once instead of once per bucket
WHERE c1.U_KEY3 NOT LIKE 'z%'
But this might not run as fast as the individual subqueries.
I would turn the question around like this:
-- Self-contained demo: count rows at or above each status threshold by joining the
-- data against a small table of limits instead of writing one subquery per limit.
-- A table variable must be named with @ (DECLARE ... TABLE does not accept #names).
DECLARE @t TABLE (Id INT, U_Key1 VARCHAR(4) NULL);
INSERT INTO @t (Id, U_Key1)
VALUES
(1,null),
(2,'902'),
(3,'452'),
(4,'401'),
(5,'103'),
(6,'359'),
(7,'335'),
(8,'772'),
(9,'143'),
(10,'222'),
(11,'664'),
(12,'992'),
(13,'122'),
(14,'332'),
(15,'421'),
(16,'622'),
(17,'982'),
(18,'1234'),
(19,null),
(20,'012');
WITH A AS (
    -- numeric status = first two characters of the key; NULL keys stay NULL
    SELECT CAST(LEFT(U_Key1,2) AS INT) val FROM @t
), limits AS (
    -- one row per threshold to report
    SELECT 10 limitval, 'Status >= 10' limittext
    UNION ALL
    SELECT 30 , 'Status >= 30'
    UNION ALL
    SELECT 50 , 'Status >= 50'
    UNION ALL
    SELECT 70 , 'Status >= 70'
), Counts AS (
    -- an empty string counts as "empty" too, not only NULL
    SELECT 'Status is empty' Limittext, COUNT(Id) Count FROM @t
    WHERE U_Key1 IS NULL OR U_Key1 = ''
    UNION ALL
    -- each value is counted once for every limit it reaches
    SELECT l.limittext, COUNT( A.val) Count FROM A
    CROSS JOIN limits l
    WHERE A.val >= l.limitval
    GROUP BY l.limittext
)
SELECT * FROM Counts
That produces the result:
Status is empty 2
Status >= 10 17
Status >= 30 12
Status >= 50 6
Status >= 70 4

Count Distinct with Answer side by side instead of underneath

Here is my query:
-- Distinct ids per day, split into two rows per day by whether name is present.
SELECT
    substring(date, 1, 10),
    count(DISTINCT id),
    CASE WHEN name IS NOT NULL THEN 1 ELSE 0 END
FROM table
WHERE date >= '2015-09-01'
GROUP BY
    substring(date, 1, 10),
    CASE WHEN name IS NOT NULL THEN 1 ELSE 0 END
ORDER BY
    substring(date, 1, 10)
This is my result:
substring count case
2015-09-01 20472 0
2015-09-01 7 1
2015-09-02 20465 0
2015-09-02 470 1
What I want it to look like is this:
substring count count
2015-09-01 20472 7
2015-09-02 20465 470
Thank you!
With PostgreSQL 9.4 or newer, we can filter directly an aggregate with the new FILTER clause:
-- One row per day with both counts side by side.
-- Each count is the number of DISTINCT ids within its own bucket (name absent /
-- name present), mirroring the per-group count(distinct id) of the two-row query.
SELECT substring(date,1,10),
    count(DISTINCT id) FILTER (WHERE name IS NULL),
    count(DISTINCT id) FILTER (WHERE name IS NOT NULL)
FROM table
WHERE (date >= '2015-09-01')
GROUP BY 1
ORDER BY 1
-- Per day: count1 = distinct ids on rows with a name, count2 = distinct ids on
-- rows without one.
SELECT
    substring(date, 1, 10),
    count(DISTINCT id) FILTER (WHERE name IS NOT NULL) AS count1,
    count(DISTINCT id) FILTER (WHERE name IS NULL) AS count2
FROM event
WHERE date >= '2015-09-01'
GROUP BY substring(date, 1, 10)
ORDER BY substring(date, 1, 10)
This gave me an answer like this: (which is exactly what I wanted so thank you so much)
substring count1 count2
2015-09-01 7 20472
2015-09-02 470 20465
Use case in count to get columns for some condition (name IS NOT NULL), like this:
-- Per day: count1 = distinct ids where name is present, count2 = distinct ids
-- where it is missing. A CASE without ELSE yields NULL, which count() ignores.
SELECT
    substring(date, 1, 10),
    count(DISTINCT CASE WHEN name IS NOT NULL THEN id END) AS count1,
    count(DISTINCT CASE WHEN name IS NULL THEN id END) AS count2
FROM table
WHERE date >= '2015-09-01'
GROUP BY substring(date, 1, 10)
ORDER BY substring(date, 1, 10)
you can also use subquery to create columns:
-- Same two per-day counts, computed from a derived table that first splits id
-- into id1 (name present) and id2 (name missing).
SELECT dt, Count(distinct id1) count1, Count(distinct id2) count2
FROM (
    SELECT distinct substring(date,1,10) AS dt
    , CASE WHEN name IS NOT NULL THEN id ELSE null END AS id1
    , CASE WHEN name IS NOT NULL THEN null ELSE id END AS id2 -- no trailing comma before FROM
    FROM table
    WHERE (date >= '2015-09-01')) d
GROUP BY dt
ORDER BY dt

T-SQL group by partition

I have the table below in SQL Server 2008. Please help me get the expected output.
Thanks.
-- Sample table: one row per (Category, Rn) with a Value and a Weightage.
CREATE TABLE [dbo].[Test](
    [Category] [varchar](10) NULL,
    [Value] [int] NULL,
    [Weightage] [int] NULL,
    [Rn] [smallint] NULL
) ON [PRIMARY];

-- Explicit column list so the insert does not depend on the table's column order.
INSERT INTO Test ([Category], [Value], [Weightage], [Rn])
VALUES
    ('Cat1', 310, 674, 1),
    ('Cat1', 783, 318, 2),
    ('Cat1', 310, 96, 3),
    ('Cat1', 109, 917, 4),
    ('Cat2', 441, 397, 1),
    ('Cat2', 637, 725, 2),
    ('Cat2', 460, 742, 3),
    ('Cat2', 542, 583, 4),
    ('Cat2', 601, 162, 5),
    ('Cat2', 45, 719, 6),
    ('Cat2', 46, 305, 7),
    ('Cat3', 477, 286, 1),
    ('Cat3', 702, 484, 2),
    ('Cat3', 797, 836, 3),
    ('Cat3', 541, 890, 4),
    ('Cat3', 750, 962, 5),
    ('Cat3', 254, 407, 6),
    ('Cat3', 136, 585, 7),
    ('Cat3', 198, 477, 8),
    ('Cat4', 375, 198, 1),
    ('Cat4', 528, 351, 2),
    ('Cat4', 845, 380, 3),
    ('Cat4', 716, 131, 4),
    ('Cat4', 781, 919, 5);
For the per-category average Weightage:
-- Per category: average Value over all rows, and average Weightage over the rows
-- with Rn < 4 only. Reads the Test table defined in the question.
SELECT
    Category,
    AVG(Value) AS AvgValue,
    -- sum / count restricted to Rn < 4; NULLIF turns a zero count into NULL so a
    -- category with no such rows yields NULL instead of a divide-by-zero error
    SUM(CASE WHEN RN<4 THEN Weightage ELSE 0 END) / (NULLIF(SUM(CASE WHEN RN<4 THEN 1 ELSE 0 END), 0)) AS AvgWeightage
FROM
    Test
GROUP BY
    Category
Average Weightage over the whole set
-- Per category: average Value, next to a single average Weightage computed over
-- every row with Rn < 4 regardless of category. Reads the Test table defined in
-- the question.
SELECT
    M.Category,
    AVG(M.Value) AS AvgValue,
    foo.AvgWeightage
FROM
    Test M
    CROSS JOIN
    -- one-row derived table, so the cross join only appends the overall average
    (SELECT AVG(Weightage) As AvgWeightage FROM Test WHERE Rn < 4) foo
GROUP BY
    M.Category, foo.AvgWeightage
Simple:)
-- Per category: average Value over all rows and average Weightage over rows with
-- Rn < 4 (the CASE yields NULL for other rows, which AVG skips).
SELECT
    Category,
    AVG(Value) AS AvgValue,
    AVG(CASE WHEN RN < 4 THEN Weightage END) AS AvgWeightage
FROM Test
GROUP BY Category
Try this
-- Two per-category aggregates computed separately and joined on Category:
-- v = average Value over all rows, w = average Weightage over rows with Rn < 4.
SELECT
    v.Category,
    v.AvgValue,
    w.Weight
FROM (
    SELECT
        t.Category,
        AVG(t.Value) AS AvgValue
    FROM Test t
    GROUP BY t.Category
) v
INNER JOIN (
    SELECT
        Category,
        AVG(Weightage) AS Weight
    FROM Test
    WHERE Rn < 4
    GROUP BY Category
) w
    ON v.Category = w.Category