Count Distinct with Answer side by side instead of underneath - postgresql

Here is my query:
SELECT substring(date,1,10), count(distinct id),
CASE WHEN name IS NOT NULL THEN 1 ELSE 0 END
FROM table
WHERE (date >= '2015-09-01')
GROUP BY substring(date,1,10), CASE WHEN name IS NOT NULL THEN 1 ELSE 0 END
ORDER BY substring(date,1,10)
This is my result:
substring count case
2015-09-01 20472 0
2015-09-01 7 1
2015-09-02 20465 0
2015-09-02 470 1
What I want it to look like is this:
substring count count
2015-09-01 20472 7
2015-09-02 20465 470
Thank you!

With PostgreSQL 9.4 or newer, we can filter directly an aggregate with the new FILTER clause:
SELECT substring(date,1,10),
count(distinct id),
count(*) FILTER (WHERE name IS NOT NULL)
FROM table
WHERE (date >= '2015-09-01')
GROUP BY 1
ORDER BY 1

SELECT substring(date,1,10)
, count(distinct CASE WHEN name IS NOT NULL THEN id ELSE null END ) AS count1
, count(distinct CASE WHEN name IS NOT NULL THEN null ELSE id END ) AS count2
FROM event
WHERE (date >= '2015-09-01')
GROUP BY substring(date,1,10)
ORDER BY substring(date,1,10)
This gave me an answer like this: (which is exactly what I wanted so thank you so much)
substring count1 count2
2015-09-01 7 20472
2015-09-02 470 20465

Use case in count to get columns for some condition (name IS NOT NULL), like this:
SELECT substring(date,1,10)
, count(distinct CASE WHEN name IS NOT NULL THEN id ELSE null END ) AS count1
, count(distinct CASE WHEN name IS NOT NULL THEN null ELSE id END ) AS count2
FROM table
WHERE (date >= '2015-09-01')
GROUP BY substring(date,1,10)
ORDER BY substring(date,1,10)
you can also use subquery to create columns:
SELECT dt, Count(id1) count1, Count(distinct id2) count2
FROM (
SELECT distinct substring(date,1,10) AS dt
, CASE WHEN name IS NOT NULL THEN id ELSE null END AS id1
, CASE WHEN name IS NOT NULL THEN null ELSE id END AS id2,
FROM table
WHERE (date >= '2015-09-01')) d
GROUP BY dt
ORDER BY dt

Related

Count the number of instances the time is above average time

Here is my code:
arrival_cluster_raw as (
SELECT
routes.uc_id ,
cg.cluster_id ,
cg.cluster_centroid ,
routes.imei ,
routes.time_created::date as campaign_date,
min(routes.time_created) as m_per_imei_cluster
FROM cluster_groups as cg
group by 1,2,3,4,5
)
,
arrival_cluster_final as
(
select uc_id, campaign_date, cluster_id, cluster_centroid , date_trunc('second', AVG(m_per_imei_cluster::TIME)) as avg_arrival_time,
count(case when m_per_imei_cluster::TIME < (select AVG(m_per_imei_cluster::TIME) from arrival_cluster_raw) then 1 else null END) as "num_of_arrival_teams_before_avg_time"
,count(case when m_per_imei_cluster::TIME > (select AVG(m_per_imei_cluster::TIME) from arrival_cluster_raw) then 1 else null END) as "num_of_arrival_teams_after_avg_time"
FROM arrival_cluster_raw
group by uc_id,cluster_id, cluster_centroid ,campaign_date
)
The problem is that in the "arrival_cluster_final", the average value of the entire cluster
is being compared whereas I want to compare the average value for the combination of uc_id,cluster_id, cluster_centroid ,campaign_date
--can you try this one.
WITH arrival_cluster_raw AS (
SELECT
routes.uc_id,
cg.cluster_id,
cg.cluster_centroid,
routes.imei,
routes.time_created::date AS campaign_date,
min(routes.time_created) AS m_per_imei_cluster
FROM
cluster_groups AS cg
JOIN routes ON routes.uc_id = cg.id --assume the way you want join.
GROUP BY
1,2,3,4,5
),
arrival_cluster_final AS (
SELECT
uc_id,
cluster_id,
cluster_centroid,
imei,
campaign_date,
date_trunc('second', (avg(m_per_imei_cluster) OVER w))
,count( CASE WHEN (avg(m_per_imei_cluster) OVER w) < m_per_imei_cluster THEN
1
ELSE
NULL
END) AS num_of_arrival_teams_before_avg_time
,count(
CASE WHEN (avg(m_per_imei_cluster) OVER w) > m_per_imei_cluster THEN
1
ELSE
NULL
END) AS num_of_arrival_teams_after_avg_time
FROM
arrival_cluster_raw
WINDOW w AS (PARTITION BY uc_id,
cluster_id,
cluster_centroid,
campaign_date))
SELECT * FROM arrival_cluster_final ORDER BY 1;

Postgresql, set order by desc or asc depending on variable parse into function

I have a function that takes product pricing data from today and yesterday and works out the difference, orders it by price_delta_percentage and then limits to 5. Now currently I order by price_delta_percentage DESC which returns the top 5 products that have increased in price since yesterday.
I would like to parse in a variable - sort - to change the function to either sort by DESC, or ASC. I have tried to use IF statements and get syntax errors and CASE statements which states that price_delta_percentage doesn't exist.
Script:
RETURNS TABLE(
product_id varchar,
name varchar,
price_today numeric,
price_yesterday numeric,
price_delta numeric,
price_delta_percentage numeric
)
LANGUAGE 'sql'
COST 100
STABLE STRICT PARALLEL SAFE
AS $BODY$
WITH cte AS (
SELECT
product_id,
name,
SUM(CASE WHEN rank = 1 THEN trend_price ELSE NULL END) price_today,
SUM(CASE WHEN rank = 2 THEN trend_price ELSE NULL END) price_yesterday,
SUM(CASE WHEN rank = 1 THEN trend_price ELSE 0 END) - SUM(CASE WHEN rank = 2 THEN trend_price ELSE 0 END) as price_delta,
ROUND(((SUM(CASE WHEN rank = 1 THEN trend_price ELSE NULL END) / SUM(CASE WHEN rank = 2 THEN trend_price ELSE NULL END) - 1) * 100), 2) as price_delta_percentage
FROM (
SELECT
magic_sets_cards.name,
pricing.product_id,
pricing.trend_price,
pricing.date,
RANK() OVER (PARTITION BY product_id ORDER BY date DESC) AS rank
FROM pricing
JOIN magic_sets_cards_identifiers ON magic_sets_cards_identifiers.mcm_id = pricing.product_id
JOIN magic_sets_cards ON magic_sets_cards.id = magic_sets_cards_identifiers.card_id
JOIN magic_sets ON magic_sets.id = magic_sets_cards.set_id
WHERE date BETWEEN CURRENT_DATE - days AND CURRENT_DATE
AND magic_sets.code = set_code
AND pricing.trend_price > 0.25) p
WHERE rank IN (1,2)
GROUP BY product_id, name
ORDER BY price_delta_percentage DESC)
SELECT * FROM cte WHERE (CASE WHEN price_today IS NULL OR price_yesterday IS NULL THEN 'NULL' ELSE 'VALID' END) !='NULL'
LIMIT 5;
$BODY$;sql
CASE Statement:
ORDER BY CASE WHEN sort = 'DESC' THEN price_delta_percentage END DESC, CASE WHEN sort = 'ASC' THEN price_delta_percentage END ASC)
Error:
ERROR: column "price_delta_percentage" does not exist
LINE 42: ORDER BY CASE WHEN sort = 'DESC' THEN price_delta_percenta...
You can't use CASE to decide between ASC and DESC like that. Those labels are not data, they are part of the SQL grammar. You would need to do it by combining the text into a string and then executing the string as a dynamic query, which means you would need to use pl/pgsql, not SQL
But since your column is numeric, you could just order by the product of the column and an indicator variable which is either 1 or -1.

Count distinct column case when/conditional

I'm trying to count the distinct number of ids in a column and this works fine.
COUNT(DISTINCT messages.id) AS link_created
But when I try to count with a conditional, I get a syntax error, what's the proper syntax to add a case when or some other condition to only count the distinct message ids where the messages.link_label is present?
COUNT(DISTINCT messages.id CASE WHEN messages.link_label IS NOT NULL 1 END) AS link_created
My full query looks like this.
#customers = Customer.select("customers.*,
COUNT(DISTINCT recipient_lists.id) messages_sent,
COUNT(DISTINCT messages.id CASE WHEN messages.link_label IS NOT NULL 1 END) AS link_created,
COALESCE(SUM(video_activities.video_watched_count),0) AS watched_count,
COALESCE(SUM(video_activities.response_count),0) AS response_count,
COALESCE(SUM(video_activities.email_opened_count),0) AS email_opened_count,
COALESCE(SUM(CASE WHEN video_activities.video_watched_at IS NOT NULL THEN 1 ELSE 0 END),0) AS unique_watches,
COALESCE(SUM(CASE WHEN video_activities.email_opened_at IS NOT NULL THEN 1 ELSE 0 END),0) AS unique_opens,
COALESCE(SUM(CASE WHEN video_activities.response_count > 0 THEN 1 ELSE 0 END),0) AS unique_responses,
customers.updated_at AS last_login,
SUBSTRING( email from POSITION( '#' in email) + 1 for length(email)) AS company")
.joins("LEFT JOIN messages ON customers.id = messages.customer_id
LEFT JOIN recipient_lists ON messages.id = recipient_lists.message_id AND messages.link_label is NULL
LEFT JOIN video_activities ON messages.id = video_activities.message_id")
.group("customers.id")
Try this:
COUNT(DISTINCT CASE
WHEN messages.link_label IS NOT NULL
THEN messages.id
ELSE NULL END)
AS link_created

Count based on Or is not differentiating the count

My results are showing both counts the same but there should be some that have different counts as CarCode is sometimes null.
SELECT distinct car.carKey,
car.Weight,
car.CarCode,
COUNT(car.carKey)OVER(PARTITION BY car.carKey) AS TotalCarKeyCount,
COUNT(Case When (car.[Weight] IS not null) and (car.CarCode is null) as CarCountWithoutCode
then 0
else car.carKey End) OVER(PARTITION BY car.carKey) AS CarCount
from car
results show TotalCarKeyCount and CarCountWithoutCode always with the same counts like the case statement isn't working or something.
It sounds like you might want to use SUM() instead:
SELECT distinct car.carKey,
car.Weight,
car.CarCode,
COUNT(car.carKey)OVER(PARTITION BY car.carKey) AS TotalCarKeyCount,
SUM(Case When (car.[Weight] IS not null) and (car.CarCode is null) as CarCountWithoutCode
then 0 else 1 End) OVER(PARTITION BY car.carKey) AS CarCount
from car
SQL Fiddle demo showing the difference between using COUNT() and SUM():
create table test
(
id int
);
insert into test values
(1), (null), (23), (4), (2);
select
count(case when id is null then 0 else id end) [count],
sum(case when id is null then 0 else 1 end) [sum]
from test;
Count returns 5 and Sum returns 4. Or you can change the COUNT() to use null and the null values will be excluded in the final count()
select
count(case when id is null then null else id end) [count],
sum(case when id is null then 0 else 1 end) [sum]
from test;
Your query would be:
SELECT distinct car.carKey,
car.Weight,
car.CarCode,
COUNT(car.carKey)OVER(PARTITION BY car.carKey) AS TotalCarKeyCount,
COUNT(Case When (car.[Weight] IS not null) and (car.CarCode is null) as CarCountWithoutCode
then null else 1 End) OVER(PARTITION BY car.carKey) AS CarCount
from car
Change the then 0 to then null. Zero values are counted, nulls are not.

T-SQL group by partition

I have below table in SQL server 2008.Please help to get expected output
Thanks.
CREATE TABLE [dbo].[Test]([Category] [varchar](10) NULL,[Value] [int] NULL,
[Weightage] [int] NULL,[Rn] [smallint] NULL ) ON [PRIMARY]
insert into Test values ('Cat1',310,674,1),('Cat1',783,318,2),('Cat1',310,96,3),('Cat1',109,917,4),('Cat2',441,397,1),('Cat2',637,725,2),('Cat2',460,742,3),('Cat2',542,583,4),('Cat2',601,162,5),('Cat2',45,719,6),('Cat2',46,305,7),('Cat3',477,286,1),('Cat3',702,484,2),('Cat3',797,836,3),('Cat3',541,890,4),('Cat3',750,962,5),('Cat3',254,407,6),('Cat3',136,585,7),('Cat3',198,477,8),('Cat4',375,198,1),('Cat4',528,351,2),('Cat4',845,380,3),('Cat4',716,131,4),('Cat4',781,919,5)
For per category Average Weightage
SELECT
Category,
AVG(Value),
SUM(CASE WHEN RN<4 THEN Weightage ELSE 0 END) / (NULLIF(SUM(CASE WHEN RN<4 THEN 1 ELSE 0 END), 0))
FROM
MyTable
GROUP BY
Category
Average Weightage over the whole set
SELECT
M.Category,
AVG(Value),
foo.AvgWeightage
FROM
MyTable M
CROSS JOIN
(SELECT AVG(Weightage) As AvgWeightage FROM MyTable WHERE Rn < 4) foo
GROUP BY
M.Category, foo.AvgWeightage
Simple:)
SELECT Category,
AVG(Value) AS AvgValue,
AVG(CASE WHEN RN< 4 THEN (Weightage) END ) AS AvgWeightage
FROM Test
GROUP BY Category
Try this
SELECT AvgValue.Category, AvgValue.AvgValue, AvgWeight.Weight
FROM(
(SELECT c.Category,
AVG(c.Value) AS AvgValue
FROM Test c
GROUP BY Category) AvgValue
INNER JOIN
(SELECT Category, AVG(Weightage) AS Weight
FROM Test
WHERE Rn < 4
GROUP BY Category) AvgWeight
ON AvgValue.Category = AvgWeight.Category)