group by 2 fields oracle sql inner join - group-by

I can't get the syntax correct to be able to group by two fields: as_of_date and ISSUERID. Thanks!
select as_of_date, count(distinct(issuer_id)) from
crd_own.ml_corp_index_data_monthly tb1
INNER JOIN pm_own.esg_credit_factors tb2
ON tb1.TICKER = tb2.ISSUER_TICKER
AND trunc(tb1.DATADATE, 'month') = trunc(tb2.AS_OF_DATE, 'month')
where INDEXNAME ='IG'
and DATADATE = '31-DEC-17'
group by as_of_date, ISSUERID
order by as_of_date asc

The "non-aggregating" columns in your SELECT clause do not match the columns in your GROUP BY clause:
SELECT
as_of_date
, COUNT( DISTINCT (issuer_id) )
...
GROUP BY
as_of_date
, ISSUERID <<< this is the problem
You need to either include ISSUERID in the select clause:
SELECT
as_of_date
, ISSUERID
, COUNT( DISTINCT (issuer_id) )
...
GROUP BY
as_of_date
, ISSUERID
ORDER BY
as_of_date ASC
Or remove ISSUERID from the GROUP BY clause completely. In general, the pattern is:
SELECT
-- non-aggregating columns
as_of_date
, ISSUERID
-- aggregating columns
, COUNT( DISTINCT (issuer_id) )
FROM ...
WHERE ...
GROUP BY
-- repeat all non-aggregating columns here
as_of_date
, ISSUERID

Related

Calculate difference between the row counts of tables in two schemas in PostgreSQL

I have two tables with the same name in two different schemas (an old and a new dump). I would like to know the difference between the two integrations.
I have two queries, that gives old and new count:
select count(*) as count_old from(
SELECT
distinct id
FROM
schema1.compound)q1
select count(*) as count_new from(
SELECT
distinct id
FROM
schema2.compound)q2
I would like have the following output.
table_name count_old count_new diff
compound 4740 4735 5
Any help is appreciated. Thanks in advance
-- Count the distinct ids in each schema once, then report both counts
-- side by side together with their difference (old minus new).
WITH old_ids AS (
    SELECT COUNT(DISTINCT id) AS count_old
    FROM schema1.compound
),
new_ids AS (
    SELECT COUNT(DISTINCT id) AS count_new
    FROM schema2.compound
)
SELECT
    'compound' AS table_name,
    old_ids.count_old,
    new_ids.count_new,
    old_ids.count_old - new_ids.count_new AS diff
FROM old_ids
-- Both CTEs are single-row aggregates, so the cross join yields exactly one row.
CROSS JOIN new_ids;
I think you could do something like this:
-- Report the distinct-id counts of schema1.compound (old) and
-- schema2.compound (new) plus their difference, using nested subqueries.
SELECT
    'compound' AS table_name,
    count_old,
    count_new,
    (count_old - count_new) AS diff
FROM (
    SELECT
        -- Every derived table in FROM needs an alias (old_ids / new_ids / counts).
        (SELECT count(*) FROM (SELECT DISTINCT id FROM schema1.compound) AS old_ids) AS count_old,
        (SELECT count(*) FROM (SELECT DISTINCT id FROM schema2.compound) AS new_ids) AS count_new
) AS counts;
It was probably answered already, but it is a subquery/nested query.
You can directly compute the COUNT on distinct values if you use the DISTINCT keyword inside your aggregation function. Then you can join the queries extracting your two needed values, and use them inside your query to get the output table.
-- Compare the number of distinct ids in schema1.compound (old dump)
-- and schema2.compound (new dump).
WITH cte AS (
    SELECT
        new_dump.cnt AS count_new,
        old_dump.cnt AS count_old
    FROM (SELECT COUNT(DISTINCT id) AS cnt FROM schema1.compound) AS old_dump
    -- Each side is a single aggregate row, so a cross join simply pairs them.
    CROSS JOIN (SELECT COUNT(DISTINCT id) AS cnt FROM schema2.compound) AS new_dump
)
SELECT
    'compound' AS table_name,
    count_new,
    count_old,
    -- Numeric difference (old minus new); comparing the two counts with "="
    -- would return a boolean instead of the requested diff.
    count_old - count_new AS diff
FROM cte;

SQL Server : group by with corresponding row values

I need to write a T-SQL group by query for a table with multiple dates and seq columns:
DROP TABLE #temp
CREATE TABLE #temp(
id char(1),
dt DateTime,
seq int)
Insert into #temp values('A','2015-03-31 10:00:00',1)
Insert into #temp values('A','2015-08-31 10:00:00',2)
Insert into #temp values('A','2015-03-31 10:00:00',5)
Insert into #temp values('B','2015-09-01 10:00:00',1)
Insert into #temp values('B','2015-09-01 10:00:00',2)
I want the results to contains only the items A,B with their latest date and the corresponding seq number, like:
id MaxDate CorrespondentSeq
A 2015-08-31 10:00:00.000 2
B 2015-09-01 10:00:00.000 2
I am trying with (the obviously wrong!):
select id, max(dt) as MaxDate, max(seq) as CorrespondentSeq
from #temp
group by id
which returns:
id MaxDate CorrespondentSeq
A 2015-08-31 10:00:00.000 5 <-- 5 is wrong
B 2015-09-01 10:00:00.000 2
How can I achieve that?
EDIT
The dt datetime column has duplicated values (exactly same date!)
I am using SQL Server 2005
You can use a ranking subselect to get only the highest ranked entries for an id:
-- Rank every id's rows from newest dt downwards (ties on dt broken by the
-- highest seq) and keep only the top-ranked rows.
;WITH ranked AS (
    SELECT
        id,
        dt,
        seq,
        RANK() OVER (PARTITION BY id ORDER BY dt DESC, seq DESC) AS r
    FROM #temp
)
SELECT
    ranked.id,
    ranked.dt,
    ranked.seq
FROM ranked
WHERE ranked.r = 1;
-- Number each id's rows from newest dt (then highest seq) and return only
-- the first row of every id.
SELECT
    latest.ID,
    latest.DT,
    latest.SEQ
FROM (
    SELECT
        ID,
        DT,
        SEQ,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY dt DESC, seq DESC) AS rn
    FROM temp
) AS latest
WHERE latest.rn = 1;
Demo : http://www.sqlfiddle.com/#!3/3e3d5/5
Through trial and error I may have found a solution, but I'm not completely sure it is correct:
select A.id, B.dt, max(B.seq)
from (select id, max(dt) as maxDt
from #temp
group by id) as A
inner join #temp as B on A.id = B.id AND A.maxDt = B.dt
group by A.id, B.dt
Select id, dt, seq
From #temp t
where dt = (Select Max(dt) from #temp
Where id = t.Id)
If there are duplicate rows, you also need to specify how the query processor should decide which of the duplicates to return. Say you want the lowest value of seq; then you could write:
Select id, dt, seq
From #temp t
where dt = (Select Max(dt) from #temp
Where id = t.Id)
and seq = (Select Min(Seq) from #temp
where id = t.Id
and dt = t.dt)

SQL Server SUM() for DISTINCT records

I have a field called "Users", and I want to run SUM() on that field that returns the sum of all DISTINCT records. I thought that this would work:
SELECT SUM(DISTINCT table_name.users)
FROM table_name
But it's not selecting DISTINCT records, it's just running as if I had run SUM(table_name.users).
What would I have to do to add only the distinct records from this field?
Use count()
SELECT count(DISTINCT table_name.users)
FROM table_name
SQLFiddle demo
This code seems to indicate sum(distinct ) and sum() return different values.
-- Self-contained check that SUM(DISTINCT ...) and SUM(...) differ when a
-- value repeats. Input values: 1, 1, 2, 4
--   DistinctSum = 7, allSum = 8, distinctCount = 3, allCount = 4
-- All literals are integers so the UNION ALL does not depend on implicit
-- string-to-int conversion.
WITH t AS (
    SELECT 1 AS a
    UNION ALL
    SELECT 1
    UNION ALL
    SELECT 2
    UNION ALL
    SELECT 4
)
SELECT
    SUM(DISTINCT a) AS DistinctSum,
    SUM(a) AS allSum,
    COUNT(DISTINCT a) AS distinctCount,
    COUNT(a) AS allCount
FROM t;
Do you actually have non-distinct values?
select count(1), users
from table_name
group by users
having count(1) > 1
If not, the sums will be identical.
You can see for yourself that distinct works with the following example. Here I create a subquery with duplicate values, then I do a sum distinct on those values.
-- Demo: the derived table holds the values 1, 1, 2, 2, so the distinct sum
-- adds 1 + 2 while the regular sum adds all four rows.
SELECT
    SUM(DISTINCT v.x) AS DistinctSum,
    SUM(v.x) AS RegularSum
FROM (
    SELECT 1 AS x
    UNION ALL SELECT 1
    UNION ALL SELECT 2
    UNION ALL SELECT 2
) AS v;
You can see that the distinct sum column returns 3 and the regular sum returns 6 in this example.
You can use a sub-query:
-- Deduplicate the users values first, then add up what is left.
-- SQL Server requires an alias on a derived table in the FROM clause.
SELECT SUM(d.users)
FROM (SELECT DISTINCT users FROM table_name) AS d;
SUM(DISTINCTROW table_name.something)
It worked for me (innodb).
Description - "DISTINCTROW omits data based on entire duplicate records, not just duplicate fields." http://office.microsoft.com/en-001/access-help/all-distinct-distinctrow-top-predicates-HA001231351.aspx
;WITH cte
as
(
SELECT table_name.users , rn = ROW_NUMBER() OVER (PARTITION BY users ORDER BY users)
FROM table_name
)
SELECT SUM(users)
FROM cte
WHERE rn = 1
SQL Fiddle
Try here yourself
TEST
-- Test fixture: a table variable holding duplicate values (1,1,1,3,3,5,5).
-- Table variables are declared with an @ prefix; # denotes a temporary table
-- and is not valid in DECLARE.
DECLARE @table_name TABLE (Users INT);
INSERT INTO @table_name (Users) VALUES (1),(1),(1),(3),(3),(5),(5);

-- Keep one row per distinct Users value (rn = 1) and sum those: 1 + 3 + 5 = 9.
;WITH cte AS (
    SELECT
        Users,
        ROW_NUMBER() OVER (PARTITION BY Users ORDER BY Users) AS rn
    FROM @table_name
)
SELECT SUM(Users) AS DisSum
FROM cte
WHERE rn = 1;
Result
DisSum
9
If circumstances make it difficult to weave a "distinct" into the sum clause, it will usually be possible to add an extra "where" clause to the entire query - something like:
-- Sum each ColToSum value once: a row is kept only when no other row with
-- the same ColToSum has a smaller ID.
SELECT SUM(t.ColToSum)
FROM SomeTable AS t
WHERE NOT EXISTS (
    SELECT 1
    FROM SomeTable AS earlier
    WHERE earlier.ColToSum = t.ColToSum
      AND earlier.ID < t.ID
);
May be a duplicate to
Trying to sum distinct values SQL
As per Declan_K's answer:
Get the distinct list first...
SELECT SUM(SQ.COST)
FROM
(SELECT DISTINCT [Tracking #] as TRACK,[Ship Cost] as COST FROM YourTable) SQ

Using two different where clauses

I would like to know how to use a different WHERE clause based on a CASE or IF. I'd prefer a CASE, as the rest of the statement is complex, and I don't like the idea of that complexity being in two places with only a minor difference. However, I know cases are only used for values. I've replicated a simple version of my issue below.
Essentially, I have three tables. The first contains the master information (MasterTable). The second contains a one-to-many relationship belonging to the master table (Table1). The third is a list of selectors indicating which of the records in Table1 are to be used in this instance. I want the most recent record of Table2 to drive what is selected from Table1, with precedence given to SubID over OrderNum.
MasterTable | MasterID, OtherInfo
Table1 | T1UniqueId, MasterID, SubID, Text, OrderNum
Table2 | T2UniqueId, MasterID, SubID, OrderNum, Date
SELECT MasterID, OtherInfo, SubID
FROM MasterTable
OUTER APPLY(
SELECT TOP 1 SubID FROM Table1
WHERE Table1.MasterID=MasterTable.MasterID
CASE
WHEN
(
SELECT TOP 1 SubID FROM Table2
WHERE Table2.MasterID=MasterTable.MasterID
ORDER BY Date DESC
) Is NULL
THEN Table1.OrderNum=
(
SELECT TOP 1 OrderNum
FROM Table2
WHERE Table2.MasterId=MasterTable.MasterId
ORDER BY Date DESC
)
ELSE Table1.SubId=
(
SELECT TOP 1 SubId
FROM Table2
WHERE Table2.MasterId=MasterTable.MasterId
ORDER BY Date DESC
)
END
) SubData
One quick rewrite of this would result in the following:
-- Two-branch variant: pick one of two near-identical queries depending on
-- whether the most recent Table2 row has a NULL SubID.
-- NOTE(review): the IF condition references MasterTable.MasterID, but no
-- FROM clause brings MasterTable into scope at that point, so the reference
-- cannot be resolved there. The branch is also chosen once for the whole
-- statement rather than per MasterTable row.
IF ((SELECT TOP 1 SubID FROM Table2 WHERE Table2.MasterID=MasterTable.MasterID ORDER BY Date DESC) IS NULL)
BEGIN
-- Branch 1: match Table1 rows on the OrderNum of the latest Table2 row.
SELECT
MasterID, OtherInfo, SubID
FROM MasterTable
OUTER APPLY(
SELECT TOP 1 SubID FROM Table1
WHERE
Table1.MasterID=MasterTable.MasterID
AND Table1.OrderNum =
(
SELECT TOP 1 OrderNum
FROM Table2
WHERE Table2.MasterId=MasterTable.MasterId
ORDER BY Date DESC
)
) SubData
END
ELSE
BEGIN
-- Branch 2: match Table1 rows on the SubId of the latest Table2 row.
SELECT
MasterID, OtherInfo, SubID
FROM MasterTable
OUTER APPLY(
SELECT TOP 1 SubID FROM Table1
WHERE
Table1.MasterID=MasterTable.MasterID
AND Table1.SubId=
(
SELECT TOP 1 SubId
FROM Table2
WHERE Table2.MasterId=MasterTable.MasterId
ORDER BY Date DESC
)
) SubData
END
But as you noted that makes it look ugly, because you now have that complexity in two places...
I guess you could also formulate it this way (untested, but this should keep your complex logic in one place):
-- For every MasterTable row, pick a SubID from Table1 driven by the most
-- recent Table2 row of the same MasterID:
--   * if that Table2 row has a SubID, match Table1 on SubID;
--   * otherwise (SubID IS NULL), match Table1 on OrderNum.
-- The latest Table2 row is fetched once per master row and reused, so SubID
-- and OrderNum always come from the same Table2 row even when several rows
-- share the latest Date.
SELECT
    m.MasterID,
    m.OtherInfo,
    SubData.SubID
FROM MasterTable AS m
OUTER APPLY (
    -- Latest selector row for this master record (NULLs if there is none).
    SELECT TOP 1
        t2.SubID,
        t2.OrderNum
    FROM Table2 AS t2
    WHERE t2.MasterID = m.MasterID
    ORDER BY t2.[Date] DESC
) AS Latest
OUTER APPLY (
    SELECT TOP 1
        t1.SubID
    FROM Table1 AS t1
    WHERE t1.MasterID = m.MasterID
      AND (
            -- No SubID on the selector: fall back to OrderNum.
            (Latest.SubID IS NULL AND t1.OrderNum = Latest.OrderNum)
            -- SubID present: it takes precedence. When Latest.SubID is NULL
            -- this comparison is UNKNOWN and never matches.
         OR t1.SubID = Latest.SubID
      )
) AS SubData;
If SubID and OrderNum hold the same values in Table1 and Table2, you can use a simple query with a nested select statement:
select m.MasterID, m.OtherInfo, (
select top 1 coalesce(t2.SubID, t2.OrderNum) from Table2 t2
where t2.MasterID = m.MasterID order by date desc
) as SubID
from MasterTable m;

partition over two columns

I'm wanting to partition by two columns (PROJECT_ID, AND CATEGORY_NAME) and I'm having trouble writing the correct syntax. My query below is functional but when I attempt to add an additional over clause it doesn't work correctly. The recursive query was used to concatenate rows partitioning over project_id, creating a list of admins combining and concatenating name_last and name_first to make a list. I need to use an additional over clause to include the CATEGORY_NAME due to admins in the list that work in different categories ('INVISION' AND 'INSIGHT') but are under the same project_id. The first subquery
SELECT
RowNumber() over (PARTITION BY F13.DIM_PROJECT_ID, F13.CATEGORY_NAME ORDER BY F13.PROJECT_NAME),
F13.DIM_PROJECT_ID.....etc.
extracts the correct data, I'm just unsure of how to pull that correct data out partitioning by both project and category. I'm using db2.
-- Question's query: t1 numbers the admin rows, t2 recursively concatenates
-- the admin names into one list, and the final SELECT keeps the longest list.
-- NOTE(review): as posted, this query has several inconsistencies, flagged inline.
with
-- NOTE(review): the last column is declared as BUSINESS_NAME, but the SELECT
-- supplies F2H.CATEGORY_NAME in that position and t2 below reads
-- CATEGORY_NAME from t1.
t1(rowNum, PROJECT_ID, NAME_LAST, NAME_FIRST, POINT_OF_CONTACT, PROJECT_NAME, BUSINESS_NAME) as
(
SELECT
-- Row numbers restart for every (project, category) pair.
-- NOTE(review): the partition uses F13.CATEGORY_NAME while the select list,
-- WHERE and GROUP BY use F2H.CATEGORY_NAME - confirm which table owns it.
RowNumber() over (PARTITION BY F13.DIM_PROJECT_ID, F13.CATEGORY_NAME ORDER BY F13.PROJECT_NAME),
F13.DIM_PROJECT_ID,
F2P.NAME_LAST,
F2P.NAME_FIRST,
REPLACE(F2P.POINT_OF_CONTACT, ',', ' |') AS POINT_OF_CONTACT,
F13.PROJECT_NAME,
-- NOTE(review): trailing comma directly before FROM.
F2H.CATEGORY_NAME,
FROM FACT_TABLE AS F13
INNER JOIN ADMIN AS F2P ON F13.DIM_PROJECT_ID = F2P.DIM_PROJECT_ID
LEFT JOIN HOURS AS F2H ON F13.DIM_PROJECT_ID = F2H.DIM_PROJECT_ID
-- NOTE(review): filtering on an F2H column in WHERE discards the unmatched
-- rows the LEFT JOIN would otherwise keep.
WHERE F2H.CATEGORY_NAME = ('INVISION')
group by
F13.DIM_PROJECT_ID,
F13.PROJECT_NAME,
F2P.NAME_LAST,
F2P.NAME_FIRST,
F2P.POINT_OF_CONTACT,
F2H.CATEGORY_NAME
) ,
t2(PROJECT_ID, LIST, POINT_OF_CONTACT, PROJECT_NAME, BUSINESS_NAME, cnt) AS
-- Anchor member: start each list with the row numbered 1.
( SELECT PROJECT_ID,
VARCHAR(NAME_FIRST CONCAT ' ' CONCAT NAME_LAST, 6000),
POINT_OF_CONTACT,
PROJECT_NAME,
CATEGORY_NAME,
1
FROM t1
WHERE rowNum = 1
UNION ALL
-- Recursive member: append the next numbered name to the list so far.
-- NOTE(review): this branch selects five columns, but t2 declares six; the
-- incremented cnt is missing.
SELECT t2.PROJECT_ID,
t2.list || ' | ' || t1.NAME_FIRST CONCAT ' ' CONCAT t1.NAME_LAST,
t1.POINT_OF_CONTACT,
t1.PROJECT_NAME,
t1.CATEGORY_NAME
FROM t2, t1
-- NOTE(review): rows are matched on project_id only, although rowNum is
-- assigned per (project, category) partition.
WHERE t2.project_id = t1.project_id
AND t2.cnt + 1 = t1.rowNum )
SELECT PROJECT_ID,
PROJECT_NAME,
POINT_OF_CONTACT,
-- NOTE(review): there is no comma after CATEGORY_NAME, so "list" on the next
-- line most likely parses as a column alias rather than a separate column.
-- t2 also declares this column as BUSINESS_NAME.
CATEGORY_NAME
list
FROM t2
-- Keep only the row whose cnt equals the highest rowNum of the project.
WHERE ( PROJECT_ID, cnt ) IN (
SELECT PROJECT_ID, MAX(rowNum)
FROM t1
GROUP BY PROJECT_ID )
The results I'm getting contain duplicates, but only when the second column (category_name) is included in the partition clause. Current results:
Desired results:
I figured it out. I added an ID for category and partitioned by category_id and project_id.
-- Build one ' | '-separated list of admin names per (project, category).
WITH
-- t1: one row per admin, numbered 1..n inside each (project, category) partition.
t1 (rowNum, PROJECT_ID, NAME_LAST, NAME_FIRST, POINT_OF_CONTACT, PROJECT_NAME, CATEGORY_ID, CATEGORY_NAME) AS
(
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY F13.DIM_PROJECT_ID, F13.CATEGORY_ID
            -- Name columns added as tie-breakers so the numbering, and
            -- therefore the order of names in the list, is deterministic.
            ORDER BY F13.PROJECT_NAME, F13.CATEGORY_NAME, F2P.NAME_LAST, F2P.NAME_FIRST
        ),
        F13.DIM_PROJECT_ID,
        F2P.NAME_LAST,
        F2P.NAME_FIRST,
        REPLACE(F2P.POINT_OF_CONTACT, ',', ' |') AS POINT_OF_CONTACT,
        F13.PROJECT_NAME,
        F13.CATEGORY_ID,
        F13.CATEGORY_NAME
    FROM FACT_TABLE AS F13
    INNER JOIN ADMIN AS F2P ON F13.DIM_PROJECT_ID = F2P.DIM_PROJECT_ID
    -- No F2H column is referenced; the GROUP BY collapses any rows this join repeats.
    LEFT JOIN HOURS AS F2H ON F13.DIM_PROJECT_ID = F2H.DIM_PROJECT_ID
    WHERE F13.CATEGORY_NAME = ('INVISION')
    GROUP BY
        F13.DIM_PROJECT_ID,
        F13.PROJECT_NAME,
        F2P.NAME_LAST,
        F2P.NAME_FIRST,
        F2P.POINT_OF_CONTACT,
        F13.CATEGORY_ID,
        F13.CATEGORY_NAME
),
-- t2: recursively concatenates the names; cnt is the number of names in LIST so far.
t2 (PROJECT_ID, LIST, POINT_OF_CONTACT, PROJECT_NAME, CATEGORY_ID, CATEGORY_NAME, cnt) AS
(
    -- Anchor member: every list starts with the row numbered 1.
    SELECT
        PROJECT_ID,
        VARCHAR(NAME_FIRST CONCAT ' ' CONCAT NAME_LAST, 6000),
        POINT_OF_CONTACT,
        PROJECT_NAME,
        CATEGORY_ID,
        CATEGORY_NAME,
        1
    FROM t1
    WHERE rowNum = 1
    UNION ALL
    -- Recursive member: append the next numbered admin of the same
    -- (project, category). It must return all seven t2 columns, including the
    -- incremented counter.
    SELECT
        t2.PROJECT_ID,
        t2.list || ' | ' || t1.NAME_FIRST CONCAT ' ' CONCAT t1.NAME_LAST,
        t1.POINT_OF_CONTACT,
        t1.PROJECT_NAME,
        t1.CATEGORY_ID,
        t1.CATEGORY_NAME,
        t2.cnt + 1
    -- Comma join kept from the original form of the recursive member.
    FROM t2, t1
    WHERE t2.project_id = t1.project_id
      AND t2.category_id = t1.category_id
      AND t2.cnt + 1 = t1.rowNum
)
SELECT
    PROJECT_ID,
    PROJECT_NAME,
    POINT_OF_CONTACT,
    CATEGORY_ID,
    CATEGORY_NAME,
    list
FROM t2
-- Keep only the complete list: the row whose cnt equals the highest rowNum
-- of its (project, category) partition.
WHERE (PROJECT_ID, CATEGORY_ID, cnt) IN (
    SELECT PROJECT_ID, CATEGORY_ID, MAX(rowNum)
    FROM t1
    GROUP BY PROJECT_ID, CATEGORY_ID
);