determine some stats with tsql - tsql

I have documents which can belong to several classes and can contain several tokens (words):
-- Tokens: token text keyed by Id.
CREATE TABLE Tokens (
    Id   INT           NOT NULL PRIMARY KEY,
    Text NVARCHAR(255) NULL
);
-- DocumentClassTokens: ties a document id, a class id and a token id together.
-- The three *Fk columns are nullable and no foreign-key constraints are declared.
CREATE TABLE DocumentClassTokens (
    Id         INT NOT NULL PRIMARY KEY,
    DocumentFk INT NULL,
    ClassFk    INT NULL,
    TokenFk    INT NULL
);
I would like to determine these stats (for all tokens given the class):
A = number of distinct documents which contain token and belong to class
B = number of distinct documents which contain token and do not belong to class
C = number of distinct documents which do not contain token and belong to class
D = number of distinct documents which do not contain token and do not belong to class
I am using this at the moment but it does not look right (I am pretty sure that the computation of A and B is correct):
-- Per-token document counts for one class, as posted in the question.
-- Local variables carry the @ sigil: in T-SQL a leading # names a temporary
-- table, so "declare #class int" does not parse.
-- NOTE(review): the counting logic is kept exactly as posted because the
-- answers discuss it. Known problems:
--   * C and D join on a.id != b.TokenFk, which pairs a token with the rows of
--     every OTHER token; they count documents that contain some other token,
--     not documents that lack this one.
--   * A and B test ClassFk row by row, so a document that has rows both in and
--     outside @class is counted in A and in B.
declare @class int;
select @class = id from dbo.Classes where text = 'bla';

with A as
(
    -- documents that have a row for this token in @class
    select
        a.text as token,
        count(distinct DocumentFk) as A
    from dbo.Tokens as a
    inner join dbo.DocumentClassTokens as b on a.id = b.TokenFk and b.ClassFk = @class
    group by a.text
)
,B as
(
    -- documents that have a row for this token in any other class
    select
        a.text as token,
        count(distinct DocumentFk) as B
    from dbo.Tokens as a
    inner join dbo.DocumentClassTokens as b on a.id = b.TokenFk and b.ClassFk != @class
    group by a.text
)
,C as
(
    -- documents that have a row for a different token in @class
    select
        a.text as token,
        count(distinct DocumentFk) as C
    from dbo.Tokens as a
    inner join dbo.DocumentClassTokens as b on a.id != b.TokenFk and b.ClassFk = @class
    group by a.text
)
,D as
(
    -- documents that have a row for a different token in any other class
    select
        a.text as token,
        count(distinct DocumentFk) as D
    from dbo.Tokens as a
    inner join dbo.DocumentClassTokens as b on a.id != b.TokenFk and b.ClassFk != @class
    group by a.text
)
-- One output row per Tokens row; a token missing from a CTE reports 0.
select
    coalesce(a.A, 0) as A,
    coalesce(b.B, 0) as B,
    coalesce(c.C, 0) as C,
    coalesce(d.D, 0) as D,
    t.Text,
    t.id
from dbo.Tokens as t
left outer join A as a on t.text = a.token
left outer join B as b on t.text = b.token
left outer join C as c on t.text = c.token
left outer join D as d on t.text = d.token
order by t.text
Any feedback would be very much appreciated. Many thanks!
Best wishes,
Christian
PS:
Some test data:
-- Test fixture for the A/B/C/D question. Safe to run repeatedly: each table is
-- dropped only when it already exists, so the first run no longer fails on
-- "drop table".
use play;

if object_id('Tokens', 'U') is not null
    drop table Tokens;
create table Tokens
(
Id INT not null,
Text NVARCHAR(255) null,
primary key (Id)
);
insert into Tokens (id, text) values (1,'1');
insert into Tokens (id, text) values (2,'2');

if object_id('DocumentClassTokens', 'U') is not null
    drop table DocumentClassTokens;
create table DocumentClassTokens (
Id INT not null,
DocumentFk INT null,
ClassFk INT null,
TokenFk INT null,
primary key (Id)
);
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (1,1,1,1);
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (2,1,1,2);
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (3,2,1,1);
-- document 2 has rows in class 1 and in class 2
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (4,2,2,1);
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (5,3,2,1);
-- TokenFk 3 has no matching row in Tokens (no foreign key is declared)
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (6,3,2,3);

Your question seems now much clearer, and if I haven't overlooked anything, then here's a query you might try to run against your data.
-- First attempt at the four per-token counts for @class (the EDIT that follows
-- replaces it). Variables carry the @ sigil; "DECLARE #class" does not parse.
-- NOTE(review): ClassFk is tested row by row, so a document that has rows both
-- in and outside @class is counted on the class side AND the non-class side.
DECLARE @class int;
SET @class = 1;
SELECT
    TokenFk,
    TokenClassDocs AS A,
    TokenNonClassDocs AS B,
    TotalClassDocs - TokenClassDocs AS C,        -- class docs without the token
    TotalNonClassDocs - TokenNonClassDocs AS D   -- non-class docs without the token
FROM (
    -- per token: distinct documents seen with / without ClassFk = @class
    SELECT
        TokenFk,
        COUNT(DISTINCT CASE ClassFk WHEN @class THEN DocumentFk ELSE NULL END) AS TokenClassDocs,
        COUNT(DISTINCT CASE ClassFk WHEN @class THEN NULL ELSE DocumentFk END) AS TokenNonClassDocs
    FROM DocumentClassTokens dct
    GROUP BY dct.TokenFk
) AS bytoken
CROSS JOIN (
    -- single row: the same two counts over the whole table
    SELECT
        COUNT(DISTINCT CASE ClassFk WHEN @class THEN DocumentFk ELSE NULL END) AS TotalClassDocs,
        COUNT(DISTINCT CASE ClassFk WHEN @class THEN NULL ELSE DocumentFk END) AS TotalNonClassDocs
    FROM DocumentClassTokens
) AS totals
Please let us know if it's all right.
EDIT
The above solution is wrong. Here's the fixed one; it certainly seems correct, only I do not like it as much as the wrong version (what an irony...).
-- Per-token counts A/B/C/D for @class, classifying each DOCUMENT (not each
-- row) as "in class" when it has at least one row with ClassFk = @class.
-- Variables carry the @ sigil; "DECLARE #class" does not parse in T-SQL.
DECLARE @class int;
SET @class = 1;
SELECT
    TokenFk,
    TokenClassDocs AS A,
    TokenNonClassDocs AS B,
    TotalClassDocs - TokenClassDocs AS C,        -- class docs without the token
    TotalNonClassDocs - TokenNonClassDocs AS D   -- non-class docs without the token
FROM (
    SELECT
        TokenFk,
        -- cls.DocumentFk is non-NULL only for documents that belong to @class
        COUNT(DISTINCT cls.DocumentFk) AS TokenClassDocs,
        COUNT(DISTINCT CASE WHEN cls.DocumentFk IS NULL THEN dct.DocumentFk END) AS TokenNonClassDocs
    FROM DocumentClassTokens dct
    LEFT JOIN (
        -- documents with at least one row in @class
        SELECT DISTINCT DocumentFk
        FROM DocumentClassTokens
        WHERE ClassFk = @class
    ) cls ON dct.DocumentFk = cls.DocumentFk
    GROUP BY dct.TokenFk
) AS bytoken
CROSS JOIN (
    -- single row: the same classification counted over all documents
    SELECT
        COUNT(DISTINCT cls.DocumentFk) AS TotalClassDocs,
        COUNT(DISTINCT CASE WHEN cls.DocumentFk IS NULL THEN dct.DocumentFk END) AS TotalNonClassDocs
    FROM DocumentClassTokens dct
    LEFT JOIN (
        SELECT DISTINCT DocumentFk
        FROM DocumentClassTokens
        WHERE ClassFk = @class
    ) cls ON dct.DocumentFk = cls.DocumentFk
) AS totals
Note: I think I can see now how you can check if the figures are wrong: the sum of A, B, C, D in every row (i.e. for every token) must be equal to the total document count, which should not be surprising, because every document can satisfy 1 and only 1 of the 4 cases being explored. If the row sum is different from the total document count then some figures in the row are certainly wrong.

This seems to do what you want, going by your description. Looking at your code, I'm not so sure.
Edit 1: With columns instead of rows and @ClassID as filter.
-- Returns one row with four counts (A, B, C, D) over the documents that have
-- rows in @ClassID. Variables carry the @ sigil ("declare #ClassID" does not
-- parse) and the CTE column alias is spelled DocumentFk.
-- NOTE(review): the CTE keeps only rows with ClassFk = @ClassID, so its ClassFk
-- column is never NULL and B and D always come out as 0; C is non-zero only for
-- documents whose TokenFk is NULL on every row in the class. No token is taken
-- as input. Confirm this is what the answer intended before relying on it.
declare @ClassID int
set @ClassID = 1
;with cte(DocumentFk, TokenFk, ClassFk) as
(
    -- one row per document that has at least one row in @ClassID
    select DocumentFk, max(TokenFK), max(ClassFk)
    from DocumentClassTokens
    where ClassFK = @ClassID
    group by DocumentFK
)
select
    (select count(*)
     from cte
     where
         TokenFk is not null and
         ClassFk is not null) as A,
    (select count(*)
     from cte
     where
         TokenFk is not null and
         ClassFk is null) as B,
    (select count(*)
     from cte
     where
         TokenFk is null and
         ClassFk is not null) as C,
    (select count(*)
     from cte
     where
         TokenFk is null and
         ClassFk is null) as D

Related

sql recursion: find tree given middle node

I need to get a tree of related nodes given a certain node, but not necessary top node. I've got a solution using two CTEs, since I am struggling to squeeze it all into one CTE :). Might somebody have a sleek solution to avoid using two CTEs? Here is some code that I was playing with:
-- Sample hierarchy plus the asker's two queries. Table variable and scalar use
-- the @ sigil ("DECLARE #temp AS TABLE" does not parse in T-SQL); the INSERT
-- names its target columns and the final selects list their columns.
DECLARE @temp AS TABLE (ID INT, ParentID INT)
-- Two chains: 1 -> 2 -> 3 -> 4 -> 5 and 6 -> 7 -> 8 (NULL parent marks a root).
INSERT INTO @temp (ID, ParentID)
SELECT 1 ID, NULL AS ParentID
UNION ALL
SELECT 2, 1
UNION ALL
SELECT 3, 2
UNION ALL
SELECT 4, 3
UNION ALL
SELECT 5, 4
UNION ALL
SELECT 6, NULL
UNION ALL
SELECT 7, 6
UNION ALL
SELECT 8, 7
DECLARE @startNode INT = 4

-- Query 1: @startNode and everything below it.
;WITH TheTree (ID,ParentID)
AS (
    SELECT ID, ParentID
    FROM @temp
    WHERE ID = @startNode
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT ID, ParentID FROM TheTree

-- Query 2: walk up from @startNode to its root (Up), then walk back down from
-- that root to return the whole tree containing @startNode.
;WITH Up(ID,ParentID)
AS (
    SELECT t.id, t.ParentID
    FROM @temp t
    WHERE t.ID = @startNode
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN Up c ON t.id = c.ParentID
)
,TheTree (ID,ParentID)
AS (
    -- the only Up row with a NULL parent is the root of the chain
    SELECT ID, ParentID
    FROM Up
    WHERE ParentID is null
    UNION ALL
    SELECT t.id, t.ParentID
    FROM @temp t
    JOIN TheTree tr ON t.ParentID = tr.ID
)
SELECT ID, ParentID FROM TheTree
thanks
Meh. This avoids using two CTEs, but the result is a brute force kludge that hardly qualifies as "sleek" as it won’t be efficient if your table is at all sizeable. It will:
Recursively build all possible hierarchies
As you build them, flag the target NodeId as you find it
Return only the targeted tree
I threw in column “TreeNumber” on the off-chance the TargetId appears in multiple hierarchies, or if you’d ever have multiple values to check in one pass. “Depth” was added to make the output a bit more legible.
A more complex solution like @John’s might do, and more and subtler tricks could be done with more detailed table structures.
-- Single-CTE variant: expand every tree from its root, flag rows at or below
-- @startNode, then return all rows of any tree that contains a flagged row.
-- Reads the @temp table variable declared in the question's batch. Variables
-- carry the @ sigil ("#startNode" / "#temp" do not parse as variables).
DECLARE @startNode INT = 4
;WITH cteAllTrees (TreeNumber, Depth, ID, ParentID, ContainsTarget)
AS (
    -- anchor: one numbered tree per root (ParentId is null)
    SELECT
        row_number() over (order by ID) TreeNumber
        ,1
        ,ID
        ,ParentID
        ,case
            when ID = @startNode then 1
            else 0
        end ContainsTarget
    FROM @temp
    WHERE ParentId is null
    UNION ALL
    -- step: children inherit the tree number; the flag stays set once an
    -- ancestor (or the row itself) is the target
    SELECT
        tr.TreeNumber
        ,tr.Depth + 1
        ,t.id
        ,t.ParentID
        ,case
            when tr.ContainsTarget = 1 then 1
            when t.ID = @startNode then 1
            else 0
        end ContainsTarget
    FROM @temp t
    INNER JOIN cteAllTrees tr
        ON t.ParentID = tr.ID
)
SELECT
    TreeNumber
    ,Depth
    ,ID
    ,ParentId
from cteAllTrees
where TreeNumber in (select TreeNumber from cteAllTrees where ContainsTarget = 1)
order by
    TreeNumber
    ,Depth
    ,ID
Here is a technique where you can select the entire hierarchy, a specific node with all its children, and even a filtered list and how they roll.
Note: See the comments next to the DECLAREs
-- Hierarchy listing with range keys. Table variable and scalars use the @
-- sigil ("Declare #YourTable table" does not parse in T-SQL).
--   @Top    : NULL lists every tree from its root; an ID starts at that node.
--   @Nest   : indent string repeated (Lvl-1) times in front of each name.
--   @Filter : '' for all rows, or a comma-separated ID list; a row is returned
--             when a listed ID falls inside its R1..R2 range.
Declare @YourTable table (id int,pt int,name varchar(50))
Insert into @YourTable values
(1,null,'1'),(2,1,'2'),(3,1,'3'),(4,2,'4'),(5,2,'5'),(6,3,'6'),(7,null,'7'),(8,7,'8')
Declare @Top int = null --<< Sets top of Hier Try 2
Declare @Nest varchar(25) = '|-----' --<< Optional: Added for readability
Declare @Filter varchar(25) = '' --<< Empty for All or try 4,6
;with cteP as (
    -- Seq is a dotted path of (1000 + sibling position) values, so sorting by
    -- Seq lists each parent immediately before its descendants
    Select Seq = cast(1000+Row_Number() over (Order by name) as varchar(500))
          ,ID
          ,pt
          ,Lvl=1
          ,name
    From @YourTable
    Where IsNull(@Top,-1) = case when @Top is null then isnull(pt,-1) else ID end
    Union All
    Select Seq = cast(concat(p.Seq,'.',1000+Row_Number() over (Order by r.name)) as varchar(500))
          ,r.ID
          ,r.pt
          ,p.Lvl+1
          ,r.name
    From @YourTable r
    Join cteP p on r.pt = p.ID)
-- R1: position of each row in Seq order
,cteR1 as (Select *,R1=Row_Number() over (Order By Seq) From cteP)
-- R2: highest R1 among rows whose Seq starts with this row's Seq
,cteR2 as (Select A.Seq,A.ID,R2=Max(B.R1) From cteR1 A Join cteR1 B on (B.Seq like A.Seq+'%') Group By A.Seq,A.ID )
Select Distinct
     A.R1
    ,B.R2
    ,A.ID
    ,A.pt
    ,A.Lvl
    ,name = Replicate(@Nest,A.Lvl-1) + A.name
From cteR1 A
Join cteR2 B on A.ID=B.ID
Join (Select R1 From cteR1 where IIF(@Filter='',1,0)+CharIndex(concat(',',ID,','),concat(',',@Filter+','))>0) F on F.R1 between A.R1 and B.R2
Order By A.R1

postgres hierarchy - count of child levels and sort by date of children or grandchildren

I would like to know how to write a postgres subquery so that the following table example will output what I need.
id parent_id postdate
1   -1 2015-03-10
2     1 2015-03-11 (child level 1)
3     1 2015-03-12 (child level 1)
4     3 2015-03-13 (child level 2)
5    -1 2015-03-14
6    -1 2015-03-15
7     6 2015-03-16 (child level 1)
If I want to sort all the root ids by child level 1 with a count of children(s) from the parent, the output would be something like this
id count  date
6   2    2015-03-15
1   4    2015-03-10
5   1    2015-03-14
The output is sorted by postdate based on the root's child. The 'date' being outputted is the date of the root's postdate. Even though id#5 has a more recent postdate, the rootid#6's child (id#7) has the most recent postdate because it is being sorted by child's postdate. id#5 doesnt have any children so it just gets placed at the end, sorted by date. The 'count' is the number children(child level 1), grandchildren(child level 2) and itself (root). For instance, id #2,#3,#4 all belong to id#1 so for id#1, the count would be 4.
My current subquery thus far:
-- Asker's attempt: self-join two levels down; the extra joins only match for
-- rows whose own parent_id is -1.
SELECT
    root.id,
    count(root.id),
    root.postdate
FROM mytable AS root
LEFT JOIN mytable AS child
    ON child.parent_id = root.id
   AND root.parent_id = -1
LEFT JOIN mytable AS grandchild
    ON grandchild.parent_id = child.id
   AND root.parent_id = -1
GROUP BY
    root.id,
    child.postdate,
    root.postdate
ORDER BY
    child.postdate DESC,
    root.postdate DESC
-- Fixture: self-referencing posts table; a NULL parent_id marks a root post.
create table mytable (
    id        serial primary key,
    parent_id int references mytable,
    postdate  date
);
create index mytable_parent_id_idx on mytable (parent_id);
insert into mytable (id, parent_id, postdate)
values
    (1, null, '2015-03-10'),
    (2, 1,    '2015-03-11'),
    (3, 1,    '2015-03-12'),
    (4, 3,    '2015-03-13'),
    (5, null, '2015-03-14'),
    (6, null, '2015-03-15'),
    (7, 6,    '2015-03-16');
-- For every root post: size of its subtree (root included) and the latest
-- postdate among its descendants; roots without descendants sort last.
with recursive subtree as (
    -- anchor: each root is the first node of its own subtree and carries no
    -- child date
    select
        id as root_id,
        id as node_id,
        cast(null as date) as child_postdate
    from mytable
    where parent_id is null
    union all
    -- step: attach the children of the node reached so far
    select
        s.root_id,
        child.id,
        child.postdate
    from subtree as s
    inner join mytable as child
        on child.parent_id = s.node_id
)
select
    m.id,
    agg.cnt,
    m.postdate,
    agg.max_child_date
from mytable as m
inner join (
    select
        root_id,
        count(*) as cnt,
        max(child_postdate) as max_child_date
    from subtree
    group by root_id
) as agg
    on agg.root_id = m.id
order by
    agg.max_child_date desc nulls last,
    m.postdate desc;
You'll need a recursive query to count the elements in the subtrees:
-- Subtree size per root of the_tree (the root itself is counted).
WITH RECURSIVE descendants AS (
    -- anchor: every root starts its own subtree
    SELECT
        id AS root_id,
        id AS node_id
    FROM the_tree
    WHERE parent_id IS NULL
    UNION ALL
    -- step: children of the node reached so far keep the root's id
    SELECT
        d.root_id,
        t.id
    FROM descendants AS d
    INNER JOIN the_tree AS t
        ON t.parent_id = d.node_id
)
SELECT
    t.id,
    sizes.cnt,
    t.postdate
FROM the_tree AS t
INNER JOIN (
    SELECT
        root_id,
        COUNT(*) AS cnt
    FROM descendants
    GROUP BY root_id
) AS sizes
    ON sizes.root_id = t.id
ORDER BY t.id
;
UPDATE (it appears the OP also wants the maxdate per tree)
-- Subtree size per root plus the latest postdate found anywhere in the
-- subtree (the root's own postdate takes part in the maximum).
WITH RECURSIVE descendants AS (
    -- anchor: every root starts its own subtree
    SELECT
        id AS root_id,
        id AS node_id,
        postdate AS postdate
    FROM the_tree
    WHERE parent_id IS NULL
    UNION ALL
    -- step: each descendant contributes its own postdate
    SELECT
        d.root_id,
        t.id,
        t.postdate
    FROM descendants AS d
    INNER JOIN the_tree AS t
        ON t.parent_id = d.node_id
)
SELECT
    t.id,
    sizes.cnt,
    t.postdate,
    sizes.maxdate
FROM the_tree AS t
INNER JOIN (
    SELECT
        root_id,
        COUNT(*) AS cnt,
        MAX(postdate) AS maxdate
    FROM descendants
    GROUP BY root_id
) AS sizes
    ON sizes.root_id = t.id
ORDER BY
    sizes.maxdate,
    t.id
;
After looking at everyone's code, I created the subquery I needed. I can use PHP to vary the 'case when' code depending on the user's sort selection. For instance, the code below will sort the root nodes based on child level 1's postdate.
-- Asker's final query: roots are rows with parent_id = -1; the recursion tracks
-- depth below the root, counts are taken per (root, depth), and the CASE ranks
-- depth 1 above depth 2 above everything else.
with recursive tree as (
    -- anchor: each root at depth 0 with no child date
    select
        id as root_id,
        id as node_id,
        cast(null as timestamp) as child_postdate,
        0 as depth
    from mytable
    where parent_id = -1
    union all
    -- step: children of the node reached so far, one level deeper
    select
        t.root_id,
        mytable.id,
        mytable.postdate,
        t.depth + 1
    from tree as t
    inner join mytable
        on mytable.parent_id = t.node_id
)
select
    m.id,
    lvl.cnt,
    m.postdate
from ssf.dtb_021 as m
inner join (
    select
        root_id,
        count(*) as cnt,
        max(child_postdate) as max_child_date,
        depth
    from tree
    group by root_id, depth
) as lvl
    on lvl.root_id = m.id
order by
    case
        when lvl.depth = 2 then 1
        when lvl.depth = 1 then 2
        else 0
    end desc,
    lvl.max_child_date desc nulls last,
    m.postdate desc;
-- Root posts (parent_id = -1) that have at least one direct child, with
-- 1 + number of direct children, newest child first. Only direct children are
-- grouped; deeper levels are not followed.
select
    p.id,
    (1 + kids.n) as parent_post_plus_number_of_subposts,
    p.postdate
from table as p
inner join (
    select
        parent_id,
        count(*) as n,
        max(postdate) as latest_postdate
    from table
    group by parent_id
) as kids
    on kids.parent_id = p.id
where p.parent_id = -1
order by kids.latest_postdate desc

How to display all parents IDs in one line (row) from hierarchical table

I have a hierarchical table where each child has a parent id defined (except for top-level children, where the parent id is null). Now I would like to display all the parent ids on one line for each selected child.
I was trying to use CTE but CTE is adding additional records to initial set. Instead of that I would like to update the initial set (SELECT statement selecting some children) with list of parents.
Example of table (Table1):
ChildID ParentID
A P
P Q
Q Z
B P
C F
F M
M Q
D H
H Y
Y NULL
Z NULL
If initial statement will select C, A, Q from list of children IDs then expected result is following:
Selected Child Parent IDs
C F, M, Q, Z
A P, Q, Z
Q Z
You were right about doing this with a CTE, but then you have to take all the rows and put them into one column (see cross apply - xml path).
-- For each selected child, walk up the parent chain and fold the parents into
-- one comma-separated string.
-- @t(childid, parentid) and @selected(id) are the table variables from the
-- sample data; the @ sigil is required for table variables. The cast targets
-- varchar(max) rather than the deprecated text type, and the CTE lists its
-- source columns instead of t.*.
with cte (selected, child, parent) as
(
    -- anchor: the selected rows themselves
    select s.id, t.childid, t.parentid
    from @t as t
    inner join @selected as s on t.childid = s.id
    union all
    -- step: the row whose child is the parent reached so far
    select cte.selected, t.childid, t.parentid
    from @t as t
    inner join cte on t.childid = cte.parent
    where cte.parent is not null
)
select distinct
    t.selected as [Selected Child],
    -- drop the trailing comma left by the concatenation
    left(a.parents, len(a.parents) - 1) as Parents
from cte as t
cross apply (
    -- unnamed column + FOR XML PATH('') concatenates the values
    select cast(tt.parent + ',' as varchar(max))
    from cte as tt
    where t.selected = tt.selected
    for xml path('')
) as a(parents);
With sample data:
-- Self-contained demo: sample hierarchy, the selected children, and the query
-- that lists each selected child's parents as one comma-separated string.
-- Table variables use the @ sigil ("declare #t as table" does not parse); the
-- cast targets varchar(max) rather than the deprecated text type.
declare @t as table
(
    childid char(1),
    parentid char(1)
);
declare @selected as table
(
    id char(1)
);
insert into @t (childid,parentid) values ('a','p'),
('p','q'),
('q','z'),
('b','p'),
('c','f'),
('f','m'),
('m','q'),
('d','h'),
('h','y'),
('y',null),
('z',null);
insert into @selected (id) values ('c'),('a'),('q');
with cte (selected, child, parent) as
(
    -- anchor: the selected rows themselves
    select s.id, t.childid, t.parentid
    from @t as t
    inner join @selected as s on t.childid = s.id
    union all
    -- step: the row whose child is the parent reached so far
    select cte.selected, t.childid, t.parentid
    from @t as t
    inner join cte on t.childid = cte.parent
    where cte.parent is not null
)
select distinct
    t.selected as [Selected Child],
    -- drop the trailing comma left by the concatenation
    left(a.parents, len(a.parents) - 1) as Parents
from cte as t
cross apply (
    -- unnamed column + FOR XML PATH('') concatenates the values
    select cast(tt.parent + ',' as varchar(max))
    from cte as tt
    where t.selected = tt.selected
    for xml path('')
) as a(parents);

Can I Choose Different Table for inner join operation?

This is my T-SQL
-- Question's sketch of the desired query: map Profile to a code and join to a
-- table that depends on Profile.
select Id,Profile,Type ,
case Profile
when 'Soft' then 'SID'
when 'Hard' then 'HID'
end as [Profile]
from ProductDetail p1
-- Placeholder: [tableA or tableB] stands for the table to be picked per
-- Profile value, and xxxxxxxx presumably stands for the join condition.
inner join [tableA or tableB] on xxxxxxxx
I want to join tableA when Profile = 'Soft' and join tableB when Profile = 'Hard'. How can I do this using only T-SQL in one batch?
Thanks
You can't directly do it, but could achieve the same effect with outer joins
-- Emulates "inner join tableA or tableB depending on Profile": each table is
-- outer-joined only for its own Profile value, and the WHERE keeps a row only
-- when the join that applies to its Profile found a match.
-- (The duplicated WHERE keyword from the posted text is removed; it is a
-- syntax error.)
select Id,Profile,Type ,
    case Profile
        when 'Soft' then 'SID'
        when 'Hard' then 'HID'
    end as [Profile]
from ProductDetail p1
left outer join tableA ON tableA.x = p1.x AND p1.Profile = 'Soft'
left outer join tableB ON tableB.x = p1.x AND p1.Profile = 'Hard'
where
    (tableA.x IS NOT NULL and p1.Profile = 'Soft')
    or (tableB.x IS NOT NULL and p1.Profile = 'Hard')
Of course, you can choose different tables for inner join operation, but it must be based on some condition or variable.
For Example:
-- One branch per target table: the first joins tableA for 'Soft' profiles, the
-- second joins tableB for 'Hard' profiles; UNION merges the two results.
-- <any other Condition> is the answer's placeholder for the real join predicate.
SELECT
    Id,
    Profile,
    Type,
    CASE
        WHEN Profile = 'Soft' THEN 'SID'
        WHEN Profile = 'Hard' THEN 'HID'
    END AS [Profile]
FROM ProductDetail p1
INNER JOIN tableA A
    ON Profile = 'Soft'
    AND <any other Condition>
UNION
SELECT
    Id,
    Profile,
    Type,
    CASE
        WHEN Profile = 'Soft' THEN 'SID'
        WHEN Profile = 'Hard' THEN 'HID'
    END AS [Profile]
FROM ProductDetail p1
INNER JOIN tableB B
    ON Profile = 'Hard'
    AND <any other Condition>
You can do this in a single statement with the same or similar case statement in your join. Below is sample code using temp tables that joins to 2 different reference tables merged into a single result set using a UNION
-- Demo: join ProductDetail to one of two reference tables depending on
-- sProfile. Both reference tables are stacked into one derived table tagged
-- with the profile they serve; the join matches on that tag plus the id column
-- that belongs to the profile.
-- Table variables use the @ sigil ("DECLARE #ProductDetail TABLE" does not
-- parse in T-SQL); columns of p1 are qualified.
DECLARE @ProductDetail TABLE (Id INT, sProfile VARCHAR(100), StID INT, HdID INT)
DECLARE @TableA TABLE (StId INT, Field1 VARCHAR(100))
DECLARE @TableB TABLE (HdId INT, Field1 VARCHAR(100))
INSERT INTO @ProductDetail (Id, sProfile, StID , HdID ) VALUES (1,'Soft',1,1)
INSERT INTO @ProductDetail (Id, sProfile, StID , HdID ) VALUES (2,'Hard',2,2)
INSERT INTO @TableA (StId,Field1) VALUES (1,'Soft 1')
INSERT INTO @TableA (StId,Field1) VALUES (2,'Soft 2')
INSERT INTO @TableB (HdId,Field1) VALUES (1,'Hard 1')
INSERT INTO @TableB (HdId,Field1) VALUES (2,'Hard 2')
SELECT
    p1.Id,
    p1.sProfile,
    CASE
        WHEN p1.sProfile = 'Soft' THEN p1.StID
        WHEN p1.sProfile = 'Hard' THEN p1.HdID
    END AS [Profile]
    ,ReferenceTable.FieldName
FROM
    @ProductDetail p1
INNER JOIN
(
    -- rows of @TableA serve 'Soft' profiles, rows of @TableB serve 'Hard'
    SELECT StID AS id, 'Soft' AS sProfile, Field1 AS FieldName
    FROM @TableA AS tableA
    UNION ALL
    SELECT HdID AS id, 'Hard' AS sProfile, Field1 AS FieldName
    FROM @TableB AS tableB
)
AS ReferenceTable
ON
    -- pick the id column that belongs to this row's profile
    CASE
        WHEN p1.sProfile = 'Soft' THEN p1.StID
        WHEN p1.sProfile = 'Hard' THEN p1.HdID
    END = ReferenceTable.Id
    AND p1.sProfile = ReferenceTable.sProfile
This will return the following result set:
Id sProfile Profile FieldName
1 Soft 1 Soft 1
2 Hard 2 Hard 2

Syntax for SQL Not In List?

I am trying to develop a T-SQL query to exclude all rows from another table "B". This other table "B" has 3 columns comprising its PK for a total of 136 rows. So I want to select all columns from table "A" minus those from table "B". How do I do this? I don't think this query is correct because I am still getting a duplicate record error:
-- Asker's attempt. #B is a temporary table holding every
-- (studentid, measuredate, measureid) combination that occurs more than once
-- in J5C_Measures_Sys.
CREATE TABLE #B (STUDENTID VARCHAR(50), MEASUREDATE SMALLDATETIME, MEASUREID VARCHAR(50))
INSERT #B
SELECT studentid, measuredate, measureid
from [J5C_Measures_Sys]
GROUP BY studentid, measuredate, measureid
HAVING COUNT(*) > 1
-- Load score_14 values into J5C_MasterMeasures, skipping rows matched by the
-- NOT IN tests below.
insert into J5C_MasterMeasures (studentid, measuredate, measureid, rit)
select A.studentid, A.measuredate, B.measurename+' ' +B.LabelName, A.score_14
from [J5C_Measures_Sys] A
join [J5C_ListBoxMeasures_Sys] B on A.MeasureID = B.MeasureID
join sysobjects so on so.name = 'J5C_Measures_Sys' AND so.type = 'u'
join syscolumns sc on so.id = sc.id and sc.name = 'score_14'
join [J5C_MeasureNamesV2_Sys] v on v.Score_field_id = sc.name
where a.score_14 is not null AND B.MEASURENAME IS NOT NULL
-- NOTE(review): each column is tested against #B on its own, so a row is kept
-- only when none of its three values appears anywhere in #B; this is not the
-- same as "the (studentid, measuredate, measureid) combination is not in #B".
and (A.studentid NOT IN (SELECT studentid from #B)
and a.measuredate NOT IN (SELECT measuredate from #B)
and a.measureid NOT IN (SELECT measureid from #B))
Use NOT EXISTS instead. NOT IN does not handle NULLs the way you might expect: if the subquery returns any NULL, NOT IN matches no rows.
-- Same load as in the question, but rows are excluded by correlated
-- NOT EXISTS tests: one against the full three-column key in #B, one against
-- any existing J5C_MasterMeasures row for the same studentid.
INSERT INTO J5C_MasterMeasures (studentid, measuredate, measureid, rit)
SELECT
    src.studentid,
    src.measuredate,
    lbm.measurename+' ' +lbm.LabelName,
    src.score_14
FROM [J5C_Measures_Sys] src
JOIN [J5C_ListBoxMeasures_Sys] lbm
    ON src.MeasureID = lbm.MeasureID
JOIN sysobjects so
    ON so.name = 'J5C_Measures_Sys' AND so.type = 'u'
JOIN syscolumns sc
    ON so.id = sc.id AND sc.name = 'score_14'
JOIN [J5C_MeasureNamesV2_Sys] v
    ON v.Score_field_id = sc.name
WHERE src.score_14 IS NOT NULL
  AND lbm.MEASURENAME IS NOT NULL
  AND NOT EXISTS (
        SELECT 1
        FROM #B dup
        WHERE dup.studentid = src.studentid
          AND src.measuredate = dup.measuredate
          AND src.measureid = dup.measureid
      )
  AND NOT EXISTS (
        SELECT 1
        FROM J5C_MasterMeasures z
        WHERE z.studentid = src.studentid
      )
Just so you know, take a look at Select all rows from one table that don't exist in another table
Basically there are at least 5 ways to select all rows from one table that are not in another table:
NOT IN
NOT EXISTS
LEFT and RIGHT JOIN
OUTER APPLY (2005+)
EXCEPT (2005+)
Here is a general solution for the difference operation using left join:
-- Anti-join: keep FirstTable rows for which the outer join found no
-- SecondTable row with the same ID.
SELECT *
FROM FirstTable
LEFT JOIN SecondTable
    ON SecondTable.ID = FirstTable.ID
WHERE SecondTable.ID IS NULL
Of course yours would have a more complicated join on clause, but the basic operation is the same.
I think you can use "NOT IN" with a subquery, but you say you have a multi-field key?
I'd be thinking about using a left outer join and then testing for null on the right...
Martin.