delete sql rows that have same value in other rows - sql-server-2008-r2

I've a table having data as:
C1 || C2
-----------------
a || 1
b || 1
c || 1
a || 0
b || 0
c || 0
d || 0
I have to delete rows 4, 5 and 6: every row with C2 = 0 for which another row has the same C1 and C2 = 1. Suggestions?

-- Remove every c2 = 0 row whose c1 also appears on a row with c2 = 1.
DELETE FROM your_table
WHERE c2 = 0
  AND EXISTS (
        -- A partner row with the same c1 and c2 = 1 must exist.
        SELECT 1
        FROM your_table AS paired
        WHERE paired.c1 = your_table.c1
          AND paired.c2 = 1
      )

Related

Split comma separated data and get its respective value from another table

I have concatenated data in table1
id
concats
sum
1
b,c
2
a,k,f,l,s
3
b,f,t
4
a,b,h,k,l,q,s,t
5
b,c,k,f,p,s
6
a,c,q,s
and another table with value
grade
score
a
4.82
b
2.65
c
2.56
d
2.75
g
6.90
h
5.90
k
6.41
f
12.80
l
2.56
p
12.80
q
1.35
s
2.90
t
5.97
I want to update table1.sum, something like b,c=(2.65+2.56=5.21)
Tried the below mentioned code, but there is an error.
-- Asker's failing attempt, kept exactly as posted.
-- Each inner scalar subquery selects "from t1", but t1 is only the alias given
-- to the UPDATE target; it is not a relation that can be queried on its own
-- (unless a real table named t1 happens to exist).
-- NOTE(review): the outer subquery over table2 has no filter, so it would also
-- yield one row per grade rather than a single value — confirm against the
-- actual error message the asker received.
UPDATE table1 as t1 SET sum =
(SELECT (CASE WHEN (SELECT SPLIT_PART(concats,',',1) from t1) = t2.grade then t2.score ELSE 0 END) +
(CASE WHEN (SELECT SPLIT_PART(concats,',',2) from t1) = t2.grade then t2.score ELSE 0 END) +
(CASE WHEN (SELECT SPLIT_PART(concats,',',3) from t1) = t2.grade then t2.score ELSE 0 END) +
(CASE WHEN (SELECT SPLIT_PART(concats,',',4) from t1) = t2.grade then t2.score ELSE 0 END) +
(CASE WHEN (SELECT SPLIT_PART(concats,',',5) from t1) = t2.grade then t2.score ELSE 0 END) +
(CASE WHEN (SELECT SPLIT_PART(concats,',',6) from t1) = t2.grade then t2.score ELSE 0 END) +
(CASE WHEN (SELECT SPLIT_PART(concats,',',7) from t1) = t2.grade then t2.score ELSE 0 END ) +
(CASE WHEN (SELECT SPLIT_PART(concats,',',8) from t1) = t2.grade then t2.score ELSE 0 END )
FROM table2 AS t2 )
You can join the two tables by converting the dreaded CSV columns to an array, then do the GROUP BY and sum on the result of that. This can be used to update the target table:
-- Total the scores of all grades listed in each row's CSV column, then write
-- the per-id total back into table1.
WITH totals AS (
    SELECT
        src.id,
        SUM(g.score) AS sum_score
    FROM table1 AS src
    INNER JOIN table2 AS g
        -- The CSV text is split into an array; a grade matches if it is any element.
        ON g.grade = ANY (string_to_array(src.concats, ','))
    GROUP BY src.id
)
UPDATE table1
SET sum = totals.sum_score
FROM totals
WHERE totals.id = table1.id;

Postgres: how to find rows having duplicate values in fields

How can I find if any value exists more than once in one row? An example:
id | c1 | c2 | c3
----+----+----+----
1 | a | b | c
2 | a | a | b
3 | b | b | b
The query should return rows 2 and 3 since they have the same value more than once. The solution I'm looking for is not 'where c1 = c2 or c1 = c3 or c2 = c3' since there can be any number of columns in tables I need to test. All values are text but can be any length.
One way to do that is to convert the columns to rows:
-- Return the rows in which some value occurs in more than one of c1, c2, c3.
SELECT tt.*
FROM the_table AS tt
WHERE EXISTS (
    -- Turn the three columns of the current row into three rows, then look
    -- for a value that appears at least twice.
    SELECT vals.v
    FROM (VALUES (tt.c1), (tt.c2), (tt.c3)) AS vals (v)
    GROUP BY vals.v
    HAVING COUNT(*) >= 2
)
If you want a dynamic solution where you don't have to list each column, you can do that by converting the row to a JSON value:
-- Dynamic variant: return the rows in which some value occurs in more than
-- one column, without listing the columns by name.
-- to_jsonb(tt) turns the current row into a JSON object (one key per column)
-- and jsonb_each_text() expands that object into (key, value-as-text) rows.
-- The "id" key is removed first: it identifies the row and is not one of the
-- compared columns, and its text form could otherwise collide with a column
-- value (e.g. id = 1 and c1 = '1') and report a false duplicate.
select *
from the_table tt
where exists (select 1
              from jsonb_each_text(to_jsonb(tt) - 'id') as j(k,v)
              group by v
              having count(*) > 1)
Online example

T-SQL Multiple Negative Where Conditions

I have a table A which has 85337 rows (Total).
Then the following query (Q1)
-- Q1: rows in which all four columns are zero.
SELECT *
FROM A
WHERE c1 = 0
  AND c2 = 0
  AND c3 = 0
  AND c4 = 0;
returns 590 rows.
The next query (Q2):
-- Q2: rows in which none of the four columns is zero.
SELECT *
FROM A
WHERE c1 <> 0
  AND c2 <> 0
  AND c3 <> 0
  AND c4 <> 0;
returns: 44245 rows. (should return 84747)
Why (Total) does not equal sum of (Q1) + (Q2)?
Why does the second query need "OR" instead of "AND" to get the "correct" values when the logic is the same? It must be trivial, but I cannot see the logic behind it.
Based on De Morgan's laws on Negation:
NOT (P AND Q) => (NOT P OR NOT Q)
This is because your two queries don't encapsulate every possible combination of your data as you are using AND:
-- Minimal demo of why the two AND-only queries do not partition the table.
-- A table variable must be named with "@" ("#" is only valid for temp tables
-- created with CREATE TABLE).
declare @a table (c1 int, c2 int, c3 int, c4 int);
insert into @a (c1, c2, c3, c4) values
 (1,1,1,1)
,(0,0,1,1) -- This row is not returned as it doesn't meet either criteria below
,(0,0,0,0)
,(0,0,0,0);

-- Every column is zero: matches the two (0,0,0,0) rows.
SELECT *
FROM @a
WHERE 1 = 1
AND c1 = 0
AND c2 = 0
AND c3 = 0
AND c4 = 0;

-- Every column is non-zero: matches only (1,1,1,1).
-- The real complement of the first query would join these tests with OR
-- (at least one column is non-zero), which also picks up (0,0,1,1).
SELECT *
FROM @a
WHERE 1 = 1
AND c1 != 0
AND c2 != 0
AND c3 != 0
AND c4 != 0;

How to match on at least 2 where conditions?

I have a table containing five boolean columns.
How can I construct a query that returns rows where at least 2 columns are true?
Cast the boolean types to integer (0=false, 1=true) and check their sum:
-- Map each flag to 0/1 and keep the rows where at least two flags are set.
SELECT *
FROM my_table
WHERE CAST(a AS int)
    + CAST(b AS int)
    + CAST(c AS int)
    + CAST(d AS int)
    + CAST(e AS int) >= 2;
The long way:
-- Enumerate all ten column pairs; a row qualifies when any pair is both true.
SELECT *
FROM t
WHERE (c1 AND c2)
   OR (c1 AND c3)
   OR (c1 AND c4)
   OR (c1 AND c5)
   OR (c2 AND c3)
   OR (c2 AND c4)
   OR (c2 AND c5)
   OR (c3 AND c4)
   OR (c3 AND c5)
   OR (c4 AND c5);
The accepted answer only works assuming all columns are defined NOT NULL, which has not been specified. To make it work with NULL, too:
-- NULL-safe count of set flags: "x IS TRUE" is false (not NULL) for a NULL x,
-- so every term contributes 0 or 1.
SELECT *
FROM tbl
WHERE CAST((a IS TRUE) AS int)
    + CAST((b IS TRUE) AS int)
    + CAST((c IS TRUE) AS int)
    + CAST((d IS TRUE) AS int)
    + CAST((e IS TRUE) AS int) >= 2;
Or:
-- NULL-safe count of set flags: a NULL flag is counted as 0.
SELECT *
FROM tbl
WHERE COALESCE(CAST(a AS int), 0)
    + COALESCE(CAST(b AS int), 0)
    + COALESCE(CAST(c AS int), 0)
    + COALESCE(CAST(d AS int), 0)
    + COALESCE(CAST(e AS int), 0) >= 2;

T-SQL Query to convert rows to columns based on multiple tables

I have two master tables CompanyMaster, ActivityMaster for a child table CompanyActivities
ActivityMaster
ACTIVITYID ACTIVITYNAME
A1 testActivity
A2 someActivity
A3 otheractivity
A4 someotheractivity
A5 anyotheractivity
CompanyMaster
COMPANYID COMPANYNAME
C1 testcompany
C2 ACompany
C3 MyCompany
C4 SomeCompany
C5 ZCompany
C6 Company123
C7 ComapnyABC
CompanyActivities - The COMPANYID in CompanyActivities has a primary key-foreign key relationship with COMPANYID in CompanyMaster (primary key table), and ACTIVITYID has a primary key-foreign key relationship with ACTIVITYID in ActivityMaster (primary key table)
COMPANYID ACTIVITYID
C1 A1
C1 A3
C3 A1
C3 A2
C4 A5
C5 A1
C6 A3
C7 A3
I want to write a query to get the following output, where all the rows in the ACTIVITYID column of the ActivityMaster table are converted to columns
Output
Companies A1 A2 A3 A4 A5
C1 Y N Y N N
C2 N N N N N
C3 Y Y N N N
C4 N N N N Y
C5 Y N N N N
C6 N N Y N N
C7 N N Y N N
The output table displays all the companies as rows in the first column and all the activities are shown as columns that start after the first column, if there is row that contains both ACTIVITYID and COMPANYID it will set to Y in output otherwise it would be set to N
eg- COMPANYID C1 is having an activity ACTIVITYID A1 in CompanyActivities table so the first row in the second column that comes just below A1 and in the right to C1 is set Y, whereas C1 and A2 are not having a row, so the third column in the first row is set to N
I am using C#.NET and 4 for loops to achieve the output now, which is taking a heavy toll on the performance of the application, so I would like to do this using a query. I have searched for pivot queries, but all the examples I found know the column names beforehand, which I don't; I only get the column names by querying ActivityMaster.
-- Demo script: build a company x activity Y/N matrix with a dynamic PIVOT.
-- The activity columns are not known in advance, so the column lists are
-- generated from #ActivityMaster and spliced into a SQL template.

-- Sample data in temp tables (dropped at the end of the script).
create table #CompanyMaster (COMPANYID int, COMPANYNAME varchar(30));
create table #ActivityMaster (ACTIVITYID int, ACTIVITYNAME varchar(30));
create table #CompanyActivities (COMPANYID int, ACTIVITYID int);

insert into #CompanyMaster (COMPANYID, COMPANYNAME)
SELECT 1, 'Company A'
union all
SELECT 2, 'Company B';

insert into #ActivityMaster (ACTIVITYID, ACTIVITYNAME)
SELECT 101, 'Activity X'
union all
SELECT 102, 'Activity Y'
union all
SELECT 103, 'Activity Z';

insert into #CompanyActivities (COMPANYID, ACTIVITYID)
select 1, 102
union all
select 2, 101;

-- Build the SELECT-list expressions, one line per activity, e.g.
--   case [Activity X] when 0 then 'N' else 'Y' end as [Activity X],
--   case [Activity Y] when 0 then 'N' else 'Y' end as [Activity Y],
--   case [Activity Z] when 0 then 'N' else 'Y' end as [Activity Z]
-- quotename() brackets the name and escapes any "]" inside it.
declare @activities nvarchar(max);
set @activities
= (
select 'case ' + quotename(ACTIVITYNAME) + ' when 0 then ''N'' else ''Y'' end as ' + quotename(ACTIVITYNAME) + ',' + char(10)
from #ActivityMaster
for xml path('')
);
-- Drop the trailing ',' + char(10) produced by the last generated line.
set @activities = left(@activities, len(@activities) - 2);

-- Build the column list for the PIVOT ... IN (...) clause, e.g.
--   [Activity X], [Activity Y], [Activity Z]
declare @activities_for nvarchar(max);
set @activities_for
= (
select quotename(ACTIVITYNAME) + ',' + char(10)
from #ActivityMaster
for xml path('')
);
-- Drop the trailing ',' + char(10) produced by the last generated line.
set @activities_for = left(@activities_for, len(@activities_for) - 2);

-- Query template. The inner query pairs every company with every activity
-- (cross join) and flags the pair with 1 when a #CompanyActivities row
-- exists, else 0; PIVOT then spreads the activities into columns.
declare @sql nvarchar(MAX) = N'
select COMPANYNAME,
<activities>
From
(select c.COMPANYNAME, a.ACTIVITYNAME,
(case
when ca.ACTIVITYID is not null and ca.COMPANYID is not null then 1
else 0
end) as STATUS
from #CompanyMaster c
cross join #ActivityMaster a
left join #CompanyActivities ca on ca.COMPANYID = c.COMPANYID and a.ACTIVITYID = ca.ACTIVITYID) p
pivot
(
sum(STATUS) for ACTIVITYNAME IN (<activities_for>)
) as pvt
';

-- Splice the generated column lists into the template.
set @sql = replace(@sql, '<activities>', @activities);
set @sql = replace(@sql, '<activities_for>', @activities_for);

-- Show the final statement for inspection, then run it.
print @sql;
exec sp_executesql @sql;

drop table #CompanyMaster;
drop table #ActivityMaster;
drop table #CompanyActivities;