select opposite operation group by month SQL Redshift - amazon-redshift

Iam trying to extract a list of operation that was inactive in each month :
table 1 "all_opreration" is containing the whole list of operation id
table all_operation
the second table "active_operation" is containing the operations that was active on the specified month
table active operation
So I want to get "inactive operation" by month (for each month the operation that was not in active_operation table
==> Wished table :
wished table inactive operation
I have tried several ways but without success
Thank you in advance

You just need to left join and check the the right side is NULL. You will need to expand your all_operations to have all dates but this can be done with a cross join. Like this:
Set up:
create table all_ops (op_id varchar(32));
insert into all_ops values ('A'), ('B'), ('C');
create table active_ops (month date, op_id varchar(32));
insert into active_ops values ('2021-10-01', 'A'),
('2021-10-01', 'B'),
('2021-07-01', 'C');
Find the missing data / id pairs:
select l.month, l.op_id
from (
select month, op_id
from all_ops
cross join (select distinct month from active_ops) m
) l
left join active_ops r
on l.op_id = r.op_id and l.month = r.month
where r.op_id is null;

Related

SQL Server 2008 R2 query related to replacement of data

I have a scenario wherein I have to remove all the strings except a or b or c
My sample table is as follows:
Id Product
------------------
1. a,b,Da,c
2. Ty,a,b,c
3. a,sds,b
Sample output
Id Product
----------------
1. a,b,c
2. a,b,c
3. a,b
My current version is Microsoft SQL Server 2008 R2
This should help you out. As I state in the comments, I make use of Jeff Moden's DelimitedSplit8k, as you're using an older version of SQL Server. if you were using 2016+, you would have access to STRING_SPLIT. I also normalise your data; as storing delimited data is almost always a bad idea.
CREATE TABLE #Sample (id int, Product varchar(20));
INSERT INTO #Sample
VALUES (1,'a,b,Da,c'),
(2,'Ty,a,b,c'),
(3,'a,sds,b');
GO
--The first problem you have is you're storing delimited data
--You really should be storing each item on a separate row.
--This is, however, quite easy to do. i'm going to use a different
--table, however, you can change this fairly easily for your
--needs.
CREATE TABLE #Sample2 (id int, Product varchar(2));
GO
--You can split the data out by using a Splitter.
--My personal preference is Jeff Moden's DelimitedSplit8K
--which I've linked to above.
INSERT INTO #Sample2 (id, Product)
SELECT id, Item AS Product
FROM #Sample S
CROSS APPLY dbo.DelimitedSplit8K(S.Product,',') DS
WHERE DS.Item IN ('a','b','c');
GO
--And hey presto! Your normalised data, and without the unwanted values
SELECT *
FROM #Sample2;
GO
DROP TABLE #Sample;
DROP TABLE #Sample2;
If you have to keep the delimited format, you can use STUFF and FOR XML PATH:
WITH Split AS(
SELECT id,
Item AS Product,
ItemNumber
FROM #Sample S
CROSS APPLY dbo.DelimitedSplit8K(S.Product,',') DS
WHERE DS.Item IN ('a','b','c'))
SELECT id,
STUFF((SELECT ',' + Product
FROM Split sq
WHERE sq.id = S.id
ORDER BY ItemNumber
FOR XML PATH('')),1,1,'')
FROM Split S
GROUP BY id;
This also will do the thing, using xml only:
select * into #t from (values('a,b,Da,c'),('Ty,a,b,c'),('a,sds,b'))v(Product)
;
with x as (
SELECT t.Product, st.sProduct
FROM #t t
cross apply (
SELECT CAST(N'<root><r>' + REPLACE(t.Product,',', N'</r><r>') + N'</r></root>' as xml) xProduct
)xt
cross apply (
select CAST(r.value('.','NVARCHAR(MAX)') as nvarchar) sProduct
from xt.xProduct.nodes(N'//root/r') AS RECORDS(r)
) st
where st.sProduct in ('a', 'b', 'c')
)
select distinct x.Product, REVERSE(SUBSTRING(REVERSE(cleared.cProduct), 2, 999)) cleared
from x
cross apply ( select (
select distinct ref.sProduct + ','
from x ref
where ref.Product = x.Product
for xml path('') )
)cleared(cProduct)
;
drop table #t

Insert into Table using JOIN T-SQL

I want to insert into a specific column in my table A which belongs to DB 1
from my DB 2 table B
In table A I have a unique ID field called F6 same goes for table B field name F68; both fields are the same they are simply a copy of each other which gives me the opportunity to do a join on them.
So far so good, what I want now is to insert into my table A in the field F110 the values from table B F64 since I did a join on the "ID's" they should be in the right manner.
All fields are of type VARCHAR.
INSERT INTO [D061_15018659].[dbo].[A](F110)
SELECT v.F64,v.F68
FROM [VFM6010061V960P].[dbo].[B] v LEFT JOIN
ON v.F68 = F6
I have the problem that I have an error on "ON" why so ever I can't figure it out.
Your select query provide 2 columns ==> you need concatenate the columns
You need repeat the tabel A in join clause.
Try this :
INSERT INTO [D061_15018659].[dbo].[A] (F110)
SELECT
v.F64 || v.F68 as theNewF110
FROM
[VFM6010061V960P].[dbo].[B] v
LEFT JOIN
[D061_15018659].[dbo].[A] w ON v.F68 = w.F6

How to remove all the duplicate records except the last occurence in a table

I have a interim table without any primary key and identity. I need to check one of the columns (branch_ref) value for duplicate entries and should mark the flag as 'D' if the branch_ref is same for more than one record except the last occurrence in the table. How can we do this?
Actual data as stored in table.
select branch_name,branch_reference,address_1,zip_cd,null as flag_val FROM Branch_Master
As per above table, I need all flag to be updated as ā€˜Dā€™ except for 6th (brach_reference=9910) and 16th record (branch_reference=99100 and zip_cd=612).
When I use row_number function to identify the duplicates order gets changed.
SELECT branch_name,branch_reference,address_1,zip_cd,flag_val, ROW_NUMBER() OVER(PARTITION BY branch_reference ORDER BY branch_reference) RID
FROM Branch_Master
Am using below query to update flag_val and its updating wrong records.
;WITH CTE AS
(
SELECT branch_name,branch_reference,address_1,zip_cd,flag_val, ROW_NUMBER() OVER(PARTITION BY branch_reference ORDER BY branch_reference) RID
FROM Branch_Master
WHERE flag_val IS NULL
)
UPDATE C1 SET flag_val = 'D'
FROM CTE C1
LEFT OUTER JOIN (SELECT branch_reference, max(RID) MRID FROM CTE GROUP BY branch_reference) C2
ON C1.branch_reference=C2.branch_reference and C1.RID=C2.MRID
WHERE C2.branch_reference IS NULL

How to set new column equal to subquery PostGreSQL

Full disclosure: I've seen 1 variation of this question for mySQL, and the PostgreSQL answer didn't satisfy me.
I have 2 tables: Reviews & businesses. In the Reviews table, the only 3 relevant columns for the purpose of this question are 'business_id', 'date' (yyyy-mm-dd), and stars (1-5), and the primary key is (review_id). In the businesses table, the relevant columns are 'business_id', 'year', and 'month'.' The 'year' and 'month' columns are there because there is another column in the business table called 'review_count', which represents the number of reviews a business received on each month of each year. Because of this, the composite primary key of this table is (business_id, year, month).
Essentially, I am trying to create a column in the business table with the average rating (represented by stars) a business received on each month of each year it was open.
The following query gives me the exact result I want:
SELECT round(CAST(AVG(stars) AS NUMERIC), 2)
FROM reviews_for_trending_businesses
WHERE business_id IN (SELECT DISTINCT(business_id)
FROM trending_businesses_v2)
GROUP BY business_id, EXTRACT("year" FROM reviews_for_trending_businesses.date), EXTRACT('month' FROM reviews_for_trending_businesses.date);
This code returns the column and all the correct values that I want to insert into my business table.
However, when I try to actually update the table, I get an error saying more than one row was returned by the subquery used as an expression. This is the code I'm trying to update with:
UPDATE trending_businesses_v2
SET avg_monthly_rating = (SELECT round(CAST(AVG(stars) AS NUMERIC), 2)
FROM reviews_for_trending_businesses
WHERE business_id IN (SELECT DISTINCT(business_id)
FROM trending_businesses_v2)
GROUP BY business_id, EXTRACT("year" FROM reviews_for_trending_businesses.date), EXTRACT('month' FROM reviews_for_trending_businesses.date);
I've tried a number of other solutions as well, including using joins, but keep getting a similar error.
UPDATE: Still No Answer but getting Closer:
Still can't quite figure out where I'm going wrong here. I also don't understand why I have to groupby 'rtb.date' here if I'm only extracting values from it (returned error if I didn't).
UPDATE trending_businesses_v2 tb
SET avg_monthly_rating = t.val
FROM (SELECT business_id, EXTRACT("year" FROM rtb.date) AS year, EXTRACT('month' FROM rtb.date) AS month, round(CAST(AVG(stars) AS NUMERIC), 2) as val
FROM reviews_for_trending_businesses rtb
WHERE business_id IN (SELECT DISTINCT(business_id)
FROM trending_businesses_v2
)
GROUP BY business_id, year, month, rtb.date
) t
WHERE t.business_id = tb.business_id AND
t.year = tb.year AND t.month = tb.month;
You need to match the rows, presumably using a business id and date. Something like this:
UPDATE trending_businesses_v2 tb
SET avg_monthly_rating = t.val
FROM (SELECT business_id, date_trunc('month', rtb.date) as yyyymm, round(CAST(AVG(stars) AS NUMERIC), 2) as val
FROM reviews_for_trending_businesses rtb
WHERE business_id IN (SELECT DISTINCT(business_id)
FROM trending_businesses_v2
)
GROUP BY business_id, date_trunc('month', rtb.date)
) t
WHERE t.business_id = tb.business_id AND
t.yyyymm = tb.?;

PostgreSQL Removing duplicates

I am working on postgres query to remove duplicates from a table. The following table is dynamically generated and I want to write a select query which will remove the record if the first row has duplicate values.
The table looks something like this
Ist col 2nd col
4 62
6 34
5 26
5 12
I want to write a select query which remove either row 3 or 4.
There is no need for an intermediate table:
delete from df1
where ctid not in (select min(ctid)
from df1
group by first_column);
If you are deleting many rows from a large table, the approach with an intermediate table is probably faster.
If you just want to get unique values for one column, you can use:
select distinct on (first_column) *
from the_table
order by first_column;
Or simply
select first_column, min(second_column)
from the_table
group by first_column;
select count(first) as cnt, first, second
from df1
group by first
having(count(first) = 1)
if you want to keep one of the rows (sorry, I initially missed it if you wanted that):
select first, min(second)
from df1
group by first
Where the table's name is df1 and the columns are named first and second.
You can actually leave off the count(first) as cnt if you want.
At the risk of stating the obvious, once you know how to select the data you want (or don't want) the delete the records any of a dozen ways is simple.
If you want to replace the table or make a new table you can just use create table as for the deletion:
create table tmp as
select count(first) as cnt, first, second
from df1
group by first
having(count(first) = 1);
drop table df1;
create table df1 as select * from tmp;
or using DELETE FROM:
DELETE FROM df1 WHERE first NOT IN (SELECT first FROM tmp);
You could also use select into, etc, etc.
if you want to SELECT unique rows:
SELECT * FROM ztable u
WHERE NOT EXISTS ( -- There is no other record
SELECT * FROM ztable x
WHERE x.id = u.id -- with the same id
AND x.ctid < u.ctid -- , but with a different(lower) "internal" rowid
); -- so u.* must be unique
if you want to SELECT the other rows, which were suppressed in the previous query:
SELECT * FROM ztable nu
WHERE EXISTS ( -- another record exists
SELECT * FROM ztable x
WHERE x.id = nu.id -- with the same id
AND x.ctid < nu.ctid -- , but with a different(lower) "internal" rowid
);
if you want to DELETE records, making the table unique (but keeping one record per id):
DELETE FROM ztable d
WHERE EXISTS ( -- another record exists
SELECT * FROM ztable x
WHERE x.id = d.id -- with the same id
AND x.ctid < d.ctid -- , but with a different(lower) "internal" rowid
);
So basically I did this
create temp t1 as
select first, min (second) as second
from df1
group by first
select * from df1
inner join t1 on t1.first = df1.first and t1.second = df1.second
Its a satisfactory answer. Thanks for your help #Hack-R