Optimize MySQl Query (Substitution from Column Level query) - mysql-workbench

I have Following MySQL Query.How can I optimize this.
Basically what this query is doing .
I have two different table for employee and his pay history, I need to show the sum of national insurance contribution for each employee and for each month as column.
I am able to acheive this functionality but I dont think the approach is right.How can I modify this statement in light of performance.
DECLARE firstStartDate DATETIME;
SET firstStartDate = GetFirstPeriodStartDateForFiscalYear(iGroupID,iFiscalYear);
SELECT
(SELECT fullname) as 'Employee Name',
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(firstStartDate)=MONTHNAME(PeriodEndDate)) as 'Mon1',
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 1 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon2' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 2 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon3' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 3 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon4' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 4 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon5' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 5 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon6' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 6 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon7' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 7 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon8' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 8 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon9' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 9 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon10' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 10 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon11' ,
(SELECT SUM(EE_NI_Contrib)
FROM eepayhistory
WHERE eepayhistory.employee_id = Employee.pkEmpid
AND MONTHNAME(DATE_ADD(firstStartDate, INTERVAL 11 MONTH) )=MONTHNAME(PeriodEndDate)) as 'Mon12'
FROM Employee
LEFT OUTER JOIN eepayhistory
ON eepayhistory.employee_id = Employee.pkEmpid
AND Employee.CompanyId = iCompanyID
Group by 'Employee Name';
How can I optimize this query.

Related

Redshift Optimization

I am looking for some help with optimizing the below. Is there a method if Redshift that can be used to eliminate the need to call the sub select each time since they are virtually the same for different fields:
select
IdText AS EquipmentPointCode,
OnRLT AS Downtime,
( SELECT OnRLT FROM temp_pointsvr_uptime_dataset WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OnRLT > t1.OnRLT ORDER BY OnRLT ASC LIMIT 1) AS Uptime,
( SELECT LineDefault_Value1 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value1,
( SELECT LineDefault_Value2 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value2,
( SELECT LineDefault_Value3 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value3,
( SELECT LineDefault_Value4 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value4,
( SELECT LineDefault_Value5 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value5,
( SELECT LineDefault_Value6 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value6,
( SELECT LineDefault_Value7 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value7,
( SELECT StateDefault_Value1 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS StateDefault_Value1,
( SELECT StateDefault_Value2 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS StateDefault_Value2,
( SELECT StateDefault_Value3 FROM temp_not_pointsvr_downtime WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS StateDefault_Value3,
'FM 10' AS Equipment
FROM temp_pointsvr_downtime t1
WHERE IdText = (select IDtext from list_of_ids where rownum =1)
I feel like what you're trying to do can be accomplished using case statements, but without seeing an example of the tables/data you're working with it's tough to determine.
Tackling just the nested select statements, you should be able to use a common table expression to hit the table one time, instead of re-selecting from it each time:
WITH
id_text_filter AS (
SELECT IDtext
FROM list_of_ids
WHERE rownum = 1
)
SELECT
IdText AS EquipmentPointCode,
OnRLT AS Downtime,
( SELECT OnRLT FROM temp_pointsvr_uptime_dataset WHERE IdText = (select IDtext from list_of_ids where rownum =1) AND OnRLT > t1.OnRLT ORDER BY OnRLT ASC LIMIT 1) AS Uptime,
( SELECT LineDefault_Value1 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value1,
( SELECT LineDefault_Value2 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value2,
( SELECT LineDefault_Value3 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value3,
( SELECT LineDefault_Value4 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value4,
( SELECT LineDefault_Value5 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value5,
( SELECT LineDefault_Value6 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value6,
( SELECT LineDefault_Value7 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS LineDefault_Value7,
( SELECT StateDefault_Value1 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS StateDefault_Value1,
( SELECT StateDefault_Value2 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS StateDefault_Value2,
( SELECT StateDefault_Value3 FROM temp_not_pointsvr_downtime WHERE ti.IdText = f.IDtext AND OffRLT = t1.OnRLT ORDER BY ResetRLT DESC Limit 1) AS StateDefault_Value3,
'FM 10' AS Equipment
FROM temp_pointsvr_downtime t1
JOIN id_text_filter f ON ti.IDtext = f.IDtext
WHERE IdText = (SELECT IDtext FROM list_of_ids WHERE rownum =1)
Again, I'm not sure if this code will work straight out of the box, as I'm not able to test it. But the idea should be similar:
1) Create a common table expression with the data you want to use as a filter
2) Join to that table in the main SELECT
3) Define WHERE clauses based on an equality to the common table expression.

SELECT from alias used in sub query

I define a field called a.onhold_endtime in the below code. I want to return that value as part of overall SELECT statement, but when I include it the alias a.* is not recognised. It returns an error "ERROR: missing FROM-clause entry for table "a" "
Select distinct
woas.workorderid,
a.onhold_endtime,
(SELECT
(SELECT count(*) AS work_hours
FROM generate_series (b.onhold_starttime
, a.onhold_endtime - interval '1h'
, interval '1h') h
WHERE EXTRACT(ISODOW FROM h) < 6
AND h::time >= '08:00'
AND h::time <= '18:00')
FROM (
SELECT DISTINCT woas.workorderid,
timestamp 'epoch' +
nth_value(wos.endtime,1) OVER(PARTITION BY woas.workorderid ORDER BY wos.endtime ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
/1000 * INTERVAL '1 second' as onhold_endtime from wo_status_info wos
LEFT JOIN wo_assessment woas ON woas.assessmentid = wos.assessmentid
WHERE tmp.workorderid = woas.workorderid AND wos.statusid = 1 AND wos.nextstatusid = 2
) as a
LEFT JOIN (
SELECT DISTINCT woas.workorderid,
timestamp 'epoch' +
nth_value(wos.endtime,1) OVER(PARTITION BY woas.workorderid ORDER BY wos.endtime ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
/1000 * INTERVAL '1 second' as onhold_starttime from wo_status_info wos
LEFT JOIN wo_assessment woas ON woas.assessmentid = wos.assessmentid
WHERE
(wos.statusid = 2 AND wos.nextstatusid <> 2)
) as b ON a.workorderid = b.workorderid) AS onhold_difference1
FROM wo_assessment as tmp
LEFT JOIN wo_assessment woas ON tmp.assessmentid = woas.assessmentid
LEFT JOIN wo_status_info wos ON woas.assessmentid = wos.assessmentid
ORDER BY woas.workorderid ASC
Is there a way I can structure the code so I can include items using the "a" alias?
Update with simplified code
I define a value called "onhold_endtime" within a subquery with an alias of "a". I would like to return the value a.onhold_endtime in my main SELECT statement
Select distinct
woas.workorderid,
a.onhold_endtime,
(SELECT
(SELECT count(*))
FROM (
SELECT DISTINCT woas.workorderid,
nth_value(wos.endtime,1) OVER(PARTITION BY woas.workorderid ORDER BY wos.endtime ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
as onhold_endtime from wo_status_info wos
WHERE y.workorderid = woas.workorderid AND wos.statusid = 1 AND wos.nextstatusid = 2
) as a
LEFT JOIN (
SELECT DISTINCT woas.workorderid,
nth_value(wos.endtime,1) OVER(PARTITION BY woas.workorderid ORDER BY wos.endtime ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
as onhold_starttime from wo_status_info wos
WHERE
(wos.statusid = 2 AND wos.nextstatusid <> 2)
) as b ON a.workorderid = b.workorderid) AS x
FROM wo_assessment as y
LEFT JOIN wo_assessment woas ON y.assessmentid = woas.assessmentid
In short - you can use outer query alias in correlated subquery:
t=# select o.oid, (select o.datname||'*') from pg_database o limit 1;
oid | ?column?
-------+-----------
13505 | postgres*
(1 row)
but can't do the opposite:
t=# select b.*, (select o.datname||'*') b from pg_database o limit 1;
ERROR: missing FROM-clause entry for table "b"
LINE 1: select b.*, (select o.datname||'*') b from pg_database o lim...
^
the closest to the expected would be:
t=# select oid,b.* from pg_database o, LATERAL (select o.datname||'*') b limit 1;
oid | ?column?
-------+-----------
13505 | postgres*
(1 row)

Creating a Void Function in PostgreSQL

I am getting an error on this create function code in Postgresql. The error says it is happening around Line 2 at DELETE, but it happens at WITH if I remove that line so I think it is a problem with the format of my Creat Function
create or replace function retention_data(shopId integer) returns void as $$
delete from retention where shop_id = shopId;
WITH ret_grid_step1 as (
select * from (
SELECT
order_id as order_name,
cust_name as cust_name,
email as email,
date(order_date) as created_at,
count(*) as num_items_in_order,
sum(total_price) as sales ,
rank() over (partition BY order_id ORDER BY cust_name ASC) as rnk_shipping_name,
rank() over (partition BY order_id ORDER BY email ASC) as rnk_email
FROM orders
WHERE shop_id = shopId
and order_date is not null and order_date > now()::date - 365 and order_date < now()::date + 1
group by 1,2,3,4
) x
where rnk_shipping_name = 1 and rnk_email = 1
)
insert into retention(shop_id, cust_name, email, last_purchase_dt, total_sales, num_orders, days_since_last_order)
select
shopId as shop_id,
coalesce(b.cust_name,'null') as cust_name,
a.email,
a.last_purchase_dt,
total_sales,
num_orders,
current_date - last_purchase_dt as days_since_last_order
from (
select
email,
max(created_at) as last_purchase_dt,
count(*) as num_orders,
sum(sales) as total_sales
from ret_grid_step1
group by 1
) as a
left join (
select
email,
cust_name,
rank() over (partition BY email ORDER BY created_at DESC) as rnk
from ret_grid_step1
--where cust_name is not null
group by 1,2,created_at
) as b
on a.email = b.email
where b.rnk = 1
and a.email <> '';
$$ language plpgsql;

How to create a table with dates in sequence between range in Hive?

I'm trying to Create a table with column date, And I want to insert date in sequence between Range.
Here's what I have tried:
SET StartDate = '2009-01-01';
SET EndDate = '2016-06-31';
CREATE TABLE DateRangeTable(mydate DATE, qty INT);
INSERT INTO DateRangeTable VALUES (select a.Date, 0
from (
select current_date - INTERVAL (a.a + (10 * b.a) + (100 * c.a)) DAY as Date
from (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as a
cross join (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as b
cross join (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as c
) AS a where a.Date between '2019-01-01' and '2016-06-30');
This is the similar one:
select date_add(t.f1, t.start_r - pe.i) as date_range from (select '2022-01-01' as f1,datediff('2022-01-07','2022-01-01') as start_r,0 as end_r) t lateral view posexplode(split(space(start_r - end_r),' ')) pe as i,s;
You do not need VALUES keyword when using INSERT ... SELECT.
Working example:
set hivevar:start_date=2009-01-01;
set hivevar:end_date=2016-06-31;
CREATE TABLE DateRangeTable(mydate DATE, qty INT);
with date_range as
(--this query generates date range
select date_add ('${hivevar:start_date}',s.i) as dt
from ( select posexplode(split(space(datediff('${hivevar:end_date}','${hivevar:start_date}')),' ')) as (i,x) ) s
)
INSERT INTO TABLE DateRangeTable
select d.dt, 0 qty
from date_range d
where d.dt between '2019-01-01' and '2016-06-30');

DATEDIFF On Dates

I've been struggling with this but I'm not really good at tsql.
This is what I got, and I can't have the DateTime calculates all right. I'm getting the sum between A and B but not the total sum. For example in the last column I have a 0 which is getting me back to -x.
Here is the procedure, and some of the data are like this:
Code_Procedure date_evenement codes_situation
---------------------------------------------------------------
000079500000 2013-05-21 13:07:00.000 COMCFM
000079500000 2013-05-21 20:24:00.000 PCHCFM
000079500000 2013-05-22 09:58:00.000 PCHCFM
000079500000 2013-05-23 00:00:00.000 AARCFM
000079500000 2013-05-23 00:00:00.000 LIVCFM
000079600000 2013-05-21 13:07:00.000 COMCFM
000079600000 2013-05-21 20:24:00.000 PCHCFM
000079600000 2013-05-22 11:18:00.000 PCHCFM
000079600000 2013-05-23 00:00:00.000 AARCFM
000079600000 2013-05-23 00:00:00.000 LIVCFM
Here is the proc:
DECLARE #COMCFM TABLE(numero_colis VARCHAR(25), date_evenement DATETIME);
INSERT #COMCFM SELECT TOP(5) numero_colis, date_evenement FROM cartitem_colis_postaux_etats WHERE (code_situation = 'PCH' AND code_justification = 'CFM')
WHILE (SELECT COUNT(*) FROM #COMCFM) > 0
BEGIN
DECLARE #Colis TABLE(numero_colis VARCHAR(25), date_evenement DATETIME, code_situation_code_justification NVARCHAR(32));
INSERT #Colis SELECT numero_colis, date_evenement, code_situation + code_justification FROM cartitem_colis_postaux_etats WHERE numero_colis = (SELECT TOP(1) numero_colis FROM #COMCFM) ORDER BY numero_colis, date_evenement
;WITH CTE AS
(
Select DISTINCT
*
,ROW_NUMBER() OVER(PARTITION BY numero_colis ORDER BY date_evenement ASC) Rn FROM #Colis
),CTE1 AS
(
SELECT DISTINCT
A.*
,DATEDIFF(mi, B.date_evenement, A.date_evenement) AS DIFF
FROM CTE A INNER JOIN CTE B On B.Rn + 1 = A.Rn
UNION All
SELECT A.*, 0 FROM CTE A Where Rn = 1
)
SELECT
A.*
,ISNULL((
SELECT
A.DIFF + B.DIFF
FROM CTE1 AS B
WHERE A.numero_colis = B.numero_colis
AND A.Rn = B.Rn + 1), 0) AS Sums
FROM CTE1 AS a
ORDER BY numero_colis, Rn ASC
DELETE FROM #Colis
DELETE FROM #COMCFM WHERE numero_colis = (SELECT TOP(1) numero_colis FROM #COMCFM)
END
I'm not really sure what you would like to achieve. Do you need date differencies as a cummulated value? If you need this, change your BEGIN-END block of your stored procedure with this code
BEGIN
DECLARE #Colis TABLE(numero_colis VARCHAR(25), date_evenement DATETIME, code_situation_code_justification NVARCHAR(32));
INSERT #Colis SELECT numero_colis, date_evenement, code_situation + code_justification FROM cartitem_colis_postaux_etats WHERE numero_colis = (SELECT TOP(1) numero_colis FROM #COMCFM) ORDER BY numero_colis, date_evenement
;WITH CTE AS
(
SELECT DISTINCT
*,
ROW_NUMBER() OVER(PARTITION BY numero_colis ORDER BY date_evenement ASC) Rn
FROM #Colis
),CTE1 AS
(
SELECT A.*, 0 AS CummulatedDiff
FROM CTE A
WHERE Rn = 1
UNION ALL
SELECT DISTINCT A.*, B.CummulatedDiff + DATEDIFF(mi, B.date_evenement, A.date_evenement) AS CummulatedDiff
FROM CTE AS A INNER JOIN
CTE1 AS B ON B.Rn + 1 = A.Rn AND B.numero_colis = A.numero_colis
)
SELECT *
FROM CTE1 AS a
ORDER BY numero_colis, Rn ASC
DELETE FROM #Colis
DELETE FROM #COMCFM WHERE numero_colis = (SELECT TOP(1) numero_colis FROM #COMCFM)
END
I hope this takes you further to your goal.