Tuning a long SQL query

I have the following query. In it, I select from the EBS tables, joined to a custom table that holds header_id, and populate the data into a custom table XXREPORT_L1_TBL.
I want to tune this query.
[Update] I made the following changes to the query:
split the query into 3 different INSERT statements
removed the columns whose values come from inline queries
added an UPDATE statement at the end for those columns.
insert into XX.XXREPORT_L1_TBL ( ORDER_NUMBER
, LINE_NUMBER
, UOM
, CUSTOMER_LENGTH
, THEORETICAL_WEIGHT
, FINISH
, ORDER_QTY_PCS
, ORDER_QTY_KGS
, SALES_VALUE
, TOTAL_VALUE
, ORDERED_QUANTITY
, WIP_ENTITY_ID
, JOB_NAME
, JOB_TYPE
, JOB_STATUS
, JOB_RELEASED_DATE
, DATE_COMPLETED
, DATE_CLOSED
, JOB_CARD_QTY
, ALLOY
, PROFILE
, PROD_QTY_KGS
, COST_KGS_THEORY
, COST_KGS_ACTUAL
)
SELECT
---- Sales Order
xx.order_number
,xx.line_number
,xx.UOM,
xx.customer_length,
xx.theoretical_weight,
xx.finish,
xx.order_qty_pcs,
xx.order_qty_kgs,
xx.sales_value, -- total value / total kgs
xx.total_value, -- line total
xx.ordered_quantity,
-- Production
xx.wip_entity_id,
xx.job_name,
( select case when a.inventory_item_id = 5716770 and a.job_type='NOT CHILD' then 'PARENT'
when a.job_type='CHILD' and a.inventory_item_id is null then 'CHILD'
when a.job_type='NOT CHILD' and a.inventory_item_id is NOT null then 'NOT CHILD' END JOB_TYPE
from ( select disc2.wip_entity_id as wip_entity_id, decode ( nvl(disc2.attribute9,-1) , -1,'NOT CHILD', 'CHILD') job_type, oel.inventory_item_id
from APPS.wip_discrete_jobs disc2, APPS.oe_order_lines_all oel
where oel.line_id(+) = disc2.source_line_id
)a
where a.wip_entity_id = xx.wip_entity_id
) job_type,
( select decode ( xx.status_type, 6, 'Open',
3, 'Open',
4, 'Completed',
LU1.MEANING )
from APPS.FND_LOOKUP_VALUES LU1
where LU1.LOOKUP_TYPE = 'WIP_JOB_STATUS'
AND LU1.LOOKUP_CODE = xx.STATUS_TYPE
) job_status,
xx.job_released_date,
xx.date_completed,
xx.date_closed
,xx.net_quantity as job_card_qty
,xx.alloy
,xx.profile
,xx.prod_qty_kgs
-- Theoretical Order cost
,xx.cost_kgs_theory
-- Actual Order cost
,xx.cost_kgs_actual
from (
select a.*
-- Theoretical Order cost
, DECODE (a.qty_completed * a.customer_length * a.theoretical_weight,0,0,
a.TOT_THEORY_COST_RELIEVED/(a.qty_completed * a.customer_length * a.theoretical_weight) ) as cost_kgs_theory
-- Actual Order cost
, DECODE ( a.qty_completed * a.customer_length * a.theoretical_weight, 0, 0,
a.TOT_ACTUAL_COST_INCURRED/(a.qty_completed * a.customer_length * a.theoretical_weight )) as cost_kgs_actual
from (
select
-- Normal orders, INTERNAL Orders, Crimped Profile (parent jobs)
-- Sales Order
oeh.order_number as order_number
,oel.line_number
,oel.pricing_quantity_uom as UOM
,oel.attribute1 as customer_length
,oel.attribute6 as theoretical_weight
,oel.attribute5 as finish
,oel.attribute18 as order_qty_pcs
,oel.attribute7 as order_qty_kgs
,xx_om.GetLineUnitSellingPrice(oel.line_id) sales_value
,xx_om.GetHeaderUnitSellingPrice(oeh.header_id) total_value
,oel.ordered_quantity ordered_quantity
-- Production
, tbl0.qty_completed as qty_completed
,disc.wip_entity_id as wip_entity_id
,( select wip_entity_name from APPS.wip_entities ent
where ent.wip_entity_id = disc.wip_entity_id) job_name
,disc.status_type
,disc.date_released as job_released_date
, DECODE ( disc.date_completed, NULL, disc.date_completed,
-- my day Definition
to_date(to_char(to_date(TO_CHAR(disc.date_completed- interval '7' hour,'DD-MON-YYYY')||'00:00:00','DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS')) as date_completed
, DECODE ( disc.date_closed, NULL, disc.date_closed,
to_date(to_char(to_date(TO_CHAR(disc.date_closed- interval '7' hour,'DD-MON-YYYY')||'00:00:00','DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS')) as date_closed
, disc.net_quantity
, ( select opr2.quantity_completed
from APPS.wip_operations opr2
where opr2.wip_entity_id = disc.wip_entity_id
and opr2.operation_seq_num = (select max(opr.operation_seq_num)
from APPS.wip_operations opr, APPS.wip_discrete_jobs disc2
where opr.wip_entity_id = disc2.wip_entity_id
and disc2.wip_entity_id = disc.wip_entity_id))* oel.attribute1 * oel.attribute6 as prod_qty_kgs
,oel.attribute4 as alloy
,oel.attribute2 as profile
-- Theoretical Order cost
,tbl0.TOT_THEORY_COST_RELIEVED
-- Actual Order cost
,tbl0.TOT_ACTUAL_COST_INCURRED
from XX.XXREPORT_Lzero_TBL tbl0
join APPS.oe_order_headers_all oeh on oeh.header_id = tbl0.header_id
join APPS.oe_order_lines_all oel on oeh.org_id = oel.org_id and oeh.header_id = oel.header_id
join APPS.xx_assemblies asm on oel.line_id = asm.line_id
join APPS.wip_discrete_jobs disc on disc.primary_item_id = asm.inventory_item_id
where oel.link_to_line_id is null
union
-- Crimped Child Jobs
select
-- Sales Order
oeh.order_number as order_number
,oel.line_number
,oel.pricing_quantity_uom as UOM
,oel.attribute1 as customer_length
,oel.attribute6 as theoretical_weight
,oel.attribute5 as finish
,oel.attribute18 as order_qty_pcs
,oel.attribute7 as order_qty_kgs
,xx_om.GetLineUnitSellingPrice(oel.line_id) sales_value
,xx_om.GetHeaderUnitSellingPrice(oeh.header_id) total_value
,oel.ordered_quantity ordered_quantity
-- Production
, tbl0.qty_completed as qty_completed
,child_jobs.wip_entity_id as wip_entity_id
,( select wip_entity_name from APPS.wip_entities ent
where ent.wip_entity_id = child_jobs.wip_entity_id) job_name
,disc.status_type
,disc.date_released as job_released_date
, DECODE ( disc.date_completed, NULL, disc.date_completed,
to_date(to_char(to_date(TO_CHAR(disc.date_completed-interval '7' hour,'DD-MON-YYYY')||'00:00:00','DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS')) as date_completed
, DECODE ( disc.date_closed, NULL, disc.date_closed,
to_date(to_char(to_date(TO_CHAR(disc.date_closed-interval '7' hour,'DD-MON-YYYY')||'00:00:00','DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS')) as date_closed
, disc.net_quantity
, ( select opr2.quantity_completed
from APPS.wip_operations opr2
where opr2.wip_entity_id = disc.wip_entity_id
and opr2.operation_seq_num = (select max(opr.operation_seq_num)
from APPS.wip_operations opr, APPS.wip_discrete_jobs disc2
where opr.wip_entity_id = disc2.wip_entity_id
and disc2.wip_entity_id = disc.wip_entity_id))* oel.attribute1 * oel.attribute6 as prod_qty_kgs
,oel.attribute4 as alloy
,oel.attribute2 as profile
-- Theoretical Order cost
,tbl0.TOT_THEORY_COST_RELIEVED
-- Actual Order cost
,tbl0.TOT_ACTUAL_COST_INCURRED
from XX.XXREPORT_Lzero_TBL tbl0
join APPS.oe_order_headers_all oeh on oeh.header_id = tbl0.header_id
join APPS.oe_order_lines_all oel on oeh.org_id = oel.org_id and oeh.header_id = oel.header_id
join APPS.xx_assemblies asm on oel.line_id = asm.line_id
join APPS.wip_discrete_jobs disc on disc.primary_item_id = asm.inventory_item_id
join ( select wdj2.source_line_id, wdj2.attribute9 child_wip, wdj2.wip_entity_id, wdj2.status_type status_type
from APPS.wip_discrete_jobs wdj2
where attribute9 IS NOT NULL ) child_jobs on child_jobs.child_wip = to_char(disc.wip_entity_id)
where oel.link_to_line_id is null
union
-- Orders with star (*) items need to pick profile and customer length etc from ego_configured_pr_agv view
select
-- Sales Order
oeh.order_number as order_number
,oel.line_number
,oel.pricing_quantity_uom as UOM
,to_char(agv.gx_cp_length) as customer_length
,to_char(agv.gx_cp_th_weight) as theoretical_weight
,agv.gx_cp_surfacetreatment as finish
,oel.attribute18 as order_qty_pcs
, to_char(agv.gx_cp_th_weight * agv.gx_cp_length * oel.ordered_quantity) as order_qty_kgs
,XX.xx_om.GetLineUnitSellingPrice(oel.line_id) sales_value
,XX.xx_om.GetHeaderUnitSellingPrice(oeh.header_id) total_value
,oel.ordered_quantity ordered_quantity
-- Production
, tbl0.qty_completed as qty_completed
,disc.wip_entity_id as wip_entity_id
,( select wip_entity_name from APPS.wip_entities ent
where ent.wip_entity_id = disc.wip_entity_id) job_name
,disc.status_type
,disc.date_released as job_released_date
, DECODE ( disc.date_completed, NULL, disc.date_completed,
to_date(to_char(to_date(TO_CHAR(disc.date_completed-interval '7' hour,'DD-MON-YYYY')||'00:00:00','DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS')) as date_completed
, DECODE ( disc.date_closed, NULL, disc.date_closed,
to_date(to_char(to_date(TO_CHAR(disc.date_closed-interval '7' hour,'DD-MON-YYYY')||'00:00:00','DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS'), 'DD-MON-YYYY HH24:MI:SS')) as date_closed
, disc.net_quantity
, ( select opr2.quantity_completed
from APPS.wip_operations opr2
where opr2.wip_entity_id = disc.wip_entity_id
and opr2.operation_seq_num = (select max(opr.operation_seq_num)
from APPS.wip_operations opr, APPS.wip_discrete_jobs disc2
where opr.wip_entity_id = disc2.wip_entity_id
and disc2.wip_entity_id = disc.wip_entity_id))* agv.gx_cp_length * agv.gx_cp_th_weight as prod_qty_kgs
,gx_cp_alloy as alloy
,gx_cp_profile_id as profile
-- Theoretical Order cost
,tbl0.TOT_THEORY_COST_RELIEVED
-- Actual Order cost
,tbl0.TOT_ACTUAL_COST_INCURRED
from XX.XXREPORT_Lzero_TBL tbl0
join APPS.oe_order_headers_all oeh on oeh.header_id = tbl0.header_id
join APPS.oe_order_lines_all oel on oeh.org_id = oel.org_id and oeh.header_id = oel.header_id
join APPS.wip_discrete_jobs disc on oel.line_id = disc.source_line_id
join APPS.ego_gx_configured_pr_agv agv on agv.inventory_item_id= oel.inventory_item_id
where oel.link_to_line_id is null
)a
) xx;

There's almost certainly no short and simple solution to tuning this query. The problem here is its size and complexity; the lack of performance is merely a consequence.
As a first step I would consider taking a break from the keyboard. Grab a pen and paper and in plain English (or whichever "human" language you prefer) write down the questions you want answered from your database via this query. Then ask yourself which columns/variables/attributes you absolutely need to answer those questions. Write them down as well.
Now, do you really need all of those columns, nested joins, selects, and so forth to produce those variables? Maybe, but probably not. The key point here is to focus only on the data/information you actually need (YAGNI) and from there map out the bare relationships required to produce the information that answers your question. In other words, work from the outside in, not the other way around.
I realize that this perhaps sounds a bit abstract and vague, but the whole point is that keeping code clear and simple is an ongoing struggle. Keeping your eye on the objective at hand will help keep your head out of the weeds.
Finally, a few more specific thoughts at a glance:
Do you really need that union? Try to do without it if you can.
Nesting sucks. Nested nesting especially sucks. Keep things flat whenever possible and practical.
Is it possible or practical to split this into independent, smaller queries?
Use more descriptive names for your variables, add comments judiciously.
Learn and master EXPLAIN (in Oracle, EXPLAIN PLAN and DBMS_XPLAN).
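For example, in Oracle you can look at the optimizer's plan for any candidate rewrite like this (a minimal sketch with a placeholder statement; substitute the real query):
EXPLAIN PLAN FOR
SELECT oeh.order_number, oel.line_number
FROM APPS.oe_order_headers_all oeh
JOIN APPS.oe_order_lines_all oel ON oel.header_id = oeh.header_id;
-- display the plan that was just generated
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);
Comparing the plan before and after each change tells you whether an index is actually being used or whether a full scan of the big WIP/OM tables remains.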

Related

Combining grouping set queries into one

I have 5 different queries that work fine but basically do the same thing. The difference is that they group by different periods.
My question is: can these 5 queries be combined into 1 query, perhaps a procedure, where I pass in D (Day), W (Week), M (Month), Q (Quarter) or Y (Year)?
Below are the queries and some test data. Thanks in advance to all who respond.
ALTER SESSION SET NLS_TIMESTAMP_FORMAT = 'DD-MON-YYYY HH24:MI:SS.FF';
ALTER SESSION SET NLS_DATE_FORMAT = 'DD-MON-YYYY HH24:MI:SS';
CREATE TABLE customers
(CUSTOMER_ID, FIRST_NAME, LAST_NAME) AS
SELECT 1, 'Faith', 'Mazzarone' FROM DUAL UNION ALL
SELECT 2, 'Lisa', 'Saladino' FROM DUAL UNION ALL
SELECT 3, 'Micheal', 'Palmice' FROM DUAL UNION ALL
SELECT 4, 'Jerry', 'Torchiano' FROM DUAL;
CREATE TABLE items
(PRODUCT_ID, PRODUCT_NAME, PRICE) AS
SELECT 100, 'Black Shoes', 79.99 FROM DUAL UNION ALL
SELECT 101, 'Brown Pants', 111.99 FROM DUAL UNION ALL
SELECT 102, 'White Shirt', 10.99 FROM DUAL;
CREATE TABLE purchases
(CUSTOMER_ID, PRODUCT_ID, QUANTITY, PURCHASE_DATE) AS
SELECT 1, 101, 3, TIMESTAMP'2022-10-11 09:54:48' FROM DUAL UNION ALL
SELECT 1, 100, 1, TIMESTAMP '2022-10-12 19:04:18' FROM DUAL UNION ALL
SELECT 2, 101,1, TIMESTAMP '2022-10-11 09:54:48' FROM DUAL UNION ALL
SELECT 2, 101, 3, TIMESTAMP '2022-10-17 19:34:58' FROM DUAL UNION ALL
SELECT 2, 102, 3,TIMESTAMP '2022-12-06 11:41:25' + NUMTODSINTERVAL ( LEVEL * 2, 'DAY') FROM dual CONNECT BY LEVEL <= 6 UNION ALL
SELECT 2, 102, 3,TIMESTAMP '2022-12-26 11:41:25' + NUMTODSINTERVAL ( LEVEL * 2, 'DAY') FROM dual CONNECT BY LEVEL <= 6 UNION ALL
SELECT 3, 101,1, TIMESTAMP '2022-12-21 09:54:48' FROM DUAL UNION ALL
SELECT 3, 102,1, TIMESTAMP '2022-12-27 19:04:18' FROM DUAL UNION ALL
SELECT 3, 102, 4,TIMESTAMP '2022-12-22 21:44:35' + NUMTODSINTERVAL ( LEVEL * 2, 'DAY') FROM dual
CONNECT BY LEVEL <= 15 UNION ALL
SELECT 3, 101,1, TIMESTAMP '2022-12-11 09:54:48' FROM DUAL UNION ALL
SELECT 3, 102,1, TIMESTAMP '2022-12-17 19:04:18' FROM DUAL UNION ALL
SELECT 3, 102, 4,TIMESTAMP '2022-12-12 21:44:35' + NUMTODSINTERVAL ( LEVEL * 2, 'DAY') FROM dual
CONNECT BY LEVEL <= 5;
/* purchases per day for each customer */
SELECT TO_CHAR (p.purchase_date, 'YYYY-MM-DD') AS year_mon_day
, p.customer_id
, c.first_name
, c.last_name
, SUM (p.quantity * i.price) AS total_amt
FROM purchases p
JOIN customers c ON p.customer_id = c.customer_id
JOIN items i ON p.product_id = i.product_id
GROUP BY GROUPING SETS ( (TO_CHAR (p.purchase_date, 'YYYY-MM-DD'), p.customer_id, c.first_name, c.last_name)
, (TO_CHAR (p.purchase_date, 'YYYY-MM-DD'))
, ()
)
ORDER BY TO_CHAR (p.purchase_date, 'YYYY-MM-DD'), p.customer_id;
/* purchases per week for each customer */
SELECT TO_CHAR (p.purchase_date, 'IYYY"W"IW') AS year_week
, p.customer_id
, c.first_name
, c.last_name
, SUM (p.quantity * i.price) AS total_amt
FROM purchases p
JOIN customers c ON p.customer_id = c.customer_id
JOIN items i ON p.product_id = i.product_id
GROUP BY GROUPING SETS ( (TO_CHAR (p.purchase_date, 'IYYY"W"IW'), p.customer_id, c.first_name, c.last_name)
, (TO_CHAR (p.purchase_date, 'IYYY"W"IW'))
, ()
)
ORDER BY TO_CHAR (p.purchase_date, 'IYYY"W"IW'), p.customer_id;
/* purchases per month for each customer */
SELECT TO_CHAR (p.purchase_date, 'YYYY"M"MM') AS year_month
, p.customer_id
, c.first_name
, c.last_name
, SUM (p.quantity * i.price) AS total_amt
FROM purchases p
JOIN customers c ON p.customer_id = c.customer_id
JOIN items i ON p.product_id = i.product_id
GROUP BY GROUPING SETS ( (TO_CHAR (p.purchase_date, 'YYYY"M"MM'), p.customer_id, c.first_name, c.last_name)
, (TO_CHAR (p.purchase_date, 'YYYY"M"MM'))
, ()
)
ORDER BY TO_CHAR (p.purchase_date, 'YYYY"M"MM'), p.customer_id;
/* purchases per quarter for each customer */
SELECT TO_CHAR (p.purchase_date, 'YYYY"Q"Q') AS year_quarter
, p.customer_id
, c.first_name
, c.last_name
, SUM (p.quantity * i.price) AS total_amt
FROM purchases p
JOIN customers c ON p.customer_id = c.customer_id
JOIN items i ON p.product_id = i.product_id
GROUP BY GROUPING SETS ( (TO_CHAR (p.purchase_date, 'YYYY"Q"Q'), p.customer_id, c.first_name, c.last_name)
, (TO_CHAR (p.purchase_date, 'YYYY"Q"Q'))
, ()
)
ORDER BY TO_CHAR (p.purchase_date, 'YYYY"Q"Q'), p.customer_id;
/* purchases per year for each customer */
SELECT TO_CHAR (p.purchase_date, 'YYYY"Y"') AS year
, p.customer_id
, c.first_name
, c.last_name
, SUM (p.quantity * i.price) AS total_amt
FROM purchases p
JOIN customers c ON p.customer_id = c.customer_id
JOIN items i ON p.product_id = i.product_id
GROUP BY GROUPING SETS ( (TO_CHAR (p.purchase_date, 'YYYY"Y"'), p.customer_id, c.first_name, c.last_name)
, (TO_CHAR (p.purchase_date, 'YYYY"Y"'))
, ()
)
ORDER BY TO_CHAR (p.purchase_date, 'YYYY"Y"'), p.customer_id;
If you want it as a procedure then:
CREATE PROCEDURE get_customer_data(
i_period IN VARCHAR2,
o_cursor OUT SYS_REFCURSOR
)
AS
v_format VARCHAR2(10);
BEGIN
v_format := CASE UPPER(i_period)
WHEN 'D' THEN 'YYYY-MM-DD'
WHEN 'W' THEN 'IYYY"W"IW'
WHEN 'M' THEN 'YYYY"M"MM'
WHEN 'Q' THEN 'YYYY"Q"Q'
WHEN 'Y' THEN 'YYYY"Y"'
ELSE 'YYYY-MM-DD'
END;
OPEN o_cursor FOR
SELECT TO_CHAR (p.purchase_date, v_format) AS period
, p.customer_id
, c.first_name
, c.last_name
, SUM (p.quantity * i.price) AS total_amt
FROM purchases p
JOIN customers c ON p.customer_id = c.customer_id
JOIN items i ON p.product_id = i.product_id
GROUP BY
GROUPING SETS(
( TO_CHAR (p.purchase_date, v_format), p.customer_id, c.first_name, c.last_name )
, TO_CHAR (p.purchase_date, v_format)
, ()
)
ORDER BY TO_CHAR (p.purchase_date, v_format), p.customer_id;
END;
/
fiddle
You can certainly do this in a procedure or function, either returning a REF CURSOR like MT0's answer (if you can handle how to interface with that) or returning a nested table object, etc. But involving functions does add complication and may not be ideal for simple needs. It may be simpler just to write a view with a set of UNION ALLs and use a literal to select the one you want.
CREATE OR REPLACE VIEW myview AS
SELECT 'D' period,
[column_list]
FROM [table list with joins]
GROUP BY TO_CHAR(purchase_date,'YYYY-MM-DD'),customer_id,first_name,last_name
UNION ALL
SELECT 'M' period,
[column_list]
FROM [table list with joins]
GROUP BY TO_CHAR(purchase_date,'YYYY-MM'),customer_id,first_name,last_name
UNION ALL
SELECT 'Y' period,
[column_list]
FROM [table list with joins]
GROUP BY TO_CHAR(purchase_date,'YYYY'),customer_id,first_name,last_name
[etc...]
Then query it:
SELECT * FROM myview WHERE period = 'D'
Oracle should skip the work behind the other query blocks in the UNION ALL that don't match the literal period requested in your predicate, so there's no performance penalty.

CTE query not returning any results

The following query works
select TOP 100 T.DWH_ID,T.date_time, T.TimeDiff, T.[End Date], T.SPS_Bereich, T.txtName from (
SELECT sto.[DWH_ID]
,sto.[SPS_Bereich]
,FORMAT(sto.[DateTime], 'dd-MM-yyyy HH:mm') as date_time
,sto.[txtName]
,sto.[TimeDiff]
, DATEADD(second,sto.[TimeDiff],FORMAT(sto.[DateTime], 'dd-MM-yyyy HH:mm'))as [End Date]
FROM [Stoerdaten].[sta].[Stoerungen] sto where sto.Classname='Alarm' and sto.TimeDiff>60 ) as T
join [IgnitionServer].[dbo].[scheduled_events_ISTProduction] cal on
((T.date_time between cal.start_date and cal.end_date) and T.[End Date] between cal.start_date and cal.end_date) where cal.typ=1 order by [DWH_ID] desc
But when I changed it to a CTE, it didn't return any results.
CTE Query
;with q1 as
(
select TOP 1000 [DWH_ID],
SPS_Bereich ,
FORMAT([DateTime], 'dd-MM-yyyy HH:mm') as date_time,
[txtName],
[TimeDiff]
, DATEADD(second,[TimeDiff],FORMAT([DateTime], 'dd-MM-yyyy HH:mm'))as [End_Date]
FROM [Stoerdaten].[sta].[Stoerungen] where Classname='Alarm' and TimeDiff>60
)
select q1.DWH_ID,
q1.date_time,
q1.TimeDiff, q1.[End_Date], q1.txtName, q1.SPS_Bereich from q1 join [IgnitionServer].[dbo].[scheduled_events_ISTProduction] cal on
((q1.date_time between cal.start_date and cal.end_date) and q1.[End_Date] between cal.start_date and cal.end_date) where cal.typ=1
I don't understand what I am missing here. Any help is greatly appreciated.
Your CTE contains the TOP 1000 records of [sta].[Stoerungen] that satisfy the WHERE criteria, and you then join from that to [scheduled_events_ISTProduction].
In your initial query, the TOP 100 is applied after the JOIN has been made, so I imagine that whatever ends up in the result of your CTE can't be joined to any records in [scheduled_events_ISTProduction].
If you just select everything out of your CTE you should see that there are up to 1000 records in it, and you should also be able to verify the JOIN issue. Note too that TOP without an ORDER BY returns an arbitrary set of rows, so the 1000 rows picked inside the CTE are not necessarily the rows the original query ends up returning.
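If the goal is only to limit the final result, one option is to keep the filtering inside the CTE but apply TOP after the join, mirroring the original query (a sketch; the column expressions are copied from the question):
;with q1 as
(
select [DWH_ID],
       SPS_Bereich,
       FORMAT([DateTime], 'dd-MM-yyyy HH:mm') as date_time,
       [txtName],
       [TimeDiff],
       DATEADD(second,[TimeDiff],FORMAT([DateTime], 'dd-MM-yyyy HH:mm')) as [End_Date]
FROM [Stoerdaten].[sta].[Stoerungen]
where Classname='Alarm' and TimeDiff>60
)
select TOP 100 q1.DWH_ID, q1.date_time, q1.TimeDiff, q1.[End_Date], q1.txtName, q1.SPS_Bereich
from q1
join [IgnitionServer].[dbo].[scheduled_events_ISTProduction] cal
  on q1.date_time between cal.start_date and cal.end_date
 and q1.[End_Date] between cal.start_date and cal.end_date
where cal.typ=1
order by q1.DWH_ID desc;   -- TOP is now applied to the joined result, as in the working query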

Nesting a WITH statement into an existing select and passing dates into it

I have found an excellent solution on here that lets me count the number of working days between 2 dates excluding holidays. However, what I cannot figure out is how to pass t1.ATTRIBUTE_DATE1 into the start_date and t1.CHECK_DATE into the end_date.
When I tried putting the WITH into the SELECT, it complained about too many arguments.
SELECT DISTINCT t1.invoice_date
, t1.creation_date
, t1.INVOICE_RECEIVED_DATE
, (t1.check_Date - t1.INVOICE_RECEIVED_DATE)
, ((t1.check_Date - t2.REPORT_SUBMIT_DATE)+3)
, ((t1.check_Date - t1.invoice_date)+3)
, t1.ATTRIBUTE_DATE1
, t1.invoice_num
, t1.payment_number
, t1.check_date
, t1.vendor_type_lookup_code
, t1.source
, t1.PAY_GROUP_LOOKUP_CODE
, t1.Batch_Name
, t1.Description
, t1.Vendor_Name
, t1.Amount_Paid
, t1.Invoice_ID
, t2.REPORT_SUBMIT_DATE
, t2.FINAL_APPROVAL_DATE
FROM ( SELECT DISTINCT APA.INVOICE_ID
, APA.INVOICE_DATE
, APA.CREATION_DATE
, APA.ATTRIBUTE_DATE1
, APA.INVOICE_NUM
, ACA.CHECK_NUMBER as PAYMENT_NUMBER
, ACA.CHECK_DATE
, APA.INVOICE_RECEIVED_DATE
, APA.CREATION_DATE
, SUP.VENDOR_TYPE_LOOKUP_CODE
, APA.SOURCE
, APA.PAY_GROUP_LOOKUP_CODE
, BAT.BATCH_NAME
, APA.DESCRIPTION
, APA.AMOUNT_PAID
, ACA.VENDOR_NAME
FROM AP_INVOICES_ALL APA
LEFT JOIN AP_INVOICE_LINES_ALL AIL
ON APA.INVOICE_ID= AIL.INVOICE_ID
LEFT JOIN AP_INVOICE_DISTRIBUTIONS_ALL AID
ON APA.INVOICE_ID = AID.INVOICE_ID AND AIL.LINE_NUMBER =
AID.INVOICE_LINE_NUMBER
JOIN AP_INVOICE_PAYMENTS_ALL AIP
ON APA.INVOICE_ID = AIP.INVOICE_ID
JOIN AP_CHECKS_ALL ACA
ON AIP.CHECK_ID = ACA.CHECK_ID
LEFT JOIN AP_BATCHES_ALL BAT
ON APA.BATCH_ID = BAT.BATCH_ID
LEFT JOIN POZ_SUPPLIERS_V SUP
ON APA.PARTY_ID = SUP.PARTY_ID
WHERE AID.LINE_TYPE_LOOKUP_CODE = 'ITEM'
AND APA.SOURCE NOT IN ('INVOICE GATEWAY' , 'B2B XML INVOICE')
AND ACA.STATUS_LOOKUP_CODE<> 'VOIDED'
AND APA.INVOICE_TYPE_LOOKUP_CODE NOT IN ('CREDIT' , 'PREPAYMENT')
AND ACA.CHECK_DATE BETWEEN :Start_Date AND :End_Date
AND BAT.BATCH_NAME IS NOT NULL) t1
LEFT JOIN (Select EXPENSE_REPORT_NUM
,REPORT_SUBMIT_DATE
,FINAL_APPROVAL_DATE
,EXPENSE_REPORT_TOTAL
FROM EXM_EXPENSE_REPORTS)t2
ON t1.INVOICE_NUM =t2.EXPENSE_REPORT_NUM
ORDER BY t1.CHECK_DATE ASC
WITH statement that I would like to use to give me the days between t1.ATTRIBUTE_DATE1 (which can be blank) and t1.check_date:
(WITH test_data AS
(
SELECT TO_DATE('01/01/2019', 'DD/MM/YYYY') AS start_date,-----t1.ATTRIBUTE_DATE1
TO_DATE('27/08/2019', 'DD/MM/YYYY') AS end_date------t1.check_date
FROM dual
),
all_dates AS
(
SELECT td.start_date, td.end_date, td.start_date + LEVEL-1 as week_day
FROM test_data td
CONNECT BY td.start_date + LEVEL-1 <= td.end_date)
SELECT TO_CHAR(week_day, 'MON'), COUNT(*)
FROM all_dates
WHERE to_char(week_day, 'FMDAY', 'NLS_DATE_LANGUAGE=ENGLISH') NOT IN
('SATURDAY','SUNDAY')
AND to_char(week_day, 'DD/MM/YYYY') NOT IN ( '01/01/2019', '25/12/2019',
'26/12/2019', '26/08/2019', '19/04/2019', '22/04/2019', '06/05/2019',
'27/05/2019')
GROUP BY TO_CHAR(week_day, 'MON')
)
I want to replace the above WITH statement with this:
(WITH test_data AS
(
SELECT TO_DATE(t1.ATTRIBUTE_DATE1, 'DD/MM/YYYY') AS start_date,
TO_DATE(t1.check_date, 'DD/MM/YYYY') AS end_date
FROM dual
),
all_dates AS
(
SELECT td.start_date, td.end_date, td.start_date + LEVEL-1 as week_day
FROM test_data td
CONNECT BY td.start_date + LEVEL-1 <= td.end_date)
SELECT TO_CHAR(week_day, 'MON'), COUNT(*)
FROM all_dates
WHERE to_char(week_day, 'FMDAY', 'NLS_DATE_LANGUAGE=ENGLISH') NOT IN ('SATURDAY','SUNDAY')
AND to_char(week_day, 'DD/MM/YYYY') NOT IN ( '01/01/2019', '25/12/2019', '26/12/2019', '26/08/2019', '19/04/2019', '22/04/2019', '06/05/2019', '27/05/2019')
GROUP BY TO_CHAR(week_day, 'MON')
)
I don't have a clue what your real issue is, but if you want to count working days between 2 dates you can use something like:
with test_data AS
(SELECT TO_DATE('01/01/2019', 'DD/MM/YYYY') AS start_date, -- t1.ATTRIBUTE_DATE1
TO_DATE('27/08/2019', 'DD/MM/YYYY') AS end_date -- t1.check_date
FROM dual
union all
SELECT TO_DATE('01/04/2019', 'DD/MM/YYYY') AS start_date, -- t1.ATTRIBUTE_DATE1
TO_DATE('10/08/2019', 'DD/MM/YYYY') AS end_date -- t1.check_date
FROM dual)
, dates as (
select trunc(sysdate,'YYYY') + level -1 bd from dual connect by level <= 250)
, evaluate_free as (
select bd
, case when to_char(bd, 'FMDAY', 'NLS_DATE_LANGUAGE=ENGLISH') IN ('SATURDAY','SUNDAY')
or to_char(bd, 'DD/MM/YYYY') IN ( '01/01/2019', '25/12/2019', '26/12/2019', '26/08/2019', '19/04/2019', '22/04/2019', '06/05/2019', '27/05/2019')
then 0 else 1 end free_work
from dates)
select start_date,end_date, sum(free_work) working_days
from evaluate_free, test_data
where bd between start_date and end_date
group by start_date,end_date
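One way to plug this into the original statement is to build the same working-day calendar once and left-join it against each invoice row, counting the days that fall between the two dates. The sketch below is an assumption-laden illustration: it assumes ATTRIBUTE_DATE1 is a DATE column (or convertible to one), reuses the 2019 holiday list from above, and pulls only a minimal set of columns from the AP tables to stay short.
with dates as (
  select date '2019-01-01' + level - 1 as bd
  from dual
  connect by level <= 365          -- whole of 2019; widen the range as needed
),
workdays as (
  select bd
  from dates
  where to_char(bd, 'FMDAY', 'NLS_DATE_LANGUAGE=ENGLISH') not in ('SATURDAY','SUNDAY')
    and to_char(bd, 'DD/MM/YYYY') not in ('01/01/2019','25/12/2019','26/12/2019','26/08/2019',
                                          '19/04/2019','22/04/2019','06/05/2019','27/05/2019')
)
select apa.invoice_id,
       count(w.bd) as working_days  -- 0 when ATTRIBUTE_DATE1 is null, since nothing joins
from ap_invoices_all apa
join ap_invoice_payments_all aip on apa.invoice_id = aip.invoice_id
join ap_checks_all aca on aip.check_id = aca.check_id
left join workdays w
  on w.bd between apa.attribute_date1 and aca.check_date
group by apa.invoice_id;
The same LEFT JOIN / COUNT pair can be folded into the existing t1 subquery, which avoids nesting a WITH inside the SELECT list altogether.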

SQL query from Oracle SQL to T-SQL

I have a subquery which is used for an Oracle database, but I want to use an equivalent query for a SQL Server database.
I didn't figure out how to migrate the TO_TIMESTAMP(TO_CHAR(TO_DATE part, and I also didn't know how to handle the rownum trick in T-SQL.
Is it even possible to migrate this query?
SELECT 0 run_id,
0 tran_id,
0 sort_id,
' ' tran_type,
10 prod_id,
72 type_id,
1 value,
TO_TIMESTAMP(TO_CHAR(TO_DATE('2016-03-18 00:00:00', 'YYYY.MM.DD HH24:MI:SS') + rownum -1, 'YYYY.MM.DD') || to_char(sw.end_time, 'HH24:MI:SS'), 'YYYY.MM.DD HH24:MI:SS') event_publication,
EXTRACT (YEAR
FROM (TO_DATE('2016-03-18 00:00:00', 'YYYY.MM.DD HH24:MI:SS') + rownum -1)) y,
EXTRACT (MONTH
FROM (TO_DATE('2016-03-18 00:00:00', 'YYYY.MM.DD HH24:MI:SS') + rownum -1)) mo,
EXTRACT (DAY
FROM (TO_DATE('2016-03-18 00:00:00', 'YYYY.MM.DD HH24:MI:SS') + rownum -1)) d,
to_number(to_char (sw.end_time, 'HH24')) h,
to_number(to_char (sw.end_time, 'MI')) mi,
to_number(to_char (sw.end_time, 'SS')) s,
0 ms
FROM all_objects ao,
settlement_win sw,
prod_def pd
WHERE pd.prod_id = 10
AND sw.country = pd.country
AND sw.commodity = pd.commodity
AND rownum <= TO_DATE('2016-03-18 23:59:00', 'YYYY.MM.DD HH24:MI:SS') -TO_DATE('2016-03-18 00:00:00', 'YYYY.MM.DD HH24:MI:SS')+1
The first thing to address is the use of rownum, which has no direct equivalent in T-SQL, but we can mimic it. For this particular query you also need to recognize that the table ALL_OBJECTS is only being used to produce a number of rows; it has no other purpose in the query.
In T-SQL we can generate rows using a CTE, and there are many variants of this, but here I suggest:
;WITH
cteDigits AS (
SELECT 0 AS digit UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL
SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
)
, cteTally AS (
SELECT
d1s.digit
+ d10s.digit * 10
+ d100s.digit * 100 /* add more like this as needed */
-- + d1000s.digit * 1000 /* add more like this as needed */
+ 1 AS rownum
FROM cteDigits d1s
CROSS JOIN cteDigits d10s
CROSS JOIN cteDigits d100s /* add more like this as needed */
--CROSS JOIN cteDigits d1000s /* add more like this as needed */
)
This will quickly spin up 1,000 rows as is and can be extended to produce many more rows by adding more cross joins. Note that it returns a column called rownum which starts at 1, thus mimicking the Oracle rownum.
So next you can just add some of the remaining query, like this:
SELECT
0 run_id
, 0 tran_id
, 0 sort_id
, ' ' tran_type
, 10 prod_id
, 72 type_id
, 1 value
, convert(varchar, dateadd(day, rownum - 1,'20160318'),121) event_publication
-- several missing rows here
, 0 ms
FROM cteTally
CROSS JOIN settlement_win sw
INNER JOIN prod_def pd ON sw.country = pd.country AND sw.commodity = pd.commodity
WHERE pd.prod_id = 10
AND rownum <= datediff(day,'20160318','20160318') + 1
Note that you really do not need a to_timestamp() equivalent; you just need the ability to output date and time to the maximum precision of your data, which appears to be the level of seconds.
To progress further (I think) requires an understanding of the data held in the column sw.end_time. If this can be converted to the MSSQL datetime data type, then it is just a matter of adding a number of days to that value to arrive at the event_publication, and similarly, once sw.end_time is a datetime, you can use datepart() to get the hours, minutes and seconds from that column, e.g.
, DATEADD(day,rownum-1,CONVERT(datetime, sw.end_time)) AS event_publication
Also, if such a calculation works, then it would be possible to use an APPLY operator to simplify the overall query, something like this:
;WITH
cteDigits AS (
SELECT 0 AS digit UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL
SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
)
, cteTally AS (
SELECT
d1s.digit
+ d10s.digit * 10
+ d100s.digit * 100 /* add more like this as needed */
-- + d1000s.digit * 1000 /* add more like this as needed */
+ 1 AS rownum
FROM cteDigits d1s
CROSS JOIN cteDigits d10s
CROSS JOIN cteDigits d100s /* add more like this as needed */
--CROSS JOIN cteDigits d1000s /* add more like this as needed */
)
SELECT
0 run_id
, 0 tran_id
, 0 sort_id
, ' ' tran_type
, 10 prod_id
, 72 type_id
, 1 value
, convert(varchar(23), CA.Event_publication, 121) Event_publication
, datepart(day,CA.Event_publication) dd
, datepart(month,CA.Event_publication) mm
, datepart(year,CA.Event_publication) yyyy
, datepart(hour,CA.Event_publication) hh24
, datepart(minute,CA.Event_publication) mi
, datepart(second,CA.Event_publication) ss
, 0 ms
FROM cteTally
CROSS JOIN settlement_win sw
INNER JOIN prod_def pd ON sw.country = pd.country AND sw.commodity = pd.commodity
CROSS APPLY (
SELECT DATEADD(day,rownum-1,CONVERT(datetime, sw.end_time)) AS event_publication ) CA
WHERE pd.prod_id = 10
AND rownum <= datediff(day,'20160318','20160318') + 1
NB: It may be necessary to include datediff(day,'19000101','20160318') (which equals 42445) in the calculation of the event_date, e.g.
SELECT DATEADD(day,42445 + (rownum-1),CONVERT(datetime, sw.end_time)) AS event_publication
One last point is that you could use datetime2 instead of datetime if you really do need a greater degree of time precision but there is no easily apparent requirement for that.

Summarize values across timeline in SQL

The Problem
I have a PostgreSQL database on which I am trying to summarize the revenue of a cash register over time. The cash register can either have status ACTIVE or INACTIVE, but I only want to summarize the earnings created when it was ACTIVE for a given period of time.
I have two tables; one that marks the revenue and one that marks the cash register status:
CREATE TABLE counters
(
id bigserial NOT NULL,
"timestamp" timestamp with time zone,
total_revenue bigint,
id_of_machine character varying(50),
CONSTRAINT counters_pkey PRIMARY KEY (id)
)
CREATE TABLE machine_lifecycle_events
(
id bigserial NOT NULL,
event_type character varying(50),
"timestamp" timestamp with time zone,
id_of_affected_machine character varying(50),
CONSTRAINT machine_lifecycle_events_pkey PRIMARY KEY (id)
)
A counters entry is added every 1 minute and total_revenue only increases. A machine_lifecycle_events entry is added every time the status of the machine changes.
I have added an image illustrating the problem. It is the revenue during the blue periods which should be summarized.
What I have tried so far
I have created a query which can give me the total revenue in a given instant:
SELECT total_revenue
FROM counters
WHERE timestamp < '2014-03-05 11:00:00'
AND id_of_machine='1'
ORDER BY
timestamp desc
LIMIT 1
The questions
How do I calculate the revenue earned between two timestamps?
How do I determine the start and end timestamps of the blue periods when I have to compare the timestamps in machine_lifecycle_events with the input period?
Any ideas on how to attack this problem?
Update
Example data:
INSERT INTO counters VALUES
(1, '2014-03-01 00:00:00', 100, '1')
, (2, '2014-03-01 12:00:00', 200, '1')
, (3, '2014-03-02 00:00:00', 300, '1')
, (4, '2014-03-02 12:00:00', 400, '1')
, (5, '2014-03-03 00:00:00', 500, '1')
, (6, '2014-03-03 12:00:00', 600, '1')
, (7, '2014-03-04 00:00:00', 700, '1')
, (8, '2014-03-04 12:00:00', 800, '1')
, (9, '2014-03-05 00:00:00', 900, '1')
, (10, '2014-03-05 12:00:00', 1000, '1')
, (11, '2014-03-06 00:00:00', 1100, '1')
, (12, '2014-03-06 12:00:00', 1200, '1')
, (13, '2014-03-07 00:00:00', 1300, '1')
, (14, '2014-03-07 12:00:00', 1400, '1');
INSERT INTO machine_lifecycle_events VALUES
(1, 'ACTIVE', '2014-03-01 08:00:00', '1')
, (2, 'INACTIVE', '2014-03-03 00:00:00', '1')
, (3, 'ACTIVE', '2014-03-05 00:00:00', '1')
, (4, 'INACTIVE', '2014-03-06 12:00:00', '1');
SQL Fiddle with sample data.
Example query:
The revenue between '2014-03-02 08:00:00' and '2014-03-06 08:00:00' is 300. 100 for the first ACTIVE period, and 200 for the second ACTIVE period.
DB design
To make my work easier I sanitized your DB design before I tackled the questions:
CREATE TEMP TABLE counter (
id bigserial PRIMARY KEY
, ts timestamp NOT NULL
, total_revenue bigint NOT NULL
, machine_id int NOT NULL
);
CREATE TEMP TABLE machine_event (
id bigserial PRIMARY KEY
, ts timestamp NOT NULL
, machine_id int NOT NULL
, status_active bool NOT NULL
);
Test case in the fiddle.
Major points
Using ts instead of "timestamp". Never use basic type names as column names.
Simplified & unified the name machine_id and made it out to be integer as it should be, instead of varchar(50).
event_type varchar(50) should be an integer foreign key, too, or an enum. Or even just a boolean for only active / inactive. Simplified to status_active bool.
Simplified and sanitized INSERT statements as well.
Answers
Assumptions
total_revenue only increases (per question).
Borders of the outer time frame are included.
Every "next" row per machine in machine_event has the opposite status_active.
1. How do I calculate the revenue earned between two timestamps?
WITH span AS (
SELECT '2014-03-02 12:00'::timestamp AS s_from -- start of time range
, '2014-03-05 11:00'::timestamp AS s_to -- end of time range
)
SELECT machine_id, s.s_from, s.s_to
, max(total_revenue) - min(total_revenue) AS earned
FROM counter c
, span s
WHERE ts BETWEEN s_from AND s_to -- borders included!
AND machine_id = 1
GROUP BY 1,2,3;
2. How do I determine the start and end timestamps of the blue periods when I have to compare the timestamps in machine_event with the input period?
This query works for all machines in the given time frame (span).
Add WHERE machine_id = 1 in the CTE cte to select a specific machine.
WITH span AS (
SELECT '2014-03-02 08:00'::timestamp AS s_from -- start of time range
, '2014-03-06 08:00'::timestamp AS s_to -- end of time range
)
, cte AS (
SELECT machine_id, ts, status_active, s_from
, lead(ts, 1, s_to) OVER w AS period_end
, first_value(ts) OVER w AS first_ts
FROM span s
JOIN machine_event e ON e.ts BETWEEN s.s_from AND s.s_to
WINDOW w AS (PARTITION BY machine_id ORDER BY ts)
)
SELECT machine_id, ts AS period_start, period_end -- start in time frame
FROM cte
WHERE status_active
UNION ALL -- active start before time frame
SELECT machine_id, s_from, ts
FROM cte
WHERE NOT status_active
AND ts = first_ts
AND ts <> s_from
UNION ALL -- active start before time frame, no end in time frame
SELECT machine_id, s_from, s_to
FROM (
SELECT DISTINCT ON (1)
e.machine_id, e.status_active, s.s_from, s.s_to
FROM span s
JOIN machine_event e ON e.ts < s.s_from -- only from before time range
LEFT JOIN cte c USING (machine_id)
WHERE c.machine_id IS NULL -- not in selected time range
ORDER BY e.machine_id, e.ts DESC -- only the latest entry
) sub
WHERE status_active -- only if active
ORDER BY 1, 2;
Result is the list of blue periods in your image.
SQL Fiddle demonstrating both.
Recent similar question:
Sum of time difference between rows
ok, I have an answer, but I had to assume that the id in machine_lifecycle_events can be used to determine successor and predecessor. So for my solution to work better you should have a link between the active and inactive events. There might also be other ways to solve it, but those would add even more complexity.
first, to get the revenue for all active periods per machine you can do the following:
select c.id_of_machine, cycle_id, cycle_start, cycle_end, sum(total_revenue)
from counters c join (
select e1.id as cycle_id,
e1.timestamp as cycle_start,
e2.timestamp as cycle_end,
e1.id_of_affected_machine as cycle_machine_id
from machine_lifecycle_events e1 join machine_lifecycle_events e2
on e1.id + 1 = e2.id and -- this should be replaced with a specific column to find cycles which belong together
e1.id_of_affected_machine = e2.id_of_affected_machine
where e1.event_type = 'ACTIVE'
) cycle
on c.id_of_machine = cycle_machine_id and
cycle_start <= c.timestamp and c.timestamp <= cycle_end
group by c.id_of_machine, cycle_id, cycle_start, cycle_end
order by c.id_of_machine, cycle_id
you can further use this query and add more where conditions to get the revenue only within a time frame or for specific machines:
select sum(total_revenue)
from counters c join (
select e1.id as cycle_id,
e1.timestamp as cycle_start,
e2.timestamp as cycle_end,
e1.id_of_affected_machine as cycle_machine_id
from machine_lifecycle_events e1 join machine_lifecycle_events e2
on e1.id + 1 = e2.id and -- this should be replaced with a specific column to find cycles which belong together
e1.id_of_affected_machine = e2.id_of_affected_machine
where e1.event_type = 'ACTIVE'
) cycle
on c.id_of_machine = cycle_machine_id and
cycle_start <= c.timestamp and c.timestamp <= cycle_end
where '2014-03-02 08:00:00' <= c.timestamp and c.timestamp <= '2014-03-06 08:00:00'
and c.id_of_machine = '1'
As mentioned in the beginning, and in the comments, my way of finding connected events isn't suitable for more complex examples with multiple machines. The easiest way would be to have another column that always points to the preceding event (see the sketch below). Another way would be to have a function which would find those events, but that solution couldn't make use of indexes.
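A minimal sketch of that first idea, assuming a hypothetical linking column previous_event_id (not part of the existing schema) that each INACTIVE event uses to point at the ACTIVE event it closes:
-- hypothetical linking column; the name previous_event_id is an assumption
ALTER TABLE machine_lifecycle_events ADD COLUMN previous_event_id bigint;
-- pair each ACTIVE event with the INACTIVE event that references it,
-- instead of relying on consecutive ids
select e1.id                     as cycle_id,
       e1.timestamp              as cycle_start,
       e2.timestamp              as cycle_end,
       e1.id_of_affected_machine as cycle_machine_id
from machine_lifecycle_events e1
join machine_lifecycle_events e2
  on e2.previous_event_id = e1.id
 and e2.id_of_affected_machine = e1.id_of_affected_machine
where e1.event_type = 'ACTIVE';
This derived pairing can replace the e1.id + 1 = e2.id join in the queries above without any other change.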
Use a self-join and build an intervals table with the actual status of each interval.
with intervals as (
select e1.timestamp time1, e2.timestamp time2, e1.EVENT_TYPE as status
from machine_lifecycle_events e1
left join machine_lifecycle_events e2 on e2.id = e1.id + 1
) select * from counters c
join intervals i on c.timestamp >= i.time1 and (c.timestamp <= i.time2 or i.time2 is null)
and i.status = 'ACTIVE';
I didn't use aggregation, to show the raw result set; you can add that simply, I think (a sketch is below). Also I omitted machine_id to simplify the demonstration of this pattern.
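For completeness, a minimal sketch of that aggregation step on top of the same intervals CTE, using the max-minus-min approach from the first answer (since total_revenue is a cumulative counter), with machine_id still ignored as above:
with intervals as (
  select e1.timestamp as time1, e2.timestamp as time2, e1.event_type as status
  from machine_lifecycle_events e1
  left join machine_lifecycle_events e2 on e2.id = e1.id + 1
)
select i.time1, i.time2,
       max(c.total_revenue) - min(c.total_revenue) as revenue  -- counter delta inside the interval
from counters c
join intervals i
  on c.timestamp >= i.time1
 and (c.timestamp <= i.time2 or i.time2 is null)                -- open-ended last interval
 and i.status = 'ACTIVE'
group by i.time1, i.time2
order by i.time1;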