How to go look back 7 days in a hive query - sql

I've got a sql where I need to constantly look back 4 days. This code would run once a week so I need to look 7 days back. My where clause is set stationary and looks between two dates, However I need it to look back 7 days all the time.
Here is a snippet of my code:
WITH gps_traces AS(
SELECT
gtrips.trip_id
, to_date(gtrips.trip_date) as trip_date
, gtrips.fleet_id
, vin.vehicle_vin
, gtrips.driver_id
, gtrips.trip_distance_travelled
, gtrips.trip_duration
, to_timestamp(gdata.trip_timestamp, "yyyy-MM-dd'T'HH:mm:ss") as gps_timestamp
, rank() over
(partition by gtrips.trip_id
order by to_timestamp(gdata.trip_timestamp, "yyyy-MM-dd'T'HH:mm:ss") asc)
as timestamp_rank
, gdata.latitude
, gdata.longitude
, gdata.postcode
FROM
cms.gps_trips gtrips
INNER JOIN
cms.gps_data gdata
ON gtrips.trip_id = gdata.trip_id
INNER JOIN
(
SELECT
DISTINCT --why are there duplicates?
devices.vehicle_id
, devices.vehicle_vin
, devices.data_effective_timestamp
FROM
cms.devices devices
INNER JOIN
(
SELECT
vehicle_id
, max(data_effective_timestamp) as data_effective_timestamp
FROM
cms.devices
GROUP BY
vehicle_id
) max_data_effective
ON devices.vehicle_id = max_data_effective.vehicle_id
AND devices.data_effective_timestamp = max_data_effective.data_effective_timestamp
) vin
WHERE
to_date(gtrips.trip_date) >= "2020-12-11" --Only keeping this date for now
AND
to_date(gtrips.trip_date) <= "2020-12-17"
AND
gtrips.fleet_id = 10211 --Only keeping due for this example
)
SELECT
gps.trip_id
, gps.trip_date
, gps.fleet_id
, gps.vehicle_vin
, gps.driver_id
, gps.trip_distance_travelled
, gps.trip_duration
, gps.gps_timestamp
, gps.latitude
, gps.longitude
, gps.postcode
, gps1.gps_timestamp as next_timestamp
, gps1.latitude as next_latitude
, gps1.longitude as next_longitude
, ACOS(
SIN(RADIANS(gps.latitude))*SIN(RADIANS(gps1.latitude)) +
COS(RADIANS(gps.latitude))*COS(RADIANS(gps1.latitude))*COS(RADIANS(gps1.longitude) - RADIANS(gps.longitude))
)*3958.76 AS COSINES_DISTANCE
, ASIN(
SQRT(
POWER(SIN((RADIANS(gps.latitude) - RADIANS(gps1.latitude))/2), 2) +
COS(RADIANS(gps.latitude))*COS(RADIANS(gps1.latitude))*
POWER(SIN((RADIANS(gps.longitude) - RADIANS(gps1.longitude))/2), 2)
)
)*3958.76*2 AS HAVERSINE_DISTANCE
, (UNIX_TIMESTAMP(gps1.gps_timestamp) - UNIX_TIMESTAMP(gps.gps_timestamp)) AS GPS_INTERVAL
FROM
gps_traces gps
LEFT JOIN
gps_traces gps1
ON gps.trip_id = gps1.trip_id
AND gps.timestamp_rank = (gps1.timestamp_rank - 1)
ORDER BY
gps.fleet_id
, gps.trip_id
, gps.timestamp_rank
specifically, I'm needing to change this snippet here:
WHERE
to_date(gtrips.trip_date) >= "2020-12-11" --Needs to be rolling 7 days
AND
to_date(gtrips.trip_date) <= "2020-12-17"
I tried converting the date but it's falling over in Hive. Can someone assist with this?

You can use current_date:
WHERE
to_date(gtrips.trip_date) >= date_sub(current_date, 7) --7 days back
AND
to_date(gtrips.trip_date) <= current_date
Or pass current date as a -hiveconf parameter:
WHERE
to_date(gtrips.trip_date) >= date_sub(to_date('${hiveconf:current_date}'), 7) --7 days back
AND
to_date(gtrips.trip_date) <= to_date('${hiveconf:current_date}')

Related

Total customer per reporting date without union

I would like to display to run this report where I show the total number of customers per reporting date. Here is a how I need the data to look like:
My original dataset look like this (please see query): In order to calculate the number of customers. I need to use the start and end date: if Start_Date>reporting_date and End_Date<=reporting_date then count as a customer.
I was able to develop a script, but it only gives me the total number of customers for only one reporting date.
select '2022-10-31' reporting_date, count(case when Start_Date>'2022-10-31' and End_Date<='2022-10-31' then Customer_ID end)
from (values ('2022-10-14','2022-8-19','0010Y654012P6KuQAK')
, ('2022-3-15','2022-9-14','0011v65402PoSpVAAV')
, ('2021-1-11','2022-10-11','0010Y654012P6DuQAK')
, ('2022-12-1','2022-5-14','0011v65402u7muLAAQ')
, ('2021-1-30','2022-3-14','0010Y654012P6DuQAK')
, ('2022-10-31','2022-2-14','0010Y654012P6PJQA0')
, ('2021-10-31','US','0010Y654012P6PJQA0')
, ('2021-5-31','2022-5-14','0011v65402x8cjqAAA')
, ('2022-6-2','2022-1-13','0010Y654016OqkJQAS')
, ('2022-1-1','2022-11-11','0010Y654016OqIaQAK')
) a(Start_Date ,End_Date ,Customer_ID)
Is there a way to amend the code with cross-join or other workarounds to the total customers per reporting date without doing many unions
select '2022-10-31' reporting_date, count(case when Start_Date>'2022-10-31' and End_Date<='2022-10-31' then Customer_ID end)
from (values ('2022-10-14','2022-8-19','0010Y654012P6KuQAK')
, ('2022-3-15','2022-9-14','0011v65402PoSpVAAV')
, ('2021-1-11','2022-10-11','0010Y654012P6DuQAK')
, ('2022-12-1','2022-5-14','0011v65402u7muLAAQ')
, ('2021-1-30','2022-3-14','0010Y654012P6DuQAK')
, ('2022-10-31','2022-2-14','0010Y654012P6PJQA0')
, ('2021-10-31','US','0010Y654012P6PJQA0')
, ('2021-5-31','2022-5-14','0011v65402x8cjqAAA')
, ('2022-6-2','2022-1-13','0010Y654016OqkJQAS')
, ('2022-1-1','2022-11-11','0010Y654016OqIaQAK')
) a(Start_Date ,End_Date ,Customer_ID)
UNION ALL
select '2022-9-30' reporting_date, count(case when Start_Date>'2022-9-301' and End_Date<='2022-9-30' then Customer_ID end)
from (values ('2022-10-14','2022-8-19','0010Y654012P6KuQAK')
, ('2022-3-15','2022-9-14','0011v65402PoSpVAAV')
, ('2021-1-11','2022-10-11','0010Y654012P6DuQAK')
, ('2022-12-1','2022-5-14','0011v65402u7muLAAQ')
, ('2021-1-30','2022-3-14','0010Y654012P6DuQAK')
, ('2022-10-31','2022-2-14','0010Y654012P6PJQA0')
, ('2021-10-31','US','0010Y654012P6PJQA0')
, ('2021-5-31','2022-5-14','0011v65402x8cjqAAA')
, ('2022-6-2','2022-1-13','0010Y654016OqkJQAS')
, ('2022-1-1','2022-11-11','0010Y654016OqIaQAK')
) a(Start_Date ,End_Date ,Customer_ID)
It is possible to provide date ranges as a separate table/subquery, join to the actual data and perform grouping:
select s.start_d, s.end_d, COUNT(Customer_ID) AS total
FROM (SELECT '2022-10-31'::DATE, '2022-10-31'::DATE
UNION SELECT '2022-09-30', '2022-09-30')
AS s(start_d, end_d)
LEFT JOIN (values ('2022-10-14','2022-8-19','0010Y654012P6KuQAK')
, ('2022-3-15','2022-9-14','0011v65402PoSpVAAV')
, ('2021-1-11','2022-10-11','0010Y654012P6DuQAK')
, ('2022-12-1','2022-5-14','0011v65402u7muLAAQ')
, ('2021-1-30','2022-3-14','0010Y654012P6DuQAK')
, ('2022-10-31','2022-2-14','0010Y654012P6PJQA0')
, ('2021-10-31','2021-10-31','0010Y654012P6PJQA0')
, ('2021-5-31','2022-5-14','0011v65402x8cjqAAA')
, ('2022-6-2','2022-1-13','0010Y654016OqkJQAS')
, ('2022-1-1','2022-11-11','0010Y654016OqIaQAK')
) a(Start_Date ,End_Date ,Customer_ID)
ON a.Start_Date>s.start_d and a.End_Date<=s.end_d
GROUP BY s.start_d, s.end_d;
Output:

calculate a date diff on a joined table

i am trying to calculate the date diff between endDTS and startDTS. both have date and time inside, for example start time is 2022-04-10 7:06:11 and end time is 2022-04-13 19:44:17. Please see my query below.
SELECT TOP (1000)
a.[LocationID]
,a.[LocationSourceID]
,a.[ParentLocationID]
,a.[LocationTypeCD]
,a.[FloorDisplayNM]
,a.[UnitDisplayNM]
,a.[RoomID]
,a.[RoomDisplayNM]
,a.[BedID]
,a.[BedDisplayNM]
, ls.StatusCD
, ls.StartDTS
, ls.EndDTS
, ls.EncounterID
, ls.ReservedIndicatorFLG
, ls.TransferInMilestoneID
, ls.PatientIsolationCD
, ls.ReasonCD
, a.[ActiveIndicatorCD]
, a.[EDWLastModifiedDTS]
FROM [FirstTable].[Reference].[LocationBASE] a
inner join [SecondTable].[Clinical].[LocationStatus] ls on a.BedID = ls.LocationID
where BedID IS NOT NULL
and BuildingDisplayNM = 'hospital'
and ls.StartDTS >= '2022-04-10'
order by ls.StartDTS
select CEILING((datediff(minute, ls.EndDTS, ls.StartDTS)/60.0)/24.0) [diff], as ElapsedTime
ls
from [secondtable].[Clinical].[LocationStatus] ls

How to use a SUM, LEFT JOIN, group by and order by in one SQL statement

I am trying to construct a query I can use within my ASP.NET code that pulls from a data base and then exports it into an excel file. My goal is to have SQL do most of the work before I iterate onto my worksheet. This is my code before using sum and group by
SELECT EOD_Rental_Fees.*, POSH5_Prod_CoreBankingDetails.description as TotalFeeAmount)
FROM EOD_Rental_Fees
LEFT JOIN POSH5_Prod_CoreBankingDetails ON EOD_Rental_Fees.CoreBankingID = POSH5_Prod_CoreBankingDetails.ID
WHERE DateProcessed >= '2018-07-01 00:00:00.000'
AND DateProcessed <= '2018-08-30 00:00:00:000'
ORDER BY description, DateProcessed;
This is the result of it:
When I add SUM or GROUP BY like so
Select EOD_Rental_Fees.*, POSH5_Prod_CoreBankingDetails.description, (Select SUM (EOD_Rental_Fees.TotalFee) as TotalFeeAmount) from
EOD_Rental_Fees
LEFT JOIN POSH5_Prod_CoreBankingDetails ON EOD_Rental_Fees.CoreBankingID = POSH5_Prod_CoreBankingDetails.ID
WHERE DateProcessed >= '2018-07-01 00:00:00.000'
AND DateProcessed <= '2018-08-30 00:00:00:000'
Group By Currency
Order By description, DateProcessed;
I get the following error: I am trying to have a column that shows TotalFeeAmount that is grouped by Currency.
From what I can see my query looks fine. What am I doing wrong to cause this?
What I am trying to achieve is something like this:
You keep adding to the question which makes it difficult to answer. In the latest image you are displaying a report - not a query result, so I am going to ignore the sub-totals and that i something for your presentation layer to deal with.
"From what I can see my query looks fine" Sorry, it isn't fine because it produces SQL errors. That error actually tells you that if you want to use group by you must specify which columns to be grouped by. Your query does not do that.
To avoid that error every "non-aggregating" column needs to be spelled out in the group by clause like so (note you cannot use * for this)
SELECT
rf.id
, rf.year
, rf.month
, rf.DateProcessed
, rf.CoreBankingID
, rf.MerchantRecordID
, rf.DeployedDate
, rf.TerminalRecordID
, rf.DeployedDate
, rf.RecoveredDate
, rf.MonthlyFee
, rf.IsProRated
, rf.DaysActive
, rf.TotalFee
, rf.IsPinPad
, rf.Currency
, det.description
, sum(rf.TotalFee) as TotalFeeAmount
FROM EOD_Rental_Fees AS rf
LEFT JOIN POSH5_Prod_CoreBankingDetails as det ON rf.CoreBankingID = det.ID
WHERE rf.DateProcessed >= '2018-07-01 00:00:00.000'
AND rf.DateProcessed < '2018-09-01 00:00:00:000'
GROUP BY
rf.id
, rf.year
, rf.month
, rf.DateProcessed
, rf.CoreBankingID
, rf.MerchantRecordID
, rf.DeployedDate
, rf.TerminalRecordID
, rf.DeployedDate
, rf.RecoveredDate
, rf.MonthlyFee
, rf.IsProRated
, rf.DaysActive
, rf.TotalFee
, rf.IsPinPad
, rf.Currency
, det.description
ORDER BY
det.description
, rf.DateProcessed
However I suspect this isn't going to produce the wanted outcome because you are after both details and summary at the same time which GROUP BY isn't designed to achieve.
I think you will find using SUM() OVER() will be closer to you need, but exactly how you need the PARTITION BY subclause to work isn't clear to me. notwithstanding this may work for you:
SELECT
rf.id
, rf.year
, rf.month
, rf.DateProcessed
, rf.CoreBankingID
, rf.MerchantRecordID
, rf.DeployedDate
, rf.TerminalRecordID
, rf.DeployedDate
, rf.RecoveredDate
, rf.MonthlyFee
, rf.IsProRated
, rf.DaysActive
, rf.TotalFee
, rf.IsPinPad
, rf.Currency
, det.description
, sum(rf.TotalFee) over(partition by rf.CoreBankingID, Currency) as TotalFeeAmount
FROM EOD_Rental_Fees AS rf
LEFT JOIN POSH5_Prod_CoreBankingDetails as det ON rf.CoreBankingID = det.ID
WHERE rf.DateProcessed >= '2018-07-01 00:00:00.000'
AND rf.DateProcessed < '2018-09-01 00:00:00:000'
ORDER BY
det.description
, rf.DateProcessed
Notes:
Use table aliases to simplify your queries
when joining tables include the table aliases in all column references
I subtly changed the way the date range works, always use a combination of >= with < and the upper boundary is "the next day". With this approach you cover yourself for any data rows that have both date and time.
here is how you can do it by using GROUP BY ROLLUP:
Select
DESCRIPTION
, currency
, DateProcessed
SUM (EOD_Rental_Fees.TotalFee)
from
EOD_Rental_Fees
LEFT JOIN POSH5_Prod_CoreBankingDetails
ON EOD_Rental_Fees.CoreBankingID = POSH5_Prod_CoreBankingDetails.ID
WHERE
DateProcessed >= '2018-07-01 00:00:00.000'
AND DateProcessed <= '2018-08-30 00:00:00:000'
GROUP BY ROLLUP (DESCRIPTION, currency, DateProcessed)
Order By
description,DateProcessed;

Performance issue using IsNull function in the Select statement

I have a financial application. I have ViewHistoricInstrumentValue which has rows like this
instrument1, date1, price, grossValue, netValue
instrument2, date1, price, grossValue, netValue
...
instrument1, date2, price, grossValue, netValue
...
My views are complicated but the db itself is small (4000 transactions). ViewHistoricInstrumentValue was executed in less than 1 second before I added the next CTE to the view. After that it takes 26s. ActualEvaluationPrice is the price for instrumentX at dateY. If this value is missing from HistoricPrice table then I find the previous price for instrumentX.
, UsedEvaluationPriceCte AS (
SELECT *
, isnull(ActualEvaluationPrice,
(select top 1 HistoricPrice.Price -- PreviousPrice
from HistoricPrice JOIN ValidDate
on HistoricPrice.DateId = ValidDate.Id
and HistoricPrice.InstrumentId = StartingCte.InstrumentId
and ValidDate.[Date] < StartingCte.DateValue
order by ValidDate.[Date]))
as UsedEvaluationPrice
FROM StartingCte
)
My problem is that the execution time increased needlessly. Right now the HistoricPrice table has no missing value so ActualEvaluationPrice is never null, so the previous price should be never determined.
ViewHistoricInstrumentValue returns 1815 rows. One other mystery is that the first query takes 26s, but the second only 2s.
SELECT * FROM [ViewHistoricInstrumentValue]
SELECT top(2000) * FROM [ViewHistoricInstrumentValue]
Appendix
The execution plan: https://www.dropbox.com/s/5st69uhjkpd3b5y/IsNull.sqlplan?dl=0
The same plan: https://www.brentozar.com/pastetheplan/?id=rk9bK1Wiv
The view:
ALTER VIEW [dbo].[ViewHistoricInstrumentValue] AS
WITH StartingCte AS (
SELECT
HistoricInstrumentValue.DateId
, ValidDate.Date as DateValue
, TransactionId
, TransactionId AS [Row]
, AccountId
, AccountName
, ViewTransaction.InstrumentId
, ViewTransaction.InstrumentName
, OpeningDate
, OpeningPrice
, Price AS ActualEvaluationPrice
, ClosingDate
, Amount
, isnull(ViewTransaction.FeeValue, 0) as FeeValue
, HistoricInstrumentValue.Id AS Id
FROM ViewBriefHistoricInstrumentValue as HistoricInstrumentValue
JOIN ValidDate on HistoricInstrumentValue.DateId = ValidDate.Id
JOIN ViewTransaction ON ViewTransaction.Id = HistoricInstrumentValue.TransactionId
left JOIN ViewHistoricPrice ON ViewHistoricPrice.DateId = HistoricInstrumentValue.DateId AND
ViewHistoricPrice.InstrumentId = ViewTransaction.InstrumentId
)
, UsedEvaluationPriceCte AS (
SELECT *
, isnull(ActualEvaluationPrice,
(select top 1 HistoricPrice.Price -- PreviousPrice
from HistoricPrice JOIN ValidDate
on HistoricPrice.DateId = ValidDate.Id
and HistoricPrice.InstrumentId = StartingCte.InstrumentId
and ValidDate.[Date] < StartingCte.DateValue
order by ValidDate.[Date]))
as UsedEvaluationPrice
FROM StartingCte
)
, GrossEvaluationValueCte AS (
SELECT *
, Amount * UsedEvaluationPrice AS GrossEvaluationValue
, (UsedEvaluationPrice - OpeningPrice) * Amount AS GrossCapitalGains
FROM UsedEvaluationPriceCte
)
, CapitalGainsTaxCte AS (
SELECT *
, dbo.MyMax(GrossCapitalGains * 0.15, 0) AS CapitalGainsTax
FROM GrossEvaluationValueCte
)
, IsOpenCte AS (
SELECT
DateId
, DateValue
, TransactionId
, [Row]
, AccountId
, AccountName
, InstrumentId
, InstrumentName
, OpeningDate
, OpeningPrice
, ActualEvaluationPrice
, UsedEvaluationPrice
, ClosingDate
, Amount
, GrossEvaluationValue
, GrossCapitalGains
, CapitalGainsTax
, FeeValue
, GrossEvaluationValue - CapitalGainsTax - FeeValue AS NetEvaluationValue
, GrossCapitalGains - CapitalGainsTax - FeeValue AS NetUnrealizedGains
, CASE WHEN ClosingDate IS NULL OR DateValue < ClosingDate
THEN CAST(1 AS BIT)
ELSE CAST(0 AS BIT)
END
AS IsOpen
, convert(NVARCHAR, DateValue, 20) + cast([Id] AS NVARCHAR(MAX)) AS Temp
, Id
FROM CapitalGainsTaxCte
)
Select * from IsOpenCte
I have no idea what your query is supposed to be doing. But this process:
ActualEvaluationPrice is the price for instrumentX at dateY. If this value is missing from HistoricPrice table then I find the previous price for instrumentX.
is handled easily with lag():
select vhiv.*
coalesce(vhiv.ActualEvaluationPrice,
lag(vhiv.ActualEvaluationPrice) over (partition by vhiv.InstrumentId order by DateValue)
) as UsedEvaluationPrice
from ViewHistoricInstrumentValue vhiv;
Note: If you need to filter out certain dates by joining to ValidDates, you can include the JOIN in the query. However, that is not part of the problem statement.

Oracle SQL- Flag records based on record's date vs history

This is my 1st post on the forum. Usually I was able to find what I needed - but to tell the truth - I am not really sure how to ask a correct question to the issue. Therefore, please accept my apologies if there already is an answer on the forum and I missed it.
I am running the following code in an Oracle database via Benthic Software:
SELECT
T1."REGION"
, T1."COUNTRY"
, T1."IDNum"
, T1."CUSTOMER"
, T1."BUSSINESS"
, T3."FISCALYEARMONTH"
, T3."FISCALYEAR"
, SUM(T4."VALUE")
,"HISTORICAL_PURCHASE_FLAG"
FROM
"DATABASE"."SALES" T4
, "DATABASE"."CUSTOMER" T1
, "DATABASE"."PRODUCT" T2
, "DATABASE"."TIME" T3
WHERE
T4."CUSTOMERID" = T1."CUSTOMERID"
AND T4."PRODUCTID" = T2."PRODUCTID"
AND T4."DATEID" = T3."DATEID"
AND T3."FISCALYEAR" IN ('2016')
AND T1."COUNTRY" IN ('ENGLAND', 'France')
GROUP BY
T1."REGION"
, T1."COUNTRY"
, T1."IDNum"
, T1."CUSTOMER"
, T1."BUSSINESS"
, T3."FISCALYEARMONTH"
, T3."FISCALYEAR"
;
This query provides me with information on transactions. As you can see above, I would like to add a column named "HISTORICAL_PURCHASE_FLAG".
I would like the query to take CUSTOMER and FISCALYEARMONTH. Then, I would like to check if there are any transactions registered for the CUSTOMER, up to 2 years in the past.
So lets say I get the following result:
LineNum REGION COUNTRY IDNum CUSTOMER BUSSINESS FISCALYEARMONTH FISCALYEAR VALUE HISTORICAL_PURCHASE_FLAG
1 Europe ENGLAND 255 Abraxo Cleaner Co. Chemicals 201605 2016 34,567.00
2 Europe FRANCE 123 Metal Trade Heavy 201602 2016 12,500.00
3 Europe ENGLAND 255 Abraxo Cleaner Co. Chemicals 201601 2016 8,400.00
LineNum 1 shows transaction for Abraxo Cleaner Co. registered on 201605. And LineNum 3 is also for Abraxo Cleaner Co. but registered on 201601. What I would need the query to do, is to flag LineNum 1 as 'Existing'. Because there was a previous transaction registered.
On the other hand, LineNum 3 was the first time transactions was registered for Abraxo Cleaner Co. so the line would be flagged as 'New'.
To sum up, I would like for each row of data to be treated individually. And to check if there are any previous records of data for CUSTOMER & FISCALYEARMONTH - 24 months.
Thank you in advance for the help.
You can use LAG function:
SELECT
"REGION"
, "COUNTRY"
, "IDNum"
, "CUSTOMER"
, "BUSSINESS"
, "FISCALYEARMONTH"
, "FISCALYEAR"
, SUM("VALUE")
, MAX(CASE WHEN to_date(prev_fym,'YYYYMM') >= ADD_MONTHS (to_date("FISCALYEARMONTH",'YYYYMM'), -24) THEN 'Existing'
ELSE NULL END) "HISTORICAL_PURCHASE_FLAG"
FROM
(
SELECT
T1."REGION"
, T1."COUNTRY"
, T1."IDNum"
, T1."CUSTOMER"
, T1."BUSSINESS"
, T3."FISCALYEARMONTH"
, T3."FISCALYEAR"
, T4."VALUE"
, LAG ("FISCALYEARMONTH", 1) OVER (PARTITION BY T1."IDNum" ORDER BY T3."FISCALYEARMONTH" DESC) prev_fym
FROM
"DATABASE"."SALES" T4
, "DATABASE"."CUSTOMER" T1
, "DATABASE"."PRODUCT" T2
, "DATABASE"."TIME" T3
WHERE
T4."CUSTOMERID" = T1."CUSTOMERID"
AND T4."PRODUCTID" = T2."PRODUCTID"
AND T4."DATEID" = T3."DATEID"
AND T1."COUNTRY" IN ('ENGLAND', 'France')
AND T3."FISCALYEAR" IN ('2014','2015','2016')
)
WHERE "FISCALYEAR" IN ('2016')
GROUP BY
"REGION"
, "COUNTRY"
, "IDNum"
, "CUSTOMER"
, "BUSSINESS"
, "FISCALYEARMONTH"
, "FISCALYEAR"
;
Using a simplified "input" table... You can use the LAG() analytic function and a comparison condition to populate your last column. I assume your fiscalyearmonth is a number - if it is a character field, wrap fiscalyearmonth within TO_NUMBER(). (It would be much better if in fact you stored these as true Oracle dates, perhaps date 2016-06-01 instead of 201606, but I worked with what you have currently... and took advantage that in numeric format, "24 months ago" simply means "subtract 200").
with inputs (linenum, idnum, fiscalyearmonth) as (
select 1, 255, 201605 from dual union all
select 2, 123, 201602 from dual union all
select 3, 255, 201601 from dual union all
select 4, 255, 201210 from dual
)
select linenum, idnum, fiscalyearmonth,
case when fiscalyearmonth
- lag(fiscalyearmonth)
over (partition by idnum order by fiscalyearmonth) < 200
then 'Existing' else 'New' end as flag
from inputs
order by linenum;
LINENUM IDNUM FISCALYEARMONTH FLAG
---------- ---------- --------------- --------
1 255 201605 Existing
2 123 201602 New
3 255 201601 New
4 255 201210 New
Another solution might be to outer Join "DATABASE"."SALES" T4 a second time as T5, wilter the fiscal year via WHERE to < t4.FiscalYear-2. If the Column is NULL, the record is new, if the outer join results in a value, the record is historic.
You can achieve using row_number() function as below... modify as per your need...I assumed 2 years (means previous 24 months from sysdate ).
You can run the sub-queries separately to check how its working.
Select
"REGION"
,"COUNTRY"
,"IDNum"
,"CUSTOMER"
,"BUSSINESS"
,"FISCALYEARMONTH"
,"FISCALYEAR"
,"VALUE"
, ( case when ( TXNNO = 1 or TOTAL_TXN_LAST24MTH = 0 ) then 'New' else 'Existing' end ) as "HISTORICAL_PURCHASE_FLAG" -- if no txn in last 24 month or its first txn then 'new' else 'existing'
from
(
select
SubQry."REGION"
, SubQry."COUNTRY"
, SubQry."IDNum"
, SubQry."CUSTOMER"
, SubQry."BUSSINESS"
, SubQry."FISCALYEARMONTH"
, SUBQRY."FISCALYEAR"
, SUBQRY."VALUE"
, ROW_NUMBER() over (partition by SUBQRY."REGION",SUBQRY."COUNTRY",SUBQRY."IDNum",SUBQRY."CUSTOMER",SUBQRY."BUSSINESS" order by SUBQRY."FISCALYEARMONTH") as TXNNO,
, SUM(case when (TO_NUMBER(TO_CHAR(sysdate,'YYYYMM')) - SUBQRY."FISCALYEARMONTH") < 24 then 1 else 0 end) as TOTAL_TXN_LAST24MTH
From
(
SELECT
T1."REGION"
, T1."COUNTRY"
, T1."IDNum"
, T1."CUSTOMER"
, T1."BUSSINESS"
, T3."FISCALYEARMONTH"
, T3."FISCALYEAR"
, SUM(T4."VALUE") as VALUE
FROM
"DATABASE"."SALES" T4
, "DATABASE"."CUSTOMER" T1
, "DATABASE"."PRODUCT" T2
, "DATABASE"."TIME" T3
WHERE
T4."CUSTOMERID" = T1."CUSTOMERID"
AND T4."PRODUCTID" = T2."PRODUCTID"
AND T4."DATEID" = T3."DATEID"
AND T3."FISCALYEAR" IN ('2016')
AND T1."COUNTRY" IN ('ENGLAND', 'France')
GROUP BY
T1."REGION"
, T1."COUNTRY"
, T1."IDNum"
, T1."CUSTOMER"
, T1."BUSSINESS"
, T3."FISCALYEARMONTH"
, T3."FISCALYEAR"
) SUBQRY
);