Expand a query from a date to a range of dates - sql

I have a query as below:
-- Count the sales whose promo date or purchase date falls on 2022-05-10 (UTC).
-- Each window is half-open, [start, end): BETWEEN is inclusive on both ends and
-- would also count a row stamped exactly 2022-05-11 00:00:00 UTC.
SELECT
  "2022-05-10 00:00:00 UTC" AS date_,
  -- An unquoted alias cannot contain a hyphen, so the column is named total_sales.
  COUNT(salesId) AS total_sales
FROM
  `project1.sales.sales-growth`
WHERE
  (promoDate >= "2022-05-10 00:00:00 UTC"
    AND promoDate < "2022-05-11 00:00:00 UTC")
  OR
  (purchaseDate >= "2022-05-10 00:00:00 UTC"
    AND purchaseDate < "2022-05-11 00:00:00 UTC")
Which shows the total sales for a particular date (2022-05-10) as below:
date_ total-sales
2022-05-10 560
I am wondering how I can change the query to show all the May month sales per day (desired output):
date_ total-sales
2022-05-01 567
2022-05-02 687
2022-05-03 878
... ...
2022-05-31 500

One option: generate a date array for the target time range, group by those dates and compare those dates in the WHERE clause with your two date columns.
With an assumed table of yours:
-- Stand-in for the real table: every sample row carries either a promo
-- timestamp or a purchase timestamp; the other column is a typed NULL.
WITH your_table AS (
  SELECT TIMESTAMP("2022-05-01 15:30:00+00") AS promoDate, CAST(NULL AS TIMESTAMP) AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT CAST(NULL AS TIMESTAMP) AS promoDate, TIMESTAMP("2022-05-01 18:30:00+00") AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT TIMESTAMP("2022-05-02 15:30:00+00") AS promoDate, CAST(NULL AS TIMESTAMP) AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT TIMESTAMP("2022-05-03 15:30:00+00") AS promoDate, CAST(NULL AS TIMESTAMP) AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT TIMESTAMP("2022-05-04 15:30:00+00") AS promoDate, CAST(NULL AS TIMESTAMP) AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT CAST(NULL AS TIMESTAMP) AS promoDate, TIMESTAMP("2022-05-04 18:30:00+00") AS purchaseDate, 1 AS salesId
)
-- One row per day of May 2022 on which at least one sale has a matching promo
-- or purchase date. The explicit join replaces the implicit comma cross join
-- plus WHERE filter; a row whose two dates fall on the same day is counted once.
-- Days without any matching sale are not returned because this is an inner
-- join; switch to LEFT JOIN to list them with a count of 0.
SELECT
  date_,
  COUNT(salesId) AS total_sales
FROM
  UNNEST(GENERATE_DATE_ARRAY("2022-05-01", "2022-05-31")) AS date_
INNER JOIN
  your_table
  ON date_ = EXTRACT(DATE FROM promoDate)
  OR date_ = EXTRACT(DATE FROM purchaseDate)
GROUP BY
  date_
ORDER BY
  date_
Output:
Row
date_
total_sales
1
2022-05-01
2
2
2022-05-02
1
3
2022-05-03
1
4
2022-05-04
2

Related

SQL Start Date and End Date Matching

I have one table that contains customer id and start date and one table that contains customer id and end date.
table A
customer_id
start_date
1
2022-01-01
1
2022-04-01
1
2022-07-01
2
2022-01-15
2
2022-03-25
3
2022-04-01
3
2022-08-01
4
2022-09-01
table B
customer_id
end_date
1
2022-01-25
1
2022-05-03
2
2022-03-24
2
2022-03-29
3
2022-04-15
Is there a way that I can get an output that looks like below?
desired output
customer_id
start_date
end_date
1
2022-01-01
2022-01-25
1
2022-04-01
2022-05-03
1
2022-07-01
2
2022-01-15
2022-03-24
2
2022-03-25
2022-03-29
3
2022-04-01
2022-04-15
3
2022-08-01
4
2022-09-01
To get your desired result, try the query below; adapt the table names to your own schema and requirements.
-- Sample data held in table variables (T-SQL table variables are declared with @).
DECLARE @table1 TABLE (
    [customer_id] INT,
    [start_date] DATE
);
DECLARE @table2 TABLE (
    [customer_id] INT,
    [end_date] DATE
);

INSERT INTO @table1 ([customer_id], [start_date]) VALUES
    (1, '2022-01-01'),
    (1, '2022-04-01'),
    (1, '2022-07-01'),
    (2, '2022-01-15'),
    (2, '2022-03-25'),
    (3, '2022-04-01'),
    (3, '2022-08-01'),
    (4, '2022-09-01');

INSERT INTO @table2 ([customer_id], [end_date]) VALUES
    (1, '2022-01-25'),
    (1, '2022-05-03'),
    (2, '2022-03-24'),
    (2, '2022-03-29'),
    (3, '2022-04-15');

-- Pair the n-th start date of each customer with that customer's n-th end date.
-- The row numbers restart for every customer (PARTITION BY): numbering all rows
-- globally makes the pairing depend on other customers' dates and on how ties
-- such as the two 2022-04-01 start dates happen to be ordered.
-- Start dates without a matching end date keep a NULL end_date (LEFT JOIN).
SELECT
    [Table1].[customer_id],
    [Table1].[start_date],
    [Table2].[end_date]
FROM (
    SELECT
        [customer_id],
        [start_date],
        ROW_NUMBER() OVER (PARTITION BY [customer_id] ORDER BY [start_date]) AS row_num
    FROM @table1
) AS [Table1]
LEFT JOIN (
    SELECT
        [customer_id],
        [end_date],
        ROW_NUMBER() OVER (PARTITION BY [customer_id] ORDER BY [end_date]) AS row_num
    FROM @table2
) AS [Table2]
    ON [Table2].[customer_id] = [Table1].[customer_id]
    AND [Table2].[row_num] = [Table1].[row_num]
ORDER BY [Table1].[customer_id], [Table1].[start_date];
Output
This hint is using TSQL in SQL Server.
-- Pair each customer's n-th start date with the same customer's n-th end date.
-- ORDEM is the per-customer sequence number; it must restart for every customer
-- (partition by), otherwise the match depends on other customers' rows.
select
    A.customer_id,
    A.start_date,
    B.end_date
from (
    select
        X.customer_id,
        X.start_date,
        ROW_NUMBER() over (partition by X.customer_id order by X.start_date) as ORDEM
    from TableA X
) A
left outer join (
    select
        X.customer_id,
        X.end_date,
        ROW_NUMBER() over (partition by X.customer_id order by X.end_date) as ORDEM
    from TableB X
) B
    on A.customer_id = B.customer_id
    and A.ORDEM = B.ORDEM
order by A.customer_id, A.start_date
Hope it helps.
Here is how it's done in Vertica 12 - now that we have INTERPOLATE NEXT VALUE for the event series join :
\pset null (null)
-- Inline sample data: tba holds the start dates, tbb the end dates.
WITH
tba (customer_id, start_date) AS (
  SELECT 1, DATE '2022-01-01'
  UNION ALL
  SELECT 1, DATE '2022-04-01'
  UNION ALL
  SELECT 1, DATE '2022-07-01'
  UNION ALL
  SELECT 2, DATE '2022-01-15'
  UNION ALL
  SELECT 2, DATE '2022-03-25'
  UNION ALL
  SELECT 3, DATE '2022-04-01'
  UNION ALL
  SELECT 3, DATE '2022-08-01'
  UNION ALL
  SELECT 4, DATE '2022-09-01'
),
tbb (customer_id, end_date) AS (
  SELECT 1, DATE '2022-01-25'
  UNION ALL
  SELECT 1, DATE '2022-05-03'
  UNION ALL
  SELECT 2, DATE '2022-03-24'
  UNION ALL
  SELECT 2, DATE '2022-03-29'
  UNION ALL
  SELECT 3, DATE '2022-04-15'
)
-- Event series join per customer. Presumably INTERPOLATE NEXT VALUE pairs each
-- start_date with the nearest following end_date of the same customer --
-- confirm against the Vertica documentation.
SELECT
  tba.customer_id,
  tba.start_date,
  tbb.end_date
FROM tba
LEFT JOIN tbb
  ON tba.customer_id = tbb.customer_id
  AND tbb.end_date INTERPOLATE NEXT VALUE tba.start_date
ORDER BY
  tba.customer_id,
  tba.start_date
;
-- out Null display is "(null)".
-- out customer_id | start_date | end_date
-- out -------------+------------+------------
-- out 1 | 2022-01-01 | 2022-01-25
-- out 1 | 2022-04-01 | 2022-05-03
-- out 1 | 2022-07-01 | (null)
-- out 2 | 2022-01-15 | 2022-03-24
-- out 2 | 2022-03-25 | 2022-03-29
-- out 3 | 2022-04-01 | 2022-04-15
-- out 3 | 2022-08-01 | (null)
-- out 4 | 2022-09-01 | (null)

Grouping by Date inclusivity

Here is the data I'm working with here
Accountid
Month
123
08/01/2021
123
09/01/2021
123
03/01/2022
123
04/01/2022
123
05/01/2022
123
06/01/2022
I'm trying to insert into a new table where the data is like this
Accountid
Start Month
End Month
123
08/01/2021
09/01/2021
123
03/01/2022
06/01/2022
I'm not sure how to separate them with the gap, and group by the account id in this case.
Thanks in advance
In 12c+ you may also use match_recognize for gaps-and-islands problems to define grouping rules (islands) in a more readable and natural way.
select accountid,
       start_month,
       end_month
from input_
match_recognize (
  partition by accountid
  order by month
  measures
    first(month) as start_month,
    last(month)  as end_month
  /* An island is one opening month followed by zero or more consecutive months */
  pattern (island_start consecutive*)
  define
    /* A row extends the island when it is exactly one month after the previous row */
    consecutive as months_between(month, prev(month)) = 1
)
ACCOUNTID
START_MONTH
END_MONTH
123
2021-08-01
2021-09-01
123
2022-03-01
2022-06-01
db<>fiddle here
That's a gaps and islands problem; one option to do it is:
Sample data:
SQL> with test (accountid, month) as -- inline sample rows standing in for the real table
2 (select 123, date '2021-01-08' from dual union all -- NOTE(review): these literals are days in January 2021 (3rd-6th, 8th-9th), not the first-of-month values shown in the question
3 select 123, date '2021-01-09' from dual union all
4 select 123, date '2021-01-03' from dual union all
5 select 123, date '2021-01-04' from dual union all
6 select 123, date '2021-01-05' from dual union all
7 select 123, date '2021-01-06' from dual
8 ),
Query begins here:
9 temp as -- tags every row with an island key (diff)
10 (select accountid, month,
11 to_char(month, 'J') - row_number() Over -- Julian day number (as text, implicitly converted) minus the row's position within its account
12 (partition by accountid order by month) diff -- stays constant while the dates advance by exactly one day per row
13 from test -- NOTE(review): this groups consecutive days; first-of-month data would presumably need a month-based index instead - confirm
14 )
15 select accountid, -- one output row per (accountid, diff) island
16 min(month) as start_month,
17 max(month) as end_Month
18 from temp
19 group by accountid, diff
20 order by accountid, start_month;
ACCOUNTID START_MONT END_MONTH
---------- ---------- ----------
123 03/01/2021 06/01/2021
123 08/01/2021 09/01/2021
SQL>
Although related to MS SQL Server, have a look at Introduction to Gaps and Islands Analysis; should be interesting reading for you, I presume.

SQL - Constructing an SCD2 type dimension from overlapping periods

I have data like this:
GroupId DateFrom DateTo value_
Gr1 2022-03-01 2022-08-01 10
Gr2 2022-01-01 2022-12-31 20
Gr3 2022-01-01 2022-12-31 30
I'm trying to construct an SCD2 type dimension by doing an unpivot on data above
WITH UnPivoted AS (
    -- Three groups with their validity ranges; the dates are plain string literals here.
    SELECT src.GroupId, src.DateFrom, src.DateTo, src.value_
    FROM (VALUES
        ('Gr1', '2022-03-01', '2022-08-01', 10),
        ('Gr2', '2022-01-01', '2022-12-31', 20),
        ('Gr3', '2022-01-01', '2022-12-31', 30)
    ) AS src (GroupId, DateFrom, DateTo, value_)
)
-- Turn the GroupId values into columns, one row per distinct (DateFrom, DateTo) pair.
SELECT
    pvt.DateFrom,
    pvt.DateTo,
    SUM(pvt.[Gr1]) AS Gr1,
    SUM(pvt.[Gr2]) AS Gr2,
    SUM(pvt.[Gr3]) AS Gr3
FROM UnPivoted
PIVOT (
    SUM(value_) FOR GroupId IN ([Gr1], [Gr2], [Gr3])
) AS pvt
GROUP BY
    pvt.DateFrom,
    pvt.DateTo
with result:
DateFrom DateTo Gr1 Gr2 Gr3
2022-03-01 2022-08-01 10 NULL NULL
2022-01-01 2022-12-31 NULL 20 30
But, as you can see, date ranges are not identical so my GROUP BY does not work. And there is an overlap in date ranges so output is not correct.
I would like to get this result instead:
DateFrom DateTo Gr1 Gr2 Gr3
2022-01-01 2022-03-01 20 30
2022-03-01 2022-08-01 10 20 30
2022-08-01 2022-12-31 20 30
The best approach that I can come up with is to get all distinct values of DateFrom and DateTo and go through intervals between them one by one, constructing a new row for each interval.
Is there an easier way of getting the desired result?
In case someone else has the same situation, script below works. It also has some additional logic to adjust end dates so they do not overlap with start dates.
input (Unpivoted CTE):
GroupId DateFrom DateTo value_
Gr1 2022-03-01 2022-08-01 10
Gr2 2022-01-01 2022-12-31 20
Gr3 2022-01-01 2022-12-31 30
script:
WITH UnPivoted AS (
    -- Input rows. The CAST in the first branch makes DateFrom/DateTo DATE columns
    -- for the whole UNION ALL (the string literals below are converted to DATE).
    SELECT 'Gr1' AS GroupId, CAST('2022-03-01' AS date) AS DateFrom, CAST('2022-08-01' AS date) AS DateTo, 10 AS value_ UNION ALL
    SELECT 'Gr2', '2022-01-01', '2022-12-31', 20 UNION ALL
    SELECT 'Gr3', '2022-01-01', '2022-12-31', 30
)
,UniqueDateRanges AS (
    -- Every date at which the set of active groups can change:
    -- each start date and the day after each end date.
    -- UNION already removes duplicates, so no DISTINCT is needed.
    SELECT DateFrom
    FROM UnPivoted
    UNION
    SELECT DATEADD(d, 1, DateTo)
    FROM UnPivoted
)
,DateIntervals_SCD2 AS (
    -- Turn consecutive boundaries into closed intervals
    -- [DateFrom, next boundary - 1 day] so end dates never overlap start dates.
    -- The last boundary has no successor, so its DateTo1_adjusted is NULL.
    SELECT DateFrom
          ,DATEADD(d, -1, LEAD(DateFrom) OVER (ORDER BY DateFrom)) AS DateTo1_adjusted
    FROM UniqueDateRanges
)
,Dataset_Fixed_SCD2 AS (
    -- Attach every group whose own range fully covers the interval;
    -- the open-ended last boundary is dropped.
    SELECT di.DateFrom, di.DateTo1_adjusted AS DateTo, up.GroupId, up.value_
    FROM DateIntervals_SCD2 di
    LEFT JOIN UnPivoted up
        ON di.DateFrom BETWEEN up.DateFrom AND up.DateTo
        AND di.DateTo1_adjusted BETWEEN up.DateFrom AND up.DateTo
    WHERE di.DateTo1_adjusted IS NOT NULL
)
-- One row per interval with one column per group.
SELECT pvt.DateFrom, pvt.DateTo, pvt.[Gr1], pvt.[Gr2], pvt.[Gr3]
FROM Dataset_Fixed_SCD2
PIVOT (
    SUM(value_) FOR GroupId IN ([Gr1],[Gr2],[Gr3])
) pvt
ORDER BY pvt.DateFrom
output:
DateFrom DateTo Gr1 Gr2 Gr3
2022-01-01 2022-02-28 20 30
2022-03-01 2022-08-01 10 20 30
2022-08-02 2022-12-31 20 30

Exclude duplicates and capture only changes

I have a scenario where I have to exclude duplicates and capture only the changes, and also calculate valid_from and valid_to on the fly. I have tried a query and it works, but it is very slow and fails with a memory error.
Input : Only capture Entries where there is a change either in Amount/Check-In-Out.
Calculate Valid_from and Valid_to based on Date Changed.
Output:
SQL I tried.
-- Level 1 (outermost): keep only the last row of every change group.
-- The derived table must be closed before this WHERE, because "isLastUpdate"
-- is an alias defined inside it.
select * from (
  -- Level 2: valid_from is the group's first change date; valid_to is read from
  -- the row that lies ("window_offset" - rn + 1) positions ahead, i.e. the first
  -- row after the current group, or '9999-12-31' when there is none.
  -- NOTE(review): "grp" is computed per OrderId, but this LEAD is ordered by
  -- "grp" alone with no PARTITION BY - confirm rows of different orders cannot
  -- interleave here.
  select
    lead(start_date, "window_offset" - rn + 1, '9999-12-31') over (order by "grp" ) as valid_to,
    case when rn = max(rn) over (partition by "grp") then 1 else 0 end as "isLastUpdate",
    start_date as valid_from,*
  from (
    -- Level 3: per group, its first change date, its row count and each row's position.
    select
      min("DateChanged") over (partition by "grp") as start_date,
      count(*) over (partition by "grp") as "window_offset",
      row_number() over (partition by "grp" order by "DateChanged") as rn,
      *
    from (
      -- Level 4: running total of change flags = group number within the order.
      select sum("isChanged") over (partition by OrderId order by "DateChanged") as "grp",*
      from (
        -- Level 5: flag a row as changed unless Amount, Check-In and Check-Out
        -- all equal the previous row of the same order (the first row is flagged too).
        select
          case when "Amount" = lag( "Amount" ) over (partition by OrderId order by "DateChanged") and
                    "Check-In" = lag( "Check-In" ) over (partition by OrderId order by "DateChanged") and
                    "Check-Out" = lag( "Check-Out" ) over (partition by OrderId order by "DateChanged")
               then 0
               else 1
          end "isChanged",
          *
        FROM :in_table
      )
    )
  )
)
where "isLastUpdate" = 1;
It is unclear why your expected output has valid_from as 8-mar-21 for the first order_id and 9-apr-21 for the second. Both order_ids have overlapping ranges, yet for the first order_id you take the least of the previous check_out and the next check_in, while for the second you take the greatest of those two, which is inconsistent.
If you want to get valid_from as the greatest of either the current check_in or the previous check_outs and valid_to as the greatest of either the current check_out or the next check_in or, if there are no more rows, 9999-12-31 then:
WITH distinct_rows AS (
  -- Collapse exact duplicate rows before any window is evaluated.
  SELECT DISTINCT *
  FROM   table_name
)
SELECT orderid,
       amount,
       check_in,
       check_out,
       -- valid_from: the later of this row's check_in and the latest check_out
       -- among the earlier rows of the same order (check_in itself when there
       -- are no earlier rows).
       GREATEST(
         check_in,
         COALESCE(
           MAX(check_out) OVER (
             PARTITION BY orderid
             ORDER BY check_in, check_out
             ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
           ),
           check_in
         )
       ) AS valid_from,
       -- valid_to: the later of this row's check_out and the next row's
       -- check_in; 9999-12-31 stands in when there is no next row.
       GREATEST(
         check_out,
         LEAD(check_in, 1, DATE '9999-12-31') OVER (
           PARTITION BY orderid
           ORDER BY check_in, check_out
         )
       ) AS valid_to
FROM   distinct_rows
Which, for the sample data:
-- Sample data: repeated rows are intentional, they are the duplicates to be excluded.
CREATE TABLE table_name (orderid, datechanged, amount, check_in, check_out) AS
            SELECT 1, DATE '2021-03-3', 12.12, DATE '2021-03-03', DATE '2021-03-10' FROM DUAL
  UNION ALL SELECT 1, DATE '2021-03-3', 12.12, DATE '2021-03-03', DATE '2021-03-10' FROM DUAL
  UNION ALL SELECT 1, DATE '2021-03-3', 12.12, DATE '2021-03-03', DATE '2021-03-10' FROM DUAL
  UNION ALL SELECT 1, DATE '2021-03-8', 21.12, DATE '2021-03-08', DATE '2021-03-18' FROM DUAL
  UNION ALL SELECT 1, DATE '2021-03-8', 21.12, DATE '2021-03-08', DATE '2021-03-18' FROM DUAL
  UNION ALL SELECT 2, DATE '2021-04-4', 9.10, DATE '2021-04-04', DATE '2021-04-09' FROM DUAL
  UNION ALL SELECT 2, DATE '2021-04-4', 9.10, DATE '2021-04-04', DATE '2021-04-09' FROM DUAL
  UNION ALL SELECT 2, DATE '2021-04-4', 10.20, DATE '2021-04-04', DATE '2021-04-12' FROM DUAL;
Outputs:
ORDERID
AMOUNT
CHECK_IN
CHECK_OUT
VALID_FROM
VALID_TO
1
12.12
2021-03-03 00:00:00
2021-03-10 00:00:00
2021-03-03 00:00:00
2021-03-10 00:00:00
1
21.12
2021-03-08 00:00:00
2021-03-18 00:00:00
2021-03-10 00:00:00
9999-12-31 00:00:00
2
9.1
2021-04-04 00:00:00
2021-04-09 00:00:00
2021-04-04 00:00:00
2021-04-09 00:00:00
2
10.2
2021-04-04 00:00:00
2021-04-12 00:00:00
2021-04-09 00:00:00
9999-12-31 00:00:00
db<>fiddle here

SQL find effective price of the products based on the date

I have a table with four columns : id,validFrom,validTo and price.
This table contains the price of an article and the duration when that price is effective.
| id| validFrom | validTo | price
|---|-----------|-----------|---------
| 1 | 01-01-17 | 10-01-17 | 30000
| 1 | 04-01-17 | 09-01-17 | 20000
Now, for this inputs in my table my query output should be :
| id| validFrom | validTo | price
|---|-----------|----------|-------
| 1 | 01-01-17 | 03-01-17 | 30000
| 1 | 04-01-17 | 09-01-17 | 20000
| 1 | 10-01-17 | 10-01-17 | 30000
I can compare the dates and check if products with same id have overlapping dates but I have no idea how to split those dates into non-overlapping dates. Also I am not allowed to use PL/SQL.
Is this possible using only SQL ?
Oracle Setup:
-- Sample price ranges for a single article; several of the ranges overlap.
CREATE TABLE prices ( id, validFrom, validTo, price ) AS
            SELECT 1, DATE '2017-01-01', DATE '2017-01-10', 30000 FROM DUAL
  UNION ALL SELECT 1, DATE '2017-01-04', DATE '2017-01-09', 20000 FROM DUAL
  UNION ALL SELECT 1, DATE '2017-01-11', DATE '2017-01-15', 10000 FROM DUAL
  UNION ALL SELECT 1, DATE '2017-01-16', DATE '2017-01-18', 15000 FROM DUAL
  UNION ALL SELECT 1, DATE '2017-01-17', DATE '2017-01-20', 40000 FROM DUAL
  UNION ALL SELECT 1, DATE '2017-01-21', DATE '2017-01-24', 28000 FROM DUAL
  UNION ALL SELECT 1, DATE '2017-01-23', DATE '2017-01-26', 23000 FROM DUAL
  UNION ALL SELECT 1, DATE '2017-01-26', DATE '2017-01-26', 17000 FROM DUAL;
Query:
WITH daily_prices AS (
  -- Expand every price range into one row per calendar day it covers;
  -- duration is the length of the originating range in days.
  SELECT p.id,
         d.COLUMN_VALUE          AS dt,
         p.price,
         p.validTo - p.validFrom AS duration
  FROM   prices p,
         TABLE(
           CAST(
             MULTISET(
               SELECT p.validFrom + LEVEL - 1
               FROM   DUAL
               CONNECT BY p.validFrom + LEVEL - 1 <= p.validTo
             )
             AS SYS.ODCIDATELIST
           )
         ) d
),
effective_daily_prices AS (
  -- When several ranges cover the same day, the shortest range wins;
  -- among equally short ranges the lowest price is taken.
  SELECT id,
         dt,
         MIN( price ) KEEP ( DENSE_RANK FIRST ORDER BY duration ) AS price
  FROM   daily_prices
  GROUP BY id, dt
),
island_starts AS (
  -- Flag a day with 1 when it opens a new price range: either the previous
  -- day is missing or the price differs from the previous day's price.
  SELECT id,
         dt,
         price,
         CASE
           WHEN dt = LAG( dt ) OVER ( PARTITION BY id ORDER BY dt ) + 1
            AND price = LAG( price ) OVER ( PARTITION BY id ORDER BY dt )
           THEN 0
           ELSE 1
         END AS has_changed_group
  FROM   effective_daily_prices
),
islands AS (
  -- Running sum of the flags gives every range a number that is unique per id.
  SELECT id,
         dt,
         price,
         SUM( has_changed_group ) OVER ( PARTITION BY id ORDER BY dt ) AS grp
  FROM   island_starts
)
-- Collapse each numbered range back into a single validFrom/validTo row.
SELECT id,
       MIN( dt ) AS validFrom,
       MAX( dt ) AS validTo,
       MIN( price ) AS price
FROM   islands
GROUP BY id, grp
ORDER BY id, validFrom;
Output:
ID VALIDFROM VALIDTO PRICE
---------- -------------------- -------------------- ----------
1 01-JAN-2017 00:00:00 03-JAN-2017 00:00:00 30000
1 04-JAN-2017 00:00:00 09-JAN-2017 00:00:00 20000
1 10-JAN-2017 00:00:00 10-JAN-2017 00:00:00 30000
1 11-JAN-2017 00:00:00 15-JAN-2017 00:00:00 10000
1 16-JAN-2017 00:00:00 18-JAN-2017 00:00:00 15000
1 19-JAN-2017 00:00:00 20-JAN-2017 00:00:00 40000
1 21-JAN-2017 00:00:00 22-JAN-2017 00:00:00 28000
1 23-JAN-2017 00:00:00 25-JAN-2017 00:00:00 23000
1 26-JAN-2017 00:00:00 26-JAN-2017 00:00:00 17000