I have one table that contains customer id and start date and one table that contains customer id and end date.
table A
| customer_id | start_date |
|-------------|------------|
| 1           | 2022-01-01 |
| 1           | 2022-04-01 |
| 1           | 2022-07-01 |
| 2           | 2022-01-15 |
| 2           | 2022-03-25 |
| 3           | 2022-04-01 |
| 3           | 2022-08-01 |
| 4           | 2022-09-01 |
table B
| customer_id | end_date   |
|-------------|------------|
| 1           | 2022-01-25 |
| 1           | 2022-05-03 |
| 2           | 2022-03-24 |
| 2           | 2022-03-29 |
| 3           | 2022-04-15 |
Is there a way that I can get an output that looks like below?
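desired output
| customer_id | start_date | end_date   |
|-------------|------------|------------|
| 1           | 2022-01-01 | 2022-01-25 |
| 1           | 2022-04-01 | 2022-05-03 |
| 1           | 2022-07-01 |            |
| 2           | 2022-01-15 | 2022-03-24 |
| 2           | 2022-03-25 | 2022-03-29 |
| 3           | 2022-04-01 | 2022-04-15 |
| 3           | 2022-08-01 |            |
| 4           | 2022-09-01 |            |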
To get your desired result, please check the query below; you can adapt it to your table names and requirements.
-- Sample data held in table variables. T-SQL table variables use the @ prefix;
-- the # prefix is reserved for temporary tables and is not valid in DECLARE.
DECLARE @table1 TABLE (
    [customer_id] INT,
    [start_date]  DATE
);
DECLARE @table2 TABLE (
    [customer_id] INT,
    [end_date]    DATE
);

INSERT INTO @table1 ([customer_id], [start_date]) VALUES
    (1, '2022-01-01'),
    (1, '2022-04-01'),
    (1, '2022-07-01'),
    (2, '2022-01-15'),
    (2, '2022-03-25'),
    (3, '2022-04-01'),
    (3, '2022-08-01'),
    (4, '2022-09-01');

INSERT INTO @table2 ([customer_id], [end_date]) VALUES
    (1, '2022-01-25'),
    (1, '2022-05-03'),
    (2, '2022-03-24'),
    (2, '2022-03-29'),
    (3, '2022-04-15');

-- Pair each customer's n-th start date with the same customer's n-th end date.
-- The row numbers must restart per customer (PARTITION BY customer_id):
-- a single global numbering only lines up by accident, because the join
-- also requires the customer ids to be equal.
-- Start dates without a matching end date keep a NULL end_date (LEFT JOIN).
SELECT
    [starts].[customer_id],
    [starts].[start_date],
    [ends].[end_date]
FROM (
    SELECT
        [customer_id],
        [start_date],
        ROW_NUMBER() OVER (PARTITION BY [customer_id] ORDER BY [start_date]) AS [row_num]
    FROM @table1
) AS [starts]
LEFT JOIN (
    SELECT
        [customer_id],
        [end_date],
        ROW_NUMBER() OVER (PARTITION BY [customer_id] ORDER BY [end_date]) AS [row_num]
    FROM @table2
) AS [ends]
    ON  [ends].[customer_id] = [starts].[customer_id]
    AND [ends].[row_num]     = [starts].[row_num]
ORDER BY
    [starts].[customer_id],
    [starts].[start_date];
Output
This hint is using TSQL in SQL Server.
-- Pair the n-th start date of each customer with that customer's n-th end date.
-- ROW_NUMBER() is partitioned by customer_id so the sequence restarts for every
-- customer; without the partition the numbers are global and the join on
-- (customer_id, ORDEM) drops or mismatches pairs.
-- Start dates with no corresponding end date are returned with end_date = NULL.
select
    A.customer_id,
    A.start_date,
    B.end_date
from (
    select
        X.customer_id,
        X.start_date,
        ROW_NUMBER() over (partition by X.customer_id order by X.start_date) as ORDEM
    from TableA X
) A
left join (
    select
        X.customer_id,
        X.end_date,
        ROW_NUMBER() over (partition by X.customer_id order by X.end_date) as ORDEM
    from TableB X
) B
    on  A.customer_id = B.customer_id
    and A.ORDEM = B.ORDEM
order by
    A.customer_id,
    A.start_date
Hope it helps.
Here is how it's done in Vertica 12 - now that we have INTERPOLATE NEXT VALUE for the event series join :
\pset null (null)
-- Event series join: every start date is matched with the next end date of the
-- same customer; start dates without a later end date keep a NULL end_date.
WITH
-- start dates per customer (sample input)
tba AS (
            SELECT 1 AS customer_id, DATE '2022-01-01' AS start_date
  UNION ALL SELECT 1, DATE '2022-04-01'
  UNION ALL SELECT 1, DATE '2022-07-01'
  UNION ALL SELECT 2, DATE '2022-01-15'
  UNION ALL SELECT 2, DATE '2022-03-25'
  UNION ALL SELECT 3, DATE '2022-04-01'
  UNION ALL SELECT 3, DATE '2022-08-01'
  UNION ALL SELECT 4, DATE '2022-09-01'
),
-- end dates per customer (sample input)
tbb AS (
            SELECT 1 AS customer_id, DATE '2022-01-25' AS end_date
  UNION ALL SELECT 1, DATE '2022-05-03'
  UNION ALL SELECT 2, DATE '2022-03-24'
  UNION ALL SELECT 2, DATE '2022-03-29'
  UNION ALL SELECT 3, DATE '2022-04-15'
)
SELECT
  tba.customer_id,
  tba.start_date,
  tbb.end_date
FROM tba
LEFT JOIN tbb
   ON tba.customer_id = tbb.customer_id
  AND tbb.end_date INTERPOLATE NEXT VALUE tba.start_date
ORDER BY
  tba.customer_id,
  tba.start_date
;
-- out Null display is "(null)".
-- out customer_id | start_date | end_date
-- out -------------+------------+------------
-- out 1 | 2022-01-01 | 2022-01-25
-- out 1 | 2022-04-01 | 2022-05-03
-- out 1 | 2022-07-01 | (null)
-- out 2 | 2022-01-15 | 2022-03-24
-- out 2 | 2022-03-25 | 2022-03-29
-- out 3 | 2022-04-01 | 2022-04-15
-- out 3 | 2022-08-01 | (null)
-- out 4 | 2022-09-01 | (null)
Related
I have a query as below:
-- Total sales for one day (2022-05-10, UTC): a sale is counted when either its
-- promo date or its purchase date falls on that day.
SELECT
  "2022-05-10 00:00:00 UTC" AS date_,
  -- A hyphen is not allowed in an unquoted alias, hence total_sales.
  COUNT(salesId) AS total_sales
FROM
  `project1.sales.sales-growth`
WHERE
  -- Half-open ranges [day start, next day start): BETWEEN is inclusive on both
  -- ends and would also count rows stamped exactly at midnight of 2022-05-11.
  (promoDate >= "2022-05-10 00:00:00 UTC"
    AND promoDate < "2022-05-11 00:00:00 UTC")
  OR
  (purchaseDate >= "2022-05-10 00:00:00 UTC"
    AND purchaseDate < "2022-05-11 00:00:00 UTC")
Which shows the total sales for a particular date (2022-05-10) as below:
date_ total-sales
2022-05-10 560
I am wondering how I can change the query to show all the May month sales per day (desired output):
date_ total-sales
2022-05-01 567
2022-05-02 687
2022-05-03 878
... ...
2022-05-31 500
One option: generate a date array for the target time range, group by those dates and compare those dates in the WHERE clause with your two date columns.
With an assumed table of yours:
-- Sales per calendar day of May 2022.
-- your_table is inline sample data standing in for the real sales table.
WITH your_table AS
(
  SELECT TIMESTAMP("2022-05-01 15:30:00+00") AS promoDate, NULL AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT NULL AS promoDate, TIMESTAMP("2022-05-01 18:30:00+00") AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT TIMESTAMP("2022-05-02 15:30:00+00") AS promoDate, NULL AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT TIMESTAMP("2022-05-03 15:30:00+00") AS promoDate, NULL AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT TIMESTAMP("2022-05-04 15:30:00+00") AS promoDate, NULL AS purchaseDate, 1 AS salesId
  UNION ALL
  SELECT NULL AS promoDate, TIMESTAMP("2022-05-04 18:30:00+00") AS purchaseDate, 1 AS salesId
)
SELECT
  date_,
  COUNT(salesId) AS total_sales
FROM
  -- One row per day of the target range, combined with every sales row;
  -- the WHERE clause keeps only the (day, sale) pairs that belong together.
  UNNEST(GENERATE_DATE_ARRAY("2022-05-01", "2022-05-31")) AS date_
CROSS JOIN
  your_table
WHERE
  date_ = EXTRACT(DATE FROM promoDate)
  OR
  date_ = EXTRACT(DATE FROM purchaseDate)
GROUP BY
  date_
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY
  date_
Output:
| Row | date_      | total_sales |
|-----|------------|-------------|
| 1   | 2022-05-01 | 2           |
| 2   | 2022-05-02 | 1           |
| 3   | 2022-05-03 | 1           |
| 4   | 2022-05-04 | 2           |
I'm new to SQL, so I hope you don't find this silly. I'm working with two tables here: one contains start dates and the other contains end dates. Entries do not follow a sequence, and there may be duplicates.
**TABLE 1**
id start_date
1 2019-04-23
1 2019-06-05
1 2019-06-05
1 2019-10-29
1 2019-12-16
2 2019-01-05
3 2020-02-01
**TABLE 2**
id end_date
1 2019-04-23
1 2019-06-05
1 2019-06-06
1 2019-06-06
1 2019-07-24
1 2019-10-16
2 2020-01-04
**EXPECTED OUTPUT**
id start_date end_date
1 2019-04-23 2019-06-05
1 2019-10-29 null
2 2019-01-05 2020-01-04
3 2020-02-01 null
You can use union all and aggregation with some window functions:
-- Stacks start dates and end dates into one event list, finds for every event
-- the first end date that lies at least one day later (per id), and then
-- reports the earliest start date for each such end date.
-- table1: inline sample start dates.
with table1 as (
select 1 as id, date('2019-04-23') as start_date union all
select 1, '2019-06-05' union all
select 1, '2019-06-05' union all
select 1, '2019-10-29' union all
select 1, '2019-12-16' union all
select 2, '2019-01-05' union all
select 3, '2020-02-01'
),
-- table2: inline sample end dates.
table2 as (
SELECT 1 as id, DATE('2019-04-23') as end_date union all
SELECT 1, '2019-06-05' union all
select 1, '2019-06-06' union all
select 1, '2019-06-06' union all
select 1, '2019-07-24' union all
select 1, '2019-10-16' union all
select 2, '2020-01-04'
)
-- The min(start_date) column has no alias, so its output name is engine-generated.
select id, min(start_date), end_date
from (select id, start_date,
-- The ORDER BY key is the event date expressed as a day offset from CURRENT_DATE,
-- i.e. a number, which is what allows a RANGE frame with a numeric offset.
-- "1 following" starts the frame one day after the current event, so end dates
-- on the same day are skipped; IGNORE NULLS skips start-date rows inside the frame.
first_value(end_date ignore nulls) over (partition by id order by DATE_DIFF(coalesce(start_date, end_date), CURRENT_DATE, day) RANGE between 1 following and unbounded following) as end_date
-- Start events carry a NULL end_date ...
from ((select id, start_date, null as end_date
from table1
) union all
-- ... and end events carry a NULL start_date.
(select id, null as start_date, end_date
from table2
)
) se
)
group by id, end_date
-- Groups made up only of end events have no start date at all and are dropped.
having min(start_date) is not null;
Why do you have multiple records with the same id (I am assuming id is a primary key)? My suggestion would be to make the ids unique and create a foreign key constraint in the end-dates table (since there can't be an end date without a start date), and then use the foreign key relationship to retrieve the desired results. E.g. SELECT S.start_date,E.end_date FROM table1 S JOIN table2 E where S.id=E.table1_fk
Below is for BigQuery Standard SQL
#standardSQL
-- For every (id, start_date) return the earliest end date of the same id that
-- is strictly later than the start date, or NULL when there is none.
-- MIN ignores NULLs, so end dates that do not qualify are simply mapped to NULL;
-- no placeholder date is needed, and a real end date can never be mistaken for one.
SELECT
  t1.id,
  t1.start_date,
  MIN(IF(t1.start_date < t2.end_date, t2.end_date, NULL)) AS end_date
FROM `project.dataset.table1` AS t1
-- LEFT JOIN keeps ids that have no end dates at all (end_date stays NULL).
LEFT JOIN `project.dataset.table2` AS t2
  ON t2.id = t1.id
-- Grouping also collapses duplicate (id, start_date) rows into one output row.
GROUP BY
  t1.id,
  t1.start_date
Applied to the sample data from your question, the result is:
Row id start_date end_date
1 1 2019-04-23 2019-06-05
2 1 2019-06-05 2019-06-06
3 1 2019-10-29 null
4 1 2019-12-16 null
5 2 2019-01-05 2020-01-04
6 3 2020-02-01 null
Note: quick and not optimized, but it looks like it produces the desired result.
I am unable to group by on date from a timestamp column in below query:
CHG_TABLE
+----+--------+----------------+-----------------+-------+-----------+
| Key|Seq_Num | Start_Date | End_Date | Value |Record_Type|
+----+--------+----------------+-----------------+-------+-----------+
| 1 | 1 | 5/25/2019 2.05 | 12/31/9999 00.00| 800 | Insert |
| 1 | 1 | 5/25/2019 2.05 | 5/31/2019 11.12 | 800 | Update |
| 1 | 2 | 5/31/2019 11.12| 12/31/9999 00.00| 900 | Insert |
| 1 | 2 | 5/31/2019 11.12| 6/15/2019 12.05 | 900 | Update |
| 1 | 3 | 6/15/2019 12.05| 12/31/9999 00.00| 1000 | Insert |
| 1 | 3 | 6/15/2019 12.05| 6/25/2019 10.20 | 1000 | Update |
+---+---------+----------------+-----------------+-------+-----------+
RESULT:
+-----+------------------+----------------+-----------+----------+
| Key | Month_Start_Date | Month_End_Date |Begin_Value|End_Value |
+---- +------------------+----------------+-----------+----------+
| 1 | 6/1/2019 | 6/30/2019 | 1700 | 1000 |
| 1 | 7/1/2019 | 7/31/2019 | 1000 | 1000 |
+-----+------------------+----------------+-----------+----------+
Begin_Value : Sum(Value) for Max(Start_Date) < Month_Start_Date -> Should pick up latest date from last month
End_Value : Sum(Value) for Max(Start_Date) <= Month_End_Date -> Should pick up the latest date
-- Monthly snapshot attempt: every (month, person) combination from
-- dim_date x dim_person is joined to per-month totals computed from CHG_TABLE.
-- This is the query that raises "start_Date is not a valid group by expression".
SELECT k.key,
dd.month_start_date,
dd.month_end_date,
-- NOTE(review): gendata exposes no column named "value"; SUM(ct.value) below has no alias.
gendata.value first_value,
gendata.next_value last_value
FROM dim_date dd CROSS JOIN dim_person k
JOIN (SELECT ct.key,
-- First day of the month following ct.start_date.
-- NOTE(review): this expression uses ct.start_date directly, but the GROUP BY
-- below groups by to_char(start_Date,'MM-YYYY') -- the likely cause of the error.
dateadd('day',1,last_day(ct.start_date)) start_date ,
SUM(ct.value),
-- NOTE(review): the window has no PARTITION BY, and it also orders by the ungrouped ct.start_date.
lead(SUM(ct.value)) OVER(ORDER BY ct.start_date) next_value
-- dt: latest start_date per key and calendar month.
FROM (SELECT key,to_char(start_Date,'MM-YYYY') MMYYYY, max(start_Date) start_date
FROM CHG_TABLE
GROUP BY to_char(start_Date,'MM-YYYY'), key
) dt JOIN CHG_TABLE ct ON
dt.start_date = ct.start_date AND
dt.key = ct.key
-- NOTE(review): start_Date is unqualified here although both dt and ct expose a start_date column.
group by ct.key, to_char(start_Date,'MM-YYYY')
) gendata ON
-- NOTE(review): the outer to_char is applied to a value that is already a string.
to_char(dd.month_end_date,'MM-YYYY') = to_char(to_char(start_Date,'MM-YYYY')) AND
k.key = gendata.key;
Error:
start_Date is not a valid group by expression
Related post:
Monthly Snapshot using Date Dimension
Hoping, I understood your question correctly.
You can check below query
-- Monthly snapshot per key:
--   begin_value = sum of the 'Insert' values whose start_date falls in the previous month
--   end_value   = begin_value of the following month, or the month's own
--                 begin_value when there is no following month
WITH chg_table ( key, seq_num, start_date, end_date, value, record_type ) AS
(
  -- inline sample data taken from the question
  SELECT 1,1,TO_DATE('5/25/2019 2.05','MM/DD/YYYY HH24.MI'),TO_DATE('12/31/9999 00.00','MM/DD/YYYY HH24.MI'), 800, 'Insert' FROM DUAL UNION ALL
  SELECT 1,1,TO_DATE('5/25/2019 2.05','MM/DD/YYYY HH24.MI'),TO_DATE('5/31/2019 11.12','MM/DD/YYYY HH24.MI'), 800, 'Update' FROM DUAL UNION ALL
  SELECT 1,2,TO_DATE('5/31/2019 11.12','MM/DD/YYYY HH24.MI'),TO_DATE('12/31/9999 00.00','MM/DD/YYYY HH24.MI'), 900, 'Insert' FROM DUAL UNION ALL
  SELECT 1,2,TO_DATE('5/31/2019 11.12','MM/DD/YYYY HH24.MI'),TO_DATE('6/15/2019 12.05','MM/DD/YYYY HH24.MI'), 900, 'Update' FROM DUAL UNION ALL
  SELECT 1,3,TO_DATE('6/15/2019 12.05','MM/DD/YYYY HH24.MI'),TO_DATE('12/31/9999 00.00','MM/DD/YYYY HH24.MI'), 1000, 'Insert' FROM DUAL UNION ALL
  SELECT 1,3,TO_DATE('6/15/2019 12.05','MM/DD/YYYY HH24.MI'),TO_DATE('6/25/2019 10.20','MM/DD/YYYY HH24.MI'), 1000, 'Update' FROM DUAL
),
-- Each 'Insert' row is assigned to the month after its start_date:
-- new_start_date = first day of that month, new_end_date = its last day.
inserts_by_month AS
(
  SELECT key,
         value,
         TRUNC(ADD_MONTHS(start_date, 1), 'month')     AS new_start_date,
         TRUNC(ADD_MONTHS(start_date, 2), 'month') - 1 AS new_end_date
  FROM   chg_table
  WHERE  record_type = 'Insert'
),
-- One row per key and month with the summed value.
monthly_totals AS
(
  SELECT key,
         new_start_date,
         new_end_date,
         SUM(value) AS begin_value
  FROM   inserts_by_month
  GROUP BY key, new_start_date, new_end_date
)
SELECT key,
       new_start_date AS month_start_date,
       new_end_date   AS month_end_date,
       begin_value,
       -- PARTITION BY key keeps the look-ahead inside one key; without it the
       -- next month's value could be taken from a different key.
       COALESCE(LEAD(begin_value) OVER (PARTITION BY key ORDER BY new_start_date),
                begin_value) AS end_value
FROM   monthly_totals
ORDER BY key, new_start_date
;
Db Fiddle link: https://dbfiddle.uk/?rdbms=oracle_18&fiddle=c77a71afa82769b48f424e1c0fa1c0b6
I am assuming that you are getting an "ORA-00979: not a GROUP BY expression" and this is due to your use of the TO_CHAR(timestamp_col,'DD-MM-YYYY') in the GROUP BY clause.
Adding the TO_CHAR(timestamp_col,'DD-MM-YYYY') to the select side of your statement should resolve this and provide the results you are expecting.
a, b, dateadd('day',1,last_day(timestamp_col)) start_date, TO_CHAR(timestamp_col,'DD-MM-YYYY'), ...
I have a table with 200.000 rows in a SQL Server 2014 database looking like this:
-- Date ranges during which a Sector applies to a Contract.
-- Ranges of different sectors may overlap within one contract.
CREATE TABLE DateRanges
(
    Contract VARCHAR(8),
    Sector VARCHAR(8),
    StartDate DATE,
    EndDate DATE
);

-- Sample rows. Dates are written as ISO 'YYYY-MM-DD' literals: for the DATE
-- type they are interpreted the same way regardless of the session's
-- DATEFORMAT / language, whereas 'MM-DD-YYYY' fails or is misread under dmy.
-- A VALUES list inserts the rows as given, without the duplicate-elimination
-- step that chaining SELECTs with UNION implies.
INSERT INTO DateRanges (Contract, Sector, StartDate, EndDate)
VALUES
    ('111', '999', '2014-01-01', '2014-03-31'),
    ('111', '999', '2014-04-01', '2014-06-30'),
    ('111', '999', '2014-07-01', '2014-09-30'),
    ('111', '999', '2014-10-01', '2014-12-31'),
    ('111', '888', '2014-08-01', '2014-08-31'),
    ('111', '777', '2014-08-15', '2014-08-31'),
    ('222', '999', '2014-01-01', '2014-03-31'),
    ('222', '999', '2014-04-01', '2014-06-30'),
    ('222', '999', '2014-07-01', '2014-09-30'),
    ('222', '999', '2014-10-01', '2014-12-31'),
    ('222', '666', '2014-11-01', '2014-11-30'),
    ('222', '555', '2014-11-15', '2014-11-30');
As you can see there can be multiple overlaps for each contract and what I would like to have is the result like this
Contract Sector StartDate EndDate
---------------------------------------------
111 999 01-01-2014 07-31-2014
111 888 08-01-2014 08-14-2014
111 777 08-15-2014 08-31-2014
111 999 09-01-2014 12-31-2014
222 999 01-01-2014 10-31-2014
222 666 11-01-2014 11-14-2014
222 555 11-15-2014 11-30-2014
222 999 12-01-2014 12-31-2014
I cannot figure out how this can be done, and the examples I have seen on this site do not quite fit my problem.
This answer makes use of a few different techniques. The first is a recursive-cte that creates a table with every relevant cal_date which then gets cross apply'd with unique Contract values to get every combination of both values. The second is window-functions such as lag and row_number to determine a variety of things detailed in the comments below. Lastly, and probably most importantly, gaps-and-islands to determine when one Contract/Sector combination ends and the next begins.
Answer:
--drop a leftover temp table so the script can be re-run in the same session
if object_id('tempdb..#contract_dates') is not null
    drop table #contract_dates

--determine range of dates
--(local variables take the @ prefix; # is only valid for temp table names)
declare @bgn_dt date = (select min(StartDate) from DateRanges)
    , @end_dt date = (select max(EndDate) from DateRanges)

--use a recursive CTE to create a record for each day / Contract
; with dates as
(
    --anchor: first day of the overall range
    select @bgn_dt as cal_date
    union all
    --recursive step: add one day until the last day of the range is reached
    select dateadd(d, 1, a.cal_date) as cal_date
    from dates as a
    where a.cal_date < @end_dt
)
select d.cal_date
    , c.Contract
into #contract_dates
from dates as d
cross apply (select distinct Contract from DateRanges) as c
--one recursion level per day: lift the default limit of 100 levels
option (maxrecursion 0)
--Final Select
; with ranked_sectors as
(
    --Match every Contract/Date with the ranges covering that date and rank
    --them so that the most recently started range comes first
    select cd.cal_date
        , dr.Contract
        , dr.Sector
        , row_number() over (partition by dr.Contract, cd.cal_date order by dr.StartDate desc) as SectorRnk
    from #contract_dates as cd
    left join DateRanges as dr on cd.Contract = dr.Contract
        and cd.cal_date between dr.StartDate and dr.EndDate
)
, island_starts as
(
    --Keep the top-ranked Sector per day and flag the days on which the
    --Sector differs from the previous day's Sector (start of a new island)
    select r.Contract
        , r.Sector
        , r.cal_date
        , case when lag(r.Sector) over (partition by r.Contract order by r.cal_date asc) = r.Sector then 0 else 1 end as IslandBegin
    from ranked_sectors as r
    where r.SectorRnk = 1
        and r.Contract is not null
)
, numbered_islands as
(
    --A running total of the start flags gives every island its own number
    select s.Contract
        , s.Sector
        , s.cal_date
        , sum(s.IslandBegin) over (partition by s.Contract order by s.cal_date asc) as IslandNbr
    from island_starts as s
)
--Collapse each island into a single row with its first and last day
select i.Contract
    , i.Sector
    , min(i.cal_date) as StartDate
    , max(i.cal_date) as EndDate
from numbered_islands as i
group by i.Contract
    , i.Sector
    , i.IslandNbr
order by i.Contract asc
    , min(i.cal_date) asc
Output:
+----------+--------+------------+------------+
| Contract | Sector | StartDate | EndDate |
+----------+--------+------------+------------+
| 111 | 999 | 2014-01-01 | 2014-07-31 |
| 111 | 888 | 2014-08-01 | 2014-08-14 |
| 111 | 777 | 2014-08-15 | 2014-08-31 |
| 111 | 999 | 2014-09-01 | 2014-12-31 |
| 222 | 999 | 2014-01-01 | 2014-10-31 |
| 222 | 666 | 2014-11-01 | 2014-11-14 |
| 222 | 555 | 2014-11-15 | 2014-11-30 |
| 222 | 999 | 2014-12-01 | 2014-12-31 |
+----------+--------+------------+------------+
I have a table with four columns : id,validFrom,validTo and price.
This table contains the price of an article and the duration when that price is effective.
| id| validFrom | validTo | price
|---|-----------|-----------|---------
| 1 | 01-01-17 | 10-01-17 | 30000
| 1 | 04-01-17 | 09-01-17 | 20000
Now, for this inputs in my table my query output should be :
| id| validFrom | validTo | price
|---|-----------|----------|-------
| 1 | 01-01-17 | 03-01-17 | 30000
| 1 | 04-01-17 | 09-01-17 | 20000
| 1 | 10-01-17 | 10-01-17 | 30000
I can compare the dates and check if products with same id have overlapping dates but I have no idea how to split those dates into non-overlapping dates. Also I am not allowed to use PL/SQL.
Is this possible using only SQL ?
Oracle Setup:
-- Sample price ranges for article 1; several ranges overlap.
-- Column names come from the aliases of the first SELECT branch.
CREATE TABLE prices AS
  SELECT 1 AS id,
         DATE '2017-01-01' AS validFrom,
         DATE '2017-01-10' AS validTo,
         30000 AS price
  FROM DUAL
  UNION ALL
  SELECT 1, DATE '2017-01-04', DATE '2017-01-09', 20000 FROM DUAL
  UNION ALL
  SELECT 1, DATE '2017-01-11', DATE '2017-01-15', 10000 FROM DUAL
  UNION ALL
  SELECT 1, DATE '2017-01-16', DATE '2017-01-18', 15000 FROM DUAL
  UNION ALL
  SELECT 1, DATE '2017-01-17', DATE '2017-01-20', 40000 FROM DUAL
  UNION ALL
  SELECT 1, DATE '2017-01-21', DATE '2017-01-24', 28000 FROM DUAL
  UNION ALL
  SELECT 1, DATE '2017-01-23', DATE '2017-01-26', 23000 FROM DUAL
  UNION ALL
  SELECT 1, DATE '2017-01-26', DATE '2017-01-26', 17000 FROM DUAL;
Query:
-- Splits overlapping price ranges into consecutive, non-overlapping ranges:
-- expand every range into one row per day, pick one price per day, then
-- merge runs of consecutive days with the same price back into ranges.
WITH daily_prices ( id, dt, price, duration ) AS (
-- Unroll the price ranges to individual days
SELECT id,
d.COLUMN_VALUE,
price,
-- Length of the originating range in days; used below to choose between overlapping ranges
validTo - validFrom
FROM prices p,
-- Correlated collection: one date per day from p.validFrom through p.validTo (inclusive)
TABLE(
CAST(
MULTISET(
SELECT p.validFrom + LEVEL - 1
FROM DUAL
CONNECT BY p.validFrom + LEVEL - 1 <= p.validTo
)
AS SYS.ODCIDATELIST
)
) d
),
min_daily_prices ( id, dt, price ) AS (
-- Where a day falls between multiple ranges group them so the price
-- is for the shortest duration offer and if there are two equally short
-- durations then take the minimum price
SELECT id,
dt,
MIN( price ) KEEP ( DENSE_RANK FIRST ORDER BY duration )
FROM daily_prices
GROUP BY id, dt
),
group_changes ( id, dt, price, has_changed_group ) AS (
-- Find when the price changes or a day is skipped which means a new price
-- group is beginning
SELECT id,
dt,
price,
-- 0 = same price as the day immediately before; 1 = start of a new group
-- (also for the first day of an id, where LAG returns NULL)
CASE WHEN dt = LAG( dt ) OVER ( PARTITION BY id ORDER BY dt ) + 1
AND price = LAG( price ) OVER ( PARTITION BY id ORDER BY dt )
THEN 0
ELSE 1
END
FROM min_daily_prices
),
groups ( id, dt, price, grp ) AS (
-- Calculate unique indexes (per id) for each group of price ranges
-- (running total of the change flags)
SELECT id,
dt,
price,
SUM( has_changed_group ) OVER ( PARTITION BY id ORDER BY dt )
FROM group_changes
)
-- Collapse every group back into one range; the price is constant within a
-- group, so MIN( price ) simply returns that price
SELECT id,
MIN( dt ) AS validFrom,
MAX( dt ) AS validTo,
MIN( price ) AS price
FROM groups
GROUP BY id, grp
ORDER BY id, validFrom;
Output:
ID VALIDFROM VALIDTO PRICE
---------- -------------------- -------------------- ----------
1 01-JAN-2017 00:00:00 03-JAN-2017 00:00:00 30000
1 04-JAN-2017 00:00:00 09-JAN-2017 00:00:00 20000
1 10-JAN-2017 00:00:00 10-JAN-2017 00:00:00 30000
1 11-JAN-2017 00:00:00 15-JAN-2017 00:00:00 10000
1 16-JAN-2017 00:00:00 18-JAN-2017 00:00:00 15000
1 19-JAN-2017 00:00:00 20-JAN-2017 00:00:00 40000
1 21-JAN-2017 00:00:00 22-JAN-2017 00:00:00 28000
1 23-JAN-2017 00:00:00 25-JAN-2017 00:00:00 23000
1 26-JAN-2017 00:00:00 26-JAN-2017 00:00:00 17000