Find missing months in SQL

Find missing months in SQL - sql

So this post remains unanswered and not useful
Finding missing month from my table
This link Get Missing Month from table requires a lookup table... which is not my first choice.
I have a table with Financial Periods, and a reference number. Each reference numbers has a series of financial periods which may start anywhere, and end anywhere. The test is simply that between the start and end, there is no gap - i.e. there must be every financial period period the smallest and largest dates, when grouped by reference number.
A financial period is a month.
So... in this example below, Reference Number A is missing May 2016.
REF MONTH
A 2016-04-01
A 2016-06-01
A 2016-07-01
B 2016-03-01
B 2016-04-01
B 2016-05-01
C 2022-05-01
-- First and last financial period recorded for each reference number.
SELECT
    REF,
    MIN(Month) AS smallest,
    MAX(Month) AS largest
FROM myTable
GROUP BY REF
-- Open question: how to list the periods missing between those two bounds?
SQL Server 2019.

Clearly a Calendar Table would make this a small task (among many others)
Here is an alternative using the window function lead() over()
Example
-- Sample data: one row per (REF, financial period).
-- A table variable needs the @ sigil; "#name" is only valid for temp tables
-- created with CREATE TABLE, not inside DECLARE ... TABLE.
DECLARE @YourTable TABLE ([REF] varchar(50), [MONTH] date);

INSERT INTO @YourTable ([REF], [MONTH])
VALUES
    ('A', '2016-04-01'),
    ('A', '2016-06-01'),
    ('A', '2016-07-01'),
    ('B', '2016-03-01'),
    ('B', '2016-04-01'),
    ('B', '2016-05-01'),
    ('C', '2022-05-01');

-- For each row, measure the distance in months to the next period of the same
-- REF. Consecutive periods are exactly 1 month apart, so Missing is the number
-- of periods skipped between this row and the next one.
WITH cte AS (
    SELECT
        [REF],
        [MONTH],
        Missing = DATEDIFF(MONTH, [MONTH],
                           LEAD([MONTH], 1) OVER (PARTITION BY [REF] ORDER BY [MONTH])) - 1
    FROM @YourTable
)
-- The last row of each REF has no successor (LEAD returns NULL), so its
-- Missing is NULL and the filter below drops it.
SELECT
    [REF],
    [MONTH],
    Missing
FROM cte
WHERE Missing > 0;
Results
REF MONTH Missing
A 2016-04-01 1

I added one more row of input to demonstrate the solution better.
-- Sample data: one row per (REF, financial period).
with forecast (REF, [MONTH]) as (
    select x.REF
         , x.[MONTH]
    from (
        values
          ('A', {d '2016-04-01'})
        , ('A', {d '2016-06-01'})
        , ('A', {d '2016-07-01'})
        , ('B', {d '2016-03-01'})
        , ('B', {d '2016-04-01'})
        , ('B', {d '2016-05-01'})
        , ('B', {d '2016-09-01'})
        , ('C', {d '2022-05-01'})
    ) x (REF, [MONTH])
),
-- First and last period of each REF.
daterange as (
    select REF
         , min([MONTH]) as dtmin
         , max([MONTH]) as dtmax
    from forecast
    group by REF
),
-- Every monthly period from dtmin to dtmax for each REF, generated by
-- repeatedly adding one month to the previous period.
dt (REF, [MONTH]) as (
    select dr.REF
         , dr.dtmin
    from daterange dr
    union all
    select dt.REF
         , dateadd(month, 1, dt.[MONTH])
    from dt
    inner join daterange dr on dr.REF = dt.REF
    where dateadd(month, 1, dt.[MONTH]) <= dr.dtmax
)
-- Expected periods minus the periods actually present = missing periods.
select REF
     , [MONTH]
from dt
except
select REF
     , [MONTH]
from forecast
order by REF, [MONTH]
-- The default recursion limit is 100 levels, which would abort the query for
-- any REF spanning more than 100 months; 0 removes the limit.
option (maxrecursion 0)
-- or list all of the months for each REF
--select REF
--, [MONTH]
--from dt

Related

SQL fill next date (month) with loop

I have input table, and need to add missing dates, but not to max, but up to next available month.
so I need to use loop.
-- NOTE(review): pseudo-code from the question; it does not run as written.
-- The "#" prefixes look like "@" variable sigils mangled in transit - TODO confirm.
SET #mindate = '2021.01'
SET #maxdate = CAST( GETDATE() AS Date ) --date today as max date
-- NOTE(review): WHILE is given no loop condition here.
while
begin
-- NOTE(review): "=>" is not a T-SQL comparison operator; ">=" was presumably intended.
if #mindate => #maxdate
begin
break
end
-- NOTE(review): presumably meant to advance by one month - verify intent.
set #mindate = #mindate + 1
end
then i can get 1+.. but it does not stop to 7 month
so i totally got stuck with writing loop.
Data table :
could anybody help on code? as most examples are with joins, to data tables, or to one max value.

Paul, I'm assuming that you forgot to specify the month in your mock data.
I hope the code below may help you understand how non-trivial is what you are trying to accomplish :-) Kudos for your will to get rid of loops.
To make it better, I propose a denormalization (CAUTION!):
create another column price_valid_until
the latest prices records will have price_valid_until = '21000101' (aka, far away in the future)
when registering a new price, update the previous with new price_valid_from - 1 day
Here's the solution, with a pretty complex, but efficient query (http://sqlfiddle.com/#!18/4ab23/4)
-- Price history: each row is the price of a SKU starting on price_valid_from.
CREATE TABLE price_history (
    SKU              varchar(255),
    price_valid_from date,
    price            decimal(16, 2)
)

INSERT INTO price_history (SKU, price_valid_from, price)
VALUES
    ('a', '20210101', 10),
    ('a', '20210107', 12),
    ('b', '20210102', 4),
    ('b', '20210110', 2),
    ('b', '20210214', 5);
-- The fiddle would not allow declaring variables and referencing them later,
-- so the date range is computed in a CTE instead of this:
--
-- declare
--     @from_date date,
--     @to_date date;
--
-- select
--     @from_date = min(price_valid_from),
--     @to_date = max(price_valid_from)
-- from price_history
with
-- Range to expand: first price date up to the end of the month of the last one.
date_range as (
    select
        min(price_valid_from) as from_date,
        eomonth(max(price_valid_from)) as to_date
    from price_history
),
-- One row per calendar day in the range (recursive day generator).
all_dates as (
    select from_date as date_in_range
    from date_range
    union all
    select dateadd(day, 1, date_in_range)
    from all_dates
    where date_in_range < (
        select to_date
        from date_range
    )
),
-- Validity window of each price: from price_valid_from until the day before
-- the next price of the same SKU.
price_history_boundaries as (
    select
        ph.SKU,
        ph.price,
        ph.price_valid_from,
        -- The latest price has no successor, so it stays valid until 01/01/2100.
        coalesce(
            dateadd(day, -1, min(ph_next.price_valid_from)),
            '21000101'
        ) as price_valid_until
    from price_history ph
    left join price_history ph_next
        on ph_next.SKU = ph.SKU
       and ph_next.price_valid_from > ph.price_valid_from
    group by ph.SKU, ph.price_valid_from, ph.price
)
-- Attach to every day the price whose validity window contains it.
select
    phb.SKU,
    ad.date_in_range,
    phb.price
from all_dates ad
inner join price_history_boundaries phb
    on phb.price_valid_from <= ad.date_in_range
   and phb.price_valid_until >= ad.date_in_range
order by phb.SKU, ad.date_in_range
-- all_dates recurses once per day; the default limit of 100 levels would abort
-- the query as soon as the history spans more than 100 days.
option (maxrecursion 0)

You can easily achieve your desired result by creating list of dates from which to join to. Here I've used a recursive CTE to create a range of dates, adding 1 month per iteration up to the current date.
It's then a simple matter of joining to your source data, here lead() is handy for limiting the joined rows. Also, assuming SQL Server from the usage of Getdate:
-- Earliest date in the source data; the month series starts here.
-- Local variables need the @ sigil.
declare @start date = (select min([date]) from sourcetable);

-- m: one row per month from @start up to today.
with m as (
    select 1 as num, @start as [Date]
    union all
    select m.num + 1, dateadd(month, 1, m.[Date])
    from m
    where dateadd(month, 1, m.[Date]) <= getdate()
),
-- t: each source row with the date of the next row for the same sku
-- (today when there is none), i.e. the exclusive end of its validity.
-- Partitioning by sku keeps one SKU's rows from cutting short another's.
t as (
    select
        sku,
        price,
        [date],
        lead([date], 1, getdate()) over (partition by sku order by [date]) as NextDate
    from sourcetable
)
-- Each generated month picks up the row whose [date, NextDate) window contains it.
select
    m.[Date],
    t.sku,
    t.price
from m
inner join t
    on m.[Date] >= t.[date]
   and m.[Date] < t.NextDate
-- One recursion level per month; lift the default cap of 100 so that more
-- than 100 months of history does not abort the query.
option (maxrecursion 0);
See Working Fiddle

Calculate inactive customers from single table

I have table with fields Customer.No. , Posting date, Order_ID . I want to find total inactive customers for last 12 months on month basis which means they have placed order before 12 months back and became in active. So want calculate this every month basis to under stand how inactive customers are growing month by month.
if I run the query in July it should go back 365 days from the previous month end and give total number of inactive customers. I want to do this month by month.
I am in learning stage please help.
Thanks for your time in advance.

to get the customers
-- Customers with no order after the cutoff. The cutoff is "now" moved back by
-- the current day-of-month (landing on the last day of the previous month)
-- plus a further 365 days.
SELECT DISTINCT a.CustomerNo
FROM YourTable AS a
WHERE NOT EXISTS (
    SELECT 0
    FROM YourTable AS b
    WHERE b.CustomerNo = a.CustomerNo
      AND b.PostingDate > DATEADD(day, -365 - DATEPART(day, GETDATE()), GETDATE())
)
to get a count
-- Number of customers with no order after the cutoff (end of previous month
-- minus 365 days). COUNT(DISTINCT ...) counts each customer once;
-- "DISTINCT count(0)" counted one per order row, so customers with several
-- old orders were counted several times.
SELECT COUNT(DISTINCT a.CustomerNo) AS InnactiveCount
FROM YourTable AS a
WHERE NOT EXISTS (
    SELECT 0
    FROM YourTable AS b
    WHERE b.CustomerNo = a.CustomerNo
      AND b.PostingDate > DATEADD(day, -365 - DATEPART(day, GETDATE()), GETDATE())
)
..
generate a 'months' table by CTE, then look for inactive in those months
-- month_gen: the last day of each of the previous 12 months. It starts from
-- the end of last month and steps back by subtracting the day-of-month.
;WITH month_gen AS (
    SELECT DATEADD(day, -0 - DATEPART(day, GETDATE()), GETDATE()) AS eom, 1 AS x
    UNION ALL
    SELECT DATEADD(day, -DATEPART(day, eom), eom) AS eom, x + 1 AS x
    FROM month_gen
    WHERE x < 12
)
-- For every generated month, count the customers with no order in that month.
-- COUNT(DISTINCT ...) counts each customer once; count(0) counted one per
-- order row, inflating the result for customers with several orders.
SELECT
    CONVERT(varchar(7), month_gen.eom, 102) AS inactiveMonth,  -- 'yyyy.mm'
    COUNT(DISTINCT a.CustomerNo) AS innactiveCount
FROM YourTable AS a
CROSS JOIN month_gen
WHERE NOT EXISTS (
    SELECT 0
    FROM YourTable AS b
    WHERE b.CustomerNo = a.CustomerNo
      AND YEAR(b.PostingDate) = YEAR(month_gen.eom)
      AND MONTH(b.PostingDate) = MONTH(month_gen.eom)
)
GROUP BY CONVERT(varchar(7), month_gen.eom, 102)
if that gets you anywhere, maybe a final step is to filter out anything getting 'counted' before it was ever active i.e. don't count 'new' customers before they became active

Try below query. To achieve your goal you need calendar table (which I defined with CTE). Below query counts inactivity for the first day of a month:
-- Sample orders. A table variable needs the @ sigil.
declare @tbl table (custNumber int, postDate date, orderId int);

insert into @tbl (custNumber, postDate, orderId)
values
    (1, '2017-01-01', 123),
    (2, '2017-02-01', 124),
    (3, '2017-02-01', 125),
    (1, '2018-02-02', 126),
    (2, '2018-05-01', 127),
    (3, '2018-06-01', 128);

-- Calendar: the first day of each month of 2018.
with cte as (
    select cast('2018-01-01' as date) as dt
    union all
    select dateadd(month, 1, dt)
    from cte
    where dt < '2018-12-01'
)
-- t1: orders placed a year or more before dt.
-- t2: orders by the same customer within the year leading up to dt.
-- A customer is inactive at dt when t1 matched but t2 did not.
-- COUNT(DISTINCT ...) counts each such customer once and yields 0 when no
-- customer qualifies; the previous SUM counted one per t1 order row and
-- returned 1 for a month in which t1 matched nothing.
select
    c.dt,
    count(distinct case when t2.custNumber is null then t1.custNumber end) as inactiveCount
from cte c
left join @tbl t1
    on t1.postDate <= dateadd(year, -1, c.dt)
left join @tbl t2
    on t2.custNumber = t1.custNumber
   and t2.postDate > dateadd(year, -1, c.dt)
   and t2.postDate <= c.dt
group by c.dt

SQL Selecting the earliest date from many within a row

I am working in MS SQL Server Management Studio. I have created a view keyed on patientId, each patient/row has 12 associated dates. Is there a way in SQL that I can find the min and max dates for each row?
Any help would be much appreciated.

You can use "unpivot". Check the example below:
-- Demo table: one row per key with several date columns.
CREATE TABLE dates
(
    number INT PRIMARY KEY ,
    date1 DATETIME ,
    date2 DATETIME ,
    date3 DATETIME
)
-- Date literals use the unseparated yyyymmdd form, which SQL Server reads the
-- same way under every language/DATEFORMAT setting. 'm/d/yyyy' strings are
-- read as d/m/yyyy under some settings.
INSERT INTO dates (number, date1, date2, date3)
VALUES
    (1, '20180101', '20180204', '20180301'),
    (2, '20180102', '20180203', '20180303'),
    (3, '20180103', '20180202', '20180302'),
    (4, '20180104', '20180201', '20180304')
GO
-- UNPIVOT turns the date columns into rows (one row per non-NULL date), so
-- plain MIN/MAX per key give the earliest and latest date of each row.
SELECT
    u.number,
    MIN(u.dDate) AS mindate,
    MAX(u.dDate) AS maxDate
FROM dates
UNPIVOT ( dDate FOR nDate IN ( Date1, Date2, Date3 ) ) AS u
GROUP BY u.number
GO

I would do this using cross apply:
-- For each row of t, feed its twelve date columns into a VALUES row set and
-- aggregate them to get the earliest and latest date of that row.
select
    t.*,
    agg.mind,
    agg.maxd
from t
cross apply (
    select
        min(x.d) as mind,
        max(x.d) as maxd
    from (
        values (d1), (d2), (d3), (d4), (d5), (d6),
               (d7), (d8), (d9), (d10), (d11), (d12)
    ) as x(d)
) as agg;
Note that min() and max() ignore NULL values.

Another way:
-- Demo table with three date columns per ID.
CREATE TABLE MyDates
(
    ID int,
    D1 datetime,
    D2 datetime,
    D3 datetime
)

INSERT MyDates (ID, D1, D2, D3)
VALUES
    (1, '19000101', '19720506', '20060204'),
    (2, '20170624', '20180821', '20180901'),
    (3, '19820202', '19840721', '19851231')

-- Earliest and latest of the three dates on each row, computed in a single
-- pass over a per-row VALUES set.
SELECT
    d.*,
    r.[Min],
    r.[Max]
FROM MyDates AS d
CROSS APPLY (
    SELECT
        MIN(t.v) AS [Min],
        MAX(t.v) AS [Max]
    FROM (VALUES (d.D1), (d.D2), (d.D3)) AS t(v)
) AS r

SQL SUM up date ranges that collide on a group by

I have a table with columns: name, start date (a date value) and finish date(a date value). I want to group by name adding up the dates so I get the total time with no collisions. So, if I have a table
name | start date | finish date
===============================
a | 20/10/2015 | 22/10/2015
a | 21/10/2015 | 22/10/2015
a | 26/10/2015 | 27/10/2015
So, if I group by name, the 3 rows will aggregate, if I simply add the DATEDIFF day per row I'll get 4, if I calculate the DATEDIFF between the MIN start date and the MAX finish date it will be 7, when in reality the right answer would be 3, since the second row collides with the first one and I only need to count that time once.

Thanks for your comments below. I have used a completely different approach. First L build a calendar CTE a with all the dates that exist in your table. You may use an existing calendar table from your database if you have one. Then in the CTE b I CROSS JOIN the calendar CTE to get the dates that exist for the date ranges. In this CTE it does not matter how many overlapping ranges you have as The date will be included once only using the GROUP BY [name] clause. And now all you need to do is to count the number of the individual dates in the CTE c:
SQL Fiddle
MS SQL Server 2008 Schema Setup:
-- Fixture: date ranges per name; the second and third rows overlap the first.
CREATE TABLE Table1 (
    [name]        varchar(1),
    [start date]  datetime,
    [finish date] datetime
);

INSERT INTO Table1 ([name], [start date], [finish date])
VALUES
    ('a', '2015-10-20 00:00:00', '2015-10-22 00:00:00'),
    ('a', '2015-10-21 00:00:00', '2015-10-22 00:00:00'),
    ('a', '2015-10-21 00:00:00', '2015-10-23 00:00:00'),
    ('a', '2015-10-26 00:00:00', '2015-10-27 00:00:00');
Query 1:
-- Overall span covered by Table1: earliest start to latest finish.
with span as (
    select
        min([start date]) as first_day,
        max([finish date]) as last_day
    from Table1
),
-- One row per calendar day inside that span (recursive day generator).
calendar as (
    select first_day as sd
    from span
    union all
    select dateadd(day, 1, calendar.sd)
    from calendar
    cross join span
    where calendar.sd < span.last_day
),
-- Each (name, day) pair that falls inside at least one range, kept once no
-- matter how many ranges overlap on that day.
covered as (
    select distinct
        t.[name],
        calendar.sd
    from table1 as t
    cross join calendar
    where calendar.sd between t.[start date] and t.[finish date]
),
-- Number of distinct covered days per name.
totals as (
    select
        [name],
        count(*) as days
    from covered
    group by [name]
)
select
    [name],
    days
from totals
option (maxrecursion 0)
Results:
| name | days |
|------|------|
| a | 6 |

Get average of last 7 days

I'm attacking a problem, where I have a value for a a range of dates. I would like to consolidate the rows in my table by averaging them and reassigning the date column to be relative to the last 7 days. My SQL experience is lacking and could use some help. Thanks for giving this a look!!
E.g.
7 rows with dates and values.
UniqueId Date Value
........ .... .....
a 2014-03-20 2
a 2014-03-21 2
a 2014-03-22 3
a 2014-03-23 5
a 2014-03-24 1
a 2014-03-25 0
a 2014-03-26 1
Resulting row
UniqueId Date AvgValue
........ .... ........
a 2014-03-26 2
First off I am not even sure this is possible. I'm am trying to attack a problem with this data at hand. I thought maybe using a framing window with a partition to roll the dates into one date with the averaged result, but am not exactly sure how to say that in SQL.

Am taking following as sample
-- Fixture: seven consecutive daily values for each of two ids.
CREATE TABLE some_data1 (
    unique_id text,
    date      date,
    value     integer
);

INSERT INTO some_data1 (unique_id, date, value)
VALUES
    ('a', '2014-03-20', 2),
    ('a', '2014-03-21', 2),
    ('a', '2014-03-22', 3),
    ('a', '2014-03-23', 5),
    ('a', '2014-03-24', 1),
    ('a', '2014-03-25', 0),
    ('a', '2014-03-26', 1),
    ('b', '2014-03-01', 1),
    ('b', '2014-03-02', 1),
    ('b', '2014-03-03', 1),
    ('b', '2014-03-04', 1),
    ('b', '2014-03-05', 1),
    ('b', '2014-03-06', 1),
    ('b', '2014-03-07', 1)
OPTION A : - Using PostgreSQL Specific Function WITH
-- Latest date recorded for each unique_id.
with latest as (
    select
        unique_id,
        max(date) as max_date
    from some_data1
    group by unique_id
)
-- Average of the values in the 7 days ending on that latest date.
-- The lower bound is what restricts the average to the last 7 days. The
-- earlier "limit 7" only capped the number of result rows (one per id) and
-- left the average running over the whole history.
select
    sd.unique_id,
    latest.max_date as date,
    avg(sd.value) as avg_value
from some_data1 sd
inner join latest using (unique_id)
where sd.date > latest.max_date - interval '7' day
  and sd.date <= latest.max_date
group by sd.unique_id, latest.max_date
order by sd.unique_id
> SQLFIDDLE DEMO
OPTION B : - To work in PostgreSQL and MySQL
-- Average of the values in the 7 days ending on each id's latest date.
-- The derived table finds that latest date; the lower bound on sd.date is
-- what restricts the average to the last 7 days. The earlier "limit 7" only
-- capped the number of result rows (one per id) and left the average running
-- over the whole history.
select
    sd.unique_id,
    latest.max_date as date,
    avg(sd.value) as avg_value
from (
    select
        unique_id,
        max(date) as max_date
    from some_data1
    group by unique_id
) latest
inner join some_data1 sd using (unique_id)
where sd.date > latest.max_date - interval '7' day
  and sd.date <= latest.max_date
group by sd.unique_id, latest.max_date
order by sd.unique_id
> SQLFDDLE DEMO

Maybe something along the lines of SELECT AVG(Value) AS 'AvgValue' FROM tableName WHERE Date BETWEEN dateStart AND dateEnd That will get you the average between those dates and you have dateEnd already so you could use that result to create the row you're looking for.

For PostgreSQL a window function might be what you want:
-- Fixture: eight consecutive daily values for one id.
DROP TABLE IF EXISTS some_data;

CREATE TABLE some_data (
    unique_id text,
    date      date,
    value     integer
);

INSERT INTO some_data (unique_id, date, value)
VALUES
    ('a', '2014-03-20', 2),
    ('a', '2014-03-21', 2),
    ('a', '2014-03-22', 3),
    ('a', '2014-03-23', 5),
    ('a', '2014-03-24', 1),
    ('a', '2014-03-25', 0),
    ('a', '2014-03-26', 1),
    ('a', '2014-03-27', 3);

-- Rolling average over the current row and the six rows before it (per id,
-- ordered by date). Only rows whose window holds 7 values are reported.
SELECT
    r.unique_id,
    r.date,
    r.week_avg
FROM (
    SELECT
        unique_id,
        date,
        avg(value)   OVER last_seven AS week_avg,
        count(value) OVER last_seven AS num_days
    FROM some_data
    WINDOW last_seven AS (
        PARTITION BY unique_id
        ORDER BY date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    )
) AS r
WHERE r.num_days = 7
Result:
unique_id | date | week_avg
-----------+------------+--------------------
a | 2014-03-26 | 2.0000000000000000
a | 2014-03-27 | 2.1428571428571429
Questions include:
What happens if a day from the preceding six days is missing? Do we want to add it and count it as zero?
What happens if you add a day? Is the result of the code above what you want (a rolling 7-day average)?

For SQL Server, you can follow the below approach. Try this
1. For weekly value's average
-- Make Thursday the first day of the week for DATEPART(WEEK, ...).
SET DATEFIRST 4
;WITH CTE AS
(
    SELECT
        UNIQUEID,
        [DATE],
        -- Rank the days inside each (id, year, week) bucket, newest first,
        -- so RNO = 1 marks the last day of that week.
        ROW_NUMBER() OVER (
            PARTITION BY UNIQUEID, DATEPART(YEAR, [DATE]), DATEPART(WEEK, [DATE])
            ORDER BY [DATE] DESC
        ) AS RNO,
        -- Average value of the same bucket. The year is part of the partition
        -- so the same week number in different years is not merged into one
        -- group.
        -- NOTE(review): if VALUE is an integer column, AVG returns an integer
        -- (truncated) result - confirm the column type.
        AVG(VALUE) OVER (
            PARTITION BY UNIQUEID, DATEPART(YEAR, [DATE]), DATEPART(WEEK, [DATE])
        ) AS AVGVALUE
    FROM DATETAB
)
-- One row per id and week: the week's last recorded day with the week average.
SELECT UNIQUEID, [DATE], AVGVALUE
FROM CTE
WHERE RNO = 1
Click here to view result
2. For last 7 days value's average
-- Reference date: the last day of the 7-day window. Local variables need the
-- @ sigil.
DECLARE @DATE DATE = '2014-03-26'
;WITH CTE AS
(
    SELECT UNIQUEID, [DATE], VALUE, @DATE AS CURRENTDATE
    FROM DATETAB
    -- BETWEEN is inclusive on both ends, so going back 6 days gives exactly
    -- 7 days (going back 7 would cover 8).
    WHERE [DATE] BETWEEN DATEADD(DAY, -6, @DATE) AND @DATE
)
-- One row per id: the reference date with the average over the window.
SELECT UNIQUEID, CURRENTDATE AS [DATE], AVG(VALUE) AS AVGVALUE
FROM CTE
GROUP BY UNIQUEID, CURRENTDATE
Click here to view result

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Find missing months in SQL - sql

Related

SQL fill next date (month) with loop

Calculate inactive customers from single table

SQL Selecting the earliest date from many within a row

SQL SUM up date ranges that collide on a group by

Get average of last 7 days

Categories

Resources