SQL query for date intervals comparing non-adjacent rows? - sql

I want to flag the first date in every window of at least 31 days for each ID unit in my data.
ROW ID INDEX_DATE
1 ABC 1/1/2019
2 ABC 1/7/2019
3 ABC 1/21/2019
4 ABC 2/2/2019
5 ABC 2/9/2019
6 ABC 3/6/2019
7 DEF 1/5/2019
8 DEF 2/1/2019
9 DEF 2/8/2019
The desired rows are 1, 4, 6, 7 and 9; these are either the first INDEX_DATE for the given ID, or they occur at least 31 days after the previously flagged INDEX_DATE. Every suggestion I have found uses LAG() or LEAD with window functions, but I could only get these to compare adjacent rows. Row 4, for example, needs to be compared to Row 1 in order to be identified as the first after a 31-day window has completed.
I tried the following:
Data
DROP TABLE tTest IF EXISTS;
CREATE TEMP TABLE tTest
(
ROWN INT,
ID VARCHAR(3),
INDEX_DATE DATE
) ;
GO
INSERT INTO tTEST VALUES (1, 'ABC', '1/1/2019');
INSERT INTO tTEST VALUES (2, 'ABC', '1/7/2019');
INSERT INTO tTEST VALUES (3, 'ABC', '1/21/2019');
INSERT INTO tTEST VALUES (4, 'ABC', '2/2/2019');
INSERT INTO tTEST VALUES (5, 'ABC', '2/9/2019');
INSERT INTO tTEST VALUES (6, 'ABC', '3/6/2019');
INSERT INTO tTEST VALUES (7, 'DEF', '1/5/2019');
INSERT INTO tTEST VALUES (8, 'DEF', '2/1/2019');
INSERT INTO tTEST VALUES (9, 'DEF', '2/8/2019');
GO
Query:
DROP TABLE TTEST2 IF EXISTS;
CREATE TEMP TABLE TTEST2 AS (
WITH
RN_CTE(ROWN, ID, INDEX_DATE, RN) AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY INDEX_DATE)
FROM tTEST),
MIN_CTE(ROWN, ID, INDEX_DATE, RN) AS (SELECT * FROM RN_CTE WHERE RN=1),
DIFF_CTE(ROWN,ID, INDEX_DATE, RN, DAY_DIFF) AS (
SELECT RN.*, DATE(RN.INDEX_DATE + INTERVAL '30 DAYS')
FROM RN_CTE AS RN
JOIN MIN_CTE AS MC ON RN.ID=MC.ID
WHERE RN.RN=1
OR RN.INDEX_DATE > MC.INDEX_DATE + INTERVAL '30 DAYS' ),
MIN_DIFF_CTE AS (
SELECT ID, DAY_DIFF, MIN(ROWN) AS MIN_ROW
FROM DIFF_CTE
GROUP BY ID, DAY_DIFF)
SELECT T.*
FROM MIN_DIFF_CTE AS MDC
JOIN tTEST AS T ON MDC.MIN_ROW = T.ROWN
ORDER BY ID, INDEX_DATE
);
Result:
SELECT * FROM TTEST2 ORDER BY ID, INDEX_DATE;
ROWN ID INDEX_DATE
1 ABC 2019-01-01
4 ABC 2019-02-02
5 ABC 2019-02-09
6 ABC 2019-03-06
7 DEF 2019-01-05
9 DEF 2019-02-08
Row 5 with INDEX_DATE = 2019-02-09 should not be in the output because it is less than 31 days after Row 4's INDEX_DATE.

Something like this. The CTEs flag the first row for each ID and then the first row of each subsequent 31-day window.
Data
drop table if exists #tTEST;
go
select * INTO #tTEST from (values
(1, 'abc', '1/1/2019'),
(2, 'abc', '1/7/2019'),
(3, 'abc', '1/21/2019'),
(4, 'abc', '2/2/2019'),
(5, 'abc', '2/9/2019'),
(6, 'abc', '3/6/2019'),
(7, 'def', '1/5/2019'),
(8, 'def', '2/1/2019'),
(9, 'def', '2/8/2019')) V([ROW], ID, INDEX_DATE);
Query
-- Flag the first row per ID, then every row dated at least 31 days after the
-- most recently flagged row of the same ID.
--
-- A fixed bucket such as datediff(first_date, date) / 31 is not enough: each
-- window must restart at the previously flagged date, so the rows are walked
-- in date order with a recursive CTE that carries the current window start.
;with
-- Number each ID's rows in date order; INDEX_DATE is cast because the sample
-- table stores it as text. [ROW] breaks ties between equal dates.
ordered([ROW], ID, INDEX_DATE, rn) as (
    select [ROW], ID, cast(INDEX_DATE as date),
           row_number() over (partition by ID order by cast(INDEX_DATE as date), [ROW])
    from #tTEST),
-- Walk the rows one at a time, remembering the date that opened the window.
walk([ROW], ID, rn, window_start, is_flagged) as (
    -- anchor: the earliest row of every ID always opens a window
    select [ROW], ID, rn, INDEX_DATE, 1
    from ordered
    where rn = 1
    union all
    -- step: the next row opens a new window only when it is >= 31 days
    -- after the current window start; otherwise the window is carried forward
    select o.[ROW], o.ID, o.rn,
           case when datediff(d, w.window_start, o.INDEX_DATE) >= 31
                then o.INDEX_DATE else w.window_start end,
           case when datediff(d, w.window_start, o.INDEX_DATE) >= 31
                then 1 else 0 end
    from walk w
    join ordered o on o.ID = w.ID and o.rn = w.rn + 1)
-- Join back to the source so the output has the original columns and values.
select t.[ROW], t.ID, t.INDEX_DATE
from walk w
join #tTEST t on t.[ROW] = w.[ROW]
where w.is_flagged = 1
order by t.[ROW]
-- one recursion level per row of an ID; lift the default limit of 100
option (maxrecursion 0);
Output
ROW ID INDEX_DATE
1 abc 1/1/2019
4 abc 2/2/2019
6 abc 3/6/2019
7 def 1/5/2019
9 def 2/8/2019

Related

How can I distribute data from one table across the rows of another table?

I have two tables Orderapproval and stock:
Table 1: Orderapproval
itemcode
approvalqty
1
25
2
10
Table 2: stock
itemcode
stockqty
batch
date
1
5
aa
2021-02-01
1
10
bb
2021-02-10
1
15
cc
2021-02-02
2
5
dd
2021-02-01
2
20
aa
2021-02-05
We hold stock batch-wise.
When we get an order request for an item,
we try to pick items from the oldest batch first, then the next oldest, and so on.
How can I get a result set like this:
itemcode
qty
batch
date
1
5
aa
2021-02-01
1
15
cc
2021-02-02
1
5
bb
2021-02-10
2
5
dd
2021-02-01
This is what I have tried:
-- Asker's attempt: distribute each item's approved quantity over its stock rows
-- using a running total (T1) and the previous row's value (T2).
-- NOTE(review): DECLARE ... TABLE normally takes an @-prefixed variable name;
-- the #-prefixed names used here look like temp-table names -- confirm dialect.
Declare #StockTable table
(ItemCode int,BranchCode int,Qty int)
Insert Into #StockTable values
(1, 101, 5),
(1, 102, 10),
(1, 101, 15),
(2, 102, 5),
(2, 103, 20)
Declare #Orderapproval table (ItemCode int,Qty int)
Insert Into #Orderapproval values
(1,25),
(2,10)
-- T1: every stock row joined to its item's approval row.
-- ToDist is the approved quantity for the item.
;with T1 as (
Select A.*
,ToDist = cast(D.Qty as int),
-- NOTE(review): this sums D.Qty (the approved quantity), not A.Qty (the stock
-- quantity), and the partition includes BranchCode, so the total restarts for
-- every branch -- probably not the intended per-item running stock total.
running_total = SUM(D.Qty) OVER (PARTITION BY A.ItemCode,BranchCode ORDER BY BranchCode DESC
ROWS BETWEEN UNBOUNDED PRECEDING
AND CURRENT ROW)
From #StockTable A
Join #Orderapproval D on A.ItemCode=D.ItemCode )
-- T2: adds the previous row's value within the item (0 for the first row).
-- NOTE(review): LAG reads Qty, not running_total, despite the column name.
, T2 as (
Select *, prev_running_total = LAG(Qty,1,0) OVER (PARTITION BY ItemCode ORDER BY BranchCode DESC) From T1
)
-- Final projection: 0 once the previous value already covers ToDist, the
-- remainder when this row crosses ToDist, otherwise the row's own qty.
select *,
CASE WHEN prev_running_total >= ToDist THEN 0
WHEN running_total > ToDist THEN ToDist - prev_running_total
ELSE qty
END 'qty'
from T2;
I didn't get the right output.
Please suggest a better way
Try this:
-- Allocate each item's approved quantity to its stock batches, oldest first.
-- The join must match on itemcode (otherwise every batch is compared with
-- every order), and the quantity taken from a batch is capped by what is
-- still outstanding after the older batches have been used.
WITH running AS (
    -- cum_qty: stock available up to and including this batch, oldest first.
    -- batch breaks ties so two batches with the same date get distinct totals.
    SELECT s.itemcode
         , s.stockqty
         , s.batch
         , s.date
         , SUM(s.stockqty) OVER (PARTITION BY s.itemcode
                                 ORDER BY s.date, s.batch
                                 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cum_qty
    FROM stock AS s
)
SELECT r.itemcode
     , CASE WHEN r.cum_qty <= oa.approvalqty
            THEN r.stockqty                                   -- batch fully used
            ELSE oa.approvalqty - (r.cum_qty - r.stockqty)    -- last, partial batch
       END AS qty
     , r.batch
     , r.date
FROM running AS r
INNER JOIN Orderapproval AS oa
    ON oa.itemcode = r.itemcode
-- keep a batch only while older batches have not yet covered the order
WHERE r.cum_qty - r.stockqty < oa.approvalqty
ORDER BY r.itemcode, r.date, r.batch
If required, insert resultset into new table
I tried it on SQLite 3.30.
Schema
CREATE TABLE stock (
id INT,
itemcode INT,
stockqty INT,
batch CHAR,
date DATE
);
INSERT INTO stock (id, itemcode, stockqty, batch, date) VALUES (1, 1, 5, 'aa', '2021-02-01');
INSERT INTO stock (id, itemcode, stockqty, batch, date) VALUES (2, 1, 10, 'bb', '2021-02-10');
INSERT INTO stock (id, itemcode, stockqty, batch, date) VALUES (3, 1, 15, 'cc', '2021-02-02');
INSERT INTO stock (id, itemcode, stockqty, batch, date) VALUES (4, 2, 5, 'dd', '2021-02-01');
INSERT INTO stock (id, itemcode, stockqty, batch, date) VALUES (5, 2, 20, 'aa', '2021-02-05');
CREATE TABLE orderapproval (
id INT,
itemcode INT,
approvalqty INT
);
INSERT INTO orderapproval (id, itemcode, approvalqty) VALUES (1, 1, 25);
INSERT INTO orderapproval (id, itemcode, approvalqty) VALUES (2, 2, 10);
Query
-- FIFO allocation of each approved quantity over the item's stock batches.
-- running:   cumulative stock per item, oldest batch first. The explicit ROWS
--            frame plus the id tie-breaker keeps batches that share a date
--            from all receiving the same cumulative total.
-- allocated: quantity taken from each batch; the batch that crosses the
--            approved quantity contributes only the remainder.
WITH running AS (
    SELECT s.id,
           s.itemcode,
           s.stockqty,
           s.batch,
           s.date,
           SUM(s.stockqty) OVER (
               PARTITION BY s.itemcode
               ORDER BY s.date, s.id
               ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
           ) AS acc
    FROM stock AS s
),
allocated AS (
    SELECT r.id,
           r.itemcode,
           CASE
               WHEN r.acc <= o.approvalqty THEN r.stockqty
               ELSE o.approvalqty - (r.acc - r.stockqty)
           END AS qty,
           r.batch,
           r.date
    FROM running AS r
    INNER JOIN orderapproval AS o
        ON o.itemcode = r.itemcode
)
SELECT itemcode,
       qty,
       batch,
       date
FROM allocated
WHERE qty > 0          -- batches past the approved quantity contribute nothing
ORDER BY itemcode, date, id
Result
itemcode
qty
batch
date
1
5
aa
2021-02-01
1
15
cc
2021-02-02
1
5
bb
2021-02-10
2
5
dd
2021-02-01
2
5
aa
2021-02-05
db-fiddle
SQL CODE---(Tested)
-- Pick stock for each approved order from the oldest batch first.
-- Changes from the earlier form: the column is approvalqty (not approvalgty),
-- the join matches on itemcode, the invalid GROUP BY is gone, and the quantity
-- taken from a batch is limited to what the order still needs.
Select picked.itemcode
,case when picked.cum_qty <= orderapproval.approvalqty
      then picked.stockqty                                            -- whole batch
      else orderapproval.approvalqty - (picked.cum_qty - picked.stockqty) -- remainder only
 end AS qty
,picked.batch,picked.date
from (
    -- cumulative stock per item in date order; batch breaks date ties
    select stock.itemcode, stock.stockqty, stock.batch, stock.date
    ,sum(stock.stockqty) over (partition by stock.itemcode
                               order by stock.date, stock.batch
                               rows between unbounded preceding and current row) AS cum_qty
    from stock
) picked
join orderapproval on orderapproval.itemcode = picked.itemcode
-- stop once the older batches already cover the approved quantity
where picked.cum_qty - picked.stockqty < orderapproval.approvalqty
order by picked.itemcode, picked.date, picked.batch;

SQL Occurrence of Sequence Number

I want to find if any Name has straight 4 or more occurrences of SeqNo in consecutive sequence only.
If there is a break in seqNo but 4 or more rows are consecutive then also i need that Name.
Example:
SeqNo Name
10 | A
15 | A
16 | A
17 | A
18 | A
9 | B
10 | B
13 | B
14 | B
6 | C
7 | C
9 | C
10 | C
OUTPUT:
A
BELOW IS SCRIPT FOR ANYONE HELPING.
create table testseq (Id int, Name char)
INSERT into testseq values
(10, 'A'),
(15, 'A'),
(16, 'A'),
(17, 'A'),
(18, 'A'),
(9, 'B'),
(10, 'B'),
(13, 'B'),
(14, 'B'),
(6, 'C'),
(7, 'C'),
(9, 'C'),
(10, 'C')
SELECT * FROM testseq
You can use some gaps-and-islands techniques for this.
If you want names that have at least 4 consecutive records where seqno is increasing by 1, then you can use the difference between seqno and `row_number()` to define the groups, and then aggregate:
-- Names having at least 4 rows whose sequence numbers increase by exactly 1.
-- In the posted testseq table the sequence number is stored in column Id
-- (shown as SeqNo in the question), so Id is used here.
-- Within one name, rn - Id stays constant across a run of consecutive values,
-- so it identifies each run; DISTINCT collapses names with several such runs.
select distinct name
from (
    select t.name,
           t.Id,
           row_number() over (partition by t.name order by t.Id) as rn
    from testseq t
) t
group by name, rn - Id
having count(*) >= 4
Note that for the sample data shown above, this returns A: it has four consecutive records (15, 16, 17, 18) where seqno increments by 1, while B and C have at most two.
I don't really view this as a "gaps-and-islands" problem. You are just looking for a minimum number of adjacent rows. This is easily handled using lag() or lead():
select t.*
from (select t.*,
lead(seqno, 3) over (partition by name order by seqno) as seqno_name_3
from t
) t
where seqno_name_3 = seqno + 3;
This looks at the sequence number three rows ahead for the same name. If that value equals seqno + 3, the four rows starting at the current one are consecutive.
If you just want the name and to handle duplicates:
select distinct name
from (select t.*,
lead(seqno, 3) over (partition by name order by seqno) as seqno_name_3
from t
) t
where seqno_name_3 = seqno + 3;
If the sequence numbers can have gaps (but are otherwise adjacent):
select distinct name
from (select t.*,
lead(seqno, 3) over (partition by name order by seqno) as seqno_name_3,
lead(seqno, 3) over (order by seqno) as seqno_3
from t
) t
where seqno_name_3 = seqno_3;
A solution in plain SQL, no LAG() or LEAD() or ROW_NUMBER():
SELECT t1.Name
FROM testseq t1
WHERE (
SELECT count(t2.Id)
FROM testseq t2
WHERE t2.Name=t1.Name
and t2.Id between t1.Id and t1.Id+3
GROUP BY t2.Name)>=4
GROUP BY t1.Name;

I need use CTE with two tables

I have 2 tables and I need to use a CTE.
I need to join each row of Table_2 to a single row of Table_1: the row with the same dcidKala whose dActiveDate is the latest one that is still on or before Table_2's dDateFactor (that is, table_1.dcidKala = table_2.dcidKala and table_1.dActiveDate <= table_2.dDateFactor).
CREATE TABLE #Table_1
(
dcidKala INT,
dcPercentDiscount FLOAT,
dActiveDate DATE
)
CREATE TABLE #Table_2
(
dcRow INT,
dcidKala INT,
dcNum FLOAT,
dDateFactor DATE
)
INSERT INTO #Table_1
(
dcidKala,
dcPercentDiscount,
dActiveDate
)
VALUES
(109,10,'2017-08-23' ),
(109, 15, '2017-10-12'),
(100, 20, '2017-01-20'),
(102, 20, '2017-01-20')
INSERT INTO #Table_2
(
dcRow,
dcidKala,
dcNum,
dDateFactor
)
VALUES
( 1,109,1, '2017-10-05' ),
(2, 109, 2, '2017-10-07'),
(3, 109, 1, '2017-10-14'),
(4, 109, 5, '2017-10-19'),
(5, 100, 2, '2017-01-25')
;WITH cte AS
(
SELECT th.dcPercentDiscount,
tb.dcRow,
ROW_NUMBER() OVER(PARTITION BY th.dcidKala, tb.dcRow ORDER BY th.dActiveDate) AS
rn
FROM #Table_1 th
INNER JOIN #Table_2 tb
ON tb.dcidKala = th.dcidKala
AND tb.dDateFactor >= th.dActiveDate
)
SELECT *
FROM #Table_2 t2
LEFT JOIN cte t3
ON t2.dcRow = t3.dcRow
AND t3.rn = 1
DROP TABLE [#Table_1]
DROP TABLE [#Table_2]
--result myCode is:
--1 109 1 2017-10-05 10 1 1
--2 109 2 2017-10-07 10 2 1
--3 109 1 2017-10-14 10 3 1
--4 109 5 2017-10-19 10 4 1
--5 100 2 2017-01-25 20 5 1
--Rows 3 and Rows 4 is wrong
--i need this result :
-- on Result From table_1 on table_2
--1 109 1 2017-10-05 10 1 1
--2 109 2 2017-10-07 10 2 1
--3 109 1 2017-10-14 15 3 1
--4 109 5 2017-10-19 15 4 1
--5 100 2 2017-01-25 20 5 1
For each row in table_2 I need just one row from table_1: the one with the largest
dActiveDate that is not later than dDateFactor.
Please help me
thank you
You can use this.
-- For every #Table_2 row, pick the #Table_1 discount whose dActiveDate is the
-- latest one on or before dDateFactor for the same dcidKala.
-- The tables are the temp tables created by the script, hence the # prefix.
;WITH cte AS
(
    SELECT
        th.dcPercentDiscount,
        tb.dcRow,
        -- newest qualifying dActiveDate gets rn = 1 (DESC is what selects it)
        ROW_NUMBER() OVER (PARTITION BY th.dcidKala, tb.dcRow
                           ORDER BY th.dActiveDate DESC) AS rn
    FROM
        #Table_1 AS th
    INNER JOIN
        #Table_2 AS tb
            ON tb.dcidKala = th.dcidKala
           AND tb.dDateFactor >= th.dActiveDate
)
SELECT *
FROM #Table_2 AS t2
-- rn filter stays in ON so #Table_2 rows without any discount are kept
LEFT JOIN cte AS t3
    ON t2.dcRow = t3.dcRow
   AND t3.rn = 1;
CREATE TABLE #Table_1 (dcidKala INT,
dcPercentDiscount FLOAT,
dActiveDate DATE)
CREATE TABLE #Table_2 (dcRow INT,
dcidKala INT,
dcNum FLOAT,
dDateFactor DATE)
INSERT INTO #Table_1 (dcidKala,
dcPercentDiscount,
dActiveDate)
VALUES (100, 10, '2017-01-01'),
(101, 15, '2017-01-02'),
(100, 20, '2017-01-20'),
(102, 20, '2017-01-20')
INSERT INTO #Table_2 (dcRow,
dcidKala,
dcNum,
dDateFactor)
VALUES (1, 100, 1, '2017-01-05'),
(2, 100, 2, '2017-01-09'),
(3, 101, 1, '2017-01-01'),
(4, 101, 5, '2017-01-20'),
(5, 100, 2, '2017-01-25')
-- For every #Table_2 row, return the single #Table_1 row with the same
-- dcidKala whose dActiveDate is the latest one on or before dDateFactor.
-- OUTER APPLY keeps #Table_2 rows that have no qualifying discount and fills
-- the three #Table_1 columns with NULL, so no UNION ALL fallback row is needed
-- (T-SQL does not accept ORDER BY in front of UNION ALL).
SELECT *
FROM [#Table_2] AS [t2]
OUTER APPLY ( SELECT TOP 1
                     [t1].[dcidKala],
                     [t1].[dcPercentDiscount],
                     [t1].[dActiveDate]
              FROM [#Table_1] AS [t1]
              WHERE [t1].[dcidKala] = [t2].[dcidKala]
                AND [t2].[dDateFactor] >= [t1].[dActiveDate]
              -- newest qualifying activation date wins
              ORDER BY [t1].[dActiveDate] DESC ) AS [latest]
DROP TABLE [#Table_1]
DROP TABLE [#Table_2]

Select start date and end date from records with consecutive dates in SQL Server 2008 R2

I have a table in SQL Server 2008 R2 called ReserveLog. This is an existing table that stores the reserve date of each room in a complex.
It is like this:
RoomNumber ReserveDate
----------------------
1 2017-07-01
1 2017-07-02
1 2017-07-03
1 2017-07-06
1 2017-07-07
1 2017-07-08
2 2017-01-02
2 2017-01-03
2 2017-01-04
2 2017-01-09
2 2017-01-10
I want to query this table so that I get the following result:
RoomNumber ReserveStartDate ReserveEndDate
------------------------------------------
1 2017-07-01 2017-07-03
1 2017-07-06 2017-07-08
2 2017-01-02 2017-01-04
2 2017-01-09 2017-01-10
Is it possible? I can't work out how to do it. Any help is appreciated in advance.
create table #reservs
(
roomnumber INT, ReserveDate DATE
)
INSERT INTO #reservs VALUES (1, '2017-07-01');
INSERT INTO #reservs VALUES (1, '2017-07-02');
INSERT INTO #reservs VALUES (1, '2017-07-03');
INSERT INTO #reservs VALUES (1, '2017-07-06');
INSERT INTO #reservs VALUES (1, '2017-07-07');
INSERT INTO #reservs VALUES (1, '2017-07-08');
INSERT INTO #reservs VALUES (2, '2017-01-02');
INSERT INTO #reservs VALUES (2, '2017-01-03');
INSERT INTO #reservs VALUES (2, '2017-01-04');
INSERT INTO #reservs VALUES (2, '2017-01-09');
INSERT INTO #reservs VALUES (2, '2017-01-10');
-- Collapse each room's runs of consecutive reserve dates into one row holding
-- the first and last date of the run.
-- Subtracting the per-room row number (in days) from the date gives the same
-- anchor date for every row of an unbroken run, so it serves as the run key.
SELECT numbered.roomnumber,
       MIN(numbered.reservedate) AS mn,
       MAX(numbered.reservedate) AS mx
FROM (
    SELECT r.roomnumber,
           r.reservedate,
           DATEADD(day,
                   -CAST(ROW_NUMBER() OVER (PARTITION BY r.roomnumber
                                            ORDER BY r.reservedate) AS INT),
                   r.reservedate) AS run_anchor
    FROM #reservs AS r
) AS numbered
GROUP BY numbered.roomnumber, numbered.run_anchor
ORDER BY numbered.roomnumber, mn
Try this using common table expressions, comments in line and SQL Fiddle link:
SQL Fiddle
create table Reservelog
(
RoomNumber INT,
ReserveDate Date
)
INSERT INTO ReserveLog
VALUES
(1, '2017-07-01'),
(1, '2017-07-02'),
(1, '2017-07-03'),
(1, '2017-07-06'),
(1, '2017-07-07'),
(1, '2017-07-08'),
(2, '2017-01-02'),
(2, '2017-01-03'),
(2, '2017-01-04'),
(2, '2017-01-09'),
(2, '2017-01-10')
Query 1:
-- Turn ReserveLog into one row per unbroken run of reserve dates per room.
-- CTE  : attaches the previous and next reserve date of the same room.
-- CTE2 : keeps only rows that start or end a run, flags them, and numbers
--        them per room in date order.
-- Final: pairs every start row with the end row of the same run.
;WITH CTE
AS
(
    SELECT *,
           (
               -- Get previous reserve date for this room
               SELECT TOP 1 ReserveDate
               FROM ReserveLog R2
               WHERE R1.RoomNumber = R2.RoomNumber AND
                     R1.ReserveDate > R2.ReserveDate
               ORDER BY ReserveDate DESC
           ) AS PrevReserveDate,
           (
               -- Get next reserve date for this room
               SELECT TOP 1 ReserveDate
               FROM ReserveLog R2
               WHERE R1.RoomNumber = R2.RoomNumber AND
                     R1.ReserveDate < R2.ReserveDate
               ORDER BY ReserveDate
           ) AS NextReserveDate
    FROM ReserveLog R1
),
CTE2
AS
(
    SELECT *,
           CASE
               WHEN PrevReserveDate IS NULL OR
                    DATEDIFF(D, PrevReserveDate, ReserveDate) > 1
               THEN 1 -- Flag as a start date
               ELSE 0
           END AS DateStart,
           CASE
               WHEN NextReserveDate IS NULL OR
                    DATEDIFF(D, ReserveDate, NextReserveDate) > 1
               THEN 1 -- Flag as an end date
               ELSE 0
           END AS DateEnd,
           ROW_NUMBER() OVER
               (PARTITION BY RoomNumber ORDER BY ReserveDate) AS RN
    FROM CTE
    -- only select rows which have no previous or next reservation or
    -- ones where the difference between consecutive reservations > 1 day
    WHERE PrevReserveDate IS NULL OR
          NextReserveDate IS NULL OR
          DATEDIFF(D, PrevReserveDate, ReserveDate) > 1 OR
          DATEDIFF(D, ReserveDate, NextReserveDate) > 1
)
SELECT startRows.RoomNumber,
       startRows.ReserveDate AS ReserveStartDate,
       endRows.ReserveDate AS ReserveEndDate
FROM CTE2 startRows
INNER JOIN CTE2 endRows
    -- A one-day stay is both the start and the end of its run, so it pairs
    -- with itself (offset 0); otherwise the end is the next boundary row.
    ON endRows.RN = startRows.RN
                    + CASE WHEN startRows.DateEnd = 1 THEN 0 ELSE 1 END AND
       startRows.RoomNumber = endRows.RoomNumber AND
       endRows.DateEnd = 1
WHERE startRows.DateStart = 1
Results:
| RoomNumber | ReserveStartDate | ReserveEndDate |
|------------|------------------|----------------|
| 1 | 2017-07-01 | 2017-07-03 |
| 1 | 2017-07-06 | 2017-07-08 |
| 2 | 2017-01-02 | 2017-01-04 |
| 2 | 2017-01-09 | 2017-01-10 |
Use this query:
-- Rows of ReserveLog reserved from 2017-07-01 through 2017-07-03 inclusive.
-- BETWEEN takes its two bounds unparenthesised, and the table is ReserveLog
-- (R2 is only an alias used inside another query).
SELECT RoomNumber, ReserveDate
FROM ReserveLog
WHERE ReserveDate BETWEEN '2017-07-01' AND '2017-07-03';

Finding Missing Numbers When Data Is Grouped In SQL Server

I need to write a query that will calculate the missing numbers in a sequence when the data is "grouped". The data in each group is in sequence, but each individual group would have its own sequence. The data would look something like this:
Id| Number|
-----------
1 | 250 |
1 | 270 | <260 Missing
1 | 280 | <290 Missing
1 | 300 |
1 | 310 |
2 | 110 |
2 | 130 | <120 Missing
2 | 140 |
3 | 260 |
3 | 270 |
3 | 290 | <280 Missing
3 | 300 |
3 | 340 | <310, 320 & 330 Missing
I have found a solution based on this post from CELKO here:
http://bytes.com/topic/sql-server/answers/511668-query-find-missing-number
In essence to set up a demo run the following:
CREATE TABLE Sequence
(seq INT NOT NULL
PRIMARY KEY (seq));
INSERT INTO Sequence VALUES (1);
INSERT INTO Sequence VALUES (2);
INSERT INTO Sequence VALUES (3);
INSERT INTO Sequence VALUES (4);
INSERT INTO Sequence VALUES (5);
INSERT INTO Sequence VALUES (6);
INSERT INTO Sequence VALUES (7);
INSERT INTO Sequence VALUES (8);
INSERT INTO Sequence VALUES (9);
INSERT INTO Sequence VALUES (10);
CREATE TABLE Tickets
(buyer CHAR(5) NOT NULL,
ticket_nbr INTEGER DEFAULT 1 NOT NULL
PRIMARY KEY (buyer, ticket_nbr));
INSERT INTO Tickets VALUES ('a', 2);
INSERT INTO Tickets VALUES ('a', 3);
INSERT INTO Tickets VALUES ('a', 4);
INSERT INTO Tickets VALUES ('b', 4);
INSERT INTO Tickets VALUES ('c', 1);
INSERT INTO Tickets VALUES ('c', 2);
INSERT INTO Tickets VALUES ('c', 3);
INSERT INTO Tickets VALUES ('c', 4);
INSERT INTO Tickets VALUES ('c', 5);
INSERT INTO Tickets VALUES ('d', 1);
INSERT INTO Tickets VALUES ('d', 6);
INSERT INTO Tickets VALUES ('d', 7);
INSERT INTO Tickets VALUES ('d', 9);
INSERT INTO Tickets VALUES ('e', 10);
SELECT DISTINCT T1.buyer, S1.seq
FROM Tickets AS T1, Sequence AS S1
WHERE seq <= (SELECT MAX(ticket_nbr) -- set the range
FROM Tickets AS T2
WHERE T1.buyer = T2.buyer)
AND seq NOT IN (SELECT ticket_nbr -- get missing numbers
FROM Tickets AS T3
WHERE T1.buyer = T3.buyer);
CELKO does mention that this is for a small number of tickets, in my example my numbers table is limited to 200 rows with a single column which is a primary key with each row an increment of 10 as that is what I am interested in. I modified CELKOs query as follows (added in min range):
SELECT DISTINCT T1.buyer, S1.seq
FROM Tickets AS T1, Sequence AS S1
WHERE seq <= (SELECT MIN(ticket_nbr) -- set the MIN range
FROM Tickets AS T2
WHERE T1.buyer = T2.buyer)
AND seq <= (SELECT MAX(ticket_nbr) -- set the MAX range
FROM Tickets AS T2
WHERE T1.buyer = T2.buyer)
AND seq NOT IN (SELECT ticket_nbr -- get missing numbers
FROM Tickets AS T3
WHERE T1.buyer = T3.buyer)
ORDER BY buyer, seq;
The output would be those numbers that are missing:
buyer seq
a 1
b 1
b 2
b 3
e 1
e 2
e 3
e 4
e 5
e 6
e 7
e 8
e 9
This works exactly as I want; however, on my data set it is very slow (11 second run time at the moment - it appears to be the DISTINCT which slows things down tremendously, and presumably it will get worse as the base data set grows). I have tried all manner of things to make it more efficient but sadly my ambition exceeds my knowledge. Is it possible to make the query above more efficient/faster? My only constraint is that the dataset I am making needs to be a SQL View (as it feeds a report) and will execute on SQL Azure.
Cheers
David
If my understanding is correct, you want to fill in the missing data from the table. The table would consist of ID and a Number which is incremented by 10.
CREATE TABLE Test(
ID INT,
Number INT
)
INSERT INTO Test VALUES
(1, 250), (1, 270), (1, 280), (1, 300), (1, 310),
(2, 110), (2, 130), (2, 140), (3, 260), (3, 270),
(3, 290), (3, 300), (3, 340);
You could do this by using a Tally Table and doing a CROSS JOIN on the Test table:
;WITH E1(N) AS(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
,E2(N) AS(SELECT 1 FROM E1 a, E1 b)
,E4(N) AS(SELECT 1 FROM E2 a, E2 b)
,Tally(N) AS(
SELECT TOP (SELECT MAX(Number)/10 FROM Test)
(ROW_NUMBER() OVER(ORDER BY(SELECT NULL)) - 1) * 10
FROM E4
),
MinMax AS(
SELECT
ID,
Minimum = MIN(Number),
Maximum = MAX(Number)
FROM Test
GROUP BY ID
),
CrossJoined AS(
SELECT
m.ID,
Number = Minimum + t.N
FROM MinMax m
CROSS JOIN Tally t
WHERE
Minimum + t.N <= Maximum
)
SELECT * FROM CrossJoined c
ORDER BY c.ID, c.Number
RESULT
ID Number
----------- --------------------
1 250
1 260
1 270
1 280
1 290
1 300
1 310
2 110
2 120
2 130
2 140
3 260
3 270
3 280
3 290
3 300
3 310
3 320
3 330
3 340
If you only want to find the missing Number from Test grouped by ID, just replace the final SELECT statement:
SELECT * FROM CrossJoined c
ORDER BY c.ID, c.Number
to:
SELECT c.ID, c.Number
FROM CrossJoined c
WHERE NOT EXISTS(
SELECT 1 FROM Test t
WHERE
t.ID = c.ID
AND t.Number = c.Number
)
ORDER BY c.ID, c.Number
RESULT
ID Number
----------- --------------------
1 260
1 290
2 120
3 280
3 310
3 320
3 330