Select nearest date on the basis of ID in SQL - sql

I have two table Dispense & Pro Table, I want to select all rows from Dispense table and nearest Shipment date from Pro table.
Dispense Table
ID Dispense date Row ID
604743 10/18/2016 1
604743 11/4/2016 2
604743 11/28/2016 3
604743 12/16/2016 4
Pro table
ID Shipment Date Row ID
604743 11/1/2016 1
604743 11/19/2016 2
604743 11/21/2016 3
604743 11/28/2016 4
604743 12/13/2016 5
Output Needed
ID Dispense date Pre Ship date
604743 10/18/2016 NULL
604743 11/4/2016 11/1/2016
604743 11/28/2016 11/19/2016
604743 12/16/2016 12/13/2016
Reason for NULL: Because less than 10/18 dispense date no nearest date in Shipping date in Pro Table

I tried a recursive CTE, but recursive CTEs do not allow the anchor to be referenced in a subquery.
So a while-loop is the best I could do.
The script allows for several Dispense or Pro at same date, see the comments on which part you can throw out if that is not allowed by constraints.
-- Pairs each dispense with an earlier shipment of the same ID.
-- Every pass of the loop looks at the (dispense, shipment) pairs whose rows are
-- both still unmatched and inserts the pairs that are the best remaining
-- candidate (smallest day difference, then lowest row ids) both among the
-- candidates sharing their shipment date and among those sharing their
-- dispense date. The loop stops when no unmatched candidate pair is left.
-- Table variables and the loop counter use the @ prefix that T-SQL requires.

-- Dispense_Date need not be unique
declare @Dispense table(
    ID int not null,
    Dispense_Date date not null,
    Row_ID int not null identity primary key
)
-- Shipment_Date need not be unique
declare @Pro table(
    ID int not null,
    Shipment_Date date not null,
    Row_ID int not null identity primary key
)
-- One row per matched pair; the unique constraints guarantee that a row of
-- either source table is used in at most one pair.
declare @Result table(
    ID int not null,
    Dispense_Date date not null,
    Dispense_Row_ID int not null unique,
    Shipment_Date date not null,
    Pro_Row_ID int not null unique,
    dayDiff int not null,
    Row_ID int not null identity primary key,
    iter int not null            -- loop pass in which the pair was found
)

insert into @Dispense(ID, Dispense_Date)
values (604743, '10/18/2016'),
       (604743, '11/4/2016'),
/*     (604743, '11/26/2016'),
       (604743, '11/27/2016'),
       (604743, '11/27/2016'),
       (604743, '11/28/2016'), */
       (604743, '11/28/2016'),
       (604743, '12/16/2016')

insert into @Pro(ID, Shipment_Date)
values (604743, '11/1/2016'),
/*     (604743, '11/16/2016'),
       (604743, '11/19/2016'), */
       (604743, '11/19/2016'),
       (604743, '11/21/2016'),
       (604743, '11/28/2016'),
       (604743, '12/13/2016')

declare @iter int = 0

-- Continue while some dispense and some earlier shipment are both unmatched.
while exists(
    select 1
    from @Dispense Dispense
    inner join @Pro Pro
        on Pro.ID = Dispense.ID
       and Pro.Shipment_Date < Dispense.Dispense_Date
    where not exists(
        select 1
        from @Result Result
        where Result.Dispense_Row_ID = Dispense.Row_ID
           or Result.Pro_Row_ID = Pro.Row_ID
    )
)
begin
    set @iter = @iter + 1;

    -- distance: every candidate pair of still-unmatched rows with its gap in days
    with distance(
        ID, Dispense_Row_ID, Dispense_Date, Pro_Row_ID, Shipment_Date, dayDiff
    ) as(
        select Dispense.ID,
               Dispense.Row_ID Dispense_Row_ID,
               Dispense.Dispense_Date,
               Pro.Row_ID Pro_Row_ID,
               Pro.Shipment_Date,
               DATEDIFF(DAY, Pro.Shipment_Date, Dispense.Dispense_Date) dayDiff
        from @Dispense Dispense
        inner join @Pro Pro
            on Pro.ID = Dispense.ID
           and Pro.Shipment_Date < Dispense.Dispense_Date
        where not exists(
            select 1
            from @Result Result
            where Result.Dispense_Row_ID = Dispense.Row_ID
               or Result.Pro_Row_ID = Pro.Row_ID
        )
    )
    insert into @Result(ID, Dispense_Row_ID, Dispense_Date, Pro_Row_ID, Shipment_Date, dayDiff, iter)
    select Dispense.ID,
           Dispense.Row_ID Dispense_Row_ID,
           Dispense.Dispense_Date,
           distance.Pro_Row_ID,
           distance.Shipment_Date,
           distance.dayDiff,
           @iter
    from @Dispense Dispense
    inner join distance
        on distance.Dispense_Row_ID = Dispense.Row_ID
       -- no better candidate exists for the same shipment date
       and not exists(
            select 1
            from distance dtExists
            where dtExists.ID = distance.ID
              and dtExists.Shipment_Date = distance.Shipment_Date
              and (
                    dtExists.dayDiff < distance.dayDiff
                    -- below OR not needed if Dispense_Date and Shipment_Date are unique
                    or (
                        dtExists.dayDiff = distance.dayDiff
                        and (
                            dtExists.Pro_Row_ID < distance.Pro_Row_ID
                            or (
                                dtExists.Pro_Row_ID = distance.Pro_Row_ID
                                and dtExists.Dispense_Row_ID < distance.Dispense_Row_ID
                            )
                        )
                    )
              )
       )
       -- no better candidate exists for the same dispense date
       and not exists(
            select 1
            from distance dtExists
            where dtExists.ID = distance.ID
              and dtExists.Dispense_Date = distance.Dispense_Date
              and (
                    dtExists.dayDiff < distance.dayDiff
                    -- below OR not needed if Dispense_Date and Shipment_Date are unique
                    or (
                        dtExists.dayDiff = distance.dayDiff
                        and (
                            dtExists.Pro_Row_ID < distance.Pro_Row_ID
                            or (
                                dtExists.Pro_Row_ID = distance.Pro_Row_ID
                                and dtExists.Dispense_Row_ID < distance.Dispense_Row_ID
                            )
                        )
                    )
              )
       )
end

-- All dispenses, with the matched shipment (NULL columns when none was found).
select Dispense.ID,
       Dispense.Row_ID Dispense_Row_ID,
       Dispense.Dispense_Date,
       Result.Pro_Row_ID,
       Result.Shipment_Date,
       Result.dayDiff,
       Result.iter
from @Dispense Dispense
left join @Result Result
    on Result.Dispense_Row_ID = Dispense.Row_ID
order by Dispense.ID,
         Dispense.Dispense_Date,
         Result.Shipment_Date,
         Result.Dispense_Row_ID,
         Result.Pro_Row_ID

Related

Get Results based on the latest date in a Joined table

I have 2 tables tbl_job & tbl_appointment
I want to get all active jobs starting with job_refrence = '1%' also the latest active appointment.created_when and appointment_type associated with it ordered by the appointment.created_when
A job can have many appointments
If all the appointments associated with the job has deleted_flag = 1
then the resultset should return tbl_job.created-when as the
latest_appointment_date
-- Jobs; deleted_flag = 1 marks a soft-deleted row.
CREATE TABLE [dbo].[tbl_job] (
    [job_id]        UNIQUEIDENTIFIER NOT NULL,
    [account_id]    INT              NOT NULL,
    [description]   NVARCHAR(1024)   NULL,
    [deleted_flag]  BIT              NOT NULL,
    [created_when]  DATETIME2(6)     NOT NULL,
    [job_reference] NVARCHAR(15)     NOT NULL
)

-- Appointments; a job can have many, linked through job_id.
CREATE TABLE [dbo].[tbl_appointment] (
    [appointment_id]   UNIQUEIDENTIFIER NOT NULL,
    [job_id]           UNIQUEIDENTIFIER NOT NULL,
    [deleted_flag]     BIT              NOT NULL,
    [appointment_type] NVARCHAR(35)     NOT NULL,
    [created_when]     DATETIME2(6)     NOT NULL,
    [account_id]       INT              NULL
)

-- Sample jobs (job_reference is supplied as an integer and converted implicitly).
INSERT INTO dbo.tbl_job
    (job_id, account_id, [description], deleted_flag, created_when, job_reference)
VALUES
    ('A29A6381-EF0D-47F6-BFC3-051679E343D0', 1, 'descr1', 0, '2020-06-12 00:58:17.7221410', 1),
    ('4D8C1B04-9E00-41FA-BDB8-653C26712144', 1, 'descr2', 0, '2020-06-10 00:58:17.7221410', 12),
    ('F8DC690E-74AB-46F4-90D3-55E032F21C99', 1, 'descr3', 0, '2020-06-26 00:58:17.7221410', 123),
    ('C5D8AA45-FA45-41A4-877D-5B803C1BE61B', 1, 'descr4', 0, '2020-06-27 00:58:17.7221410', 76)

-- Sample appointments; the last one is soft-deleted.
INSERT INTO dbo.tbl_appointment
    (appointment_id, job_id, deleted_flag, appointment_type, created_when, account_id)
VALUES
    ('9E24451F-5703-414F-ACF1-9304AFBEA8F1', 'A29A6381-EF0D-47F6-BFC3-051679E343D0', 0, 'job1_cat1', '2020-06-12 00:58:17.7221410', 1),
    ('A8121DC1-271E-4BD0-A6AA-D753CF4D310E', 'A29A6381-EF0D-47F6-BFC3-051679E343D0', 0, 'job1_cat2', '2020-06-14 00:58:17.7221410', 1),
    ('61ED5B48-DF95-4FC8-AF1D-1418C6DD9088', '4D8C1B04-9E00-41FA-BDB8-653C26712144', 0, 'job2_cat1', '2020-06-15 00:58:17.7221410', 1),
    ('0e4fc735-96c3-4cab-8ade-796bae4639d1', 'F8DC690E-74AB-46F4-90D3-55E032F21C99', 1, 'job3_cat1', '2020-06-28 00:58:17.7221410', 1)
Expected ResultSet
job_id job_reference latest_appointment_date appointment_type total_rows
F8DC690E-74AB-46F4-90D3-55E032F21C99 123 2020-06-26 00:58:17.722141 NULL 3
4D8C1B04-9E00-41FA-BDB8-653C26712144 12 2020-06-15 00:58:17.722141 job2_cat1 3
A29A6381-EF0D-47F6-BFC3-051679E343D0 1 2020-06-14 00:58:17.722141 job1_cat2 3
The query below works, but it is not efficient, as we have millions of rows in the tables. I would like to replace the OUTER APPLY with a LEFT JOIN, or find some other way to make it more efficient.
-- Collects the non-deleted jobs of one account whose reference starts with a
-- prefix, together with each job's most recent non-deleted appointment, then
-- returns one page of them plus the total row count.
-- Table variable and scalars use the @ prefix that T-SQL requires.
DECLARE @filtered_jobs TABLE
(
    job_domain_id            UNIQUEIDENTIFIER
    ,job_reference           NVARCHAR(15)
    ,job_created_when        DATETIME2(6)
    ,latest_appointment_date DATETIME2(6)
    ,appointment_type        NVARCHAR(35)
);

DECLARE @account_id INT = 1;
DECLARE @job_reference NVARCHAR(35) = '1';
DECLARE @offset INT = 0;
DECLARE @limit INT = 10;
DECLARE @is_sort_ascending INT = 0;   -- 0 = newest first, 1 = oldest first

INSERT INTO @filtered_jobs (job_domain_id, job_reference, job_created_when, latest_appointment_date, appointment_type)
SELECT
    j.job_id
    ,j.job_reference
    ,j.created_when
    ,ap.created_when AS latest_appointment_date
    ,ap.appointment_type
FROM dbo.tbl_job j
-- Latest non-deleted appointment per job; yields NULLs when the job has none.
OUTER APPLY (
    SELECT TOP (1) a.appointment_type, a.created_when
    FROM dbo.tbl_appointment a
    WHERE a.job_id = j.job_id AND a.deleted_flag = 0
    ORDER BY a.created_when DESC
) ap
WHERE j.account_id = @account_id
  AND j.job_reference LIKE (@job_reference + '%')
  AND j.deleted_flag = 0;

-- Page of results; jobs without an appointment fall back to their own creation time.
SELECT
    fj.job_domain_id
    ,fj.job_reference
    ,ISNULL(fj.latest_appointment_date, fj.job_created_when) AS latest_appointment_date
    ,fj.appointment_type
FROM @filtered_jobs fj
ORDER BY
    CASE WHEN @is_sort_ascending = 0 THEN ISNULL(fj.latest_appointment_date, fj.job_created_when) END DESC,
    CASE WHEN @is_sort_ascending = 1 THEN ISNULL(fj.latest_appointment_date, fj.job_created_when) END ASC
OFFSET @offset ROWS FETCH NEXT @limit ROWS ONLY;

SELECT COUNT(1) AS total_records
FROM @filtered_jobs;
You could use a ranking function but you need to test whether it is faster:
From the query you posted, replace the insert into #filtered_jobs with these 2:
-- Step 1: load the matching jobs without any appointment data.
INSERT INTO @filtered_jobs (job_domain_id, job_reference, job_created_when)
SELECT j.job_id
      ,j.job_reference
      ,j.created_when
FROM dbo.tbl_job j
WHERE j.account_id = @account_id
  AND j.job_reference LIKE (@job_reference + '%')
  AND j.deleted_flag = 0;

-- Step 2: fill in the latest non-deleted appointment of each job.
-- ROW_NUMBER (rather than RANK) yields exactly one row per job even when two
-- appointments share the same created_when, so the UPDATE is deterministic.
UPDATE f
SET latest_appointment_date = x.latest_appointment_date,
    appointment_type = x.appointment_type
FROM @filtered_jobs f
INNER JOIN (
    SELECT fj.job_domain_id
          ,ap.created_when AS latest_appointment_date
          ,ap.appointment_type AS appointment_type
          ,ROW_NUMBER() OVER (PARTITION BY ap.job_id ORDER BY ap.created_when DESC) AS rn
    FROM @filtered_jobs fj
    INNER JOIN dbo.tbl_appointment ap ON ap.job_id = fj.job_domain_id
    WHERE ap.deleted_flag = 0
) x ON f.job_domain_id = x.job_domain_id
WHERE x.rn = 1;
Make sure you have an index on dbo.tbl_appointment (job_id, deleted_flag), preferably with created_when and appointment_type as included columns, if this query is run a lot.

SQL - "NOT IN" in WHERE clause using INNER JOIN not working

I need to filter a table based in a sub table data.
I'll illustrate with hypothetical data to make it easier to explain:
Master table: Cars
Sub table: Attributes (like Color, car type, accessories)
These attributes have an id (idOption) and the selected value (idList)
So, in an example, I need to filter all the cars with the color (idOption = 10) yellow (idList = 45). I can't filter this directly because the search need to consider the other option's results (which include the types, accessories.
When I use NOT IN for just one table, it works. But when I use merging the 2 tables with INNER JOIN, it does not work.
So in summary, I need to filter the 3 idOption (when is not NULL) with a given value, and this needs to reflect in the main table, grouped by product.
Table Cars:
idProduct | Description
1 Product A
2 Product B
3 Product C
Table Attributes:
idRow idProduct idOption idList
---------------------------------------
1 1 10 45
2 2 10 46
3 3 10 47
4 1 11 10
5 2 11 98
6 1 14 56
7 3 16 28
8 2 20 55
This is the stored procedure that I created which is not working:
-- Returns the cars having at least one attribute row accepted by the filter.
-- Up to three (option, value) pairs may be passed; a NULL option disables
-- that pair. Parameters use the @ prefix that T-SQL requires.
-- NOTE: the first branch of the WHERE clause accepts every attribute row whose
-- idOption is not one of the supplied options, so a car is returned as soon as
-- it has any attribute outside the filtered options. This is the behaviour the
-- question is about and it is intentionally left unchanged here.
ALTER PROCEDURE [dbo].[SP_GET_TestSearch]
(
    @Param1  BIGINT = NULL,
    @PValue1 BIGINT = NULL,
    @Param2  BIGINT = NULL,
    @PValue2 BIGINT = NULL,
    @Param3  BIGINT = NULL,
    @PValue3 BIGINT = NULL
)
AS
SET NOCOUNT ON;

SELECT
    Cars.idProduct,
    Cars.[Description]
FROM Cars
INNER JOIN Attributes
    ON Cars.idProduct = Attributes.idProduct
WHERE
    (
        (@Param1 IS NULL OR Attributes.idOption NOT IN (@Param1))
        AND (@Param2 IS NULL OR Attributes.idOption NOT IN (@Param2))
        AND (@Param3 IS NULL OR Attributes.idOption NOT IN (@Param3))
    )
    -- ISNULL(x, NULL) was a no-op and has been dropped; a NULL parameter
    -- makes the comparison UNKNOWN, so the pair simply never matches.
    OR (Attributes.idOption = @Param1 AND Attributes.idList = @PValue1)
    OR (Attributes.idOption = @Param2 AND Attributes.idList = @PValue2)
    OR (Attributes.idOption = @Param3 AND Attributes.idList = @PValue3)
GROUP BY
    Cars.idProduct, Cars.[Description]
The following code demonstrates how to implement the logic of excluding vehicles from query results if they have any "bad" property values. The rejection is handled by ... where not exists ... which is used to check each car against the "bad" property values.
Rather than using an assortment of (hopefully) paired parameters to pass the undesirable properties, the values are passed in a table. The stored procedure to implement this ought to use a table-valued parameter (TVP) to pass the table.
-- Demonstrates excluding cars that have any "bad" property value by means of
-- NOT EXISTS. Table variables use the @ prefix that T-SQL requires.

-- Sample data.
declare @Cars as Table ( CarId Int Identity, Description VarChar(16) );
insert into @Cars ( Description ) values
  ( 'Esplanade' ), ( 'Tankigator' ), ( 'Land Yacht' );
select * from @Cars;

declare @Properties as Table ( PropertyId Int Identity, Description VarChar(16) );
insert into @Properties ( Description ) values
  ( 'Turbochargers' ), ( 'Superchargers' ), ( 'Hyperchargers' ), ( 'Color' ), ( 'Spare Tires' );
select * from @Properties;

declare @CarProperties as Table ( CarId Int, PropertyId Int, PropertyValue Int );
insert into @CarProperties ( CarId, PropertyId, PropertyValue ) values
  ( 1, 1, 1 ), ( 1, 4, 24 ), ( 1, 4, 42 ), -- Two tone!
  ( 2, 2, 1 ), ( 2, 4, 7 ),
  ( 3, 1, 2 ), ( 3, 4, 0 ), ( 3, 5, 6 );

-- Every car with each of its property values, for reference.
select C.CarId, C.Description as CarDescription,
  P.PropertyId, P.Description as PropertyDescription,
  CP.PropertyValue
  from @Cars as C inner join
    @CarProperties as CP on CP.CarId = C.CarId inner join
    @Properties as P on P.PropertyId = CP.PropertyId
  order by C.CarId, P.PropertyId;

-- Test data: Avoid vehicles that have _any_ of these property values.
-- This should be passed to the stored procedure as a table-value parameter (TVP).
declare @BadProperties as Table ( PropertyId Int, PropertyValue Int );
insert into @BadProperties ( PropertyId, PropertyValue ) values
  ( 2, 1 ), ( 2, 2 ), ( 2, 4 ),
  ( 4, 62 ), ( 4, 666 );
select BP.PropertyId, BP.PropertyValue, P.Description
  from @BadProperties as BP inner join
    @Properties as P on P.PropertyId = BP.PropertyId;

-- Query the data: keep a car only if none of its (property, value) rows
-- appears in @BadProperties.
select C.CarId, C.Description as CarDescription
  from @Cars as C
  where not exists (
    select 42
      from @CarProperties as CP inner join
        @BadProperties as BP on BP.PropertyId = CP.PropertyId and BP.PropertyValue = CP.PropertyValue
      where CP.CarId = C.CarId )
  order by C.CarId;
A few things here.
Firstly, this kind of catch all procedure is a bit of an anti pattern for all sorts of reasons, see here for a full explanation:- https://sqlinthewild.co.za/index.php/2018/03/13/revisiting-catch-all-queries/
Secondly, you need to be very careful of using NOT IN with nullable values in a list: http://www.sqlbadpractices.com/using-not-in-operator-with-null-values/
I've added the DDL for the tables:-
-- Rebuild the demo objects from scratch; Attributes is dropped first because
-- it holds a foreign key to Cars.
IF OBJECT_ID('Attributes') IS NOT NULL DROP TABLE Attributes;
IF OBJECT_ID('Cars') IS NOT NULL DROP TABLE Cars;
IF OBJECT_ID('SP_GET_TestSearch') IS NOT NULL DROP PROCEDURE SP_GET_TestSearch;

CREATE TABLE Cars (
    idProduct   INT         PRIMARY KEY,
    Description VARCHAR(20) NOT NULL
);

CREATE TABLE Attributes (
    idRow     INT PRIMARY KEY,
    idProduct INT NOT NULL FOREIGN KEY REFERENCES dbo.Cars (idProduct),
    idOption  INT NOT NULL,
    idList    INT NOT NULL
);

-- Sample cars.
INSERT INTO dbo.Cars (idProduct, Description)
VALUES
    (1, 'Product A'),
    (2, 'Product B'),
    (3, 'Product C');

-- Sample attribute rows: (idRow, idProduct, idOption, idList).
INSERT INTO dbo.Attributes (idRow, idProduct, idOption, idList)
VALUES
    (1, 1, 10, 45),
    (2, 2, 10, 46),
    (3, 3, 10, 47),
    (4, 1, 11, 10),
    (5, 2, 11, 98),
    (6, 1, 14, 56),
    (7, 3, 16, 28),
    (8, 2, 20, 55);
GO
The issue with your query, is that the first part of the block is always evaluated to TRUE for any idOption that you don't specify:-
-- Excerpt of the procedure's WHERE clause (parameters written with @):
((@Param1 IS NULL OR (idOption NOT IN (@Param1)))
AND
(@Param2 IS NULL OR (idOption NOT IN (@Param2)))
AND
(@Param3 IS NULL OR (idOption NOT IN (@Param3))))
To explain; if I pass in the following:-
-- Example call: options 11 (twice) and 14 with the values to look for.
-- Local variables use the @ prefix that T-SQL requires.
DECLARE @Param1 BIGINT
, @Param2 BIGINT
, @Param3 BIGINT
, @PValue1 BIGINT
, @PValue2 BIGINT
, @PValue3 BIGINT;
SET @Param1 = 11
SET @PValue1 = 42
SET @Param2 = 11
SET @PValue2 = 10
SET @Param3 = 14
SET @PValue3 = 56
EXEC dbo.SP_GET_TestSearch @Param1, @PValue1, @Param2, @PValue2, @Param3, @PValue3
Then you effectively have WHERE idOption NOT IN (11,14) as the evaluation for the first part of the clause, so all other rows are returned.
I suspect you really want the WHERE clause to be:-
-- Suggested WHERE clause (parameters written with @): no filtering when no
-- option is supplied, otherwise keep rows matching any supplied pair.
WHERE
(@Param1 IS NULL AND @Param2 IS NULL AND @Param3 IS NULL)
OR
(idOption = @Param1
AND idList = @PValue1)
OR
(idOption = @Param2
AND idList = @PValue2)
OR
(idOption = @Param3
AND idList = @PValue3)

SQL Server How to insert when not exist?

I have two tables, one is called Invoices and another is called Records.
-- Invoices issued so far; each belongs to one numbering record.
CREATE TABLE Invoices
(
    InvoiceNum INT NOT NULL,
    -- Explicit scale: a bare DECIMAL is DECIMAL(18, 0) and would drop the cents.
    Amount DECIMAL(18, 2),
    RecordPK UNIQUEIDENTIFIER NOT NULL
)
-- Numbering records: first number, next number to issue, last allowed number.
CREATE TABLE Records(
    RecordPK UNIQUEIDENTIFIER NOT NULL PRIMARY KEY,
    StartNum INT NOT NULL,
    NextNum INT NOT NULL,
    MaxNum INT NOT NULL,
    InvPrefix VARCHAR(2) NOT NULL
)
The records table will record the invoice start number, how many invoices we have created(NextNum) and how many invoices we can create(MaxNum).
For example, Assume we have several records in two tables.
Invoice Table:
InvoiceNum Amount RecordPk
1 19.00 EDFA0541-5583-4CDD-BDFF-21D6F6504522
2 50.00 EDFA0541-5583-4CDD-BDFF-21D6F6504522
3 3.00 EDFA0541-5583-4CDD-BDFF-21D6F6504522
10 1.00 D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9
11 99.00 D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9
12 13.00 D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9
Records Table:
RecordPk StartNum NextNum MaxNum Prefix
EDFA0541-5583-4CDD-BDFF-21D6F6504522 1 4 10 AA
D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9 10 13 14 AA
My question is when I search the invoice table with Prefix AA, how can I get the result like below, the InvoiceNum should reach the MaxNum, the Amount and RecordPK of not exist rows should left blank, the Remark column should fill with Blank.
InvoiceNum Amount RecordPk Remark
1 19.00 EDFA0541-5583-4CDD-BDFF-21D6F6504522
2 50.00 EDFA0541-5583-4CDD-BDFF-21D6F6504522
3 3.00 EDFA0541-5583-4CDD-BDFF-21D6F6504522
4 Blank
5 Blank
6 Blank
7 Blank
8 Blank
9 Blank
10 1.00 D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9
11 99.00 D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9
12 13.00 D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9
13 Blank
14 Blank
You need to generate a table with numbers to cover the range of numbers that you need (for each row in Records table, from StartNum to MaxNum). You can do this for example, by selecting from some existing table with enough rows and using ROW_NUMBER window function. Then filter this sequence to include only the numbers you need. Left join the Invoices table to show the data for the corresponding invoice and use IIF function to check is there invoice with this number or not.
-- Lists every invoice number covered by some record's StartNum..MaxNum range,
-- with the invoice data where it exists and the remark 'Blank' where it does not.
-- Table variables use the @ prefix that T-SQL requires; Amount keeps two decimals.
declare @Invoices table(InvoiceNum INT NOT NULL, Amount DECIMAL(18, 2), RecordPK UNIQUEIDENTIFIER NOT NULL)
declare @Records table(RecordPK UNIQUEIDENTIFIER NOT NULL PRIMARY KEY, StartNum INT NOT NULL, NextNum INT NOT NULL, MaxNum INT NOT NULL, InvPrefix VARCHAR(2) NOT NULL)

insert into @Invoices(InvoiceNum, Amount, RecordPK) values
(1 , 19.00, 'EDFA0541-5583-4CDD-BDFF-21D6F6504522'),
(2 , 50.00, 'EDFA0541-5583-4CDD-BDFF-21D6F6504522'),
(3 , 3.00 , 'EDFA0541-5583-4CDD-BDFF-21D6F6504522'),
(10, 1.00 , 'D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9'),
(11, 99.00, 'D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9'),
(12, 13.00, 'D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9')

insert into @Records(RecordPK, StartNum, NextNum, MaxNum, InvPrefix) values
('EDFA0541-5583-4CDD-BDFF-21D6F6504522', 1 , 4 , 10, 'AA'),
('D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9', 10, 13, 14, 'AA')

-- numbers: 1..N, where N is the number of rows in sys.objects (the sequence is
-- only as long as that catalog view).
;with numbers as (select ROW_NUMBER() over(order by object_id) as No from sys.objects)
select
    n.No as InvoiceNum
    , inv.Amount
    , inv.RecordPK
    , IIF(inv.InvoiceNum is null, 'Blank', null) as Remark
from numbers n
left join @Invoices inv on n.No = inv.InvoiceNum
where exists(select * from @Records r where r.StartNum <= n.No and n.No <= r.MaxNum)
order by n.No
@Andrey Nikolov has it covered; however, I've been working on this for the last 15 minutes, so I thought I'd post it anyway.
Essentially an intermediary table should be used to count up the values you don't have, then in my version of this answer I've used a union query to generate the "Blank" value. I have not included the unique identifier for brevity but the application is the same.
-- Fills the gaps in the invoice numbers with 'BLANK' rows using a helper
-- table of consecutive numbers (#invoice and #rowcount are temp tables).
if OBJECT_ID('tempdb..#invoice') is not null drop table #invoice;
if OBJECT_ID('tempdb..#rowcount') is not null drop table #rowcount;

create table #invoice
(
    invoicenum int,
    amount decimal(18, 2)   -- explicit scale so 19.00 is not stored as 19
);
insert into #invoice (invoicenum, amount)
values
(1, 19.00),
(2, 50.00),
(3, 3.00),
(10, 1.00),
(11, 99.00),
(12, 13.00);

create table #rowcount
(
    rownumber int
);

-- Count up to the highest invoice number. Using the row count here (6)
-- would stop too early and never generate the missing numbers 7-9.
declare @max int;
select @max = max(invoicenum) from #invoice;
declare @runs int = 1;
while @runs <= @max
begin
    insert into #rowcount (rownumber)
    values (@runs);
    set @runs = @runs + 1;
end

-- Existing invoices plus one 'BLANK' row per number without an invoice;
-- the two sets are disjoint, so UNION ALL is sufficient.
select invoicenum, cast(amount as nvarchar(25)) as amount from #invoice
union all
select r.rownumber, 'BLANK'
from #rowcount r
left join #invoice i on r.rownumber = i.invoicenum
where i.invoicenum is null
order by invoicenum;

drop table #invoice, #rowcount;
You need a LEFT JOIN
-- One row per number 1..14; the invoice number comes from the generated list
-- (RC) so that it is also shown for numbers that have no invoice.
SELECT RC.InvoiceNum,
       I.Amount,
       I.RecordPK,
       CASE WHEN I.InvoiceNum IS NULL THEN 'Blank' END Remark
FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (13), (14)) RC (InvoiceNum)
LEFT JOIN Invoices I
ON RC.InvoiceNum = I.InvoiceNum
ORDER BY RC.InvoiceNum;
The value 1 is the StartNum and 14 is the MAX MaxNum.
I used VALUES because the numbers are known; you can use a recursive CTE to generate the missing InvoiceNum values, then LEFT JOIN the CTE with your table.
Demo
I will do it this way:
-- Builds the numbers 1..max(MaxNum) and left-joins the invoices of prefix 'AA'
-- onto them. #Invoices, #Records and #TEMP are temp tables; the scalar and the
-- table variable use the @ prefix that T-SQL requires.
IF OBJECT_ID('tempdb..#Invoices') IS NOT NULL DROP TABLE #Invoices
CREATE TABLE #Invoices
(
    InvoiceNum INT NOT NULL,
    Amount DECIMAL(18, 2),   -- explicit scale so the cents are kept
    RecordPK UNIQUEIDENTIFIER NOT NULL
)
IF OBJECT_ID('tempdb..#Records') IS NOT NULL DROP TABLE #Records
CREATE TABLE #Records(
    RecordPK UNIQUEIDENTIFIER NOT NULL PRIMARY KEY,
    StartNum INT NOT NULL,
    NextNum INT NOT NULL,
    MaxNum INT NOT NULL,
    InvPrefix VARCHAR(2) NOT NULL
)

INSERT INTO #Invoices
SELECT 1, 19.00, 'EDFA0541-5583-4CDD-BDFF-21D6F6504522'
UNION SELECT 2 , 50.00, 'EDFA0541-5583-4CDD-BDFF-21D6F6504522'
UNION SELECT 3 , 3.00 , 'EDFA0541-5583-4CDD-BDFF-21D6F6504522'
UNION SELECT 10 , 1.00 , 'D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9'
UNION SELECT 11 , 99.00, 'D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9'
UNION SELECT 12 , 13.00, 'D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9'

INSERT INTO #Records
SELECT 'EDFA0541-5583-4CDD-BDFF-21D6F6504522', 1, 4, 10, 'AA'
UNION SELECT 'D64EFF0E-65D5-467E-8C82-BFBB6A24AAC9', 10, 13, 14, 'AA'

-- Highest invoice number any record allows.
DECLARE @MAX_NUM INT = (SELECT MAX(MaxNum) FROM #Records)

-- Consecutive numbers 1..@MAX_NUM (limited by the row count of sys.objects).
DECLARE @TEMP_INV TABLE (InvoiceNum INT)
INSERT INTO @TEMP_INV
SELECT Num
FROM
(
    SELECT ROW_NUMBER() OVER(ORDER BY object_id) AS Num FROM sys.objects
) A
WHERE Num <= @MAX_NUM

-- Invoices that belong to a record with prefix 'AA'.
IF OBJECT_ID('tempdb..#TEMP') IS NOT NULL DROP TABLE #TEMP
SELECT I.InvoiceNum, I.Amount, I.RecordPK
INTO #TEMP
FROM #Invoices I
INNER JOIN #Records R
ON I.RecordPK = R.RecordPK
WHERE R.InvPrefix = 'AA'

-- Every number, with its invoice when there is one and 'BLANK' otherwise.
SELECT A.InvoiceNum, B.Amount, B.RecordPK, CASE WHEN B.InvoiceNum IS NULL THEN 'BLANK' END AS Remark
FROM @TEMP_INV A
LEFT JOIN #TEMP B
ON A.InvoiceNum = B.InvoiceNum
ORDER BY A.InvoiceNum

sql compute difference between 2 rows

I'm looking for a methodology to compare the difference between 2 rows in the same table. From what I found here (How to get difference between two rows for a column field?) it's almost what I wanted. I have done the following code:
-- Temp table holding one NAV value per fund / share type / date.
CREATE TABLE #tmpTest
(
    id_fund      INT      NULL,
    id_ShareType INT      NULL,
    ValueDate    DATETIME NULL,
    VarNAV       FLOAT    NULL,
    FundPerf     FLOAT    NULL
)

INSERT INTO #tmpTest (id_fund, id_ShareType, ValueDate, VarNAV)
VALUES (1, 1, '20140101', 100)

INSERT INTO #tmpTest (id_fund, id_ShareType, ValueDate, VarNAV)
VALUES (1, 1, '20140102', 20)

-- For each row (hrc) look up the row with the next ValueDate in the whole
-- table (hrn) and store the relative change from hrc to hrn on hrc.
UPDATE #tmpTest
SET hrc.FundPerf = (ISNULL(hrn.VarNAV, 0) - hrc.VarNAV) / hrc.VarNAV
FROM #tmpTest hrc
LEFT JOIN #tmpTest hrn
    ON hrn.ValueDate = (
           SELECT MIN(ValueDate)
           FROM #tmpTest
           WHERE ValueDate > hrc.ValueDate
       )
   AND hrc.id_fund = hrn.id_fund
   AND hrc.id_ShareType = hrn.id_ShareType
My issue is that the result I'm computing starts on line 1 instead of line 2.
Hereunder the result I'm obtaining:
id_fund id_ShareType ValueDate VarNAV FundPerf
------- ------------ ------------------- ------- -----------------------------
1 1 2014-01-01 00:00:00 100 -0.8
1 1 2014-01-02 00:00:00 20 -1
whereas I'd like it to be that way:
id_fund id_ShareType ValueDate VarNAV FundPerf
------- ------------ ------------------- ------- -----------------------------
1 1 2014-01-01 00:00:00 100 -1
1 1 2014-01-02 00:00:00 20 -0.8
What's wrong with my approach?
You are not restricting the minimum to the same fund and share type.
-- Same update, but the "next date" is searched only among rows of the same
-- fund and share type as the current row (hrc).
UPDATE #tmpTest
SET hrc.FundPerf = (ISNULL(hrn.VarNAV, 0) - hrc.VarNAV) / hrc.VarNAV
FROM #tmpTest hrc
LEFT JOIN #tmpTest hrn
    ON hrn.ValueDate = (
           SELECT MIN(ValueDate)
           FROM #tmpTest tt
           WHERE tt.ValueDate > hrc.ValueDate
             AND hrc.id_fund = tt.id_fund
             AND hrc.id_ShareType = tt.id_ShareType
       )
   AND hrc.id_fund = hrn.id_fund
   AND hrc.id_ShareType = hrn.id_ShareType;
Try this:
-- Stores on each row (hrn) its relative change versus the previous row (hrc)
-- of the same fund and share type. The "next date" lookup is restricted to
-- that fund / share type, and a zero previous NAV yields NULL instead of a
-- divide-by-zero error. Only rows that have a predecessor are updated, so an
-- inner join is sufficient.
update hrn
set FundPerf = (isnull(hrn.VarNAV, 0) - hrc.VarNAV) / nullif(hrc.VarNAV, 0)
from #tmpTest hrc
inner join #tmpTest hrn
    on hrn.ValueDate = (select min(tt.ValueDate)
                        from #tmpTest tt
                        where tt.ValueDate > hrc.ValueDate
                          and tt.id_fund = hrc.id_fund
                          and tt.id_ShareType = hrc.id_ShareType)
   and hrc.id_fund = hrn.id_fund
   and hrc.id_ShareType = hrn.id_ShareType
Hi you can achieve this using by CTE (Common Table Expression)
create table #tmpTest
(
    id_fund int null,
    id_ShareType int null,
    ValueDate datetime null,
    VarNAV float null,
    FundPerf float null
)
insert into #tmpTest(id_fund, id_ShareType, ValueDate, VarNAV)
values(1,1,'20140101',100)
insert into #tmpTest(id_fund, id_ShareType, ValueDate, VarNAV)
values(1,1,'20140102',20)

-- tbl: rows numbered by date within each fund / share type, so a row is only
-- ever compared with the previous row of its own series.
;With tbl as
(
    Select Row_Number() OVER (Partition by T.id_fund, T.id_ShareType Order by T.ValueDate) as RowNumber, *
    From #tmpTest T
)
-- Relative change versus the previous row; NULL for the first row of a series
-- and when the previous NAV is zero (NULLIF avoids the divide-by-zero error).
-- Rows whose id_fund or id_ShareType is NULL never find a predecessor, because
-- the join compares those columns with "=".
SELECT Cur.*, (ISNULL(Cur.VarNAV,0) - ISNULL(Prv.VarNAV,0)) / NULLIF(Prv.VarNAV, 0) as [Col Name]
FROM tbl Cur
LEFT OUTER JOIN tbl Prv
    ON Cur.RowNumber = Prv.RowNumber + 1
   AND Cur.id_fund = Prv.id_fund
   AND Cur.id_ShareType = Prv.id_ShareType
ORDER BY Cur.ValueDate

Find conflicted date intervals using SQL

Suppose I have following table in Sql Server 2008:
ItemId StartDate EndDate
1 NULL 2011-01-15
2 2011-01-16 2011-01-25
3 2011-01-26 NULL
As you can see, this table has StartDate and EndDate columns. I want to validate the data in these columns: intervals must not conflict with each other. So the table above is valid, but the next table is invalid, because the first row has an EndDate greater than the StartDate of the second row.
ItemId StartDate EndDate
1 NULL 2011-01-17
2 2011-01-16 2011-01-25
3 2011-01-26 NULL
NULL means infinity here.
Could you help me to write a script for data validation?
[The second task]
Thanks for the answers.
I have a complication. Let's assume, I have such table:
ItemId IntervalId StartDate EndDate
1 1 NULL 2011-01-15
2 1 2011-01-16 2011-01-25
3 1 2011-01-26 NULL
4 2 NULL 2011-01-17
5 2 2011-01-16 2011-01-25
6 2 2011-01-26 NULL
Here I want to validate intervals within a groups of IntervalId, but not within the whole table. So, Interval 1 will be valid, but Interval 2 will be invalid.
And also. Is it possible to add a constraint to the table in order to avoid such invalid records?
[Final Solution]
I created function to check if interval is conflicted:
-- Returns 1 when another row of Items in the same interval group overlaps the
-- given date range, 0 otherwise.
--   @intervalId      group (IntervalId) to check within
--   @originalItemId  ItemId of the row being validated; it is excluded
--   @startDate       range start, NULL = unbounded
--   @endDate         range end, NULL = unbounded
-- Two closed ranges overlap exactly when each one starts no later than the
-- other ends. This also detects an existing range that fully contains the new
-- one, which a test of "an existing endpoint lies inside the new range" misses.
CREATE FUNCTION [dbo].[fnIntervalConflict]
(
    @intervalId INT,
    @originalItemId INT,
    @startDate DATETIME,
    @endDate DATETIME
)
RETURNS BIT
AS
BEGIN
    -- Replace NULL bounds with the DATETIME range limits. The unseparated
    -- yyyymmdd form is used so the literals do not depend on language or
    -- DATEFORMAT settings.
    SET @startDate = ISNULL(@startDate, '17530101')
    SET @endDate = ISNULL(@endDate, '99991231 23:59:59')

    DECLARE @conflict BIT = 0

    IF EXISTS (
        SELECT 1
        FROM Items
        WHERE IntervalId = @intervalId
          AND ItemId <> @originalItemId
          AND ISNULL(StartDate, '17530101') <= @endDate
          AND ISNULL(EndDate, '99991231 23:59:59') >= @startDate
    )
        SET @conflict = 1

    RETURN @conflict
END
And then I added 2 constraints to my table:
-- A row's own dates must be ordered whenever both are present.
ALTER TABLE dbo.Items
    ADD CONSTRAINT CK_Items_Dates
    CHECK (StartDate IS NULL OR EndDate IS NULL OR StartDate <= EndDate)
GO
and
-- A row must not conflict with another row of the same interval group.
ALTER TABLE dbo.Items
    ADD CONSTRAINT CK_Items_ValidInterval
    CHECK (([dbo].[fnIntervalConflict]([IntervalId], ItemId, [StartDate], [EndDate]) = (0)))
GO
I know, the second constraint slows insert and update operations, but it is not very important for my application.
And also, now I can call function fnIntervalConflict from my application code before inserts and updates of data in the table.
Something like this should give you all overlaping periods
-- Lists each pair of rows whose periods overlap, once per pair.
-- A NULL StartDate / EndDate means "unbounded" and is replaced by a far-past /
-- far-future date, so open-ended periods take part in the comparison.
-- Assumes mytable has the ItemId column shown in the question.
SELECT
    t1.ItemId,
    t1.StartDate,
    t1.EndDate,
    t2.ItemId    AS ConflictingItemId,
    t2.StartDate AS ConflictingStartDate,
    t2.EndDate   AS ConflictingEndDate
FROM
    mytable t1
    INNER JOIN mytable t2
        ON t1.ItemId < t2.ItemId
       AND COALESCE(t1.StartDate, '17530101') < COALESCE(t2.EndDate, '99991231')
       AND COALESCE(t2.StartDate, '17530101') < COALESCE(t1.EndDate, '99991231')
Edited for Adrians comment bellow
This will give you the rows that are incorrect.
I added ROW_NUMBER() because I didn't know whether all entries were in order.
-- Testdata (the variable uses the @ prefix that T-SQL requires)
declare @date datetime = '2011-01-17'
;with yourTable(itemID, startDate, endDate)
as
(
    SELECT 1, NULL, @date
    UNION ALL
    SELECT 2, dateadd(day, -1, @date), DATEADD(day, 10, @date)
    UNION ALL
    SELECT 3, DATEADD(day, 60, @date), NULL
)
-- End testdata
-- tmp: rows numbered by startDate so each row can be paired with the next one
,tmp
as
(
    select *
    ,ROW_NUMBER() OVER(order by startDate) as rowno
    from yourTable
)
-- A row is invalid when it ends after the following row starts. The filter on
-- t2 removes unmatched rows anyway, so this is written as an inner join.
select *
from tmp t1
inner join tmp t2
    on t1.rowno = t2.rowno - 1
   and t1.endDate > t2.startDate
EDIT:
As for the updated question:
Just add a PARTITION BY clause to the ROW_NUMBER() query and alter the join.
-- Testdata (the variable uses the @ prefix that T-SQL requires)
declare @date datetime = '2011-01-17'
;with yourTable(itemID, startDate, endDate, intervalID)
as
(
    SELECT 1, NULL, @date, 1
    UNION ALL
    SELECT 2, dateadd(day, 1, @date), DATEADD(day, 10, @date),1
    UNION ALL
    SELECT 3, DATEADD(day, 60, @date), NULL, 1
    UNION ALL
    SELECT 4, NULL, @date, 2
    UNION ALL
    SELECT 5, dateadd(day, -1, @date), DATEADD(day, 10, @date),2
    UNION ALL
    SELECT 6, DATEADD(day, 60, @date), NULL, 2
)
-- End testdata
-- tmp: rows numbered by startDate separately within each intervalID
,tmp
as
(
    select *
    ,ROW_NUMBER() OVER(partition by intervalID order by startDate) as rowno
    from yourTable
)
-- A row is invalid when it ends after the following row of the same
-- intervalID starts. The filter on t2 removes unmatched rows anyway, so this
-- is written as an inner join.
select *
from tmp t1
inner join tmp t2
    on t1.rowno = t2.rowno - 1
   and t1.intervalID = t2.intervalID
   and t1.endDate > t2.startDate
-- Returns every row that overlaps another row of the same IntervalID.
-- NULL dates are treated as unbounded by substituting far-past / far-future
-- dates. The table variable uses the @ prefix that T-SQL requires.
declare @T table (ItemId int, IntervalID int, StartDate datetime, EndDate datetime)
insert into @T
select 1, 1, NULL, '2011-01-15' union all
select 2, 1, '2011-01-16', '2011-01-25' union all
select 3, 1, '2011-01-26', NULL union all
select 4, 2, NULL, '2011-01-17' union all
select 5, 2, '2011-01-16', '2011-01-25' union all
select 6, 2, '2011-01-26', NULL

select T1.*
from @T as T1
inner join @T as T2
    on coalesce(T1.StartDate, '1753-01-01') < coalesce(T2.EndDate, '9999-12-31') and
       coalesce(T1.EndDate, '9999-12-31') > coalesce(T2.StartDate, '1753-01-01') and
       T1.IntervalID = T2.IntervalID and
       T1.ItemId <> T2.ItemId
Result:
ItemId IntervalID StartDate EndDate
----------- ----------- ----------------------- -----------------------
5 2 2011-01-16 00:00:00.000 2011-01-25 00:00:00.000
4 2 NULL 2011-01-17 00:00:00.000
Not directly related to the OP, but since Adrian has expressed an interest, here's a table that SQL Server maintains the integrity of, ensuring that only one valid value is present at any time. In this case, I'm dealing with a current/history table, but the example can be modified to work with future data also (although in that case, you can't have the indexed view, and you need to write the merges directly, rather than maintaining through triggers).
In this particular case, I'm dealing with a link table that I want to track the history of. First, the tables that we're linking:
-- The two entity tables that the answers table links together.
CREATE TABLE dbo.Clients (
    ClientID INT IDENTITY(1,1) NOT NULL,
    Name     VARCHAR(50)       NOT NULL,
    /* Other columns */
    CONSTRAINT PK_Clients PRIMARY KEY (ClientID)
)
go
CREATE TABLE dbo.DataItems (
    DataItemID INT IDENTITY(1,1) NOT NULL,
    Name       VARCHAR(50)       NOT NULL,
    /* Other columns */
    CONSTRAINT PK_DataItems PRIMARY KEY (DataItemID),
    CONSTRAINT UQ_DataItem_Names UNIQUE (Name)
)
go
Now, if we were building a normal table, we'd have the following (Don't run this one):
-- Plain (non-historised) link table: one answer per client and data item.
CREATE TABLE dbo.ClientAnswers (
    ClientID   INT          NOT NULL,
    DataItemID INT          NOT NULL,
    IntValue   INT          NOT NULL,
    Comment    VARCHAR(MAX) NULL,
    CONSTRAINT PK_ClientAnswers
        PRIMARY KEY (ClientID, DataItemID),
    CONSTRAINT FK_ClientAnswers_Clients
        FOREIGN KEY (ClientID) REFERENCES dbo.Clients (ClientID),
    CONSTRAINT FK_ClientAnswers_DataItems
        FOREIGN KEY (DataItemID) REFERENCES dbo.DataItems (DataItemID)
)
But, we want a table that can represent a complete history. In particular, we want to design the structure such that overlapping time periods can never appear in the database. We always know which record was valid at any particular time:
-- History table for client answers: each row is one version of the answer for
-- a (ClientID, DataItemID) pair, valid from ValidFrom up to ValidTo.
create table dbo.ClientAnswerHistories (
ClientID int not null,
DataItemID int not null,
IntValue int null,
Comment varchar(max) null,
/* Temporal columns */
Deleted bit not null,
ValidFrom datetime2 null,
ValidTo datetime2 null,
-- At most one version per key may start, and at most one may end, at a given instant.
constraint UQ_ClientAnswerHistories_ValidFrom UNIQUE (ClientID,DataItemID,ValidFrom),
constraint UQ_ClientAnswerHistories_ValidTo UNIQUE (ClientID,DataItemID,ValidTo),
-- A version must start before it ends (only checked when both dates are set).
constraint CK_ClientAnswerHistories_NoTimeTravel CHECK (ValidFrom < ValidTo),
constraint FK_ClientAnswerHistories_Clients FOREIGN KEY (ClientID) references dbo.Clients (ClientID),
constraint FK_ClientAnswerHistories_DataItems FOREIGN KEY (DataItemID) references dbo.DataItems (DataItemID),
-- Self-references chaining the versions: a non-null ValidFrom must equal the
-- ValidTo of some row with the same key ...
constraint FK_ClientAnswerHistories_Prev FOREIGN KEY (ClientID,DataItemID,ValidFrom)
references dbo.ClientAnswerHistories (ClientID,DataItemID,ValidTo),
-- ... and a non-null ValidTo must equal the ValidFrom of some row with the same key.
constraint FK_ClientAnswerHistories_Next FOREIGN KEY (ClientID,DataItemID,ValidTo)
references dbo.ClientAnswerHistories (ClientID,DataItemID,ValidFrom),
-- Rows flagged as deleted carry no value and no comment.
constraint CK_ClientAnswerHistory_DeletionNull CHECK (
Deleted = 0 or
(
IntValue is null and
Comment is null
)),
-- Rows not flagged as deleted must carry a value.
constraint CK_ClientAnswerHistory_IntValueNotNull CHECK (Deleted=1 or IntValue is not null)
)
go
That's a lot of constraints. The only way to maintain this table is through merge statements (see examples below, and try to reason about why yourself). We're now going to build a view that mimics that ClientAnswers table defined above:
-- Current answers only: history rows that are neither deleted nor closed
-- (ValidTo is null). ISNULL turns IntValue into a non-nullable view column.
CREATE VIEW dbo.ClientAnswers
WITH SCHEMABINDING
AS
    SELECT
        ClientID,
        DataItemID,
        ISNULL(IntValue, 0) AS IntValue,
        Comment
    FROM dbo.ClientAnswerHistories
    WHERE Deleted = 0
      AND ValidTo IS NULL
go
-- The unique clustered index materialises the view and enforces one current
-- answer per client and data item.
CREATE UNIQUE CLUSTERED INDEX PK_ClientAnswers
    ON dbo.ClientAnswers (ClientID, DataItemID)
go
And we have the PK constraint we originally wanted. We've also used ISNULL to reinstate the not null-ness of the IntValue column (even though the check constraints already guarantee this, SQL Server is unable to derive this information). If we're working with an ORM, we let it target ClientAnswers, and the history gets automatically built. Next, we can have a function that lets us look back in time:
-- Returns the answers as they were at the instant @At: the non-deleted history
-- rows whose validity period [ValidFrom, ValidTo) contains @At, a NULL bound
-- meaning the period is open on that side.
-- The parameter uses the @ prefix that T-SQL requires.
create function dbo.ClientAnswers_At (
    @At datetime2
)
returns table
with schemabinding
as
return (
    select
        ClientID,
        DataItemID,
        ISNULL(IntValue,0) as IntValue,
        Comment
    from
        dbo.ClientAnswerHistories
    where
        Deleted = 0 and
        (ValidFrom is null or ValidFrom <= @At) and
        (ValidTo is null or ValidTo > @At)
)
go
And finally, we need the triggers on ClientAnswers that build this history. We need to use merge statements, since we need to simultaneously insert new rows, and update the previous "valid" row to end date it with a new ValidTo value.
-- INSTEAD OF INSERT on the dbo.ClientAnswers view: records each inserted answer
-- as a new open-ended row in dbo.ClientAnswerHistories. If the key currently has
-- an open "deleted" row, that row is end-dated in the same MERGE statement.
CREATE TRIGGER T_ClientAnswers_I
ON dbo.ClientAnswers
INSTEAD OF INSERT
AS
SET NOCOUNT ON;

-- Every inserted row is emitted twice (Dupl = 0 and Dupl = 1) so that a single
-- MERGE can both close the previous row and add the new one.
WITH Doubled AS (
    SELECT
        ins.ClientID,
        ins.DataItemID,
        ins.IntValue,
        ins.Comment,
        -- 1 when an open row flagged Deleted = 1 already exists for this key
        CASE WHEN tomb.ClientID IS NULL THEN 0 ELSE 1 END AS PrevDeleted,
        pass.Dupl
    FROM inserted AS ins
    LEFT JOIN dbo.ClientAnswerHistories AS tomb
        ON  tomb.ClientID = ins.ClientID
        AND tomb.DataItemID = ins.DataItemID
        AND tomb.ValidTo IS NULL
        AND tomb.Deleted = 1
    CROSS JOIN (SELECT 0 UNION ALL SELECT 1) AS pass (Dupl)
)
MERGE INTO dbo.ClientAnswerHistories AS cah
USING Doubled AS src
    ON  src.Dupl = 0
    AND src.PrevDeleted = 1
    AND cah.ClientID = src.ClientID
    AND cah.DataItemID = src.DataItemID
    AND cah.ValidTo IS NULL
-- Dupl = 0 copy: end-date the open "deleted" row.
WHEN MATCHED THEN
    UPDATE SET ValidTo = SYSDATETIME()
-- Dupl = 1 copy: add the new current row. ValidFrom stays null when there is no
-- previous row; otherwise it relies on this SYSDATETIME() equalling the ValidTo
-- written above, because the Prev/Next foreign keys pair the two values.
WHEN NOT MATCHED AND src.Dupl = 1 THEN
    INSERT (ClientID, DataItemID, IntValue, Comment, Deleted, ValidFrom)
    VALUES (src.ClientID, src.DataItemID, src.IntValue, src.Comment, 0,
            CASE WHEN src.PrevDeleted = 1 THEN SYSDATETIME() END);
GO
-- INSTEAD OF UPDATE on the dbo.ClientAnswers view: end-dates the open history row
-- for each updated key and inserts the new values as the next open row.
CREATE TRIGGER T_ClientAnswers_U
ON dbo.ClientAnswers
INSTEAD OF UPDATE
AS
SET NOCOUNT ON;

-- Each updated row appears twice (Dupl = 0 and Dupl = 1) so that one MERGE can
-- both close the old version and insert the new one.
WITH Doubled AS (
    SELECT
        ins.ClientID,
        ins.DataItemID,
        ins.IntValue,
        ins.Comment,
        pass.Dupl
    FROM inserted AS ins
    CROSS JOIN (SELECT 0 UNION ALL SELECT 1) AS pass (Dupl)
)
MERGE INTO dbo.ClientAnswerHistories AS cah
USING Doubled AS src
    ON  src.Dupl = 0
    AND cah.ClientID = src.ClientID
    AND cah.DataItemID = src.DataItemID
    AND cah.ValidTo IS NULL
-- Dupl = 0 copy that finds the open row: end-date it.
WHEN MATCHED THEN
    UPDATE SET ValidTo = SYSDATETIME()
-- Any source row without a match (the Dupl = 1 copy never matches): insert it as
-- the new version. Its ValidFrom relies on equalling the ValidTo written above,
-- because the Prev/Next foreign keys pair the two values.
WHEN NOT MATCHED THEN
    INSERT (ClientID, DataItemID, IntValue, Comment, Deleted, ValidFrom)
    VALUES (src.ClientID, src.DataItemID, src.IntValue, src.Comment, 0, SYSDATETIME());
GO
-- INSTEAD OF DELETE on the dbo.ClientAnswers view: nothing is physically removed.
-- The open history row for each deleted key is end-dated and a new open row
-- flagged Deleted = 1 (no IntValue, no Comment) is inserted after it.
CREATE TRIGGER T_ClientAnswers_D
ON dbo.ClientAnswers
INSTEAD OF DELETE
AS
SET NOCOUNT ON;

-- Each deleted key appears twice (Dupl = 0 and Dupl = 1) so that one MERGE can
-- both close the current row and insert the deletion marker.
WITH Doubled AS (
    SELECT
        del.ClientID,
        del.DataItemID,
        pass.Dupl
    FROM deleted AS del
    CROSS JOIN (SELECT 0 UNION ALL SELECT 1) AS pass (Dupl)
)
MERGE INTO dbo.ClientAnswerHistories AS cah
USING Doubled AS src
    ON  src.Dupl = 0
    AND cah.ClientID = src.ClientID
    AND cah.DataItemID = src.DataItemID
    AND cah.ValidTo IS NULL
-- Dupl = 0 copy that finds the open row: end-date it.
WHEN MATCHED THEN
    UPDATE SET ValidTo = SYSDATETIME()
-- Any source row without a match (the Dupl = 1 copy never matches): insert the
-- deletion marker. Its ValidFrom relies on equalling the ValidTo written above,
-- because the Prev/Next foreign keys pair the two values.
WHEN NOT MATCHED THEN
    INSERT (ClientID, DataItemID, Deleted, ValidFrom)
    VALUES (src.ClientID, src.DataItemID, 1, SYSDATETIME());
GO
Obviously, I could have built a simpler table (not a join table), but this is my standard go-to example (although it took me a while to reconstruct it - I forgot the set nocount on statements for a while). The strength here is that the base table, ClientAnswerHistories, is incapable of storing overlapping time ranges for the same ClientID and DataItemID values.
Things get more complex when you need to deal with temporal foreign keys.
Of course, if you don't want any real gaps, then you can remove the Deleted column (and associated checks), make the not null columns really not null, modify the insert trigger to do a plain insert, and make the delete trigger raise an error instead.
I've always taken a slightly different approach to the design if I have data that is never to have overlapping intervals... namely don't store intervals, but only start times. Then, have a view that helps with displaying the intervals.
-- Stores only the start of each interval; no end date is kept here.
-- The intervals view derives EndDate from the next StartDate in the same IntervalId.
CREATE TABLE intervalStarts (
    ItemId     INT,
    IntervalId INT,
    StartDate  DATETIME  -- may be null; the intervals view orders a null start first
);
go
-- Presents the rows of intervalStarts as closed [StartDate, EndDate] ranges.
-- EndDate is one day before the next StartDate within the same IntervalId, and
-- null for the last row of each IntervalId.
-- CREATE VIEW has to be the first statement of its batch, hence the surrounding
-- batch separators.
CREATE VIEW intervals
AS
with cte as (
    select ItemId, IntervalId, StartDate,
        -- A null StartDate is ordered as 1753-01-01 so that it comes first.
        -- ItemId breaks ties so the numbering is deterministic.
        row_number() over(
            partition by IntervalId
            order by isnull(StartDate,'1753-01-01'), ItemId
        ) as rn
    from intervalStarts
)
select c1.ItemId, c1.IntervalId, c1.StartDate,
    dateadd(dd,-1,c2.StartDate) as EndDate
from cte c1
-- c2 is the row that immediately follows c1 within the same IntervalId
left join cte c2 on c1.IntervalId=c2.IntervalId
    and c2.rn=c1.rn+1
go
So, sample data might look like:
-- Sample data: two IntervalId groups, each with a null (open) first start.
-- Group 2 is deliberately listed out of date order.
-- The column list keeps the insert independent of the table's column order, and
-- the unseparated YYYYMMDD literals convert to datetime the same way under every
-- language / dateformat setting.
INSERT INTO intervalStarts (ItemId, IntervalId, StartDate)
VALUES
    (1, 1, NULL),
    (2, 1, '20110116'),
    (3, 1, '20110126'),
    (4, 2, NULL),
    (5, 2, '20110126'),
    (6, 2, '20110114')
and a simple SELECT * FROM intervals yields:
ItemId | IntervalId | StartDate | EndDate
1 | 1 | null | 2011-01-15
2 | 1 | 2011-01-16 | 2011-01-25
3 | 1 | 2011-01-26 | null
4 | 2 | null | 2011-01-13
6 | 2 | 2011-01-14 | 2011-01-25
5 | 2 | 2011-01-26 | null