SQL Server field calculation based on multiple condition - sql

Here is my scenario:
I have a Person table with following fields.
-- Sample schema: one row per person, holding the attributes the rules test.
CREATE TABLE Person (
    PersonID INT IDENTITY(1,1) PRIMARY KEY,
    Age      INT,
    height   DECIMAL(4,2),
    weight   DECIMAL(6,2)
);

-- Sample rows; the trailing number is the PersonID generated by the identity column.
INSERT INTO Person (Age, height, weight) VALUES (60, 6.2, 169); -- 1
INSERT INTO Person (Age, height, weight) VALUES (15, 5.1, 100); -- 2
INSERT INTO Person (Age, height, weight) VALUES (10, 4.5, 50);  -- 3
What I need to do is,
if the person Age >= 18 and height >= 6 then calculationValue = 20
if the person Age >= 18 and height < 6 then calculationValue = 15
if the person Age < 18 and weight >= 60 then calculationValue = 10
if the person Age < 18 and weight < 60 then calculationValue = 5
based on these condition I need to find the calculationValue and do some math.
I tried to make a flexible model so in future it would be easier to add any more conditions and can easily change the constant values (like 18, 6, 60 etc)
I created couple of tables as below:
-- Range lookup: each row is one inclusive [startValue, endValue] band for the
-- Person column named in condCol.
-- The bounds are DECIMAL, not INT: an INT column truncates 5.99 to 5, which
-- leaves heights between 5 and 6 outside every band.
create table condTable(condTableID int primary key identity(1,1),
condCol varchar(20),
startValue decimal(6,2),
endValue decimal(6,2)
);
insert into condTable(condCol,startValue,endValue) values ('Age',18,999); -- 1
insert into condTable(condCol,startValue,endValue) values ('Height',6,99); -- 2
insert into condTable(condCol,startValue,endValue) values ('Height',0,5.99); -- 3
insert into condTable(condCol,startValue,endValue) values ('Age',0,17); -- 4 (Age is an int, so 17 covers "< 18")
insert into condTable(condCol,startValue,endValue) values ('Weight',60,999); -- 5
insert into condTable(condCol,startValue,endValue) values ('Weight',0,59.99); -- 6 (weight is decimal(6,2); 59.99 covers "< 60")
I join two condition to make it one in the following table as given by the requirement.(ie. if age >=18 and height >=6 then calculationValue = 20. etc)
-- Rule table: rows that share a CondJoin id reference the condTable rows that
-- are combined into one rule, each carrying that rule's CalculationValue.
CREATE TABLE CondJoin (
    CondJoin         INT,
    condTableID      INT,
    CalculationValue INT
);
INSERT INTO CondJoin (CondJoin, condTableID, CalculationValue) VALUES (1, 1, 20);
INSERT INTO CondJoin (CondJoin, condTableID, CalculationValue) VALUES (1, 2, 20);
INSERT INTO CondJoin (CondJoin, condTableID, CalculationValue) VALUES (2, 1, 15);
INSERT INTO CondJoin (CondJoin, condTableID, CalculationValue) VALUES (2, 3, 15);
INSERT INTO CondJoin (CondJoin, condTableID, CalculationValue) VALUES (3, 4, 10);
INSERT INTO CondJoin (CondJoin, condTableID, CalculationValue) VALUES (3, 5, 10);
INSERT INTO CondJoin (CondJoin, condTableID, CalculationValue) VALUES (4, 4, 5);
INSERT INTO CondJoin (CondJoin, condTableID, CalculationValue) VALUES (4, 6, 5);
I think this model will provide the flexibility to add more conditions in the future, but I am having difficulty implementing it in SQL Server 2005. Can anyone write a set-based SQL query that compares the values in the Person table with the CondJoin table and returns the corresponding calculationValue? For example, for PersonID 1 it should look at the CondJoin table and give the calculationValue 20, since his age is at least 18 and his height is at least 6.

this looks like you are headed towards dynamic sql generation.
i think maybe you would be better off with a row for each column and cutoff values for the ranges, and a value if true ... maybe something like:
age_condition
-----------------
min_age
max_age
value
this is something that you could populate and then query without some dynamic generation.

The following is extremely rough but it should get the point across. It normalizes the data and moves towards a semi-object oriented (attribute/value/attribute value) structure. I'll leave it up to you to reinforce referential integrity, but the following is flexible and will return the results you want:
-- People being evaluated.
CREATE TABLE Person (
    PersonID INT IDENTITY(1,1) PRIMARY KEY,
    Name     NVARCHAR(255)
);
GO
-- One row per (person, attribute); the value is stored as text.
CREATE TABLE PersonAttribute (
    PersonID        INT,
    CondAttributeID INT,
    Value           NVARCHAR(255)
);
GO
-- Names of the attributes that conditions can refer to.
CREATE TABLE CondAttribute (
    AttributeID INT IDENTITY(1,1) PRIMARY KEY,
    Attribute   NVARCHAR(255)
);
GO
-- One range condition (StartValue..EndValue) on a single attribute.
CREATE TABLE CondTable (
    CondTableID     INT IDENTITY(1,1) PRIMARY KEY,
    CondAttributeID INT,
    StartValue      MONEY,
    EndValue        MONEY
);
GO
-- The values a rule can yield.
CREATE TABLE CalculationValues (
    CalculationID    INT IDENTITY(1,1) PRIMARY KEY,
    CalculationValue INT
);
GO
-- Link table: which conditions belong to which calculation.
CREATE TABLE CondCalculation (
    CondTableID   INT,
    CalculationID INT
);
-- Seed data for the attribute/value model. The ids quoted in comments are the
-- identity values produced by the insert order.
INSERT Person (Name)
VALUES ('Joe')
,('Bob')
,('Tom');
-- CondAttributeID: 1 = Age, 2 = height, 3 = weight (see the CondAttribute insert below).
INSERT PersonAttribute (
PersonID
,CondAttributeID
,Value
)
VALUES (1, 1, '60')
,(1, 2, '6.2')
,(1, 3, '169')
,(2, 1, '15')
,(2, 2, '5.1')
,(2, 3, '100')
,(3, 1, '10')
,(3, 2, '4.5')
,(3, 3, '50');
INSERT CondAttribute (Attribute)
VALUES ('Age')
,('height')
,('weight');
INSERT CondTable (
CondAttributeID
,StartValue
,EndValue)
VALUES (1,18,999) -- 1: Age >= 18
,(2,6,99) -- 2: Height >= 6
,(2,0,5.99) -- 3: Height < 6
,(1,0,17) -- 4: Age < 18
,(3,60,999) -- 5: Weight >= 60
,(3,0,59); -- 6: Weight < 60
INSERT CalculationValues (CalculationValue)
VALUES (5) -- CalculationID 1
,(10) -- CalculationID 2
,(15) -- CalculationID 3
,(20); -- CalculationID 4
INSERT CondCalculation (CondTableID, CalculationID)
VALUES (1,4) -- 20: Age >= 18
,(2,4) -- 20: Height >= 6
,(1,3) -- 15: Age >= 18
,(3,3) -- 15: Height < 6
,(4,2) -- 10: Age < 18
,(5,2) -- 10: Weight >= 60
,(4,1) -- 5: Age < 18 (was (5,1), which paired Weight >= 60 with Weight < 60)
,(6,1); -- 5: Weight < 60
-- Return the calculation value(s) for which the person satisfies EVERY linked
-- condition. The joins alone yield one row per individually satisfied
-- condition, so the rows are grouped per calculation and kept only when the
-- number of satisfied conditions equals the number of conditions defined.
SELECT
    p.PersonID,
    p.Name,
    c.CalculationValue
FROM Person AS p
JOIN PersonAttribute AS pa ON p.PersonID = pa.PersonID
JOIN CondAttribute AS ca ON pa.CondAttributeID = ca.AttributeID
JOIN CondTable AS ct ON ca.AttributeID = ct.CondAttributeID
    AND CONVERT(money, pa.Value) BETWEEN ct.StartValue AND ct.EndValue
JOIN CondCalculation AS cc ON cc.CondTableID = ct.CondTableID
JOIN CalculationValues AS c ON cc.CalculationID = c.CalculationID
WHERE p.PersonID = 1
GROUP BY p.PersonID, p.Name, c.CalculationID, c.CalculationValue
HAVING COUNT(*) = (
    SELECT COUNT(*)
    FROM CondCalculation AS required
    WHERE required.CalculationID = c.CalculationID
);

The following solution uses PIVOT (twice) to transform the combination of CondJoin and condTable into a chart, then joins the chart to the Person table to calculate the target value. I believe, a series of CASE expressions could be used instead just as well. Anyway...
All the tables have been turned into table variables, for easier testing. So first, DDL and data preparation:
-- Test fixtures as table variables. Table variables take the @ sigil;
-- "declare #name table" is not valid T-SQL.
declare @Person table(PersonID int primary key identity(1,1),
Age int,
height decimal(4,2),
weight decimal(6,2)
);
insert into @Person(Age,height,weight) values (60,6.2,169); -- 1
insert into @Person(Age,height,weight) values (15,5.1,100); -- 2
insert into @Person(Age,height,weight) values (10,4.5,50); -- 3
-- Range bounds are DECIMAL so that 5.99 is stored as written; an INT column
-- would truncate it to 5 and heights between 5 and 6 would match no band.
declare @condTable table(condTableID int primary key identity(1,1),
condCol varchar(20),
startValue decimal(6,2),
endValue decimal(6,2)
);
insert into @condTable(condCol,startValue,endValue) values ('Age',18,999); -- 1
insert into @condTable(condCol,startValue,endValue) values ('Height',6,99); -- 2
insert into @condTable(condCol,startValue,endValue) values ('Height',0,5.99); -- 3
insert into @condTable(condCol,startValue,endValue) values ('Age',0,17); -- 4
insert into @condTable(condCol,startValue,endValue) values ('Weight',60,999); -- 5
insert into @condTable(condCol,startValue,endValue) values ('Weight',0,59.99); -- 6 (weight is decimal; 59.99 covers "< 60")
-- Rows sharing a CondJoin id form one rule with a single CalculationValue.
declare @CondJoin table(CondJoin int,condTableID int,CalculationValue int);
insert into @CondJoin (CondJoin,condTableID,CalculationValue) values (1,1,20);
insert into @CondJoin (CondJoin,condTableID,CalculationValue) values (1,2,20);
insert into @CondJoin (CondJoin,condTableID,CalculationValue) values (2,1,15);
insert into @CondJoin (CondJoin,condTableID,CalculationValue) values (2,3,15);
insert into @CondJoin (CondJoin,condTableID,CalculationValue) values (3,4,10);
insert into @CondJoin (CondJoin,condTableID,CalculationValue) values (3,5,10);
insert into @CondJoin (CondJoin,condTableID,CalculationValue) values (4,4,5);
insert into @CondJoin (CondJoin,condTableID,CalculationValue) values (4,6,5);
And now the query:
-- Build one row per rule (CondJoin) holding the lower and upper bound of each
-- attribute, then match every person against that chart.
-- Table variables are referenced with the @ sigil.
;with startValues as (
    -- Lower bounds: one column per attribute; NULL when the rule has no
    -- condition on that attribute.
    select
        CondJoin,
        Age,
        Height,
        Weight,
        CalculationValue
    from (
        select
            j.CondJoin,
            j.CalculationValue,
            t.condCol,
            t.startValue
        from @CondJoin j
        inner join @condTable t on j.condTableID = t.condTableID
    ) j
    pivot (
        max(startValue) for condCol in (Age, Height, Weight)
    ) p
),
endValues as (
    -- Upper bounds, pivoted the same way.
    select
        CondJoin,
        Age,
        Height,
        Weight,
        CalculationValue
    from (
        select
            j.CondJoin,
            j.CalculationValue,
            t.condCol,
            t.endValue
        from @CondJoin j
        inner join @condTable t on j.condTableID = t.condTableID
    ) j
    pivot (
        max(endValue) for condCol in (Age, Height, Weight)
    ) p
),
combinedChart as (
    -- Pair the lower and upper bounds of each rule.
    select
        s.CondJoin,
        AgeFrom = s.Age,
        AgeTo = e.Age,
        HeightFrom = s.Height,
        HeightTo = e.Height,
        WeightFrom = s.Weight,
        WeightTo = e.Weight,
        s.CalculationValue
    from startValues s
    inner join endValues e on s.CondJoin = e.CondJoin
)
select
    p.*,
    c.CalculationValue
from @Person p
left join combinedChart c
    -- A NULL "From" bound means the rule does not constrain that attribute.
    on (c.AgeFrom is null or p.Age between c.AgeFrom and c.AgeTo)
    and (c.HeightFrom is null or p.Height between c.HeightFrom and c.HeightTo)
    and (c.WeightFrom is null or p.Weight between c.WeightFrom and c.WeightTo)

Related

Find data by multiple Lookup table clauses

-- Sample data as table variables (declared with the @ sigil).
declare @Character table (id int, [name] varchar(12));
insert into @Character (id, [name])
values
(1, 'tom'),
(2, 'jerry'),
(3, 'dog');
-- Link table: which name ids are attached to which character.
declare @NameToCharacter table (id int, nameId int, characterId int);
insert into @NameToCharacter (id, nameId, characterId)
values
(1, 1, 1),
(2, 1, 3),
(3, 1, 2),
(4, 2, 1);
The Name Table has more than just 1,2,3 and the list to parse on is dynamic
NameTable
id | name
----------
1 foo
2 bar
3 steak
CharacterTable
id | name
---------
1 tom
2 jerry
3 dog
NameToCharacterTable
id | nameId | characterId
1 1 1
2 1 3
3 1 2
4 2 1
I am looking for a query that will return a character that has two names. For example
With the above data only "tom" will be returned.
-- Attempt using IN: this keeps a link row when its nameId equals ANY listed id.
SELECT
    *
FROM nameToCharacterTable
WHERE nameId IN (1, 2)
The IN clause will return every row that has a 1 or a 2. I want to return only the characters that have both a 1 and a 2.
I am stumped I have tried everything I know and do not want to resort to dynamic SQL. Any help would be great
The 1,2 in this example will be a dynamic list of integers; for example, it could be 1,3,4,5,...
Filter on a count of how many times the character appears in the NameToCharacter table with a name from the list you are providing (which I have assumed you can convert into a table variable or temp table), e.g.
-- Sample data as table variables (declared with the @ sigil).
declare @Character table (id int, [name] varchar(12));
insert into @Character (id, [name])
values
(1, 'tom'),
(2, 'jerry'),
(3, 'dog');
declare @NameToCharacter table (id int, nameId int, characterId int);
insert into @NameToCharacter (id, nameId, characterId)
values
(1, 1, 1),
(2, 1, 3),
(3, 1, 2),
(4, 2, 1);
-- The dynamic list of name ids a character must have.
declare @RequiredNames table (nameId int);
insert into @RequiredNames (nameId)
values
(1),
(2);
-- Keep a character only when the number of distinct required names linked to
-- it equals the number of distinct required names. Comparing against the size
-- of @RequiredNames (not a literal 2) keeps the query correct for any list
-- length, and DISTINCT makes duplicate link or list rows harmless.
-- With an empty list both counts are 0, so every character is returned.
select *
from @Character C
where (
    select count(distinct NC.nameId)
    from @NameToCharacter NC
    where NC.characterId = C.id
    and NC.nameId in (select nameId from @RequiredNames)
) = (select count(distinct nameId) from @RequiredNames);
Returns:
id
name
1
tom
Note: Providing DDL+DML as shown here makes it much easier for people to assist you.
This is classic Relational Division With Remainder.
There are a number of different solutions. @DaleK has given you an excellent one: inner-join everything, then check that each set has the right amount. This is normally the fastest solution.
If you want to ensure it works with a dynamic amount of rows, just change the last line to
) = (SELECT COUNT(*) FROM @RequiredNames);
Two other common solutions exist.
Left-join and check that all rows were joined
-- For each character, left-join every required name to that character's link
-- rows; the character qualifies when no required name is left unmatched.
SELECT *
FROM @Character c
WHERE EXISTS (SELECT 1
    FROM @RequiredNames rn
    LEFT JOIN @NameToCharacter nc ON nc.nameId = rn.nameId AND nc.characterId = c.id
    HAVING COUNT(*) = COUNT(nc.nameId) -- COUNT(nc.nameId) skips the NULLs of unmatched rows
);
Double anti-join, in other words: there are no "required" that are "not in the set"
-- A character qualifies when there is no required name for which the
-- character lacks a link row.
SELECT *
FROM @Character c
WHERE NOT EXISTS (SELECT 1
    FROM @RequiredNames rn
    WHERE NOT EXISTS (SELECT 1
        FROM @NameToCharacter nc
        WHERE nc.nameId = rn.nameId AND nc.characterId = c.id
    )
);
A variation on the one from the other answer uses a windowed aggregate instead of a subquery. I don't think this is performant, but it may have uses in certain cases.
-- Each required-name row carries the total number of required names (cnt);
-- the character qualifies when its matched row count equals that total.
SELECT *
FROM @Character c
WHERE EXISTS (SELECT 1
    FROM (
        SELECT *, COUNT(*) OVER () AS cnt
        FROM @RequiredNames
    ) rn
    JOIN @NameToCharacter nc ON nc.nameId = rn.nameId AND nc.characterId = c.id
    HAVING COUNT(*) = MIN(rn.cnt)
);
db<>fiddle

Need to get value from a lookup table

I have a table-X with ecode,emp ID ( some values)
37,10
47,20
57,30
There are 2 lookup tables
lookup table 1 has just the emp ID details( which am interested in)
10
20
So when I join, I get all the values needed (that's the first part).
my result will be
37 10
47 20
second part is,
the ones which doesnt satisfy the join condition should lookup on table 2 which has
2 columns
ecode, other_codes
37 xxx
47 YYY
57 AAA
So when 30 comes in , i want to return AAA and my final dataset should be,
37, 10
47,20
57 AAA
appreciate any help!
Thanks
You can left join both tables and use CASE statement for selecting a value from one table or the other.
I've created a db-fiddle which I think exemplifies your situation based on your description: https://www.db-fiddle.com/f/ujW8Unf44CqbsiJZXqXXtK/0
Here is the code for posterity. To set up your tables:
-- Source rows: an eCode paired with an employee id.
CREATE TABLE tableX (
    eCode      int,
    employeeId int
);
INSERT INTO tableX (eCode, employeeId)
VALUES
    (37, 10),
    (47, 20),
    (57, 30);

-- Lookup 1: the employee ids of interest.
CREATE TABLE employeeIds (
    employeeId int
);
INSERT INTO employeeIds (employeeId)
VALUES
    (10),
    (20);

-- Lookup 2: a code for each eCode.
CREATE TABLE otherCodes (
    eCode       int,
    other_codes varchar(10)
);
INSERT INTO otherCodes (eCode, other_codes)
VALUES
    (37, 'XXX'),
    (47, 'YYY'),
    (57, 'AAA');
The query based on this schema:
-- For each tableX row, show the employee id when it exists in employeeIds,
-- otherwise fall back to the code from otherCodes.
-- The two branches have different types (int and varchar). On SQL Server the
-- int type takes precedence, so returning 'AAA' fails with a conversion
-- error; the id is therefore cast to text so both branches are character data.
-- NOTE(review): MySQL spells this cast CAST(... AS CHAR(10)).
SELECT
    tx.eCode,
    COALESCE(CAST(ei.employeeId AS VARCHAR(10)), oc.other_codes) AS result
FROM tableX tx
LEFT JOIN employeeIds ei ON tx.employeeId = ei.employeeId
LEFT JOIN otherCodes oc ON tx.eCode = oc.eCode;

Updating thousands of records with different values

I've been given a spreadsheet in the format of :
Id | Val
1 57
2 99
There's approximately 10,000 records - Any ideas to handle the query below for 10,000 records without manually writing each case statement, tediously. Thanks.
-- Per-row CASE update. The stray comma that followed the SET expression has
-- been removed; a comma directly before WHERE is a syntax error.
update person
SET val = (
    case
        when Id = 1 then 57
        when Id = 2 then 99
    end)
where Id in (1, 2)
Quick and dirty? Here you go:
Add a new spreadsheet and call the old one datatable.
In the first row, first column, write
"Update person set val = ("
In the second column, link to the value on the datatable spreadsheet;
third column: ") where ID = ("
In the fourth column, link to the ID on the datatable spreadsheet;
fifth column: ")"
Then select the whole row and drag it down to row 10000.
Copy and paste the result into a query window and execute it.
I think this example can help you:
-- Target table whose ValueSome column gets overwritten.
CREATE TABLE #Person (
    PrimaryKey INT PRIMARY KEY,
    ValueSome  VARCHAR(50)
);
GO
-- Stand-in for the spreadsheet: the new value for each key.
CREATE TABLE #MySpreadSheet (
    PrimaryKey  INT PRIMARY KEY,
    ValueSpread VARCHAR(50)
);
GO
INSERT INTO #Person (PrimaryKey, ValueSome) VALUES (1, 'someValue');
INSERT INTO #Person (PrimaryKey, ValueSome) VALUES (2, 'someValueBeforeUpdate');
INSERT INTO #Person (PrimaryKey, ValueSome) VALUES (3, '');
INSERT INTO #MySpreadSheet (PrimaryKey, ValueSpread) VALUES (1, '45');
INSERT INTO #MySpreadSheet (PrimaryKey, ValueSpread) VALUES (2, '56');
INSERT INTO #MySpreadSheet (PrimaryKey, ValueSpread) VALUES (3, '34');
-- Show both tables before the update.
SELECT PrimaryKey, ValueSome FROM #Person;
SELECT PrimaryKey, ValueSpread FROM #MySpreadSheet;
-- Copy each spreadsheet value onto the person row with the same key.
UPDATE P
SET P.ValueSome = SS.ValueSpread
FROM #Person AS P
INNER JOIN #MySpreadSheet AS SS
    ON P.PrimaryKey = SS.PrimaryKey;
-- Show the result, then clean up.
SELECT PrimaryKey, ValueSome FROM #Person;
DROP TABLE #Person;
DROP TABLE #MySpreadSheet;
If anyone's interested, I went with this:
-- Staging table holding the (Id, val) pairs from the spreadsheet.
CREATE TABLE #TempTable(
Id int,
val int
)
INSERT INTO #TempTable (Id, val)
Values (1, 57),
(2, 99)
-- Set val on every Person row that has a staged row with the same Id.
-- Id itself is not assigned: the join already guarantees p.Id = tp.Id, so the
-- assignment changed nothing and would be rejected on an identity column.
-- The UPDATE targets the alias p so that it is unambiguously the joined instance.
UPDATE p
SET p.val = tp.val
FROM Person AS p
INNER JOIN #TempTable AS tp ON tp.Id = p.Id
-- Two-row demo table, then show its contents.
CREATE TABLE #example (
    id    INT,
    value INT
)
INSERT INTO #example (id, value) VALUES (1, 10)
INSERT INTO #example (id, value) VALUES (2, 20)
SELECT id, value FROM #example
id value
1 10
2 20
-- Assign a new value per id; the WHERE clause limits the update to the ids
-- the CASE handles, so no row falls through to the implicit NULL.
UPDATE #example
SET value = CASE id
                WHEN 1 THEN 100
                WHEN 2 THEN 200
            END
WHERE id IN (1, 2)
SELECT id, value FROM #example
id value
1 100
2 200

SQL stored procedure results solution

I have a stored procedure that I inherited. The goal is for the data in the temp table: #multi_nominees_uf to be joined with the data in #temp_reviewers_UF and assign #temp_reviewers_UF.uf_rev_id to #multi_nominees_uf.application_number where the corresponding uf_rev_id's short_plan does not match the major associated with the appNumber's major .
It is not as simple as doing a JOIN.
The following has to be in place:
short_plan can not match the major(associated with the uf_rev_id)
Each uf_rev_id may appear in the result only a certain number of times (@RevsPerReviewerUF). Also, a uf_rev_id can be in the #temp_reviewers_uf table more than once with a different short_plan, so only DISTINCT uf_rev_id values should be counted when calculating @RevsPerReviewerUF.
The way it is written now, the counts are not consistent. One uf_rev_ID may have 122 records and another may have 50 - each distinct uf_rev_id should have the same count (or very close). I have researched and tried NTILE but I couldn't figure it out.
Any ideas of the best way to accomplish this?? Any input is appreciated.
-----Sample Data -----
-- Sample nominees: application number, major code and competition id.
CREATE TABLE #mult_nominees_uf(
appnum VARCHAR(8)
,major VARCHAR(8)
,compid INT
);
-- The last five rows repeat the first five, as in the original sample.
-- The fourth repeat used to read 'DED`'; the stray backtick is removed so
-- that the value equals the 'DED' major used elsewhere.
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('00012345','ACT',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('10002343','BBC',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('10002777','BBC',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('10000023','DED',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('23457829','AAR',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('78954321','RRE',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('90002342','ACT',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('11156726','AAR',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('88855593','RRE',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('10000001','DED',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('20000393','ACT',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('11119999','DED',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('78927626','AAR',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('67589393','RRE',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('12453647','AAR',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('00012345','ACT',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('10002343','BBC',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('10002777','BBC',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('10000023','DED',2);
INSERT INTO #mult_nominees_uf(appnum,major,compid) VALUES ('23457829','AAR',2);
--with this sample data the @RevsPerReviewerUF calculation would use 4 reviewers, since A5 is listed twice and we only want distinct values used
-- Sample reviewers: one row per (reviewer, short_plan); reviewer A5 has two plans.
-- fac_emplid is stored as text: the ids carry leading zeros, which an INTEGER
-- column drops (00000012 becomes 12), and the procedure's own temp table
-- declares this column as varchar(9).
CREATE TABLE #Temp_Reviewers_uf(
uf_rev_id VARCHAR(8)
,short_plan VARCHAR(8)
,fac_emplid VARCHAR(9)
);
INSERT INTO #Temp_Reviewers_uf(uf_rev_id,short_plan,fac_emplid) VALUES ('A1','ACT','00000012');
INSERT INTO #Temp_Reviewers_uf(uf_rev_id,short_plan,fac_emplid) VALUES ('A2','BBC','00000145');
INSERT INTO #Temp_Reviewers_uf(uf_rev_id,short_plan,fac_emplid) VALUES ('A3','DED','10002934');
INSERT INTO #Temp_Reviewers_uf(uf_rev_id,short_plan,fac_emplid) VALUES ('A5','RRE','90001223');
INSERT INTO #Temp_Reviewers_uf(uf_rev_id,short_plan,fac_emplid) VALUES ('A5','ACT','90001223');
----Stored procedure -
-- Body of the reviewer-assignment procedure (excerpt).
-- Local variables use the @ sigil and the row-count function is @@rowcount;
-- names starting with # below are real temp tables.
DECLARE @Index INT
DECLARE @Num_nomineesUF INT
DECLARE @Num_reviewersUF INT
DECLARE @Num_reviewersUFDISTINCT INT
DECLARE @Num_reviews INT
DECLARE @Rev_ID nvarchar(25), @Nom_ID varchar(8), @Short_Plan varchar(8), @Major varchar(8)
DECLARE @RevsPerReviewerUF INT
SET @Num_reviews = 4
DECLARE @actualCompID int
DECLARE @UF_Flag INT
DECLARE @InsertNum int
SET @InsertNum = 1
create table #mult_nominees_UF (appNumber varchar(8), Major varchar(8), comp_id INT)
create table #TempNomineeTable (uf_rev_id varchar(8), fac_emplid varchar(9), appNumber varchar(8), Major varchar(8), short_plan varchar(8), comp_id int)
create table #Temp_Reviewers_UF (uf_rev_id varchar(8), short_plan varchar(8), fac_emplid varchar(9)) -- temp table used to hold Nom_IDs already assigned to Rev_IDs
set @actualCompID = 21
-- * * SELECT APPLICATION NUMBER & MAJOR FROM FS_RESULTS TABLE * * * * * --
-- Major is cut at the first '-'.
-- NOTE(review): LEFT(..., CHARINDEX(...)-1) raises an error when Major contains no '-'.
select appNumber, LEFT(Major, CHARINDEX('-', Major)-1) as Major, comp_id into #Delete_nomineesUF
from FS_Results
where UF='Y'
and comp_id = @actualCompID
and nominated=1;
SET @Num_nomineesUF = @@rowcount; --GET RECORD COUNT
IF (@Num_nomineesUF > 0)
BEGIN
SET @UF_Flag = 1;
END
SET @Index = 1 ; -- reinit variable
-- NOTE(review): this excerpt never increments @Index and shows no END that
-- closes the WHILE block; confirm against the full procedure where the loop
-- is meant to end.
WHILE @Index <= 4 BEGIN
if (@UF_Flag > 0)
BEGIN
INSERT into #mult_nominees_uf
select * from #Delete_nomineesUF
END
-- Create temp table for UF Reviewers
-- NOTE(review): #Temp_Reviewers_UF is already created above, and SELECT ... INTO
-- requires that the target table does not exist yet; verify which of the two
-- is intended.
select uf_rev_id, short_plan, fac_emplid into #temp_reviewers_UF
from ReviewersID_ShortPlan
where uf_rev_id like 'UF%'
and competition_id = @actualCompID
SET @Num_reviewersUF = @@rowcount
-- This query is run only so that @@rowcount yields the number of distinct reviewers.
SELECT DISTINCT UF_REV_ID FROM ReviewersID_ShortPlan WHERE UF_REV_ID like 'UF%' AND competition_id = @actualCompID
SET @Num_reviewersUFDistinct = @@rowcount
-- Reviews each reviewer should receive; NULLIF avoids a divide-by-zero when
-- there are no reviewers (the result is then NULL).
SET @RevsPerReviewerUF = (@Num_nomineesUF * @Num_reviews) / nullif(@Num_reviewersUFDistinct,0)
-- A CTE must follow a terminated statement, hence the leading semicolon.
;WITH Match_NomineesWithReviewers AS(
-- one row per nominee, with the number of copies of it in #mult_nominees_uf
SELECT DISTINCT
appNumber,
RTRIM(Major) AS Major,
COUNT(1) as rowcnt,
comp_id
FROM #mult_nominees_uf
GROUP BY appNumber,
RTRIM(Major),
comp_id
)
, rownum_matches AS (
-- pair each nominee with every reviewer row whose short_plan differs from the
-- nominee's major, numbered in random order per nominee
SELECT m.appNumber,
m.Major,
t.short_plan,
t.uf_rev_id,
t.fac_emplid,
m.rowcnt,
m.comp_id,
ROW_NUMBER() OVER (PARTITION BY m.appNumber order by newid()) AS rownum
FROM Match_NomineesWithReviewers m
JOIN #temp_reviewers_UF t ON t.short_plan != m.major
GROUP BY m.appNumber, m.Major, t.short_plan,
t.uf_rev_id, t.fac_emplid, m.rowcnt, m.comp_id
HAVING COUNT(t.uf_rev_id) <= @RevsPerRevieweruf
)
-- NOTE(review): the select list supplies nine values but #TempNomineeTable is
-- declared above with six columns; confirm the intended target columns.
-- NOTE(review): both HAVING clauses count rows inside a group that already
-- contains uf_rev_id and appNumber, so they do not cap the total number of
-- assignments per reviewer.
INSERT INTO #TempNomineeTable
SELECT uf_rev_id, fac_emplid, appNumber, Major, short_plan, null, 0, null, comp_id FROM rownum_matches rm
WHERE rownum <= rowcnt
group by uf_rev_id, fac_emplid, appNumber, Major, short_plan, comp_id
HAVING COUNT(uf_rev_id) <= @RevsPerRevieweruf

project a sparse result at some level

I don't really know what to call this but it's not that hard to explain
Basically what I have is a result like this
Similarity ColumnA ColumnB ColumnC
1 SomeValue NULL SomeValue
2 NULL SomeB NULL
3 SomeValue NULL SomeC
4 SomeA NULL NULL
This result is created by matching a set of strings against another table. Each string also contains some values for these ColumnA..C, which are the values I want to aggregate in some way.
Something like min/max works very well but I can't figure out how to get it to account for the highest similarity not just the min/max value. I don't really want the min/max, I want the first non-null value with the highest similarity.
Ideally the result would look like this
ColumnA ColumnB ColumnC
SomeA SomeB SomeC
I'd like to be able to efficiently join in the temporary result to compute the rest, and I've been exploring different options. Something I've been considering is creating a SQL Server CLR aggregate that yields the "first" non-null value, but I'm unsure if there's even such a thing as a first or last when running an aggregate on a result.
Okay, so I figured it out. I originally had trouble with UPDATE FROM and JOIN not playing well together. I was counting on the UPDATE occurring multiple times, which would give me the correct results; however, there's no such guarantee from SQL Server (it's actually undefined behavior, and although it appeared to work, we'll have none of that). Since you can run UPDATE against a CTE, I combined that with OUTER APPLY to select exactly one row to complement a missing value where possible.
Here's the whole thing with test data as well.
-- Test data and solution, using table variables (declared with the @ sigil).
-- @cost: cost rules; make and model are LIKE patterns, a and b the values to project.
DECLARE @cost TABLE (
    make nvarchar(100) not null,
    model nvarchar(100),
    a numeric(18,2),
    b numeric(18,2)
);
INSERT @cost VALUES ('a%', null, 100, 2);
INSERT @cost VALUES ('a%', 'a%', 149, null);
INSERT @cost VALUES ('a%', 'ab', 349, null);
INSERT @cost VALUES ('b', null, null, 2.5);
INSERT @cost VALUES ('b', 'b%', 249, null);
INSERT @cost VALUES ('b', 'b', null, 3);
-- @unit: the units to be matched against the cost rules.
DECLARE @unit TABLE (
    id int,
    make nvarchar(100) not null,
    model nvarchar(100)
);
INSERT @unit VALUES (1, 'a', null);
INSERT @unit VALUES (2, 'a', 'a');
INSERT @unit VALUES (3, 'a', 'ab');
INSERT @unit VALUES (4, 'b', null);
INSERT @unit VALUES (5, 'b', 'b');
-- @tmp: every matching cost row per unit, ranked by specificity (1 = first in
-- the ORDER BY below).
DECLARE @tmp TABLE (
    id int,
    specificity int,
    a numeric(18,2),
    b numeric(18,2),
    primary key(id, specificity)
);
INSERT @tmp
OUTPUT inserted.* --FOR DEBUGGING
SELECT
    unit.id
    , ROW_NUMBER() OVER (
        PARTITION BY unit.id
        ORDER BY cost.make DESC, cost.model DESC
    ) AS specificity
    , cost.a
    , cost.b
FROM @unit unit
INNER JOIN @cost cost ON unit.make LIKE cost.make
    AND (cost.model IS NULL OR unit.model LIKE cost.model)
;
--fix the holes
-- Only the rank-1 row of each unit is updated, and only when a or b is missing.
WITH tmp AS (
    SELECT *
    FROM @tmp
    WHERE specificity = 1
    AND (a IS NULL OR b IS NULL) --where necessary
)
UPDATE tmp
SET
    tmp.a = COALESCE(tmp.a, a.a)
    , tmp.b = COALESCE(tmp.b, b.b)
OUTPUT inserted.* --FOR DEBUGGING
FROM tmp
-- Each OUTER APPLY returns at most one row: the next-ranked row of the same
-- unit that has a non-NULL value for the column. This keeps the UPDATE
-- deterministic, with a single source row per target row.
OUTER APPLY (
    SELECT TOP 1 src.a
    FROM @tmp src
    WHERE src.id = tmp.id
    AND src.specificity > 1
    AND src.a IS NOT NULL
    ORDER BY src.specificity
) a
OUTER APPLY (
    SELECT TOP 1 src.b
    FROM @tmp src
    WHERE src.id = tmp.id
    AND src.specificity > 1
    AND src.b IS NOT NULL
    ORDER BY src.specificity
) b
;