SQL query to reconstruct inherited EAV model - sql

I have 5 tables in my database representing an inherited EAV model:
-- Lookup table: the name behind each AttributeNameId used by the attribute tables.
CREATE TABLE AttributeNames (
    "ID"   int,
    "Name" varchar(8)
);

INSERT INTO AttributeNames ("ID", "Name")
VALUES
    (1, 'Color'),
    (2, 'FuelType'),
    (3, 'Doors'),
    (4, 'Price');
-- Master cars: templates whose attributes local cars may inherit.
CREATE TABLE MasterCars (
    "ID"   int,
    "Name" varchar(10)
);

INSERT INTO MasterCars ("ID", "Name")
VALUES
    (5, 'BMW'),
    (6, 'Audi'),
    (7, 'Ford');
-- Attribute values defined on master cars: one row per (master car, attribute).
-- Row IDs run 100..111 without repeats (the original listing used 102 twice).
CREATE TABLE MasterCarAttributes
("ID" int, "AttributeNameId" int, "Value" varchar(10), "MasterCarId" int)
;
INSERT INTO MasterCarAttributes
("ID", "AttributeNameId", "Value", "MasterCarId")
VALUES
(100, 1, 'Red', 5),
(101, 2, 'Gas', 5),
(102, 3, '4', 5),
(103, 4, '$100K', 5),
(104, 1, 'Blue', 6),
(105, 2, 'Diesel', 6),
(106, 3, '3', 6),
(107, 4, '$80k', 6),
(108, 1, 'Green', 7),
(109, 2, 'Diesel', 7),
(110, 3, '5', 7),
(111, 4, '$60k', 7)
;
-- Local cars. MasterCarId points at the master car whose attributes are
-- inherited; NULL means the car stands alone.
CREATE TABLE LocalCars
("ID" int, "MasterCarId" int)
;
-- MasterCarId is an int column, so insert integer literals rather than
-- strings that rely on implicit conversion.
INSERT INTO LocalCars
("ID", "MasterCarId")
VALUES
(8, 5),
(9, 6),
(10, NULL)
;
-- Attribute values set directly on a local car; these override the master
-- car's value for the same AttributeNameId.
CREATE TABLE LocalCarAttributes (
    "ID"              int,
    "AttributeNameId" int,
    "Value"           varchar(6),
    "LocalCarId"      int
);

INSERT INTO LocalCarAttributes ("ID", "AttributeNameId", "Value", "LocalCarId")
VALUES
    (43, 1, 'Yellow', 8),
    (44, 3, '6', 9),
    (45, 1, 'Red', 10),
    (46, 2, 'Gas', 10),
    (47, 3, '2', 10),
    (48, 4, '$60k', 10);
I can retrieve all of master car attributes as follows:
-- Every master car with its attributes; a car without attributes still
-- appears once, with NULL attribute columns.
SELECT
    MC.ID,
    MCA.AttributeNameId,
    MCA.Value
FROM MasterCars AS MC
LEFT JOIN MasterCarAttributes AS MCA
    ON MCA.MasterCarId = MC.ID
ORDER BY MC.ID;
Likewise, I can retrieve all of the local car attributes as follows:
-- Every local car with the attributes set directly on it; a car without
-- local attributes still appears once, with NULL attribute columns.
SELECT
    LC.ID,
    LCA.AttributeNameId,
    LCA.Value
FROM LocalCars AS LC
LEFT JOIN LocalCarAttributes AS LCA
    ON LCA.LocalCarId = LC.ID
ORDER BY LC.ID;
If LocalCars.MasterCarId is not NULL, then that local car can inherit the attributes of that master car. A local car attribute with the same AttributeNameId overrides any master attribute with the same AttributeNameId.
So given the data above, I have 3 local cars each with 4 attributes (color, fuelType, doors, price). Inherited attribute values in bold:
Local Car Id = 1 (Yellow, Gas, 4, $100K)
Local Car Id = 2 (Blue, Diesel, 6, $80k)
Local Car Id = 3 (Red, Gas, 2, $60k)
I'm trying to find the necessary joins required to join the two queries above together to give a complete set of local cars attributes, some inherited:
LocalCarId AttributeNameId Value
------------------------------------------
1 1 Yellow
1 2 Gas
1 3 4
1 4 $100K
2 1 Blue
2 2 Diesel
2 3 6
2 4 $80K
3 1 Red
3 2 Gas
3 3 2
3 4 $60K
or possibly even:
LocalCarId AttributeNameId LocalValue MasterValue
-------------------------------------------------------------
1 1 Yellow Red
1 2 NULL Gas
1 3 NULL 4
1 4 NULL $100K
2 1 NULL Blue
2 2 NULL Diesel
2 3 6 3
2 4 NULL $80K
3 1 Red NULL
3 2 Gas NULL
3 3 2 NULL
3 4 $60K NULL

The problem can be solved by performing a union of all of your local car attributes and master car attributes. Each record is marked with an [IsMasterAttribute] flag. The next step is to use the ROW_NUMBER() window function to rank each of the duplicate attributes. The final step is to select only the attributes that have a rank of 1.
-- Effective attributes of every local car: a local value wins over the
-- inherited master value for the same AttributeNameId.
;WITH CTE_CombinedAttributes
AS
(
    -- Attributes inherited from the master car. INNER JOINs are used so that a
    -- master car without attributes does not contribute a row whose
    -- AttributeNameId is NULL.
    SELECT 1 AS IsMasterAttribute
          ,LC.ID
          ,MC.ID AS MasterCarId
          ,MCA.AttributeNameId
          ,MCA.Value
    FROM MasterCars MC
    INNER JOIN MasterCarAttributes MCA ON MC.ID = MCA.MasterCarId
    INNER JOIN LocalCars LC ON LC.MasterCarId = MC.ID
    UNION ALL
    -- Attributes set directly on the local car. INNER JOIN for the same reason:
    -- a local car without local attributes must not emit a NULL-attribute row.
    SELECT 0 AS IsMasterAttribute
          ,LC.ID
          ,LC.MasterCarId
          ,LCA.AttributeNameId
          ,LCA.Value
    FROM LocalCars LC
    INNER JOIN LocalCarAttributes LCA ON LC.ID = LCA.LocalCarId
)
,
CTE_RankedAttributes
AS
(
    -- Rank 1 is the local row when one exists (IsMasterAttribute = 0 sorts
    -- first), otherwise the master row.
    SELECT [IsMasterAttribute]
          ,[ID]
          ,[AttributeNameId]
          ,[Value]
          ,ROW_NUMBER() OVER (PARTITION BY [ID], [AttributeNameId] ORDER BY [IsMasterAttribute]) AS [AttributeRank]
    FROM CTE_CombinedAttributes
)
SELECT [IsMasterAttribute]
      ,[ID]
      ,[AttributeNameId]
      ,[Value]
FROM CTE_RankedAttributes
WHERE [AttributeRank] = 1
-- Order by both keys so the output order is deterministic.
ORDER BY [ID], [AttributeNameId]
The second output is also possible by performing a simple pivot on the final result:
-- Local and master value side by side for every (local car, attribute) pair.
;WITH CTE_CombinedAttributes
AS
(
    -- Attributes inherited from the master car. INNER JOINs keep out rows with
    -- a NULL AttributeNameId, which would otherwise form a spurious group.
    SELECT 1 AS IsMasterAttribute
          ,LC.ID
          ,MC.ID AS MasterCarId
          ,MCA.AttributeNameId
          ,MCA.Value
    FROM MasterCars MC
    INNER JOIN MasterCarAttributes MCA ON MC.ID = MCA.MasterCarId
    INNER JOIN LocalCars LC ON LC.MasterCarId = MC.ID
    UNION ALL
    -- Attributes set directly on the local car.
    SELECT 0 AS IsMasterAttribute
          ,LC.ID
          ,LC.MasterCarId
          ,LCA.AttributeNameId
          ,LCA.Value
    FROM LocalCars LC
    INNER JOIN LocalCarAttributes LCA ON LC.ID = LCA.LocalCarId
)
-- No ranking step is needed here: the pivot keeps both values instead of
-- choosing one, so the ROW_NUMBER() CTE of the previous query is dropped.
SELECT [ID]
      ,[AttributeNameId]
      ,MAX(
           CASE [IsMasterAttribute]
               WHEN 0 THEN [Value]
           END
       ) AS LocalValue
      ,MAX(
           CASE [IsMasterAttribute]
               WHEN 1 THEN [Value]
           END
       ) AS MasterValue
FROM CTE_CombinedAttributes
GROUP BY [ID], [AttributeNameId]
-- Order by both keys so the output order is deterministic.
ORDER BY [ID], [AttributeNameId]

SQL Fiddle Demo
-- Effective attributes per local car: the local value when present,
-- otherwise the value inherited from the master car.
-- NOTE(review): for a car that has a master, a local attribute the master
-- does not define is not returned by this join shape.
SELECT LC."ID" as LocalCarID,
       COALESCE(LCA."AttributeNameId", MCA."AttributeNameId") as "AttributeNameId",
       COALESCE(LCA."Value", MCA."Value") as "Value"
FROM LocalCars LC
LEFT JOIN MasterCars MC
       ON LC."MasterCarId" = MC."ID"
LEFT JOIN MasterCarAttributes MCA
       ON MC."ID" = MCA."MasterCarId"
LEFT JOIN LocalCarAttributes LCA
       ON (   MCA."AttributeNameId" = LCA."AttributeNameId"
           OR MCA."AttributeNameId" IS NULL)
       -- This is the important part:
       -- try to join with a master attribute, otherwise use the car attribute.
       -- The local attribute must belong to this car, so match on
       -- "LocalCarId" (not on the attribute row's own "ID").
      AND LC."ID" = LCA."LocalCarId"
ORDER BY LocalCarID, "AttributeNameId"
OUTPUT
| LocalCarID | AttributeNameId | Value |
|------------|-----------------|--------|
| 1 | 1 | Blue |
| 1 | 2 | Gas |
| 2 | 1 | Green |
| 2 | 2 | Diesel |

Related

Multiple conditions on multiple columns

I have table that looks like this
WO | PS | C
----------------
12 | 1 | a
12 | 2 | b
12 | 2 | b
12 | 2 | c
13 | 1 | a
I want to find values from the WO column where PS has value 1 and C has value a AND PS has value 2 and C has value b. So I need multiple conditions on the same columns, and they have to be checked per WO value. If a WO does not match both conditions, I don't want that WO included.
I tried using condition:
WHERE PS = 1 AND C = a AND PS = 2 AND C = b
but it does not work and does not have connection to WO column as mentioned above.
Edit:
I need to find WO which has (PS = 1 AND C = a) and at the same time it also has rows where (PS = 2 and C = b).
The result should be:
WO | PS | C
----------------
12 | 1 | a
12 | 2 | b
12 | 2 | b
If either of rows: (PS = 1 and C = a) or (PS = 2 and C = b) does not exist then nothing should be returned.
WHERE (PS = 1 AND C = 'a') OR (PS = 2 AND C = 'b') -- character values must be quoted literals
try this condition
As I understand this, you need two IN clauses or two EXIST clauses, something like this:
-- Every distinct row of each WO that has both a (1, 'a') row and a (2, 'b') row.
SELECT DISTINCT
    t.wo,
    t.ps,
    t.c
FROM yourtable AS t
WHERE EXISTS (SELECT 1 FROM yourtable AS x WHERE x.wo = t.wo AND x.ps = 1 AND x.c = 'a')
  AND EXISTS (SELECT 1 FROM yourtable AS y WHERE y.wo = t.wo AND y.ps = 2 AND y.c = 'b');
This will produce this outcome:
WO | PS | C
----------------
12 | 1 | a
12 | 2 | b
12 | 2 | c
Please note that in the last row of the result, the column C has value c instead of b as you have shown in your question. I guess this was your mistake when creating the sample outcome?
If I have understood your question incorrectly, please let me know and explain what's wrong, and I will review my answer.
Edit: To create the same result as shown in your question, this query would do:
-- Rows that are one of the two wanted (ps, c) pairs, limited to WOs that
-- contain both pairs. The pairs are tested together: separate
-- "ps IN (1,2) AND c IN ('a','b')" filters would also let (1,'b') and (2,'a') through.
SELECT wo, ps, c
FROM yourtable
WHERE ((ps = 1 AND c = 'a') OR (ps = 2 AND c = 'b'))
AND wo IN
(SELECT wo FROM yourtable WHERE ps = 1 and c = 'a')
AND wo IN
(SELECT wo FROM yourtable WHERE ps = 2 and c = 'b');
But I really don't believe this is what you were looking for ;)
Try out: db<>fiddle
I think you can make use of an exists criteria here to filter your rows correctly, I would like to see a wider sample data set to be sure though.
-- Rows that are one of the wanted (PS, C) pairs and whose WO also contains
-- the other wanted pair. The counterpart is checked explicitly; testing only
-- "different PS and different C" would accept any unrelated row.
select t.*
from t
where ((t.PS = 1 and t.C = 'a') or (t.PS = 2 and t.C = 'b'))
and exists (
    select *
    from t t2
    where t2.WO = t.WO
      and (   (t.PS = 1 and t2.PS = 2 and t2.C = 'b')
           or (t.PS = 2 and t2.PS = 1 and t2.C = 'a'))
);
Just to throw in one more solution, you can do this with a single reference to your table, but this may not necessarily mean that it is more efficient. The first part is to filter based on the combinations you want:
-- Sample data in a table variable (T-SQL table variables are prefixed with @).
DECLARE @T TABLE (WO INT, PS INT, C CHAR(1));
INSERT INTO @T (WO, PS, C)
VALUES (12, 1, 'a'), (12, 2, 'b'), (12, 2, 'b'), (12, 2, 'c'), (13, 1, 'a');

-- Keep only the rows that are one of the wanted (PS, C) combinations.
-- 'b' is written in the same case as the data so the filter does not depend
-- on a case-insensitive collation.
SELECT t.WO, t.PS, t.C
FROM @T AS t
WHERE (t.PS = 1 AND t.C = 'a')
   OR (t.PS = 2 AND t.C = 'b');
WO
PS
C
12
1
a
12
2
b
12
2
b
13
1
a
But you want to exclude WO 13 because it doesn't have both combinations, so what we ideally need is a distinct count of PS and C per WO, to find those with a distinct count of 2. You can't do COUNT(DISTINCT ..) in a windowed function directly, but you can do this indirectly with DENSE_RANK():
-- Sample data in a table variable (T-SQL table variables are prefixed with @).
DECLARE @T TABLE (WO INT, PS INT, C CHAR(1));
INSERT INTO @T (WO, PS, C)
VALUES (12, 1, 'a'), (12, 2, 'b'), (12, 2, 'b'), (12, 2, 'c'), (13, 1, 'a');

-- Ascending rank + descending rank - 1 equals the number of distinct
-- (PS, C) values inside each WO partition.
SELECT t.WO, t.PS, t.C,
       CntDistinct = DENSE_RANK() OVER(PARTITION BY t.WO ORDER BY t.PS, t.C) +
                     DENSE_RANK() OVER(PARTITION BY t.WO ORDER BY t.PS DESC, t.C DESC) - 1
FROM @T AS t
WHERE (t.PS = 1 AND t.C = 'a')
   OR (t.PS = 2 AND t.C = 'b'); -- same case as the data; no reliance on collation
Which gives:
WO
PS
C
CntDistinct
12
1
a
2
12
2
b
2
12
2
b
2
13
1
a
1
You can then put this in a subquery and choose only the rows with a count of 2:
-- Sample data in a table variable (T-SQL table variables are prefixed with @).
DECLARE @T TABLE (WO INT, PS INT, C CHAR(1));
INSERT INTO @T (WO, PS, C)
VALUES (12, 1, 'a'), (12, 2, 'b'), (12, 2, 'b'), (12, 2, 'c'), (13, 1, 'a');

-- Keep the wanted rows only for WOs in which both combinations occur.
SELECT t.WO, t.PS, t.C
FROM ( SELECT src.WO, src.PS, src.C,
              -- distinct count of (PS, C) per WO via two opposite dense ranks
              CntDistinct = DENSE_RANK() OVER(PARTITION BY src.WO ORDER BY src.PS, src.C) +
                            DENSE_RANK() OVER(PARTITION BY src.WO ORDER BY src.PS DESC, src.C DESC) - 1
       FROM @T AS src
       WHERE (src.PS = 1 AND src.C = 'a')
          OR (src.PS = 2 AND src.C = 'b') -- same case as the data
     ) AS t
WHERE t.CntDistinct = 2;
Finally, if the combinations are likely to change, or there are a lot more than 2 of them, you may find that building a table of the combinations you are looking for is a more maintainable solution:
-- Sample data in a table variable (T-SQL table variables are prefixed with @).
DECLARE @T TABLE (WO INT, PS INT, C CHAR(1));
INSERT INTO @T (WO, PS, C)
VALUES (12, 1, 'a'), (12, 2, 'b'), (12, 2, 'b'), (12, 2, 'c'), (13, 1, 'a');

-- The wanted (PS, C) combinations; the primary key keeps them unique so
-- COUNT(*) below is the number of distinct combinations required.
DECLARE @Combinations TABLE (PS INT, C CHAR(1), PRIMARY KEY (PS, C));
INSERT INTO @Combinations (PS, C)
VALUES (1, 'a'), (2, 'b');

SELECT t.WO, t.PS, t.C
FROM ( SELECT src.WO, src.PS, src.C,
              -- distinct count of matched (PS, C) per WO
              CntDistinct = DENSE_RANK() OVER(PARTITION BY src.WO ORDER BY src.PS, src.C) +
                            DENSE_RANK() OVER(PARTITION BY src.WO ORDER BY src.PS DESC, src.C DESC) - 1
       FROM @T AS src
       INNER JOIN @Combinations AS c
               ON c.PS = src.PS
              AND c.C = src.C
     ) AS t
-- A WO qualifies only when it matched every listed combination.
WHERE t.CntDistinct = (SELECT COUNT(*) FROM @Combinations);
Let's chat about demo data. You provided some useful data that helps us see what your problem is, but no DDL. If you provide your demo data similar to this, it makes it easier for us to understand the issue:
-- Demo data as a temp table, so the queries in this answer that read #table
-- run as written. ("DECLARE #table TABLE" is not valid T-SQL: table
-- variables start with @, temp tables with #.)
CREATE TABLE #table (WO INT, PS INT, C NVARCHAR(10));
INSERT INTO #table (WO, PS, C) VALUES
(12, 1, 'a'), (12, 2, 'b'),
(12, 2, 'b'), (12, 2, 'c'),
(13, 1, 'a');
Now on to your question. It looks to me like you just need composite conditions, one of which needs to evaluate to fully true. Consider this:
-- Rows matching either wanted pair; both predicates inside a parenthesised
-- pair must hold for that pair to match.
SELECT *
FROM #table
WHERE (PS = 1 AND C = 'a')
   OR (PS = 2 AND C = 'b')
The predicates wrapped in the parens are evaluated as a whole in the WHERE clause. If one of the predicates is false, the whole thing is. If either composite evaluates to true, we return the row.
WO PS C
---------
12 1 a
12 2 b
12 2 b
13 1 a
This result set does include WO 13, as by your definition it should be there. I don't know if there are additional things you wanted to evaluate which may exclude it, but it does have a PS of 1 and a C of a.
Edit:
if the question is as discussed in the comments that a single WO must contain BOTH then this may be the answer:
-- Wanted rows, restricted to WOs that contain both wanted pairs.
SELECT *
FROM #table t
INNER JOIN (
    -- WOs having a (1, 'a') row and a (2, 'b') row; GROUP BY collapses the
    -- duplicates produced by the self-join.
    SELECT first_pair.WO
    FROM #table first_pair
    INNER JOIN #table second_pair
        ON first_pair.WO = second_pair.WO
       AND second_pair.PS = 2
       AND second_pair.C = 'b'
    WHERE first_pair.PS = 1
      AND first_pair.C = 'a'
    GROUP BY first_pair.WO
) a
    ON t.WO = a.WO
WHERE (t.PS = 1 AND t.C = 'a')
   OR (t.PS = 2 AND t.C = 'b')
WO PS C WO
--------------
12 1 a 12
12 2 b 12
12 2 b 12

SQL View with count from multiple tables

I have a nodejs app with SQL Server. I want to be able to have a View where I can get a tally of number of users, projects, tasks for multiple organization. Let's say I have 4 tables as follows:
TABLES:
Organization: orgId(PK), orgName
Project: projId(PK), orgId(FK), projName
Tasks: taskId(PK), projId(FK), taskName
Users: userId(PK), orgId(FK), userName
VIEW:
OrganizationStats: numberOfProjects, numberOfUsers, numberOfTasks orgId(FK)
It was suggested to use something like this:
-- Suggested view skeleton: each dbo.OrganizationStats row plus its project count.
-- NOTE: "{Columns from OrganizationStats}" is a placeholder, not SQL; it must be
-- replaced with a real column list before this statement can run.
CREATE VIEW dbo.vw_OrganisationStats AS
SELECT {Columns from OrganizationStats},
P.Projects AS NumProjects
FROM dbo.OrganizationStats OS
-- Correlated subquery: counts the dbo.Projects rows whose OrgID matches the current OS row.
CROSS APPLY (SELECT COUNT(*) AS Projects
FROM dbo.Projects P
WHERE P.OrgID = OS.OrgID) P;
My problem is that I am having trouble adding the counts for Tasks and Users, in addition to Projects, within the same view. Any help would be appreciated.
**Sample Data:**
* **Organization**: orgId(PK), orgName
1, ACME Inc.
2, Walmart Inc.
3, YoYo Inc.
* **Project**: projId(PK), orgId(FK), projName
1, 1, ACME Project 1
2, 1, ACME Project 2
3, 2, Walmart Project 1
4, 2, Walmart Project 2
5, 2, Walmart Project 3
* **Tasks**: taskId(PK), projId(FK), taskName
1, 1, Task 1 for Acme Project 1
2, 1, Task 2 for Acme Project 1
3, 4, Task 1 for Walmart Project 2
* **Users**: userId(PK), orgId(FK), userName
1, 1, Bob
2, 1, Alex
3, 1, Jim
4, 2, John
Expected Result
VIEW:
* **OrganizationStats**: numberOfProjects, numberOfUsers, numberOfTasks orgId(FK)
2, 3, 2, 1
3, 1, 1, 2
0, 0 ,0, 3
Consider:
-- Per-organisation tallies. Each child table is aggregated on its own before
-- joining, so rows of one child never multiply the counts of another.
-- Tasks carry no org or user column in the posted schema; they reach the
-- organisation through their project. LEFT JOINs keep organisations that
-- have no projects, users or tasks (reported as 0).
select o.orgid,
       coalesce(p.cnt_projects, 0) as cnt_projects,
       coalesce(u.cnt_users, 0) as cnt_users,
       coalesce(t.cnt_tasks, 0) as cnt_tasks
from organization o
left join (
    select orgid, count(*) as cnt_projects
    from project
    group by orgid
) p on p.orgid = o.orgid
left join (
    select orgid, count(*) as cnt_users
    from users
    group by orgid
) u on u.orgid = o.orgid
left join (
    select pr.orgid, count(*) as cnt_tasks
    from tasks tk
    inner join project pr on pr.projid = tk.projid
    group by pr.orgid
) t on t.orgid = o.orgid
-- Tallies per organisation. The joins multiply rows (users x projects x
-- tasks), so each tally counts DISTINCT keys; COUNT ignores the NULLs
-- produced by the LEFT JOINs, which yields 0 for organisations without children.
SELECT
    O.orgId,
    COUNT(DISTINCT U.userId) AS 'numberOfUsers',
    COUNT(DISTINCT P.projId) AS 'numberOfProjects',
    COUNT(DISTINCT T.taskId) AS 'numberOfTasks'
FROM Organization O
LEFT JOIN Users U
    ON U.orgId = O.orgId
LEFT JOIN Project P
    ON P.orgId = O.orgId
LEFT JOIN Tasks T
    ON T.projId = P.projId
GROUP BY O.orgId
SCRIPT USED:
-- Test schema and data for the organisation tallies.
CREATE TABLE Organization (orgId INT , orgName VARCHAR(20));
CREATE TABLE Project(projId INT, orgId INT, projName VARCHAR(20));
CREATE TABLE Tasks (taskId INT, projId INT, taskName VARCHAR(20));
CREATE TABLE Users (userId INT, orgId INT, userName VARCHAR(20));

-- Explicit column lists keep the inserts valid if columns are added or reordered.
-- ORG 3 deliberately has no projects, tasks or users.
INSERT INTO Organization (orgId, orgName)
VALUES (1, 'ORG 1'), (2, 'ORG 2'), (3, 'ORG 3');
INSERT INTO Project (projId, orgId, projName)
VALUES (1,1, 'PRO 11'), (2,2, 'PRO 21'), (3,2, 'PRO 22');
INSERT INTO Tasks (taskId, projId, taskName)
VALUES (1, 1, 'TASK 11'), (2, 1, 'TASK 21'), (3, 2, 'TASK 32'), (4, 2, 'TASK 42'), (5, 2, 'TASK 52');
INSERT INTO Users (userId, orgId, userName)
VALUES (1, 1, 'USER 11'), (2, 1, 'USER 12'), (3, 2, 'USER 21');
RESULTS :
+-------+---------------+------------------+---------------+
| orgId | numberOfUsers | numberOfProjects | numberOfTasks |
+-------+---------------+------------------+---------------+
| 1 | 2 | 1 | 2 |
+-------+---------------+------------------+---------------+
| 2 | 1 | 2 | 3 |
+-------+---------------+------------------+---------------+
| 3 | 0 | 0 | 0 |
+-------+---------------+------------------+---------------+

Conditionally select value from one of two tables

I have a questionnaire application, where a user will submit answers. Some of the questions are text based and some have fixed options.
The values are saved to the tAnswers table as either the entered text value, or the Id of the selected option. There is a QuestionTypeId column which defines if the answer is a reference to tOptions.Id.
I want to select the answers, returning the entered value or the value related to the selected Id.
For example;
SET NOCOUNT ON
-- Temp tables for the example. ("DECLARE #name TABLE" is not valid T-SQL:
-- table variables start with @, temp tables with #.)
CREATE TABLE #tSubmissions (Id INT)
CREATE TABLE #tSubmissionQuestions (SubmissionId INT, QuestionId INT)
CREATE TABLE #tQuestions (Id INT, QuestionText NVARCHAR(MAX), ColName NVARCHAR(MAX), QuestionTypeId INT)
CREATE TABLE #tOptions (Id INT, OptionValue NVARCHAR(MAX), OptionGroupId INT)
CREATE TABLE #tAnswers (Id INT IDENTITY(1,1), SubmissionId INT, QuestionId INT, AnswerValue NVARCHAR(MAX))
-- questions 1-2 are free text (QuestionTypeId 1); questions 3-4 are answered
-- with the Id of a #tOptions row (QuestionTypeId 2)
INSERT INTO #tQuestions (Id, QuestionText, ColName, QuestionTypeId) VALUES
(1, 'What is your name?', 'Name', 1),
(2, 'What is your age?', 'Age', 1),
(3, 'What is your gender?', 'Gender', 2),
(4, 'What is your favourite colour?', 'Colour', 2)
INSERT INTO #tOptions (Id, OptionValue, OptionGroupId) VALUES
-- Answers for question 3 - gender
(1, 'Male', 1),
(2, 'Female', 1),
-- answers for question 4 - colour
(3, 'Blue', 2),
(4, 'Green', 2),
(5, 'Red', 2),
(6, 'Yellow', 2)
INSERT INTO #tSubmissions (Id) VALUES (1), (2), (3)
-- every submission is asked all four questions
INSERT INTO #tSubmissionQuestions (SubmissionId, QuestionId) VALUES
(1, 1), (1, 2), (1, 3), (1, 4),
(2, 1), (2, 2), (2, 3), (2, 4),
(3, 1), (3, 2), (3, 3), (3, 4)
-- form submissions (kept as single-row inserts so the IDENTITY values are
-- assigned in listing order)
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (1, 1, 'Tony Stark')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (1, 2, '39')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (1, 3, '1') -- reference to #tOptions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (1, 4, '5') -- reference to #tOptions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (2, 1, 'Pepper Potts')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (2, 2, '38')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (2, 3, '2') -- reference to #tOptions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (2, 4, '6') -- reference to #tOptions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (3, 1, 'James Rhodes')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (3, 2, '41') -- has chosen not to answer question 3
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (3, 4, '3') -- reference to #tOptions
-- Every question asked in every submission, with the raw stored answer
-- (NULL when the question was left unanswered).
SELECT
s.Id as SubmissionId, q.Id as QuestionId, a.AnswerValue
FROM
#tSubmissions s
INNER JOIN #tSubmissionQuestions sq
ON sq.SubmissionId = s.Id
INNER JOIN #tQuestions q
ON q.Id = sq.QuestionId
LEFT JOIN #tAnswers a
ON a.QuestionId = q.Id
AND a.SubmissionId = s.Id
DBFiddle
This returns;
SubmissionId | QuestionId | AnswerValue
=============|============|===============
1 | 1 | Tony Stark
1 | 2 | 39
1 | 3 | 1 <-- this is the Id of the selected option
1 | 4 | 5 <-- this is the Id of the selected option
2 | 1 | Pepper Potts
2 | 2 | 38
2 | 3 | 2 <-- this is the Id of the selected option
2 | 4 | 6 <-- this is the Id of the selected option
3 | 1 | James Rhodes
3 | 2 | 41
3 | 3 | NULL <-- the option was not selected
3 | 4 | 3 <-- this is the Id of the option
Instead I would like;
SubmissionId | QuestionId | AnswerValue
=============|============|===============
1 | 1 | Tony Stark
1 | 2 | 39
1 | 3 | Male <-- this is the value of the selected option
1 | 4 | Red <-- this is the value of the selected option
2 | 1 | Pepper Potts
2 | 2 | 38
2 | 3 | Female <-- this is the value of the selected option
2 | 4 | Yellow <-- this is the value of the selected option
3 | 1 | James Rhodes
3 | 2 | 41
3 | 3 | NULL <-- the option was not selected
3 | 4 | Blue <-- this is the value of the selected option
How do I conditionally pull values from the tOptions table?
I guess this is what you're looking for:
Another LEFT JOIN on tOptions to select the values, in case of QuestionTypeId = 2
I just added the ISNUMERIC to avoid conversion errors.
-- Answers with option Ids resolved to their text for option-type questions.
SELECT
    s.Id as SubmissionId,
    q.Id as QuestionId,
    -- the option text when an option matched, otherwise the stored answer
    COALESCE(t.OptionValue,a.AnswerValue) AS AnswerValue
FROM
    #tSubmissions s
    INNER JOIN #tSubmissionQuestions sq
        ON sq.SubmissionId = s.Id
    INNER JOIN #tQuestions q
        ON q.Id = sq.QuestionId
    LEFT JOIN #tAnswers a
        ON a.QuestionId = q.Id
        AND a.SubmissionId = s.Id
    LEFT JOIN #tOptions t
        ON q.QuestionTypeId = 2
        AND ISNUMERIC(a.AnswerValue) = 1
        -- ISNUMERIC alone does not prevent conversion errors: the engine may
        -- evaluate the predicates in any order, and ISNUMERIC accepts strings
        -- such as '$' or '1e5' that do not convert to INT. TRY_CONVERT returns
        -- NULL instead of raising an error for text like 'Tony Stark'.
        AND t.Id = TRY_CONVERT(INT, a.AnswerValue)
I would make two columns in the Answers table. One that you have AnswerValue NVARCHAR(MAX) NULL and another one AnswerOptionID int NULL. It would make joining way more efficient and it would eliminate problems when engine tries to convert text "Tony Stark" into integer.
But, given the schema as is, here is one variant.
I added LEFT JOIN to the #tOptions table. Note, that I'm converting integer IDs into text, not other way around.
-- Answer text per submission/question: the option's text for option-type
-- questions (QuestionTypeId = 2), the stored answer otherwise.
SELECT
    s.Id AS SubmissionId,
    q.Id AS QuestionId,
    CASE q.QuestionTypeId
        WHEN 2 THEN opt.OptionValue
        ELSE a.AnswerValue
    END AS AnswerText
FROM #tSubmissions s
INNER JOIN #tSubmissionQuestions sq
    ON sq.SubmissionId = s.Id
INNER JOIN #tQuestions q
    ON q.Id = sq.QuestionId
LEFT JOIN #tAnswers a
    ON a.SubmissionId = s.Id
   AND a.QuestionId = q.Id
LEFT JOIN #tOptions AS opt
    -- the integer Id is converted to text, so free-text answers are never cast to a number
    ON a.AnswerValue = CAST(opt.Id AS NVARCHAR(MAX))
   AND q.QuestionTypeId = 2
;
Please try this.
-- Answers with option Ids resolved to their text through a scalar lookup.
SELECT
    s.Id as SubmissionId, q.Id as QuestionId,
    CASE WHEN q.QuestionTypeId = 1 THEN
        a.AnswerValue
    ELSE
        -- TRY_CONVERT returns NULL for non-numeric text instead of raising a
        -- conversion error. OptionValue stays NVARCHAR(MAX): converting it to
        -- VARCHAR(100) would drop Unicode characters and make ISNULL truncate
        -- the fallback answer to 100 characters.
        ISNULL((SELECT o.OptionValue FROM #tOptions o WHERE o.Id = TRY_CONVERT(INT, a.AnswerValue)), a.AnswerValue)
    END AS AnswerValue
FROM
    #tSubmissions s
    INNER JOIN #tSubmissionQuestions sq
        ON sq.SubmissionId = s.Id
    INNER JOIN #tQuestions q
        ON q.Id = sq.QuestionId
    LEFT JOIN #tAnswers a
        ON a.QuestionId = q.Id
        AND a.SubmissionId = s.Id
-- question Id added so rows inside a submission come back in a fixed order
ORDER BY s.Id ASC, q.Id ASC

Create a table with maximum values out of 2 combined tables in SQL Server

Currently running into a problem with SQL Server (SSMS 17.4), I have combined 2 tables with different columns into 1 table, making use of the unique ID done in Abc_ID.
Table 1
Abc_ID Color Value
-----------------------
1 1
2 a -0.5
2 b 0
2 c -0.1
2 d 0
2 e 0
2 f 0
2 g 1
2 h 3
2 i -5
3 a -0.9
4 a -.023
5 a 0
5 b 7.548
5 c -0.8774
6 a 1
6 b 0.5
6 c 0
7 a 2.1
7 b -1
7 c -2.5
8 a -1.1
8 b 5
Table 2
Abc_ID ProductLine Name
----------------------------
1 prod1 INTERCEPT
2 prod1 BASE
3 prod1 RawCost
4 prod1 FEAT1
5 prod1 FEAT2
6 prod1 FEAT3
7 prod1 FEAT4
8 prod1 FEAT5
Table 1 just returns the material (Abc_ID), the color and the value, and table 2 returns the productline and the name per material.
The merged table now looks like
Abc_ID,Color, Value, Productline, name
Query:
-- Each table1 row (material, colour, value) with the product line and name
-- of the same Abc_ID from tl_table2.
SELECT
    ah.Abc_ID,
    ah.Color,
    ah.value,
    ad.ProductLine,
    ad.name
FROM [dbo].[table1] AS ah
INNER JOIN [dbo].[tl_table2] AS ad
    ON ad.Abc_ID = ah.Abc_ID
Now, I would like to calculate the MAX value of the color (as a material can have different colors, only 1 color per material is the most expensive)
where
(ah.Abc_ID, ah.value) in (select ah.Abc_ID, max(ah.value)
from [dbo].[table1]
group by [dbo].[table1].Abc_ID)
But that code throws an error:
An expression of non-boolean type specified in a context where a condition is expected, near ','.
What am I doing wrong here?
Expected result should be 1 max value per Abc_ID
Abc_ID Color Value ProductLine Name
------------------------------------------
1 1 prod1 INTERCEPT
2 h 3 prod1 BASE
3 a -0.9 prod1 RawCost
4 a -0.023 prod1 FEAT1
5 b 7.548 prod1 FEAT2
6 a 1 prod1 FEAT3
7 a 2.1 prod1 FEAT4
8 b 5 prod1 FEAT5
SQL Server does not support the syntax where (ah.Abc_ID, ah.value) in (...)
From your sample data and expected outcome this might do it for you
-- Sample data in table variables (T-SQL table variables are prefixed with @).
-- Value uses scale 4 because the sample values carry up to four decimals
-- (-0.023, 7.548, -0.8774); scale 2 would round them.
declare @table1 table (Abc_ID int, Color varchar(1), Value decimal(16,4))
declare @table2 table (Abc_ID int, ProductLine varchar(10), Name varchar(10))
insert into @table1 (Abc_ID, Color, Value)
values (1, null, 1), (2, 'a', -0.5), (2, 'b', 0), (2, 'c', -0.1), (2, 'd', 0), (2, 'e', 0), (2, 'f', 0), (2, 'g', 1), (2, 'h', 3),
(2, 'i', -5), (3, 'a', -0.9), (4, 'a', -0.023), (5, 'a', 0), (5, 'b', 7.548), (5, 'c', -0.8774), (6, 'a', 1), (6, 'b', 0.5),
(6, 'c', 0), (7, 'a', 2.1), (7, 'b', -1), (7, 'c', -2.5), (8, 'a', -1.1), (8, 'b', 5)
insert into @table2 (Abc_ID, ProductLine, Name)
values (1, 'prod1', 'INTERCEPT'), (2, 'prod1', 'BASE'), (3, 'prod1', 'RawCost'), (4, 'prod1', 'FEAT1'),
(5, 'prod1', 'FEAT2'), (6, 'prod1', 'FEAT3'), (7, 'prod1', 'FEAT4'), (8, 'prod1', 'FEAT5')

-- One row per Abc_ID: the colour and value of its highest-valued row.
-- Ranking once per material replaces a correlated TOP 1 lookup per output row
-- and guarantees Color and Value come from the same source row.
select ah.Abc_ID,
       ah.Color,
       ah.Value,
       ad.ProductLine,
       ad.Name
from (
    select t.Abc_ID,
           t.Color,
           t.Value,
           row_number() over (partition by t.Abc_ID order by t.Value desc) as rn
    from @table1 t
) ah
left join @table2 ad on ah.Abc_ID = ad.Abc_ID
where ah.rn = 1
order by ah.Abc_ID
This produces this result :
Abc_ID Color Value ProductLine Name
------ ----- ----- ----------- ----
1 null 1 prod1 INTERCEPT
2 h 3 prod1 BASE
3 a -0,9 prod1 RawCost
4 a -0,023 prod1 FEAT1
5 b 7,548 prod1 FEAT2
6 a 1 prod1 FEAT3
7 a 2,1 prod1 FEAT4
8 b 5 prod1 FEAT5
You can also test it yourself and fiddle with this here http://sqlfiddle.com/#!18/a90f0/1
SQL Server doesn't support multiple columns in an IN filter. You should use EXISTS instead.
Change
where
(ah.Abc_ID, ah.value) in (select ah.Abc_ID, max(ah.value)
from [dbo].[table1]
group by [dbo].[table1].Abc_ID)
for
where
    -- keep the outer row only when its value is the maximum for its Abc_ID
    EXISTS (
        select 'there is a match'
        from [dbo].[table1] AS T
        where T.Abc_ID = ah.Abc_ID
        having max(T.value) = ah.value)

Merging groups of interval data - SQL Server

I have two sets of interval data I.E.
Start End Type1 Type2
0 2 L NULL
2 5 L NULL
5 7 L NULL
7 10 L NULL
2 3 NULL S
3 5 NULL S
5 8 NULL S
11 12 NULL S
What I'd like to do is merge these sets into one. This seems possible by utilising an islands and gaps solution but due to the non-continuous nature of the intervals I'm not sure how to go about applying it... The output I'm expecting would be:
Start End Type1 Type2
0 2 L NULL
2 3 L S
3 5 L S
5 7 L S
7 8 L S
8 10 L NULL
11 12 NULL S
Anyone out there done something like this before??? Thanks!
Create script below:
-- Interval rows; each row carries either a Type1 or a Type2 label.
CREATE TABLE Table1 (
    [Start] int,
    [End]   int,
    [Type1] varchar(4),
    [Type2] varchar(4)
);

INSERT INTO Table1 ([Start], [End], [Type1], [Type2])
VALUES
    (0, 2, 'L', NULL),
    (2, 3, NULL, 'S'),
    (2, 5, 'L', NULL),
    (3, 5, NULL, 'S'),
    (5, 7, 'L', NULL),
    (5, 8, NULL, 'S'),
    (7, 10, 'L', NULL),
    (11, 12, NULL, 'S');
I assume that Start is inclusive, End is exclusive and given intervals do not overlap.
CTE_Number is a table of numbers. Here it is generated on the fly. I have it as a permanent table in my database.
CTE_T1 and CTE_T2 expand each interval into the corresponding number of rows using a table of numbers. For example, interval [2,5) generates rows with Values
2
3
4
This is done twice: for Type1 and Type2.
Results for Type1 and Type2 are FULL JOINed together on Value.
Finally, a gaps-and-islands pass groups/collapses intervals back.
Run the query step-by-step, CTE-by-CTE and examine intermediate results to understand how it works.
Sample data
I added few rows to illustrate a case when there is a gap between values.
-- Sample data as a temp table, so the query below that reads #Table1 runs as
-- written. ("DECLARE #Table1 TABLE" is not valid T-SQL: table variables
-- start with @, temp tables with #.)
CREATE TABLE #Table1
([Start] int, [End] int, [Type1] varchar(4), [Type2] varchar(4))
;
INSERT INTO #Table1 ([Start], [End], [Type1], [Type2]) VALUES
( 0, 2, 'L', NULL),
( 2, 3, NULL, 'S'),
( 2, 5, 'L', NULL),
( 3, 5, NULL, 'S'),
( 5, 7, 'L', NULL),
( 5, 8, NULL, 'S'),
( 7, 10, 'L', NULL),
(11, 12, NULL, 'S'),
-- extra rows: an interval pair after a gap
(15, 20, 'L', NULL),
(15, 20, NULL, 'S');
Query
-- Merges the Type1 and Type2 interval sets: every interval is expanded into
-- one row per integer it covers, the two expansions are FULL JOINed on that
-- integer, and runs of consecutive integers with the same (Type1, Type2)
-- pair are collapsed back into [Start, End) intervals.
WITH
-- e1/e2/e3: row generators of 10, 100 and 1000 rows (values are all 1; only
-- the row count matters).
e1(n) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
) -- 10
,e2(n) AS (SELECT 1 FROM e1 CROSS JOIN e1 AS b) -- 10*10
,e3(n) AS (SELECT 1 FROM e1 CROSS JOIN e2) -- 10*100
-- Numbers 1..1000. NOTE(review): an interval longer than 1000 units cannot be
-- fully expanded, because TOP(...) below can return at most 1000 rows.
,CTE_Numbers
AS
(
SELECT ROW_NUMBER() OVER (ORDER BY n) AS Number
FROM e3
)
-- One row per integer covered by each Type1 interval: Start .. End-1.
,CTE_T1
AS
(
SELECT
T1.[Start] + CA.Number - 1 AS Value
,T1.Type1
FROM
#Table1 AS T1
CROSS APPLY
(
-- the first (End - Start) numbers, i.e. the interval's length
SELECT TOP(T1.[End] - T1.[Start]) CTE_Numbers.Number
FROM CTE_Numbers
ORDER BY CTE_Numbers.Number
) AS CA
WHERE
T1.Type1 IS NOT NULL
)
-- Same expansion for the Type2 intervals.
,CTE_T2
AS
(
SELECT
T2.[Start] + CA.Number - 1 AS Value
,T2.Type2
FROM
#Table1 AS T2
CROSS APPLY
(
SELECT TOP(T2.[End] - T2.[Start]) CTE_Numbers.Number
FROM CTE_Numbers
ORDER BY CTE_Numbers.Number
) AS CA
WHERE
T2.Type2 IS NOT NULL
)
-- Both expansions side by side, numbered in Value order. The FULL JOIN keeps
-- integers covered by only one of the two types.
,CTE_Values
AS
(
SELECT
ISNULL(CTE_T1.Value, CTE_T2.Value) AS Value
,CTE_T1.Type1
,CTE_T2.Type2
,ROW_NUMBER() OVER (ORDER BY ISNULL(CTE_T1.Value, CTE_T2.Value)) AS rn
FROM
CTE_T1
FULL JOIN CTE_T2 ON CTE_T2.Value = CTE_T1.Value
)
-- Gaps-and-islands step: rn - Value is used as the key of a run of
-- consecutive integers (assumes one row per Value, i.e. intervals of the
-- same type do not overlap); rn2 numbers the rows of each (run, Type1, Type2)
-- combination.
,CTE_Groups
AS
(
SELECT
Value
,Type1
,Type2
,rn
,ROW_NUMBER() OVER
(PARTITION BY rn - Value, Type1, Type2 ORDER BY Value) AS rn2
FROM CTE_Values
)
-- rn - rn2 is the grouping key for a stretch of rows with the same type pair;
-- each such group collapses to one interval. MAX(Value) + 1 restores the
-- exclusive End.
SELECT
MIN(Value) AS [Start]
,MAX(Value) + 1 AS [End]
,Type1
,Type2
FROM CTE_Groups
GROUP BY rn-rn2, Type1, Type2
ORDER BY [Start];
Result
+-------+-----+-------+-------+
| Start | End | Type1 | Type2 |
+-------+-----+-------+-------+
| 0 | 2 | L | NULL |
| 2 | 8 | L | S |
| 8 | 10 | L | NULL |
| 11 | 12 | NULL | S |
| 15 | 20 | L | S |
+-------+-----+-------+-------+
A step-by-step way is:
-- Finding all break points
;WITH breaks AS (
SELECT Start
FROM yourTable
UNION
SELECT [End]
FROM yourTable
) -- Finding Possible Ends
, ends AS (
SELECT Start
, (SELECT Min([End]) FROM yourTable WHERE yourTable.Start = breaks.Start) End1
, (SELECT Max([End]) FROM yourTable WHERE yourTable.Start < breaks.Start) End2
FROM breaks
) -- Finding periods
, periods AS (
SELECT Start,
CASE
WHEN End1 > End2 And End2 > Start THEN End2
WHEN End1 IS NULL THEN End2
ELSE End1
END [End]
FROM Ends
WHERE NOT(End1 IS NULL AND Start = End2)
) -- Generating results
SELECT p.Start, p.[End], Max(Type1) Type1, Max(Type2) Type2
FROM periods p, yourTable t
WHERE p.start >= t.Start AND p.[End] <= t.[End]
GROUP BY p.Start, p.[End];
The query above may not handle every situation; analyze the cases that occur in your data and improve it as needed ;).
First getting all the numbers of start and end via a Union.
Then joining those numbers on both the 'L' and 'S' records.
Uses a table variable for the test.
-- Sample data in a table variable (T-SQL table variables are prefixed with @).
DECLARE @Table1 TABLE (Start int, [End] int, Type1 varchar(4), Type2 varchar(4));
INSERT INTO @Table1 (Start, [End], Type1, Type2)
VALUES (0, 2, 'L', NULL),(2, 3, NULL, 'S'),(2, 5, 'L', NULL),(3, 5, NULL, 'S'),
(5, 7, 'L', NULL),(5, 8, NULL, 'S'),(7, 10, 'L', NULL),(11, 12, NULL, 'S');

-- Every Start/End value is a break point. Each output period runs from one
-- break point to the next and carries the 'L' and/or 'S' interval covering it.
select
    n.Num as Start,
    -- The period ends at the next break point. Taking the smaller of the two
    -- matched interval ends would overshoot when an interval of the other
    -- type starts strictly inside the current one.
    n.NextNum as [End],
    l.Type1,
    s.Type2
from
(
    select b.Num,
           lead(b.Num) over (order by b.Num) as NextNum
    from (select Start as Num from @Table1 union select [End] from @Table1) b
) n
left join @Table1 l on (n.Num >= l.Start and n.Num < l.[End] and l.Type1 = 'L')
left join @Table1 s on (n.Num >= s.Start and n.Num < s.[End] and s.Type2 = 'S')
-- drop break points that no interval covers (gaps and the final end point)
where (l.Start is not null or s.Start is not null)
order by Start, [End];
Output:
Start End Type1 Type2
0 2 L NULL
2 3 L S
3 5 L S
5 7 L S
7 8 L S
8 10 L NULL
11 12 NULL S