Aggregate SQL query across multiple tables by grouping - sql

I want to group the records from multiple tables.
Sample data:
create table UserTable (
Id integer not null,
Name varchar(12) not null
);
insert into UserTable values (1, 'A B');
insert into UserTable values (2, 'A C');
insert into UserTable values (3, 'A C A C');
insert into UserTable values (4, 'A C C');
insert into UserTable values (5, 'A C B');
insert into UserTable values (6, 'A C C');
insert into UserTable values (7, 'A C D');
insert into UserTable values (8, 'A C E');
insert into UserTable values (9, 'A C F');
create table LogTable (
LogId integer not null,
Username varchar(12) not null,
Event varchar(12) not null
);
insert into LogTable values (1, 'A C A C', 'Read');
insert into LogTable values (2, 'A C F', 'Write');
insert into LogTable values (3, 'A C F', 'Read');
insert into LogTable values (4, 'A C C', 'Update');
insert into LogTable values (5,'A C C', 'Read');
insert into LogTable values (6,'A C F', 'Read');
insert into LogTable values (7,'A C F', 'Update');
insert into LogTable values (7,'A C F', 'Write');
insert into LogTable values (7,'A C E','Update');
insert into LogTable values (7,'A C F', 'Delete');
insert into LogTable values (10,'A C B', 'Delete');
insert into LogTable values (11, 'A C F','Copy');
insert into LogTable values (12, 'A C B','Read');
insert into LogTable values (13, 'A C F','Update');
insert into LogTable values (14, 'A C F','Copy');
insert into LogTable values (15, 'A C F','Read');
insert into LogTable values (16, 'A C F','Update');
insert into LogTable values (17, 'A C F','Copy');
insert into LogTable values (18, 'A C C','Read');
insert into LogTable values (19, 'A C D','Update');
create table Activity (
Id integer not null,
ActivityType varchar(12) not null,
UserId varchar(12) not null
);
insert into Activity values (1, 'Videos', 8);
insert into Activity values (2, 'Text', 7);
insert into Activity values (3, 'Page', 7);
insert into Activity values (4, 'Text', 7);
insert into Activity values (5, 'Text', 9);
insert into Activity values (6, 'Chat', 8);
insert into Activity values (7, 'Chat', 5);
insert into Activity values (7, 'File', 8);
insert into Activity values (7, 'Videos', 1);
insert into Activity values (7, 'Text', 4);
insert into Activity values (10, 'Image', 4);
insert into Activity values (11, 'Image', 6);
insert into Activity values (12, 'Chat', 3);
insert into Activity values (13, 'Chat', 2);
insert into Activity values (14, 'Page', 1);
insert into Activity values (15, 'Vidoes',1);
insert into Activity values (16, 'Vidoes',6);
insert into Activity values (17, 'Vidoes',5);
insert into Activity values (18, 'Vidoes',5);
insert into Activity values (19, 'Chat', 5);
What I have tried:
SELECT UT.Id,UT.Name,
SUM(CASE
WHEN LT.Event = 'Read' THEN 1
ELSE 0 END
) AS [USER READ],
SUM(CASE
WHEN LT.Event = 'Delete' THEN 1
ELSE 0 END
) AS [USER DELETE],
SUM(CASE
WHEN AC.ActivityType = 'Videos' THEN 1
WHEN AC.ActivityType = 'Text' THEN 1
WHEN AC.ActivityType = 'Page' THEN 1
WHEN AC.ActivityType = 'Image' THEN 1
ELSE 0 END
) AS [LEARNING ACTIVITY],
SUM(CASE WHEN AC.ActivityType = 'Chat' THEN 1 ELSE 0 END) AS [Chat]
FROM UserTable UT
LEFT JOIN Activity AC ON UT.Id = AC.UserId
LEFT JOIN LogTable LT ON LT.Username = UT.Name
GROUP BY UT.Id, UT.Name
Desired Output:
Id | Name | LEARNING ACTIVITY | Chat | USER READ | USER DELETE|
------------------------------------------------------------------------
1 | A B | 2 | 0 | 0 | 0 |
2 | A C | 0 | 1 | 0 | 0 |
3 | A C A C | 0 | 1 | 1 | 0 |
4 | A C C | 2 | 0 | 2 | 0 |
5 | A C B | 0 | 2 | 1 | 1 |
6 | A C R | 1 | 0 | 0 | 0 |
7 | A C D | 3 | 0 | 0 | 0 |
8 | A C E | 1 | 1 | 0 | 0 |
9 | A C F | 1 | 0 | 3 | 1 |
How can I aggregate two tables which are not related and group by with Id and Name?
Join and Aggregate Activity with Users
Learning Activity is the sum of (Videos, Text, Page and Image) as ActivityType
Chat is all the rows having the Chat as ActivityType
Join and Aggregate LogTable with Users

You should aggregate before joining, this avoids getting a many-to-many-join which results in overcounting:
-- Per-user activity and log counts.
-- Each child table is aggregated down to one row per user BEFORE it is joined,
-- so the two one-to-many relationships cannot multiply each other's rows.
SELECT UT.Id,
       UT.Name,
       -- COALESCE turns the NULLs produced by the LEFT JOINs (users with no
       -- matching rows) into 0. Every expression is aliased so the result
       -- columns keep their names instead of being returned unnamed.
       coalesce(AC.[LEARNING ACTIVITY],0) AS [LEARNING ACTIVITY],
       coalesce(AC.[Chat],0) AS [Chat],
       coalesce(LT.[USER READ],0) AS [USER READ],
       coalesce(LT.[USER DELETE],0) AS [USER DELETE]
FROM UserTable UT
LEFT JOIN
(
    -- One row per UserId with the activity counts.
    select UserId,
           SUM(CASE
                   WHEN ActivityType = 'Videos' THEN 1
                   WHEN ActivityType = 'Text' THEN 1
                   WHEN ActivityType = 'Page' THEN 1
                   WHEN ActivityType = 'Image' THEN 1
                   ELSE 0
               END) AS [LEARNING ACTIVITY],
           SUM(CASE WHEN ActivityType = 'Chat' THEN 1 ELSE 0 END) AS [Chat]
    from Activity
    group by UserId
) AC
    ON UT.Id = AC.UserId
LEFT JOIN
(
    -- One row per Username with the log event counts.
    select Username,
           SUM(CASE
                   WHEN Event = 'Read' THEN 1
                   ELSE 0 END
           ) AS [USER READ],
           SUM(CASE
                   WHEN Event = 'Delete' THEN 1
                   ELSE 0 END
           ) AS [USER DELETE]
    from LogTable
    group by UserName
) LT
    ON LT.Username = UT.Name
See fiddle
Simplifying the CASEs (COALESCE takes care of NULLs):
-- Same result as the aggregate-before-join query, with shorter CASE expressions.
-- A CASE without ELSE yields NULL, SUM ignores NULLs, and a group with no
-- matching rows therefore sums to NULL; the outer COALESCE maps that to 0.
SELECT UT.Id,
       UT.Name,
       -- Aliases keep the result columns named; a bare COALESCE(...) expression
       -- would otherwise be returned without a column name.
       coalesce(AC.[LEARNING ACTIVITY],0) AS [LEARNING ACTIVITY],
       coalesce(AC.[Chat],0) AS [Chat],
       coalesce(LT.[USER READ],0) AS [USER READ],
       coalesce(LT.[USER DELETE],0) AS [USER DELETE]
FROM UserTable UT
LEFT JOIN
(
    -- One row per UserId with the activity counts.
    select UserId,
           SUM(CASE WHEN ActivityType IN ('Videos','Text','Page','Image') THEN 1 END) AS [LEARNING ACTIVITY],
           SUM(CASE WHEN ActivityType = 'Chat' THEN 1 END) AS [Chat]
    from Activity
    group by UserId
) AC
    ON UT.Id = AC.UserId
LEFT JOIN
(
    -- One row per Username with the log event counts.
    select Username,
           SUM(CASE WHEN Event = 'Read' THEN 1 END) AS [USER READ],
           SUM(CASE WHEN Event = 'Delete' THEN 1 END) AS [USER DELETE]
    from LogTable
    group by UserName
) LT
    ON LT.Username = UT.Name

I recommend you make use of windowing functions and outer apply to do the aggregations you need. Here is the query I came up with that matches your expected result:
https://learn.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql?view=sql-server-ver15
https://learn.microsoft.com/en-us/u-sql/statements-and-expressions/select/from/select-selecting-from-cross-apply-and-outer-apply
-- Per-user activity counts via window functions, plus log counts via OUTER APPLY.
-- The window SUMs repeat the per-user total on every Activity row; DISTINCT then
-- collapses those identical rows to one row per user.
-- NOTE: the query starts from Activity with an inner join, so only users that
-- have at least one Activity row are returned.
select distinct
UserId,
UT.Name,
sum(case when A.ActivityType in ('Videos','Text','Page','Image') then 1 else 0 end) over (partition by UserId) [Learning Activity],
sum(case when A.ActivityType = 'Chat' then 1 else 0 end) over (partition by UserId) Chat,
-- OUTER APPLY yields NULLs when the user has no LogTable rows; map them to 0.
coalesce(LT2.[User Read],0) as [User Read],
coalesce(LT2.[User Delete],0) as [User Delete]
from Activity A
join UserTable UT
on A.UserId = UT.Id
outer apply
(
-- Log counts for the current outer user only (filtered by the WHERE below).
-- NOTE(review): LogTable has no UserId column, so "partition by UserId" here
-- presumably resolves to the outer A.UserId and is constant per invocation - confirm.
select distinct
sum(case when LT.[Event] = 'Read' then 1 else 0 end) over (partition by UserId) [User Read],
sum(case when LT.[Event] = 'Delete' then 1 else 0 end) over (partition by UserId) [User Delete]
from LogTable LT
where LT.Username = UT.Name
) LT2

Related

Remove duplicate value in different categories in same table SQL but keep the first category value

Let's say I have a table with id and category like the table below
D_id | D_category
-----------------
1 | A
2 | A
3 | A
1 | B
2 | B
4 | B
5 | B
1 | C
2 | C
4 | C
5 | C
6 | C
Hence the rules are like this
values in category A should not appear in category B or category C
values in category B should not appear in category C
The end result should be like this
D_id | D_category
-----------------
1 | A
2 | A
3 | A
4 | B
5 | B
6 | C
I will provide a solution that works, but it's not ideal. Can anyone help me find a better solution that still works when there are more categories? The same rule should then apply: values in earlier categories should not appear in any later category.
-- Sample data in a local temporary table.
-- A '#'-prefixed name is a temporary table and must be created with CREATE TABLE;
-- DECLARE ... TABLE only accepts '@'-prefixed table variables.
CREATE TABLE #A (
D_id INT NOT NULL,
D_category VARCHAR(MAX));
INSERT INTO #A(D_id,D_category)
VALUES (1, 'A'),
(2, 'A'),
(3, 'A'),
(1, 'B'),
(2, 'B'),
(4, 'B'),
(5, 'B'),
(1, 'C'),
(2, 'C'),
(4, 'C'),
(5, 'C'),
(6, 'C');
-- Remove from B every D_id that already exists in A.
DELETE t
FROM #A t
WHERE t.D_category = 'B' AND EXISTS (SELECT 1 FROM #A t2 WHERE t2.D_category = 'A' and t.D_id = t2.D_id);
-- Remove from C every D_id that still exists in B.
DELETE t
FROM #A t
WHERE t.D_category = 'C' AND EXISTS (SELECT 1 FROM #A t2 WHERE t2.D_category = 'B' and t.D_id = t2.D_id);
-- Remove from C every D_id that exists in A.
DELETE t
FROM #A t
WHERE t.D_category = 'C' AND EXISTS (SELECT 1 FROM #A t2 WHERE t2.D_category = 'A' and t.D_id = t2.D_id);
select * from #A
Just check that the specified record doesn't exist earlier in the sequence.
select *
from #A A1
where not exists (
select 1
from #A A2
where A2.D_id = A1.D_id
and A2.D_category < A1.D_category
)
or just make use of row_number()
-- Keep, for every D_id, only the row with the lowest D_category.
-- The rank column r is part of the output, as in any SELECT * over the derived table.
SELECT first_seen.*
FROM (
    SELECT src.*,
           ROW_NUMBER() OVER (PARTITION BY src.D_id ORDER BY src.D_category) AS r
    FROM #A AS src
) AS first_seen
WHERE first_seen.r = 1
Delete using the join syntax:
delete a
from my_table a
join my_table b on a.D_id = b.D_id
and a.D_category > b.D_category
See live demo.

SQL questions about how calculation after joining tables and dynamic types

See and test tables and my queries here: http://sqlfiddle.com/#!17/e5a87/3
I'm pretty new to SQL and I got this 3 tables:
tb1 is the data in store level
create table tb1 (id varchar(1), store_id varchar(3), sold_count int);
insert into tb1 values
('1', 's1', 40),
('2', 's2', 20),
('3', 's2', 30);
tb2 is the data about each product
create table tb2 (id varchar(1), product_id varchar(3), shelf_id varchar(4), error_type varchar(24));
insert into tb2 values
('a', 'p1', 'row1', 'A'),
('b', 'p2', 'row2', 'A'),
('c', 'p3', 'row3', 'B'),
('d', 'p4', 'row4', 'C'),
('e', 'p5', 'row4', 'C');
tb3 is a table that links store and product, a store may have multiple products
create table tb3 (tb1_id varchar(1), tb2_id varchar(1));
insert into tb3 values
('1', 'a'),
('2', 'b'),
('2', 'c'),
('3', 'd'),
('3', 'e');
now I want to write a query and get a result like this:
store_id
total_sold
total_shelf
percentage
type_A
type_B
type_C
s1
40
1
0.025 (1/40)
1
0
0
s2
50 (20+30)
3
0.06 (3/50)
1
1
2
I wrote a query as follow:
SELECT
tb1.store_id,
SUM(tb1.sold_count) AS total_sold,
MAX(t.shelves_count) AS total_shelf_count,
(MAX(t.shelves_count) / SUM(tb1.sold_count)) AS percentage,
t.A,
t.B,
t.C
FROM tb1
JOIN (
SELECT
tb1.store_id AS store_id,
COUNT(DISTINCT tb2.shelf_id) AS shelves_count,
SUM(CASE WHEN tb2.error_type = 'A' THEN 1 ELSE 0 END) AS A,
SUM(CASE WHEN tb2.error_type = 'B' THEN 1 ELSE 0 END) AS B,
SUM(CASE WHEN tb2.error_type = 'C' THEN 1 ELSE 0 END) AS C
FROM tb1
JOIN tb3 ON tb3.tb1_id = tb1.id
JOIN tb2 ON tb3.tb2_id = tb2.id
GROUP BY store_id
) AS t ON tb1.store_id = t.store_id
GROUP BY tb1.store_id, t.A, t.B, t.C;
It did most things correct, but I got 2 questions:
(1) The percentage column does not seem to work: it constantly gives me 0 when it should have a non-zero value. How can I fix it?
(2) If I have a lot of error types (not just A, B, C), and I can't really list them all specifically. Is there a way that postgresql can automatically gather the distinct types and count, and still shows a final result like this?
Really need some help, thanks in advance!! Please see the sample table and my current query here: http://sqlfiddle.com/#!17/e5a87/3
You need to convert the type before you divide the values. Otherwise PostgreSQL performs the division on the bigint results of the aggregate functions, which is integer division and discards the fractional part.
Query 1:
SELECT
tb1.store_id,
SUM(tb1.sold_count) AS total_sold,
MAX(t.shelves_count) AS total_shelf_count,
(MAX(t.shelves_count)::decimal / SUM(tb1.sold_count)) AS percentage,
t.A,
t.B,
t.C
FROM tb1
JOIN (
SELECT
tb1.store_id AS store_id,
COUNT(DISTINCT tb2.shelf_id) AS shelves_count,
SUM(CASE WHEN tb2.error_type = 'A' THEN 1 ELSE 0 END) AS A,
SUM(CASE WHEN tb2.error_type = 'B' THEN 1 ELSE 0 END) AS B,
SUM(CASE WHEN tb2.error_type = 'C' THEN 1 ELSE 0 END) AS C
FROM tb1
JOIN tb3 ON tb3.tb1_id = tb1.id
JOIN tb2 ON tb3.tb2_id = tb2.id
GROUP BY store_id
) AS t ON tb1.store_id = t.store_id
GROUP BY tb1.store_id, t.A, t.B, t.C
Results:
| store_id | total_sold | total_shelf_count | percentage | a | b | c |
|----------|------------|-------------------|------------|---|---|---|
| s2 | 50 | 3 | 0.06 | 1 | 1 | 2 |
| s1 | 40 | 1 | 0.025 | 1 | 0 | 0 |
you can try to use pg_typeof to see the type of values.
Query 1:
SELECT
pg_typeof(SUM(tb1.sold_count)) AS total_sold,
pg_typeof(MAX(t.shelves_count)) AS total_shelf_count,
pg_typeof(MAX(t.shelves_count)::decimal / SUM(tb1.sold_count)) AS total_shelf_count
FROM tb1
JOIN (
SELECT
tb1.store_id AS store_id,
COUNT(DISTINCT tb2.shelf_id) AS shelves_count,
SUM(CASE WHEN tb2.error_type = 'A' THEN 1 ELSE 0 END) AS A,
SUM(CASE WHEN tb2.error_type = 'B' THEN 1 ELSE 0 END) AS B,
SUM(CASE WHEN tb2.error_type = 'C' THEN 1 ELSE 0 END) AS C
FROM tb1
JOIN tb3 ON tb3.tb1_id = tb1.id
JOIN tb2 ON tb3.tb2_id = tb2.id
GROUP BY store_id
) AS t ON tb1.store_id = t.store_id
[Results]:
| total_sold | total_shelf_count | total_shelf_count |
|------------|-------------------|-------------------|
| bigint | bigint | numeric |

Conditionally select value from one of two tables

I have a questionnaire application, where a user will submit answers. Some of the questions are text based and some have fixed options.
The values are saved to the tAnswers table as either the entered text value, or the Id of the selected option. There is a QuestionTypeId column which defines if the answer is a reference to tOptions.Id.
I want to select the answers, returning the entered value or the value related to the selected Id.
For example;
SET NOCOUNT ON
-- '#'-prefixed names are temporary tables, which are created with CREATE TABLE
-- (DECLARE ... TABLE is only valid for '@'-prefixed table variables).
CREATE TABLE #tSubmissions (Id INT)
CREATE TABLE #tSubmissionQuestions (SubmissionId INT, QuestionId INT)
CREATE TABLE #tQuestions (Id INT, QuestionText NVARCHAR(MAX), ColName NVARCHAR(MAX), QuestionTypeId INT)
CREATE TABLE #tOptions (Id INT, OptionValue NVARCHAR(MAX), OptionGroupId INT)
-- AnswerValue holds either free text or, for option questions, the #tOptions.Id as text.
CREATE TABLE #tAnswers (Id INT IDENTITY(1,1), SubmissionId INT, QuestionId INT, AnswerValue NVARCHAR(MAX))
INSERT INTO #tQuestions VALUES (1, 'What is your name?', 'Name', 1)
INSERT INTO #tQuestions VALUES (2, 'What is your age?', 'Age', 1)
INSERT INTO #tQuestions VALUES (3, 'What is your gender?', 'Gender', 2)
INSERT INTO #tQuestions VALUES (4, 'What is your favourite colour?', 'Colour', 2)
-- Answers for question 3 - gender
INSERT INTO #tOptions VALUES (1, 'Male', 1)
INSERT INTO #tOptions VALUES (2, 'Female', 1)
-- answers for question 4 - colour
INSERT INTO #tOptions VALUES (3, 'Blue', 2)
INSERT INTO #tOptions VALUES (4, 'Green', 2)
INSERT INTO #tOptions VALUES (5, 'Red', 2)
INSERT INTO #tOptions VALUES (6, 'Yellow', 2)
INSERT INTO #tSubmissions VALUES (1)
INSERT INTO #tSubmissions VALUES (2)
INSERT INTO #tSubmissions VALUES (3)
INSERT INTO #tSubmissionQuestions VALUES (1, 1)
INSERT INTO #tSubmissionQuestions VALUES (1, 2)
INSERT INTO #tSubmissionQuestions VALUES (1, 3)
INSERT INTO #tSubmissionQuestions VALUES (1, 4)
INSERT INTO #tSubmissionQuestions VALUES (2, 1)
INSERT INTO #tSubmissionQuestions VALUES (2, 2)
INSERT INTO #tSubmissionQuestions VALUES (2, 3)
INSERT INTO #tSubmissionQuestions VALUES (2, 4)
INSERT INTO #tSubmissionQuestions VALUES (3, 1)
INSERT INTO #tSubmissionQuestions VALUES (3, 2)
INSERT INTO #tSubmissionQuestions VALUES (3, 3)
INSERT INTO #tSubmissionQuestions VALUES (3, 4)
-- form submissions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (1, 1, 'Tony Stark')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (1, 2, '39')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (1, 3, '1') -- reference to #tOptions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (1, 4, '5') -- reference to #tOptions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (2, 1, 'Pepper Potts')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (2, 2, '38')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (2, 3, '2') -- reference to #tOptions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (2, 4, '6') -- reference to #tOptions
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (3, 1, 'James Rhodes')
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (3, 2, '41') -- has choosen to not answer question 3
INSERT INTO #tAnswers (SubmissionId, QuestionId, AnswerValue) VALUES (3, 4, '3') -- reference to #tOptions
SELECT
s.Id as SubmissionId, q.Id as QuestionId, a.AnswerValue
FROM
#tSubmissions s
INNER JOIN #tSubmissionQuestions sq
ON sq.SubmissionId = s.Id
INNER JOIN #tQuestions q
ON q.Id = sq.QuestionId
LEFT JOIN #tAnswers a
ON a.QuestionId = q.Id
AND a.SubmissionId = s.Id
DBFiddle
This returns;
SubmissionId | QuestionId | AnswerValue
=============|============|===============
1 | 1 | Tony Stark
1 | 2 | 39
1 | 3 | 1 <-- this is the Id of the selected option
1 | 4 | 5 <-- this is the Id of the selected option
2 | 1 | Pepper Potts
2 | 2 | 38
2 | 3 | 2 <-- this is the Id of the selected option
2 | 4 | 6 <-- this is the Id of the selected option
3 | 1 | James Rhodes
3 | 2 | 41
3 | 3 | NULL <-- the option was not selected
3 | 4 | 3 <-- this is the Id of the option
Instead I would like;
SubmissionId | QuestionId | AnswerValue
=============|============|===============
1 | 1 | Tony Stark
1 | 2 | 39
1 | 3 | Male <-- this is the value of the selected option
1 | 4 | Red <-- this is the value of the selected option
2 | 1 | Pepper Potts
2 | 2 | 38
2 | 3 | Female <-- this is the value of the selected option
2 | 4 | Yellow <-- this is the value of the selected option
3 | 1 | James Rhodes
3 | 2 | 41
3 | 3 | NULL <-- the option was not selected
3 | 4 | Blue <-- this is the value of the selected option
How do I conditionally pull values from the tOptions table?
I guess this is what you're looking for:
Another LEFT JOIN on tOptions to select the values, in case of QuestionTypeId = 2
I just added the ISNUMERIC to avoid conversion errors.
-- Answers per submission/question; for option questions (QuestionTypeId = 2)
-- the stored option Id is replaced by the option's text.
SELECT
s.Id as SubmissionId,
q.Id as QuestionId,
-- Falls back to the raw answer when no option row matched.
COALESCE(t.OptionValue,a.AnswerValue) AS AnswerValue
FROM
#tSubmissions s
INNER JOIN #tSubmissionQuestions sq
ON sq.SubmissionId = s.Id
INNER JOIN #tQuestions q
ON q.Id = sq.QuestionId
LEFT JOIN #tAnswers a
ON a.QuestionId = q.Id
AND a.SubmissionId = s.Id
LEFT JOIN #tOptions t
ON q.QuestionTypeId = 2
AND ISNUMERIC(a.AnswerValue) = 1
-- TRY_CONVERT returns NULL instead of raising an error for text such as
-- 'Tony Stark'. ISNUMERIC alone is not a safe guard: it accepts values like
-- '$' or '1e5' that cannot be converted to INT, and the engine does not
-- guarantee the order in which the join predicates are evaluated.
AND TRY_CONVERT(INT, a.AnswerValue) = t.Id
I would make two columns in the Answers table. One that you have AnswerValue NVARCHAR(MAX) NULL and another one AnswerOptionID int NULL. It would make joining way more efficient and it would eliminate problems when engine tries to convert text "Tony Stark" into integer.
But, given the schema as is, here is one variant.
I added LEFT JOIN to the #tOptions table. Note, that I'm converting integer IDs into text, not other way around.
SELECT
s.Id as SubmissionId, q.Id as QuestionId
-- , a.AnswerValue, Options.OptionValue
,CASE WHEN q.QuestionTypeId = 2
THEN Options.OptionValue
ELSE a.AnswerValue
END AS AnswerText
FROM
#tSubmissions s
INNER JOIN #tSubmissionQuestions sq ON sq.SubmissionId = s.Id
INNER JOIN #tQuestions q ON q.Id = sq.QuestionId
LEFT JOIN #tAnswers a
ON a.QuestionId = q.Id
AND a.SubmissionId = s.Id
LEFT JOIN #tOptions AS Options
ON q.QuestionTypeId = 2
AND a.AnswerValue = CAST(Options.Id AS NVARCHAR(MAX))
;
Please try this.
-- Answers per submission/question; text questions (QuestionTypeId = 1) return
-- the stored value, all other questions look the stored option Id up in #tOptions.
SELECT
s.Id as SubmissionId, q.Id as QuestionId,
CASE WHEN q.QuestionTypeId = 1 THEN
a.AnswerValue
ELSE
-- TRY_CONVERT avoids a conversion error when the stored answer is not an
-- integer. COALESCE without a CONVERT keeps the NVARCHAR(MAX) type, so the
-- option text is neither truncated to 100 characters nor stripped of Unicode.
-- Falls back to the raw answer when no option row matches.
COALESCE((SELECT o.OptionValue FROM #tOptions o WHERE o.Id = TRY_CONVERT(INT, a.AnswerValue)),a.AnswerValue)
END AS AnswerValue
FROM
#tSubmissions s
INNER JOIN #tSubmissionQuestions sq
ON sq.SubmissionId = s.Id
INNER JOIN #tQuestions q
ON q.Id = sq.QuestionId
LEFT JOIN #tAnswers a
ON a.QuestionId = q.Id
AND a.SubmissionId = s.Id
ORDER BY s.Id ASC

SQL query to reconstruct inherited EAV model

I have 5 tables in my database representing an inherited EAV model:
CREATE TABLE AttributeNames
("ID" int, "Name" varchar(8))
;
INSERT INTO AttributeNames
("ID", "Name")
VALUES
(1, 'Color'),
(2, 'FuelType'),
(3, 'Doors'),
(4, 'Price')
;
CREATE TABLE MasterCars
("ID" int, "Name" varchar(10))
;
INSERT INTO MasterCars
("ID", "Name")
VALUES
(5, 'BMW'),
(6, 'Audi'),
(7, 'Ford')
;
CREATE TABLE MasterCarAttributes
("ID" int, "AttributeNameId" int, "Value" varchar(10), "MasterCarId" int)
;
INSERT INTO MasterCarAttributes
("ID", "AttributeNameId", "Value", "MasterCarId")
VALUES
(100, 1, 'Red', 5),
(101, 2, 'Gas', 5),
(102, 3, '4', 5),
(102, 4, '$100K', 5),
(103, 1, 'Blue', 6),
(104, 2, 'Diesel', 6),
(105, 3, '3', 6),
(106, 4, '$80k', 6),
(107, 1, 'Green', 7),
(108, 2, 'Diesel', 7),
(109, 3, '5', 7),
(110, 4, '$60k', 7)
;
CREATE TABLE LocalCars
("ID" int, "MasterCarId" int)
;
INSERT INTO LocalCars
("ID", "MasterCarId")
VALUES
(8, '5'),
(9, '6'),
(10, NULL)
;
CREATE TABLE LocalCarAttributes
("ID" int, "AttributeNameId" int, "Value" varchar(6), "LocalCarId" int)
;
INSERT INTO LocalCarAttributes
("ID", "AttributeNameId", "Value", "LocalCarId")
VALUES
(43, 1, 'Yellow', 8),
(44, 3, '6', 9),
(45, 1, 'Red', 10),
(46, 2, 'Gas', 10),
(47, 3, '2', 10),
(48, 4, '$60k', 10)
;
I can retrieve all of master car attributes as follows:
SELECT MC.ID, MCA.AttributeNameId, MCA.Value
FROM MasterCars MC
left join MasterCarAttributes MCA on MC.ID = MCA.MasterCarId
order by MC.ID;
Likewise, I can retrieve all of the local car attributes as follows:
SELECT LC.ID, LCA.AttributeNameId, LCA.Value
FROM LocalCars LC
left join LocalCarAttributes LCA on LC.ID = LCA.LocalCarId
order by LC.ID;
If LocalCars.MasterCarId is not NULL, then that local car can inherit the attributes of that master car. A local car attribute with the same AttributeNameId overrides any master attribute with the same AttributeNameId.
So given the data above, I have 3 local cars each with 4 attributes (color, fuelType, doors, price). Inherited attribute values in bold:
Local Car Id = 1 (Yellow, Gas, 4, $100K)
Local Car Id = 2 (Blue, Diesel, 6, $80k)
Local Car Id = 3 (Red, Gas, 2, $60k)
I'm trying to find the necessary joins required to join the two queries above together to give a complete set of local cars attributes, some inherited:
LocalCarId AttributeNameId Value
------------------------------------------
1 1 Yellow
1 2 Gas
1 3 4
1 4 $100K
2 1 Blue
2 2 Diesel
2 3 6
2 4 $80K
3 1 Red
3 2 Gas
3 3 2
3 4 $60K
or possibly even:
LocalCarId AttributeNameId LocalValue MasterValue
-------------------------------------------------------------
1 1 Yellow Red
1 2 NULL Gas
1 3 NULL 4
1 4 NULL $100K
2 1 NULL Blue
2 2 NULL Diesel
2 3 6 3
2 4 NULL $80K
3 1 Red NULL
3 2 Gas NULL
3 3 2 NULL
3 4 $60K NULL
The problem can be solved by performing a union on all of your local car attributes and master car attributes. Each record is marked with an [IsMasterAttribute] flag. The next step is then use the ROW_NUMBER() window function to rank each of the duplicate attributes. The final step is to only select attributes which has a rank of 1.
-- Effective attributes per local car: a local attribute wins over the master
-- attribute with the same AttributeNameId.
;WITH CTE_CombinedAttributes
AS
(
-- Master attributes, repeated for every local car that points at that master car.
SELECT 1 AS IsMasterAttribute
,LC.ID
,MC.ID AS MasterCarId
,MCA.AttributeNameId
,MCA.Value
FROM MasterCars MC
LEFT OUTER JOIN MasterCarAttributes MCA on MC.ID = MCA.MasterCarId
INNER JOIN LocalCars LC ON LC.MasterCarId = MC.ID
UNION ALL
-- The local car's own attributes (a car without any yields one row with NULL attribute columns).
SELECT 0 AS IsMasterAttribute
,LC.ID
,LC.MasterCarId
,LCA.AttributeNameId
,LCA.Value
FROM LocalCars LC
LEFT OUTER JOIN LocalCarAttributes LCA on LC.ID = LCA.LocalCarId
)
,
CTE_RankedAttributes
AS
(
-- Rank the candidates per (local car, attribute); ordering by IsMasterAttribute
-- puts the local row (0) ahead of the master row (1).
SELECT [IsMasterAttribute]
,[ID]
,[AttributeNameId]
,[Value]
,ROW_NUMBER() OVER (PARTITION BY [ID], [AttributeNameId] ORDER BY [IsMasterAttribute]) AS [AttributeRank]
FROM CTE_CombinedAttributes
)
-- Keep only the winning row for each (local car, attribute).
SELECT [IsMasterAttribute]
,[ID]
,[AttributeNameId]
,[Value]
FROM CTE_RankedAttributes
WHERE [AttributeRank] = 1
ORDER BY [ID]
The second output is also possible by performing a simple pivot on the final result:
;WITH CTE_CombinedAttributes
AS
(
SELECT 1 AS IsMasterAttribute
,LC.ID
,MC.ID AS MasterCarId
,MCA.AttributeNameId
,MCA.Value
FROM MasterCars MC
LEFT OUTER JOIN MasterCarAttributes MCA on MC.ID = MCA.MasterCarId
INNER JOIN LocalCars LC ON LC.MasterCarId = MC.ID
UNION ALL
SELECT 0 AS IsMasterAttribute
,LC.ID
,LC.MasterCarId
,LCA.AttributeNameId
,LCA.Value
FROM LocalCars LC
LEFT OUTER JOIN LocalCarAttributes LCA on LC.ID = LCA.LocalCarId
)
,
CTE_RankedAttributes
AS
(
SELECT [IsMasterAttribute]
,[ID]
,[AttributeNameId]
,[Value]
,ROW_NUMBER() OVER (PARTITION BY [ID], [AttributeNameId] ORDER BY [IsMasterAttribute]) AS [AttributeRank]
FROM CTE_CombinedAttributes
)
SELECT [ID]
,[AttributeNameId]
,MAX(
CASE [IsMasterAttribute]
WHEN 0 THEN [Value]
END
) AS LocalValue
,MAX(
CASE [IsMasterAttribute]
WHEN 1 THEN [Value]
END
) AS MasterValue
FROM CTE_RankedAttributes
GROUP BY [ID], [AttributeNameId]
ORDER BY [ID]
SQL Fiddle Demo
-- Effective attributes per local car: take the local value when one exists for
-- the attribute, otherwise the value inherited from the master car.
SELECT LC."ID" as LocalCarID,
COALESCE(LCA."AttributeNameId", MCA."AttributeNameId") as "AttributeNameId",
COALESCE(LCA."Value", MCA."Value") as "Value"
FROM LocalCars LC
LEFT JOIN MasterCars MC
ON LC."MasterCarId" = MC."ID"
LEFT JOIN MasterCarAttributes MCA
ON MC."ID" = MCA."MasterCarId"
LEFT JOIN LocalCarAttributes LCA
-- This is the important part:
-- match the local attribute to the master attribute of the same name, or, when
-- the car has no master attributes at all, take every local attribute.
ON ( MCA."AttributeNameId" = LCA."AttributeNameId"
OR MCA."AttributeNameId" IS NULL)
-- Join on the owning car ("LocalCarId"), not on the attribute row's own "ID";
-- joining on LCA."ID" never matches a car id, so local overrides were lost.
AND LC."ID" = LCA."LocalCarId"
OUTPUT
| LocalCarID | AttributeNameId | Value |
|------------|-----------------|--------|
| 1 | 1 | Blue |
| 1 | 2 | Gas |
| 2 | 1 | Green |
| 2 | 2 | Diesel |

ADD Specific values in SQL Column determined by other Column

I have a Database that determines different values based on a label.
Where the label determines whether it's an exempted value or not.
For instance, 2 = non exempted and 3 = exempted. If I run a query my results look something like this
|Name |ExemptionStatus |Total Value|
|X |2 |100 |
|X |3 |200 |
My Query is
SELECT NAME, EXEMPTIONSTATUS
SUM(TOTAL_VALUE) AS 'TOTAL VALUE'
FROM ORDER_ACCOUNT JOIN ACCOUNT_INVOICE
WHERE ORDER_ACCOUNT.DATE BETWEEN 'M/D/YEAR' AND 'M/D/YEAR'
GROUP BY NAME, EXEMPTIONSTATUS
ORDER BY NAME ASC
How can I get my query to create a new column for the values, for example:
|Name |NON EXEMPT VALUE|EXEMPT VALUE|
|X |100 |200 |
I just don't know how I would sort it, or whether it belongs in my WHERE clause or not.
Use a CASE statement within a SUM to only total NON EXEMPT, then EXEMPT, and select them as separate columns. Similar to the following (might need to add TOTAL_VALUE to the GROUP BY, or remove EXEMPTIONSTATUS)
-- One row per NAME with the non-exempt (status 2) and exempt (status 3) totals
-- side by side.
SELECT
NAME
,SUM(CASE WHEN EXEMPTIONSTATUS = 2 THEN TOTAL_VALUE ELSE 0 END) AS 'NON EXEMPT VALUE'
,SUM(CASE WHEN EXEMPTIONSTATUS = 3 THEN TOTAL_VALUE ELSE 0 END) AS 'EXEMPT VALUE'
-- NOTE(review): the join condition is not shown in the question; add the ON
-- clause that relates the two tables.
FROM ORDER_ACCOUNT JOIN ACCOUNT_INVOICE
WHERE ORDER_ACCOUNT.DATE BETWEEN 'M/D/YEAR' AND 'M/D/YEAR'
-- Group by NAME only. Also grouping by EXEMPTIONSTATUS would return one row per
-- status (100/0 and 0/200) instead of a single row per name.
GROUP BY NAME
ORDER BY NAME ASC
EDIT: The new code below adds new columns to your existing table. You will need to replace #Test with your own tables, but I believe this will get you what you're looking for.
-- Every source row, extended with the non-exempt (status 2) and exempt
-- (status 3) totals of the row's NAME.
-- The totals are computed with window functions partitioned by NAME. In the
-- correlated-subquery form, an unqualified NAME inside the subquery binds to
-- the subquery's own table, so "t.NAME = NAME" is always true and the totals
-- cover the whole table instead of a single name.
SELECT
NAME,
EXEMPTIONSTATUS
,[TOTAL_VALUE]
,SUM(CASE WHEN EXEMPTIONSTATUS = 2 THEN TOTAL_VALUE ELSE 0 END) OVER (PARTITION BY NAME) AS 'NON EXEMPT VALUE'
,SUM(CASE WHEN EXEMPTIONSTATUS = 3 THEN TOTAL_VALUE ELSE 0 END) OVER (PARTITION BY NAME) AS 'EXEMPT VALUE'
FROM #Test
This gives me the following output
| NAME | EXEMPTIONSTATUS | TOTAL_VALUE | NON EXEMPT VALUE | EXEMPT VALUE |
| X | 2 | 100 | 100 | 200 |
| X | 3 | 200 | 100 | 200 |
Let's say your table structure is like this:
CREATE TABLE tab(ID int, Name nvarchar(20), ExemptionStatus int, TotalValue int);
INSERT INTO tab(ID, Name, ExemptionStatus, TotalValue) values (1, 'X', 2, 100);
INSERT INTO tab(ID, Name, ExemptionStatus, TotalValue) values (2, 'X', 3, 200);
So your data looks like this:
ID Name ExemptionStatus TotalValue
1 X 2 100
2 X 3 200
Then the query you'd use is:
SELECT NotExempted.Name,
NotExempted.NonExemptValue,
Exempted.ExemptValue
FROM (SELECT Name,
CASE
WHEN ExemptionStatus = 2 THEN TotalValue
END
AS 'NonExemptValue'
FROM #tab
) NotExempted
INNER JOIN (SELECT Name,
CASE
WHEN ExemptionStatus = 3 THEN TotalValue
END
AS 'ExemptValue'
FROM #tab
) Exempted ON NotExempted.Name = Exempted.Name
WHERE NotExempted.NonExemptValue IS NOT NULL
AND Exempted.ExemptValue IS NOT NULL
GROUP BY NotExempted.Name,
NotExempted.NonExemptValue,
Exempted.ExemptValue
Your result will look like this:
Name NonExemptValue ExemptValue
X 100 200
You can see this here -> http://sqlfiddle.com/#!9/8902d3/2
Now, let's say you have data like this :
CREATE TABLE #tab(ID int, Name nvarchar(20), ExemptionStatus int, TotalValue int)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (1, 'X', 2, 100)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (2, 'X', 3, 200)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (3, 'X', 2, 1000)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (4, 'X', 3, 2000)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (5, 'X', 2, 1045)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (6, 'X', 3, 2045)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (7, 'X', 2, 1034)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (8, 'X', 3, 2023)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (9, 'X', 2, 1023)
INSERT INTO #tab(ID, Name, ExemptionStatus, TotalValue) values (10, 'X', 3, 2076)
which looks like this:
ID Name ExemptionStatus TotalValue
1 X 2 100
2 X 3 200
3 X 2 1000
4 X 3 2000
5 X 2 1045
6 X 3 2045
7 X 2 1034
8 X 3 2023
9 X 2 1023
10 X 3 2076
If you need to sum the total value up, then you can use the following query (which is a slight modification of the query above):
-- One row per Name with the summed non-exempt (status 2) and exempt (status 3)
-- values. Names that lack either status are not returned.
SELECT NotExempted.Name,
NotExempted.NonExemptValue,
Exempted.ExemptValue
FROM (SELECT o.Name,
CASE
-- The SUM is correlated on Name so that each name gets its own total
-- rather than the total of every status-2 row in the table.
WHEN o.ExemptionStatus = 2 THEN (SELECT SUM(t.TotalValue) FROM #tab t WHERE t.ExemptionStatus = 2 AND t.Name = o.Name)
END
AS 'NonExemptValue'
FROM #tab o
) NotExempted
INNER JOIN (SELECT o.Name,
CASE
-- Correlated on Name for the same reason as above.
WHEN o.ExemptionStatus = 3 THEN (SELECT SUM(t.TotalValue) FROM #tab t WHERE t.ExemptionStatus = 3 AND t.Name = o.Name)
END
AS 'ExemptValue'
FROM #tab o
) Exempted ON NotExempted.Name = Exempted.Name
-- Drop the rows where the CASE produced NULL (the row had the other status).
WHERE NotExempted.NonExemptValue IS NOT NULL
AND Exempted.ExemptValue IS NOT NULL
-- The join repeats the same totals once per row pair; GROUP BY collapses them.
GROUP BY NotExempted.Name,
NotExempted.NonExemptValue,
Exempted.ExemptValue
Your result will look like this :
Name NonExemptValue ExemptValue
X 4202 8344
You can see this here -> http://sqlfiddle.com/#!9/02c76/3
Hope this helps!!!