QUERY:
-- Repro script: builds a master table (#foot) and a detail table (#note),
-- loads sample rows, then lists every master row together with all of its notes.
-- Leftovers from an earlier run are dropped only when they exist, so the script
-- also works on a fresh session (an unguarded DROP fails when the table is missing).
if object_id('tempdb..#note') is not null drop table #note
if object_id('tempdb..#foot') is not null drop table #foot
go
create table #foot (
    id int primary key not null,
    name varchar(50) not null
)
go
create table #note (
    id int primary key not null,
    note varchar(max) not null,
    -- NOTE(review): SQL Server does not enforce FOREIGN KEY constraints on temp
    -- tables, so this reference documents intent only - confirm on your version.
    foot_id int not null references #foot(id)
)
go
-- Three master rows; 'Rob' (id 3) deliberately gets no notes below.
insert into #foot (id, name) values
(1, 'Joe'), (2, 'Mike'), (3, 'Rob')
go
insert into #note (id, note, foot_id) values (1, 'Joe note 1', 1)
go
insert into #note (id, note, foot_id) values (2, 'Joe note 2', 1)
go
insert into #note (id, note, foot_id) values (3, 'Mike note 1', 2)
go
-- Baseline: every master row with each of its notes (note columns are NULL when there are none).
select F.name, N.note, N.id
from #foot F
left outer join #note N on N.foot_id = F.id
RESULT:
QUESTION:
How can I create a view/query resulting in one row for each master record (#foot) along with fields from the most recently inserted detail (#note), if any?
GOAL:
(NOTE: the way I would tell which one is most recent is the id which would be higher for newer records)
-- One row per #foot record carrying its newest note (highest note id), or
-- NULL note columns when the record has no notes at all.
;with numbered_notes as (
    -- Number each foot's notes newest-first; a foot without notes still gets
    -- a single row (RowNum = 1) because of the outer join.
    select
        F.name,
        N.note,
        N.id,
        row_number() over (partition by F.id order by N.id desc) as RowNum
    from #foot F
    left outer join #note N
        on N.foot_id = F.id
)
select latest.name, latest.note, latest.id
from numbered_notes latest
where latest.RowNum = 1
Assuming the ID created in the #note table is always incremental (imposed by using IDENTITY or by controlling the inserts so that each new ID is higher than the current maximum), you can use the following query (which uses the RANK function):
-- One row per #foot record with its newest note (highest note id), using RANK().
-- The partition key is f.id, the preserved side of the LEFT JOIN. Partitioning
-- on n.foot_id would place every foot that has no notes into one shared NULL
-- partition, which only yields one row per foot because RANK() ties on NULL.
;WITH Dat AS
(
    SELECT f.name,
           n.note,
           n.id,
           RANK() OVER (PARTITION BY f.id ORDER BY n.id DESC) AS rn
    FROM #foot f
    LEFT OUTER JOIN #note n
        ON n.foot_id = f.id
)
-- Same columns the original SELECT * produced, named explicitly.
SELECT name, note, id, rn
FROM Dat
WHERE rn = 1
Related
I am trying to figure out how to select all records that are associated with all categories on a list.
For instance take this DB setup:
-- Sample schema: blogs, categories, and a junction table linking them (many-to-many).
create table blog (
    id integer PRIMARY KEY,
    url varchar(100)
);
create table blog_category (
    id integer PRIMARY KEY,
    name varchar(50)
);
create table blog_to_blog_category (
    blog_id integer,
    blog_category_id integer
);
-- Column lists are spelled out so the inserts keep working if columns are added or reordered.
insert into blog (id, url) values
    (1, 'google.com'),
    (2, 'pets.com'),
    (3, 'petsearch.com');
insert into blog_category (id, name) values
    (1, 'search'),
    (2, 'pets'),
    (3, 'misc');
-- Only blog 3 (petsearch.com) is linked to both category 1 and category 2.
insert into blog_to_blog_category (blog_id, blog_category_id) values
    (1, 1),
    (2, 2),
    (3, 1),
    (3, 2),
    (3, 3);
I can query on the main table like this:
-- One row per blog whose url ends in '.com', with its category names joined
-- into a single comma-separated string. Blogs without any category are
-- dropped by the inner joins.
SELECT
    b.*,
    string_agg(bc.name, ', ')
FROM blog AS b
INNER JOIN blog_to_blog_category AS btbc
    ON btbc.blog_id = b.id
INNER JOIN blog_category AS bc
    ON bc.id = btbc.blog_category_id
WHERE b.url LIKE '%.com'
GROUP BY b.id
But let's say I want to return only blogs that have BOTH category 1 and category 2 connected with them. How do I do that?
This would return just the petsearch.com domain, as it is the only record that has both of those categories.
Here you go:
I restricted the rows to categories 1 and 2 (IN clause) and added a check that the number of distinct matching category ids is 2 (HAVING clause), so only blogs that have both categories are returned:
-- Blogs ending in '.com' that are linked to BOTH category 1 and category 2.
-- The WHERE clause keeps only links to those two categories; the HAVING
-- clause then demands that two distinct category ids survived for the blog.
-- Because of that filter, the aggregated name list contains only those two categories.
SELECT
    b.*,
    string_agg(bc.name, ', ')
FROM blog AS b
INNER JOIN blog_to_blog_category AS btbc
    ON btbc.blog_id = b.id
INNER JOIN blog_category AS bc
    ON bc.id = btbc.blog_category_id
WHERE b.url LIKE '%.com'
  AND bc.id IN (1, 2)
GROUP BY b.id
HAVING COUNT(DISTINCT bc.id) = 2
here is one way:
-- Blogs linked to both category 1 and category 2: the subquery finds the
-- blog ids that have two distinct matching categories in the junction table.
SELECT *
FROM blog
WHERE id IN (
    SELECT bbc.blog_id
    FROM blog_to_blog_category AS bbc
    WHERE bbc.blog_category_id IN (1, 2)
    GROUP BY bbc.blog_id
    HAVING COUNT(DISTINCT bbc.blog_category_id) = 2
)
I have two tables with a foreign key relationship on an ID. I'll refer to them as master and secondary to make things easier, and I won't worry about the FK for now. Here is a cut-down, easy-to-reproduce example using table variables to represent the problem:
-- Repro data for the master/secondary problem, followed by the UNION query
-- that produces the unwanted duplicate row for a MasterID present in both tables.
-- The objects keep their #-prefixed names, which makes them temp tables:
-- DECLARE ... TABLE only accepts @-prefixed variables, so CREATE TABLE is used.
-- Guarded drops make the script re-runnable in the same session.
IF OBJECT_ID('tempdb..#Secondary') IS NOT NULL DROP TABLE #Secondary
IF OBJECT_ID('tempdb..#Master') IS NOT NULL DROP TABLE #Master
CREATE TABLE #Master (
    [MasterID] UNIQUEIDENTIFIER NOT NULL
    ,[Description] NVARCHAR(50)
)
CREATE TABLE #Secondary (
    [SecondaryID] UNIQUEIDENTIFIER NOT NULL
    ,[MasterID] UNIQUEIDENTIFIER NOT NULL
    ,[OtherInfo] NVARCHAR(50)
)
INSERT INTO #Master ([MasterID], [Description])
VALUES ('0C1F1A0C-1DB5-4FA2-BC70-26AA9B10D5C3', 'Test')
    ,('2696ECD2-FFDB-4E26-83D0-F146ED419C9C', 'Test 2')
    ,('F21568F0-59C5-4950-B936-AA73DA6009B5', 'Test 3')
-- Only the first master row ('Test') has a secondary record.
INSERT INTO #Secondary ([SecondaryID], [MasterID], [OtherInfo])
VALUES ('514673A6-8B5C-429B-905F-15BD8B55CB5D', '0C1F1A0C-1DB5-4FA2-BC70-26AA9B10D5C3', 'Other info')
-- UNION removes exact duplicates only; the two 'Test' rows differ in
-- OtherInfo (NULL vs 'Other info'), so both are returned.
SELECT [MasterID], [Description], NULL AS [OtherInfo] FROM #Master
UNION
SELECT S.[MasterID], M.[Description], S.[OtherInfo] FROM #Secondary S
JOIN #Master M ON M.MasterID = S.MasterID
With the results.....
0C1F1A0C-1DB5-4FA2-BC70-26AA9B10D5C3 Test NULL
0C1F1A0C-1DB5-4FA2-BC70-26AA9B10D5C3 Test Other info
F21568F0-59C5-4950-B936-AA73DA6009B5 Test 3 NULL
2696ECD2-FFDB-4E26-83D0-F146ED419C9C Test 2 NULL
.... I would like to only return records from #Secondary if there is a duplicate MasterID, so this is my expected output:
0C1F1A0C-1DB5-4FA2-BC70-26AA9B10D5C3 Test Other info
F21568F0-59C5-4950-B936-AA73DA6009B5 Test 3 NULL
2696ECD2-FFDB-4E26-83D0-F146ED419C9C Test 2 NULL
I tried inserting my union query into a temporary table, then using a CTE with the partition function. This kind of works but unfortunately returns the row from the #Master table rather than the #Secondary table (regardless of the order I select). See below.
-- The asker's attempt: store the UNION output in a temp table, then keep one
-- row per MasterID with ROW_NUMBER().
-- #Results is created as a temp table because DECLARE ... TABLE only accepts
-- @-prefixed variables; the guarded drop makes the script re-runnable.
IF OBJECT_ID('tempdb..#Results') IS NOT NULL DROP TABLE #Results
CREATE TABLE #Results (
    MasterID UNIQUEIDENTIFIER,
    [Description] NVARCHAR(50),
    OtherInfo NVARCHAR(50)
)
INSERT INTO #Results (MasterID, [Description], OtherInfo)
SELECT [MasterID], [Description], NULL AS [OtherInfo] FROM #Master
UNION
SELECT S.[MasterID], M.[Description], S.[OtherInfo] FROM #Secondary S
JOIN #Master M ON M.MasterID = S.MasterID
-- Both UNION branches take [Description] from #Master, so ordering by it cannot
-- tell the master row from the secondary row of the same MasterID; which of
-- the two gets RN = 1 is therefore not controlled by this ORDER BY.
;WITH CTE AS (
    SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY [MasterID] ORDER BY [Description] DESC)
    FROM #Results
)
SELECT * FROM CTE WHERE RN = 1
Results:
0C1F1A0C-1DB5-4FA2-BC70-26AA9B10D5C3 Test NULL 1
F21568F0-59C5-4950-B936-AA73DA6009B5 Test 3 NULL 1
2696ECD2-FFDB-4E26-83D0-F146ED419C9C Test 2 NULL 1
Note that I am not just trying to select the rows which have a value for OtherInfo, this is just to help differentiate the two tables in the result set.
Just to reiterate, what I need is to return only the rows present in #Secondary when there is a duplicate MasterID. If #Secondary has a row for a particular MasterID, I don't need the row from #Master. I hope this makes sense.
What is the best way to do this? I am happy to redesign my database structure. I'm effectively trying to have a master list of items but sometimes take one of those and assign extra info to it + tie it to another ID. In this instance, that record replaces the master list.
You are way overcomplicating this. All you need is a left join.
-- Every #Master row exactly as stored, plus OtherInfo from its matching
-- #Secondary row; OtherInfo is NULL when no secondary row exists.
SELECT
    M.[MasterID],
    M.[Description],
    S.[OtherInfo]
FROM #Master AS M
LEFT JOIN #Secondary AS S
    ON S.MasterID = M.MasterID
Union seems to be the wrong approach... I would suggest a left join:
-- Master list enriched with secondary data: the outer join keeps master rows
-- that have no secondary record and leaves their OtherInfo as NULL.
SELECT
    mst.[MasterID],
    mst.[Description],
    sec.[OtherInfo]
FROM #Master AS mst
LEFT JOIN #Secondary AS sec
    ON mst.MasterID = sec.MasterID
Suppose I have the following table
-- Sample data: one row per (Id, Pet) pair; an Id may own several pets.
DROP TABLE IF EXISTS #toy_example
CREATE TABLE #toy_example
(
    Id int,
    Pet varchar(10)
);
-- The rows must go into #toy_example, the table created above and queried
-- later (the original targeted a non-existent #toy).
INSERT INTO #toy_example (Id, Pet)
VALUES (1, 'dog'),
       (1, 'cat'),
       (1, 'emu'),
       (2, 'cat'),
       (2, 'turtle'),
       (2, 'lizard'),
       (3, 'dog'),
       (4, 'elephant'),
       (5, 'cat'),
       (5, 'emu')
and I want to fetch all Ids that have certain pets (for example either cat or emu, so Ids 1, 2 and 5).
-- Lookup table holding the pets of interest, so they need not be hard-coded.
DROP TABLE IF EXISTS #Pets
CREATE TABLE #Pets
(
Animal varchar(10)
);
INSERT INTO #Pets
VALUES ('cat'),
('emu')
-- Attempt: keep every Id that has at least one pet listed in #Pets.
-- This statement is the one that fails: the subquery sits inside the COUNT()
-- aggregate, which SQL Server rejects with the error quoted below
-- ("Cannot perform an aggregate function on an expression containing an
-- aggregate or a subquery").
SELECT Id
FROM #toy_example
GROUP BY Id
HAVING COUNT(
CASE
WHEN Pet IN (SELECT Animal FROM #Pets)
THEN 1
END
) > 0
The above gives me the error Cannot perform an aggregate function on an expression containing an aggregate or a subquery. I have two questions:
Why is this an error? If I instead hard code the subquery in the HAVING clause, i.e. WHEN Pet IN ('cat','emu') then this works. Is there a reason why SQL server (I've checked with SQL server 2017 and 2008) does not allow this?
What would be a nice way to do this? Note that the above is just a toy example. The real problem has many possible "Pets", which I do not want to hard code. It would be nice if the suggested method could check for multiple other similar conditions too in a single query.
If I followed you correctly, you can just join and aggregate:
-- Ids owning at least one pet listed in #pets, with the number of matching rows.
-- The inner join discards #toy_example rows whose pet is not in #pets.
SELECT
    toy.id,
    COUNT(*) AS nb_of_matches
FROM #toy_example AS toy
INNER JOIN #pets AS wanted
    ON toy.pet = wanted.animal
GROUP BY toy.id
The inner join eliminates records from #toy_example that have no match in #pets. Then, we aggregate by id and count how many records remain in each group.
If you want to retain records that have no match in #pets and display them with a count of 0, then you can left join instead:
-- Every Id, with its total row count and the number of rows whose pet is in #pets.
-- COUNT(wanted.animal) skips the NULLs produced by unmatched rows, so Ids
-- without any listed pet report 0 matches.
SELECT
    toy.id,
    COUNT(*) AS nb_of_records,
    COUNT(wanted.animal) AS nb_of_matches
FROM #toy_example AS toy
LEFT JOIN #pets AS wanted
    ON toy.pet = wanted.animal
GROUP BY toy.id
How about this approach?
-- Ids that own at least one of the pets listed in #pets (the question asks
-- for "either cat or emu", i.e. Ids 1, 2 and 5 for the sample data).
-- The join already removes rows whose pet is not listed, so grouping by Id is
-- enough. Requiring COUNT(DISTINCT e.pet) to equal the number of rows in #pets
-- would instead return only the Ids that own EVERY listed pet (1 and 5).
SELECT e.Id
FROM #toy_example e
JOIN #pets p
    ON e.pet = p.animal
GROUP BY e.Id;
This question already has answers here:
Retrieving the last record in each group - MySQL
(33 answers)
Closed 6 years ago.
I have two tables, one "master" is a master list of names and the second "scenario" is a list of multiple scenarios for each name from the master list. I want my INNER JOIN query to fetch the master list of ID with the column status from "scenario" table but only the most recent status based on scenarioID. Here's the code that I've tried and tables with desired output
-- Each master row with the status of its most recent scenario, where "most
-- recent" means the highest scenarioID for that ID.
SELECT DISTINCT a.[user], a.ID, a.Name, b.status
FROM master a
INNER JOIN scenario b ON a.ID = b.ID
WHERE b.scenarioID = (
    SELECT MAX(c2.scenarioID)
    FROM scenario c2
    -- Correlate on the outer scenario row (alias b); the original referenced
    -- an alias "c" that is not defined anywhere in the statement.
    WHERE c2.ID = b.ID
)
Master
ID user Name
425 John Skyline
426 John Violin
427 Joe Pura
Scenario
ID ScenarioID status
425 1 active
425 2 active
425 3 done
426 1 active
426 2 active
427 1 done
Desired output
ID user Name status
425 John Skyline done
426 John Violin active
427 Joe Pura done
You can do this with a CROSS APPLY looking up the most recent for each value:
-- For every master row, look up the single scenario row with the highest
-- ScenarioID and return its status. CROSS APPLY drops master rows that have
-- no scenario at all.
SELECT
    M.ID,
    M.[User],
    M.Name,
    X.Status
FROM [Master] AS M
CROSS APPLY (
    SELECT TOP 1 S.Status
    FROM Scenario AS S
    WHERE S.ID = M.ID
    ORDER BY S.ScenarioID DESC
) AS X
Another way you could do it is with a ROW_NUMBER() PARTITIONED on the ID and ORDERED by the ScenarioID DESC:
-- Number each ID's scenarios newest-first (highest ScenarioID = 1) and keep
-- only the newest one per master row.
SELECT
    ranked.Id,
    ranked.[User],
    ranked.Name,
    ranked.Status
FROM (
    SELECT
        M.Id,
        M.[User],
        M.Name,
        S.Status,
        ROW_NUMBER() OVER (PARTITION BY S.Id ORDER BY S.ScenarioID DESC) AS RN
    FROM [Master] AS M
    INNER JOIN Scenario AS S
        ON S.Id = M.Id
) AS ranked
WHERE ranked.RN = 1
Here's a slightly different formulation that uses a CTE, which I generally find easier to read than a subquery (though of course, your mileage may vary).
-- Self-contained demo: sample data in table variables, then the latest status
-- per master row. Table variables need the @ prefix; "declare #Name table" is
-- not valid T-SQL.
declare @Master table
(
    ID bigint,
    [user] varchar(16),
    Name varchar(16)
);
declare @Scenario table
(
    ID bigint,
    ScenarioID bigint,
    [status] varchar(16)
);
insert @Master (ID, [user], Name) values
    (425, 'John', 'Skyline'),
    (426, 'John', 'Violin'),
    (427, 'Joe', 'Pura');
insert @Scenario (ID, ScenarioID, [status]) values
    (425, 1, 'active'),
    (425, 2, 'active'),
    (425, 3, 'done'),
    (426, 1, 'active'),
    (426, 2, 'active'),
    (427, 1, 'done');
-- Number each ID's scenarios from newest (highest ScenarioID) to oldest.
with ReversedScenarioCTE as
(
    select
        ID,
        [status],
        rowNumber = row_number() over (partition by ID order by ScenarioID desc)
    from
        @Scenario
)
-- rowNumber = 1 is the most recent scenario of each master row.
select
    M.ID,
    M.[user],
    M.Name,
    S.[status]
from
    @Master M
    inner join ReversedScenarioCTE S on
        M.ID = S.ID and
        S.rowNumber = 1;
If you have SQL Server 2008 or later you can use the ROW_NUMBER() function to achieve what you want. It will avoid querying the same table twice or performing joins.
-- Latest status per master row: rank each ID's scenarios newest-first and
-- keep rank 1. The output includes the VersionRank column (always 1).
;WITH Result AS (
    SELECT
        a.[user],
        a.ID,
        a.Name,
        b.status,
        ROW_NUMBER() OVER (PARTITION BY a.ID ORDER BY b.scenarioID DESC) AS VersionRank
    FROM [master] AS a
    INNER JOIN scenario AS b
        ON b.ID = a.ID
)
SELECT *
FROM Result
WHERE Result.VersionRank = 1
For a class project, a few others and I have decided to make a (very ugly) limited clone of StackOverflow. For this purpose, we're working on one query:
Home Page: List all the questions, their scores (calculated from votes), and the user corresponding to their first revision, and the number of answers, sorted in date-descending order according to the last action on the question (where an action is an answer, an edit of an answer, or an edit of the question).
Now, we've gotten the entire thing figured out, except for how to represent tags on questions. We're currently using a M-N mapping of tags to questions like this:
-- One row per revision of a question; removed together with its author or its question.
CREATE TABLE QuestionRevisions (
    id           INT IDENTITY  NOT NULL PRIMARY KEY,
    question     INT           NOT NULL,
    postDate     DATETIME      NOT NULL,
    contents     NTEXT         NOT NULL,
    creatingUser INT           NOT NULL,
    title        NVARCHAR(200) NOT NULL,
    CONSTRAINT questionrev_fk_users
        FOREIGN KEY (creatingUser) REFERENCES Users (id) ON DELETE CASCADE,
    CONSTRAINT questionref_fk_questions
        FOREIGN KEY (question) REFERENCES Questions (id) ON DELETE CASCADE
);
-- Tag dictionary.
CREATE TABLE Tags (
    id   INT IDENTITY NOT NULL PRIMARY KEY,
    name NVARCHAR(45) NOT NULL
);
-- M:N link between tags and questions; each (tag, question) pair appears at most once.
CREATE TABLE QuestionTags (
    tag      INT NOT NULL,
    question INT NOT NULL,
    PRIMARY KEY (tag, question),
    CONSTRAINT qtags_fk_tags
        FOREIGN KEY (tag) REFERENCES Tags (id) ON DELETE CASCADE,
    CONSTRAINT qtags_fk_q
        FOREIGN KEY (question) REFERENCES Questions (id) ON DELETE CASCADE
);
Now, for this query, if we just join to QuestionTags, then we'll get the questions and titles over and over and over again. If we don't, then we have an N query scenario, which is just as bad. Ideally, we'd have something where the result row would be:
+-------------+------------------+
| Other Stuff | Tags |
+-------------+------------------+
| Blah Blah | TagA, TagB, TagC |
+-------------+------------------+
Basically -- for each row in the JOIN, do a string join on the resulting tags.
Is there a built in function or similar which can accomplish this in T-SQL?
Here's one possible solution using recursive CTE:
The methods used are explained here
TSQL to set up the test data (I'm using table variables):
-- Test data for the tag-concatenation queries: two questions (one revision
-- each), six tags, and five tag-to-question links.
-- The objects keep their #-prefixed names, which makes them temp tables:
-- DECLARE ... TABLE only accepts @-prefixed variables, so CREATE TABLE is used.
-- Guarded drops make the script re-runnable in the same session.
IF OBJECT_ID('tempdb..#QuestionTags') IS NOT NULL DROP TABLE #QuestionTags
IF OBJECT_ID('tempdb..#Tags') IS NOT NULL DROP TABLE #Tags
IF OBJECT_ID('tempdb..#QuestionRevisions') IS NOT NULL DROP TABLE #QuestionRevisions
CREATE TABLE #QuestionRevisions (
    id INT IDENTITY NOT NULL,
    question INT NOT NULL,
    postDate DATETIME NOT NULL,
    contents NTEXT NOT NULL,
    creatingUser INT NOT NULL,
    title NVARCHAR(200) NOT NULL
)
CREATE TABLE #Tags (
    id INT IDENTITY NOT NULL,
    name NVARCHAR(45) NOT NULL
)
CREATE TABLE #QuestionTags (
    tag INT NOT NULL,
    question INT NOT NULL
)
INSERT INTO #QuestionRevisions (question, postDate, contents, creatingUser, title)
VALUES (1, GETDATE(), 'Contents 1', 1, 'TITLE 1')
INSERT INTO #QuestionRevisions (question, postDate, contents, creatingUser, title)
VALUES (2, GETDATE(), 'Contents 2', 2, 'TITLE 2')
-- Tag ids come from IDENTITY in insert order (1..6); the links below rely on that.
INSERT INTO #Tags (name) VALUES ('Tag 1')
INSERT INTO #Tags (name) VALUES ('Tag 2')
INSERT INTO #Tags (name) VALUES ('Tag 3')
INSERT INTO #Tags (name) VALUES ('Tag 4')
INSERT INTO #Tags (name) VALUES ('Tag 5')
INSERT INTO #Tags (name) VALUES ('Tag 6')
-- Question 1 gets tags 1, 3, 5; question 2 gets tags 2, 4; tag 6 stays unused.
INSERT INTO #QuestionTags (tag, question) VALUES (1, 1)
INSERT INTO #QuestionTags (tag, question) VALUES (3, 1)
INSERT INTO #QuestionTags (tag, question) VALUES (5, 1)
INSERT INTO #QuestionTags (tag, question) VALUES (4, 2)
INSERT INTO #QuestionTags (tag, question) VALUES (2, 2)
Here's the action part:
-- Builds one comma-separated tag list per question with a recursive CTE.
--   id      : question id (carried unchanged through the recursion)
--   taglist : tag names concatenated so far
--   tagid   : id of the last tag appended; only higher ids may follow
--   length  : number of tags in taglist
;WITH CTE ( id, taglist, tagid, [length] )
AS ( -- Anchor: one empty list per question.
     SELECT question, CAST( '' AS VARCHAR(8000) ), 0, 0
     FROM #QuestionRevisions
     GROUP BY question
     UNION ALL
     -- Recursive step: append any tag of the same question with a higher id.
     -- The question id is taken from the CTE row itself (c.id); the original
     -- returned the revision id (qr.id) here and re-joined the revisions
     -- table, which mixes up ids and multiplies rows as soon as a question
     -- has more than one revision.
     SELECT c.id
          , CAST( c.taglist + CASE WHEN c.[length] = 0 THEN '' ELSE ', ' END + t.name AS VARCHAR(8000) )
          , t.id
          , c.[length] + 1
     FROM CTE c
     INNER JOIN #QuestionTags qt ON qt.question = c.id
     INNER JOIN #Tags t ON t.id = qt.tag
     WHERE t.id > c.tagid )
-- Keep the longest list per question, i.e. the one containing all of its tags.
SELECT id, taglist
FROM ( SELECT id, taglist, RANK() OVER ( PARTITION BY id ORDER BY [length] DESC )
       FROM CTE ) D ( id, taglist, tag_rank )
WHERE tag_rank = 1;
This was the solution I ended up settling on. I checkmarked Mack's answer because it works with arbitrary numbers of tags, and because it matches what I asked for in my question. I ended up though going with this, however, simply because I understand what this is doing, while I have no idea how Mack's works :)
-- Home-page listing: one row per question with its score, answer count, the
-- author of its latest revision, the author of its first revision, and up to
-- five tags (alphabetical), newest latest-revision first.
;WITH tagScans (questionId, tagName, tagRank)
AS (
    -- Number each question's tags alphabetically. QuestionTags.question holds
    -- a question id (it references Questions(id)), not a revision id.
    SELECT
        QuestionTags.question AS questionId,
        Tags.name AS tagName,
        ROW_NUMBER() OVER (PARTITION BY QuestionTags.question ORDER BY Tags.name) AS tagRank
    FROM QuestionTags
    INNER JOIN Tags ON Tags.id = QuestionTags.tag
)
SELECT
    Questions.id AS id,
    Questions.currentScore AS currentScore,
    -- Questions without answers have no row in answerCounts; report 0 for them.
    COALESCE(answerCounts.number, 0) AS answerCount,
    latestRevUser.id AS latestRevUserId,
    latestRevUser.caseId AS lastRevUserCaseId,
    latestRevUser.currentScore AS lastRevUserScore,
    CreatingUsers.userId AS creationUserId,
    CreatingUsers.caseId AS creationUserCaseId,
    CreatingUsers.userScore AS creationUserScore,
    t1.tagName AS tagOne,
    t2.tagName AS tagTwo,
    t3.tagName AS tagThree,
    t4.tagName AS tagFour,
    t5.tagName AS tagFive
FROM Questions
INNER JOIN QuestionRevisions ON QuestionRevisions.question = Questions.id
-- Restrict QuestionRevisions to the latest revision (highest id) of each question.
INNER JOIN
(
    SELECT
        Questions.id AS questionId,
        MAX(QuestionRevisions.id) AS maxRevisionId
    FROM Questions
    INNER JOIN QuestionRevisions ON QuestionRevisions.question = Questions.id
    GROUP BY Questions.id
) AS LatestQuestionRevisions ON QuestionRevisions.id = LatestQuestionRevisions.maxRevisionId
INNER JOIN Users AS latestRevUser ON latestRevUser.id = QuestionRevisions.creatingUser
-- Author of the first revision (lowest id) of each question.
INNER JOIN
(
    SELECT
        QuestionRevisions.question AS questionId,
        Users.id AS userId,
        Users.caseId AS caseId,
        Users.currentScore AS userScore
    FROM Users
    INNER JOIN QuestionRevisions ON QuestionRevisions.creatingUser = Users.id
    INNER JOIN
    (
        SELECT
            MIN(QuestionRevisions.id) AS minQuestionRevisionId
        FROM Questions
        INNER JOIN QuestionRevisions ON QuestionRevisions.question = Questions.id
        GROUP BY Questions.id
    ) AS QuestionGroups ON QuestionGroups.minQuestionRevisionId = QuestionRevisions.id
) AS CreatingUsers ON CreatingUsers.questionId = Questions.id
-- LEFT JOIN so that questions with no answers stay in the listing.
LEFT JOIN
(
    SELECT
        COUNT(*) AS number,
        Questions.id AS questionId
    FROM Questions
    INNER JOIN Answers ON Answers.question = Questions.id
    GROUP BY Questions.id
) AS answerCounts ON answerCounts.questionId = Questions.id
-- Tags are keyed by question id, so match them on Questions.id; matching on
-- QuestionRevisions.id attaches another question's tags (or none) whenever
-- the revision id differs from the question id.
LEFT JOIN tagScans AS t1 ON t1.questionId = Questions.id AND t1.tagRank = 1
LEFT JOIN tagScans AS t2 ON t2.questionId = Questions.id AND t2.tagRank = 2
LEFT JOIN tagScans AS t3 ON t3.questionId = Questions.id AND t3.tagRank = 3
LEFT JOIN tagScans AS t4 ON t4.questionId = Questions.id AND t4.tagRank = 4
LEFT JOIN tagScans AS t5 ON t5.questionId = Questions.id AND t5.tagRank = 5
ORDER BY QuestionRevisions.postDate DESC
This is a common question that comes up quite often phrased in a number of different ways (concatenate rows as string, merge rows as string, condense rows as string, combine rows as string, etc.). There are two generally accepted ways to handle combining an arbitrary number of rows into a single string in SQL Server.
The first, and usually the easiest, is to abuse XML Path combined with the STUFF function like so:
-- One row per #QuestionRevisions row with that question's tag names joined
-- into a comma-separated string.
-- The correlated subquery emits ', TagA, TagB, ...' as a single XML text node;
-- .value() turns it back into nvarchar, and STUFF removes the leading
-- separator. The separator ', ' is two characters long, so two characters are
-- removed (removing only one leaves a leading space in every list).
select rsQuestions.QuestionID,
       stuff((select ', ' + rsTags.TagName
              from #Tags rsTags
              inner join #QuestionTags rsMap on rsMap.TagID = rsTags.TagID
              where rsMap.QuestionID = rsQuestions.QuestionID
              for xml path(''), type).value('.', 'nvarchar(max)'), 1, 2, '') as TagList
from #QuestionRevisions rsQuestions
Here is a working example (borrowing some slightly modified setup from Mack). For your purposes you could store the results of that query in a common table expression, or in a subquery (I'll leave that as an exercise).
The second method is to use a recursive common table expression. Here is an annotated example of how that would work:
-- Builds one comma-separated tag string per question with a recursive CTE.
-- RankedTags numbers each question's tags 1..n in TagID order, so the
-- recursion has a well-defined "next tag" to append at every step.
;with RankedTags (QuestionID, TagString, TagNum) as
(
    select
        rsMap.QuestionID,
        cast(rsTags.TagName as nvarchar(max)),
        row_number() over (partition by rsMap.QuestionID order by rsTags.TagID)
    from #QuestionTags rsMap
    inner join #Tags rsTags
        on rsMap.TagID = rsTags.TagID
),
-- TagChain is the recursive part: it grows each question's string one tag at a time.
TagChain (QuestionID, TagString, TagNum) as
(
    -- Anchor: start every question's chain with its first tag.
    select
        first_tag.QuestionID,
        first_tag.TagString,
        first_tag.TagNum
    from RankedTags first_tag
    where first_tag.TagNum = 1
    union all
    -- Recursion: append tag number N+1 to the chain that currently ends at tag N.
    select
        next_tag.QuestionID,
        chain.TagString + ', ' + next_tag.TagString,
        next_tag.TagNum
    from TagChain chain
    inner join RankedTags next_tag
        on next_tag.QuestionID = chain.QuestionID
       and next_tag.TagNum = chain.TagNum + 1
)
-- The chain holds every intermediate string ('A', 'A, B', 'A, B, C', ...).
-- Each one is a prefix of the next, so max() is expected to return the
-- complete list - NOTE(review): this relies on the collation sorting a string
-- before any longer string that starts with it.
select QuestionID, max(TagString)
from TagChain
group by QuestionID
And here is a working version. Again, you could use the results in a common table expression or subquery to join against your other tables to get your ultimate result. Hopefully the annotations help you understand a little more about how the recursive common table expression works (though the link in Mack's answer also goes into some detail about the method).
There is, of course, another way to do it, which doesn't handle an arbitrary number of rows, which is to join against your table aliased multiple times, which is what you did in your answer.