SQL: How do I loop through the results of a SELECT statement? - sql

How do I loop through the results of a SELECT statement in SQL? My SELECT statement will return just 1 column but n results.
I have created a fictional scenario below complete with the Pseudo code of what I'm trying to do.
Scenario:
Students are registering for their classes. They submit a form with multiple course selections (i.e. select 3 different courses at once). When they submit their registration I need to ensure there is still room left in the courses they have selected (note I will do a similar check before presenting them with the course selection UI, but I need to verify afterwards in case somebody else has gone in and swiped up the remaining spots).
Pseudo Code:
-- Pseudo-code only (not runnable T-SQL): loop over a student's selected classes
-- and roll everything back as soon as one class turns out to be over capacity.
DECLARE @StudentId = 1
DECLARE @Capacity = 20
-- Classes will be the result of a Select statement which returns a list of ints
@Classes = SELECT classId FROM Student.CourseSelections
WHERE Student.CourseSelections = @StudentId
BEGIN TRANSACTION
DECLARE @ClassId int
foreach (@classId in @Classes)
{
SET @SeatsTaken = fnSeatsTaken @classId
if (@SeatsTaken > @Capacity)
{
ROLLBACK; -- I'll revert all their selections up to this point
RETURN -1;
}
else
{
-- set some flag so that this student is confirmed for the class
}
}
COMMIT
RETURN 0
My real problem is a similar "ticketing" problem. So if this approach seems very wrong please feel free to recommend something more practical.
EDIT:
Attempting to implement the solution below. At this point it doesn't work. Always returns "reserved".
-- Repro script from the question: sample students, courses and enrolments held
-- in table variables, followed by the capacity check that misbehaves.
-- Students
DECLARE @Students TABLE
(
StudentId int
,StudentName nvarchar(max)
)
INSERT INTO @Students
(StudentId ,StudentName)
VALUES
(1, 'John Smith')
,(2, 'Jane Doe')
,(3, 'Jack Johnson')
,(4, 'Billy Preston')
-- Courses
DECLARE @Courses TABLE
(
CourseId int
,Capacity int
,CourseName nvarchar(max)
)
INSERT INTO @Courses
(CourseId, Capacity, CourseName)
VALUES
(1, 2, 'English Literature'),
(2, 10, 'Physical Education'),
(3, 2, 'Photography')
-- Linking Table
DECLARE @Courses_Students TABLE
(
Course_Student_Id int
,CourseId int
,StudentId int
)
INSERT INTO @Courses_Students
(Course_Student_Id, StudentId, CourseId)
VALUES
(1, 1, 1),
(2, 1, 3),
(3, 2, 1),
(4, 2, 2),
(5, 3, 2),
(6, 4, 1),
(7, 4, 2)
-- Who is enrolled on what
SELECT Students.StudentName, Courses.CourseName FROM @Students Students INNER JOIN
@Courses_Students Courses_Students ON Courses_Students.StudentId = Students.StudentId INNER JOIN
@Courses Courses ON Courses.CourseId = Courses_Students.CourseId
DECLARE @StudentId int = 4
-- Ideally the Capacity would be database driven
-- ie. come from the Courses.Capacity.
-- But I didn't want to complicate the HAVING statement since it doesn't seem to work already.
DECLARE @Capacity int = 1
-- NOTE(review): kept as posted; with the sample data this always yields 'reserved'.
-- The WHERE clause keeps only this student's own link rows, so every CourseId
-- group has COUNT(*) = 1, which never exceeds @Capacity = 1. Counting everyone
-- on the course needs a second join to the link table that is not filtered by student.
IF EXISTS (Select *
FROM
@Courses Courses INNER JOIN
@Courses_Students Courses_Students ON Courses_Students.CourseId = Courses.CourseId
WHERE
Courses_Students.StudentId = @StudentId
GROUP BY
Courses.CourseId
HAVING
COUNT(*) > @Capacity)
BEGIN
SELECT 'full' as Status
END
ELSE BEGIN
SELECT 'reserved' as Status
END

No loop needed. You're looking at a standard aggregate with COUNT and GROUP.
Of course, some details are needed but the principle is this...
-- Sketch of the set-based principle (table and column names are placeholders):
-- one grouped query replaces the per-class loop.
DECLARE @StudentId = 1
DECLARE @Capacity = 20
-- Classes will be the result of a Select statement which returns a list of ints
IF EXISTS (SELECT *
FROM
Student.CourseSelections CS
JOIN
---this is where you find out course allocations somehow
ClassTable C ON CS.classId = C.classId
WHERE
Student.CourseSelections = @StudentId
GROUP BY --change this, it depends on where you find out course allocations
ClassID
HAVING
COUNT(*) > @Capacity)
'no'
ELSE
'yes'
Edit:
I've changed the link table. Course_Student_ID is usually not needed in link tables.
The JOIN now
gets the courses for that student
then looks at all students on this course and compares to capacity
Cut down version of above:
...
-- Linking Table
-- No surrogate id column: each row is identified by its (StudentId, CourseId) pair.
-- @Courses is assumed to be declared in the part elided by "..." — it is not declared in this snippet.
DECLARE @Courses_Students TABLE (
CourseId int
,StudentId int)
INSERT INTO @Courses_Students
(StudentId, CourseId)
VALUES (1, 1), (1, 3), (2, 1), (2, 2), (3, 2), (4, 1), (4, 2)
DECLARE @StudentId int = 4
--straight list
-- CSThis restricts the result to the courses this student has selected;
-- CSOthers joins back to every link row for those courses, so COUNT(*) is the
-- total number of students on each course.
SELECT
C.CourseName, C.Capacity, COUNT(*) AS Enrolled
FROM
@Courses_Students CSThis
JOIN
@Courses C ON CSThis.CourseId = C.CourseId
JOIN
@Courses_Students CSOthers ON CSOthers.CourseId = C.CourseId
WHERE
CSThis.StudentId = @StudentId
GROUP BY
-- CourseId keeps two courses with the same name and capacity in separate groups
C.CourseId, C.CourseName, C.Capacity
--oversubscribed list
-- Same query, keeping only courses whose enrolment exceeds their own capacity.
SELECT
C.CourseName, C.Capacity, COUNT(*) AS Enrolled
FROM
@Courses_Students CSThis
JOIN
@Courses C ON CSThis.CourseId = C.CourseId
JOIN
@Courses_Students CSOthers ON CSOthers.CourseId = C.CourseId
WHERE
CSThis.StudentId = @StudentId
GROUP BY
C.CourseId, C.CourseName, C.Capacity
HAVING
COUNT(*) > C.Capacity

Avoid looping through result sets in SQL as much as you can. If you really can't avoid it (for example, if you come from a procedural programming background and your job has led you into SQL), use cursors. They don't smell nice, but they are unavoidable at times.

Another option would be to implement a CHECK Constraint on your table that contains the Course information. The check constraint could call your existing function to check that there are free seats.
Wrap all of your Inserts/Updates into one transaction. If any of the Inserts/Updates fails, the entire transaction will be rolled back.

Related

How to select rows in a many-to-many relationship? (SQL)

I have a Students table and a Courses table.
They have a many to many relationship between them and the StudentCourses table is the intermediary.
Now, I have a list of Course ids and want to select the Students that follow all Courses in my list.
How??
-- Setup for the many-to-many example: a table-valued variable holding the
-- wanted course ids, plus temp tables for students, courses and the link table.
--CREATE TYPE CourseListType AS TABLE
--(
-- CourseID INT
--)
DECLARE
@CourseList CourseListType
CREATE TABLE #Students
(
ID INT
,Name CHAR(10)
)
CREATE TABLE #Courses
(
ID INT
,Name CHAR(10)
)
CREATE TABLE #StudentCourses
(
StudentID INT
,CourseID INT
)
INSERT INTO @CourseList (CourseID)
VALUES
(1) --English
,(2) --Math
INSERT INTO #Students (ID, Name)
VALUES
(1, 'John')
,(2, 'Jane')
,(3, 'Donald')
INSERT INTO #Courses (ID, Name)
VALUES
(1, 'English')
,(2, 'Math')
,(3, 'Geography')
INSERT INTO #StudentCourses (StudentID, CourseID)
VALUES
(1, 1)
,(1, 2)
,(2, 1)
,(2, 2)
,(3, 1)
,(3, 3)
In this example, I only want the result to be John and Jane, because they both have the two courses in my CourseList.
I don't want Donald, because he only has one of them.
I have tried this JOIN construction, but it does not eliminate students that only have some of my desired courses.
-- Asker's attempt: joins every wanted course to its enrolments, so a student
-- appears once per matching course rather than only when all courses match.
SELECT
*
FROM
@CourseList CRL
INNER JOIN #Courses CRS ON CRS.ID = CRL.CourseID
INNER JOIN #StudentCourses STC ON STC.CourseID = CRS.ID
INNER JOIN #Students STD ON STD.ID = STC.StudentID
If you want students with all your required courses, you can use aggregation and having:
-- Relational division: keep the students whose number of distinct matched
-- courses equals the number of distinct courses in the wanted list.
SELECT sc.StudentID
FROM #StudentCourses sc JOIN
@CourseList cl
ON sc.CourseID = cl.CourseID -- CourseListType exposes CourseID, not id
GROUP BY sc.StudentID
-- DISTINCT on both sides so a repeated id in either table cannot skew the comparison
HAVING COUNT(DISTINCT sc.CourseID) = (SELECT COUNT(DISTINCT CourseID) FROM @CourseList);
If you want additional information about students, you can join in the Students table (or use an IN or a similar construct).
Note that this only needs the StudentCourses table. It has the matching ids. There is no need to join in the reference tables.

SQL return only distinct IDs from LEFT JOIN

I've inherited some fun SQL and am trying to figure out how to eliminate rows with duplicate IDs. Our indexes are stored in a somewhat columnar format and then we pivot all the rows into one with the values as different columns.
The below sample returns three rows of unique data, but the IDs are duplicated. I need just two rows with unique IDs (and the other columns that go along with it). I know I'll be losing some data, but I just need one matching row per ID to the query (first, top, oldest, newest, whatever).
I've tried using DISTINCT, GROUP BY, and ROW_NUMBER, but I keep getting the syntax wrong, or using them in the wrong place.
I'm also open to rewriting the query completely in a way that is reusable as I currently have to generate this on the fly (cardtypes and cardindexes are user defined) and would love to be able to create a stored procedure. Thanks in advance!
-- Sample data: card types, cards, and per-card index rows
-- (IndexType 1 carries a keyword in StringVal, IndexType 2 a date in DateVal).
declare @cardtypes table ([ID] int, [Name] nvarchar(50))
declare @cards table ([ID] int, [CardTypeID] int, [Name] nvarchar(50))
declare @cardindexes table ([ID] int, [CardID] int, [IndexType] int, [StringVal] nvarchar(255), [DateVal] datetime)
INSERT INTO @cardtypes VALUES (1, 'Funny Cards')
INSERT INTO @cardtypes VALUES (2, 'Sad Cards')
INSERT INTO @cards VALUES (1, 1, 'Bunnies')
INSERT INTO @cards VALUES (2, 1, 'Dogs')
INSERT INTO @cards VALUES (3, 1, 'Cat')
INSERT INTO @cards VALUES (4, 1, 'Cat2')
INSERT INTO @cardindexes VALUES (1, 1, 1, 'Bunnies', null)
INSERT INTO @cardindexes VALUES (2, 1, 1, 'playing', null)
INSERT INTO @cardindexes VALUES (3, 1, 2, null, '2014-09-21')
INSERT INTO @cardindexes VALUES (4, 2, 1, 'Dogs', null)
INSERT INTO @cardindexes VALUES (5, 2, 1, 'playing', null)
INSERT INTO @cardindexes VALUES (6, 2, 1, 'poker', null)
INSERT INTO @cardindexes VALUES (7, 2, 2, null, '2014-09-22')
-- One output row per (card, matching keyword row) pair, so a card with several
-- keywords starting with 'p' appears several times.
SELECT TOP(100)
[ID] = c.[ID],
[Name] = c.[Name],
[Keyword] = [colKeyword].[StringVal],
[DateAdded] = [colDateAdded].[DateVal]
FROM @cards AS c
LEFT JOIN @cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN @cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
ORDER BY [DateAdded]
Edit:
While both solutions are valid, I ended up using the MAX() solution from @popovitsj as it was easier to implement. The issue of data coming from multiple rows doesn't really factor in for me as all rows are essentially part of the same record. I will most likely use both solutions depending on my needs.
Here's my updated query (as it didn't quite match the answer):
-- Collapses the duplicated card rows to one per ID by taking MAX() of every
-- other column; the values in one output row may come from different source rows.
SELECT TOP(100)
[ID] = c.[ID],
[Name] = MAX(c.[Name]),
[Keyword] = MAX([colKeyword].[StringVal]),
[DateAdded] = MAX([colDateAdded].[DateVal])
FROM @cards AS c
LEFT JOIN @cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN @cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
GROUP BY c.ID
ORDER BY [DateAdded]
You could use MAX or MIN to 'decide' on what to display for the other columns in the rows that are duplicate.
SELECT ID, MAX(Name), MAX(Keyword), MAX(DateAdded)
(...)
GROUP BY ID;
Using the ROW_NUMBER() window function along with a CTE will do this pretty well. For example:
-- Numbers the joined rows within each card and keeps only the first one, so
-- every card ID appears at most once and all columns come from the same row.
;WITH preResult AS (
SELECT
[ID] = c.[ID],
[Name] = c.[Name],
[Keyword] = [colKeyword].[StringVal],
[DateAdded] = [colDateAdded].[DateVal],
-- the keyword row's ID breaks ties so the surviving row is deterministic
rn = ROW_NUMBER() OVER (PARTITION BY c.ID ORDER BY [colDateAdded].[DateVal], [colKeyword].[ID])
FROM @cards AS c
LEFT JOIN @cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN @cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
)
-- TOP and ORDER BY are applied after de-duplication so the limit counts distinct
-- cards and the final result order is actually guaranteed.
SELECT TOP(100) [ID], [Name], [Keyword], [DateAdded]
FROM preResult
WHERE rn = 1
ORDER BY [DateAdded]

EXISTS and NOT EXISTS in a correlated subquery

I've been trying to work out how to do a particular query for a day or so now and it has gotten to the point where I need some outside help. Hence my question.
Given the following data;
-- @Data: which organisation holds which thing.
DECLARE @Data AS TABLE
(
OrgId INT,
ThingId INT
)
-- @ReplacementData: old thing -> the new thing that supersedes it.
DECLARE @ReplacementData AS TABLE
(
OldThingId INT,
NewThingId INT
)
INSERT INTO @Data (OrgId, ThingId)
VALUES (1, 2), (1, 3), (1, 4),
(2, 1), (2, 4),
(3, 3), (3, 4)
INSERT INTO @ReplacementData (OldThingId, NewThingId)
VALUES (3, 4), (2, 5)
I want to find any organisation that has a "thing" that has been replaced as denoted in the @ReplacementData table variable. I'd want to see the org id, the thing it is that they have that has been replaced and the id of the thing that should replace it. So for example given the data above, I should see;
Org id, Thing Id, Replacement Thing Id org doesn't have but should have
1, 2, 5 -- As Org 1 has 2, but not 5
I've had many attempts at trying to get this working, and I just can't seem to get my head around how to go about it. The following are a couple of my attempts, but I think I am just way off;
-- Attempt using correlated subqueries and EXISTS clauses
-- Show all orgs that have the old thing, but not the new thing
-- Ideally, limit results to OrgId, OldThingId and the NewThingId that they should now have too
SELECT *
FROM @Data d
WHERE EXISTS (SELECT *
FROM @Data oldstuff
WHERE oldstuff.OrgId = d.OrgId
AND oldstuff.ThingId IN
(SELECT OldThingID
FROM @ReplacementData))
AND NOT EXISTS (SELECT *
FROM @Data oldstuff
WHERE oldstuff.OrgId = d.OrgId
AND oldstuff.ThingId IN
(SELECT NewThingID
FROM @ReplacementData))
-- Attempt at using a JOIN to only include those old things that the org has (via the where clause)
-- Also try exists to show missing new things.
SELECT *
FROM @Data d
LEFT JOIN @ReplacementData rd ON rd.OldThingId = d.ThingId
WHERE NOT EXISTS (
SELECT *
FROM @Data dta
INNER JOIN @ReplacementData rep ON rep.NewThingId = dta.ThingId
WHERE dta.OrgId = d.OrgId
)
AND rd.OldThingId IS NOT NULL
Any help on this is much appreciated. I may well be going about it completely wrong, so please let me know if there is a better way of tackling this type of problem.
Try this out and let me know.
-- Self-contained script: lists every (org, old thing, new thing) where the org
-- holds the old thing but does not yet hold its replacement.
DECLARE @Data AS TABLE
(
OrgId INT,
ThingId INT
)
DECLARE @ReplacementData AS TABLE
(
OldThingId INT,
NewThingId INT
)
INSERT INTO @Data (OrgId, ThingId)
VALUES (1, 2), (1, 3), (1, 4),
(2, 1), (2, 4),
(3, 3), (3, 4)
INSERT INTO @ReplacementData (OldThingId, NewThingId)
VALUES (3, 4), (2, 5)
SELECT D.OrgId, RD.OldThingId, RD.NewThingId
FROM @Data D
-- keep only the things that have a replacement defined
JOIN @ReplacementData RD
ON D.ThingId = RD.OldThingId
-- anti-join: look for the same org already holding the replacement...
LEFT OUTER JOIN @Data AlreadyHas
ON D.OrgId = AlreadyHas.OrgId
AND RD.NewThingId = AlreadyHas.ThingId
-- ...and keep the rows where no such holding was found
WHERE AlreadyHas.OrgId IS NULL

How to compare n:m assignments?

I have two tables (entity and kind) plus a n:m table (entity_kind).
-- Entities that can be assigned kinds.
CREATE TABLE entity (
    entity_id INT,
    name      NVARCHAR(100),
    PRIMARY KEY (entity_id)
)
-- Kinds that can be assigned to entities.
CREATE TABLE kind (
    kind_id INT,
    name    NVARCHAR(100),
    PRIMARY KEY (kind_id)
)
-- n:m link table; the composite key allows each (entity, kind) pair only once.
CREATE TABLE entity_kind (
    entity_id INT,
    kind_id   INT,
    PRIMARY KEY (entity_id, kind_id)
)
Test data:
-- Test data. Column lists are spelled out so the inserts do not depend on the
-- physical column order of the tables.
INSERT INTO
entity (entity_id, name)
VALUES
(1, 'Entity A')
, (2, 'Entity B')
, (3, 'Entity C')
INSERT INTO
kind (kind_id, name)
VALUES
(1, 'Kind 1')
, (2, 'Kind 2')
, (3, 'Kind 3')
, (4, 'Kind 4')
-- Entity A has kinds {1, 3}, Entity B has {1, 2}, Entity C has {4}.
INSERT INTO
entity_kind (entity_id, kind_id)
VALUES
(1, 1)
, (1, 3)
, (2, 1)
, (2, 2)
, (3, 4)
My code so far:
-- Skeleton from the question: the entities to compare go into
-- @selected_entities; the missing code must set @same_kinds.
DECLARE
@selected_entities
TABLE
(
entity_id INT
)
DECLARE
@same_kinds BIT;
INSERT INTO
@selected_entities
VALUES
(1), (2)
-- Missing code here
SELECT
@same_kinds AS "same_kinds"
The table variable @selected_entities is filled with the entities that should be compared.
The logical variable @same_kinds should indicate whether the selected entities have exactly the same kinds assigned.
How can I achieve this?
This is a compare two sets of things type problem. The query I'm going to show gives all pairs along with a flag. You can easily incorporate comparing a subquery by changing the first two entity tables to the table of ids you want to compare.
This query has a few parts. First, it produces all pairs of entities from the entity tables. This is important, because this will pick up even entities that have no "kinds" associated with them. You want a flag, rather than just a list of those that match.
Then the heart of the logic is to do a self-join on the entity-kinds table with the match on "kind". This is then aggregated by the two entities. The result is a count of the kinds that two entities share.
The final logic is to compare this count to the count of "kinds" on each entity. If all of these counts are the same, then the entities match. If not, they do not. This approach does assume that there are no duplicates in entity_kinds.
-- For every pair of entities with e1.entity_id < e2.entity_id, returns the two
-- ids and an IsSame flag (1/0) derived from their entity_kind rows.
select e1.entity_id as e1, e2.entity_id as e2,
-- count(ek1.entity_id): e1's kind rows in this pair's group
-- count(ek2.entity_id): how many of those kinds e2 also has
-- max(ekN.numkinds):    total kinds of that entity; NULL when no ekN row joined
-- NOTE(review): if e1 has no kinds, max(ek1.numkinds) is NULL, the comparison is
-- not true and the flag is 0 — even when e2 has no kinds either.
(case when count(ek1.entity_id) = max(ek1.numkinds) and
count(ek2.entity_id) = count(ek1.entity_id) and
max(ek1.numkinds) = max(ek2.numkinds)
then 1
else 0
end) as IsSame
from entity e1 join
entity e2
-- strict "<" yields each unordered pair once and never pairs an entity with itself
on e1.entity_id < e2.entity_id left outer join
-- every entity_kind row plus the number of rows for the same entity
(select ek.*, count(*) over (partition by entity_id) as numkinds
from entity_kind ek
) ek1
on e1.entity_id = ek1.entity_id left outer join
(select ek.*, count(*) over (partition by entity_id) as numkinds
from entity_kind ek
) ek2
-- e2's row for the same kind as the current e1 row, if there is one
on e2.entity_id = ek2.entity_id and
ek2.kind_id = ek1.kind_id
group by e1.entity_id, e2.entity_id;
The SQL Fiddle is here.
You can do this with two checks: First, if the kind-count on each entity is not the same, then they cannot match. Second, provided the count is the same, you just need to find one kind that doesn't match the list of an arbitrary other entity (I just take the first entity in the compare list). In code:
-- Sets @same_kinds to 1 when every selected entity has exactly the same kinds
-- as a reference entity, otherwise 0.
-- MIN() instead of TOP 1 without ORDER BY makes the reference entity deterministic.
DECLARE @firstEntity int = (SELECT MIN(entity_id) FROM @selected_entities)
-- Driven from @selected_entities rather than an inner join to entity_kind, so
-- an entity with no kinds at all is still compared (through the count test).
IF EXISTS(SELECT 1 FROM @selected_entities se
WHERE
-- the entity has a kind the reference entity lacks
EXISTS (SELECT 1 FROM entity_kind ek
WHERE ek.entity_id = se.entity_id
AND NOT EXISTS (SELECT 1 FROM entity_kind ref
WHERE ref.entity_id = @firstEntity AND ref.kind_id = ek.kind_id))
-- or the number of kinds differs; entity_kind's primary key rules out
-- duplicates, so "subset + same count" means the two sets are equal
OR ((SELECT COUNT(1) FROM entity_kind WHERE entity_id = se.entity_id)
<> (SELECT COUNT(1) FROM entity_kind WHERE entity_id = @firstEntity)))
SET @same_kinds = 0
ELSE
SET @same_kinds = 1
-- Sets @same_kinds to 1 when every selected entity has exactly the same kinds
-- as a reference entity, otherwise 0, and returns the flag.
DECLARE @first_entity_id INT;
-- MIN() instead of TOP(1) without ORDER BY makes the reference entity deterministic.
SET @first_entity_id = (SELECT MIN(se.entity_id) FROM @selected_entities se);
DECLARE @first_kind_count INT;
SET @first_kind_count = (SELECT COUNT(*) FROM dbo.entity_kind ek WHERE ek.entity_id = @first_entity_id);
-- Per selected entity: COUNT(ek.kind_id) is its own number of kinds and
-- COUNT(k.kind_id) is how many of those the reference entity shares. Both must
-- equal the reference count; comparing only the per-entity totals would treat
-- kinds {1,3} and {1,2} as identical.
SET @same_kinds = CASE WHEN EXISTS (
SELECT 1
-- DISTINCT guards the counts against a repeated id in @selected_entities
FROM (SELECT DISTINCT entity_id FROM @selected_entities) se
LEFT JOIN dbo.entity_kind ek ON ek.entity_id = se.entity_id
LEFT JOIN dbo.entity_kind k ON k.entity_id = @first_entity_id AND k.kind_id = ek.kind_id
GROUP BY se.entity_id
HAVING COUNT(ek.kind_id) <> @first_kind_count
OR COUNT(k.kind_id) <> @first_kind_count
) THEN 0 ELSE 1 END;
SELECT @same_kinds AS [@same_kinds];
Note: @selected_entities should be declared thus:
-- The primary key guarantees each entity id is listed at most once.
DECLARE
@selected_entities
TABLE
(
entity_id INT PRIMARY KEY
)

Query logic issue: Using parameters and variables in a single query

I'm trying to combine 2 different select statements (each are being put into 2 variables) and then using 2 parameters to show my results, but I'm not 100% confident with my logic.
I'm using 2 tables (Salaries and Department)
I first created the procedure with the 2 parameters:
-- @Dept: department description to report on; @DeptPercent: output parameter.
CREATE PROCEDURE DepartmentPercentage
(
@Dept VARCHAR(20),
@DeptPercent int OUTPUT
)
Then I declared and set my 2 variables, using an inner join in one of them to connect the Salaries and Department tables:
AS
DECLARE @Sal int
DECLARE @DeptRate int
SET @Sal = (SELECT SUM(AN_RATE) FROM Salaries) --Total Annual Rate for the entire table
SET @DeptRate = (SELECT SUM(S.AN_RATE) -- Calculates the Total Annual Rate for a given Department
FROM Salaries as S
INNER JOIN Department as D
ON D.DEPT_ID = S.DEPT_ID
WHERE DESCRIPTION = @Dept)
Then I put my out parameter to equal a division between the 2 declared variables:
@DeptPercent = (@DeptRate/@Sal)
My execution statement:
EXEC DepartmentPercentage @Dept = 'Fire Department', @DeptPercent;
Any help with my logic would be much appreciated. I don't know too much about using multiple parameters and variables in a single query.
This should get you close to the results you want without the need for all the parameters and variables:
-- One pass over the joined rows: overall total, the total for @Dept, and the
-- department's share of the overall total expressed as a percentage (0-100).
SELECT SUM(S.AN_RATE) AS total,
SUM(CASE DESCRIPTION WHEN @Dept THEN S.AN_RATE ELSE 0 END) AS dept,
-- 100.0 * first keeps the division out of integer arithmetic if AN_RATE is an
-- integer column; NULLIF returns NULL instead of a divide-by-zero error.
100.0 * SUM(CASE DESCRIPTION WHEN @Dept THEN S.AN_RATE ELSE 0 END) / NULLIF(SUM(S.AN_RATE), 0) AS deptPercent
FROM Salaries as S
INNER JOIN Department as D
ON D.DEPT_ID = S.DEPT_ID
I know you probably only want the last column, but I thought the other two might help other people. If I get a chance in the morning I'll set up a SQLFiddle with test data.
SQLFiddle seems to be having some problems at the minute so here is a worked example:
--Test Data Setup
DECLARE @Salaries AS TABLE (SAL_ID int, DEPT_ID int, AN_RATE decimal(7,2))
DECLARE @Department AS TABLE (DEPT_ID int, DEPT_DESCRIPTION VARCHAR(20))
INSERT INTO @Department (DEPT_ID, DEPT_DESCRIPTION) VALUES (1, 'Fire Department')
INSERT INTO @Department (DEPT_ID, DEPT_DESCRIPTION) VALUES (2, 'Earth Department')
INSERT INTO @Department (DEPT_ID, DEPT_DESCRIPTION) VALUES (3, 'Wind Department')
INSERT INTO @Salaries (SAL_ID, DEPT_ID, AN_RATE) VALUES (1, 1, 10000.00)
INSERT INTO @Salaries (SAL_ID, DEPT_ID, AN_RATE) VALUES (2, 1, 15000.00)
INSERT INTO @Salaries (SAL_ID, DEPT_ID, AN_RATE) VALUES (3, 1, 20000.00)
INSERT INTO @Salaries (SAL_ID, DEPT_ID, AN_RATE) VALUES (4, 3, 25000.00)
INSERT INTO @Salaries (SAL_ID, DEPT_ID, AN_RATE) VALUES (5, 2, 22000.00)
INSERT INTO @Salaries (SAL_ID, DEPT_ID, AN_RATE) VALUES (6, 2, 21000.00)
--Parameter
DECLARE @Dept VARCHAR(20)
SET @Dept = 'Fire Department'
--Query
-- With the data above: total = 113000.00, dept = 45000.00, deptPercent is about 39.82.
SELECT SUM(S.AN_RATE) AS total,
SUM(CASE DEPT_DESCRIPTION WHEN @Dept THEN S.AN_RATE ELSE 0 END) AS dept,
-- NULLIF returns NULL instead of a divide-by-zero error when the total is 0
100.0 * SUM(CASE DEPT_DESCRIPTION WHEN @Dept THEN S.AN_RATE ELSE 0 END) / NULLIF(SUM(S.AN_RATE), 0) AS deptPercent
FROM @Salaries as S
INNER JOIN @Department as D
ON D.DEPT_ID = S.DEPT_ID