SQL : retrieving data by rank of variables - sql

I have a dataset like this
student_id, course_id, grade
1 , 1, 2
1, 2, 5
1, 3 ,5
2, 3, 5
2, 1, 2
3, 1, 1
3, 2, 4
I created a schema for this on sqlfiddle.com like below:
-- Fixture: one row per (student, course) pairing with the grade obtained.
CREATE TABLE enrollments (
    STUDENT_ID INT NOT NULL,
    COURSE_ID  INT NOT NULL,
    GRADE      INT NOT NULL
);

-- Seed all eight sample rows in a single multi-row insert.
INSERT INTO enrollments (STUDENT_ID, COURSE_ID, GRADE)
VALUES
    (1, 1, 2),
    (1, 2, 5),
    (1, 3, 5),
    (2, 3, 5),
    (2, 1, 2),
    (3, 1, 1),
    (3, 2, 4),
    (3, 3, 4);
Now here is what I want:
A query that returns the table with columns student_id, course_id, grade and which contains only the rows of the table corresponding to the highest grade each student was able to achieve across any of his/her courses.
If a student achieves the same highest grade in multiple courses, then only display the row corresponding to the course with the lowest course_id. Sort the output by student_id.
So I wrote the following query:
select STUDENT_ID, COURSE_ID, GRADE -- COURSE_ID and GRADE are requested from ss, which does not project them
from
(
select STUDENT_ID, rank() over(PARTITION BY STUDENT_ID ORDER BY GRADE Desc) -- rank by grade within each student
as grade_rank,
rank() over(PARTITION BY STUDENT_ID ORDER BY COURSE_ID asc) as course_rank -- separate rank by course_id within each student
from enrollments
) as ss
where grade_rank=1 and course_rank=1 -- keeps only rows ranked first in BOTH independent rankings
I want to test if this is the right logic on sqlfiddle but it throws an error for the query
ERROR: column "course_id" does not exist Position: 20
The schema has been successfully created there.
Is something wrong with this and how I can test if this is correct logic. If the logic is wrong, please highlight the error in code.
Thanks

You have to select the columns in the inner query too if you want to select them in the outer query. Additionally, you have to use a single RANK() whose ORDER BY covers both columns.
-- Highest grade per student; when several courses share that grade,
-- the lowest COURSE_ID wins. Output is sorted by STUDENT_ID as requested.
SELECT STUDENT_ID,
       COURSE_ID,
       GRADE
FROM (SELECT STUDENT_ID,
             COURSE_ID,
             GRADE,
             -- a single ranking over both keys: best grade first, then lowest course
             rank() OVER (PARTITION BY STUDENT_ID
                          ORDER BY GRADE DESC,
                                   COURSE_ID ASC) R
      FROM ENROLLMENTS) SS
WHERE R = 1
-- the outer ORDER BY is required: a derived table gives no ordering guarantee
ORDER BY STUDENT_ID;

Related

SQL UNION query with order by giving syntax error on "("

I'm trying to select the 2 oldest females and the 2 oldest males using 1 query. The union keeps giving me a syntax error near "(". Both queries work independently, but after the union I get the error.
-- schema: one row per student
CREATE TABLE students (
    id     INTEGER PRIMARY KEY,
    name   TEXT    NOT NULL,
    gender TEXT    NOT NULL,
    age    INTEGER NOT NULL
);

-- sample data: three males and four females
INSERT INTO students (id, name, gender, age)
VALUES
    (1, 'Ryan',   'M', 23),
    (2, 'Joanna', 'F', 22),
    (3, 'Alex',   'F', 25),
    (4, 'Ted',    'M', 21),
    (5, 'June',   'F', 26),
    (6, 'Rose',   'F', 24),
    (7, 'Jack',   'M', 25);
-- select * from students;
-- Asker's attempt: two oldest females combined with two oldest males.
-- Reported to fail with a syntax error near "(" when the two parts are unioned.
SELECT * FROM
-- first operand: parenthesized, with its own ORDER BY / LIMIT
(SELECT name FROM students WHERE GENDER = 'F' ORDER BY age DESC LIMIT 2)
UNION
-- second operand: a bare parenthesized SELECT carrying ORDER BY / LIMIT
(SELECT name FROM students WHERE GENDER = 'M' ORDER BY age DESC LIMIT 2);
Your online compiler is not using MySQL; it is using SQLite!
Execute select sqlite_version(); - the output is '3.31.1'.
Use this:
-- Number the students inside each gender from oldest to youngest,
-- then keep only the first two positions of every gender.
WITH ranked_students AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY gender
            ORDER BY age DESC
        ) AS age_position
    FROM students
)
SELECT name
FROM ranked_students
WHERE age_position < 3;
This code is correct for SQLite.
PS. Add ORDER BY if needed.
For SQLite, both unioned queries, since they contain an ORDER BY clause, must be used as subqueries with an external SELECT clause and you can use an ORDER BY clause at the end which will be applied to the resultset of the union and will put all Fs at the top because they are alphabetically less than the Ms:
-- Two oldest students of each gender.
-- Each limited/ordered SELECT is wrapped in a subquery because SQLite does not
-- accept ORDER BY / LIMIT directly on an operand of a compound SELECT.
-- UNION ALL is used because the two operands are disjoint (different gender),
-- so the de-duplication pass that plain UNION performs is wasted work.
SELECT id, name, gender, age
FROM (SELECT id, name, gender, age FROM students WHERE gender = 'F' ORDER BY age DESC LIMIT 2)
UNION ALL
SELECT id, name, gender, age
FROM (SELECT id, name, gender, age FROM students WHERE gender = 'M' ORDER BY age DESC LIMIT 2)
-- this final ORDER BY applies to the combined result set
ORDER BY gender, age;
See the demo.

Union two queries ordered by newid

I have a table that stores employees (id, name, and gender). I need to randomly get two men and two women.
-- Fixture: employees with a one-letter gender code ('F' / 'M').
-- The stray comma after the last column was removed: SQL Server tolerates it,
-- but it is a syntax error in standard SQL and in most other engines.
CREATE TABLE employees
(
    id INT,
    name VARCHAR (10),
    gender VARCHAR (1)
);

-- Explicit column list so the inserts do not depend on column order.
INSERT INTO employees (id, name, gender)
VALUES
    (1, 'Mary', 'F'),
    (2, 'Jake', 'M'),
    (3, 'Ryan', 'M'),
    (4, 'Lola', 'F'),
    (5, 'Dina', 'F'),
    (6, 'Paul', 'M'),
    (7, 'Tina', 'F'),
    (8, 'John', 'M');
My attempt is the following:
-- Asker's attempt: two random women unioned with two random men.
-- Reported not to work because ORDER BY appears in both halves of the UNION.
SELECT TOP 2 *
FROM employees
WHERE gender = 'F'
ORDER BY NEWID() -- ORDER BY on the first operand of the UNION
UNION
SELECT TOP 2 *
FROM employees
WHERE gender = 'M'
ORDER BY NEWID() -- second ORDER BY in the same compound query
But it doesn't work since I can't put two order by in the same query.
Why not just use row_number()? One method without a subquery is:
-- Two random employees per gender without a subquery.
-- Every gender gets its own random numbering 1, 2, 3, ...; ordering by that
-- number and taking TOP (4) WITH TIES returns the rows numbered 1 and 2 of both genders.
SELECT TOP (4) WITH TIES e.*
FROM employees e -- alias declared here; the original referenced e.* without defining e
WHERE e.gender IN ('M', 'F')
ORDER BY ROW_NUMBER() OVER (PARTITION BY e.gender ORDER BY newid());
This is slightly less performant than using ROW_NUMBER() in a subquery.
Or, a fun method would use APPLY:
-- For each wanted gender value, pull two randomly ordered employees of that gender.
select picked.*
from (values ('M'), ('F')) as wanted(gender)
cross apply (
    select top (2) emp.*
    from employees as emp
    where emp.gender = wanted.gender
    order by newid()
) as picked;
You cannot put an ORDER BY in the combinable query (the first one) of the UNION. However, you can use ORDER BY if you convert each one into a table expression.
For example:
-- Each half is wrapped in a table expression so it can carry its own
-- TOP ... ORDER BY; the two random pairs are then concatenated.
SELECT women.*
FROM (
    SELECT TOP 2 *
    FROM employees
    WHERE gender = 'F'
    ORDER BY NEWID()
) AS women
UNION ALL
SELECT men.*
FROM (
    SELECT TOP 2 *
    FROM employees
    WHERE gender = 'M'
    ORDER BY NEWID()
) AS men
Result:
id name gender
--- ----- ------
5 Dina F
4 Lola F
2 Jake M
3 Ryan M
See running example at SQL Fiddle.

SQL query to filter records based on count and status

I have to filter records based on the status when the count is more than 1.
Column names: Student_id, Status, term, and course.
DB: Postgres
Condition to filter:
If there exists only one record for the student then the status(true or false) does not matter. Fetch the record.
If record count for a student is more than one then fetch only those students whose status is true.
(More than one record would mean, same Student_id, term, and course). At any given time there will be only one record with status as true.
How do I write the SQL query for this?
You can try below query:
-- Temp-table demo: keep a row when it is the only one for its
-- (Student_id, term, course) group, or when its Status bit is set.
CREATE TABLE #TableA (
    id         int,
    Student_id Varchar(100),
    [Status]   bit,
    term       int,
    course     varchar(10)
);

INSERT INTO #TableA (id, Student_id, [Status], term, course)
VALUES
    (1, 1, 1, 3, 'C#'),
    (2, 2, 0, 6, 'Php'),
    (3, 2, 0, 6, 'Php'),
    (4, 2, 1, 6, 'Php'),
    (5, 2, 1, 7, 'Php');

-- group_size = number of rows sharing the same student / term / course
WITH sized AS (
    SELECT
        src.*,
        COUNT(*) OVER (PARTITION BY src.Student_id, src.term, src.course) AS group_size
    FROM #TableA AS src
)
SELECT
    sized.id,
    sized.Student_id,
    sized.Status,
    sized.term,
    sized.course
FROM sized
WHERE sized.[Status] = 1
   OR sized.group_size = 1;
Result will be like below:
id Student_id Status term course
1 1 1 3 C#
4 2 1 6 Php
5 2 1 7 Php
Hmmm . . . You seem to want:
-- Keep a record when it is the only one for its (student_id, term, course)
-- combination, or when its status is true.
-- The count is partitioned by all three columns because the question defines
-- "more than one record" as the same student_id, term and course; counting per
-- student_id alone would wrongly drop a student's single false-status record
-- for one course whenever the student also has records for another course.
select t.*
from (select t.*,
             count(*) over (partition by student_id, term, course) as cnt
      from t
     ) t
where cnt = 1 or status;
This filters out students with more than one record and not true status.
If you really want one row per student, even students with no true status, then use distinct on:
-- One row per student_id: DISTINCT ON keeps the first row of each student_id
-- group under the ORDER BY below, so a true status is preferred over a false one.
SELECT DISTINCT ON (student_id)
    t.*
FROM t
ORDER BY
    student_id ASC,
    status DESC;
-- Expression fragment (not a complete statement): yields 1 when status equals 'x'
-- and count is greater than 1, otherwise 0.
CASE WHEN status='x' AND count>1 THEN 1 ELSE 0 END

postgresql join confusion

I'm trying to make this statement work. but I can't figure it out.
List the names of the students who never took the course Databases.
I have this:
-- Asker's attempt at "students who never took Databases".
-- The WHERE clause removes individual enrollment rows, not students: a student
-- with any other enrollment still appears in the result.
select distinct s1.name, e1.section_id
from students s1
inner join enrollment e1 on e1.student_id = s1.id
-- NOTE(review): the enrollment table as posted has no course_id column
-- (only id, student_id, section_id, grade_id) -- confirm the intended column
where e1.course_id != 12
but that doesn't remove the student who took that section, so I'm stuck.
Database looks like (I'm soo sorry, I don't know how to insert the database into here)
-- Students and their major department.
-- The statement originally started with a bare "table students (", which is
-- not valid SQL; the missing CREATE keyword has been restored.
-- NOTE(review): departments(id) is referenced but not defined in this listing.
create table students (
id integer primary key,
name varchar(255),
graduation_date date,
major_id integer references departments(id)
);
insert into students (id, name, graduation_date, major_id) values
(1, 'Joe', null, 10),
(2, 'Amy', '2009-04-22', 20),
(3, 'Max', null, 10);
-- Course catalogue; each course belongs to a department.
CREATE TABLE courses (
    id            integer PRIMARY KEY,
    title         varchar(255),
    units         integer,
    department_id integer REFERENCES departments(id)
);

INSERT INTO courses (id, title, units, department_id)
VALUES
    (12, 'Databases',  4, 10),
    (22, 'Compilers',  4, 10),
    (32, 'Calculus 1', 4, 20);
-- A section is one offering of a course in a given year.
CREATE TABLE sections (
    id            integer PRIMARY KEY,
    course_id     integer NOT NULL REFERENCES courses(id),
    instructor_id integer REFERENCES faculty(id),
    year          integer
);

INSERT INTO sections (id, course_id, instructor_id, year)
VALUES
    (12, 12, 6, 2007),
    (13, 12, 1, 2008),
    (14, 22, 1, 2008),
    (23, 12, 6, 2009);
-- Links a student to a section, optionally with a grade.
-- NOTE(review): grades(id) is referenced but not defined in this listing, and
-- several section_id values below (32, 34, 43, 53, 54) do not appear in the
-- posted sections data -- the listing looks like an excerpt of a larger dataset.
create table enrollment (
id integer primary key,
student_id integer not null references students(id),
section_id integer not null references sections(id),
grade_id integer references grades(id)
);
-- All twelve posted rows in one statement. The listing originally ended with an
-- "insert ... values" header that had no row after it (a syntax error); that
-- incomplete statement has been dropped.
insert into enrollment (id, student_id, section_id, grade_id) values
(14, 1, 12, 8),
(15, 1, 13, 3),
(16, 1, 14, 5),
(17, 1, 32, 1),
(18, 1, 34, 2),
(19, 1, 53, 13),
(24, 3, 12, 2),
(25, 3, 14, 5),
(26, 3, 32, 1),
(27, 3, 34, 2),
(28, 3, 54, 7),
(34, 2, 43, 3);
No, don't join everything and then try to clean up with DISTINCT. That's a bad approach. Instead think of what you want to select first. Then write the query step by step.
The "students who never took the course Databases" are
all students except those who took the course Databases
all students who are not in the set of students who took the course Databases
all students for whom not exists a Databases course enrollment
I have highlighted the keywords needed. So you have three options:
Write a query with EXCEPT
Write a query with NOT IN
Write a query with NOT EXISTS
Try these and come back here if you have further problems.
UPDATE: Now that you solved it (and even accepted my answer :-), here are some ways to write the query:
Query with IN clause:
-- Students whose id is absent from the set of students enrolled in any
-- section of the course titled 'Databases'.
SELECT st.name
FROM students AS st
WHERE st.id NOT IN (
    SELECT en.student_id
    FROM enrollment AS en
    WHERE en.section_id IN (
        SELECT sec.id
        FROM sections AS sec
        WHERE sec.course_id = (
            SELECT crs.id
            FROM courses AS crs
            WHERE crs.title = 'Databases'
        )
    )
);
Query with EXISTS clause:
-- Students for whom no enrollment exists in a section of 'Databases'.
SELECT st.name
FROM students AS st
WHERE NOT EXISTS (
    SELECT 1
    FROM enrollment AS en
    WHERE en.student_id = st.id
      AND en.section_id IN (
          SELECT sec.id
          FROM sections AS sec
          WHERE sec.course_id = (
              SELECT crs.id
              FROM courses AS crs
              WHERE crs.title = 'Databases'
          )
      )
);
Query with EXCEPT (which is not so good a solution here, because it queries the students table twice, but sometimes EXCEPT is the straight-forward way to a problem). I am using a join on a subquery here instead of WHERE students.id IN (...), just in order to show the technique.
-- All student ids minus the ids enrolled in a 'Databases' section,
-- joined back to students to obtain the names.
SELECT st.name
FROM students AS st
INNER JOIN (
    SELECT every_student.id
    FROM students AS every_student
    EXCEPT
    SELECT en.student_id
    FROM enrollment AS en
    WHERE en.section_id IN (
        SELECT sec.id
        FROM sections AS sec
        WHERE sec.course_id = (
            SELECT crs.id
            FROM courses AS crs
            WHERE crs.title = 'Databases'
        )
    )
) AS found_students
    ON st.id = found_students.id;
Query with COUNT and HAVING, which looks quite compact. It is however a bit more prone to errors. One thing is not to confuse ON and WHERE in outer joins, another is to count the correct column. We must make sure to count a non-nullable field of table sections, so we are sure none of the student's enrollments matched an actual Databases section.
-- Students with zero enrollments in a 'Databases' section.
-- The sections alias was renamed from "s" to "sec": the original used "s" for
-- both students and sections, which is rejected as a duplicate table alias.
select s.id, s.name
from students s
left join enrollment e on e.student_id = s.id
-- the course filter stays in ON (not WHERE) so non-matching enrollments keep
-- their row with sec.* set to NULL instead of being filtered out
left join sections sec on sec.id = e.section_id
and sec.course_id = (select id from courses where title = 'Databases')
group by s.id, s.name
-- count a non-nullable sections column: 0 means no enrollment matched a Databases section
having count(sec.id) = 0;
I think the queries above over complicate the topic a bit so I added my own.
-- Anti-join: the parenthesized inner joins produce only enrollments in a
-- 'Databases' section; students with no such row come back with enr.id NULL.
SELECT stu.*
FROM students AS stu
LEFT JOIN (
    enrollment AS enr
    INNER JOIN sections AS sec
        ON enr.section_id = sec.id
    INNER JOIN courses AS crs
        ON crs.title = 'Databases'
       AND sec.course_id = crs.id
) ON enr.student_id = stu.id
WHERE enr.id IS NULL

How to get numeric range between row value SQL

I have a table which shows Grades and percentages.
Now I want to run query on table which fetch Grade between these percentages.
Example if a student get 72% I want to show the Grade as C.
How to get Grade from table?
Please refer this table picture:
-- Reset both demo tables, then rebuild and display the Students fixture.
Drop Table Grades
Drop Table Students

Create Table Students (
    Name       Varchar(200),
    Percentage Numeric(5,2)
)

Insert Into Students (Name, Percentage)
Values
    ('John',     0.00),
    ('Jane',    38.00),
    ('Joe',     45.00),
    ('Greg',    50.00),
    ('Buck',    55.00),
    ('Harold',  60.00),
    ('Jack',    65.00),
    ('Bill',    68.00),
    ('Gerald',  75.00),
    ('Steve',   79.00),
    ('Walter',  85.00),
    ('Mike',    92.00),
    ('Mary',   100.00),
    ('Mark',   101.00)

Select * From Students
-- Each grade is stored with the lowest percentage that earns it.
Create Table Grades (
    Grade      Char(2),
    Percentage Numeric(5,2)
)
Go
Insert Into Grades (Grade, Percentage)
Values
    ('A*', 101.00),
    ('A',   85.00),
    ('B',   75.00),
    ('C',   65.00),
    ('D',   55.00),
    ('E',   45.00),
    ('F',    0.00)
-- Turn each grade's lower bound into a half-open band [Percentage, NextPercentage)
-- and attach to every student the band containing their percentage.
-- The top band has no successor, so its upper bound is the highest lower bound plus .01.
Select
    S.*,
    G.Grade
From Students S
Join
(
    Select
        *,
        IsNull(
            Lead(Percentage) Over (Order By Percentage),
            (Select Max(Percentage)+.01 From Grades)
        ) NextPercentage
    From Grades
) G
    On G.Percentage <= S.Percentage
   And G.NextPercentage > S.Percentage
Using ORDER BY Percentage DESC together with <= the percentage in the WHERE clause and TOP 1 Grade will give the expected result:
-- Temp lookup of grade lower bounds; the grade for 72 is the row with the
-- highest bound that does not exceed 72.
CREATE TABLE #GradeMaster (
    Grade      VARCHAR(2),
    Percentage DECIMAL(5,2)
)

INSERT INTO #GradeMaster
VALUES
    ('A*', 101),
    ('A',   85),
    ('B',   75),
    ('C',   65),
    ('D',   55),
    ('E',   45),
    ('F',    0)

SELECT TOP 1 gm.Grade
FROM #GradeMaster AS gm
WHERE 72 >= gm.Percentage
ORDER BY gm.Percentage DESC

DROP TABLE #GradeMaster
-- Grade for a score of 72: the row whose lower bound is the largest one not above the score.
-- Fixes: the outer column name was misspelled ("precentage"), and the strict
-- comparison excluded a bound equal to the score, so a score sitting exactly
-- on a boundary (e.g. 65) was given the next lower grade.
select grade from table1 where percentage = (
select max(percentage) from table1 where percentage <= 72);
You can substitute 72 for whatever score you like. There may be a way to do it without the 2 selects, but this should work.
You can use a order by limit 1
-- Highest grade bound that does not exceed 72 determines the grade.
SELECT g.grade
FROM my_table AS g
WHERE 72 >= g.percentage
ORDER BY g.percentage DESC
LIMIT 1;
Assuming there might also be a student table and assignment table ... I would think the lookup query would look something like this. The below will give you all students regardless of whether they have any graded assignments. Alternatively, you could join the student table directly if you have an overall grade already aggregated.
-- Every student with their assignments (if any) and the grade for each assignment.
-- The original grade join required A.Percentage >= G.Percentage AND
-- A.Percentage < G.Percentage at the same time, which can never be true, so
-- G.grade was always NULL. The join now picks the grade row with the highest
-- Percentage that does not exceed the assignment's percentage.
-- NOTE(review): assumes Grade.Percentage holds each grade's lower bound -- confirm against the schema.
SELECT
S.*,
A.*,
G.grade
FROM
Student S
LEFT OUTER JOIN Assignment A ON S.Student_id = A.Student_id
LEFT OUTER JOIN Grade G ON G.Percentage = (
    SELECT MAX(G2.Percentage)
    FROM Grade G2
    WHERE G2.Percentage <= A.Percentage
)