What is the basic functioning of group by in SQL? - sql

What is the behaviour of group by?
I have two tables:
-- Lookup table of departments; employees_dep.dep_id references dep_id.
create table department
(
    dep_id int primary key,
    -- Department name. This column was declared as a second "dep_id", which is a
    -- duplicate column name; the inserts and queries all expect dep_name here.
    dep_name varchar(20),
    dep_location varchar(20)
)
and
-- Employees; dep_id links each employee to a row of department.
CREATE TABLE employees_dep
(
    emp_id     INT PRIMARY KEY,
    emp_name   VARCHAR(20),
    job_name   VARCHAR(20),
    manager_id INT,             -- no foreign key is declared on this column
    hire_date  DATE,
    salary     DECIMAL(10, 2),
    commision  DECIMAL(7, 2),
    dep_id     INT FOREIGN KEY REFERENCES department(dep_id)
)
With data as:
-- Sample data: four departments followed by fourteen employees.
-- No column lists are given, so every VALUES tuple must follow the table's column order.
insert into department values (1001, 'finance', 'sydney')
insert into department values (2001, 'audit', 'melbourne')
insert into department values (3001, 'marketing', 'perth')
insert into department values (4001, 'production', 'brisbane')
-- hire_date is passed as a month-day-year string and converted implicitly to date.
-- NOTE(review): that conversion presumably depends on the session's date format — confirm.
-- Department 4001 receives no employees in this data set.
insert into employees_dep
values (68319, 'kayling', 'president', null, '11-18-1991', 6000, 0, 1001)
insert into employees_dep
values (66928, 'blaze', 'manager', 68319, '05-01-1991', 2750, 0, 3001)
insert into employees_dep
values (67832, 'clare', 'manager', 68319, '06-09-1991', 2550, 0, 1001)
insert into employees_dep
values (65646, 'jonas', 'manager', 68319, '04-02-1991', 2957, 0, 2001)
insert into employees_dep
values (67858, 'scarlet', 'analyst', 65646, '04-19-1991', 3100, 0, 2001)
insert into employees_dep
values (69062, 'frank', 'analyst', 65646, '12-03-1991', 3100, 0, 2001)
insert into employees_dep
values (63679, 'sandrine', 'clerk', 69062, '12-18-1991', 900, 0, 2001)
insert into employees_dep
values (64989, 'adelyn', 'salesman', 66928, '02-20-1991', 1700, 400, 3001)
insert into employees_dep
values (65271, 'wade', 'salesman', 66928, '02-22-1991', 1350, 600, 3001)
insert into employees_dep
values (66564, 'madden', 'salesman', 66928, '09-28-1991', 1350, 1500, 3001)
insert into employees_dep
values (68454, 'tucker', 'salesman', 66928, '09-08-1991', 1600, 0, 3001)
insert into employees_dep
values (68736, 'andres', 'clerk', 67858, '05-23-1997', 1200, 0, 2001)
insert into employees_dep
values (69000, 'julius', 'clerk', 66928, '12-03-1991', 1050, 0, 3001)
insert into employees_dep
values (69324, 'marker', 'clerk', 67832, '01-23-1992', 1400, 0, 1001)
The question: when I write a query as:
-- Counts rows per department name.
-- The FROM clause lists both tables with no join condition, so every employee row
-- is paired with every department row before the grouping happens.
select
d.dep_name,
count(d.dep_name) as no_of_employees
from
employees_dep e, department d
group by
d.dep_name
The output is:
every department name, each with no_of_employees equal to 14.
Why is this so? I expected the group by clause to take each department name in turn, group all the rows of the employee table that have the same department id, and then count the number of rows in each group.
When I write the query as:
-- Same aggregate as before, but the WHERE clause keeps only the employee/department
-- pairs whose dep_id values match, so each group holds that department's employees only.
select
d.dep_name,
count(d.dep_name) as no_of_employees
from
employees_dep e, department d
where
e.dep_id = d.dep_id
group by
d.dep_name
Then it returns the correct output with the correct number of occurrences of each department in the table.
Please explain this behaviour of group by....

Use an inner join. Hope this helps.
-- Number of employees per department, pairing rows on dep_id.
SELECT
    d.dep_name,
    COUNT(d.dep_name) AS no_of_employees
FROM employees_dep e
INNER JOIN department d
    ON e.dep_id = d.dep_id
GROUP BY d.dep_name

You are using implicit join syntax without a join condition, which creates a Cartesian join (cross join): every possible combination of rows from the two tables is produced and then counted. With 14 employees and 4 departments, every employee is paired with every department, so each department appears to have 14 employees. Learning how joins work in SQL and using explicit join syntax will help. In this case an INNER JOIN, as others have also pointed out, is the key to correcting the issue.
-- Employees per department; the inner join keeps only departments that have employees.
select
    d.dep_name,
    count(e.emp_id) as no_of_employees
from department d
inner join employees_dep e
    on e.dep_id = d.dep_id
group by d.dep_name

You must use a join condition in your first query; otherwise it will do a cross join. Using a proper join condition will give you the correct output.
-- One output row per department name with the number of joined employee rows.
SELECT
    d.dep_name,
    COUNT(1) AS no_of_employees
FROM employees_dep e
INNER JOIN department d
    ON e.dep_id = d.dep_id
GROUP BY d.dep_name

Related

Analytical Query in SQL for MIN, MAX, and AVG

I am trying to figure out a query for this question: for each major, list the number of students, minimum GPA, maximum GPA, average GPA, minimum age, maximum age, and average age. (Show GPA with 2 decimal points, age with no decimal points. You may find it useful to create a view with one of the previous queries for this one.)
This is the script to create the table for SQL!
REM Drop all the tables. Reg references Student and Course, so it is the
REM dependent table and has to be dropped before the two base tables.
drop table Reg;
drop table Student;
drop table Course;
REM Now create all the tables.
REM Student: one row per student, keyed by sid.
create table Student
(
sid char(10) primary key,
sname varchar(20) not null,
gpa float,
major char(10),
dob DATE
);
REM Course: one row per course, keyed by cno.
create table Course
(
cno char(10) primary key,
cname varchar(20) not null,
credits int,
dept char(10)
);
REM Reg: one row per (student, course) registration with the grade obtained.
REM sid and cno are written without a datatype, only a reference to the parent
REM column; deleting a student or a course cascades to its registrations.
create table Reg
(
sid references Student(sid) on delete cascade,
cno references Course(cno) on delete cascade,
grade char(2),
primary key (sid, cno)
);
REM Now insert all the rows.
REM Students: (sid, sname, gpa, major, dob). dob is given as a DD-MON-YYYY string
REM and converted implicitly to DATE.
REM NOTE(review): presumably relies on the session date format - confirm.
insert into Student values ('111', 'Joe', 3.5 , 'MIS', '01-AUG-2000');
insert into Student values ('222', 'Jack', 3.4 , 'MIS', '12-JAN-1999');
insert into Student values ('333', 'Jill', 3.2 , 'CS', '15-MAY-1998');
insert into Student values ('444', 'Mary', 3.7 , 'CS', '17-DEC-2001');
insert into Student values ('555', 'Peter', 3.8 , 'CS', '19-MAR-1999');
insert into Student values ('666', 'Pat', 3.9, 'Math', '31-MAY-2000');
insert into Student values ('777', 'Tracy', 4.0, 'Math', '18-JUL-1997');
REM Courses: (cno, cname, credits, dept).
insert into Course values ('c101', 'intro', 3 , 'CS');
insert into Course values ('m415', 'database', 4 , 'Bus');
insert into Course values ('m215', 'programming', 4 , 'Bus');
insert into Course values ('a444', 'calculus', 3 , 'Math');
REM Registrations: (sid, cno, grade). Student 777 and course a444 have none.
insert into Reg values ('111', 'c101', 'A');
insert into Reg values ('111', 'm215', 'B');
insert into Reg values ('111', 'm415', 'A');
insert into Reg values ('222', 'm215', 'A');
insert into Reg values ('222', 'm415', 'B');
insert into Reg values ('333', 'c101', 'A');
insert into Reg values ('444', 'm215', 'C');
insert into Reg values ('444', 'm415', 'B');
insert into Reg values ('555', 'c101', 'B');
insert into Reg values ('555', 'm215', 'A');
insert into Reg values ('555', 'm415', 'A');
insert into Reg values ('666', 'c101', 'A');
This is what I have so far:
-- Per-major student count, GPA statistics rounded to 2 decimals, and ages computed
-- as days since dob divided by 365 and truncated.
-- NOTE: the comma after avg_age leaves an empty select-list item right before FROM.
SELECT major,
count(distinct SID) as students,
round(min(gpa), 2),
round(max(gpa), 2),
round(avg(gpa), 2),
trunc(min(sysdate - dob)/365) as min_age,
trunc(max(sysdate - dob)/365) as max_age,
trunc(avg(sysdate - dob)/365) as avg_age,
FROM Student
GROUP BY MAJOR;
Based on your input, I've written a query that I believe will show you the results. (It was kind of hard to read the tables the way you posted them.) The syntax may differ depending on your DBMS (SQL Server, MySQL, Redshift, Postgres, etc.).
Here is the query:
-- Per-major student count, GPA statistics and age statistics.
-- Reads only the Student table (the table is named Student, not students). Joining
-- Course on dept = major is not needed for these figures and would repeat a student
-- once per course in a matching department, inflating COUNT(*) and skewing the averages.
-- DATEDIFF(year, start, end) returns end minus start, so dob goes first to obtain a
-- positive age.
-- NOTE(review): DATEDIFF counts calendar-year boundaries crossed rather than completed
-- years, and is not available in every DBMS — adapt to your dialect.
SELECT major,
       COUNT(*) AS students,
       ROUND(MIN(gpa), 2) AS min_gpa,
       ROUND(MAX(gpa), 2) AS max_gpa,
       ROUND(AVG(gpa), 2) AS avg_gpa,
       MIN(DATEDIFF(year, dob, current_date)) AS min_age,
       MAX(DATEDIFF(year, dob, current_date)) AS max_age,
       AVG(DATEDIFF(year, dob, current_date)) AS avg_age
FROM Student
GROUP BY major
Your query is completely fine; just remove the comma (,) after avg_age.
-- The question's query with the trailing comma removed and the GPA columns named.
-- Ages are truncated, as in the question, rather than rounded: rounding would report
-- a student as one year older once more than half of the current year has elapsed.
SELECT major,
       count(distinct SID) as students,
       round(min(gpa), 2) as MinGPA,
       round(max(gpa), 2) as MaxGPA,
       round(avg(gpa), 2) as AvgGPA,
       trunc(min(sysdate - dob)/365) as min_age,
       trunc(max(sysdate - dob)/365) as max_age,
       trunc(avg(sysdate - dob)/365) as avg_age
FROM Student
GROUP BY MAJOR;
You can also use months_between() with floor() to get the same result:
-- Show the raw rows first, then the per-major aggregate.
SELECT * FROM student;
-- Ages: months between today (time part stripped) and dob, divided by 12 and floored.
SELECT
    major,
    COUNT(DISTINCT sid) AS MinStudents_placeholder_removed
FROM dual WHERE 1 = 0;

SQL to assign covid patients to hospitals

I have 2 tables:
-- Stock held by each hospital, together with the hospital's state.
create table remdesivir_inventory (
    hospital_id int,
    stock       int,
    state       varchar(2)
);
-- Quantity prescribed to each patient, together with the patient's state.
create table remdesivir_requests (
    patient_id     int,
    prescribed_qty int,
    state          varchar(2)
);
I want to write a SQL that inserts rows in the remdesivir_assignments table
Every patient whose request can be fulfilled (until the stock runs out) will have a representative row in
the remdesivir_assignments table.
Each patient can be assigned to only 1 hospital (ie. requests cannot be split)
The 'state' of the patient and the hospital must match
-- Result table: which hospital serves which patient.
create table remdesivir_assignments (
    patient_id  int,
    hospital_id int
);
Example:
-- Stock per hospital.
insert into remdesivir_inventory (hospital_id, stock, state) values (1, 200, 'CA');
insert into remdesivir_inventory (hospital_id, stock, state) values (2, 100, 'FL');
insert into remdesivir_inventory (hospital_id, stock, state) values (3, 500, 'TX');
-- Patient requests; 'AL' has no inventory row at all.
insert into remdesivir_requests (patient_id, prescribed_qty, state) values (10, 100, 'CA');
insert into remdesivir_requests (patient_id, prescribed_qty, state) values (20, 200, 'FL');
insert into remdesivir_requests (patient_id, prescribed_qty, state) values (30, 300, 'TX');
insert into remdesivir_requests (patient_id, prescribed_qty, state) values (40, 100, 'AL');
insert into remdesivir_requests (patient_id, prescribed_qty, state) values (50, 200, 'CA');
In this scenario, the following rows will be inserted to the remdesivir_assignments table
(10, 1)
(30, 3)
You can use a cumulative sum and join:
-- Requests that can be served from the stock of the hospital in the same state.
-- running_pq is the cumulative prescribed quantity per state in patient_id order; a
-- request qualifies while that running total still fits within the hospital's stock.
-- The explicit ROWS frame accumulates strictly row by row; the default frame would
-- give rows that tie on patient_id the same (combined) running total.
-- NOTE(review): assumes one inventory row per state; with several hospitals in a state
-- the same request would match each of them — confirm against the real data.
select rr.*, ri.hospital_id
from (select rr.*,
             sum(rr.prescribed_qty) over (partition by rr.state
                                          order by rr.patient_id
                                          rows between unbounded preceding and current row) as running_pq
      from remdesivir_requests rr
     ) rr
join remdesivir_inventory ri
  on ri.state = rr.state
 and rr.running_pq <= ri.stock
Here is a db<>fiddle.

Can I reference this table?

I am trying to show amount paid for each tutor sorted by month and then by tutor id. I have the first part correct and can sort by month but cannot sort by tutor id because it is from a different table.
Here is the script for my tables:
-- One row per tutor/student match; references the tutor and student tables.
CREATE TABLE match_history (
    match_id   NUMBER(3),
    tutor_id   NUMBER(3),
    student_id NUMBER(4),
    start_date DATE,
    end_date   DATE,
    CONSTRAINT pk_match_history PRIMARY KEY (match_id),
    CONSTRAINT fk1_match_history FOREIGN KEY (tutor_id) REFERENCES tutor (tutor_id),
    CONSTRAINT fk2_match_history FOREIGN KEY (student_id) REFERENCES student (student_id)
);
-- Hours and lessons reported for a match in a given month.
CREATE TABLE tutor_report (
    match_id NUMBER(3),
    month    DATE,
    hours    NUMBER(3),
    lessons  NUMBER(3),
    CONSTRAINT pk_tutor_report PRIMARY KEY (match_id, month),
    CONSTRAINT fk1_tutor_report FOREIGN KEY (match_id) REFERENCES match_history (match_id)
);
-- Tutors: an id, a date string and a status text. The tutor table definition is not
-- shown in this post, so the column names are unknown here.
insert into tutor values (100, '05-JAN-2017', 'Active');
insert into tutor values (101, '05-JAN-2017', 'Temp Stop');
insert into tutor values (102, '05-JAN-2017', 'Dropped');
insert into tutor values (103, '22-MAY-2017', 'Active');
insert into tutor values (104, '22-MAY-2017', 'Active');
insert into tutor values (105, '22-MAY-2017', 'Temp Stop');
insert into tutor values (106, '22-MAY-2017', 'Active');
-- Students: an id and a numeric value (table definition likewise not shown).
insert into student values (3000, 2.3);
insert into student values (3001, 5.6);
insert into student values (3002, 1.3);
insert into student values (3003, 3.3);
insert into student values (3004, 2.7);
insert into student values (3005, 4.8);
insert into student values (3006, 7.8);
insert into student values (3007, 1.5);
-- Matches: (match_id, tutor_id, student_id, start_date, end_date); end_date is null for some rows.
insert into match_history values (1, 100, 3000, '10-JAN-2017', null);
insert into match_history values (2, 101, 3001, '15-JAN-2017', '15-MAY-2017');
insert into match_history values (3, 102, 3002, '10-FEB-2017', '01-MAR-2017');
insert into match_history values (4, 106, 3003, '28-MAY-2017', null);
insert into match_history values (5, 103, 3004, '01-JUN-2017', '15-JUN-2017');
insert into match_history values (6, 104, 3005, '01-JUN-2017', '28-JUN-2017');
insert into match_history values (7, 104, 3006, '01-JUN-2017', null);
-- Monthly reports: (match_id, month, hours, lessons); month is given as the first day of the month.
insert into tutor_report values (1, '01-JUN-2017', 8, 4);
insert into tutor_report values (4, '01-JUN-2017', 8, 6);
insert into tutor_report values (5, '01-JUN-2017', 4, 4);
insert into tutor_report values (4, '01-JUL-2017', 10, 5);
insert into tutor_report values (1, '01-JUL-2017', 4, 2);
This is what I have so far:
-- Asker's attempt: tutor_report has no tutor_id column (tutor_id is stored in match_history).
Select (hours * 10) as amount paid from tutor_report group by month, tutor_id
However, I obviously cannot just say tutor_id at the end.
You can join match_history to get the tutor_id.
But your statement and the query don't match. If you want to sort use ORDER BY.
-- Amount (10 per reported hour) for every report row, listed by month and then by tutor.
select tr.hours * 10 as amount_paid
from tutor_report tr
inner join match_history mh
    on tr.match_id = mh.match_id
order by
    tr.month,
    mh.tutor_id;
If you want to aggregate, hours needs to be argument to some aggregation function. Maybe you're after the sum of hours?
-- Total amount (10 per reported hour) for each month/tutor combination.
select sum(tr.hours) * 10 as amount_paid
from tutor_report tr
inner join match_history mh
    on tr.match_id = mh.match_id
group by
    tr.month,
    mh.tutor_id;
If you are grouping on columns from two tables, you need to join the tables on the matching id and then use group by:
-- Amount paid per tutor per month (10 per reported hour), sorted by month, then tutor.
-- tutor_id comes from match_history, joined on match_id. hours is summed because every
-- selected column must be either grouped or aggregated, and the alias is written
-- amount_paid because an unquoted alias cannot contain a space.
select a.month,
       b.tutor_id,
       sum(a.hours) * 10 as amount_paid
from tutor_report a
join match_history b on a.match_id = b.match_id
group by a.month, b.tutor_id
order by a.month, b.tutor_id

Display User That Does Not Exist in Other Table

I have a query that is supposed to display the number of new patients a physician can take on. A physician can have no more than 5 patients at a time. I have this working with the following query:
-- Remaining capacity (5 minus the current patient count) for physicians with fewer than 5 patients.
-- The WHERE clause requires a matching patient row for every physician, so a physician
-- without any patient rows never reaches the result.
select PHYSICIAN.PHYSICIAN_ID,PHYSICIAN.firstname_physician,PHYSICIAN.lastname_physician, phone.phone_number, 5-count(patient.patient_id) as "Numbers of new patients he/she can take"
from patient, physician, physician_phone, phone
where physician.physician_id = patient.physician_id and PHYSICIAN_PHONE.PHYSICIAN_ID = PHYSICIAN.PHYSICIAN_ID and phone.PHONE_ID = physician_phone.PHONE_ID
group by PHYSICIAN.PHYSICIAN_ID, PHYSICIAN.firstname_physician, PHYSICIAN.lastname_physician, physician_phone.phone_id, phone.phone_number
having count(patient.patient_id)<5;
However, this only displays the physicians that have patients, not the physicians who have 0 patients connected to them.
My attempt to display the physicians who also have 0 patients was the following:
-- Asker's attempt to include physicians without patients: same joins as the working query,
-- plus an OR NOT EXISTS test in HAVING.
-- The WHERE clause still requires a matching patient row, so physicians without patients
-- are removed before HAVING is evaluated.
select PHYSICIAN.PHYSICIAN_ID,PHYSICIAN.firstname_physician,PHYSICIAN.lastname_physician, phone.phone_number, 5-count(patient.patient_id) as "Numbers of new patients he/she can take"
from patient, physician, physician_phone, phone
where physician.physician_id = patient.physician_id and PHYSICIAN_PHONE.PHYSICIAN_ID = PHYSICIAN.PHYSICIAN_ID and phone.PHONE_ID = physician_phone.PHONE_ID
group by PHYSICIAN.PHYSICIAN_ID, PHYSICIAN.firstname_physician, PHYSICIAN.lastname_physician, physician_phone.phone_id, phone.phone_number
having count(patient.physician_id)<5 OR NOT EXISTS ( Select patient.physician_id from patient Where patient.physician_id != physician.physician_Id group by patient.physician_id)
Below are the table creates to help anyone better understand the relationship between the tables
-- Physician: one row per physician, keyed by Physician_ID.
Create Table Physician (
Physician_ID integer not null,
Firstname_physician Char(30Char) not null,
lastname_physician Char(30Char) not null,
Constraint Physician Primary Key (Physician_ID));
-- Patient: one row per patient, keyed by Patient_ID. Every patient must reference
-- one staff member and one physician (both columns are NOT NULL with foreign keys).
-- The HomeCareStaff table is referenced here but its definition is not shown in this post.
Create Table Patient (
Patient_ID integer not null,
Patient_FirstName Char(20Char) not null,
Patient_LastName Char(20Char) not null,
Patient_MI Char(1Char) not null,
Patient_Gender Char(15Char) not null,
Staff_id integer not null,
Physician_ID integer not null,
Constraint Patient_pk Primary Key (Patient_ID),
Constraint HomeCareStaff_fk Foreign Key (Staff_ID) References HomeCareStaff(Staff_id),
Constraint Physician_ID_fk10 Foreign Key (Physician_ID) References Physician(Physician_ID));
Both queries return exactly the same thing. Please see the image of the query results.
I hope this makes sense. Here are the inserts as well:
-- Populate the Physician table: (Physician_ID, first name, last name).
insert into Physician values (100, 'Sasia', 'Applebottom');
insert into Physician values (101, 'Mac', 'Cheese');
insert into Physician values (102, 'Mick', 'Donalds');
insert into Physician values (103, 'Saint', 'West');
insert into Physician values (104, 'Chicago', 'West');
insert into Physician values (105, 'Mason', 'Disic');
-- Populate the Patient table: (Patient_ID, first name, last name, middle initial, gender, Staff_id, Physician_ID).
-- Physician 100 gets five patients, 101 and 102 one each, and 103-105 none.
insert into Patient values (150, 'Hayley', 'Beachump', 'F', 'Female', 50, 100);
insert into Patient values (151, 'Jacob', 'Stutzmen', 'K', 'Male', 51, 100);
insert into Patient values (152, 'Christina', 'Smush', 'P', 'Female', 52, 100);
insert into Patient values (153, 'Doris', 'Dorphish', 'D', 'Female', 53,100);
insert into Patient values (154, 'Adam', 'Wang', 'M', 'Male', 54, 100);
insert into Patient values (155, 'Levina', 'Reinhart', 'U', 'Female', 55, 101);
insert into Patient values (156, 'Harper', 'Mosbey', 'M', 'Male', 56, 102);
You can use this
-- Remaining capacity per physician, including physicians with no patients at all.
-- patient is LEFT JOINed so a physician without patient rows is kept;
-- COUNT(patient.patient_id) skips the NULLs produced by the outer join and yields 0 for
-- such a physician, so the reported capacity is 5.
-- (An inner join combined with HAVING COUNT(...) = 0 can never return a row.)
-- The phone tables stay inner-joined, so only physicians with a phone row are listed.
select physician.physician_id,
       physician.firstname_physician,
       physician.lastname_physician,
       phone.phone_number,
       5 - count(patient.patient_id) as "Numbers of new patients he/she can take"
from physician
inner join physician_phone
        on physician_phone.physician_id = physician.physician_id
inner join phone
        on phone.phone_id = physician_phone.phone_id
left join patient
       on patient.physician_id = physician.physician_id
group by physician.physician_id,
         physician.firstname_physician,
         physician.lastname_physician,
         physician_phone.phone_id,
         phone.phone_number
having count(patient.patient_id) < 5;

SQL developer Query

QUESTION: Write a SQL SELECT statement to display the name and address of all departments (except the departments in Dallas) having maximum number of employees. Sort your output in ascending order by department name.
Creating Department Table
-- Departments: unique name plus the address (city) where the department is located.
create table department (
    department_id   number(4)    primary key,
    department_name varchar2(20) not null unique,
    address         varchar2(20) not null
);
Populating Department Table
-- Six departments spread over three addresses, two per address.
insert into department (department_id, department_name, address) values (10, 'ACCOUNTING', 'NEW YORK');
insert into department (department_id, department_name, address) values (20, 'RESEARCH', 'DALLAS');
insert into department (department_id, department_name, address) values (30, 'SALES', 'CHICAGO');
insert into department (department_id, department_name, address) values (40, 'IT', 'DALLAS');
insert into department (department_id, department_name, address) values (50, 'EXECUTIVE', 'NEW YORK');
insert into department (department_id, department_name, address) values (60, 'MARKETING', 'CHICAGO');
commit;
Creating Employee Table
-- Employees; department_id is nullable and references department.
create table employee (
    employee_id   number(4)    primary key,
    employee_name varchar2(20) not null,
    job           varchar2(50) not null,
    manager_id    number(4),
    hire_date     date         not null,
    salary        number(9, 2) not null,
    commission    number(9, 2),
    department_id number(4)    references department (department_id)
);
Populating Employee Table
-- Sample employees: (employee_id, employee_name, job, manager_id, hire_date, salary, commission, department_id).
-- hire_date is given as a DD-MON-YY string and converted implicitly to DATE.
-- NOTE(review): presumably relies on the session date format — confirm.
-- Head counts in this data: department 40 has four employees; 20, 30 and 50 have three
-- each; 10 has two; 60 has none; BREWSTER has no department.
INSERT INTO employee
VALUES(7839, 'KING', 'PRESIDENT', NULL, '20-NOV-01', 5000, NULL, 50);
INSERT INTO employee
VALUES(7596, 'JOST', 'VICE PRESIDENT', 7839, '04-MAY-01', 4500, NULL, 50);
INSERT INTO employee
VALUES(7603, 'CLARK', 'VICE PRESIDENT', 7839, '12-JUN-01', 4000, NULL, 50);
INSERT INTO employee
VALUES(7566, 'JONES', 'PUBLIC ACCOUNTANT', 7596, '05-APR-01', 3000, NULL, 10);
INSERT INTO employee
VALUES(7886, 'STEEL', 'PUBLIC ACCOUNTANT', 7566, '08-MAR-03', 2500, NULL, 10);
INSERT INTO employee
VALUES(7610, 'WILSON', 'ANALYST', 7596, '03-DEC-01', 3000, NULL, 20);
INSERT INTO employee
VALUES(7999, 'WOLFE', 'ANALYST', 7610, '15-FEB-02', 2500, NULL, 20);
INSERT INTO employee
VALUES(7944, 'LEE', 'ANALYST', 7610, '04-SEP-06', 2400, NULL, 20);
INSERT INTO employee
VALUES(7900, 'FISHER', 'SALESMAN', 7603, '06-DEC-01', 3000, 500, 30);
INSERT INTO employee
VALUES(7921, 'JACKSON', 'SALESMAN', 7900, '25-FEB-05', 2500, 400, 30);
INSERT INTO employee
VALUES(7952, 'LANCASTER', 'SALESMAN', 7900, '06-DEC-06', 2000, 150, 30);
INSERT INTO employee
VALUES(7910, 'SMITH', 'DATABASE ADMINISTRATOR', 7596, '20-DEC-01', 2900, NULL, 40);
INSERT INTO employee
VALUES(7788, 'SCOTT', 'PROGRAMMER', 7910, '15-JAN-03', 2500, NULL, 40);
INSERT INTO employee
VALUES(7876, 'ADAMS', 'PROGRAMMER', 7910, '15-JAN-03', 2000, NULL, 40);
INSERT INTO employee
VALUES(7934, 'MILLER','PROGRAMMER', 7876, '25-JAN-02', 1000, NULL, 40);
INSERT INTO employee
VALUES(8000, 'BREWSTER', 'TBA', NULL, '22-AUG-13', 2500, NULL, NULL);
COMMIT;
The query that I wrote is below. It works perfectly for finding the department with the maximum number of employees, which is in DALLAS, but I want to eliminate DALLAS and find the other departments that have the maximum number of employees working in them, which according to the data are in CHICAGO and NEW YORK. Can anyone please help me out?
-- Departments whose employee count equals the largest employee count of any department.
select d.department_id,
       d.department_name,
       count(*)
from department d
inner join employee e
    on d.department_id = e.department_id
group by d.department_id, d.department_name
having count(*) = (select max(count(*))
                   from employee
                   group by department_id)
order by department_id;
Using your query:
-- Non-Dallas departments whose employee count equals the largest employee count among
-- non-Dallas departments. Dallas has to be excluded in both places: in the outer query
-- so it is never listed, and in the subquery so it cannot set the maximum.
SELECT d.department_id, d.department_name, COUNT(*)
FROM department d
INNER JOIN employee e
    ON d.department_id = e.department_id
WHERE d.address != 'DALLAS'
GROUP BY d.department_id, d.department_name
HAVING COUNT(*) = (SELECT MAX(COUNT(*))
                   FROM employee
                   WHERE department_id NOT IN (SELECT department_id
                                               FROM department
                                               WHERE address = 'DALLAS')
                   GROUP BY department_id)
ORDER BY d.department_id;
You can use a rank analytic function:
-- Non-Dallas departments with the highest employee count (ties included).
-- Dallas is filtered out before ranking, so rank 1 is the answer. Taking rank 2 of the
-- unfiltered list is not equivalent: with the sample data RESEARCH (Dallas) ties for
-- second place and would be returned as well.
select *
from (
    select department_id, department_name, rank() over (order by cnt desc) rnk
    from (
        select d.department_id, d.department_name, count(*) cnt
        from department d
        inner join employee e
            on d.department_id = e.department_id
        where d.address <> 'DALLAS'
        group by d.department_id, d.department_name
    )
)
where rnk = 1 -- raise this to get the next-largest group of departments
order by department_name
;