MySQL SQL Subquery? - sql

Given the following schema / data / output how would I format a SQL query to give the resulting output?
-- One row per report; both the display name and the source are unique.
CREATE TABLE report (
id BIGINT AUTO_INCREMENT,
name VARCHAR(255) NOT NULL UNIQUE,
source VARCHAR(255) NOT NULL UNIQUE,
PRIMARY KEY(id)
) ENGINE = INNODB;
-- One row per field belonging to a report.
CREATE TABLE field (
id BIGINT AUTO_INCREMENT,
name VARCHAR(255) NOT NULL,
report_id BIGINT,
PRIMARY KEY(id),
-- The sample data uses 'age' in both report 3 and report 4, so a field
-- name can only be unique within its report, not across the whole table.
UNIQUE (report_id, name)
) ENGINE = INNODB;
-- Deleting a report also deletes its fields.
ALTER TABLE field ADD FOREIGN KEY (report_id) REFERENCES report(id) ON DELETE CASCADE;
report:
id, name, source
1 report1 source1
2 report2 source2
3 report3 source3
4 report4 source4
field:
id, name, report_id
1 firstname 3
2 lastname 3
3 age 3
4 state 4
5 age 4
6 rank 4
Expected output for search term "age rank"
report_id, report_name, num_fields_matched
3 report3 1
4 report4 2
Thanks in advance!

This query will return all the reports with words you need.
-- Every report/field pair whose field name is one of the search terms.
SELECT *
FROM report r
INNER JOIN field f ON r.id = f.report_id
-- Qualify the column: report and field both have a "name" column, so an
-- unqualified reference is rejected as ambiguous.
WHERE f.name IN ('age','rank')
You have to nest it. So the final query is:
-- The derived table yields one row per matched field; the outer query
-- collapses those rows into a match count per report.
SELECT matched.id, matched.name, COUNT(*)
FROM (
    SELECT r.id, r.name
    FROM field f
    INNER JOIN report r ON f.report_id = r.id
    WHERE f.name IN ('age', 'rank')
) matched
GROUP BY matched.id, matched.name

Related

SQL: difference between where in main body vs join clause

I'm wondering why the following two queries give me slightly different result sets:
-- Variant 1: the date range is part of the LEFT JOIN condition.
SELECT t.name, COUNT(e.id)
FROM event_type t
LEFT JOIN event e ON t.id = e.type_id AND e.start BETWEEN ? AND ?
GROUP BY t.name;
-- Variant 2: the date range is applied in WHERE, after the LEFT JOIN.
SELECT t.name, COUNT(e.id)
FROM event_type t
LEFT JOIN event e ON t.id = e.type_id
WHERE e.start BETWEEN ? AND ?
GROUP BY t.name;
So I just moved BETWEEN clause to the main body, logically, it does not matter where to apply it, but the result says it matters. Any suggestions? Thanks!
UPD: tried on MySQL 5.6
-- Event types; each name may appear only once.
create table event_type
(
id int auto_increment primary key,
name varchar(100) not null,
constraint UNIQ_93151B825E237E06 unique (name)
) collate = utf8_unicode_ci;
-- Events; type_id is nullable and references event_type(id).
-- The "..." line stands for columns omitted from the post.
create table event
(
id int auto_increment primary key,
type_id int null,
start datetime not null,
...
constraint FK_3BAE0AA7C54C8C93
foreign key (type_id) references event_type (id)
) collate = utf8_unicode_ci;
-- Index on the foreign-key column.
create index IDX_3BAE0AA7C54C8C93
on event (type_id);
This question is hard to answer without pictures, but I'll try.
Let's assume this is the event_type table
Id
Name
1
First
2
Second
Events table:
Id
TypeId
Start
5
1
2022-10-01
6
1
2022-10-10
So for this query:
-- Count, per event type, the events that start within the given range.
-- The range is part of the join condition, so event types without a
-- matching event still produce a row (with a count of 0).
SELECT
    t.name,
    COUNT(e.id)
FROM event_type t
LEFT JOIN event e
    ON e.type_id = t.id
    AND e.start BETWEEN '2022-10-01' AND '2022-10-05'
GROUP BY t.name;
The result will be:
Name
Count(e.id)
First
1
Second
0
But why? Because when the SQL engine evaluates the LEFT JOIN, it checks both the id and the start conditions while matching rows. The intermediate (un-aggregated) result of the previous query actually looks like this:
Id
Name
Id
TypeId
Start
1
First
5
1
2022-10-01
2
Second
null
null
null
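That's it! When you put the BETWEEN condition in the WHERE clause instead, it is applied after the join: the rows whose event columns are NULL fail the comparison and are filtered out, so the final result is different (the event type "Second" disappears instead of being reported with a count of 0).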
I hope it's clear enough!

Counting Occurrences from One Table and Inserting into Another but Getting an Error

I am trying to count the amount of times each school shows up in a set of records and record that value in a new table with its corresponding school name and ID.
The tables being used are similar to the following:
Table 1-> school_probs
school_code (pk, bigint)
school (text)
probability
1
school1
Irrelevant info
2
school2
ii
3
school3
ii
Table2-> simulated_records
record_id (pk, bigint)
school (text)
grade
1
school1
ii
2
school2
ii
3
school1
ii
4
school3
ii
I'm expecting to get an output like
school_code (fk, bigint)
school (text)
schoolCount (integer)
1
school1
2
2
school2
1
3
school3
1
and I was able to achieve this with the following code:
-- Counts the simulated_records rows per school and returns each count
-- together with the matching school_code and school from school_probs.
SELECT COUNT (simulated_records.school) AS schoolCount, school_probs.school_code, school_probs.school
FROM simulated_records, school_probs WHERE school_probs.school = simulated_records.school
GROUP BY simulated_records.school, school_probs.school_code, school_probs.school;
However, I need the result to be saved in a table. But when I try
CREATE TABLE studentCount (
studentNum integer, school_code bigint, school text,
CONSTRAINT fk_sC FOREIGN KEY (school_code) REFERENCES school_probs (school_code)
)
SELECT COUNT (simulated_records.school) AS schoolCount, school_probs.school_code, school_probs.school
FROM simulated_records, school_probs WHERE school_probs.school = simulated_records.school
GROUP BY simulated_records.school, school_probs.school_code, school_probs.school;
-- NOTE: this is the failing attempt, kept as posted apart from the corrected
-- REFERENCES keyword. A SELECT cannot directly follow a column-definition
-- list, which is why the error is reported at the SELECT on line 5.
I get "ERROR: syntax error at or near "SELECT" LINE 5: SELECT COUNT (simulated_records.school) AS schoolCount, . . . SQL state: 42601 "
Line 5 reads:
SELECT COUNT (simulated_records.school) AS schoolCount, school_probs.school_code, school_probs.school
Can anyone point me in the right direction? I plan on creating a function out of this.
The code to create the tables:
-- Fixture: schools, each with an array of probabilities.
DROP TABLE IF EXISTS school_probs;
CREATE TABLE school_probs
(
school_code bigint NOT NULL PRIMARY KEY,
school text NOT NULL,
probs numeric[] NOT NULL
);
-- Explicit column lists keep the inserts valid if columns are added or reordered.
INSERT INTO school_probs (school_code, school, probs) VALUES
(1,'school1','{0.05,0.08,0.18,0.3,0.11,0.28}'),
(2,'school2','{0.06,0.1,0.295,0.36,0.12,0.065}'),
(3,'school3','{0.05,0.11,0.35,0.32,0.12,0.05}');
-- Fixture: simulated records, each naming the school it belongs to.
DROP TABLE IF EXISTS simulated_records;
CREATE TABLE simulated_records
(
record_id bigint NOT NULL PRIMARY KEY,
school text NOT NULL,
grade text NOT NULL
);
INSERT INTO simulated_records (record_id, school, grade) VALUES
(1,'school1','-'),
(2,'school2','-'),
(3,'school1','-'),
(4, 'school3', '-');
Look up the JOIN syntax and don't use , in the FROM clause. Table aliases could also help.
And the syntax to create a table from a query is CREATE TABLE <table name> AS SELECT .... There are no column or constraint definitions. You can use explicit casts in the query to determine column types. Constraint definitions have to be added later with ALTER TABLE.
-- Materialise the per-school record counts, then attach the foreign key
-- afterwards (CREATE TABLE ... AS takes no constraint definitions).
CREATE TABLE studentcount AS
SELECT
    CAST(count(sr.school) AS integer) AS studentnum,
    CAST(sp.school_code AS bigint) AS school_code,
    CAST(sp.school AS text) AS school
FROM school_probs sp
INNER JOIN simulated_records sr
    ON sr.school = sp.school
GROUP BY
    sp.school_code,
    sp.school;

ALTER TABLE studentcount
    ADD CONSTRAINT fk_sc FOREIGN KEY (school_code) REFERENCES school_probs (school_code);
Alternatively you can first issue a "normal" CREATE TABLE with column and constraint definitions and then insert the rows from the query.
-- Declare the table with its foreign key first ...
CREATE TABLE studentcount (
    studentnum integer,
    school_code bigint,
    school text,
    CONSTRAINT fk_sc FOREIGN KEY (school_code) REFERENCES school_probs (school_code)
);

-- ... then fill it with one row per school and its number of simulated records.
INSERT INTO studentcount (studentnum, school_code, school)
SELECT
    count(sr.school),
    sp.school_code,
    sp.school
FROM school_probs sp
INNER JOIN simulated_records sr
    ON sr.school = sp.school
GROUP BY
    sp.school_code,
    sp.school;
But be aware that you're creating data redundancy either way. That can lead to inconsistencies and should be avoided. If you need the counts not just once but again later, reflecting the values current at that time, consider a view instead.
-- View variant: the counts are computed from the base tables on every read,
-- so nothing is stored redundantly.
CREATE VIEW studentcount AS
SELECT
    CAST(count(sr.school) AS integer) AS studentnum,
    CAST(sp.school_code AS bigint) AS school_code,
    CAST(sp.school AS text) AS school
FROM school_probs sp
INNER JOIN simulated_records sr
    ON sr.school = sp.school
GROUP BY
    sp.school_code,
    sp.school;

SQL: Count references to item in same table

I have the following SQL table:
id int(11) NOT NULL AUTO_INCREMENT,
name varchar(200),
parent int(11),
This stores some structured information in a tree:
"id" is the primary (uniq) key of the entries
"name" is some string
"parent" is the "id" of the parent entry (0: root element)
A sample table could be:
id name parent
--+-----------------+----------------
1 root_a 0
2 root_b 0
3 sub_b1 2
4 sub_sub_b1_1 3
5 sub_sub_b1_2 3
This could be a directory with folder ("root_"), sub-folder ("sub_"), sub-sub-folder ("sub_sub_*"), ...
Now I would like to have a SQL query, that returns for each entry how many child entries there are:
SELECT id,name,count(....) as child_count FROM table WHERE ...
For the example table this query shall return:
id name child_count
--+--------------+---------
1 root_a 0
2 root_b 1
3 sub_b1 2
4 sub_sub_b1_1 0
5 sub_sub_b1_2 0
How to perform such a count inside the same table?
Thanks
How about a correlated subquery?
-- For every row of t, count the rows whose parent column points at it.
SELECT
    parent_row.*,
    (
        SELECT COUNT(*)
        FROM t child_row
        WHERE child_row.parent = parent_row.id
    ) AS child_count
FROM t parent_row;
-- Pre-aggregate the number of direct children per parent id, then attach
-- the counts to every row. "t" stands for the tree table; the word "table"
-- itself is reserved and cannot be used as an unquoted identifier.
WITH child_counts AS (
    SELECT
        parent,
        COUNT(*) AS child_count
    FROM t
    GROUP BY parent
)
SELECT
    t.id,
    t.name,
    -- Rows without children have no match in child_counts; COALESCE
    -- (standard SQL, unlike the SQL Server-only two-argument isnull)
    -- turns the resulting NULL into 0.
    COALESCE(child_counts.child_count, 0) AS child_count
FROM t
LEFT JOIN child_counts
    ON child_counts.parent = t.id
ORDER BY t.id
Note: Left Join. Inner join would exclude rows with no children.

How to differentiate between “no child rows exist” and “no parent row exists” in one SELECT query?

Say I have a table C that references rows from tables A and B:
id, a_id, b_id, ...
and a simple query:
SELECT * FROM C WHERE a_id=X AND b_id=Y
I would like to differentiate between the following cases:
No row exists in A where id = X
No row exists in B where id = Y
Both such rows in A and B exist, but no rows in C exist where a_id = X and b_id = Y
The above query will return empty result in all those cases.
In case of one parent table I could do a LEFT JOIN like:
SELECT * FROM A LEFT JOIN C ON a.id = c.a_id WHERE c.a_id = X
and then check if the result is empty (no row in A exists), has one row with NULL c.id (row in A exists, but no rows in C exist) or 1+ rows with non-NULL c.id (row in A exists and at least one row in C exists). A bit messy but it works, but I was wondering if there is a better way of doing this, especially if there is more than one parent table?
For example:
C is "things owned by people", A is "people", B is "types of things". When someone asks "give me a list of games owned by Bill", and there are no such records in C, I would like to return an empty list only if both "Bill" and "games" exist in their corresponding tables, but an error code if either of them doesn't.
So if there are no records matching "Bill" and "games" in table C, I would like to say "I don't know who Bill is" instead of "Bill has no games" if I don't have a record about Bill in table A.
-- Two parent tables and a link table whose rows reference both parents.
CREATE TABLE a (
    a_id integer NOT NULL PRIMARY KEY
);
CREATE TABLE b (
    b_id integer NOT NULL PRIMARY KEY
);
CREATE TABLE c (
    a_id integer NOT NULL REFERENCES a(a_id),
    b_id integer NOT NULL REFERENCES b(b_id),
    PRIMARY KEY (a_id, b_id)
);

INSERT INTO a(a_id) VALUES (0), (2), (4), (6);
INSERT INTO b(b_id) VALUES (0), (3), (6);
INSERT INTO c(a_id, b_id) VALUES (6, 6);

-- Report in a single row whether the a row ($1), the b row ($2) and the
-- linking c row exist.
PREPARE omg(integer, integer) AS
SELECT
    EXISTS (SELECT 1 FROM a WHERE a.a_id = $1) AS a_exists,
    EXISTS (SELECT 1 FROM b WHERE b.b_id = $2) AS b_exists,
    EXISTS (SELECT 1 FROM c WHERE c.a_id = $1 AND c.b_id = $2) AS c_exists;

EXECUTE omg(1, 1);
EXECUTE omg(2, 1);
EXECUTE omg(1, 3);
EXECUTE omg(6, 6);
-- with optional payload:
-- Same existence flags as omg, plus the matching a, b and c rows themselves
-- (NULL-filled where no row matches, because of the LEFT JOINs).
-- $1 is the a_id to look up, $2 the b_id.
PREPARE omg2(integer,integer) AS
SELECT val.a_id AS va_id
, val.b_id AS vb_id
, EXISTS(SELECT * FROM a WHERE a.a_id = $1) AS a_exists
, EXISTS(SELECT * FROM b WHERE b.b_id = $2) AS b_exists
-- Table c's columns are a_id / b_id (there is no ca_id / cb_id).
, EXISTS(SELECT * FROM c WHERE c.a_id = val.a_id AND c.b_id = val.b_id ) AS c_exists
, a.*
, b.*
, c.*
-- A one-row VALUES list carries the parameters so the parents can be outer-joined to it.
FROM (values ($1,$2)) val(a_id,b_id)
LEFT JOIN a ON a.a_id = val.a_id
LEFT JOIN b ON b.b_id = val.b_id
LEFT JOIN c ON c.a_id = val.a_id AND c.b_id = val.b_id
;
EXECUTE omg2(1,1);
EXECUTE omg2(2,1);
EXECUTE omg2(1,3);
EXECUTE omg2(6,6);
I think I managed to get a satisfactory solution using the following two features:
A subselect bound to a column, which allows me to check whether a row exists and (importantly) get a NULL value otherwise (e.g. SELECT (SELECT id FROM a WHERE id = 1) AS a_id)
Common Table Expressions
Initial data:
-- People who can own things.
CREATE TABLE people
(
id integer not null primary key,
name text not null
);
-- Kinds of things (e.g. 'game').
CREATE TABLE thing_types
(
id integer not null primary key,
name text not null
);
-- Things owned by a person; each row references its owner and its type.
CREATE TABLE things
(
id integer not null primary key,
person_id integer not null references people(id),
thing_type_id integer not null references thing_types(id),
name text not null
);
-- Explicit column lists keep the inserts valid if columns are added or reordered.
INSERT INTO people (id, name) VALUES (1, 'Bill');
INSERT INTO thing_types (id, name) VALUES (1, 'game');
INSERT INTO things (id, person_id, thing_type_id, name) VALUES (1, 1, 1, 'Duke Nukem');
INSERT INTO things (id, person_id, thing_type_id, name) VALUES (2, 1, 1, 'Warcraft 2');
And the query:
-- <person_id_param> and <thing_type_param> are placeholders for the ids being looked up.
-- v is a single row: each scalar subselect returns the id when the parent
-- row exists and NULL when it does not.
WITH v AS (
SELECT (SELECT id FROM people WHERE id=<person_id_param>) AS person_id,
(SELECT id FROM thing_types WHERE id=<thing_type_param>) AS thing_type_id
)
-- LEFT JOIN from v: v's row is kept even when no things row matches.
SELECT v.person_id, v.thing_type_id, things.name
FROM
v LEFT JOIN things
ON v.person_id = things.person_id AND v.thing_type_id = things.thing_type_id
This query will always return at least one row, and I just need to check which, if any, of the three columns of the first row are NULLs.
In case if both parent table ids are valid and there are some records, none of them will be NULL:
person_id thing_type_id name
-------------------------------------
1 1 Duke Nukem
1 1 Warcraft 2
If either person_id or thing_type_id are invalid, I get one row where name is NULL and either person_id or thing_type_id is NULL:
person_id thing_type_id name
-------------------------------------
NULL 1 NULL
If both person_id and thing_type_id are valid but there are no records in things, I get one row where both person_id and thing_type_id are not NULL, but the name is NULL:
person_id thing_type_id name
-------------------------------------
1 1 NULL
Since I have a NOT NULL constraint on things.name, I know that this case can only mean that there are no matching records in things. If NULLs were allowed in things.name, I could include things.id instead and check that for NULLness.
You have three cases. The third one is a bit more complex, but it can be handled with a cross join between a and b. All three cases combined in one UNION could look like this:
-- case 1: rows of c whose a_id has no matching row in a
select a_id, b_id , 'case 1' from c
where not exists (select 1 from a where a.a_id=c.a_id)
union all
-- case 2: rows of c whose b_id has no matching row in b
select a_id, b_id ,'case 2' from c
where not exists (select 1 from b where b.b_id=c.b_id)
union all
-- case 3: (a, b) pairs where each id appears somewhere in c,
-- but no c row links that particular pair
select a_id, b_id, 'case 3' from a cross join b
where exists (select 1 from c where c.a_id=a.a_id)
and exists (select 1 from c where c.b_id=b.b_id)
and not exists (select 1 from c where c.b_id=b.b_id and c.a_id=a.a_id)

SQL - Find duplicates with equivalencies

I'm having trouble wrapping my mind around developing this SQL query. Given the following two tables:
ACADEMIC_HISTORY ( STUDENT_ID, TERM, COURSE_ID, COURSE_GRADE )
COURSE_EQUIVALENCIES ( COURSE_ID, COURSE_ID_EQUIVALENT )
What would be the best way to detect if students have taken the same (or an equivalent) course in the past with a passing grade (C or better)?
Example
Student #1 took the course ABC001 and received a grade of C. Ten years later, the course was renamed ABC011 and the appropriate entry was made in COURSE_EQUIVALENCIES. The student retook the course under this new name and received a grade of B. How can I construct a SQL query that will detect the duplicate courses and only count the first passing grade?
(The actual case is significantly more complicated, but this should get me started.)
Thanks in advance.
EDIT:
It's not even necessary to keep or discard any information. A query that simply shows classes with duplicates will be sufficient.
you could use something like:
-- Lowest-sorting grade letter (A sorts before B) that student 1 received in
-- course 1 or in a course recorded as its equivalent.
SELECT
STUDENT_ID
,MIN (COURSE_GRADE)
FROM (
-- Branch 1: the requested course itself. The columns are listed explicitly
-- so both UNION branches have the same number and order of columns
-- (ACADEMIC_HISTORY also has a TERM column, which SELECT * would include).
SELECT
STUDENT_ID
,COURSE_ID
,COURSE_GRADE
FROM
ACADEMIC_HISTORY
WHERE COURSE_ID =1
UNION
-- Branch 2: the same student's rows for courses listed as equivalents
-- (one level of equivalency only).
SELECT
h.STUDENT_ID
,h2.COURSE_ID
,h2.COURSE_GRADE
FROM
ACADEMIC_HISTORY AS h
LEFT OUTER JOIN COURSE_EQUIVALENCIES as e
ON e.COURSE_ID = h.COURSE_ID
LEFT OUTER JOIN ACADEMIC_HISTORY as h2
ON h.STUDENT_ID = h2.STUDENT_ID
AND h2.COURSE_ID = e.COURSE_ID_EQUIVALENT
WHERE
h.COURSE_ID =1
) AS t
WHERE STUDENT_ID =1
GROUP BY STUDENT_ID
http://sqlfiddle.com/#!3/d608f/20
Sorry, I originally posted this with a bug: it preferred the grade of the requested course over any equivalencies. That is fixed now.
This only looks at one level of equivalencies, but maybe you want to enforce that and make it part of the data-entry process: review all possible equivalencies and enter the valid ones.
EDIT: for first pass of qualifying course (using numbered terms..)
-- Grade from the earliest term in which student 1 took course 1 or a course
-- recorded as its equivalent (terms are assumed to be numbered in order).
SELECT TOP 1
STUDENT_ID
,MIN (COURSE_GRADE)
FROM (
-- Branch 1: the requested course itself. The explicit column list keeps this
-- branch in the same column order as the second one; SELECT * would follow
-- the table's own column order instead.
SELECT
STUDENT_ID
,COURSE_ID
,TERM
,COURSE_GRADE
FROM
ACADEMIC_HISTORY
WHERE COURSE_ID =1
UNION
-- Branch 2: the same student's rows for courses listed as equivalents.
SELECT
h.STUDENT_ID
,h2.COURSE_ID
,h2.TERM
,h2.COURSE_GRADE
FROM
ACADEMIC_HISTORY AS h
LEFT OUTER JOIN COURSE_EQUIVALENCIES as e
ON e.COURSE_ID = h.COURSE_ID
LEFT OUTER JOIN ACADEMIC_HISTORY as h2
ON h.STUDENT_ID = h2.STUDENT_ID
AND h2.COURSE_ID = e.COURSE_ID_EQUIVALENT
WHERE
h.COURSE_ID =1
) AS t
WHERE STUDENT_ID =1
GROUP BY STUDENT_ID, TERM
ORDER BY TERM ASC
http://sqlfiddle.com/#!3/fdded/6
(Note: TOP is T-SQL syntax; in MySQL you need LIMIT instead.)
The data (in LOWERCASE)
-- Scratch schema for the example; IF EXISTS lets the script run the first
-- time as well, when the schema has not been created yet.
DROP SCHEMA IF EXISTS tmp CASCADE;
CREATE SCHEMA tmp;
SET search_path='tmp';
-- One row per student and course taken, with the grade received.
CREATE TABLE academic_history
( student_id INTEGER NOT NULL
, course_id CHAR(6)
, course_grade CHAR(1)
, PRIMARY KEY(student_id,course_id)
);
INSERT INTO academic_history ( student_id,course_id,course_grade) VALUES
(1, 'ABC001' , 'C' )
, (1, 'ABC011' , 'B' )
, (2, 'ABC011' , 'A' )
;
-- Maps a course id to the course id it is equivalent to.
CREATE TABLE course_equivalencies
( course_id CHAR(6)
, course_id_equivalent CHAR(6)
);
INSERT INTO course_equivalencies(course_id,course_id_equivalent) VALUES
( 'ABC011' , 'ABC001' )
;
The query:
-- EXPLAIN ANALYZE
-- Map every history row to its canonical course id (the recorded equivalent
-- when there is one, otherwise the course itself), then keep the
-- lowest-sorting grade letter per student and canonical course.
WITH canonical_course AS (
    SELECT
        hist.student_id,
        hist.course_id,
        COALESCE(equiv.course_id_equivalent, hist.course_id) AS course_id_equivalent
    FROM academic_history AS hist
    LEFT JOIN course_equivalencies AS equiv
        ON hist.course_id = equiv.course_id
)
SELECT
    hist.student_id,
    canon.course_id_equivalent,
    MIN(hist.course_grade) AS the_grade
FROM canonical_course AS canon
INNER JOIN academic_history AS hist
    ON hist.student_id = canon.student_id
    AND hist.course_id = canon.course_id
GROUP BY hist.student_id, canon.course_id_equivalent
ORDER BY hist.student_id, canon.course_id_equivalent
;
The output:
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table tmp.academic_history
drop cascades to table tmp.course_equivalencies
DROP SCHEMA
CREATE SCHEMA
SET
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "academic_history_pkey" for table "academic_history"
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 1
student_id | course_id_equivalent | the_grade
------------+----------------------+-----------
1 | ABC001 | B
2 | ABC001 | A
(2 rows)