SQL query : SELECT - sql

-- Junction table: which researcher wrote which publication.
CREATE TABLE WRITTEN_BY (
    Re_Id      CHAR(15) NOT NULL,  -- researcher ID
    Pub_Number INT      NOT NULL,  -- publication ID
    -- one row per (researcher, publication) pair
    PRIMARY KEY (Re_Id, Pub_Number),
    FOREIGN KEY (Re_Id) REFERENCES RESEARCHER (Re_Id),
    FOREIGN KEY (Pub_Number) REFERENCES PUBLICATION (Pub_Number)
);
-- Junction table: which researcher works on which project.
CREATE TABLE WORKING_ON (
    Re_Id       CHAR(15) NOT NULL,  -- researcher ID
    Pro_Code    CHAR(15) NOT NULL,  -- project ID
    -- NOTE(review): the primary key names Subpro_Code, but the posted DDL never
    -- declared it. It is declared here so the statement is valid; the type is
    -- assumed to match Pro_Code -- confirm against the real schema.
    Subpro_Code CHAR(15) NOT NULL,
    PRIMARY KEY (Re_Id, Pro_Code, Subpro_Code),  -- comma was missing after this line
    FOREIGN KEY (Re_Id) REFERENCES RESEARCHER (Re_Id)
);
Re_Id stands for ID of a researcher
Pub_Number stands for ID of a publication
Pro_Code stands for ID of a project
The Written_by table stores a publication's ID and the ID of each of its authors
Working_on table stores information about a Project's ID and who is working on it
Now, I have this query :
For each project, find the researcher who wrote the largest number of publications.
This is what i've done so far :
-- Every (project, researcher) pair, repeated once per publication the researcher wrote.
SELECT WORK.Pro_Code, WORK.Re_Id
FROM WORKING_ON AS WORK
INNER JOIN WRITTEN_BY AS WRITE
    ON WORK.Re_Id = WRITE.Re_Id  -- was WRITE.Re.Id, which is not a valid column reference
So I got a table which contains the researcher ID and project ID of every researcher who has at least one publication. But what's next? How do I solve this problem?

You haven't said which platform you're on but try this. It handles the case where there are ties as well.
-- For each project, the researcher(s) with the highest publication count.
-- Ties are kept: every researcher matching the project's maximum is returned.
SELECT
    per_researcher.Pro_Code,
    per_researcher.Re_Id,
    per_researcher.numpublished
FROM (
    -- publication count per (project, researcher)
    SELECT
        wo.Pro_Code,
        wo.Re_Id,
        COUNT(wb.Pub_Number) AS numpublished
    FROM WORKING_ON wo
    INNER JOIN WRITTEN_BY wb
        ON wo.Re_Id = wb.Re_Id
    GROUP BY wo.Pro_Code, wo.Re_Id
) per_researcher
INNER JOIN (
    -- highest per-researcher count within each project
    SELECT
        counts.Pro_Code,
        MAX(counts.numpublished) AS maxpublished
    FROM (
        SELECT
            wo.Pro_Code,
            wo.Re_Id,
            COUNT(wb.Pub_Number) AS numpublished
        FROM WORKING_ON wo
        INNER JOIN WRITTEN_BY wb
            ON wo.Re_Id = wb.Re_Id
        GROUP BY wo.Pro_Code, wo.Re_Id
    ) counts
    GROUP BY counts.Pro_Code
) per_project
    ON per_project.Pro_Code = per_researcher.Pro_Code
    AND per_project.maxpublished = per_researcher.numpublished
Some platforms will allow you to write it this way:
-- Same result as the derived-table form, with the per-researcher counts
-- computed once in a common table expression and reused.
WITH g AS (
    -- publication count per (project, researcher)
    SELECT
        wo.Pro_Code,
        wo.Re_Id,
        COUNT(wb.Pub_Number) AS numpublished
    FROM WORKING_ON wo
    INNER JOIN WRITTEN_BY wb
        ON wo.Re_Id = wb.Re_Id
    GROUP BY wo.Pro_Code, wo.Re_Id
),
project_max AS (
    -- highest count within each project
    SELECT
        Pro_Code,
        MAX(numpublished) AS maxpublished
    FROM g
    GROUP BY Pro_Code
)
SELECT
    g.Pro_Code,
    g.Re_Id,
    g.numpublished
FROM g
INNER JOIN project_max
    ON project_max.Pro_Code = g.Pro_Code
    AND project_max.maxpublished = g.numpublished

I think that you are looking for something like the following :
-- For each project, the researcher(s) whose publication count equals the
-- project's maximum. The earlier form grouped by pro_code alone while also
-- selecting re_id, so the returned researcher was not tied to max(total);
-- it also lacked a comma in the select list and an alias on a derived table.
select
    tm.pro_code as pro_code,
    tm.re_id as re_id,
    tm.total as max_pub
from (
    -- publication count per (project, researcher)
    select
        wo.pro_code as pro_code,
        wr.re_id as re_id,
        count(wr.pub_number) as total
    from written_by wr
    inner join working_on wo
        on wr.re_id = wo.re_id
    group by wo.pro_code, wr.re_id
) tm
inner join (
    -- highest per-researcher count within each project
    select
        per.pro_code as pro_code,
        max(per.total) as max_total
    from (
        select
            wo.pro_code as pro_code,
            wr.re_id as re_id,
            count(wr.pub_number) as total
        from written_by wr
        inner join working_on wo
            on wr.re_id = wo.re_id
        group by wo.pro_code, wr.re_id
    ) per
    group by per.pro_code
) best
    on best.pro_code = tm.pro_code
    and best.max_total = tm.total

If you are using MS SQL, this should work:
-- For each project, the researcher(s) with the most distinct publications.
-- The earlier form aggregated without GROUP BY, placed HAVING before GROUP BY,
-- and never partitioned by project, so it could not run or answer the question.
WITH cte AS (
    -- distinct publications per (project, researcher)
    SELECT
        b.Pro_Code,
        a.Re_Id,
        COUNT(DISTINCT a.Pub_Number) AS pubs
    FROM WRITTEN_BY AS a
    INNER JOIN WORKING_ON AS b
        ON a.Re_Id = b.Re_Id
    GROUP BY b.Pro_Code, a.Re_Id
),
ranked AS (
    -- RANK() gives every tied top researcher rank 1 within the project
    SELECT
        Pro_Code,
        Re_Id,
        pubs,
        RANK() OVER (PARTITION BY Pro_Code ORDER BY pubs DESC) AS pub_rank
    FROM cte
)
SELECT Pro_Code, Re_Id, pubs
FROM ranked
WHERE pub_rank = 1

Related

How to use Left Join in subquery SQL Server?

I am trying to use left join in subquery in SQL Server. My query looks fine to me but it gives syntax error.
This is my query:
-- NOTE: fragment exactly as posted. It opens with a parenthesised derived
-- table and has no outer SELECT ... FROM in front of it.
(
SELECT
FK_OrderNo AS LHNo, VendorName AS LHVendor
FROM
tbl_ShipmentAPAR
LEFT JOIN
tbl_vendors ON FK_VendorID = VendorID
WHERE
FK_ServiceID = 'LH'
-- PK_OrderNo is not produced by either derived table in this fragment
) LHBase ON PK_OrderNo = LHNo
LEFT JOIN
(SELECT
FK_OrderNo AS DANo,
VendorName AS DAVendor
FROM
tbl_ShipmentAPAR
LEFT JOIN
tbl_vendors ON FK_VendorId = VendorId
WHERE
FK_ServiceId = 'DA') DABase ON PK_OrderNo = DANo
This is the error I'm getting:
This is my table structure:
-- Sample schema and seed rows for the question. Column order is unchanged;
-- INSERTs now name their target columns and statements are terminated.
CREATE TABLE tbl_ShipmentAPAR
(
    VendorID int PRIMARY KEY,
    VendorName varchar(200),
    FK_OrderNo int
);
CREATE TABLE tbl_vendors
(
    FK_VendorID int,
    FK_ServiceID varchar(200),
    FOREIGN KEY (FK_VendorID) REFERENCES tbl_ShipmentAPAR(VendorID)
);
INSERT INTO tbl_ShipmentAPAR (VendorID, VendorName, FK_OrderNo) VALUES (1, 'John', 123);
INSERT INTO tbl_vendors (FK_VendorID, FK_ServiceID) VALUES (1, 'LH');
As @Chris mentioned, the query is a bit incomplete. I guess you are trying to do something like this:
-- Outer SELECT ... FROM added: the posted fragment started at the first
-- derived table and had nothing to select from it.
SELECT
    LHBase.LHNo,
    LHBase.LHVendor,
    DABase.DANo,
    DABase.DAVendor
FROM (
    -- shipments whose vendor row has service 'LH'
    SELECT
        s.FK_OrderNo AS LHNo,
        s.VendorName AS LHVendor
    FROM tbl_ShipmentAPAR s
    LEFT JOIN tbl_vendors v
        ON v.FK_VendorID = s.VendorID
    WHERE v.FK_ServiceID = 'LH'
) LHBase
LEFT JOIN (
    -- shipments whose vendor row has service 'DA'
    SELECT
        s.FK_OrderNo AS DANo,
        s.VendorName AS DAVendor
    FROM tbl_ShipmentAPAR s
    LEFT JOIN tbl_vendors v
        ON v.FK_VendorID = s.VendorID
    WHERE v.FK_ServiceID = 'DA'
) DABase
    -- joined on LHNo instead of PK_OrderNo, because PK_OrderNo does not exist
    -- in either of the sub-queries
    ON LHBase.LHNo = DABase.DANo
This query worked for me. Comment to this answer if something must be changed.

how can i get the latest record published by each singer

I have 3 tables
table 1 : songs
-songname varchar
-singerlabel varchar
-date date
-category varchar
table 2 : singer
-singerlabel varchar
-singer# varchar
table 3 : singerNote
-singer# varchar
-firstname varchar
-lastname varchar
table 1 is connected to table 2 using singerlabel.
table 2 is connected to table 3 using singer#.
With this query:
-- Most recent song date for each singer label.
SELECT
    singerlabel,
    MAX(date) AS maxdate
FROM songs
GROUP BY singerlabel
you get the max date of each singerlabel, and then join to the other 3 tables:
-- Name of each singer together with the song(s) published on that singer
-- label's most recent date.
select sn.firstname, sn.lastname, songs.songname
from (
    -- most recent song date per singer label
    select singerlabel, max(date) as maxdate
    from songs
    group by singerlabel
) s
inner join singer
    on singer.singerlabel = s.singerlabel
inner join singernote sn
    -- the shared key column is singer# in both tables; the earlier join used
    -- a column named singer, which neither table declares
    on sn.singer# = singer.singer#
inner join songs
    on songs.singerlabel = s.singerlabel
    and songs.date = s.maxdate
If your RDBMS supports window functions, this can be achieved with ROW_NUMBER() :
-- Latest song per singer label using ROW_NUMBER(); rn = 1 marks the newest row.
-- Fixes: a comma was missing before ROW_NUMBER(), and the name columns are
-- firstname / lastname in singerNote (not first_name / last_name).
SELECT x.*
FROM (
    SELECT
        si.*,
        sn.firstname,
        sn.lastname,
        so.songname,
        so.date,
        so.category,
        ROW_NUMBER() OVER (PARTITION BY so.singerlabel ORDER BY so.date DESC) AS rn
    FROM singer si
    INNER JOIN singerNote sn
        ON sn.singer# = si.singer#
    INNER JOIN songs so
        ON so.singerlabel = si.singerlabel
) x
WHERE x.rn = 1
Without window function, you can use a correlated subquery with a NOT EXISTS condition to ensure that you are joining with the most recent song :
-- Latest song per singer label without window functions: keep a song only
-- when no later song exists for the same label.
-- Fix: the name columns are firstname / lastname in singerNote.
SELECT
    si.*,
    sn.firstname,
    sn.lastname,
    so.songname,
    so.date,
    so.category
FROM singer si
INNER JOIN singerNote sn
    ON sn.singer# = si.singer#
INNER JOIN songs so
    ON so.singerlabel = si.singerlabel
    AND NOT EXISTS (
        SELECT 1
        FROM songs so1
        WHERE so1.singerlabel = si.singerlabel
            AND so1.date > so.date
    )

SQL ORACLE - complicated query

I have such tables :
Author(name,surname,id_author)
Author_book(id_author, id_book)
Book_theme(id_book,id_theme)
Theme(id_theme, description)
I need to find, for each author, the themes that were used in every book he wrote on his own and were not used in any book he co-authored. Sorry for my poor English.
I agree with David: it's easier to start off with the basics and then keep adding to the query until you get the answer you're looking for. If I understand the question, you're looking for all books where the theme is not part of a book that they co-authored...
-- Books and themes for author 'Bob', excluding a theme when the same book
-- also has a different author attached to it.
select
    Author.name,
    Author_book.id_book,
    Theme.id_theme,
    Theme.description
from Author
inner join Author_book
    on Author_book.id_author = Author.id_author
inner join Book_theme
    on Book_theme.id_book = Author_book.id_book
inner join Theme
    on Theme.id_theme = Book_theme.id_theme
where Author.name = 'Bob'
    and Book_theme.id_theme not in (
        -- themes of this same book as seen through another author's row
        select co_bt.id_theme
        from Author_book co_ab
        inner join Book_theme co_bt
            on co_bt.id_book = co_ab.id_book
        where co_ab.id_book = Author_book.id_book
            and co_ab.id_author <> Author.id_author
    )
SQL Fiddle Example
I can understand why you might not know where to begin. I find in such cases that it's best to start small and work your way out using subqueries.
To find the books where a given author is not the sole author:
-- author_book rows whose book has two or more authors.
SELECT id_author, id_book
FROM (
    SELECT
        id_author,
        id_book,
        COUNT(*) OVER (PARTITION BY id_book) AS author_cnt  -- authors per book
    FROM author_book
)
WHERE author_cnt > 1;
To get the themes from the above books:
-- (author, theme) pairs coming from books with two or more authors.
SELECT shared.id_author, bt.id_theme
FROM (
    SELECT
        id_author,
        id_book,
        COUNT(*) OVER (PARTITION BY id_book) AS author_cnt
    FROM author_book
) shared
INNER JOIN book_theme bt
    ON bt.id_book = shared.id_book
WHERE shared.author_cnt > 1;
You can do the same for books where the author is the sole author:
-- (author, theme) pairs coming from books with exactly one author.
SELECT solo.id_author, bt.id_theme
FROM (
    SELECT
        id_author,
        id_book,
        COUNT(*) OVER (PARTITION BY id_book) AS author_cnt
    FROM author_book
) solo
INNER JOIN book_theme bt
    ON bt.id_book = solo.id_book
WHERE solo.author_cnt = 1;
Then you can use MINUS to get the set of themes where the author is the sole author of a book, but not the ones where he is co-author:
-- (author, theme) pairs from sole-author books, minus the pairs that also
-- occur in books with two or more authors.
SELECT solo.id_author, bt.id_theme
FROM (
    SELECT
        id_author,
        id_book,
        COUNT(*) OVER (PARTITION BY id_book) AS author_cnt
    FROM author_book
) solo
INNER JOIN book_theme bt
    ON bt.id_book = solo.id_book
WHERE solo.author_cnt = 1
MINUS
SELECT shared.id_author, bt.id_theme
FROM (
    SELECT
        id_author,
        id_book,
        COUNT(*) OVER (PARTITION BY id_book) AS author_cnt
    FROM author_book
) shared
INNER JOIN book_theme bt
    ON bt.id_book = shared.id_book
WHERE shared.author_cnt > 1;

Error while using same table data twice during inner join

Suppose I have a table, in Database name 'Old', as below:
TABLE A
(
SeniorVehicle varchar(255),
SeniorVehicleAllowance int,
JuniorVehicle varchar(255),
JuniorVehicleAllowance int,
ManagerVehicle varchar(255),
ManagerVehicleAllowance int
);
And another table, in Database name 'New' as below:
TABLE B
(
SeniorVehicle int,
SeniorVehicleAllowance int,
JuniorVehicle int,
JuniorVehicleAllowance int,
ManagerVehicle int,
ManagerVehicleAllowance int
);
I want to bring the data from TABLE A of Database 'Old' to TABLE B of Database 'New'.
The thing is that there is a table named Vehicle in both databases, as below:
TABLE Vehicle
(
VehicleID int pk,
VehicleName varchar(255)
)
Values in SeniorVehicle, JuniorVehicle and ManagerVehicle columns in TABLE A are the VehicleName value in TABLE Vehicle. But the value of SeniorVehicle, JuniorVehicle and ManagerVehicle that must be stored in TABLE B must be the value of VehicleID column in the Vehicle Table.
How to achieve this without error????
I have tried the following:
-- Asker's attempt, kept as posted: three nested derived tables (a, b, c), each
-- joined to Vehicle to look up one VehicleID by name.
-- NOTE(review): JuniorniorVehicleAllowance in the column list below does not
-- match the JuniorVehicleAllowance column shown for TABLE B.
INSERT INTO B
(SeniorVehicle, SeniorVehicleAllowance, JuniorVehicle, JuniorniorVehicleAllowance, ManagerVehicle, ManagerVehicleAllowance)
SELECT Vehicle.VehicleID, c.SeniorVehicleAllowance, c.VehicleID, c.JuniorVehicleAllowance, c.VehicleID, c.ManagerVehicleAllowance
-- the select list of c contains both Vehicle.VehicleID and b.VehicleID, so c
-- ends up with two columns named VehicleID
FROM (SELECT b.SeniorVehicle, b.SeniorVehicleAllowance, Vehicle.VehicleID, b.JuniorVehicleAllowance, b.VehicleID, b.ManagerVehicleAllowance
FROM (SELECT a.SeniorVehicle, a.SeniorVehicleAllowance, a.JuniorVehicle, a.JuniorVehicleAllowance, Vehicle.VehicleID, a.ManagerVehicleAllowance
FROM (SELECT SeniorVehicle, SeniorVehicleAllowance, JuniorVehicle, JuniorVehicleAllowance, ManagerVehicle, ManagerVehicleAllowance FROM A) as a
Inner join
Vehicle
ON a.ManagerVehicle = Vehicle.VehicleName) as b
Inner join
Vehicle
ON b.JuniorVehicle = Vehicle.VehicleName) as c
Inner join
Vehicle
ON c.SeniorVehicle = Vehicle.VehicleName
I get the following error:
The column 'VehicleID' was specified multiple times for 'c'
My database is MSSQL Server 2008 R2
Reformatting your current query gives:
-- Reformatted copy of the failing query; the duplicate-column problem is
-- deliberately still present so it can be seen.
SELECT
Vehicle.VehicleID,
c.SeniorVehicleAllowance,
c.VehicleID,
c.JuniorVehicleAllowance,
c.VehicleID,
c.ManagerVehicleAllowance
FROM (
SELECT b.SeniorVehicle,
b.SeniorVehicleAllowance,
-- first VehicleID in c: looked up through b.JuniorVehicle
Vehicle.VehicleID,
b.JuniorVehicleAllowance,
-- second VehicleID in c: carried up from b, looked up through a.ManagerVehicle
b.VehicleID,
b.ManagerVehicleAllowance
FROM (
SELECT a.SeniorVehicle,
a.SeniorVehicleAllowance,
a.JuniorVehicle,
a.JuniorVehicleAllowance,
Vehicle.VehicleID,
a.ManagerVehicleAllowance
FROM (
SELECT SeniorVehicle,
SeniorVehicleAllowance,
JuniorVehicle,
JuniorVehicleAllowance,
ManagerVehicle,
ManagerVehicleAllowance
FROM A
) as a
Inner join Vehicle
ON a.ManagerVehicle = Vehicle.VehicleName
) as b
Inner join Vehicle
ON b.JuniorVehicle = Vehicle.VehicleName
) as c
Inner join Vehicle
ON c.SeniorVehicle = Vehicle.VehicleName
In this query, the subquery aliased c has two columns called VehicleID (which is what your error message is telling you).
The smallest change to fix the issue is to alias the columns in the sub query, e.g:
-- Each looked-up VehicleID gets its own alias at the level where it is
-- produced, so no derived table exposes two columns with the same name.
SELECT
Vehicle.VehicleID AS SeniorVehicleId,
c.SeniorVehicleAllowance,
c.JuniorVehicleId,
c.JuniorVehicleAllowance,
c.ManagerVehicleID,
c.ManagerVehicleAllowance
FROM (
SELECT b.SeniorVehicle,
b.SeniorVehicleAllowance,
Vehicle.VehicleID AS JuniorVehicleId,
b.JuniorVehicleAllowance,
b.ManagerVehicleID,
b.ManagerVehicleAllowance
FROM (
SELECT a.SeniorVehicle,
a.SeniorVehicleAllowance,
a.JuniorVehicle,
a.JuniorVehicleAllowance,
Vehicle.VehicleID AS ManagerVehicleID,
a.ManagerVehicleAllowance
-- Rest omitted for brevity
It would also be possible to rewrite the query with more joins and remove the need for the subqueries altogether:
-- Translate the three vehicle names in A into Vehicle IDs with one join per
-- role. Each join must match on its own name column; the earlier version
-- matched all three on ManagerVehicle, so every ID came out as the manager's.
SELECT
    srmgr.VehicleId AS SeniorVehicleId,
    A.SeniorVehicleAllowance,
    jrmgr.VehicleId AS JuniorVehicleId,
    A.JuniorVehicleAllowance,
    mgr.VehicleId AS ManagerVehicleId,
    A.ManagerVehicleAllowance
FROM A
INNER JOIN Vehicle AS mgr
    ON A.ManagerVehicle = mgr.VehicleName
INNER JOIN Vehicle AS jrmgr
    ON A.JuniorVehicle = jrmgr.VehicleName
INNER JOIN Vehicle AS srmgr
    ON A.SeniorVehicle = srmgr.VehicleName

Prevent duplicate rows being inserted

I'm trying to use an SQL insert statement to migrate rows from a table in one database to a table in a different database. The statement works until I add a unique index on the destination table, and at that point I'm struggling to get the insert statement to exclude the duplicates. Here's what I thought should work:
-- Asker's attempt, kept as posted: copy (HP.ID, business ID) pairs from
-- tblEmployment into HPB, skipping pairs already stored in HPB.
INSERT INTO [MyDB].[dbo].[HPB] (
[HPID],
[BusinessID]
)
SELECT
-- scalar subquery translating the source person ID into HP.ID
PersonId = (SELECT ID FROM [MyDB].[dbo].[HP] WHERE PersonID = lPersonId),
lBusinessId
FROM [MyOriginalDB].[dbo].[tblEmployment]
WHERE
lPersonId in (SELECT PersonID FROM [MyDB].[dbo].[HP])
AND
lBusinessId in (SELECT ID FROM [MyDB].[dbo].[Business])
AND
-- compares each candidate row with rows already in HPB.
-- NOTE(review): candidate rows are not compared with each other, so a pair
-- that repeats inside tblEmployment would presumably still be selected
-- twice -- confirm against the source data.
NOT EXISTS (SELECT * FROM [MyDB].[dbo].[HPB] WHERE
[HPID] = (SELECT ID FROM [MyDB].[dbo].[HP] WHERE PersonID = lPersonId)
AND [BusinessID] = lBusinessId)
The schema for the HPB table is:
-- Destination table for the migration.
CREATE TABLE [dbo].[HPB](
[ID] [int] IDENTITY(1,1) NOT NULL,
[HPID] [int] NOT NULL,
[BusinessID] [int] NOT NULL,
-- A table-level PRIMARY KEY constraint needs a key column list; the posted
-- DDL had none. NOTE(review): [ID] is assumed to be the key column -- confirm.
CONSTRAINT [PK_HealthProfessionalBusiness] PRIMARY KEY CLUSTERED ([ID] ASC))
The unique index is on the [MyDB].[dbo].[HPB] table for columns (HPID, BusinessID)
When I run the insert I get an error about duplicate row inserts and I can't work out why the SQL below doesn't exclude the duplicates.
NOT EXISTS (SELECT * FROM [MyDB].[dbo].[HPB] WHERE
[HPID] = (SELECT ID FROM [MyDB].[dbo].[HP] WHERE PersonID = lPersonId)
AND [BusinessID] = lBusinessId)
-- Insert (HP.ID, business ID) pairs that are not yet in HPB.
-- Fixes: HPB has no PersonID column, so the anti-join now matches on
-- HPB.HPID = HP.ID; the source columns are lBusinessId / lPersonId
-- (lower-case L prefix), as in the question's own query.
Insert MyDB.dbo.HPB( HPID, BusinessID )
Select HP.ID, E.lBusinessId
From [MyOriginalDB].[dbo].[tblEmployment] As E
Join [MyDB].[dbo].[HP] As HP
    On HP.PersonId = E.lPersonId
Join [MyDB].[dbo].[Business] As B
    On B.ID = E.lBusinessId
Left Join [MyDB].[dbo].[HPB] As HPB
    On HPB.BusinessID = E.lBusinessId
    And HPB.HPID = HP.ID
-- no matching HPB row means the pair is new
Where HPB.ID Is Null
-- collapses pairs that repeat in the source
Group By HP.ID, E.lBusinessId
Use:
-- Insert each distinct (HP id, business id) pair that is a known business
-- and is not already present in HPB.
INSERT INTO [MyDB].[dbo].[HPB] (
    [HPID],
    [BusinessID]
)
SELECT DISTINCT
    hp.id,
    emp.lbusinessid
FROM [MyOriginalDB].[dbo].[tblEmployment] emp
JOIN [MyDB].[dbo].[HP] hp
    ON hp.personid = emp.lpersonid
WHERE emp.lbusinessid IN (SELECT ID FROM [MyDB].[dbo].[Business])
    AND NOT EXISTS (
        SELECT NULL
        FROM [MyDB].[dbo].[HPB] existing
        WHERE existing.hpid = hp.id
            AND existing.businessid = emp.lbusinessid
    )