SQL join multiple tables with/without data - sql

I have no idea how to create an SQL statement to join 4 tables.
1) The 'Vendor' table will always match entries from each table on Vendor #
2) Each of the remaining 3 will match to each other by Vendor # & Seq #
3) Any combination of the 3 can have data (or not)
4) I don't want to select from the Vendor table unless I get a hit on at least one of the 3
VENDOR
Vendor # Name
-------- ----
1 Tom Smith
2 Bruce Lee
3 Seamus O’Leary
4 Jonathan Stewart
5 Benjamin Franklin
Month Range Selected
Vendor # Seq # MonthFrom MonthTo
-------- ----- --------- -------
1 1 3 6
1 2 7 9
3 2 5 6
Week Selected
Vendor # Seq # Week #
-------- ----- ------
1 1 3
3 1 4
4 1 1
Day Selected
Vendor # Seq # Day #
1 1 15
1 2 25
2 1 12
4 1 05
5 1 19
Desired Table (Joined)
Vendor# Name Seq# MonthFrom MonthTo Week# Day#
1 Tom Smith 1 3 6 3 15
1 Tom Smith 2 7 9 NULL 25
2 Bruce Lee 1 NULL NULL NULL 12
3 Seamus O’Leary 1 NULL NULL 4 NULL
3 Seamus O’Leary 2 5 6 NULL NULL
4 Jonathan Stewart 1 NULL NULL 1 05
5 Benjamin Franklin 1 NULL NULL NULL 19
The trick being that any of the 3 (not including 'Vendor') can or cannot have data and I only want a row returned if there is something from one or more of the 3.
Any Advice?

To join it on Vendor and Seq, we first need to have all possible combinations. Then we can filter the tables based on these combinations. I ran the following in SQL Server:
Setup
-- Setup: sample data from the question, loaded into session-local temp tables.
-- "declare #name table (...)" is not valid T-SQL: the # prefix denotes a temp
-- table, which is created with CREATE TABLE (table variables are named @name).
create table #Vendors (id int, name varchar(20));
create table #MonthRangeSelected (vendor int, seq int null, monthFrom int null, monthTo int null);
create table #WeekSelected (vendor int, seq int null, week int null);
create table #DaySelected (vendor int, seq int null, day int null);

-- Vendors
insert into #Vendors (id, name)
select 1, 'Tom Smith'
union all
select 2, 'Bruce Lee'
union all
select 3, 'Seamus O’Leary'
union all
select 4, 'Jonathan Stewart'
union all
select 5, 'Benjamin Franklin';

-- Month ranges per vendor / seq
insert into #MonthRangeSelected (vendor, seq, monthFrom, monthTo)
select 1, 1, 3, 6
union all
select 1, 2, 7, 9
union all
select 3, 2, 5, 6;

-- Weeks per vendor / seq
insert into #WeekSelected (vendor, seq, week)
select 1, 1, 3
union all
select 3, 1, 4
union all
select 4, 1, 1;

-- Days per vendor / seq
insert into #DaySelected (vendor, seq, day)
select 1, 1, 15
union all
select 1, 2, 25
union all
select 2, 1, 12
union all
select 4, 1, 05
union all
select 5, 1, 19;
Query
-- Build every distinct (vendor, seq) pair present in any of the three
-- selection tables, attach the vendor name, then pull each table's values
-- for that pair (NULL where the table has no row for it).
SELECT
    v.Id,
    v.name,
    pairs.seq,
    mr.MonthFrom,
    mr.MonthTo,
    wk.Week,
    dy.Day
FROM #Vendors AS v
INNER JOIN (
    SELECT vendor, seq FROM #MonthRangeSelected
    UNION
    SELECT vendor, seq FROM #WeekSelected
    UNION
    SELECT vendor, seq FROM #DaySelected
) AS pairs
    ON pairs.vendor = v.id
LEFT JOIN #MonthRangeSelected AS mr
    ON mr.Vendor = pairs.vendor
   AND mr.seq = pairs.seq
LEFT JOIN #WeekSelected AS wk
    ON wk.Vendor = pairs.vendor
   AND wk.seq = pairs.seq
LEFT JOIN #DaySelected AS dy
    ON dy.Vendor = pairs.vendor
   AND dy.seq = pairs.seq
-- keep a pair only when at least one value column is populated
WHERE NOT (
        mr.MonthFrom IS NULL
    AND mr.MonthTo IS NULL
    AND wk.Week IS NULL
    AND dy.Day IS NULL
)
And this is the result:
Id name seq MonthFrom MonthTo Week Day
1 Tom Smith 1 3 6 3 15
1 Tom Smith 2 7 9 NULL 25
2 Bruce Lee 1 NULL NULL NULL 12
3 Seamus O’Leary 1 NULL NULL 4 NULL
3 Seamus O’Leary 2 5 6 NULL NULL
4 Jonathan Stewart 1 NULL NULL 1 5
5 Benjamin Franklin 1 NULL NULL NULL 19

This is more complicated than it sounds. According to the result, you do not want a cartesian product when there are multiple matches in a table. So, you need to take seqnum into account.
-- One row per vendor / seq combination found in any of the three tables.
-- coalesce() takes the seq from whichever table supplied the row.
-- NOTE(review): Vendors is only on the preserved side of the first (left)
-- join; rows that enter solely through a later full join may carry NULL in
-- v.Vendor / v.name -- verify against real data before relying on this form.
select v.Vendor, v.name, coalesce(m.seq, w.seq, d.seq) as Seq,
       m.MonthFrom, m.MonthTo, w.Week, d.Day
from Vendors v left join
     MonthRangeSelected m   -- was misspelled "SMonthRangeSelected"
     on v.Vendor = m.Vendor full join
     WeekSelected w
     on v.Vendor = w.Vendor and m.seq = w.seq full join
     DaySelected d
     on v.Vendor = d.Vendor and d.seq in (w.seq, m.seq)
where m.Vendor is not null or
      w.Vendor is not null or
      d.Vendor is not null;
Strange things can happen when using full join, particularly if you want any filtering. An alternative approach uses union all and group by:
-- Stack the three selection tables into one shape (NULL-padding the columns
-- a table does not have), then collapse to one row per vendor / seq with
-- max(), which ignores the NULL padding.
select mwd.Vendor, v.name, mwd.seq,
       max(MonthFrom) as MonthFrom, max(MonthTo) as monthTo,
       max(Week) as week, max(Day) as day
from ((select m.Vendor, m.seq, m.MonthFrom, m.MonthTo, NULL as week, NULL as day
       from month m
      ) union all
      (select w.Vendor, w.seq, NULL as MonthFrom, NULL as MonthTo, w.week, NULL as day
       from week w   -- alias was missing although w.* is referenced
      ) union all
      (select d.Vendor, d.seq, NULL as MonthFrom, NULL as MonthTo, NULL as week, d.day
       from day d
      )
     ) mwd join
     Vendor v
     on v.vendor = mwd.vendor   -- was "vmwd", an alias that is never defined
group by mwd.Vendor, v.name, mwd.seq;   -- was "v.vname"; the selected column is v.name
Note that this version needs the Vendor table only to look up the name; if the name is not required, the join to Vendor can be dropped.

You should left outer join to each of the 3 tables, and then include the following in your where clause:
(MonthFrom is not null or Week# is not null or Day# is not null)
it sounds like you can inner join to the Vendor table

I believe that this will do what you need:
-- Left-joins all three selection tables to Vendor on Vendor# alone, then uses
-- the WHERE clause to (a) require a hit in at least one table and (b) discard
-- row combinations whose Seq# values disagree.
-- NOTE(review): because the joins do not include Seq#, every month/week/day
-- row of a vendor is paired with every other before filtering; a Seq# present
-- in only some of the tables may be filtered out entirely -- verify the result
-- against the desired output before using this form.
SELECT
V.[Vendor#], -- I'll never understand why people insist on using names that require brackets
V.Name,
-- Seq# comes from whichever table supplied a row
COALESCE(M.[Seq#], W.[Seq#], D.[Seq#]) AS [Seq#],
M.MonthFrom,
M.MonthTo,
W.[Week#],
D.[Day#]
FROM
Vendor V
LEFT OUTER JOIN MonthRange M ON M.[Vendor#] = V.[Vendor#]
LEFT OUTER JOIN Week W ON W.[Vendor#] = V.[Vendor#]
LEFT OUTER JOIN Day D ON D.[Vendor#] = V.[Vendor#]
WHERE
-- at least one of the three tables matched this vendor
(
M.[Vendor#] IS NOT NULL OR
W.[Vendor#] IS NOT NULL OR
D.[Vendor#] IS NOT NULL
) AND
-- each pair of tables must agree on Seq# unless one side is missing
(M.[Seq#] = W.[Seq#] OR M.[Seq#] IS NULL OR W.[Seq#] IS NULL) AND
(M.[Seq#] = D.[Seq#] OR M.[Seq#] IS NULL OR D.[Seq#] IS NULL) AND
(D.[Seq#] = W.[Seq#] OR D.[Seq#] IS NULL OR W.[Seq#] IS NULL)

You need full outer joins on the three tables, so as to get all vendor and seq number combinations. Join these with vendor and you are done:
-- Full-outer-join the three selection tables on (vendorno, seqno) so every
-- combination present in any of them yields one row, then attach the vendor.
WITH x AS (
    SELECT vendorno, seqno, m.monthfrom, m.monthto, w.weekno, d.dayno
    FROM monthsel m
    FULL OUTER JOIN weeksel w USING (vendorno, seqno)
    FULL OUTER JOIN daysel d USING (vendorno, seqno)
)
SELECT vendorno, v.name, x.seqno, x.monthfrom, x.monthto, x.weekno, x.dayno
FROM vendor v
INNER JOIN x USING (vendorno)
ORDER BY vendorno, x.seqno;
UPDATE: Without a USING clause the same query gets slightly less readable (and thus slightly more error-prone):
-- Same query without USING: the key columns must be rebuilt with coalesce()
-- because any of the three tables may be the one that supplied the row.
select v.vendorno, v.name, x.seqno, x.monthfrom, x.monthto, x.weekno, x.dayno
from vendor v
join
(
  select
    coalesce(m.vendorno, w.vendorno, d.vendorno) as vendorno,
    coalesce(m.seqno, w.seqno, d.seqno) as seqno,
    m.monthfrom, m.monthto, w.weekno, d.dayno
  from monthsel m
  full outer join weeksel w on w.vendorno = m.vendorno and w.seqno = m.seqno
  -- a day row matches whichever of m / w is present for the pair
  full outer join daysel d on d.vendorno in (m.vendorno, w.vendorno)
                          and d.seqno in (m.seqno, w.seqno)   -- was "w.segno" (typo)
) x on x.vendorno = v.vendorno
order by v.vendorno, x.seqno;
(Hope I didn't mix things up here. It's easy to make copy & paste errors with such a query. So if it doesn't work properly, look out for typos.)

Related

Oracle SQL Return at least one row

I have an Oracle SQL query where I would like to return all rows where a certain column has a value, but if none of the rows have a value for that column then I would like to return one of the blank rows. How can this be accomplished?
My scenario is a learning path with courses that are to be completed, but I want to show the percentage completion. Obviously, if no courses have been completed I would like to show the learning path with a zero percentage completion.
The query is very complicated, so I would rather provide a dummy scenario below:
learning_paths
--------------
learning_path_id learning_path_name
1 Oracle Developer
2 Python Developer
courses
-------
course_id course_name
--------- -----------
1 Oracle SQL
2 Oracle PL/SQL
3 Python
4 Django
5 NLTK
learning_path_items
-------------------
learning_path_id course_id
---------------- ---------
1 1
1 2
2 3
2 4
2 5
learning_path_enrollments
-------------------------
employee_id learning_path_id
----------- ----------------
1 1
2 2
3 2
course_enrollments
------------------
employee_id course_id
----------- ---------
2 3
2 4
So my results should be:
employee_id learning_path_id course_id completion
----------- ---------------- --------- ----------
1 1 0 out of 2
2 2 3 2 out of 3
2 2 4 2 out of 3
3 2 0 out of 3
Below is the actual query, which is very complicated and still a work in progress.
-- Learning-path completion report (work in progress, Oracle).
-- sub1 lists each enrolled person with the courses of the learning path;
-- sub2 lists class bookings with assignment, job and organisation details.
-- sub2 is outer-joined to sub1, so a course without a booking still appears.
select sub2.director,
sub2.cost_centre,
sub1.employee_number,
sub1.employee_name,
sub2.job_title,
sub1.course_name,
sub1.learning_path_name,
sub2.course_start_date,
sub2.course_end_date,
-- latest course end date across the person's rows for this learning path
max(sub2.course_end_date) over (partition by sub1.employee_number, sub1.learning_path_name) max_course_end_date,
sub1.original_date_of_hire,
sub2.job_start_date promotion_date,
-- a missing duration is replaced by a very large number of days
nvl(sub1.completion_target_days, 9999999999999999) completion_target_days,
sub1.completion_target_date,
sub1.no_of_completed_courses,
sub1.no_of_mandatory_courses,
-- completed count is capped at the mandatory count, so the result is at most 100
-- NOTE(review): divides by no_of_mandatory_courses; confirm it can never be 0
round(least(sub1.no_of_completed_courses, sub1.no_of_mandatory_courses) / sub1.no_of_mandatory_courses * 100) completion_percentage
--round() time_percentage
from (
-- sub1: learning-path enrolments joined to person, path, section, member and course rows
select papf.employee_number,
nvl(papf.known_as, papf.first_name) || ' ' || papf.last_name employee_name,
papf.original_date_of_hire,
oav.version_name course_name,
olpt.name learning_path_name,
ole.completion_target_date completion_target_date,
ole.no_of_completed_courses no_of_completed_courses,
ole.no_of_mandatory_courses no_of_mandatory_courses,
olp.duration completion_target_days,
papf.person_id,
oav.activity_version_id
from ota_lp_enrollments ole,
per_all_people_f papf,
ota_learning_paths olp,
ota_learning_paths_tl olpt,
ota_lp_sections ols,
ota_lp_sections_tl olst,
ota_learning_path_members olpm,
ota_activity_versions oav -- aka Courses
where papf.person_id = ole.person_id
-- pick the person row whose effective date range covers the date returned by the helper
and xxpay_bi_util.get_effective_date(ole.person_id, trunc(sysdate), 'all') between papf.effective_start_date
and papf.effective_end_date
and olp.learning_path_id = ole.learning_path_id
and olpt.learning_path_id = ole.learning_path_id
and olpt.language = userenv('LANG')
and ols.learning_path_id = ole.learning_path_id
and olst.learning_path_section_id = ols.learning_path_section_id
and olst.language = userenv('LANG')
-- NOTE(review): the next predicate repeats the ols/ole learning_path_id condition already stated above
and ols.learning_path_id = ole.learning_path_id
and olpm.learning_path_id = ole.learning_path_id
and olpm.learning_path_section_id = ols.learning_path_section_id
and oav.activity_version_id = olpm.activity_version_id
-- no aggregate is selected, so this GROUP BY only removes duplicate rows;
-- olst.name is grouped on although it is not selected
group by papf.employee_number,
nvl(papf.known_as, papf.first_name) || ' ' || papf.last_name,
papf.original_date_of_hire,
oav.version_name,
olpt.name,
olst.name,
ole.completion_target_date,
ole.no_of_completed_courses,
ole.no_of_mandatory_courses,
olp.duration,
papf.person_id,
oav.activity_version_id
) sub1,
(
-- sub2: bookings (erm) outer-joined to assignment, job and organisation data
select erm.*,
paaf.assignment_id,
paaf.person_id,
pj.name job_title,
haou.name cost_centre,
paafm.job_start_date,
xxpay_util.get_lookup_value(trunc(sysdate), 'TRUW_HR_DIRECTORS', hoi.org_information1) director -- 'Derek Kohler (Stores)'
from (
-- erm: each delegate booking with the dates and title of the booked event
select odb.booking_id,
odb.delegate_person_id,
odb.date_booking_placed,
oe.course_start_date,
oe.course_end_date,
oe.course_start_time,
oe.course_end_time,
oe.enrolment_start_date,
oe.public_event_flag,
oe.title class_title,
oe.activity_version_id
from ota_delegate_bookings odb,
ota_events oe -- aka Classes
where oe.event_id = odb.event_id
) erm,
per_all_assignments_f paaf,
per_jobs pj,
hr_all_organization_units haou,
hr_organization_information hoi,
(
-- paafm: earliest effective start date per person / assignment / job
select paaf.person_id,
paaf.assignment_id,
paaf.job_id,
min(paaf.effective_start_date) job_start_date
from per_all_assignments_f paaf
where paaf.assignment_type in ('E', 'C')
and paaf.primary_flag = 'Y'
group by paaf.person_id,
paaf.assignment_id,
paaf.job_id
) paafm
-- (+) marks the optional side: a booking is kept even without assignment data
where paaf.person_id (+) = erm.delegate_person_id
and erm.course_start_date between nvl(paaf.effective_start_date (+), to_date('01/01/1000', 'dd/mm/yyyy'))
and nvl(paaf.effective_end_date (+), to_date('31/12/4712', 'dd/mm/yyyy'))
and paafm.person_id (+) = paaf.person_id
and paafm.assignment_id (+) = paaf.assignment_id
and paafm.job_id (+) = paaf.job_id
and pj.job_id (+) = paaf.job_id
and haou.organization_id (+) = paaf.organization_id
and hoi.organization_id (+) = paaf.organization_id
and hoi.org_information_context (+) = 'TRU_ADD_ORG'
) sub2
where sub2.activity_version_id (+) = sub1.activity_version_id
and sub2.person_id (+) = sub1.person_id
and sub1.employee_number in ('2006591', '2005681', '2004118', '2004212')
-- positional sort: employee_number, director, cost_centre, employee_name, job_title, learning_path_name
order by 3, 1, 2, 4, 5, 7
If you have tables like this:
-- Minimal schema: students, courses, and the enrolment link between them.
CREATE TABLE students (
    student_id NUMBER,
    PRIMARY KEY ( student_id )
);

CREATE TABLE courses (
    course_id NUMBER,
    PRIMARY KEY ( course_id )
);

-- One row per (student, course) pair; both key columns reference their parent table.
CREATE TABLE student_courses (
    student_id NUMBER,
    course_id  NUMBER,
    completed  NUMBER(1,0),
    score      NUMBER(3,0),
    CONSTRAINT student_courses_pk PRIMARY KEY ( student_id, course_id ),
    FOREIGN KEY ( student_id ) REFERENCES students ( student_id ),
    FOREIGN KEY ( course_id ) REFERENCES courses ( course_id )
);
Then you can do:
-- Percentage of each student's enrolled courses whose completed flag is 1.
-- Students with no enrolments (kept by the outer join) report 0 instead of
-- raising ORA-01476 (divisor is equal to zero).
SELECT s.student_id,
       COALESCE(
           COUNT( CASE c.completed WHEN 1 THEN 1 END )
             / NULLIF( COUNT( c.course_id ), 0 ) * 100,
           0
       ) AS percent_enrolled_completed
FROM students s   -- the table is created as "students", not "student"
LEFT OUTER JOIN
     student_courses c
ON ( s.student_id = c.student_id )
GROUP BY s.student_id;
Edited: to answer updated question
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data from the question, created with CREATE TABLE ... AS SELECT;
-- the first SELECT of each statement supplies the column names.

-- which courses belong to which learning path
create table learning_path_items AS
SELECT 1 AS learning_path_id, 1 AS course_id FROM DUAL
UNION ALL SELECT 1, 2 FROM DUAL
UNION ALL SELECT 2, 3 FROM DUAL
UNION ALL SELECT 2, 4 FROM DUAL
UNION ALL SELECT 2, 5 FROM DUAL;
-- which employee is enrolled in which learning path
create table learning_path_enrollments AS
SELECT 1 AS employee_id, 1 AS learning_path_id FROM DUAL
UNION ALL SELECT 2, 2 FROM DUAL
UNION ALL SELECT 3, 2 FROM DUAL;
-- which employee has a row for which course
create table course_enrollments AS
SELECT 2 AS employee_id, 3 AS course_id FROM DUAL
UNION ALL SELECT 2, 4 FROM DUAL;
Query 1:
-- Per enrolled employee and learning path: one row for each course of the
-- path that has a course_enrollments row for the employee (or a single row
-- with a NULL course when there is none), labelled "<matched> out of <courses in path>".
WITH completed AS (
    -- path courses that the enrolled employee also has a course_enrollments row for
    SELECT lpe.learning_path_id,
           lpe.employee_id,
           ce.course_id,
           COUNT( ce.course_id ) OVER ( PARTITION BY lpe.learning_path_id, lpe.employee_id ) AS num_completed
    FROM learning_path_enrollments lpe
    INNER JOIN learning_path_items lpi
        ON ( lpi.learning_path_id = lpe.learning_path_id )
    INNER JOIN course_enrollments ce
        ON ( ce.course_id = lpi.course_id
             AND ce.employee_id = lpe.employee_id )
),
path_sizes AS (
    -- number of courses in the path, per enrolled employee
    SELECT lpe.learning_path_id,
           lpe.employee_id,
           COUNT( 1 ) AS num_courses
    FROM learning_path_enrollments lpe
    INNER JOIN learning_path_items lpi
        ON ( lpi.learning_path_id = lpe.learning_path_id )
    GROUP BY lpe.learning_path_id, lpe.employee_id
)
SELECT ps.learning_path_id,
       ps.employee_id,
       done.course_id,
       COALESCE( done.num_completed, 0 ) || ' out of ' || ps.num_courses AS completion
FROM path_sizes ps
LEFT OUTER JOIN completed done
    ON ( done.employee_id = ps.employee_id
         AND done.learning_path_id = ps.learning_path_id )
ORDER BY ps.learning_path_id, ps.employee_id, done.course_id
Results:
| LEARNING_PATH_ID | EMPLOYEE_ID | COURSE_ID | COMPLETION |
|------------------|-------------|-----------|------------|
| 1 | 1 | (null) | 0 out of 2 |
| 2 | 2 | 3 | 2 out of 3 |
| 2 | 2 | 4 | 2 out of 3 |
| 2 | 3 | (null) | 0 out of 3 |
-- Oracle 11g has no LIMIT clause; take the first row with ROWNUM instead.
-- The src column sorts the real result ahead of the fallback 0 row, so the
-- fallback is returned only when the first query produces nothing.
SELECT val
FROM (
    SELECT whatever AS val, 1 AS src FROM wherever
    UNION ALL
    SELECT 0, 2 FROM DUAL
    ORDER BY src
)
WHERE ROWNUM = 1;
If your first query returns a row, you get that row with the percentage you are after (this assumes the first query returns at most one row; otherwise only the first row it returns is shown).
The second SELECT returns a single row holding the literal 0 (a NUMBER), selected from the one-row dummy table DUAL; DUAL's own VARCHAR2(1) column, DUMMY, is not selected.
If the first SELECT returns no rows, the result is therefore a single row with a single column containing 0.
ALTERNATIVELY
-- Return the real rows when there are any; otherwise return one row with 0.
SELECT
something
FROM sometable
WHERE somefield = somevalue
UNION ALL
SELECT
0
FROM DUAL   -- Oracle 11g requires a FROM clause; DUAL is the one-row dummy table
WHERE NOT EXISTS (SELECT something FROM sometable WHERE somefield = somevalue)
This does the same, except that the 0 row is returned only when your first query returns no rows. I think this is the better answer to your question, since the first query might return multiple rows.

SQL with multiple ROW_NUMBER or RANK

I have the need to do multiple joins with the same table, between (eg) Person and PersonEvents. There are multiple events for each person (0 or more). I need to create a VIEW that selects each person with certain columns from their most recent event, plus columns from the next-most-recent event.
Person data:
Id Name
1 Iain
2 Fred
3 Mary
4 Foo
5 Bar
PersonEvents data:
PersonId DateStarted ReasonForLeaving
1 2011-03-12 00:00:00.000 sick
1 2013-02-12 00:00:00.000 NULL
1 2012-04-12 00:00:00.000 holiday
2 2011-05-12 00:00:00.000 new baby
2 2013-06-12 00:00:00.000 NULL
2 2012-07-12 00:00:00.000 had enough
3 2011-08-12 00:00:00.000 pregnant
3 2013-09-12 00:00:00.000 NULL
4 2012-10-12 00:00:00.000 NULL
An output sample would be:
Id Name MemberSince ReasonForChange
1 Iain 2013-02-12 00:00:00.000 holiday
4 Foo 2012-10-12 00:00:00.000 NULL
...
The "old way" used a top 1 join or sub-select statement:
-- "Old way": one correlated TOP 1 sub-select per column wanted from the
-- person's most recent event.
SELECT p.*,
(
SELECT TOP 1 DateStarted
FROM PersonEvents e
WHERE e.PersonId = p.Id
ORDER BY DateStarted DESC   -- was "DateFoo", which is not a column of PersonEvents
) As MemberSince
FROM Person p
....
However if you need multiple columns from this Join, (eg Date, Comment, and maybe further ids), then you need to do multiple sub-select statements, which is expensive.
So the question is: How do you get multiple columns from a join using the row number for the most recent, and previous events?
The most straight forward (ie. readable SQL) answer that I have come up with uses WITH and ROW_NUMBER.
First, make a ROW_NUMBER query that orders the events and gives a number to each event unique to that PersonId:
-- Number each person's events from newest (1) to oldest.
SELECT pe.*,
       ROW_NUMBER() OVER (PARTITION BY pe.PersonId ORDER BY pe.DateStarted DESC) AS EventOrder
FROM PersonEvents AS pe
Results:
PersonId DateStarted ReasonForLeaving EventOrder
1 2013-02-12 00:00:00.000 NULL 1
1 2012-04-12 00:00:00.000 holiday 2
1 2011-03-12 00:00:00.000 sick 3
2 2013-06-12 00:00:00.000 NULL 1
2 2012-07-12 00:00:00.000 had enough 2
2 2011-05-12 00:00:00.000 new baby 3
3 2013-09-12 00:00:00.000 NULL 1
3 2011-08-12 00:00:00.000 pregnant 2
4 2012-10-12 00:00:00.000 NULL 1
Now, the "first" event (in my case the most recent) for every person contains the date that the change was made (real-life example: this is student enrolment history data across multiple schools, containing School ID and lots of other guff). The "Second" event for every person contains the previous event and reason for leaving. To add it together:
-- Rank each person's events newest-first, then join the ranked set twice:
-- rank 1 supplies MemberSince, rank 2 supplies the previous event's reason.
WITH SortedEvents AS (
SELECT *,
-- must order by DateStarted: ordering by ReasonForLeaving ranks events by
-- their text, so ranks 1 and 2 would no longer be the two most recent events
ROW_NUMBER() OVER (PARTITION BY PersonId ORDER BY DateStarted DESC) AS EventOrder
FROM PersonEvents
)
SELECT p.*, MostRecent.DateStarted AS MemberSince, NextRecent.ReasonForLeaving AS ReasonForChange
FROM Person p
-- outer joins keep persons that have fewer than two events (or none)
LEFT OUTER JOIN SortedEvents AS MostRecent ON p.Id = MostRecent.PersonId AND MostRecent.EventOrder = 1
LEFT OUTER JOIN SortedEvents AS NextRecent ON p.Id = NextRecent.PersonId AND NextRecent.EventOrder = 2
which provides the nicely formatted output:
Id Name MemberSince ReasonForChange
1 Iain 2013-02-12 00:00:00.000 holiday
2 Fred 2013-06-12 00:00:00.000 had enough
3 Mary 2013-09-12 00:00:00.000 pregnant
4 Foo 2012-10-12 00:00:00.000 NULL
5 Bar NULL NULL
in reality you could pick multiple columns from any row number. The real life example (again, student enrolment history) picks:
From the master student table:
student id
name
DOB, etc
From the Enrolment History table as "current enrolment"
School id
various enrolment status info
date started
From the Enrolment History table as "previous enrolment"
reason for leaving
This method is quite efficient with about 150k students and their respective history.
complete SQL for my tests:
-- Self-contained test script: creates the two tables, loads the sample rows,
-- runs the ROW_NUMBER version and the TOP 1 version, then drops the tables.
-- GO separates the batches.
CREATE TABLE Person
(
Id INT NOT NULL,
Name VARCHAR(50)
)
GO
CREATE TABLE PersonEvents
(
PersonId INT NOT NULL,
DateStarted DATETIME NOT NULL,
ReasonForLeaving VARCHAR(50)
)
GO
-- five persons; person 5 has no events
INSERT INTO Person
SELECT 1, 'Iain' UNION ALL
SELECT 2, 'Fred' UNION ALL
SELECT 3, 'Mary' UNION ALL
SELECT 4, 'Foo' UNION ALL
SELECT 5, 'Bar'
GO
-- events are inserted out of date order on purpose; dates are yyyymmdd strings
INSERT INTO PersonEvents
SELECT 1, '20110312', 'sick' UNION ALL
SELECT 1, '20130212', NULL UNION ALL
SELECT 1, '20120412', 'holiday' UNION ALL
SELECT 2, '20110512', 'new baby' UNION ALL
SELECT 2, '20130612', NULL UNION ALL
SELECT 2, '20120712', 'had enough' UNION ALL
SELECT 3, '20110812', 'pregnant' UNION ALL
SELECT 3, '20130912', NULL UNION ALL
SELECT 4, '20121012', NULL
GO
--SELECT *
--FROM Person
--SELECT *
--FROM PersonEvents
--GO
-- ROW_NUMBER version: rank 1 = most recent event, rank 2 = the one before it
WITH SortedEvents AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY PersonId ORDER BY DateStarted DESC) AS EventOrder
FROM PersonEvents
)
SELECT p.*, MostRecent.DateStarted AS MemberSince, NextRecent.ReasonForLeaving AS ReasonForChange
FROM Person p
LEFT OUTER JOIN SortedEvents AS MostRecent ON p.Id = MostRecent.PersonId AND MostRecent.EventOrder = 1
LEFT OUTER JOIN SortedEvents AS NextRecent ON p.Id = NextRecent.PersonId AND NextRecent.EventOrder = 2
GO
-- TOP 1 version for comparison; it only fetches MemberSince and returns a
-- fixed placeholder for ReasonForChange
SELECT p.*,
(
SELECT TOP 1 DateStarted
FROM PersonEvents pe
WHERE pe.PersonId = p.Id
ORDER BY DateStarted DESC
) AS MemberSince,
'unknown' AS ReasonForChange
FROM Person p
GO
-- clean up
DROP TABLE Person
DROP TABLE PersonEvents
GO
For the last event and previous event date:
-- Each person's latest event row, shown with the start date of the event before it.
SELECT ID, NAME, NextToMostEventDate, ReasonForLeaving
FROM PersonEvents AS pe
INNER JOIN (
    -- per person: the latest start date and the latest start date before that
    SELECT earlier.PersonId,
           TheMostEventDate,
           MAX(earlier.DateStarted) AS NextToMostEventDate
    FROM PersonEvents AS earlier
    INNER JOIN (
        SELECT PersonId, MAX(DateStarted) AS TheMostEventDate
        FROM PersonEvents
        GROUP BY PersonId
    ) AS latest
        ON latest.PersonId = earlier.PersonId
    WHERE earlier.DateStarted < TheMostEventDate
    GROUP BY earlier.PersonId, TheMostEventDate
) AS pe12
    ON pe12.PersonId = pe.PersonId
INNER JOIN Person
    ON Id = pe.PersonId
WHERE pe.DateStarted = TheMostEventDate
For the last event date and previous event:
-- Each person's second-latest event row, shown with the start date of the latest event.
SELECT ID, NAME, TheMostEventDate, ReasonForLeaving
FROM PersonEvents AS pe
INNER JOIN (
    -- per person: the latest start date and the latest start date before that
    SELECT earlier.PersonId,
           TheMostEventDate,
           MAX(earlier.DateStarted) AS NextToMostEventDate
    FROM PersonEvents AS earlier
    INNER JOIN (
        SELECT PersonId, MAX(DateStarted) AS TheMostEventDate
        FROM PersonEvents
        GROUP BY PersonId
    ) AS latest
        ON latest.PersonId = earlier.PersonId
    WHERE earlier.DateStarted < TheMostEventDate
    GROUP BY earlier.PersonId, TheMostEventDate
) AS pe12
    ON pe12.PersonId = pe.PersonId
INNER JOIN Person
    ON Id = pe.PersonId
WHERE pe.DateStarted = NextToMostEventDate

How do I build a SQL query to show two columns of different date ranges?

I'm trying to build a query against an Oracle 11g database to use in a report. I need to get the data from two tables, CONTACT and CONTACT_EXT, and compare the total number of contacts over two date ranges. The tables are joined on matching IDs.
CONTACT:
ID | DATE
----------
1 12/12/2010
2 12/11/2010
3 14/09/2011
CONTACT_EXT
ID | TYPE
----------
1 MAIL
2 FAX
3 FAX
So for example if I set period A to be between 01/01/2010 and 12/12/2010 and period B to be between 01/01/2011 and 11/11/2011
TYPE | PERIOD A | PERIOD B | TOTAL
MAIL 1 0 1
FAX 1 1 2
SQL> create table contact (id,cdate)
2 as
3 select 1, date '2010-12-12' from dual union all
4 select 2, date '2010-11-12' from dual union all
5 select 3, date '2011-09-14' from dual
6 /
Table created.
SQL> create table contact_ext (id,type)
2 as
3 select 1, 'MAIL' from dual union all
4 select 2, 'FAX' from dual union all
5 select 3, 'FAX' from dual
6 /
Table created.
SQL> select ce.type
2 , count(case when c.cdate between date '2010-01-01' and date '2010-12-12' then 1 end) period_a
3 , count(case when c.cdate between date '2011-01-01' and date '2011-11-11' then 1 end) period_b
4 , count(*) total
5 from contact c
6 inner join contact_ext ce on (c.id = ce.id)
7 group by ce.type
8 /
TYPE PERIOD_A PERIOD_B TOTAL
---- ---------- ---------- ----------
FAX 1 1 2
MAIL 1 0 1
2 rows selected.
Regards,
Rob.
Just do a self join:
-- Count contacts per type in each period separately, then combine the counts.
-- A full outer join keeps a type that has contacts in only one of the two
-- periods (an inner join would drop it); the missing count is reported as 0.
-- NOTE(review): the date column is compared with string literals, which
-- relies on implicit conversion -- confirm for the target database.
select type,
       coalesce(period_a, 0) as period_a,
       coalesce(period_b, 0) as period_b,
       coalesce(period_a, 0) + coalesce(period_b, 0) as total
from(
select type,count(1) as period_a
from contact_ext
left join contact
using(id)
where date>='20100101' and date<='20101212'
group by type   -- grouped by name: "group by 1" is a constant, not a column position, in Oracle
)a
full outer join(
select type,count(1) as period_b
from contact_ext
left join contact
using(id)
where date>='20110101' and date<='20111111'
group by type
)b
using(type);
Ans 1: In the WHERE clause, test whether the date falls in period A (between 01/01/2010 and 12/12/2010) OR in period B (between 01/01/2011 and 11/11/2011).
That is, combine the two range checks with an OR condition.
Ans 2: You can UNION two separate SELECT statements, one for period A and one for period B.

SQL query to group based on sum

I have a simple table with values that I want to chunk/partition into distinct groups based on the sum of those values (up to a certain limit group sum total).
e.g.,. imagine a table like the following:
Key Value
-----------
A 1
B 4
C 2
D 2
E 5
F 1
And I would like to group into sets such that no one grouping's sum will exceed some given value (say, 5).
The result would be something like:
Group Key Value
-------------------
1 A 1
B 4
--------
Total: 5
2 C 2
D 2
--------
Total: 4
3 E 5
--------
Total: 5
4 F 1
--------
Total: 1
Is such a query possible?
While I am inclined to agree with the comments that this is best done outside of SQL, here is some SQL which would seem to do roughly what you're asking:
-- Finds runs of consecutive keys whose values sum to at most 5 and that
-- cannot be widened into a longer run with a larger sum that still fits.
-- mytable: the sample rows from the question, inlined as a CTE.
with mytable AS (
select 'A' AS [Key], 1 AS [Value] UNION ALL
select 'B', 4 UNION ALL
select 'C', 2 UNION ALL
select 'D', 2 UNION ALL
select 'E', 5 UNION ALL
select 'F', 1
)
-- Sums: for every key pair T1K <= T2K, the total of all values whose key
-- lies between the two (inclusive).
, Sums AS (
select T1.[Key] AS T1K
, T2.[Key] AS T2K
, (SELECT SUM([Value])
FROM mytable T3
WHERE T3.[Key] <= T2.[Key]
AND T3.[Key] >= T1.[Key]) AS TheSum
from mytable T1
inner join mytable T2
on T2.[Key] >= T1.[Key]
)
select S1.T1K AS StartKey
, S1.T2K AS EndKey
, S1.TheSum
from Sums S1
-- anti-join: S2 is any run that contains S1, has a larger sum and still fits the limit
left join Sums S2
on (S1.T1K >= S2.T1K and S1.T2K <= S2.T2K)
and S2.TheSum > S1.TheSum
and S2.TheSum <= 5
where S1.TheSum <= 5
-- keep S1 only when no such containing run exists
AND S2.T1K IS NULL
When I ran this code on SQL Server 2008 I got the following results:
StartKey EndKey Sum
A B 5
C D 4
E E 5
F F 1
It should be straightforward to construct the required groups from these results.
If you want to have only two members or less in each set, you can use the following query:
-- Pairs each row with every other row whose combined value is at most 5.
-- A row with no such partner is kept alone (K2 is NULL, V2 is 0).
Select
A.[Key] as K1 ,
B.[Key] as K2 ,
isnull(A.value,0) as V1 ,
isnull(B.value,0) as V2 ,
-- NULL-guarded like V1 / V2, so a row without a partner reports its own value instead of NULL
(isnull(A.value,0)+isnull(B.value,0)) as Total
from Table_1 as A left join Table_1 as B
on A.value+B.value<=5 and A.[Key]<>B.[Key]
For finding sets having more members, you can continue to use joins.

Grouping Hierarchical data (parentID+ID) and running sum?

I have the following data:
ID parentID Text Price
1 Root
2 1 Flowers
3 1 Electro
4 2 Rose 10
5 2 Violet 5
6 4 Red Rose 12
7 3 Television 100
8 3 Radio 70
9 8 Webradio 90
I am trying to group this data with Reporting Services 2008 and have a sum of the price per group of level 1 (Flowers/Electro) and for level 0 (Root).
I have a table grouped on [ID] with a recursive parent of [parentID] and I am able to calculate the sum for level 0 (just one more row in the table outside the group), but somehow I am not able to create sums per group, as SSRS "creates" groups per level. My desired result looks like this:
ID Text Price
1 Root
|2 Flowers
|-4 Rose 10
|-5 Violet 5
| |-6 Red Rose 12
| Group Sum-->27
|3 Electro
|-7 Television 100
|-8 Radio 70
|-9 Webradio 90
Group Sum-->260
----------------------
Total 287
(indentation of ID just added for level clarification)
With my current approach I cannot get the group sums, so I figured out I would need the following data structure:
ID parentID Text Price level0 level1 level2 level3
1 Root 1
2 1 Flowers 1 1
3 1 Electro 1 2
4 2 Rose 10 1 1 1
5 2 Violet 5 1 1 2
6 4 Red Rose 12 1 1 1 1
7 3 Television 100 1 2 1
8 3 Radio 70 1 2 2
9 8 Webradio 90 1 2 2 1
When having the above structure I can create an outer grouping of level0, with child groupings level1, level2, level3 accordingly . When now having a "group sum" on level1, and the total sum outside the group I have EXACTLY what I want.
My question is the following:
How do I either achieve my desired result with my current data structure, or how do I convert my current data structure (outer left joins?) into the "new data structure" temporarily - so I can run my report off of the temp table?
Thanks for taking your time,
Dennis
-- Push every row's price up to each of its ancestors, then total per node.
WITH q (id, parentId, price) AS
(
    -- anchor: every node carries its own price
    SELECT t.id, t.parentId, t.price
    FROM mytable AS t
    UNION ALL
    -- recursive step: hand the same price on to the parent node
    SELECT parent.id, parent.parentID, q.price
    FROM q
    INNER JOIN mytable AS parent
        ON parent.id = q.parentID
)
SELECT q.id, SUM(q.price)
FROM q
GROUP BY q.id
Update:
A test script to check:
-- Test data: the hierarchy from the question in a session-local temp table.
-- "DECLARE #table TABLE (...)" is not valid T-SQL: the # prefix denotes a temp
-- table, which is created with CREATE TABLE (table variables are named @name).
CREATE TABLE #table (id INT NOT NULL PRIMARY KEY, parentID INT, txt VARCHAR(200) NOT NULL, price MONEY)
INSERT
INTO #table (id, parentID, txt, price)
SELECT 1, NULL, 'Root', NULL
UNION ALL
SELECT 2, 1, 'Flowers', NULL
UNION ALL
SELECT 3, 1, 'Electro', NULL
UNION ALL
SELECT 4, 2, 'Rose', 10
UNION ALL
SELECT 5, 2, 'Violet', 5
UNION ALL
SELECT 6, 4, 'Red Rose', 12
UNION ALL
SELECT 7, 3, 'Television', 100
UNION ALL
SELECT 8, 3, 'Radio', 70
UNION ALL
SELECT 9, 8, 'Webradio', 90;
-- Roll each node's price up through all of its ancestors and show the
-- per-node total (psum) next to the node's own row.
WITH q (id, parentId, price) AS
(
    -- anchor: every node carries its own price
    SELECT src.id, src.parentId, src.price
    FROM #table AS src
    UNION ALL
    -- recursive step: hand the same price on to the parent node
    SELECT parent.id, parent.parentID, q.price
    FROM q
    INNER JOIN #table AS parent
        ON parent.id = q.parentID
)
SELECT t.*, totals.psum
FROM #table AS t
INNER JOIN (
    SELECT q.id, SUM(q.price) AS psum
    FROM q
    GROUP BY q.id
) AS totals
    ON totals.id = t.id
Here's the result:
1 NULL Root NULL 287,00
2 1 Flowers NULL 27,00
3 1 Electro NULL 260,00
4 2 Rose 10,00 22,00
5 2 Violet 5,00 5,00
6 4 Red Rose 12,00 12,00
7 3 Television 100,00 100,00
8 3 Radio 70,00 160,00
9 8 Webradio 90,00 90,00
I found a really ugly way to do what I want - maybe there is something better?
-- Walks up to three ancestors (B = parent, C = grandparent, D = great-
-- grandparent) and emits the ID found at each depth, counted from the top of
-- the chain: LEV0 is the topmost ancestor reached, deeper levels follow, and
-- levels below the row itself are NULL.
SELECT A.Text, A.Price,
    CASE
        WHEN D.Text IS NOT NULL THEN D.ID
        WHEN C.Text IS NOT NULL THEN C.ID
        WHEN B.Text IS NOT NULL THEN B.ID
        ELSE A.ID
    END AS LEV0,
    CASE
        WHEN D.Text IS NOT NULL THEN C.ID
        WHEN C.Text IS NOT NULL THEN B.ID
        WHEN B.Text IS NOT NULL THEN A.ID
        ELSE NULL
    END AS LEV1,
    CASE
        WHEN D.Text IS NOT NULL THEN B.ID
        WHEN C.Text IS NOT NULL THEN A.ID
        ELSE NULL
    END AS LEV2,
    CASE
        WHEN D.Text IS NOT NULL THEN A.ID
        ELSE NULL
    END AS LEV3
FROM dbo.testOld AS A
LEFT OUTER JOIN dbo.testOld AS B
    ON A.parentID = B.ID
LEFT OUTER JOIN dbo.testOld AS C
    ON B.parentID = C.ID
LEFT OUTER JOIN dbo.testOld AS D
    ON C.parentID = D.ID
Output of this is:
Text Price LEV0 LEV1 LEV2 LEV3
---------- ----------- ----------- ----------- ----------- -----------
Root NULL 1 NULL NULL NULL
Flowers NULL 1 3 NULL NULL
Electro NULL 1 4 NULL NULL
Television 100 1 4 5 NULL
Radio 70 1 4 6 NULL
Rose 10 1 3 7 NULL
Violet 5 1 3 8 NULL
Webradio 90 1 4 5 14
Red Rose 12 1 3 7 15
With this structure I can go ahead and create 4 nested groups on the LEV0-3 columns including subtotals per group (as shown above in my desired result).