SQL Server 2005: How to join table rows only once - sql

I think I've seen answers for similar questions for MySQL, but I'm struggling to find an answer applicable to SQL Server 2005.
So I have a table like this:
| ID | RelationalID | Year
----------------------------
| 1 | A | 2014
| 2 | A | 2014
| 3 | B | 2014
| 4 | A | 2015
| 5 | B | 2015
And I'd like a result like this when I join the table to itself where RelationalID matches but the year is different:
| 2014_ID | 2015_ID | RelationalID |
------------------------------------
| 1 | 4 | A |
| 2 | NULL | A |
| 3 | 5 | B |
But a standard JOIN ends up getting duplicate matches:
| 2014_ID | 2015_ID | RelationalID |
------------------------------------
| 1 | 4 | A |
| 2 | 4 | A |
| 3 | 5 | B |
Is there a way to join two tables where the matches from the right table are joined only once in SQL Server 2005?
I tried this query with no success:
-- Asker's attempt: self-join myTable on RelationalID, 2014 rows on the left, 2015 rows on the right.
-- The join condition is RelationalID alone, so every 2014 row pairs with every 2015 row
-- that shares its RelationalID (this is the source of the repeated 2015 ids).
SELECT * FROM myTable
LEFT JOIN (SELECT * FROM myTable) AS t ON t.RelationalID = myTable.RelationalID
-- The t.Year test sits in WHERE, so rows where t is NULL are filtered out and
-- the LEFT JOIN effectively behaves like an inner join.
WHERE myTable.Year = 2014 and t.Year = 2015

You can get the result using ROW_NUMBER, but you need a rule for how the numbers are assigned; I assumed the order is based on the Id.
-- Number the rows inside each (RelationalId, year) group by ascending Id, then
-- pair the n-th 2014 row with the n-th 2015 row of the same RelationalId.
;WITH numbered AS
(
    SELECT
        Id,
        RelationalId,
        [year],
        ROW_NUMBER() OVER (PARTITION BY RelationalId, [year] ORDER BY Id) AS rn
    FROM [YourTable]
)
SELECT
    y2014.Id AS Id_2014,
    y2015.Id AS Id_2015,
    y2014.RelationalId
FROM numbered AS y2014
LEFT JOIN numbered AS y2015
    ON  y2014.RelationalId = y2015.RelationalId
    AND y2014.rn = y2015.rn
    -- the year test stays in ON so unmatched 2014 rows survive with a NULL Id_2015
    AND y2015.[year] = 2015
WHERE y2014.[year] = 2014
This is based on TMNT2014's fiddle

The SQL below gives the result you are looking for but, as I said before, its complexity depends on the original set of data you have in your table. Here is the SQL Fiddle - http://sqlfiddle.com/#!3/d6300/24 - Good luck!
-- CTE_Union stacks the 2014 rows (Id2015 left NULL) on top of the 2015 rows
-- (Id2014 left NULL), so each source row appears once with only "its" year filled in.
;WITH CTE_Union AS
(SELECT
a.Id AS Id2014,
NULL AS Id2015,
a.RelationalId
FROM [YourTable] a
WHERE a.Year = 2014
UNION
SELECT
NULL AS Id2014,
b.Id AS Id2015,
b.RelationalId
FROM [YourTable] b
WHERE b.Year = 2015)
-- First column: a 2015 row (Id2014 IS NULL) borrows the smallest 2014 Id of its RelationalId.
-- Second column: only the 2014 row holding the smallest Id2014 of its RelationalId borrows the
-- smallest 2015 Id; other 2014 rows keep NULL. DISTINCT then collapses the identical rows
-- produced from both halves of the union.
-- NOTE(review): only the smallest ids of each year are paired with each other; a second
-- 2014/2015 pair within one RelationalId would not be lined up by this logic - confirm
-- against the real data before relying on it.
SELECT Distinct CASE WHEN Id2014 IS NULL THEN (SELECT MIN(Id2014) FROM CTE_Union C WHERE C.RelationalId =M.RelationalId) ELSE Id2014 END AS ID2014 ,
CASE WHEN Id2015 IS NULL AND Id2014 = (SELECT MIN(Id2014) FROM CTE_Union C2 WHERE C2.RelationalId =M.RelationalId) THEN (SELECT MIN(Id2015) FROM CTE_Union C WHERE C.RelationalId =M.RelationalId) ELSE Id2015 END
,RelationalID
FROM CTE_Union M

-- Self-contained demo: load the sample rows into a table variable, rank the
-- 2014 and 2015 rows separately per RelationalID, then pair equal ranks.
-- Table variables must be named with a leading @ (# denotes a temp table and
-- cannot be used with DECLARE ... TABLE).
DECLARE @MyTable TABLE
(
    ID INT,
    RelationalID VARCHAR(10),
    [Year] INT
)

-- SQL Server 2005 has no multi-row VALUES constructor (added in 2008),
-- so the sample rows are inserted via SELECT ... UNION ALL.
INSERT INTO @MyTable (ID, RelationalID, [Year])
SELECT 1, 'A', 2014 UNION ALL
SELECT 2, 'A', 2014 UNION ALL
SELECT 3, 'B', 2014 UNION ALL
SELECT 4, 'A', 2015 UNION ALL
SELECT 5, 'B', 2015

;WITH TEST AS
(
    -- 2014 rows: Id2015 stays NULL; Ranked numbers them per RelationalID by ID
    SELECT
        a.Id AS Id2014,
        NULL AS Id2015,
        a.RelationalId,
        RANK() OVER (PARTITION BY RelationalId ORDER BY ID) Ranked
    FROM @MyTable a
    WHERE a.Year = 2014
    UNION
    -- 2015 rows: Id2014 stays NULL; ranked the same way within their own year
    SELECT
        NULL AS Id2014,
        b.Id AS Id2015,
        b.RelationalId,
        RANK() OVER (PARTITION BY RelationalId ORDER BY ID) Ranked
    FROM @MyTable b
    WHERE b.Year = 2015
)
SELECT
    t1.Id2014,
    t2.Id2015,
    t1.RelationalID
FROM TEST t1
LEFT JOIN TEST t2
    ON t1.Ranked = t2.Ranked
    AND t1.RelationalID = t2.RelationalID
    -- restrict the right side to 2015 rows inside ON so that 2014 rows
    -- without a partner are kept with a NULL Id2015
    AND t2.Id2015 IS NOT NULL
WHERE t1.Id2014 IS NOT NULL
ORDER BY t1.Id2014
I used a union and then ranked each side by relational id and left joined them.
Here is the output:
Id2014 Id2015 RelationalID
1 4 A
2 NULL A
3 5 B

There are probably a few ways to solve this but below shows an example of utilizing "Derived Tables" in a query.
-- Pair 2014 and 2015 rows of the same RelationalId using two derived tables.
-- Each derived table numbers its rows per RelationalId by Id; the n-th 2014
-- row is matched with the n-th 2015 row. (The earlier MAX(...) + INNER JOIN
-- form collapsed every RelationalId to a single row and dropped 2014 rows
-- without a 2015 partner, so it could not return the requested result.)
SELECT
    q1.Id AS [2014_Id],
    q2.Id AS [2015_Id],
    q1.RelationalId
FROM (SELECT
          a.Id,
          a.RelationalId,
          ROW_NUMBER() OVER (PARTITION BY a.RelationalId ORDER BY a.Id) AS rn
      FROM [table] a
      WHERE a.Year = 2014) q1
-- LEFT JOIN keeps 2014 rows that have no 2015 counterpart ([2015_Id] is NULL)
LEFT JOIN (SELECT
               b.Id,
               b.RelationalId,
               ROW_NUMBER() OVER (PARTITION BY b.RelationalId ORDER BY b.Id) AS rn
           FROM [table] b
           WHERE b.Year = 2015) q2
    ON q2.RelationalId = q1.RelationalId
   AND q2.rn = q1.rn

Related

SQL select all rows in a single row's "history"

I have a table that looks like this:
ID | PARENT_ID
--------------
0 | NULL
1 | 0
2 | NULL
3 | 1
4 | 2
5 | 4
6 | 3
Being an SQL noob, I'm not sure if I can accomplish what I would like in a single command.
What I would like is to start at row 6, and recursively follow the "history", using the PARENT_ID column to reference the ID column.
The result (in my mind) should look something like:
6|3
3|1
1|0
0|NULL
I already tried something like this:
-- Asker's attempt. The comma join pairs every row of the table with every row
-- of the same table; the WHERE then keeps pairs where T1.ID is 6 OR both rows
-- share a PARENT_ID. Nothing here follows PARENT_ID -> ID from one row to the
-- next, so the chain 6 -> 3 -> 1 -> 0 is never walked.
SELECT T1.ID
FROM Table T1, Table T2
WHERE T1.ID = 6
OR T1.PARENT_ID = T2.PARENT_ID;
but that just gave me a strange result.
With a recursive cte.
If you want to start from the maximum id:
-- Walk the PARENT_ID chain upwards, starting from the row with the largest id.
WITH RECURSIVE cte (id, parent_id) AS (
    -- anchor: the single row with the highest id
    SELECT start_row.*
    FROM (
        SELECT *
        FROM tablename
        ORDER BY id DESC
        LIMIT 1
    ) start_row
    UNION ALL
    -- recursive step: the row whose id equals the previous row's parent_id
    SELECT parent_row.*
    FROM tablename parent_row
    INNER JOIN cte child_row
        ON parent_row.id = child_row.parent_id
)
SELECT * FROM cte
See the demo.
If you want to start specifically from id = 6:
-- Walk the PARENT_ID chain upwards, starting from the row with id = 6.
WITH RECURSIVE cte (id, parent_id) AS (
    -- anchor: the starting row
    SELECT *
    FROM tablename
    WHERE id = 6
    UNION ALL
    -- recursive step: the row whose id equals the previous row's parent_id
    SELECT parent_row.*
    FROM tablename parent_row
    INNER JOIN cte child_row
        ON parent_row.id = child_row.parent_id
)
SELECT * FROM cte;
See the demo.
Results:
| id | parent_id |
| --- | --------- |
| 6 | 3 |
| 3 | 1 |
| 1 | 0 |
| 0 | |

SQL, choosing max date and if two results have a max date, choose the one with the max weight

ID | DATE_I | Weight
1 | 10/04/2014 08:13:05 | 10
2 | 02/04/2014 08:13:05 | 15
3 | 08/04/2014 08:13:05 | 10
4 | 13/04/2014 08:13:05 | 12
5 | 13/04/2014 08:13:05 | 10
My SQL request should give me row 4.
-- Asker's attempt. It selects the plain column id next to the aggregate
-- max(DATE_I) without any GROUP BY, and the subquery's HAVING is given a bare
-- aggregate (max(DATE_I)) rather than a comparison, so nothing ties the
-- subquery to the row(s) carrying the latest date.
-- NOTE(review): how (or whether) this parses depends on the engine - not verified here.
select id, max(DATE_I)
from MyTable m
where m.Weight > (select m2.Weight from MyTable m2 having max(DATE_I));
Try this:
-- Return the row(s) that have the latest DATE_I and, among those, the highest Weight.
select y.ID, x.maxdate, x.maxweight
from
(
    -- x: the latest date together with the highest weight recorded on that date
    select a.maxdate, Max(b.Weight) as maxweight
    from
    (
        select max(date_I) as maxdate
        from mytable
    ) a
    inner join mytable b on a.maxdate = b.date_I
    group By a.maxdate
) x
inner join mytable y
    -- match on the date as well as the weight: joining on weight alone would
    -- also return rows from earlier dates that happen to share that weight
    on  y.date_I = x.maxdate
    and y.weight = x.maxweight
Demo Here
Order your rows on DATE_I and Weight descending and get the first row.
Sample code for SQL Server.
-- Sort newest first, heaviest first among equal dates, and keep only the first row.
SELECT TOP (1)
    ID,
    DATE_I,
    Weight
FROM mytable
ORDER BY
    DATE_I DESC,
    Weight DESC;

how to query range?

Raw Data
| ID | STATUS |
| 1 | A |
| 2 | A |
| 3 | B |
| 4 | B |
| 5 | B |
| 6 | A |
| 7 | A |
| 8 | A |
| 9 | C |
Result
| START | END |
| 1 | 2 |
| 6 | 8 |
Range of STATUS A
How to query ?
This should give you the correct ranges:
-- Find the consecutive ID ranges of STATUS 'A'.
-- Inner query: for every row t1, max_id is the largest ID of the same STATUS
-- that can be reached from t1 without passing a row of another STATUS
-- (or t1's own ID when there is none).
-- Outer query: rows of one run share the same max_id, so grouping by it and
-- taking MIN(ID) gives the start of each range.
SELECT
    STATUS,
    MIN(ID) AS min_id,
    max_id
FROM (
    SELECT
        t1.STATUS,
        t1.ID,
        COALESCE(MAX(t2.ID), t1.ID) max_id
    FROM
        yourtable t1 LEFT JOIN yourtable t2
        ON t1.STATUS=t2.STATUS AND t1.ID<t2.ID
        -- The "nothing of another status in between" test belongs in ON:
        -- placed in WHERE it removed t1 completely whenever all of its later
        -- same-status rows lay beyond another status, so a single-row run
        -- followed by a later run of the same status disappeared.
        AND NOT EXISTS (SELECT NULL
                        FROM yourtable t3
                        WHERE
                            t3.STATUS!=t1.STATUS
                            AND t3.ID>t1.ID AND t3.ID<t2.ID)
    GROUP BY
        t1.ID,
        t1.STATUS
) s
WHERE
    status = 'A'
GROUP BY
    STATUS,
    max_id
Please see fiddle here.
You are probably better off with a cursor-based solution or a client-side function.
However, if you were using Oracle - the following would work.
-- Builds the ranges by numbering the range starts and the range ends
-- separately (ROWNUM over the same STATUS, ID ordering) and joining the n-th
-- start to the n-th end.
-- Both tests use ID - 1 / ID + 1, i.e. a run is a sequence of IDs that differ by exactly 1.
-- NOTE(review): NOT IN returns no rows if the subquery yields a NULL ID - assumes ID is never NULL; confirm.
WITH LOWER_VALS AS
( -- All the Ids with no immediate predecessor
SELECT ROWNUM AS RN, STATUS, ID AS LOWER FROM
(
SELECT STATUS, ID
FROM RAWDATA RD1
WHERE RD1.ID -1 NOT IN
(SELECT ID FROM RAWDATA PRED_TABLE WHERE PRED_TABLE.STATUS = RD1.STATUS)
-- ordered before ROWNUM is applied so RN follows STATUS, ID order
ORDER BY STATUS, ID
)
) ,
UPPER_VALS AS
( -- All the Ids with no immediate successor
SELECT ROWNUM AS RN, STATUS, ID AS UPPER FROM
(
SELECT STATUS, ID
FROM RAWDATA RD2
WHERE RD2.ID +1 NOT IN
(SELECT ID FROM RAWDATA SUCC_TABLE WHERE SUCC_TABLE.STATUS = RD2.STATUS)
-- same ordering as LOWER_VALS so equal RN values refer to the same run
ORDER BY STATUS, ID
)
)
-- Pair each start with the end that carries the same sequence number.
SELECT
L.STATUS, L.LOWER, U.UPPER
FROM
LOWER_VALS L
JOIN UPPER_VALS U ON
U.RN = L.RN;
Results in the set
A 1 2
A 6 8
B 3 5
C 9 9
http://sqlfiddle.com/#!4/10184/2
There is not a lot to go on in your question, but I think this might work. I am using T-SQL because I don't know which database you are using.
-- One row per consecutive run of Status 'A' (a plain MIN/MAX over all 'A' rows
-- would merge every run into a single 1..8 range).
-- island = position among all rows minus position among rows of the same
-- Status; the difference stays constant inside a run and changes whenever
-- rows of another Status sit in between.
SELECT
      min(ID) AS [START]
    , max(ID) AS [END]
FROM (
    SELECT
          ID
        , [Status]
        , ROW_NUMBER() OVER (ORDER BY ID)
          - ROW_NUMBER() OVER (PARTITION BY [Status] ORDER BY ID) AS island
    FROM RawData
) AS numbered
WHERE [Status] = 'A'
GROUP BY island
ORDER BY min(ID)

Sequence grouping in TSQL

I'm trying to group data in sequence order. Say I have the following table:
| 1 | A |
| 1 | A |
| 1 | B |
| 1 | B |
| 1 | C |
| 1 | B |
I need the SQL query to output the following:
| 1 | A | 1 |
| 1 | A | 1 |
| 1 | B | 2 |
| 1 | B | 2 |
| 1 | C | 3 |
| 1 | B | 4 |
The last column is a group number that is incremented in each group. The important thing to note is that rows 3, 4 and 5 contain the same data which should be grouped into 2 groups not 1.
For MSSQL2008:
Suppose you have a SampleStatuses table:
Status Date
A 2014-06-11
A 2014-06-14
B 2014-06-25
B 2014-07-01
A 2014-07-06
A 2014-07-19
B 2014-07-21
B 2014-08-13
C 2014-08-19
you write the following:
-- Walk SampleStatuses in RowNumber order and bump GroupNumber every time the
-- Status differs from the previous row's Status.
;with
cte as (
    -- anchor: the first row starts group 1. A TOP 1 ... ORDER BY cannot be
    -- followed by UNION ALL in T-SQL, so the first row is picked by its
    -- RowNumber instead.
    select RowNumber, 1 as GroupNumber, [Status], [Date]
    from SampleStatuses
    where RowNumber = (select min(RowNumber) from SampleStatuses)
    union all
    -- recursive step: c2 is the previous row (already in cte), c1 the next one
    select c1.RowNumber,
           case when c2.Status <> c1.Status then c2.GroupNumber + 1 else c2.GroupNumber end as GroupNumber,
           c1.[Status], c1.[Date]
    from cte c2 join SampleStatuses c1 on c1.RowNumber = c2.RowNumber + 1
)
select * from cte
-- one recursion level per row: lift the default limit of 100 levels so larger
-- tables do not abort (the walk ends by itself when no RowNumber + 1 exists)
option (maxrecursion 0);
you get this result:
RowNumber GroupNumber Status Date
1 1 A 2014-06-11
2 1 A 2014-06-14
3 2 B 2014-06-25
4 2 B 2014-07-01
5 3 A 2014-07-06
6 3 A 2014-07-19
7 4 B 2014-07-21
8 4 B 2014-08-13
9 5 C 2014-08-19
The normal way you would do what you want is the dense_rank function:
-- Number the distinct ([key], val) combinations 1, 2, 3, ... in sort order.
-- KEY is a reserved word in T-SQL, so the column name is bracketed.
select [key], val,
       dense_rank() over (order by [key], val) as GroupNum
from t
However, this does not address the problem of separating the last groups.
To handle this, I have to assume there is an "id" column. Tables, in SQL, do not have an ordering, so I need the ordering. If you are using SQL Server 2012, then you can use the lag() function to get what you need. Use the lag to see if the key, val pair is the same on consecutive rows:
-- Assign a running group number in id order: the number increases each time
-- the ([key], val) pair differs from the pair on the previous row.
with t1 as (
    select id, [key], val,
           -- 0 when this row repeats the previous row's pair, 1 when it starts
           -- a new group. lag() yields NULL on the first row (and NULL values
           -- never compare equal), so those rows fall into the ELSE branch.
           (case when [key] = lag([key], 1) over (order by id) and
                      val = lag(val, 1) over (order by id)
                 then 0
                 else 1
            end) as StartsNewGroup
    from t
)
select id, [key], val,
       -- running total of group starts = group number (1, 2, 3, ...)
       sum(StartsNewGroup) over (order by id rows unbounded preceding) as GroupNum
-- read from the CTE: the flag column does not exist in the base table t
from t1
Without SQL Server 2012 (which has cumulative sums), you have to do a self-join to identify the beginning of each group:
-- Rows that start a group: no row with id - 1 carries the same ([key], val) pair.
select t.*
from t left outer join
     t tprev
     on t.id = tprev.id + 1 and t.[key] = tprev.[key] and t.val = tprev.val
-- anti-join: keep only the rows for which no matching previous row was found
where tprev.id is null
With this, assign the group as the minimum id using a join:
-- Label every row with the id of the row that starts its group
-- (i.e. the smallest id inside the group).
select t.id, t.[key], t.val,
       -- of all group starts at or before this row, the closest one (largest
       -- id) is the start of this row's own group; MIN would return the very
       -- first group start for every row
       max(tgrp.id) as GroupId
from t left outer join
     (-- group starts: rows whose predecessor (id - 1) has a different pair
      select cur.*
      from t cur left outer join
           t tprev
           on cur.id = tprev.id + 1 and cur.[key] = tprev.[key] and cur.val = tprev.val
      where tprev.id is null
     ) tgrp
     on t.id >= tgrp.id
group by t.id, t.[key], t.val
If you want these to be consecutive numbers, then put them in a subquery and use dense_rank().
This will give you rankings on your columns.
It will not give you 1,2,3 however.
It will give you 1,3,6 etc based on how many in each grouping
-- Rank the rows by (a, b); rows with equal (a, b) share a rank and the
-- following rank skips ahead by the number of tied rows.
SELECT a, b, RANK() OVER (ORDER BY a, b)
FROM table1
See this SQLFiddle for a clearer idea of what I mean: http://sqlfiddle.com/#!3/0f201/2/0

MSSQL: Only last entry in GROUP BY (with id)

Following / copying computhomas's question, but adding some twists...
I have the following table in MSSQL2008
id | business_key | result | date
1 | 1 | 0 | 9
2 | 1 | 1 | 8
3 | 2 | 1 | 7
4 | 3 | n | 6
5 | 4 | 1 | 5
6 | 4 | 0 | 4
And now i want to group based on the business_key returning the complete entry with the newest date.
So my expected result is:
id | business_key | result | date
1 | 1 | 0 | 9
3 | 2 | 1 | 7
4 | 3 | n | 6
5 | 4 | 1 | 5
I also bet that there is a way to achieve that, i just can't find / see / think of it at the moment.
edit: sorry about this, I actually meant something else from original question I did. I felt like editing this might be better than accepting a solution and making another question. my original problem was that I am not filtering by id.
-- Keep, for every business_key, the row with the newest date:
-- rows are numbered per business_key from newest to oldest and only number 1 is returned.
;WITH ranked AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY [business_key] ORDER BY [date] DESC) AS [RowNum]
    FROM yourTable
)
SELECT ranked.*
FROM ranked
WHERE ranked.[RowNum] = 1
-- Returns, for every business_key, the row with the highest ID.
-- NOTE(review): this picks by ID, not by date. With the sample rows in the
-- question it returns ids 2, 3, 4, 6 rather than the expected 1, 3, 4, 5.
SELECT
*
FROM
mytable
WHERE
ID IN (SELECT MAX(ID) FROM mytable GROUP BY business_key)
-- Anti-join form of "highest id per business_key": T2 looks for a row of the
-- same business_key with a larger id, and only T1 rows without such a row are kept.
-- NOTE(review): like the MAX(ID) variant this selects by id, not by date, and
-- the date column is not part of the output.
SELECT
MAX(T1.id) AS [id],
T1.business_key,
T1.result
FROM
dbo.My_Table T1
LEFT OUTER JOIN dbo.My_Table T2 ON
T2.business_key = T1.business_key AND
T2.id > T1.id
WHERE
-- no row with a larger id exists for this business_key
T2.id IS NULL
GROUP BY T1.business_key,
T1.result
ORDER BY MAX(T1.id)
Edited based on clarifications
-- Return the full row holding the newest date of each business_key by joining
-- the table to its own per-key maximum date.
SELECT src.*
FROM My_Table src
INNER JOIN
(
    -- newest date per business_key
    SELECT
        [business_key],
        MAX([date]) AS MaxDate
    FROM My_Table
    GROUP BY [business_key]
) newest
    ON  src.business_key = newest.business_key
    AND src.[date] = newest.MaxDate
ORDER BY src.[id]
Assuming the combination of business_key & date is unique then....
Working example (3rd time is a charm):
-- Self-contained demo: load the sample rows into a table variable and return,
-- for every business_key, the row carrying its newest date.
-- Table variables must be named with a leading @ (# denotes a temp table and
-- cannot be used with DECLARE ... TABLE).
declare @src as table(id int, business_key int, result int, [date] int)

insert into @src (id, business_key, result, [date])
SELECT 1,1,0,9
UNION ALL SELECT 2,1,1,8
UNION ALL SELECT 3,2,1,7
UNION ALL SELECT 4,3,1,6
UNION ALL SELECT 5,4,1,5
UNION ALL SELECT 6,4,0,4

-- bkdate: newest date per business_key
;with bkdate(business_key,[date])
AS
(
    select business_key, MAX([date])
    from @src
    group by business_key
)
select src.*
from @src src
inner join bkdate
    ON  src.[date] = bkdate.[date]
    and src.business_key = bkdate.business_key
order by id
How about (edited after question change):
-- Two-step pick: find the newest date of every business_key, then - should
-- several rows share that date - keep the one with the highest id.
WITH latestdate AS (
    -- newest date per business_key
    SELECT
        business_key,
        maxdate = MAX(date)
    FROM the_table
    GROUP BY business_key
),
latest AS (
    -- highest id among the rows that carry the newest date of their business_key
    SELECT ID = MAX(id)
    FROM the_table
    INNER JOIN latestdate
        ON  the_table.business_key = latestdate.business_key
        AND the_table.date = latestdate.maxdate
    GROUP BY the_table.business_key
)
SELECT the_table.*
FROM the_table
INNER JOIN latest
    ON latest.id = the_table.id