Different JOIN values depending on the value of another column - sql

I have 2 tables j and c.
Both tables have columns port and sec.
For j.port = ABC, I want to join the 1st 6 characters of c.sec with the 1st 6 characters of j.sec.
For other j.ports, I want to join c.sec = j.sec
How can I do that ?
select c.port,j.port,c.sec,j.sec from j, c
where c.SEC =
CASE WHEN j.port = 'ABC' then SUBSTRING(c.sec,1,6) = SUBSTRING(j.sec,1,6)
--> something like this
else j.sec

Performance wise breaking this into two may be beneficial. The complex join condition will force nested loops otherwise.
SELECT c.port,
j.port,
c.sec,
j.sec
FROM j
JOIN c
ON LEFT(c.sec, 6) = LEFT(j.sec, 6)
WHERE j.port = 'ABC'
UNION ALL
SELECT c.port,
j.port,
c.sec,
j.sec
FROM j
JOIN c
ON c.sec = j.sec
WHERE j.port IS NULL
OR j.port <> 'ABC'
Or in this specific case you could also do
SELECT c.port,
j.port,
c.sec,
j.sec
FROM j
JOIN c
ON LEFT(c.sec, 6) = LEFT(j.sec, 6)
and (j.port = 'ABC' OR c.sec = j.sec)
This allows the main join to be a simple equi join that can use any of the join algorithms with a residual predicate on the result.
For the following example data both of these took about 700ms on my machine whereas I killed the three competing answers after 30 seconds each as none of them completed in that time.
create table c(port varchar(10), sec varchar(10) index ix clustered )
create table j(port varchar(10), sec varchar(10))
INSERT INTO c
SELECT TOP 1000000 LEFT(NEWID(),10) , LEFT(NEWID(),10)
FROM sys.all_objects o1, sys.all_objects o2
INSERT INTO j
SELECT TOP 1000000 LEFT(NEWID(),10) , LEFT(NEWID(),10)
FROM sys.all_objects o1, sys.all_objects o2

You could use:
select c.port,j.port,c.sec,j.sec
from j
join c
on (CASE WHEN j.port = 'ABC' and SUBSTRING(c.sec,1,6) = SUBSTRING(j.sec,1,6) then 1
WHEN c.sec = j.sec THEN 1
END) = 1
The same as:
select c.port,j.port,c.sec,j.sec
from j
join c
on (j.port = 'ABC' and SUBSTRING(c.sec,1,6) = SUBSTRING(j.sec,1,6))
or (c.SEC = j.sec AND (j.port <> 'ABC' or j.port IS NULL))

Related

What is a better alternative to procedural approach to produce a string based on columns?

I am working on making string based on column values. Example:
SELECT #Registered = a.hasRegistered
,#Subscribed = a.hasSubscribed
FROM AuthorData a
WHERE a.authorId = 10 --#AUTHORID
IF (#Registered = 1)
SET #ReturnString = #ReturnString + '0,'
IF (#Subscribed = 1)
SET #ReturnString = #ReturnString + '1,'
IF EXISTS (
SELECT TOP 1 name
FROM AUTHORDATA AS A
INNER JOIN AUHTORPROFILE B on A.AUTHORID=B.AUTHORID
INNER JOIN AUTHORHISTORY C ON B.AUTHORPROFILEID=C.AUTHORPROFILEID
WHERE ISNULL(AUTHORDATA.authorId, 0) = 10 --#AUTHORID
)
BEGIN
SET #ReturnString = #ReturnString + '10,'
END
select CONVERT(NVARCHAR(50), STUFF(#ReturnString, LEN(#ReturnString), 1, ''))
This performs the calculation for 1 author (based on WHERE clause IN 2 places -> authorId=10)
I can make this into a function and then call from a select query as follows:
SELECT *,FN_CALCUALTE_OPTIONS(AUTHORID)
FROM AUTHORDATA
I want to ask if there is any way so I can do the calculations in the above SELECT query rather than create the function?
I have tried:
SELECT *,CASE WHEN A.hasRegistered=1 THEN '0,' ELSE '' END +
CASE WHEN A.hasSubscribed=1 THEN '1,' ELSE '' END
FROM AUTHORDATA as A
How can I have the exists part of select?
Pretty sure you can put them all together into a single query as follows. If you didn't need the final stuff you would just have a regular query, but because you need to use the results twice in the stuff CROSS APPLY is a convenient way to calculate it once and use it twice. Also you can correlate your sub-query since you are using the same AuthorData record.
SELECT CONVERT(NVARCHAR(50), STUFF(X.ReturnString, LEN(X.ReturnString), 1, ''))
FROM AuthorData a
CROSS APPLY (
VALUES
(
CASE WHEN a.hasRegistered = 1 THEN '0,' ELSE '' END
+ CASE WHEN a.hasSubscribed = 1 THEN '1,' ELSE '' END
+ CASE WHEN EXISTS (
SELECT 1
FROM AUHTORPROFILE B
INNER JOIN AUTHORHISTORY C ON B.AUTHORPROFILEID = C.AUTHORPROFILEID
WHERE B.AUTHORID = a.AuthorId
)
THEN '10,' else '' end
)
) AS X (ReturnString)
WHERE a.AuthorId = 10 --#AUTHORID
The easiest way IMO is to move your criteria being found via EXISTS to an OUTER APPLY. Like so:
SELECT
CONVERT(NVARCHAR(50), STUFF(calc.ReturnString, LEN(calc.ReturnString), 1, ''))
FROM
AUTHORDATA AS A
OUTER APPLY (SELECT TOP (1)
1 AS Found
FROM
AUHTORPROFILE AS B
INNER JOIN AUTHORHISTORY AS C ON B.AUTHORPROFILEID = C.AUTHORPROFILEID
WHERE
B.authorId = A.authorId) AS lookup_author
/*outer apply here just for readibility in final select*/
OUTER APPLY (SELECT
CONCAT(CASE WHEN A.hasRegistered = 1 THEN '0,' ELSE '' END
,CASE WHEN A.hasRegistered = 1 THEN '1,' ELSE '' END
,CASE WHEN lookup_author.Found = 1 THEN '10,' ELSE '' END) AS ReturnString) AS calc;
Then you can use lookup_author.Found = 1 to determine that it was found in your lookup. From there, you just have to apply the rest of your conditions correctly via CASE statements and then use your final SELECT over the result.
You can put a single CASE statement and get data as given below. Make sure that you are covering every possible condition.
SELECT *, CASE WHEN a.Registered =1 AND a.Subscribed =1
AND EXISTS (SELECT TOP 1 1
FROM AUHTORPROFILE B on
INNER JOIN AUTHORHISTORY C ON B.AUTHORPROFILEID=C.AUTHORPROFILEID
WHERE B.AUTHORID =A.AUTHORID) THEN '0,1,10'
WHEN a.Registered =1 AND a.Subscribed =0
AND EXISTS (SELECT TOP 1 1
FROM AUHTORPROFILE B on
INNER JOIN AUTHORHISTORY C ON B.AUTHORPROFILEID=C.AUTHORPROFILEID
WHERE B.AUTHORID =A.AUTHORID) THEN '0,10'
WHEN a.Registered =0 AND a.Subscribed =1
AND EXISTS (SELECT TOP 1 1
FROM AUHTORPROFILE B on
INNER JOIN AUTHORHISTORY C ON B.AUTHORPROFILEID=C.AUTHORPROFILEID
WHERE B.AUTHORID =A.AUTHORID) THEN '1,10'
WHEN a.Registered =0 AND a.Subscribed =0
AND EXISTS (SELECT TOP 1 1
FROM AUHTORPROFILE B on
INNER JOIN AUTHORHISTORY C ON B.AUTHORPROFILEID=C.AUTHORPROFILEID
WHERE B.AUTHORID =A.AUTHORID) THEN '10'
END AS CalculateOptions
FROM AUTHORDATA AS a

SELECT NOT IN with multiple columns in subquery

Regarding the statement below, sltrxid can exist as both ardoccrid and ardocdbid. I'm wanting to know how to include both in the NOT IN subquery.
SELECT *
FROM glsltransaction A
INNER JOIN cocustomer B ON A.acctid = B.customerid
WHERE sltrxstate = 4
AND araccttype = 1
AND sltrxid NOT IN(
SELECT ardoccrid,ardocdbid
FROM arapplyitem)
I would recommend not exists:
SELECT *
FROM glsltransaction t
INNER JOIN cocustomer c ON c.customerid = t.acctid
WHERE
??.sltrxstate = 4
AND ??.araccttype = 1
AND NOT EXISTS (
SELECT 1
FROM arapplyitem a
WHERE ??.sltrxid IN (a.ardoccrid, a.ardocdbid)
)
Note that I changed the table aliases to things that are more meaningful. I would strongly recommend prefixing the column names with the table they belong to, so the query is unambiguous - in absence of any indication, I represented this as ?? in the query.
IN sometimes optimize poorly. There are situations where two subqueries are more efficient:
SELECT *
FROM glsltransaction t
INNER JOIN cocustomer c ON c.customerid = t.acctid
WHERE
??.sltrxstate = 4
AND ??.araccttype = 1
AND NOT EXISTS (
SELECT 1
FROM arapplyitem a
WHERE ??.sltrxid = a.ardoccrid
)
AND NOT EXISTS (
SELECT 1
FROM arapplyitem a
WHERE ??.sltrxid = a.ardocdbid
)

Error in SQL Server of join two tables with case (when...end):

Update: I get an error when join two tables with case (when..end):
tmp1:
A B C
---------------
1 1 1
NULL 1 2
NULL 2 1
2 2 2
tmp2:
A B C D
---------------
1 1 1 X
2 1 1 Y
1 1 2 Z
2 1 2 X
1 2 1 Y
2 2 1 Z
1 2 2 X
2 2 2 Z
I want to join two tables and get tmp1.* + tmp2.D. If tmp1.A is not null, use TMP1.A = TMP2.A, TMP1.B = TMP2.B, AND TMP1.C = TMP2.C to join.
If tmp1.A is null, set tmp1.A = [MIN(TMP2.A) OVER (PARTITION BY TMP2.B, TMP2.C)] and then join. Here is the results what I want:
A B C D
-------------------
1 1 1 X
1 1 2 Z
1 2 1 Y
2 2 2 Y
The following is my codes, I got the error
An expression of non-boolean type specified in a context where a condition is expected, near 'END'
Actually I am not sure how to join them correctly? Any suggestion will be appreciate!
SELECT TMP1.*, TMP2.D
FROM TMP1
LEFT JOIN TMP2 ON CASE
WHEN TMP1.A IS NOT NULL
THEN 'TMP1.A = TMP2.A AND TMP1.B = TMP2.B AND TMP1.C = TMP2.C'
WHEN TMP1.A IS NULL
THEN 'TMP1.A = MIN(TMP2.A) OVER (PARTITION BY TMP2.B, TMP2.C)
AND TMP1.B = TMP2.B
AND TMP1.C = TMP2.C'
END
I don't know if the single quotes are typos or not. If they are intentional, then they are just strings and have nothing to do with the logic being processed (and will generate a syntax error without a comparison of some sort after the case expression)
Assuming the strings represent the logic you do want, you can express the logic without a case expression, using just boolean logic:
SELECT TMP1.*, TMP2.D
FROM TMP1 LEFT JOIN
TMP2
ON (TMP1.A IS NOT NULL AND TMP1.A = TMP2.A AND TMP1.B = TMP2.B AND
TMP1.C = TMP2.C
) OR
(TMP1.A IS NULL AND TMP1.A = MIN(TMP2.A) OVER (PARTITION BY TMP2.B, TMP2.C) AND
TMP1.B = TMP2.B AND TMP1.C = TMP2.C
)
I will don't think this will work, because the window function is not allowed in an ON clause. Perhaps this expresses what you intend:
SELECT TMP1.*, TMP2.D
FROM TMP1 LEFT JOIN
(SELECT TMP2.*,
MIN(TMP2.A) OVER (PARTITION BY TMP2.B, TMP2.C) as MIN_A
FROM TMP2
) TMP2
ON (TMP1.A IS NOT NULL AND TMP1.A = TMP2.A AND TMP1.B = TMP2.B AND
TMP1.C = TMP2.C
) OR
(TMP1.A IS NULL AND TMP1.A = TMP2.MIN_A AND
TMP1.B = TMP2.B AND TMP1.C = TMP2.C
)
Of course, the second part of the boolean expression will always be treated as FALSE (technically it evaluates as NULL) because TMP1.A is NULL and yet you are trying to compare it to something else.
These are very arcane conditions. I wonder if they are really needed. Alternative ways to implement the logic are appropriate for another question, with a better explanation of what you are doing, along with sample data and desired results.
Try this query. I found min(a) for each b and c combinations using correlated subquery, then joined with tmp2 to get final result
select
t.a, t.b, t.c, q.d
from (
select
a = isnull(a.a, (select min(b.a) from tmp2 b where a.b = b.b and a.c = b.c))
, a.b, a.c
from
tmp1 a
) t join tmp2 q on t.a = q.a and t.b = q.b and t.c = q.c

sql function case returns more than one row

Going to use this query as a subquery, the problem is it returns many rows of duplicates. Tried to use COUNT() instead of exists, but it still returns a multiple answer.
Every table can only contain one record of superRef.
The below query I`ll use in SELECT col_a, [the CASE] From MyTable
SELECT CASE
WHEN
EXISTS (SELECT 1 FROM A WHERE
A_superRef = myTable.sysno AND A_specAttr = 'value')
THEN 3
WHEN EXISTS (SELECT 1 FROM B
INNER JOIN С ON С_ReferenceForB = B_sysNo WHERE C_superRef = myTable.sysno AND b_type = 2)
THEN 2
ELSE (SELECT C_intType FROM C
WHERE C_superRef = myTable.sysno)
END
FROM A, B, C
result:
3
3
3
3
3
3...
What if you did this? Because Im guessing you are getting an implicit full outer join A X B X C then running the case statement for each row in that result set.
SELECT CASE
WHEN
EXISTS (SELECT 1 FROM A WHERE
A_superRef = 1000001838012)
THEN 3
WHEN EXISTS (SELECT 1 FROM B
INNER JOIN С ON С_ReferenceForB = B_sysNo AND C_superRef = 1000001838012 )
THEN 2
ELSE (SELECT C_type FROM C
WHERE C_superRef = 1000001838012)
END
FROM ( SELECT COUNT(*) FROM A ) --This is a hack but should work in ANSI sql.
--Your milage my vary with different RDBMS flavors.
DUAL is what I needed, thanks to Thorsten Kettner
SELECT CASE
WHEN
EXISTS (SELECT 1 FROM A WHERE
A_superRef = 1000001838012)
THEN 3
WHEN EXISTS (SELECT 1 FROM B
INNER JOIN С ON С_ReferenceForB = B_sysNo AND C_superRef = 1000001838012 )
THEN 2
ELSE (SELECT C_type FROM C
WHERE C_superRef = 1000001838012)
END
FROM DUAL

Running slow with intersect

Trying to optimize a query it is updaing the records in table A based on the INTERSECT on two data sets.
UPDATE #TableA
SET IsFlag = CASE WHEN ISNULL(RJobFlag, 0) > 0 THEN 0 ELSE 1 END
FROM #TableA AS ABC
OUTER APPLY (
SELECT 1 RJobFlag
WHERE EXISTS (
SELECT ABC.COLUMN1,ABC.COLUMN2,ABC.COLUMN3,ABC.COLUMN4,ABC.COLUMN5,ABC.COLUMN6,ABC.COLUMN7,ABC.COLUMN8,ABC.COLUMN8,ABC.COLUMN9,ABC.COLUMN10,StudentID,SubjectID
INTERSECT
SELECT XYZ.COLUMN1,XYZ.COLUMN2,XYZ.COLUMN3,XYZ.COLUMN4,XYZ.COLUMN5,XYZ.COLUMN6,XYZ.COLUMN7,XYZ.COLUMN8,XYZ.COLUMN8,XYZ.COLUMN9,XYZ.COLUMN10,StudentID,SubjectID
FROM #TableB AS XYZ
WHERE XYZ.COLUMN1 = (SELECT DISTINCT ID FROM #TableC MNOP WHERE MNOP.StudentID = ABC.StudentID)
AND StudentID = ABC.StudentID
AND SubjectID = ABC.SubjectID )
) Subquery
WHERE ABC.COLUMN1= '2'
Appretiated if you have some ideas to better optimize it.
Thanks
This might be worse never know.
UPDATE tA
SET IsFlag = COALESCE(RJobFlag, 0)
FROM #TableA tA
LEFT JOIN ( SELECT 1 RJobFlag, *
FROM #TableB tB
WHERE EXISTS ( SELECT *
FROM #TableC tC
WHERE tC.ID = tB.COLUMN1 AND tC.StudentID = tB.StudentID)
) tB
ON tA.StudentID = tB.StudentID
AND tA.SubjectID = tB.SubjectID
AND tA.COLUMN1 = tB.COLUMN1
AND tA.COLUMN2 = tB.COLUMN2
AND tA.COLUMN3 = tB.COLUMN3
AND tA.COLUMN4 = tB.COLUMN4
AND tA.COLUMN5 = tB.COLUMN5
AND tA.COLUMN6 = tB.COLUMN6
AND tA.COLUMN7 = tB.COLUMN7
AND tA.COLUMN8 = tB.COLUMN8
AND tA.COLUMN9 = tB.COLUMN9
AND tA.COLUMN10 = tB.COLUMN10
If #TableA has a bunch of records and you're using outer apply or outer join every time you update it will be slow since it has to update every single record. maybe there's a way to only update the records that have changed?