Reduce left outer join in SQL query - sql

Updated with an SQLFiddle.
I have an Oracle query in which I need to reduce the number of left outer joins so that it performs efficiently. The current query runs for more than 2 hours, and I want to reduce its complexity by reducing the number of join operations.
Without the joins, the query runs in 15 minutes. Hence I want to rewrite the logic. Is there any efficient way to do that?
-- Builds every combination of the distinct Col1/Col2/Col4/Col5 values for
-- Col3 = 42 and flags, per combination, which column subsets (QQ1..QQ8)
-- occur together in the source rows.
WITH myquery AS
(
SELECT *
FROM TEST_FILE1
)
SELECT
A.Col3, A.Col1, A.Col2, A.Col4, A.Col5
-- D.CB,
-- NVL(D.CD, 0), NVL(D.CE, 0), NVL(D.EF, 0),
-- QQn is 1 when the corresponding Vn probe below matched a row, 0 otherwise.
,CASE WHEN V1.Col1 IS NULL THEN 0 ELSE 1 END AS QQ1
,CASE WHEN V2.Col3 IS NULL THEN 0 ELSE 1 END AS QQ2
,CASE WHEN V3.Col1 IS NULL THEN 0 ELSE 1 END AS QQ3
,CASE WHEN V4.Col3 IS NULL THEN 0 ELSE 1 END AS QQ4
, case when V5.Col1 is NULL then 0 else 1 end as QQ5
, case when V6.Col3 is NULL then 0 else 1 end as QQ6
, case when V7.Col1 is NULL then 0 else 1 end as QQ7
, case when V8.Col3 is NULL then 0 else 1 end as QQ8
-- A: cross product of the distinct values of each column, restricted to Col3 = 42.
FROM (
SELECT Col3, Col1, Col2, Col4, Col5
FROM (
SELECT distinct Col3
FROM myquery
) A1
CROSS JOIN (
SELECT distinct Col1
FROM myquery
) A2
CROSS JOIN (
SELECT distinct Col2
FROM myquery
) A3
CROSS JOIN (
SELECT distinct Col4
FROM myquery
) A4
CROSS JOIN (
SELECT distinct Col5
FROM myquery
) A5
WHERE Col3 = 42
) A
-- D: matches the full five-column combination back to the source rows, treating
-- NULLs on both sides as equal via NVL(..., '-'). Only the commented-out select
-- items above reference D.
LEFT JOIN myquery D on NVL(D.Col3, '-') = NVL(A.Col3, '-') AND NVL(D.Col1, '-') = NVL(A.Col1, '-')
AND NVL(D.Col2, '-') = NVL(A.Col2, '-') AND NVL(D.Col4, '-') = NVL(A.Col4, '-') AND NVL(D.Col5,
'-') = NVL(A.Col5, '-')
-- V1..V8: one existence probe per column subset; DISTINCT keeps each probe from
-- multiplying the rows of A.
-- V1: (Col1, Col3, Col5)
LEFT JOIN (
SELECT distinct Col1, Col3, Col5
FROM myquery
) V1 on V1.Col1 = A.Col1 AND V1.Col3 = A.Col3 AND V1.Col5 = A.Col5
-- V2: (Col3, Col5, Col2)
LEFT JOIN (
SELECT distinct Col3, Col5, Col2
FROM myquery
) V2 on V2.Col3 = A.Col3 AND V2.Col5 = A.Col5 AND V2.Col2 = A.Col2
-- V3: (Col3, Col5, Col1, Col2)
LEFT JOIN (
SELECT distinct Col3, Col5, Col1, Col2
FROM myquery
) V3 on V3.Col3 = A.Col3 AND V3.Col5 = A.Col5 AND V3.Col1 = A.Col1 AND V3.Col2 = A.Col2
-- V4: (Col3, Col5, Col2), considering only source rows where Col1 is 'Bert' or 'Myra'
LEFT JOIN (
SELECT distinct Col3, Col5, Col2
FROM myquery
WHERE Col1 in ('Bert','Myra')
) V4 on V4.Col3 = A.Col3 AND V4.Col5 = A.Col5 AND V4.Col2 = A.Col2
-- V5: (Col1, Col3)
LEFT JOIN (
SELECT distinct Col1, Col3
FROM myquery
) V5 on V5.Col1 = A.Col1 AND V5.Col3 = A.Col3
-- V6: (Col3, Col2)
LEFT JOIN (
SELECT distinct Col3, Col2
FROM myquery
) V6 on V6.Col3 = A.Col3 AND V6.Col2 = A.Col2
-- V7: (Col3, Col1, Col2)
LEFT JOIN (
SELECT distinct Col3, Col1, Col2
FROM myquery
) V7 on V7.Col3 = A.Col3 AND V7.Col1 = A.Col1 AND V7.Col2 = A.Col2
-- V8: (Col3, Col2), considering only source rows where Col1 is 'Bert' or 'Myra'
LEFT JOIN (
SELECT distinct Col3, Col2
FROM myquery
WHERE Col1 in ('Bert','Myra')
) V8 on V8.Col3 = A.Col3 AND V8.Col2 = A.Col2
So far I was thinking of using an analytic window function, but I didn't get the desired output. Any leads will be highly appreciated.
Here is the input data in my TEST_FILE1 table:
+------+------+------+------+------+
| COL1 | COL2 | COL3 | COL4 | COL5 |
+------+------+------+------+------+
| Bert | "M" | 42 | 68 | 166 |
| Carl | "M" | 32 | 70 | 155 |
| Dave | "M" | 39 | 72 | 167 |
| Elly | "F" | 30 | 66 | 124 |
| Fran | "F" | 33 | 66 | 115 |
| Hank | "M" | 30 | 71 | 158 |
| Jake | "M" | 32 | 69 | 143 |
| Luke | "M" | 34 | 72 | 163 |
| Neil | "M" | 36 | 75 | 160 |
| Page | "F" | 31 | 67 | 135 |
| Alex | "M" | 41 | 74 | 170 |
| Gwen | "F" | 26 | 64 | 121 |
| Ivan | "M" | 53 | 72 | 175 |
| Kate | "F" | 47 | 69 | 139 |
| Myra | "F" | 23 | 62 | 98 |
| Omar | "M" | 38 | 70 | 145 |
| Quin | "M" | 29 | 71 | 176 |
| Ruth | "F" | 28 | 65 | 131 |
+------+------+------+------+------+
From this table I want to create every possible combination by taking the distinct values of each column and applying a cross join. It will produce 7776 records with my filter on col3 = 42, because I want all possible combinations for this col3 value only.
With these combinations I want to check which column combinations are null (i.e. do not occur in the data), using several left outer joins.
Output(partial):
+------+------+------+------+------+-----+-----+-----+-----+-----+-----+-----+-----+
| COL3 | COL1 | COL2 | COL4 | COL5 | QQ1 | QQ2 | QQ3 | QQ4 | QQ5 | QQ6 | QQ7 | QQ8 |
+------+------+------+------+------+-----+-----+-----+-----+-----+-----+-----+-----+
| 42 | Page | "F" | 68 | 176 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 42 | Alex | "F" | 62 | 143 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 42 | Fran | "M" | 66 | 175 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 42 | Omar | "F" | 70 | 176 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 42 | Elly | "M" | 72 | 124 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 42 | Quin | "M" | 64 | 160 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 42 | Omar | "M" | 64 | 158 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 42 | Kate | "F" | 62 | 176 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 42 | Neil | "F" | 69 | 145 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 42 | Dave | "F" | 62 | 163 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 42 | Ruth | "M" | 70 | 115 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 42 | Bert | "M" | 65 | 121 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 |
| 42 | Bert | "M" | 72 | 145 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 |
| 42 | Omar | "M" | 62 | 158 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 42 | Ruth | "M" | 75 | 131 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
+------+------+------+------+------+-----+-----+-----+-----+-----+-----+-----+-----+

When checking whether data exists in a table we use EXISTS or IN, not JOIN (SELECT DISTINCT ...). Hence this is the query I'd probably come up with:
-- For every candidate combination (col3 fixed at 42), report whether each column
-- subset already occurs in the source data: 1 = present, 0 = absent.
WITH myquery AS (
    SELECT * FROM TEST_FILE1
),
-- Candidate rows: cross product of the distinct values of col1, col2, col4 and col5.
a AS (
    SELECT d1.col1, d2.col2, 42 AS col3, d4.col4, d5.col5
    FROM (SELECT DISTINCT col1 FROM myquery) d1
    CROSS JOIN (SELECT DISTINCT col2 FROM myquery) d2
    CROSS JOIN (SELECT DISTINCT col4 FROM myquery) d4
    CROSS JOIN (SELECT DISTINCT col5 FROM myquery) d5
)
SELECT
    a.col1,
    a.col2,
    a.col3,
    a.col4,
    a.col5,
    CASE WHEN (a.col1, a.col3, a.col5) IN (SELECT col1, col3, col5 FROM myquery)
         THEN 1 ELSE 0 END AS v1,
    CASE WHEN (a.col2, a.col3, a.col5) IN (SELECT col2, col3, col5 FROM myquery)
         THEN 1 ELSE 0 END AS v2,
    CASE WHEN (a.col1, a.col2, a.col3, a.col5) IN (SELECT col1, col2, col3, col5 FROM myquery)
         THEN 1 ELSE 0 END AS v3,
    -- v4 and v8 only look at source rows belonging to 'Bert' or 'Myra'.
    CASE WHEN (a.col2, a.col3, a.col5) IN (SELECT col2, col3, col5 FROM myquery WHERE col1 IN ('Bert', 'Myra'))
         THEN 1 ELSE 0 END AS v4,
    CASE WHEN (a.col1, a.col3) IN (SELECT col1, col3 FROM myquery)
         THEN 1 ELSE 0 END AS v5,
    CASE WHEN (a.col2, a.col3) IN (SELECT col2, col3 FROM myquery)
         THEN 1 ELSE 0 END AS v6,
    CASE WHEN (a.col1, a.col2, a.col3) IN (SELECT col1, col2, col3 FROM myquery)
         THEN 1 ELSE 0 END AS v7,
    CASE WHEN (a.col2, a.col3) IN (SELECT col2, col3 FROM myquery WHERE col1 IN ('Bert', 'Myra'))
         THEN 1 ELSE 0 END AS v8
FROM a
ORDER BY a.col1, a.col2, a.col3, a.col4, a.col5;
If your real query here: WITH myquery AS (...) is more than just a mere SELECT * FROM TEST_FILE1, you may want to use the /*+MATERIALIZE*/ hint here in order to speed up access to it.

Related

Round down to nearest of Multiple of N

I have sql table as follows
+-----------------------------+
| |col1 | col2 | col3| col4| |
+-----------------------------+
| _______________________ |
| | a | 3 | d1 | 10 | |
| | a | 6 | d2 | 15 | |
| | b | 2 | d2 | 8 | |
| | b | 30 | d1 | 50 | |
+-----------------------------+
I would like transform the above table into below, where the transformation is
col4 = col4 - (col4 % min(col2) group by col1)
+------------------------------+
| |col1 | col2 | col3| col4| |
+------------------------------+
| ____________________________ |
| |a | 3 | d1 | 9 | |
| |a | 6 | d2 | 15 | |
| |b | 2 | d2 | 8 | |
| |b | 30 | d1 | 50 | |
| |
+------------------------------+
I could read the above table in application code to do transformation manually, was wondering if it was possible to offload the transformation to sql
Just run a simple select query for this:
-- Round col4 down to the nearest multiple of the smallest col2 in its col1 group.
-- The computed column is aliased so the result still exposes it as col4.
select col1, col2, col3,
       col4 - (col4 % min(col2) over (partition by col1)) as col4
from t;
There is no need to actually modify the table.
You can use a multi-table UPDATE to achieve your desired result, joining your table to a table of MIN(col2) values:
-- Round col4 down, in place, to a multiple of the per-col1 minimum of col2.
UPDATE table1
SET col4 = col4 - (col4 % grp.col2min)
FROM (
    -- One row per col1 carrying the smallest col2 of that group.
    SELECT col1, MIN(col2) AS col2min
    FROM table1
    GROUP BY col1
) grp
WHERE grp.col1 = table1.col1
Output:
col1 col2 col3 col4
a 3 d1 9
a 6 d2 15
b 2 d2 8
b 30 d1 50
Demo on dbfiddle

How to select non empty value for columns from each group?

I have the following table.
| col1 | col2 | col3 | col4 |
| ---- | ---- | ---- | ---- |
| 123 | 345 | LA | 001 |
| 123 | 345 | AB | |
| 123 | 345 | LA | |
| 123 | 345 | | |
| abc | cde | LA | |
| abc | cde | | |
| abc | cde | AB | |
| abc | cde | LA | |
| ooo | zzz | LA | |
| ooo | zzz | LA | 001 |
| ooo | zzz | LA | |
| ooo | zzz | LA | 001 |
| 12 | 35 | LA | |
| 12 | 35 | LA | |
| 12 | 35 | LA | |
| 12 | 35 | LA | |
I would like to select for each group of col1 & col2, col3 must be AB if it is present in the group, else blank, and col4 should be the non-blank if non-blank value is present, else it should be blank. col4 for each group will either have a unique non-blank value or blank.
Desired output:
| col1 | col2 | col3 | col4 |
|------|------|------|------|
| 123 | 345 | AB | 001 |
| abc | cde | AB | |
| ooo | zzz | | 001 |
| 12 | 35 | | |
Desired code:
select col1, col2,
(AB if AB present else blank) as col3,
(non-blank value if non-blank is present else blank) as col4
from test
group by col1, col2;
What I have tried so far:
-- Per (col1, col2) group, keep the largest col4 value.
SELECT
    col1,
    col2,
    MAX(col4) AS col4
FROM test
GROUP BY
    col1,
    col2;
I don't know how to do it for col3
SQL Fiddle: https://www.db-fiddle.com/f/dtEiDdu6Arn6VQTGwMXm4q/0
NOTE:
I require a vendor agnostic solution. I think an RDBMS specific solution might work in the platform I use, but I would have to test out that answer/solution and get back to you though.
I am not querying from the source, but from a grid, so I am not using an RDBMS at the backend.
For the column col3 you need a CASE statement and for the column col4 simply MAX() (or MIN()):
-- Per (col1, col2) group: col3 is 'AB' when at least one row has it (NULL otherwise),
-- col4 is the group's maximum col4.
SELECT
    col1,
    col2,
    -- The CASE yields 'AB' or NULL per row; MAX ignores NULLs, so any 'AB' row wins.
    MAX(CASE WHEN col3 = 'AB' THEN 'AB' END) AS col3,
    MAX(col4) AS col4
FROM test
GROUP BY col1, col2;
See the demo.
Results:
| col1 | col2 | col3 | col4 |
| ---- | ---- | ---- | ---- |
| 12 | 35 | | |
| 123 | 345 | AB | 001 |
| abc | cde | AB | |
| ooo | zzz | | 001 |
This is a prioritization query. row_number() is often used:
-- One row per (col1, col2): col3 is 'AB' when the group contains it, and col4 is
-- the group's non-blank value. col4 is computed across the whole group rather
-- than read from the prioritized row, because the 'AB' row may itself have a
-- blank col4 (as in group 123/345 of the sample data).
select col1, col2,
       (case when col3 = 'AB' then col3 end) as col3,
       group_col4 as col4
from (select t.*,
             max(col4) over (partition by col1, col2) as group_col4,
             -- Rank the 'AB' row (if any) first so seqnum = 1 decides col3.
             row_number() over (partition by col1, col2
                                order by (case when col3 = 'AB' then 1 else 2 end)
                               ) as seqnum
      from test t
     ) t
where seqnum = 1;
Check This-
-- For each (col1, col2) group: N_Col3 is 'AB' when the group has an 'AB' row
-- (NULL otherwise) and col4 is the group's maximum col4.
SELECT B.col1, B.col2, B.N_Col3, MAX(B.col4) AS col4
FROM
(
    SELECT your_table.*, A.col3 AS N_Col3
    FROM your_table
    LEFT JOIN
    (
        -- DISTINCT keeps several 'AB' rows in one group from multiplying the join.
        SELECT DISTINCT col1, col2, col3
        FROM your_table
        WHERE col3 = 'AB'
    ) A
        -- Join back to your_table itself (the original referenced an undefined alias).
        ON A.col1 = your_table.col1 AND A.col2 = your_table.col2
) B
GROUP BY B.col1, B.col2, B.N_Col3
ORDER BY B.N_Col3 DESC
-- Step 1 (flagged): reduce col3 to 'AB' or a blank and take max(col4) per
-- (col1, col2, reduced col3). Step 2: collapse to one row per (col1, col2).
with flagged as
(
    select col1,
           col2,
           case when col3 = 'AB' THEN 'AB' ELSE ' ' END as col3,
           max(COL4) as col4
    from #table
    group by col1, col2, case when col3 = 'AB' THEN 'AB' ELSE ' ' END
)
select col1,
       col2,
       max(col3) as col3,
       max(col4) as col4
from flagged
group by col1, col2

How to return values from same table?

I've two tables A and B. I want to return all records from A and only matching from B. I can use left join for this. But after joining, I want to return records based on a flag in the same table.
Table A:
| Col1 | Col2 |
|------|------|
| 123 | 12 |
| 456 | 34 |
| 789 | 56 |
Table B:
| Col1 | Col2 | Col3 | Col4 | Col5 |
|------|------|------|------|------|
| 123 | 12 | NULL | I | 1 |
| 456 | 34 | NULL | E | 1 |
| 111 | 98 | NULL | I | 1 |
| 222 | 99 | NULL | E | 1 |
| 123 | 12 | AB | NULL | 2 |
| 456 | 34 | CD | NULL | 2 |
| 123 | 12 | EF | NULL | 2 |
| 111 | 98 | GH | NULL | 2 |
| 222 | 99 | IJ | NULL | 2 |
After left joining A and B this how the result will look like:
| Col1 | Col2 | Col3 | Col4 | Col5 |
|------|------|------|------|------|
| 123 | 12 | NULL | I | 1 |
| 456 | 34 | NULL | E | 1 |
| 123 | 12 | AB | NULL | 2 |
| 456 | 34 | CD | NULL | 2 |
| 123 | 12 | EF | NULL | 2 |
| 789 | 56 | NULL | NULL | NULL |
1 and 2 values in Col5 tells if Col4 should be populated or Col3. 1 for Col4 and 2 for Col3.
I want to return all the records for 'I'(but excluding the record which has 'I') in Col4 which will look like this:
| Col1 | Col2 | Col3 | Col4 | Col5 |
|------|------|------|--------|------|
| 123 | 12 | AB | (null) | 2 |
| 123 | 12 | EF | (null) | 2 |
I also want to return records for 'E' (again excluding the record which has 'E') in col4 but for all the values other than one in Col3. In this case CD. Which would look like this:
| Col1 | Col2 | Col3 | Col4 | Col5 |
|------|------|------|--------|------|
| 456 | 34 | AB | (null) | 2 |
| 456 | 34 | EF | (null) | 2 |
| 456 | 34 | GH | (null) | 2 |
| 456 | 34 | IJ | (null) | 2 |
Can someone suggest how to handle this in SQL?
Ok I believe the following two queries achieve your desired results. You can see all the sample code via the following SQL Fiddle.
Existence Rule:
-- Existence rule: return the col5 = 2 rows of TableB for every TableA key that
-- also has an 'I' flag row (col4 = 'I', col5 = 1) in TableB.
SELECT
    A.*,
    B.Col3,
    B.Col4,
    B.Col5
FROM TableA A
INNER JOIN TableB B
    ON  B.Col1 = A.Col1
    AND B.Col2 = A.Col2
WHERE B.Col5 = 2
  AND EXISTS (
        SELECT 1
        FROM TableB flag
        WHERE flag.Col1 = B.Col1
          AND flag.Col2 = B.Col2
          AND flag.Col4 = 'I'
          AND flag.Col5 = 1
      )
Results:
| Col1 | Col2 | Col3 | Col4 | Col5 |
|------|------|------|--------|------|
| 123 | 12 | AB | (null) | 2 |
| 123 | 12 | EF | (null) | 2 |
Exclusion Rule:
-- Exclusion rule: for every TableA key flagged 'E' (col4 = 'E', col5 = 1), return
-- all col5 = 2 rows of TableB except those whose Col3 is already attached to
-- that key.
select A.*
     , B.Col3
     , B.Col4
     , B.Col5
from TableA A
cross join TableB B
where B.Col5 = 2
  and exists (select 1 from TableB C
              where C.Col1 = A.Col1 and C.Col2 = A.Col2
                and C.Col4 = 'E' and C.Col5 = 1)
  -- NOT EXISTS instead of NOT IN: a NULL Col3 among the key's own rows would make
  -- NOT IN reject every candidate. The separate alias X also avoids shadowing B.
  and not exists (select 1 from TableB X
                  where X.Col1 = A.Col1 and X.Col2 = A.Col2 and X.Col5 = 2
                    and X.Col3 = B.Col3)
Results:
| Col1 | Col2 | Col3 | Col4 | Col5 |
|------|------|------|--------|------|
| 456 | 34 | AB | (null) | 2 |
| 456 | 34 | EF | (null) | 2 |
| 456 | 34 | GH | (null) | 2 |
| 456 | 34 | IJ | (null) | 2 |
Result for I:-
-- cte1: TableA keys that carry the 'I' flag in B; cte2: the B columns to pair with them.
;with cte1 As (
    select a.col1, a.col2
    from A a
    left join B b on a.col1 = b.col1 and a.col2 = b.col2
    where b.col4 = 'I'
),
cte2 As (
    -- NOTE(review): in the sample data col4 is NULL on the col5 = 2 rows, so
    -- `b.col4 <> 'I'` filters those rows out as well; confirm this is intended.
    select b.col3, b.col4, b.col5
    from A a
    left join B b on a.col1 = b.col1 and a.col2 = b.col2
    where b.col4 <> 'I'
)
Result for E:-
select a.col1,a.col2,b.col3,b.col4,b.col5 from cte1 a cross join cte2 b
-- cte1: TableA keys that carry the 'E' flag in B; cte2: the B columns to pair with them.
;with cte1 As (
    select a.col1, a.col2
    from A a
    left join B b on a.col1 = b.col1 and a.col2 = b.col2
    where b.col4 = 'E'
),
cte2 As (
    -- NOTE(review): in the sample data col4 is NULL on the col5 = 2 rows, so
    -- `b.col4 <> 'E'` filters those rows out as well; confirm this is intended.
    select b.col3, b.col4, b.col5
    from A a
    left join B b on a.col1 = b.col1 and a.col2 = b.col2
    where b.col4 <> 'E'
)
select a.col1, a.col2, b.col3, b.col4, b.col5
from cte1 a
cross join cte2 b
-- Template: filter the result of a joined sub-select.
-- "condition" is a placeholder; substitute your own predicate / value.
select c.col1, c.col2
from
-- Inner select: do the join and the first round of filtering; the result is named c.
(select a.col1, a.col2, b.col3 from a inner join b on a.id = b.id
where "condition" ) c
-- Outer filter: applied to the rows produced by the sub-select.
where c.col1 = "condition"
This is the script. The explanation is:
Inside the parentheses I wrote the first select. There, you do the select with your joins and your conditions. At the end of that select I wrote "c", which is the name (alias) of the table generated by the sub-select.
Then, you'll select some values from the generated table and filter them with a where that will act on the results generated by the table created with the sub-select
EDIT: I used your question's names to make it easier

Sum cases that meet multiple criteria (pandas)

I need to find the sum of cases in col2 where for each set in col1 (ABC), the col2 value has a Y in col3 100% of the time. In this case, B1 &
D1 meet this criteria, so N=2. Support in pandas or SQL are helpful (both are ideal).
| col1 | col2 | col3 | col4 | col5 |
|------|-------|-------|-------|-------|
| A | A1 | N | 1 | 256 |
| A | B1 | Y | 2 | 3 |
| A | C1 | N | 3 | 323 |
| B | F1 | N | 1 | 89 |
| B | B1 | Y | 2 | 256 |
| C | D1 | Y | 1 | 3 |
| D | A1 | N | 1 | 32 |
| D | C1 | Y | 2 | 893 |
Something like this in python pandas
df.groupby('col2').col3.apply(lambda x : sum(x=='Y')==x.count()).sum()
Out[568]: 2
More detail :
df.groupby('col2').col3.apply(lambda x : sum(x=='Y')==x.count())
Out[569]:
col2
A1 False
B1 True
C1 False
D1 True
F1 False
Name: col3, dtype: bool
I don't see what col1 has to do with this. You can do this with a SQL query:
-- Count the col2 values whose rows all have col3 = 'Y'.
select count(*)
from (select col2
      from t
      -- Aggregates cannot appear in WHERE; group per col2 and filter the groups.
      group by col2
      having min(col3) = max(col3) and min(col3) = 'Y'
     ) t;

remove null values and merge sql server 2008 r2

I have a table (TestTable) as follows
PK | COL1 | COL2 | COL3
1 | 3 | NULL | NULL
2 | 3 | 43 | 1.5
3 | 4 | NULL | NULL
4 | 4 | NULL | NULL
5 | 4 | 48 | 10.5
6 | NULL | NULL | NULL
7 | NULL | NULL | NULL
8 | NULL | NULL | NULL
9 | 5 | NULL | NULL
10 | 5 | NULL | NULL
11 | 5 | 55 | 95
I would like a result as follows
PK | COL1 | COL2 | COL3
1 | 3 | 43 | 1.5
2 | 4 | 48 | 10.5
3 | 5 | 55 | 95
You can do this, but it won't give you a serial number for the PK:
-- Keep only the rows where all three value columns are populated.
select
    PK
  , max(Col1) as Col1
  , max(Col2) as Col2
  , max(Col3) as Col3
from TestTable
where Col1 is not null
  and Col2 is not null
  and COL3 is not null
group by PK;
| PK | COL1 | COL2 | COL3 |
|----|------|------|------|
| 2 | 3 | 43 | 1.5 |
| 5 | 4 | 48 | 10.5 |
| 11 | 5 | 55 | 95 |
If you want to generate a rownumber for the column pk, you can do this:
-- CTE: the rows where all three value columns are populated.
WITH CTE
AS
(
    SELECT
        PK,
        MAX(Col1) AS Col1,
        MAX(Col2) AS Col2,
        MAX(Col3) AS Col3
    FROM TestTable
    WHERE Col1 IS NOT NULL
      AND Col2 IS NOT NULL
      AND COL3 IS NOT NULL
    GROUP BY PK
), Ranked
AS
(
    -- Number the surviving rows 1..n in PK order. No semicolon is allowed inside
    -- the CTE body; the statement ends after the final SELECT.
    SELECT *, ROW_NUMBER() OVER(ORDER BY PK) AS RN
    FROM CTE
)
SELECT RN AS PK, Col1, COL2, COL3 FROM Ranked;
SQL Fiddle Demo
This will give you:
| PK | COL1 | COL2 | COL3 |
|----|------|------|------|
| 1 | 3 | 43 | 1.5 |
| 2 | 4 | 48 | 10.5 |
| 3 | 5 | 55 | 95 |
This can be obtained in two steps like so:
1st step: Get rid of unnecessary rows:
-- Remove every row that is missing at least one of the three value columns.
DELETE FROM testTable
WHERE Col1 IS NULL
   OR Col2 IS NULL
   OR Col3 IS NULL
2nd step: Set the correct PK values using a CTE (update test table):
-- Renumber PK as 1..n in the current PK order.
-- The update goes through the CTE so each row receives exactly its own number;
-- re-joining on Col1/Col2/Col3 would mis-assign numbers whenever two rows share
-- the same values in those columns.
;with sanitizeCTE
as(
    select PK,
           ROW_NUMBER() over (order by PK) as newPK
    from testTable
)
update sanitizeCTE
set PK = newPK
Tested here: http://sqlfiddle.com/#!3/91e86/1