Pivoting data to find unique values for specific columns - sql

I have a table:
ID
col1
col2
col3
col4
col5
1
Y
G
CA
DA
EA
2
Y
G
CA
DA
EA
3
Y
G
CA
DA
EA
1
X
Q
RA
DA
EA
2
X
Q
CA
DA
EA
For col1 and col2, I want to create 4 additional columns. Two for the distinct values for col1 (X,Y) and two for the distinct values for col2 (G,Q).
For example, I want to create a new column (col1_X) that displays 'Y' if col1 has the value X. Basically, for each combination of ID, col3, col4 and col5, I want to show whether col1 has X, col1 has Y, col2 has G and col2 has Q, each as an individual column. How do I do that? I think I'll need a pivot/group by, as I want to eliminate duplicated rows of data.
Desired output:
ID
col3
col4
col5
col1_X
col1_Y
col2_G
col2_Q
1
CA
DA
EA
Y
Y
Y
Y
1
RA
DA
EA
Y
Y
Y
Y
2
CA
DA
EA
Y
Y
Y
Y
3
CA
DA
EA
N
Y
Y
N

Test data
-- Start from a clean slate so the script can be re-run in the same session.
drop table if exists #result;
drop table if exists #test;

-- Scratch table holding the sample rows used by the solutions below.
create table #test
(
    [ID]   int,
    [col1] varchar(1),
    [col2] varchar(1),
    [col3] varchar(2),
    [col4] varchar(2),
    [col5] varchar(2)
);

-- Six sample rows: each ID appears once with col1 = 'Y' and once with col1 = 'X'.
insert into #test ([ID], [col1], [col2], [col3], [col4], [col5])
select 1, 'Y', 'G', 'CA', 'DA', 'EA' union all
select 2, 'Y', 'G', 'CA', 'DA', 'EA' union all
select 3, 'Y', 'Q', 'CA', 'DA', 'EA' union all
select 1, 'X', 'G', 'RA', 'DA', 'EA' union all
select 2, 'X', 'G', 'CA', 'DA', 'EA' union all
select 3, 'X', 'Q', 'CA', 'DA', 'EA';
1) Pivot solution
Here is a pivot approach. It is not purely dynamic, as you have to provide the "column names" in the select, which are the values you are looking for (X/Y and G/Q). It needs to be done twice, once for each column you are interested in (col1 and col2), so it ends up looking more complex than might be hoped for, but it is really one simple pivot, done twice, then cleaned up per your desired output. It seems like a 'trick' to me, still - after all these years! More info on PIVOT here: Using PIVOT and UNPIVOT.
-- For every (ID, col3, col4, col5) group, report 'Y'/'N' flags telling whether
-- col1 ever equals X or Y and whether col2 ever equals G or Q.
--
-- PIVOT groups by every input column that is neither aggregated nor pivoted.
-- Each pivot therefore reads from a derived table exposing only the grouping
-- columns, the pivoted column and a constant marker. Feeding #test in directly
-- would silently add col2 to the grouping of the col1 pivot (one row per col2
-- value) and make the col2 pivot depend on col1 being non-NULL.
select
    c1.ID,
    c1.col3,
    c1.col4,
    c1.col5,
    case when c1.[X] > 0 then 'Y' else 'N' end as col1_X,
    case when c1.[Y] > 0 then 'Y' else 'N' end as col1_Y,
    case when c2.[G] > 0 then 'Y' else 'N' end as col2_G,
    case when c2.[Q] > 0 then 'Y' else 'N' end as col2_Q
from
(
    -- number of rows per group whose col1 is X / Y
    select p.ID, p.col3, p.col4, p.col5, p.[X], p.[Y]
    from
    (
        select ID, col3, col4, col5, col1, 1 as hit
        from #test
    ) as src
    pivot
    (
        count(hit) for col1 in ([X], [Y])
    ) as p
) as c1
inner join
(
    -- number of rows per group whose col2 is G / Q
    select p.ID, p.col3, p.col4, p.col5, p.[G], p.[Q]
    from
    (
        select ID, col3, col4, col5, col2, 1 as hit
        from #test
    ) as src
    pivot
    (
        count(hit) for col2 in ([G], [Q])
    ) as p
) as c2
    on  c1.ID   = c2.ID
    and c1.col3 = c2.col3
    and c1.col4 = c2.col4
    and c1.col5 = c2.col5;
2) Naive solution
I created a "naive" solution that I think is not really so bad and has the benefit of being understandable to anyone with a little sql skill, whereas it is a little obscure how pivot is working in a case like this (where we are not "really" aggregating information such as sums, averages, or real counts).
-- Step 1: one output row per distinct (ID, col3, col4, col5), with every flag
-- defaulting to 'N'. The source is referenced as #test (same casing as its
-- CREATE TABLE) so the script also runs under a case-sensitive collation.
select distinct
    ID,
    col3,
    col4,
    col5,
    'N' as col1_X,
    'N' as col1_Y,
    'N' as col2_G,
    'N' as col2_Q
into #result
from #test;

-- Step 2: flip a flag to 'Y' when at least one source row in the same group
-- carries the value that the flag stands for.

-- col1 = 'X'
update r
set r.col1_X = 'Y'
from #result as r
where exists (
    select 1
    from #test as s
    where s.ID   = r.ID
      and s.col3 = r.col3
      and s.col4 = r.col4
      and s.col5 = r.col5
      and s.col1 = 'X'
);

-- col1 = 'Y'
update r
set r.col1_Y = 'Y'
from #result as r
where exists (
    select 1
    from #test as s
    where s.ID   = r.ID
      and s.col3 = r.col3
      and s.col4 = r.col4
      and s.col5 = r.col5
      and s.col1 = 'Y'
);

-- col2 = 'G'
update r
set r.col2_G = 'Y'
from #result as r
where exists (
    select 1
    from #test as s
    where s.ID   = r.ID
      and s.col3 = r.col3
      and s.col4 = r.col4
      and s.col5 = r.col5
      and s.col2 = 'G'
);

-- col2 = 'Q'
update r
set r.col2_Q = 'Y'
from #result as r
where exists (
    select 1
    from #test as s
    where s.ID   = r.ID
      and s.col3 = r.col3
      and s.col4 = r.col4
      and s.col5 = r.col5
      and s.col2 = 'Q'
);

-- Step 3: show the flags.
select * from #result;
Result in both cases:
ID
col3
col4
col5
col1_X
col1_Y
col2_G
col2_Q
1
CA
DA
EA
N
Y
Y
N
1
RA
DA
EA
Y
N
Y
N
2
CA
DA
EA
Y
Y
Y
N
3
CA
DA
EA
Y
Y
N
Y

-- Conditional aggregation: tag each source row with 'Y'/'N' per value of
-- interest, then collapse each (ID, col3, col4, col5) group with MAX.
-- 'Y' sorts after 'N', so MAX yields 'Y' as soon as one row in the group matched.
select
    ID,
    col3,
    col4,
    col5,
    max(col1_X) as col1_X,
    max(col1_Y) as col1_Y,
    max(col2_G) as col2_G,
    max(col2_Q) as col2_Q
from
(
    select
        ID,
        col3,
        col4,
        col5,
        case when col1 = 'X' then 'Y' else 'N' end as col1_X,
        case when col1 = 'Y' then 'Y' else 'N' end as col1_Y,
        case when col2 = 'G' then 'Y' else 'N' end as col2_G,
        -- alias must be col2_Q: the outer query aggregates it under that name
        case when col2 = 'Q' then 'Y' else 'N' end as col2_Q
    from #test
) as flagged
group by
    ID,
    col3,
    col4,
    col5;

Related

Perform column search on next table if record not found in previous table in db2 sql query

I have three tables with same columns
table 1
col1 col2 col3 col4
table 2
col1 col2 col3 col4
table 3
col1 col2 col3 col4
I have to perform a search if record is not found on table1 then only go to search in table2 and if not found in table2 then go to table3. But if record found in any of these table then perform some calculation on col4 and return col4 without execution further. I am using DB2 but not able to find
the exact solution . How can i achieve this ?.
If you want to keep this as one query, you can use UNION ALL to get the correct table:
-- Probe all three tables in one statement and keep only the hit from the
-- highest-priority table: SortCol records which table a row came from, and
-- ORDER BY + FETCH FIRST keeps the row with the smallest SortCol.
SELECT col4, 1 AS SortCol
FROM Table1
WHERE col1 = 'whatever'
UNION ALL
SELECT col4, 2 AS SortCol
FROM Table2
WHERE col1 = 'whatever'
UNION ALL
SELECT col4, 3 AS SortCol
FROM Table3
WHERE col1 = 'whatever'
ORDER BY SortCol
-- the FIRST keyword is part of the fetch clause; "FETCH 1 ROW ONLY" does not parse
FETCH FIRST 1 ROW ONLY;
EDIT
Another method is possible. I must say, I'm unsure coming from SQL Server the exact syntax, but it would be something like this:
-- Returns col4 from the first of Table1/Table2/Table3 that has a matching row.
-- A single driving row (v) carries the search values; each later table is only
-- joined when the earlier tables produced no col4 (the "IS NULL" guards in ON).
-- NOTE(review): #col1 / #col2 look like placeholders for the search parameters
-- (probably variables or parameter markers in the original answer) - replace
-- them with the binding syntax of your client/engine before running; confirm.
SELECT COALESCE(t1.col4, t2.col4, t3.col4)
FROM (VALUES (#col1, #col2) ) v(col1, col2)
LEFT JOIN Table1 t1 ON t1.col1 = v.col1 AND t1.col2 = v.col2
-- Table2 is consulted only when Table1 yielded no col4
LEFT JOIN Table2 t2 ON t2.col1 = v.col1 AND t2.col2 = v.col2
AND t1.col4 IS NULL
-- Table3 is consulted only when neither Table1 nor Table2 yielded a col4
LEFT JOIN Table3 t3 ON t3.col1 = v.col1 AND t3.col2 = v.col2
AND t1.col4 IS NULL AND t2.col4 IS NULL;
The idea being to use the VALUES clause (or a SELECT with no FROM) as a driving row.
You can try this:
-- Collect candidate rows from all three tables, tag each with the priority
-- (lvl) of its source table, and keep the single row with the lowest lvl.
-- The select lists need commas: "col1 col2 col3 col4" would be parsed as an
-- alias followed by stray identifiers and fail.
select col1, col2, col3, col4
from (
    select col1, col2, col3, col4, 1 as lvl
    from table_1
    where some_condition
    union all
    select col1, col2, col3, col4, 2 as lvl
    from table_2
    where some_condition
    union all
    select col1, col2, col3, col4, 3 as lvl
    from table_3
    where some_condition
) as t
order by lvl
-- standard row-limiting clause; LIMIT is not accepted by every Db2 configuration
fetch first 1 row only;
Similar to what the other posters suggest, you could use code like this if you explicitly want to follow your "if then" logic
CREATE TABLE TABLE_1(C1 INT, C2 INT, C3 INT, C4 INT);
CREATE TABLE TABLE_2(C1 INT, C2 INT, C3 INT, C4 INT);
CREATE TABLE TABLE_3(C1 INT, C2 INT, C3 INT, C4 INT);

-- Explicit "if not found, try the next table" logic.
-- C4 is the single search row; T1..T3 are the matches per table, T4 is the
-- fallback that echoes the search row when no table matched.
-- Each level must check ALL earlier levels: T2 is empty both when TABLE_2 has
-- no match and when T1 already matched, so testing only the previous level
-- would let T3 (and T4) fire even though TABLE_1 had a hit.
WITH
  C4(C1, C2, C3, C4) AS (VALUES (1,2,3,4))
, T1 AS ( SELECT '1' AS LVL, * FROM TABLE_1 JOIN C4 USING (C1, C2, C3, C4) )
, T2 AS ( SELECT '2' AS LVL, * FROM TABLE_2 JOIN C4 USING (C1, C2, C3, C4)
          WHERE NOT EXISTS (SELECT 1 FROM T1) )
, T3 AS ( SELECT '3' AS LVL, * FROM TABLE_3 JOIN C4 USING (C1, C2, C3, C4)
          WHERE NOT EXISTS (SELECT 1 FROM T1)
            AND NOT EXISTS (SELECT 1 FROM T2) )
, T4 AS ( SELECT '4' AS LVL, * FROM C4
          WHERE NOT EXISTS (SELECT 1 FROM T1)
            AND NOT EXISTS (SELECT 1 FROM T2)
            AND NOT EXISTS (SELECT 1 FROM T3) )
SELECT * FROM T1 UNION ALL
SELECT * FROM T2 UNION ALL
SELECT * FROM T3 UNION ALL
SELECT * FROM T4;

SQL: Ignore Condition in WHERE clause

I am confused on how to exclude one of the conditions in WHERE clause if it evaluates to NULL
-- Asker's original query: returns TB1.COL3 for one row (ROWNUM = 1) of TB1
-- joined to TB2 on COL1, restricted to TB2.COL1 = '12345' and to rows whose
-- TB2.COL3 equals the MIN(TB3.COL4) computed by the correlated subquery.
-- When that subquery returns NULL the comparison is never true, so no row
-- qualifies - which is the situation the question is about.
SELECT TB1.COL3
FROM TB1, TB2
WHERE TB1.COL1 = TB2.Col1
AND TB2.COL1 = '12345'
AND (TB2.COL3 = (SELECT MIN(TB3.COL4)
FROM TB3
WHERE COL1 = TB2.COL1
AND COL2 in ('A', 'B')
-- NOTE(review): inside this innermost subquery both COL1 and TB4.COL1 resolve
-- to TB4, so the predicate compares the column with itself - confirm whether
-- a correlation to an outer table was intended.
AND COL4 IN (SELECT COL3 FROM TB4
WHERE COL1 = TB4.COL1)))
AND ROWNUM = 1;
How can I modify the above query to ignore the "AND condition TB2.COL3" if the following condition SELECT MIN(TB3.COL4) FROM TB3 evaluates to some value except NULL. If NULL the above query should be executed as below:
-- Asker's fallback form: the same join and TB2.COL1 filter as the query above,
-- with the TB2.COL3 = (SELECT MIN(...)) condition removed entirely.
SELECT TB1.COL3
FROM TB1, TB2
WHERE TB1.COL1 = TB2.Col1
AND TB2.COL1 = '12345'
AND ROWNUM = 1;
I think you could pull this off with a coalesce:
-- Same query as in the question, with the MIN(...) subquery wrapped in
-- COALESCE: when the subquery yields NULL the comparison falls back to
-- TB2.COL3 = TB2.COL3, which neutralises the filter for non-NULL TB2.COL3.
-- (Rows whose TB2.COL3 is itself NULL are still rejected, since NULL = NULL
-- is not true.)
SELECT TB1.COL3
FROM TB1, TB2
WHERE TB1.COL1 = TB2.Col1
AND TB2.COL1 = '12345'
AND (TB2.COL3 = COALESCE((SELECT MIN(TB3.COL4)
                          FROM TB3
                          WHERE COL1 = TB2.COL1
                          AND COL2 in ('A', 'B')
                          -- innermost lookup reads TB4, exactly as in the question
                          AND COL4 IN (SELECT COL3 FROM TB4
                                       WHERE COL1 = TB4.COL1)), TB2.COL3))
AND ROWNUM = 1;
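Here we are just wrapping that SELECT statement in COALESCE which, if the subquery returns NULL, will use the second parameter's value instead, which is the same column we are comparing against. Since TB2.COL3 = TB2.COL3 is TRUE for every non-NULL TB2.COL3, this filter will then have no impact on the result set. (Note that rows where TB2.COL3 is itself NULL are still filtered out, because NULL = NULL does not evaluate to TRUE.)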
Also, it's generally recommended that you stop using commas in your FROM clause and use proper JOIN syntax instead. It's been around for over two decades now:
-- ANSI-join version of the COALESCE answer: TB1 and TB2 are joined explicitly,
-- and the TB2.COL3 filter is neutralised (for non-NULL TB2.COL3) whenever the
-- MIN(...) subquery returns NULL.
SELECT
    TB1.COL3
FROM
    TB1
    INNER JOIN TB2
        ON TB1.COL1 = TB2.Col1
WHERE
    TB2.COL1 = '12345'
    AND
    (
        TB2.COL3 = COALESCE
        (
            (
                SELECT
                    MIN(TB3.COL4)
                FROM
                    TB3
                WHERE
                    COL1 = TB2.COL1
                    AND COL2 in ('A', 'B')
                    AND COL4 IN
                    (
                        -- innermost lookup reads TB4, exactly as in the question
                        SELECT
                            COL3
                        FROM
                            TB4
                        WHERE
                            COL1 = TB4.COL1
                    )
            ),
            TB2.COL3
        )
    )
    AND ROWNUM = 1;

Need to loop or iterate over a result set

I have two columns Col1, Col2 from Table1 in database DB1. Col1 contains data and Col2 is all null. I have a column Col3 which comes from a query on Table2 in another database DB2. I want to set Col2 based on the following condition -
If a row's Col1 value is equal to the Col3 value of any row, then set Col2 to that Col3 value. Otherwise, set Col2 equal to Col1. It's weird, but I need to do it.
Example -
Col3 is ALWAYS = {Aaa, RTY, Ccc, DART, Car, Fish, SPOON}
Col1,Col2,Col3
Aaa, null, Aaa
Bbb, null, RTY
Ccc, null, Ccc
...
How to make a query which will give us the result below ?
Col1,Col2,Col3
Aaa, Aaa, Aaa
Bbb, Bbb, RTY
Ccc, Ccc, Ccc
...
Assuming that you want to select rather than update - the most obvious solution to this would be in splitting up the problem into its individual bits:
Replace exy with your Col1, Col2, Col3 view/table/alias.
;WITH exy(Col1, Col2, Col3) AS (SELECT 'Aaa', null, 'Aaa' union SELECT 'Bbb', null, 'RTY' union SELECT 'Ccc', null, 'Ccc')
-- Branch 1: Col1 occurs somewhere in Col3 -> Col2 takes that matching Col3 value.
-- The lookup must compare Col3 with a.Col1; comparing Col3 with a.Col3 matches
-- every row with itself and would always pick this branch.
SELECT a.Col1, m.Col3 AS Col2, a.Col3
FROM exy AS a
INNER JOIN (SELECT DISTINCT Col3 FROM exy) AS m
    ON m.Col3 = a.Col1
UNION
-- Branch 2: Col1 occurs nowhere in Col3 -> Col2 falls back to Col1.
SELECT a.Col1, a.Col1 AS Col2, a.Col3
FROM exy AS a
WHERE NOT EXISTS (SELECT 1 FROM exy AS m WHERE m.Col3 = a.Col1)
Then you can simplify it down a little:
;WITH exy(Col1, Col2, Col3) AS (SELECT 'Aaa', null, 'Aaa' union SELECT 'Bbb', null, 'RTY' union SELECT 'Ccc', null, 'Ccc')
-- Single-pass form: look up Col1 among the distinct Col3 values; when there is
-- a match Col2 is that Col3 value, otherwise (m.Col3 is NULL) it is Col1.
-- EXISTS is a predicate and cannot be compared with 1, and the outer table
-- needs the alias "a" that the select list refers to.
SELECT
    a.Col1,
    COALESCE(m.Col3, a.Col1) AS Col2,
    a.Col3
FROM exy AS a
LEFT JOIN (SELECT DISTINCT Col3 FROM exy) AS m
    ON m.Col3 = a.Col1
There might be a nicer solution but we'd need to see your original tables and understand your problem. This can also be done with joins. But there is often benefit in clarity over performance. When performance is concerned, you will have to know what your targets are before optimising too early.
Could this work for your situation?
-- Fill table1.col2 with the matching col3 from table2 when one exists,
-- otherwise with the row's own col1.
update target
set col2 = coalesce(resolved.col3, resolved.col1)
from database1.dbo.table1 as target
inner join
(
    -- every col1 of table1 paired with its match among table2's distinct col3
    -- values (col3 is NULL when there is no match)
    select
        src.col1,
        lookup.col3
    from database1.dbo.table1 as src
    left outer join
    (
        select distinct col3
        from database2.dbo.table2
    ) as lookup
        on src.col1 = lookup.col3
) as resolved
    on target.col1 = resolved.col1
Set col2 to the first non-null value from either col3 or col1 from table 1 left outer joined to the distinct values in table 2, so when there is no match, col3 is null.

SQL Server Query to find different names in two tables

I have a situation here.
I have two tables:
I need a sql query which will print the Col names which are different in two tables.
For example, in this case the query should print the result as:
The reason is clear that m is present in Table-1 but not present in Table-2. Similar is the case with z which is in Table-2 but not in Table-1.
I am really stuck here, please help.
The column names are not case-sensitive.
Thanks.
You could also use NOT EXISTS to get the result:
-- Names that occur in only one of the two tables:
-- first those present in table1 but absent from table2 ...
select only_in_1.col1
from table1 as only_in_1
where not exists (
    select 1
    from table2 as probe
    where probe.col1 = only_in_1.col1
)
union all
-- ... then those present in table2 but absent from table1.
select only_in_2.col1
from table2 as only_in_2
where not exists (
    select 1
    from table1 as probe
    where probe.col1 = only_in_2.col1
);
See SQL Fiddle with Demo
Or even NOT IN:
-- Names that occur in only one of the two tables, using NOT IN.
-- The subqueries exclude NULLs on purpose: a single NULL in the list makes
-- every "col1 NOT IN (...)" test evaluate to UNKNOWN and the branch would
-- return no rows at all.
select t1.col1
from table1 t1
where t1.col1 not in (select t2.col1
                      from table2 t2
                      where t2.col1 is not null)
union all
select t2.col1
from table2 t2
where t2.col1 not in (select t1.col1
                      from table1 t1
                      where t1.col1 is not null);
See SQL Fiddle with Demo
Try:
-- Full outer join keeps unmatched rows from both sides; a row is unmatched
-- when one side's Col1 came back NULL, and COALESCE returns the side that exists.
SELECT COALESCE(lhs.Col1, rhs.Col1)
FROM [Table-1] AS lhs
FULL OUTER JOIN [Table-2] AS rhs
    ON lhs.Col1 = rhs.Col1
WHERE NOT (lhs.Col1 IS NOT NULL AND rhs.Col1 IS NOT NULL)
SQLFiddle here.
Alternatively:
-- A name belongs to the result when it shows up in exactly one table.
-- Each table is de-duplicated first so that count(*) counts tables, not rows:
-- without DISTINCT a name repeated inside a single table would reach
-- count(*) = 2 and be dropped even though the other table lacks it.
select Col1
from (
    select distinct Col1 from [Table-1]
    union all
    select distinct Col1 from [Table-2]
) sq
group by Col1
having count(*) = 1
SQLFiddle here.
I Think simplest one is this
-- Values of TABLE1.COL1 missing from TABLE2.COL2, plus values of TABLE2.COL2
-- missing from TABLE1.COL1; UNION also collapses duplicate values.
SELECT COL1 AS ResultCol
FROM TABLE1
WHERE COL1 NOT IN (SELECT COL2 FROM TABLE2)
UNION
SELECT COL2 AS ResultCol
FROM TABLE2
WHERE COL2 NOT IN (SELECT COL1 FROM table1)
-- Table variables must be named with a leading @ (a leading # denotes a
-- temporary table and is not valid in DECLARE ... TABLE).
declare @tab1 table(id int, col1 varchar(1))
declare @tab2 table(id int, col1 varchar(1))

INSERT INTO @tab1
([id], [Col1])
VALUES
(1, 'A'),
(2, 'B'),
(3, 'm'),
(4, 'c')

INSERT INTO @tab2
([id], [Col1])
VALUES
(1, 'A'),
(2, 'B'),
(3, 'C'),
(4, 'z')

-- Pair every row with its match in the other table (x); rows whose x is NULL
-- have no counterpart and are the names that differ between the two tables.
select d.id, d.col1
from
(
    -- rows of @tab1, with the matching @tab2 name (NULL when absent)
    select a.id, a.col1, b.col1 x
    from @tab1 a
    left join @tab2 b on a.col1 = b.col1
    union
    -- rows of @tab2, with the matching @tab1 name (NULL when absent)
    select b.id, b.col1, a.col1 x
    from @tab1 a
    right join @tab2 b on a.col1 = b.col1
) d
where d.x is null
There's a feature specifically for this operation: the set operators EXCEPT and INTERSECT.
Find which values (single column result or multi-column result) are not present in the following queries
-- Rows of TableA that have no equal row in TableB
SELECT col1
FROM TableA
EXCEPT
SELECT col1
FROM TableB

-- Rows of TableB that have no equal row in TableA
SELECT col1
FROM TableB
EXCEPT
SELECT col1
FROM TableA
Likewise, the INTERSECT keyword tells you what is shared
--What's in table A and table B
-- (the set operator is spelled INTERSECT; "INTERCEPT" is not a SQL keyword)
SELECT col1 FROM TableA
INTERSECT
SELECT col1 FROM TableB
You can also use FULL OUTER JOIN operator.
Visual Representation of SQL Joins
-- Unmatched names from either table, numbered in name order.
-- A FULL JOIN row is unmatched when one side's Col1 is NULL; COALESCE picks
-- the side that is present.
SELECT
    ROW_NUMBER() OVER (ORDER BY COALESCE(lhs.Col1, rhs.Col1)) AS id,
    COALESCE(lhs.Col1, rhs.Col1) AS ResultCol
FROM Table1 AS lhs
FULL JOIN Table2 AS rhs
    ON lhs.Col1 = rhs.Col1
WHERE NOT (lhs.Col1 IS NOT NULL AND rhs.Col1 IS NOT NULL)
See example on SQLFiddle

SQL Server self join

I have a table as below:
table1
col1 col2 col3
1 A 1
2 B 1
3 A 2
4 D 2
5 X 3
6 G 3
Now, can I get a result like the one below from the above table? The col2 values in the result set below are paired up based on col3 in table1 above. In table1, A and B have the same value in col3 (i.e. 1), so in the result set we just separate them into two columns, and so on: A and D have the same col3 value (i.e. 2), and X and G have the same col3 value (i.e. 3). Can anyone write a SQL query to get the following result?
col1 col2
A B
A D
X G
-- Pair the two rows that share a col3 value; the col1 comparison in the join
-- keeps each pair once, with the lower-col1 row on the left.
SELECT
    first_row.col2  AS col1,
    second_row.col2 AS col2
FROM table1 AS first_row
INNER JOIN table1 AS second_row
    ON  second_row.col3 = first_row.col3
    AND second_row.col1 > first_row.col1
-- Pair the two rows that share a col3 value.
-- The left row must be the one with the LOWER col1 so the pairs come out as
-- A B / A D / X G; with ">" every pair is reversed (B A, D A, G X).
SELECT
    t1.col2 AS col1,
    t2.col2
FROM Table1 t1
INNER JOIN Table1 t2
    ON t1.col3 = t2.col3
WHERE t1.col1 < t2.col1
If you are on SQL Server 2005 or later:
-- Number the rows inside each col3 group, then put row 1 and row 2 side by side.
-- Rows are numbered by col1 (the table's row order) rather than by col2:
-- ordering by col2 would sort the group {X, G} alphabetically and return
-- "G X" instead of the requested "X G".
WITH ranked AS (
    SELECT
        *,
        rn = ROW_NUMBER() OVER (PARTITION BY col3 ORDER BY col1)
    FROM table1
)
SELECT
    col1 = r1.col2,
    col2 = r2.col2
FROM ranked r1
INNER JOIN ranked r2
    ON r1.col3 = r2.col3
WHERE r1.rn = 1
  AND r2.rn = 2
-- Pair the two rows that share a col3 value.
-- The a.col1 < b.col1 predicate is required: joining on col3 alone also
-- returns every row paired with itself and each pair in both orders.
select
    a.col2 as "col1",
    b.col2 as "Col2"
from
    table1 a
    join table1 b
        on  a.col3 = b.col3
        and a.col1 < b.col1
With some assumptions on the table structure, i.e. there exists exactly 2 entries in col3 for every unique value in col3.
-- Table variable holding the sample data. The name needs a leading @, and
-- col2 gets an explicit length instead of relying on the default for varchar.
DECLARE @table1 TABLE([col1] int, [col2] varchar(1), [col3] int);
INSERT INTO @table1(col1, col2, col3) VALUES(1, 'A', 1);
INSERT INTO @table1(col1, col2, col3) VALUES(2, 'B', 1);
INSERT INTO @table1(col1, col2, col3) VALUES(3, 'A', 2);
INSERT INTO @table1(col1, col2, col3) VALUES(4, 'D', 2);
INSERT INTO @table1(col1, col2, col3) VALUES(5, 'X', 3);
INSERT INTO @table1(col1, col2, col3) VALUES(6, 'G', 3);

-- One output row per distinct col3 value (GroupId):
--   [a] = col2 of the row with the lowest col1 in the group,
--   [b] = col2 of the row with the highest col1 in the group.
SELECT
    (SELECT TOP(1) t1.[col2] FROM @table1 AS t1 WHERE t1.[col3] = g.[GroupId] ORDER BY t1.[col1] ASC) AS [a],
    (SELECT TOP(1) t2.[col2] FROM @table1 AS t2 WHERE t2.[col3] = g.[GroupId] ORDER BY t2.[col1] DESC) AS [b]
FROM
    (SELECT DISTINCT u.col3 AS [GroupId] FROM @table1 AS u) AS g