Select columns from different tables without repeating rows - sql

For this example tables:
Table A:
Col1 Col2
-----------
2015 A
2015 B
2015 C
Table B:
Col1 Col2
------------
2015 X
2015 Y
2015 Z
I want a query that returns:
Col1 Col2
------------
A X
B Y
C Z
I have tried something like this:
-- Asker's attempt: the two derived tables are comma-joined with no join
-- condition, so every 2015 row of TABLE_A is paired with every 2015 row of
-- TABLE_B (a Cartesian product). The comma join is kept on purpose because it
-- is what produces the duplicated output shown below; only the stray trailing
-- comma after TB (a syntax error) has been removed.
SELECT TA.COL2, TB.COL2
FROM
(SELECT * FROM TABLE_A WHERE COL1=2015) TA,
(SELECT * FROM TABLE_B WHERE COL1=2015) TB
But I'm getting duplicated results
Col1 Col2
-----------
A X
A Y
A Z
B X
B Y
B Z
C X
C Y
C Z

A way to do it is to use the row number:
-- Pair the n-th 2015 row of TABLE_A with the n-th 2015 row of TABLE_B.
-- ROWNUM (Oracle) is assigned as rows are retrieved; there is no ORDER BY
-- here, so which rows get paired depends on retrieval order.
SELECT TA.COL2, TB.COL2
FROM
    (SELECT TABLE_A.COL2, ROWNUM AS R1 FROM TABLE_A WHERE COL1=2015) TA
INNER JOIN
    (SELECT TABLE_B.COL2, ROWNUM AS R2 FROM TABLE_B WHERE COL1=2015) TB
    -- Join on the aliases actually defined above (TA/TB, not T1/T2).
    ON TA.R1 = TB.R2

In a SELECT statement, include DISTINCT or DISTINCTROW keyword after the SELECT clause.
For More Details:
http://www.geeksengine.com/database/basic-select/eliminate-duplicate-rows.php

Not sure why you want this but here is my method for doing it.
-- Number the rows of each table within every COL1 value (ordered by COL2),
-- then line up the rows that share both COL1 and sequence number.
WITH a_numbered AS (
    SELECT
        COL1,
        COL2,
        ROW_NUMBER() OVER (PARTITION BY COL1 ORDER BY COL2) AS Seq
    FROM TABLE_A
),
b_numbered AS (
    SELECT
        COL1,
        COL2,
        ROW_NUMBER() OVER (PARTITION BY COL1 ORDER BY COL2) AS Seq
    FROM TABLE_B
)
SELECT
    a_numbered.COL2 AS Col1,
    b_numbered.COL2 AS Col2
FROM a_numbered
INNER JOIN b_numbered
    ON b_numbered.COL1 = a_numbered.COL1
    AND b_numbered.Seq = a_numbered.Seq

A union all/group by approach may be what you want:
-- Stack the 2015 rows of both tables, each numbered by COL2 order, with the
-- other table's column left NULL; grouping by the sequence number then folds
-- the n-th row of each table into one output row. MAX ignores the NULL filler.
SELECT MAX(COL2_A) AS COL2_a, MAX(COL2_B) AS COL2_B
FROM ((SELECT COL2 AS COL2_A, NULL AS COL2_B,
              ROW_NUMBER() OVER (ORDER BY COL2) AS seqnum
       FROM TABLE_A
       WHERE COL1 = 2015
      ) UNION ALL
      (SELECT NULL, COL2, ROW_NUMBER() OVER (ORDER BY COL2) AS seqnum
       FROM TABLE_B
       WHERE COL1 = 2015
      )
      -- The dangling UNION ALL that followed the second branch was removed:
      -- it had no right-hand operand and made the statement invalid.
     ) t
GROUP BY seqnum;
Alternatively, use a FULL JOIN:
-- Number the 2015 rows of each table by COL2 order and FULL JOIN on that
-- number, so a row without a counterpart still appears (with NULL opposite).
WITH a_numbered AS (
    SELECT
        src.COL2,
        ROW_NUMBER() OVER (ORDER BY src.COL2) AS seqnum
    FROM TABLE_A src
    WHERE src.COL1 = 2015
),
b_numbered AS (
    SELECT
        src.COL2,
        ROW_NUMBER() OVER (ORDER BY src.COL2) AS seqnum
    FROM TABLE_B src
    WHERE src.COL1 = 2015
)
SELECT
    a_numbered.COL2 AS COL2_a,
    b_numbered.COL2 AS COL2_B
FROM a_numbered
FULL JOIN b_numbered
    ON b_numbered.seqnum = a_numbered.seqnum;
Both of these methods return all values, even if one table has more rows than the other.

Related

Compare main table with all records from another table to derive the column value of the main table

I have two tables, tb1 and main_tbl, with the sample data shown below, and I'm trying to derive the value of the COL_VAL column for the main table. I have written a query that returns the expected value, but I'm looking for a way to reduce the number of lines of code and still get the same result.
main_tbl Table:
col1 col2 col3 COL_VAL
123 Hi 568 ??
tb1 Table:
col1 col2 col3 col4 col5
123 LN Y IP 2021-02-01
123 LN N NON-IP 2021-02-01
123 MOB Y AP 2021-02-01
123 MOB N NON-AP 2021-02-01
Main Query:
-- Derives COL_VAL for every main_tbl row as the first non-NULL col4 among four
-- lookups against db.tb1, tried in this order (see the COALESCE below):
--   FRT_QRY (MOB/Y), SND_QRY (LN/Y), FIF_QRY (MOB/N), TRD_QRY (LN/N).
-- Each lookup keeps only the row with the latest col5 (Rnk = 1) for its filter.
SELECT
d.COL1,
d.COL2,
d.COL3,
CAST(COALESCE(FRT_QRY.COL4,SND_QRY.COL4,FIF_QRY.COL4,TRD_QRY.COL4) AS STRING) AS COL_VAL
FROM
(
SELECT * FROM db.main_tbl)d
-- SND_QRY: col2 = 'LN', col3 = 'Y'
LEFT JOIN
(
SELECT * FROM
( SELECT *,
ROW_NUMBER() OVER(PARTITION BY col1,col2,col3 ORDER BY col5 desc) as Rnk
FROM ( select * from db.tb1 where col2 IN ('LN') and col3 = 'Y') b
) a where a.Rnk =1
) SND_QRY
on d.col1=SND_QRY.col1
-- TRD_QRY: col2 = 'LN', col3 = 'N'
LEFT JOIN
(
SELECT * FROM
( SELECT *,
ROW_NUMBER() OVER(PARTITION BY col1,col2,col3 ORDER BY col5 desc) as Rnk
FROM ( select * from db.tb1 where col2 IN ('LN') and col3 = 'N') b
) a where a.Rnk =1
) TRD_QRY
on d.col1=TRD_QRY.col1
-- FRT_QRY: col2 = 'MOB', col3 = 'Y'
LEFT JOIN
(
SELECT * FROM
( SELECT *,
ROW_NUMBER() OVER(PARTITION BY col1,col2,col3 ORDER BY col5 desc) as Rnk
FROM ( select * from db.tb1 where col2 IN ('MOB') and col3 = 'Y') b
) a where a.Rnk =1
) FRT_QRY
on d.col1=FRT_QRY.col1
-- FIF_QRY: col2 = 'MOB', col3 = 'N'
LEFT JOIN
(
SELECT * FROM
( SELECT *,
ROW_NUMBER() OVER(PARTITION BY col1,col2,col3 ORDER BY col5 desc) as Rnk
FROM ( select * from db.tb1 where col2 IN ('MOB') and col3 = 'N') b
) a where a.Rnk =1
) FIF_QRY
on d.col1=FIF_QRY.col1
Expected Output - main_tbl Table:
col1 col2 col3 COL_VAL
123 Hi 568 AP
To start with something, I noticed that all your subqueries contain different filters applied to the same columns and those columns are in partition by clause. This means that filters do not affect row_number and you can calculate row_number once without filters and use filters as join conditions or filter in join subqueries:
-- RANKED: the latest row (by col5) of db.tb1 for every (col1, col2, col3).
WITH RANKED AS (
    SELECT numbered.*
    FROM (
        SELECT
            src.*,
            ROW_NUMBER() OVER (PARTITION BY col1, col2, col3 ORDER BY col5 DESC) AS Rnk
        FROM db.tb1 src
    ) numbered
    WHERE numbered.Rnk = 1
)
-- The per-lookup filters live in the ON clauses so main_tbl rows without a
-- match are still returned by the LEFT JOINs.
SELECT
    d.COL1,
    d.COL2,
    d.COL3,
    CAST(COALESCE(FRT_QRY.COL4, SND_QRY.COL4, FIF_QRY.COL4, TRD_QRY.COL4) AS STRING) AS COL_VAL
FROM db.main_tbl d
LEFT JOIN RANKED SND_QRY
    ON d.col1 = SND_QRY.col1 AND SND_QRY.col2 = 'LN' AND SND_QRY.col3 = 'Y'
LEFT JOIN RANKED TRD_QRY
    ON d.col1 = TRD_QRY.col1 AND TRD_QRY.col2 = 'LN' AND TRD_QRY.col3 = 'N'
LEFT JOIN RANKED FRT_QRY
    ON d.col1 = FRT_QRY.col1 AND FRT_QRY.col2 = 'MOB' AND FRT_QRY.col3 = 'Y'
LEFT JOIN RANKED FIF_QRY
    ON d.col1 = FIF_QRY.col1 AND FIF_QRY.col2 = 'MOB' AND FIF_QRY.col3 = 'N'
Also, if your Hive version supports CTE materialization, use this setting:
set hive.optimize.cte.materialize.threshold=2;--HIVE-11752
RANKED CTE will be calculated only one time and the same result used in all joins.
Also you can try to eliminate many joins with the same table. Calculate all fields in single query using CASE expressions + aggregation and join only one time. Aggregation works faster than joins:
-- RANKED: one row per col1 holding the latest col4 (by col5) for each of the
-- four (col2, col3) combinations, pivoted into separate columns.
WITH RANKED AS (
    SELECT
        col1,
        -- aggregate all in single row per col1
        MAX(CASE WHEN col2 = 'LN'  AND col3 = 'Y' THEN COL4 END) AS SND_COL4,
        MAX(CASE WHEN col2 = 'LN'  AND col3 = 'N' THEN COL4 END) AS TRD_COL4,
        MAX(CASE WHEN col2 = 'MOB' AND col3 = 'Y' THEN COL4 END) AS FRT_COL4,
        MAX(CASE WHEN col2 = 'MOB' AND col3 = 'N' THEN COL4 END) AS FIF_COL4
    FROM (
        SELECT
            b.*,
            ROW_NUMBER() OVER (PARTITION BY col1, col2, col3 ORDER BY col5 DESC) AS Rnk
        FROM db.tb1 b
        -- Same four combinations as the original OR chain, written compactly.
        WHERE col2 IN ('LN', 'MOB')
          AND col3 IN ('Y', 'N')
    ) a
    WHERE a.Rnk = 1
    GROUP BY col1
)
SELECT
    d.COL1,
    d.COL2,
    d.COL3,
    -- Priority order: MOB/Y, LN/Y, MOB/N, LN/N.
    CAST(COALESCE(R.FRT_COL4, R.SND_COL4, R.FIF_COL4, R.TRD_COL4) AS STRING) AS COL_VAL
FROM db.main_tbl d
-- The original join was missing the ON keyword.
LEFT JOIN RANKED R
    ON d.col1 = R.col1

If two rows have same id but different col2, how can you keep only the ones that have max col3?

I have a table with four columns (id, col2, col3, col4), where col2 is A or B and col3 and col4 are integers. My problem is that there are many rows that have the same id and a different col2 value, and I want to select ONLY the rows that have the maximum value in col3.
For instance, if we have:
id | col2 | col3 | col4
1 | A | 3 | 2
1 | B | 5 | 3
2 | A | 6 | 2
...
I want to keep only the tuple (1, B, 5, 3). How can I achieve this?
I've tried this:
-- Asker's attempt: col2 and col4 are selected but are neither aggregated nor
-- listed in GROUP BY.
SELECT id, col2, MAX(col3), col4 FROM t GROUP BY id;
but I get an error saying that this is not a valid GROUP BY statement.
You can use keep:
-- One row per id: the maximum col3, plus the col2/col4 values taken from the
-- row(s) ranked first by col3 DESC (Oracle KEEP ... DENSE_RANK FIRST).
-- If several rows tie on the top col3, MAX picks among their col2/col4 values.
SELECT id,
       MAX(col2) KEEP (DENSE_RANK FIRST ORDER BY col3 DESC) AS col2,  -- comma was missing here
       MAX(col3) AS col3,
       MAX(col4) KEEP (DENSE_RANK FIRST ORDER BY col3 DESC) AS col4
FROM t
GROUP BY id;
Or:
-- Rank the rows of each id by col3 (highest first) and keep the top one.
WITH ranked AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY src.id ORDER BY src.col3 DESC) AS seqnum
    FROM t src
)
SELECT
    id,
    col2,
    col3,
    col4
FROM ranked
WHERE seqnum = 1;
This query:
-- For every id that has more than one distinct col2, return the row(s)
-- whose col3 equals that id's maximum col3.
SELECT t.*
FROM tablename AS t
INNER JOIN (
    SELECT
        id,
        MAX(col3) AS col3
    FROM tablename
    GROUP BY id
    HAVING COUNT(DISTINCT col2) > 1
) AS top_per_id
    ON top_per_id.id = t.id
    AND top_per_id.col3 = t.col3
returns for each id that has different values in col2 only 1 row: the one containing the maximum value of col3.
If you also want the other rows where each id does not have different values in col2, then use UNION ALL:
-- Part 1: ids with more than one distinct col2 -> row(s) with that id's max col3.
SELECT t.*
FROM tablename AS t
INNER JOIN (
    SELECT
        id,
        MAX(col3) AS col3
    FROM tablename
    GROUP BY id
    HAVING COUNT(DISTINCT col2) > 1
) AS top_per_id
    ON top_per_id.id = t.id
    AND top_per_id.col3 = t.col3
UNION ALL
-- Part 2: rows whose id has no other row with a different col2.
SELECT t.*
FROM tablename AS t
WHERE NOT EXISTS (
    SELECT 1
    FROM tablename
    WHERE id = t.id
      AND col2 <> t.col2
)
-- For each id, keep the row(s) whose col3 equals that id's maximum col3.
-- The subquery is correlated on id; an uncorrelated MAX(col3) would return
-- only the rows holding the table-wide maximum.
select t.*
from TableName t
where t.col3 = (select max(m.col3) from TableName m where m.id = t.id)

Equally divide a field into 2

How can I equally divide the 'col' field of table 't2' with a,b,c,d as records into 2 equal fields : Col1 (a,b) & Col2 (c,d)?
Table:t2
Col
A
B
C
D
Output:
Col1 Col2
A C
B D
this is what i have tried:
SELECT first_half.col1, "" AS col2
FROM (
    SELECT TOP 50 PERCENT Col AS Col1
    FROM t2
    ORDER BY Col ASC
) AS first_half
UNION ALL
SELECT "", second_half.col1
FROM (
    SELECT TOP 50 PERCENT Col AS Col1
    FROM t2
    ORDER BY Col DESC
) AS second_half
Output from above:
col1 col2
a
b
d
c
This is as far as I have been able to get (in MS Access). Any help is much appreciated.
Create table 't2'
CREATE TABLE t2 (
    Col CHAR
)
Insert Values
INSERT INTO t2 (Col)
VALUES ("a")
INSERT INTO t2 (Col)
VALUES ("b")
INSERT INTO t2 (Col)
VALUES ("c")
INSERT INTO t2 (Col)
VALUES ("d")
Yes, you can use a crosstab if you have a sequential ID:
TRANSFORM First(src.Col) AS Col
SELECT ([ID] + 1) \ 2 AS DualID
FROM (
    SELECT
        ID,
        [ID] Mod 2 AS ColID,
        [Col]
    FROM YourTable
) AS src
GROUP BY ([ID] + 1) \ 2
PIVOT src.ColID;
SELECT low_half.COL1 & "-" & high_half.COL1 AS PAIRS
FROM (
    SELECT TOP 50 PERCENT lo.Col AS COL1
    FROM t2 AS lo
    ORDER BY lo.Col ASC
) AS low_half
INNER JOIN (
    SELECT TOP 50 PERCENT hi.Col AS COL1
    FROM t2 AS hi
    ORDER BY hi.Col DESC
) AS high_half
    ON low_half.COL1 <> high_half.COL1
WHERE (
    SELECT COUNT(*)
    FROM t2 AS below
    WHERE below.Col < low_half.COL1
) = (
    SELECT COUNT(*)
    FROM t2 AS above
    WHERE above.Col > high_half.COL1
)

SQL - Two Columns into One Distinct Ordered Column

If I have a table like this:
Col 1 | Col 2
-------------
A | 1
A | 2
B | 1
C | 1
C | 2
C | 3
How can I write a query to pull one column that looks like this --
Col 1
------
A
1
2
B
1
C
1
2
3
-- All col1 values followed by all col2 values in a single column
-- (no de-duplication, no guaranteed order).
SELECT src.col1
FROM Some_Table_You_Did_Not_Name src
UNION ALL
SELECT src.col2
FROM Some_Table_You_Did_Not_Name src
If the order matters in your example then you want this:
-- Emit each col1 value once, followed by that value's col2 entries in order.
-- Rows are grouped with DENSE_RANK over col1: a per-row ROW_NUMBER is unique
-- for every row, so DISTINCT on (col1, RN) could never collapse repeated col1
-- values and each header would be repeated once per row.
-- NOTE(review): col1 and col2 share one output column; on engines with strict
-- UNION typing both may need a CAST to a common character type.
WITH data AS
(
    SELECT col1, col2, DENSE_RANK() OVER (ORDER BY col1) AS grp
    FROM Some_Table_You_Did_Not_Name
)
SELECT col
FROM (
    -- One header row per distinct col1 value.
    SELECT DISTINCT col1 AS col, grp, 1 AS O
    FROM data
    UNION ALL
    -- Every col2 value, kept under its col1 group.
    SELECT col2 AS col, grp, 2 AS O
    FROM data
) JC_IS_THAT_GUY
ORDER BY grp ASC, O ASC, col ASC
You can use a query like the following:
-- Header rows (distinct Col1, position 0) and detail rows (Col2, position 1)
-- are stacked, then sorted by owning Col1, position, and value.
SELECT stacked.val AS Col1
FROM (
    SELECT DISTINCT
        Col1 AS val,
        Col1 AS owner,
        0 AS pos
    FROM mytable
    UNION ALL
    SELECT
        Col2 AS val,
        Col1 AS owner,
        1 AS pos
    FROM mytable
) AS stacked
ORDER BY stacked.owner, stacked.pos, stacked.val
Demo here
There is absolutely no need to do a UNION, UNION ALL or reference the table more than once to unpivot data...
-- If Col2 is always a well-ordered sequence like the test data:
-- each source row yields Col1 (only when Col2 = 1) and then Col2 as text;
-- the NULL placeholders are filtered out afterwards.
SELECT
    x.Value AS Col1
FROM #TestData AS td
CROSS APPLY (
    VALUES
        (CASE WHEN td.Col2 = 1 THEN td.Col1 ELSE NULL END),
        (CAST(td.Col2 AS CHAR(1)))
) AS x (Value)
WHERE x.Value IS NOT NULL;
-- If Col2 is not a clean sequence: number the rows within each Col1 first,
-- and emit Col1 only on the first row of its group.
WITH cre_Add_RN AS (
    SELECT
        td.Col1,
        td.Col2,
        ROW_NUMBER() OVER (PARTITION BY td.Col1 ORDER BY td.Col2) AS RN
    FROM #TestData AS td
)
SELECT
    x.Value
FROM cre_Add_RN AS arn
CROSS APPLY (
    VALUES
        (CASE WHEN arn.RN = 1 THEN arn.Col1 ELSE NULL END),
        (CAST(arn.Col2 AS CHAR(1)))
) AS x (Value)
WHERE x.Value IS NOT NULL;
HTH,
Jason

How to get the following output?

How to get the following output ?
Input:
t1
-----------------
col1 col2
----------------
2 a
1 c
3 b
----------------
Output:
t1
-----------------
col1 col2
----------------
1 a
2 b
3 c
----------------
You can try using row number like:
-- Replace col1 with a running number assigned in col2 order.
SELECT
    ROW_NUMBER() OVER (ORDER BY src.col2) AS col1,
    col2
FROM t1 src
ORDER BY src.col2
-- Sort col1 and col2 independently, number each sorted list, and pair the
-- entries that share the same position.
SELECT
    C1.col1,
    C2.col2
FROM (
    SELECT
        col1,
        ROW_NUMBER() OVER (ORDER BY col1) AS rn
    FROM t1
) C1
INNER JOIN (
    SELECT
        col2,
        ROW_NUMBER() OVER (ORDER BY col2) AS rn
    FROM t1
) C2
    ON C2.rn = C1.rn
ORDER BY C1.rn
I think following query may help you.
-- Sorts whole rows by col1; each col2 value stays attached to its original
-- col1 value (the two columns are not sorted independently).
SELECT * FROM t1 ORDER BY col1 ;
please check this link for more practice
http://www.sqlfiddle.com/#!3/2e3e9/1/0
Try this..
-- Oracle: sort each column in its own subquery, number the sorted rows with
-- ROWNUM, and pair the rows that share the same position.
SELECT
    col1,
    col2
FROM (
    SELECT col1, ROWNUM rn
    FROM (SELECT col1 FROM t1 ORDER BY col1)
) a
INNER JOIN (
    SELECT col2, ROWNUM rn
    FROM (SELECT col2 FROM t1 ORDER BY col2)
) b
    ON b.rn = a.rn