SQL Union sort by Union - sql

I have query
-- Rows from users (all but id 2) merged with rows from users2 (all but id 3);
-- UNION also drops duplicate (id, name) pairs.
SELECT
    id,
    name
FROM users
WHERE id <> 2
UNION
SELECT
    id,
    name
FROM users2
WHERE id <> 3;
I want the result sorted so that the rows from the first SELECT come first, followed by the rows from the second SELECT. Is that possible?

Add a column to order on
-- Each branch carries a constant marker so the merged rows can be sorted by source.
SELECT
    id,
    name,
    1 AS unionOrder
FROM users
WHERE id <> 2
UNION
SELECT
    id,
    name,
    2 AS unionOrder
FROM users2
WHERE id <> 3
ORDER BY unionOrder

You can also do it like this:
-- An ORDER BY inside a parenthesised UNION branch does not determine the order
-- of the combined result (MySQL may drop it entirely when there is no LIMIT).
-- Tag each branch with its position and sort once, at the outermost level:
-- all users rows first (by id), then all users2 rows (by id).
SELECT
    id,
    name
FROM (
    SELECT id, name, 1 AS source_order
    FROM users
    WHERE id <> 2
    UNION ALL
    SELECT id, name, 2 AS source_order
    FROM users2
    WHERE id <> 3
) AS combined
ORDER BY source_order, id;

Related

How to SUM values from 2 separate tables that share the same column name in SQL

I have 2 tables that have the exact same columns but different data. The columns are 'name', 'gender' and 'count'. The first table is called names_2014 and the second names_2015. My goal is simply to find the top 5 most popular names amongst both these tables.
I know that to get the most popular names for one table is:
-- Five rows of names_2014 with the highest count.
SELECT
    name,
    count
FROM names_2014
ORDER BY count DESC
LIMIT 5;
However, the closest I've gotten to my goal is:
-- Asker's attempt, kept as posted: it does not produce combined per-name totals.
SELECT name, count
FROM names_2014
UNION DISTINCT -- I've tried UNION ALL as well
-- Only this second branch is aggregated; the GROUP BY below belongs to it alone,
-- so counts for the same name are never added up across the two tables.
SELECT name, SUM(count)
FROM names_2015
GROUP BY name
-- ORDER BY and LIMIT apply to the whole union result.
ORDER BY count DESC
LIMIT 5
I've tried many similar variations to this but none of them are successful. It seems that I need to combine both of the tables, and then SUM(count) and GROUP BY name but I guess I'm not combining the tables properly. Any help is much appreciated as I've spent hours on this and I feel like the solution is so close but I just can't see it. I'm new to SQL and just trying to test my limits.
You may perform the aggregation on a subquery that unions the two tables as the following:
-- Stack the rows of both years, then total each name and keep the five largest totals.
SELECT
    name,
    SUM(count) AS cnt
FROM (
    SELECT name, count FROM names_2014
    UNION ALL
    SELECT name, count FROM names_2015
) AS both_years
GROUP BY name
ORDER BY cnt DESC
LIMIT 5
From your final ask it's not clear if you want to separate these top 5 by source table or not. Following is one answer that you might be looking for:
-- Inline sample rows standing in for the two yearly tables.
WITH name_2014 AS (
    SELECT 'a' AS name, 'm' AS gender, 1 AS cnt
    UNION ALL
    SELECT 'b' AS name, 'f' AS gender, 3 AS cnt
    UNION ALL
    SELECT 'c' AS name, 'm' AS gender, 2 AS cnt
),
name_2015 AS (
    SELECT 'd' AS name, 'f' AS gender, 10 AS cnt
    UNION ALL
    SELECT 'b' AS name, 'f' AS gender, 5 AS cnt
    UNION ALL
    SELECT 'e' AS name, 'm' AS gender, 1 AS cnt
)
-- One row per source: the name with the largest summed cnt in that table.
(
    SELECT 'name_2014' AS src_table_name, name, SUM(cnt) AS total_counts
    FROM name_2014
    GROUP BY name
    ORDER BY total_counts DESC
    LIMIT 1
)
UNION ALL
(
    SELECT 'name_2015' AS src_table_name, name, SUM(cnt) AS total_counts
    FROM name_2015
    GROUP BY name
    ORDER BY total_counts DESC
    LIMIT 1
)
This sample query will give you top 1 names per table. (You can change limit and get top 5 from your query.)
If you do not want to know table names you can tweak the above query.
If you do not care about source tables at all and just want top 5 then:
-- Inline sample rows standing in for the two yearly tables.
WITH name_2014 AS (
    SELECT 'a' AS name, 'm' AS gender, 1 AS cnt
    UNION ALL
    SELECT 'b' AS name, 'f' AS gender, 3 AS cnt
    UNION ALL
    SELECT 'c' AS name, 'm' AS gender, 2 AS cnt
),
name_2015 AS (
    SELECT 'd' AS name, 'f' AS gender, 10 AS cnt
    UNION ALL
    SELECT 'b' AS name, 'f' AS gender, 5 AS cnt
    UNION ALL
    SELECT 'e' AS name, 'm' AS gender, 1 AS cnt
)
-- Total cnt per name across both sources, five largest totals first.
SELECT
    name,
    SUM(cnt) AS total_count
FROM (
    SELECT name, cnt FROM name_2014
    UNION ALL
    SELECT name, cnt FROM name_2015
) AS combined  -- alias added: several engines reject an unnamed derived table
GROUP BY name                -- named instead of positional (was: group by 1)
ORDER BY total_count DESC    -- named instead of positional (was: order by 2 desc)
LIMIT 5

How to dedup array_agg in bigquery

I created a new table with repeating records with duplicates.
I am trying to find the most efficient way to deduplicate records as this will be run
on a table with millions of records.
If you nest multiple CTEs, does your data structure matter? Is the processing done in memory, or does it write to temp tables when there is a lot of data?
-- Builds t1.cte4: one row per id holding an array of STRUCT(last_name),
-- with the repeated names from the seed rows left in.
CREATE OR REPLACE TABLE t1.cte4 AS
WITH seed_rows AS (
    SELECT 1 AS id, 'eren' AS last_name
    UNION ALL SELECT 1, 'yilmaz'
    UNION ALL SELECT 1, 'kaya'
    UNION ALL SELECT 1, 'kaya'
    UNION ALL SELECT 2, 'smith'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'brown'
)
SELECT
    id,
    ARRAY_AGG(STRUCT(last_name)) AS last_name_rec
FROM seed_rows
GROUP BY id;
I can remove duplicates as follows.
QUERY 1 How to dedup the concat_struct ?
-- Flattens last_name_rec per id: the string column is de-duplicated by DISTINCT,
-- while the rebuilt array of structs still contains the repeated names.
SELECT
    id,
    STRING_AGG(DISTINCT ln.last_name, '~') AS concat_string,
    ARRAY_AGG(STRUCT(ln.last_name)) AS concat_struct
FROM `t1.cte4`
CROSS JOIN UNNEST(last_name_rec) AS ln
GROUP BY id;
QUERY 1
QUERY 2 Is there a better way than this to dedup?
-- Asker's second approach: GROUP BY id, last_name leaves one row per distinct pair,
-- and the window ARRAY_AGG then gathers those names for each id.
-- NOTE(review): SELECT DISTINCT presumably folds the repeated per-id rows into one;
-- the window has no ORDER BY, so the element order inside the array is not fixed.
select distinct id,
TO_JSON_STRING(ARRAY_AGG(ln.last_name) OVER (PARTITION BY id)) json_string
from `t1.cte4`, unnest(last_name_rec) ln
group by id,
ln.last_name;
QUERY 2
How do I get it out of the table as distinct, rather than using the CTE? This does not dedup.
-- Flattens last_name_rec and rebuilds the struct array per id; repeated names are kept.
SELECT
    id,
    ARRAY_AGG(STRUCT(ln.last_name)) AS concat_struct
FROM t1.cte4
CROSS JOIN UNNEST(last_name_rec) AS ln
GROUP BY id;
I can't do this.
-- Asker reports this variant cannot be used: DISTINCT is applied directly to a
-- STRUCT value inside ARRAY_AGG. Kept as posted.
select id, ARRAY_AGG(distinct STRUCT( ln.last_name )) as concat_struct from t1.cte4,
unnest(last_name_rec) ln group by id;
UPDATE: Decompose the struct before deduplication and then compose it back:
-- Flatten the array, collapse duplicate (id, last_name) pairs, then rebuild
-- the array of structs per id.
SELECT
    id,
    ARRAY_AGG(STRUCT(last_name)) AS concat_struct
FROM (
    SELECT
        id,
        ln.last_name
    -- Dataset-qualified to match the table created in the question (t1.cte4);
    -- the bare name cte4 only resolves when a default dataset is configured.
    FROM `t1.cte4`
    CROSS JOIN UNNEST(last_name_rec) AS ln
    GROUP BY id, ln.last_name
) AS d
GROUP BY id
(original answer based on unwanted change of table definition follows)
Just use array_agg(distinct ...):
-- Aggregates plain last_name strings (not structs), so DISTINCT can remove repeats.
WITH people AS (
    SELECT 1 AS id, 'eren' AS last_name
    UNION ALL SELECT 1, 'yilmaz'
    UNION ALL SELECT 1, 'kaya'
    UNION ALL SELECT 1, 'kaya'
    UNION ALL SELECT 2, 'smith'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'brown'
)
SELECT
    id,
    ARRAY_AGG(DISTINCT last_name) AS last_name_rec
FROM people
GROUP BY id;

Select number of IDs in more than one table (from three tables)

I need the count of this:
-- IDs that occur more than once across A, B and C stacked together.
SELECT DISTINCT ID
FROM (
    SELECT ID FROM A
    UNION ALL
    SELECT ID FROM B
    UNION ALL
    SELECT ID FROM C
) ids
GROUP BY ID
HAVING COUNT(*) > 1;
but I have no idea how to do it.
Use a subquery:
-- Number of IDs that occur more than once across A, B and C stacked together.
SELECT COUNT(*)
FROM (
    SELECT ID
    FROM (
        SELECT ID FROM A
        UNION ALL
        SELECT ID FROM B
        UNION ALL
        SELECT ID FROM C
    ) all_ids
    GROUP BY ID
    HAVING COUNT(*) > 1
) repeated_ids;
SELECT DISTINCT is almost never needed with GROUP BY and definitely not in this case.
You just want to find the IDs that appear two or more times across tables A, B, and C. The SQL is below:
-- Number of IDs that occur at least twice across A, B and C stacked together.
SELECT COUNT(*)
FROM (
    -- Only id is projected: the original also selected an unnamed count(1)
    -- column that the outer query never read.
    SELECT id
    FROM (
        SELECT ID FROM A
        UNION ALL
        SELECT ID FROM B
        UNION ALL
        SELECT ID FROM C
    ) all_ids  -- alias added: several engines reject an unnamed derived table
    GROUP BY id
    HAVING COUNT(*) > 1
) tmp

Select value based on priority of another column Oracle SQL

I would like to select only one email address per id, if the id has both a work and personal email, I would only like to display the work email.
-- Sample rows: id 1 has only a work address, id 2 has both types, id 3 only a personal one.
WITH emails AS (
    SELECT '1' AS id, 'work' AS email_type, 'abc#gmail.com' AS email FROM dual
    UNION ALL
    SELECT '2' AS id, 'work' AS email_type, '123#yahoo.com' AS email FROM dual
    UNION ALL
    SELECT '2' AS id, 'personal' AS email_type, '456#msn.com' AS email FROM dual
    UNION ALL
    SELECT '3' AS id, 'personal' AS email_type, 'test#work.com' AS email FROM dual
)
For this example I would like to display:
id email_type email
1 work abc#gmail.com
2 work 123#yahoo.com
3 personal test#work.com
You can prioritize those values in row_number and get the first row for each id.
-- Rank each id's rows with 'work' ahead of every other type and keep the top-ranked row.
SELECT
    id,
    email_type,
    email
FROM (
    SELECT
        id,
        email_type,
        email,
        ROW_NUMBER() OVER (
            PARTITION BY id
            ORDER BY CASE email_type WHEN 'work' THEN 1 ELSE 2 END
        ) AS rn
    FROM emails
) ranked
WHERE rn = 1
Assuming only possible value for email_type are work and personal, you can use window function row_number:
-- Descending email_type puts 'work' ahead of 'personal'; keep the first row per id.
SELECT *
FROM (
    SELECT
        e.*,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY email_type DESC) AS seqnum
    FROM emails e
) ranked
WHERE seqnum = 1;
You can use a subquery to figure out if there is a work email or not. With your sample data, you can use the MAX function to return the "work" type if it exists, and if it doesn't it will just return "personal". Joining back on that will give the appropriate result.
-- MAX(email_type) picks 'work' whenever an id has one ('work' sorts after
-- 'personal'); joining back on id and type fetches the matching address.
WITH chosen AS (
    SELECT
        id,
        MAX(email_type) AS e_type
    FROM table_name
    GROUP BY id
)
SELECT
    chosen.id,
    chosen.e_type,
    src.email
FROM chosen
LEFT JOIN table_name src
    ON  chosen.id = src.id
    AND chosen.e_type = src.email_type
ORDER BY chosen.id

SQL - How to Order By in UNION query

Is there a way to union two tables, but keep the rows from the first table appearing first in the result set? However orderby column is not in select query
For example:
Table 1
name surname
-------------------
John Doe
Bob Marley
Ras Tafari
Table 2
name surname
------------------
Lucky Dube
Abby Arnold
Result
Expected Result:
name surname
-------------------
John Doe
Bob Marley
Ras Tafari
Lucky Dube
Abby Arnold
I am bringing Data by following query
-- Asker's attempt, kept as posted ("TABLE 1" / "TABLE 2" are placeholder names).
-- The ORDER BY is attached to the first SELECT, ahead of the UNION.
SELECT name,surname FROM TABLE 1 ORDER BY ID
UNION
SELECT name,surname FROM TABLE 2
The above query is not keeping track of order by after union.
P.S - I dont want to show ID in my select query
I am getting ORDER BY Column by joining tables. Following is my real query
-- Step 1: event types the user has explicitly sorted, stored in a temp table.
-- The ORDER BY here is the ordering the asker wants to keep; the UNION below
-- returns rows in no guaranteed order, so it does not survive on its own.
SELECT
    tbl_Event_Type_Sort_Orders.Appraisal_Event_Type_ID AS Appraisal_Event_Type_ID,
    ISNULL(tbl_Appraisal_Event_Types.Appraisal_Event_Type_Display_Name, 'UnCategorized') AS Appraisal_Event_Type_Display_Name
INTO #temptbl
FROM tbl_Event_Type_Sort_Orders
INNER JOIN tbl_Appraisal_Event_Types
    ON tbl_Event_Type_Sort_Orders.Appraisal_Event_Type_ID = tbl_Appraisal_Event_Types.Appraisal_Event_Type_ID
WHERE 1=1
    AND User_Name='abc'
ORDER BY tbl_Event_Type_Sort_Orders.Sort_Order;

-- Step 2: the sorted types plus every event type used by an appraisal that is
-- assigned to the user in any of the three staff columns.
SELECT * FROM #temptbl
UNION
SELECT DISTINCT
    (tbl_Appraisal_Event_Types.Appraisal_Event_Type_ID) AS Appraisal_Event_Type_ID,
    ISNULL(tbl_Appraisal_Event_Types.Appraisal_Event_Type_Display_Name, 'UnCategorized') AS Appraisal_Event_Type_Display_Name
FROM tbl_Appraisal_Event_Types
INNER JOIN tbl_Appraisal_Events
    ON tbl_Appraisal_Event_Types.Appraisal_Event_Type_ID = tbl_Appraisal_Events.Event_Type_ID
INNER JOIN tbl_Appraisals
    -- Fixed: the original compared tbl_Appraisal_Events.Appraisal_ID with itself,
    -- which pairs every event with every appraisal instead of its own one.
    -- Assumes tbl_Appraisals has an Appraisal_ID column -- TODO confirm.
    ON tbl_Appraisals.Appraisal_ID = tbl_Appraisal_Events.Appraisal_ID
WHERE 1=1
    AND ((tbl_Appraisals.Assigned_To_Staff_User) = 'abc' OR (tbl_Appraisals.Assigned_To_Staff_User2) = 'abc' OR (tbl_Appraisals.Assigned_To_Staff_User3) = 'abc')
Put a UNION ALL in a derived table. To keep duplicate elimination, do select distinct and also add a NOT EXISTS to second select to avoid returning same person twice if found in both tables:
-- Distinct people from table1 first, then distinct people from table2 that do
-- not already appear in table1; inside each group rows sort by surname, name.
SELECT
    name,
    surname
FROM (
    SELECT DISTINCT name, surname, 1 AS source_no
    FROM table1
    UNION ALL
    SELECT DISTINCT name, surname, 2 AS source_no
    FROM table2 t2
    WHERE NOT EXISTS (
        SELECT 1
        FROM table1 t1
        WHERE t1.name = t2.name
          AND t1.surname = t2.surname
    )
) merged
ORDER BY source_no, surname, name
You can use a column for the table and one for the ID to order by:
-- TableID keeps table1 rows ahead of table2 rows; table1 rows then sort by ID,
-- while every table2 row shares the placeholder ID -1.
SELECT
    x.name,
    x.surname
FROM (
    SELECT ID, 1 AS TableID, name, surname
    FROM table1
    UNION ALL
    SELECT -1 AS ID, 2 AS TableID, name, surname
    FROM table2
) x
ORDER BY x.TableID, x.ID
You can write as below, if you are ok with duplicate data then please use UNION ALL it will be faster:
-- Sorting by ID alone interleaves the two tables whenever their ID ranges
-- overlap, so each branch is tagged with its source and that tag is sorted first.
-- Placeholder names "TABLE 1" / "TABLE 2" are written as table1 / table2
-- because an identifier cannot contain an unquoted space.
SELECT
    name,
    surname
FROM (
    SELECT ID, name, surname, 1 AS table_order
    FROM table1
    UNION
    SELECT ID, name, surname, 2 AS table_order
    FROM table2
) t
ORDER BY table_order, ID
this will order the first row sets first then by anything you need
(haven't tested the code)
-- table_id marks the source so rows of the first table sort ahead of the second;
-- within a source, rows sort by ID.
;WITH tagged_rows AS (
    SELECT ID, name, surname, 1 AS table_id
    FROM TABLE 1
    UNION
    SELECT ID, name, surname, 2 AS table_id
    FROM TABLE 2
)
SELECT
    name,
    surname
FROM tagged_rows
ORDER BY table_id, ID
Simply use a UNION clause without ORDER BY.
-- Plain set union of both tables: duplicate (name, surname) pairs are removed
-- and no row order is requested.
SELECT
    name,
    surname
FROM TABLE 1
UNION
SELECT
    name,
    surname
FROM TABLE 2
if you wanted to order first table use the below query.
-- The original computed ROW_NUMBER() over Id but never sorted by it, and a bare
-- UNION gives no ordering guarantee. Each row is now tagged with its source table,
-- table1 rows carry their Id as sort key, and the final SELECT sorts explicitly.
-- Placeholder names "TABLE 1" / "TABLE 2" are written as table1 / table2
-- because an identifier cannot contain an unquoted space.
;WITH cte_1 AS (
    SELECT name, surname, 1 AS table_order, Id AS sort_id
    FROM table1
    UNION ALL
    -- table2 rows have no sort key of their own; they only need to follow table1.
    SELECT name, surname, 2 AS table_order, NULL AS sort_id
    FROM table2
)
SELECT
    name,
    surname
FROM cte_1
-- GROUP BY keeps UNION's duplicate elimination on (name, surname); a pair found
-- in both tables is placed at its table1 position.
GROUP BY name, surname
ORDER BY MIN(table_order), MIN(sort_id)