I have tried 2 ways but there is an error.
The first one is:
-- Combine id/user pairs from all four sources into one result set.
-- Only posts carries a postType; every other source supplies NULL for it.
-- Each branch needs FROM before its table name: without it the statement
-- does not parse, which is the error the two original attempts ran into.
(
    select id, userId, postType from posts
    union
    select id, createdByUserId as userId, NULL as postType from hashTags
)
union
(
    select id, userId, NULL as postType from businessPages
    union
    select id, userId, NULL as postType from grpGroups
)
And the second one is:
-- Flat form of the same four-way union: one branch per source table.
-- Only posts carries a postType; the other branches supply NULL for it.
-- FROM is required before businessPages and grpGroups; leaving it out is a
-- syntax error.
select id, userId, postType from posts
union
select id, createdByUserId as userId, NULL as postType from hashTags
union
select id, userId, NULL as postType from businessPages
union
select id, userId, NULL as postType from grpGroups
Related
I created a new table with repeating records with duplicates.
I am trying to find the most efficient way to deduplicate records as this will be run
on a table with millions of records.
If you nest multiple CTEs, does the shape of your data matter? Is the processing done in memory, or is it written to temp tables when there is a lot of data?
-- Build the demo table t1.cte4: one row per id, holding an array of
-- single-field structs (last_name). Duplicate names in the inline sample
-- data are deliberately kept so that later queries have something to dedup.
CREATE OR REPLACE TABLE t1.cte4 AS
WITH t1 AS (
    SELECT 1 AS id, 'eren' AS last_name
    UNION ALL SELECT 1, 'yilmaz'
    UNION ALL SELECT 1, 'kaya'
    UNION ALL SELECT 1, 'kaya'
    UNION ALL SELECT 2, 'smith'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'brown'
)
SELECT
    id,
    ARRAY_AGG(STRUCT(last_name)) AS last_name_rec
FROM t1
GROUP BY id;
I can remove duplicates as follows.
QUERY 1 How to dedup the concat_struct ?
-- Per id: a '~'-separated string of the distinct last names, plus an array
-- of structs rebuilt from the unnested records. Only the string is
-- deduplicated; the struct array still contains the repeated names.
SELECT
    id,
    STRING_AGG(DISTINCT ln.last_name, '~') AS concat_string,
    ARRAY_AGG(STRUCT(ln.last_name)) AS concat_struct
FROM `t1.cte4`
CROSS JOIN UNNEST(last_name_rec) AS ln
GROUP BY id;
QUERY 1
QUERY 2 Is there a better way than this to dedup?
-- Group to one row per (id, last_name), collect the names of each id with a
-- window aggregate, render that array as JSON, and collapse the repeated
-- per-id rows with DISTINCT.
SELECT DISTINCT
    id,
    TO_JSON_STRING(
        ARRAY_AGG(ln.last_name) OVER (PARTITION BY id)
    ) AS json_string
FROM `t1.cte4`
CROSS JOIN UNNEST(last_name_rec) AS ln
GROUP BY
    id,
    ln.last_name;
QUERY 2
How do I get it out of the table as distinct rather than using the CTE? This does not dedup.
-- Unnest the stored records and re-aggregate them per id.
-- No deduplication happens here: repeated names come back repeated.
SELECT
    id,
    ARRAY_AGG(STRUCT(ln.last_name)) AS concat_struct
FROM t1.cte4
CROSS JOIN UNNEST(last_name_rec) AS ln
GROUP BY id;
I can't do this.
-- Attempted variant: apply DISTINCT directly to the STRUCT inside ARRAY_AGG.
-- NOTE(review): the author reports this form cannot be used; presumably the
-- engine rejects DISTINCT on a STRUCT argument -- confirm against the docs.
select id, ARRAY_AGG(distinct STRUCT( ln.last_name )) as concat_struct from t1.cte4,
unnest(last_name_rec) ln group by id;
UPDATE: Decompose the struct before deduplication and then compose it back:
-- Step 1 (inner query): flatten the records to distinct (id, last_name) pairs.
-- Step 2 (outer query): wrap each surviving name in a STRUCT again and
-- aggregate back into one array per id.
SELECT
    id,
    ARRAY_AGG(STRUCT(last_name)) AS concat_struct
FROM (
    SELECT DISTINCT
        id,
        ln.last_name
    FROM cte4
    CROSS JOIN UNNEST(last_name_rec) AS ln
) AS d
GROUP BY id
(original answer based on unwanted change of table definition follows)
Just use array_agg(distinct ...):
-- Same sample data, but the names are aggregated as plain strings rather
-- than structs, so ARRAY_AGG(DISTINCT ...) can drop the duplicates directly.
WITH t1 AS (
    SELECT 1 AS id, 'eren' AS last_name
    UNION ALL SELECT 1, 'yilmaz'
    UNION ALL SELECT 1, 'kaya'
    UNION ALL SELECT 1, 'kaya'
    UNION ALL SELECT 2, 'smith'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'jones'
    UNION ALL SELECT 2, 'brown'
)
SELECT
    id,
    ARRAY_AGG(DISTINCT last_name) AS last_name_rec
FROM t1
GROUP BY id;
How to make a UNION ALL work for repeated fields if the order of the fields does not match?
In the example below I try to UNION data_1_nested and data_2_nested, while the repeated field nested has two fields: id and age but in different order.
I could UNNEST and re-nest, but this would not be very helpful if I have more than one nested field that I need to UNION on.
Example:
-- Reproduction of the problem: two nested datasets whose repeated field
-- holds the same two members (age, grade) but in a different order.
WITH
-- Flat sample rows for id a123.
data_1 AS (
    SELECT 'a123' AS id, 1 AS age, 'a' AS grade
    UNION ALL
    SELECT 'a123' AS id, 3 AS age, 'b' AS grade
    UNION ALL
    SELECT 'a123' AS id, 4.5 AS age, 'c' AS grade
),
-- Flat sample rows for id b456.
data_2 AS (
    SELECT 'b456' AS id, 6 AS age, 'e' AS grade
    UNION ALL
    SELECT 'b456' AS id, 5 AS age, 'f' AS grade
    UNION ALL
    SELECT 'b456' AS id, 2.5 AS age, 'g' AS grade
),
-- Nested with struct members in the order (age, grade).
data_1_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(age, grade)) AS nested
    FROM data_1
    GROUP BY id
),
-- Nested with struct members in the order (grade, age).
data_2_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(grade, age)) AS nested
    FROM data_2
    GROUP BY id
)
-- The two "nested" columns differ in member order; this is the union the
-- question is asking how to make work.
SELECT * FROM data_1_nested
UNION ALL
SELECT * FROM data_2_nested
Below should work for you
-- Rebuild data_2_nested's array with its struct members in (age, grade)
-- order, so that both branches of the UNION ALL have the same column type.
SELECT *
FROM data_1_nested
UNION ALL
SELECT
    id,
    ARRAY(
        SELECT AS STRUCT age, grade
        FROM t.nested
    )
FROM data_2_nested AS t
if applied to sample data from your question - output is
I modified your data a little bit to make two nested fields that need to be unioned. I also added a JS function for parsing the JSON. It is an ugly solution, but it seems to be working. I am not sure whether it is scalable (how many functions would have to be created to convert different nested fields).
-- Parses a JSON array of {age, grade} objects back into an array of structs.
-- age is declared FLOAT64 because the sample data mixes whole and fractional
-- ages (4.5, 2.5), which makes the age column FLOAT64; an INT64 return type
-- cannot represent those values.
-- NOTE(review): this relies on the engine matching JSON keys to struct
-- members by name, so the member order in the JSON does not matter -- confirm.
CREATE TEMP FUNCTION JsonToItems(input STRING)
RETURNS ARRAY<STRUCT<age FLOAT64, grade STRING>>
LANGUAGE js AS """
return JSON.parse(input);
""";
with
-- Flat sample rows for id a123.
data_1 as (
Select 'a123' as id, 1 as age, 'a' as grade
union all
Select 'a123' as id, 3 as age,'b' as grade
union all
Select 'a123' as id, 4.5 as age,'c' as grade
)
,
-- Flat sample rows for id b456.
data_2 as (
Select 'b456' as id, 6 as age,'e' as grade
union all
Select 'b456' as id, 5 as age,'f' as grade
union all
Select 'b456' as id, 2.5 as age,'g' as grade
)
,
-- Two nested fields, both with struct members in the order (age, grade).
data_1_nested as (
SELECT id,
array_agg(STRUCT(
age,grade
)) as nested,
array_agg(STRUCT(
age,grade
)) as nested2
from data_1
group by 1
)
,
-- Two nested fields, both with struct members in the order (grade, age).
data_2_nested as (
SELECT id,
array_agg(STRUCT(
grade, age
)) as nested,
array_agg(STRUCT(
grade, age
)) as nested2
from data_2
group by 1
)
-- Serialise each nested field to a JSON string so both branches of the
-- UNION ALL are plain STRING columns, then parse the strings back into a
-- single, uniform struct layout.
select id, JsonToItems(json), JsonToItems(json2) from (
SELECT id, TO_JSON_STRING(nested) as json, TO_JSON_STRING(nested2) as json2 from data_1_nested
union all
SELECT id, TO_JSON_STRING(nested) as json, TO_JSON_STRING(nested2) as json2 from data_2_nested
);
I have the following query, which produces the data below.
All I want is to show the list of users in alphabetical order, with the first record being All, All.
Query:
-- A fixed 'All' / 'All' row followed by every distinct creator and that
-- creator's user name. The trailing ORDER BY applies to the whole UNION ALL
-- result and sorts on the second column (Prepby).
SELECT
    'All' AS created_by,
    'All' AS Prepby
FROM dual
UNION ALL
SELECT DISTINCT
    to_char(d.created_by) AS created_by,
    get_user_name(d.created_by) AS Prepby
FROM Hpml_Gp_dtl d
WHERE d.created_by IS NOT NULL
ORDER BY Prepby;
Use a CASE expression in ORDER BY.
Query
-- Wrap the union in a derived table so the outer ORDER BY can sort on an
-- expression: the 'All' row gets the lowest sort key and therefore comes
-- first; every other row follows in Prepby order.
SELECT t.*
FROM (
    SELECT
        'All' AS created_by,
        'All' AS Prepby
    FROM dual
    UNION ALL
    SELECT DISTINCT
        to_char(d.created_by) AS created_by,
        get_user_name(d.created_by) AS Prepby
    FROM Hpml_Gp_dtl d
    WHERE d.created_by IS NOT NULL
) t
ORDER BY
    CASE WHEN Prepby = 'All' THEN 1 ELSE 2 END,
    Prepby;
Perform the ORDER BY in a sub-query:
-- Emit the fixed 'All' row first, then append the distinct creators, which
-- are sorted by user name inside the sub-query.
-- NOTE(review): this depends on UNION ALL keeping the branches in written
-- order and preserving the sub-query's row order; that is presumably not
-- guaranteed by the engine -- confirm before relying on it.
SELECT
    'All' AS created_by,
    'All' AS Prepby
FROM DUAL
UNION ALL
SELECT *
FROM (
    SELECT DISTINCT
        to_char(created_by) AS created_by,
        get_user_name(created_by) AS prepby
    FROM Hpml_Gp_dtl
    WHERE created_by IS NOT NULL
    ORDER BY prepby
)
The data is saved in a table as shown below.
I need to show data as below.
Please suggest a query
This is how you would do it in SQL Server:
-- Turn the three date columns of each row into three (Name, ACTION,
-- ACTION_DATE) rows, one per event. Column names come from the first branch.
-- Every branch must return three columns: the comma after 'Started' is
-- required, otherwise START_DTTM is read as an alias for the literal and the
-- branch has only two columns.
-- NOTE(review): the start column is spelled START_DTTM here while the other
-- columns end in _DT -- verify the name against the table definition.
SELECT Name, 'Joined' AS [ACTION], JOIN_DT AS ACTION_DATE
FROM SomeTable
UNION ALL
SELECT Name, 'Started', START_DTTM
FROM SomeTable
UNION ALL
SELECT Name, 'ended', END_DT
FROM SomeTable
Try this:
-- Turn the three date columns of each row into three (Name, Action,
-- Action_Date) rows, then sort the combined result by Name.
-- The derived table is given an alias because several engines (SQL Server
-- and MySQL among them) reject a sub-query in FROM that has none.
SELECT Name, Action, Action_Date FROM (
SELECT Name, 'Joined' as Action, JOIN_DT as ACTION_DATE FROM TableA
UNION ALL
SELECT Name, 'Started', START_DT FROM TableA
UNION ALL
SELECT Name, 'Ended', END_DT FROM TableA) t
ORDER BY Name;
I have query
-- All users except id 2, combined with all users2 rows except id 3.
-- UNION (not UNION ALL) removes rows that appear in both branches.
SELECT id, name FROM users WHERE id <> 2
UNION
SELECT id, name FROM users2 WHERE id <> 3;
I want the result sorted so that the rows from the first query come first, followed by the rows from the second query. Is that possible?
Add a column to order on
-- Tag each branch with a constant so the combined result can be sorted by
-- the branch it came from: users rows (1) first, then users2 rows (2).
SELECT
    id,
    name,
    1 AS unionOrder
FROM users
WHERE id <> 2
UNION
SELECT
    id,
    name,
    2 AS unionOrder
FROM users2
WHERE id <> 3
ORDER BY unionOrder
You can also do it like this:
-- Return the users rows first, then the users2 rows, each group ordered by id.
-- An ORDER BY placed inside a parenthesised UNION branch does not determine
-- the order of the combined result, so each branch is tagged with a sort key
-- and the ordering is applied once, on the outer query. The outer SELECT
-- still returns only id and name.
SELECT id, name
FROM (
    SELECT id, name, 1 AS union_order
    FROM users
    WHERE id != 2
    UNION ALL
    SELECT id, name, 2 AS union_order
    FROM users2
    WHERE id != 3
) combined
ORDER BY union_order, id;