Using BigQuery, I would like to split one column, column1, into two separate columns, column2 and column3, with 50% of the records from column1 in column2 and the other 50% in column3. For example, if column1 has 8 records of the number 2, I'd like to create a column2 with 4 records of the number 2 and a column3 with 4 records of the number 2.
Is there a query to write this in BigQuery?
Column1
2
2
2
2
2
2
2
2
Column2
2
2
2
2
Column3
2
2
2
2
try:
-- First half of the table, ordered by Column1, returned as Column2.
-- Every row is numbered and tagged with the total row count; a row is kept
-- while its number is at most half of that total.
SELECT numbered.Column1 AS Column2
FROM (
  SELECT
    Column1,
    ROW_NUMBER() OVER (ORDER BY Column1) AS row_num,
    COUNT(*) OVER () AS total_rows
  FROM `my-project.my-dataset.my-table`
) AS numbered
WHERE numbered.row_num <= numbered.total_rows / 2;
-- Second half of the table, ordered by Column1, returned as Column3.
-- Every row is numbered and tagged with the total row count; a row is kept
-- once its number exceeds half of that total.
SELECT numbered.Column1 AS Column3
FROM (
  SELECT
    Column1,
    ROW_NUMBER() OVER (ORDER BY Column1) AS row_num,
    COUNT(*) OVER () AS total_rows
  FROM `my-project.my-dataset.my-table`
) AS numbered
WHERE numbered.row_num > numbered.total_rows / 2;
This will give you 2 results, one for Column2 and one for Column3, holding the first and second half of the data respectively, ordered by Column1 (the ORDER BY inside the OVER clause decides which rows are numbered first, and therefore which half each row lands in).
For random order try:
-- Shuffle the table once and number every row, so that both halves are cut
-- from the SAME random ordering. Splitting by row number (rather than filtering
-- the second half with NOT IN on the value) keeps duplicated values: with eight
-- rows of the value 2, each half still receives four rows. It is also immune to
-- NULLs in Column1.
CREATE TEMP TABLE a AS (
  SELECT
    Column1,
    ROW_NUMBER() OVER (ORDER BY RAND()) AS row_num,
    COUNT(*) OVER () AS total_rows
  FROM `my-project.my-dataset.my-table`
);

-- Rows numbered past the midpoint become Column3.
SELECT Column1 AS Column3
FROM a
WHERE row_num > total_rows / 2;

-- Rows numbered up to the midpoint become Column2.
SELECT Column1 AS Column2
FROM a
WHERE row_num <= total_rows / 2
In this case you'll get 3 results: first one is the temporary table creation and the other 2 are the columns 2 and 3.
My column structure:
Column0 Column1
aaa abc
aaa abc
aaa xyx
aaa NA
bbb fgh
bbb NA
bbb NA
bbb NA
ccc NA
ccc NA
ccc NA
ccc NA
For each distinct 'Column0' value, I want the 'Column1' value that occurs most often, unless that value is NA, in which case I want the next most frequent value.
If all 'Column1' values for a 'Column0' value are NA, then the result can be NA.
So expected value:
Column0 Column1
aaa abc
bbb fgh
ccc NA
This will give the correct result:
-- Sample data held in a table variable (table variables take the @ prefix;
-- # is reserved for temporary tables created with CREATE TABLE).
DECLARE @t table(Column0 char(3), Column1 varchar(3))
INSERT @t (Column0, Column1) values
 ('aaa','abc'),('aaa','abc'),('aaa','xyx'),('aaa','NA')
,('bbb','fgh'),('bbb','NA'),('bbb','NA'),('bbb','NA')
,('ccc','NA'),('ccc','NA'),('ccc','NA'),('ccc','NA')

-- CTE:  per (column0, column1) pair, count the rows whose column1 is not 'NA'.
--       The CASE yields NULL for 'NA', which COUNT ignores, so 'NA' always
--       scores 0 and can only win when a group contains nothing else.
-- CTE2: rank the rows of each column0 group by that score, highest first.
--       column1 is a tie-breaker so equal counts give a repeatable winner.
;WITH CTE as
(
    SELECT
        column0,
        column1,
        count(case when column1 <> 'NA' THEN 1 end) over (partition by column0, column1) cnt
    FROM @t
), CTE2 as
(
    SELECT
        column0,
        column1,
        row_number() over (partition by column0 order by cnt desc, column1) rn
    FROM CTE
)
SELECT column0, column1
FROM CTE2
WHERE rn = 1
Result:
column0 column1
aaa abc
bbb fgh
ccc NA
You can use two CTEs and the ranking function ROW_NUMBER:
-- value_counts: every source row tagged with how often its
--               (Column0, Column1) pair occurs.
-- ranked:       rows of each Column0 group ordered so that non-'NA' values
--               come first and, within that, the most frequent value first.
WITH value_counts AS
(
    SELECT
        Column0,
        Column1,
        COUNT(*) OVER (PARTITION BY Column0, Column1) AS Cnt
    FROM dbo.TableName
),
ranked AS
(
    SELECT
        Column0,
        Column1,
        ROW_NUMBER() OVER (
            PARTITION BY Column0
            ORDER BY
                CASE WHEN Column1 = 'NA' THEN 1 ELSE 0 END ASC,
                Cnt DESC
        ) AS RN
    FROM value_counts
)
SELECT
    Column0,
    Column1
FROM ranked
WHERE RN = 1
Demo
How about something like this?
-- One output row per Column0. The correlated subquery picks the most frequent
-- non-'NA' Column1 of that group; when the group has no such value the
-- subquery yields NULL and ISNULL substitutes 'NA'.
SELECT
    grp.Column0,
    ISNULL(
        (
            SELECT TOP (1) cand.Column1
            FROM dbo.YourTable AS cand
            WHERE cand.Column1 <> 'NA'
              AND cand.Column0 = grp.Column0
            GROUP BY cand.Column1
            ORDER BY COUNT(*) DESC
        ),
        'NA'
    ) AS Column1
FROM dbo.YourTable AS grp
GROUP BY grp.Column0
SQL Fiddle
And with an index
-- Index keyed on (Column0, Column1): the columns the per-group lookup
-- matches on and groups by.
CREATE INDEX IX_YourTable_Column0
    ON YourTable (Column0, Column1)
You get a nice looking query plan.
And a version that deals with NULL values in Column0.
-- Same per-group lookup, but the groups are matched with
-- EXISTS (... INTERSECT ...) instead of "=". INTERSECT treats two NULLs as
-- equal, so rows whose Column0 is NULL form a group of their own.
SELECT
    grp.Column0,
    ISNULL(
        (
            SELECT TOP (1) cand.Column1
            FROM dbo.YourTable AS cand
            WHERE cand.Column1 <> 'NA'
              AND EXISTS (SELECT grp.Column0 INTERSECT SELECT cand.Column0)
            GROUP BY cand.Column1
            ORDER BY COUNT(*) DESC
        ),
        'NA'
    ) AS Column1
FROM dbo.YourTable AS grp
GROUP BY grp.Column0
The query plan for this version is the same as the one above.
You can use row_number() with an aggregation:
-- Aggregate to one row per (column0, column1), then rank the values inside
-- each column0 group. 'NA' is sorted after every real value so it is only
-- chosen when the group contains nothing else; among the remaining values the
-- most frequent one wins.
select column0, column1
from (select column0, column1,
             row_number() over (partition by column0
                                order by case when column1 = 'NA' then 1 else 0 end,
                                         count(*) desc
                               ) as seqnum
      from [table]
      group by column0, column1
     ) t
where seqnum = 1;
If you want to allow duplicates in the case of ties, then use rank() or dense_rank() instead of row_number().
I want to run a query on a table with these values
Column1 Column2 Column3
-----------------------
a b c
d e f
Result should be
Column1 a
Column2 b
Column3 c
Column1 d
Column2 e
Column3 f
Basically a key value pairs. Is it possible in Oracle?
Thanks for the help in advance
You can do it with UNION ALL, like this:
-- Turn each column into (Name, Value) pairs: one SELECT per source column,
-- labelled with that column's name, stacked with UNION ALL so that no pair
-- is dropped as a duplicate.
SELECT
    'Column1' AS Name,
    Column1   AS Value
FROM my_table
UNION ALL
SELECT
    'Column2' AS Name,
    Column2   AS Value
FROM my_table
UNION ALL
SELECT
    'Column3' AS Name,
    Column3   AS Value
FROM my_table
Here is a demo on sqlfiddle.
I have an SQLite table called match that has two columns: column1 and column2 that contain integer values:
column1 column2
------------------
5 6
6 8
8 9
90 91
1 20
10 20
I want to match duplicate numbers found in either column and join them, including each match's second value, so that my search result would return:
5, 6, 8, 9
1, 20, 10
(notice that 90 and 91 have no matches and therefore are not included).
My 'guess' at making this is:
-- Asker's attempt: four filters over the same table, merged with UNION
-- (which also removes duplicate rows from the combined result).

-- Branch 1: rows whose column2 value also appears as a column1 value.
-- The HAVING only requires a non-empty group, so every non-NULL column1 passes.
SELECT column1, column2
FROM match
WHERE column2
IN (SELECT column1
FROM match
GROUP BY column1 HAVING (COUNT(column1) > 0))
UNION
-- Branch 2: rows whose column1 value appears in the column2 list built below.
-- NOTE(review): the subquery selects column2 but groups by column1, so it
-- returns one column2 per column1 group; which one is up to SQLite when a
-- group has several rows. Confirm whether GROUP BY column2 was intended.
SELECT column1, column2
FROM match
WHERE column1
IN (SELECT column2
FROM match
GROUP BY column1 HAVING (COUNT(column2) > 0))
UNION
-- Branch 3: rows whose column1 value occurs more than once in column1.
SELECT column1, column2
FROM match
WHERE column1
IN (SELECT column1
FROM match
GROUP BY column1 HAVING (COUNT(column1) > 1))
UNION
-- Branch 4: rows whose column2 value occurs more than once in column2.
SELECT column1, column2
FROM match
WHERE column2
IN (SELECT column2
FROM match
GROUP BY column2 HAVING (COUNT(column2) > 1))
and the result is almost what I need:
5 6
6 8
8 9
1 20
10 20
But what I really need is to have the result grouped somehow. For example:
(5, 6, 8, 9) (1, 10, 20)
Is this possible? And is my SQL attempt over-complicated?
I think this is what you want: http://sqlfiddle.com/#!7/05747/9
-- all_values: every number from either column, one row per occurrence.
-- repeated:   numbers that occur more than once across both columns.
-- linked:     rows of match that touch at least one repeated number.
-- The final UNION lists each distinct number found on a linked row.
WITH all_values AS (
    SELECT column1 AS myColumn
    FROM match
    UNION ALL
    SELECT column2 AS myColumn
    FROM match
),
repeated AS (
    SELECT myColumn
    FROM all_values
    GROUP BY myColumn
    HAVING count(*) > 1
),
linked AS (
    SELECT column1, column2
    FROM match
    WHERE column1 IN (SELECT myColumn FROM repeated)
       OR column2 IN (SELECT myColumn FROM repeated)
)
SELECT column1 AS newColumn
FROM linked
UNION
SELECT column2 AS newColumn
FROM linked
Column1 Column2 Column3
------- ------- -------
jim 1788 5F
jim 2000 9F
jim 500 9F
ben 190 4H
matt 400 46
matt 20 3G
I need to run a query that outputs:
Column1 MaxValue PL
------- ------- -------
jim 2000 9F
jim 2000 NULL
ben 190 4H
matt 400 46
matt 400 NULL
For each value in Column1 (e.g. jim, ben, matt): we group the data by Column1 and for each group we display the row that has the maximum value on column2.
Then, each row found in this manner is displayed again with NULL in column3, provided that grouping by Column1 returns more than one row and Column2 contains values smaller than the maximum found in the previous step.
ben 190 NULL is not displayed because we have ben only once on Column1.
Thank you in advance for any tips or suggestions.
This is what I tried so far but I receive an error prompting me to include Column2 and Column3 in the GROUP By clause, but if I do so I don't reach the desired output as shown above.
-- Asker's attempt at the view. The query groups by Column1 only, yet the CASE
-- below reads t1.[Column2] and t1.[Column3] outside any aggregate; that is
-- what the GROUP BY error described in the question refers to.
CREATE VIEW VIEWB AS
SELECT DISTINCT t1.Column1,
/* MAX_Value: largest Column2 within the Column1 group */
(MAX(t1.[Column2])) AS [MAX Value],
/* PL: intended to be Column3 of the row holding the maximum, NULL otherwise */
(CASE
-- Compares a per-row value with a per-group aggregate in the same SELECT.
WHEN t1.[Column2] = MAX(t1.[Column2]) THEN t1.[Column3]
ELSE NULL
END) AS PL
FROM TABLEA AS t1
GROUP BY t1.Column1;
Try this code:
-- Sample data held in a table variable (table variables take the @ prefix).
DECLARE @t TABLE (Column1 VARCHAR(50), Column2 INT, Column3 VARCHAR(50))
INSERT @t (Column1, Column2, Column3)
VALUES
('jim' ,1788 ,'5F'),
('jim' ,2000 ,'9F'),
('jim' ,500 ,'9F'),
('ben' ,190 ,'4H'),
('matt' ,400 ,'46'),
('matt' ,20 ,'3G')

-- a: every row tagged with its rank inside the Column1 group (largest Column2
--    first) and with the group's largest and smallest Column2.
;WITH a AS (
    SELECT Column1,
        Column3,
        ROW_NUMBER() OVER (PARTITION BY Column1 ORDER BY Column2 DESC) AS RowNum,
        MAX(Column2) OVER (PARTITION BY Column1) AS Maximum,
        MIN(Column2) OVER (PARTITION BY Column1) AS Minimum
    FROM @t
)
-- Rank 1 is the maximum row and keeps its Column3. Rank 2 only serves as the
-- carrier for the extra NULL row, and is emitted only when the group really
-- holds a value below the maximum. Both output columns are named, which a
-- view definition requires.
SELECT Column1,
    Maximum AS MaxValue,
    CASE WHEN RowNum = 1 THEN Column3 END AS PL
FROM a
WHERE RowNum = 1
   OR (RowNum = 2 AND Minimum < Maximum)
-- Keep each name's rows together, real row first and NULL row second.
ORDER BY Column1, RowNum
If you need to, you can put this in a view.
try this
-- cte: every row ranked inside its Column1 group, largest Column2 first.
-- a:   the maximum of each group that has more than one row.
-- b:   the top row of every group, plus one extra row with a NULL third
--      column for each multi-row group.
-- The NULL is written directly in the UNION ALL branch so that it takes
-- Column3's type. A NULL column declared inside a CTE is typed int, which would
-- force values such as '9F' to be converted to int when the branches combine.
;with cte as (
    select Column1, Column2, Column3,
           ROW_NUMBER() over (partition by Column1 order by Column2 desc) as row_num
    from table_A
),
a as (
    select Column1, MAX(Column2) as Column2
    from table_A
    group by Column1
    having COUNT(*) > 1
),
b as (
    select Column1, Column2, Column3 from cte where row_num = 1
    union all
    select Column1, Column2, null from a
)
select Column1, Column2 [MaxValue], Column3 [PL] from b
order by Column2 desc, Column1, ISNULL(Column3,'') desc
-- data:  the sample rows, supplied inline.
-- maxes: the rows of each col1 group whose col2 equals the group maximum.
-- Result: every maxes row, plus a second copy with NULL in the third column
-- for each group that also contains a smaller col2.
with data as (
    select col1, col2, col3
    from (
        values
            ('jim',  1788, '5F'),
            ('jim',  2000, '9F'),
            ('jim',  500,  '9F'),
            ('ben',  190,  '4H'),
            ('matt', 400,  '46'),
            ('matt', 20,   '3G')
    ) as foo (col1, col2, col3)
),
maxes as (
    select tagged.col1, tagged.col2, tagged.col3
    from (
        select
            d.col1,
            d.col2,
            d.col3,
            max(d.col2) over (partition by d.col1) as top_col2
        from data as d
    ) as tagged
    where tagged.col2 = tagged.top_col2
)
select col1, col2, col3
from maxes
union all
select m.col1, m.col2, null
from maxes as m
where exists (
    select 1
    from data as d
    where d.col1 = m.col1
      and d.col2 < m.col2
)
order by col1, col3 desc
-- Sample data held in a table variable (table variables take the @ prefix).
declare @t table(column1 varchar(10), column2 int, column3 varchar(10))
insert into @t (column1, column2, column3)
select 'jim', 1788, '5F' union all
select 'jim', 2000, '9F' union all
select 'jim', 500, '9F' union all
select 'ben', 190, '4H' union all
select 'matt', 400, '46' union all
select 'matt', 20, '3G'

-- First branch: the row with the largest column2 in each column1 group.
select column1, column2, column3
from
(
    select column1, column2, column3,
           row_number() over (partition by column1 order by column2 desc) as sno
    from @t
) as t
where sno = 1
union
-- Second branch: one (name, maximum, NULL) row for every group that has more
-- than one row. Aggregating directly yields exactly one row per group, so no
-- join back to the base rows is needed.
select column1, max(column2), NULL
from @t
group by column1
having count(*) > 1
Might not be the most efficient way of doing this, so if you have a really large table this may not be an ideal solution, but an option:
-- Scratch copy of the sample data.
CREATE TABLE #temp (column1 varchar(10), column2 float, column3 varchar(10))

INSERT #temp
VALUES
    ('jim',  1788, '5F'),
    ('jim',  2000, '9F'),
    ('jim',  500,  '9F'),
    ('ben',  190,  '4H'),
    ('matt', 400,  '46'),
    ('matt', 20,   '3G')

-- First branch: rows holding the largest column2 of their column1 group.
SELECT
    cur.column1,
    cur.column2 AS MaxValue,
    cur.column3 AS PL
FROM #temp AS cur
WHERE cur.column2 = (
    SELECT MAX(peer.column2)
    FROM #temp AS peer
    WHERE peer.column1 = cur.column1
)
UNION
-- Second branch: the same rows again with NULL as the third column, but only
-- for groups that also contain a different column2 value.
SELECT
    cur.column1,
    cur.column2,
    NULL
FROM #temp AS cur
WHERE cur.column2 = (
    SELECT MAX(peer.column2)
    FROM #temp AS peer
    WHERE peer.column1 = cur.column1
)
  AND EXISTS (
    SELECT 1
    FROM #temp AS other
    WHERE other.column1 = cur.column1
      AND other.column2 <> cur.column2
)

DROP TABLE #temp