Bigquery uncommon elements in array

Bigquery uncommon elements in array - sql

Let us say you have a column of arrays like the one below. I am trying to group the rows based on the count of uncommon elements: once the number of distinct uncommon elements reaches 5, the row will be in the next group.
In the example below, the first three rows will be group 1 because their uncommon elements are ['3','4','6','7'], which is 4 in length; but if you add the next row to the group, the array of distinct uncommon elements would be ['1','3','4','5','6','7'], which exceeds the limit of 5 distinct uncommon elements.
-- Sample input: five rows, each with an ordering key (ord) and an array of strings (ar).
WITH arr AS (
    SELECT
        src.ord,
        src.ar
    FROM UNNEST([
        STRUCT(1 AS ord, ['1', '2', '3', '4'] AS ar),
        STRUCT(2 AS ord, ['1', '2', '3'] AS ar),
        STRUCT(3 AS ord, ['1', '2', '6', '7'] AS ar),
        STRUCT(4 AS ord, ['2', '4', '5', '7'] AS ar),
        STRUCT(5 AS ord, ['string1', '5', '6', '7', '8'] AS ar)
    ]) AS src
)
SELECT
    arr.ord,
    arr.ar
FROM arr
I am looking for an output like below
Below is the code I have written so far; it is definitely missing a big piece, but I am adding it in case it is helpful.
-- Work-in-progress attempt: for every row, count the elements that do not occur
-- in all arrays seen so far (within the same subclass) and keep a running total
-- of those counts. It does not yet split rows into groups.
WITH arr AS (
    SELECT 1 AS ord, ['1', '2', '3', '4'] AS ar, 1 AS subclass
    UNION ALL
    SELECT 2, ['1', '2', '3'], 1
    UNION ALL
    SELECT 3, ['1', '2', '6', '7'], 1
    UNION ALL
    SELECT 4, ['2', '4', '5', '7'], 1
    UNION ALL
    SELECT 5, ['string1', '5', '6', '7', '8'], 1
),

-- history: the arrays of the current row and of every earlier row (by ord)
-- in the same subclass.
history_t AS (
    SELECT
        a.*,
        ARRAY_AGG(STRUCT(a.ar AS ar)) OVER (
            PARTITION BY a.subclass
            -- ORDER BY is required for "preceding" to mean "earlier ord";
            -- without it the frame contents are arbitrary.
            ORDER BY a.ord
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS history
    FROM arr AS a
),

-- One output row per (row, array in its history); array_cnt is the number of
-- arrays in that history.
tem2 AS (
    SELECT
        h.* EXCEPT (history, ar),
        ARRAY_LENGTH(h.history) AS array_cnt,
        past.ar AS unnest1
    FROM history_t AS h
    CROSS JOIN UNNEST(h.history) AS past
),

-- One output row per (row, array in its history, element of that array).
tem3 AS (
    SELECT
        t.* EXCEPT (unnest1),
        sku_lst
    FROM tem2 AS t
    CROSS JOIN UNNEST(t.unnest1) AS sku_lst
),

-- sku_freq: how many times each element occurs across the history of a row.
all_sku_freq AS (
    SELECT
        ord,
        array_cnt,
        sku_lst,
        subclass,
        COUNT(*) AS sku_freq
    FROM tem3
    GROUP BY ord, array_cnt, sku_lst, subclass
),

-- An element counts as "uncommon" when its frequency differs from the number
-- of arrays in the history. NOTE(review): this presumes an element appears at
-- most once per array - confirm for real data. Rows whose history has no
-- uncommon element (e.g. the first row) produce no row here.
uncommon_sku_cnt AS (
    SELECT
        ord,
        subclass,
        COUNT(sku_lst) AS uncommon_sku_count
    FROM all_sku_freq
    WHERE sku_freq <> array_cnt
    GROUP BY ord, subclass
),

-- Running total of the per-row uncommon counts, in ord order.
rolling_uncomm_sku_cnt AS (
    SELECT
        u.*,
        SUM(u.uncommon_sku_count) OVER (
            PARTITION BY u.subclass
            ORDER BY u.ord ASC
            RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS roll_uncomm_sku_cnt
    FROM uncommon_sku_cnt AS u
)

SELECT r.*
FROM rolling_uncomm_sku_cnt AS r

Related

Query to always return a fix number of rows

I can write a query to return the top 5 products in a product category on the basis of their sales figures. However, when a product category has, let's say, only 2 products, only 2 rows are returned.
Is there a way to insert dummy rows so that 5 rows are always returned per product category? The dummy rows just need to have the right product category and null values for the remaining columns.

Use UNION ALL to add extra rows and then filter to only keep 5
-- Take up to 5 real rows, append 5 all-NULL filler rows, then keep the first
-- 5 rows of the combined set.
SELECT padded.*
FROM (
    SELECT
        src.col1,
        src.col2
    FROM your_table src
    WHERE ROWNUM <= 5

    UNION ALL

    -- CONNECT BY LEVEL <= 5 on DUAL generates the 5 filler rows.
    SELECT
        NULL,
        NULL
    FROM DUAL
    CONNECT BY LEVEL <= 5
    -- ORDER BY col1 NULLS LAST, col2 NULLS LAST -- If necessary
) padded
WHERE ROWNUM <= 5
Or if you are doing it for multiple groups:
-- Return exactly 5 rows per category: the first 5 products (by col1), padded
-- with NULL rows when a category has fewer than 5 products.

-- Number the products inside each category.
WITH categories (category, col1, rn) AS (
    SELECT
        category,
        col1,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY col1)
    FROM products
),

-- Row generator: exactly 5 filler slots. Generating them from DUAL (instead of
-- running CONNECT BY over the product rows) keeps the filler count at 5 per
-- category regardless of how many products exist.
filler (slot) AS (
    SELECT LEVEL
    FROM DUAL
    CONNECT BY LEVEL <= 5
),

-- Real rows keep rn 1..5; filler rows get rn 6..10 so they always sort after
-- the real rows of the same category.
padded (category, col1, rn) AS (
    SELECT
        category,
        col1,
        rn
    FROM categories
    WHERE rn <= 5

    UNION ALL

    SELECT
        c.category,
        NULL,
        5 + f.slot
    FROM categories c
    CROSS JOIN filler f
    WHERE c.rn = 1          -- one source row per category
)

-- Renumber real + filler rows together and keep the first 5 per category.
-- The renumbering must run over the combined set; numbering only the real
-- rows would leave the fillers at rn 6..10 and the filter would drop them all.
SELECT
    category,
    col1
FROM (
    SELECT
        category,
        col1,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY rn) AS rn
    FROM padded
)
WHERE rn <= 5
ORDER BY category, rn;

Insert a column into a view that will assign a unique ID to each unique Primary Key

I am trying to insert a new column in a view in SQL Server 2012 to assign a new number to each unique TxnID.
Currently the TxnID is 28 characters long. It would be great if I could assign each TxnID with a new number starting with '1'..
Current TxnID
-------------------------
2010100009000010000006783
2010100009000010000006784
2010100009000010000006785
2010100009000010000006785
Desired TxnID
1
2
3
3

Use a ranking function such as RANK(), which will help here (or DENSE_RANK() if the numbers must stay consecutive after duplicates):
-- DENSE_RANK() gives each distinct TxnID the next consecutive number
-- (1, 2, 3, 3 for the sample data); RANK() would skip numbers after duplicates.
-- The new column gets its own name so it does not clash with the TxnID column
-- already returned by *. `table` is a placeholder for the real table name.
select *, dense_rank() over (order by TxnID) as NewTxnID from table

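Use a window function such as ROW_NUMBER() (note that ROW_NUMBER() numbers every row separately; use DENSE_RANK() if duplicate TxnIDs must share a number):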
-- Adds a sequential number per distinct TxnID to every row of YourTable.
-- DENSE_RANK() is used so that duplicate TxnIDs share a number and the
-- numbering has no gaps (1, 2, 3, 3 for the sample data).
-- The new column is called NewTxnID because a view cannot expose two columns
-- with the same name, and * already returns the original TxnID.
CREATE VIEW DBO.Vw_MyView
AS
SELECT
    NewTxnID = DENSE_RANK() OVER (ORDER BY TxnID),
    *
FROM YourTable

We can use many methods for this:
-- RANK(): rows with the same [Current TxnID] share a number.
SELECT
    p.*,
    RANK() OVER (ORDER BY p.[Current TxnID]) AS rowNumx
FROM (
    VALUES
        ('2010100009000010000006783'),
        ('2010100009000010000006784'),
        ('2010100009000010000006785'),
        ('2010100009000010000006785')
) AS p ([Current TxnID]);

-- DENSE_RANK(): same idea, over the same four sample values.
SELECT
    p.*,
    DENSE_RANK() OVER (ORDER BY p.[Current TxnID]) AS rowNumx
FROM (
    VALUES
        ('2010100009000010000006783'),
        ('2010100009000010000006784'),
        ('2010100009000010000006785'),
        ('2010100009000010000006785')
) AS p ([Current TxnID]);

-- Other: no window function; a row's number is 1 + the count of rows whose
-- [Current TxnID] is strictly smaller.
WITH CTE ([Current TxnID]) AS (
    SELECT v.[Current TxnID]
    FROM (
        VALUES
            ('2010100009000010000006783'),
            ('2010100009000010000006784'),
            ('2010100009000010000006785'),
            ('2010100009000010000006785')
    ) AS v ([Current TxnID])
)
SELECT
    c.[Current TxnID],
    (
        SELECT COUNT(*) + 1
        FROM CTE AS c1
        WHERE c1.[Current TxnID] < c.[Current TxnID]
    ) AS rowNumx
FROM CTE AS c;
OUTPUT For all
/*------------------------
OUTPUT
------------------------*/
Current TxnID rowNumx
------------------------- --------------------
2010100009000010000006783 1
2010100009000010000006784 2
2010100009000010000006785 3
2010100009000010000006785 3
(4 row(s) affected)

Find Top Most AND Lowest In a Table's Group Column

I have a table with 4 fields, ID, Price, Qty and Ratting, plus an optional [Position] column.
All the records are grouped by the columns [Qty, Ratting].
I have to determine each row's position within its group and store that position in the optional column.
For better understanding I have added an image with the data in the table:
On the basis of Qty within each Ratting, I have to mark the top 3 rows, the bottom 3 rows, and the rest of them as remaining.
I don't understand how to do it.
Can anybody suggest how to do it?
So far what I've tried is:
-- Sample data for the question. The table is created as a temp table so that
-- the name #RankTable is a valid object name (DECLARE cannot be used with a
-- #-prefixed name).
CREATE TABLE #RankTable
(
    ID      INT,
    Price   DECIMAL(10, 2),
    Qty     INT,
    Ratting INT
);

INSERT INTO #RankTable (ID, Price, Qty, Ratting)
VALUES
    (1, 10, 15, 1),
    (2, 11, 11, 1),
    (3, 96, 10, 1),
    (4, 96, 8, 1),
    (5, 56, 7, 1),
    (6, 74, 5, 1),
    (7, 93, 4, 1),
    (8, 98, 2, 1),
    (9, 12, 1, 1),
    (10, 32, 80, 2),
    (11, 74, 68, 2),
    (12, 58, 57, 2),
    (13, 37, 43, 2),
    (14, 79, 32, 2),
    (15, 29, 28, 2),
    (16, 46, 17, 2),
    (17, 86, 13, 2),
    (19, 75, 110, 3),
    (20, 27, 108, 3),
    (21, 38, 104, 3),
    (22, 87, 100, 3),
    (23, 47, 89, 3);

-- Scalar variables take the @ prefix. It is never assigned, so Position is
-- returned as an empty string for every row; this is the part still to be solved.
DECLARE @PositionGroup VARCHAR(1);

SELECT
    *,
    ISNULL(@PositionGroup, '') AS Position
FROM #RankTable;

You can try this:
-- Position codes, per Ratting and ordered by Qty descending:
--   0 = one of the first 3 rows, 1 = one of the last 3 rows, 2 = everything else.
-- The first-3 test wins when a Ratting has so few rows that both tests match.
SELECT
    T.ID,
    T.Price,
    T.Qty,
    T.Ratting,
    CASE
        WHEN T.RowID <= 3 THEN 0            -- RowID starts at 1
        WHEN T.RowID > T.Total - 3 THEN 1
        ELSE 2
    END AS Position
FROM (
    SELECT
        ID,
        Price,
        Qty,
        Ratting,
        COUNT(*) OVER (PARTITION BY Ratting) AS Total,
        ROW_NUMBER() OVER (PARTITION BY Ratting ORDER BY Qty DESC) AS RowID
    FROM #RankTable
) AS T

Use Window Function. Try this.
-- Same classification using a CTE:
--   rn = position of the row inside its Ratting, highest Qty first
--   mx = number of rows (with a non-NULL ID) in that Ratting
-- position: 0 = first 3 rows, 1 = last 3 rows, 2 = everything else.
;WITH cte AS (
    SELECT
        ID,
        Price,
        Qty,
        Ratting,
        -- Rank by Qty (the question asks for top/bottom by Qty), not by ID.
        ROW_NUMBER() OVER (PARTITION BY Ratting ORDER BY Qty DESC) AS rn,
        COUNT(ID) OVER (PARTITION BY Ratting) AS mx
    FROM #RankTable
)
SELECT
    ID,
    Price,
    Qty,
    Ratting,
    mx - rn AS rows_below,      -- how many rows come after this one in its Ratting
    CASE
        WHEN rn IN (1, 2, 3) THEN 0
        WHEN mx - rn IN (0, 1, 2) THEN 1
        ELSE 2
    END AS position
FROM cte

try this as well.
-- Number the rows of each Ratting by Qty (highest first), find the smallest
-- and largest row number per Ratting, then classify every row by its distance
-- from those two ends: 0 = within 2 of the first, 1 = within 2 of the last,
-- 2 = anything else.
;WITH numbered AS
(
    SELECT
        Ratting,
        ID,
        Price,
        Qty,
        ROW_NUMBER() OVER (PARTITION BY Ratting ORDER BY Qty DESC) AS [Row]
    FROM #RankTable
),
cte AS
(
    SELECT
        MAX(LU.[Row]) AS [Max],
        MIN(LU.[Row]) AS [Min],
        LU.Ratting
    FROM numbered AS LU
    GROUP BY LU.Ratting
)
SELECT
    R.ID,
    R.Price,
    R.Qty,
    cte.Ratting,
    CASE
        WHEN (R.[Row] - cte.[Min]) <= 2 THEN 0
        WHEN (cte.[Max] - R.[Row]) <= 2 THEN 1
        ELSE 2
    END AS Position
FROM cte
INNER JOIN numbered AS R
    ON R.Ratting = cte.Ratting
Result:

t-SQL Use row number, but on duplicate rows, use the same number

I have some data and want to number the rows sequentially, but consecutive rows with the same type should get the same number, and the numbering should continue when the type changes.
There will only be types 5 and 6; ID is actually more complex than abc123. I've tried RANK but I seem to get two different row counts - in the example, instead of 1 2 2 3 4 it would be 1 1 2 2.
original image
dense rank result
MS SQL 2008 R2

As far as I understand, you want to number your continuous groups:
-- Sample data; id1 is an identity column used only to order the final output.
declare @Temp table (id1 bigint identity(1, 1), ID nvarchar(128), Date date, Type int)

insert into @Temp (ID, Date, Type)
select 'abc123', '20130101', 5 union all
select 'abc124', '20130102', 6 union all
select 'abc125', '20130103', 6 union all
select 'abc126', '20130104', 5 union all
select 'abc127', '20130105', 6 union all
select 'abc128', '20130106', 6 union all
select 'abc129', '20130107', 6 union all
select 'abc130', '20130108', 6 union all
select 'abc131', '20130109', 5

-- cte1: the difference between the overall row number (by Date) and the row
--       number inside the Type is constant within a run of consecutive rows
--       of the same Type.
-- cte2: grp2 = first Date of the run. The partition must include Type,
--       because runs of different Types can produce the same grp value
--       (e.g. a leading run of 5s and a trailing run of 6s both give 0) and
--       would otherwise be merged into one group.
;with cte1 as (
    select
        T.id1, T.ID, T.Date, T.Type,
        row_number() over (order by T.Date)
            - row_number() over (order by T.Type, T.Date) as grp
    from @Temp as T
), cte2 as (
    select
        C.id1, C.ID, C.Date, C.Type, C.grp,
        min(C.Date) over (partition by C.Type, C.grp) as grp2
    from cte1 as C
)
-- Number the runs in date order; all rows of a run share one number.
select
    T.ID, T.Date, T.Type,
    dense_rank() over (order by T.grp2) as GroupNo
from cte2 as T
order by T.id1

Select Rows with Maximum Column Value group by Another Column

This should be a simple question, but I can't get it to work :(
How do I select the rows that have the maximum column value, grouped by another column?
For example,
I have the following table definition:
ID
Del_Index
docgroupviewid
The issue now is that I want to group the results by docgroupviewid first, and then choose one row from each docgroupviewid group, depending on which one has the highest del_index.
I tried
-- Attempted query. id is neither grouped nor aggregated, so the id returned
-- is not tied to the row that holds MAX(del_index).
SELECT
    docgroupviewid,
    MAX(del_index),
    id
FROM table
GROUP BY docgroupviewid
But instead of returning the correct id, it returns the earliest id from the group with the same docgroupviewid.
Any ideas?

I've struggled with this many times myself and the solution is to think about your query differently.
I want each DocGroupViewID row where the Del_Index is the highest(max) for all rows with that DocGroupViewID:
-- Keep every row whose Del_Index equals the largest Del_Index found among the
-- rows sharing its DocGroupViewID. Ties return more than one row per group.
SELECT
    cur.DocGroupViewID,
    cur.Del_Index,
    cur.ID
FROM MyTable cur
WHERE cur.Del_Index = (
    SELECT MAX(peer.Del_Index)
    FROM MyTable peer
    WHERE peer.DocGroupViewID = cur.DocGroupViewID
)
It gets more complex when more than one row can have the same Del_Index, since then you need some way to choose which one to show.
EDIT: wanted to follow up with another option
You can use the RANK() or ROW_NUMBER() functions with a CTE to get more control over the results, as follows:
-- fake a source table (a table variable, so the name takes the @ prefix)
DECLARE @t TABLE (
    ID int IDENTITY(1,1) PRIMARY KEY,
    Del_Index int,
    DocGroupViewID int
);

-- ID is generated by the identity column, so only the other two are listed.
INSERT INTO @t (Del_Index, DocGroupViewID)
VALUES
    (1, 1),
    (2, 1),
    (3, 1),
    (1, 2),
    (2, 2),
    (2, 2),
    (1, 3),
    (2, 3),
    (3, 3),
    (4, 3);

-- show our source
SELECT ID, Del_Index, DocGroupViewID
FROM @t;

-- select using RANK (can have duplicates): rows tied on the highest Del_Index
-- all get RowRank = 1, so group 2 returns two rows here.
WITH cteRank AS
(
    SELECT
        DocGroupViewID,
        Del_Index,
        ID,
        RANK() OVER
            (PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
            AS RowRank,
        ROW_NUMBER() OVER
            (PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
            AS RowNumber
    FROM @t
)
SELECT *
FROM cteRank
WHERE RowRank = 1;

-- select using ROW_NUMBER: exactly one row per DocGroupViewID; which of the
-- tied rows is returned depends on the ORDER BY (no tie-breaker is given here).
WITH cteRowNumber AS
(
    SELECT
        DocGroupViewID,
        Del_Index,
        ID,
        RANK() OVER
            (PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
            AS RowRank,
        ROW_NUMBER() OVER
            (PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
            AS RowNumber
    FROM @t
)
SELECT *
FROM cteRowNumber
WHERE RowNumber = 1
If you have ways to sort out ties, just add it to the ORDER BY.

You will have to complicate your query a little bit:
-- Return the rows whose del_index is the maximum within their docgroupviewid.
-- `table` is a placeholder for the real table name. The inner reference needs
-- its own alias (b) so it can be compared with the outer row (a).
select a.docgroupviewid, a.del_index, a.id from table a
where a.del_index = (select max(b.del_index) from table b
where b.docgroupviewid = a.docgroupviewid)

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Bigquery uncommon elements in array - sql

Related

Query to always return a fix number of rows

Insert a column into a view that will assign a unique ID to each unique Primary Key

Find Top Most AND Lowest In a Table's Group Column

t-SQL Use row number, but on duplicate rows, use the same number

Select Rows with Maximum Column Value group by Another Column

Categories

Resources