How to set partition id/name for row partitions in SQL Server? - sql

How to set partition id/name for row partitions in SQL Server?
name surname val
a b 10
c d 2
a b 11
a b 13
result (partitioned by name and surname):
name surname val rowno partitionid
a b 10 1 1
a b 11 2 1
a b 13 3 1
c d 2 1 2

-- Sample data for the question, held in a temp table
-- ("DECLARE #table TABLE" is not valid T-SQL).
CREATE TABLE #table (
    name    CHAR(1),
    surname CHAR(1),
    val     TINYINT
);

INSERT INTO #table (name, surname, val)
VALUES
    ('a', 'b', 10),
    ('c', 'd', 2),
    ('a', 'b', 11),
    ('a', 'b', 13);

SELECT * FROM #table;

-- rowno numbers the rows inside each (name, surname) partition by val.
-- NOTE: partitionid is ranked by name only, so two partitions that share a
-- name but differ in surname receive the same id.
SELECT *
    , ROW_NUMBER() OVER (PARTITION BY name, surname ORDER BY val) AS rowno
    , DENSE_RANK() OVER (ORDER BY name) AS partitionid
FROM #table;
Regards!

The dense_rank window function seems to fit the bill:
-- rowno: position of the row inside its (name, surname) partition.
--        ROW_NUMBER keeps it unique even when two rows share the same val.
-- partitionid: one id per distinct (name, surname) pair.
SELECT *,
       ROW_NUMBER() OVER (PARTITION BY name, surname ORDER BY val) AS rowno,
       DENSE_RANK() OVER (ORDER BY name, surname) AS partitionid
FROM mytable

Related

Fill NULL value with progressive row_number over partition function

What I have
From the following #MyTable I just have Name and Number columns.
My goal is to fill the values where Number is NULL with a progressive number and get the values I have written in the Desidered_col column.
+------+--------+---------------+
| Name | Number | Desidered_col |
+------+--------+---------------+
| John | 1 | 1 |
| John | 2 | 2 |
| John | 3 | 3 |
| John | NULL | 4 |
| John | NULL | 5 |
| John | 6 | 6 |
| Mike | 1 | 1 |
| Mike | 2 | 2 |
| Mike | NULL | 3 |
| Mike | 4 | 4 |
| Mike | 5 | 5 |
| Mike | 6 | 6 |
+------+--------+---------------+
What I have tried
I have tried with the following query
-- Attempt from the question: number each person's rows in Number order.
SELECT
    Name,
    Number,
    ROW_NUMBER() OVER (PARTITION BY [Name] ORDER BY Number ASC) AS rn
FROM #MyTable
but it puts all the NULL values first and then counts the rows.
How can I fill the empty values?
Why I don't think this is a duplicate question
I have read this question and this question but I don't think it is duplicate because they don't consider the PARTITION BY construct.
This is the script to create and populate the table
-- Create and fill #MyTable in one statement from an inline row list.
-- Desidered_col holds the numbering the question wants to obtain.
SELECT *
INTO #MyTable
FROM (
    VALUES
        ('John', 1,    1),
        ('John', 2,    2),
        ('John', 3,    3),
        ('John', NULL, 4),
        ('John', NULL, 5),
        ('John', 6,    6),
        ('Mike', 1,    1),
        ('Mike', 2,    2),
        ('Mike', NULL, 3),
        ('Mike', 4,    4),
        ('Mike', 5,    5),
        ('Mike', 6,    6)
) A ([Name], [Number], [Desidered_col])
This query is a bit complicated but seems to return your expected result. The only case it may be wrong is when someone does not have Number = 1.
The idea is that you must find gaps between numbers and count how many null values can be used to fill them.
Sample data
-- Sample data for the answer: only Name and Number, with NULLs where a
-- number is missing.
create table #myTable (
    [Name] varchar(20)
    , [Number] int
)

-- A single INSERT fed by the UNION ALL chain (the statement header must
-- appear only once).
insert into #myTable ([Name], [Number])
SELECT 'John' AS [Name], 1 AS [Number] UNION ALL
SELECT 'John' AS [Name], 2 AS [Number] UNION ALL
SELECT 'John' AS [Name], 3 AS [Number] UNION ALL
SELECT 'John' AS [Name], NULL AS [Number] UNION ALL
SELECT 'John' AS [Name], NULL AS [Number] UNION ALL
SELECT 'John' AS [Name], 6 AS [Number] UNION ALL
SELECT 'Mike' AS [Name], 1 AS [Number] UNION ALL
SELECT 'Mike' AS [Name], 2 AS [Number] UNION ALL
SELECT 'Mike' AS [Name], NULL AS [Number] UNION ALL
SELECT 'Mike' AS [Name], 4 AS [Number] UNION ALL
SELECT 'Mike' AS [Name], 5 AS [Number] UNION ALL
SELECT 'Mike' AS [Name], 6 AS [Number]
Query
-- Assigns Desidered_col so that NULL rows are slotted into the gaps between
-- non-consecutive Numbers of the same Name.
--
-- gaps_between_numbers: for every non-NULL Number whose next Number (per Name)
-- is more than 1 away, emits cnt rows (cnt = size of the gap) through the join
-- to master.dbo.spt_values.
--   dr = rank of the gap within the Name, by the Number preceding the gap
--   rn = running number of the emitted rows within the Name
;with gaps_between_numbers as (
select
t.Name, cnt = t.nextNum - t.Number - 1, dr = dense_rank() over (partition by t.Name order by t.Number)
, rn = row_number() over (partition by t.Name order by t.Number)
from (
select
-- the last Number of a Name has no successor; Number + 1 makes its gap size 0
Name, Number, nextNum = isnull(lead(Number) over (partition by Name order by number), Number + 1)
from
#myTable
where
Number is not null
) t
-- one output row per v.number below the gap size
-- (presumably the type 'P' rows are consecutive integers starting at 0 - confirm)
join master.dbo.spt_values v on t.nextNum - t.Number - 1 > v.number
where
t.nextNum - t.Number > 1
and v.type = 'P'
)
-- ordering_nulls: numbers the NULL rows per Name (constant ORDER BY, so no
-- particular order is requested) and pairs the k-th NULL row with the k-th gap
-- row; NULL rows without a matching gap row get dr = 2147483647.
, ordering_nulls as (
select
t.Name, dr = isnull(q.dr, 2147483647)
from (
select
Name, rn = row_number() over (partition by Name order by (select 1))
from
#myTable
where
Number is null
) t
left join gaps_between_numbers q on t.Name = q.Name and t.rn = q.rn
)
-- ordering_not_null_numbers: groups non-NULL Numbers into runs of consecutive
-- values; rn is the index of the run within the Name.
, ordering_not_null_numbers as (
select
Name, Number, rn = dense_rank() over (partition by Name order by gr)
from (
select
-- running sum of the "run starts here" flags = id of the run
Name, Number, gr = sum(lg) over (partition by Name order by Number)
from (
select
-- lg = 0 when Number is exactly previous + 1, otherwise 1 (including the first row)
Name, Number, lg = iif(Number - lag(Number) over (partition by Name order by Number) = 1, 0, 1)
from
#myTable
where
Number is not null
) t
) t
)
-- Final numbering: order by run/gap index first, then by Number with NULL
-- treated as 2147483647, so NULL rows sort after the non-NULL rows that share
-- their index.
select
Name, Number
, Desidered_col = row_number() over (partition by Name order by rn, isnull(Number, 2147483647))
from (
select * from ordering_not_null_numbers
union all
-- dr of a NULL row is placed in the rn column of the union
select Name, null, dr from ordering_nulls
) t
CTE gaps_between_numbers looks for numbers that are not consecutive. The difference between the current Number and the next one shows how many NULL values can be used to fill the gap. Then master.dbo.spt_values is used to multiply each row by that amount. In gaps_between_numbers, the dr column is the gap number and cnt is the number of NULL values that need to be used.
ordering_nulls orders only the NULL values and is joined with CTE gaps_between_numbers to know in which position each row should appear.
ordering_not_null_numbers orders values that are not NULL. Consecutive Numbers will have same row number
The last step is to union the CTEs ordering_not_null_numbers and ordering_nulls and apply the desired ordering.
Rextester DEMO
In order to do this, you need a column that specifies the order of the rows in the table. You can do this using the identity() function:
SELECT identity(int, 1, 1) as MyTableId, a.*
INTO #MyTable
. . .
I'm pretty sure SQL Server will follow the ordering of a values() statement and in practice will follow the ordering of a union all. You can explicitly put this column in each row, if you prefer.
Then you can use this to assign your value:
-- Number each name's rows by mytableid, the ordering column of #MyTable.
-- The table needs the alias t because the select list refers to t.*.
select t.*,
       row_number() over (partition by name order by mytableid) as desired_col
from #MyTable t
You could also assign the new ranking based on Desidered_col using row_number() function with ORDER BY clause (select 1 or select null)
-- Per-Name row number with a constant ORDER BY, i.e. no particular row order
-- is requested inside a partition.
SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY Name ORDER BY (SELECT 1)) New_Desidered_col
FROM #MyTable

Conditional selection of RowNum in SQL

I have written a query which returns me following data.
ID EmpFirstName EmpLastName RowNum
1 X Y 1
2 A B 1
3 A B 2
Now I want all records where RowNum is >1. For example, in this case I need 2 and 3 record in output.
If I add the condition RowNum > 1, I get only the third record, but I want the second one as well.
Assuming your query is this:
-- Assumed shape of the asker's query: RowNum restarts for every
-- (EmpFirstName, EmpLastName) pair and follows ID order.
SELECT
    ID,
    EmpFirstName,
    EmpLastName,
    ROW_NUMBER() OVER (
        PARTITION BY EmpFirstName, EmpLastName
        ORDER BY ID
    ) AS RowNum
FROM aTable
This is a classic query used to filter out any duplicate values.
In order to effectively select all the records with duplicate values, I suggest using the COUNT() window function:
-- cnt is the size of each (EmpFirstName, EmpLastName) group; keeping cnt > 1
-- returns every row of a duplicated name, including the one with RowNum = 1.
;WITH numbered AS (
    SELECT
        ID,
        EmpFirstName,
        EmpLastName,
        ROW_NUMBER() OVER (PARTITION BY EmpFirstName, EmpLastName ORDER BY ID) AS RowNum,
        COUNT(*)     OVER (PARTITION BY EmpFirstName, EmpLastName)             AS cnt
    FROM aTable
)
SELECT *
FROM numbered
WHERE cnt > 1
ORDER BY EmpFirstName, EmpLastName
To test it use this query:
-- Drop a leftover #tmp from an earlier run, if there is one, so the script
-- also works the first time it is executed.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
    DROP TABLE #tmp
CREATE table #tmp (ID int , EmpFirstName varchar(10) , EmpLastName varchar(10))
go
INSERT INTO #tmp (ID, EmpFirstName, EmpLastName) VALUES
(1,'X','Y' )
,(2,'A','B')
,(3,'A','B')
,(4,'A','C')
,(5,'B','C')
,(6,'B','C')
-- cnt is the size of each (EmpFirstName, EmpLastName) group; cnt > 1 keeps
-- every row of a duplicated name.
;with a as (
select ID, EmpFirstName, EmpLastName,
ROW_NUMBER() OVER (PARTITION BY EmpFirstName, EmpLastName ORDER BY ID) AS RowNum,
COUNT(*) OVER (PARTITION BY EmpFirstName, EmpLastName) AS cnt
FROM #tmp
)
SELECT * FROM a where cnt > 1
ORDER BY EmpFirstName, EmpLastName
Result:
ID EmpFirstName EmpLastName RowNum cnt
----------- ------------ ----------- -------------------- -----------
2 A B 1 2
3 A B 2 2
5 B C 1 2
6 B C 2 2
I made sample data and used this query:
-- Sample data shaped like the asker's result set (RowNum already computed).
CREATE TABLE #tmp (ID INT, EmpFirstName VARCHAR(10), EmpLastName VARCHAR(10), RowNum INT);

INSERT INTO #tmp (ID, EmpFirstName, EmpLastName, RowNum) VALUES
 (1, 'X', 'Y', 1)
,(2, 'A', 'B', 1)
,(3, 'A', 'B', 2);

-- Keep every row whose (EmpFirstName, EmpLastName) occurs more than once.
-- Counting per name (instead of numbering all rows by ID and skipping the
-- first one) does not depend on where the unique names fall in ID order.
SELECT ID, EmpFirstName, EmpLastName, RowNum
FROM (
    SELECT *
         , COUNT(*) OVER (PARTITION BY EmpFirstName, EmpLastName) AS name_cnt
    FROM #tmp
) q
WHERE q.name_cnt > 1;
try this,
-- Sample result set, held in a temp table
-- ("DECLARE #Result TABLE" is not valid T-SQL).
CREATE TABLE #Result (ID INT, EmpFirstName VARCHAR(10), EmpLastName VARCHAR(10), RowNum INT);

INSERT INTO #Result (ID, EmpFirstName, EmpLastName, RowNum)
VALUES
 (1, 'X', 'Y', 1)
,(2, 'A', 'B', 1)
,(3, 'A', 'B', 2);

-- A name is duplicated exactly when it has a row with RowNum = 2; joining
-- back on the name returns all rows of those names.
SELECT r1.*
FROM #Result r1
INNER JOIN (SELECT EmpFirstName, EmpLastName -- get duplicate records
            FROM #Result
            WHERE RowNum = 2
           ) AS r2 ON r1.EmpFirstName = r2.EmpFirstName
                  AND r1.EmpLastName = r2.EmpLastName;

Unpivot multiple columns not showing desire result

Original
RecordKey Name Section1_Product Section1_Code Section2_Product Section2_Code ......
1 a ff 22
2 b gg 22
3 c hh 33
RecordKey Name Section Product Code ......
1 a 1 ff 22
1 a 2
2 b 1 gg 22
2 b 2
3 c 1 hh 22
3 c 2
I am trying to unpivot the columns into rows. Some sections will have null value.
-- Attempt from the question: unpivot the product columns, then the code columns.
-- NOTE: the second UNPIVOT is applied to every row produced by the first one,
-- so each Product row is paired with each Code row.
SELECT RecordKey
,Name
,'Num_of_Sections' = ROW_NUMBER() OVER (PARTITION BY RecordKey ORDER BY ID)
,Product
,Code
FROM (
-- Table is a reserved word, so the name has to be bracketed
SELECT RecordKey, Name, Section1_Product, Section1_Code, Section2_Product, Section2_Code FROM [Table]
) M
UNPIVOT (
Product FOR ID IN (Section1_Product, Section2_Product)
) p
UNPIVOT (
Code FOR CO IN (Section1_Code, Section2_Code)
) c
If I execute with only one column (Product, comment out Code) then I will have 2 values in ID column (1,2). If I run the query with 2 columns then I get 4 values in ID column(1, 2, 3, 4).
may as per my assumption and your data provided we can achieve this using Cross apply and Row_number
-- Sample data, held in a temp table ("declare #Record TABLE" is not valid T-SQL).
CREATE TABLE #Record
([RecordKey] int,
[Name] varchar(1),
[Section1_Product] varchar(2),
[Section1_Code] int,
[Section2_Product] varchar(2),
[Section2_Code] int)
;
INSERT INTO #Record
([RecordKey], [Name], [Section1_Product], [Section1_Code],[Section2_Product],[Section2_Code])
VALUES
(1, 'a', 'ff', 22,NULL,NULL),
(2, 'b', 'gg', 22,NULL,NULL),
(3, 'c', 'hh', 33,NULL,NULL)
;
-- One CROSS APPLY emits one row per section and keeps each section's product
-- and code together, so products are never paired with another section's code.
-- Sections whose columns are NULL still produce a row with NULL Product/Code.
SELECT r.RecordKey,
       r.Name,
       -- section sequence within the record (1, 2, ...)
       ROW_NUMBER() OVER (PARTITION BY r.RecordKey ORDER BY s.section_no) AS RN,
       s.Product,
       s.Code
FROM #Record AS r
CROSS APPLY (VALUES
    (1, r.Section1_Product, r.Section1_Code),
    (2, r.Section2_Product, r.Section2_Code)
) AS s (section_no, Product, Code)
ORDER BY r.RecordKey, s.section_no
;

Filter unique records from a database while removing double not-null values

This is kind of hard to explain in words but here is an example of what I am trying to do in SQL. I have a query which returns the following records:
ID Z
--- ---
1 A
1 <null>
2 B
2 E
3 D
4 <null>
4 F
5 <null>
I need to filter this query so that each unique record (based on ID) appears only once in the output and if there are multiple records for the same ID, the output should contain the record with the value of Z column being non-null. If there is only a single record for a given ID and it has value of null for column Z the output still should return that record. So the output from the above query should look like this:
ID Z
--- ---
1 A
2 B
2 E
3 D
4 F
5 <null>
How would you do this in SQL?
You can use GROUP BY for that:
-- One row per ID. MAX skips NULLs, so the result is NULL only when every Z
-- of that ID is NULL.
SELECT ID,
       MAX(Z)   -- MIN(Z) would do as well
FROM MyTable
GROUP BY ID
Aggregate functions ignore NULLs, returning them only when all values on the group are NULL.
If you need to return both 2-B and 2-E rows:
-- Keep a row when its Z is set, or when its ID has no row with Z set at all.
SELECT *
FROM YourTable AS t1
WHERE t1.Z IS NOT NULL
   OR NOT EXISTS (
          SELECT *
          FROM YourTable AS t2
          WHERE t2.ID = t1.ID
            AND t2.Z IS NOT NULL
      )
-- Keep every non-NULL row, plus the NULL rows of IDs that have no non-NULL Z
-- at all (a bare "WHERE Z IS NOT NULL" would drop such IDs entirely).
SELECT ID
     , Z
FROM (
    SELECT ID
         , Z
         , COUNT(Z) OVER (PARTITION BY ID) AS non_null_cnt  -- COUNT(column) skips NULLs
    FROM YourTable
) AS counted
WHERE Z IS NOT NULL
   OR non_null_cnt = 0
-- Sample data, held in a temp table ("DECLARE #T TABLE" is not valid T-SQL).
CREATE TABLE #T ( ID INT, Z CHAR(1) );

INSERT INTO #T ( ID, Z )
VALUES ( 1, 'A' ),
       ( 1, NULL ),
       ( 2, 'B' ),
       ( 2, 'E' ),
       ( 3, 'D' ),
       ( 4, NULL ),
       ( 4, 'F' ),
       ( 5, NULL );

SELECT *
FROM #T;

-- c.r = number of rows per ID.
-- First branch: IDs with a single row are returned as they are (NULL included).
-- Second branch: IDs with several rows return only their non-NULL rows.
-- NOTE: an ID with several rows that are all NULL is returned by neither branch.
; WITH c AS (SELECT ID, r = COUNT(*) FROM #T GROUP BY ID)
SELECT t.ID, Z
FROM #T t JOIN c ON t.ID = c.ID
WHERE c.r = 1
UNION ALL
SELECT t.ID, Z
FROM #T t JOIN c ON t.ID = c.ID
WHERE c.r >= 2
AND z IS NOT NULL;
This example assumes you want two rows returned for ID = 2.
-- non_null_counts: how many non-NULL z values each id has.
with non_null_counts (id, cnt_val) as (
    select id,
           sum(case when z is not null then 1 else 0 end)
    from t
    group by id
)
-- ids with at least one value return their non-NULL rows;
-- ids without any value return their NULL rows.
select t.id, t.z
from t
inner join non_null_counts on t.id = non_null_counts.id
where (non_null_counts.cnt_val > 0 and t.z is not null)
   or (non_null_counts.cnt_val = 0 and t.z is null)
-- Rank rows per id with non-NULL z ahead of NULL z. RANK gives every non-NULL
-- row of an id the same rank 1, so ids with several values (e.g. 2-B and 2-E)
-- keep all of them; an id with only NULL rows keeps those rows at rank 1.
WITH CTE
AS (
    SELECT id
        ,z
        ,RANK() OVER (
            PARTITION BY id ORDER BY CASE WHEN z IS NULL THEN 1 ELSE 0 END
            ) rn
    FROM #T
    )
SELECT id
    ,z
FROM CTE
WHERE rn = 1

Problem in counting nulls and then merging them with the existing rows

Input:
ID groupId RowID Data
1 1 1 W
2 1 1 NULL
3 1 1 NULL
4 1 1 Z
5 1 2 NULL
6 1 2 NULL
7 1 2 X
8 1 2 NULL
9 1 3 NULL
10 1 3 NULL
11 1 3 Y
12 1 3 NULL
Expected Output
GroupId NewData
1 2Y1,2X1,W2Z
For every Null there will be a numeric count. That is if there are two nulls then the numeric value will be 2.
The ddl is as under
-- Sample data, held in a temp table ("DECLARE #t TABLE" is not valid T-SQL).
-- ID is an identity column, so it is generated on insert.
CREATE TABLE #t (ID INT IDENTITY(1,1), GroupId INT, RowID INT, Data VARCHAR(10));

INSERT INTO #t (GroupId, RowID, Data)
SELECT 1, 1, 'W'  UNION ALL
SELECT 1, 1, NULL UNION ALL
SELECT 1, 1, NULL UNION ALL
SELECT 1, 1, 'Z'  UNION ALL
SELECT 1, 2, NULL UNION ALL
SELECT 1, 2, NULL UNION ALL
SELECT 1, 2, 'X'  UNION ALL
SELECT 1, 2, NULL UNION ALL
SELECT 1, 3, NULL UNION ALL
SELECT 1, 3, NULL UNION ALL
SELECT 1, 3, 'Y'  UNION ALL
SELECT 1, 3, NULL;

select * from #t;
My version is below, but it does not produce the correct output:
-- Asker's attempt: a recursive CTE that walks #t in id order and builds the
-- output string in Val, carrying the length of the current NULL run in NullCount.
;with t as (
-- anchor: the row with id = 1 starts the string
select GroupID, id, RowID, convert(varchar(25), case when Data is null then '' else Data end) Val,
case when Data is null then 1 else 0 end NullCount from #t where id = 1
union all
-- recursive step: append the row with the next id of the same group
select t.GroupID, a.id,a.RowID, convert(varchar(25), Val +
-- emit the pending NULL count before a value, or before a RowID change when a run is open
case when Data is not null or (t.RowID <> a.RowID and NullCount > 0) then ltrim(NullCount) else '' end +
-- a comma separates two RowIDs
case when t.RowID <> a.RowID then ',' else '' end + isnull(Data, '')),
-- NOTE: the counter is reset only by a non-NULL Data, not by a RowID change, so a
-- NULL run that continues into the next RowID keeps counting (the "3Y" in the output)
case when Data is null then NullCount + 1 else 0 end NullCount
from t inner join #t a on t.GroupID = a.GroupID and t.id + 1 = a.id
)
-- take the string built at the last id of each group and append a trailing NULL count
select GroupID, Data = Val + case when NullCount > 0 then ltrim(NullCount) else '' end from t
where id = (select max(id) from #t where GroupID = t.GroupId)
Is yielding the below output
GroupID Data
1 W2Z,2X1,3Y1
Please help me out
Thanks in advance
Kind of messy and most likely can be improved
-- Builds, per GroupId, a comma-separated string with one part per RowID
-- (highest RowID first); inside a part every run of NULLs is replaced by its length.
;With RawData AS
(
    select * from #t
)
-- R: position of each row inside its (GroupId, RowID)
,Ranked1 as
(
    select *, RANK() OVER (PARTITION BY GroupId, RowID ORDER BY ID, GroupId, RowID) R from #t
)
-- R2: position among all rows minus position among the NULL rows only;
-- it is constant within a run of adjacent NULL rows, so it identifies the run
,Ranked2 as
(
    select *, R - RANK() OVER (PARTITION BY GroupId, RowID ORDER BY ID, GroupId, RowID) R2 from Ranked1
    where Data is null
)
-- one row per NULL run: its first ID and its length
,Ranked3 as
(
    select MIN(ID) as MinID, GroupId, RowID, R2, COUNT(*) C2 from Ranked2
    group by GroupId, RowID, R2
)
-- keep the non-NULL rows and the first row of each NULL run (shown as the run length);
-- the remaining rows of a run have neither Data nor C2 and are filtered out
,Ranked4 as
(
    select RD.ID, RD.GroupId, RD.RowID, ISNULL(Data, C2) as C3 from RawData RD
    left join Ranked3 R3 on RD.ID = R3.MinID and RD.GroupId = R3.GroupId and RD.RowID = R3.RowID
    where ISNULL(Data, C2) is not null
)
-- C4: the pieces of one RowID concatenated in ID order
-- (ordering by ID makes the concatenation order well defined)
,Grouped as
(
    select GroupId, RowID,
    (
        select isnull(C3, '') from Ranked4 as R41
        where R41.GroupId = R42.GroupId and R41.RowID = R42.RowID
        order by R41.ID for xml path('')
    ) as C4
    from Ranked4 as R42
    group by GroupId, RowID
)
-- join the per-RowID parts with commas, highest RowID first, and strip the leading comma
select GroupId,
    stuff((
        select ',' + C4 from Grouped as G1
        where G1.GroupId = G2.GroupId
        order by G1.RowID desc for xml path('')
    ), 1, 1, '') as NewData
from Grouped G2
group by GroupId