TSQL- generate a sequence number for duplicate records

TSQL- generate a sequence number for duplicate records - sql

Using SQL Server 2000, consider a source table with more than 400,000 records.
The task is to select each regno entry with an incrementing on-the-fly rowid or sequence number for those with duplicates or multiple entries. For those which do NOT have duplicate entries in the source table, the rowid should simply be null.
Here's an example of the desired output:
regno rowid
100 1
100 2
100 3
200 null
300 4
300 5
400 null
500 null
600 6
600 7
Question:
What query would do the desired sequence incrementing using TSQL in SQL Server 2000?

If my comment is correct (600 should be 6,7) then have a look at this
DECLARE #Table TABLE(
regno INT,
rowid INT
)
INSERT INTO #Table (regno,rowid) SELECT 100, NULL
INSERT INTO #Table (regno,rowid) SELECT 100, NULL
INSERT INTO #Table (regno,rowid) SELECT 100, NULL
INSERT INTO #Table (regno,rowid) SELECT 200, NULL
INSERT INTO #Table (regno,rowid) SELECT 300, NULL
INSERT INTO #Table (regno,rowid) SELECT 300, NULL
INSERT INTO #Table (regno,rowid) SELECT 400, NULL
INSERT INTO #Table (regno,rowid) SELECT 500, NULL
INSERT INTO #Table (regno,rowid) SELECT 600, NULL
INSERT INTO #Table (regno,rowid) SELECT 600, NULL
DECLARE #TempTable TABLE(
ID INT IDENTITY(1,1),
regno INT
)
INSERT INTO #TempTable (regno)
SELECT regno
FROM #Table
SELECT regno,
CASE
WHEN (SELECT COUNT(1) FROM #TempTable WHERE regno = t.regno) = 1
THEN NULL
ELSE (SELECT COUNT(1) FROM #TempTable WHERE regno = t.regno) - (SELECT COUNT(1) FROM #TempTable WHERE regno = t.regno AND ID > t.ID) +
(SELECT COUNT(1) FROM #TempTable WHERE regno < t.regno AND regno IN (SELECT regno FROM #TempTable GROUP BY regno having COUNT(1) > 1))
END Val
FROM #TempTable t

The query to extract the non-unique records would be
select regno,count(*) from table group by regno having count(*) > 1
I don't know enough about MSSQL to tell you how to generate an incrementing sequence number to update the records that match the query.

Without a temp table:
DECLARE #Table TABLE(
regno INT
)
INSERT INTO #Table (regno) SELECT 100
INSERT INTO #Table (regno) SELECT 100
INSERT INTO #Table (regno) SELECT 100
INSERT INTO #Table (regno) SELECT 200
INSERT INTO #Table (regno) SELECT 300
INSERT INTO #Table (regno) SELECT 300
INSERT INTO #Table (regno) SELECT 400
INSERT INTO #Table (regno) SELECT 500
INSERT INTO #Table (regno) SELECT 600
INSERT INTO #Table (regno) SELECT 600
select regno, null as rowid from #Table group by regno having count(*) = 1
union
select regno, row_number() OVER (ORDER BY a.regno) as rowid
from #table a
where regno in (select regno from #table group by regno having count(*) > 1)
regno rowid
----------- --------------------
100 1
100 2
100 3
200 NULL
300 4
300 5
400 NULL
500 NULL
600 6
600 7
Oops - did not see that you want to do this in SQL 2000 until after posting this ... ignore my query please. In SQL 2000 you need a temp table to generate the sequence.

Related

What is output of this query and Why?

declare #table as table
(
id int identity(1,1), salary int
)
insert into #table values(1000)
insert into #table values(2000)
insert into #table values(5000)
insert into #table values(4000)
insert into #table values(1000)
insert into #table values(8000)
insert into #table values(9000)
insert into #table values(6000)
insert into #table values(1000)
insert into #table values(7000)
insert into #table values(3000)
select
A.salary
from
#table as A
where
(select count(*)
from #table B
where b.salary < A.salary) > 5

It will display the salary which is greater than 5000

In the query, you are self joining a table.
And You are counting number of records from inner table which are higher than current record of outer table.
So it will give you 5 records from table A which are higher than 5 records from inner query
Sub Query select count(*) from #table B where b.salary < A.salary will give you number of records which are less in salary than of table A
And So the result.

your query gonna return
5000
8000
9000
6000
7000
because as per your query it will check if a record is greater then 6 other records of the table or not.
as the inner query counting it for you.
if you remove all entries of 1000 the result will change to
8000
9000
because now only these two records are greater then 6 other records of the tables.

Identifying repeated fields in SQL query

I have an SQL query that returns a column like this:
foo
-----------
1200
1200
1201
1200
1200
1202
1202
1202
It has already been ordered in a specific way, and I would like to perform another query on this result set to ID the repeated data like this:
foo ID
---- ----
1200 1
1200 1
1201 2
1200 3
1200 3
1202 4
1202 4
1202 4
It's important that the second group of 1200 is identified as separate from the first. Every variation of OVER/PARTITION seems to want to lump both groups together. Is there a way to window the partition to only these repeated groups?
Edit:
This is for Microsoft SQL Server 2012

Not sure this will be the fastest results...
select main.num, main.id from
(select x.num,row_number()
over (order by (select 0)) as id
from (select distinct num from num) x) main
join
(select num, row_number() over(order by (select 0)) as ordering
from num) x2 on
x2.num=main.num
order by x2.ordering
Assuming the table "num" has a column "num" that contains your data, in the order-- of course num could be made into a view or a "with" for your original query.
Please see the following sqlfiddle

Here is one way to do this without a CURSOR
-- Create a temporay table
DECLARE #table TABLE
(
SeqID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
foo INT,
id int null
)
DECLARE #i INT
DECLARE #j INT
DECLARE #k INT
declare #tFoo INT
declare #oldFoo INT
SET #k = 0
set #oldFoo = 0
-- Insert data into the temporary table
INSERT INTO #table(foo)
SELECT 1200
INSERT INTO #table(foo)
SELECT 1200
INSERT INTO #table(foo)
SELECT 1201
INSERT INTO #table(foo)
SELECT 1200
INSERT INTO #table(foo)
SELECT 1200
INSERT INTO #table(foo)
SELECT 1202
INSERT INTO #table(foo)
SELECT 1202
INSERT INTO #table(foo)
SELECT 1202
-- Get the max and min SeqIDs to loop through
SELECT #i = MIN(SeqID) FROM #table
SELECT #j = MAX(SeqID) FROM #table
-- Loop through the temp table using the SeqID indentity column
WHILE (#i <= #j)
BEGIN
SELECT #tFoo = foo FROM #table WHERE SeqID = #i
if #oldFoo <> #tFoo
set #k = #k + 1
update #table set id = #k where SeqID = #i
SET #oldFoo = #tFoo
-- Increment the counter
SET #i = #i + 1
END
SELECT * from #table

You can do it without using cursors but it does not look nice (at least what I came up with). So 1) I assume you have PK column which orders your main values.
Then 2) I assume you have an ID column which you want to set.
create table tbl(foo int, pk int, id int);
insert into tbl(foo, pk) values (1100, 5);
insert into tbl(foo, pk) values (1200, 10);
insert into tbl(foo, pk) values (1200, 20);
insert into tbl(foo, pk) values (1201, 30);
insert into tbl(foo, pk) values (1200, 40);
insert into tbl(foo, pk) values (1200, 50);
insert into tbl(foo, pk) values (1202, 60);
insert into tbl(foo, pk) values (1202, 70);
insert into tbl(foo, pk) values (1202, 80);
insert into tbl(foo, pk) values (1202, 90);
SQL Fiddle here: http://sqlfiddle.com/#!6/fdaaa/2
update tbl
set
ID = 1
update t
set
t.ID = m.RN2
from
tbl t
join
(
select
y1.RN as RN1, y1.PK as PK1,
y2.RN as RN2, y2.PK as PK2
FROM
(
SELECT
ROW_NUMBER() OVER(ORDER BY x.pk1 ASC) AS rn,
x.pk1 AS pk
FROM
(
SELECT t1.pk AS pk1, t2.pk AS pk2
FROM
tbl t1
LEFT JOIN tbl t2 ON
(
(t1.pk < t2.pk AND t1.foo = t2.foo)
AND
(
NOT EXISTS
(
SELECT tMid.pk FROM
tbl tMid WHERE
tMid.pk < t2.pk
AND
tMid.pk > t1.pk
)
)
)
) x WHERE x.pk2 IS NULL
) y1
left join
(
SELECT
ROW_NUMBER() OVER(ORDER BY x.pk1 ASC) AS rn,
x.pk1 AS pk
FROM
(
SELECT t1.pk AS pk1, t2.pk AS pk2
FROM
tbl t1
LEFT JOIN tbl t2 ON
(
(t1.pk < t2.pk AND t1.foo = t2.foo)
AND
(
NOT EXISTS
(
SELECT tMid.pk FROM
tbl tMid WHERE
tMid.pk < t2.pk
AND
tMid.pk > t1.pk
)
)
)
) x WHERE x.pk2 IS NULL
) y2 on y1.RN = y2.RN - 1
) m on
(
(t.pk > m.pk1 and ((m.pk2 is not null ) and (t.pk <= m.pk2)))
-- or
-- (t.pk<=m.pk1)
)

This is my solution using a cursor and a temporary table to hold the results.
DECLARE #foo INT
DECLARE #previousfoo INT = -1
DECLARE #id INT = 0
DECLARE #getid CURSOR
DECLARE #resultstable TABLE
(
primaryId INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
foo INT,
id int null
)
SET #getid = CURSOR FOR
SELECT originaltable.foo
FROM originaltable
OPEN #getid
FETCH NEXT
FROM #getid INTO #foo
WHILE ##FETCH_STATUS = 0
BEGIN
IF (#foo <> #previousfoo)
BEGIN
SET #id = #id + 1
END
INSERT INTO #resultstable VALUES (#foo, #id)
SET #previousfoo = #foo
FETCH NEXT
FROM #getid INTO #foo
END
CLOSE #getid
DEALLOCATE #getid

How to generate a new code

Using VB.Net and SQL Server
Table1
Id Value .....
1001P0010001 100
1001P0010002 200
1001P0010003 300
1001P0010004 400
...
I have n columns and rows in table1, from the table1, I want to copy all the column details with new id no...
id no is like this 1001P0020001, 1001P0020002, .......
P002 is next id, P003 is the next Id.....
The first 4 digits and last 4 digits will remain as read from table1, middle 4 digits should change to next series
Expected output
Id Value .....
1001P0010001 100
1001P0010002 200
1001P0010003 300
1001P0010004 400
1001P0020001 100
1001P0020002 200
1001P0020003 300
1001P0020004 400
...
Which the best way to do this?
I can do it in VB.Net or a SQL query...? Please suggest ways of doing this.

CREATE TABLE CocoJambo (
Id CHAR(12) NOT NULL,
Value INT NULL,
CHECK( Id LIKE '[0-9][0-9][0-9][0-9][A-Z][0-9][0-9][0-9][0-9][0-9][0-9][0-9]' )
);
GO
CREATE UNIQUE INDEX IUN_CocoJambo_Id
ON CocoJambo (Id);
GO
INSERT CocoJambo (Id, Value)
SELECT '1001P0010001', 100
UNION ALL SELECT '1001P0010002', 200
UNION ALL SELECT '1001P0010003', 300
UNION ALL SELECT '1001P0010004', 400
UNION ALL SELECT '1001P0020001', 100
UNION ALL SELECT '1001P0020002', 200
UNION ALL SELECT '1001P0020003', 300
UNION ALL SELECT '1001P0020004', 400;
GO
-- Test 1: generating a single Id
DECLARE #Prefix CHAR(5),
#Sufix CHAR(4);
SELECT #Prefix = '1001P',
#Sufix = '0001';
BEGIN TRAN
DECLARE #LastGeneratedMiddleValue INT,
#LastValue INT;
SELECT #LastGeneratedMiddleValue = y.MiddleValue,
#LastValue = y.Value
FROM
(
SELECT x.MiddleValue, x.Value,
ROW_NUMBER() OVER(ORDER BY x.MiddleValue DESC) AS RowNum
FROM
(
SELECT CONVERT(INT,SUBSTRING(a.Id,6,3)) AS MiddleValue, a.Value
FROM CocoJambo a WITH(UPDLOCK) -- It will lock the rows (U lock) during transaction
WHERE a.Id LIKE #Prefix+'%'+#Sufix
) x
) y
WHERE y.RowNum=1;
SELECT #LastGeneratedMiddleValue = ISNULL(#LastGeneratedMiddleValue ,0)
SELECT #Prefix
+RIGHT('00'+CONVERT(VARCHAR(3),#LastGeneratedMiddleValue +1),3)
+#Sufix AS MyNewId,
#LastValue AS Value
COMMIT TRAN;
GO
-- Test 2: generating many Id's
BEGIN TRAN
DECLARE #Results TABLE (
Prefix CHAR(5) NOT NULL,
Sufix CHAR(4) NOT NULL,
LastGeneratedMiddleValue INT NOT NULL,
LastValue INT NULL
);
INSERT #Results (Prefix, Sufix, LastGeneratedMiddleValue, LastValue)
SELECT y.Prefix, y.Sufix, y.MiddleValue, y.Value
FROM
(
SELECT x.Prefix, x.MiddleValue, x.Sufix, x.Value,
ROW_NUMBER() OVER(PARTITION BY x.Prefix, x.Sufix ORDER BY x.MiddleValue DESC) AS RowNum
FROM
(
SELECT SUBSTRING(a.Id,1,5) AS Prefix,
CONVERT(INT,SUBSTRING(a.Id,6,3)) AS MiddleValue,
SUBSTRING(a.Id,9,4) AS Sufix,
a.Value
FROM CocoJambo a WITH(UPDLOCK) -- It will lock the rows (U lock) during transaction
) x
) y
WHERE y.RowNum=1;
SELECT r.*,
r.Prefix
+RIGHT('00'+CONVERT(VARCHAR(3),r.LastGeneratedMiddleValue +1),3)
+r.Sufix AS MyNewId,
r.LastValue AS Value
FROM #Results r;
COMMIT TRAN;
GO

insert into table1 (id, value)
select
l +
replicate('0', 3 - lenght(m)) + m +
r,
value
from (
select
left(id, 5) l,
cast(cast(substring(id, 6, 3) as integer) + 1 as varchar(3)) m,
right(id, 4),
value
from table1
) s

"Distinct" column in SQL query

SELECT id, EmpNo
FROM EmployeesTable
EmpNo can be the same for 1 or more records in the results of the above query. I now want to add another column derived from EmpNo(lets call it EmpNo2) but only returning distinct values of EmpNo.
For example if the above query returns 100 records but there are 69 distinct EmpNo values and i modify the query to
SELECT id, EmpNo, Distinct EmpNo2
FROM EmployeesTable EmpNo
,
i want all the 100 rows to be returned but the last column EmpNo2 should return 69 distinct values of EmpNo field.
But as already know, using distinct in that way results into an error but i want to implement such functionality - and a subquery is not helping.
SAMPLE REQUIRED RESULTS
ID EmpNo EmpNo2
1 0T4/HR 0T4/HR
1 0T4/HR 2VP/E
1 0T4/HR xT9/67
1 0T4/HR
1 0T4/HR
2 2VP/E
2 2VP/E
2 2VP/E
2 2VP/E
2 2VP/E
3 XT9/67
3 XT9/67
3 xT9/67
3 XT9/67

How about:
Select id, empno, empno2
from employeestable left outer join (
SELECT min([id]) as minid
,[empno] empno2
FROM [EmployeesTable]
group by empno) etab2 on employeestable.id = etab2.minid
You're saying a subquery won't work, though - why not?

Your requirement is not clear and I also have very little information. Following is what you need. This can be even better but it is just a try.
declare #temp table
(
uniqueid int identity(1, 1),
id int,
empno varchar(50),
empno2 varchar(50)
)
insert into #temp select 1, '0T4/HR', null
insert into #temp select 1, '0T4/HR' , null
insert into #temp select 1 , '0T4/HR' , null
insert into #temp select 1, '0T4/HR' , null
insert into #temp select 1, '0T4/HR' , null
insert into #temp select 2, '2VP/E' , null
insert into #temp select 2, '2VP/E' , null
insert into #temp select 2, '2VP/E' , null
insert into #temp select 2, '2VP/E' , null
insert into #temp select 2, '2VP/E' , null
insert into #temp select 3, 'XT9/67' , null
insert into #temp select 3, 'XT9/67' , null
insert into #temp select 3, 'xT9/67' , null
insert into #temp select 3, 'XT9/67' , null
SELECT ROW_NUMBER() OVER (ORDER BY id) AS id, empno into #temp FROM #temp group by empno, id
update #temp set empno2 = t2.empno
from #temp t inner join #temp t2 on t.uniqueid = t2.id
select * from #temp
drop table #temp

SQL Group By Modulo of Row Count

I have the following sample data:
Id Name Quantity
1 Red 1
2 Red 3
3 Blue 1
4 Red 1
5 Yellow 3
So for this example, there are a total of 5 Red, 1 Blue, and 3 Yellow. I am looking for a way to group them by Color, but with a maximum of 2 items per group (sorting is not important). Like so:
Name QuantityInPackage
Red 2
Red 2
Red 1
Blue 1
Yellow 2
Yellow 1
Any suggestions on how to accomplish this using T-SQL on MS-SQL 2005?

I would define a table containing sequential numbers, say 1 to 1000 and join that table (unless your database supports generating these numbers in the query like Oracle using CONNECT BY):
Table num
n
1
2
3
...
I tried the following query using Oracle (should work with TSQL too):
With summed_colors As (
Select name, Sum(quantity) quantity
From colors
Group By name
)
Select
name,
Case When n*2-1 = quantity Then 1 Else 2 End quantityInPackage
From summed_colors
Join nums On ( n*2-1 <= quantity )
Order By name, quantityInPackage Desc
and it returns
Blue 1
Red 2
Red 2
Red 1
Yellow 2
Yellow 1

You need to use a numbers table to unpivot your data to make multiple rows:
DECLARE #PackageSize AS int
SET #PackageSize = 2
DECLARE #numbers AS TABLE (Number int)
INSERT INTO #numbers
VALUES (1)
INSERT INTO #numbers
VALUES (2)
INSERT INTO #numbers
VALUES (3)
INSERT INTO #numbers
VALUES (4)
INSERT INTO #numbers
VALUES (5)
INSERT INTO #numbers
VALUES (6)
INSERT INTO #numbers
VALUES (7)
INSERT INTO #numbers
VALUES (8)
INSERT INTO #numbers
VALUES (9)
INSERT INTO #numbers
VALUES (10)
DECLARE #t AS TABLE
(
Id int
,Nm varchar(6)
,Qty int
)
INSERT INTO #t
VALUES (1, 'Red', 1)
INSERT INTO #t
VALUES (2, 'Red', 3)
INSERT INTO #t
VALUES (3, 'Blue', 1)
INSERT INTO #t
VALUES (4, 'Red', 1)
INSERT INTO #t
VALUES (5, 'Yellow', 3) ;
WITH Totals
AS (
SELECT Nm
,SUM(Qty) AS TotalQty
,SUM(Qty) / #PackageSize AS NumCompletePackages
,SUM(Qty) % #PackageSize AS PartialPackage
FROM #t
GROUP BY Nm
)
SELECT Totals.Nm
,#PackageSize AS QuantityInPackage
FROM Totals
INNER JOIN #numbers AS numbers
ON numbers.Number <= Totals.NumCompletePackages
UNION ALL
SELECT Totals.Nm
,PartialPackage AS QuantityInPackage
FROM Totals
WHERE PartialPackage <> 0

It's not grouping or modulo/division that's the hard part here, it's the fact that you need to do an aggregate (sum) and then explode the data again. There aren't actually any "Red 2" rows, you have to create them somehow.
For SQL Server 2005+, I would probably use a function do the "exploding":
CREATE FUNCTION dbo.CreateBuckets
(
#Num int,
#MaxPerGroup int
)
RETURNS TABLE
AS RETURN
WITH First_CTE AS
(
SELECT CASE
WHEN #MaxPerGroup < #Num THEN #MaxPerGroup
ELSE #Num
END AS Seed
),
Sequence_CTE AS
(
SELECT Seed AS [Current], Seed AS Total
FROM First_CTE
UNION ALL
SELECT
CASE
WHEN (Total + #MaxPerGroup) > #Num THEN (#Num - Total)
ELSE #MaxPerGroup
END,
Total + #MaxPerGroup
FROM Sequence_CTE
WHERE Total < #Num
)
SELECT [Current] AS Num
FROM Sequence_CTE
Then, in the main query, group (sum) the data first and then use the bucket function:
WITH Totals AS
(
SELECT Name, SUM(Quantity) AS Total
FROM Table
GROUP BY Name
)
SELECT Name, b.Num AS QuantityInPackage
FROM Totals
CROSS APPLY dbo.CreateBuckets(Total, 2) b
This should work for any bucket size, doesn't have to be 2 (just change the parameter).

This is very crude, but it works.
CREATE TABLE #Colors
(
Id int,
Name varchar(50),
Quantity int
)
INSERT INTO #Colors VALUES (1, 'Red', 1)
INSERT INTO #Colors VALUES (2, 'Red', 3)
INSERT INTO #Colors VALUES (3, 'Blue', 1)
INSERT INTO #Colors VALUES (4, 'Red', 1)
INSERT INTO #Colors VALUES (5, 'Yellow', 3)
INSERT INTO #Colors VALUES (6, 'Green', 2)
SELECT
Name,
SUM(Quantity) AS TotalQuantity
INTO #Summed
FROM
#Colors
GROUP BY
Name
SELECT
Name,
TotalQuantity / 2 AS RecordsWithQuantity2,
TotalQuantity % 2 AS RecordsWithQuantity1
INTO #SortOfPivot
FROM
#Summed
ORDER BY
Name
DECLARE #RowCount int
SET #RowCount = (SELECT COUNT(*) FROM #SortOfPivot)
DECLARE #Name varchar(50)
DECLARE #TwosInsertCount int
DECLARE #OnesInsertCount int
CREATE TABLE #Result (Name varchar(50), Quantity int)
WHILE #RowCount > 0
BEGIN
SET #Name = (SELECT TOP 1 Name FROM #SortOfPivot)
SET #TwosInsertCount = (SELECT TOP 1 RecordsWithQuantity2 FROM #SortOfPivot)
SET #OnesInsertCount = (SELECT TOP 1 RecordsWithQuantity1 FROM #SortOfPivot)
WHILE #TwosInsertCount > 0
BEGIN
INSERT INTO #Result (Name, Quantity) VALUES (#Name, 2)
SET #TwosInsertCount = #TwosInsertCount - 1
END
WHILE #OnesInsertCount > 0
BEGIN
INSERT INTO #Result (Name, Quantity) VALUES (#Name, 1)
SET #OnesInsertCount = #OnesInsertCount - 1
END
DELETE FROM #SortOfPivot WHERE Name = #Name
SET #RowCount = (SELECT COUNT(*) FROM #SortOfPivot)
END
SELECT * FROM #Result
DROP TABLE #Colors
DROP TABLE #Result
DROP TABLE #Summed
DROP TABLE #SortOfPivot

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

TSQL- generate a sequence number for duplicate records - sql

The query to extract the non-unique records would be select regno,count() from table group by regno having count() > 1 I don't know enough about MSSQL to tell you how to generate an incrementing sequence number to update the records that match the query.

Related

What is output of this query and Why?

Identifying repeated fields in SQL query

How to generate a new code

"Distinct" column in SQL query

SQL Group By Modulo of Row Count

Categories

Resources

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

TSQL- generate a sequence number for duplicate records - sql

The query to extract the non-unique records would be select regno,count(*) from table group by regno having count(*) > 1 I don't know enough about MSSQL to tell you how to generate an incrementing sequence number to update the records that match the query.

Related

What is output of this query and Why?

Identifying repeated fields in SQL query

How to generate a new code

"Distinct" column in SQL query

SQL Group By Modulo of Row Count

Categories

Resources

The query to extract the non-unique records would be select regno,count() from table group by regno having count() > 1 I don't know enough about MSSQL to tell you how to generate an incrementing sequence number to update the records that match the query.