SQL Create Row Number, skip certain row numbers - sql

I am trying to create row numbers in SQL but do not want to use certain numbers.
Example Illustration:
Bob, 100
Matt, 120
Dan, 150
Bill, 156
Tim, 175
Normally in SQL....if I were to use the Row_Number() function...it would count - 1, 2, 3, 4, 5.
I would instead like to skip certain numbers so that it counts - 1, 2, 5, 6, 7
Below is my code
-- Lists every row of TOTAL and BASE with a display number (row_id).
-- The 'Total Opportunities' row is pinned to row_id 0 and sorted first; every
-- other row is numbered 1..n by clean_ft_ee, largest first.
SELECT
    CASE
        WHEN clean_client_nm = 'Total Opportunities' THEN 0
        ELSE ROW_NUMBER() OVER (
                 -- Number the regular rows in their own partition so the total
                 -- row does not use up a number (otherwise numbering can start at 2).
                 PARTITION BY CASE
                                  WHEN clean_client_nm = 'Total Opportunities' THEN 0
                                  ELSE 1
                              END
                 -- Same keys as the final ORDER BY, so row_id follows the
                 -- displayed order even when clean_ft_ee has ties.
                 ORDER BY clean_ft_ee DESC, client_nm
             )
    END AS row_id,
    prod,
    client_nm,
    clean_ft_ee,
    -- Explicit aliases: a bare ISNULL(...) expression yields an unnamed column.
    ISNULL(Med, '')      AS Med,
    ISNULL(Stop, '')     AS Stop,
    ISNULL(Den, '')      AS Den,
    ISNULL(VIS, '')      AS VIS,
    ISNULL(Life, '')     AS Life,
    ISNULL(STD, '')      AS STD,
    ISNULL(LTD, '')      AS LTD,
    ISNULL(Worksite, '') AS Worksite,
    client_sic
FROM (
    -- NOTE(review): SELECT * requires TOTAL and BASE to expose the same columns
    -- in the same order; their definitions are not visible here.
    SELECT * FROM TOTAL
    UNION ALL
    SELECT * FROM BASE
) AS X
ORDER BY
    CASE
        WHEN clean_client_nm = 'Total Opportunities' THEN 0
        ELSE 1
    END,
    clean_ft_ee DESC,
    client_nm

The answer is to create a temp table (or table variable), insert the specific row numbers you want to use, and then use the FirstCursor logic below to assign those numbers, one at a time, to the row_id column of the target table.
-- Hands out the numbers stored in #row_num, one per loop pass, to the rows of
-- #singlecovclient that still have no row_id, taking the row with the largest
-- "lives" value first.
-- NOTE(review): @count and @row_id are not declared in this snippet; they are
-- expected to be declared (and @count initialised to the number of rows to
-- number) before this code runs.
DECLARE FirstCursor CURSOR GLOBAL FOR
    SELECT row_num
    FROM #row_num
    ORDER BY row_num;  -- without ORDER BY the hand-out order is not guaranteed

OPEN FirstCursor;

WHILE @count > 0
BEGIN
    FETCH NEXT FROM FirstCursor INTO @row_id;

    -- Stop when #row_num is exhausted; otherwise the previous @row_id would be
    -- assigned again to the remaining rows.
    IF @@FETCH_STATUS <> 0
        BREAK;

    -- Updatable CTE: targets the single un-numbered row with the most lives.
    WITH top1 AS (
        SELECT TOP (1) lives, row_id
        FROM #singlecovclient
        WHERE row_id IS NULL
        ORDER BY lives DESC
    )
    UPDATE top1
    SET row_id = @row_id;

    SET @count = @count - 1;
END;

CLOSE FirstCursor;
DEALLOCATE FirstCursor;

Related

How to find the gap sequence in non identity column in SQL Server

I have more than ten thousand rows in a table like the one below, and I want to find the missing gaps in the sequence numbers in that list:
CI-480-1617
CI-481-1617
CI-482-1617
CI-483-1617
CI-484-1617
CI-485-1617
CI-486-1617
CI-487-1617
CW-095-1617
Can you help me?
Thanks
Without sample data, I cannot test the solution, but something like this might help you in the current situation:
-- Lists every sequence number in TableA.ColumnA with the distance to the next
-- higher number that shares the same prefix. Gap = 1 means the next number
-- follows directly, Gap > 1 means numbers are missing in between, and Gap is
-- NULL for the last number of each prefix.
-- Assumes values shaped like 'CI-480-1617': a 2-character prefix and a
-- 3-digit number at positions 4-6.
;WITH CTE AS -- the leading semicolon terminates any previous statement
(
    SELECT
        LEFT(ColumnA, 2) AS Section,
        CONVERT(int, SUBSTRING(ColumnA, 4, 3)) AS SeqNumb,
        -- Rank the numbers inside each prefix. Partitioning by the number
        -- itself would restart the counter on every row.
        ROW_NUMBER() OVER (
            PARTITION BY LEFT(ColumnA, 2)
            ORDER BY CONVERT(int, SUBSTRING(ColumnA, 4, 3))
        ) AS RowNumb
    FROM TableA
)
SELECT
    C.*,
    -- Compare sequence values, not row numbers: the row-number difference of
    -- two adjacent rows is always 1.
    C2.SeqNumb - C.SeqNumb AS Gap
FROM CTE AS C
LEFT JOIN CTE AS C2
    ON C2.Section = C.Section
   AND C2.RowNumb = C.RowNumb + 1
This solution is based on some assumptions and may help you:
the first part of every row is SECTION
the second part of every row is ID
Table creation:
CREATE TABLE [dbo].[Gaps] (
[Text] nvarchar(50) NOT NULL
);
INSERT [dbo].[Gaps] ([Text])
VALUES
('CI-480-1617'),
('CI-481-1617'),
('CI-482-1617'),
('CI-483-1617'),
('CI-484-1617'),
('CI-485-1617'),
('CI-486-1617'),
('CI-487-1617'),
('CW-095-1617');
Finding gaps:
-- Reports, for each section prefix, every run of numbers that is missing from
-- Gaps.[Text]. Values are expected to look like 'SECTION-NUMBER-SUFFIX'
-- (for example 'CI-480-1617').
WITH IDS (Section, Nmr) AS (
    -- Numbers that exist: the text between the first and the second dash.
    SELECT
        SUBSTRING([Text], 1, CHARINDEX('-', [Text]) - 1),
        CONVERT(int, SUBSTRING(
            [Text],
            CHARINDEX('-', [Text]) + 1,
            -- length = position of second dash - position of first dash - 1.
            -- The previous expression searched for '|' and only produced the
            -- right length for a 2-character prefix with a 3-digit number.
            CHARINDEX('-', [Text], CHARINDEX('-', [Text]) + 1) - CHARINDEX('-', [Text]) - 1
        ))
    FROM Gaps
    UNION ALL
    -- Sentinel 0 per section so a gap at the start (1..first-1) is reported.
    SELECT
        DISTINCT SUBSTRING([Text], 1, CHARINDEX('-', [Text]) - 1),
        0
    FROM Gaps
    -- Uncomment next lines if you want to get the gap to some MAX value
    --UNION ALL
    --SELECT
    --    DISTINCT SUBSTRING([Text], 1, CHARINDEX('-', [Text]) - 1),
    --    1000
    --FROM Gaps
)
SELECT Section, StartNmr = cur + 1, EndNmr = nxt - 1
FROM (
    SELECT
        Section,
        cur = Nmr,
        -- Next higher number present in the same section.
        nxt = (
            SELECT MIN(B.Nmr)
            FROM IDS AS B
            WHERE B.Section = A.Section AND B.Nmr > A.Nmr
        )
    FROM IDS AS A
) AS D
-- Only pairs with at least one missing number in between are gaps.
WHERE nxt - cur > 1
ORDER BY Section, StartNmr
Output (without MAX value):
-----------------------
Section StartNmr EndNmr
-----------------------
CI 1 479
CW 1 94
Output (with MAX value):
-----------------------
Section StartNmr EndNmr
-----------------------
CI 1 479
CI 488 999
CW 1 94
CW 96 999

How to remove similar text from another value

I have a large dataset, millions of rows, containing various people and items they're associated with. In many cases, these peoples' names are present in the item name as well. I would like to find the shortest substring of item name in which the owner name, or parts of their name, are no longer present.
A sample of the data is as follows:
CREATE TABLE test ([ID] nvarchar(255), [OWNER] nvarchar(255), [ITEM] nvarchar(255))
INSERT INTO test
SELECT '1','A B C','A B X X X'
UNION ALL
SELECT '2','ABC DEF','XABCD XX X'
UNION ALL
SELECT '2','ABC DEF','YABCD X X'
UNION ALL
SELECT '3','X X X X','YPD X X'
UNION ALL
SELECT '4','XYZ','X X X'
UNION ALL
SELECT '5','A B C','OOO PPP QQQ'
With ideal output being:
ID | OWNER | ITEM | SHORT ITEM
1 | A B C | A B X X X | X X X
2 | ABC DEF | XABCD XX X | XX X
2 | ABC DEF | YABCD X X | X X
3 | X X X X | YPD X X | X X
4 | XYZ | X X X | X X X
5 | A B C | OOO PPP QQQ| PPP QQQ
This output includes a couple of cases in which I wanted to remove something from the item name which was not the owner's name, and so I have hardcoded that into the query. I've written the following query:
;WITH p1 as( --Retrieving first word of ITEM and ITEM minus first word
SELECT SUBSTRING([ITEM],1,
case when CHARINDEX(' ',[ITEM])=0 then LEN([ITEM]) --When no space in ITEM, return ITEM
else CHARINDEX(' ', [ITEM]) -1 end) as w1p --Return the first word separated by space
,SUBSTRING([ITEM],CHARINDEX(' ',[ITEM])+1,100) as m1p --Return everything minus the first word
,[ITEM]
,[ID]
,[OWNER]
FROM test
),p2 as( --Retrieving second word of ITEM and ITEM minus second word
SELECT SUBSTRING(m1p,1,
case when CHARINDEX(' ',m1p)=0 then LEN(m1p)
else CHARINDEX(' ',m1p) -1 end) as w2p
,SUBSTRING(m1p,CHARINDEX(' ',m1p)+1,100) as m2p
,[ITEM]
,[ID]
,[w1p]
,[m1p]
FROM p1
),p3 as( --Retrieving third word of ITEM and ITEM minus third word
SELECT SUBSTRING(m2p,1,
case when CHARINDEX(' ',m2p)=0 then LEN(m2p)
else CHARINDEX(' ',m2p) -1 end) as w3p
,SUBSTRING(m2p,CHARINDEX(' ',m2p)+1,100) as m3p
,*
FROM p2
),p4 as( --Retrieving fourth word of ITEM and ITEM minus fourth word
SELECT SUBSTRING(m3p,1,
case when CHARINDEX(' ',m3p)=0 then LEN(m3p)
else CHARINDEX(' ',m3p) -1 end) as w4p
,SUBSTRING(m3p,CHARINDEX(' ',m3p)+1,100) as m4p
,*
FROM p3
),m1 as( --Retrieving first word of OWNER and OWNER minus first word
SELECT SUBSTRING([OWNER],1,
CASE WHEN CHARINDEX(' ',[OWNER])=0 THEN LEN([OWNER])
ELSE CHARINDEX(' ',[OWNER])-1 end) as w1m
,SUBSTRING([OWNER],CHARINDEX(' ',[OWNER])+1,100) as m1m
,[OWNER]
,[ID]
FROM p1
GROUP BY [OWNER], [ID]
),m2 as( --Retrieving second word of OWNER and OWNER minus second word
SELECT SUBSTRING(m1m,1,
case when CHARINDEX(' ', m1m) = 0 then LEN(m1m)
else CHARINDEX(' ', m1m) -1 end) as w2m
,SUBSTRING(m1m,CHARINDEX(' ',m1m)+1,100) as m2m
,*
FROM m1
),m3 as( --Retrieving third word of OWNER and OWNER minus third word
SELECT SUBSTRING(m2m,1,
case when CHARINDEX(' ', m2m) = 0 then LEN(m2m)
else CHARINDEX(' ', m2m) -1 end) as w3m
,SUBSTRING(m2m,CHARINDEX(' ',m2m)+1,100) as m3m
,*
FROM m2
),m4 as( --Retrieving fourth word of OWNER
SELECT SUBSTRING(m3m,1,
case when CHARINDEX(' ', m3m) = 0 then LEN(m3m)
else CHARINDEX(' ', m3m) -1 end) as w4m
,*
FROM m3
),ms as( --Adding special cases not caught by regular query
SELECT CASE WHEN [ID] IN ('3','5') THEN
CASE WHEN [ID] = '3' THEN 'YPD'
WHEN [ID] = '5' THEN 'OOO'
ELSE NULL END
ELSE NULL END as SPEC
,*
FROM m4
)
SELECT m.[ID] --Finding closest shortname
,m.[OWNER]
,p.[ITEM]
,CASE WHEN SUBSTRING(p.[ITEM],1,LEN(m.SPEC)) = SPEC AND SPEC IS NOT NULL THEN LTRIM(SUBSTRING(p.[ITEM],LEN(m.SPEC)+1,100)) --If hardcoded phrase in ITEM, return ITEM minus that phrase
WHEN p.w1p LIKE '%'+m.w1m+'%' AND p.w2p NOT LIKE '%'+m.w2m+'%' AND p.w3p NOT LIKE '%'+m.w3m+'%' AND p.w4p NOT LIKE '%'+m.w4m+'%' THEN p.m1p --If first word of ITEM match first of OWNER, return ITEM minus first
WHEN p.w1p LIKE '%'+m.w1m+'%' AND p.w2p LIKE '%'+m.w2m+'%' AND p.w3p NOT LIKE '%'+m.w3m+'%' AND p.w4p NOT LIKE '%'+m.w4m+'%' THEN p.m2p --If first two words of ITEM match first of OWNER, return ITEM minus two words
WHEN p.w1p LIKE '%'+m.w1m+'%' AND p.w2p LIKE '%'+m.w2m+'%' AND p.w3p LIKE '%'+m.w3m+'%' AND p.w4p NOT LIKE '%'+m.w4m+'%' THEN p.m3p --If first three words of ITEM match first of OWNER, return ITEM minus three words
WHEN p.w1p LIKE '%'+m.w1m+'%' AND p.w2p LIKE '%'+m.w2m+'%' AND p.w3p LIKE '%'+m.w3m+'%' AND p.w4p LIKE '%'+m.w4m+'%' THEN p.m4p --If first four words of ITEM match first of OWNER, return ITEM minus four words
ELSE p.[ITEM]
END AS [SHORT ITEM]
FROM p4 p
LEFT JOIN ms m ON p.[ID] = m.[ID]
While this achieves my goal, it does not look very nice and feels like it could be optimized. It requires a where statement to have any sort of speed in execution. While I would likely not be running this on the full dataset anyway, I am looking for ways to improve. I do not have permission to view execution plans, so I cannot share that.
Thank you for any help or advice you can offer.
OK I developed this using only the first three rows of the table. Replace #test with your table name. Let me know if this works for you.
-- Walks through #test row by row and removes, word by word, the parts of the
-- item name that correspond to words of the owner name. The result is kept in
-- #b.short_item, which starts out as the full item.
select *, row_number() over(order by id) rowid
into #a
from #test

-- Build #b from #a so both tables share the same rowid for the same row.
-- Numbering #test twice is not safe: id is not unique, so ties could be
-- numbered differently in each pass.
select *, item short_item
into #b
from #a

-- Variables use the @ prefix (# names a temp table). nvarchar matches the
-- nvarchar columns of the sample table.
declare @iterator int=1
declare @owner nvarchar(max)
declare @owneroriginal nvarchar(max)
declare @item nvarchar(max)
declare @itemoriginal nvarchar(max)
declare @itemactualoriginal nvarchar(max)

while @iterator<=(select max(rowid) from #a)
begin
    -- Load the current row; the *original variables are consumed word by word below.
    select @owner=[owner] from #a where rowid=@iterator
    select @owneroriginal=[owner] from #a where rowid=@iterator
    select @item=[item] from #a where rowid=@iterator
    select @itemoriginal=[item] from #a where rowid=@iterator
    select @itemactualoriginal=[item] from #a where rowid=@iterator

    -- @owner becomes '' once @owneroriginal contains no more spaces.
    while @owner<>''
    begin
        -- Peel the next word (including its trailing space) off owner and item.
        select @owner=left(@owneroriginal, charindex(' ',@owneroriginal))
        select @owneroriginal= ltrim(replace(@owneroriginal,@owner,''))
        select @item=left(@itemoriginal, charindex(' ',@itemoriginal))
        select @itemoriginal= ltrim(replace(@itemoriginal,@item,''))

        if @itemactualoriginal
            like '%'+rtrim(@owner)+'%' and @owner<>''
        begin
            -- The owner word occurs in the item: drop the current item word.
            update #b
            set short_item=replace(short_item, rtrim(@item),'')
            where rowid=@iterator
        end
        -- NOTE(review): @@rowcount here reflects the statement executed just
        -- before this test, not one of the UPDATEs; kept as written - verify intent.
        else if @@rowcount=0
            update #b
            set short_item=
                case
                    when @owner = ''
                        then ltrim(replace(short_item, @owneroriginal,''))
                    else ltrim(replace(short_item, @owner,''))
                end
            where rowid=@iterator
    end

    set @iterator=@iterator+1
end

select id, owner, item, short_item from #b

Check anagrams using sql server

ACT and CAT are anagrams
I have to write a function in SQL Server that takes two strings and returns a Boolean output indicating whether they are anagrams of each other.
It doesn't really make sense to do this in SQL Server, but it is for learning purposes only.
SQL Server is not good at this kind of things, but here you are:
WITH Src AS
(
SELECT * FROM (VALUES
('CAT', 'ACT'),
('CAR', 'RAC'),
('BUZ', 'BUS'),
('FUZZY', 'MUZZY'),
('PACK', 'PACKS'),
('AA', 'AA'),
('ABCDEFG', 'GFEDCBA')) T(W1, W2)
), Numbered AS
(
SELECT *, ROW_NUMBER() OVER (ORDER BY (SELECT 1)) Num
FROM Src
), Splitted AS
(
SELECT Num, W1 Word1, W2 Word2, LEFT(W1, 1) L1, LEFT(W2, 1) L2, SUBSTRING(W1, 2, LEN(W1)) W1, SUBSTRING(W2, 2, LEN(W2)) W2
FROM Numbered
UNION ALL
SELECT Num, Word1, Word2, LEFT(W1, 1) L1, LEFT(W2, 1) L2, SUBSTRING(W1, 2, LEN(W1)) W1, SUBSTRING(W2, 2, LEN(W2)) W2
FROM Splitted
WHERE LEN(W1)>0 AND LEN(W2)>0
), SplitOrdered AS
(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY Num ORDER BY L1) LNum1,
ROW_NUMBER() OVER (PARTITION BY Num ORDER BY L2) LNum2
FROM Splitted
)
SELECT S1.Num, S1.Word1, S1.Word2, CASE WHEN COUNT(*)=LEN(S1.Word1) AND COUNT(*)=LEN(S1.Word2) THEN 1 ELSE 0 END Test
FROM SplitOrdered S1
JOIN SplitOrdered S2 ON S1.L1=S2.L2 AND S1.Num=S2.Num AND S1.LNum1=S2.LNum2
GROUP BY S1.Num, S1.Word1, S1.Word2
And results:
1 CAT ACT 1
2 CAR RAC 1
3 BUZ BUS 0
4 FUZZY MUZZY 0
5 PACK PACKS 0
6 AA AA 1
7 ABCDEFG GFEDCBA 1
First split (T-SQL Split Word into characters) both words into temporary tables. Then perform an outer join and check for nulls.
Edit thanks to George's comment:
split (T-SQL Split Word into characters) both words into temporary tables
Modify temporary tables or use CTEs to add a column with count(*) with group by letters clause
Perform a full outer join on the two temporary tables, using the letter and its count in the join condition
Check for nulls in the output - if there are none, you have an anagram
The first thing that came to my mind:
-- Decides whether @word1 and @word2 are anagrams of each other. NULL inputs
-- are treated as '' and the comparison is done on lower-cased text.
-- Approach: store the code point of every character of both words in @table
-- (id 1 = first word, id 2 = second word), then compare the per-letter counts.
DECLARE @word1 nvarchar(max) = NULL,
        @word2 nvarchar(max) = 'Test 1',
        @n int;

DECLARE @table TABLE (
    id int,      -- 1 = letter of @word1, 2 = letter of @word2
    letter int   -- code point; NULL when the word is shorter than position @n
);

SELECT @word1 = ISNULL(LOWER(@word1), ''),
       @word2 = ISNULL(LOWER(@word2), '');

-- Walk from the end of the longer word down to position 1.
SELECT @n = CASE WHEN LEN(@word1) > LEN(@word2) THEN LEN(@word1) ELSE LEN(@word2) END;

WHILE @n > 0
BEGIN
    -- UNICODE() rather than ASCII(): the inputs are nvarchar, and ASCII() maps
    -- characters outside the code page to the same value, which could make
    -- different words look like anagrams.
    INSERT INTO @table (id, letter)
    SELECT 1, UNICODE(SUBSTRING(@word1, @n, 1))
    UNION ALL
    SELECT 2, UNICODE(SUBSTRING(@word2, @n, 1));

    SET @n = @n - 1;
END;

-- Any (letter, count) pair that exists on one side only survives the WHERE
-- clause; zero survivors means the words are anagrams.
SELECT CASE WHEN COUNT(*) = 0 THEN 1 ELSE 0 END isAnagram
FROM (
    SELECT id, letter, COUNT(letter) AS c
    FROM @table
    WHERE id = 1
    GROUP BY id, letter) AS t
FULL OUTER JOIN (
    SELECT id, letter, COUNT(letter) AS c
    FROM @table
    WHERE id = 2
    GROUP BY id, letter) AS p
    ON t.letter = p.letter AND t.c = p.c
WHERE t.letter IS NULL OR p.letter IS NULL
Output:
isAnagram
0
You can also use loops in functions, and they can be fast. I was not able to get any of the other answers even close to the performance of this function:
-- dbo.IsAnagram: returns 1 when @value2 is a rearrangement of the characters
-- of @value1, otherwise 0. Returns 0 when either argument is NULL.
-- Character matching follows the collation of the arguments.
CREATE FUNCTION dbo.IsAnagram
(
    @value1 VARCHAR(255)
  , @value2 VARCHAR(255)
)
RETURNS BIT
AS
BEGIN
    DECLARE @pos INT;

    IF @value1 IS NULL OR @value2 IS NULL
        RETURN 0;

    -- DATALENGTH instead of LEN: LEN ignores trailing spaces, which made
    -- 'ab ' and 'ab' look equally long.
    IF DATALENGTH(@value1) <> DATALENGTH(@value2)
        RETURN 0;

    WHILE DATALENGTH(@value1) > 0
    BEGIN
        -- CHARINDEX does a literal search. PATINDEX with '%' + char + '%'
        -- treated %, _ and [ in the input as wildcards and matched anything.
        SET @pos = CHARINDEX(LEFT(@value1, 1), @value2);

        IF @pos = 0
            RETURN 0;

        -- Consume the matched character from both strings.
        SET @value2 = STUFF(@value2, @pos, 1, '');
        SET @value1 = STUFF(@value1, 1, 1, '');
    END;

    -- Every character of @value1 found a partner; nothing may be left over.
    RETURN CASE WHEN ISNULL(DATALENGTH(@value2), 0) = 0 THEN 1 ELSE 0 END;
END
GO
SELECT dbo.IsAnagram('asd', 'asd')
--1
SELECT dbo.IsAnagram('asd', 'dsa')
--1
SELECT dbo.IsAnagram('assd', 'dsa')
--0
SELECT dbo.IsAnagram('asd', 'dssa')
--0
SELECT dbo.IsAnagram('asd', 'asd')
--1
This is something a numbers table can help with.
Code to create and populate a small numbers table is below.
-- Helper table holding the integers 1..1000.
CREATE TABLE dbo.Numbers
(
    Number INT PRIMARY KEY
);

-- Ten rows cross-joined three times give 10 * 10 * 10 = 1000 rows, which
-- ROW_NUMBER() turns into 1..1000.
WITH Ten(N) AS
(
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
INSERT INTO dbo.Numbers (Number)
-- The ORDER BY key is a constant (@@SPID); ROW_NUMBER() requires an ORDER BY
-- but any ordering works here.
SELECT ROW_NUMBER() OVER (ORDER BY @@SPID) AS Number
FROM Ten AS T10
CROSS JOIN Ten AS T100
CROSS JOIN Ten AS T1000;
Once that is in place you can use
SELECT W1,
W2,
IsAnagram = CASE
WHEN LEN(W1) <> LEN(W2)
THEN 0
ELSE
CASE
WHEN EXISTS (SELECT SUBSTRING(W1, Number, 1),
COUNT(*)
FROM dbo.Numbers
WHERE Number <= LEN(W1)
GROUP BY SUBSTRING(W1, Number, 1)
EXCEPT
SELECT SUBSTRING(W2, Number, 1),
COUNT(*)
FROM dbo.Numbers
WHERE Number <= LEN(W2)
GROUP BY SUBSTRING(W2, Number, 1))
THEN 0
ELSE 1
END
END
FROM (VALUES
('CAT', 'ACT'),
('CAR', 'RAC'),
('BUZ', 'BUS'),
('FUZZY', 'MUZZY'),
('PACK', 'PACKS'),
('AA', 'AA'),
('ABCDEFG', 'GFEDCBA')) T(W1, W2)
Or an alternative implementation could be
IsAnagram = CASE
WHEN LEN(W1) <> LEN(W2)
THEN 0
ELSE
CASE
WHEN EXISTS (SELECT 1
FROM dbo.Numbers N
CROSS APPLY (VALUES(1,W1),
(2,W2)) V(Col, String)
WHERE N.Number <= LEN(W1)
GROUP BY SUBSTRING(String, Number, 1)
HAVING COUNT(CASE WHEN Col = 1 THEN 1 END) <>
COUNT(CASE WHEN Col = 2 THEN 1 END))
THEN 0
ELSE 1
END
END

Tricky SQL query requiring search for contains

I have data such as this:
Inventors column in my table
Hundley; Edward; Ana
Isler; Hunsberger
Hunsberger;Hundley
Names are separated by ;. I want to write a SQL query which sums up the count.
Eg. The result should be:
Hundley 2
Isler 1
Hunsberger 2
Edward 1
Ana 1
I could do a group by but this is not a simple group by as you can see. Any ideas/thoughts on how to get this output?
Edit: Changed results so it doesn't create any confusion that a row only contains 2 names.
You can take a look at this. I certainly do not recommend this way if you have lots of data, BUT you can do some modifications and use it and it works like a charm!
This is the new code for supporting unlimited splits:
-- Splits each semicolon-separated Name value into its parts with a recursive
-- CTE and counts how often every part occurs.
Declare @Table Table (
    Name Nvarchar(50)
);

-- UNION ALL keeps identical input rows; plain UNION would drop duplicates and
-- under-count their names.
Insert @Table (
    Name
)           Select 'Hundley; Edward; Anna'
Union All   Select 'Isler; Hunsberger'
Union All   Select 'Hunsberger; Hundley'
Union All   Select 'Anna'
;
With Result (
    Part        -- the piece split off in this step
  , Remained    -- text still to be split
  , [Index]     -- position of the ';' that was used; 0 = no separator left
  , Level       -- 1 for the first piece, 2 for the second, ...
) As (
    -- Anchor: first piece of every row.
    Select  LTrim(RTrim(
                Case When CharIndex(';', Name, 1) = 0
                     Then Name
                     Else Left(Name, CharIndex(';', Name, 1) - 1)
                End
            ))
          , Right(Name, Len(Name) - CharIndex(';', Name, 1))
          , CharIndex(';', Name, 1)
          , 1
    From    @Table
    Union All
    -- Recursive step: keep splitting while the previous step found a ';'.
    Select  LTrim(RTrim(
                Case When CharIndex(';', Remained, 1) = 0
                     Then Remained
                     Else Left(Remained, CharIndex(';', Remained, 1) - 1)
                End
            ))
          , Right(Remained, Len(Remained) - CharIndex(';', Remained, 1))
          , CharIndex(';', Remained, 1)
          , Level + 1
    From    Result
    Where   [Index] <> 0
)
Select  Part
      , Count(*) As NameCount
From    Result
Group By Part
Cheers
-- Splits YourInventorsTable.Inventors on ';' and counts each trimmed name.
-- Each cte row describes one piece: it starts at Start and ends just before
-- Split (NULL = the piece runs to the end of the string).
;with cte as
(
    -- Anchor: first piece. NULLIF turns "no separator" (0) into NULL so a row
    -- holding a single name no longer gives SUBSTRING a negative length.
    select 1 as Item,
           1 as Start,
           nullif(CHARINDEX(';', Inventors, 1), 0) as Split,
           Inventors
    from YourInventorsTable
    union all
    -- Next piece: begins right after the previous separator.
    select cte.Item + 1,
           cte.Split + 1,
           nullif(CHARINDEX(';', cte.Inventors, cte.Split + 1), 0),
           cte.Inventors
    from cte
    where cte.Split is not null
)
select rTRIM(lTRIM(SUBSTRING(Inventors, Start, isnull(Split, len(Inventors) + 1) - Start))) as Inventor,
       count(*) as InventorCount
from cte
group by rTRIM(lTRIM(SUBSTRING(Inventors, Start, isnull(Split, len(Inventors) + 1) - Start)))
You can create a split function to split the col values
-- Counts how often each split value occurs across table1.col1.
-- NOTE(review): dbo.split is a user-defined table-valued function that is not
-- shown here; it is assumed to return one row per element in a column named items.
select splittedValues.items,
       count(*) as item_count  -- count rows; a table alias is not a valid COUNT argument
from table1
cross apply dbo.split(col1, ';') as splittedValues
group by splittedValues.items
DEMO in Sql fiddle
First, create a function that takes a string separated by commas (or any other delimiter, such as ;) and returns its elements as a table; then apply GROUP BY to that table.
That way you get the count for each separate value.
"select d.number,count(*) from (select number from dbo.CommaseparedListToTable('Hundley;Edward;Ana;Isler;Hunsberger;Hunsberger;Hundley',';'))d
group by d.number"
-- Splits @text on ';' with a loop, stores each trimmed name in @table, then
-- counts the occurrences of every name.
declare @text nvarchar(255) = 'Edward; Hundley; AnaIsler; Hunsberger; Hunsberger; Hundley ';
-- name uses the same type as @text so no piece can be truncated or lose characters.
declare @table table(id int identity, name nvarchar(255));

while @text like '%;%'
begin
    -- Everything before the first ';' is the next name. Trim it, otherwise
    -- ' Edward' (after a separator) and 'Edward' (first in the list) would be
    -- counted as different names.
    insert into @table (name)
    select ltrim(rtrim(SUBSTRING(@text, 1, charindex(';', @text) - 1)));

    -- Continue with the text after that ';'.
    set @text = SUBSTRING(@text, charindex(';', @text) + 1, LEN(@text));
end

-- Whatever is left after the last ';' is the final name.
insert into @table (name)
select ltrim(rtrim(@text));

select name, count(name) counts
from @table
group by name
Output
name count
AnaIsler 1
Hundley 2
Hunsberger 2
Edward 1

How to change the information in this table into an easy to use form?

I have a legacy product that I have to maintain. One of the tables is somewhat similar to the following example:
-- Sample of the legacy layout: each person is stored as a "name:" row and an
-- "age:" row, and people are separated by a row of dashes.
-- Table variables are prefixed with @ (a # prefix names a temp table and is
-- not valid in DECLARE ... TABLE).
DECLARE @t TABLE
(
    id   INT,
    DATA NVARCHAR(30)
);

INSERT INTO @t (id, DATA)
VALUES
    (1, N'name: Jim Ey'),
    (2, N'age: 43'),
    (3, N'----------------'),
    (4, N'name: Johnson Dom'),
    (5, N'age: 34'),
    (6, N'----------------'),
    (7, N'name: Jason Thwe'),
    (8, N'age: 22');

-- ORDER BY makes the listing come back in id order every time.
SELECT id, DATA
FROM @t
ORDER BY id;
/*
You will get the following result
id DATA
----------- ------------------------------
1 name: Jim Ey
2 age: 43
3 ----------------
4 name: Johnson Dom
5 age: 34
6 ----------------
7 name: Jason Thwe
8 age: 22
*/
Now I want to get the information in the following form:
name age
-------------- --------
Jim Ey 43
Johnson Dom 34
Jason Thwe 22
What's the easiest way to do this?
Thanks.
Out of (slightly morbid) curiosity I tried to come up with a means of transforming the exact input data you have provided.
Far better, of course, would be to properly structure the original data. With a legacy system, this may not be possible, but an ETL process could be created to bring this information into an intermediate location so that an ugly query like this would not need to be run in real time.
Example #1
This example assumes that all IDs are consistent and sequential (otherwise, an additional ROW_NUMBER() column or a new identity column would need to be used to guarantee correct remainder operations on ID).
-- Example #1: pairs every "age:" row with the "name:" row directly before it.
-- Relies on ids being sequential (1, 2, 3, ...) with a delimiter row on every
-- third id. @t is the table variable holding the legacy rows.
SELECT
    Name = REPLACE( Name, 'name: ', '' ),
    Age = REPLACE( Age, 'age: ', '' )
FROM
(
    SELECT
        Name = T2.Data,
        Age = T1.Data,
        RowNumber = ROW_NUMBER() OVER( ORDER BY T1.Id ASC )
    FROM @t T1
        INNER JOIN @t T2 ON T1.id = T2.id +1 -- offset by one to combine two rows
    WHERE T1.id % 3 != 0 -- skip delimiter records
) Q1
-- skip every other record (minus delimiters, which have already been stripped):
-- odd rows are the (age, name) pairs, even rows are (name, delimiter) pairs
WHERE RowNumber % 2 != 0
ORDER BY RowNumber -- return people in their original order
Example #2: No Dependency on Sequential IDs
This is a more practical example because the actual ID values do not matter, only the row sequence.
-- Example #2: renumber the rows 1..n by id first, so the pairing depends only
-- on the row sequence and not on the actual id values.
-- @t is the table variable holding the legacy rows.
DECLARE @NumberedData TABLE( RowNumber INT, Data VARCHAR( 100 ) );

INSERT @NumberedData( RowNumber, Data )
SELECT
    RowNumber = ROW_NUMBER() OVER( ORDER BY id ASC ),
    Data
FROM @t;

-- In every group of three rows, row 1 is the name, row 2 the age and row 3 the
-- delimiter: N1 is the age row, N2 the name row right before it.
SELECT
    Name = REPLACE( N2.Data, 'name: ', '' ),
    Age = REPLACE( N1.Data, 'age: ', '' )
FROM @NumberedData N1
    INNER JOIN @NumberedData N2 ON N1.RowNumber = N2.RowNumber + 1
WHERE ( N1.RowNumber % 3 ) = 2
ORDER BY N1.RowNumber;

-- Empty the working table.
DELETE FROM @NumberedData;
Example #3: Cursor
Again, it would be best to avoid running a query like this in real time and use a scheduled, transactional ETL process. In my experience, semi-structured data like this is prone to anomalies.
While examples #1 and #2 (and the solutions provided by others) demonstrate clever ways of working with the data, a more practical way to transform this data would be a cursor. Why? it may actually perform better (no nested queries, recursion, pivoting, or row numbering) and even if it is slower it provides much better opportunities for error handling.
-- Example #3: walks the legacy rows in id order and collects one
-- (Name, Age) row per person into @Results.
-- this could be a table variable, temp table, or staging table
DECLARE @Results TABLE ( Name VARCHAR( 100 ), Age INT );

-- @Index tracks the position inside the current group:
-- 0 = expecting name, 1 = expecting age, 2 = group complete.
DECLARE @Index INT = 0, @Data VARCHAR( 100 ), @Name VARCHAR( 100 ), @Age INT;

-- ORDER BY id: the parsing depends on row order, which is not guaranteed
-- without an explicit ORDER BY.
DECLARE Person_Cursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT Data FROM @t ORDER BY id;

OPEN Person_Cursor;
FETCH NEXT FROM Person_Cursor INTO @Data;

WHILE( 1 = 1 ) BEGIN -- busy loop so we can handle the iteration following completion
    IF( @Index = 2 ) BEGIN
        -- Name and age are collected; the current row is the delimiter (or
        -- the pass after the last fetch). Store the person and start over.
        INSERT @Results( Name, Age ) VALUES( @Name, @Age );
        SET @Index = 0;
    END
    ELSE BEGIN
        -- optional: examine @Data for integrity
        IF( @Index = 0 ) SET @Name = REPLACE( @Data, 'name: ', '' );
        IF( @Index = 1 ) SET @Age = CAST( REPLACE( @Data, 'age: ', '' ) AS INT );
        SET @Index = @Index + 1;
    END

    -- optional: examine @Index to see that there are no superfluous trailing
    -- rows or rows omitted at the end.

    -- Checked after processing, so the last person is still inserted on the
    -- pass that follows the final successful fetch.
    IF( @@FETCH_STATUS != 0 ) BREAK;
    FETCH NEXT FROM Person_Cursor INTO @Data;
END

CLOSE Person_Cursor;
DEALLOCATE Person_Cursor;
Performance
I created sample source data of 100K rows and the three aforementioned examples seem roughly equivalent for transforming data.
I created a million rows of source data and a query similar to the following gives excellent performance for selecting a subset of rows (such as would be used in a grid on a web page or a report).
-- Transforms only a subset of the legacy rows (for example one page of a grid).
-- INT IDENTITY( 1, 1 ) numbers the rows for us
DECLARE @NumberedData TABLE( RowNumber INT IDENTITY( 1, 1 ), Data VARCHAR( 100 ) );

-- subset selection; ordering/filtering can be done here but it will need to preserve
-- the original 3 rows-per-result structure and it will impact performance
INSERT @NumberedData( Data )
SELECT TOP 1000 Data
FROM @t
ORDER BY id; -- without ORDER BY, neither the chosen rows nor their numbering is guaranteed

-- Row 2 of every group of three is the age row (N1); the row before it is the name (N2).
SELECT
    N1.RowNumber,
    Name = REPLACE( N2.Data, 'name: ', '' ),
    Age = REPLACE( N1.Data, 'age: ', '' )
FROM @NumberedData N1
    INNER JOIN @NumberedData N2 ON N1.RowNumber = N2.RowNumber + 1
WHERE ( N1.RowNumber % 3 ) = 2;

-- Empty the working table.
DELETE FROM @NumberedData;
I'm seeing execution times of 4-10ms (i7-3960x) against a set of a million records.
Given that table you can do this:
-- Turns the "name: ..." / "age: ..." rows of @t into one row per person.
-- Step 1 (DATA): drop delimiter rows, split each row into type and value, and
--                number the remaining rows.
-- Step 2 (RecursiveCTE): walk the rows in order and give every person a key;
--                an 'age' row reuses the key of the row before it, any other
--                row starts a new key.
-- Step 3: pivot by that key into name / age columns.
;WITH DATA
AS
(
    SELECT
        -- text after ': '
        SUBSTRING(t.DATA,CHARINDEX(':',t.DATA)+2,LEN(t.DATA)) AS value,
        -- text before ':'
        SUBSTRING(t.DATA,0,CHARINDEX(':',t.DATA)) AS ValueType,
        ID,
        ROW_NUMBER() OVER(ORDER BY ID) AS RowNbr
    FROM
        @t AS t
    WHERE
        NOT t.DATA='----------------'
)
, RecursiveCTE
AS
(
    -- Anchor: the first row opens the first person.
    SELECT
        Data.RowNbr,
        Data.value,
        Data.ValueType,
        NEWID() AS ID
    FROM
        Data
    WHERE
        Data.RowNbr=1
    UNION ALL
    -- Step to the next row.
    SELECT
        Data.RowNbr,
        Data.value,
        Data.ValueType,
        CASE
            WHEN Data.ValueType='age'
            THEN RecursiveCTE.ID
            ELSE NEWID()
        END AS ID
    FROM
        Data
        JOIN RecursiveCTE
            ON RecursiveCTE.RowNbr+1=Data.RowNbr
)
SELECT
    pvt.name,
    pvt.age
FROM
(
    SELECT
        ID,
        value,
        ValueType
    FROM
        RecursiveCTE
) AS SourceTable
PIVOT
(
    MAX(Value)
    FOR ValueType IN ([name],[age])
) AS pvt
-- One recursion level is used per row; the default limit of 100 would abort
-- the query on larger tables, so lift it.
OPTION (MAXRECURSION 0)
Output
Name Age
------------------
Jim Ey 43
Jason Thwe 22
Johnson Dom 34
Here's another option if you upgrade to SQL Server 2012, which implements the OVER clause for aggregate functions. This approach will allow you to choose only those tags that you know you want and find them regardless of how many rows there are between names.
This will also work if the names and ages are not always in the same order within a group of rows representing a single person.
-- Pivots "tag: value" rows of @t into one row per person.
-- part = id of the closest preceding "name:" row (running MAX over id), so
-- every row is attached to the person it follows.
with Ready2Pivot(tag,val,part) as (
    select
        CASE WHEN DATA like '_%:%' THEN SUBSTRING(DATA,1,CHARINDEX(':',DATA)-1) END as tag,
        -- LTRIM removes the blank after the colon ('name: Jim Ey' -> 'Jim Ey').
        CASE WHEN DATA like '_%:%' THEN LTRIM(SUBSTRING(DATA,CHARINDEX(':',DATA)+1,8000)) END as val,
        -- id counts only on "name:" rows (0 elsewhere), so the running MAX
        -- stays at the latest name row's id.
        max(id * CASE WHEN DATA LIKE 'name:%' THEN 1 ELSE 0 END)
            over (
                order by id
            )
    from @t
    -- Rows without a "tag:" prefix, such as the dashed delimiters, are skipped.
    where DATA like '_%:%'
)
select [name], [age]
from Ready2Pivot
pivot (
    max(val)
    for tag in ([name], [age])
) as p
order by part -- people in their original order
If your legacy data has an entry with extra items (say "altName: Jimmy"), this query will ignore it. If your legacy data has no row (and no id number) for someone's age, it will give you NULL in that spot. It will associate all information with the closest preceding row with "name: ..." as the DATA, so it is important that every group of rows has a "name: ..." row.