T-Sql count string sequences over multiple rows

T-Sql count string sequences over multiple rows - sql

How can I find subsets of data over multiple rows in sql?
I want to count the number of occurrences of a string (or number) before another string is found and then count the number of times this string occurs before another one is found.
All these strings can be in random order.
This is what I want to achieve:
I have one table with one column (columnx) with data like this:
A
A
B
C
A
B
B
The result I want from the query should be like this:
2 A
1 B
1 C
1 A
2 B
Is this even possible in sql or would it be easier just to write a little C# app to do this?

Since, as per your comment, you can add a column that will unambiguously define the order in which the columnx values go, you can try the following query (provided the SQL product you are using supports CTEs and ranking functions):
WITH marked AS (
SELECT
columnx,
sortcolumn,
grp = ROW_NUMBER() OVER ( ORDER BY sortcolumn)
- ROW_NUMBER() OVER (PARTITION BY columnx ORDER BY sortcolumn)
FROM data
)
SELECT
columnx,
COUNT(*)
FROM marked
GROUP BY
columnx,
grp
ORDER BY
MIN(sortcolumn)
;
You can see the method in work on SQL Fiddle.
If sortcolumn is an auto-increment integer column that is guaranteed to have no gaps, you can replace the first ROW_NUMBER() expression with just sortcolumn. But, I guess, that cannot be guaranteed in general. Besides, you might indeed want to sort on a timestamp instead of an integer.

I dont think you can do it with a single select.
You can use AdventureWorks cursor:
create table my_Strings
(
my_string varchar(50)
)
insert into my_strings values('A'),('A'),('B'),('C'),('A'),('B'),('B') -- this method will only work on SQL Server 2008
--select my_String from my_strings
declare #temp_result table(
string varchar(50),
nr int)
declare #myString varchar(50)
declare #myLastString varchar(50)
declare #nr int
set #myLastString='A' --set this with the value of your FIRST string on the table
set #nr=0
DECLARE string_cursor CURSOR
FOR
SELECT my_string as aux_column FROM my_strings
OPEN string_cursor
FETCH NEXT FROM string_cursor into #myString
WHILE ##FETCH_STATUS = 0 BEGIN
if (#myString = #myLastString) begin
set #nr=#nr+1
set #myLastString=#myString
end else begin
insert into #temp_result values (#myLastString, #nr)
set #myLastString=#myString
set #nr=1
end
FETCH NEXT FROM string_cursor into #myString
END
insert into #temp_result values (#myLastString, #nr)
CLOSE string_cursor;
DEALLOCATE string_cursor;
select * from #temp_result
Result:
A 2
B 1
C 1
A 1
B 2

Try this :
;with sample as (
select 'A' as columnx
union all
select 'A'
union all
select 'B'
union all
select 'C'
union all
select 'A'
union all
select 'B'
union all
select 'B'
), data
as (
select columnx,
Row_Number() over(order by (select 0)) id
from sample
) , CTE as (
select * ,
Row_Number() over(order by (select 0)) rno from data
) , result as (
SELECT d.*
, ( SELECT MAX(ID)
FROM CTE c
WHERE NOT EXISTS (SELECT * FROM CTE
WHERE rno = c.rno-1 and columnx = c.columnx)
AND c.ID <= d.ID) AS g
FROM data d
)
SELECT columnx,
COUNT(1) cnt
FROM result
GROUP BY columnx,
g
Result :
columnx cnt
A 2
B 1
C 1
A 1
B 2

Related

Extract Data from a large string to table using SQL

I'm trying to extract data strings from a long string using SQL query.
the string "'35522':{'item_id':'35522','sku':'deded','RowTotal':37.5,'qty':2"
I am trying to create a loop query that extracts data from the string.
The desired output is a table with columns (item_id,sku, RowTotal,qty) and each line will be extracted from the string above in the relevant column.
Im trying to create a function that will do that but currentley im not close.
Can you please assist me with ideas?
DECLARE #String VARCHAR(4000) =
'{:{item_id:35522,sku:deep-line-elixir,RowTotal:37.5,qty:2},
:{item_id:35527,sku:self-care-pamper-pack,RowTotal:158,qty:2},
:{item_id:35531,sku:neck-chest-rejuvenating-serum,RowTotal:21.87,qty:1},
:{item_id:35534,sku:pm-recovery-night-cream,RowTotal:23.75,qty:1},couponCode:,itemsQty:6}"'
DECLARE #b VARBINARY(4000) = CONVERT(varbinary(4000),#string)
DECLARE #StartPos int=9
DECLARE #Len tinyint=13
;WITH C (Orig,Startpos,Value) AS (
SELECT #b,#StartPos,CONVERT(VARCHAR,SUBSTRING(#b,#StartPos,#Len))
UNION ALL
SELECT #b,C.Startpos+#Len,CONVERT(VARCHAR,SUBSTRING(#b,C.StartPos+#Len,#Len)) FROM C
WHERE C.Startpos+#Len < = LEN(#b)
)
select C.Value from c where c.value like 'item%'
This is my code so far

The following approach splits the data into rows using STRING_SPLIT before replacing the additional characters and using case expressions to match field values. The temporary table is used to generate a row_num which can be used to order values when generating the group_num used to group the related values across multiple rows into a single related row
DECLARE #SampleString VARCHAR(4000) =
'{:{item_id:35522,sku:deep-line-elixir,RowTotal:37.5,qty:2},
:{item_id:35527,sku:self-care-pamper-pack,RowTotal:158,qty:2},
:{item_id:35531,sku:neck-chest-rejuvenating-serum,RowTotal:21.87,qty:1},
:{item_id:35534,sku:pm-recovery-night-cream,RowTotal:23.75,qty:1},couponCode:,itemsQty:6}"';
create table #temp_values (id int identity(1,1), value VARCHAR(200) );
insert into #temp_values (value) SELECT value FROM STRING_SPLIT(REPLACE(REPLACE(#SampleString,'{',''),'}',''),',');
WITH split_data AS (
SELECT id,value FROM #temp_values
),
extracted_data_raw AS (
SELECT
id as row_num,
CASE
WHEN value LIKE '%item_id%' THEN 1
WHEN value LIKE '%sku:%' THEN 2
WHEN value LIKE '%RowTotal:%' THEN 3
WHEN value LIKE '%qty:%' AND value NOT LIKE '%itemsQty%' THEN 4
END as type_num,
CASE
WHEN value LIKE '%item_id%' THEN TRIM(REPLACE(value,':item_id:',''))
END as item_id,
CASE WHEN value LIKE '%sku:%' THEN TRIM(REPLACE(value,'sku:','')) END as sku,
CASE WHEN value LIKE '%RowTotal:%' THEN TRIM(REPLACE(value,'RowTotal:','')) END as RowTotal,
CASE WHEN value LIKE '%qty:%' AND value NOT LIKE '%itemsQty%' THEN TRIM(REPLACE(value,'qty:','')) END as qty,
value
FROM split_data
)
,extracted_data_clean AS (
SELECT
MAX(item_id) as item_id,
MAX(sku) as sku,
MAX(RowTotal) as RowTotal,
MAX(qty) as qty
FROM (
SELECT
ROW_NUMBER() OVER (PARTITION BY type_num ORDER BY row_num) group_num,
item_id,
sku,
RowTotal,
qty
FROM
extracted_data_raw
WHERE
type_num IS NOT NULL
) t
GROUP BY group_num
)
select * from extracted_data_clean
Outputs:
item_id
sku
RowTotal
qty
35522
deep-line-elixir
37.5
2
35527
self-care-pamper-pack
158
2
35531
neck-chest-rejuvenating-serum
21.87
1
35534
pm-recovery-night-cream
23.75
1
Working Demo DB Fiddle
Edit 1:
You also referenced "'35522':{'item_id':'35522','sku':'deded','RowTotal':37.5,'qty':2" in your question. The following should work for both samples while also converting to respective data types:
DECLARE #SampleString VARCHAR(4000) =
'{"35522":{item_id:35522,sku:deep-line-elixir,RowTotal:37.5,qty:2},
"35522":{item_id:35527,sku:self-care-pamper-pack,RowTotal:158,qty:2},
"35522":{item_id:35531,sku:neck-chest-rejuvenating-serum,RowTotal:21.87,qty:1},
"35522":{item_id:35534,sku:pm-recovery-night-cream,RowTotal:23.75,qty:1},couponCode:,itemsQty:6}"';
create table #temp_values (id int identity(1,1), value VARCHAR(200) );
insert into #temp_values (value) SELECT value FROM STRING_SPLIT(REPLACE(REPLACE(#SampleString,'{',''),'}',''),',');
WITH split_data AS (
SELECT id,value FROM #temp_values
),
extracted_data_raw AS (
SELECT
id as row_num,
CASE
WHEN value LIKE '%item_id%' THEN 1
WHEN value LIKE '%sku:%' THEN 2
WHEN value LIKE '%RowTotal:%' THEN 3
WHEN value LIKE '%qty:%' AND value NOT LIKE '%itemsQty%' THEN 4
END as type_num,
CASE
WHEN value LIKE '%item_id%' THEN CONVERT(INT,TRIM(
REPLACE(SUBSTRING(
value,CHARINDEX('item_id',value),LEN(value)
),'item_id:','')
))
END as item_id,
CASE WHEN value LIKE '%sku:%' THEN TRIM(REPLACE(value,'sku:','')) END as sku,
CASE WHEN value LIKE '%RowTotal:%' THEN CONVERT(NUMERIC(10,2),TRIM(REPLACE(value,'RowTotal:',''))) END as RowTotal,
CASE WHEN value LIKE '%qty:%' AND value NOT LIKE '%itemsQty%' THEN CONVERT(NUMERIC(10,2),TRIM(REPLACE(value,'qty:',''))) END as qty,
value
FROM split_data
)
,extracted_data_clean AS (
SELECT
MAX(item_id) as item_id,
MAX(sku) as sku,
MAX(RowTotal) as RowTotal,
MAX(qty) as qty
FROM (
SELECT
ROW_NUMBER() OVER (PARTITION BY type_num ORDER BY row_num) group_num,
item_id,
sku,
RowTotal,
qty
FROM
extracted_data_raw
WHERE
type_num IS NOT NULL
) t
GROUP BY group_num
)
select * from extracted_data_clean;
drop table #temp_values;
Working Demo DB Fiddle
Let me know if this works for you.

SQL - populate new column according to data in row above

I need to populate a new column in a table known as RowType, where if the ID column contains the same ID value as the one above RowType is populated with 'D', if the value is new then RowType is populate with 'H', how would the SQL code look to be able to do this?
I.e should look something like below:
RowType (to be populated), ID (already there)
H, 1
D, 1
D, 1
H, 2
D, 2
H, 3
D, 3
D, 3
Thanks

You can use Row_Number and case
select *, RowType = case when Row_Number() over (partition by id order by id) = 1 then 'H' else 'D' End from #yourid
Your input table:
create table #yourId (id int)
insert into #yourid (id) values
(1)
,(1)
,(1)
,(2)
,(2)
,(3)
,(3)
,(3)

Use ROW_NUMER concept :
CREATE TABLE #table(Id INT)
INSERT INTO #table(Id)
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 2 UNION ALL
SELECT 2 UNION ALL
SELECT 3 UNION ALL
SELECT 3 UNION ALL
SELECT 3
SELECT CASE WHEN RowType = 1 THEN 'H' ELSE 'D' END RowType , Id
FROM
(
SELECT ROW_NUMBER() OVER (PARTITION BY Id ORDER BY id) RowType , Id
FROM #table
) A

Please try...
UPDATE tableName
SET RowType = CASE
WHEN ( ID = LAG( ID ) OVER ( ORDER BY ID ) ) THEN 'D'
ELSE 'H'
END
If you have any questions or comments, then please feel free to post a Comment accordingly.
Further Reading
https://learn.microsoft.com/en-us/sql/t-sql/functions/lag-transact-sql (for information on LAG()).

It may not be the best solution, however it can point you somewhere, and it works.
Go through the code carfuly and make sure you understand this.
create table yourTable (RowType char, id int)
insert into yourTable (RowType, id) values
('',1)
,('',1)
,('',1)
,('',2)
,('',2)
,('',3)
,('',3)
,('',3)
select
row_number() over (order by id) as rowNumber,
RowType,
id
into #tempTable
from yourTable
declare #maxRow int = (select max(rowNumber) from #tempTable)
declare #currentRow int = 1
while (#currentRow <= #maxRow)
begin
if (#currentRow = 1)
begin
update #tempTable
set RowType = 'H'
where rowNumber = #currentRow
end
else
begin
if (select id from #tempTable where rowNumber = #currentRow) = (select id from #tempTable where rowNumber = #currentRow - 1)
begin
update #tempTable
set RowType = 'D'
where rowNumber = #currentRow
end
else
begin
update #tempTable
set RowType = 'H'
where rowNumber = #currentRow
end
end
set #currentRow = #currentRow +1
end
-- update data in actual table, you can do below if only those two columns exist in table !!!
delete from yourTable
-- insert into table from updated temp table
insert into yourTable
select RowType, ID
from #tempTable
select * from yourTable
select * from #tempTable
-- drop temp table
drop table #tempTable

Populating an empty table with sequential numbers

I have a table which is already truncated (Microsoft SQL 2008). I have to now populate it with sequential numbers up to 50,000 records arbitrary numbers (doesn't mater) up to 7 characters.
Can any one help as to what SQL statement I need to write that will automatically populate the newly empty table with A000001,A0000002,A0000003, etc so that I can sort number the records within the table.
I have approximately 50000 records which I need to sequentially entered and I really don't want to number the column manually via hand editing.
Thanks in advance.

I'd use excel to generate your unique ids using the following:
In A column:
=CONCATENATE($C2, TEXT($B2,"000000"))
In B column put a 1 in the first row and the following code in all subsequent rows:
=SUM($B4 + 1)
In C column:
The letter A
Then just import the excel csv as a table and you'll have all your ids ready to insert into your empty table.

The SQL below loads a table variable up. Just select from it and insert the data into the new table. Certainly not the model of efficiency, but it'll get the job done.
DECLARE #tmp TABLE(
Value NVARCHAR(10)
)
DECLARE #Counter INT=0
DECLARE #Padding NVARCHAR(20)
WHILE #Counter<50000
BEGIN
SET #Counter=#Counter+1
SET #Padding=
CASE LEN(CONVERT(NVARCHAR,#Counter))
WHEN 1 THEN '00000'
WHEN 2 THEN '0000'
WHEN 3 THEN '000'
WHEN 4 THEN '00'
WHEN 5 THEN '0'
ELSE ''
END
INSERT INTO #tmp SELECT 'A' + #Padding + CONVERT(NVARCHAR,#Counter)
END
select * from #tmp

Use Stacked CTE to generate sequential Numbers
;WITH e1(n) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), -- 10
e2(n) AS (SELECT 1 FROM e1 CROSS JOIN e1 AS b), -- 10*10
e3(n) AS (SELECT 1 FROM e2 CROSS JOIN e2 AS b), -- 100*100
e4(n) AS (SELECT 1 FROM e3 CROSS JOIN (SELECT TOP 5 n FROM e1) AS b) -- 5*10000
SELECT n = 'A'+right('000000'+
convert(varchar(20),ROW_NUMBER() OVER (ORDER BY n)),7)
FROM e4 ORDER BY n;
Check here for more methods to generate sequential numbers with performance analysis

Use a table with an identity column and populate it. Then update that table to set the alpha value you need as follows:
create table MyTable (
ID int not null identity(1,1),
Alpha varchar(30)
)
truncate table MyTable
begin tran -- makes it run much faster
declare #i int
select #i = 1
while #i < 1000000
begin
insert into MyTable (Alpha) values ('')
select #i = #i + 1
end
commit
update MyTable set Alpha = 'A' + replicate('0', 6 - len(cast(ID as varchar(30)))) + cast(ID as varchar(30))

Get N th row value in sql server

DECLARE #ActionNumber varchar(20)='EHPL-DES-SQ-1021'
set #ActionNumber=(select top 1 * from dbo.ANOSplit(#ActionNumber,'-')
order by ROW_NUMBER() OVER (ORDER BY items))
select #ActionNumber
from above query i need to return the 2ND and 3RD index from initial #ActionNumber
'EHPL-DES-SQ-1021' after Split().
format of the ActionNumber is exactly as above but DES, SQ and 1021 can change.
so i can not use ORDER BY items ASC or ORDER BY items DESC because it will order alphabetically.
above query returns 'EHPL'.how can i get DES and SQ.

You can do it with the ANOSplit function, but I would insert the result into a temp table or table variable.
As you said yourself, you can't just ORDER BY the values returned by the ANOSplit function because it will order alphabetically.
--> So you can use a temp table with an IDENTITY column, and use this for sorting:
DECLARE #ActionNumber varchar(20)='EHPL-DES-SQ-1021'
declare #tmp table
(
id int identity(1,1),
item varchar(20)
)
insert into #tmp (item)
select * from dbo.ANOSplit(#ActionNumber,'-')
select * from #tmp where id in (2,3)
The items will be inserted into the table in the exact order returned by the function, so after inserting you know that the lines with id 2 and 3 are the ones you want.

Try to use Substring with CharIndex >>>
DECLARE #ActionNumber varchar(20)='EHPL-DES-SQ-1021'
select SUBSTRING (#ActionNumber,CHARINDEX ('-',#ActionNumber,0) + 1, 3)

This isn't tested, but I think it will work:
DECLARE #ActionNumber varchar(20)='EHPL-DES-SQ-1021'
WITH nCTE AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY items) AS RNum
FROM dbo.ANOSplit(#ActionNumber,'-')
)
SELECT * FROM nCTE WHERE RNum = 2 --put n here

In SQL can a sequenced range selection be done more efficiently than my algorithm (see code) that uses a cursor?

I need to collapse multiple ranges of sequential numbers (1 or more) to sets of their minimum and maximum values. I have unique integers (no duplicates) stored in a table column.
The obvious way (to me) to solve this problem is to use a cursor (see my algorithm below) and iterate through every integer. However, it seems inefficient to me so I am wondering if there is a more efficient algorithm. Perhaps there is a way using common table expressions with recursion. I have more than 32767 integers though, so any solution will need to use option (MAXRECURSION 0) which sets unlimited recursion.
Following is a simplified test case for my existing algorithm usign a cursor. It will output the minimum and maximum for each range of sequential numbers (e.g. 1-3, 9-11, 13-13, 15-16).
I am using MS SQL Server 2008. Please note comments begin with two dashes (--).
declare #minInt int, #maxInt int
declare #nextInt int, #prevInt int
--need a temporary table to store the ranges that were found
declare #rangeTable table (minInt int, maxInt int)
declare mycursor cursor for
select * from
(
select 1 as id union
select 2 as id union
select 3 as id union
select 9 as id union
select 10 as id union
select 11 as id union
select 13 as id union
select 15 as id union
select 16 as id
) tblRanges
order by id--order is needed for this algorithm if used with generic data
open mycursor
--initialise new sequence
fetch next from mycursor into #minInt
select #maxInt = #minInt--set the min and max to the smallest value
select #prevInt = #minInt--store the last int
declare #sequenceFound int
while ##FETCH_STATUS=0
begin
select #sequenceFound=1--set the default flag value to true
--loop while sequence found
while ##FETCH_STATUS=0 and #sequenceFound = 1
begin
fetch next from mycursor into #nextInt
if #nextInt = (#prevInt + 1)
begin
select #sequenceFound = 1
end
else
begin
select #sequenceFound = 0
end
select #prevInt = #nextInt--store the current value as the previous value for the next comparison
if #sequenceFound = 1 --if the nextInt is part of a sequence, then store the new maxInt
and #maxInt < #nextInt--should always be true for ordered output containing no duplicates
begin
select #maxInt = #nextInt
end
end--while sequenceFound
--store the sequence range and then check for more sequences
insert into #rangeTable (minInt,maxInt) values (#minInt,#maxInt)
--store the current value as the new minInt and maxInt for the next sequence iteration
select #minInt = #nextInt
select #maxInt = #nextInt
end--while more table rows found
select * from #rangeTable
close mycursor
deallocate mycursor

Courtesy of Itzik Ben-Gan:
WITH tblRanges AS
(
SELECT 1 AS ID UNION
SELECT 2 AS ID UNION
SELECT 3 AS ID UNION
SELECT 9 AS ID UNION
SELECT 10 AS ID UNION
SELECT 11 AS ID UNION
SELECT 13 AS ID UNION
SELECT 15 AS ID UNION
SELECT 16 AS ID
),
StartingPoints AS
(
SELECT ID, ROW_NUMBER() OVER(ORDER BY ID) AS rownum
FROM tblRanges AS A
WHERE NOT EXISTS
(SELECT *
FROM tblRanges AS B
WHERE B.ID = A.ID - 1)
),
EndingPoints AS
(
SELECT ID, ROW_NUMBER() OVER(ORDER BY ID) AS rownum
FROM tblRanges AS A
WHERE NOT EXISTS
(SELECT *
FROM tblRanges AS B
WHERE B.ID = A.ID + 1)
)
SELECT S.ID AS start_range, E.ID AS end_range
FROM StartingPoints AS S
JOIN EndingPoints AS E
ON E.rownum = S.rownum;
You can read a full explanation from his chapter in SQL Sever MVP Deep Dives called Gaps and Islands. He explains various techniques (including cursors) and compares them in terms of performance.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

T-Sql count string sequences over multiple rows - sql

Related

Extract Data from a large string to table using SQL

SQL - populate new column according to data in row above

Populating an empty table with sequential numbers

Get N th row value in sql server

In SQL can a sequenced range selection be done more efficiently than my algorithm (see code) that uses a cursor?

Categories

Resources