Using string_split to create rows from multiple columns - sql

I have data that looks something like this example (on an unfortunately much larger scale):
+----+-------+--------------------+-----------------------------------------------+
| ID | Data | Cost | Comments |
+----+-------+--------------------+-----------------------------------------------+
| 1 | 1|2|3 | $0.00|$3.17|$42.42 | test test||previous thing has a blank comment |
+----+-------+--------------------+-----------------------------------------------+
| 2 | 1 | $420.69 | test |
+----+-------+--------------------+-----------------------------------------------+
| 3 | 1|2 | $3.50|$4.20 | |test |
+----+-------+--------------------+-----------------------------------------------+
Some of the columns in the table I have are pipeline delimited, but they are consistent by each row. So each delimited value corresponds to the same index in the other columns of the same row.
So I can do something like this which is what I want for a single column:
SELECT ID, s.value AS datavalue
FROM MyTable t CROSS APPLY STRING_SPLIT(t.Data, '|') s
and that would give me this:
+----+-----------+
| ID | datavalue |
+----+-----------+
| 1 | 1 |
+----+-----------+
| 1 | 2 |
+----+-----------+
| 1 | 3 |
+----+-----------+
| 2 | 1 |
+----+-----------+
| 3 | 1 |
+----+-----------+
| 3 | 2 |
+----+-----------+
but I also want to get the other columns as well (cost and comments in this example) so that the corresponding items are all in the same row like this:
+----+-----------+-----------+------------------------------------+
| ID | datavalue | costvalue | commentvalue |
+----+-----------+-----------+------------------------------------+
| 1 | 1 | $0.00 | test test |
+----+-----------+-----------+------------------------------------+
| 1 | 2 | $3.17 | |
+----+-----------+-----------+------------------------------------+
| 1 | 3 | $42.42 | previous thing has a blank comment |
+----+-----------+-----------+------------------------------------+
| 2 | 1 | $420.69 | test |
+----+-----------+-----------+------------------------------------+
| 3 | 1 | $3.50 | |
+----+-----------+-----------+------------------------------------+
| 3 | 2 | $4.20 | test |
+----+-----------+-----------+------------------------------------+
I'm not sure what the best or most simple way to achieve this would be

This isn't going to be achievable with STRING_SPLIT as Microsoft refuse to supply the ordinal position as part of the result set. As a result, you'll need to use a different function which does. Personally, I recommend Jeff Moden's DelimitedSplit8k.
Then, you can do this:
CREATE TABLE #Sample (ID int,
[Data] varchar(200),
Cost varchar(200),
Comments varchar(8000));
GO
INSERT INTO #Sample
VALUES (1,'1|2|3','$0.00|$3.17|$42.42','test test||previous thing has a blank comment'),
(2,'1','$420.69','test'),
(3,'1|2','$3.50|$4.20','|test');
GO
SELECT S.ID,
DSd.Item AS DataValue,
DSc.Item AS CostValue,
DSct.Item AS CommentValue
FROM #Sample S
CROSS APPLY dbo.DelimitedSplit8K(S.[Data],'|') DSd
CROSS APPLY (SELECT *
FROM DelimitedSplit8K(S.Cost,'|') SS
WHERE SS.ItemNumber = DSd.ItemNumber) DSc
CROSS APPLY (SELECT *
FROM DelimitedSplit8K(S.Comments,'|') SS
WHERE SS.ItemNumber = DSd.ItemNumber) DSct;
GO
DROP TABLE #Sample;
GO
There is, however, only one true answer to this question: Don't store delimited values in SQL Server. Store them in a normalised manner, and you won't have this problem.

Here is a solution approach using a recursive CTE instead of a User Defined Funtion (UDF) which is useful for those without permission to create functions.
CREATE TABLE mytable(
ID INTEGER NOT NULL PRIMARY KEY
,Data VARCHAR(7) NOT NULL
,Cost VARCHAR(20) NOT NULL
,Comments VARCHAR(47) NOT NULL
);
INSERT INTO mytable(ID,Data,Cost,Comments) VALUES (1,'1|2|3','$0.00|$3.17|$42.42','test test||previous thing has a blank comment');
INSERT INTO mytable(ID,Data,Cost,Comments) VALUES (2,'1','$420.69','test');
INSERT INTO mytable(ID,Data,Cost,Comments) VALUES (3,'1|2','$3.50|$4.20','|test');
This query allows choice of delimiter by using a variable, then using a common table expression it parses each delimited string to produce a rows for each portion of those strings, and retains the ordinal position of each.
declare #delimiter as varchar(1)
set #delimiter = '|'
;with cte as (
select id
, convert(varchar(max), null) as datavalue
, convert(varchar(max), null) as costvalue
, convert(varchar(max), null) as commentvalue
, convert(varchar(max), data + #delimiter) as data
, convert(varchar(max), cost + #delimiter) as cost
, convert(varchar(max), comments + #delimiter) as comments
from mytable as t
union all
select id
, convert(varchar(max), left(data, charindex(#delimiter, data) - 1))
, convert(varchar(max), left(cost, charindex(#delimiter, cost) - 1))
, convert(varchar(max), left(comments, charindex(#delimiter, comments) - 1))
, convert(varchar(max), stuff(data, 1, charindex(#delimiter, data), ''))
, convert(varchar(max), stuff(cost, 1, charindex(#delimiter, cost), ''))
, convert(varchar(max), stuff(comments, 1, charindex(#delimiter, comments), ''))
from cte
where (data like ('%' + #delimiter + '%') and cost like ('%' + #delimiter + '%')) or comments like ('%' + #delimiter + '%')
)
select id, datavalue, costvalue, commentvalue
from cte
where datavalue IS NOT NULL
order by id, datavalue
As the recursion adds new rows, it places the first portion of the delimited strings into the wanted output columns using left(), then also, using stuff(), removes the last used delimiter from the source strings so that the next row will start at the next delimiter. Note that to initiate the extractions, the delimiter is added to the end of the source delimited strings which is to ensure the where clause does not exclude any of the wanted strings.
the result:
id datavalue costvalue commentvalue
---- ----------- ----------- ------------------------------------
1 1 $0.00 test test
1 2 $3.17
1 3 $42.42 previous thing has a blank comment
2 1 $420.69 test
3 1 $3.50
3 2 $4.20 test
demonstrated here at dbfiddle.uk

Related

SQL sort by closest string match in multiple columns

I have a search parameter that I am trying to search on multiple columns using the "Like % InputParam %" pattern matching which gives me the following result. ie - Matching OrderID and Ref no to the input parameter.
Consider I have the following table -
OrderId | Name | Ref No |
12345 | XYZ | 120545 |
1205 | ABC | 451003 |
00120505 | CDE | 000174 |
Here OrderID, Ref no are strings and the input query = '1205'. I want the result to be sorted from the most matched to the least matched.
Where most matched is the most accurate match like 1205 = 1205 here
and Least matched is a substring like 00120505 = 1205.
Output -
OrderId | Name | Ref no |
1205 | ABC | 451003 |
12345 | XYZ | 120545 |
00120505 | CDE | 000174 |
You can do it by defining a computed column e.g. MATCH_SCORE with a value that depends on comparisons between OrderId and the value you are looking for; and then use ORDER BY MATCH_SCORE.
Working example:
DECLARE #tbl TABLE (OrderId VARCHAR(10), Name VARCHAR(10), RefNo VARCHAR(10))
INSERT INTO #tbl VALUES ('12345','XYZ','120545'), ('1205','ABC','451003'), ('00120505','CDE','000174')
DECLARE #v VARCHAR(10) = '1205'
;WITH data AS
(SELECT *,
CASE WHEN OrderId = #v THEN 1
WHEN OrderId LIKE #v + '%' THEN 2
WHEN OrderId LIKE '%' + #v + '%' THEN 3
ELSE 4
END AS MATCH_SCORE
FROM #tbl
)
SELECT * FROM data ORDER BY MATCH_SCORE
Output:
OrderId
Name
RefNo
MATCH_SCORE
1205
ABC
451003
1
00120505
CDE
000174
3
12345
XYZ
120545
4
I used a Common Table Expression to construct data, which defines a temporary named result set that is used by the SELECT that follows it.
You need to define what you mean by the distance between two strings. I'll use #peter-b 's definition in the example below. Once you know how to measure how close a string is to the search parameter, you can transpose the columns to rows with cross apply (LATERAL is the standard name), and use the min distance to order the rows.
with t (orderid, refno) as (
select '12345','120545'
union all
select '1205','451003'
)
select t.orderid, t.refno
, min(case when u.s = '1205' then 1
when u.s like '1205'+'%' then 2
when u.s like '%' + u.s + '%' then 3
else 4
end) as distance
from t
cross apply (
select t.orderid
union all
select t.refno
) as u (s)
group by t.orderid, t.refno
order by 3
;
orderid refno distance
1205 451003 1
12345 120545 2
Fiddle

Generate random numbers in a specific range without duplicate values

SELECT CEILING (RAND(CAST(NEWID() AS varbinary)) *275) AS RandomNumber
This creates random numbers. However, it spits out duplicates
Generate a numbers table with the range of your desire. In my case, I do it via recursive cte. Then order the numbers table using the newid function.
with numbers as (
select 0 as val union all
select val + 1 from numbers where val < 275
)
select ord = row_number() over(order by ap.nid),
val
into #rands
from numbers n
cross apply (select nid = newid()) ap
order by ord
option (maxrecursion 1000);
One run of the code above results in a table of 276 values that begins and ends as follows:
| ord | val |
+-----+-----+
| 1 | 102 |
| 2 | 4 |
| 3 | 127 |
| ... | ... |
| 276 | 194 |
Non duplicating ordering of random numbers.
You can select from it a variety of ways, but one way could be:
-- initiate these to begin with
declare #ord int = 1;
declare #val int;
declare #rand int;
-- do this on every incremental need for a random number
select #val = val,
#ord = #ord + 1
from #rands
where ord = #ord;
print #val;
In the comments to my other answer, you write:
The table I'm working with has an ID , Name , and I want to generate a 3rd column that assigns a unique random number between 1-275 (as there are 275 rows) with no duplicates.
In the future, please include details like this in your original question. With this information, we can help you out better.
First, let's make a sample of your problem. We'll simplify it to just 5 rows, not 275:
create table #data (
id int,
name varchar(10)
);
insert #data values
(101, 'Amanda'),
(102, 'Beatrice'),
(103, 'Courtney'),
(104, 'Denise'),
(105, 'Elvia');
Let's now add the third column you want:
alter table #data add rando int;
Finally, let's update the table by creating a subquery that orders the rows randomly using row_number(), and applying the output the the column we just created:
update reordered
set rando = rowNum
from (
select *,
rowNum = row_number() over(order by newid())
from #data
) reordered;
Here's the result I get, but of course it will be different every time it is run:
select *
from #data
| id | name | rando |
+-----+----------+-------+
| 101 | Amanda | 3 |
| 102 | Beatrice | 1 |
| 103 | Courtney | 4 |
| 104 | Denise | 5 |
| 105 | Elvia | 2 |

SQL splitting a column into 3 different columns with multiple seperators

I have the column below which contains data as shown
|DeliveryComment |
|-------------------------|
|[1 * B018] |
|GARAGE |
|BACK GARDEN. [124 * B002]|
|[1 * B018] |
| |
|[124 * B002] |
|[1 * B018] |
| |
|[124 * B002] |
I'd like to split this data into three columns displayed as below.
|ColA |ColB|ColC|
|-----------|----|----|
| |1 |B018|
|GARAGE | | |
|BACK GARDEN|124 |B002|
| |1 |B018|
| | | |
| |124 |B002|
| |1 |B018|
| | | |
| |124 |B002|
The data that should end up in column A can be variable up to 11 characters.
The data that should end up in column B can be a variable numeric value up to 3 characters.
The data that should end up in column C can be variable up to 4 characters.
There will always be [] around the numbers and there will always be a * in between them.
Create and populate sample table (Please save us this step in your future questions)
DECLARE #t AS TABLE
(
col varchar(50)
)
INSERT INTO #T VALUES
('[1 * B018]'),
('GARAGE'),
('BACK GARDEN. [124 * B002]'),
('[1 * B018]'),
(''),
('[124 * B002]'),
('[1 * B018]'),
(''),
('[124 * B002]')
The query:
SELECT CASE WHEN charindex('[', col) > 0 THEN
LEFT(col, charindex('[', col)-1)
ELSE
col
END AS ColA,
CASE WHEN charindex('[', col) = 0 THEN
''
ELSE
SUBSTRING(col, charindex('[', col) +1 ,charindex('*', col) - charindex('[', col) - 1)
END AS ColB,
CASE WHEN charindex('[', col) = 0 THEN
''
ELSE
SUBSTRING(col, charindex('*', col) +1 ,charindex(']', col) - charindex('*', col) - 1)
END AS ColC
FROM #T
Results:
ColA ColB ColC
1 B018
GARAGE
BACK GARDEN. 124 B002
1 B018
124 B002
1 B018
124 B002
This solution uses CROSS APPLY to make the CHARINDEX easier to manage.
SELECT
LEFT(SUBSTRING(col,1,CASE WHEN a= 0 THEN LEN(col) ELSE a-1 END),11) AS [ColA]
,REPLACE(SUBSTRING(col,a+1,b-a),'*','') AS [ColB]
,REPLACE(SUBSTRING(col,b+1,c-b),']','') AS [ColC]
FROM
#t
CROSS APPLY( SELECT CHARINDEX('[',Col,0)A
,CHARINDEX('*',Col,0)B
,CHARINDEX(']',Col,0)C
) Z
Note: the examples you have so far -including this one - currently have leading and trailing spaces that will need trimming out with RTRIM & LTRIM. But right now they would cloud the code.

SQL SELECT: concatenated column with line breaks and heading per group

I have the following SQL result from a SELECT query:
ID | category| value | desc
1 | A | 10 | text1
2 | A | 11 | text11
3 | B | 20 | text20
4 | B | 21 | text21
5 | C | 30 | text30
This result is stored in a temporary table named #temptab. This temporary table is then used in another SELECT to build up a new colum via string concatenation (don't ask me about the detailed rationale behind this. This is code I took from a colleague). Via FOR XML PATH() the output of this column is a list of the results and is then used to send mails to customers.
The second SELECT looks as follows:
SELECT t1.column,
t2.column,
(SELECT t.category + ' | ' + t.value + ' | ' + t.desc + CHAR(9) + CHAR(13) + CHAR(10)
FROM #temptab t
WHERE t.ID = ttab.ID
FOR XML PATH(''),TYPE).value('.','NVARCHAR(MAX)') AS colname
FROM table1 t1
...
INNER JOIN #temptab ttab on ttab.ID = someOtherTable.ID
...
Without wanting to go into too much detail, the column colname becomes populated with several entries (due to multiple matches) and hence, a longer string is stored in this column (CHAR(9) + CHAR(13) + CHAR(10) is essentially a line break). The result/content of colname looks like this (it is used to send mails to customers):
A | 10 | text1
A | 11 | text11
B | 20 | text20
B | 21 | text21
C | 30 | text30
Now I would like to know, if there is a way to more nicely format this output string. The best case would be to group the same categories together and add a heading and empty line between different categories:
*A*
A | 10 | text1
A | 11 | text11
*B*
B | 20 | text20
B | 21 | text21
*C*
C | 30 | text30
My question is: How do I have to modify the above query (especially the string-concatenation-part) to achieve above formatting? I was thinking about using a GROUP BY statement, but this obviously does not yield the desired result.
Edit: I use Microsoft SQL Server 2008 R2 (SP2) - 10.50.4270.0 (X64)
Declare #YourTable table (ID int,category varchar(50),value int, [desc] varchar(50))
Insert Into #YourTable values
(1,'A',10,'text1'),
(2,'A',11,'text11'),
(3,'B',20,'text20'),
(4,'B',21,'text21'),
(5,'C',30,'text30')
Declare #String varchar(max) = ''
Select #String = #String + Case when RowNr=1 Then Replicate(char(13)+char(10),2) +'*'+Category+'*' Else '' end
+ char(13)+char(10) + category + ' | ' + cast(value as varchar(25)) + ' | ' + [desc]
From (
Select *
,RowNr=Row_Number() over (Partition By Category Order By Value)
From #YourTable
) A Order By Category, Value
Select Substring(#String,5,Len(#String))
Returns
*A*
A | 10 | text1
A | 11 | text11
*B*
B | 20 | text20
B | 21 | text21
*C*
C | 30 | text30
This should return what you want
Declare #YourTable table (ID int,category varchar(50),value int, [desc] varchar(50))
Insert Into #YourTable values
(1,'A',10,'text1'),
(2,'A',11,'text11'),
(3,'B',20,'text20'),
(4,'B',21,'text21'),
(5,'C',30,'text30');
WITH Categories AS
(
SELECT category
,'**' + category + '**' AS CatCaption
,ROW_NUMBER() OVER(ORDER BY category) AS CatRank
FROM #YourTable
GROUP BY category
)
,Grouped AS
(
SELECT c.CatRank
,0 AS ValRank
,c.CatCaption AS category
,-1 AS ID
,'' AS Value
,'' AS [desc]
FROM Categories AS c
UNION ALL
SELECT c.CatRank
,ROW_NUMBER() OVER(PARTITION BY t.category ORDER BY t.Value)
,t.category
,t.ID
,CAST(t.value AS VARCHAR(100))
,t.[desc]
FROM #YourTable AS t
INNER JOIN Categories AS c ON t.category=c.category
)
SELECT category,Value,[desc]
FROM Grouped
ORDER BY CatRank,ValRank
The result
category Value desc
**A**
A 10 text1
A 11 text11
**B**
B 20 text20
B 21 text21
**C**
C 30 text30

Sql Server: Comma separated column values exist in other comma separated column

Let's say I have a table
OrgData
OrgDataid columvalues
1 1,2,3,4,5
2 6,7,8,9
3 16,17,18,19
Another table holds selected values
selectedData
rowid sid orgid values
1 1 1 1,2
2 1 2 6,7,8,9
3 2 1 1,2,3,4,5
Where sid is the id of OrgData
I want output like
for sid 1;
outputData
OrgData columvalues
1 partial selected
2 Full selected
3 Nothing selected
I want some straightway query, while I am trying to do it with splitting every comma separated to rows and looping for each one.
Thanks
You might try this:
--mock-up-tables with your data
DECLARE #OrgData TABLE(OrgDataid INT,columvalues VARCHAR(100));
INSERT INTO #OrgData VALUES
(1,'1,2,3,4,5')
,(2,'6,7,8,9')
,(3,'16,17,18,19');
DECLARE #selectedData TABLE(rowid INT,[sid] INT,orgid INT,[values] VARCHAR(100));
INSERT INTO #selectedData VALUES
(1,1,1,'1,2')
,(2,1,2,'6,7,8,9')
,(3,2,1,'1,2,3,4,5');
--First CTE to split OrgData
WITH OrgDataSplitted AS
(
SELECT od.*
,LEN(od.columvalues)-LEN(REPLACE(od.columvalues,',',''))+1 AS CountParts
,B.Part.value('.','int') AS PartInt
FROM #OrgData AS od
CROSS APPLY(SELECT CAST('<x>' + REPLACE(columvalues,',','</x><x>') + '</x>' AS XML)) AS A(Casted)
CROSS APPLY A.Casted.nodes('/x') AS B(Part)
)
--Second CTE to split SelectedData
,SelectedSplitted AS
(
SELECT sd.*
,B.Part.value('.','int') AS PartInt
FROM #selectedData AS sd
CROSS APPLY(SELECT CAST('<x>' + REPLACE([values],',','</x><x>') + '</x>' AS XML)) AS A(Casted)
CROSS APPLY A.Casted.nodes('/x') AS B(Part)
)
--The query to join them
SELECT o.OrgDataid,o.CountParts,s.rowid,COUNT(rowid) AS CountIdent
FROM Orgdatasplitted AS o
FULL OUTER JOIN SelectedSplitted AS s ON o.OrgDataID=s.orgid and o.PartInt=s.PartInt
GROUP BY o.OrgDataid,o.CountParts,s.rowid
ORDER BY o.OrgDataid
The result. If CountParts and CountIdent is the same it's full, >0 is partial and 0 is none
+-----------+------------+-------+------------+
| OrgDataid | CountParts | rowid | CountIdent |
+-----------+------------+-------+------------+
| 1 | 5 | 1 | 2 |
+-----------+------------+-------+------------+
| 1 | 5 | 3 | 5 |
+-----------+------------+-------+------------+
| 2 | 4 | 2 | 4 |
+-----------+------------+-------+------------+
| 3 | 4 | NULL | 0 |
+-----------+------------+-------+------------+
Try the below script, may be help you. But I agree with #Gordon Linoff,
Fix you data structure. Storing values in comma-delimited strings is the wrong way.
SELECT OrgDataid
,(CASE WHEN OD.columvalues = SD.[values] THEN 'Full selected'
WHEN SD.rowid IS NULL THEN 'Nothing selected'
ELSE 'partial selected' END) AS columvalues
FROM OrgData AS OD
LEFT OUTER JOIN selectedData AS SD ON OD.columvalues LIKE '%'+SD.[values]+'%'