Find rows that contain same value inside comma separated values - sql

I have a varchar column, populated by another process over which I have no control, that is filled with comma separated values.
Now I need to find all rows where part of this column exists in that same column, in another row
example
-- Sample data: every row holds one comma-separated list of codes.
-- Table variables must carry the @ prefix in T-SQL.
declare @table table (value varchar(50));

insert into @table (value)
values ('NB,BD,FR'), ('BD,GK'), ('SL,SR');

select value
from @table;
so the table contains
value
-----
NB,BD,FR
BD,GK
SL,SR
from the example above I would like to get
value
-----
NB,BD,FR
BD,GK
Because there is a value (in this case BD but can be anything) present in both rows
Can this be done in sql?

You could use clunky XML manipulation to convert comma separated values to rows:
-- Sample data: one comma-separated list per row (table variables need the @ prefix).
DECLARE @table TABLE (value VARCHAR(50));
INSERT INTO @table (value) VALUES
('NB,BD,FR'),
('BD,GK'),
('SL,SR');

-- cte: one row per (original list, single item).
-- Each list is rewritten as <x>item</x><x>item</x>... and shredded with nodes().
-- The replacement string carries no comma, so no stray text nodes are left
-- between the <x> elements.
WITH cte AS (
    SELECT t.value, n.node.value('.', 'varchar(10)') AS substr
    FROM @table AS t
    CROSS APPLY (SELECT CAST('<x>' + REPLACE(t.value, ',', '</x><x>') + '</x>' AS XML)) AS x(doc)
    CROSS APPLY x.doc.nodes('/x') AS n(node)
)
-- use your favorite technique to find the duplicate
-- Here: keep a list when one of its items also occurs in a different list.
SELECT m.value
FROM cte AS m
WHERE EXISTS (
    SELECT 1
    FROM cte AS x
    WHERE x.value <> m.value AND x.substr = m.substr
);
The CAST(... AS XML) part assumes that your data does not contain characters that have special meaning in XML (such as &, < or >). The nodes method converts one row into many; the rest is straightforward.

This is the wrong data structure. Don't store values in strings!
-- Normalized layout: one (id, value) pair per row instead of a delimited string.
-- Table variables must carry the @ prefix in T-SQL.
declare @table table (id int, value varchar(50));
insert into @table (id, value)
values (1, 'NB'), (1, 'BD'), (1, 'FR'),
       (2, 'BD'), (2, 'GK'),
       (3, 'SL'), (3, 'SR');

-- Then you can get what you want using window functions:
--   cnt     = number of rows in the whole table that carry this value
--   max_cnt = largest cnt among the rows of the same id
-- An id is kept when at least one of its values occurs in two or more rows.
select id, value
from (select t.*, max(cnt) over (partition by id) as max_cnt
      from (select t.*, count(*) over (partition by value) as cnt
            from @table t
           ) t
     ) t
where max_cnt >= 2;

Related

Split one cell into multiple rows in SQL Server

Split single cell value into multiple rows by duplicating the id column and using only portion of the original text. Any way other than using UNION.
Here is the sample data
-- Sample table for the "split one cell into several rows" question.
CREATE TABLE Spl
(
    id   INT,
    Name VARCHAR(100)
);

-- Three sample rows, loaded with a single multi-row insert.
INSERT INTO Spl (id, Name)
VALUES (1, '4334ASTBSTCST'),
       (2, '7887ASTBSTCST'),
       (3, '8793ASTBSTCST');
You can use cross apply with values:
-- Produce three output rows per source row: the first four characters of
-- [name] combined with the three characters at positions 5-7, 8-10 and 11-13.
SELECT s.Id,
       seg.[Name]
FROM spl AS s
CROSS APPLY (
    VALUES
        (SUBSTRING(s.[name], 1, 7)),
        (SUBSTRING(s.[name], 1, 4) + SUBSTRING(s.[name], 8, 3)),
        (SUBSTRING(s.[name], 1, 4) + SUBSTRING(s.[name], 11, 3))
) AS seg([Name])
A version of cross apply
-- Same split, driven by a small inline table of (start position, length) pairs.
SELECT s.Id,
       LEFT(s.[name], 4) + SUBSTRING(s.[name], p.pos, p.len)
FROM spl AS s
CROSS APPLY (VALUES (5, 3), (8, 3), (11, 3)) AS p(pos, len)

Sorting VARCHAR column which contains integers

I have this table:
-- Recreate the sample temp table from scratch on every run.
IF OBJECT_ID('tempdb..#Test') IS NOT NULL
BEGIN
    DROP TABLE #Test;
END;

CREATE TABLE #Test (Col VARCHAR(100));

INSERT INTO #Test (Col)
VALUES ('1'),
       ('2'),
       ('10'),
       ('A'),
       ('B'),
       ('C1'),
       ('1D'),
       ('10HH');

SELECT Col
FROM #Test
I want to sort by numeric value first and then alphabetically.
Outcome of sort I want to is:
1
1D
2
10
10HH
A
B
C1
Assume structure of entries is one of those (with no dash of course)
number
number-string
string-number
string
if there is an entry like string-number-string, assume it is string-number
It's not pretty, but it works.
-- Sort by the leading integer first, then alphabetically; values without a
-- numeric prefix come last.
SELECT T.Col
FROM #Test T
     -- PI.I: position of the first non-digit character, 0 when Col is all digits.
     CROSS APPLY (VALUES(PATINDEX('%[^0-9]%',T.Col)))PI(I)
     -- TC.L: the leading digits as int.
     --   * all digits: NULLIF(PI.I,0) is NULL, so LEFT() is NULL and ISNULL falls
     --     back to the whole string (the value itself, not its length).
     --   * no numeric prefix: LEFT() is '', which NULLIF turns into NULL so that
     --     TRY_CONVERT does not report it as 0.
     CROSS APPLY (VALUES(TRY_CONVERT(int,NULLIF(ISNULL(LEFT(T.Col,NULLIF(PI.I,0)-1),T.Col),''))))TC(L)
ORDER BY CASE WHEN TC.L IS NULL THEN 1 ELSE 0 END,
         TC.L,
         T.Col;
Honestly, I would suggest that if you want to order your data like a numerical value, you actually store the numerical value in a numerical column; clearly the above should be a numerical prefix value followed by a string suffix. If you then want to keep the combined values you have now, use a (PERSISTED) computed column. Like this:
-- Numeric prefix and text suffix are stored separately; Col is derived from
-- them as a persisted computed column.
CREATE TABLE #Test
(
    Prefix int          NULL,
    Suffix varchar(100) NULL,
    Col AS CONCAT(Prefix, Suffix) PERSISTED
);

INSERT INTO #Test (Prefix, Suffix)
VALUES (1, NULL),
       (2, NULL),
       (10, NULL),
       (NULL, 'A'),
       (NULL, 'B'),
       (NULL, 'C1'),
       (1, 'D'),
       (10, 'HH');

-- Rows with a prefix first (in numeric order), then by suffix.
SELECT Col
FROM #Test
ORDER BY CASE WHEN Prefix IS NOT NULL THEN 0 ELSE 1 END,
         Prefix,
         Suffix;
This awful and unintuitive solution, that would be unnecessary if you stored the two pieces of data separately, brought to you by bad idea designs™:
-- Sorts #Test.Col by its leading integer (when it has one) and then by the
-- remainder of the string; rows without a leading integer are placed last.
;WITH cte AS
(
-- rest: the part of Col starting at pos (for pos = 0 this is Col itself).
SELECT Col, rest = SUBSTRING(Col, pos, 100),
-- possible_int: the characters before pos converted to bigint. When pos = 1
-- (Col starts with a non-digit) the CASE has no matching branch and yields NULL.
possible_int = TRY_CONVERT(bigint, CASE WHEN pos <> 1 THEN
-- pos = 0 means no non-digit was found, so up to 99 characters are taken.
LEFT(Col, COALESCE(NULLIF(pos,0),100)-1) END)
-- pos: position of the first non-digit character in Col, 0 when there is none.
FROM (SELECT Col, pos = PATINDEX('%[^0-9]%', Col) FROM #Test) AS src
)
SELECT Col FROM cte
-- Rows whose possible_int is NULL get sort key 2 and therefore come last.
ORDER BY CASE
WHEN possible_int IS NULL THEN 2 ELSE 1 END,
possible_int,
rest;
Result:
Col
1
1D
2
10
10HH
A
B
C1
Example db<>fiddle

Precise sort order of records that were found in a IN split function in SQL Server

I need to return records that I send to a stored procedure in a comma-separated string - like this:
@PMID = 29573145,24106086,20513766,24326307
I have a stored procedure that pulls records such as
-- Return the publications whose PMID appears in the comma-separated list
-- passed in the @PMID parameter (variables/parameters use the @ prefix).
SELECT
    data,
    PMID
FROM
    [dbo].[ADMIN_Publication_JSON]
WHERE
    PMID IN (SELECT DATA FROM dbo.Split(@PMID, ','))
The problem that I am having is that the return record set is random and I need it precise because my end user could change the order and the records need to be displayed in that order which would change the order in the comma string. Is this possible or do I need to totally change the way I pull the data? Thanks
You can use a window function like
-- Number the items returned by dbo.Split and sort the joined rows by that number.
-- NOTE(review): ORDER BY (SELECT NULL) gives ROW_NUMBER() no defined ordering, so
-- RN is not guaranteed to follow the position of each item inside @PMID.
Select T1.data,
       T1.PMID
FROM [dbo].[ADMIN_Publication_JSON] T1 INNER JOIN
     (SELECT Data,
             ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) RN
      FROM dbo.Split(@PMID,',')
     ) T2 ON T1.PMID = T2.Data
ORDER BY T2.RN;
Here is a little sample:
-- Self-contained sample: look up rows of T by a comma-separated id list.
CREATE TABLE T(
  ID INT,
  SomeValue VARCHAR(45)
);
INSERT INTO T (ID, SomeValue) VALUES
(1, 'One'),
(2, 'Two'),
(3, 'Three'),
(4, 'Four'),
(5, 'Five');

-- Scalar variables must carry the @ prefix in T-SQL.
DECLARE @IDs VARCHAR(200) = '3,5,2';

-- NOTE(review): ORDER BY (SELECT NULL) gives ROW_NUMBER() no defined ordering, so
-- Seq is not guaranteed to follow the position of each item inside @IDs.
SELECT T.*
FROM T INNER JOIN
     (SELECT Value,
             ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) Seq
      FROM STRING_SPLIT(@IDs, ',') --instead of your function
     ) TT
ON T.ID = TT.Value
ORDER BY TT.Seq;
Live Demo
Split method does not sort the Data column that means simple join with its result can do the trick. You don't need ROW_NUMBER() or any sorting effort here. Have a temp table store Splits result and LEFT JOIN the two. This works for me.
-- Stage the split items in a temp table, then join them to the publications.
CREATE TABLE #Input (PMID varchar(10));

-- @PMID is the comma-separated list parameter (variables use the @ prefix).
INSERT INTO #Input (PMID)
SELECT Data FROM dbo.Split(@PMID, ',');

-- NOTE(review): there is no ORDER BY here, so SQL Server does not guarantee
-- the order in which the joined rows are returned.
SELECT
    jsn.*
FROM
    #Input spl INNER JOIN ADMIN_Publication_JSON jsn on spl.PMID = jsn.PMID;
Output: Returns the set in the order passed in @PMID
I'm sorry to say but the currently accepted answer (by Sami) is wrong.
The problem with this answer is that it uses ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) to get the order of the items in the comma delimited string, but since the order by is done on select null, what actually happens is that row_number will assign the numbers in an arbitrary order - that may or may not match the order of the substrings in the source string.
For more information, read Conor Cunningham's No Seatbelt – Expecting Order without ORDER BY.
If your split UDF returns a table with two columns, where one contains the substring and the other contains its index, like Jeff Moden's DelimitedSplit8K, then simply use the ItemNumber (or equivalent) column for the order by. If it only returns a single column containing the substrings, you can use a nice trick I've learned from Aaron Bertrand's Solve old problems with SQL Server’s new STRING_AGG and STRING_SPLIT functions - it is guaranteed to return the correct order of the substrings as long as they are unique.
A simple change on Sami's answer will give you correct results as long as the substrings are unique within the comma delimited string - instead of ROW_NUMBER() OVER(ORDER BY (SELECT NULL)), use CHARINDEX(',' + Value + ',', ',' + @Ids + ','), which will return the index of each substring inside the comma delimited string:
-- Self-contained sample: return rows of T in the order their ids appear in @IDs.
CREATE TABLE T(
  ID INT,
  SomeValue VARCHAR(45)
);
INSERT INTO T (ID, SomeValue) VALUES
(1, 'One'),
(2, 'Two'),
(3, 'Three'),
(4, 'Four'),
(5, 'Five');

-- Scalar variables must carry the @ prefix in T-SQL.
DECLARE @IDs VARCHAR(200) = '3,5,2';

-- Seq is the character position of ',<item>,' inside ',<list>,'. Wrapping both
-- sides in commas makes only whole items match; the position reflects the
-- item's place in the list as long as every item occurs only once.
-- STRING_SPLIT is used because its output column is named Value, which is the
-- column this query selects.
SELECT T.*
FROM T
INNER JOIN
     (SELECT Value,
             CHARINDEX(',' + Value + ',', ',' + @IDs + ',') AS Seq
      FROM STRING_SPLIT(@IDs, ',')
     ) TT
ON T.ID = TT.Value
ORDER BY TT.Seq;

Find rows which are in a row with comma separated values same table sql

i have a table which contains comma separated values some thing like
id locs
1 a,s,d,f
2 s,d,f,a
3 d,s,a,f
4 d,f,g,a
5 a,s,e
6 f,d
I need the output to be 1,2,3,6 in SQL Server when I take the comma separated string of id 1.
that means i have taken locs of id 1 and separated with comma, now i want all the ids which contains the separated values of id 1.
Note: I know i don't have to keep comma separated values in table but its happened.
Hope i was clear with my question.
-- Sample data (table and scalar variables must carry the @ prefix in T-SQL).
declare @tb table (id int, locs varchar(50));
insert into @tb (id, locs) values
(1, 'a,s,d,f'),
(2, 's,d,f,a'),
(3, 'd,s,a,f'),
(4, 'd,f,g,a'),
(5, 'a,s,e'),
(6, 'f,d');

-- The reference list: the locs of id 1.
declare @cta varchar(20) = 's,d,f,a';

-- cte0: one row per (id, single item), produced by rewriting each list as
-- <t>item</t><t>item</t>... and shredding the XML with nodes().
;with cte0(id, col2)
as
(
    select a.id, t.c.value('.', 'varchar(max)') as col2
    from (select id, x = cast('<t>' + replace(locs, ',', '</t><t>') + '</t>' as xml) from @tb) a
    cross apply a.x.nodes('/t') t(c)
)
-- Keep an id only when every one of its items occurs in @cta.
-- Both sides are wrapped in commas so that only whole items match (an item
-- 'a' must not match inside 'ab'); MIN(...) = 1 means no item failed the test.
select id
from cte0
group by id
having min(case when ',' + @cta + ',' like '%,' + col2 + ',%' then 1 else 0 end) = 1;
If I understand you correctly, you need to return the id value of all the rows that have at least one of the comma separated values from the locs column of the row you selected. Since this is a poor database design there can only be an ugly solution to this problem.
Start by creating a user defined function to split a comma separated values into a table. there are many ways to do it, this is the first that google found.
-- @Id (the id of the selected row) is expected to be declared by the caller.
DECLARE @Values varchar(max);

-- Load the delimited list of the selected row.
-- [Table] is a placeholder name; it is bracketed because TABLE is a reserved word.
SELECT @Values = Locs
FROM [Table]
WHERE Id = @Id;

-- Return every row whose Locs contains at least one item of that list.
-- For LIKE the value being searched goes on the left and the wildcard pattern
-- on the right; commas around both sides make only whole items match.
-- EXISTS returns each Id once even when several items match.
SELECT t.Id
FROM [Table] AS t
WHERE EXISTS (
    SELECT 1
    FROM dbo.Split(@Values) AS SplitedString
    WHERE ',' + t.Locs + ',' LIKE '%,' + SplitedString.s + ',%'
);

SQL Substring on varying column

I have a database table with a column METADATA. This METADATA may or may not contain a string. Here's the sample string:
StudentID:1234,StudentName:TestName,StudentNickName:TestNName,StudentLevel:5
Now, I want to extract the StudentNickName:TestNName part if it exists. Please note the following constraints:
METADATA column doesn't always contain a value
METADATA column can contain a value without the StudentNickName clause
The StudentNickName: is fixed, while the TestNName varies per row.
For mssql 2005+
-- Sample data (table variables must carry the @ prefix in T-SQL).
declare @t table(metadata varchar(200));
insert @t values('StudentID:1234,StudentName:TestName,StudentNickName:TestNName,StudentLevel:5');
insert @t values('');
insert @t values('StudentID:1234,StudentName:Thomas,StudentNickName:Tom,StudentLevel:3');

-- b.a: the tail of metadata that starts right after 'StudentNickName:', plus a
--      trailing comma. The key is searched in the reversed string (hence the
--      reversed literal); the match position is used as the number of
--      characters to take from the right.
-- The outer LEFT() then keeps everything before the first comma of that tail.
-- Rows in which the key is not found produce an empty string.
select left(b.a, patindex('%_,%', b.a)) StudentNickName
from @t t cross apply
(select right(metadata, patindex('%_:emaNkciNtnedutS%'
, reverse('X'+ metadata)))+',' a) b;
Result:
StudentNickName
---------------
TestNName
Tom
Works in Sql Server
-- Sample data (table variables must carry the @ prefix in T-SQL).
DECLARE @test TABLE(metadata VARCHAR(200));
INSERT @test VALUES('StudentID:1234,StudentName:TestName,StudentNickName:TestNName,StudentLevel:5');
INSERT @test VALUES('StudentID:1235,StudentName:TestName1,StudentNickName:TestNName1,StudentLevel:6');
INSERT @test VALUES('StudentID:1236,StudentName:TestName2,StudentNickName:TestNName2,StudentLevel:2');
INSERT @test VALUES('');

-- Split each metadata string on commas by rewriting it as <s>..</s><s>..</s>
-- and shredding the XML, then keep only the 'StudentNickName...' pieces.
-- Each returned value is the whole key:value piece, e.g. 'StudentNickName:TestNName'.
SELECT split.s.value('.','VARCHAR(100)') as colname FROM
(
    SELECT CAST('<s>' + REPLACE(metadata,',','</s><s>') + '</s>' AS XML) col FROM @test
) AS t CROSS APPLY col.nodes('/s') AS split(s)
WHERE split.s.value('.','VARCHAR(100)') LIKE '%StudentNickName%';