Get the last three values within a delimited string using SQL Server - sql

I would like to extract only the last three values from a delimited string and generate a delimited substring from those three values. Could anyone suggest the best way to do this? I tried using STRING_SPLIT and was able to split the string into multiple values, but I am not sure how to proceed further. Any thoughts would be appreciated.
-- Break the column-definition list apart at each comma; STRING_SPLIT yields
-- one row per piece in a single column named value.
SELECT parts.value
FROM STRING_SPLIT('CatalogTypeCode VARCHAR(10) NOT NULL
,EventID INT NOT NULL
,ModelCode TINYINT NOT NULL
,YearID INT NOT NULL
,PerilSetCode INT NOT NULL
,GrossLoss FLOAT NULL
,GrossSD FLOAT NULL
,GrossMaxLoss FLOAT NULL', ',') AS parts
Output:
CatalogTypeCode VARCHAR(10) NOT NULL
EventID INT NOT NULL
ModelCode TINYINT NOT NULL
YearID INT NOT NULL
PerilSetCode INT NOT NULL
GrossLoss FLOAT NULL
GrossSD FLOAT NULL
GrossMaxLoss FLOAT NULL
Expected Output :
'GrossLoss FLOAT NULL,GrossSD FLOAT NULL,GrossMaxLoss FLOAT NULL'

Perhaps another option using a little XML in concert with reverse() ... twice
Example
-- Sample data held in a table variable (table variables take the @ prefix).
Declare @YourTable table (ID int,SomeCol varchar(150))
Insert Into @YourTable (ID,SomeCol) values
 (1,'Val1,Val2,Val3,Val4')
,(2,'Val1,Val2,Val3,Val4,Val5,Val6')
,(3,'Val1,Val2')
,(4,'Val1')
,(5,null)

-- Reverse the list so its LAST three items become XML nodes 1-3, read those
-- nodes, join them, then reverse the joined text back into reading order.
-- concat() ignores NULLs, so lists with fewer than three items still work.
-- NOTE: the cast to xml fails if a value contains & or <; escape those first.
Select A.ID
      ,LastThree = reverse(concat(Pos1,','+Pos2,','+Pos3))
 From  @YourTable A
 Cross Apply (
                Select Pos1 = n.value('/x[1]','varchar(max)')
                      ,Pos2 = n.value('/x[2]','varchar(max)')
                      ,Pos3 = n.value('/x[3]','varchar(max)')
                 From  (Select cast('<x>' + replace(reverse(SomeCol),',','</x><x>')+'</x>' as xml) as n) X
             ) B
Returns
ID LastThree
1 Val2,Val3,Val4
2 Val4,Val5,Val6
3 Val1,Val2 -- Notice only 2 values
4 Val1 -- Notice only 1 value
5 -- Notice value was null

Unfortunately, STRING_SPLIT() doesn't return the position of the values within the string.
This will work, assuming there are no duplicates among the lines:
-- Returns the last three comma-separated pieces, re-joined in their original
-- order. Position is recovered with CHARINDEX because STRING_SPLIT does not
-- expose it, so this assumes no piece occurs earlier in the string as well.
SELECT STRING_AGG(line, ',') WITHIN GROUP (ORDER BY pos) AS lines_3
FROM (SELECT TOP (3) s.line, CHARINDEX(s.line, v.lines) AS pos
      FROM (VALUES ('CatalogTypeCode VARCHAR(10) NOT NULL
,EventID INT NOT NULL
,ModelCode TINYINT NOT NULL
,YearID INT NOT NULL
,PerilSetCode INT NOT NULL
,GrossLoss FLOAT NULL
,GrossSD FLOAT NULL
,GrossMaxLoss FLOAT NULL')
           ) v(lines) OUTER APPLY
           STRING_SPLIT(v.lines, ',') s(line)
      -- Descending, so TOP (3) keeps the pieces closest to the END of the string.
      ORDER BY pos DESC
     ) s
EDIT:
Oops, the above works on SQL Server 2017, but not 2016. You can use conditional aggregation:
-- SQL Server 2016 variant (no STRING_AGG): number the pieces from the END of
-- the string (seqnum 1 = last piece), keep three, and concatenate them back in
-- original order (3, 2, 1).
-- NOTE: '+' yields NULL when the string has fewer than three pieces.
SELECT (MAX(CASE WHEN seqnum = 3 THEN line END) + ',' +
        MAX(CASE WHEN seqnum = 2 THEN line END) + ',' +
        MAX(CASE WHEN seqnum = 1 THEN line END)
       ) AS lines_3
FROM (SELECT TOP (3) s.line,
             ROW_NUMBER() OVER (ORDER BY CHARINDEX(s.line, v.lines) DESC) AS seqnum
      FROM (VALUES ('CatalogTypeCode VARCHAR(10) NOT NULL
,EventID INT NOT NULL
,ModelCode TINYINT NOT NULL
,YearID INT NOT NULL
,PerilSetCode INT NOT NULL
,GrossLoss FLOAT NULL
,GrossSD FLOAT NULL
,GrossMaxLoss FLOAT NULL')
           ) v(lines) OUTER APPLY
           STRING_SPLIT(v.lines, ',') s(line)
      ORDER BY seqnum
     ) s;

I have been using this table-valued function for years and it works fine. After creating the function, call it like this: select top 3 * from dbo.Split('a,b,c,d,e',',') order by id desc
-- Splits @String on @Delimiter (which may be several characters long) and
-- returns the non-blank pieces in order of appearance.
--   @String    : text to split; NULL or empty yields no rows.
--   @Delimiter : separator text; if NULL or empty the whole string is one piece.
-- Returns: id (1-based sequence of the piece) and items (the piece itself).
CREATE FUNCTION [dbo].[Split]
(
    @String VARCHAR(MAX),
    @Delimiter VARCHAR(10)
)
RETURNS @Temptable TABLE (id int identity, items varchar(MAX))
AS
BEGIN
    -- LEN() ignores trailing spaces; appending a sentinel character gives the
    -- true length, so ' ' / ', ' delimiters and space-padded input work.
    DECLARE @Delimiterlen INT = ISNULL(LEN(@Delimiter + 'x') - 1, 0)
    DECLARE @Total INT = LEN(@String + 'x') - 1
    DECLARE @Start INT = 1
    DECLARE @Idx INT
    DECLARE @Slice VARCHAR(MAX)

    IF @String IS NULL RETURN

    -- Scan with a moving start position rather than repeatedly truncating the
    -- string; CHARINDEX matches the delimiter literally (no LIKE wildcards).
    WHILE @Start <= @Total
    BEGIN
        SET @Idx = CASE WHEN @Delimiterlen > 0
                        THEN CHARINDEX(@Delimiter, @String, @Start)
                        ELSE 0
                   END
        -- No further delimiter: the rest of the string is the final piece.
        IF @Idx = 0 SET @Idx = @Total + 1

        SET @Slice = SUBSTRING(@String, @Start, @Idx - @Start)

        -- LEN() = 0 for empty and all-space pieces; those are skipped.
        IF LEN(@Slice) > 0
            INSERT INTO @Temptable(items) VALUES (@Slice)

        SET @Start = @Idx + @Delimiterlen
    END

    RETURN
END

Related

Insert query with case or if-else logic possible?

I want to insert values present in SELECT Statement below into table #tab3.
But I want to apply some sort of if-else or case logic to check like this one:
if (ABCList,1) = 'DOB' Then insert it into Dob1 else NULL
if (ABCList,2) = '04MARCH 1999' Then insert it into Dobnum else NULL
if (ABCList,3) = 'Passport' Then insert it into Pass1 else NULL
if (ABCList,4) = 'ABCC123' Then insert it into Passnum else NULL
But I can't figure out how to move data directly from a string into the table.
MAIN CODE:
-- Splits a four-item ';'-separated string and stores each item in its own
-- column of @tab3, but only when the item equals the expected text (else NULL).
DECLARE @string3 varchar(max) = 'DOB;04MARCH 1999;Passport;ABCC123';
DECLARE @sep3 char(1) = ';'
DECLARE @dot3 char(1) = '.'
DECLARE @tab3 TABLE(
    id INT IDENTITY(1,1) PRIMARY KEY,
    Dob1 varchar(max),
    Dobnum varchar(max),
    Pass1 varchar(max),
    Passnum varchar(max)
);

-- Swapping ';' for '.' turns the list into a dotted name PARSENAME can read.
SELECT REPLACE(@string3, @sep3, @dot3) AS ABClist

-- PARSENAME numbers parts from the RIGHT, so with four items part 4 is the
-- first item and part 1 is the last. It handles at most four parts and
-- requires that the items themselves contain no '.'.
INSERT INTO @tab3 (Dob1, Dobnum, Pass1, Passnum)
SELECT
    CASE WHEN p.item1 = 'DOB'          THEN p.item1 END,
    CASE WHEN p.item2 = '04MARCH 1999' THEN p.item2 END,
    CASE WHEN p.item3 = 'Passport'     THEN p.item3 END,
    CASE WHEN p.item4 = 'ABCC123'      THEN p.item4 END
FROM (
    SELECT
        PARSENAME(REPLACE(@string3, @sep3, @dot3), 4) AS item1,
        PARSENAME(REPLACE(@string3, @sep3, @dot3), 3) AS item2,
        PARSENAME(REPLACE(@string3, @sep3, @dot3), 2) AS item3,
        PARSENAME(REPLACE(@string3, @sep3, @dot3), 1) AS item4
) AS p;

SELECT id, Dob1, Dobnum, Pass1, Passnum FROM @tab3
Could you not use CASE to achieve what you want?
Something such as
-- One CASE per target column: keep the item when it equals the expected text,
-- otherwise store NULL. Uses @string3, @sep3, @dot3 and @tab3 from the question.
-- PARSENAME numbers parts from the right (part 4 = first of four items).
INSERT INTO @tab3 (Dob1,Dobnum,Pass1,Passnum)
SELECT
(
    CASE
        WHEN p.item1 = 'DOB'
        THEN p.item1
        ELSE NULL
    END
) AS Dob1,
(
    CASE
        WHEN p.item2 = '04MARCH 1999'
        THEN p.item2
        ELSE NULL
    END
) AS Dobnum,
(
    CASE
        WHEN p.item3 = 'Passport'
        THEN p.item3
        ELSE NULL
    END
) AS Pass1,
(
    CASE
        WHEN p.item4 = 'ABCC123'
        THEN p.item4
        ELSE NULL
    END
) AS Passnum
FROM (
    SELECT
        PARSENAME(REPLACE(@string3, @sep3, @dot3), 4) AS item1,
        PARSENAME(REPLACE(@string3, @sep3, @dot3), 3) AS item2,
        PARSENAME(REPLACE(@string3, @sep3, @dot3), 2) AS item3,
        PARSENAME(REPLACE(@string3, @sep3, @dot3), 1) AS item4
) AS p;

T-SQL query: find most frequent pair of values

I have text column with numeric values separated by semicolons. I'm trying to figure out how to get the most frequent pair of values that appeared together in the same row. I've found a solution for a very similar problem in Python Finding the most frequent occurrences of pairs in a list of lists, but I don't know how to rewrite it in using SQL In example below it returns 2 and 3 because this pair appeared 3 times in the input set:
Input rows Output
---------- -------
';1;2;3;5' | '2;3'
';2;3' | '1;2'
';3;4;5;1;2' | '1;3'
';1;5;2' | '1;5'
Original data:
You may try with the following approach. First, using OPENJSON(), get all possible combinations. When OPENJSON parses a JSON array the indexes of the elements in the JSON text are returned as keys (0-based). Then, count the most frequent pair with DENSE_RANK().
Input:
-- Temp table holding one semicolon-separated value list per row.
CREATE TABLE #Items (
    Id int,
    ItemValues varchar(max)
)

-- Load the four sample lists.
INSERT INTO #Items (Id, ItemValues)
SELECT src.Id, src.ItemValues
FROM (VALUES
    (1, '1;2;3;5'),
    (2, '2;3'),
    (3, '3;4;5;1;2'),
    (4, '1;5;2')
) AS src (Id, ItemValues)
Statement:
-- Builds every pair of values that share a row, then returns the pair(s) that
-- occur most often across all rows.
;WITH pairsCTE AS (
    -- Each list is rewritten as a JSON array so OPENJSON can split it; [key]
    -- is the element index, and lhs.[key] < rhs.[key] keeps each pair once.
    SELECT
        CASE
            WHEN lhs.[value] <= rhs.[value] THEN CONCAT(lhs.[value], ';', rhs.[value])
            ELSE CONCAT(rhs.[value], ';', lhs.[value])
        END AS PairValue
    FROM #Items i
    CROSS APPLY (SELECT '["' + REPLACE(i.ItemValues,';','","') + '"]' AS JsonArray) a
    CROSS APPLY OPENJSON(a.JsonArray) lhs
    CROSS APPLY OPENJSON(a.JsonArray) rhs
    WHERE lhs.[key] < rhs.[key]
)
-- WITH TIES keeps every pair that shares the highest count.
SELECT TOP (1) WITH TIES PairValue
FROM pairsCTE
GROUP BY PairValue
ORDER BY COUNT(*) DESC
Output:
PairValue
1;2
1;5
2;3
2;5
First have a split function
-- Splits @String on the single character @Delimiter and returns each
-- non-blank piece as a row.
--   @String    : text to split; NULL or empty yields no rows.
--   @Delimiter : one-character separator; NULL returns the whole string as one piece.
CREATE FUNCTION Splitfn(@String varchar(8000), @Delimiter char(1))
returns @temptable TABLE (items varchar(8000))
as
begin
    declare @idx int
    declare @start int = 1
    declare @slice varchar(8000)
    -- len() ignores trailing spaces; a sentinel character gives the true
    -- length (cast to varchar(max) so an 8000-character input isn't truncated).
    declare @total int = len(cast(@String as varchar(max)) + 'x') - 1

    -- Walk the string with a moving start position instead of chopping it with
    -- right()/len(), which loses data when the remainder ends in spaces.
    while @start <= @total
    begin
        set @idx = isnull(charindex(@Delimiter, @String, @start), 0)
        -- No more delimiters: the remainder is the last piece.
        if @idx = 0 set @idx = @total + 1

        set @slice = substring(@String, @start, @idx - @start)

        -- Skip empty and all-space pieces.
        if (len(@slice) > 0)
            insert into @temptable(items) values(@slice)

        set @start = @idx + 1
    end
    return
end
Second Step To get all rows in a single string
-- Concatenate every non-NULL YourColumn value into one '; '-separated string.
-- FOR XML PATH('') is used because "SELECT @v = @v + col" depends on
-- undefined evaluation order and a NULL row would reset the accumulator.
Declare @val Varchar(MAX);
Select @val = STUFF(
           (Select '; ' + YourColumn
            From YourTable
            Where YourColumn Is Not Null
            For Xml Path(''), Type).value('.', 'varchar(max)'),
           1, 2, '')  -- drop the leading '; '
Third step,
-- Most frequent single item. Items are trimmed BEFORE grouping so ' 2' and '2'
-- count as the same value; ties are broken by item text for a stable result.
SELECT TOP (1)
    LTRIM(RTRIM(items)) AS items,
    COUNT(*) AS occurrences
FROM dbo.Splitfn(@val, ';')
WHERE LTRIM(RTRIM(items)) <> ''
GROUP BY LTRIM(RTRIM(items))
ORDER BY COUNT(*) DESC, LTRIM(RTRIM(items))

SQL: Return first non null value from the string

Problem : I want to retrieve the first non null value from the string, substring is separated with , as separator.
Scenario :
String 1 - ,1002682657
String 2 - 1002682683,
String 3 - ,,1002682684
String 4 - ,,,
String 5 - 1002682664,1002682663
Expected Result
ResultString 1 - 1002682657
ResultString 2 - 1002682683
ResultString 3 - 1002682684
ResultString 4 - null value
ResultString 5 - 1002682664
So to retrieve this I wrote function below is the script
-- Returns the first non-blank item of a comma-separated list, trimmed of
-- surrounding spaces, or NULL when the list is NULL, empty, or only blanks.
--   @List : comma-separated text, e.g. N',,1002682684'.
CREATE FUNCTION [dbo].[Return_first_NonNull_Value_From_list]
(
    @List NVARCHAR(MAX)
)
-- An explicit length is required: a bare NVARCHAR return type means
-- NVARCHAR(1), which would truncate the result to one character.
RETURNS NVARCHAR(MAX)
AS
BEGIN
    DECLARE @Start INT = 1          -- first character of the current item
    DECLARE @End INT                -- position of the comma ending the item
    DECLARE @Part NVARCHAR(MAX)     -- current item, trimmed
    DECLARE @Length_String INT = LEN(@List)

    -- A NULL list makes the comparison UNKNOWN, so the loop is skipped.
    WHILE @Start <= @Length_String
    BEGIN
        SET @End = CHARINDEX(N',', @List, @Start)
        -- No further comma: the item runs to the end of the list.
        IF @End = 0 SET @End = @Length_String + 1

        SET @Part = LTRIM(RTRIM(SUBSTRING(@List, @Start, @End - @Start)))
        IF @Part <> N''
            RETURN @Part

        -- Blank item: continue with the one after the comma.
        SET @Start = @End + 1
    END

    RETURN NULL
END
But this function doesn't return the expected result. Could anyone please help me out in this?
With two assumptions from your question you could do this pretty easily. It looks like your numbers are all 10 characters long, and they only have numerics (no characters).
With this in mind you could just do this with a pattern match like so:
-- Take the 10 characters starting at the first digit. NULLIF turns "no digit
-- found" (PATINDEX = 0) into NULL, which makes SUBSTRING return NULL.
SELECT SUBSTRING(t.[Value], NULLIF(PATINDEX('%[0-9]%', t.[Value]), 0), 10) AS [Value]
FROM [#Test] AS t
We can discount any rows without numeric characters straight away and return null, the rest we look for the first numeric character and get the next 10 chars.
A full sample to run in sql server would be:
-- Self-contained demo: build a temp table of sample lists, extract the first
-- 10-digit number from each row, then clean up.
CREATE TABLE [#Test]
(
    [Value] NVARCHAR(1000)
)

INSERT INTO [#Test] ( [Value] )
VALUES ( N',1002682657' ),
       ( N'1002682683,' ),
       ( N',,1002682684' ),
       ( N',,,' ),
       ( N',1002682664,1002682663' )

-- Rows with no digit give NULL; otherwise return 10 characters from the first digit.
SELECT CASE
           WHEN PATINDEX('%[0-9]%', [Value]) > 0
               THEN SUBSTRING([Value], PATINDEX('%[0-9]%', [Value]), 10)
       END AS [Value]
FROM [#Test]

DROP TABLE [#Test]

In operator matching all rows

I want to return matching all values of csv as the traditional "in" operator matches any of the items present in csv:
-- Plain IN: returns rows whose UserID matches ANY value in the list.
SELECT [TypeID], [UserID]
FROM @MyTable
WHERE [UserID] IN (1,2)
The above query will not serve my purpose as I want to match the rows which have both records for a group. In my case group will by typeid.
Query to populate the table:
-- Sample data: each TypeID groups a set of UserIDs.
DECLARE @MyTable TABLE
    (
      [TypeID] INT ,
      [UserID] INT
    )

-- A single VALUES list replaces the UNION chain (no de-duplication pass needed).
INSERT INTO @MyTable ( [TypeID], [UserID] )
VALUES  ( 1, 1 ),
        ( 1, 2 ),
        ( 2, 1 ),
        ( 2, 2 ),
        ( 2, 3 ),
        ( 3, 1 ),
        ( 3, 2 ),
        ( 3, 3 ),
        ( 3, 4 )
To query the above table I have input string of userid
-- Comma-separated list of user ids supplied by the caller, e.g. '1,2'.
DECLARE @UserIDString VARCHAR(256)
Here is my requirement:
When the input is '1,2'; I want typeid 1 as the output as that group has all the records present in csv.
If the input is '1,2,3' ; 2 typeid should be returned as that group has all the values present in csv.
If the input is '1,2,3,4' ; 3 typeid should be returned as that group has all the values present in csv.
EDIT:
Here is the split function to split the csv:
-- Splits @inputString on @delimiter and returns the trimmed, non-blank pieces.
--   @inputString : text to split; NULL or empty yields no rows.
--   @delimiter   : separator text (default: one space); may be longer than
--                  one character.
-- Returns: position (1-based order of the piece) and value (the trimmed piece).
CREATE FUNCTION [dbo].[Split_String]
    (
      @inputString NVARCHAR(2000) ,
      @delimiter NVARCHAR(20) = ' '
    )
RETURNS @Strings TABLE
    (
      [position] INT IDENTITY
                     PRIMARY KEY ,
      [value] NVARCHAR(2000)
    )
AS
    BEGIN
        -- LEN() ignores trailing spaces, so a sentinel character is appended to
        -- get true lengths (the default delimiter is itself a space).
        DECLARE @delimiterLength INT = ISNULL(LEN(@delimiter + N'x') - 1, 0)
        DECLARE @totalLength INT = LEN(@inputString + N'x') - 1
        DECLARE @start INT = 1
        DECLARE @index INT
        DECLARE @piece NVARCHAR(2000)

        WHILE ( @start <= @totalLength )
            BEGIN
                -- Find the next delimiter at or after the current position
                SET @index = CASE WHEN @delimiterLength > 0
                                  THEN CHARINDEX(@delimiter, @inputString, @start)
                                  ELSE 0
                             END
                -- No delimiter left: the remainder is the last piece
                IF ( @index = 0 )
                    SET @index = @totalLength + 1

                SET @piece = RTRIM(LTRIM(SUBSTRING(@inputString, @start,
                                                   @index - @start)))
                -- Blank pieces (adjacent or leading delimiters) are skipped
                IF ( @piece <> N'' )
                    INSERT  INTO @Strings ( [value] )
                    VALUES  ( @piece )

                -- Step past the WHOLE delimiter, not just one character
                SET @start = @index + @delimiterLength
            END
        RETURN
    END
GO
Edit:
Thanks #Tab, with further modifications I have come to solution:
-- Returns the TypeIDs whose set of UserIDs is exactly the set listed in
-- @InputString: no user outside the list, and as many rows as list entries.
-- Expects @MyTable (TypeID, UserID) to be declared earlier in the same batch.
DECLARE @InputString VARCHAR(256) = '1,2'   -- sample input
DECLARE @Count INT                          -- number of ids in the list

DECLARE @DummyTable TABLE
    (
      [position] INT ,
      [value] INT
    )

INSERT  INTO @DummyTable
        ( [position] ,
          [value]
        )
        SELECT  [position] ,
                [value]
        FROM    [dbo].[Split_String](@InputString, ',')

SELECT  @Count = COUNT(*)
FROM    @DummyTable

-- NOT EXISTS rather than NOT IN: a NULL TypeID in the subquery would make
-- NOT IN reject every row.
SELECT  m.TypeID
FROM    @MyTable m
WHERE   NOT EXISTS ( SELECT 1
                     FROM   @MyTable t
                     WHERE  t.TypeID = m.TypeID
                            -- a user of this type that is not in the list
                            AND NOT EXISTS ( SELECT 1
                                             FROM   @DummyTable ss
                                             WHERE  ss.[value] = t.UserID ) )
GROUP BY m.TypeID
HAVING  COUNT(m.TypeID) = @Count
Using your split function, you can do an OUTER JOIN and make sure there are no NULL rows:
-- TypeIDs for which every UserID appears in the comma-separated @InputString.
-- NOT EXISTS avoids the NOT IN / NULL trap; GROUP BY returns each TypeID once.
SELECT m.TypeID
FROM @MyTable m
WHERE NOT EXISTS (
    SELECT 1
    FROM @MyTable t
    LEFT OUTER JOIN [dbo].[Split_String] (@InputString,',') ss
        ON t.UserId=ss.Value
    WHERE t.TypeID = m.TypeID
      AND ss.Position IS NULL   -- this user has no match in the list
    )
GROUP BY m.TypeID
Untested, but I think that should do it.
However, this should return ALL the types that meet the requirement of:
that group has all the records present in csv.
In your question, you seem to imply that only one row should be returned, but why would that be the case if more than one row matches all the values in the csv? And what is the rule for determining which row is returned when there is more than one match?

SQL Server - Compare Varchar values using IN

In my table, I have a varchar column whereby multi-values are stored. An example of my table:
RecNum | Title | Category
-----------------------------------------
wja-2012-000001 | abcdef | 4,6
wja-2012-000002 | qwerty | 1,3,7
wja-2012-000003 | asdffg |
wja-2012-000004 | zxcvbb | 2,7
wja-2012-000005 | ploiuh | 3,4,12
The values in the Category column points to another table.
How can I return the relevant rows if I want to retrieve the rows with value 1,3,5,6,8 in the Category column?
When I tried using IN, I get the 'Conversion failed when converting the varchar value '1,3,5,6,8' to data type int' error.
Breaking the Categories out into a separate table would be a better design if that's a change you can make... otherwise, you could create a function to split the values into a table of integers like this:
-- Splits @String on the single character @Delimiter and returns each
-- non-blank piece converted to int.
--   @String    : delimited list of integers; NULL or empty yields no rows.
--   @Delimiter : one-character separator.
-- A piece that is not a valid integer makes the conversion fail.
CREATE FUNCTION dbo.Split(@String varchar(8000), @Delimiter char(1))
returns @temptable TABLE (id int)
as
begin
    declare @idx int
    declare @start int = 1
    declare @slice varchar(8000)
    -- len() ignores trailing spaces; a sentinel character gives the true
    -- length (cast to varchar(max) so an 8000-character input isn't truncated).
    declare @total int = len(cast(@String as varchar(max)) + 'x') - 1

    -- Walk the string with a moving start position instead of chopping it with
    -- right()/len(), which loses data when the remainder ends in spaces.
    while @start <= @total
    begin
        set @idx = isnull(charindex(@Delimiter, @String, @start), 0)
        -- No more delimiters: the remainder is the last piece.
        if @idx = 0 set @idx = @total + 1

        set @slice = substring(@String, @start, @idx - @start)

        -- Skip empty and all-space pieces.
        if (len(@slice) > 0)
            insert into @temptable(id) values(convert(int, @slice))

        set @start = @idx + 1
    end
    return
end
Then call it from your query:
-- Template: keep rows whose Category list contains the single id @SomeID.
SELECT ...
FROM ...
WHERE @SomeID IN (SELECT id FROM dbo.Split(Category, ','))
Or if you're looking to provide a list of categories as an input parameter (such as '1,3,5,6,8'), and return all records in your table that contain at least one of these values, you could use a query like this:
-- Template: keep rows whose Category list shares at least one id with the
-- comma-separated @SearchValues parameter.
SELECT ...
FROM ...
WHERE
EXISTS (
    select 1
    from dbo.Split(Category, ',') s1
    inner join dbo.Split(@SearchValues, ',') s2 ON s1.id = s2.id
)
you can do like this
-- Dynamic IN list. @var is spliced into the statement text, so it is only
-- executed when it contains nothing but digits and commas (blocks injection).
declare @var varchar(30); set @var='2,3';
if @var not like '%[^0-9,]%'
    exec('select * from category where Category_Id in ('+@var+')')
else
    raiserror('Category list may contain only digits and commas.', 16, 1)
Try this solution:
-- Demo: list the rows of test4 whose Category list contains at least one of
-- the ids in @str.
CREATE TABLE test4(RecNum varchar(20),Title varchar(10),Category varchar(15))
INSERT INTO test4 (RecNum, Title, Category)
VALUES('wja-2012-000001','abcdef','4,6'),
('wja-2012-000002','qwerty','1,3,7'),
('wja-2012-000003','asdffg',null),
('wja-2012-000004','zxcvbb','2,7'),
('wja-2012-000005','ploiuh','3,4,12')
select * from test4
Declare @str varchar(25) = '1,3,5,6,8'
-- Recursive CTE that peels one WHOLE token off the front of @str per step
-- (token = text before the first comma, rem = text after it).
;WITH tokens as (
    select CAST(LEFT(@str, CHARINDEX(',', @str + ',') - 1) as varchar(25)) as token
          ,CAST(STUFF(@str, 1, CHARINDEX(',', @str + ','), '') as varchar(25)) as rem
    union all
    select CAST(LEFT(t.rem, CHARINDEX(',', t.rem + ',') - 1) as varchar(25))
          ,CAST(STUFF(t.rem, 1, CHARINDEX(',', t.rem + ','), '') as varchar(25))
    from tokens t
    where t.rem <> ''
)
-- Wrapping both sides in commas matches complete values only, so '1' does not
-- match '12'. DISTINCT returns each row once even if several ids match.
select distinct d.RecNum, d.Title
from test4 d
inner join tokens k
    on ',' + d.Category + ',' like '%,' + k.token + ',%'
where k.token <> ''
As mentioned by others, your table design violates basic database design principles and if there is no way around it, you could normalize the table with little code (example below) and then join away with the other table. Here you go:
Data:
-- Sample table: Category holds a comma-separated list of category ids
-- (NULL when the record has none).
CREATE TABLE data (
    RecNum   varchar(20),
    Title    varchar(10),
    Category varchar(15)
)

INSERT INTO data (RecNum, Title, Category)
VALUES
    ('wja-2012-000001', 'abcdef', '4,6'),
    ('wja-2012-000002', 'qwerty', '1,3,7'),
    ('wja-2012-000003', 'asdffg', null),
    ('wja-2012-000004', 'zxcvbb', '2,7'),
    ('wja-2012-000005', 'ploiuh', '3,4,12')
This function takes a comma separated string and returns a table:
-- Converts a comma-separated list of integers into a one-column table.
--   @list : e.g. N'1,3,5'; NULL, empty, or blank items produce no rows.
-- A non-blank item that is not a valid integer makes the conversion fail.
CREATE FUNCTION listToTable (@list nvarchar(MAX))
RETURNS @tbl TABLE (number int NOT NULL) AS
BEGIN
    DECLARE @pos int,
            @nextpos int,
            @valuelen int,
            @item nvarchar(MAX)

    -- A NULL list would otherwise try to insert NULL into the NOT NULL column.
    IF @list IS NULL RETURN

    SELECT @pos = 0, @nextpos = 1
    WHILE @nextpos > 0
    BEGIN
        -- @pos is the previous comma (0 at the start); @nextpos is the next
        -- comma, or 0 once the last item has been reached.
        SELECT @nextpos = charindex(',', @list, @pos + 1)
        SELECT @valuelen = CASE WHEN @nextpos > 0
                                THEN @nextpos
                                ELSE len(@list) + 1
                           END - @pos - 1
        SELECT @item = ltrim(rtrim(substring(@list, @pos + 1, @valuelen)))
        -- Skip blank items rather than letting them convert to 0.
        IF @item <> N''
            INSERT @tbl (number)
            VALUES (convert(int, @item))
        SELECT @pos = @nextpos
    END
    RETURN
END
Then, you can do something like this to "normalize" the table:
-- One output row per (record, category id): expands each Category list via
-- listToTable and skips records that have no categories.
SELECT *
FROM data AS m
    CROSS APPLY listToTable(m.Category) AS t
WHERE m.Category IS NOT NULL
And then use the result of the above query to join with the "other" table. For example (i did not test this query):
-- Joins otherTable to the searched ids (b) and to the normalized data rows (c)
-- on the category number.
select *
from otherTable a
    join listToTable('1,3,5,6,8') b
        on a.Category = b.number
    join (
        -- data expanded to one row per category id
        select *
        from data m
            cross apply listToTable(m.Category) as t
        where m.Category is not null
    ) c
        on a.category = c.number