String split in SQL Server - sql

I have a question about SQL Server: how can I split a string on the - symbol and get the required format?
If an item does not contain the - symbol, keep an empty value.
Table :
-- Sample schema for the question: one row per student, with all courses packed
-- into a single ';'-separated string of "category-course" items.
CREATE TABLE [dbo].[student]
(
    [sid]    INT          NULL,
    [course] VARCHAR(500) NULL,
    [name]   VARCHAR(50)  NULL
)

-- The four sample rows, loaded with one multi-row INSERT.
INSERT INTO [dbo].[student] ([sid], [course], [name])
VALUES
    (1, N'database-sql;FE-Java', N'abc'),
    (2, N'FE-net;database-oracle;FE-python', N'xyz'),
    (3, N'test', N'axy'),
    (4, N'FE-python-java;base-mysql', N'anr')
Based on this data, I want output like this:
Sid | course |name
----+-------------------+-----
1 |sql,java |abc
2 |net,oracle,python |xyz
3 | |axy
4 |python,java,mysql |anr
I have tried with SQL like this:
-- Attempt from the question: tries to return the part of course after the first '-'.
select
sid,
-- NOTE(review): CHARINDEX takes (expressionToFind, expressionToSearch). As written, the
-- whole course string is looked up inside the one-character literal '-', which is the
-- reverse of what the question intends.
substring([course], charindex([course], '-') + 1, len([course])) course,
name
from
student
This query however is not returning the expected results.
How can I write a query to achieve this task in SQL Server?

Please try the following solution.
It will work starting from SQL Server 2016 onwards.
SQL
-- DDL and sample data population, start
-- Table variables and scalar variables carry the '@' sigil in T-SQL.
DECLARE @tbl TABLE (sid int NULL, course varchar(500) NULL, name varchar(50) NULL);
INSERT INTO @tbl (sid, course, name) VALUES
(1, N'database-sql;FE-Java', N'abc'),
(2, N'FE-net;database-oracle;FE-python', N'xyz'),
(3, N'test', N'axy'),
(4, N'FE-python-java;base-mysql', N'anr');
-- DDL and sample data population, end

DECLARE @separator CHAR(1) = ';'   -- separates items inside course
      , @comma     CHAR(1) = ','   -- separator used in the output list
      , @dash      CHAR(1) = '-';  -- separates the category prefix from the course name(s)

-- For every row: split course on @separator, keep the text after the first
-- @dash of each item ('' when the item has no dash), glue the pieces together
-- with @comma, and finally turn any remaining dash into a comma so that
-- 'FE-python-java' contributes 'python,java'.
SELECT tbl.*
     , REPLACE(
           STUFF(
               (SELECT @comma + IIF(t.pos = 0, '', SUBSTRING(ss.value, t.pos + 1, LEN(ss.value)))
                FROM @tbl AS tbl_inner
                CROSS APPLY STRING_SPLIT(tbl_inner.course, @separator) AS ss
                CROSS APPLY (SELECT CHARINDEX(@dash, ss.value)) AS t(pos)
                WHERE tbl_inner.sid = tbl.sid
                -- TYPE + .value() returns the concatenation as plain text, so
                -- characters such as '&' or '<' are not turned into XML entities.
                FOR XML PATH(''), TYPE).value('.', 'varchar(max)'),
               1, 1, ''),   -- STUFF drops the leading separator
           @dash, @comma) AS Result
FROM @tbl AS tbl;
Output
+-----+----------------------------------+------+-------------------+
| sid | course | name | Result |
+-----+----------------------------------+------+-------------------+
| 1 | database-sql;FE-Java | abc | sql,Java |
| 2 | FE-net;database-oracle;FE-python | xyz | net,oracle,python |
| 3 | test | axy | |
| 4 | FE-python-java;base-mysql | anr | python,java,mysql |
+-----+----------------------------------+------+-------------------+

use this query:
-- Builds, per student, a comma-separated list of the course names that follow
-- the category prefix of each ';'-separated item ("FE-Java" -> "Java").
--   * An item without '-' contributes nothing, so a course string with no '-'
--     at all yields an empty result (sid 3 in the sample data).
--   * Further '-' characters inside an item separate additional names
--     ("FE-python-java" -> "python,java").
-- Uses STRING_SPLIT and STRING_AGG, so it needs a SQL Server version that
-- provides both.
-- NOTE: STRING_SPLIT does not promise to return items in their original order.
SELECT
    s.sid,
    s.course,
    s.name,
    -- every item skipped -> STRING_AGG returns NULL -> show an empty value
    COALESCE(STRING_AGG(CA2.c, ','), '') AS result
FROM student AS s
CROSS APPLY STRING_SPLIT(s.course, ';') AS CA1
CROSS APPLY (
    SELECT CASE
               WHEN CHARINDEX('-', CA1.value) > 0
               THEN NULLIF(
                        REPLACE(
                            SUBSTRING(CA1.value, CHARINDEX('-', CA1.value) + 1, LEN(CA1.value)),
                            '-', ','),
                        '')
               -- no ELSE: items without '-' become NULL, which STRING_AGG ignores,
               -- so they do not leave stray commas in the list
           END AS c
) AS CA2
GROUP BY
    s.sid,
    s.course,
    s.name

-- Splits @String on @Delimiter and returns the pieces in order of occurrence.
--   @String    : text to split.
--   @Delimiter : separator text, up to 5 characters; the whole delimiter is
--                consumed at each match. A NULL or empty delimiter returns
--                @String as a single piece.
-- Returns one row per piece: OccurenceId (1-based, from IDENTITY) and SplitValue.
-- An empty or NULL @String returns no rows.
ALTER FUNCTION [dbo].[Split]
(
    @String VARCHAR(max),
    @Delimiter VARCHAR(5)
)
RETURNS @SplittedValues TABLE
(
    OccurenceId SMALLINT IDENTITY(1,1),
    SplitValue VARCHAR(max)
)
AS
BEGIN
    -- LEN() ignores trailing spaces; appending a sentinel character gives the
    -- true length of delimiters such as ' ' or ', '.
    DECLARE @DelimiterLength INT = LEN(@Delimiter + 'x') - 1;
    DECLARE @DelimiterPos INT;

    WHILE LEN(@String) > 0
    BEGIN
        SET @DelimiterPos = CHARINDEX(@Delimiter, @String);

        IF COALESCE(@DelimiterPos, 0) = 0
        BEGIN
            -- No further delimiter: the remainder is the last piece.
            INSERT INTO @SplittedValues (SplitValue)
            VALUES (SUBSTRING(@String, 1, LEN(@String)));
            SET @String = '';
        END
        ELSE
        BEGIN
            INSERT INTO @SplittedValues (SplitValue)
            VALUES (SUBSTRING(@String, 1, @DelimiterPos - 1));
            -- Continue after the complete delimiter, not just one character.
            -- LEN(@String) is only an upper bound for the length argument.
            SET @String = SUBSTRING(@String, @DelimiterPos + @DelimiterLength, LEN(@String));
        END
    END
    RETURN
END

Related

Extract multiple strings from a column

I have a description field where data is going to look like this:
ID Description Title
1234 serial numbers are *XC54566, AB2345fg, 12IUT456* blahblah
I want to extract everything inside the two asterisk and show them in a column with ',' being the delimiter that differentiates between the serial numbers. The output would then look like
ID Serial_Numbers
1234 XC54566
1234 AB2345fg
1234 12IUT456
Looking for a SQL Server query to extract this information using SELECT and some form of substring/left/right function thingy.
Hopefully a SQL select statement.
If sequence does not matter, how about a string_split() or two ?
Example
-- Returns one row per serial number found between the first pair of '*' in
-- [Description]; the list between the asterisks is comma-separated.
-- Text outside the asterisks is ignored even when it contains commas, and a
-- list holding a single serial number (no comma) is still returned.
-- Rows without a pair of asterisks produce no output.
Select A.ID
      ,Serial_Numbers = trim(C.value)
 From  YourTable A
 -- position of the opening and of the closing asterisk (0 when absent)
 Cross Apply (Select p1 = charindex('*', A.[Description])) S
 Cross Apply (Select p2 = charindex('*', A.[Description], S.p1 + 1)) E
 Cross Apply String_split(
                 substring(A.[Description]
                          ,S.p1 + 1
                           -- keep the length non-negative for rows the WHERE clause discards
                          ,case when E.p2 > S.p1 then E.p2 - S.p1 - 1 else 0 end)
                ,',') C
 Where S.p1 > 0
   and E.p2 > S.p1
   and trim(C.value) <> ''
Results
ID Serial_Numbers
1234 XC54566
1234 AB2345fg
1234 12IUT456
First, I use XML to retrieve the second element (the text between the asterisks); then, in the second step, I split that text on ',' with STRING_SPLIT.
You can use this query:
-- Step 1: replace each '*' with an element boundary and cast to XML, so the
--         text between the first two asterisks becomes the second <t> element.
-- Step 2: split that text on ',' and trim each piece.
SELECT
    src.ID,
    TRIM(item.value)
FROM T AS src
CROSS APPLY (
    SELECT CAST('<t>' + REPLACE(src.Description, '*', '</t><t>') + '</t>' AS XML).value('/t[2]', 'varchar(50)') AS A
) AS between_stars
CROSS APPLY STRING_SPLIT(between_stars.A, ',') AS item
I used the following table and data for testing
-- Test table: free-text description with the serial numbers between asterisks.
CREATE TABLE [dbo].[T]
(
    [ID]          INT           NULL,
    [Description] NVARCHAR(MAX) NULL
)

-- Three cases: a plain list, a comma outside the asterisks, a single serial number.
INSERT [dbo].[T] ([ID], [Description])
VALUES
    (1234, N'serial numbers are *XC54566, AB2345fg, 12IUT456*'),
    (5678, N'serial numbers , are *XC54566, AB2345fg, 12IUT456*'),
    (9784, N'serial numbers are *XC54566*')
-- Usage example as given in the original post (calls dbo.search, which is not shown here):
-- select * from dbo.search('XC54566, AB2345fg, 12IUT456')

-- Splits @RowData on @SplitOn and returns the trimmed pieces in order.
--   @RowData : text to split (up to 2000 characters).
--   @SplitOn : separator text, up to 5 characters; the whole separator is
--              consumed at each match.
-- Returns one row per piece: Id (1-based, from IDENTITY) and Data (piece with
-- leading/trailing spaces removed). A final row is always inserted for the
-- text after the last separator, even when that text is empty.
CREATE FUNCTION [dbo].[Split]
(
    @RowData nvarchar(2000),
    @SplitOn nvarchar(5)
)
RETURNS @RtnValue table
(
    Id int identity(1,1),
    -- as wide as the input, so a long piece cannot fail with a truncation error
    Data nvarchar(2000)
)
AS
BEGIN
    Declare @SplitOnLength int
    Declare @Pos int

    -- len() ignores trailing spaces; the sentinel keeps separators such as
    -- ', ' at their true length.
    Set @SplitOnLength = len(@SplitOn + N'x') - 1
    Set @Pos = Charindex(@SplitOn, @RowData)

    While (@Pos > 0)
    Begin
        Insert Into @RtnValue (data)
        Select Data = ltrim(rtrim(Substring(@RowData, 1, @Pos - 1)))

        -- continue after the complete separator, not just its first character
        Set @RowData = Substring(@RowData, @Pos + @SplitOnLength, len(@RowData))
        Set @Pos = Charindex(@SplitOn, @RowData)
    End

    -- whatever follows the last separator (or the whole input when none was found)
    Insert Into @RtnValue (data)
    Select Data = ltrim(rtrim(@RowData))
    Return
END

Nested while loop in SQL Server is not showing the expected result

I am trying to connect records from two different tables so I can display the data in a tabular format in an SSRS tablix.
The code below does not return the expected results.
As is, for each item in Temp_A the loop updates everything with the last item in Temp_C. Here is the code:
-- Script from the question: pairs the rows of #Temp_A with the rows of #Temp_C
-- using nested WHILE loops. #Temp_* are temp tables ('#'); the loop bounds are
-- local variables and therefore carry the '@' sigil.
-- The loop logic is reproduced as asked about, so it still yields the
-- "Incorrect result" listed after the script.
CREATE TABLE #Temp_A
(
    [ID] INT,
    [Name] VARCHAR(255)
)
INSERT INTO #Temp_A ([ID], [Name])
VALUES (1, 'A'), (2, 'B')

CREATE TABLE #Temp_C
(
    [ID] INT,
    [Name] VARCHAR(255)
)
INSERT INTO #Temp_C ([ID], [Name])
VALUES (1, 'C'), (2, 'D')

CREATE TABLE #Temp_Main
(
    [Temp_A_ID] INT,
    [Temp_A_Name] VARCHAR(255),
    [Temp_C_ID] INT,
    [Temp_C_Name] VARCHAR(255),
)

DECLARE @MIN_AID int = (SELECT MIN(ID) FROM #Temp_A)
DECLARE @MAX_AID int = (SELECT MAX(ID) FROM #Temp_A)
DECLARE @MIN_DID int = (SELECT MIN(ID) FROM #Temp_C)
DECLARE @MAX_DID int = (SELECT MAX(ID) FROM #Temp_C)

WHILE @MIN_AID <= @MAX_AID
BEGIN
    WHILE @MIN_DID <= @MAX_DID
    BEGIN
        -- one new row per (A, C) pair, with the C columns still NULL
        INSERT INTO #Temp_Main([Temp_A_ID], [Temp_A_Name])
        SELECT ID, [Name]
        FROM #Temp_A
        WHERE ID = @MIN_AID

        -- NOTE: the WHERE clause only picks the #Temp_C row. Nothing restricts
        -- which #Temp_Main rows are updated, so every row inserted so far is
        -- overwritten with the current C values.
        UPDATE #Temp_Main
        SET [Temp_C_ID] = ID, [Temp_C_Name] = [Name]
        FROM #Temp_C
        WHERE ID = @MIN_DID

        SET @MIN_DID = @MIN_DID + 1
    END
    SET @MIN_AID = @MIN_AID + 1
    SET @MIN_DID = 1   -- restart the inner counter for the next A row
END

SELECT * FROM #Temp_Main

DROP TABLE #Temp_A
DROP TABLE #Temp_C
DROP TABLE #Temp_Main
Incorrect result:
Temp_A_ID | Temp_A_Name | Temp_C_ID | Temp_C_Name
----------+-------------+-----------+---------------
1 A 2 D
1 A 2 D
2 B 2 D
2 B 2 D
Expected results:
Temp_A_ID | Temp_A_Name | Temp_C_ID | Temp_C_Name
----------+-------------+-----------+---------------
1 A 1 C
1 A 2 D
2 B 1 C
2 B 2 D
What am I missing?
You seem to want a cross join:
-- Every row of #Temp_A combined with every row of #Temp_C, ordered by both ids.
SELECT
    a.*,
    c.*
FROM #Temp_A AS a
CROSS JOIN #Temp_C AS c
ORDER BY
    a.id,
    c.id;
Here is a db<>fiddle.
There is no need to write a WHILE loop to do this.
You can use INSERT to store the result in #Temp_Main, but I don't see a need for a temporary table to hold the results of this query.

Get the id of the last record in the data SQL Server

I am trying to get the last ID from at least 4 child-parent relationships between ID's and sum all related ID's quantity. I have tried below -
-- Question script: sample parent/child data in a table variable ('@' sigil)
-- and the asker's attempt, which only looks one level up the P_ID chain.
declare @test table (ID int not null, P_ID int null, Qty int not null)
insert into @test(ID, P_ID, Qty) values
(1 , 11 , 1),
(2 , null, 3),
(11, 21 , 2),
(21, 31 , 1),
(31, null, 3),
(12, null, 4)

select
    -- the direct parent's ID when the row has one, otherwise the row's own ID
    COALESCE(T2.ID,T1.ID) as ID,
    MAX(CASE WHEN T1.P_ID is not null then T1.ID END) as OldID,
    SUM(Qty) as Qty
from
    @test T1
left join
    (select ID from @test
     GROUP By ID) T2
    on T2.ID = T1.P_ID
group by
    COALESCE(T2.ID, T1.ID)
I am getting output -
ID OldID Qty
2 NULL 3
11 1 1
12 NULL 4
21 11 1
31 21 2
But I want my output will be like this where all ID's with no Parent ID in the first row then all previous ID's will show and SUM all relevant ID's quantity -
ID OldID3 OldID2 OldID1 Qty
2 3
12 4
31 21 11 1 7
Could someone please help me to achieve this.
Thanks in advance
Hopefully, this helps you. I have not tested it thoroughly, so apologies for any bugs.
I'm using a Common Table Expression to get the hierarchy information, then using a dynamic SQL I extract the desired number of previous IDs.
-- Sample data in a table variable ('@' sigil).
DECLARE @test TABLE (ID INT NOT NULL, P_ID INT NULL, Qty INT NOT NULL);
INSERT INTO @test(ID, P_ID, Qty) VALUES
(1 , 11 , 1),
(2 , null, 3),
(11, 21 , 2),
(21, 31 , 1),
(31, null, 3),
(12, null, 4);

-- #hierarchy is a real temp table (not a variable) so that the dynamic SQL
-- executed through sp_executesql below can read it.
IF (OBJECT_ID('tempdb..#hierarchy') IS NOT NULL)
    DROP TABLE #hierarchy;
CREATE TABLE #hierarchy (
    RootID INT NOT NULL, ID INT NOT NULL, [Qty] INT NOT NULL, SeqIndex INT NOT NULL
);

-- Walk the chains from the rows without a parent (SeqIndex 0) down to their
-- descendants, tagging every row with the ID of its root.
;WITH hierarchy AS (
    SELECT ID, P_ID, Qty, ID [RootID], 0 [SeqIndex]
    FROM @test
    WHERE P_ID IS NULL
    UNION ALL
    SELECT child.ID, child.P_ID, child.Qty, parent.RootID, parent.SeqIndex + 1 [SeqIndex]
    FROM @test child
    JOIN hierarchy parent ON parent.ID=child.P_ID
)
INSERT #hierarchy
SELECT RootID, ID, Qty, SeqIndex
FROM hierarchy;

DECLARE
    @DEPTH INT = 3,   -- number of previous-ID columns to output
    @maxSeqIndex INT = (SELECT MAX(SeqIndex) FROM #hierarchy);

IF (@DEPTH = 0)
    -- totals only, no previous-ID columns
    SELECT RootID, SUM(Qty) [Qty]
    FROM #hierarchy
    GROUP BY RootID;
ELSE IF (@DEPTH > @maxSeqIndex)
    SELECT NULL
ELSE BEGIN
    DECLARE @SQL NVARCHAR(MAX) = N'
SELECT
RootID,
';
    DECLARE @idx INT = 1;
    -- One scalar subquery per level: the ID found at SeqIndex = @idx below the root.
    -- NOTE(review): the subquery returns more than one row, and the statement
    -- fails, if a node has several children at the same level.
    WHILE @idx <= @DEPTH BEGIN
        SET @SQL += N'
(SELECT ID FROM #hierarchy i WHERE i.RootID=o.RootID AND SeqIndex='+CAST(@idx as nvarchar(10))+N') [OldID'+CAST(@maxSeqIndex-@idx+1 as nvarchar(10))+N'],';
        SET @idx += 1;
    END
    SET @SQL += N'
SUM(Qty) [Qty]
FROM #hierarchy o
GROUP BY RootID;';
    EXEC sp_executesql @SQL
END
Of course, the dynamic script could be replaced with a hard-coded SQL if that is OK for you.
Note: performance has not been considered

SQL Server, Merge two records in one record

We have these tables
-- Parent table: one row per id.
CREATE TABLE tbl01
(
    [id]   INT          NOT NULL PRIMARY KEY,
    [name] NVARCHAR(50) NOT NULL
)

-- Child table: rows reference tbl01 through id; code tells the values apart.
CREATE TABLE tbl02
(
    [subId] INT          NOT NULL PRIMARY KEY,
    [id]    INT          NOT NULL REFERENCES tbl01(id),
    [val]   NVARCHAR(50) NULL,
    [code]  INT          NULL
)
If we run this query:
-- One output row per tbl02 row, together with the name of its tbl01 parent.
SELECT
    parent.id,
    parent.name,
    child.val,
    child.code
FROM tbl01 AS parent
INNER JOIN tbl02 AS child
    ON parent.id = child.id
we get these results:
-------------------------------
id | name | val | code
-------------------------------
1 | one | FirstVal | 1
1 | one | SecondVal | 2
2 | two | YourVal | 1
2 | two | OurVal | 2
3 | three | NotVal | 1
3 | three | ThisVal | 2
-------------------------------
You can see that each two rows are related to same "id"
The question is: we need for each id to retrieve one record with all val, each val will return in column according to the value of column code
if(code = 1) then val as val-1
else if (code = 2) then val as val-2
Like this:
-------------------------------
id | name | val-1 | val-2
-------------------------------
1 | one | FirstVal | SecondVal
2 | two | YourVal | OurVal
3 | three | NotVal | ThisVal
-------------------------------
Any advice?
You can use MAX and GROUP BY to achieve this:
-- Collapses the rows of each id into one row: the val with code 1 goes to
-- [val-1], the val with code 2 to [val-2]. Rows with any other code contribute
-- '' to both columns, so MAX returns '' when no row carries the code.
SELECT
    t1.id,
    t1.name,
    MAX(CASE WHEN t2.code = 1 THEN t2.val ELSE '' END) AS [val-1],
    MAX(CASE WHEN t2.code = 2 THEN t2.val ELSE '' END) AS [val-2]
FROM tbl01 AS t1
INNER JOIN tbl02 AS t2
    ON t1.id = t2.id
GROUP BY
    t1.id,
    t1.name
Is it the PIVOT operator (http://technet.microsoft.com/en-us/library/ms177410(v=sql.105).aspx) that you are looking for?
You've already got a few answers, but heres one using PIVOT as an alternative. The good thing is this approach is easy to scale if there are additional columns required later
-- SETUP TABLES
-- Table variables standing in for tbl01 / tbl02 ('@' sigil).
DECLARE @t1 TABLE (
    [id] int NOT NULL PRIMARY KEY,
    [name] nvarchar(50) NOT NULL
)
DECLARE @t2 TABLE(
    [subId] int NOT NULL PRIMARY KEY ,
    [id] int NOT NULL,
    [val] nvarchar(50) NULL,
    [code] int NULL
)
-- SAMPLE DATA
INSERT @t1 ( id, name )
VALUES ( 1, 'one'), (2, 'two'), (3, 'three')
INSERT @t2
( subId, id, val, code )
VALUES ( 1,1,'FirstVal', 1), ( 2,1,'SecondVal', 2)
,( 3,2,'YourVal', 1), ( 4,2,'OurVal', 2)
,( 5,3,'NotVal', 1), ( 6,3,'ThisVal', 2)
-- SELECT (using PIVOT)
-- The source query exposes only id, name, val and code, so PIVOT groups by
-- (id, name) and spreads val over one column per listed code.
-- Bracketed aliases are used because the names contain a '-'.
SELECT id, name, [1] AS [val-1], [2] AS [val-2]
FROM
(
    SELECT t2.id, t1.name, t2.val, t2.code
    FROM @t1 AS t1 JOIN @t2 AS t2 ON t2.id = t1.id
) AS src
PIVOT
(
    MIN(val)
    FOR code IN ([1], [2])
) AS pvt
results:
id name val-1 val-2
---------------------------------
1 one FirstVal SecondVal
2 two YourVal OurVal
3 three NotVal ThisVal
If there are always only two values, you could join them or even easier, group them:
-- One row per id: the alphabetically lowest val in [val-1], the highest in [val-2].
-- The aliases contain '-', so they must be bracketed, and the GROUP BY column
-- must be the same column that is selected (tbl01.id).
SELECT tbl01.id AS id, MIN(tbl01.name) AS name, MIN(tbl02.val) AS [val-1], MAX(tbl02.val) AS [val-2]
FROM tbl01
INNER JOIN tbl02 ON tbl01.id = tbl02.id
GROUP BY tbl01.id
note: this query will always put the lowest value in the first column and highest in the second, if this is not wanted: use the join query:
Join query
If you always want code 1 in the first column and code 2 in the second:
-- Joins tbl02 twice, once per code, so that the code-1 value always lands in
-- [val-1] and the code-2 value in [val-2]. Each join carries a single ON clause
-- that combines the key match with the code filter.
-- Inner joins: an id missing either code is not returned.
SELECT tbl01.id AS id, tbl01.name AS name, code1.val AS [val-1], code2.val AS [val-2]
FROM tbl01
INNER JOIN tbl02 AS code1
    ON tbl01.id = code1.id
    AND code1.code = 1
INNER JOIN tbl02 AS code2
    ON tbl01.id = code2.id
    AND code2.code = 2
Variable amount of columns
You cannot get an variable amount of columns, only when you do this by building your query in code or t-sql stored procedures.
My advice:
If it's always two values: join them in the query; if not, let your server-side code transform the data (or, even better, find a way that makes it unnecessary to transform the data).
Try this - it uses the PIVOT function, but it also creates the dynamic columns depending on code:
-- Builds a PIVOT whose column list ([value1], [value2], ...) is derived from the
-- data: one column per row position within an id.
-- @ColumnString / @sql are local variables ('@'); #ColumnValue is a temp table ('#').
-- Both variables are varchar(max) so that a long column list cannot be
-- silently truncated.
DECLARE @ColumnString varchar(max)
DECLARE @sql varchar(max)

CREATE TABLE #ColumnValue
(
    Value varchar(500)
)

-- Distinct bracketed column names, numbered per id.
-- NOTE(review): ORDER BY id inside PARTITION BY id does not define an order
-- within the partition, so which val gets which number is not deterministic.
INSERT INTO #ColumnValue (Value)
SELECT DISTINCT '[' + 'value' + Convert(Varchar(20),ROW_NUMBER() Over(Partition by id Order by id )) + ']'
FROM Test

-- Concatenate the names into a comma-separated list for the IN (...) clause.
SELECT @ColumnString = COALESCE(@ColumnString + ',', '') + Value
FROM #ColumnValue

Drop table #ColumnValue

SET @sql =
'
SELECT *
FROM
(
SELECT
id,name,val,''value'' + Convert(Varchar(20),ROW_NUMBER() Over(Partition by id Order by id ))as [values]
FROM Test
) AS P
PIVOT
(
MAX(val) FOR [values] IN ('+@ColumnString+')
) AS pv
'
--print @sql
EXEC (@sql)

SQL query to show repeating data from child records in columns

I have the following tables in a SQL Server 2000 database:
Master
MasterID | Details | [other fields]
=====================================
PK (int) | Free text | ...
LogTable
LogID | MasterID | UserID | LogDate | LogText
==========================================================
PK (int) | FK (int) | VarChar(2)| Date stamp | Free text
There may be many Log entries for each master record.
I have a query which extracts the most recent three associated Log entries for each Master row as shown below. Note that appropriate conversion and formatting is performed to achieve the LogData concatenation (omitted for clarity):
-- For every master row, returns up to three rows: one for each of its three
-- most recent log entries (by LogDate), concatenated into LogData.
SELECT
    mst.MasterID,
    mst.Details,
    lg.LogDate + lg.UserID + lg.LogText AS LogData
FROM MasterTable mst
INNER JOIN LogTable lg
    ON mst.MasterID = lg.MasterID
    AND lg.LogID IN (
        SELECT TOP 3 recent.LogID
        FROM LogTable recent
        WHERE recent.MasterID = mst.MasterID
        ORDER BY recent.LogDate DESC
    )
This produces output like this:
MasterID | Details | LogData
========================================================
1 | First | 05/11/2012 AB Called Client
2 | Second | 08/11/2012 CD Client Visit
2 | Second | 07/11/2012 CD Called Client
2 | Second | 05/11/2012 AB Called Client
What I need to achieve is showing the data from the second table as columns in the output, all reported against each single master record, thus avoiding repeated data. Like so:
MasterID | Details | LogData1 | LogData2 | LogData3
===========================================================================================================
1 | First | 05/11/2012 AB Called Client | (null) | (null)
2 | Second | 08/11/2012 CD Client Visit | 07/11/2012 CD Called Client | 05/11/2012 AB Called Client
Note that in the real world requirement, this solution will be part of flattening 5 tables with the output consisting of approx 20,000 rows and 90 columns of data.
Thanks in advance.
I'm going to post this, just to show it can be done, but HIGHLY SUGGEST, not do it through SQL. Should be done through the UI that's displaying to be more dynamic on your columns. Even then, I would design this differently.
-- create master table
-- Demo script: flattens up to three log entries per master row into the
-- columns LogData1..LogData3 by walking the log table in LogID order.
-- All tables here are table variables and therefore carry the '@' sigil.

-- create master table
DECLARE @MasterTable TABLE (
    [MasterID] [int] IDENTITY (1, 1) NOT NULL ,
    [Details] [varchar] (50) ,
    [AdditionalField_1] [varchar] (50) ,
    [AdditionalField_n] [varchar] (50)
)
-- create log table
DECLARE @LogTable TABLE (
    [LogID] [int] IDENTITY (1, 1) NOT NULL ,
    [MasterID] [int] NULL ,
    [UserID] [varchar] (2) ,
    [LogDate] [datetime] NULL ,
    [LogText] [varchar] (50)
)
-- insert into master table
INSERT INTO @MasterTable (Details)
VALUES ('First')
INSERT INTO @MasterTable (Details)
VALUES ('Second')
-- insert into log table
INSERT INTO @LogTable (MasterID, UserID, LogDate, LogText)
VALUES (1, 'AB', '05/11/2012', 'Called Client')
INSERT INTO @LogTable (MasterID, UserID, LogDate, LogText)
VALUES (2, 'AB', '05/11/2012', 'Called Client')
INSERT INTO @LogTable (MasterID, UserID, LogDate, LogText)
VALUES (2, 'CD', '07/11/2012', 'Called Client')
INSERT INTO @LogTable (MasterID, UserID, LogDate, LogText)
VALUES (2, 'CD', '08/11/2012', 'Client Visit')

-- create table to display data
-- LogData columns are as wide as @NewValue so the formatted
-- "date user: text" string always fits.
DECLARE @MyTemp TABLE (MasterID INT, Details VARCHAR(50), LogData1 VARCHAR(100), LogData2 VARCHAR(100), LogData3 VARCHAR(100))
INSERT INTO @MyTemp SELECT MasterID, Details, NULL, NULL, NULL FROM @MasterTable

-- create vars
DECLARE @ID INT, @NewID INT, @MasterID INT, @NewValue VARCHAR(100)
SET @ID = 0   -- last LogID processed

-- loop through data
WHILE @ID > -1
BEGIN
    -- clear vars
    SELECT @NewID = NULL, @MasterID = NULL, @NewValue = NULL

    -- get the next log record; ORDER BY makes "next" well defined
    SELECT TOP 1
          @NewValue = CONVERT(VARCHAR(10), LogDate, 103) + ' ' + UserID + ': ' + LogText
        , @MasterID = MasterID
        , @NewID = LogID
    FROM @LogTable
    WHERE LogID > @ID
    ORDER BY LogID

    -- if no data, exit loop
    IF @NewID IS NULL
        BREAK

    -- Advance unconditionally. Doing this inside the UPDATE would leave @ID
    -- unchanged, and loop forever, whenever the UPDATE matches no row.
    SET @ID = @NewID

    -- put the value into the first LogData column that is still empty;
    -- all conditions see the column values as they were before this UPDATE
    UPDATE m
    SET   LogData1 = CASE WHEN m.LogData1 IS NULL
                          THEN @NewValue ELSE m.LogData1 END
        , LogData2 = CASE WHEN m.LogData1 IS NOT NULL AND m.LogData2 IS NULL
                          THEN @NewValue ELSE m.LogData2 END
        , LogData3 = CASE WHEN m.LogData1 IS NOT NULL AND m.LogData2 IS NOT NULL AND m.LogData3 IS NULL
                          THEN @NewValue ELSE m.LogData3 END
    FROM @MyTemp m
    WHERE m.MasterID = @MasterID
END

--display all data
SELECT * FROM @MyTemp