Migrate a Column into Multiple Columns - sql

I have a table with the following columns 'ID, LIST_OF_VALUES'.
Example data is:
ID| LIST_OF_VALUES
--+----------------------------
1 | firstval-secondval-thirdval
2 | val1-val2
3 | val10-val20-val30
4 | singleval
I would like to select the data like this:
ID | VAL1      | VAL2      | VAL3
---+-----------+-----------+---------
1  | firstval  | secondval | thirdval
2  | val1      | val2      | NULL
3  | val10     | val20     | val30
4  | singleval | NULL      | NULL
I am aware of the STRING_SPLIT function. I have tried using it in various ways with Cross Apply, but I can't seem to get the result I want.
I know I can do this using a mess of SUBSTR/INDEX, but I am just curious if STRING_SPLIT offers a more elegant solution.

Just another option
Example of XML Option
Select A.ID
,Val1 = tmpXML.value('/x[1]','varchar(100)')
,Val2 = tmpXML.value('/x[2]','varchar(100)')
,Val3 = tmpXML.value('/x[3]','varchar(100)')
from YourTable A
Cross Apply ( values ( Cast('<x>' + replace([LIST_OF_VALUES],'-','</x><x>')+'</x>' as xml) ) ) B(tmpXML)
Returns
ID  Val1       Val2       Val3
1   firstval   secondval  thirdval
2   val1       val2       NULL
3   val10      val20      val30
4   singleval  NULL       NULL
Example of JSON Option - as suggested by @PanagiotisKanavos (SQL Server 2016+)
Select A.ID
,Val1 = JSON_VALUE(S,'$[0]')
,Val2 = JSON_VALUE(S,'$[1]')
,Val3 = JSON_VALUE(S,'$[2]')
from #YourTable A
Cross Apply ( values ( '["'+replace(replace([LIST_OF_VALUES],'"','\"'),'-','","')+'"]' ) ) B(S)

Assuming you don't have duplicates, you can use it . . . but it is not trivial:
select t.*, s.*
from t cross apply
     (select max(case when seqnum = 1 then value end) as val1,
             max(case when seqnum = 2 then value end) as val2,
             max(case when seqnum = 3 then value end) as val3
      from (select s.value,
                   row_number() over (order by charindex('-' + s.value + '-', '-' + t.list_of_values + '-')) as seqnum
            from string_split(t.list_of_values, '-') s
           ) s
     ) s;
Unfortunately, string_split() doesn't provide the ordering. This recreates it using charindex().
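For completeness: on SQL Server 2022 and Azure SQL Database, STRING_SPLIT accepts a third enable_ordinal argument, which removes the ordering problem entirely. A minimal sketch (the table name YourTable is assumed, as in the first answer):
SELECT t.ID,
       MAX(CASE WHEN s.ordinal = 1 THEN s.value END) AS VAL1,
       MAX(CASE WHEN s.ordinal = 2 THEN s.value END) AS VAL2,
       MAX(CASE WHEN s.ordinal = 3 THEN s.value END) AS VAL3
FROM YourTable t
CROSS APPLY STRING_SPLIT(t.LIST_OF_VALUES, '-', 1) s   -- 1 = enable_ordinal
GROUP BY t.ID;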

One simple, efficient, and scalable way would be to use an ordinal splitter such as dbo.DelimitedSplit8K (or dbo.DelimitedSplitN4K for nchar/nvarchar). Then the query would be something like this.
dbo.DelimitedSplit8K TVF
CREATE FUNCTION dbo.DelimitedSplit8K
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l;
Query
select t.id,
max(case when ds.ItemNumber=1 then ds.Item end) as val1,
max(case when ds.ItemNumber=2 then ds.Item end) as val2,
max(case when ds.ItemNumber=3 then ds.Item end) as val3
from tTable t
cross apply
dbo.DelimitedSplit8K(t.LIST_OF_VALUES, '-') ds
group by t.id
order by t.id;
[EDIT] Here is an alternate method which produces the same output but does not use the DelimitedSplit8K function. This is the same approach as Gordon's but with an outer GROUP BY clause.
;with charindex_split_cte(id, Item, ItemNumber) as (
select t.id, sp.value,
row_number() over (partition by t.id order by charindex('-' + sp.value + '-', '-' + t.list_of_values + '-'))
from tTable t
cross apply string_split(t.list_of_values, '-') sp)
select id,
max(case when ItemNumber=1 then Item end) as val1,
max(case when ItemNumber=2 then Item end) as val2,
max(case when ItemNumber=3 then Item end) as val3
from charindex_split_cte
group by id
order by id;
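Note that both CHARINDEX-based orderings above inherit the "no duplicates" assumption mentioned earlier: repeated elements all get the same CHARINDEX, so their relative position is lost. A quick hypothetical illustration:
DECLARE @s varchar(100) = 'x-y-x';   -- hypothetical list with a repeated element

SELECT sp.value,
       ROW_NUMBER() OVER (ORDER BY CHARINDEX('-' + sp.value + '-', '-' + @s + '-')) AS ItemNumber
FROM STRING_SPLIT(@s, '-') sp;
-- Both 'x' rows get CHARINDEX = 1, so they are numbered 1 and 2 and 'y' gets 3,
-- even though the true order is x, y, x. An ordinal splitter avoids this.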

Related

SQL Subquery with delimiter

I need to be able to split one string by the delimiter * into separate columns without including *
The column y from table x looks like this:
column y
*1HS*AB*GXX*123*02*PA45*2013-08-10*
*1R1*B*GX*123*02*PA45*2013-08-10*
*1HS*B*GX*13*01*PA45*2013-08-01*
*1P*C*GXX*123*02*PA45*2013-08-10*
STRING_SPLIT is not available.
The outcome should be this:
Column1 Column2 Column3 Column4 Column5 Column6 Column7
1HS AB GXX 123 2 PA45 10-08-2013
1R1 B GX 123 2 PA45 10-08-2013
1HS B GX 13 1 PA45 01-08-2013
1P C GXX 123 2 PA45 10-08-2013
You could use the query below:
-- note: REGEXP_SUBSTR is not part of classic T-SQL (it is available in Oracle and other
-- regex-capable engines); the pattern pulls the Nth run of characters between '*' delimiters
select REGEXP_SUBSTR(y, '[^*]+', 1, 1) AS column1
     , REGEXP_SUBSTR(y, '[^*]+', 1, 2) AS column2
     , REGEXP_SUBSTR(y, '[^*]+', 1, 3) AS column3
     , REGEXP_SUBSTR(y, '[^*]+', 1, 4) AS column4   -- continue up to column7 as needed
from YOUR_TABLE
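Since REGEXP_SUBSTR is not available in classic SQL Server, the XML trick shown in the first answer of this document can be adapted instead. A sketch, assuming table x with column y from the question, and values free of XML-special characters (& or <); because each string starts with '*', the first /x element is empty and the real values begin at /x[2]:
SELECT Column1 = v.xm.value('/x[2]', 'varchar(50)'),
       Column2 = v.xm.value('/x[3]', 'varchar(50)'),
       Column3 = v.xm.value('/x[4]', 'varchar(50)'),
       Column4 = v.xm.value('/x[5]', 'varchar(50)'),
       Column5 = v.xm.value('/x[6]', 'varchar(50)'),
       Column6 = v.xm.value('/x[7]', 'varchar(50)'),
       Column7 = v.xm.value('/x[8]', 'varchar(50)')
FROM x
CROSS APPLY (VALUES (CAST('<x>' + REPLACE(x.y, '*', '</x><x>') + '</x>' AS xml))) v(xm);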
Unfortunately, string_split() does not guarantee that it preserves the ordering of the values, and older versions of SQL Server offer no other built-in split function that reports each element's position.
So, I recommend using a recursive CTE for this purpose:
with t as (
      select *
      from (values ('*1HS*AB*GXX*123*02*PA45*2013-08-10*'),
                   ('*1HS*B*GX*13*01*PA45*2013-08-01*')) v(str)
     ),
     cte as (
      -- anchor member: strip the leading '*' so each recursive step peels off exactly one value
      select convert(varchar(max), null) as val, 0 as lev,
             convert(varchar(max), stuff(str, 1, 1, '')) as rest,
             row_number() over (order by (select null)) as id
      from t
      union all
      select left(rest, charindex('*', rest) - 1), lev + 1,
             stuff(rest, 1, charindex('*', rest), ''), id
      from cte
      where rest <> '' and lev < 10
     )
select max(case when lev = 1 then val end) as col1,
max(case when lev = 2 then val end) as col2,
max(case when lev = 3 then val end) as col3,
max(case when lev = 4 then val end) as col4,
max(case when lev = 5 then val end) as col5,
max(case when lev = 6 then val end) as col6,
max(case when lev = 7 then val end) as col7
from cte
where lev > 0
group by cte.id;
Here is a db<>fiddle.
Assuming you can add a table-valued function to your database, Jeff Moden's string split function (DelimitedSplit8K) is the best approach I've encountered. It will allow you to maintain order as well.
Find details here
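For reference, a sketch of how such an ordinal splitter (the dbo.DelimitedSplit8K function shown earlier in this document) could be applied to this question's data, assuming table x and column y from the question. Because every string starts with '*', the splitter's first item is an empty string, so the real values are items 2 through 8; grouping is on y itself since no key column was shown:
SELECT Column1 = MAX(CASE WHEN ds.ItemNumber = 2 THEN ds.Item END),
       Column2 = MAX(CASE WHEN ds.ItemNumber = 3 THEN ds.Item END),
       Column3 = MAX(CASE WHEN ds.ItemNumber = 4 THEN ds.Item END),
       Column4 = MAX(CASE WHEN ds.ItemNumber = 5 THEN ds.Item END),
       Column5 = MAX(CASE WHEN ds.ItemNumber = 6 THEN ds.Item END),
       Column6 = MAX(CASE WHEN ds.ItemNumber = 7 THEN ds.Item END),
       Column7 = MAX(CASE WHEN ds.ItemNumber = 8 THEN ds.Item END)
FROM x
CROSS APPLY dbo.DelimitedSplit8K(x.y, '*') ds
GROUP BY x.y;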

SQL extract string where it starts with specific character

I have a column that includes strings that are separated by space and commas. It looks like:
Column1
----------------------------
T1234, C1234, D1234, C1234
E1234, C1234
I need a SQL query to extract anything that starts with C. So the result would look like:
Column1
--------------
C1234, C1234
C1234
This is also an opportunity to use a recursive CTE:
with t as (
select 'T1234, C1234, D1234, C1234' as col1 union all
select 'E1234, C1234'
),
cte as (
select col1,
convert(varchar(max), (case when col1 like 'C%' then ', ' + left(col1, charindex(',', col1 + ',') - 1 ) else '' end)) as c_list,
convert(varchar(max), stuff(col1, 1, charindex(',', col1 + ',') + 1, '')) as rest,
1 as lev
from t
union all
select col1,
c_list + (case when rest like 'C%' then ', ' + left(rest, charindex(',', rest + ',') - 1 ) else '' end) ,
convert(varchar(max), stuff(rest, 1, charindex(',', rest + ',') + 1, '')) as rest,
lev + 1
from cte
where rest > '' and lev < 10
)
select stuff(c_list, 1, 2, '') as c_list
from (select cte.*, row_number() over (partition by col1 order by lev desc) as seqnum
from cte
) cte
where seqnum = 1;
This approach does not require extracting strings and then reaggregating. It also guarantees that the values remain in the same order as in the original data.
Here is a db<>fiddle.
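On SQL Server 2017+ the re-aggregation can also be sketched with STRING_SPLIT and STRING_AGG; unlike the recursive CTE above, this does not guarantee that the kept values stay in their original order (which happens not to matter for the sample data, where the C-values are identical):
WITH t AS (
      SELECT 'T1234, C1234, D1234, C1234' AS col1 UNION ALL
      SELECT 'E1234, C1234'
     )
SELECT t.col1,
       (SELECT STRING_AGG(LTRIM(s.value), ', ')
        FROM STRING_SPLIT(t.col1, ',') s
        WHERE LTRIM(s.value) LIKE 'C%') AS c_list
FROM t;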
declare #t table (c varchar(200) PRIMARY KEY)
insert into #t values ('T1234, C1234, D1234, C1234'), ('E1234, C1234')
;with cte1 as ( -- cte1 - number the rows. Order by PK
select row_number() over(order by c) rn, *
from #t
), cte2 as ( -- cte2 - turn comma delimited lists into rows of trimmed values
select rn, replace(ca.value, ' ', '') val
from cte1
cross apply (
select value from string_split((select c from cte1 cte1_inner where cte1_inner.rn = cte1.rn), N',')
)ca
), cte3 as ( -- cte3 - get distinct row numbers and re-concat vals in a subquery
select distinct rn, (
SELECT STUFF(
(
select ', ' + val
from cte2 cte2_inner
where val like 'c%' and cte2_inner.rn = cte2.rn
for xml path('')
), 1, 2, '')
)concatenated
from cte2
)
select concatenated
from cte3
Returns:
concatenated
C1234
C1234, C1234
Here's what each CTE returns:
cte1:
rn c
1 E1234, C1234
2 T1234, C1234, D1234, C1234
cte2:
rn val
1 E1234
1 C1234
2 T1234
2 C1234
2 D1234
2 C1234
cte3:
rn concatenated
1 C1234
2 C1234, C1234

SQL - Two Columns into One Distinct Ordered Column

If I have a table like this:
Col 1 | Col 2
-------------
A | 1
A | 2
B | 1
C | 1
C | 2
C | 3
How can I write a query to pull one column that looks like this --
Col 1
------
A
1
2
B
1
C
1
2
3
SELECT col1
FROM Some_Table_You_Did_Not_Name
UNION ALL
SELECT col2
FROM Some_Table_You_Did_Not_Name
If the order matters in your example then you want this:
WITH data AS
(
    SELECT col1, col2,
           ROW_NUMBER() OVER (ORDER BY col1, col2) AS RN,
           ROW_NUMBER() OVER (PARTITION BY col1 ORDER BY col2) AS GRN
    FROM Some_Table_You_Did_Not_Name
)
SELECT col
FROM (
    SELECT col1 AS col, RN, 1 AS O
    FROM data
    WHERE GRN = 1                                   -- emit each col1 header only once, before its first value
    UNION ALL
    SELECT CAST(col2 AS VARCHAR(10)) AS col, RN, 2 AS O   -- cast in case col2 is numeric
    FROM data
) JC_IS_THAT_GUY
ORDER BY RN ASC, O ASC, col ASC
You can use a query like the following:
SELECT Col1
FROM (
SELECT DISTINCT Col1, Col1 AS Col2, 0 AS grp
FROM mytable
UNION ALL
SELECT Col2 AS Col1, Col1 AS Col2, 1 AS grp
FROM mytable) AS t
ORDER BY Col2, grp, Col1
Demo here
There is absolutely no need to do a UNION, UNION ALL or reference the table more than once to unpivot data...
-- if Col2 is always a well-ordered sequence like the test data...
SELECT
Col1 = x.Value
FROM
#TestData td
CROSS APPLY ( VALUES (IIF(td.Col2 = 1, td.Col1, NULL)), (CAST(td.Col2 AS CHAR(1))) ) x (Value)
WHERE
x.Value IS NOT NULL;
-- if it isn't...
WITH
cre_Add_RN AS (
SELECT
td.Col1,
td.Col2,
RN = ROW_NUMBER() OVER (PARTITION BY td.Col1 ORDER BY td.Col2)
FROM
#TestData td
)
SELECT
x.Value
FROM
cre_Add_RN arn
CROSS APPLY ( VALUES (IIF(arn.RN = 1, arn.Col1, NULL)), (CAST(arn.Col2 AS CHAR(1))) ) x (Value)
WHERE
x.Value IS NOT NULL;
HTH,
Jason
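For reference, a hypothetical setup for the #TestData table used by the two queries above, populated from the question's sample rows (column types are assumed):
CREATE TABLE #TestData (Col1 char(1) NOT NULL, Col2 int NOT NULL);

INSERT INTO #TestData (Col1, Col2)
VALUES ('A', 1), ('A', 2), ('B', 1), ('C', 1), ('C', 2), ('C', 3);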

Count pair-wise occurrences in a T-SQL table

How can I count pair-wise occurrences in a SQL Server table? Please note that the order of the given sequence has to be accounted for and shouldn't be changed.
Original table:
    1  2  3  4
  -------------
1 | A  A  A  B
2 | A            # don't count
3 | B  A  A
4 | B            # don't count
Result:
1 | AA = 3
2 | AB = 1
3 | BB = 0
4 | BA = 1
In addition, the code has to work for large datasets.
Edit:
A pair in this context is a set of two horizontally adjacent values in the same row, {x[i,j], x[i,j+1]}, where i = 1,...,4 indexes the rows and j = 1,...,3 indexes the columns. Further, pairs of the form (A, null) or (B, null) shouldn't be counted. Moreover, (null, A) or (null, B) can't happen, therefore they don't have to be accounted for.
I just want to point out a pretty easy way to express this logic:
with vals as (
      select 'A' as val union all select 'B'
     ),
     pairs as (
      select t1.val as val1, t2.val as val2
      from vals t1 cross join vals t2
     )
select p.*,
       (select sum(case when [1] = val1 and [2] = val2 then 1 else 0 end +
                   case when [2] = val1 and [3] = val2 then 1 else 0 end +
                   case when [3] = val1 and [4] = val2 then 1 else 0 end)
        from original
       ) as cnt
from pairs p
order by cnt desc;
This doesn't have great performance characteristics, but that is easily fixed by using three separate subqueries and indexes on the data columns.
LiveDemo
CREATE TABLE #tab([1] NVARCHAR(100), [2] NVARCHAR(100),
[3] NVARCHAR(100), [4] NVARCHAR(100));
INSERT INTO #tab
VALUES ('A', 'A', 'A', 'B') ,('A' , NULL ,NULL ,NULL )
,('B' ,'A' ,'A', NULL),('B', NULL, NULL, NULL);
WITH cte AS
(
SELECT pair = [1] + [2] FROM #tab
UNION ALL
SELECT pair = [2] + [3] FROM #tab
UNION ALL
SELECT pair = [3] + [4] FROM #tab
), cte2 AS
(
SELECT [1] AS val FROM #tab
UNION ALL SELECT [2] FROM #tab
UNION ALL SELECT [3] FROM #tab
UNION ALL SELECT [4] FROM #tab
), all_pairs AS
(
SELECT DISTINCT a.val + b.val AS pair
FROM cte2 a
CROSS JOIN cte2 b
WHERE a.val IS NOT NULL and b.val IS NOT NULL
)
SELECT a.pair, result = COUNT(c.pair)
FROM all_pairs a
LEFT JOIN cte c
ON a.pair = c.pair
GROUP BY a.pair;
How it works:
cte creates all adjacent-column pairs: (1,2), (2,3), (3,4)
cte2 gets all the values from the four columns
all_pairs creates all possible pairs of values: AA, AB, BA, BB
Finally, grouping and COUNT give the number of occurrences.
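With the #tab sample data above, the grouped query returns the result expected in the question (row order not guaranteed):
pair  result
AA    3
AB    1
BA    1
BB    0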
EDIT:
You can concatenate result as below:
LiveDemo2
...
, final AS
(
SELECT a.pair, result = COUNT(c.pair), rn = ROW_NUMBER() OVER(ORDER BY a.pair)
FROM all_pairs a
LEFT JOIN cte c
ON a.pair = c.pair
GROUP BY a.pair
)
SELECT rn, [result] = pair + ' = ' + CAST(result AS NVARCHAR(100))
FROM final
with cte as (
select 1 as id, 'A' as [1], 'A' as [2], 'A' as [3], 'B' as [4]
union all select 2 , 'A', NULL,NULL,NULL
union all select 3 , 'B', 'A','A',NULL
union all select 4 , 'B',NULL,NULL,NULL
)
, Vals as (
select 'AA' as Val
union all select 'AB'
union all select 'BB'
union all select 'BA'
)
, UNPVT as (
/*UNPIVOT to convert the columns to be rows*/
SELECT id , VAL + LEAD(VAL) OVER (PARTITION BY ID ORDER BY SEQ) as Code
FROM (
select ID,[1],[2],[3],[4] from cte
) P
UNPIVOT (Val FOR Seq IN ([1],[2],[3],[4])
) AS UNPVT
)
select Vals.Val, count(UNPVT.Code) from UNPVT right join Vals on UNPVT.Code = Vals.Val
group by Vals.Val
cte: contains your data.
Vals: contains the pair codes to be returned.
UNPVT: converts the columns into rows and uses LEAD to build each adjacent pair.

Split string and select into new table

I have a table with a structure like this
ID pointCount pointSeries
1 282 35.1079,-111.0151,35.1088,-111.0196...
Obviously the pointSeries column is a string that holds pairs of lat/lon points.
ID Lat Lon
1 35.1079 -111.0151
1 35.1088 -111.0196
What's the best way I can do a split and select into query?
You need to have a function for splitting comma-delimited strings into separate rows. Here is the DelimitedSplit8K function by Jeff Moden.
CREATE FUNCTION [dbo].[DelimitedSplit8K](
@pString NVARCHAR(4000), @pDelimiter NCHAR(1)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
,E2(N) AS (SELECT 1 FROM E1 a, E1 b)
,E4(N) AS (SELECT 1 FROM E2 a, E2 b)
,cteTally(N) AS(
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
,cteStart(N1) AS(
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(
SELECT
s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
SELECT
ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
Then you need to pivot the result of the split to achieve the desired result:
;WITH CteSplitted AS(
SELECT
t.ID,
x.ItemNumber,
Item = CAST(x.Item AS NUMERIC(16,4)),
RN = (ROW_NUMBER() OVER(PARTITION BY ID ORDER BY ItemNumber) + 1) / 2
FROM Test t
CROSS APPLY dbo.DelimitedSplit8K(t.PointSeries, ',') x
)
SELECT
ID,
Lat = MAX(CASE WHEN ItemNumber % 2 = 1 THEN Item END),
Lon = MAX(CASE WHEN ItemNumber % 2 = 0 THEN Item END)
FROM CteSplitted
GROUP BY ID, RN
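Since the question asks to select the result into a new table, the final SELECT of the statement above can be written as a SELECT ... INTO while keeping the CteSplitted CTE unchanged; a sketch (the target name dbo.PointTable is only a placeholder):
;WITH CteSplitted AS (
    -- same CTE as above, unchanged
    SELECT t.ID, x.ItemNumber,
           Item = CAST(x.Item AS NUMERIC(16,4)),
           RN = (ROW_NUMBER() OVER(PARTITION BY ID ORDER BY ItemNumber) + 1) / 2
    FROM Test t
    CROSS APPLY dbo.DelimitedSplit8K(t.PointSeries, ',') x
)
SELECT ID,
       Lat = MAX(CASE WHEN ItemNumber % 2 = 1 THEN Item END),
       Lon = MAX(CASE WHEN ItemNumber % 2 = 0 THEN Item END)
INTO dbo.PointTable          -- creates and populates the new table
FROM CteSplitted
GROUP BY ID, RN;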