I have a table with several rows of data like this :
16 W:\2-Work\ALBO\00_Proposal\ALxO_Amendement #1_20091022_signed.pdf
17 W:\2-Work\ALBO\00_Proposal\Level1\ALBO_Amendment #1_20110418.docx
18 W:\2-Work\ALBO\00_Proposal\A\BR\T\X_#1_20110418_final.docx
19 W:\2-Work\ALBO\MyOptionl\AO_Amendment_2 August 2013.docx
I have created columns from Col1 to Col10
I would like to separate each value with the delimiter '\'
The idea is to have on each column :
Col1 | Col2 | Col3 | Col4 | Col5 |etc...
W: 2-Work ALBO 00_Proposal ALxO_Amendement #1_20091022_signed.pdf
I know how to use charindex and substring but the number of '\' are different on each line (8500 rows).
Could you help me?
I'm using Microsoft SQL Server 2012.
Thank you very much
Edit 2014/06/24
My goal is to generate an XML of the full path and split path.
Actually, here is my idea :
1 - Identify all the ID in a temporary table to do loop
--On déclare une table tempo
declare #IdTable Table (
id int,
src nvarchar(max))
--On injecte tous les id existant de la table
insert into #IdTable (id, src)
select id, src from albo
--on déclare l'id de début en commencant par le plus petit
declare #id int = (select min(id) from ALBO)
--Tnat qu'il reste des ID on continue la boucle
while #id is not null
begin
print #id
select #id = min(id) from #IdTable where ID > #id
end
--Fin de la boucle des ID
2 - Split each row and update column (Colx => The Clolumns have been created before)
This code should be placed into my previous loop.
Declare #products varchar(max) = 'W:\2-Work\ALBO\13_WP Reporting\13_07_Monthly reports\13_07_01 Archives\2012\201211\Draft\ALBO-MR-201211\gp_scripts\v1\Top10_duree_final.txt'
Declare #individual varchar(max) = null
WHILE LEN(#products) > 0
BEGIN
IF PATINDEX('%\%',#products) > 0
BEGIN
SET #individual = SUBSTRING(#products, 0, PATINDEX('%\%',#products))
select #individual --i have to make and update with the ID
SET #products = SUBSTRING(#products, LEN(#individual + '\') + 1,
LEN(#products))
END
ELSE
BEGIN
SET #individual = #products
SET #products = NULL
print #individual
END
END
As others have said, this probably isn't the best way to do things, if you explain what you'll be doing with the results it might help us provide a better option
[Also, for some reason the colours of the code below are showing up odd, so copy and paste it into your Sql server to see it better]
drop table #Path
create table #Path (item bigint,location varchar(1000))
insert into #Path
select 16 ,'W:\2-Work\ALBO\00_Proposal\ALxO_Amendement #1_20091022_signed.pdf' union
select 17 ,'W:\2-Work\ALBO\00_Proposal\Level1\ALBO_Amendment #1_20110418.docx' union
select 18 ,'W:\2-Work\ALBO\00_Proposal\A\BR\T\X_#1_20110418_final.docx' union
select 19 ,'W:\2-Work\ALBO\MyOptionl\AO_Amendment_2 August 2013.docx'
select * from #Path;
with Path_Expanded(item,subitem,location, start, ending, split)
as(
select item
, 1 --subitem begins at 1
, location -- full location path
, 0 --start searching the file from the 0 position
, charindex('\',location) -- find the 1st '\' charactor
, substring(location,0,charindex('\',location)) --return the string from the start position, 0, to the 1st '\' charactor
from #Path
union all
select item
, subitem+1 --add 1 to subitem
, location -- full location path
, ending+1 -- start searching the file from the position after the last '\' charactor
, charindex('\',location,ending+1)-- find the 1st '\' charactor that occurs after the last '\' charactor found
, case when charindex('\',location,ending+1) = 0 then substring(location,ending+1,1000) --if you cant find anymore '\', return everything else after the last '\'
else substring(location,ending+1, case when charindex('\',location,ending+1)-(ending+1) <= 0 then 0
else charindex('\',location,ending+1)-(ending+1) end )--returns the string between the last '\' charactor and the next '\' charactor
end
from Path_Expanded
where ending > 0 --stop once you can't find anymore '\' charactors
)
--pivots the results
select item
, max(case when subitem = 1 then split else '' end) as col1
, max(case when subitem = 2 then split else '' end) as col2
, max(case when subitem = 3 then split else '' end) as col3
, max(case when subitem = 4 then split else '' end) as col4
, max(case when subitem = 5 then split else '' end) as col5
, max(case when subitem = 6 then split else '' end) as col6
, max(case when subitem = 7 then split else '' end) as col7
, max(case when subitem = 8 then split else '' end) as col8
, max(case when subitem = 9 then split else '' end) as col9
, max(case when subitem = 10 then split else '' end) as col10
from Path_Expanded
group by item
you might prefer to have each folder on its own row, if so replace the pivot part above with the below query instead
select item
, subitem
, location
, split from Path_Expanded where item = 16
The following query will get what you are looking for; as others have noted, it's not a particularly good design. For example, what happens when you're looking for the file name and it's in a different column each time?
Regardless, this will do what you asked for (and maybe even what you want):
-- Test Data
CREATE TABLE #FilePath (FileNumber INT IDENTITY(1,1), FilePath VARCHAR(1000))
INSERT INTO #FilePath (FilePath)
SELECT 'W:\2-Work\ALBO\00_Proposal\ALxO_Amendement #1_20091022_signed.pdf' UNION
SELECT 'W:\2-Work\ALBO\00_Proposal\Level1\ALBO_Amendment #1_20110418.docx' UNION
SELECT 'W:\2-Work\ALBO\00_Proposal\A\BR\T\X_#1_20110418_final.docx' UNION
SELECT 'W:\2-Work\ALBO\MyOptionl\AO_Amendment_2 August 2013.docx'
GO
-- Numbers CTE
WITH Numbers AS
(
SELECT n = 1
UNION ALL
SELECT n + 1
FROM Numbers
WHERE n+1 <= 1000 -- set this to the maximum length of your file path
)
SELECT
FilePath,
[1] AS Col1,
[2] AS Col2,
[3] AS Col3,
[4] AS Col4,
[5] AS Col5,
[6] AS Col6,
[7] AS Col7,
[8] AS Col8,
[9] AS Col9,
[10] AS Col10
FROM
(
SELECT
FilePath,
ROW_NUMBER() OVER (PARTITION BY FilePath ORDER BY n) RowNum,
CAST(LTRIM(RTRIM(NULLIF(SUBSTRING('\' + FilePath + '\' , n , CHARINDEX('\' , '\' + FilePath + '\' , n) - n) , ''))) AS VARCHAR(1000)) FolderName
FROM Numbers, #FilePath
WHERE
n <= Len('\' + FilePath + '\') AND SubString('\' + FilePath + '\' , n - 1, 1) = '\' AND
CharIndex('\' , '\' + FilePath+ '\' , n) - n > 0
)P
PIVOT
(MAX(FolderName) FOR RowNum IN
([1],[2],[3],[4],[5],[6],[7],[8],[9],[10])
) UP
OPTION (MAXRECURSION 1000)-- set this to the maximum length of your file path
-- Clean up
DROP TABLE #FilePath
One way (de-dupes):
;with T(ordinal, path, starts, pos) as (
select 1, path, 1, charindex('\', path) from #tbl
union all
select ordinal + 1, path, pos + 1, charindex('\', path, pos + 1)
from t where pos > 0
)
select [1],[2],[3],[4],[5],[6],[7],[8],[9],[10] from (
select
ordinal, path, substring(path, starts, case when pos > 0 then pos - starts else len(path) end) token
from T
) T2
pivot (max(token) for ordinal in ([1],[2],[3],[4],[5],[6],[7],[8],[9],[10])) T3
Related
I need to delimit #uid by "-". The issue is my data set has "--1" and I need it be treated as "-1"
I need #uid = '1585-1586--1-5417-2347-8865' to output this:
Instead of:
How can I achieve this in SQL?
The answer you have helps you a little here, however, with no definition of [dbo].[fnSplit] doesn't help any one else.
If we can assume that the data is well defined (has 6 columns), then we could "spam" some CHARINDEX functions to do this. You will, as shown in the answer, need to replace all the delimiters and then reinsert the value of - for the double delimiter:
DECLARE #UID varchar(30) = '1585-1586--1-5417-2347-8865';
DECLARE #Delimiter char(1) = '-';
SELECT SUBSTRING(ca.FixedUID,1,C1.I-1) AS Col1,
SUBSTRING(ca.FixedUID,C1.I+1, C2.I-C1.I-1) AS Col2,
SUBSTRING(ca.FixedUID,C2.I+1, C3.I-C2.I-1) AS Col3,
SUBSTRING(ca.FixedUID,C3.I+1, C4.I-C3.I-1) AS Col4,
SUBSTRING(ca.FixedUID,C4.I+1, C5.I-C4.I-1) AS Col5,
SUBSTRING(ca.FixedUID,C5.I+1, LEN(ca.FixedUID)-C5.I) AS Col6
FROM (VALUES(#UID))V([UID])
CROSS APPLY (VALUES(REPLACE(REPLACE(V.UID,#Delimiter,'|'),'||','|' + #Delimiter)))ca(FixedUID)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID)))C1(I)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID,C1.I+1)))C2(I)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID,C2.I+1)))C3(I)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID,C3.I+1)))C4(I)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID,C4.I+1)))C5(I);
Of course, if you had a "empty" value, then this will fail:
DECLARE #UID varchar(30) = '1585--71-5417-2347-8865';
DECLARE #Delimiter char(1) = '-';
SELECT SUBSTRING(ca.FixedUID,1,C1.I-1) AS Col1,
SUBSTRING(ca.FixedUID,C1.I+1, C2.I-C1.I-1) AS Col2,
SUBSTRING(ca.FixedUID,C2.I+1, C3.I-C2.I-1) AS Col3,
SUBSTRING(ca.FixedUID,C3.I+1, C4.I-C3.I-1) AS Col4,
SUBSTRING(ca.FixedUID,C4.I+1, C5.I-C4.I-1) AS Col5,
SUBSTRING(ca.FixedUID,C5.I+1, LEN(ca.FixedUID)-C5.I) AS Col6
FROM (VALUES(#UID))V([UID])
CROSS APPLY (VALUES(REPLACE(REPLACE(V.UID,#Delimiter,'|'),'||','|' + #Delimiter)))ca(FixedUID)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID)))C1(I)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID,C1.I+1)))C2(I)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID,C2.I+1)))C3(I)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID,C3.I+1)))C4(I)
CROSS APPLY (VALUES(CHARINDEX('|',ca.FixedUID,C4.I+1)))C5(I);
Invalid length parameter passed to the LEFT or SUBSTRING function.
And hence why a delimiter than can appear in your data should never be used (however, one can hope that as these all appear to be integer values then a NULL value wouldn't exist and there would be a 0 instead: '1585-0-71-5417-2347-8865').
If you used a string splitter like DelimitedSpluit8K_LEAD then you could Pivot (and unpivot) the data fine, but the values would be in the wrong positions with the above example:
SELECT MAX(CASE DS.ItemNumber WHEN 1 THEN DS.Item END) AS Col1,
MAX(CASE DS.ItemNumber WHEN 2 THEN DS.Item END) AS Col2,
MAX(CASE DS.ItemNumber WHEN 3 THEN DS.Item END) AS Col3,
MAX(CASE DS.ItemNumber WHEN 4 THEN DS.Item END) AS Col4,
MAX(CASE DS.ItemNumber WHEN 5 THEN DS.Item END) AS Col5,
MAX(CASE DS.ItemNumber WHEN 6 THEN DS.Item END) AS Col6
FROM (VALUES(#UID))V([UID])
CROSS APPLY (VALUES(REPLACE(REPLACE(V.UID,#Delimiter,'|'),'||','|' + #Delimiter)))ca(FixedUID)
CROSS APPLY dbo.DelimitedSplit8K_LEAD(ca.FixedUID,'|') DS;
Which will result in the below:
Col1 Col2 Col3 Col4 Col5 Col6
---- ---- ---- ---- ---- ----
1585 -71 5417 2347 8865 NULL
Basically what I'm doing is a recursive cte from 6 to 1. each iteration I am removing the last delimited number and moving it to col_val column.
I decided to use reverse so that I could then use patindex to find the hyphen then the number. Doing that made it possible to get the negative values. In reverse the string looks like 1--6851-5851-0 then patindex('%-[0-9]%', <string>) returns 2 and because I used right function of the string 0-1585-1586--1 it will return -1
I added '0-' to the beginning of the delim_column because I want to use patindex without having to account for the last delimited column.
The column col_val is repeating all the above but instead of using #uid it is using delim_column
Here is what each iteration looks like:
col_num delim_column col_val loc
6 0-1585-1586--1-5417-2347 8865 4
5 0-1585-1586--1-5417 2347 4
4 0-1585-1586--1 5417 4
3 0-1585-1586 -1 2
2 0-1585 1586 4
1 0 1585 4
Then I'm pivoting the columns using a simple choose function. That will make the column names clean.
DECLARE
#uid VARCHAR(MAX) = '1585-1586--156-5417-2347-8865',
#delim_count INT = 0
--First, count the number of delimiters. We do this by temporarily replacing '--' with a single '-'
--and then count the difference in lengths of the two strings (one with '-' and one without)
SELECT #delim_count = LEN(REPLACE(#uid, '--', '-')) - LEN(REPLACE(REPLACE(#uid, '--', '-'), '-','')) - IIF(#uid LIKE '-%', 1, 0)
--next a recursive cte that will lop off the last number each iteration and move the last value to col_val
;WITH fnsplit(col_num, delim_column, col_val, loc)
AS
(
SELECT
#delim_count+1 --start with 6 and then go to 1. remove the +1 and replace col_num > 0 for a zero index
,'0-'+SUBSTRING(#uid,0, LEN(#uid) - LEN(RIGHT(#uid, PATINDEX('%-[0-9]%', reverse(#uid)) - 1)) )
,RIGHT(#uid, PATINDEX('%-[0-9]%', REVERSE(#uid)) - 1)
,PATINDEX('%-[0-9]%', REVERSE(#uid)) - 1
UNION ALL
SELECT
col_num - 1
,SUBSTRING(delim_column,0, LEN(delim_column) - LEN(RIGHT(delim_column, PATINDEX('%-[0-9]%', REVERSE(delim_column)) - 1)) )
,RIGHT(delim_column, PATINDEX('%-[0-9]%', REVERSE(delim_column)) - 1)
,PATINDEX('%-[0-9]%', REVERSE(delim_column)) - 1
FROM
fnsplit
WHERE
col_num > 1
)
--select * from fnsplit -- uncomment here and comment all below to see the recursion
SELECT
*
FROM
(
SELECT
column_name
,col_val
FROM
fnsplit
CROSS APPLY
(SELECT CHOOSE(col_num, 'Col_A','Col_B','Col_C', 'Col_D', 'Col_E', 'Col_F')) tbl(column_name)
)PVT
PIVOT
(
MAX(col_val)
FOR column_name IN ([Col_A], [Col_B], [Col_C], [Col_D], [Col_E], [Col_F])
) PVT1
The desired result can be achieve with the script below. The script relies on a User Defined Function called [fnSplit]. The [fnSplit] UDF is defined later in the post.
declare #uid nvarchar(100)
set #uid = '1585-1586--1-5417-2347-8865'
select
(select data from [dbo].[fnSplit] (REPLACE ((REPLACE (#uid, '-', '|')),'||', '|-'), '|') where id = 1) as [Col_A],
(select data from [dbo].[fnSplit] (REPLACE ((REPLACE (#uid, '-', '|')),'||', '|-'), '|') where id = 2) as [Col_B],
(select data from [dbo].[fnSplit] (REPLACE ((REPLACE (#uid, '-', '|')),'||', '|-'), '|') where id = 3) as [Col_C],
(select data from [dbo].[fnSplit] (REPLACE ((REPLACE (#uid, '-', '|')),'||', '|-'), '|') where id = 4) as [Col_D],
(select data from [dbo].[fnSplit] (REPLACE ((REPLACE (#uid, '-', '|')),'||', '|-'), '|') where id = 5) as [Col_E],
(select data from [dbo].[fnSplit] (REPLACE ((REPLACE (#uid, '-', '|')),'||', '|-'), '|') where id = 6) as [Col_F]
CREATE FUNCTION [dbo].[fnSplit]
(
#Line nvarchar(MAX),
#SplitOn nvarchar(5) = ','
)
RETURNS #RtnValue table
(
Id INT NOT NULL IDENTITY(1,1) PRIMARY KEY CLUSTERED,
Data nvarchar(1000) NOT NULL
)
AS
BEGIN
IF #Line IS NULL RETURN
DECLARE #split_on_len INT = LEN(#SplitOn)
DECLARE #start_at INT = 1
DECLARE #end_at INT
DECLARE #data_len INT
WHILE 1=1
BEGIN
SET #end_at = CHARINDEX(#SplitOn,#Line,#start_at)
SET #data_len = CASE #end_at WHEN 0 THEN LEN(#Line) ELSE #end_at-#start_at END
INSERT INTO #RtnValue (data) VALUES( SUBSTRING(#Line,#start_at,#data_len) );
IF #end_at = 0 BREAK;
SET #start_at = #end_at + #split_on_len
END
RETURN
END
I have a very long text string being imported into a table. I would like to split the string up; I have a routine to pull the data into a table, but it creates all the data in a single field in a table.
Example Text:
05/10/2018 21:14,#FXAAF00123456,,Cup 1 X Plane,0.00000,OK,Cup 1 Y Plane,0.00000,OK,Cup 1 Z Plane,40.64252,OK,Cup 2 X Plane,77.89434,OK,..etc
(The test string is much longer than this, in the region of 1500-1700 characters, but with the same structure in the rest of the string).
This data is a series of test measurements, with the name of the value, the value, and the OK/NOK indicator.
I want the results to be stored in a table (variable) with three fields, so the data above becomes:
Field1|Field2|Field3
05/10/2018 21:14|#FXAAF00123456|null|
Cup 1 X Plane|0.00000|OK|
Cup 1 Y Plane|0.00000|OK|
Cup 1 Z Plane|40.64252|OK|
Cup 2 X Plane|77.89434|OK|
...etc
I am using this function to split the string into a table variable:
CREATE FUNCTION [dbo].[fnSplitString]
(
#InputString NVARCHAR(MAX),
#Delim VARCHAR(255)
)
RETURNS TABLE
AS
RETURN ( SELECT [Value] FROM
(
SELECT
[Value] = LTRIM(RTRIM(SUBSTRING(#InputString, [Number],
CHARINDEX(#Delim, #InputString + #Delim, [Number]) - [Number])))
FROM (SELECT Number = ROW_NUMBER() OVER (ORDER BY name)
FROM sys.all_objects) AS x
WHERE Number <= LEN(#InputString)
AND SUBSTRING(#Delim + #InputString, [Number], LEN(#Delim)) = #Delim
) AS y
);
How can this be modified to give the output required above?
You can try this tiny inline splitting approach.
DECLARE #s VARCHAR(MAX)='05/10/2018 21:14,#FXAAF00123456,,Cup 1 X Plane,0.00000,OK,Cup 1 Y Plane,0.00000,OK,Cup 1 Z Plane,40.64252,OK,Cup 2 X Plane,77.89434,OK';
;WITH
a AS (SELECT n=0, i=-1, j=0 UNION ALL SELECT n+1, j, CAST(CHARINDEX(',', #s, j+1) AS INT) FROM a WHERE j > i)
,b AS (SELECT n, SUBSTRING(#s, i+1, IIF(j>0, j, LEN(#s)+1)-i-1) s FROM a WHERE i >= 0)
,c AS (SELECT n,(n-1) % 3 AS Position,(n-1)/3 AS RowIndex,s FROM b)
SELECT MAX(CASE WHEN Position=0 THEN s END) AS part1
,MAX(CASE WHEN Position=1 THEN s END) AS part2
,MAX(CASE WHEN Position=2 THEN s END) AS part3
FROM c
GROUP BY RowIndex
OPTION (MAXRECURSION 0);
The result
part1 part2 part3
05/10/2018 21:14 #FXAAF00123456
Cup 1 X Plane 0.00000 OK
Cup 1 Y Plane 0.00000 OK
Cup 1 Z Plane 40.64252 OK
Cup 2 X Plane 77.89434 OK
Hint
You might change your splitter function to the recursive approach above. On the one side you are limited to a string-length of the count in sys.all_objects which might be smaller than your input. On the other side your approach has to test each and any position, while the recursive approach hops from spot to spot. Should be faster...
This could easily be opened for a multi-character-delimiter if needed...
UPDATE another approach without recursion
...which makes it clumsy to be used in a splitter function (due to OPTION MAXRECURSION(0), which must be placed at the end of the query and cannot live within the function). Try it out:
;WITH
a(Casted) AS (SELECT CAST('<x>' + REPLACE((SELECT #s AS [*] FOR XML PATH('')),',','</x><x>') + '</x>' AS XML))
,b(s,RowIndex,Position) AS
(
SELECT x.value(N'text()[1]','nvarchar(max)')
,(ROW_NUMBER() OVER(ORDER BY(SELECT NULL)) -1) /3
,(ROW_NUMBER() OVER(ORDER BY(SELECT NULL)) -1) %3
FROM a
CROSS APPLY Casted.nodes(N'/x') X(x)
)
SELECT RowIndex
,MAX(CASE WHEN Position=0 THEN s END) AS part1
,MAX(CASE WHEN Position=1 THEN s END) AS part2
,MAX(CASE WHEN Position=2 THEN s END) AS part3
FROM b
GROUP BY RowIndex;
Hint:
Using (SELECT #s AS [*] FOR XML PATH('')) will make this approach save with forbidden characters...
this required a small modification to your fnSplitString function. Add a RowNo to identify the original sequence of the delimited item
CREATE FUNCTION [dbo].[fnSplitString]
(
#InputString NVARCHAR(MAX),
#Delim VARCHAR(255)
)
RETURNS TABLE
AS
RETURN ( SELECT [Value] FROM
(
SELECT RowNo = ROW_NUMBER() OVER (ORDER BY Number),
[Value] = LTRIM(RTRIM(SUBSTRING(#InputString, [Number],
CHARINDEX(#Delim, #InputString + #Delim, [Number]) - [Number])))
FROM (SELECT Number = ROW_NUMBER() OVER (ORDER BY name)
FROM sys.all_objects) AS x
WHERE Number <= LEN(#InputString)
AND SUBSTRING(#Delim + #InputString, [Number], LEN(#Delim)) = #Delim
) AS y
);
And with that, you can group every 3 rows as one. Also the RowNo can be used to identify the column
The query
; with tbl as
(
select col = '05/10/2018 21:14,#FXAAF00123456,,Cup 1 X Plane,0.00000,OK,Cup 1 Y Plane,0.00000,OK,Cup 1 Z Plane,40.64252,OK,Cup 2 X Plane,77.89434,OK'
)
select Field1 = MAX(CASE WHEN (RowNo - 1) % 3 = 0 THEN Value END),
Field2 = MAX(CASE WHEN (RowNo - 1) % 3 = 1 THEN Value END),
Field3 = MAX(CASE WHEN (RowNo - 1) % 3 = 2 THEN Value END)
from tbl t
cross apply dbo.fnSplitString (t.col, ',')
group by (RowNo - 1) / 3
Can you try following script after you create the SQL split function given in the reference document.
That split function returns the order of splitted string fragments so that information is used for row data
declare #str nvarchar(max) = '05/10/2018 21:14,#FXAAF00123456,,Cup 1 X Plane,0.00000,OK,Cup 1 Y Plane,0.00000,OK,Cup 1 Z Plane,40.64252,OK,Cup 2 X Plane,77.89434,OK'
select
floor(id / 3)+1 rn,
case when id % 3 = 1 then val end Field1,
case when id % 3 = 2 then val end Field2,
case when id % 3 = 0 then val end Field3
from dbo.Split(#str,',')
select
rn,
max(Field1) Field1,
max(Field2) Field2,
max(Field3) Field3
from (
select
floor((id-1) / 3)+1 rn,
case when id % 3 = 1 then val end Field1,
case when id % 3 = 2 then val end Field2,
case when id % 3 = 0 then val end Field3
from dbo.Split(#str,',')
) t
group by rn
I have a large dataset, millions of rows, containing various people and items they're associated with. In many cases, these peoples' names are present in the item name as well. I would like to find the shortest substring of item name in which the owner name, or parts of their name, are no longer present.
A sample of the data is as follows:
CREATE TABLE test ([ID] nvarchar(255), [OWNER] nvarchar(255), [ITEM] nvarchar(255))
INSERT INTO test
SELECT '1','A B C','A B X X X'
UNION ALL
SELECT '2','ABC DEF','XABCD XX X'
UNION ALL
SELECT '2','ABC DEF','YABCD X X'
UNION ALL
SELECT '3','X X X X','YPD X X'
UNION ALL
SELECT '4','XYZ','X X X'
UNION ALL
SELECT '5','A B C','OOO PPP QQQ'
With ideal output being:
ID | OWNER | ITEM | SHORT ITEM
1 | A B C | A B X X X | X X X
2 | ABC DEF | XABCD XX X | XX X
2 | ABC DEF | YABCD X X | X X
3 | X X X X | YPD X X | X X
4 | XYZ | X X X | X X X
5 | A B C | OOO PPP DDD| PPP QQQ
This output includes a couple of cases in which I wanted to remove something from the item name which was not the owner's name, and so I have hardcoded that into the query. I've written the following query:
;WITH p1 as( --Retrieving first word of ITEM and ITEM minus first word
SELECT SUBSTRING([ITEM],1,
case when CHARINDEX(' ',[ITEM])=0 then LEN([ITEM]) --When no space in ITEM, return ITEM
else CHARINDEX(' ', [ITEM]) -1 end) as w1p --Return the first word separated by space
,SUBSTRING([ITEM],CHARINDEX(' ',[ITEM])+1,100) as m1p --Return everything minus the first word
,[ITEM]
,[ID]
,[OWNER]
FROM test
),p2 as( --Retrieving second word of ITEM and ITEM minus second word
SELECT SUBSTRING(m1p,1,
case when CHARINDEX(' ',m1p)=0 then LEN(m1p)
else CHARINDEX(' ',m1p) -1 end) as w2p
,SUBSTRING(m1p,CHARINDEX(' ',m1p)+1,100) as m2p
,[ITEM]
,[ID]
,[w1p]
,[m1p]
FROM p1
),p3 as( --Retrieving third word of ITEM and ITEM minus third word
SELECT SUBSTRING(m2p,1,
case when CHARINDEX(' ',m2p)=0 then LEN(m2p)
else CHARINDEX(' ',m2p) -1 end) as w3p
,SUBSTRING(m2p,CHARINDEX(' ',m2p)+1,100) as m3p
,*
FROM p2
),p4 as( --Retrieving fourth word of ITEM and ITEM minus fourth word
SELECT SUBSTRING(m3p,1,
case when CHARINDEX(' ',m3p)=0 then LEN(m3p)
else CHARINDEX(' ',m3p) -1 end) as w4p
,SUBSTRING(m3p,CHARINDEX(' ',m3p)+1,100) as m4p
,*
FROM p3
),m1 as( --Retrieving first word of OWNER and OWNER minus first word
SELECT SUBSTRING([OWNER],1,
CASE WHEN CHARINDEX(' ',[OWNER])=0 THEN LEN([OWNER])
ELSE CHARINDEX(' ',[OWNER])-1 end) as w1m
,SUBSTRING([OWNER],CHARINDEX(' ',[OWNER])+1,100) as m1m
,[OWNER]
,[ID]
FROM p1
GROUP BY [OWNER], [ID]
),m2 as( --Retrieving second word of OWNER and OWNER minus second word
SELECT SUBSTRING(m1m,1,
case when CHARINDEX(' ', m1m) = 0 then LEN(m1m)
else CHARINDEX(' ', m1m) -1 end) as w2m
,SUBSTRING(m1m,CHARINDEX(' ',m1m)+1,100) as m2m
,*
FROM m1
),m3 as( --Retrieving third word of OWNER and OWNER minus third word
SELECT SUBSTRING(m2m,1,
case when CHARINDEX(' ', m2m) = 0 then LEN(m2m)
else CHARINDEX(' ', m2m) -1 end) as w3m
,SUBSTRING(m2m,CHARINDEX(' ',m2m)+1,100) as m3m
,*
FROM m2
),m4 as( --Retrieving fourth word of OWNER
SELECT SUBSTRING(m3m,1,
case when CHARINDEX(' ', m3m) = 0 then LEN(m3m)
else CHARINDEX(' ', m3m) -1 end) as w4m
,*
FROM m3
),ms as( --Adding special cases not caught by regular query
SELECT CASE WHEN [ID] IN ('3','5') THEN
CASE WHEN [ID] = '3' THEN 'YPD'
WHEN [ID] = '5' THEN 'OOO'
ELSE NULL END
ELSE NULL END as SPEC
,*
FROM m4
)
SELECT m.[ID] --Finding closest shortname
,m.[OWNER]
,p.[ITEM]
,CASE WHEN SUBSTRING(p.[ITEM],1,LEN(m.SPEC)) = SPEC AND SPEC IS NOT NULL THEN LTRIM(SUBSTRING(p.[ITEM],LEN(m.SPEC)+1,100)) --If hardcoded phrase in ITEM, return ITEM minus that phrase
WHEN p.w1p LIKE '%'+m.w1m+'%' AND p.w2p NOT LIKE '%'+m.w2m+'%' AND p.w3p NOT LIKE '%'+m.w3m+'%' AND p.w4p NOT LIKE '%'+m.w4m+'%' THEN p.m1p --If first word of ITEM match first of OWNER, return ITEM minus first
WHEN p.w1p LIKE '%'+m.w1m+'%' AND p.w2p LIKE '%'+m.w2m+'%' AND p.w3p NOT LIKE '%'+m.w3m+'%' AND p.w4p NOT LIKE '%'+m.w4m+'%' THEN p.m2p --If first two words of ITEM match first of OWNER, return ITEM minus two words
WHEN p.w1p LIKE '%'+m.w1m+'%' AND p.w2p LIKE '%'+m.w2m+'%' AND p.w3p LIKE '%'+m.w3m+'%' AND p.w4p NOT LIKE '%'+m.w4m+'%' THEN p.m3p --If first three words of ITEM match first of OWNER, return ITEM minus three words
WHEN p.w1p LIKE '%'+m.w1m+'%' AND p.w2p LIKE '%'+m.w2m+'%' AND p.w3p LIKE '%'+m.w3m+'%' AND p.w4p LIKE '%'+m.w4m+'%' THEN p.m4p --If first four words of ITEM match first of OWNER, return ITEM minus four words
ELSE p.[ITEM]
END AS [SHORT ITEM]
FROM p4 p
LEFT JOIN ms m ON p.[ID] = m.[ID]
While this achieves my goal, it does not look very nice and feels like it could be optimized. It requires a where statement to have any sort of speed in execution. While I would likely not be running this on the full dataset anyway, I am looking for ways to improve. I do not have permission to view execution plans, so I cannot share that.
Thank you for any help or advice you can offer.
OK I developed this using only the first three rows of the table. Replace #test with your table name. Let me know if this works for you.
select *, row_number() over(order by id) rowid
into #a
from #test
select *, item short_item, row_number() over(order by id) rowid
into #b
from #test
declare #iterator int=1
declare #owner varchar(max)
declare #owneroriginal varchar(max)
declare #item varchar(max)
declare #itemoriginal varchar(max)
declare #itemactualoriginal varchar(max)
while #iterator<=(select max(rowid) from #a)
begin
select #owner=[owner] from #a where rowid=#iterator
select #owneroriginal=[owner] from #a where rowid=#iterator
select #item=[item] from #a where rowid=#iterator
select #itemoriginal=[item] from #a where rowid=#iterator
select #itemactualoriginal=[item] from #a where rowid=#iterator
while #owner<>''
begin
select #owner=left(#owneroriginal, charindex(' ',#owneroriginal))
select #owneroriginal= ltrim(replace(#owneroriginal,#owner,''))
select #item=left(#itemoriginal, charindex(' ',#itemoriginal))
select #itemoriginal= ltrim(replace(#itemoriginal,#item,''))
--select #owner, #owneroriginal, #item, #itemoriginal
if #itemactualoriginal
like '%'+rtrim(#owner)+'%' and #owner<>''
begin
--select 1
update #b
set short_item=replace(short_item, rtrim(#item),'')
where rowid=#iterator
end
else if ##rowcount=0
update #b
set short_item=
case
when #owner = ''
then ltrim(replace(short_item, #owneroriginal,''))
else ltrim(replace(short_item, #owner,''))
end
where rowid=#iterator
end
set #iterator=#iterator+1
end
select id, owner, item, short_item from #b
I have a table that looks something like
ID Col1 Col2 Col3 Col4
1 3 5 3 3
What I want to do is COUNT the number of 3s in this particular row.
I have tried the
select COUNT(*)
from INFORMATION_SCHEMA.COLUMNS
where TABLE_NAME = 'TableName' -- but obviously I need WHERE Col1 = 3 OR Col2 = 3...
What would be the best way to achieve this?
Based on what OP asked, this can be done
select
CASE WHEN Col1 = 3 then 1 ELSE 0 END +
CASE WHEN Col2 = 3 then 1 ELSE 0 END +
CASE WHEN Col3 = 3 then 1 ELSE 0 END +
CASE WHEN Col4 = 3 then 1 ELSE 0 END
From TableName
I don't really enjoy working with PIVOT so here a solution using APPLY.
SELECT
T.id
, Val
, COUNT(*)
FROM MyTable AS T
CROSS APPLY (
VALUES
(T.C1)
, (T.C2)
, (T.C3)
, (T.C4)
) AS X(Val)
GROUP BY T.Id, X.Val
ORDER BY T.Id, X.val
Please find the sample code:
DECLARE #Query VARCHAR(MAX) = 'SELECT Count = '
SELECT
#Query += '( CASE WHEN '+ COLUMN_NAME + ' = 3 THEN 1 ELSE 0 END ) + '
FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 'TEST' AND COLUMN_NAME <> 'ID'
SET #Query = SUBSTRING(#Query, 1, DATALENGTH(#Query) - 2) + ' FROM TEST WHERE ID = 1'
EXEC(#Query)
I'm sure I'm missing something here.
I have a dataset like this:
FK RowNumber Value Type Status
1 1 aaaaa A New
1 2 bbbbb B Good
1 3 ccccc A Bad
1 4 ddddd C Good
1 5 eeeee B Good
2 1 fffff C Bad
2 2 ggggg A New
2 3 hhhhh C Bad
3 1 iiiii A Good
3 2 jjjjj A Good
I'd like to query the top 3 results and Pivot them as columns, so the end result set looks like this:
FK Value1 Type1 Status1 Value2 Type2 Status2 Value3 Type3 Status3
1 aaaaa A New bbbbb B Good ccccc A Bad
2 fffff C Bad ggggg A New hhhhh C Bad
3 iiiii A Good jjjjj A Good
How can I accomplish this in SQL Server 2005?
I have been attempting this using PIVOT, but I am still very unfamiliar with that keyword and cannot get it to work the way I want.
SELECT * --Id, [1], [2], [3]
FROM
(
SELECT Id, Value, Type, Status
, ROW_NUMBER() OVER (PARTITION BY Id ORDER Status, Type) as [RowNumber]
FROM MyTable
) as T
PIVOT
(
-- I know this section doesn't work. I'm still trying to figure out PIVOT
MAX(T.Value) FOR RowNumber IN ([1], [2], [3]),
MAX(T.Type) FOR RowNumber IN ([1], [2], [3]),
MAX(T.Status) FOR RowNumber IN ([1], [2], [3])
) AS PivotTable;
My actual data set is a bit more complex than this, and I need the top 10 records, not the top 3, so I don't want to simply do CASE WHEN RowNumber = X THEN... for each one.
Update
I tested all the answers below, and found most of them seem about the same with no apparent performance difference in smaller data sets (around 3k records), however there was a slight difference when running the queries against larger data sets.
Here are the results of my tests using 80,000 records and querying for 5 columns in the top 10 rows, so my end result set was 50 columns + the Id column. I'd suggest you test them on your own to decide which one works best for you and your environment.
bluefoot's answer of unpivoting and re-pivoting the data averaged the fastest at about 12 seconds. I also liked this answer because I found it easiest to read and maintain.
Aaron's answer and koderoid's answer both suggest using a MAX(CASE WHEN RowNumber = X THEN ...), and was close behind averaging at around 13 seconds.
Rodney's answer of using multiple PIVOT statements averaged around 16 seconds, although it might be faster with fewer PIVOT statements (my tests had 5).
And the first half of Aaron's answer that suggested using a CTE and OUTER APPLY was the slowest. I don't know how long it would take to run because I cancelled it after 2 minutes, and that was with around 3k records, 3 rows, and 3 columns instead of 80k records, 10 rows, and 5 columns.
You can do an UNPIVOT and then a PIVOT of the data. this can be done either statically or dynamically:
Static Version:
select *
from
(
select fk, col + cast(rownumber as varchar(1)) new_col,
val
from
(
select fk, rownumber, value, cast(type as varchar(10)) type,
status
from yourtable
) x
unpivot
(
val
for col in (value, type, status)
) u
) x1
pivot
(
max(val)
for new_col in
([value1], [type1], [status1],
[value2], [type2], [status2],
[value3], [type3])
) p
see SQL Fiddle with demo
Dynamic Version, this will get the list of columns to unpivot and then to pivot at run-time:
DECLARE #colsUnpivot AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX),
#colsPivot as NVARCHAR(MAX)
select #colsUnpivot = stuff((select ','+quotename(C.name)
from sys.columns as C
where C.object_id = object_id('yourtable') and
C.name not in ('fk', 'rownumber')
for xml path('')), 1, 1, '')
select #colsPivot = STUFF((SELECT ','
+ quotename(c.name
+ cast(t.rownumber as varchar(10)))
from yourtable t
cross apply
sys.columns as C
where C.object_id = object_id('yourtable') and
C.name not in ('fk', 'rownumber')
group by c.name, t.rownumber
order by t.rownumber
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query
= 'select *
from
(
select fk, col + cast(rownumber as varchar(10)) new_col,
val
from
(
select fk, rownumber, value, cast(type as varchar(10)) type,
status
from yourtable
) x
unpivot
(
val
for col in ('+ #colsunpivot +')
) u
) x1
pivot
(
max(val)
for new_col in
('+ #colspivot +')
) p'
exec(#query)
see SQL Fiddle with Demo
Both will generate the same results, however the dynamic is great if you do not know the number of columns ahead of time.
The Dynamic version is working under the assumption that the rownumber is already a part of the dataset.
You can try to do the pivot in three separate pivot statements. Please give this a try:
SELECT Id
,MAX(S1) [Status 1]
,MAX(T1) [Type1]
,MAX(V1) [Value1]
--, Add other columns
FROM
(
SELECT Id, Value , Type, Status
, 'S' + CAST(ROW_NUMBER() OVER (PARTITION BY Id ORDER BY Status, Type) AS VARCHAR(10)) [Status_RowNumber]
, 'T' + CAST(ROW_NUMBER() OVER (PARTITION BY Id ORDER BY Status, Type) AS VARCHAR(10)) [Type_RowNumber]
, 'V' + CAST(ROW_NUMBER() OVER (PARTITION BY Id ORDER BY Status, Type) AS VARCHAR(10)) [Value_RowNumber]
FROM MyTable
) as T
PIVOT
(
MAX(Status) FOR Status_RowNumber IN ([S1], [S2], [S3],[S4],[S5],[S6],[S7],[S8],[S9],[S10])
)AS StatusPivot
PIVOT(
MAX(Type) FOR Type_RowNumber IN ([T1], [T2], [T3],[T4],[T5],[T6],[T7],[T8],[T9],[T10])
)AS Type_Pivot
PIVOT(
MAX(Value) FOR Value_RowNumber IN ([V1], [V2], [V3],[V4],[V5],[V6],[V7],[V8],[V9],[V10])
)AS Value_Pivot
GROUP BY Id
I don't know the full scope of the criteria for selecting the top ten records, but this produces and output that may get you closer to your answer.
SQL Fiddle Example
Rodney's muli-pivot is clever, that's for sure. Here are two other alternatives that are of course less appealing when you get into the 10X vs. 3X area.
;WITH a AS
(
SELECT Id, Value, Type, Status,
n = ROW_NUMBER() OVER (PARTITION BY Id ORDER BY [Status], [Type])
FROM dbo.MyTable
)
SELECT a.Id,
Value1 = a.Value, Type1 = a.[Type], Status1 = a.[Status],
Value2 = b.Value, Type2 = b.[Type], Status2 = b.[Status],
Value3 = c.Value, Type3 = c.[Type], Status3 = c.[Status]
FROM a
OUTER APPLY (SELECT * FROM a AS T2 WHERE n = a.n + 1 AND id = a.id) AS b
OUTER APPLY (SELECT * FROM a AS T2 WHERE n = b.n + 1 AND id = b.id) AS c
WHERE a.n = 1
ORDER BY a.Id;
-- or --
;WITH a AS
(
SELECT Id, Value, [Type], [Status],
n = ROW_NUMBER() OVER (PARTITION BY Id ORDER BY [Status], [Type])
FROM dbo.MyTable
)
SELECT Id,
Value1 = MAX(CASE WHEN n = 1 THEN Value END),
Type1 = MAX(CASE WHEN n = 1 THEN [Type] END),
Status1 = MAX(CASE WHEN n = 1 THEN [Status] END),
Value2 = MAX(CASE WHEN n = 2 THEN Value END),
Type2 = MAX(CASE WHEN n = 2 THEN [Type] END),
Status2 = MAX(CASE WHEN n = 2 THEN [Status] END),
Value3 = MAX(CASE WHEN n = 3 THEN Value END),
Type3 = MAX(CASE WHEN n = 3 THEN [Type] END),
Status3 = MAX(CASE WHEN n = 3 THEN [Status] END)
FROM a
GROUP BY Id
ORDER BY a.Id;
This might work for you, though it's not elegant.
select aa.FK_Id
, isnull(max(aa.Value1), '') as Value1
, isnull(max(aa.Type1), '') as Type1
, isnull(max(aa.Status1), '') as Status1
, isnull(max(aa.Value2), '') as Value2
, isnull(max(aa.Type2), '') as Type2
, isnull(max(aa.Status2), '') as Status2
, isnull(max(aa.Value3), '') as Value3
, isnull(max(aa.Type3), '') as Type3
, isnull(max(aa.Status3), '') as Status3
from
(
select FK_Id
, case when RowNumber = 1 then Value else null end as Value1
, case when RowNumber = 1 then [Type] else null end as Type1
, case when RowNumber = 1 then [Status] else null end as Status1
, case when RowNumber = 2 then Value else null end as Value2
, case when RowNumber = 2 then [Type] else null end as Type2
, case when RowNumber = 2 then [Status] else null end as Status2
, case when RowNumber = 3 then Value else null end as Value3
, case when RowNumber = 3 then [Type] else null end as Type3
, case when RowNumber = 3 then [Status] else null end as Status3
from Table1
) aa
group by aa.FK_Id
try something like this:
declare #rowCount int
set #rowCount = 10
declare #isNullClause varchar(4024)
set #isnullClause = ''
declare #caseClause varchar(4024)
set #caseClause = ''
declare #i int
set #i = 1
while(#i <= #rowCount) begin
set #isnullClause = #isNullClause +
' , max(aa.Value' + CAST(#i as varchar(3)) + ') as Value' + CAST(#i as varchar(3)) +
' , max(aa.Type' + CAST(#i as varchar(3)) + ') as Type' + CAST(#i as varchar(3)) +
' , max(aa.Status' + CAST(#i as varchar(3)) + ') as Status' + CAST(#i as varchar(3)) + ' ';
set #caseClause = #caseClause +
' , case when RowNumber = ' + CAST(#i as varchar(3)) + ' then Value else null end as Value' + CAST(#i as varchar(3)) +
' , case when RowNumber = ' + CAST(#i as varchar(3)) + ' then Type else null end as Type' + CAST(#i as varchar(3)) +
' , case when RowNumber = ' + CAST(#i as varchar(3)) + ' then Status else null end as Status' + CAST(#i as varchar(3)) + ' '
set #i = #i + 1;
end
declare #sql nvarchar(4000)
set #sql = 'select aa.FK_Id ' + #isnullClause + ' from ( select FK_Id '
+ #caseClause + ' from Table1) aa group by aa.FK_Id '
exec SP_EXECUTESQL #sql