SQL query to split a column based on hardcoded values - sql

I have a table eg assume this setup
table MyTable has various columns Id, UserId, col1, col2 col3 including column called Stuff.
I want to output certain Columns from MyTable with a query
but i want to split the 'Stuff' column such that 2 new columns are shown in the query
I can define the categories as hardcoded values, but I'm not sure how this can be represented in SQL:
Category1 = 'alpha, bravo, delta, gamma';
Category2 = 'charlie, echo, hotel';
MyTable
ID | UserID | Stuff | Other Cols....
----------------------------------------------------------
1 1 alpha
2 2 hotel
3 1 charlie
4 1 echo
5 1 gamma
6 2 bravo
7 2 delta
i want the select query to show
UserId | Category1 | Category2
----------------------------------------------------------
1 alpha, gamma charlie, echo
---------------------------------------------------------
2 bravo, delta hotel
----------------------------------------------------------
i.e. produce 2 columns, split based on whether the Stuff column contains an item from Category1 or Category2.
There should be one row per distinct UserId, and each category's content can be comma separated as shown above.
Please can you show how this can be done
Hope this makes sense.
Thanks

You can use the xml extensions to concatenate your strings, then just hard code the categories into each subquery:
-- Sample data reproducing the question's MyTable.
CREATE TABLE #T (ID INT, UserID INT, [Stuff] VARCHAR(300));

INSERT INTO #T (ID, UserID, [Stuff])
VALUES
    (1, 1, 'alpha'),
    (2, 2, 'hotel'),
    (3, 1, 'charlie'),
    (4, 1, 'echo'),
    (5, 1, 'gamma'),
    (6, 2, 'bravo'),
    (7, 2, 'delta');

-- One output row per user. Each category column is built by a correlated
-- FOR XML PATH('') subquery that concatenates ', ' + value for the rows of
-- that user whose [Stuff] is in the hard-coded category list; STUFF then
-- removes the leading ', ' (2 characters).
-- TYPE + .value() returns the text un-entitized, so characters such as
-- '&' or '<' in [Stuff] survive the XML round trip.
SELECT
    t.UserID,
    [Category1] = STUFF((
            SELECT ', ' + t2.[Stuff]
            FROM #T AS t2
            WHERE t2.[Stuff] IN ('alpha', 'bravo', 'delta', 'gamma')
              AND t2.UserID = t.UserID
            ORDER BY t2.ID  -- without an ORDER BY the concatenation order is not guaranteed
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)'), 1, 2, ''),
    [Category2] = STUFF((
            SELECT ', ' + t2.[Stuff]
            FROM #T AS t2
            WHERE t2.[Stuff] IN ('charlie', 'echo', 'hotel')
              AND t2.UserID = t.UserID
            ORDER BY t2.ID
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)'), 1, 2, '')
FROM (
        SELECT DISTINCT UserID
        FROM #T
     ) AS t;
Example on SQL Fiddle
You could define your categories at the start in a CTE (Categories) for improved readability:
-- Categories: lookup of every known Stuff value and the category number it belongs to.
WITH Categories (Category, Name) AS
(
    SELECT v.Category, v.Name
    FROM (VALUES
            (1, 'alpha'),
            (1, 'bravo'),
            (1, 'delta'),
            (1, 'gamma'),
            (2, 'charlie'),
            (2, 'echo'),
            (2, 'hotel')
         ) AS v (Category, Name)
),
-- Data: rows of T tagged with their category; values that are not in the
-- lookup are dropped by the inner join.
Data AS
(
    SELECT src.UserID, src.[Stuff], cat.Category
    FROM T AS src
    INNER JOIN Categories AS cat
        ON cat.Name = src.[Stuff]
)
-- One row per user, with a comma-separated list per category.
SELECT
    u.UserID,
    STUFF((
            SELECT ', ' + d.[Stuff]
            FROM Data AS d
            WHERE d.Category = 1
              AND d.UserID = u.UserID
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)'), 1, 2, '') AS [Category1],
    STUFF((
            SELECT ', ' + d.[Stuff]
            FROM Data AS d
            WHERE d.Category = 2
              AND d.UserID = u.UserID
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)'), 1, 2, '') AS [Category2]
FROM (
        SELECT DISTINCT UserID
        FROM T
     ) AS u
Example on SQL Fiddle

My try, the technique I learned from Stack Overflow!... Please check:
-- Category lists held as comma-separated strings in local variables.
-- (T-SQL variables use the @ sigil; a # prefix denotes a temp table and
-- does not compile in a DECLARE of a scalar variable.)
DECLARE @Category1 NVARCHAR(MAX) = 'alpha, bravo, delta, gamma',
        @Category2 NVARCHAR(MAX) = 'charlie, echo, hotel';

-- sys.dm_fts_parser is used here as a string splitter: each term it
-- returns (display_term) is joined back to YourTable.[Stuff].
-- NOTE(review): sys.dm_fts_parser presumably needs Full-Text Search
-- installed and elevated permissions -- confirm before relying on it.
SELECT
    x.UserID,
    STUFF((
            SELECT ', ' + display_term
            FROM sys.dm_fts_parser('"' + ',' + @Category1 + '"', 1033, NULL, 0)
            INNER JOIN YourTable AS T
                ON display_term = T.[Stuff]
            WHERE T.UserID = x.UserID
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)'), 1, 2, '') AS Category1,
    STUFF((
            SELECT ', ' + display_term
            FROM sys.dm_fts_parser('"' + ',' + @Category2 + '"', 1033, NULL, 0)
            INNER JOIN YourTable AS T
                ON display_term = T.[Stuff]
            WHERE T.UserID = x.UserID
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)'), 1, 2, '') AS Category2
FROM YourTable AS x
GROUP BY x.UserID;

Related

Delete duplicate values from concatenated string

I have the following table:
Object Field Values
------------------------------------
1 1 A;A;A;B;A;A
2 1 A;B;C;C
2 2 X
3 1 X;Y;Z
3 2 V;V;V;V;V;V;V;V;V;V;V
How can I select from this table only the unique values from the concatenated values? So:
Object Field Values
---------------------
1 1 A;B
2 1 A;B;C
2 2 X
3 1 X;Y;Z
3 2 V
In any scripting language, I would loop through the values from Values, explode on ; and loop through that array with some logic filtering out duplicates. However, I need to do this only using SQL (Server 2008).
Can anybody tell me if and how this can be done?
Any help is greatly appreciated :-)
To do this first create a split function. This is the one I use but if you search the internet (or even SO) for "SQL Server Split Function" you will find a number of alternatives if you don't like this:
-- Inline table-valued split function.
--   @StringToSplit : the delimited text to break apart
--   @Delimiter     : single-character separator
-- Returns one row per element:
--   ID       : 1-based ordinal of the element
--   Position : character position in @StringToSplit where the element starts
--   Value    : the element text
-- A NULL @StringToSplit yields no rows.
ALTER FUNCTION [dbo].[Split](@StringToSplit NVARCHAR(MAX), @Delimiter NCHAR(1))
RETURNS TABLE
AS
RETURN
(
    SELECT  ID       = ROW_NUMBER() OVER(ORDER BY n.Number),
            Position = n.Number,
            -- text from this start position up to (not including) the next delimiter
            Value    = SUBSTRING(@StringToSplit, n.Number, CHARINDEX(@Delimiter, @StringToSplit + @Delimiter, n.Number) - n.Number)
    FROM    (   -- numbers 1 .. LEN + 1, generated from a cross join of a catalog view;
                -- ISNULL keeps TOP from receiving NULL (which raises an error)
                SELECT TOP (ISNULL(LEN(@StringToSplit), 0) + 1)
                       Number = ROW_NUMBER() OVER(ORDER BY a.object_id)
                FROM sys.all_objects a
                CROSS JOIN sys.all_objects b
            ) n
    -- with a delimiter prepended, position n holds a delimiter exactly when
    -- an element starts at position n of the original string
    WHERE   SUBSTRING(@Delimiter + @StringToSplit + @Delimiter, n.Number, 1) = @Delimiter
);
Then you can split your field, So running:
-- Expand every ';'-separated entry of [Values] into its own row.
SELECT
    src.Object,
    src.Field,
    parts.Value
FROM T AS src
CROSS APPLY dbo.Split(src.[Values], ';') AS parts
Will turn this:
Object Field Values
------------------------------------
1 1 A;A;A;B;A;A
into:
Object Field Values
------------------------------------
1 1 A
1 1 A
1 1 A
1 1 B
1 1 A
1 1 A
Then you can apply the DISTINCT Operator:
-- Same expansion as before, keeping each (Object, Field, Value) only once.
SELECT DISTINCT
    src.Object,
    src.Field,
    parts.Value
FROM T AS src
CROSS APPLY dbo.Split(src.[Values], ';') AS parts;
To give:
Object Field Values
------------------------------------
1 1 A
1 1 B
Then you can concatenate your rows back into a single column giving a final query:
-- For each row of T: split [Values], keep the distinct parts, glue them
-- back together with ';' via FOR XML PATH, then strip the leading ';'.
SELECT
    src.Object,
    src.Field,
    STUFF(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '') AS [Values]
FROM T AS src
CROSS APPLY
(
    SELECT DISTINCT ';' + parts.Value
    FROM dbo.Split(src.[Values], ';') AS parts
    FOR XML PATH(''), TYPE
) AS agg (x)
SQL Fiddle appears to be down, but once you have the Split function created the below is a full working example:
-- Sample rows from the question.
CREATE TABLE #T
(
    Object   INT,
    Field    INT,
    [Values] VARCHAR(MAX)
);

INSERT INTO #T (Object, Field, [Values])
VALUES
    (1, 1, 'A;A;A;B;A;A'),
    (2, 1, 'A;B;C;C'),
    (2, 2, 'X'),
    (3, 1, 'X;Y;Z'),
    (3, 2, 'V;V;V;V;V;V;V;V;V;V;V');

-- Split each [Values] string with dbo.Split, keep the distinct parts and
-- re-join them with ';' (the leading ';' is removed by STUFF).
SELECT
    src.Object,
    src.Field,
    STUFF(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '') AS [Values]
FROM #T AS src
CROSS APPLY
(
    SELECT DISTINCT ';' + parts.Value
    FROM dbo.Split(src.[Values], ';') AS parts
    FOR XML PATH(''), TYPE
) AS agg (x);
EDIT
Based on your comment that you can't create tables or modify the DDL, I thought I would account for the situation where you can't create a function either. You can expand the above split function out into your query, so you don't actually need to create a function:
-- Sample rows from the question.
CREATE TABLE #T
(
    Object   INT,
    Field    INT,
    [Values] VARCHAR(MAX)
);

INSERT INTO #T (Object, Field, [Values])
VALUES
    (1, 1, 'A;A;A;B;A;A'),
    (2, 1, 'A;B;C;C'),
    (2, 2, 'X'),
    (3, 1, 'X;Y;Z'),
    (3, 2, 'V;V;V;V;V;V;V;V;V;V;V');

-- Same result as the dbo.Split version, with the split logic written
-- inline so that no function has to be created.
SELECT
    src.Object,
    src.Field,
    STUFF(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '') AS [Values]
FROM #T AS src
CROSS APPLY
(
    -- every position where an element starts yields one ';' + element row
    SELECT DISTINCT
        ';' + SUBSTRING(src.[Values], nums.Number, CHARINDEX(';', src.[Values] + ';', nums.Number) - nums.Number)
    FROM
    (
        -- numbers 1 .. LEN + 1
        SELECT TOP (LEN(src.[Values]) + 1)
            ROW_NUMBER() OVER (ORDER BY a.object_id) AS Number
        FROM sys.all_objects AS a
        CROSS JOIN sys.all_objects AS b
    ) AS nums
    WHERE SUBSTRING(';' + src.[Values] + ';', nums.Number, 1) = ';'
    FOR XML PATH(''), TYPE
) AS agg (x);
Here is a standalone solution:
-- Sample data in a table variable (table variables use the @ sigil).
DECLARE @t TABLE (Object INT, Field INT, [Values] VARCHAR(MAX));

INSERT INTO @t (Object, Field, [Values])
VALUES
    (1, 1, 'A;A;A;B;A;A'),
    (2, 1, 'A;B;C;C'),
    (3, 1, 'X'),
    (4, 1, 'X;Y;Z'),
    (5, 1, 'V;V;V;V;V;V;V;V;V;V;V');

-- Split by turning 'A;B' into the XML '<t>A</t><t>B</t>', shred it with
-- nodes(), keep the distinct element values and re-join them with ';'.
-- NOTE(review): the CAST to XML presumably fails when a value contains
-- XML-special characters such as '&' or '<' -- confirm for your data.
SELECT t.Object, t.Field, x.[NewValues]
FROM @t AS t
CROSS APPLY
(
    SELECT STUFF((
            SELECT DISTINCT ';' + n.c.value('.', 'VARCHAR(2000)')
            FROM (
                    SELECT x = CAST('<t>' + REPLACE(t.[Values], ';', '</t><t>') + '</t>' AS XML)
                 ) AS a
            CROSS APPLY a.x.nodes('/t') AS n(c)
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'), 1, 1, '') AS [NewValues]
) AS x;
Result:
Object Field NewValues
1 1 A;B
2 1 A;B;C
3 1 X
4 1 X;Y;Z
5 1 V
According to @GarethD's comment, this may perform slowly.
Test data:
-- Builds the data set used for the timing comparison.
-- Object is an identity column, so every generated row gets its own Object value.
create table #t(Object int identity(1,1), Field int, [Values] varchar(max))
-- Five seed rows.
INSERT #t values
(1, 'A;A;A;B;A;A'),(1, 'A;B;C;C'), (1, 'X'), (1, 'X;Y;Z'),(1, 'V;V;V;V;V;V;V;V;V;V;V')
-- Each statement below appends three copies of the current contents, i.e.
-- quadruples the row count; six rounds give 5 * 4^6 = 20480 rows.
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
insert #t select field, [values] from #t union all select field, [values] from #t union all select field, [values] from #t
Performance testing my script:
-- Timing run of the XML-based split/dedupe against the generated #t rows.
SELECT
    src.Object,
    src.Field,
    dedup.[NewValues]
FROM #t AS src
CROSS APPLY
(
    SELECT
        STUFF(
            (
                -- shred '<t>..</t>' elements, keep distinct values, re-join with ';'
                SELECT DISTINCT ';' + n.c.value('.', 'VARCHAR(2000)') AS value
                FROM
                (
                    SELECT CAST('<t>' + REPLACE(src.[Values], ';', '</t><t>') + '</t>' AS XML) AS x
                ) AS a
                CROSS APPLY a.x.nodes('/t') AS n(c)
                FOR XML PATH(''), TYPE
            ).value('.', 'varchar(max)'),
            1, 1, '') AS [NewValues]
) AS dedup
Result less than 1 sec.
Performance testing GarethD's script
(I had to edit the test data to get all rows back, because identical rows were treated as one row):
-- SplitRows: distinct (Object, Field, Value) triples obtained by splitting
-- [Values] on ';' with a numbers-table style inline splitter.
WITH SplitRows AS
(
    SELECT DISTINCT
        src.Object,
        src.Field,
        parts.Value
    FROM #T AS src
    CROSS APPLY
    (
        SELECT
            ROW_NUMBER() OVER (ORDER BY nums.Number) AS ID,
            nums.Number AS Position,
            SUBSTRING(src.[Values], nums.Number, CHARINDEX(';', src.[Values] + ';', nums.Number) - nums.Number) AS Value
        FROM
        (
            -- numbers 1 .. LEN + 1
            SELECT TOP (LEN(src.[Values]) + 1)
                ROW_NUMBER() OVER (ORDER BY a.object_id) AS Number
            FROM sys.all_objects AS a
            CROSS JOIN sys.all_objects AS b
        ) AS nums
        WHERE SUBSTRING(';' + src.[Values] + ';', nums.Number, 1) = ';'
    ) AS parts
)
-- Re-join the distinct values of every (Object, Field) group with ';'.
SELECT
    grp.Object,
    grp.Field,
    STUFF(
        (
            SELECT ';' + item.Value
            FROM SplitRows AS item
            WHERE item.Object = grp.Object
              AND item.Field = grp.Field
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'),
        1, 1, '') AS [Values]
FROM SplitRows AS grp
GROUP BY
    grp.Object,
    grp.Field;
Result 6 seconds
If any row has null in values this script will also crash.
Just as a Scalar Value Function alternative without the CTE...
-- Scalar function: splits @String on @Delimiter, removes duplicate
-- elements and returns the distinct elements, sorted, re-joined with
-- @Delimiter.
--   @String    : delimited input text
--   @Delimiter : separator, 1 to 5 characters
-- Returns NULL when @String contains no elements (NULL or empty).
ALTER FUNCTION [SplitRemoveDupes] (
     @String VARCHAR(MAX)
    ,@Delimiter VARCHAR(5)
)
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @DelimiterPos INT;
    DECLARE @DedupedValues VARCHAR(MAX);
    -- LEN ignores trailing spaces, so measure with a sentinel appended;
    -- this keeps delimiters such as ', ' at their real length.
    DECLARE @DelimiterLength INT = LEN(@Delimiter + 'x') - 1;
    DECLARE @SplittedValues TABLE
    (
        OccurenceId INT IDENTITY(1,1),
        SplitValue  VARCHAR(MAX)   -- wide enough that long elements are not rejected
    );

    -- Peel one element off the front of @String per iteration.
    WHILE LEN(@String) > 0
    BEGIN
        SET @DelimiterPos = CHARINDEX(@Delimiter, @String);

        IF @DelimiterPos = 0
        BEGIN
            -- no delimiter left: the remainder is the last element
            INSERT INTO @SplittedValues (SplitValue) VALUES (@String);
            SET @String = '';
        END
        ELSE
        BEGIN
            INSERT INTO @SplittedValues (SplitValue) VALUES (LEFT(@String, @DelimiterPos - 1));
            -- skip the whole delimiter, not just one character
            SET @String = SUBSTRING(@String, @DelimiterPos + @DelimiterLength, LEN(@String));
        END
    END

    -- Concatenate the distinct elements; TYPE + .value() avoids XML
    -- entitization of characters such as '&' and '<'. STUFF removes the
    -- leading delimiter (all of its characters).
    SELECT @DedupedValues = STUFF((
            SELECT @Delimiter + d.SplitValue
            FROM (SELECT DISTINCT SplitValue FROM @SplittedValues) AS d
            ORDER BY d.SplitValue
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'), 1, @DelimiterLength, '');

    RETURN @DedupedValues;
END
Call it inline...
-- [Values] and [Table] are reserved words in T-SQL and must be bracketed.
-- [Table] stands in for the real table name.
SELECT Object, Field, [dbo].[SplitRemoveDupes]([Values], ';') AS [Values]
FROM [Table]

Have all Records in one Field

how is possible to have all records of one field into one field
Id, No , FDevice
1 , 1 , 'A'
2 , 1 , 'B'
3 , 1 , 'C'
4 , 2 , 'D'
5 , 2 , 'E'
I want to have
No , FDevice
1 , A-B-C
2 , D-E
Thank you for your help
use STUFF() - which inserts a string into another string.
-- One row per [No]; the correlated FOR XML PATH('') subquery strings the
-- FDevice values together as '-A-B-C' and STUFF drops the leading '-'.
select
    grp.[No],
    stuff(
        (
            select '-' + item.[FDevice]
            from TableName as item
            where item.[No] = grp.[No]
            for xml path('')
        ),
        1, 1, '') as FDevice
from TableName as grp
group by grp.[No]
SQLFiddle Demo
There's a well-known solution for aggregate concatenation in SQL Server, using select ... for xml path(''), but I have to say that many people use it incorrectly. The correct way to do this would be
-- Concatenate FDevice per [No]. The TYPE directive plus .value() returns
-- the text as-is instead of XML-entitized.
SELECT
    grp.[No],
    STUFF(
        (
            SELECT '-' + item.[FDevice]
            FROM TableName AS item
            WHERE item.[No] = grp.[No]
            FOR XML PATH(''), TYPE
        ).value('.', 'nvarchar(max)'),
        1, 1, '') AS FDevice
FROM
(
    SELECT DISTINCT [No]
    FROM TableName
) AS grp;
sql fiddle demo
The main part is to use xml type inside the query and then to convert it into varchar using value function, otherwise you can end up with incorrectly converted special chars like '>', '<', '&' and so on. SQLfiddle somehow doesn't show the difference, but here's a script which can show you what can happen if you don't use xml type:
-- Sample data containing XML-special characters ('<' and '&').
-- Table variables use the @ sigil.
declare @TableName table
    ([Id] int, [No] int, [FDevice] varchar(3))
;
INSERT INTO @TableName
    ([Id], [No], [FDevice])
VALUES
    (1, 1, 'A<'),
    (2, 1, 'B'),
    (3, 1, '&C'),
    (4, 2, 'D'),
    (5, 2, 'E')
;
-- Deliberately written WITHOUT the TYPE directive: the FOR XML result is
-- returned as text, so '<' and '&' come back as XML entities.
SELECT
    [No],
    STUFF(
        (SELECT '-' + [FDevice]
         FROM @TableName
         WHERE [No] = a.[No]
         FOR XML PATH (''))
    , 1, 1, '') AS FDevice
FROM @TableName AS a
GROUP BY [No];
outputs (note the XML-entitized special characters)
No FDevice
--------------------
1 A&lt;-B-&amp;C
2 D-E
-- Same concatenation with the TYPE directive and .value(): the special
-- characters are returned unchanged.
-- Reads the @TableName table variable, so it must run in the same batch
-- as that variable's declaration.
select
    a.[No],
    stuff(
        (
            select '-' + t.[FDevice]
            from @TableName as t
            where t.[No] = a.[No]
            for xml path(''), type
        ).value('.', 'nvarchar(max)')
    , 1, 1, '') as FDevice
from (select distinct [No] from @TableName) as a;
outputs
No FDevice
--------------------
1 A<-B-&C
2 D-E

Concatenate all rows of a column into a single value

I have table called Rule.
RuleId Name
1 A1
2 A2
3 A3
.
.
.
Now I want all the names as single result.
maybe like @allnames = A1,A2,A3
Can somebody advise how to write query for this without using loops?
Thanks in advance...
Try this:
-- Assigns the comma-separated list of distinct names to @allnames.
-- @allnames must already be declared in the batch (e.g. as NVARCHAR(MAX)).
-- STUFF removes the leading ',' produced by the concatenation.
SELECT @allnames = STUFF((
        SELECT DISTINCT ',' + Name
        FROM table1
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');
SQL Fiddle Demo
-- Accumulates the distinct names into a variable, one row at a time.
-- While @names is still NULL, COALESCE(@names + ',', '') contributes ''
-- so the result has no leading comma.
-- Rule is a reserved word in T-SQL, so the table name is bracketed.
DECLARE @names NVARCHAR(MAX);

SELECT @names = COALESCE(@names + ',', '') + COALESCE(Name, '')
FROM (SELECT DISTINCT Name FROM [Rule]) AS x;

PRINT @names;
Try this one -
-- Self-contained example: sample rows in a table variable, then the
-- distinct names concatenated into @all_names.
DECLARE @temp TABLE ([RuleId] INT, Name CHAR(2));

INSERT INTO @temp ([RuleId], Name)
VALUES
    (1, 'A1'),
    (2, 'A2'),
    (3, 'A3');

DECLARE @all_names NVARCHAR(MAX);

-- ',' + Name per distinct name, glued by FOR XML PATH(''); STUFF drops
-- the leading ','.
SELECT @all_names = STUFF((
        SELECT DISTINCT ',' + Name
        FROM @temp
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

SELECT @all_names;
Output -
---------------
A1,A2,A3

SQL, questions about join

I have two tables in sql 2012: name and prod with structure:
name: id int increment, name1 nvarchar(50)
prod: id int increment, products nvarchar(50), id_name int
Values for table are:
name table:
Id name1
1 pop
2 andi
prod table:
Id products id_name
1 coke 1
2 pizza 1
3 orange 2
I have done this query:
-- One row per (name, product) pair.
SELECT
    n.name1,
    p.product,
    p.id_name
FROM name AS n
INNER JOIN prod AS p
    ON n.id = p.id_name
How can I obtain this result:
pop ->coke, pizza
andi->orange
Unfortunately, there's no easy way to do this in SQL Server. The known solutions are:
the xml trick (see below);
using a variable to accumulate data (doesn't work for multiple groups of rows unless combined with a cursor);
a custom CLR aggregate.
here's xml:
-- One row per name; products are concatenated as ', a, b' by the
-- correlated FOR XML PATH('') subquery and STUFF strips the leading ', '.
SELECT
    nm.name1,
    STUFF(
        (
            SELECT ', ' + pr.product
            FROM prod AS pr
            WHERE pr.id_name = nm.id
            FOR XML PATH(''), TYPE
        ).value('.', 'nvarchar(max)'),
        1, 2, '') AS products
FROM name AS nm
sql fiddle demo
here's variable:
-- Variable-accumulation approach; handles a single name (@id) at a time.
DECLARE @product NVARCHAR(MAX),
        @id      INT;

SELECT @id = 1;

-- While @product is still NULL, ISNULL(@product + ', ', '') contributes ''
-- so the list has no leading separator.
SELECT @product = ISNULL(@product + ', ', '') + product
FROM prod
WHERE id_name = @id;

SELECT name1, @product AS products
FROM name
WHERE id = @id;
sql fiddle demo
try this:
-- One row per name with its products as an alphabetically ordered,
-- comma-separated list.
-- The TYPE directive plus .value() returns the text unchanged; without
-- them characters such as '&' and '<' come back XML-entitized.
SELECT
    G.id,
    G.name1,
    STUFF(
        (
            SELECT CAST(',' AS VARCHAR(10)) + U.product
            FROM prod AS U
            WHERE U.id_name = G.id
            ORDER BY U.product
            FOR XML PATH(''), TYPE
        ).value('.', 'nvarchar(max)'),
        1, 1, '') AS prod
FROM name AS G
ORDER BY G.name1 ASC
sqlfiddle
-- One row per name: its id, name, number of products and the products as
-- a ', '-separated list.
select
    n.nameid as [id],
    n.name as [name],
    count(*) as [count],
    stuff(
        (
            select ', ' + p2.prod
            from prodtbl as p2
            where p2.nameid = n.nameid
            for xml path(''), type
        ).value('.', 'nvarchar(max)'),
        1, 2, '') as products   -- separator is 2 characters, so strip 2 (not 1)
from nametbl as n
inner join prodtbl as p         -- explicit join instead of a comma join
    on p.nameid = n.nameid
group by n.nameid, n.name
order by [id];

Duplicates without using While or Cursor in T-SQL

ID Name
1 A
1 B
1 C
2 X
2 Y
3 P
3 Q
3 R
These are the columns in a table. I want to get output like
ID Company
1 A,B,C
2 X, Y
3 P,Q,R
Restriction is that I cannot use WHILE or CURSOR. Please write a query for the same.
This query should do it - uses FOR XML PATH which is new in SQL Server 2005 - hope you are on 2005 or higher, you didn't clearly specify.....
-- One row per ID with its names as a comma-separated list.
-- The xml result is read with .value(): casting the xml to VARCHAR would
-- serialize it and leave characters such as '&' and '<' entitized.
SELECT
    t1.ID,
    STUFF(
        (
            SELECT ',' + t2.Name
            FROM dbo.YourTable AS t2
            WHERE t2.ID = t1.ID
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'),
        1, 1, '') AS Company
FROM dbo.YourTable AS t1
GROUP BY t1.ID
Here's a solution using the CROSS APPLY method:
-- CROSS APPLY variant: one row per distinct id with its names as a
-- ', '-separated list.
-- The separator is prepended and the leading one removed with STUFF, so
-- the list carries no dangling ', ' at the end.
select a.id, sub.names
from (
    select distinct id from YourTable
) as a
cross apply (
    select stuff(
        (
            select ', ' + b.name
            from YourTable as b
            where b.id = a.id
            for xml path(''), type
        ).value('.', 'nvarchar(max)'),
        1, 2, '')
) as sub(names)
For 2005 version:
-- Self-contained demo: creates dbo.TEST, loads four rows, produces one
-- row per [Type] with a comma-separated 'Name:Qty' list, then drops the table.
CREATE TABLE dbo.TEST([Type] INTEGER, [Name] NVARCHAR(100), [Qty] INTEGER)
GO
INSERT dbo.TEST VALUES(1, N'a', 5)
INSERT dbo.TEST VALUES(1, N'b', 6)
INSERT dbo.TEST VALUES(2, N'c', 44)
INSERT dbo.TEST VALUES(3, N'd', 1)
GO
-- The subquery column is named 'data()', and the outer REPLACE turns every
-- space in the FOR XML result into a comma.
-- NOTE(review): this relies on 'data()' items being space-separated, and a
-- space inside [Name] would presumably also become a comma -- confirm.
select [Type],
[Description] = replace((select [Name] + ':' + cast([Qty] as varchar) as 'data()'
from TEST where [Type] = t.[Type] for xml path('')), ' ', ',')
from dbo.TEST t
group by [Type]
go
-- Clean up the demo table.
drop table dbo.TEST
You can group on the ID to get the unique values, then get the comma separated string for each using a for xml query:
-- One row per ID from TheTable; names are looked up in Test1, joined as
-- ', A, B' and the leading ', ' is skipped by starting SUBSTRING at
-- position 3 (at most 1000 characters are returned).
SELECT
    grp.ID,
    SUBSTRING(
        (
            SELECT ', ' + Test1.Name
            FROM Test1
            WHERE Test1.ID = grp.ID
            FOR XML PATH('')
        ),
        3, 1000) AS Company
FROM TheTable AS grp
GROUP BY grp.ID