SQL Recursive CTE Replacing records in each recursion - sql

I have a table like this:
ItemID ItemFormula
100 'ID_3+ID_5'
110 'ID_2+ID_6'
120 'ID_100+ID_110'
130 'ID_120+ID_4'
This is the simplified version of a formula table with nearly 1000 records and up to 40 levels of reference (items used in other items). The task is breaking down the formulas to just one level reference where no other items are in one item. For example in the table above for id=130 I should have '((ID_3+ID_5)+(ID_2+ID_6))+ID_4'
EDIT: The operations do not limit to "+" and items have a character between them to be recognizable. For the sake of simplicity, I removed that character.
I can use recursive CTE for that. but my problem is that due to high levels of reference, my recursive select has lots of records joining so it takes a lot to complete.
My question is that: Can I keep the previous recursion only each time the recursion happens?
Here is my CTE Code
WITH Formula
AS (SELECT A.ItemID
,'ID_' + CONVERT(VARCHAR(20), A.ItemID) AS ItemText
,CONVERT(VARCHAR(MAX), A.ItemFormula) AS ItemFormula
FROM (VALUES (100,'ID_3+ID_5'),
(110,'ID_2+ID_6'),
(120,'ID_100+ID_110'),
(130,'ID_120+ID_4')
) A (ItemID,ItemFormula)
)
,REC
AS
(
SELECT A.ItemID
,A.ItemText
,A.ItemFormula
,1 AS LevelID
FROM Formula A
UNION ALL
SELECT A.ItemID
,A.ItemText
,' '
+ TRIM (REPLACE (REPLACE (A.ItemFormula, B.ItemText, ' ( ' + B.ItemFormula + ' ) '), ' ', ' '))
+ ' ' AS ItemFormula
,A.LevelID + 1 AS LevelID
FROM REC A
CROSS APPLY
(
SELECT *
FROM
(
SELECT *
,ROW_NUMBER () OVER (ORDER BY GETDATE ()) AS RowNum
FROM Formula B2
WHERE CHARINDEX (B2.ItemText, A.ItemFormula) > 0
) B3
WHERE B3.RowNum = 1
) B
)
,FinalQ
AS
(
SELECT A2.ItemID
,A2.ItemFormula
,A2.LevelID
FROM
(
SELECT A.ItemID
,REPLACE (TRIM (A.ItemFormula), ' ', '') AS ItemFormula
,A.LevelID
,ROW_NUMBER () OVER (PARTITION BY A.ItemID ORDER BY A.LevelID DESC) AS RowNum
FROM REC A
) A2
WHERE A2.RowNum = 1
)
SELECT * FROM FinalQ A2 ORDER BY A2.ItemID;
Thanks in advance.

My question is that: Can I keep the previous recursion only each time the recursion happens?
No. The recursive CTE will keep adding rows to the ones found in previous iterations. You don't have some kind of control that would allow you to remove rows of the recursive CTE during its iterations.
You can, however, filter them out after the recursive CTE is complete, maybe on a secondary CTE that takes into account only the last meaninful rows (by some kind of rule to be defined).
The only vaguely similar idea is found in PostgreSQL where you can use the UNION clause in addition to UNION ALL, to avoid producing more identical rows. But this is different to what you need, anyway.

This is an enormously complicated problem. Here are the ideas:
Find which items do not need any insertions. These are the ones that have no references to any others.
Build an ordering for item insertion. An insertion can go into an item, assuming that the item is already defined. A recursive CTE can be used for this.
Enumerate the insertions. Everything from (1) gets a "1". The rest are in order.
Process the insertions in the insertion order.
Here is my solution:
with ordering as (
select itemid, itemtext, itemformula, convert(varchar(max), null) as otheritemtext, 1 as lev
from formula f
where not exists (select 1
from formula f2 join
string_split(f.itemformula, '+') s
on f2.itemtext = s.value
where f2.itemid <> f.itemid
)
union all
select f.itemid, f.itemtext, f.itemformula, convert(varchar(max), s.value), lev + 1
from formula f cross apply
string_split(f.itemformula, '+') s join
ordering o
on o.itemtext = s.value
-- where lev <= 2
),
ordered as (
select distinct o.*,
dense_rank() over (order by (case when lev = 1 then -1 else lev end), (case when lev = 1 then '' else otheritemtext end)) as seqnum
from ordering o
),
cte as (
select o.itemid, o.itemtext, o.itemformula, convert(varchar(max), o.otheritemtext) as otheritemtext,
o.itemformula as newformula, o.seqnum, 1 as lev
from ordered o
where seqnum = 1
union all
select cte.itemid, o.itemtext, o.itemformula, convert(varchar(max), cte.itemtext),
replace(o.itemformula, o.otheritemtext, concat('(', cte.newformula, ')')), o.seqnum, cte.lev + 1
from cte join
ordered o
on cte.itemtext = o.otheritemtext and cte.seqnum < o.seqnum
)
select *
from cte;
And the db<>fiddle.

You could take advantage of the logical order of the formulas if any (Item_100 can not reference Item_150) and process items in a descending order.
The following uses LIKE and it will not work for formulas which have overlapping patterns (eg ID_10 & ID_100) you could fix that by some string manipulation or by keeping ItemIDs of fixed length (eg. ID_10010 & ID_10100: start numbering of items from a high number like 10000)
declare #f table
(
ItemId int,
ItemFormula varchar(1000)
);
insert into #f(ItemId, ItemFormula)
values
(100, 'ID_3+ID_5'),
(110, 'ID_2+ID_6'),
(120, 'ID_100+ID_110'),
(130, 'ID_120+ID_4'),
(140, '(ID_130+ID_110)/ID_100'),
(150, 'sqrt(ID_140, ID_130)'),
(160, 'ID_150-ID_120+ID_140');
;with cte
as
(
select f.ItemId, replace(cast(f.ItemFormula as varchar(max)), isnull('ID_' + cast(r.ItemId as varchar(max)), ''), isnull('(' + r.ItemFormula+ ')', '')) as therepl, 1 as lvl
from #f as f
outer apply (
select *
from
(
select rr.*, row_number() over(order by rr.ItemId desc) as rownum
from #f as rr
where f.ItemFormula like '%ID_' + cast(rr.ItemId as varchar(1000)) + '%'
) as src
where rownum = 1
) as r
union all
select c.ItemId, replace(c.therepl, 'ID_' + cast(r.ItemId as varchar(max)), '(' + r.ItemFormula+ ')'), c.lvl+1
from cte as c
cross apply (
select *
from
(
select rr.*, row_number() over(order by rr.ItemId desc) as rownum
from #f as rr
where c.therepl like '%ID_' + cast(rr.ItemId as varchar(1000)) + '%'
) as src
where rownum = 1
) as r
),
rown
as
(
select *, row_number() over (partition by itemid order by lvl desc) as rownum
from cte
)
select *
from rown
where rownum = 1;

Related

How to get data from 2 rows which has same data in all columns except one in MSSQL

As in my title I want to take data from 2 rows but In my case each 2nd row has one different value compare to the first row.
I want to take all the common data along with the different data as a single row .
Here you can see each row has same values in another row except the 2nd rows last column.
Thanks.
Edits Result :
I suspect you have a some kind of ordering columns that could specify your actual data ordering if so, then you can use row_number() function
select * from (
select *,
row_number() over (partition by <common data cols> order by ? desc) Seq
from table t
) t
where seq = 1;
EDIT : I don't believe your inventort_item_id columns but yes you could use creation_date for ordering purpose
SELECT
EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE,
BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION AS POPULAR_PRODUCTS_IMAGE_PATHS
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY ITEM_CODE ORDER BY creation_date DESC) as Seq
FROM ECOM_PRODUCT_IMAGES EPI
) EPI
INNER JOIN ECOM_POPULAR_PRODUCTS_MAPPING EPPIM ON EPPIM.ITEM_CODE = EPI.ITEM_CODE
INNER JOIN LOM_MST_PRODUCT LMP ON LMP.PROD_CODE = EPI.ITEM_CODE
INNER JOIN LOM_LNK_PROD_COMP LLPC ON LLPC.COLOC_PROD_CODE = LMP.PROD_CODE
WHERE EPI.Seq = 1 AND
EPPIM.ITEM_STATUS = 'ACTIVE';
EDIT 2: In that case you need to use GROUP BY clause with conditional aggregation
SELECT
EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE,
MAX(CASE WHEN EPI.Seq = 2
THEN (BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION)
END) AS POPULAR_PRODUCTS_IMAGE_PATHS,
MAX(CASE WHEN EPI.Seq = 1
THEN (BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION)
END) AS PATH_NEW
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY ITEM_CODE ORDER BY creation_date DESC) as Seq
FROM ECOM_PRODUCT_IMAGES EPI
) EPI
INNER JOIN ECOM_POPULAR_PRODUCTS_MAPPING EPPIM ON EPPIM.ITEM_CODE = EPI.ITEM_CODE
INNER JOIN LOM_MST_PRODUCT LMP ON LMP.PROD_CODE = EPI.ITEM_CODE
INNER JOIN LOM_LNK_PROD_COMP LLPC ON LLPC.COLOC_PROD_CODE = LMP.PROD_CODE
WHERE EPPIM.ITEM_STATUS = 'ACTIVE'
GROUP BY EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE;
here is my approach, also using a window function.
sample data
if object_id('tempdb..#x') is not null drop table #x
CREATE TABLE #x (ITEM_CODE VARCHAR(10), PROD_DESC VARCHAR(20),
COLOR_PROD_PRICE DECIMAL, POPULAR_PRODUCTS_IMAGE_PATHS VARCHAR(200))
INSERT INTO #X(ITEM_CODE,PROD_DESC,COLOR_PROD_PRICE,POPULAR_PRODUCTS_IMAGE_PATHS) VALUES
('P0001', 'Axe Brand', 88.000, 'some_path_to_img1.jpg'),
('P0001', 'Axe Brand', 88.000, 'some_path_to_img2.jpg'),
('P0002', 'Almond Nuts', 499.000, 'some_path_to_img1.jpg'),
('P0002', 'Almond Nuts', 499.000, 'some_path_to_img2.jpg')
query - just change #x to your table and it should work
;WITH my_cte as
(
SELECT *,
ROW_NUMBER() OVER(PARTITION BY ITEM_CODE ORDER BY POPULAR_PRODUCTS_IMAGE_PATHS) AS 'track_row'
FROM #x
)
SELECT a.ITEM_CODE, a.PROD_DESC, a.COLOR_PROD_PRICE,
a.POPULAR_PRODUCTS_IMAGE_PATHS + ' ' + b.POPULAR_PRODUCTS_IMAGE_PATHS AS 'POPULAR_PRODUCTS_IMAGE_PATHS'
FROM my_cte AS a
INNER JOIN
my_cte AS b ON a.ITEM_CODE=b.ITEM_CODE
WHERE a.track_row=1 AND b.track_row=2
output
ITEM_CODE PROD_DESC COLOR_PROD_PRICE POPULAR_PRODUCTS_IMAGE_PATHS
P0001 Axe Brand 88 some_path_to_img1.jpg some_path_to_img2.jpg
P0002 Almond Nuts 499 some_path_to_img1.jpg some_path_to_img2.jpg

Getting most used words from a column of strings in SQL

So we have this database filled with a bunch of strings, in this case post titles.
What I want to do is:
Split the string up in to words
Count how many times words appear in strings
Give me to top 50 words
Not have this timeout in a data.se query
I tried using the info from this SO question adapted to data.se as follows:
select word, count(*) from (
select (case when instr(substr(p.Title, nums.n+1), ' ') then substr(p.Title, nums.n+1)
else substr(p.Title, nums.n+1, instr(substr(p.Title, nums.n+1), ' ') - 1)
end) as word
from (select ' '||Title as string
from Posts p
)Posts cross join
(select 1 as n union all select 2 union all select 10
) nums
where substr(p.Title, nums.n, 1) = ' ' and substr(p.Title, nums.n, 1) <> ' '
) w
group by word
order by count(*) desc
Unfortunately, this gives me a slew of errors:
'substr' is not a recognized built-in function name. Incorrect syntax
near '|'. Incorrect syntax near 'nums'.
So given a column of strings in SQL with a variable amount of text in each string, how can I get a list of the most frequently used X words?
As Blogbeard said, the query you provided does not work with SQL Server. Here is one way to count the most used word. This is based from a function, DelimitedSplitN4K, written by Jeff Moden and improved by members of the SQL Server Central community.
ONLINE DEMO
WITH E1(N) AS (
SELECT 1 FROM (VALUES
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
) t(N)
),
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b),
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b)
SELECT TOP 50
x.Item,
COUNT(*)
FROM Posts p
CROSS APPLY (
SELECT
ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = LTRIM(RTRIM(SUBSTRING(p.Title, l.N1, l.L1)))
FROM (
SELECT s.N1,
L1 = ISNULL(NULLIF(CHARINDEX(' ',p.Title,s.N1),0)-s.N1,4000)
FROM(
SELECT 1 UNION ALL
SELECT t.N+1
FROM(
SELECT TOP (ISNULL(DATALENGTH(p.Title)/2,0))
ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM E4
) t(N)
WHERE SUBSTRING(p.Title ,t.N,1) = ' '
) s(N1)
) l(N1, L1)
) x
WHERE x.item <> ''
GROUP BY x.Item
ORDER BY COUNT(*) DESC
Since creation of function is not allowed, I've written it that way. Here is the function definition if you're interested:
CREATE FUNCTION [dbo].[DelimitedSplitN4K](
#pString NVARCHAR(4000),
#pDelimiter NCHAR(1)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
E2(N) AS (SELECT 1 FROM E1 a, E1 b),
E4(N) AS (SELECT 1 FROM E2 a, E2 b),
cteTally(N) AS(
SELECT TOP (ISNULL(DATALENGTH(#pString)/2,0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,4000)
FROM cteStart s
)
SELECT
ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
And here is how you would use it:
SELECT TOP 50
x.Item,
COUNT(*)
FROM Posts p
CROSS APPLY dbo.DelimitedSplitN4K(p.Title, ' ') x
WHERE LTRIM(RTRIM(x.Item)) <> ''
GROUP BY x.Item
ORDER BY COUNT(*) DESC
The result:
Item
-------- -------
to 3812411
in 3331522
a 2543636
How 1770915
the 1534298
with 1341632
of 1297468
and 1166664
on 970554
from 964449
for 886007
not 835979
is 704724
using 703007
I 633838
- 632441
an 548450
when 449169
file 409717
how 358745
data 335271
do 323854
can 310298
get 305922
or 266317
error 263563
use 258408
value 254392
it 251254
my 238902
function 235832
by 231025
Android 228308
as 216654
array 209157
working 207445
does 207274
Is 205613
multiple 203336
that 197826
Why 196979
into 196591
after 192056
string 189053
PHP 187018
one 182360
class 179965
if 179590
text 174878
table 169393
Query solution (No Split Function Required)
PostgreSQL
select word, count(*) from
(
-- get 1st words
select split_part(title, ' ', 1) as word
from posts
union all
-- get 2nd words
select split_part(title, ' ', 2) as word
from posts
union all
-- get 3rd words
select split_part(title, ' ', 3) as word
from posts
-- can do this as many times as the number of words in longest title
) words
where word is not null
and word NOT IN ('', 'and', 'for', 'of', 'on')
group by word
order by count desc
limit 50;
for a concise version, see: https://dba.stackexchange.com/a/82456/95929
With the now available STRING_SPLIT function (since SQL Server 2016, Compatability Level 130) this query becomes much easier:
SELECT TOP 50
value [word]
, COUNT(*) [#times]
FROM posts p
CROSS APPLY STRING_SPLIT(p.title, ' ')
GROUP BY value
ORDER BY COUNT(*) DESC
See it in action on the Stack Exchange Data Explorer where it still runs under 2 minutes for the current number of posts in the Stack Overflow database. On Stack Overflow em Português it runs without having to fear for the dreaded timeout.
Results are similar to what you saw in the answer from Felix:

Select Random Numbers from a list

This is my query.
SELECT TOP 2 NUM
FROM QT_PIVOT
WHERE NUM BETWEEN 1 AND 45
ORDER BY NEWID()
I'm selecting 2 random numbers from a list but I don't want that these numbers to be continuous
Sometimes the result is
NUM
----
2
3
And I don't want this
Thanks , and sorry for my English u.u
Basically the same as the 2nd approach Gordon uses except it lacks the use of the lag function and therefor will work on SQL-2008.
WITH Data AS(
SELECT *, RowNum = ROW_NUMBER() OVER (ORDER BY NEWID())
FROM sys.objects AS O
),
r AS(
SELECT TOP 1 *, SkipRow = 0
FROM Data
WHERE Data.RowNum = 1
UNION ALL
SELECT d.*, SkipRow = CASE WHEN d.object_id BETWEEN r.object_id -2 AND r.object_id + 2 THEN 1 ELSE 0 END
FROM r
JOIN Data AS D
ON r.RowNum + 1 = D.RowNum
)
SELECT TOP 2 * FROM R
WHERE R.SkipRow = 0
One approach is to select the first number, and then select an appropriate second number:
WITH r AS (
SELECT TOP 1 num
FROM QT_PIVOT
WHERE NUM BETWEEN 1 AND 45
ORDER BY NEWId()
)
select num
from r
union all
select top 1 q.num
from qt_pivot q join
r
on q.num not in (r.num, r.num - 1, r.num + 1)
where q.num between 1 and 45
order by newid();
Another approach (if you had SQL Server 2012+) would use lag() to remove any possibilities that do not meet the conditions:
WITH r AS (
SELECT num, row_number() over (order by newid()) as seqnum
FROM QT_PIVOT
WHERE NUM BETWEEN 1 AND 45
)
SELECT r.num
FROM (SELECT r.*, LAG(num) OVER (ORDER BY seqnum) as prevnum
FROM r
) r
WHERE prevnum is null or
prevnum not in (num - 1, num + 1);
EDIT:
The first approach doesn't work, because SQL Server always re-evaluates CTEs, and there is not even a hint to fix this problem. Here is an alternative approach, that will ensure that values are not consecutive:
WITH r as (
SELECT (1 + checksum(newid()) * 45) as r1,
(2 + checksum(newid()) * 43) as r2
)
SELECT q.num
FROM QT_PIVOT q
WHERE q.num = r.r1 or
q.num = 1 + (r.r1 + r.r2) % 45;
This calculates a two random numbers. The first is a random position. The second is an allowable offset (hence the "2" and "43") to guarantee that the numbers are not adjacent.

SQL query for finding first missing sequence string (prefix+no)

T-SQL query for finding first missing sequence string (prefix+no)
Sequence can have a prefix + a continuing no.
ex sequence will be
ID
-------
AUTO_500
AUTO_501
AUTO_502
AUTO_504
AUTO_505
AUTO_506
AUTO_507
AUTO_508
So above the missing sequence is AUTO_503 or if there is no missing sequence then it must return next sequence.
Also starting no is to specified ex. 500 in this case and prefix can be null i.e. no prefix only numbers as sequence.
You could LEFT JOIN the id numbers on shifted(+1) values to find gaps in sequential order:
SELECT
MIN(a.offsetnum) AS first_missing_num
FROM
(
SELECT 500 AS offsetnum
UNION
SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) + 1
FROM tbl
) a
LEFT JOIN
(SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) AS idnum FROM tbl) b ON a.offsetnum = b.idnum
WHERE
a.offsetnum >= 500 AND b.idnum IS NULL
SQLFiddle Demo
Using a recursive CTE to dynamically generate the sequence between the min and max of the ID Numbers maybe over complicated things a bit but it seems to work -
LIVE ON FIDDLE
CREATE TABLE tbl (
id VARCHAR(55)
);
INSERT INTO tbl VALUES
('AUTO_500'),
('AUTO_501'),
('AUTO_502'),
('AUTO_504'),
('AUTO_505'),
('AUTO_506'),
('AUTO_507'),
('AUTO_508'),
('509');
;WITH
data_cte(id)AS
(SELECT [id] = CAST(REPLACE(id, 'AUTO_', '') AS INT) FROM tbl)
,maxmin_cte(minId, maxId)AS
(SELECT [minId] = min(id),[maxId] = max(id) FROM data_cte)
,recursive_cte(n) AS
(
SELECT [minId] n from maxmin_cte
UNION ALL
SELECT (1 + n) n FROM recursive_cte WHERE n < (SELECT [maxId] from maxmin_cte)
)
SELECT x.n
FROM
recursive_cte x
LEFT OUTER JOIN data_cte y ON
x.n = y.id
WHERE y.id IS NULL
Check this solution.Here you just need to add identity column.
CREATE TABLE tbl (
id VARCHAR(55),
idn int identity(0,1)
);
INSERT INTO tbl VALUES
('AUTO_500'),
('AUTO_501'),
('AUTO_502'),
('AUTO_504'),
('AUTO_505'),
('AUTO_506'),
('AUTO_507'),
('AUTO_508'),
('509');
SELECT min(idn+500) FROM tbl where 'AUTO_'+cast((idn+500) as varchar)<>id
try this:
with cte as(
select cast(REPLACE(id,'AUTO_','') as int)-500+1 [diff],ROW_NUMBER()
over(order by cast(REPLACE(id,'AUTO_','') as int)) [rnk] from tbl)
select top 1 'AUTO_'+cast(500+rnk as varchar(50)) [ID] from cte
where [diff]=[rnk]
order by rnk desc
SQL FIddle Demo
Had a similar situation, where we have R_Cds that were like this R01005
;with Active_R_CD (R_CD)
As
(
Select Distinct Cast(Replace(R_CD,'R', ' ') as Int)
from table
where stat = 1)
select Arc.R_CD + 1 as 'Gaps in R Code'
from Active_R_CD as Arc
left outer join Active_R_CD as r on ARC.R_CD + 1 = R.R_CD
where R.R_CD is null
order by 1

SQL: how to get all the distinct characters in a column, across all rows

Is there an elegant way in SQL Server to find all the distinct characters in a single varchar(50) column, across all rows?
Bonus points if it can be done without cursors :)
For example, say my data contains 3 rows:
productname
-----------
product1
widget2
nicknack3
The distinct inventory of characters would be "productwigenka123"
Here's a query that returns each character as a separate row, along with the number of occurrences. Assuming your table is called 'Products'
WITH ProductChars(aChar, remain) AS (
SELECT LEFT(productName,1), RIGHT(productName, LEN(productName)-1)
FROM Products WHERE LEN(productName)>0
UNION ALL
SELECT LEFT(remain,1), RIGHT(remain, LEN(remain)-1) FROM ProductChars
WHERE LEN(remain)>0
)
SELECT aChar, COUNT(*) FROM ProductChars
GROUP BY aChar
To combine them all to a single row, (as stated in the question), change the final SELECT to
SELECT aChar AS [text()] FROM
(SELECT DISTINCT aChar FROM ProductChars) base
FOR XML PATH('')
The above uses a nice hack I found here, which emulates the GROUP_CONCAT from MySQL.
The first level of recursion is unrolled so that the query doesn't return empty strings in the output.
Use this (shall work on any CTE-capable RDBMS):
select x.v into prod from (values('product1'),('widget2'),('nicknack3')) as x(v);
Test Query:
with a as
(
select v, '' as x, 0 as n from prod
union all
select v, substring(v,n+1,1) as x, n+1 as n from a where n < len(v)
)
select v, x, n from a -- where n > 0
order by v, n
option (maxrecursion 0)
Final Query:
with a as
(
select v, '' as x, 0 as n from prod
union all
select v, substring(v,n+1,1) as x, n+1 as n from a where n < len(v)
)
select distinct x from a where n > 0
order by x
option (maxrecursion 0)
Oracle version:
with a(v,x,n) as
(
select v, '' as x, 0 as n from prod
union all
select v, substr(v,n+1,1) as x, n+1 as n from a where n < length(v)
)
select distinct x from a where n > 0
Given that your column is varchar, it means it can only store characters from codes 0 to 255, on whatever code page you have. If you only use the 32-128 ASCII code range, then you can simply see if you have any of the characters 32-128, one by one. The following query does that, looking in sys.objects.name:
with cteDigits as (
select 0 as Number
union all select 1 as Number
union all select 2 as Number
union all select 3 as Number
union all select 4 as Number
union all select 5 as Number
union all select 6 as Number
union all select 7 as Number
union all select 8 as Number
union all select 9 as Number)
, cteNumbers as (
select U.Number + T.Number*10 + H.Number*100 as Number
from cteDigits U
cross join cteDigits T
cross join cteDigits H)
, cteChars as (
select CHAR(Number) as Char
from cteNumbers
where Number between 32 and 128)
select cteChars.Char as [*]
from cteChars
cross apply (
select top(1) *
from sys.objects
where CHARINDEX(cteChars.Char, name, 0) > 0) as o
for xml path('');
If you have a Numbers or Tally table which contains a sequential list of integers you can do something like:
Select Distinct '' + Substring(Products.ProductName, N.Value, 1)
From dbo.Numbers As N
Cross Join dbo.Products
Where N.Value <= Len(Products.ProductName)
For Xml Path('')
If you are using SQL Server 2005 and beyond, you can generate your Numbers table on the fly using a CTE:
With Numbers As
(
Select Row_Number() Over ( Order By c1.object_id ) As Value
From sys.columns As c1
Cross Join sys.columns As c2
)
Select Distinct '' + Substring(Products.ProductName, N.Value, 1)
From Numbers As N
Cross Join dbo.Products
Where N.Value <= Len(Products.ProductName)
For Xml Path('')
Building on mdma's answer, this version gives you a single string, but decodes some of the changes that FOR XML will make, like & -> &.
WITH ProductChars(aChar, remain) AS (
SELECT LEFT(productName,1), RIGHT(productName, LEN(productName)-1)
FROM Products WHERE LEN(productName)>0
UNION ALL
SELECT LEFT(remain,1), RIGHT(remain, LEN(remain)-1) FROM ProductChars
WHERE LEN(remain)>0
)
SELECT STUFF((
SELECT N'' + aChar AS [text()]
FROM (SELECT DISTINCT aChar FROM Chars) base
ORDER BY aChar
FOR XML PATH, TYPE).value(N'.[1]', N'nvarchar(max)'),1, 1, N'')
-- Allow for a lot of recursion. Set to 0 for infinite recursion
OPTION (MAXRECURSION 365)