Pass Table Values into Variables - sql

I have a table with Names and some values. I want to pass the values associated to the names into another table to prepopulate a list of values. I'm not sure the best way to approach this either by creating a function or procedure.
Below is the query I have now where I have to SET the variables manually. But rather doing it manually I would like to pass the values from the other table into this. How would I do this?
Example I have a table called ABC and in that table 3 values
Name|AVG |DEV
A |1.89|.74
B |2.43|1.20
C |.74 |.12
I want to pass all the values from that table into the query below: the AVG value in the table goes into the #AVG variable in the query below, and the DEV value in the table goes into the #deviation variable.
The results can be put into a new table or in a query.
DECLARE #avg DECIMAL(4,1) = 1.89 --this would be row 1 (value A) avg
DECLARE #deviation DECIMAL(4,1) = 0.74 --this would be row 1 (value A) DEV
DECLARE #startnum DECIMAL(4,1)= #Avg - (#deviation * 3)
DECLARE #endnum DECIMAL(4,1)= #Avg + (#deviation * 3)
;
WITH gen AS (
SELECT CAST(#startnum AS decimal(4,1)) AS Mulitiple
UNION ALL
SELECT CAST(Mulitiple+.1 AS decimal(4,1)) FROM gen WHERE Mulitiple+.1<=#endnum
)
SELECT A.Mulitiple , CAST(((A.Mulitiple-#avg)/#deviation) AS DECImAL(4,2)) AS ZScore
,Z.Y AS Area
FROM gen AS A
LEFT JOIN STAT..ZScore AS Z ON CAST(((A.Mulitiple-#avg)/#deviation) AS DECImAL(4,2)) = CAST(Z.Zscore AS decimal(4,2))
--WHERE Z.Y IS NOT NULL
option (maxrecursion 10000)

This approach uses a tally function named dbo.fnNumbers to generate the range of zscores.
dbo.fnNumbers
-- Inline table-valued tally function: returns a single column n holding the
-- integers 1, 2, 3, ... (at most #n of them), preceded by one extra row
-- containing 0 when #zero_or_one = 0.
--   #zero_or_one : 0 adds the leading 0 row; any other value omits it
--   #n           : row limit applied to the generated 1-based sequence
-- No ORDER BY is applied to the result; callers that need order must sort.
create function [dbo].[fnNumbers](
#zero_or_one bit,
#n bigint)
returns table with schemabinding as return
-- seed CTE: 4 rows whose values are irrelevant (only the row count matters)
with n(n) as (select null from (values (1),(2),(3),(4)) n(n))
-- optional leading 0 row
select 0 n where #zero_or_one = 0
union all
-- 16 cross-joined copies of the 4-row seed allow up to 4^16 rows;
-- top(#n) cuts the numbering off after #n rows
select top(#n) row_number() over(order by (select null)) n
from n na, n nb, n nc, n nd, n ne, n nf, n ng, n nh,
n ni, n nj, n nk, n nl, n nm, n np, n nq, n nr;
Query
drop table if exists #abc;
go
create table #abc(
[name] varchar(2),
[avg] decimal(4,1),
[dev] decimal(4,1));
insert #abc([name], [avg], [dev]) values
('a', 1.89, .74),
('b', 2.43, 1.20),
('c', 0.74, .12);
select a.[name], gen.multiple, gen_z.zscore, z.y as area
from #abc a
cross apply dbo.fnNumbers(1, cast(2*(a.dev*3)+0.1 as decimal(4,1))*10) fn
cross apply (values (cast((a.[avg]-(a.dev*3)+(fn.N-1)*0.1) as decimal(4,1)))) gen(multiple)
cross apply (values (cast(((gen.multiple-a.[avg])/a.dev) AS decimal(4,2)))) gen_z(zscore)
left join stat..zscore z on gen_z.zscore=cast(z.zscore as decimal(4,2))
order by a.[name], gen.multiple;

If I understand correctly, you can just use JOIN. In this case, you can introduce the table using CROSS JOIN:
SELECT ABC.*, A.Mulitiple ,
CAST(((A.Mulitiple - ABC.avg) / ABC.dev) AS DECImAL(4, 2)) AS ZScore
,Z.Y AS Area
FROM ABC CROSS JOIN
gen a LEFT JOIN
STAT..ZScore Z
ON CAST(((A.Mulitiple - abc.avg)/ abc.dev) AS DECIMAL(4,2)) = CAST(Z.Zscore AS decimal(4,2))
--WHERE Z.Y IS NOT NULL
You can avoid having the formula twice by using:
-- Compute the z-score once in a CROSS APPLY so the formula is not repeated in
-- both the SELECT list and the join condition.
-- NOTE(review): `gen` is not defined in this snippet; presumably it is the
-- recursive CTE from the question and must be declared in the same statement.
SELECT ABC.*, A.Mulitiple, v.ZScore, Z.Y AS Area
FROM ABC CROSS JOIN
gen a CROSS APPLY
(VALUES (CAST((A.Mulitiple - ABC.avg) / ABC.dev AS DECIMAL(4, 2)))
) v(ZScore) LEFT JOIN
STAT..ZScore Z
ON v.ZScore = CAST(Z.Zscore AS decimal(4,2))
--WHERE Z.Y IS NOT NULL

Related

Find missed max and min value in a sequence of numbers

For example, I have a sequence of numbers: {1, 2, 5, 7}.
I need to find the smallest and the biggest one, which are missed in this sequence (min=3 and max=6 for this example). Values can also be negative.
Here is my solution, but it doesn't pass on extra checking database (Wrong number of records (less by 1)), so I can't say what is exactly wrong. I also tried versions with LEFT OUTER JOIN and EXCEPT predicates - same problem. Please, help me to improve my solution.
WITH AA AS (SELECT MAX(Q_ID) MX
FROM UTQ),
BB AS (SELECT MIN(Q_ID) CODE
FROM UTQ
UNION ALL
SELECT CODE + 1
FROM BB
WHERE CODE < (SELECT MX
FROM AA)
)
SELECT MIN(CODE) MIN_RES, MAX(CODE) MAX_RES
FROM BB
WHERE CODE NOT IN (SELECT Q_ID
FROM UTQ)
One method is not exists:
-- Smallest missing value: the lowest q_id + 1 that is absent from utq.
-- The "q_id < max" guard keeps max(q_id) + 1 from being reported as a gap
-- when the sequence has no holes (the query then returns NULL).
select min(utq.q_id + 1)
from utq
where utq.q_id < (select max(q_id) from utq)
  and not exists (select 1 from utq utq2 where utq2.q_id = utq.q_id + 1)
union all
-- Largest missing value: the highest q_id - 1 that is absent from utq,
-- guarded symmetrically so min(q_id) - 1 is never reported.
select max(utq.q_id - 1)
from utq
where utq.q_id > (select min(q_id) from utq)
  and not exists (select 1 from utq utq2 where utq2.q_id = utq.q_id - 1);
You can also use lead() and lag():
-- Pair every q_id with its predecessor and successor (in q_id order), then
-- pick the lowest value just above a row whose successor is not q_id + 1 and
-- the highest value just below a row whose predecessor is not q_id - 1.
-- Rows at either end have a NULL neighbour, so their CASE yields NULL and
-- they are ignored by min()/max().
with neighbours as (
    select q_id,
           lag(q_id) over (order by q_id) as prev_q_id,
           lead(q_id) over (order by q_id) as next_q_id
    from utq
)
select min(case when next_q_id <> q_id + 1 then q_id + 1 end),
       max(case when prev_q_id <> q_id - 1 then q_id - 1 end)
from neighbours;
A tally based method seems like a good approach here. Especially if the sequences are large.
The first CTE summarizes the maximum and minimum q_id's in the test table. The second CTE selects the missing integers by generating the complete sequence (using the fnNumbers tvf) between the minimum and maximum q_id values and comparing WHERE NOT EXISTS to the original sequence. Something like this.
numbers function
-- Inline table-valued numbers (tally) function. Output is one column n:
-- a 0 row when #zero_or_one = 0, followed by the integers 1, 2, 3, ...
-- limited to #n rows. The result carries no ORDER BY of its own.
--   #zero_or_one : 0 to include the 0 row, otherwise it is left out
--   #n           : how many rows to take from the 1-based numbering
create function [dbo].[fnNumbers](
#zero_or_one bit,
#n bigint)
returns table with schemabinding as return
-- 4-row seed; the selected NULLs are placeholders, only the row count is used
with n(n) as (select null from (values (1),(2),(3),(4)) n(n))
-- emitted only when the caller asks for a zero-based sequence
select 0 n where #zero_or_one = 0
union all
-- the seed is cross joined with itself 16 times (up to 4^16 rows) and
-- numbered with row_number(); top(#n) stops after #n rows
select top(#n) row_number() over(order by (select null)) n
from n na, n nb, n nc, n nd, n ne, n nf, n ng, n nh,
n ni, n nj, n nk, n nl, n nm, n np, n nq, n nr;
data and query
drop table if exists #seq;
go
create table #seq(
q_id int unique not null);
insert #seq values (1),(2),(5),(7);
with
max_min_cte(max_q, min_q) as (
select max(q_id), min(q_id)
from #seq),
missing_cte(q_id) as (
select mm.min_q+fn.n
from max_min_cte mm
cross apply dbo.fnNumbers(0, mm.max_q-mm.min_q) fn
where not exists (select 1
from #seq s
where (mm.min_q+fn.n)=s.q_id))
select max(q_id) max_missing, min(q_id) min_missing
from missing_cte;
output
max_missing min_missing
6 3
You can try like following using LEAD
-- Each row pairs a value with its successor (NQ_ID); a difference other than 1
-- marks a gap. The smallest missing value sits just above the lower bound of
-- the first gap, and the largest missing value sits just below the upper
-- bound of the last gap (NQ_ID - 1, not Q_ID + 1, which is only correct when
-- the last gap is a single value wide).
SELECT MIN(Q_ID + 1) AS MinValue
,MAX(NQ_ID - 1) AS MaxValue
FROM (
SELECT Q_ID,LEAD(Q_ID) OVER (ORDER BY Q_ID) NQ_ID
FROM (VALUES (1),(2),(5),(7)) v(Q_ID)
) t
WHERE NQ_ID - Q_ID <> 1

How to Read Data Number by Number

I have a field that contains numbers such as the examples below in #Numbers. Each number within each row in #Numbers relates
to many different values that are contained within the #Area table.
I need to make a relationship from #Numbers to #Area using each number within each row.
-- Sample data: each row of #Numbers holds a number whose individual digits
-- are to be looked up in #Area.
CREATE TABLE #Numbers
(
Number int
);
-- Each INSERT ends with its last SELECT; a trailing UNION would pull the
-- following "select * from ..." into the INSERT instead of running it.
INSERT INTO #Numbers
(
Number
)
SELECT 102 UNION ALL
SELECT 1 UNION ALL
SELECT 2;
select * from #Numbers;
-- Lookup table: one digit (Number) maps to many areas.
CREATE TABLE #Area
(
Number int,
Area varchar(50)
);
INSERT INTO #Area
(
Number,
Area
)
SELECT 0,'Area1' UNION ALL
SELECT 1,'Area2' UNION ALL
SELECT 1,'Area3' UNION ALL
SELECT 1,'Area5' UNION ALL
SELECT 1,'Area8' UNION ALL
SELECT 1,'Area9' UNION ALL
SELECT 2,'Area12' UNION ALL
SELECT 2,'Area43' UNION ALL
SELECT 2,'Area25';
select * from #Area;
It would return the following for 102:
102,Area2
102,Area3
102,Area5
102,Area8
102,Area9
102,Area1
102,Area12
102,Area43
102,Area25
For 1 it would return:
1,Area2
1,Area3
1,Area5
1,Area8
1,Area9
For 2 it would return:
2,Area12
2,Area43
2,Area25
Note how the numbers match up to the individual Areas and return the values accordingly.
Well, the OP marked an answer already, which even got votes. Maybe he will not read this, but here is another option using a direct, simple SELECT which (according to the execution plan) seems to use far fewer resources:
SELECT *
FROM #Numbers t1
LEFT JOIN #Area t2 ON CONVERT(VARCHAR(10), t1.Number) like '%' + CONVERT(CHAR(1), t2.Number) + '%'
GO
Note! According to Execution Plan this solution uses only 27% while the selected answer (written by Squirrel) uses 73%, but Execution Plan can be misleading sometimes and you should check IO and TIME statistics as well using the real table structure and real data.
It looks like you need to extract each individual digit from the numbers in #Numbers and then use it to join to #Area:
; with tally as
(
select n = 1
union all
select n = n + 1
from tally
where n < 10
)
select n.Number, a.Area
from #Numbers n
cross apply
(
-- here it convert n.Number to string
-- then extract 1 digit
-- and finally convert back to integer
select num = convert(int,
substring(convert(varchar(10), n.Number),
t.n,
1)
)
from tally t
where t.n <= len(convert(varchar(10), n.Number))
) d
inner join #Area a on d.num = a.Number
order by n.Number
or if you prefer to do it in arithmetic and not string
-- Peel digits off arithmetically: Num is the current lowest digit and Rest is
-- what remains after removing it. Recursion stops once nothing remains, so
-- every digit is produced (including the leading digit of values such as 10
-- or 100), and no power of ten is ever computed, which avoids int overflow
-- for 10-digit values.
-- NOTE(review): assumes Number is non-negative — confirm against the data.
; with Num as
(
select Number, Rest = Number / 10, Num = Number % 10
from #Numbers
union all
select Number, Rest = Rest / 10, Num = Rest % 10
from Num
where Rest > 0
)
-- one output row per (digit, matching area) pair
select n.Number, a.Area
from Num n
inner join #Area a on n.Num = a.Number
order by n.Number
Here is my idea. In theory, it should work.
Have a table (temp or permanent) with the values and it's translation
I.E.
ID value
1 Area1, Area2, Area7, Area8, Area15
2 Area28, Area35
etc
Take each row and put a some special character between each number. Use a function like string_split with that character to turn it into a column of values.
e.g 0123 will then be something like 0|1|2|3 and when you run that through string_split you would get
0
1
2
3
Now join each value to your lookup table and return the Value.
Now you have a row with all the values that you want. Use another function like STUFF FOR XML and put those values back into a single column.
This doesn't sound very efficient.. but this is one way of achieving what you desire..
Another is to do a replace().. but that would be very messy!
Create a third table called n which contains a single column also called n that contains integers from 1 to the maximum number of digits in your number. Make it 1000 if you like, doesn't matter. Then:
select #numbers.number, substring(convert(varchar,#numbers.number),n,1) as chr, Area
from #numbers
join n on n>0 and n <=len(convert(varchar,number))
join #area on #area.number=substring(convert(varchar,#numbers.number),n,1)
The middle column chr is just there to show you what it's doing, and would be removed from the final result.

How to calculate the total time while excluding the overlapped time in Microsoft SQL?

I have a problem with tackling the Gaps and Islands type in this situation.
I want to calculate the total downtime in Microsoft SQL Server. Is there any way I can produce the following output? Thank you!
The actual downtime = Total downtime - overlapped time
In this case:
Machine A: 14 hours
Machine B: 5 hours but 4 hours overlapped
Machine C: 1 hour
Machine D: 2 hours
Machine E: 1 hour overlapped
Machine F: 2 hours but 1 hour overlapped
In total, it is 19 hours as the actual downtime'
My table is a query. Please let me know how I can put the query in. Thank you!
Here's a solution based on techniques from Itzik Ben-Gan (noted in the source below). The solution uses the DENSE_RANK function. The code is complete - it can be copied into an SSMS query window and executed.
USE tempdb
GO
IF OBJECT_ID('dbo.GetNums', 'IF') IS NOT NULL
DROP FUNCTION dbo.GetNums;
GO
/* dbo.GetNums function is from Itzik Ben-Gan's article on packing intervals:
(http://blogs.solidq.com/en/sqlserver/packing-intervals/). */
CREATE FUNCTION dbo.GetNums(#n AS BIGINT)
RETURNS TABLE
AS
RETURN
WITH
L0 AS (SELECT 1 AS c UNION ALL SELECT 1),
L1 AS (SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B),
L2 AS (SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B),
L3 AS (SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B),
L4 AS (SELECT 1 AS c FROM L3 AS A CROSS JOIN L3 AS B),
L5 AS (SELECT 1 AS c FROM L4 AS A CROSS JOIN L4 AS B),
Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n FROM L5)
SELECT TOP (#n) n FROM Nums ORDER BY n;
GO
IF OBJECT_ID('dbo.Production', 'U') IS NOT NULL
DROP TABLE dbo.Production;
GO
CREATE TABLE dbo.Production
(
production_line INT NOT NULL,
machine CHAR(1) NOT NULL,
[date] DATE NOT NULL,
time_started TIME NOT NULL,
time_completed TIME NOT NULL,
CONSTRAINT PK_Production PRIMARY KEY(production_line, machine)
);
INSERT INTO dbo.Production
(production_line, machine, [date], time_started, time_completed)
VALUES
(1, 'A', '2018-01-16', '00:00:00', '14:00:00'),
(1, 'B', '2018-01-16', '10:00:00', '15:00:00'),
(1, 'C', '2018-01-16', '17:00:00', '18:00:00'),
(1, 'D', '2018-01-16', '21:00:00', '23:00:00'),
(1, 'E', '2018-01-16', '21:30:00', '22:30:00'),
(1, 'F', '2018-01-16', '17:00:00', '19:00:00');
/* Algorithm adapted from "Microsoft SQL Server 2012
High-Performance T-SQL Using Window Functions" by
Itzik Ben-Gan (p. 198). */
DECLARE #production_date AS DATE = '2018-01-16';
DECLARE #from AS TIME = '00:00:00';
DECLARE #to AS TIME = '23:59:59';
-- Total downtime for one production date, counting overlapping intervals once.
-- NOTE(review): this works on whole-hour marks and BETWEEN is inclusive, so two
-- intervals separated by exactly one hour (e.g. 10:00-11:00 and 12:00-13:00)
-- yield consecutive hour marks and are merged into a single range — confirm
-- that this granularity is acceptable for the real data.
WITH Hours AS
(
-- One row per hour mark: #from + 0h through #from + 23h.
SELECT
DATEADD(hour, (nums.n - 1), #from) AS hr
FROM
dbo.GetNums(24 /* Hours in a day. */) AS nums
),
Groups AS
(
-- One row per (interval, hour mark) pair where the hour mark lies inside the
-- interval. DENSE_RANK gives duplicate hour marks the same rank, so hr minus
-- its rank (in hours) is constant across each run of consecutive hour marks
-- and serves as the island identifier.
SELECT
H.hr,
DATEADD(hour, -1 * DENSE_RANK() OVER (ORDER BY H.hr), H.hr) AS grp
FROM
dbo.Production AS P
INNER JOIN Hours AS H ON H.hr BETWEEN P.time_started AND P.time_completed
WHERE
p.[date] = #production_date
),
Ranges AS
(
-- Collapse each island to its first and last hour mark.
SELECT
MIN(hr) AS range_start,
MAX(hr) AS range_end
FROM
Groups
GROUP BY
grp
)
-- Sum the lengths, in hours, of all merged ranges.
SELECT
SUM(DATEDIFF(hour, range_start, range_end)) AS hours_of_downtime
FROM
Ranges
DROP FUNCTION dbo.GetNums;
DROP TABLE dbo.Production;
EDIT: In response to OP's question about if their data comes from a query. This modified example removes the temporary dbo.Production table, and adds a Production Common Table Expression.
USE tempdb
GO
IF OBJECT_ID('dbo.GetNums', 'IF') IS NOT NULL
DROP FUNCTION dbo.GetNums;
GO
/* dbo.GetNums function is from Itzik Ben-Gan's article on packing intervals:
(http://blogs.solidq.com/en/sqlserver/packing-intervals/). */
CREATE FUNCTION dbo.GetNums(#n AS BIGINT)
RETURNS TABLE
AS
RETURN
WITH
L0 AS (SELECT 1 AS c UNION ALL SELECT 1),
L1 AS (SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B),
L2 AS (SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B),
L3 AS (SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B),
L4 AS (SELECT 1 AS c FROM L3 AS A CROSS JOIN L3 AS B),
L5 AS (SELECT 1 AS c FROM L4 AS A CROSS JOIN L4 AS B),
Nums AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n FROM L5)
SELECT TOP (#n) n FROM Nums ORDER BY n;
GO
/* Algorithm adapted from "Microsoft SQL Server 2012
High-Performance T-SQL Using Window Functions" by
Itzik Ben-Gan (p. 198). */
DECLARE #production_date AS DATE = '2018-01-16';
DECLARE #from AS TIME = '00:00:00';
DECLARE #to AS TIME = '23:59:59';
WITH Hours AS
(
SELECT
DATEADD(hour, (nums.n - 1), #from) AS hr
FROM
dbo.GetNums(24 /* Hours in a day. */) AS nums
),
Production AS
(
SELECT
production_line,
machine,
[date],
time_started,
time_completed
FROM
production_table
WHERE
[date] = #production_date
),
Groups AS
(
SELECT
H.hr,
DATEADD(hour, -1 * DENSE_RANK() OVER (ORDER BY H.hr), H.hr) AS grp
FROM
Production AS P
INNER JOIN Hours AS H ON H.hr BETWEEN P.time_started AND P.time_completed
),
Ranges AS
(
SELECT
MIN(hr) AS range_start,
MAX(hr) AS range_end
FROM
Groups
GROUP BY
grp
)
SELECT
SUM(DATEDIFF(hour, range_start, range_end)) AS hours_of_downtime
FROM
Ranges
DROP FUNCTION dbo.GetNums;
This is pretty ugly, but here is what I did:
Combine all records
Check for overlaps
Get min start and max stop for overlaps
Remove overlaps from the original set
sum the start/stop deltas for overlaps and non-overlaps
I created a table using your sample data above and got your answer: 19.
And the code is:
WITH aset
AS (
SELECT [Machine]
, [Date]
, [TimeStarted]
, TimeCompleted
FROM [CEA_DBA].[dbo].[LineInteruptions]
WHERE date = '2018-01-16'),
overlaps
AS (
SELECT a.machine
AS a_machine
, b.machine
AS b_machine
, CASE
WHEN a.TimeStarted <= b.TimeStarted
THEN a.TimeStarted
ELSE b.TimeStarted
END
AS timeStarted
, CASE
WHEN a.TimeCompleted >= b.TimeCompleted
THEN a.TimeCompleted
ELSE b.TimeCompleted
END
AS timeCompleted
FROM aset
AS a
CROSS JOIN aset
AS b
WHERE b.TimeStarted <= a.timeCompleted
AND b.timecompleted >= a.timecompleted
AND a.Machine <> b.Machine),
nonoverlaps
AS (
SELECT aset.timeStarted
, aset.timeCompleted
FROM aset
LEFT OUTER JOIN overlaps
AS oa ON aset.Machine = oa.a_machine
LEFT OUTER JOIN overlaps
AS ob ON aset.Machine = ob.b_machine
WHERE oa.a_machine IS NULL
AND ob.b_machine IS NULL),
gset
AS (
SELECT TimeStarted
, TimeCompleted
FROM overlaps
UNION ALL
SELECT timestarted
, timecompleted
FROM nonoverlaps)
SELECT SUM(DATEDIFF(hour, TimeStarted, timeCompleted))
AS downtime
FROM gset;

Generate sequence in SQL Server, poor performance with cross apply

I obtained the following code from the web many years ago and it has served me very well. It is simply a function that generates a sequence of numbers from 1 to whatever you pass in.
Basically it's a way of doing a for loop in a SQL statement.
CREATE FUNCTION [SequenceCreate]
(#MaxValue INT)
RETURNS TABLE
AS
RETURN
WITH
Num1 (n) AS (SELECT 1 UNION ALL SELECT 1),
Num2 (n) AS (SELECT 1 FROM Num1 AS X, Num1 AS Y),
Num3 (n) AS (SELECT 1 FROM Num2 AS X, Num2 AS Y),
Num4 (n) AS (SELECT 1 FROM Num3 AS X, Num3 AS Y),
Num5 (n) AS (SELECT 1 FROM Num4 AS X, Num4 AS Y),
Num6 (n) AS (SELECT 1 FROM Num5 AS X, Num5 AS Y),
Nums (n) AS
(SELECT ROW_NUMBER() OVER(ORDER BY n)
FROM Num6)
SELECT n AS [Value] FROM Nums
WHERE n BETWEEN 1 AND #MaxValue;
This generally works very well and is fast but I have found its performance to be very poor when using a cross apply statement, eg
DECLARE #T TABLE(StartNum INT, ItemCount INT)
INSERT INTO #T VALUES (100, 5)
INSERT INTO #T VALUES (110, 7)
INSERT INTO #T VALUES (55, 3)
SELECT Seq.Value + StartNum FROM #T
CROSS APPLY he.SequenceCreate(ItemCount) AS Seq
This is very slow on my machine. Does anyone know why it works fine when executed once but runs very badly when executed 3 times via cross apply? Even if the #T table contains only 1 row the performance is still terrible. Is there a better way to write this?
Thanks in advance,
Michael
The query optimizer figures it is better to execute the function only once and then use the result in a join to get the rows you want. It does that because your function is an inline table-valued function. If you instead make your function a multi-statement table-valued function, it will be executed once for each row in your source table. However, I would recommend that you create a numbers table instead, as suggested by Igor.
CREATE FUNCTION [SequenceCreate]
(#MaxValue INT)
RETURNS #T TABLE ([Value] INT NOT NULL PRIMARY KEY)
AS
BEGIN
WITH
Num1 (n) AS (SELECT 1 UNION ALL SELECT 1),
Num2 (n) AS (SELECT 1 FROM Num1 AS X, Num1 AS Y),
Num3 (n) AS (SELECT 1 FROM Num2 AS X, Num2 AS Y),
Num4 (n) AS (SELECT 1 FROM Num3 AS X, Num3 AS Y),
Num5 (n) AS (SELECT 1 FROM Num4 AS X, Num4 AS Y),
Num6 (n) AS (SELECT 1 FROM Num5 AS X, Num5 AS Y),
Nums (n) AS
(SELECT ROW_NUMBER() OVER(ORDER BY n)
FROM Num6)
INSERT INTO #T
SELECT n AS [Value] FROM Nums
WHERE n BETWEEN 1 AND #MaxValue;
RETURN
END
If you look at the estimated execution plans of both of your queries, you will see a lot of Constant Scans whose outputs are joined by Nested Loops.
In case of
select * from dbo.SequenceCreate (100)
Estimated number of rows for every Constant Scan is 1
In case of
SELECT N.N + StartNum
FROM #T t
LEFT JOIN Numbers AS N ON N.N <= T.ItemCount
Estimated number of rows for every Constant Scan is 2. So this is a good example of geometric progression. The last Nested Loops operator returns 4294970000 rows - 36 GB.
I cannot say why the optimizer chooses this plan, but it does.
You can use following approach instead.
First, create table with consecutiv numbers
CREATE TABLE Numbers(N INT PRIMARY KEY NOT NULL IDENTITY(1,1));
GO
INSERT INTO Numbers DEFAULT VALUES;
GO 1000 -- it takes about 2 minutes for 1000 but you need to execut it just once
Use following script:
DECLARE #T TABLE(StartNum INT, ItemCount INT)
INSERT INTO #T VALUES (100, 5)
INSERT INTO #T VALUES (110, 7)
INSERT INTO #T VALUES (55, 3)
SELECT N.N + StartNum
FROM #T t
LEFT JOIN Numbers AS N ON N.N <= T.ItemCount
Your single biggest issue is the following line of code...
WHERE n BETWEEN 1 AND #MaxValue;
Itzik Ben-Gan pointed out in his second post, when he first came up with the idea of the very effective cascading CTEs (cCTEs for short), that SQL Server sometimes does things a little crazy and can generate ALL of the numbers that the cCTEs are capable of generating before the WHERE clause takes effect.
To the best of my knowledge, the following is a copy of Itzik's latest code.
----------------------------------------------------------------------
-- © Itzik Ben-Gan
-- For more, see 5-day Advanced T-SQL Course:
-- http://tsql.Lucient.com/t-sql-courses/
----------------------------------------------------------------------
IF OBJECT_ID(N'dbo.GetNums', N'IF') IS NOT NULL DROP FUNCTION dbo.GetNums;
GO
CREATE FUNCTION dbo.GetNums(#low AS BIGINT, #high AS BIGINT) RETURNS TABLE
AS
RETURN
WITH
L0 AS (SELECT c FROM (SELECT 1 UNION ALL SELECT 1) AS D(c)),
L1 AS (SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B),
L2 AS (SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B),
L3 AS (SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B),
L4 AS (SELECT 1 AS c FROM L3 AS A CROSS JOIN L3 AS B),
L5 AS (SELECT 1 AS c FROM L4 AS A CROSS JOIN L4 AS B),
Nums AS (SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS rownum
FROM L5)
SELECT TOP(#high - #low + 1) #low + rownum - 1 AS n
FROM Nums
ORDER BY rownum;
GO
The source link for that code is as follows:
http://tsql.lucient.com/SourceCodes/GetNums.txt
And here's the article that I was speaking of...
https://www.itprotoday.com/open-source-sql/fangraphs-tags-cloud-database-keep-big-show

SQL: how to get all the distinct characters in a column, across all rows

Is there an elegant way in SQL Server to find all the distinct characters in a single varchar(50) column, across all rows?
Bonus points if it can be done without cursors :)
For example, say my data contains 3 rows:
productname
-----------
product1
widget2
nicknack3
The distinct inventory of characters would be "productwigenka123"
Here's a query that returns each character as a separate row, along with the number of occurrences. Assuming your table is called 'Products'
WITH ProductChars(aChar, remain) AS (
SELECT LEFT(productName,1), RIGHT(productName, LEN(productName)-1)
FROM Products WHERE LEN(productName)>0
UNION ALL
SELECT LEFT(remain,1), RIGHT(remain, LEN(remain)-1) FROM ProductChars
WHERE LEN(remain)>0
)
SELECT aChar, COUNT(*) FROM ProductChars
GROUP BY aChar
To combine them all to a single row, (as stated in the question), change the final SELECT to
SELECT aChar AS [text()] FROM
(SELECT DISTINCT aChar FROM ProductChars) base
FOR XML PATH('')
The above uses a nice hack I found here, which emulates the GROUP_CONCAT from MySQL.
The first level of recursion is unrolled so that the query doesn't return empty strings in the output.
Use this (shall work on any CTE-capable RDBMS):
select x.v into prod from (values('product1'),('widget2'),('nicknack3')) as x(v);
Test Query:
with a as
(
select v, '' as x, 0 as n from prod
union all
select v, substring(v,n+1,1) as x, n+1 as n from a where n < len(v)
)
select v, x, n from a -- where n > 0
order by v, n
option (maxrecursion 0)
Final Query:
with a as
(
select v, '' as x, 0 as n from prod
union all
select v, substring(v,n+1,1) as x, n+1 as n from a where n < len(v)
)
select distinct x from a where n > 0
order by x
option (maxrecursion 0)
Oracle version:
with a(v,x,n) as
(
select v, '' as x, 0 as n from prod
union all
select v, substr(v,n+1,1) as x, n+1 as n from a where n < length(v)
)
select distinct x from a where n > 0
Given that your column is varchar, it means it can only store characters from codes 0 to 255, on whatever code page you have. If you only use the 32-128 ASCII code range, then you can simply see if you have any of the characters 32-128, one by one. The following query does that, looking in sys.objects.name:
with cteDigits as (
select 0 as Number
union all select 1 as Number
union all select 2 as Number
union all select 3 as Number
union all select 4 as Number
union all select 5 as Number
union all select 6 as Number
union all select 7 as Number
union all select 8 as Number
union all select 9 as Number)
, cteNumbers as (
select U.Number + T.Number*10 + H.Number*100 as Number
from cteDigits U
cross join cteDigits T
cross join cteDigits H)
, cteChars as (
select CHAR(Number) as Char
from cteNumbers
where Number between 32 and 128)
select cteChars.Char as [*]
from cteChars
cross apply (
select top(1) *
from sys.objects
where CHARINDEX(cteChars.Char, name, 0) > 0) as o
for xml path('');
If you have a Numbers or Tally table which contains a sequential list of integers you can do something like:
-- Pair every product name with each position 1..LEN(name) from the numbers
-- table, take the single character at that position, and let FOR XML PATH('')
-- glue the distinct characters into one string.
SELECT DISTINCT '' + SUBSTRING(p.ProductName, num.Value, 1)
FROM dbo.Products AS p
INNER JOIN dbo.Numbers AS num
    ON num.Value <= LEN(p.ProductName)
FOR XML PATH('')
If you are using SQL Server 2005 and beyond, you can generate your Numbers table on the fly using a CTE:
With Numbers As
(
Select Row_Number() Over ( Order By c1.object_id ) As Value
From sys.columns As c1
Cross Join sys.columns As c2
)
Select Distinct '' + Substring(Products.ProductName, N.Value, 1)
From Numbers As N
Cross Join dbo.Products
Where N.Value <= Len(Products.ProductName)
For Xml Path('')
Building on mdma's answer, this version gives you a single string, but decodes some of the changes that FOR XML will make, like & -> &amp;.
-- Split every product name into single characters with a recursive CTE, then
-- concatenate the distinct characters, in sorted order, into one string.
WITH ProductChars(aChar, remain) AS (
-- anchor: first character of each non-empty name plus the rest of the name
SELECT LEFT(productName,1), RIGHT(productName, LEN(productName)-1)
FROM Products WHERE LEN(productName)>0
UNION ALL
-- recursive step: keep peeling the first character off the remainder
SELECT LEFT(remain,1), RIGHT(remain, LEN(remain)-1) FROM ProductChars
WHERE LEN(remain)>0
)
-- TYPE + .value() returns the concatenated text with XML entities decoded
-- (e.g. & instead of &amp;). No STUFF is applied: the characters are joined
-- without a separator, so there is no leading delimiter to strip and STUFF
-- would delete a real character.
SELECT (
SELECT N'' + aChar AS [text()]
FROM (SELECT DISTINCT aChar FROM ProductChars) base
ORDER BY aChar
FOR XML PATH(N''), TYPE).value(N'.[1]', N'nvarchar(max)')
-- Allow for a lot of recursion. Set to 0 for infinite recursion
OPTION (MAXRECURSION 365)