Split a comma-separated string in a particular format - sql

I have a table with a column that represents a hierarchy path, so when I execute the SQL query
select hierachypath from mytable where id=10
for a particular row i will get the result like this
hieracheypath
--------------
1,2,3,4,5,6,7,8,9,10
select hierachypath from mytable where id=10
I want to get a result like
1,2,3,4,5,6,7,8,9,10
1,2,3,4,5,6,7,8,9
1,2,3,4,5,6,7,8
1,2,3,4,5,6,7
1,2,3,4,5,6
1,2,3,4,5
1,2,3,4
1,2,3
1,2
1
OR
1
1,2
1,2,3
1,2,3,4
1,2,3,4,5
1,2,3,4,5,6
1,2,3,4,5,6,7
1,2,3,4,5,6,7,8
1,2,3,4,5,6,7,8,9
1,2,3,4,5,6,7,8,9,10
I tried it this way
-- Asker's attempt: splits the comma-separated path into one row per element
-- (this is why the output shown below lists 1..10 instead of the cumulative prefixes).
-- NOTE(review): T-SQL local variables normally start with @; the # prefixes on the
-- variables here look like a transcription artifact — confirm against the original post.
Declare #heiracheypath nvarchar(4000) ='1,2,3,4,5,6,7,8,9,10'
declare #Result TABLE (Column1 VARCHAR(100))
-- Number of elements = number of commas + 1. Computed but not used afterwards.
Declare #tcount int
SELECT #tcount=(len(#heiracheypath) - LEN(REPLACE(#heiracheypath,',','')) + 1)
DECLARE #IntLocation INT
-- Loop while at least one comma remains in the (shrinking) string.
WHILE (CHARINDEX(',', #heiracheypath, 0) > 0)
BEGIN
-- Position of the first remaining comma.
SET #IntLocation = CHARINDEX(',', #heiracheypath, 0)
INSERT INTO #Result (Column1)
--LTRIM and RTRIM to ensure blank spaces are removed
-- SUBSTRING with start 0 and length #IntLocation returns the text before the comma.
SELECT RTRIM(LTRIM(SUBSTRING(#heiracheypath, 0, #IntLocation)))
-- Remove the element just emitted together with its comma.
SET #heiracheypath = STUFF(#heiracheypath, 1, #IntLocation, '')
END
-- Whatever is left after the last comma is the final element.
INSERT INTO #Result (Column1)
SELECT RTRIM(LTRIM(#heiracheypath))--LTRIM and RTRIM to ensure blank spaces are removed
select * from #Result
but the result was
Column1
-------
1
2
3
4
5
6
7
8
9
10

The code in the question Looks like T-SQL - so here's a simple solution without common table expressions:
-- Returns every leading prefix of a comma-separated path, shortest first:
-- '1,2,3' -> '1', '1,2', '1,2,3'.
-- Local variables must use the @ sigil (# denotes a temp table), and the
-- session-id function is @@SPID.
DECLARE @heiracheypath nvarchar(4000) = N'1,2,3,4,5,6,7,8,9,10';

SELECT SUBSTRING(@heiracheypath, 1, CommaIndexes.ci - 1) AS Paths
FROM
(
    -- For each start position N, the index of the next comma at or after N.
    SELECT CHARINDEX(',', @heiracheypath, Tally.N) AS ci
    FROM
    (
        -- Tally: the numbers 1..LEN(@heiracheypath).
        -- The cross join guarantees enough source rows even for a 4000-character
        -- value; a single catalog view may contain fewer rows than that.
        SELECT TOP (LEN(@heiracheypath))
               ROW_NUMBER() OVER (ORDER BY @@SPID) AS N
        FROM sys.all_objects AS A
        CROSS JOIN sys.all_objects AS B
    ) AS Tally
    -- UNION (not UNION ALL) is deliberate: many start positions map to the same
    -- comma, and the duplicates must collapse to one row per comma.
    UNION
    -- Virtual comma just past the end, so the full string is returned as well.
    SELECT LEN(@heiracheypath) + 1
) AS CommaIndexes
WHERE CommaIndexes.ci > 0   -- CHARINDEX returns 0 once no comma is left
ORDER BY CommaIndexes.ci;
The Tally derived table contains numbers from 1 to the length of the value,
the CommaIndexes table contains the distinct indexes of each comma in the value,
the union part is to also return the full string,
and the outer most select statement simply use substring to return the relevant parts of the string.
This could be simplified further by combining the tally derived table with the commaIndexs derived table:
-- Condensed form of the prefix query: the tally numbers are generated inline by
-- ROW_NUMBER() and fed straight into CHARINDEX as the search start position.
-- NOTE(review): #heiracheypath / ##SPID are presumably @heiracheypath / @@SPID in the
-- original T-SQL — confirm. The variable is expected to be declared beforehand.
SELECT SUBSTRING(#heiracheypath, 1, ci-1) As Paths
FROM
(
-- TOP caps the number of probed start positions at the string length; sys.objects is
-- only a row source (assumes it holds at least that many rows — TODO confirm).
SELECT TOP(LEN(#heiracheypath)) CHARINDEX(',',#heiracheypath, ROW_NUMBER() OVER(ORDER BY ##SPID)) As ci
FROM sys.objects A
-- UNION removes the duplicate comma positions and adds an end-of-string marker
-- so the complete string is returned too.
UNION SELECT LEN(#heiracheypath) + 1
) As CommaIndexes
-- CHARINDEX yields 0 when no comma is found at or after the start position.
WHERE ci > 0
ORDER BY ci
Result:
Paths
1
1,2
1,2,3
1,2,3,4
1,2,3,4,5
1,2,3,4,5,6
1,2,3,4,5,6,7
1,2,3,4,5,6,7,8
1,2,3,4,5,6,7,8,9
1,2,3,4,5,6,7,8,9,10

Related

SQL get average of a list in sql select

We have this column in the table named "pricehistory"
1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17
that is an example data.
The first part is the timestamp; the part after the ; is the price at that timestamp.
But I want the average price across all timestamps in a single select... is that possible?
I can't find any similar examples anywhere and my attempts at the select don't work... I am not so good with SQL,
so I want the average of all prices that come after a ; and before a ,
The , separates the timestamp/price pairs.
Some test data :
-- Sample data: one row per id, each holding a comma-separated list of
-- "timestamp;price" pairs.
CREATE TABLE test (
    id           INT  NOT NULL,
    pricehistory TEXT NOT NULL
);

INSERT INTO test (id, pricehistory)
VALUES
    (1, '1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17'),
    (2, '1634913731;42.42,1634916609;21.21');
If your RDBMS has some splitting function
Then it's quite easy, just split and use AVG. Here is an example using PostgreSQL :
-- Average price per id (PostgreSQL).
-- Each list is split on ',' into "timestamp;price" entries; the price is taken as
-- the field after ';' rather than from a fixed character offset, so the query
-- keeps working when the timestamp is not exactly 10 digits long.
SELECT
    test.id,
    AVG(CAST(split_part(t.v, ';', 2) AS decimal)) AS average
FROM test
INNER JOIN LATERAL regexp_split_to_table(test.pricehistory, ',') AS t(v) ON TRUE
GROUP BY test.id;
Then you get:
id | average
----+----------------------
2 | 31.8150000000000000
1 | 238.5016666666666667
(2 rows)
Otherwise
You can use a CTE to split the values manually. This is a bit more involved. Here is an example using PostgreSQL again :
-- Average price per id without a split function (PostgreSQL recursive CTE).
WITH RECURSIVE T AS (
    SELECT id,
           -- Anchor: take the LAST entry of the list. The recursive member only
           -- emits entries that are followed by a comma, so the last one must be
           -- produced here. Prepending ',' before reversing makes the expression
           -- also work for a list with a single entry (no comma at all), where it
           -- resolves to position 1 instead of a position past the end.
           SUBSTRING(pricehistory,
                     LENGTH(pricehistory) - STRPOS(REVERSE(',' || pricehistory), ',') + 2) AS oneprice,
           pricehistory AS remaining
    FROM test
    UNION ALL
    -- Recursive member: peel off the first entry and keep the rest for the next step.
    SELECT id,
           LEFT(remaining, STRPOS(remaining, ',') - 1),
           SUBSTRING(remaining, STRPOS(remaining, ',') + 1)
    FROM T
    WHERE STRPOS(remaining, ',') > 0
)
-- The price is whatever follows ';' (no fixed-width timestamp assumption).
SELECT id,
       AVG(CAST(SUBSTRING(oneprice, STRPOS(oneprice, ';') + 1) AS decimal)) AS average
FROM T
GROUP BY id;
Then you get:
id | average
----+----------------------
2 | 31.8150000000000000
1 | 238.5016666666666667
(2 rows)
MySql >= 8.0
I used Recursive Common Table Expressions (cte) to split pricehistory string by ','. Then I split price from timestamp by ';', cast price as decimal(10,2) and group by id to get average price by id.
-- Average price per id (MySQL 8+): peel "timestamp;price" entries off the front
-- of the list one at a time, then average the part after ';'.
WITH RECURSIVE
    cte AS (
        -- First entry plus whatever follows the first comma (NULL when nothing follows).
        SELECT id,
               SUBSTRING_INDEX(pricehistory, ',', 1) AS price,
               CASE
                   WHEN LOCATE(',', pricehistory) > 0
                       THEN SUBSTRING(pricehistory, LOCATE(',', pricehistory) + 1)
               END AS rest
        FROM t
        UNION ALL
        -- Next entry from the remainder; stops once the remainder is NULL.
        SELECT id,
               SUBSTRING_INDEX(rest, ',', 1) AS price,
               CASE
                   WHEN LOCATE(',', rest) > 0
                       THEN SUBSTRING(rest, LOCATE(',', rest) + 1)
               END AS rest
        FROM cte
        WHERE rest IS NOT NULL
    )
SELECT
    id,
    AVG(CAST(SUBSTRING(price, LOCATE(';', price) + 1) AS decimal(10,2))) AS price_average
FROM cte
GROUP BY id;
A similar way to do the same (using regular expressions functions):
-- Same result using regular expressions: the i-th match of ';<digits/dots>,' is the
-- i-th price, still wrapped in its leading ';' and trailing ','.
WITH RECURSIVE
-- Append a trailing comma so the last price also matches the ';...,' pattern.
cte AS (SELECT Id, concat(pricehistory, ',') AS pricehistory FROM t),
-- Anchor: first match (occurrence 1) for every row.
unnest AS (SELECT id,
pricehistory,
1 AS i,
REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, 1) AS price
FROM cte
UNION ALL
-- Recursive step: fetch occurrence i + 1 as long as such a match exists.
SELECT id,
pricehistory,
i + 1,
REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, i + 1)
FROM unnest
WHERE REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, i + 1) IS NOT NULL)
-- SUBSTR(price, 2, LENGTH(price) - 2) strips the surrounding ';' and ','.
SELECT id, AVG(CAST(SUBSTR(price, 2, LENGTH(price) - 2) AS decimal(10,2))) AS price_average
FROM unnest
GROUP BY id;
you don't write what DBMS you are using.
In MS SQL-SERVER you can write something like this.
Create a function to convert string to multiple rows, and then use that in the query.
-- Splits a delimited string into one row per element.
--   @CommadelimitedString  the list to split; VARCHAR(MAX) so that long values
--                          (e.g. a VARCHAR(MAX) column) are not silently truncated
--   @Separator             single-character delimiter
-- Returns a table with one column, Column1, holding each element with leading and
-- trailing blanks removed. A string without the separator yields a single row.
-- Parameters and local variables use the @ sigil required by T-SQL.
CREATE or ALTER FUNCTION dbo.BreakStringIntoRows
(
    @CommadelimitedString VARCHAR(MAX),
    @Separator            VARCHAR(1)
)
RETURNS @Result TABLE (Column1 VARCHAR(max))
AS
BEGIN
    -- Position of the first separator in what is left of the string (0 = none).
    DECLARE @IntLocation INT = CHARINDEX(@Separator, @CommadelimitedString);

    WHILE (@IntLocation > 0)
    BEGIN
        -- Emit the text in front of the separator.
        INSERT INTO @Result (Column1)
        SELECT RTRIM(LTRIM(LEFT(@CommadelimitedString, @IntLocation - 1)));

        -- Drop the emitted element together with its separator and look again.
        SET @CommadelimitedString = STUFF(@CommadelimitedString, 1, @IntLocation, '');
        SET @IntLocation = CHARINDEX(@Separator, @CommadelimitedString);
    END;

    -- Whatever remains after the last separator is the final element.
    INSERT INTO @Result (Column1)
    SELECT RTRIM(LTRIM(@CommadelimitedString));

    RETURN;
END
-- Sample data: comma-separated "timestamp;price" pairs per id.
CREATE TABLE test1 (id INT NOT NULL, pricehistory VARCHAR(MAX) NOT NULL);

INSERT INTO test1 (id, pricehistory)
VALUES
    (1, '1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17'),
    (2, '1634913731;42.42,1634916609;21.21');

-- Average PRICE per row. The price is the text AFTER the ';' in every entry
-- (the text before it is the timestamp), and it is cast with an explicit scale:
-- a bare DECIMAL defaults to scale 0 and would round the prices to integers.
SELECT
    t.id,
    t.pricehistory,
    (
        SELECT AVG(CAST(
                   SUBSTRING(s.Column1, CHARINDEX(';', s.Column1) + 1, LEN(s.Column1))
                   AS DECIMAL(18, 2)))
        FROM dbo.BreakStringIntoRows(t.pricehistory, ',') AS s
    ) AS [AVG]
FROM test1 AS t;
sample output:

Generate a comma-separated list of numbers in a single string

Is there a way to generate a comma-separated string of a series of numbers where the "begin" and "end" numbers are provided?
For example, provide the numbers 1 and 10 and the output would be a single value of: 1,2,3,4,5,6,7,8,9,10
10/10/2019 edit explaining why I'm interested in this:
My workplace writes queries with several columns in the SELECT statement plus aggregate functions. Then a GROUP BY clause using the column numbers. I figured using a macro that creates a comma-separated list to copy/paste in would save some time.
SELECT t.colA
, t.colB
, t.colC
, t.colD
, t.colE
, t.colF
, t.colG
, t.colH
, t.colI
, t.colJ
, sum(t.colK) as sumK
, sum(t.colL) as sumL
, sum(t.colM) as sumM
FROM t
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
;
You can use a recursive CTE to generate your numbers, and xml_agg to generate your string:
-- Generates the numbers 1..10 with a recursive CTE and concatenates them into a
-- single comma-separated string.
with recursive nums (counter) as
-- Seed row: counter = 1.
( select * from (select cast(1 as bigint) as counter) t
union all
-- Add 1 while the previous value is in 1..9, so the last generated value is 10.
select
counter + 1
from nums
where counter between 1 and 9
)
select
-- Each number gets a trailing ','; the aggregate is ordered by counter and the final
-- trailing comma is trimmed. NOTE(review): varchar(2)/varchar(100) presumably need
-- widening for larger ranges — confirm before reusing.
trim(trailing ',' from cast(xmlagg(cast(counter as varchar(2)) || ',' order by counter) as varchar(100)))
from nums
Check these methods in SQL Server-
-- Builds the comma-separated list 1,2,...,@n in SQL Server.
-- #Sample is a temp table (the # is intentional); @n is a local variable.
IF OBJECT_ID('tempdb..#Sample') IS NOT NULL
    DROP TABLE #Sample;

CREATE TABLE #Sample
(
    NUM int NOT NULL
);

DECLARE @n int = 10;

-- Number the rows of a catalog view to obtain 1..@n.
INSERT INTO #Sample (NUM)
SELECT A.NUM
FROM (
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS NUM
    FROM sys.columns
) AS A
WHERE A.NUM <= @n;

-- Method 1 (SQL Server 2017 and later): STRING_AGG.
-- WITHIN GROUP makes the order of the list deterministic.
SELECT STRING_AGG(CAST(NUM AS varchar(max)), ',') WITHIN GROUP (ORDER BY NUM) AS EXPECTED_RESULT
FROM #Sample;

-- Method 2 (older versions): FOR XML PATH concatenation.
-- Every element is prefixed with ',' (no blank, otherwise the result reads
-- "1 ,2 ,3") and STUFF removes the single leading comma. No outer FROM is needed:
-- the scalar subquery already produces the one result row.
SELECT STUFF(CAST((
           SELECT ',' + CAST(c.NUM AS varchar(max))
           FROM #Sample AS c
           ORDER BY c.NUM
           FOR XML PATH(''), TYPE) AS varchar(max)), 1, 1, '') AS EXPECTED_RESULT;
While loop seems appropriate
-- Builds a comma-separated list of the integers from @begin to @end (inclusive)
-- with a simple loop. Returns an error row instead when @begin > @end.
DECLARE @begin INT = 1;
DECLARE @end INT = 11;
DECLARE @list VARCHAR(MAX);   -- MAX: a fixed-size variable would truncate silently
DECLARE @current INT;         -- loop counter, so @begin keeps its original value

IF @begin > @end
BEGIN
    SELECT 'error, beginning number ' + CONVERT(VARCHAR(500), @begin)
         + ' must not be greater than ending number '
         + CONVERT(VARCHAR(500), @end) + '.' AS err;
    RETURN;
END;

-- Start the list with the first number, then append ",<next>" until @end is reached.
SET @current = @begin;
SET @list = CONVERT(VARCHAR(MAX), @current);

WHILE @current < @end
BEGIN
    SET @current += 1;
    SET @list = @list + ',' + CONVERT(VARCHAR(MAX), @current);
END;

SELECT @list;
You might want to use varchar(5000) or something depending on how big you want it to get.
disclaimer -- I don't know if this works with teradata
I'm not sure there is a good direct way to generate a series in Teradata. You can fake it a few different ways though. Here's a comma separated list of numbers from 5 to 15, for example:
-- Comma-separated list of the numbers 5..15 (Teradata).
-- Row numbers over a large table stand in for a series generator: 4 + ROW_NUMBER()
-- starts the series at 5 and QUALIFY cuts it off at 15.
-- ORDER BY inside XMLAGG keeps the list in ascending order; without it the
-- aggregation order is not guaranteed.
SELECT TRIM(TRAILING ',' FROM (XMLAGG(TRIM(rn) || ',' ORDER BY rn) (VARCHAR(10000))))
FROM (
    SELECT 4 + ROW_NUMBER() OVER (ORDER BY Sys_Calendar."CALENDAR".day_of_calendar) AS rn
    FROM Sys_Calendar."CALENDAR"
    QUALIFY rn <= 15
) t
I've only used sys_calendar.calendar here because it's a big table. Any big table would do here though.
Here's one way to do it in Teradata:
SELECT ARRAY_AGG(src.RowNum)
FROM (
SELECT ROW_NUMBER() OVER() AS RowNum
FROM sys_calendar.calendar
QUALIFY RowNum BETWEEN <begin_num> AND <end_num>
) src
This will give you the output as an ARRAY data type, which you can probably cast as a VARCHAR. It also assumes begin_num > 0 and <end_num> is less than the number of rows in the sys_calendar.calendar view. You can always fiddle with this to fit your required range of values.
There are also DelimitedBuild UDFs out there (if you can find one) that can be used to convert row values into delimited strings.
The cheapest way to achieve your goal is this one (no functions, or joins to tables required):
-- Builds the string '1,2,...,10' with a recursive CTE that carries the growing
-- string along with the counter; no base table is read.
WITH RECURSIVE NumberRanges(TheNumber,TheString) AS
(
-- Seed row: number 1 and the string '1'. VARCHAR(500) bounds the result length.
SELECT 1 AS TheNumber,casT(1 as VARCHAR(500)) as TheString
FROM
(
-- Single-row derived table used only as a row source for the seed.
SELECT * FROM (SELECT NULL AS X) X
) DUMMYTABLE
UNION ALL
-- Each step appends ',<next number>' to the string built so far.
SELECT
TheNumber + 1 AS TheNumber,
TheString ||',' || TRIM(TheNumber+1)
FROM NumberRanges
WHERE
TheNumber < 10
)
-- Every intermediate string is a row of the CTE; keep only the row with the
-- highest number, i.e. the complete list.
SELECT TheString
FROM NumberRanges
QUALIFY ROW_NUMBER() OVER ( ORDER BY TheNumber DESC) = 1;
Result String: 1,2,3,4,5,6,7,8,9,10

Combinations in SQL Server

Using SQL Server (2008) and given a table with rows as follows:
Id
--
4
7
Using a value for x (e.g. a parameter #x), I want to be able to generate rows with x columns giving the all combinations of the Id values in the table:
For example with x=2, would produce an output with two columns as follows:
4,4
4,7
7,4
7,7
In this case where x=3, the result would be a rows with three columns as following:
4,4,4
4,4,7
4,7,4
4,7,7
7,4,4
7,4,7
7,7,4
7,7,7
The table may contain more or less rows than the 2 rows in the above example, which also depending on the value of x would change the number of combination rows/columns in the output.
E.g.
If the table contained:
4
7
9
If x=2, would produce
4,4
4,7
4,9
7,4
7,7
7,9
9,4
9,7
9,9
If x=3, would produce
4,4,4
4,4,7
4,4,9
4,7,4
4,7,7
4,7,9
4,9,4
4,9,7
4,9,9
etc
Thanks
You can do this using a recursive CTE:
-- All combinations (with repetition) of @x ids from table t, one comma-separated
-- string per combination. @x is the caller-supplied combination size; T-SQL
-- variables and parameters take the @ sigil.
WITH cte AS (
    -- Level 1: every id on its own.
    SELECT CONVERT(varchar(max), t.id) AS ids,
           1 AS cnt
    FROM t
    UNION ALL
    -- Level n + 1: append every id to every level-n string until @x ids are joined.
    SELECT cte.ids + ',' + CONVERT(varchar(max), t.id),
           cte.cnt + 1
    FROM cte
    INNER JOIN t
        ON cte.cnt < @x
)
-- Keep only the complete combinations.
SELECT cte.ids,
       cte.cnt
FROM cte
WHERE cte.cnt = @x;
Here is a db<>fiddle.
Note: you need to represent the results as a string, because SQL does not allow you to return a variable number of columns. You could put each value in a separate column, but then you would not be able to use a variable to control the size of the combinations.
Another possible approach is to use dynamic SQL:
-- Generates all combinations of @x ids as @x separate columns by building a
-- cross join of #Numbers with itself @x times and executing it dynamically.
-- #Numbers is a temp table (the # is intentional); everything declared with
-- DECLARE is a local variable and therefore uses @.

-- Table
CREATE TABLE #Numbers (
    Id int
);
INSERT INTO #Numbers (Id)
VALUES
    (4),
    (7),
    (9);

-- Declarations
DECLARE @select nvarchar(max);
DECLARE @from nvarchar(max);
DECLARE @stm nvarchar(max);
DECLARE @x int;

-- Numbers: size of each combination
SELECT @x = 2;

-- Statement generation: CounterCTE yields 1..@x; every counter value contributes
-- one ",tN.Id" to the select list and one ",#Numbers tN" to the from list.
-- Only the integer counter is concatenated into the statement, never user text.
;WITH CounterCTE AS (
    SELECT 1 AS Counter
    UNION ALL
    SELECT Counter + 1
    FROM CounterCTE
    WHERE Counter < @x
)
SELECT
    @select = (SELECT CONCAT(N',t', Counter, N'.Id') FROM CounterCTE FOR XML PATH('')),
    @from = (SELECT CONCAT(N',#Numbers t', Counter) FROM CounterCTE FOR XML PATH(''));

-- STUFF drops the leading comma of each generated list.
SET @stm = CONCAT(
    N'SELECT ',
    STUFF(@select, 1, 1, N''),
    N' FROM ',
    STUFF(@from, 1, 1, N'')
);

-- Execution
PRINT @stm;
EXEC sp_executesql @stm;
Output for #x = 2
Id Id
4 4
7 4
9 4
4 7
7 7
9 7
4 9
7 9
9 9

Count Of Distinct Characters In Column

Say I have the following data set
Column1 (VarChar(50 or something))
Elias
Sails
Pails
Plane
Games
What I'd like to produce from this column is the following set:
LETTER COUNT
E 3
L 4
I 3
A 5
S 5
And So On...
One solution I thought of was combining all strings into a single string, and then count each instance of the letter in that string, but that feels sloppy.
This is more an exercise of curiosity than anything else, but, is there a way to get a count of all distinct letters in a dataset with SQL?
I would do this by creating a table of your letters similar to:
CREATE TABLE tblLetter
(
letter varchar(1)
);
INSERT INTO tblLetter ([letter])
VALUES
('a'),
('b'),
('c'),
('d'); -- etc
Then you could join the letters to your table where your data is like the letter:
-- For every letter in tblLetter, counts the rows of names whose col contains that letter.
-- Note: a row is counted once per letter no matter how often the letter occurs in it,
-- so this is "number of values containing the letter", not total occurrences.
select l.letter, count(n.col) Total
from tblLetter l
inner join names n
-- LIKE '%x%' matches when the letter appears anywhere in the value.
on n.col like '%'+l.letter+'%'
group by l.letter;
See SQL Fiddle with Demo. This would give a result:
| LETTER | TOTAL |
|--------|-------|
| a | 5 |
| e | 3 |
| g | 1 |
| i | 3 |
| l | 4 |
| m | 1 |
| p | 2 |
| s | 4 |
If you create a table of letters, like this:
create table letter (ch char(1));
insert into letter(ch) values ('A'),('B'),('C'),('D'),('E'),('F'),('G'),('H')
,('I'),('J'),('K'),('L'),('M'),('N'),('O'),('P')
,('Q'),('R'),('S'),('T'),('U'),('V'),('W'),('X'),('Y'),('Z');
you could do it with a cross join, like this:
-- Total occurrences of each letter across all strings.
-- Removing a letter from a string shortens it by the number of times the letter
-- occurs, so LEN(before) - LEN(after) is that letter's count in the string.
SELECT
    l.ch,
    SUM(LEN(s.str) - LEN(REPLACE(s.str, l.ch, '')))
FROM letter AS l
CROSS JOIN test AS s -- <<== test is the name of the table with the string
GROUP BY l.ch
-- Letters that never occur are left out of the result.
HAVING SUM(LEN(s.str) - LEN(REPLACE(s.str, l.ch, ''))) <> 0
Here is a running demo on sqlfiddle.
You can do it without defining a table by embedding a list of letters into a query itself, but the idea of cross-joining and grouping by the letter would remain the same.
Note: see this answer for the explanation of the expression inside the SUM.
To me, this is a problem almost tailored for a CTE (Thanks, Nicholas Carey, for the original, my fiddle here: http://sqlfiddle.com/#!3/44f77/8):
-- Counts every letter across all strings in test.str by walking each string one
-- character at a time with a recursive CTE.
WITH cteLetters
AS
(
    -- Anchor: first character of every row. Rows are NOT grouped, so a word that
    -- appears twice in the table is counted twice; empty strings are skipped
    -- because they contain no characters.
    SELECT
        1 AS CharPos,
        str,
        LEN(str) AS MaxLen,
        SUBSTRING(str, 1, 1) AS Letter
    FROM
        test
    WHERE
        LEN(str) > 0
    UNION ALL
    -- Recursive step: move one position to the right until the end of the string.
    SELECT
        CharPos + 1,
        str,
        MaxLen,
        SUBSTRING(str, CharPos + 1, 1) AS Letter
    FROM
        cteLetters
    WHERE
        CharPos + 1 <= MaxLen
)
-- Group on the upper-cased letter so 'e' and 'E' end up in the same bucket even
-- under a case-sensitive collation.
SELECT
    UPPER(Letter) AS Letter,
    COUNT(*) AS CountOfLetters
FROM
    cteLetters
GROUP BY
    UPPER(Letter)
ORDER BY
    UPPER(Letter);
Use the CTE to calculate character positions and deconstruct each string. Then you can just aggregate from the CTE itself. No need for additional tables or anything.
This should work even if you have case sensitivity turned on.
The setup:
CREATE TABLE _test ( Column1 VARCHAR (50) )
INSERT _test (Column1) VALUES ('Elias'),('Sails'),('Pails'),('Plane'),('Games')
The work:
-- Counts the occurrences of each letter A..Z in _test.Column1, case-insensitively,
-- by looping over the ASCII codes 65 ('A') to 90 ('Z').
-- Local and table variables use the @ sigil.
DECLARE @counter AS INT;
DECLARE @results TABLE (LETTER VARCHAR(1), [COUNT] INT);

SET @counter = 65; -- ascii value for 'A'

WHILE (@counter <= 90) -- ascii value for 'Z'
BEGIN
    -- Upper-casing both sides makes the count independent of the collation's case
    -- sensitivity; the length difference after removing the letter is its count.
    INSERT INTO @results (LETTER, [COUNT])
    SELECT
        CHAR(@counter),
        SUM(LEN(UPPER(Column1)) - LEN(REPLACE(UPPER(Column1), CHAR(@counter), '')))
    FROM _test;

    SET @counter = @counter + 1;
END;

-- Only letters that actually occur.
SELECT LETTER, [COUNT]
FROM @results
WHERE [COUNT] > 0;
It's often useful to have a range or sequence table that gives you a source of large runs of contiguous sequential numbers, like this one covering the range -100,000–+100,000.
-- (Re)creates dbo.range, a helper table holding the contiguous integers
-- -100,000 .. +100,000.
-- The drop is guarded so the script also runs when the table does not exist yet.
if object_id('dbo.range', 'U') is not null
    drop table dbo.range
go
create table dbo.range
(
  id int not null primary key clustered
)
go
set nocount on
go
-- Set-based fill: number 200,001 rows of a catalog cross join and shift them so the
-- sequence starts at -100,000. One statement replaces 200,001 single-row inserts.
insert into dbo.range ( id )
select top (200001)
       row_number() over ( order by (select null) ) - 100001
from sys.all_objects a
cross join sys.all_objects b
go
set nocount off
go
Once you have such a table, you can do something like this:
-- Frequency of every character in some_column: the range table supplies the
-- positions 1..LEN(value), and each position contributes one character.
SELECT
    SUBSTRING(t.some_column, r.id, 1) AS [character],
    COUNT(*) AS frequency
FROM dbo.some_table AS t
INNER JOIN dbo.range AS r
    ON r.id >= 1
   AND r.id <= LEN(t.some_column)
GROUP BY SUBSTRING(t.some_column, r.id, 1)
ORDER BY [character]
If you want to ensure case-insensitivity, just mix in the desired upper() or lower():
-- Case-insensitive variant: characters are upper-cased before they are grouped,
-- so 'e' and 'E' are counted together.
SELECT
    UPPER(SUBSTRING(t.some_column, r.id, 1)) AS [character],
    COUNT(*) AS frequency
FROM dbo.some_table AS t
INNER JOIN dbo.range AS r
    ON r.id >= 1
   AND r.id <= LEN(t.some_column)
GROUP BY UPPER(SUBSTRING(t.some_column, r.id, 1))
ORDER BY [character]
Given your sample data:
create table dbo.some_table
(
some_column varchar(50) not null
)
go
insert dbo.some_table values ( 'Elias' )
insert dbo.some_table values ( 'Sails' )
insert dbo.some_table values ( 'Pails' )
insert dbo.some_table values ( 'Plane' )
insert dbo.some_table values ( 'Games' )
go
The latter query above produces the following results:
character frequency
A 5
E 3
G 1
I 3
L 4
M 1
N 1
P 2
S 5

Tricky SQL query requiring search for contains

I have data such as this:
Inventors column in my table
Hundley; Edward; Ana
Isler; Hunsberger
Hunsberger;Hundley
Names are separated by ;. I want to write a SQL query which sums up the count.
Eg. The result should be:
Hundley 2
Isler 1
Hunsberger 2
Edward 1
Ana 1
I could do a group by but this is not a simple group by as you can see. Any ideas/thoughts on how to get this output?
Edit: Changed results so it doesn't create any confusion that a row only contains 2 names.
You can take a look at this. I certainly do not recommend this way if you have lots of data, BUT you can do some modifications and use it and it works like a charm!
This is the new code for supporting unlimited splits:
-- Splits ';'-separated name lists into single names with a recursive CTE and
-- counts how often each name occurs. Table variables use the @ sigil.
Declare @Table Table (
    Name Nvarchar(50)
);
Insert @Table (
    Name
) Select 'Hundley; Edward; Anna'
Union Select 'Isler; Hunsberger'
Union Select 'Hunsberger; Hundley'
Union Select 'Anna'
;
With Result (
    Part        -- the name extracted at this level
  , Remained    -- text that is still to be split
  , [Index]     -- position of the ';' just consumed; 0 = nothing left to split
  , Level       -- 1 for the first name of a row, 2 for the second, ...
) As (
    -- Anchor: first name of every row (the whole value when it has no ';').
    -- Trimmed like the recursive part so ' Anna' and 'Anna' fall into one group.
    Select LTrim(
               Case When CharIndex(';', Name, 1) = 0
                   Then Name
                   Else Left(Name, CharIndex(';', Name, 1) - 1)
               End
           )
         , Right(Name, Len(Name) - CharIndex(';', Name, 1))
         , CharIndex(';', Name, 1)
         , 1
    From @Table
    Union All
    -- Recursive step: next name from the remainder, as long as the previous
    -- level actually found a ';'.
    Select LTrim(
               Case When CharIndex(';', Remained, 1) = 0
                   Then Remained
                   Else Left(Remained, CharIndex(';', Remained, 1) - 1)
               End
           )
         , Right(Remained, Len(Remained) - CharIndex(';', Remained, 1))
         , CharIndex(';', Remained, 1)
         , Level
           + 1
    From Result
    Where [Index] <> 0
) Select Part
       , Count(*)
From Result
Group By Part
Cheers
-- Counts how often each name occurs in the ';'-separated Inventors column.
-- Every CTE row describes one name as the slice [Start, Split) of Inventors;
-- Split is NULL for the last (or only) name of a row.
;with cte as
(
    -- Anchor: first name. NULLIF turns "no ';' found" (0) into NULL so that rows
    -- holding a single name are sliced up to the end of the string instead of
    -- producing a negative SUBSTRING length.
    select 1 as Item,
           1 as Start,
           nullif(charindex(';', Inventors, 1), 0) as Split,
           Inventors
    from YourInventorsTable
    union all
    -- Recursive step: the next name starts right after the previous separator.
    select cte.Item + 1,
           cte.Split + 1,
           nullif(charindex(';', cte.Inventors, cte.Split + 1), 0),
           cte.Inventors
    from cte
    where cte.Split is not null
)
select rtrim(ltrim(substring(Inventors, Start, isnull(Split, len(Inventors) + 1) - Start))), count(*)
from cte
group by rtrim(ltrim(substring(Inventors, Start, isnull(Split, len(Inventors) + 1) - Start)))
You can create a split function to split the col values
-- Counts the occurrences of every value produced by splitting col1 on ';'.
-- COUNT(*) counts the split rows; a table alias cannot be passed to COUNT.
-- The values are trimmed so ' Hundley' and 'Hundley' fall into the same group
-- (harmless if dbo.split already trims — its definition is not shown here).
select ltrim(rtrim(splittedValues.items)) as items, count(*)
from table1
cross apply dbo.split(table1.col1, ';') splittedValues
group by ltrim(rtrim(splittedValues.items))
DEMO in Sql fiddle
First create a function that turns your comma-separated (or any other delimiter, such as ;) string into a table, then apply a GROUP BY on that table.
That way you get the count for each separate value.
"select d.number,count(*) from (select number from dbo.CommaseparedListToTable('Hundley;Edward;Ana;Isler;Hunsberger;Hunsberger;Hundley',';'))d
group by d.number"
-- Splits a ';'-separated list into a table variable, one name per row, and counts
-- how often each name occurs. Local and table variables use the @ sigil.
declare @text nvarchar(255) = 'Edward; Hundley; AnaIsler; Hunsberger; Hunsberger; Hundley ';
declare @table table (id int identity, name varchar(50));
declare @pos int = charindex(';', @text);

while @pos > 0
begin
    -- Text before the first ';', trimmed so that 'Edward' and ' Edward' are
    -- counted as the same name.
    insert into @table (name)
    select ltrim(rtrim(left(@text, @pos - 1)));

    -- Continue with everything after that ';'.
    set @text = substring(@text, @pos + 1, len(@text));
    set @pos = charindex(';', @text);
end

-- The remainder after the last ';' is the final name.
insert into @table (name)
select ltrim(rtrim(@text));

select name, count(name) as counts
from @table
group by name;
Output
name count
AnaIsler 1
Hundley 2
Hunsberger 2
Edward 1