Tricky SQL query requiring search for contains - sql

I have data such as this:
Inventors column in my table
Hundley; Edward; Ana
Isler; Hunsberger
Hunsberger;Hundley
Names are separated by ;. I want to write a SQL query which sums up the count.
Eg. The result should be:
Hundley 2
Isler 1
Hunsberger 2
Edward 1
Ana 1
I could do a group by but this is not a simple group by as you can see. Any ideas/thoughts on how to get this output?
Edit: Changed results so it doesn't create any confusion that a row only contains 2 names.

You can take a look at this. I certainly do not recommend this way if you have lots of data, BUT you can do some modifications and use it and it works like a charm!
This is the new code for supporting unlimited splits:
Declare #Table Table (
Name Nvarchar(50)
);
Insert #Table (
Name
) Select 'Hundley; Edward; Anna'
Union Select 'Isler; Hunsberger'
Union Select 'Hunsberger; Hundley'
Union Select 'Anna'
;
With Result (
Part
, Remained
, [Index]
, Level
) As (
Select Case When CharIndex(';', Name, 1) = 0
Then Name
Else Left(Name, CharIndex(';', Name, 1) - 1)
End
, Right(Name, Len(Name) - CharIndex(';', Name, 1))
, CharIndex(';', Name, 1)
, 1
From #Table
Union All
Select LTrim(
Case When CharIndex(';', Remained, 1) = 0
Then Remained
Else Left(Remained, CharIndex(';', Remained, 1) - 1)
End
)
, Right(Remained, Len(Remained) - CharIndex(';', Remained, 1))
, CharIndex(';', Remained, 1)
, Level
+ 1
From Result
Where [Index] <> 0
) Select Part
, Count(*)
From Result
Group By Part
Cheers

;with cte as
(
select 1 as Item, 1 as Start, CHARINDEX(';',inventors, 1) as Split, Inventors from YourInventorsTable
union all
select cte.Item+1, cte.Split+1, nullif(CHARINDEX(';',inventors, cte.Split+1),0), inventors as Split
from cte
where cte.Split<>0
)
select rTRIM(lTRIM(SUBSTRING(inventors, start,isnull(split,len(inventors)+1)-start))), count(*)
from cte
group by rTRIM(lTRIM(SUBSTRING(inventors, start,isnull(split,len(inventors)+1)-start)))

You can create a split function to split the col values
select splittedValues.items,count(splittedValues) from table1
cross apply dbo.split(col1,';') splittedValues
group by splittedValues.items
DEMO in Sql fiddle

first make one function who take your comma or any other operator(;) separated string into one table and by using that temp table, apply GROUP function on that table.
So you will get count for separate value.
"select d.number,count(*) from (select number from dbo.CommaseparedListToTable('Hundley;Edward;Ana;Isler;Hunsberger;Hunsberger;Hundley',';'))d
group by d.number"

declare #text nvarchar(255) = 'Edward; Hundley; AnaIsler; Hunsberger; Hunsberger; Hundley ';
declare #table table(id int identity,name varchar(50));
while #text like '%;%'
Begin
insert into #table (name)
select SUBSTRING(#text,1,charindex(';',#text)-1)
set #text = SUBSTRING(#text, charindex(';',#text)+1,LEN(#text))
end
insert into #table (name)
select #text
select name , count(name ) counts from #table group by name
Output
name count
AnaIsler 1
Hundley 2
Hunsberger 2
Edward 1

Related

SQL get average of a list in sql select

We have this column in the table named "pricehistory"
1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17
that is an example data.
first is the timestamp than after ; is the price at this timestamp
But i want the average price from every timestamp in a select... is that possible?
I dont find any similiar examples somewhere and my tries to select doesnt work... i am not so good with sql
so i want average of all prices behind that ; and before ,
The , split the timestamp and prices
Some test data :
create table test ( id int not null, pricehistory text not null );
insert into test values ( 1, '1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17' );
insert into test values ( 2, '1634913731;42.42,1634916609;21.21' );
If your RDBMS has some splitting function
Then it's quite easy, just split and use AVG. Here is an example using PostgreSQL :
SELECT id, AVG(SUBSTRING(v, 12, 42)::decimal) AS average
FROM test
INNER JOIN LATERAL regexp_split_to_table(pricehistory, E',') t(v) ON TRUE
GROUP BY id;
Then you get:
id | average
----+----------------------
2 | 31.8150000000000000
1 | 238.5016666666666667
(2 rows)
Otherwise
You can use a CTE to split the values manually. This is a bit more involved. Here is an example using PostgreSQL again :
WITH RECURSIVE T AS (
SELECT id,
-- We get the last value ...
SUBSTRING(pricehistory, LENGTH(pricehistory) - STRPOS(REVERSE(pricehistory), ',') + 2) AS oneprice,
pricehistory AS remaining
FROM test
UNION ALL
-- ... as we get the other values from the recursive CTE.
SELECT id,
LEFT(remaining, STRPOS(remaining, ',') - 1),
SUBSTRING(remaining, STRPOS(remaining, ',') + 1)
FROM T
WHERE STRPOS(remaining, ',') > 0
)
SELECT id, AVG(SUBSTRING(oneprice, 12)::decimal) AS average
FROM T
GROUP BY id;
Then you get:
id | average
----+----------------------
2 | 31.8150000000000000
1 | 238.5016666666666667
(2 rows)
MySql >= 8.0
I used Recursive Common Table Expressions (cte) to split pricehistory string by ','. Then I split price from timestamp by ';', cast price as decimal(10,2) and group by id to get average price by id.
WITH RECURSIVE
cte AS (SELECT id,
SUBSTRING_INDEX(pricehistory, ',', 1) AS price,
CASE WHEN POSITION(',' IN pricehistory) > 0
THEN SUBSTR(pricehistory, POSITION(',' IN pricehistory) + 1)
ELSE NULL END AS rest
FROM t
UNION ALL
SELECT id,
SUBSTRING_INDEX(rest, ',', 1) AS price,
CASE WHEN POSITION(',' IN rest) > 0
THEN SUBSTR(rest, POSITION(',' IN rest) + 1)
ELSE NULL END AS rest
FROM cte
WHERE rest IS NOT NULL)
SELECT id, AVG(CAST(SUBSTR(price, POSITION(';' IN price) + 1) AS decimal(10,2))) AS price_average
FROM cte
GROUP BY id;
A similar way to do the same (using regular expressions functions):
WITH RECURSIVE
cte AS (SELECT Id, concat(pricehistory, ',') AS pricehistory FROM t),
unnest AS (SELECT id,
pricehistory,
1 AS i,
REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, 1) AS price
FROM cte
UNION ALL
SELECT id,
pricehistory,
i + 1,
REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, i + 1)
FROM unnest
WHERE REGEXP_SUBSTR(pricehistory, ';[0-9.]*,', 1, i + 1) IS NOT NULL)
SELECT id, AVG(CAST(SUBSTR(price, 2, LENGTH(price) - 2) AS decimal(10,2))) AS price_average
FROM unnest
GROUP BY id;
you don't write what DBMS you are using.
In MS SQL-SERVER you can write something like this.
Create a function to convert string to multiple rows, and then use that in the query.
CREATE or ALTER FUNCTION dbo.BreakStringIntoRows (#CommadelimitedString varchar(1000), #Separator VARCHAR(1))
RETURNS #Result TABLE (Column1 VARCHAR(max))
AS
BEGIN
DECLARE #IntLocation INT
WHILE (CHARINDEX(#Separator, #CommadelimitedString, 0) > 0)
BEGIN
SET #IntLocation = CHARINDEX(#Separator, #CommadelimitedString, 0)
INSERT INTO #Result (Column1)
--LTRIM and RTRIM to ensure blank spaces are removed
SELECT RTRIM(LTRIM(SUBSTRING(#CommadelimitedString, 0, #IntLocation)))
SET #CommadelimitedString = STUFF(#CommadelimitedString, 1, #IntLocation, '')
END
INSERT INTO #Result (Column1)
SELECT RTRIM(LTRIM(#CommadelimitedString))--LTRIM and RTRIM to ensure blank spaces are removed
RETURN
END
create table test1 ( id int not null, pricehistory varchar(max) not null );
insert into test1 values ( 1, '1634913730;48.38,1634916509;48.38,1635162352;37.96,1635177904;49.14,1635337722;1219.98,1635340811;27.17' );
insert into test1 values ( 2, '1634913731;42.42,1634916609;21.21' );
Select *,
(
Select avg(CAST(RTRIM(LTRIM(SUBSTRING(column1, 0, CHARINDEX(';', column1, 0)))) as decimal)) From dbo.BreakStringIntoRows(pricehistory, ',')
) as AVG
FRom test1
sample output:

Generate a comma-separated list of numbers in a single string

Is there a way to generate a comma-separated string of a series of numbers where the "begin" and "end" numbers are provided?
For example, provide the numbers 1 and 10 and the output would be a single value of: 1,2,3,4,5,6,7,8,9,10
10/10/2019 edit explaining why I'm interested in this:
My workplace writes queries with several columns in the SELECT statement plus aggregate functions. Then a GROUP BY clause using the column numbers. I figured using a macro that creates a comma-separated list to copy/paste in would save some time.
SELECT t.colA
, t.colB
, t.colC
, t.colD
, t.colE
, t.colF
, t.colG
, t.colH
, t.colI
, t.colJ
, sum(t.colK) as sumK
, sum(t.colL) as sumL
, sum(t.colM) as sumM
FROM t
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
;
You can use a recursive CTE to generate your numbers, and xml_agg to generate your string:
with recursive nums (counter) as
( select * from (select cast(1 as bigint) as counter) t
union all
select
counter + 1
from nums
where counter between 1 and 9
)
select
trim(trailing ',' from cast(xmlagg(cast(counter as varchar(2)) || ',' order by counter) as varchar(100)))
from nums
Check these methods in SQL Server-
IF OBJECT_ID('TEMPDB..#Sample') IS NOT NULL
DROP TABLE #Sample
Create table #Sample
(
NUM int
)
declare #n int
select #n=10
insert into #Sample(NUM)
SELECT NUM FROM (select row_number() over (order by (select null)) AS NUM from sys.columns) A WHERE NUM<=#N
--Method 1 (For SQL SERVER -NEW VERSION Support)
SELECT STRING_AGG(NUM,',') AS EXPECTED_RESULT FROM #Sample
--Method 1 (For SQL SERVER -OLD VERSION Support)
select DISTINCT STUFF(CAST((
SELECT ' ,' +CAST(c.num AS VARCHAR(MAX))
FROM (
SELECT num
FROM #Sample
) c
FOR XML PATH(''), TYPE) AS VARCHAR(MAX)), 1, 2, '') AS EXPECTED_RESULT
from #Sample t
While loop seems appropriate
declare #begin int=1
declare #end int=11
declare #list varchar(500)
if #begin > #end
begin
select 'error, beginning number ' + convert(varchar(500),#begin)
+ ' must not be greater than ending number '
+ convert(varchar(500),#end) + '.' err
return
end
else
set #list = convert(varchar(500),#begin)
;
while #begin < #end
begin
set #begin += 1
set #list = #list + ',' + convert(varchar(500),#begin)
end
select #list
You might want to use varchar(5000) or something depending on how big you want it to get.
disclaimer -- I don't know if this works with teradata
I'm not sure there is a good direct way to generate a series in Teradata. You can fake it a few different ways though. Here's a comma separated list of numbers from 5 to 15, for example:
SELECT TRIM(TRAILING ',' FROM (XMLAGG(TRIM(rn)|| ',' ) (VARCHAR(10000))))
FROM (SELECT 4 + ROW_NUMBER() OVER (ORDER BY Sys_Calendar."CALENDAR".day_of_calendar) as rn FROM Sys_Calendar."CALENDAR" QUALIFY rn <= 15) t
I've only used sys_calendar.calendar here because it's a big table. Any big table would do here though.
Here's one way to do it in Teradata:
SELECT ARRAY_AGG(src.RowNum)
FROM (
SELECT ROW_NUMBER() OVER() AS RowNum
FROM sys_calendar.calendar
QUALIFY RowNum BETWEEN <begin_num> AND <end_num>
) src
This will give you the output as an ARRAY data type, which you can probably cast as a VARCHAR. It also assumes begin_num > 0 and <end_num> is less than the number of rows in the sys_calendar.calendar view. You can always fiddle with this to fit your required range of values.
There are also DelimitedBuild UDFs out there (if you can find one) that can be used to convert row values into delimited strings.
The cheapest way to achieve your goal is this one (no functions, or joins to tables required):
WITH RECURSIVE NumberRanges(TheNumber,TheString) AS
(
SELECT 1 AS TheNumber,casT(1 as VARCHAR(500)) as TheString
FROM
(
SELECT * FROM (SELECT NULL AS X) X
) DUMMYTABLE
UNION ALL
SELECT
TheNumber + 1 AS TheNumber,
TheString ||',' || TRIM(TheNumber+1)
FROM NumberRanges
WHERE
TheNumber < 10
)
SELECT TheString
FROM NumberRanges
QUALIFY ROW_NUMBER() OVER ( ORDER BY TheNumber DESC) = 1;
Result String: 1,2,3,4,5,6,7,8,9,10

Get a specific string

It's my data and every ThroughRouteSid record has the same pattern.
six number and five comma. then I just want to get three and five
number into two record to template Table and get the same Count()
value to these two record.
For example: First record in the picture.
ThroughRouteSid(3730,2428,2428,3935,3935,3938,) Count(32).
I want a result like this:
2428 32 3935 32
I get What number I want.become two record and both have same Count value into template table
you can use XML to get your result, please refer below sample code -
create table #t1( ThroughRouteSid varchar(500) , Cnt int)
insert into #t1
select '3730,2428,2428,3935,3935,3938,' , len('3730,2428,2428,3935,3935,3938,')
union all select '1111,2222,3333,4444,5555,6666,' , len('1111,2222,3333,4444,5555,6666,')
select cast( '<xml><td>' + REPLACE( SUBSTRING(ThroughRouteSid ,1 , len(ThroughRouteSid)-1),',','</td><td>') + '</td></xml>' as xml) XmlData , Cnt
into #t2 from #t1
select XmlData.value('(xml/td)[3]' ,'int' ), Cnt ,XmlData.value('(xml/td)[5]' ,'int' ), Cnt
from #t2
First create the function referring How to Split a string by delimited char in SQL Server. Then try Querying the following
select (SELECT CONVERT(varchar,splitdata) + ' '+ Convert(varchar, [Count])+' ' FROM (select splitdata, ROW_NUMBER() over (ORDER BY (SELECT 100)) row_no
from [dbo].[fnSplitString](ThroughRouteSid,',')
where splitdata != '') as temp where row_no in (2,5)
for xml path('')) as col1 from [yourtable]
If you are using SQL Server 2016 you can do something like this:
create table #temp (ThroughRouteSid varchar(1024),[Count] int)
insert into #temp values
('3730,2428,2428,3935,3935,3938,',32),
('730,428,428,335,935,938,',28)
select
spt.value,
t.[Count]
from #temp t
cross apply (
select value from STRING_SPLIT(t.ThroughRouteSid,',') where LEN(value) > 0
)spt

Split string and number columns

Let's say I have a table such as
ItemID ClassID
------------------------
1 10, 13, 12
2 5, 7
and would like to copy the data to another table like so
ItemID Numbering ClassID
----------------------------------
1 1 10
1 2 13
1 3 12
2 1 5
2 2 7
Separating the comma-delimited ClassID field into individual rows, retaining the order they had in the first table
Populating the Numbering row on insert. The Numbering column has sequential integers for each batch of ClassID and is why ClassID needs to be kept in order.
I have attempted this with the following function:
CREATE FUNCTION dbo.Split
(
#String NVARCHAR(MAX)
)
RETURNS #SplittedValues TABLE(
Value INT
)
AS
BEGIN
DECLARE #SplitLength INT
DECLARE #Delimiter VARCHAR(10)
SET #Delimiter = ','
WHILE len(#String) > 0
BEGIN
SELECT #SplitLength = (CASE charindex(#Delimiter, #String)
WHEN 0 THEN
datalength(#String) / 2
ELSE
charindex(#Delimiter, #String) - 1
END)
INSERT INTO #SplittedValues
SELECT cast(substring(#String, 1, #SplitLength) AS INTEGER)
WHERE
ltrim(rtrim(isnull(substring(#String, 1, #SplitLength), ''))) <> '';
SELECT #String = (CASE ((datalength(#String) / 2) - #SplitLength)
WHEN 0 THEN
''
ELSE
right(#String, (datalength(#String) / 2) - #SplitLength - 1)
END)
END
RETURN
END
but it only partly works. It copies the rows the correct amount of times (i.e. three times for ItemID=1, and twice for ItemID=2 in the above example), but they are exact copies of the row (all saying '10, 13, 12') and the comma-delimited parts are not split up. There is also nothing in the function to add to the Numbering column.
So, I have two questions: How do I modify the above function to split up the ClassID string, and what do I add to correctly increment the Numbering column?
Thanks!
I'd use a recursive CTE to do it.
WITH SplitCTE AS
(
SELECT
itemid,
LEFT(ClassID,CHARINDEX(',',ClassID)-1) AS ClassID
,RIGHT(ClassID,LEN(ClassID)-CHARINDEX(',',ClassID)) AS remaining
FROM table1
WHERE ClassID IS NOT NULL AND CHARINDEX(',',ClassID)>0
UNION ALL
SELECT
itemid,
LEFT(remaining,CHARINDEX(',',remaining)-1)
,RIGHT(remaining,LEN(remaining)-CHARINDEX(',',remaining))
FROM SplitCTE
WHERE remaining IS NOT NULL AND CHARINDEX(',',remaining)>0
UNION ALL
SELECT
itemid,remaining,null
FROM SplitCTE
WHERE remaining IS NOT NULL AND CHARINDEX(',',remaining)=0
)
SELECT
itemid,
row_number() over (partition by itemid order by cast(classid as int) asc) as Numbering,
cast (ClassID as int) as ClassID
FROM
SplitCTE
UNION ALL
select
ItemId,
1,
cast(classid as int)
FROM table1
WHERE ClassID IS NOT NULL AND CHARINDEX(',',ClassID) = 0
SQL Fiddle
DECLARE #t TABLE( ID INT IDENTITY, data VARCHAR(50))
INSERT INTO #t(data) SELECT '10, 13, 12'
INSERT INTO #t(data) SELECT '5, 7'
select F1.id,O.splitdata, ROW_NUMBER() OVER(PARTITION BY ID ORDER BY (SELECT 1))
from (
select *,cast(''+replace(F.data,',','')+'' as XML) as xmlfilter from #t F
)F1
cross apply
(
select fdata.D.value('.','varchar(50)') as splitdata from f1.xmlfilter.nodes('X') as fdata(D)
) O

Return Distinct Rows That Contain The Same Value/Character In SQL

I have a bit of a tricky situation. I have a column that contains a pipe delimited set of numbers in numerous rows in a table. For example:
Courses
-------------------
1|2
1|2|3
1|2|8
10
11
11|12
What I want to achieve is to return rows where the number only appears once in my output.
Ideally, I want to try and carry this out using SQL rather than having to carry out checks at a web application level. Carrying out a DISTINCT does not achieve what I want.
The desired output would be:
Courses
-------------------
1
2
3
8
10
11
12
I would appreciated if anyone can guide me in the right direction.
Thanks.
Please try:
declare #tbl as table(Courses nvarchar(max))
insert into #tbl values
('1|2'),
('1|2|3'),
('1|2|8'),
('10'),
('11'),
('11|12')
select * from #tbl
SELECT
DISTINCT CAST(Split.a.value('.', 'VARCHAR(100)') AS INT) AS CVS
FROM
(
SELECT CAST ('<M>' + REPLACE(Courses, '|', '</M><M>') + '</M>' AS XML) AS CVS
FROM #tbl
) AS A CROSS APPLY CVS.nodes ('/M') AS Split(a)
ORDER BY 1
Try this one -
SET NOCOUNT ON;
DECLARE #temp TABLE
(
string VARCHAR(500)
)
DECLARE #Separator CHAR(1)
SELECT #Separator = '|'
INSERT INTO #temp (string)
VALUES
('1|2'),
('1|2|3'),
('1|2|8'),
('10'),
('11'),
('11|12')
-- 1. XML
SELECT p.value('(./s)[1]', 'VARCHAR(500)')
FROM (
SELECT field = CAST('<r><s>' + REPLACE(t.string, #Separator, '</s></r><r><s>') + '</s></r>' AS XML)
FROM #temp t
) d
CROSS APPLY field.nodes('/r') t(p)
-- 2. CTE
;WITH a AS
(
SELECT
start_pos = 1
, end_pos = CHARINDEX(#Separator, t.string)
, t.string
FROM #temp t
UNION ALL
SELECT
end_pos + 1
, CHARINDEX(#Separator, string, end_pos + 1)
, string
FROM a
WHERE end_pos > 0
)
SELECT d.name
FROM (
SELECT
name = SUBSTRING(
string
, start_pos
, ABS(end_pos - start_pos)
)
FROM a
) d
WHERE d.name != ''
Try this :
create table course (courses varchar(100))
insert into course values('1|2')
insert into course values('1|2|3')
insert into course values('1|2|8')
insert into course values('10')
insert into course values('11')
insert into course values('11|12')
Declare #col varchar(200)
SELECT
#col=(
SELECT DISTINCT c.courses + '|'
FROM course c
FOR XML PATH('')
);
select * from course
;with demo as(
select cast(substring(#col,1,charindex('|',#col,1)-1) AS INT) cou,charindex('|',#col,1) pos
union all
select cast(substring(#col,pos+1,charindex('|',#col,pos+1)-pos-1)AS INT) cou,charindex('|',#col,pos+1) pos
from demo where pos<LEN(#col))
select distinct cou from demo
Could not manage without recursion :( Something like this could do the trich?
WITH splitNum(num, r)
AS
(
SELECT
SUBSTRING(<field>,1, CHARINDEX('|', <field>)-1) num,
SUBSTRING(<field>,CHARINDEX('|', <field>)+1, len(<field>)) r
FROM <yourtable> as a
UNION ALL
SELECT
SUBSTRING(r,1, CHARINDEX('|', r)-1) num,
SUBSTRING(r,CHARINDEX('|', r)+1, len(r)) r
FROM <yourtable> b
WHERE CHARINDEX('|', r) > 0
inner join splitNum as c on <whatevertheprimarykeyis>
)
SELECT distinct num FROM splitNum
Didn't make it run, but it should do the trick, just replace the and with the correct info
One way would be to use a recursive CTE:
with cte as
(select cast(case charindex('|',courses) when 0 then courses
else left(courses,charindex('|',courses)-1) end as int) course,
case charindex('|',courses) when 0 then ''
else right(courses,len(courses)-charindex('|',courses)) end courses
from courses
union all
select cast(case charindex('|',courses) when 0 then courses
else left(courses,charindex('|',courses)-1) end as int) course,
case charindex('|',courses) when 0 then ''
else right(courses,len(courses)-charindex('|',courses)) end courses
from cte
where len(courses)>0)
select distinct course from cte
SQLFiddle here.