Find the missing words

Find the missing words - sql

Hi, I am creating a module to find the missing words from a string in SQL.
If I have the string
"Man,one young men only,boat,adults only"
I want to match these words against the table tbl_missingwords. The table is as follows:
ID Keyword Synonym
1 One young men only young men,young adults,adults only
2 One young women only young women,young adults,adults only
3 Domestic cat Domestic animals,pats,animal themes
4 Domestic dog Domestic animals,pats,animal themes
5 Adventure recreation,persuit
6 Boat mode of transport,transport
Then the result should be
ID Keyword Synonym
1 One young men only young men,young adults
6 Boat mode of transport,transport
Please help to select these missing words in SQL.

First create a user-defined function to split the input string.
Function - fn_split
-- Splits @text on @delimiter and returns the non-empty pieces in input order.
--   @text      : the string to split.
--   @delimiter : the separator (1-20 characters); defaults to a single space.
-- Returns one row per non-empty token: position (IDENTITY, 1-based) and value.
-- Empty tokens (leading, trailing or doubled delimiters) are skipped.
CREATE FUNCTION [dbo].[fn_Split](@text varchar(8000), @delimiter varchar(20) = ' ')
RETURNS @Strings TABLE
(
    position int IDENTITY PRIMARY KEY,
    value varchar(8000)
)
AS
BEGIN
    DECLARE @index int
    DECLARE @delimiterLength int

    -- LEN() ignores trailing spaces (LEN(' ') = 0), so measure the delimiter
    -- with a sentinel character appended.
    SET @delimiterLength = LEN(@delimiter + 'x') - 1

    WHILE (LEN(@text) > 0)
    BEGIN
        SET @index = CHARINDEX(@delimiter, @text)

        -- No delimiter left: whatever remains is the final token.
        IF (@index = 0)
        BEGIN
            INSERT INTO @Strings (value) VALUES (@text)
            BREAK
        END

        -- @index = 1 means the text starts with a delimiter (empty token): skip it.
        IF (@index > 1)
            INSERT INTO @Strings (value) VALUES (LEFT(@text, @index - 1))

        -- Drop the token and the WHOLE delimiter. SUBSTRING replaces
        -- RIGHT(@text, LEN(@text) - @index): because LEN ignores trailing
        -- spaces, that form returned the wrong tail (or a negative length)
        -- whenever @text ended in spaces.
        SET @text = SUBSTRING(@text, @index + @delimiterLength, 8000)
    END
    RETURN
END
Then try the following sql query.
Query
-- For every tbl_missingwords row whose Keyword appears in the comma-separated
-- input, return its Synonym list with a matched input word removed.
DECLARE @str AS varchar(max)
SET @str = 'Man,one young men only,boat,adults only'

SELECT
    ID,
    MIN(keyword) AS keyword,
    -- Tidy the separators left behind by the removal: drop a trailing comma
    -- and collapse doubled commas.
    CASE
        WHEN RIGHT(MIN([Synonym]), 1) = ','
            THEN REPLACE(LEFT(MIN([Synonym]), LEN(MIN([Synonym])) - 1), ',,', ',')
        ELSE REPLACE(MIN([Synonym]), ',,', ',')
    END AS [Synonym]
FROM
(
    -- One row per (input word, matching keyword row); each row has that single
    -- input word stripped from the synonym list.
    SELECT ID, Keyword, value, REPLACE([Synonym], value, '') AS [Synonym]
    FROM fn_split(@str, ',')
    CROSS JOIN tbl_missingwords
    WHERE Keyword IN (SELECT value FROM fn_split(@str, ','))
) t
-- NOTE(review): MIN() keeps only one of the per-word variants, so if several
-- input words occur in the same synonym list only one removal survives.
GROUP BY ID;
SQL Fiddle

You've already asked a number of questions regarding this. Redesigning your table structures should now be your priority. Anyway, here is my solution using a splitter and CROSS APPLY:
SQL Fiddle
-- Lists, per matched keyword, the synonyms that are NOT present in the input.
DECLARE @str VARCHAR(MAX) = 'Man,one young men only,boat,adults only';

-- Input words, one per row.
DECLARE @tblStr AS TABLE (word VARCHAR(MAX));

INSERT INTO @tblStr (word)
SELECT Item
FROM dbo.SplitStrings_XML(@str, ',');

-- One row per (keyword row, individual synonym).
WITH CteMissingWords (ID, Keyword, Synonym) AS (
    SELECT
        w.ID,
        w.Keyword,
        s.Item
    FROM tbl_missingwords w
    CROSS APPLY dbo.SplitStrings_XML(w.Synonym, ',') s
)
SELECT
    tmw.ID,
    tmw.Keyword,
    x.Synonym
FROM tbl_missingwords tmw
CROSS APPLY (
    -- Re-join the surviving synonyms with commas. TYPE + .value() stops
    -- FOR XML from entitizing characters such as & < > in the result.
    SELECT STUFF((
        SELECT ',' + cmw.Synonym
        FROM CteMissingWords cmw
        WHERE cmw.ID = tmw.ID
          AND cmw.Keyword IN (SELECT word FROM @tblStr)
          -- NOT EXISTS rather than NOT IN: a NULL word would make NOT IN
          -- reject every row.
          AND NOT EXISTS (SELECT 1 FROM @tblStr ts WHERE ts.word = cmw.Synonym)
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)'), 1, 1, '')
) x (Synonym)
-- Rows whose keyword did not match (or with nothing left) produce NULL.
WHERE x.Synonym IS NOT NULL;
The definition of dbo.SplitStrings_XML is already in one of your previous questions.

Related

SQL Search/Compare Keywords and Orderby similarity

I got this challenge to do a related page display based on the keywords of a page.
The application is supposed to display related pages based on the keywords of the current page the user is visiting. It will check the keywords string the page currently has for example it has 3 keywords
"Tag,Keyword,Test"
It should check other pages in the database for the same keywords. For example the results will be 2 other pages with some of the keywords (not all)
PageName | Keyword
Subpage1| Test,Tag
Subpage2| Tag
I would also like to order them by the relevance, the more keywords is matched the higher the relevance.

Try this
Function
-- Splits @text on @delimiter and returns the non-empty pieces in input order.
--   @text      : the string to split.
--   @delimiter : the separator (1-20 characters).
-- Returns one row per non-empty token: position (IDENTITY, 1-based) and value.
-- Empty tokens (leading, trailing or doubled delimiters) are skipped.
CREATE FUNCTION [dbo].[fn_Split](@text varchar(8000), @delimiter varchar(20))
RETURNS @Strings TABLE
(
    position int IDENTITY PRIMARY KEY,
    value varchar(8000)
)
AS
BEGIN
    DECLARE @index int
    DECLARE @delimiterLength int

    -- LEN() ignores trailing spaces (LEN(' ') = 0), so measure the delimiter
    -- with a sentinel character appended.
    SET @delimiterLength = LEN(@delimiter + 'x') - 1

    WHILE (LEN(@text) > 0)
    BEGIN
        SET @index = CHARINDEX(@delimiter, @text)

        -- No delimiter left: whatever remains is the final token.
        IF (@index = 0)
        BEGIN
            INSERT INTO @Strings (value) VALUES (@text)
            BREAK
        END

        -- @index = 1 means the text starts with a delimiter (empty token): skip it.
        IF (@index > 1)
            INSERT INTO @Strings (value) VALUES (LEFT(@text, @index - 1))

        -- Drop the token and the WHOLE delimiter. SUBSTRING replaces
        -- RIGHT(@text, LEN(@text) - @index), which returned the wrong tail
        -- when @text ended in spaces because LEN ignores them.
        SET @text = SUBSTRING(@text, @index + @delimiterLength, 8000)
    END
    RETURN
END
Query
-- Ranks pages by how many of the requested keywords they contain.
DECLARE @keyword nvarchar(max) = 'tag,test'

SELECT
    table1.pageName,
    table1.Keyword,
    COUNT(keywordname.value) AS MatchCount   -- number of requested keywords found
FROM table1
INNER JOIN (SELECT value FROM fn_Split(@keyword, ',')) AS keywordname
    -- Substring match: 'tag' also matches a stored keyword such as 'tagged'.
    ON table1.keyword LIKE '%' + keywordname.value + '%'
-- The original grouped and selected on the misspelled alias "tabl1".
GROUP BY table1.pageName, table1.Keyword
ORDER BY MatchCount DESC

You can use a CTE and a split function with CROSS APPLY:
-- Lists pages that share keywords with @keywords, most matches first.
-- @keywords (comma-separated list) is expected to be declared by the caller.
-- NOTE(review): assumes dbo.Split(string, delimiter) returns a column named
-- Data, as the original snippet does -- confirm against its definition.
;WITH CtePrimary AS (
    -- One row per (page, trimmed keyword).
    SELECT
        LTRIM(RTRIM(s.Data)) AS Keyword,
        k.PageName
    FROM PageKeywords k
    CROSS APPLY dbo.Split(k.Primary_Kwd, ',') s   -- was "," (double-quoted, an identifier)
),
CteSecondry AS (
    SELECT
        t1.PageName,
        -- All keywords of the page, re-joined with ', '. STUFF removes the
        -- two-character leading separator. (The original converted each
        -- keyword to varchar(10), truncating longer keywords.)
        STUFF(
            (SELECT ', ' + t2.Keyword
             FROM CtePrimary t2
             WHERE t2.PageName = t1.PageName
             FOR XML PATH (''))
            , 1, 2, '') AS Keywords,
        -- How many of the requested keywords this page has.
        COUNT(DISTINCT t1.Keyword) AS KeyCount
    FROM CtePrimary t1
    WHERE t1.Keyword IN (SELECT LTRIM(RTRIM(Data)) FROM dbo.Split(@keywords, ','))
    GROUP BY t1.PageName   -- required for the COUNT; the original had no GROUP BY
)
SELECT PageName, Keywords
FROM CteSecondry
ORDER BY KeyCount DESC

Query to get only numbers from a string

I have data like this:
string 1: 003Preliminary Examination Plan
string 2: Coordination005
string 3: Balance1000sheet
The output I expect is
string 1: 003
string 2: 005
string 3: 1000
And I want to implement it in SQL.

First create this UDF
-- Returns @strAlphaNumeric with every non-digit character removed.
--   @strAlphaNumeric : input text (up to 256 characters).
-- Returns the remaining digits as text; '0' when the input is NULL.
CREATE FUNCTION dbo.udf_GetNumeric
(
    @strAlphaNumeric VARCHAR(256)
)
RETURNS VARCHAR(256)
AS
BEGIN
    -- Position of the first non-digit character, 0 when none is left.
    DECLARE @intAlpha INT
    SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)

    WHILE @intAlpha > 0
    BEGIN
        -- Delete that one character and look for the next non-digit.
        SET @strAlphaNumeric = STUFF(@strAlphaNumeric, @intAlpha, 1, '')
        SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
    END

    RETURN ISNULL(@strAlphaNumeric, '0')
END
GO
Now use the function as
SELECT dbo.udf_GetNumeric(column_name)
from table_name
SQL FIDDLE
I hope this solved your problem.
Reference

Try this one -
Query:
-- Extracts the first run of digits from each string.
DECLARE @temp TABLE
(
    string NVARCHAR(50)
)

INSERT INTO @temp (string)
VALUES
    ('003Preliminary Examination Plan'),
    ('Coordination005'),
    ('Balance1000sheet')

-- Outer step: keep the characters before the first non-digit. The appended 't'
-- guarantees a non-digit exists when the digits run to the end of the string.
SELECT LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
FROM (
    -- Middle step: cut everything before the first digit.
    SELECT subsrt = SUBSTRING(string, pos, LEN(string))
    FROM (
        -- Inner step: position of the first digit (0 when there is none).
        SELECT string, pos = PATINDEX('%[0-9]%', string)
        FROM @temp
    ) d
) t
Output:
----------
003
005
1000

Query:
-- Extracts the first run of digits from each string with a single SUBSTRING.
DECLARE @temp TABLE
(
    string NVARCHAR(50)
)

INSERT INTO @temp (string)
VALUES
    ('003Preliminary Examination Plan'),
    ('Coordination005'),
    ('Balance1000sheet')

SELECT
    SUBSTRING(
        string,
        -- start: position of the first digit
        PATINDEX('%[0-9]%', string),
        -- length: (position of the last digit of the first run) - start + 1.
        -- '%[0-9][^0-9]%' finds a digit followed by a non-digit; the appended
        -- 't' supplies that non-digit when the run ends the string.
        PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%', string) + 1
    ) AS Number
FROM @temp

Please try:
-- Extracts the first run of digits from @var.
DECLARE @var nvarchar(max) = 'Balance1000sheet'

-- Keep the characters before the first non-digit; the appended 'a' guarantees
-- one exists when the digits run to the end of the string.
SELECT LEFT(Val, PATINDEX('%[^0-9]%', Val + 'a') - 1)
FROM (
    -- Everything from the first digit onwards.
    SELECT SUBSTRING(@var, PATINDEX('%[0-9]%', @var), LEN(@var)) AS Val
) x

Getting only numbers from a string can be done in a one-liner.
Try this :
-- First run of consecutive digits in the string ('' when it has none).
-- The inner SUBSTRING keeps everything from the first digit onwards; LEFT then
-- cuts at the first non-digit (the appended 'x' guarantees there is one).
-- Without the LEFT, 'abc123vfg34' would yield '123vfg34' rather than '123'.
LEFT(
    SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here')),
    PATINDEX('%[^0-9]%', SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here')) + 'x') - 1
)
NB: Only works for the first int in the string, ex: abc123vfg34 returns 123.

I found this approach works about 3x faster than the top voted answer. Create the following function, dbo.GetNumbers:
-- Returns only the digit characters of @String, in their original order.
--   @String : input text (up to 8000 characters).
-- Returns the digits concatenated; NULL when the input contains no digit.
-- Requires STRING_AGG (SQL Server 2017 or later).
CREATE FUNCTION dbo.GetNumbers(@String VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN;
    WITH
    Numbers
    AS (
        --Step 1.
        --Get a column of numbers to represent
        --every character position in the @String.
        SELECT 1 AS Number
        UNION ALL
        SELECT Number + 1
        FROM Numbers
        WHERE Number < LEN(@String)
    )
    ,Characters
    AS (
        --Step 2.
        --Use the column of numbers generated above
        --to tell substring which character to extract.
        --The position is kept so the digits can be put back in order.
        SELECT
            Number,
            SUBSTRING(@String, Number, 1) AS Character
        FROM Numbers
    )
    --Step 3.
    --Pattern match to return only numbers from the CTE
    --and use STRING_AGG to rebuild it into a single string.
    --WITHIN GROUP is required: without it STRING_AGG gives no ordering
    --guarantee and the digits could come back shuffled.
    SELECT @String = STRING_AGG(Character, '') WITHIN GROUP (ORDER BY Number)
    FROM Characters
    WHERE Character LIKE '[0-9]'
    --allows going past the default maximum of 100 loops in the CTE
    OPTION (MAXRECURSION 8000)

    RETURN @String
END
GO
Testing
Testing for purpose:
SELECT dbo.GetNumbers(InputString) AS Numbers
FROM ( VALUES
('003Preliminary Examination Plan') --output: 003
,('Coordination005') --output: 005
,('Balance1000sheet') --output: 1000
,('(111) 222-3333') --output: 1112223333
,('1.38hello#f00.b4r#\-6') --output: 1380046
) testData(InputString)
Testing for performance:
Start off setting up the test data...
--Add table to hold test data
CREATE TABLE dbo.NumTest (String VARCHAR(8000))

--Make a long string with a mix of numbers and letters.
--Note: 8 characters x 800 repetitions = 6400 characters, not 8000.
DECLARE @Num VARCHAR(8000) = REPLICATE('12tf56se',800)

--Add this to the test table 500 times
DECLARE @n INT = 0
WHILE @n < 500
BEGIN
    INSERT INTO dbo.NumTest (String) VALUES (@Num)
    SET @n = @n + 1
END
Now testing the dbo.GetNumbers function:
SELECT dbo.GetNumbers(NumTest.String) AS Numbers
FROM dbo.NumTest -- Time to complete: 1 min 7s
Then testing the UDF from the top voted answer on the same data.
SELECT dbo.udf_GetNumeric(NumTest.String)
FROM dbo.NumTest -- Time to complete: 3 mins 12s
Inspiration for dbo.GetNumbers
Decimals
If you need it to handle decimals, you can use either of the following approaches, I found no noticeable performance differences between them.
change '[0-9]' to '[0-9.]'
change Character LIKE '[0-9]' to ISNUMERIC(Character) = 1 (SQL treats a single decimal point as "numeric")
Bonus
You can easily adapt this to differing requirements by swapping out WHERE Character LIKE '[0-9]' with the following options:
WHERE Letter LIKE '[a-zA-Z]' --Get only letters
WHERE Letter LIKE '[0-9a-zA-Z]' --Remove non-alphanumeric
WHERE Letter LIKE '[^0-9a-zA-Z]' --Get only non-alphanumeric

With the previous queries I get these results:
'AAAA1234BBBB3333' >>>> Output: 1234
'-çã+0!\aº1234' >>>> Output: 0
The code below returns All numeric chars:
1st output: 12343333
2nd output: 01234
-- Removes every non-digit character from @StringAlphaNum and returns the rest.
-- The original looped "while isnumeric(...) = 0", which never terminates when
-- the input has no digits (isnumeric('') is 0) and stops early on strings
-- ISNUMERIC accepts but that are not all digits (e.g. '1e5').
declare @StringAlphaNum varchar(255)
declare @NonDigitPosition int

set @StringAlphaNum = 'AAAA1234BBBB3333'

-- Position of the first non-digit character, 0 when none is left.
set @NonDigitPosition = patindex('%[^0-9]%', @StringAlphaNum)

while @NonDigitPosition > 0
begin
    -- Delete that one character, then search again.
    set @StringAlphaNum = stuff(@StringAlphaNum, @NonDigitPosition, 1, '')
    set @NonDigitPosition = patindex('%[^0-9]%', @StringAlphaNum)
end

select @StringAlphaNum

-- Removes every non-digit character from @puvodni.
declare @puvodni nvarchar(20)
set @puvodni = N'abc1d8e8ttr987avc'

-- Each pass takes the first non-digit character found and removes ALL of its
-- occurrences, until no non-digit is left.
WHILE PATINDEX('%[^0-9]%', @puvodni) > 0
    SET @puvodni = REPLACE(@puvodni, SUBSTRING(@puvodni, PATINDEX('%[^0-9]%', @puvodni), 1), '')

SELECT @puvodni

A solution for SQL Server 2017 and later, using TRANSLATE:
-- Strips the letters a-z (and spaces) from each string using TRANSLATE.
DECLARE @T table (string varchar(50) NOT NULL);

INSERT @T
    (string)
VALUES
    ('003Preliminary Examination Plan'),
    ('Coordination005'),
    ('Balance1000sheet');

SELECT
    result =
        REPLACE(
            -- Map each of the 26 letters to a space. The case- and
            -- accent-insensitive collation makes upper-case and accented
            -- variants match too. Both lists must have the same length (26).
            TRANSLATE(
                T.string COLLATE Latin1_General_CI_AI,
                'abcdefghijklmnopqrstuvwxyz',
                SPACE(26)),
            -- Then delete every space (original ones included).
            SPACE(1),
            SPACE(0))
FROM @T AS T;
Output:
result
003
005
1000
The code works by:
Replacing characters a-z (ignoring case & accents) with a space
Replacing spaces with an empty string.
The string supplied to TRANSLATE can be expanded to include additional characters.

I did not have rights to create functions but had text like
["blahblah012345679"]
And needed to extract the numbers out of the middle
Note this assumes the numbers are grouped together and not at the start and end of the string.
select substring(column_name,patindex('%[0-9]%', column_name),patindex('%[0-9][^0-9]%', column_name)-patindex('%[0-9]%', column_name)+1)
from table name

Although this is an old thread, it's the first result in a Google search, and I came up with a different answer from those that came before. This will allow you to pass your own criteria for what to keep within a string, whatever those criteria might be. You can put it in a function to call over and over again if you want.
-- Collects, in order, every character of @String that matches @MatchExpression.
-- Variable names are spelled with one consistent casing so the script also
-- runs on servers with a case-sensitive collation.
DECLARE @String VARCHAR(MAX) = '-123. a 456-78(90)'
DECLARE @MatchExpression VARCHAR(255) = '%[0-9]%'   -- PATINDEX pattern for characters to keep
DECLARE @return VARCHAR(MAX)                        -- stays NULL when nothing matches
DECLARE @MatchPosition INT = PATINDEX(@MatchExpression, @String)

WHILE @MatchPosition > 0
BEGIN
    -- Append the matched character (CONCAT treats the initial NULL as '') ...
    SET @return = CONCAT(@return, SUBSTRING(@String, @MatchPosition, 1))
    -- ... then delete it from the source so the next search moves on.
    SET @String = STUFF(@String, @MatchPosition, 1, '')
    SET @MatchPosition = PATINDEX(@MatchExpression, @String)
END

SELECT (@return)

This UDF will work for all types of strings:
-- Returns @string with every non-digit character removed.
--   @string : input text.
-- Returns the remaining digits ('' when there are none, NULL for NULL input).
-- The parameter is referenced with one consistent casing; the original mixed
-- "string" and "String", which fails on a case-sensitive collation.
CREATE FUNCTION udf_getNumbersFromString (@string varchar(max))
RETURNS varchar(max)
AS
BEGIN
    -- Each pass removes ALL occurrences of the first non-digit character found.
    WHILE @string LIKE '%[^0-9]%'
        SET @string = REPLACE(@string, SUBSTRING(@string, PATINDEX('%[^0-9]%', @string), 1), '')
    RETURN @string
END

Just a little modification to @Epsicron's answer
-- Extracts the first run of digits from each sample string.
-- Start = position of the first digit; length = position of the first
-- "digit followed by a non-digit" minus the start, plus 1. The appended 't'
-- supplies a trailing non-digit for strings that end in digits.
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
-- Sample rows come from an inline VALUES list aliased as a(string).
FROM (values ('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')) as a(string)
no need for a temporary variable

First find the position of the first digit. Then reverse the string and find the position of the first digit again, which gives the position of the last digit counted from the end. Subtract 1 from each position to get the number of non-digit characters before and after the number; subtracting both from the total length of the string gives the length of the number. Finally, extract the number using SUBSTRING.
-- Returns the span from the first digit to the last digit of @fieldName
-- (anything between them, such as the '.' here, is included).
DECLARE @fieldName nvarchar(100) = 'AAAA1221.121BBBB'

-- Number of characters before the first digit.
DECLARE @lenSt int = PATINDEX('%[0-9]%', @fieldName) - 1
-- Number of characters after the last digit (found by searching the reversed string).
DECLARE @lenEnd int = PATINDEX('%[0-9]%', REVERSE(@fieldName)) - 1

SELECT SUBSTRING(@fieldName, PATINDEX('%[0-9]%', @fieldName), (LEN(@fieldName) - @lenSt - @lenEnd))

T-SQL function to read all the integers from text and return the one at the indicated index, starting from left or right, also using a starting search term (optional):
-- Reads the runs of digits in @text as integers and returns the one at
-- position @number_position.
--   @text            : text to scan.
--   @search_term     : optional anchor. When non-empty it must occur in @text;
--                      only the part after it (@rtl = 0) or before it
--                      (@rtl = 1) is scanned.
--   @number_position : 1-based index of the number to return.
--   @rtl             : 0 = count numbers from the left, 1 = from the right.
-- Returns the selected number, or 0 when @text is NULL/empty, the search term
-- is not found, or there is no number at that position.
-- NOTE(review): each run is CAST to int, so a run of digits larger than the
-- int range will raise a conversion error -- confirm whether that is acceptable.
create or alter function dbo.udf_number_from_text(
    @text nvarchar(max),
    @search_term nvarchar(1000) = N'',
    @number_position tinyint = 1,
    @rtl bit = 0
) returns int
as
begin
    declare @result int = 0;
    declare @search_term_index int = 0;

    if @text is null or len(@text) = 0 goto exit_label;
    set @text = trim(@text);
    -- Text of the same length as the search term is treated as having nothing
    -- left to scan.
    if len(@text) = len(@search_term) goto exit_label;

    if len(@search_term) > 0
    begin
        set @search_term_index = charindex(@search_term, @text);
        if @search_term_index = 0 goto exit_label;
    end;

    -- Narrow the text to the side of the search term that will be scanned.
    if @search_term_index > 0
        if @rtl = 0
            set @text = trim(right(@text, len(@text) - @search_term_index - len(@search_term) + 1));
        else
            set @text = trim(left(@text, @search_term_index - 1));

    if len(@text) = 0 goto exit_label;

    declare @patt_number nvarchar(10) = '%[0-9]%';
    declare @patt_not_number nvarchar(10) = '%[^0-9]%';
    declare @number_start int = 1;
    declare @number_end int;
    -- Numbers in the order they were found (id follows insertion order).
    declare @found_numbers table (id int identity(1,1), val int);

    while @number_start > 0
    begin
        set @number_start = patindex(@patt_number, @text);
        if @number_start > 0
        begin
            if @number_start = len(@text)
            begin
                -- The only digit left is the last character.
                insert into @found_numbers(val)
                select cast(substring(@text, @number_start, 1) as int);
                break;
            end;
            else
            begin
                -- Cut the text so it starts at the digit, then find where the run ends.
                set @text = right(@text, len(@text) - @number_start + 1);
                set @number_end = patindex(@patt_not_number, @text);
                if @number_end = 0
                begin
                    -- Digits run to the end of the text.
                    insert into @found_numbers(val)
                    select cast(@text as int);
                    break;
                end;
                else
                begin
                    insert into @found_numbers(val)
                    select cast(left(@text, @number_end - 1) as int);
                    if @number_end = len(@text)
                        break;
                    else
                    begin
                        -- Continue scanning after the non-digit that ended the run.
                        set @text = trim(right(@text, len(@text) - @number_end));
                        if len(@text) = 0 break;
                    end;
                end;
            end;
        end;
    end;

    -- Pick the requested number, counting from the left or from the right.
    if @rtl = 0
        select @result = coalesce(a.val, 0)
        from (select row_number() over (order by m.id asc) as c_row, m.val
              from @found_numbers as m) as a
        where a.c_row = @number_position;
    else
        select @result = coalesce(a.val, 0)
        from (select row_number() over (order by m.id desc) as c_row, m.val
              from @found_numbers as m) as a
        where a.c_row = @number_position;

exit_label:
    return @result;
end;
Example:
-- Second number counting from the right, in the text before 'term'.
select dbo.udf_number_from_text(N'Text text 10 text, 25 term', N'term',2,1);
returns 10;

This is one of the simplest and easiest approaches. It works on the entire string, including multiple occurrences of numbers.
-- Returns the digit characters of @strInput, in their original order.
--   @strInput : input text (up to 500 characters).
-- Returns the digits concatenated; '' when there are none or the input is NULL.
CREATE FUNCTION dbo.fn_GetNumbers(@strInput NVARCHAR(500))
RETURNS NVARCHAR(500)
AS
BEGIN
    DECLARE @strOut NVARCHAR(500) = '', @intCounter INT = 1

    -- Walk the string one character at a time, keeping only the digits.
    WHILE @intCounter <= LEN(@strInput)
    BEGIN
        SET @strOut = @strOut
            + CASE
                  WHEN SUBSTRING(@strInput, @intCounter, 1) LIKE '[0-9]'
                      THEN SUBSTRING(@strInput, @intCounter, 1)
                  ELSE ''
              END
        SET @intCounter = @intCounter + 1
    END

    RETURN @strOut
END

Following a solution using a single common table expression (CTE).
-- Keeps only the digits of each value, using one recursive CTE.
-- Requires STRING_AGG (SQL Server 2017 or later).
DECLARE @s AS TABLE (id int PRIMARY KEY, value nvarchar(max));

INSERT INTO @s (id, value)
VALUES
    (1, N'003Preliminary Examination Plan'),
    (2, N'Coordination005'),
    (3, N'Balance1000sheet');

SELECT id, value FROM @s ORDER BY id;

-- t holds one row per (id, character position i, character c).
WITH t AS (
    -- Anchor: the first character of every non-empty value.
    SELECT
        id,
        1 AS i,
        SUBSTRING(value, 1, 1) AS c
    FROM
        @s
    WHERE
        LEN(value) > 0
    UNION ALL
    -- Recursive step: the next character of the same value.
    SELECT
        t.id,
        t.i + 1 AS i,
        SUBSTRING(s.value, t.i + 1, 1) AS c
    FROM
        t
        JOIN @s AS s ON t.id = s.id
    WHERE
        t.i < LEN(s.value)
)
SELECT
    id,
    -- Re-assemble the digits in their original order.
    STRING_AGG(c, N'') WITHIN GROUP (ORDER BY i ASC) AS value
FROM
    t
WHERE
    c LIKE '[0-9]'
GROUP BY
    id
ORDER BY
    id
-- One recursion level per character: lift the default limit of 100 so values
-- longer than 100 characters do not fail. The recursion still ends at
-- LEN(value).
OPTION (MAXRECURSION 0);

-- Removes every digit from @index, leaving the non-digit characters.
DECLARE @index NVARCHAR(20);
SET @index = 'abd565klaf12';

WHILE PATINDEX('%[0-9]%', @index) != 0
BEGIN
    -- Take the first digit found and remove ALL of its occurrences.
    SET @index = REPLACE(@index, SUBSTRING(@index, PATINDEX('%[0-9]%', @index), 1), '');
END

SELECT @index;
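As written, this removes the digits and returns the remaining characters. To keep only the numbers instead, replace [0-9] with [^0-9] (or with [a-z] if the only other characters are letters); the result can then be converted to a numeric type using the CAST function.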

If we use the User Define Function, the query speed will be greatly reduced. This code extracts the number from the string....
-- Returns, for each row, the part of [FieldName] that runs from its first
-- digit to its last digit (any characters in between are kept).
-- How the expression works, inside out:
--   1. substring(... patindex('%[0-9]%', [FieldName]) ...) drops everything
--      before the first digit; ltrim/rtrim remove surrounding spaces.
--   2. Reverse(...) flips that text, and the second substring/patindex pair
--      drops everything before the first digit of the flipped text, i.e.
--      everything after the last digit of the original.
--   3. The outer Reverse restores the original character order.
SELECT
Reverse(substring(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) , patindex('%[0-9]%', Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) ), len(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) ))))) )) NumberValue
FROM dbo.TableName

-- Counts the ASCII letters and the digits in input_string (PostgreSQL).
--   input_string : text to inspect.
-- Returns a single row: letters = number of characters in A-Z/a-z,
-- numbers = number of characters in 0-9. Both are 0 for an empty or NULL input.
CREATE OR REPLACE FUNCTION count_letters_and_numbers(input_string TEXT)
RETURNS TABLE (letters INT, numbers INT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        -- COUNT returns bigint; cast to match the declared INT columns.
        CAST(COUNT(*) FILTER (WHERE ch.c ~ '[A-Za-z]') AS INT),
        CAST(COUNT(*) FILTER (WHERE ch.c ~ '[0-9]') AS INT)
    -- A NULL delimiter splits the string into individual characters; an empty
    -- string delimiter would return the whole string as one element.
    -- The alias ch(c) avoids clashing with the input_string parameter.
    FROM unnest(string_to_array(input_string, NULL)) AS ch(c);
END;
$$ LANGUAGE plpgsql;

For the hell of it...
This solution is different to all earlier solutions, viz:
There is no need to create a function
There is no need to use pattern matching
There is no need for a temporary table
This solution uses a recursive common table expression (CTE)
But first - note the question does not specify where such strings are stored. In my solution below, I create a CTE as a quick and dirty way to put these strings into some kind of "source table".
Note also - this solution uses a recursive common table expression (CTE) - so don't get confused by the usage of two CTEs here. The first is simply to make the data avaliable to the solution - but it is only the second CTE that is required in order to solve this problem. You can adapt the code to make this second CTE query your existing table, view, etc.
Lastly - my coding is verbose, trying to use column and CTE names that explain what is going on and you might be able to simplify this solution a little. I've added in a few pseudo phone numbers with some (expected and atypical, as the case may be) formatting for the fun of it.
-- Extracts the digits of each sample string with a recursive CTE that
-- consumes one character per recursion level.
-- SOURCE_TABLE only supplies sample data.
with SOURCE_TABLE as (
select '003Preliminary Examination Plan' as numberString
union all select 'Coordination005' as numberString
union all select 'Balance1000sheet' as numberString
union all select '1300 456 678' as numberString
union all select '(012) 995 8322 ' as numberString
union all select '073263 6122,' as numberString
),
-- Each row carries: the length of the string still being consumed, the first
-- character as a digit (when it is one), the digits collected so far
-- (newString) and the text still to be processed (remainingString).
FIRST_CHAR_PROCESSED as (
-- Anchor member: process the first character of every source string.
select
len(numberString) as currentStringLength,
-- A space is swapped for 'z' so try_cast yields NULL for it instead of 0.
isNull(cast(try_cast(replace(left(numberString, 1),' ','z') as tinyint) as nvarchar),'') as firstCharAsNumeric,
-- nullIf(..., '') turns a blank first character into NULL before try_cast;
-- the outer cast fixes the column type to nvarchar(4000).
cast(isNull(cast(try_cast(nullIf(left(numberString, 1),'') as tinyint) as nvarchar),'') as nvarchar(4000)) as newString,
-- NOTE(review): nvarchar with no length in a CAST defaults to 30 characters
-- per the SQL Server documentation, so a longer remainder would be cut off
-- here -- confirm for inputs longer than the samples.
cast(substring(numberString,2,len(numberString)) as nvarchar) as remainingString
from SOURCE_TABLE
union all
-- Recursive member: process the first character of the previous remainder.
select
len(remainingString) as currentStringLength,
cast(try_cast(replace(left(remainingString, 1),' ','z') as tinyint) as nvarchar) as firstCharAsNumeric,
-- nvarchar(3999) + nvarchar(1) keeps the column type identical to the
-- anchor's nvarchar(4000).
cast(isNull(newString,'') as nvarchar(3999)) + isNull(cast(try_cast(nullIf(left(remainingString, 1),'') as tinyint) as nvarchar(1)),'') as newString,
substring(remainingString,2,len(remainingString)) as remainingString
from FIRST_CHAR_PROCESSED fcp2
-- Stop once the string that was just consumed had a single character left.
where fcp2.currentStringLength > 1
)
select
newString
,* -- comment this out when required
from FIRST_CHAR_PROCESSED
-- Keep only the row produced when the last character was processed; its
-- newString holds all the digits collected.
where currentStringLength = 1
So what's going on here?
Basically in our CTE we are selecting the first character and using try_cast (see docs) to cast it to a tinyint (which is a large enough data type for a single-digit numeral). Note that the type-casting rules in SQL Server say that an empty string (or a space, for that matter) will resolve to zero, so the nullif is added to force spaces and empty strings to resolve to null (see discussion) (otherwise our result would include a zero character any time a space is encountered in the source data).
The CTE also returns everything after the first character - and that becomes the input to our recursive call on the CTE; in other words: now let's process the next character.
Lastly, the field newString in the CTE is generated (in the second SELECT) via concatenation. With recursive CTEs the data type must match between the two SELECT statements for any given column - including the column size. Because we know we are adding (at most) a single character, we are casting that character to nvarchar(1) and we are casting the newString (so far) as nvarchar(3999). Concatenated, the result will be nvarchar(4000) - which matches the type casting we carry out in the first SELECT.
If you run this query and exclude the WHERE clause, you'll get a sense of what's going on - but the rows may be in a strange order. (You won't necessarily see all rows relating to a single input value grouped together - but you should still be able to follow).
Hope it's an interesting option that may help a few people wanting a strictly expression-based solution.

In Oracle
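You can get what you want using this (it assumes the digits form a single contiguous run, because REGEXP_COUNT counts every digit in the string):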
SUBSTR('ABCD1234EFGH',REGEXP_INSTR ('ABCD1234EFGH', '[[:digit:]]'),REGEXP_COUNT ('ABCD1234EFGH', '[[:digit:]]'))
Sample Query:
SELECT SUBSTR('003Preliminary Examination Plan ',REGEXP_INSTR ('003Preliminary Examination Plan ', '[[:digit:]]'),REGEXP_COUNT ('003Preliminary Examination Plan ', '[[:digit:]]')) SAMPLE1,
SUBSTR('Coordination005',REGEXP_INSTR ('Coordination005', '[[:digit:]]'),REGEXP_COUNT ('Coordination005', '[[:digit:]]')) SAMPLE2,
SUBSTR('Balance1000sheet',REGEXP_INSTR ('Balance1000sheet', '[[:digit:]]'),REGEXP_COUNT ('Balance1000sheet', '[[:digit:]]')) SAMPLE3 FROM DUAL

If you are using Postgres and you have data like '2000 - some sample text' then try substring and position combination, otherwise if in your scenario there is no delimiter, you need to write regex:
SUBSTRING(Column_name from 0 for POSITION('-' in column_name) - 1) as
number_column_name

Get the first letter of each word in a SQL string [duplicate]

This question already has answers here:
Closed 11 years ago.
Possible Duplicate:
sql to pick apart a string of a persons name and output the initials
In MS-SQL Server, there is a way to get the first letter of each word in a string? For example:
Name:
Michael Joseph Jackson
Query:
SELECT name, [function] as initial FROM Customers
Result:
MJJ

This function will shield your results against multiple sequential spaces in the source string:
-- Returns the first character of each space-separated word in @str.
--   @str : input text, e.g. a full name.
-- Returns the concatenated initials. Leading, trailing and repeated spaces are
-- ignored. NULL input returns NULL.
CREATE FUNCTION dbo.fnFirsties ( @str NVARCHAR(4000) )
RETURNS NVARCHAR(2000)
AS
BEGIN
    DECLARE @retval NVARCHAR(2000);

    SET @str = RTRIM(LTRIM(@str));
    SET @retval = LEFT(@str, 1);            -- initial of the first word

    WHILE CHARINDEX(' ', @str, 1) > 0
    BEGIN
        -- Drop the current word; LTRIM swallows any run of extra spaces so
        -- the string starts at the next word.
        SET @str = LTRIM(RIGHT(@str, LEN(@str) - CHARINDEX(' ', @str, 1)));
        SET @retval += LEFT(@str, 1);
    END

    RETURN @retval;
END
GO
SELECT dbo.fnFirsties('Michael Joseph Jackson');
SELECT dbo.fnFirsties(' Michael Joseph Jackson '); -- multiple space protection :)
Results:
MJJ
MJJ

Assuming we're doing this in MSSQL 2008 R2, though nothing involved should really matter here. All we do is have some fun with string manipulation. You could put this into a function or proc, or just run it in Query Analyzer directly.
-- Builds the initials of @str: its first character plus the character that
-- follows each space.
DECLARE @str varchar(250) = 'Michael Joseph Jackson'
DECLARE @initials varchar(250) = SUBSTRING(@str, 1, 1)
DECLARE @currentSpace int = CHARINDEX(' ', @str)   -- 0 when no space is left

WHILE (@currentSpace != 0)
BEGIN
    -- Take the character right after the space ...
    SET @initials += SUBSTRING(@str, @currentSpace + 1, 1)
    -- ... then drop everything up to and including that space.
    SET @str = SUBSTRING(@str, @currentSpace + 1, LEN(@str))
    SET @currentSpace = CHARINDEX(' ', @str)
END

SELECT @initials
If you're not doing this for some trivial purpose you'll likely want to clean up the data before attempting to process it. Names are often prefixed by titles, data entry fields are susceptible to user error, etc.

You'll want to add some checks and error handling before you update tblStudents or something, but this should get you started.
-- Returns the first character of each space-separated word in @s.
--   @s : input text, e.g. a full name.
-- Returns the concatenated initials. Leading, trailing and repeated spaces do
-- not contribute characters. NULL input returns NULL.
CREATE FUNCTION initials ( @s AS nvarchar(4000))
RETURNS nvarchar(100)
AS
BEGIN
    -- Trim first so the result never starts with a space and the character
    -- after the last space is always a real one.
    SET @s = LTRIM(RTRIM(@s));

    DECLARE @i nvarchar(100) = LEFT(@s, 1);   -- first char in string
    DECLARE @p int = CHARINDEX(' ', @s);      -- location of first space

    WHILE (@p > 0)                            -- while a space has been found
    BEGIN
        -- Add the char after the space, unless it is another space
        -- (repeated separators).
        IF SUBSTRING(@s, @p + 1, 1) <> ' '
            SET @i = @i + SUBSTRING(@s, @p + 1, 1);
        SET @p = CHARINDEX(' ', @s, @p + 1);  -- find next space
    END

    RETURN @i
END
GO
SELECT dbo.initials('Michael Joseph Jackson');

You first need a table-valued function that splits a varchar and returns a table with a single-column called 'S'.
-- Inline table-valued function that splits @s on @sep.
--   @sep : separator (up to 10 characters).
--   @s   : string to split.
-- Returns one row per piece: pn (1-based piece number) and s (the piece).
-- Empty pieces between adjacent separators are returned as ''.
CREATE FUNCTION dbo.fn_Split2 (@sep nvarchar(10), @s nvarchar(4000))
RETURNS table
AS
RETURN (
    -- Pieces: for each piece, where it starts and where the next separator is
    -- (stop = 0 when there is no further separator).
    WITH Pieces(pn, start, stop) AS (
        SELECT 1, 1, CHARINDEX(@sep, @s)
        UNION ALL
        -- datalength/2 is the separator's length in characters (nvarchar
        -- stores 2 bytes per character) and, unlike LEN, counts trailing spaces.
        SELECT pn + 1, stop + (datalength(@sep)/2), CHARINDEX(@sep, @s, stop + (datalength(@sep)/2))
        FROM Pieces
        WHERE stop > 0
    )
    SELECT pn,
        -- For the last piece (stop = 0) take everything that remains.
        SUBSTRING(@s, start, CASE WHEN stop > 0 THEN stop-start ELSE 4000 END) AS s
    FROM Pieces
)
Getting the initials is easy now:
-- Concatenates the first character of every piece returned by fn_Split2.
DECLARE @Initials VARCHAR(8000)

-- COALESCE turns the initial NULL into '' so the concatenation can start.
SELECT @Initials = COALESCE(@Initials, '') + SUBSTRING(s, 1, 1)
FROM dbo.fn_Split2(' ', 'Michael Joseph Jackson')

SELECT @Initials
That returns 'MJJ', as required.

SUBSTRING( string, startpos, endpos ) AS 'Initial'

Split string and return data in multiple columns

First of all, many thanks to the site creator and, most importantly, to the helpful gurus on this site.
I have the same problem: splitting a string from a field and displaying it in multiple columns. For example, my table has the following columns:
dbo.tests
Fname ID wTest Loc
ABC 1 "XYZ,PTO,LKMD,HGGFFD," R1
BCE 2 "PTO,XYZ,LKMD,," R1
LKJ 3 "XYZ" R3
JKL 4 "XYZ,PTO,LKMD,HGGFFD,PKL" R2
The output of the select statement should display the data as follows. (The number of columns should be generated dynamically, based on the maximum number of values in any wTest string, and the empty columns should be filled with NULL or some other value.)
Returns:
Fname ID Loc wTest wTest1 wTest2,wTest3,Wtest4...
ABC 1 R1 XYZ PTO LKMD HGGFFD Null
BCE 2 R1 PTO XYZ LKMD Null Null
LKJ 3 R3 XYZ Null Null Null Null
JKL 4 R2 XYZ PTO LKMD HGGFFD PKL
The two closest functions I came across are as follows:
-- Inline table-valued function that splits @s on the single character @sep.
--   @sep : separator character.
--   @s   : string to split (up to 512 characters).
-- Returns one row per piece: pn (1-based piece number) and s (the piece).
CREATE FUNCTION dbo.Split (@sep char(1), @s varchar(512))
RETURNS table AS RETURN
(
    -- Pieces: for each piece, where it starts and where the next separator is
    -- (stop = 0 when there is no further separator).
    WITH Pieces (pn, start, stop) AS
    (
        SELECT 1, 1, CHARINDEX(@sep, @s)
        UNION ALL
        SELECT pn + 1, stop + 1,
            CHARINDEX(@sep, @s, stop + 1)
        FROM Pieces
        WHERE stop > 0
    )
    -- For the last piece (stop = 0) take everything that remains.
    SELECT pn, SUBSTRING(@s, start, CASE WHEN stop > 0 THEN stop-start ELSE 512 END) AS s
    FROM Pieces
)
with testTable AS
( SELECT 1 AS Id, N'how now brown cow' AS txt
UNION ALL
SELECT 2, N'she sells sea shells upon the sea shore' UNION ALL
SELECT 3, N'red lorry yellow lorry' UNION ALL
SELECT 4, N'the quick brown fox jumped over the lazy dog' )
SELECT display_term, COUNT(*) As Cnt
FROM testTable CROSS APPLY sys.dm_fts_parser('"' + txt + '"', 1033, 0,0)
GROUP BY display_term
HAVING COUNT(*) > 1 ORDER BY Cnt DESC
Any help in this regard is highly appreciated.
Zain...
zainali2006#hotmail.co.uk

Someone suggested this, but I am having difficulties applying for my purpose....
Returns #Tbl_IDs
Table (Id int identity(1,1),
Data Varchar(500)) As
Begin
--Remove the leading delimiter if any
while (substring(#IDs,1,1) =#Delimiter)
set #IDs = substring(#IDs, 2,len(#IDs)-1)
-- Append comma
--Set #IDs = #IDs + #Delimiter
set #IDs = REPLACE(RTRIM(LTRIM(REPLACE(#IDs,#Delimiter,' '))),' ',#Delimiter)
-- Indexes to keep the position of searching
Declare #Pos1 Int
Declare #pos2 Int
Declare #RowNum Int
-- Start from first character
Set #Pos1=1
Set #Pos2=1
While #Pos1>0
Begin
Set #Pos1 = CharIndex(#Delimiter,#IDs,#Pos1)
Insert #Tbl_IDs Values (Substring(#IDs,#Pos2,#Pos1-#Pos2))
-- Go to next non comma character
Set #Pos2=#Pos1+1
-- Search from the next charcater
Set #Pos1 = #Pos1+1
End
Return
End

Another one I came across quite interesting and simple but not sure how to use in my select statement:
-- Splits @String on @Delimiter and selects each piece in turn (empty pieces
-- included). Variable names use one consistent casing so the script also runs
-- on a case-sensitive collation; the unused NextPos variable was removed.
DECLARE @NextString NVARCHAR(40)
DECLARE @Pos INT
DECLARE @String NVARCHAR(40)
DECLARE @Delimiter NVARCHAR(40)

SET @String = 'SQL,TUTORIALS,,TCF'
SET @Delimiter = ','

-- Append a trailing delimiter so the last piece is terminated like the others.
SET @String = @String + @Delimiter
SET @Pos = CHARINDEX(@Delimiter, @String)

WHILE (@Pos <> 0)
BEGIN
    SET @NextString = SUBSTRING(@String, 1, @Pos - 1)
    SELECT @NextString -- Show Results
    -- Drop the piece just shown together with its delimiter.
    SET @String = SUBSTRING(@String, @Pos + 1, LEN(@String))
    SET @Pos = CHARINDEX(@Delimiter, @String)
END

SQL:Casting a String to IDS with IN clause

DECLARE #STR_IDS VARCHAR(15)
SET #STR_IDS='7,15,18'
UPDATE TBL_USERS WHERE ID IN #STR_IDS
I know the update statement will not work, because ID is of type INT and I am supplying a varchar value there. How can I change the query so that, in effect, it is executed like this?
UPDATE TBL_USERS WHERE ID IN (7,15,18)
Thanks in advance

Op doesn't mention database, so I'll just use SQL Server, because the example SQL in the question looks like TSQL. There are many ways to split string in SQL Server. This article covers the PROs and CONs of just about every method:
"Arrays and Lists in SQL Server 2005 and Beyond, When Table Value Parameters Do Not Cut it" by Erland Sommarskog
You need to create a split function. This is how a split function can be used:
SELECT
*
FROM YourTable y
INNER JOIN dbo.yourSplitFunction(#Parameter) s ON y.ID=s.Value
I prefer the number table approach to split a string in TSQL but there are numerous ways to split strings in SQL Server, see the previous link, which explains the PROs and CONs of each.
For the Numbers Table method to work, you need to do this one time table setup, which will create a table Numbers that contains rows from 1 to 10,000:
SELECT TOP 10000 IDENTITY(int,1,1) AS Number
INTO Numbers
FROM sys.objects s1
CROSS JOIN sys.objects s2
ALTER TABLE Numbers ADD CONSTRAINT PK_Numbers PRIMARY KEY CLUSTERED (Number)
Once the Numbers table is set up, create this split function:
-- Inline table-valued function that splits @List on @SplitOn using a Numbers
-- table (a table named Numbers with an integer column Number).
-- Returns one row per non-empty, trimmed item in a column named ListValue.
CREATE FUNCTION [dbo].[FN_ListToTable]
(
     @SplitOn char(1)     --REQUIRED, the character to split the @List string on
    ,@List varchar(8000)  --REQUIRED, the list to split apart
)
RETURNS TABLE
AS
RETURN
(
    ----------------
    --SINGLE QUERY-- --this will not return empty rows
    ----------------
    SELECT
        ListValue
    FROM (SELECT
              -- The item runs from just after this delimiter up to the next one.
              LTRIM(RTRIM(SUBSTRING(List2, number+1, CHARINDEX(@SplitOn, List2, number+1)-number - 1))) AS ListValue
          FROM (
              -- Wrap the list in delimiters so every item has one on each side.
              SELECT @SplitOn + @List + @SplitOn AS List2
          ) AS dt
          INNER JOIN Numbers n ON n.Number < LEN(dt.List2)
          -- Keep only the positions that hold a delimiter.
          WHERE SUBSTRING(List2, number, 1) = @SplitOn
         ) dt2
    WHERE ListValue IS NOT NULL AND ListValue!=''
);
GO
You can now easily split a CSV string into a table and join on it or use it however you need, even from within dynamic sql. Here is how to use it from your question:
-- Updates the TBL_USERS rows whose id appears in the CSV list.
-- FN_ListToTable exposes its items in the column ListValue (not "value").
UPDATE t
SET Col1=...
FROM dbo.FN_ListToTable(',','7,15,18') dt
INNER JOIN TBL_USERS t ON CAST(dt.ListValue AS INT)=t.id

Lately I prefer to use User-Defined Table Types to pass lists of parameters, but I used to use this utility function:
-- Parses a comma-separated list of integers into a table.
--   @CommaList : e.g. '7, 15,18'. Blank items are ignored.
-- Returns one row per item in the column TokenID.
-- An item that is not a valid int raises a conversion error.
CREATE FUNCTION [dbo].[fn_CommasToIntTable]
(
    @CommaList varchar(8000)
)
RETURNS @ParsedList TABLE
(
    TokenID int
)
AS
BEGIN
    -- Wide enough for any item: the original varchar(10) silently truncated
    -- 11-character values such as '-2147483648' and inserted a wrong number.
    DECLARE @CurrentToken varchar(8000)
    DECLARE @Pos int

    -- Terminate the list so the last item is followed by a comma like the others.
    SET @CommaList = LTRIM(RTRIM(@CommaList)) + ','
    SET @Pos = CHARINDEX(',', @CommaList, 1)

    -- Skip the loop when the list holds nothing but commas and spaces.
    IF REPLACE(@CommaList, ',', '') <> ''
    BEGIN
        WHILE @Pos > 0
        BEGIN
            SET @CurrentToken = LTRIM(RTRIM(LEFT(@CommaList, @Pos - 1)))
            IF @CurrentToken <> ''
            BEGIN
                INSERT @ParsedList (TokenID)
                VALUES (CAST(@CurrentToken AS int))
            END
            -- Drop the item just read together with its comma.
            SET @CommaList = RIGHT(@CommaList, LEN(@CommaList) - @Pos)
            SET @Pos = CHARINDEX(',', @CommaList, 1)
        END
    END
    RETURN
END
You'd use it like this:
SELECT (Columns)
FROM (Table)
WHERE ID IN (SELECT TokenID FROM dbo.fn_CommasToIntTable(#idList))

Stolen from here:
-- Splits @text on @delimiter and returns the non-empty pieces in input order.
--   @text      : the string to split.
--   @delimiter : the separator (1-20 characters); defaults to a single space.
-- Returns one row per non-empty token: position (IDENTITY, 1-based) and value.
-- Empty tokens (leading, trailing or doubled delimiters) are skipped.
CREATE FUNCTION fn_Split(@text varchar(8000), @delimiter varchar(20) = ' ')
RETURNS @Strings TABLE
(
    position int IDENTITY PRIMARY KEY,
    value varchar(8000)
)
AS
BEGIN
    DECLARE @index int
    DECLARE @delimiterLength int

    -- LEN() ignores trailing spaces (LEN(' ') = 0), so measure the delimiter
    -- with a sentinel character appended.
    SET @delimiterLength = LEN(@delimiter + 'x') - 1

    WHILE (LEN(@text) > 0)
    BEGIN
        SET @index = CHARINDEX(@delimiter, @text)

        -- No delimiter left: whatever remains is the final token.
        IF (@index = 0)
        BEGIN
            INSERT INTO @Strings (value) VALUES (@text)
            BREAK
        END

        -- @index = 1 means the text starts with a delimiter (empty token): skip it.
        IF (@index > 1)
            INSERT INTO @Strings (value) VALUES (LEFT(@text, @index - 1))

        -- Drop the token and the WHOLE delimiter. SUBSTRING replaces
        -- RIGHT(@text, LEN(@text) - @index), which returned the wrong tail
        -- when @text ended in spaces because LEN ignores them.
        SET @text = SUBSTRING(@text, @index + @delimiterLength, 8000)
    END
    RETURN
END
Then just do:
UPDATE tbl_users
SET ...
FROM fn_split('7,15,18', ',') q
JOIN tbl_users
ON id = CAST(q.value AS INT)

For MySQL it's really easy:
just use the FIND_IN_SET function
UPDATE TBL_USERS WHERE FIND_IN_SET ( id , '7,15,18')

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Find the missing words - sql

Related

SQL Search/Compare Keywords and Orderby similarity

Query to get only numbers from a string

Get the first letter of each word in a SQL string [duplicate]

Split string and return data in multiple columns

SQL:Casting a String to IDS with IN clause

Categories

Resources