Remove Substring according to specific pattern - sql

I need to remove in a SQL Server database a substring according to a pattern:
Before: Winter_QZ6P91712017_115BPM
After: Winter_115BPM
Or
Before: cpx_Note In My Calendar_QZ6P91707044
After: cpx_Note In My Calendar
Basically delete the substring that has pattern _ + 12 chars.
I've tried PatIndex('_\S{12}', myCol) to get the index of the substring but it doesn't match anything.

Assuming you mean underscore followed by 12 characters that are not underscores you can use this pattern:
SELECT *,
CASE WHEN PATINDEX('%[_][^_][^_][^_][^_][^_][^_][^_][^_][^_][^_][^_][^_]%', str) > 0
THEN STUFF(str, PATINDEX('%[_][^_][^_][^_][^_][^_][^_][^_][^_][^_][^_][^_][^_]%', str), 13, '')
ELSE str
END
FROM (VALUES
('Winter_QZ6P91712017_115BPM'),
('Winter_115BPM_QZ6P91712017')
) AS tests(str)

late to the party, but you could also use latest STRING_SPLIT function to explode the string by underscores and count length of each segment between underscores. If the length is >=12, these sections must be replaced from original string via replace function recursively.
drop table if exists Tbl;
drop table if exists #temptable;
create table Tbl (input nvarchar(max));
insert into Tbl VALUES
('Winter_QZ6P91712017_115BPM'),
('cpx_Note In My Calendar_QZ6P91707044'),
('stuff_asdasd_QZ6P91712017'),
('stuff_asdasd_QZ6P91712017_stuff_asdasd_QZ6P91712017'),
('stuff_asdasd_QZ6P917120117_stuff_asdasd_QZ6P91712017');
select
input, value as replacethisstring,
rn = row_number() over (partition by input order by (select 1))
into #temptable
from
(
select
input,value as hyphensplit
from Tbl
cross apply string_split(input,'_')
)T cross apply string_split(hyphensplit,' ')
where len(value)>=12
; with cte as (
select input, inputtrans= replace(input,replacethisstring,''), level=1 from #temptable where rn=1
union all
select T.input,inputtrans=replace(cte.inputtrans,T.replacethisstring,''),level=level+1
from cte inner join #temptable T on T.input=cte.input and rn=level+1
--where level=rn
)
select input, inputtrans
from (
select *, rn=row_number() over (partition by input order by level desc) from cte
) T where rn=1
sample output

SQL Server doesn't support Regex. Considering, however, you just want to remove the first '_' and the 12 characters afterwards, you could use CHARINDEX to find the location of said underscore, and then STUFF to remove the 13 characters:
SELECT V.YourString,
STUFF(V.YourString, CHARINDEX('_',V.YourString),13,'') AS NewString
FROM (VALUES('Winter_QZ6P91712017_115BPM'))V(YourString);

Related

How to convert fields to JSON in Postgresql

I have a table with the following schema (postgresql 14):
message sentiment classification
any text positive mobile, communication
message are only string, phrases.
sentiment is a string, only one word
classification are string but can have 1 to many word comma separated
I would like to create a json field with these columns, like this:
{"msg":"any text", "sentiment":"positive","classification":["mobile,"communication"]}
Also, if possible, is there a way to consider the classification this way:
{"msg":"any text", "sentiment":"positive","classification 1":"mobile","classification 2" communication"}
The first part of question is easy - Postgres provides functions for splitting string and converting to json:
with t(message, sentiment, classification) as (values
('any text','positive','mobile, communication')
)
select row_to_json(x.*)
from (
select t.message
, t.sentiment
, array_to_json(string_to_array(t.classification, ', ')) as classification
from t
) x
The second part is harder - your want json to have variable number of attributes, mixed of grouped and nongrouped data. I suggest to unwind all attributes and then assemble them back (note the numbered CTE is actually not needed if your real table has id - I just needed some column to group by):
with t(message, sentiment, classification) as (values
('any text','positive','mobile, communication')
)
, numbered (id, message, sentiment, classification) as (
select row_number() over (order by null)
, t.*
from t
)
, extracted (id,message,sentiment,classification,index) as (
select n.id
, n.message
, n.sentiment
, l.c
, l.i
from numbered n
join lateral unnest(string_to_array(n.classification, ', ')) with ordinality l(c,i) on true
), unioned (id, attribute, value) as (
select id, concat('classification ', index::text), classification
from extracted
union all
select id, 'message', message
from numbered
union all
select id, 'sentiment', sentiment
from numbered
)
select json_object_agg(attribute, value)
from unioned
group by id;
DB fiddle
Use jsonb_build_object and concatenate the columns you want
SELECT
jsonb_build_object(
'msg',message,
'sentiment',sentiment,
'classification',
string_to_array(classification,','))
FROM mytable;
Demo: db<>fiddle
The second output is definitely not trivial. The SQL code would be much larger and harder to maintain - not to mention that parsing such file also requires a little more effort.
You can use a cte to handle the flattening of the classification attributes and then perform the necessary grouping in the main queries for each problem component:
with cte(r, m, s, k) as (
select row_number() over (order by t.message), t.message, t.sentiment, v.* from tbl t
cross join json_array_elements(array_to_json(string_to_array(t.classification, ', '))) v
)
-- first part --
select json_build_object('msg', t1.message, 'sentiment', t1.sentiment, 'classification', string_to_array(t1.classification, ', ')) from tbl t1
-- second part --
select jsonb_build_object('msg', t1.m, 'sentiment', t1.s)||('{'||t1.g||'}')::jsonb
from (select c.m, c.s, array_to_string(array_agg('"classification '||c.r||'":'||c.k), ', ') g
from cte c group by c.m, c.s) t1

Change the casing of my letters in string using sql function

I want to convert my string into opposite casing in sql using function.
can someone help me??
I am able to do the first letter of the string but not middle letters
http://www.sql-server-helper.com/functions/initcap.aspx
Example:
input:
"hI mY Name IS Joe"
output:
"Hi My nAME is jOE"
If your sql-server version support (sql-server-2017 and above), you can use TRANSLATE function
SELECT TRANSLATE ('hI mY Name IS Joe' COLLATE Latin1_general_CS_AS
,'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
Result:
Hi My nAME is jOE
A completely set-based approach:
DECLARE #TheLinkTable TABLE(ID INT IDENTITY,YourText NVARCHAR(1000));
INSERT INTO #TheLinkTable VALUES('hI mY Name IS Joe');
The query:
WITH cte AS
(
SELECT t.ID
,t.YourText
,A.Nmbr
,C.SwitchedLetter
FROM #TheLinkTable t
CROSS APPLY(SELECT TOP(LEN(t.YourText)) ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) FROM master..spt_values) A(Nmbr)
CROSS APPLY(SELECT SUBSTRING(t.YourText,A.Nmbr,1)) B(TheLetter)
CROSS APPLY(SELECT CASE WHEN TheLetter LIKE '[a-zA-Z]'
THEN CHAR(ASCII(TheLetter) ^ 0x20)
ELSE CASE WHEN TheLetter=' ' THEN TheLetter END END) C(SwitchedLetter)
)
SELECT cte1.ID
,cte1.YourText
,(
SELECT SwitchedLetter AS [*]
FROM cte cte2
WHERE cte2.ID=cte1.ID
ORDER BY cte2.Nmbr
FOR XML PATH(''),TYPE).value('.','nvarchar(max)'
)
FROM cte cte1
GROUP BY cte1.ID,cte1.YourText;
The idea in short:
Using a tally-on-the-fly (in this case a ROW_NUMBER() against any bigger set with a computed TOP-clause we get a running number from 1 to n where n is the count of letters.
The second APPLY will pick each letter separately.
The third apply will switch the casing of letters from a to z simply by XORing the binary representation and re-set the significant BIT. A blank is returned as is.
The following SELECT will group by the ID and use a correlated sub-query to reconcatenate the string.
Another set-based approach implies a recursive CTE:
WITH recCTE AS
(
SELECT 1 AS position
,YourText
,CAST(CASE WHEN ASCII(TheLetter) BETWEEN 65 AND 90 THEN LOWER(TheLetter)
ELSE CASE WHEN ASCII(TheLetter) BETWEEN 97 AND 122 THEN UPPER(TheLetter) END END AS NVARCHAR(MAX)) AS SwitchLetter
FROM #TheLinkTable
CROSS APPLY(SELECT SUBSTRING(YourText,1,1)) A(TheLetter)
UNION ALL
SELECT r.position+1
,YourText
,CONCAT(r.SwitchLetter
,CASE WHEN ASCII(TheLetter) BETWEEN 65 AND 90 THEN LOWER(TheLetter)
ELSE CASE WHEN ASCII(TheLetter) BETWEEN 97 AND 122 THEN UPPER(TheLetter)
ELSE TheLetter END END) AS SwitchLetter
FROM recCTE r
CROSS APPLY(SELECT SUBSTRING(YourText,r.position+1,1)) A(TheLetter)
WHERE r.position<LEN(YourText)
)
SELECT * FROM recCte;
You must add a WHERE to pick the last one (e.g. LEN(SwitchLetter)=LEN(YourText)). I left it aside to show how it's working.

sql server string split last but one

Table has a column with values
ColA
------
a.b.c.d.e (car.make.model, car.la, kg)
ab.cd.ef (car.make.model)
a1.b2.c3.d4.e5(car.make.model, car.la, kg, av.vc.de)
I want to write a sql query to split the ColA by delimiter "." and pick last but one.
Expected output
Result
------
d
cd
d4
I have tried ParseName but dont see option to pick last but one.
Thank you
Using Jeff Moden's DelimitedSplit8K:
USE Sandbox;
GO
CREATE TABLE #Sample (ColA varchar(500));
GO
INSERT INTO #Sample
VALUES ('a.b.c.d.e'),
('ab.cd.ef'),
('a1.b2.c3.d4.e5');
GO
SELECT *
FROM #Sample;
WITH Split AS(
SELECT S.ColA,
DS.*,
MAX(DS.ItemNumber) OVER (PARTITION BY S.ColA) AS Items
FROM #Sample S
CROSS APPLY DelimitedSplit8K(S.ColA,'.') DS)
SELECT Item
FROM Split
WHERE ItemNumber = Items - 1;
GO
DROP TABLE #Sample
Ideally, though, don't store your data in a delimited format. :)
Just to play around using STRING_SPLIT:
SELECT ColA, t.value
FROM table1
CROSS APPLY(SELECT value,
COUNT(*) OVER () as cnt,
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS rn
FROM STRING_SPLIT(ColA, '.')) AS t
WHERE t.rn = t.cnt - 1
Note: The function is available from SQL Server 2016.
Note 2: The query works provided that the function returns each value in the same order as the one it appears inside the string.
Why not simply using substring?
DECLARE #ColA NVARCHAR(100) = 'a1.b2.c3.d4.e5(car.make.model, car.la, kg, av.vc.de)';
SELECT REVERSE(LEFT(RIGHT(REVERSE(LEFT(#ColA, CHARINDEX('(', #ColA)-1)), LEN(LEFT(#ColA, CHARINDEX('(', #ColA)-1))-CHARINDEX('.',REVERSE(LEFT(#ColA, CHARINDEX('(', #ColA)-1)))), CHARINDEX('.',RIGHT(REVERSE(LEFT(#ColA, CHARINDEX('(', #ColA)-1)), LEN(LEFT(#ColA, CHARINDEX('(', #ColA)-1))-CHARINDEX('.',REVERSE(LEFT(#ColA, CHARINDEX('(', #ColA)-1)))))-1))
However, this last edit does NOT handle the case when there is no . or no ( in the string - feel free t o extend the query accordingly
Try This
;WITH CTE(ColA)
AS
(
SELECT 'a.b.c.d.e' UNION ALL
SELECT 'ab.cd.ef' UNION ALL
SELECT 'a1.b2.c3.d4.e5'
)
SELECT ColA,REVERSE(SUBSTRING(ReqColA,0,CHARINDEX('.',(ColA)))) AS ReqColA
FROM
(
SELECT ColA ,SUBSTRING(REVERSE(ColA),CHARINDEX('.',REVERSE(ColA))+1,LEN(REVERSE(ColA))) AS ReqColA FROM CTE
)dt
Result
ColA ReqColA
-----------------------
a.b.c.d.e d
ab.cd.ef cd
a1.b2.c3.d4.e5 d4

Regex pattern inside REPLACE function

SELECT REPLACE('ABCTemplate1', 'Template\d+', '');
SELECT REPLACE('ABC_XYZTemplate21', 'Template\d+', '');
I am trying to remove the part Template followed by n digits from a string. The result should be
ABC
ABC_XYZ
However REPLACE is not able to read regex. I am using SQLSERVER 2008. Am I doing something wrong here? Any suggestions?
SELECT SUBSTRING('ABCTemplate1', 1, CHARINDEX('Template','ABCTemplate1')-1)
or
SELECT SUBSTRING('ABC_XYZTemplate21',1,PATINDEX('%Template[0-9]%','ABC_XYZTemplate21')-1)
More generally,
SELECT SUBSTRING(column_name,1,PATINDEX('%Template[0-9]%',column_name)-1)
FROM sometable
WHERE PATINDEX('%Template[0-9]%',column_name) > 0
You can use substring with charindex or patindex if the pattern being looked for is fixed.
select SUBSTRING('ABCTemplate1',1, CHARINDEX ( 'Template' ,'ABCTemplate1')-1)
My answer expects that "Template" is enough to determine where to cut the string:
select LEFT('ABCTemplate1', CHARINDEX('Template', 'ABCTemplate1') - 1)
Using numbers table..
;with cte
as
(select 'ABCTemplate1' as string--this can simulate your table column
)
select * from cte c
cross apply
(
select replace('ABCTemplate1','template'+cast(n as varchar(2)),'') as rplcd
from
numbers
where n<=9
)
b
where c.string<>b.rplcd
Using Recursive CTE..
;with cte
as
(
select cast(replace('ABCTemplate21','template','') as varchar(100)) as string,0 as num
union all
select cast(replace(string,cast(num as varchar(2)),'') as varchar(100)),num+1
from cte
where num<=9
)
select top 1 string from cte
order by num desc

T-SQL function to split string with two delimiters as column separators into table

I'm looking for a t-sql function to get a string like:
a:b,c:d,e:f
and convert it to a table like
ID Value
a b
c d
e f
Anything I found in Internet incorporated single column parsing (e.g. XMLSplit function variations) but none of them letting me describe my string with two delimiters, one for column separation & the other for row separation.
Can you please guiding me regarding the issue? I have a very limited t-sql knowledge and cannot fork those read-made functions to get two column solution?
You can find a split() function on the web. Then, you can do string logic:
select left(val, charindex(':', val)) as col1,
substring(val, charindex(':', val) + 1, len(val)) as col2
from dbo.split(#str, ';') s(val);
You can use a custom SQL Split function in order to separate data-value columns
Here is a sql split function that you can use on a development system
It returns an ID value that can be helpful to keep id and value together
You need to split twice, first using "," then a second split using ";" character
declare #str nvarchar(100) = 'a:b,c:d,e:f'
select
id = max(id),
value = max(value)
from (
select
rowid,
id = case when id = 1 then val else null end,
value = case when id = 2 then val else null end
from (
select
s.id rowid, t.id, t.val
from (
select * from dbo.Split(#str, ',')
) s
cross apply dbo.Split(s.val, ':') t
) k
) m group by rowid