Extract a part of string with complex pattern SQL - sql

I am working with string as below:
DOT LTB TOL SP BLM 3X(35X**25.1**GR)BWY BNL
And I am trying to extract the float between the last X and G -> 25.1.
In Python it is an easy task using regex expression: **[0-9]+X([0-9\.]+)G**
But I could not find a way to extract the data here in SQL Server:
I tried to write a function that define the position of X and the ) argument like that:
AS
Begin
Declare #LastX as INT
Declare #LastG as INT
Declare #Diff as INT
Declare #Result as varchar(50)
Set #LastX = LEN(#Temp) - CAST(CHARINDEX('X', REVERSE(#Temp)) AS Integer)
Set #LastG = LEN(#Temp) - CAST(CHARINDEX(')', REVERSE(#Temp)) AS Integer)
Set #Diff = #LastG - #LastX
IF #Diff > 0
Set #Result = SUBSTRING(#Temp, #LastX, #Diff)
ELSE
Set #Result = 0
Return #Result
End
My result is 5X25.1G
Can you help me with this?
I'm also open if you have other suggestion to extract this value in SQL.

If it's always between the last X and last G, you could use a couple of CHARINDEX's as you have tried. I prefer to do this in the FROM and use APPLY to avoid repetition of code and a "nasty" long single expression:
SELECT *,REVERSE(SUBSTRING(R.YourString,CI1.I+1,CI2.I-CI1.I-1))
FROM (VALUES('DOT LTB TOL SP BLM 3X(35X25.1GR)BWY BNL'))V(YourString)
CROSS APPLY(VALUES(REVERSE(V.YourString)))R(YourString)
CROSS APPLY(VALUES(NULLIF(CHARINDEX('G',R.YourString),0)))CI1(I)
CROSS APPLY(VALUES(NULLIF(CHARINDEX('X',R.YourString,CI1.I),0)))CI2(I)
That really long expression would be the following, by the way:
SELECT V.Yourstring,
REVERSE(SUBSTRING(REVERSE(V.YourString),NULLIF(CHARINDEX('G',REVERSE(V.YourString)),0)+1,NULLIF(CHARINDEX('X',REVERSE(V.YourString),NULLIF(CHARINDEX('G',REVERSE(V.YourString)),0)),0)-NULLIF(CHARINDEX('G',REVERSE(V.YourString)),0)-1))
FROM (VALUES('DOT LTB TOL SP BLM 3X(35X25.1GR)BWY BNL'))V(YourString)
Have fun trying to read that. Hence why the APPLYs. 🙃

Related

One-Time Use Randomized Number

Tables(columns) that are important here (there are more in my actual DB but I simplified):
Patients(PatientID, PatientNumber, DOB, Weight, RandomCode)
RandomNumbers(RandomNumberID, Available)
Sites(SiteID, SiteNumber)
Patients does not contain any data and is to be populated via a 3rd party GUI. RandomNumbers contains 50 entries. When a patient's data is first added to Patients, a random number 1-50 is selected, which is tied to RandomNumberID. The "available" column in RandomNumbers is treated as a boolean (T meaning available, F meaning unavailable). After the RandomNumberID is tied to a patient, that RandomNumberID cannot be used again (Available is switched to "F"). If an already used RandomNumber is assigned to a patient upon data entry, another randomly selected RandomNumberID is selected until one is found where Available is T.
Here is the storedproc that I've written so far:
GO
CREATE PROCEDURE uspPatientEntry
#intPatientID AS INTEGER OUTPUT
,#intPatientNumber AS INTEGER
,#intSiteID AS INTEGER
,#intRandomCode AS INTEGER
AS
SET NOCOUNT ON
SET XACT_ABORT ON
BEGIN TRANSACTION
SELECT #intPatientID = MAX (intPatientID) + 1
FROM TPatients TP (TABLOCKX)
SELECT #intPatientID = COALESCE(#intPatientID, 1)
SELECT #intPatientNumber = CONCAT(#intSiteID, #intPatientID)
INSERT INTO TPatients (intPatientID, intPatientNumber, intSiteID, intRandomCodeID)
VALUES (#intPatientID, #intPatientNumber, #intSiteID, #intRandomCode)
COMMIT TRANSACTION
GO
And here is another function I wrote to test it out:
DECLARE #intPatientID AS INTEGER = 0;
DECLARE #intSiteNumber AS INTEGER = 0;
DECLARE #intPatientNumber AS INTEGER = CONCAT(#intSiteNumber, #intPatientID);
DECLARE #intRandomCode AS INTEGER = FLOOR(RAND()*50)+1; -- This just picks a random integer 1-40. <- this is the heart of my question
EXECUTE uspPatientEntry #intPatientID OUTPUT, #intPatientNumber, 2, #intRandomCode
Would something like this work? Ignore syntax, just the general idea:
DECLARE #intRandomCode AS INTEGER;
SELECT #intRandomCode = FLOOR(RAND()*50+1;
FROM TRandomCodes TRC
WHERE TRC.blnAvailable = "T"
UPDATE TRandomCodes TRC
SET TRC.blnAvailable = "F"
WHERE #intRandomCode = TRC.intRandomNumberID;
Thank you

Error Handling for numbers of delimiters when extracting substrings

Situation: I have a column where each cell can have up to 5 delimiters. However, it's possible that there are none.
Objective: How do i handle errors such as :
Invalid length parameter passed to the LEFT or SUBSTRING function.
in the case that it cannot find the specified delimiter.
Query:
declare #text VARCHAR(111) = 'abc-def-geeee-ifjf-zzz'
declare #start1 as int
declare #start2 as int
declare #start3 as int
declare #start4 as int
declare #start_index_reverse as int
set #start1 = CHARINDEX('-',#text,1)
set #start2 = CHARINDEX('-',#text,charindex('-',#text,1)+1)
set #start3 = CHARINDEX('-',#text,charindex('-',#text,CHARINDEX('-',#text,1)+1)+1)
set #start4 = CHARINDEX('-',#text,charindex('-',#text,CHARINDEX('-',#text,CHARINDEX('-',#text,1)+1)+1)+1)
set #start_index_reverse = CHARINDEX('-',REVERSE(#text),1)
select
LEFT(#text,#start1-1) AS Frst,
SUBSTRING(#text,#start1+1,#start2-#start1-1) AS Scnd,
SUBSTRING(#text,#start2+1,#start3-#start2-1) AS Third,
SUBSTRING(#text,#start3+1,#start4-#start3-1)AS Third,
RIGHT(#text,#start_index_reverse-1) AS Lst
In this case my variable includes 5 delimiters and so my query works but if i removed one '-' it would break.
XML support in SQL Server brings about some unintentional but useful tricks. Converting this string to XML allows for some parsing that is far less messy than native string handling, which is very far from awesome.
DECLARE #test varchar(111) = 'abc-def-ghi-jkl-mnop'; -- try also with 'abc-def'
;WITH n(x) AS
(
SELECT CONVERT(xml, '<x>' + REPLACE(#test, '-', '</x><x>') + '</x>')
)
SELECT
Frst = x.value('/x[1]','varchar(111)'),
Scnd = x.value('/x[2]','varchar(111)'),
Thrd = x.value('/x[3]','varchar(111)'),
Frth = x.value('/x[4]','varchar(111)'),
Ffth = x.value('/x[5]','varchar(111)')
FROM n;
For a table it's almost identical:
DECLARE #foo TABLE ( col varchar(111) );
INSERT #foo(col) VALUES('abc-def-ghi-jkl-mnop'),('abc'),('def-ghi');
;WITH n(x) AS
(
SELECT CONVERT(xml, '<x>' + REPLACE(col, '-', '</x><x>') + '</x>')
FROM #foo
)
SELECT
Frst = x.value('/x[1]','varchar(111)'),
Scnd = x.value('/x[2]','varchar(111)'),
Thrd = x.value('/x[3]','varchar(111)'),
Frth = x.value('/x[4]','varchar(111)'),
Ffth = x.value('/x[5]','varchar(111)')
FROM n;
Results (sorry about the massive size, seems this doesn't handle 144dpi well):
add a test before your last select
then you should decide how to handle the other case (when one of start is 0)
You can also refer to this link about splitting a string in sql server
which is uses a loop and can handle any number of delimiters
if #start1>0 and #start2>0 and #start3>0 and #start4>0
select LEFT(#text,#start1-1) AS Frst,
SUBSTRING(#text,#start1+1,#start2-#start1-1) AS Scnd,
SUBSTRING(#text,#start2+1,#start3-#start2-1) AS Third,
SUBSTRING(#text,#start3+1,#start4-#start3-1)AS Third,
RIGHT(#text,#start_index_reverse-1) AS Lst

String operation in SQL to reverse a string

In DB2 9.7 I am looking for a way to reverse a string in a SQL query.
I am familiar with SQL Server where the query would be like
SELECT
REVERSE(LEFT_TO_REIGHT) AS RIGHT_TO_LEFT
FROM
TABLE1;
I couldn't find a similar function in DB2. is there a simple way to reverse a string?
Creating a REVERSE(..) function is unnecessary.
DB2 has something called RIGHT(string-expression, length):
The RIGHT function returns the rightmost string of string-expression
of length length, expressed in the specified string unit. If
string-expression is a character string, the result is a character
string. If string-expression is a graphic string, the result is a
graphic string
So if you're interested in the last 8 characters, you can pretty trivially do this via:
SELECT RIGHT(left_to_right, 8) AS right_to_left
FROM Table1
(I'm actually still concerned about the fact that you're splitting off 8 characters consistently, as it implies you have a multi-part key of some sort).
Try something like:
SELECT STRIP(CAST( TRANSLATE('87654321',LEFT_TO_REIGHT, '12345678') AS VARCHAR(8) ))
FROM TABLE1;
Due to the original question this is the first webpage that comes up when one searches for 'How to reverse a string in DB2'.
Here is an answer that doesn't require implementing it in C and shouldn't brake on non-pure-Engilsh strings regardless of their length.
Be warned though, the efficiency is 'meh' at best.
CREATE FUNCTION REVERSE_STRING(STR VARCHAR(100))
RETURNS VARCHAR(100)
LANGUAGE SQL
SPECIFIC REVERSE_STRING
DETERMINISTIC
REVERSE: BEGIN
DECLARE REVERSED_STRING VARCHAR(100);
DECLARE REVERSED_CHARACTERS_INDEX INTEGER;
SET REVERSED_STRING='';
SET REVERSED_CHARACTERS_INDEX=0;
WHILE (REVERSED_CHARACTERS_INDEX < CHARACTER_LENGTH(STR, CODEUNITS16))
DO
SET REVERSED_CHARACTERS_INDEX = REVERSED_CHARACTERS_INDEX + 1;
SET REVERSED_STRING = CONCAT(
REVERSED_STRING,
LEFT(RIGHT(STR, REVERSED_CHARACTERS_INDEX, CODEUNITS16), 1, CODEUNITS16));
END WHILE;
RETURN REVERSED_STRING;
END REVERSE#
The idea is to get a substring which starts from the n-th character from the right till the end of the string, then take the first element of this substring from the left and append it to a reversed string. This operation is conducted n times where n is the length of a string to be reversed.
You can use it like any other function.
SELECT FIRSTNME AS FIRSTNAME, REVERSE_STRING(FIRSTNME) AS REVERSED_FIRSTNAME
FROM SAMPLE.EMPLOYEE#
Example output
Answering the original question of reversing a string there's user defined functions published on the IBM site that will do it that you can find here. There's apparently no built in ability in DB2
https://www.ibm.com/developerworks/community/blogs/SQLTips4DB2LUW/entry/reverse?lang=en
Tortured SQL version:
CREATE OR REPLACE FUNCTION REVERSE(INSTR VARCHAR(4000))
RETURNS VARCHAR(4000) SPECIFIC REVERSE
DETERMINISTIC NO EXTERNAL ACTION CONTAINS SQL
RETURN WITH rec(pos, res) AS (VALUES (1, CAST('' AS VARCHAR(4000)))
UNION ALL
SELECT pos + 1, SUBSTR(INSTR, pos , 1) || res
FROM rec
WHERE pos <= LENGTH(INSTR)
AND pos < 5000)
SELECT res FROM rec WHERE pos > LENGTH(INSTR);
But then you have to do this as well, yuck:
CREATE BUFFERPOOL bp32 PAGESIZE 32K;
CREATE SYSTEM TEMPORARY TABLESPACE tsp32 PAGESIZE 32K BUFFERPOOL bp32;
A saner C implementation
#include <sqludf.h>
#ifdef __cplusplus
extern "C"
#endif
void SQL_API_FN ReverseSBCP(SQLUDF_VARCHAR *inVarchar,
SQLUDF_VARCHAR *outVarchar,
SQLUDF_SMALLINT *inVarcharNullInd,
SQLUDF_SMALLINT *outVarcharNullInd,
SQLUDF_TRAIL_ARGS)
{
int inLen, inPos, outPos;
if (*inVarcharNullInd == -1)
{
*outVarcharNullInd = -1;
}
else
{
inLen = strlen(inVarchar);
for (inPos = 0, outPos = inLen -1; inPos < inLen; inPos++, outPos--)
{
outVarchar[outPos] = inVarchar[inPos];
}
outVarchar[inLen] = '\0';
*outVarcharNullInd = 0;
}
return;
}

SQL multiple if statements, same result set, different argument

I am writing a stored proc that calculates a WHOLE bunch of different things, but I have a bit in it, that is repeated about 9 times.
eg:
if #argA = 1 (true)
select Count(samples) from dbo.X where type = #argAType
if #argB = 1 (true)
select Count(samples) from dbo.X where type = #argBType
if #argC = 1
select Count(samples) from dbo.X where type = #argCType
and so on...
how can I write a function (or something similar) that I can pass in a bit (true or false), and other argument, and only return the result set if true???
Is this what you're looking for? This is the best I can deduce based on the question as it's currently posted.
SELECT COUNT(samples)
FROM dbo.X
WHERE
(type=#argAType AND #argA=1)
OR
(type=#argBType AND #argB=1)
OR
(type=#argCType AND #argC=1)
In function form, I think this is right:
CREATE FUNCTION GetCount(#n AS BIGINT) RETURNS BIGINT
AS
BEGIN
DECLARE #count BIGINT
SELECT #count = COUNT(samples)
FROM dbo.X
WHERE
(type=#argAType AND #argA=1)
OR
(type=#argBType AND #argB=1)
OR
(type=#argCType AND #argC=1)
RETURN #count
END

SQL Server 2008 math fail

After hunting around on various forums for almost an hour, I've come to the conclusion that SQL server is slightly stupid about simple arithmetic.
I am attempting to utilize a function which, until recently seemed to work just fine. Upon changing out some of the values for a different set of information on the form in use, I get the odd behavior ahead.
The problem is that it is giving me the incorrect result as based on an excel spreadsheet formula.
The formula looks like this:
=IF(D8=0,0,(((D8*C12-C16)*(100-C13)/100+C16)/D8)+(C18*D8))
My SQL looks like this:
(((#DaysBilled * #ContractRate - #ActualPlanDed) * (100 - #InsCover) / 100 + #ActualPlanDed) / #DaysBilled) + (#CoPay * #DaysBilled)
Filling the variables with the given data looks like this:
(((11 * 433 - 15) * (100 - 344) / 100 + 15) / 11) + (15 * 11)
Even stranger, if I use the numbers above (adding .00 to the end of each value) manually in the server environment, it gives me -11405.1200000000
With the values I am giving, it should come out 166.36. Unfortunately, I am getting -886.83
Here is the entire function and how it is called:
ALTER FUNCTION Liability
(
#ClientGUID CHAR(32),
#RecordGUID CHAR(32),
#Type CHAR(3)
)
RETURNS DECIMAL(18,2) AS
BEGIN
DECLARE #ReturnValue decimal(18,2);
DECLARE #DaysBilled int;
DECLARE #ContractRate decimal(18,2);
DECLARE #ActualPlanDed decimal(18,2);
DECLARE #InsCover decimal(18,2);
DECLARE #CoPay decimal(18,2);
IF (#Type = 'RTC')
BEGIN
SELECT #DaysBilled = RTCDaysBilled,
#ContractRate = CAST(REPLACE(REPLACE(ContractRateRTC, ' ',''),'$', '') AS DECIMAL(6,2)),
#ActualPlanDed = RTCActualPlanDed,
#InsCover = InsRTCCover,
#CoPay = RTCCoPay
FROM AccountReconciliation1
WHERE #ClientGUID = tr_42b478f615484162b2391ef0b2c35ddc
AND #RecordGUID = tr_abb4effa0d9c4fe98c78cb4d2e21ba5d
END
IF (#Type = 'PHP')
BEGIN
SELECT #DaysBilled = PHPDaysBilled,
#ContractRate = CAST(REPLACE(REPLACE(ContractRatePHP, ' ',''),'$', '') AS DECIMAL(6,2)),
#ActualPlanDed = PHPActualPlanDed,
#InsCover = InsPHPCover,
#CoPay = PHPCoPay
FROM AccountReconciliation1
WHERE #ClientGUID = tr_42b478f615484162b2391ef0b2c35ddc
AND #RecordGUID = tr_abb4effa0d9c4fe98c78cb4d2e21ba5d
END
IF (#Type = 'IOP')
BEGIN
SELECT #DaysBilled = IOPDaysBilled,
#ContractRate = CAST(REPLACE(REPLACE(ContractRateIOP, ' ',''),'$', '') AS DECIMAL(6,2)),
#ActualPlanDed = IOPActualPlanDed,
#InsCover = InsIOPCover,
#CoPay = IOPCoPay
FROM AccountReconciliation1
WHERE #ClientGUID = tr_42b478f615484162b2391ef0b2c35ddc
AND #RecordGUID = tr_abb4effa0d9c4fe98c78cb4d2e21ba5d
END
IF (#DaysBilled <> 0)
BEGIN
SET #ReturnValue = (((#DaysBilled * #ContractRate - #ActualPlanDed)
*
(100 - #InsCover) / 100 + #ActualPlanDed)
/
#DaysBilled
)
+
(#CoPay * #DaysBilled)
END
ELSE
BEGIN
SET #ReturnValue = 0;
END
RETURN #ReturnValue;
END
It is called by running a select statement from our front-end, but the result is the same as calling the function from within management studio:
SELECT dbo.Liability('ClientID','RecordID','PHP') AS Liability
I have been reading about how a unary minus tends to break SQL's math handling, but I'm not entirely sure how to counteract it.
One last stupid trick with this function: It must remain a function. I cannot convert it into a stored procedure because it must be used with our front-end, which cannot utilize stored procedures.
Does SQL server even care about the parentheses? Or is it just ignoring them?
The calculation is correct, it differes of course if you are using float values
instead of integers.
For (((11 * 433 - 15) * (100 - 344) / 100 + 15) / 11) + (15 * 11)
a value around -886.xx depending in which places integers/floats are used is correct,
What makes you believe it should be 166.36?