How to extract a part of string in sql? - sql

I have the data table below (named table1) and I have to extract the English (UK) part from every row; for example, from row one: "2. Education Sector".
ID
Name
1
PK:"";UK:"2. Education Sector";SP:"Sector Educativo"; GR:"2. Bildungssektor";FR:"2. Secteur de l/éducation";
2
UK:"3. Football: pitch/ground";SP:"3. Campo de fútbol"; GR:"3. Fußballplatz/Boden";NR:"3. fotballbane/bane";FR:"3. Terrain de football";
3
JP:""; GR:"1. Stadt: Hauptstadt/Hauptstadt"; SP:"1. Ciudad: ciudad principal/capital"; UK:"1. City: main city/capital"; FR:"1. Ville : ville principale/capitale"; NR:"1. By: hovedby/hovedstad"; IND:"";
4
AF:""; IND:""; GR:"4. Andere"; SP:"4. Otras"; FR:""; NR:"4. Andre"; FR:"4. Les autres"; UK:"4. Others"
I am Expecting result 1 this way but cannot solve it:
ID
Name
1
2. Education Sector
2
3. Football: pitch/ground
3
1. City: main city/capital
4
4. Others
I am trying this way but it's not getting the expected result:
-- Asker's non-working attempt, kept exactly as posted.
-- The SUBSTRING( call is never closed, LEFT is given three arguments, and the
-- WHERE clause compares the result of CHARINDEX with double-quoted text.
SELECT SUBSTRING(LEFT(name, CHARINDEX(';', name) + 1, 100)
FROM table1
WHERE CHARINDEX('\[', name) = "2. Education Sector" OR CHARINDEX('\[', name) = "3. Football: pitch/ground" OR CHARINDEX('\[', name) = "1. City: main city/capital" OR CHARINDEX('\[', name) = "4. Others";
And I am expecting result 2 this way but cannot solve it:
ID
Name
1
Education Sector
2
Football: pitch/ground
3
City: main city/capital
4
Others
I am trying this way but its not getting my expected result:
-- Asker's second non-working attempt, kept exactly as posted.
-- The SUBSTRING( call is never closed, LEFT is given three arguments, and the
-- WHERE clause compares the result of CHARINDEX with double-quoted text.
SELECT SUBSTRING(LEFT(name, CHARINDEX(';', name) + 1, 100)
FROM table1
WHERE CHARINDEX('\[', name) = "Education Sector" OR CHARINDEX('\[', name) = "Football: pitch/ground" OR CHARINDEX('\[', name) = "City: main city/capital" OR CHARINDEX('\[', name) = "Others";
any suggestion?

Because you tagged sql-server I can offer the following simple method, assuming you're using both SQL Server and a fully supported version:
I was going to delete this answer, but I'll leave it here in case you can make use of it in SQLite - I am not familiar with that product personally.
-- For every row of t, split Name on ';' and keep the element(s) containing
-- 'UK:'; the double quotes and the 'UK:' label are removed and the remainder
-- is trimmed.
SELECT
    t.Id,
    uk.[Name]
FROM t
CROSS APPLY (
    SELECT TRIM(REPLACE(REPLACE(part.[value], '"', ''), 'UK:', ''))
    FROM STRING_SPLIT(t.Name, ';') AS part
    WHERE part.[value] LIKE '%UK:%'
) AS uk ([Name]);
See a Demo Fiddle

You can try a combination of LEFT and RIGHT.
Find 'UK:"' in Name column
Take everything starting with 'UK:"' using RIGHT and LEN
Find first '"' in the remaining string - this is your last character position
Take everything up to the first '"' - this is your result
-- Extract the UK value: the text between the marker 'UK:"' and the next '"'.
-- Assumes every Name contains 'UK:"' followed by a closing '"'.
SELECT ID,
    LEFT
    (
        -- Everything after the marker: CHARINDEX gives the position of 'U', and
        -- the marker is 4 characters wide, hence the "- 3".
        RIGHT(Name, LEN(Name) - CHARINDEX('UK:"', Name) - 3)
        -- Length up to (not including) the closing quotation mark.
        -- CHARINDEX takes (what to find, where to search): the quote is the
        -- first argument and the remaining string the second.
        ,CHARINDEX('"', RIGHT(Name, LEN(Name) - CHARINDEX('UK:"', Name) - 3)) - 1
    )
FROM table1
Please note that this solution assumes that the structure of the string is always the same: starts with 'UK:"' and finishes with '"'.

Here's how I would do it:
-- Extract the text between the marker 'UK:"' and the next '"'.
-- The search anchors on the full marker 'UK:"' (not just 'UK') so that the
-- letters "UK" inside another language's text cannot be picked up by mistake.
-- NULLIF turns "marker not found" (CHARINDEX = 0) into NULL, so rows without a
-- UK entry yield NULL instead of an arbitrary slice of the string.
SELECT
    t.ID,
    SUBSTRING(
        t.name,
        uk.value_start,
        CHARINDEX('"', t.name, uk.value_start) - uk.value_start
    ) AS Name
FROM table1 AS t
CROSS APPLY (
    -- + 4 skips the four characters of the marker itself.
    SELECT NULLIF(CHARINDEX('UK:"', t.name), 0) + 4 AS value_start
) AS uk

This method uses a recursive call to split the string on the delimiter, then selects from that. Adapted from this post.
The first 'with' (Common table expression) just sets up the test data (used here like a temp table). The second called split parses the string on the semi-colon resulting in a CTE of rows consisting of ids and parsed string elements. Uncomment the query following that to see. Then, the final query selects the rows that start with 'UK' and returns the meat.
-- Test data standing in for the real table: one row per id holding the raw
-- list of CODE:"text" entries. Id 5 has no UK entry.
with tbl(id, str) as (
    select 1, 'PK:"";UK:"2. Education Sector";SP:"Sector Educativo"; GR:"2. Bildungssektor";FR:"2. Secteur de l/éducation";'
    union ALL
    select 2, 'UK:"3. Football: pitch/ground";SP:"3. Campo de fútbol"; GR:"3. Fußballplatz/Boden";NR:"3. fotballbane/bane";FR:"3. Terrain de football";'
    union ALL
    select 3, 'JP:""; GR:"1. Stadt: Hauptstadt/Hauptstadt"; SP:"1. Ciudad: ciudad principal/capital"; UK:"1. City: main city/capital"; FR:"1. Ville : ville principale/capitale"; NR:"1. By: hovedby/hovedstad"; IND:"";'
    union all
    select 4, 'AF:""; IND:""; GR:"4. Andere"; SP:"4. Otras"; FR:""; NR:"4. Andre"; FR:"4. Les autres"; UK:"4. Others"'
    union ALL
    select 5, 'AF:""; IND:""; GR:"4. Andere"; SP:"4. Otras"; FR:""; NR:"4. Andre"; FR:"4. Les autres";'
),
-- Recursively peel one ';'-terminated element off the front of csv per step:
-- word receives the trimmed element, csv keeps the unparsed remainder.
split(id, word, csv) AS (
    -- Seed row: nothing parsed yet. The appended ';' guarantees that the last
    -- element is terminated even when the source string has no trailing ';'.
    SELECT id, '', str || ';'
    from tbl
    UNION ALL
    SELECT id,
           -- Characters before the first ';' (written with an explicit
           -- 1-based start instead of relying on a start position of 0).
           trim(substr(csv, 1, instr(csv, ';') - 1)),
           substr(csv, instr(csv, ';') + 1)
    FROM split
    WHERE csv != ''
)
-- Show splitting on delimiter
--SELECT id, word FROM split
--WHERE word != ''
--order by id;
-- Keep only elements of the form UK:"...": anchoring the pattern at the start
-- matches the substr() below, which assumes the element begins with the four
-- characters UK:" and ends with a closing quote. Elements that merely contain
-- the letters "UK" somewhere are no longer selected.
SELECT id, substr(word, 5, length(word) - 5) as UK_Result
FROM split
WHERE word like 'UK:"%"'
order by id;
1 2. Education Sector
2 3. Football: pitch/ground
3 1. City: main city/capital
4 4. Others

Related

SQL: using regexp_substr or regexp_extract, looking for the regex pattern that will only return the string between one character and a space

The row I am trying to parse from is a series of string values separated only by spaces. Sample below:
TX:123 SP:XapZNsyeS INST:456123
I need to use either regexp_substr or regexp_extract to return only values for the string that appears after "TX:" or "SP:", etc. So essentially an expression that only captures the string after a string (e.g. "TX:") and before a space (" ").
Here's one way to split on 2 delimiters. This works on Oracle 12c as you included the Oracle regexp-substr tag. Using a with statement, first set up the original data, then split on a space or the end of the line, then break into name-value pairs.
-- Splits each str on spaces into elements, then splits every element at the
-- colon into a NAME / VALUE pair.
-- Sample rows standing in for the real table.
WITH tbl_original_data(ID, str) AS (
SELECT 1, 'TX:123 SP:XapZNsyeS INST:456123' FROM dual UNION ALL
SELECT 2, 'MI:321 SP:MfeKLgkrJ INST:654321' FROM dual
),
-- One row per space-delimited element of str.
tbl_split_on_space(ID, ELEMENT) AS (
SELECT ID,
-- Capture group 1 of the LEVEL-th match: the text in front of a space or the end of the line.
REGEXP_SUBSTR(str, '(.*?)( |$)', 1, LEVEL, NULL, 1)
FROM tbl_original_data
-- Keep producing levels while str still has a LEVEL-th match.
CONNECT BY REGEXP_SUBSTR(str, '(.*?)( |$)', 1, LEVEL) IS NOT NULL
-- Only continue within the same source row.
AND PRIOR ID = ID
-- NOTE(review): presumably the usual PRIOR SYS_GUID() term that keeps Oracle from reporting a CONNECT BY loop - confirm.
AND PRIOR SYS_GUID() IS NOT NULL
)
--SELECT * FROM tbl_split_on_space;
-- NAME is the part of the element before the colon, VALUE the part after it.
SELECT ID,
REGEXP_REPLACE(ELEMENT, '^(.*):.*', '\1') NAME,
REGEXP_REPLACE(ELEMENT, '.*:(.*)$', '\1') VALUE
FROM tbl_split_on_space;
ID NAME VALUE
---------- ---------- ----------
1 TX 123
1 SP XapZNsyeS
1 INST 456123
2 MI 321
2 SP MfeKLgkrJ
2 INST 654321
6 rows selected.
EDIT: Realizing this answer is a little more than was asked for, here's a simplified answer to return one element. Don't forget to allow for the value ending in either a space or the end of the line, in case your element is at the end of the line.
-- Return only the value that follows 'TX:'.
WITH tbl_original_data(ID, str) AS (
    SELECT 1, 'TX:123 SP:XapZNsyeS INST:456123' FROM dual
)
-- Capture group 1 is explicitly non-greedy, so it stops at the first space (or
-- at the end of the line when TX is the last element) rather than depending
-- on how the engine treats a greedy group that follows a lazy one.
-- REGEXP_SUBSTR already scans for the first match, so no leading '.*?' is needed.
SELECT REGEXP_SUBSTR(str, 'TX:(.*?)( |$)', 1, 1, NULL, 1) TX_VALUE
FROM tbl_original_data;
TX_VALUE
--------
123
1 row selected.

Split values from a column to another column SQL DEVELOPER

Hello people here again with another oracle SQL question.
Im having some problems spliting values from a column to another one.
So there it goes.. im having this query :
-- Asker's query as posted: sums amounts per transaction type and currency in
-- the subquery, joins the sums per currency with LISTAGG, and tries to split
-- them back into col_one / col_two.
SELECT MONEDA ,
LISTAGG (MONTO , ';') WITHIN GROUP (ORDER BY MONTO) MONTO,
-- NOTE(review): MONTO below presumably resolves to the subquery's MONTO column rather than to the LISTAGG alias declared on the previous line - confirm.
REGEXP_SUBSTR(MONTO, '[^;]+', 1, 1) col_one,
REGEXP_SUBSTR(MONTO, '[^;]+', 1, 2) col_two
FROM (SELECT SUM(ZMT.AMOUNT) AS MONTO,
ZMT.T_TYPE AS tipo,
JSON_VALUE(MSG, '$.glAccount.currency.code') AS moneda
FROM Z_MAMBU_TRANSACTIONS ZMT JOIN POSTING_ONLINE0182 PO ON PO.RESP_REFERENCE0182 = ZMT.TRANSACTIONID
WHERE TO_CHAR(ZMT.CREATIONDATE, 'YYYY-MM-DD') = '2021-04-20' AND
PO.POSTING_RESPCODE0182 = 0 AND
-- NOTE(review): AND binds tighter than OR, so the 'CREDIT' alternative is not restricted by the date and response-code conditions above - confirm that this is intended.
(JSON_VALUE(MSG, '$.type') = 'DEBIT') OR (JSON_VALUE(MSG, '$.type') = 'CREDIT')
GROUP BY T_TYPE, JSON_VALUE(MSG, '$.glAccount.currency.code')
ORDER BY T_TYPE)
GROUP BY MONEDA
the result is the next:
https://i.stack.imgur.com/QMgYr.png
What I need to do is SPLIT the "MONTO" values, using ";" as the separator, into 2 other columns (col_one and col_two). As you can see in the result, it is only giving me the second value, not the first.
After that I need to subtract the values that I split.
This is an exaple of what i need :
MONEDA MONTO COL_ONE COL_TWOV
COL 174579148065,39;175491229711,9 174579148065,39 175491229711,9
DOL 30300300300;30300300300 30300300300 30300300300
THANK YOU GUYS!
I agree with Tim - substr + instr do the job just nicely. If you, for some reason, want to try regular expressions, see if this helps (sample data in lines #1 - 4; query begins at line #5):
SQL> with result (moneda, monto) as
2 (select 'COL', '174579148065,39;175491229711,9' from dual union all
3 select 'DOL', '30300300300;30300300300' from dual
4 )
5 select moneda,
6 regexp_substr(monto, '\d+(,\d+)?', 1, 1) col_one,
7 regexp_substr(monto, '\d+(,\d+)?', 1, 2) col_two
8 from result;
MONEDA COL_ONE COL_TWO
---------- -------------------- --------------------
COL 174579148065,39 175491229711,9
DOL 30300300300 30300300300
SQL>
I would just use the base string functions here and avoid regex altogether. Going by your sample data given at the very end of your question:
-- Split MONTO at its first ';':
--   COL_ONE = everything before the separator,
--   COL_TWO = everything after it (SUBSTR without a length runs to the end).
SELECT
    t.MONEDA,
    t.MONTO,
    SUBSTR(t.MONTO, 1, INSTR(t.MONTO, ';') - 1) AS COL_ONE,
    SUBSTR(t.MONTO, INSTR(t.MONTO, ';') + 1)    AS COL_TWO
FROM yourTable t;
Demo

Using Oracle REGEXP_SUBSTR to extract uppercase data separated by underscores

sample column data:
Failure on table TOLL_USR_TRXN_HISTORY:
Failure on table DOCUMENT_IMAGES:
Error in CREATE_ACC_STATEMENT() [line 16]
I am looking for a way to extract only the uppercase words (table names) separated by underscores. I want the whole table name, the maximum is 3 underscores and the minimum is 1 underscore. I would like to ignore any capital letters that are initcap.
You can just use regexp_substr():
-- Return the first run of at least three capital letters / underscores,
-- matched case-sensitively ('c'), from a sample message.
with x (str) as (
    select 'Failure on table TOLL_USR_TRXN_HISTORY' from dual
)
select regexp_substr(str, '[A-Z_]{3,}', 1, 1, 'c')
from x;
The pattern says to find substrings with capital letters or underscores, at least 3 characters long. The 1, 1 means start from the first position and return the first match. The 'c' makes the search case-sensitive.
You may use such a SQL Select statement for each substituted individual line
( Failure on table TOLL_USR_TRXN_HISTORY in the below case )
from your text :
-- Cuts str into space-delimited words and returns those that contain no
-- lower-case letters and between one and three underscores.
select regexp_replace(q.word, '[^a-zA-Z0-9_]+', '') as word
from
(
-- word: the text between the previous space position and the current one
--       (or the end of str when instr found no further space and returned 0).
-- lg:   the previous row's space position, defaulting to 1 on the first row.
select substr(str,nvl(lag(spc) over (order by lvl),1)+1*sign(lvl-1),
abs(decode(spc,0,length(str),spc)-nvl(lag(spc) over (order by lvl),1))) word,
nvl(lag(spc) over (order by lvl),1) lg
from
(
with tab as
( select 'Failure on table TOLL_USR_TRXN_HISTORY' str from dual )
-- One row per level 1..10; spc is the position of the level-th space in str (0 when there is none).
select instr(str,' ',1,level) spc, str, level lvl
from tab
connect by level <= 10
)
) q
-- Drop the surplus levels whose preceding space position was 0.
where lg > 0
-- Keep words whose cleaned form is equal to its own upper-cased form.
and upper(regexp_replace(q.word, '[^a-zA-Z0-9_]+', ''))
= regexp_replace(q.word, '[^a-zA-Z0-9_]+', '')
-- At least one underscore among the first three occurrences ...
and ( nvl(length(regexp_substr(q.word,'_',1,1)),0)
+ nvl(length(regexp_substr(q.word,'_',1,2)),0)
+ nvl(length(regexp_substr(q.word,'_',1,3)),0)) > 0
-- ... and no fourth underscore.
and nvl(length(regexp_substr(q.word,'_',1,4)),0) = 0;
Alternate way to get only table name from below error message , the below query will work only if table_name at end in the mentioned way
-- Table name = the text after the last space of the message, with any
-- trailing ':' removed.
with msg as (
    select 'Failure on table TOLL_USR_TRXN_HISTORY:' as data
    from dual
)
SELECT RTRIM(substr(data,instr(data,' ',-1)+1),':')
from msg
New Query for all messages :
-- Replace every 'Failure on table' prefix, and then every ':', with a single
-- space in a two-line sample message.
select
    replace(
        replace(
            'Failure on table TOLL_USR_TRXN_HISTORY:
Failure on table DOCUMENT_IMAGES:',
            'Failure on table',
            ' '
        ),
        ':',
        ' '
    )
from dual

How to convert only first letter uppercase without using Initcap in Oracle?

Is there a way to convert the first letter uppercase in Oracle SQl without using the Initcap Function?
My problem is that I must use the DISTINCT keyword in the SQL statement, and the Initcap function doesn't work with it.
Heres is my SQL example:
-- Asker's query as posted; it raises ORA-01791.
-- The DISTINCT select list contains initcap(p.firstname) / initcap(p.lastname),
-- while ORDER BY references the bare columns p.lastname / p.firstname.
select distinct p.nr, initcap(p.firstname), initcap(p.lastname), ill.describtion
from patient p left join illness ill
on p.id = ill.id
where p.deleted = 0
order by p.lastname, p.firstname;
I get this error message: ORA-01791: not a SELECTed expression
When SELECT DISTINCT, you can't ORDER BY columns that aren't selected. Use column aliases instead, as:
-- Alias the initcap() expressions and sort by those aliases, so that the
-- ORDER BY only refers to expressions present in the DISTINCT select list.
SELECT DISTINCT
    p.nr,
    initcap(p.firstname) fname,
    initcap(p.lastname) lname,
    ill.describtion
FROM patient p
LEFT JOIN illness ill
    ON p.id = ill.id
WHERE p.deleted = 0
ORDER BY lname, fname
this would do it, but i think you need to post your query as there may be a better solution
-- Upper-case the first character and append the rest of the value unchanged.
-- SUBSTR without a length returns everything from position 2 to the end, so
-- values of any length are kept intact (no fixed 9999-character cap).
select upper(substr(<column>,1,1)) || substr(<column>,2) from dual
To change string to String, you can use this:
-- Upper-case the first character of 'string', but only when that character is
-- a letter. The '^' anchor restricts the match to the start of the text, so
-- when the text starts with anything else (e.g. a digit) nothing is replaced
-- and a later letter is not overwritten with the first character.
SELECT
    regexp_replace ('string', '^[a-z]', upper (substr ('string', 1, 1)), 1, 1, 'i')
FROM dual;
This assumes that the first letter is the one you want to convert. If your input text starts with a number, such as "2 strings", then it won't change it to "2 Strings".
You can also use the column number instead of the name or alias:
-- Same query sorted by select-list position: 3 = initcap(p.lastname),
-- 2 = initcap(p.firstname).
SELECT DISTINCT
    p.nr,
    initcap(p.firstname),
    initcap(p.lastname),
    ill.describtion
FROM patient p
LEFT JOIN illness ill
    ON p.id = ill.id
WHERE p.deleted = 0
ORDER BY 3, 2;
-- Splits a comma-separated list into words, upper-cases the first character
-- of each word and concatenates the words again in their original order.
-- Sample input.
WITH inData AS
(
SELECT 'word1, wORD2, word3, woRD4, worD5, word6' str FROM dual
),
-- One row per word: rId is the word's position, str the LEVEL-th run of
-- letters/digits found in the input.
inRows as
(
SELECT 1 as tId, LEVEL as rId, trim(regexp_substr(str, '([A-Za-z0-9])+', 1, LEVEL)) as str
FROM inData
-- Go one level deeper for as long as the (LEVEL - 1)-th comma exists.
CONNECT BY instr(str, ',', 1, LEVEL - 1) > 0
)
-- Upper-case only the first character of each word; the rest is left as is.
-- The words are joined without a separator, ordered by rId.
SELECT tId, LISTAGG( upper(substr(str, 1, 1)) || substr(str, 2) , '') WITHIN GROUP (ORDER BY rId) AS camelCase
FROM inRows
GROUP BY tId;

Check if string variations exists in another string

I need to check if a partial name matches full name. For example:
Partial_Name | Full_Name
--------------------------------------
John,Smith | Smith William John
Eglid,Timothy | Timothy M Eglid
I have no clue how to approach this type of matching.
Another thing is that name and last name may come in the wrong order, making it harder.
I could do something like this, but this only works if names are in the same order and 100% match
-- Removes every character other than letters and apostrophes from both names,
-- lower-cases them, and yields 'Same' only when the two results are equal.
decode(LOWER(REGEXP_REPLACE(Partial_Name,'[^a-zA-Z'']','')), LOWER(REGEXP_REPLACE(Full_Name,'[^a-zA-Z'']','')), 'Same', 'Different')
you could use this pattern on the text provided - works for most engines
([^ ,]+),([^ ,]+)(?=.*\b\1\b)(?=.*\b\2\b)
Demo
-- Returns the (Full_Name, Partial_Name) pairs for which every comma-separated
-- token of Partial_Name matches Full_Name (token used as a case-insensitive
-- regular expression).
WITH
-- Sample data for tab, commented out; the query below reads from tab.
/*
tab AS
(
SELECT 'Smith William John' Full_Name, 'John,Smith' Partial_Name FROM dual
UNION ALL SELECT 'Timothy M Eglid', 'Eglid,timothy' FROM dual
UNION ALL SELECT 'Tim M Egli', 'Egli,Tim,M2' FROM dual
UNION ALL SELECT 'Timot M Eg', 'Eg' FROM dual
),
*/
-- One row per (row of tab, token of Partial_Name).
tmp AS (
SELECT Full_Name, Partial_Name,
-- No comma: the whole Partial_Name is the token; otherwise take token number lvl+1.
trim(CASE WHEN instr(Partial_Name, ',') = 0 THEN Partial_Name
ELSE regexp_substr(Partial_Name, '[^,]+', 1, lvl+1)
END) token
-- Number generator: lvl runs from 0 up to the largest comma count found in tab.
FROM tab t CROSS JOIN (SELECT lvl FROM (SELECT LEVEL-1 lvl FROM dual
CONNECT BY LEVEL <= (SELECT MAX(LENGTH(Partial_Name) - LENGTH(REPLACE(Partial_Name, ',')))+1 FROM tab)))
-- Keep only the lvl values that do not exceed this row's own comma count.
WHERE LENGTH(Partial_Name) - LENGTH(REPLACE(Partial_Name, ',')) >= lvl
)
SELECT Full_Name, Partial_Name
FROM tmp
GROUP BY Full_Name, Partial_Name
-- Number of distinct tokens must equal the number of distinct tokens that match Full_Name.
HAVING count(DISTINCT token)
= count(DISTINCT CASE WHEN REGEXP_LIKE(Full_Name, token, 'i')
THEN token ELSE NULL END);
In the tmp CTE, each partial_name is split into tokens (separated by commas).
The resulting query retrieves only those rows which full_name matches all the corresponding tokens.
This query works with the dynamic number of commas in partial_name. If there can be only zero or one commas then the query will be much easier:
-- Simplified match for a Partial_Name holding at most one comma.
--   With a comma:    both the part before and the part after it must match full_name.
--   Without a comma: the whole Partial_Name must match full_name.
-- The parentheses only spell out the grouping that AND/OR precedence already gives.
SELECT *
FROM tab
WHERE (
        instr(Partial_Name, ',') > 0
    AND REGEXP_LIKE(full_name, substr(Partial_Name, 1, instr(Partial_Name, ',')-1), 'ix')
    AND REGEXP_LIKE(full_name, substr(Partial_Name,instr(Partial_Name, ',')+1), 'ix')
)
OR (
        instr(Partial_Name, ',') = 0
    AND REGEXP_LIKE(full_name, Partial_Name, 'ix')
);
This is what I ended up doing... Not sure if this is the best approach.
I split partials by comma and check if first name present in full name and last name present in full name. If both are present then match.
-- 'Y' when both the first and the second comma-separated token of
-- Partial_Name occur (case-insensitively, after trimming) inside Full_Name;
-- otherwise 'N'.
CASE
    WHEN instr(trim(lower(Full_Name)), trim(lower(REGEXP_SUBSTR(Partial_Name, '[^,]+', 1, 1)))) > 0
     AND instr(trim(lower(Full_Name)), trim(lower(REGEXP_SUBSTR(Partial_Name, '[^,]+', 1, 2)))) > 0
        THEN 'Y'
    ELSE 'N'
END AS MATCHING_NAMES