oracle sql contain - sql

I have table with values in columns like
colname
TMC_MCH,OTA_MCH,CONSOL_MCH,RETAIL_MCH,TOUROP_MCH,SPEC_MCH,QRACTO_MCH
RETAIL_MCH
RETAIL_MCH,CONSOL_MCH
CONSOL_MCH
OTA_MCH
I need to run query to fetch all rows contains RETAIL_MCH or CONSOL_MCH.
if i run query below i get result as below
select * from table111 where
(CONTAINS(table111.colname, 'RETAIL,CONSOL' , 1) > 0)
TMC_MCH,OTA_MCH,CONSOL_MCH,RETAIL_MCH,TOUROP_MCH,SPEC_MCH,QRACTO_MCH
RETAIL_MCH
RETAIL_MCH,CONSOL_MCH
CONSOL_MCH
but I need to exact search including underscore "_"
select * from table111 where
(CONTAINS(table111.colname, 'RETAIL_MCH,CONSOL_MCH' , 1) > 0)

CONTAINS is an Oracle text function, you can escape the underscore:
SELECT *
FROM table111
WHERE CONTAINS( colname, 'RETAIL\_MCH,CONSOL\_MCH', 1 ) > 0
Or, if you want to pass the string in unescaped then you could use REPLACE to add the escape characters:
SELECT *
FROM table111
WHERE CONTAINS( colname, REPLACE ( 'RETAIL_MCH,CONSOL_MCH', '_', '\_' ), 1 ) > 0
Which, for the sample data:
CREATE TABLE table111 ( colname ) AS
SELECT 'TMC_MCH,OTA_MCH,CONSOL_MCH,RETAIL_MCH,TOUROP_MCH,SPEC_MCH,QRACTO_MCH' FROM DUAL UNION ALL
SELECT 'RETAIL_MCH' FROM DUAL UNION ALL
SELECT 'RETAIL_MCH,CONSOL_MCH' FROM DUAL UNION ALL
SELECT 'CONSOL_MCH' FROM DUAL UNION ALL
SELECT 'OTA_MCH' FROM DUAL;
CREATE INDEX table111__colname__textidx ON table111(colname) INDEXTYPE IS CTXSYS.CONTEXT;
Outputs:
| COLNAME |
| :------------------------------------------------------------------- |
| TMC_MCH,OTA_MCH,CONSOL_MCH,RETAIL_MCH,TOUROP_MCH,SPEC_MCH,QRACTO_MCH |
| RETAIL_MCH |
| RETAIL_MCH,CONSOL_MCH |
| CONSOL_MCH |
db<>fiddle here

It would be probably simpler if you said which result you want. The way I understood it, maybe this helps:
select *
from table111
where instr(colname, 'RETAIL_MCH') > 0
or instr(colname, 'CONSOL_MCH') > 0;
OR might need to be substituted by AND (depending on what you want).

If you want either value, you would use:
where CONTAINS(table111.colname, 'RETAIL|CONSOL' , 1) > 0
If you want both:
where CONTAINS(table111.colname, 'RETAIL&CONSOL' , 1) > 0
You should pass the value in with the operator you want, instead of ,.

Related

Oracle regex to extract string between first pair of < and > brackets

I am have been assigned a task to parse a string (which is essentially in XML format) and I need to extract the name of the first tag in the string
eg: string '<column><data-type>string</data-type>.............'
or '<filter><condition>....</condition>...............'
or
'......................'
the string keeps changing but I am only interested in the first tag, I would like to get the output like:
column,
filter,
query
i have tried regexp_substr(string,'^<(.+)>',1,1,null,1) and some similer variations but they don't seem to be working cosistently.
Please help.
If you have XML data then use a proper XML parser:
SELECT XMLQUERY( '/*/name()' PASSING XMLTYPE(value) RETURNING CONTENT ) AS tag_name
FROM table_name
Which for the sample data:
CREATE TABLE table_name ( value CLOB );
INSERT INTO table_name ( value )
SELECT '<column><data-type>string</data-type></column>' FROM DUAL UNION ALL
SELECT '<filter><condition>....</condition></filter>' FROM DUAL UNION ALL
SELECT '<query />' FROM DUAL UNION ALL
SELECT '<has_attributes attr1="do not return this" attr2="<or> this" />' FROM DUAL
Outputs:
| TAG_NAME |
| :------------- |
| column |
| filter |
| query |
| has_attributes |
db<>fiddle here
You are looking for any character between the bounds -- and that includes '>'. So, just exclude the terminating character:
select regexp_substr(string,'^<([^>]+)>',1,1,null,1)
from (select '<column><data-type>string</data-type>.............' as string from dual union all
select '<filter><condition>....</condition>...............' from dual
) x;

REGEXP to validate a specific number

How can I search for a specific number in an array using REGEXP?
I have an array and need to verify if it has a specific number.
Ex: [5,2,1,4,6,19] and I am looking for number 1, but just the number 1 and not any number that contain the digit 1.
I had to do this:
case when REGEXP_INSTR(JSON_QUERY(MY_JSON_COLUMN,'$.path') , '[[]{1}[1][,]')<>0
or REGEXP_INSTR(JSON_QUERY(MY_JSON_COLUMN,'$.path') , '[,]{1}[1][,]{1}')<>0
or REGEXP_INSTR(JSON_QUERY(MY_JSON_COLUMN,'$.path') , '[,]{1}[1][]]')<>0
or REGEXP_INSTR(JSON_QUERY(MY_JSON_COLUMN,'$.path') , '[[]{1}[1][]]') <>0
then 'DIGIT_ONE' else 'NO_DIGIT_ONE'
end
Is there anything simpler?
You can use
(^|\D)1(\D|$)
This will seach for 1 not enclosed with other digits.
See this regex demo.
Details
(^|\D) - start of string or non-digit
1 - a 1 char
(\D|$) - non-digit or end of string.
Do NOT use regular expressions, use a proper JSON parser and then filter for the number you want:
SELECT my_json_column,
CASE
WHEN JSON_EXISTS( my_json_column, '$?(#.path[*] == 1)' )
THEN 'DIGIT ONE'
ELSE 'NO DIGIT ONE'
END AS has_one
FROM table_name;
or (if you are using Oracle 12.1 and cannot use path filter expressions with JSON_EXISTS, which is only available from Oracle 12.2):
SELECT my_json_column,
CASE
WHEN EXISTS(
SELECT 'X'
FROM JSON_TABLE(
t.my_json_column,
'$.path[*]'
COLUMNS (
value NUMBER PATH '$'
)
)
WHERE value = 1
)
THEN 'DIGIT ONE'
ELSE 'NO DIGIT ONE'
END
FROM table_name t;
Which, for the sample data:
CREATE TABLE table_name (
my_json_column CHECK ( my_json_column IS JSON )
) AS
SELECT '{"path":[5,2,1,4,6,19],"not_this_path":[1,2,3,4,5]}' FROM DUAL UNION ALL
SELECT '{"path":[5,2,4,6,19],"not_this_path":[1,2,3,4,5]}' FROM DUAL UNION ALL
SELECT '{"path":[11],"not_this_path":[1]}' FROM DUAL UNION ALL
SELECT '{"path":[2],"not_this_path":[1]}' FROM DUAL UNION ALL
SELECT '{"path":[1,11]}' FROM DUAL;
Both output:
MY_JSON_COLUMN | HAS_ONE
:-------------------------------------------------- | :-----------
{"path":[5,2,1,4,6,19],"not_this_path":[1,2,3,4,5]} | DIGIT ONE
{"path":[5,2,4,6,19],"not_this_path":[1,2,3,4,5]} | NO DIGIT ONE
{"path":[11],"not_this_path":[1]} | NO DIGIT ONE
{"path":[2],"not_this_path":[1]} | NO DIGIT ONE
{"path":[1,11]} | DIGIT ONE
db<>fiddle here
Alternatively, with a little bit more typing (a little bit? Am I kidding?!), splitting the string into rows and comparing values to the search string:
SQL> with test (col) as
2 (select '[5,2,1,4,6,19]' from dual)
3 select t.col,
4 case when '&par_search_string' in
5 (select regexp_substr(substr(col, 2, length(col) - 1), '[^,]+', 1, level) val
6 from test
7 connect by level <= regexp_count(col, ',') + 1
8 )
9 then 'Search string exists'
10 else 'Search string does not exist'
11 end result
12 from test t;
Enter value for par_search_string: 1
COL RESULT
-------------- ----------------------------
[5,2,1,4,6,19] Search string exists
SQL> /
Enter value for par_search_string: 24
COL RESULT
-------------- ----------------------------
[5,2,1,4,6,19] Search string does not exist
SQL>

how to find the same two-digit numbers in a sequence of characters regular expressions SQL

I would like to print out strings of characters in which at least two numbers are repeated in SQL
EX
11-22-33
11-22-44
22-22-33
55-22-33
11-66-33
11-88-33
33-88-33
77-77-22
OUTPUT :
22-22-33
77-77-22
33-88-33
But I have no idea how to write a regular expression that would help me
For this fixed format of NN-NN-NN, you could just use string functions and test the three possible combinations:
select *
from mytable
where substr(val, 1, 2) = substr(val, 3, 2)
or substr(val, 1, 2) = substr(val, 5, 2)
or substr(val, 3, 2) = substr(val, 5, 2)
We could get a little fancy and use a lateral join instead of the repeating or conditions. This scales better if you have more than 3 parts (the number of combinations increases rapidly, which makes the or solution less convinient):
select t.*
from mytable t
cross apply (
select count(distinct part) cnt_distinct_part
from (
select substr(t.val, 1, 2) part
union all select substr(t.val, 3, 2)
union all select substr(t.val, 5, 2)
) x
) x
where x.cnt_distinct_part < 3
You can use the regular expression (^|-)(\d+)(-\d+)*-\2(-|$) to match pairs of numbers of any number of digits or number of terms.
(^|-) matches either the start-of-the-string ^ or a hyphen - contained in the first capturing group ();
followed by one-or-more digit characters \d+ contained in the second capturing group () to match the first of the pair of the numbers;
then a third capturing group () which is matched zero-or-more times * containing a - followed by one-or-more digits \d+ to match any amount of numbers between the pair of matched numbers;
then a hyphen -;
then a duplicate of the second capturing group \2 which will match the second of the pair of numbers;
then either a hyphen - or the end-of-the-string $
Giving the query:
SELECT value
FROM table_name
WHERE REGEXP_LIKE( value, '(^|-)(\d+)(-\d+)?-\2(-|$)' );
Which, for the sample data:
CREATE TABLE table_name ( value ) AS
SELECT '11-22-33' FROM DUAL UNION ALL
SELECT '11-22-44' FROM DUAL UNION ALL
SELECT '22-22-33' FROM DUAL UNION ALL
SELECT '55-22-33' FROM DUAL UNION ALL
SELECT '11-66-33' FROM DUAL UNION ALL
SELECT '11-88-33' FROM DUAL UNION ALL
SELECT '33-88-33' FROM DUAL UNION ALL
SELECT '77-77-22' FROM DUAL UNION ALL
SELECT '11-77-77' FROM DUAL UNION ALL
SELECT '11-177-77' FROM DUAL UNION ALL
SELECT '11-77-771' FROM DUAL UNION ALL
SELECT '123-456-123' FROM DUAL UNION ALL
SELECT '1-2-2' FROM DUAL UNION ALL
SELECT '99999-99999-0' FROM DUAL UNION ALL
SELECT '1-2-3-4-5-6-7-8-9-0-11-2-13' FROM DUAL;
Outputs:
| VALUE |
| :-------------------------- |
| 22-22-33 |
| 33-88-33 |
| 77-77-22 |
| 11-77-77 |
| 123-456-123 |
| 1-2-2 |
| 99999-99999-0 |
| 1-2-3-4-5-6-7-8-9-0-11-2-13 |
db<>fiddle here

Split a Column with Delimited Values and Compare Each Value

I have a column that contains multiple values in a delimited(comma-separated) format -
id | code
------------
1 11,19,21
2 55,87,33
3 3,11
4 11
I want to be able to compare to each value inside the 'code' column as below -
SELECT id FROM myTbl WHERE code = '11'
This should return -
1
3
4
I've tried the solution below but it does not work for all cases -
SELECT id FROM myTbl WHERE POSITION('11' IN code) <> 0
This will work with a 2 digit number like '11' as it will return a value that is <> 0 if it finds a match. But it will fail when searching for say '3' because rows with 'id' 2 and 3 both will be returned.
Here is link that talks about the POSITION function in REDSHIFT.
Any other approach that will solve this problem?
you can get the count of this string
SELECT id FROM myTbl WHERE regexp_count(user_action, '[11]') > 0
I think we can use regexp_substr() as follow.
select tb .id from myTbl tb where '11' in (
select regexp_substr( (select code from myTbl where id=tb.id),'[^,]+', 1, LEVEL) from dual
connect by regexp_substr((select code from myTbl where id=tb.id) , '[^,]+', 1, LEVEL) is not null);
just try this.
Use split_part() function
SELECT distinct id
FROM myTbl
WHERE '11' in ( split_part( code||',' , ',', 1 ),
split_part( code||',' , ',', 2 ),
split_part( code||',' , ',', 3 ) )
This is a very, very bad data model. You should be storing this information in a junction/association table, with one row per value.
But, if you have no choice, you can use like:
SELECT id
FROM myTbl
WHERE ',' || code || ',' LIKE '%,11,%';

GROUP BY or COUNT Like Field Values - UNPIVOT?

I have a table with test fields, Example
id | test1 | test2 | test3 | test4 | test5
+----------+----------+----------+----------+----------+----------+
12345 | P | P | F | I | P
So for each record I want to know how many Pass, Failed or Incomplete (P,F or I)
Is there a way to GROUP BY value?
Pseudo:
SELECT ('P' IN (fields)) AS pass
WHERE id = 12345
I have about 40 test fields that I need to somehow group together and I really don't want to write this super ugly, long query. Yes I know I should rewrite the table into two or three separate tables but this is another problem.
Expected Results:
passed | failed | incomplete
+----------+----------+----------+
3 | 1 | 1
Suggestions?
Note: I'm running PostgreSQL 7.4 and yes we are upgrading
I may have come up with a solution:
SELECT id
,l - length(replace(t, 'P', '')) AS nr_p
,l - length(replace(t, 'F', '')) AS nr_f
,l - length(replace(t, 'I', '')) AS nr_i
FROM (SELECT id, test::text AS t, length(test::text) AS l FROM test) t
The trick works like this:
Transform the rowtype into its text representation.
Measure character-length.
Replace the character you want to count and measure the change in length.
Compute the length of the original row in the subselect for repeated use.
This requires that P, F, I are present nowhere else in the row. Use a sub-select to exclude any other columns that might interfere.
Tested in 8.4 - 9.1. Nobody uses PostgreSQL 7.4 anymore nowadays, you'll have to test yourself. I only use basic functions, but I am not sure if casting the rowtype to text is feasible in 7.4. If that doesn't work, you'll have to concatenate all test-columns once by hand:
SELECT id
,length(t) - length(replace(t, 'P', '')) AS nr_p
,length(t) - length(replace(t, 'F', '')) AS nr_f
,length(t) - length(replace(t, 'I', '')) AS nr_i
FROM (SELECT id, test1||test2||test3||test4 AS t FROM test) t
This requires all columns to be NOT NULL.
Essentially, you need to unpivot your data by test:
id | test | result
+----------+----------+----------+
12345 | test1 | P
12345 | test2 | P
12345 | test3 | F
12345 | test4 | I
12345 | test5 | P
...
- so that you can then group it by test result.
Unfortunately, PostgreSQL doesn't have pivot/unpivot functionality built in, so the simplest way to do this would be something like:
select id, 'test1' test, test1 result from mytable union all
select id, 'test2' test, test2 result from mytable union all
select id, 'test3' test, test3 result from mytable union all
select id, 'test4' test, test4 result from mytable union all
select id, 'test5' test, test5 result from mytable union all
...
There are other ways of approaching this, but with 40 columns of data this is going to get really ugly.
EDIT: an alternative approach -
select r.result, sum(char_length(replace(replace(test1||test2||test3||test4||test5,excl1,''),excl2,'')))
from mytable m,
(select 'P' result, 'F' excl1, 'I' excl2 union all
select 'F' result, 'P' excl1, 'I' excl2 union all
select 'I' result, 'F' excl1, 'P' excl2) r
group by r.result
You could use an auxiliary on-the-fly table to turn columns into rows, then you would be able to apply aggregate functions, something like this:
SELECT
SUM(fields = 'P') AS passed,
SUM(fields = 'F') AS failed,
SUM(fields = 'I') AS incomplete
FROM (
SELECT
t.id,
CASE x.idx
WHEN 1 THEN t.test1
WHEN 2 THEN t.test2
WHEN 3 THEN t.test3
WHEN 4 THEN t.test4
WHEN 5 THEN t.test5
END AS fields
FROM atable t
CROSS JOIN (
SELECT 1 AS idx
UNION ALL SELECT 2
UNION ALL SELECT 3
UNION ALL SELECT 4
UNION ALL SELECT 5
) x
WHERE t.id = 12345
) s
Edit: just saw the comment about 7.4, I don't think this will work with that ancient version (unnest() came a lot later). If anyone thinks this is not worth keeping, I'll delete it.
Taking Erwin's idea to use the "row representation" as a base for the solution a bit further and automatically "normalize" the table on-the-fly:
select id,
sum(case when flag = 'F' then 1 else null end) as failed,
sum(case when flag = 'P' then 1 else null end) as passed,
sum(case when flag = 'I' then 1 else null end) as incomplete
from (
select id,
unnest(string_to_array(trim(trailing ')' from substr(all_column_values,strpos(all_column_values, ',') + 1)), ',')) flag
from (
SELECT id,
not_normalized::text AS all_column_values
FROM not_normalized
) t1
) t2
group by id
The heart of the solution is Erwin's trick to make a single value out of the complete row using the cast not_normalized::text. The string functions are applied to strip of the leading id value and the brackets around it.
The result of that is transformed into an array and that array is transformed into a result set using the unnest() function.
To understand that part, simply run the inner selects step by step.
Then the result is grouped and the corresponding values are counted.