Is there a BigQuery version of isnumeric - google-bigquery

I need to test if a field is numeric or not using standard SQL in BigQuery.
The example below works and is similar to what I have done in Cognos using TRANSLATE('mystring','1234567890.',''), but it's not very elegant.
SELECT
IF(LENGTH(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE('1234.56','1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'.',''))=0,
'A number',
'Not a number')

You can use SAFE_CAST to try casting to a number. SAFE_CAST behaves like CAST, but if the cast fails it returns NULL instead of raising an error.
For example you can do:
SAFE_CAST('1234567890' AS FLOAT64);
which will return 1.23456789E9

Thanks for both suggestions, both work a treat and I have gone for the SAFE_CAST option as it runs a fraction quicker.
#standardSQL
-- Label each sample string according to whether it can be cast to FLOAT64.
WITH `project.dataset.table` AS (
  -- Inline sample values standing in for a real table.
  SELECT col
  FROM UNNEST(['1234.56', '1234.', '1234', '.56', '1234..56', 'a1234.56']) AS col
)
SELECT
  col,
  -- SAFE_CAST returns NULL rather than failing when the string is not castable.
  CASE
    WHEN SAFE_CAST(col AS FLOAT64) IS NOT NULL THEN 'A number'
    ELSE 'Not a number'
  END
FROM `project.dataset.table`

but its not very elegant
Below examples for BigQuery Standard SQL
#standardSQL
-- Two string-based numeric checks, shown side by side on inline sample data.
WITH `project.dataset.table` AS (
  SELECT '1234.56' col UNION ALL
  SELECT '1234.' col UNION ALL
  SELECT '1234' col UNION ALL
  SELECT '.56' col UNION ALL
  SELECT '1234..56' col UNION ALL
  SELECT 'a1234.56'
)
SELECT
  col,
  -- Character-set check only: true when nothing is left after removing digits
  -- and dots, so '1234..56' (and an empty string) is still labelled 'A number'.
  IF(LENGTH(REGEXP_REPLACE(col, r'[\d.]', '')) = 0, 'A number', 'Not a number'),
  -- Shape check: digits with at most one literal decimal point.
  -- The dot is escaped because an unescaped `.` matches any character
  -- (e.g. '12a34' would pass), and at least one digit is required so that
  -- '' and '.' are rejected.
  IF(REGEXP_CONTAINS(col, r'^(\d+\.?\d*|\.\d+)$'), 'A number', 'Not a number')
FROM `project.dataset.table`

I think that we could use the TRANSLATE function to replace every digit from 0 to 9
with 0 (call the result String_1) and then compare it to a string (call it String_2) made up of as many 0s as the length of String_1.
(translate(src.NUM_BU , '0123456789', '0000000000'))
=
rpad('', length((translate(src.NUM_BU , '0123456789', '0000000000'))), '0')

Related

Check if all characters are 'X'

I have the below table:
COL
---
XXY
YXX
XXX
NULL
I want to filter out the rows which don't consist of all 'X's.
Expected output:
COL
---
XXX
We can use REGEXP_LIKE here:
SELECT COL
FROM yourTable
WHERE REGEXP_LIKE(COL, '^X+$'); -- ^X+$ means all X from start to end
Another similar version:
SELECT COL
FROM yourTable
WHERE NOT REGEXP_LIKE(COL, '[^X]'); -- this means no non X present
Another option(without using a regular expression) might be using
WITH t(col) AS
(
SELECT 'XXY' FROM dual UNION ALL
SELECT 'YXX' FROM dual UNION ALL
SELECT 'XXX' FROM dual UNION ALL
SELECT NULL FROM dual UNION ALL
SELECT 'XX ' FROM dual
)
SELECT *
FROM t
WHERE REPLACE(NVL(col,'Y'),'X') IS NULL;
COL
----
XXX
without forgetting the case col = NULL through use of a NVL()
You can use the following syntax (assuming you are using MySQL database 5.6 or greater version):
SELECT * FROM table_name WHERE col_name REGEXP '^X+$';
If you don't want/have regexp then:
WITH
tbl AS
( Select 'XXY' "COL" From dual Union All
Select 'YXX' "COL" From dual Union All
Select 'XXX' "COL" From dual Union All
Select null "COL" From dual
)
Select COL
From tbl
Where Length(Nvl(COL, 'Z')) - Length( Replace( Upper(Nvl(COL, 'Z')), 'X', '')) Is Null
COL
---
XXX
This covers both small 'x' and capital 'X' if needed and returns original COL value

Query to find if a aggregate string contains certain numbers

I am working on Big Query Standard SQL. I have a data table like shown below (using ; as separator):
id;operation
107327;-1,-1,-1,-1,5,-1,0,2,-1
108296;-1,6,2,-1,-1,-1
690481;0,-1,-1,-1,5
102643;5,-1,-1,-1,-1,-2,2,3,-1,0,-1,-1,-1,-1,-1,-1
103171;0,5
789481;0,-1,5
I would like to take id that only contains operation 0,5 or 0,-1,5 so the result will show:
690481
103171
789481
Below is for BigQuery Standard SQL
#standardSQL
-- Keep only rows whose `operation` list contains no value other than
-- '0', '-1' or '5'.
SELECT *
FROM `project.dataset.table`
WHERE NOT EXISTS (
  -- A row qualifies when no element of the split list falls outside the
  -- allowed set; NOT EXISTS states that directly instead of counting to zero.
  SELECT 1
  FROM UNNEST(SPLIT(operation)) AS op
  WHERE op NOT IN ('0', '-1', '5')
)
You can test and play with the above using the sample data from your question, as in the example below
#standardSQL
-- Same filter as a self-contained example: the CTE reproduces the sample rows
-- from the question so the query can be run as-is.
WITH `project.dataset.table` AS (
  SELECT 107327 id, '-1,-1,-1,-1,5,-1,0,2,-1' operation UNION ALL
  SELECT 108296, '-1,6,2,-1,-1,-1' UNION ALL
  SELECT 690481, '0,-1,-1,-1,5' UNION ALL
  SELECT 102643, '5,-1,-1,-1,-1,-2,2,3,-1,0,-1,-1,-1,-1,-1,-1' UNION ALL
  SELECT 103171, '0,5' UNION ALL
  SELECT 789481, '0,-1,5'
)
SELECT *
FROM `project.dataset.table`
WHERE NOT EXISTS (
  -- A row qualifies when no element of the split list falls outside the
  -- allowed set; NOT EXISTS states that directly instead of counting to zero.
  SELECT 1
  FROM UNNEST(SPLIT(operation)) AS op
  WHERE op NOT IN ('0', '-1', '5')
)
with output
I think regular expression does what you want:
select t.*
from t
where regexp_contains(operation, '^0,(-1,)*5$');
If you want matches to rows that contain only 0, -1, or 5, you would use:
where regexp_contains(operation, '^((0|-1|5),)*(0|-1|5)$');

How to extract this specific data from a particular column in SQL Server?

I have column with below data:
Change
18 MCO-005329
A ECO-12239
0 ECO-25126
X1 ECO-05963
NA MCO-003778
C ECO-08399
MCO-003759
ECO-00643217
NULL
I want to extract the output like below:
MCO-005329
ECO-12239
ECO-25126
ECO-05963
MCO-003778
ECO-08399
MCO-003759
ECO-00643217
I have implemented the code like below:
select DISTINCT change,
case when change like 'MCO%' THEN change when change like 'ECO-%' THEN change
when change like '%MCO-%' then LTRIM(RTRIM(SUBSTRING(change,10,19) ))
when change like '%ECO-%' then LTRIM(RTRIM(SUBSTRING(change,10,19) ))
else '' end x from table
You can parse out the values from your requirements using STRING_SPLIT, OUTER APPLY, and a simple WHERE clause, without hard-coding any specific string length or position values; it's dynamic.
-- Split every Change value on spaces and keep the token containing a hyphen.
SELECT parts.*
FROM (
    -- Sample rows from the question, including the NULL case.
    VALUES
        ('18 MCO-005329'),
        ('A ECO-12239'),
        ('0 ECO-25126'),
        ('X1 ECO-05963'),
        ('NA MCO-003778'),
        ('C ECO-08399'),
        ('MCO-003759'),
        ('ECO-00643217'),
        (NULL)
) AS T (Change)
OUTER APPLY (
    -- One row per space-separated token of the current Change value.
    SELECT s.value
    FROM STRING_SPLIT(T.Change, ' ') AS s
) AS parts
WHERE parts.value IS NULL
   OR parts.value LIKE '%-%'
If you don't want NULLs, then simply remove `or d2.value is null`.
https://learn.microsoft.com/en-us/sql/t-sql/queries/from-transact-sql?view=sql-server-ver15
https://learn.microsoft.com/en-us/sql/t-sql/functions/string-split-transact-sql?view=sql-server-ver15
You could use CHARINDEX() and RIGHT() as
SELECT *, RIGHT(Change, CHARINDEX('-', REVERSE(Change)) + 3)
FROM
(
VALUES
('18 MCO-005329'),
('A ECO-12239'),
('0 ECO-25126'),
('X1 ECO-05963'),
('NA MCO-003778'),
('C ECO-08399'),
('MCO-003759'),
('ECO-00643217'), ('hhh kkk-k'),
(NULL)
) T(Change)

regexp match character before a specify string (not include)

REGEXP_SUBSTR(label) ,'.*_dis')
this is for sql;
my database is mysql
select REGEXP_SUBSTR(label) ,'.*_dis') as dis ,
substr(label,length(label))-1) as num
from table
table.label column's data:
1. a_b_dis_12
2. a_dis_13
3. c_d_dis_23
4. c_dis_22
I want to get the characters before '_dis' and the numeric part, using a regexp:
1.a_b 12
2.a 13
3.c_d 23
4.c 22
thanks a lot!
You can use regexp_substr as follows:
-- dis: the text captured before a trailing '_dis_<digits>'.
-- num: the run of digits at the end of the value.
-- NOTE(review): the 6-argument REGEXP_SUBSTR form (subexpression index as the
-- last argument) is Oracle syntax, while the question targets MySQL; confirm
-- it is accepted on the target engine before relying on it.
SELECT
    REGEXP_SUBSTR(your_column, '^(.*)_dis_[0-9]+$', 1, 1, NULL, 1) AS dis,
    REGEXP_SUBSTR(your_column, '[0-9]+$') AS num
FROM your_table
You can use regexp_replace():
select regexp_replace(col, '^(.*)_dis.*$', '\1'),
regexp_replace(col, '^.*_dis_([0-9]+)', '\1')
from (select 'a_b_dis_12' as col from dual union all
select 'a_dis_13' as col from dual union all
select 'c_d_dis_23' as col from dual union all
select 'c_dis_22' as col from dual
) t;
Here is a db<>fiddle.
I would use regexp_replace() as follows:
select
regexp_replace(label, '_dis_.*$', '') dis,
regexp_replace(label, '^.*_dis_', '') num
from mytable
The first expression suppresses everything from '_dis_' (included) to the end of the string. The second expression removes everything from the beginning of the string until '_dis_' (included).

Comparing 2 lists in Oracle

I have 2 lists which I need to compare. I need to find if at least one element from List A is found in List B. I know IN doesn't work with 2 lists. What are my other options?
Basically something like this :
SELECT
CASE WHEN ('A','B','C') IN ('A','Z','H') THEN 1 ELSE 0 END "FOUND"
FROM DUAL
Would appreciate any help!
You are probably looking for something like this. The WITH clause is there just to simulate your "lists" (whatever you mean by that); they are not really part of the solution. The query you need is just the last three lines (plus the semicolon at the end).
with
first_list (str) as (
select 'A' from dual union all
select 'B' from dual union all
select 'C' from dual
),
second_list(str) as (
select 'A' from dual union all
select 'Z' from dual union all
select 'H' from dual
)
select case when exists (select * from first_list f join second_list s
on f.str = s.str) then 1 else 0 end as found
from dual
;
FOUND
----------
1
In Oracle you can do:
select
count(*) as total_matches
from table(sys.ODCIVarchar2List('A', 'B', 'C')) x,
table(sys.ODCIVarchar2List('A', 'Z', 'H')) y
where x.column_value = y.column_value;
You need to repeat the conditions:
SELECT (CASE WHEN 'A' IN ('A', 'Z', 'H') OR
'B' IN ('A', 'Z', 'H') OR
'C' IN ('A', 'Z', 'H')
THEN 1 ELSE 0
END) as "FOUND"
FROM DUAL
If you are working with collection of String you can try Multiset Operators.
create type coll_of_varchar2 is table of varchar2(4000);
and:
-- check whether the two collections share at least one element
select * from dual where cardinality (coll_of_varchar2('A','B','C') multiset intersect coll_of_varchar2('A','Z','H')) > 0;
-- list the matching elements
select * from table(coll_of_varchar2('A','B','C') multiset intersect coll_of_varchar2('A','Z','H'));
Additionally:
-- union of the elements of both collections (DISTINCT and ALL variants)
select * from table(coll_of_varchar2('A','B','C') multiset union distinct coll_of_varchar2('A','Z','H'));
select * from table(coll_of_varchar2('A','B','C') multiset union all coll_of_varchar2('A','Z','H'));
-- elements of the first collection that are not in the second (ALL and DISTINCT variants)
select * from table(coll_of_varchar2('A','A','B','C') multiset except all coll_of_varchar2('A','Z','H'));
select * from table(coll_of_varchar2('A','A','B','C') multiset except distinct coll_of_varchar2('A','Z','H'));
-- check whether the first collection is a subset of the second
select * from dual where coll_of_varchar2('B','A') submultiset coll_of_varchar2('A','Z','H','B');
I am trying to do something very similar but the first list is another field on the same query created with listagg and containing integer numbers like:
LISTAGG(my_first_list,', ') WITHIN GROUP(
ORDER BY
my_id
) my_first_list
and return this with all the other fields that I am already returning
SELECT
CASE WHEN my_first_list IN ('1,2,3') THEN 1 ELSE 0 END "FOUND"
FROM DUAL