Bigquery SQL to convert single value string to multi value based on separator/prefix - google-bigquery

I have a column value string with + or - orefix as below :
id val
1 +a+b+c-d-e-f+g
Now based on + or - separator I need to build the dataset as follows :
id new_val prefix
1 a +
1 b +
1 c +
1 d -
1 e -
1 f -
1 g +
And to add the string is not fixed length ie it would continue with either separator (+ or -) for different rows.
Any guide on big-query SQL to do this transformation would be helpful.
Update :
I am using this query but missing some value though :
with mytable as (
select 1 as id, '+a+b+c-d-f+g' as val1,
)
select * from (
select id, new_val1 , '+' symbol
from mytable, unnest(split(val1, '+')) as new_val1 WITH OFFSET AS val1_offset
union all
select id, new_val1 , '-' symbol
from mytable, unnest(split(val1, '-')) as new_val1 WITH OFFSET AS val1_offset
) where length(new_val1) = 1 and new_val1 is not null

Consider below approach
select id, substr(part, 2) new_val, substr(part, 1, 1) prefix
from `project.dataset.table`,
unnest(regexp_extract_all(val, r'[+-][^+-]+')) part
If applied to sample data in your question - output is

The split into substrings can be done, by adding a further separator, which does the string do not contains:
select id, substr(vals,2) as new_val, substr(vals,1,1) as prefix
from (
SELECT id, split(substr(replace(replace(val,'-',';-'),'+',';+'),2) ,';') as val_tmp
from (select 1 as id, "+a+b+c-d-e-f+g" as val)
) as t, unnest(t.val_tmp) as vals
If you have more than + and -, regex would be a better option:
SELECT id, split(substr(REGEXP_REPLACE(val,r"([+-]+)", ";\\1"),2) ,';') as val_tmp

Related

Converting alphanumeric to numeric and vice versa in oracle

I have a requirement to convert alphanumeric to numeric and vice-versa.
Example: If 'A2' is passed then I have written below query to convert it to numeric:
select sum(val) from (
select power(36, loc - 1) *
case when letter between '0'
and '9'
then to_number(letter)
else 10 + ascii(letter) - ascii('A')
end as val from(
select substr(ip_str, length(ip_str) + 1 - level, 1) letter,
level loc from(select 'A2'
ip_str from dual) connect by level <= length(ip_str)
)
); --sum(val) returns 362
How do I decode 362 back to 'A2'?
Base N Convert - this site describes algorithm. I implemented it as recursive query:
with
t(num) as (select 362 from dual),
r(md, div, lvl) as (
select mod(num, 36), floor(num/36), 1 from t union all
select mod(div, 36), floor(div/36), lvl + 1 from r where div > 0)
select listagg(case when md > 9 then chr(ascii('A') - 10 + md)
else to_char(md)
end) within group (order by lvl desc) b36
from r
dbfiddle demo
Seems to work, I tested several values comparing results with online calculators. Theoretically you can use other bases, not only 36, algorithm is the same, but I did not test it.

Convert String to Tuple in BigQuery

I have a variable passed as an argument in BigQuery which is in the format "('a','b','c')"
with vars as (
select "{0}" as var1,
)
-- where, {0} = "('a','b','c')"
To use it in BigQuery I need to make it a tuple ('a','b','c').
How can it be done?
Any alternate approach is also welcome.
Example:
with vars as (
select "('a','b','c')" as index
)
select * from `<some_other_db>.table` where index in (
select index from vars)
-- gives me empty results because index is now a string
Present output:
select * from <db_name>.table where index in "('a','b','c')"
Required output:
select * from <db_name>.table where index in ('a','b','c')
Below is for BigQuery Standard SQL
#standardSQL
WITH vars AS (
SELECT "('a','b','c')" AS var
)
SELECT *
FROM `<some_other_db>.table`
WHERE index IN UNNEST((
SELECT SPLIT(REGEXP_REPLACE(var, r'[()\']', ''))
FROM vars
))
You can test, play with above using some dummy data as in below example
#standardSQL
WITH vars AS (
SELECT "('a','b','c')" AS var
), `<some_other_db>.table` AS (
SELECT 1 id, 'a' index UNION ALL
SELECT 2, 'd' UNION ALL
SELECT 3, 'c' UNION ALL
SELECT 4, 'e'
)
SELECT *
FROM `<some_other_db>.table`
WHERE index IN UNNEST((
SELECT SPLIT(REGEXP_REPLACE(var, r'[()\']', ''))
FROM vars
))
with output
Row id index
1 1 a
2 3 c
I think this does what you are asking for:
with vars as ( select "('a','b','c')" as var1)
select as struct
MAX(CASE WHEN n = 0 then var END) as f1,
MAX(CASE WHEN n = 1 then var END) as f2,
MAX(CASE WHEN n = 2 then var END) as f3
from vars v cross join
unnest(SPLIT(REPLACE(REPLACE(var1, '(', ''), ')', ''), ',')) var with offset n;

Possible to Search Partial Matched Strings from same table?

I have a table and lets say the table has items with the item numbers:
12345
12345_DDM
345653
2345664
45567
45567_DDM
I am having trouble creating a query that will get all of the _DDM and the corresponding item that has the same prefix digits.
So in this case I'd want both 12345 and 12345_DDM etc to be returned
Use like to find rows with _DDM.
Use EXISTS to find rows with numbers also having a _DDM row.
working demo
select *
from tablename t1
where columnname LIKE '%_DDM'
or exists (select 1 from tablename t2
where t1.columnname + '_DDM' = t2.columnname)
Try this query:
--sample data
;with tbl as (
select col from (values ('12345'),('12345_DDM'),('345653'),('2345664'), ('45567'),('45567_DDM')) A(col)
)
--select query
select col from (
select col,
prefix,
max(case when charindex('_DDM', col) > 0 then 1 else 0 end) over (partition by prefix) [prefixGroupWith_DDM]
from (
select col,
case when charindex('_DDM', col) - 1 > 0 then substring(col, 1, charindex('_DDM', col) - 1) else col end [prefix]
from tbl
) a
) a where [prefixGroupWith_DDM] = 1

Find way for gathering data and replace with values from another table

I am looking for an Oracle SQL query to find a specific pattern and replace them with values from another table.
Scenario:
Table 1:
No column1
-----------------------------------------
12345 user:12345;group:56789;group:6785;...
Note: field 1 may be has one or more pattern
Table2 :
Id name type
----------------------
12345 admin user
56789 testgroup group
Result must be the same
No column1
-----------------------------------
12345 user: admin;group:testgroup
Logic:
First split the concatenated string to individual rows using connect
by clause and regex.
Join the newly created table(split_tab) with Table2(tab2).
Use listagg function to concatenate data in the columns.
Query:
WITH tab1 AS
( SELECT '12345' NO
,'user:12345;group:56789;group:6785;' column1
FROM DUAL )
,tab2 AS
( SELECT 12345 id
,'admin' name
,'user' TYPE
FROM DUAL
UNION
SELECT 56789 id
,'testgroup' name
,'group' TYPE
FROM DUAL )
SELECT no
,listagg(category||':'||name,';') WITHIN GROUP (ORDER BY tab2.id) column1
FROM ( SELECT NO
,REGEXP_SUBSTR( column1, '(\d+)', 1, LEVEL ) id
,REGEXP_SUBSTR( column1, '([a-z]+)', 1, LEVEL ) CATEGORY
FROM tab1
CONNECT BY LEVEL <= regexp_count( column1, '\d+' ) ) split_tab
,tab2
WHERE split_tab.id = tab2.id
GROUP BY no
Output:
No Column1
12345 user:admin;group:testgroup
with t1 (no, col) as
(
-- start of test data
select 1, 'user:12345;group:56789;group:6785;' from dual union all
select 2, 'user:12345;group:56789;group:6785;' from dual
-- end of test data
)
-- the lookup table which has the substitute strings
-- nid : concatenation of name and id as in table t1 which requires the lookup
-- tname : required substitute for each nid
, t2 (id, name, type, nid, tname) as
(
select t.*, type || ':' || id, type || ':' || name from
(
select 12345 id, 'admin' name, 'user' type from dual union all
select 56789, 'testgroup', 'group' from dual
) t
)
--select * from t2;
-- cte table calculates the indexes for the substrings (eg, user:12345)
-- no : sequence no in t1
-- col : the input string in t1
-- si : starting index of each substring in the 'col' input string that needs attention later
-- ei : ending index of each substring in the 'col' input string
-- idx : the order of substring to put them together later
,cte (no, col, si, ei, idx) as
(
select no, col, 1, case when instr(col,';') = 0 then length(col)+1 else instr(col,';') end, 1 from t1 union all
select no, col, ei+1, case when instr(col,';', ei+1) = 0 then length(col)+1 else instr(col,';', ei+1) end, idx+1 from cte where ei + 1 <= length(col)
)
,coll(no, col, sstr, idx, newstr) as
(
select
a.no, a.col, a.sstr, a.idx,
-- when a substitute is not found in t2, use the same input substring (eg. group:6785)
case when t2.tname is null then a.sstr else t2.tname end
from
(select cte.*, substr(col, si, ei-si) as sstr from cte) a
-- we don't want to miss if there is no substitute available in t2 for a substring
left outer join
t2
on (a.sstr = t2.nid)
)
select no, col, listagg(newstr, ';') within group (order by no, col, idx) from coll
group by no, col;

Check palindrome without using string functions with condition

I have a table EmployeeTable.
If I want only that records where employeename have character of 1 to 5
will be palindrome and there also condition like total character is more then 10 then 4 to 8 if character less then 7 then 2 to 5 and if character less then 5 then all char will be checked and there that are palindrome then only display.
Examples :- neen will be display
neetan not selected
kiratitamara will be selected
I try this something on string function like FOR first case like name less then 5 character long
SELECT SUBSTRING(EmployeeName,1,5),* from EmaployeeTable where
REVERSE (SUBSTRING(EmployeeName,1,5))=SUBSTRING(EmployeeName,1,5)
I want to do that without string functions,
Can anyone help me on this?
You need at least SUBSTRING(), I have a solution like this:
(In SQL Server)
DECLARE #txt varchar(max) = 'abcba'
;WITH CTE (cNo, cChar) AS (
SELECT 1, SUBSTRING(#txt, 1, 1)
UNION ALL
SELECT cNo + 1, SUBSTRING(#txt, cNo + 1, 1)
FROM CTE
WHERE SUBSTRING(#txt, cNo + 1, 1) <> ''
)
SELECT COUNT(*)
FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY cNo DESC) as cRevNo
FROM CTE t1 CROSS JOIN
(SELECT Max(cNo) AS strLength FROM CTE) t2) dt
WHERE
dt.cNo <= dt.strLength / 2
AND
dt.cChar <> (SELECT dti.cChar FROM CTE dti WHERE dti.cNo = cRevNo)
The result will shows the count of differences and 0 means no differences.
Note :
Current solution is Non-Case-Sensitive for change it to a Case-Sensitive you need to check the strings in a case-sensitive collation like Latin1_General_BIN
You can use this solution as a SVF or something like that.
I dont realy understand why you dont want to use string functions in your query, but here is one solution. Compute everything beforehand:
Add Column:
ALTER TABLE EmployeeTable
ADD SubString AS
SUBSTRING(EmployeeName,
(
CASE WHEN LEN(EmployeeName)>10
THEN 4
WHEN LEN(EmployeeName)>7
THEN 2
ELSE 1 END
)
,
(
CASE WHEN LEN(EmployeeName)>10
THEN 8
WHEN LEN(EmployeeName)>7
THEN 5
ELSE 5 END
)
PERSISTED
GO
ALTER TABLE EmployeeTable
ADD Palindrome AS
REVERSE(SUBSTRING(EmployeeName,
(
CASE WHEN LEN(EmployeeName)>10
THEN 4
WHEN LEN(EmployeeName)>7
THEN 2
ELSE 1 END
)
,
(
CASE WHEN LEN(EmployeeName)>10
THEN 8
WHEN LEN(EmployeeName)>7
THEN 5
ELSE 5 END
)) PERSISTED
GO
Then your query will looks like:
SELECT * from EmaployeeTable
where Palindrome = SubString
BUT!
This is not a good idea. Please tell us, why you dont want to use string functios.
You could do it building a list of palindrome words using a recursive query that generates palindrome words till a length o n characters and then selects employees with the name matching a palindrome word. This may be a really inefficient way, but it does the trick
This is a sample query for Oracle, PostgreSQL should support this feature as well with little differences on syntax. I don't know about other RDBMS.
with EmployeeTable AS (
SELECT 'ADA' AS employeename
FROM DUAL
UNION ALL
SELECT 'IDA' AS employeename
FROM DUAL
UNION ALL
SELECT 'JACK' AS employeename
FROM DUAL
), letters as (
select chr(ascii('A') + rownum - 1) as letter
from dual
connect by ascii('A') + rownum - 1 <= ascii('Z')
), palindromes(word, len ) as (
SELECT WORD, LEN
FROM (
select CAST(NULL AS VARCHAR2(100)) as word, 0 as len
from DUAL
union all
select letter as word, 1 as len
from letters
)
union all
select l.letter||p.word||l.letter AS WORD, len + 1 AS LEN
from palindromes p
cross join letters l
where len <= 4
)
SEARCH BREADTH FIRST BY word SET order1
CYCLE word SET is_cycle TO 'Y' DEFAULT 'N'
select *
from EmployeeTable
WHERE employeename IN (
SELECT WORD
FROM palindromes
)
DECLARE #cPalindrome VARCHAR(100) = 'SUBI NO ONIBUS'
SET #cPalindrome = REPLACE(#cPalindrome, ' ', '')
;WITH tPalindromo (iNo) AS (
SELECT 1
WHERE SUBSTRING(#cPalindrome, 1, 1) = SUBSTRING(#cPalindrome, LEN(#cPalindrome), 1)
UNION ALL
SELECT iNo + 1
FROM tPalindromo
WHERE SUBSTRING(#cPalindrome, iNo + 1, 1) = SUBSTRING(#cPalindrome, LEN(#cPalindrome) - iNo, 1)
AND LEN(#cPalindrome) > iNo
)
SELECT IIF(MAX(iNo) = LEN(#cPalindrome), 'PALINDROME', 'NOT PALINDROME')
FROM tPalindromo