Bigquery SQL to convert single value string to multi value based on separator/prefix

Bigquery SQL to convert single value string to multi value based on separator/prefix - google-bigquery

I have a column value string with a + or - prefix, as below:
id val
1 +a+b+c-d-e-f+g
Now based on + or - separator I need to build the dataset as follows :
id new_val prefix
1 a +
1 b +
1 c +
1 d -
1 e -
1 f -
1 g +
In addition, the string is not of fixed length, i.e. it can continue with either separator (+ or -) and differs from row to row.
Any guide on big-query SQL to do this transformation would be helpful.
Update :
I am using this query but missing some value though :
-- Asker's attempt (BigQuery): split the string once on '+' and once on '-',
-- label each piece with the separator used, and keep only 1-character pieces.
-- Note: the sample literal here is '+a+b+c-d-f+g' (no 'e').
with mytable as (
select 1 as id, '+a+b+c-d-f+g' as val1,
)
select * from (
-- Pieces produced by splitting on '+' can still contain '-' (e.g. 'c-d-f').
select id, new_val1 , '+' symbol
from mytable, unnest(split(val1, '+')) as new_val1 WITH OFFSET AS val1_offset
union all
-- Pieces produced by splitting on '-' can still contain '+' (e.g. 'f+g').
select id, new_val1 , '-' symbol
from mytable, unnest(split(val1, '-')) as new_val1 WITH OFFSET AS val1_offset
-- The length = 1 filter discards every piece that still holds the other
-- separator, which is why values such as 'c' and 'f' go missing.
) where length(new_val1) = 1 and new_val1 is not null

Consider below approach
-- Extract every "<sign><text>" token from val, then separate the leading
-- sign (prefix) from the remaining text (new_val). One output row per token.
select
  id,
  substr(part, 2) as new_val,
  substr(part, 1, 1) as prefix
from `project.dataset.table`
cross join unnest(regexp_extract_all(val, r'[+-][^+-]+')) as part
If applied to sample data in your question - output is

The string can also be split into substrings by first inserting an additional separator that does not occur in the string:
-- Put a ';' in front of every '+' and '-', drop the leading ';', split on ';',
-- then peel the sign off each resulting token.
with src as (
  select 1 as id, "+a+b+c-d-e-f+g" as val
),
marked as (
  select
    id,
    split(substr(replace(replace(val, '-', ';-'), '+', ';+'), 2), ';') as val_tmp
  from src
)
select
  id,
  substr(vals, 2) as new_val,
  substr(vals, 1, 1) as prefix
from marked as t
cross join unnest(t.val_tmp) as vals
If you have more than + and -, regex would be a better option:
-- Fragment only (no FROM clause): a regex-based alternative for building val_tmp.
-- REGEXP_REPLACE puts ';' in front of each run of sign characters matched by
-- [+-]+ (the captured run is re-inserted via \\1); substr(..., 2) drops the
-- first character of the result, and split(..., ';') yields the token array.
SELECT id, split(substr(REGEXP_REPLACE(val,r"([+-]+)", ";\\1"),2) ,';') as val_tmp

Related

Converting alphanumeric to numeric and vice versa in oracle

I have a requirement to convert alphanumeric to numeric and vice-versa.
Example: If 'A2' is passed then I have written below query to convert it to numeric:
-- Decode a base-36 string ('A2') to a number: sum of digit_value * 36^(position-1),
-- where position 1 is the rightmost character.
select sum(val) from (
select power(36, loc - 1) *
-- digit value: '0'..'9' map to 0..9, otherwise 10 + offset from 'A'
case when letter between '0'
and '9'
then to_number(letter)
else 10 + ascii(letter) - ascii('A')
end as val from(
-- one row per character, taken from the right end of ip_str (level = 1 is the last character)
select substr(ip_str, length(ip_str) + 1 - level, 1) letter,
level loc from(select 'A2'
ip_str from dual) connect by level <= length(ip_str)
)
); --sum(val) returns 362
How do I decode 362 back to 'A2'?

Base N Convert - this site describes algorithm. I implemented it as recursive query:
-- Encode a number in base 36 by repeated division: every recursion step emits
-- one digit (num mod 36) and carries the quotient forward; the digits are then
-- concatenated from the most significant (highest step) to the least.
with
    input_value (num) as (
        select 362 from dual
    ),
    digits (digit_val, rest, digit_pos) as (
        select mod(num, 36), floor(num/36), 1
        from input_value
        union all
        select mod(rest, 36), floor(rest/36), digit_pos + 1
        from digits
        where rest > 0
    )
select
    listagg(
        case
            when digit_val <= 9 then to_char(digit_val)
            else chr(ascii('A') - 10 + digit_val)
        end
    ) within group (order by digit_pos desc) b36
from digits
dbfiddle demo
Seems to work, I tested several values comparing results with online calculators. Theoretically you can use other bases, not only 36, algorithm is the same, but I did not test it.

Convert String to Tuple in BigQuery

I have a variable passed as an argument in BigQuery which is in the format "('a','b','c')"
-- Template fragment: "{0}" is a placeholder that is substituted with the
-- argument before the query is run, so var1 holds the whole list as one string.
with vars as (
select "{0}" as var1,
)
-- where, {0} = "('a','b','c')"
To use it in BigQuery I need to make it a tuple ('a','b','c').
How can it be done?
Any alternate approach is also welcome.
Example:
-- Asker's attempt: the subquery returns a single row whose value is the
-- literal string "('a','b','c')", so IN compares index against that one string.
with vars as (
select "('a','b','c')" as index
)
select * from `<some_other_db>.table` where index in (
select index from vars)
-- gives me empty results because index is now a string
Present output:
select * from <db_name>.table where index in "('a','b','c')"
Required output:
select * from <db_name>.table where index in ('a','b','c')

Below is for BigQuery Standard SQL
#standardSQL
-- Strip the parentheses and quotes from the string, split what is left on
-- commas into an array, and test membership with IN UNNEST.
WITH vars AS (
  SELECT "('a','b','c')" AS var
),
wanted AS (
  SELECT SPLIT(REGEXP_REPLACE(var, r'[()\']', ''), ',') AS items
  FROM vars
)
SELECT *
FROM `<some_other_db>.table`
WHERE index IN UNNEST((SELECT items FROM wanted))
You can test, play with above using some dummy data as in below example
#standardSQL
-- Self-contained demo: the lookup table is replaced by four inline rows so the
-- IN UNNEST filter can be tried without any real table.
WITH vars AS (
  SELECT "('a','b','c')" AS var
),
`<some_other_db>.table` AS (
  SELECT *
  FROM UNNEST([
    STRUCT(1 AS id, 'a' AS index),
    STRUCT(2, 'd'),
    STRUCT(3, 'c'),
    STRUCT(4, 'e')
  ])
),
wanted AS (
  SELECT SPLIT(REGEXP_REPLACE(var, r'[()\']', ''), ',') AS items
  FROM vars
)
SELECT *
FROM `<some_other_db>.table`
WHERE index IN UNNEST((SELECT items FROM wanted))
with output
Row id index
1 1 a
2 3 c

I think this does what you are asking for:
-- Remove the parentheses, split on commas, and pivot the first three elements
-- (by their array offset) into the struct fields f1, f2 and f3.
with vars as (
  select "('a','b','c')" as var1
)
select as struct
  max(if(pos = 0, item, null)) as f1,
  max(if(pos = 1, item, null)) as f2,
  max(if(pos = 2, item, null)) as f3
from vars as v
cross join unnest(
  split(replace(replace(var1, '(', ''), ')', ''), ',')
) as item with offset as pos;

Possible to Search Partial Matched Strings from same table?

I have a table and lets say the table has items with the item numbers:
12345
12345_DDM
345653
2345664
45567
45567_DDM
I am having trouble creating a query that will get all of the _DDM and the corresponding item that has the same prefix digits.
So in this case I'd want both 12345 and 12345_DDM etc to be returned

Use like to find rows with _DDM.
Use EXISTS to find rows with numbers also having a _DDM row.
working demo
-- Return every row ending in the literal suffix '_DDM', plus every row whose
-- value has a matching "<value>_DDM" companion row in the same table.
-- '_' is a single-character wildcard in LIKE, so it is escaped here; the
-- unescaped pattern '%_DDM' would also match values such as '12345XDDM'.
select t1.*
from tablename t1
where t1.columnname like '%!_DDM' escape '!'
   or exists (
        select 1
        from tablename t2
        where t2.columnname = t1.columnname + '_DDM'
   )

Try this query:
-- sample data
;with tbl as (
    select col
    from (values ('12345'), ('12345_DDM'), ('345653'), ('2345664'), ('45567'), ('45567_DDM')) as src(col)
),
-- prefix = the part of col before '_DDM' (or the whole value when there is none)
prefixed as (
    select
        col,
        case
            when charindex('_DDM', col) - 1 > 0
                then substring(col, 1, charindex('_DDM', col) - 1)
            else col
        end as prefix
    from tbl
),
-- flag every row whose prefix group contains at least one '_DDM' value
flagged as (
    select
        col,
        max(case when charindex('_DDM', col) > 0 then 1 else 0 end)
            over (partition by prefix) as has_ddm_in_group
    from prefixed
)
-- select query
select col
from flagged
where has_ddm_in_group = 1

Find way for gathering data and replace with values from another table

I am looking for an Oracle SQL query to find a specific pattern and replace them with values from another table.
Scenario:
Table 1:
No column1
-----------------------------------------
12345 user:12345;group:56789;group:6785;...
Note: field 1 may be has one or more pattern
Table2 :
Id name type
----------------------
12345 admin user
56789 testgroup group
Result must be the same
No column1
-----------------------------------
12345 user: admin;group:testgroup

Logic:
First split the concatenated string to individual rows using connect
by clause and regex.
Join the newly created table(split_tab) with Table2(tab2).
Use listagg function to concatenate data in the columns.
Query:
-- Replace each "category:id" token of tab1.column1 with "category:name" taken
-- from tab2; tokens that have no match in tab2 are dropped.
WITH tab1 AS (
    SELECT '12345' AS no,
           'user:12345;group:56789;group:6785;' AS column1
    FROM dual
),
tab2 AS (
    SELECT 12345 AS id, 'admin' AS name, 'user' AS type FROM dual
    UNION ALL  -- rows are distinct by construction, so no de-duplication is needed
    SELECT 56789 AS id, 'testgroup' AS name, 'group' AS type FROM dual
),
-- One row per "category:id" token. Both parts come from the SAME regex match
-- (sub-expressions 1 and 2), so a category can never be paired with the id of
-- a different token.
split_tab AS (
    SELECT no,
           LEVEL AS pos,
           REGEXP_SUBSTR(column1, '([a-z]+):(\d+)', 1, LEVEL, NULL, 1) AS category,
           TO_NUMBER(REGEXP_SUBSTR(column1, '([a-z]+):(\d+)', 1, LEVEL, NULL, 2)) AS id
    FROM tab1
    CONNECT BY LEVEL <= REGEXP_COUNT(column1, '[a-z]+:\d+')
           -- keep the row generator inside each source row, otherwise a
           -- multi-row tab1 multiplies rows across each other
           -- (assumes "no" identifies a row of tab1 - TODO confirm)
           AND PRIOR no = no
           AND PRIOR SYS_GUID() IS NOT NULL
)
SELECT s.no,
       -- order by token position so the output keeps the input order
       LISTAGG(s.category || ':' || t.name, ';') WITHIN GROUP (ORDER BY s.pos) AS column1
FROM split_tab s
INNER JOIN tab2 t
    ON  t.id = s.id
    AND t.type = s.category  -- an id only matches a lookup row of the same type
GROUP BY s.no
Output:
No Column1
12345 user:admin;group:testgroup

-- Replaces each ';'-separated token of t1.col (e.g. 'user:12345') with its
-- substitute from t2 (e.g. 'user:admin'), keeping tokens that have no
-- substitute, and re-joins the tokens in their original order.
with t1 (no, col) as
(
-- start of test data
select 1, 'user:12345;group:56789;group:6785;' from dual union all
select 2, 'user:12345;group:56789;group:6785;' from dual
-- end of test data
)
-- the lookup table which has the substitute strings
-- nid : type || ':' || id, i.e. the token form used in t1 that is looked up
-- tname : type || ':' || name, the required substitute for each nid
, t2 (id, name, type, nid, tname) as
(
select t.*, type || ':' || id, type || ':' || name from
(
select 12345 id, 'admin' name, 'user' type from dual union all
select 56789, 'testgroup', 'group' from dual
) t
)
--select * from t2;
-- cte table calculates the indexes for the substrings (eg, user:12345)
-- no : sequence no in t1
-- col : the input string in t1
-- si : starting index of each substring in the 'col' input string that needs attention later
-- ei : index of the ';' that ends the substring (length(col)+1 when no further ';' exists)
-- idx : the order of substring to put them together later
,cte (no, col, si, ei, idx) as
(
-- anchor: first token starts at position 1
select no, col, 1, case when instr(col,';') = 0 then length(col)+1 else instr(col,';') end, 1 from t1 union all
-- step: next token starts right after the previous ';'; stops once that start is past the end of col
select no, col, ei+1, case when instr(col,';', ei+1) = 0 then length(col)+1 else instr(col,';', ei+1) end, idx+1 from cte where ei + 1 <= length(col)
)
-- coll pairs every token (sstr) with the string that replaces it (newstr)
,coll(no, col, sstr, idx, newstr) as
(
select
a.no, a.col, a.sstr, a.idx,
-- when a substitute is not found in t2, use the same input substring (eg. group:6785)
case when t2.tname is null then a.sstr else t2.tname end
from
-- sstr is the token text between si and ei (the ';' at ei is excluded)
(select cte.*, substr(col, si, ei-si) as sstr from cte) a
-- we don't want to miss if there is no substitute available in t2 for a substring
left outer join
t2
on (a.sstr = t2.nid)
)
-- rebuild one string per input row, tokens ordered by idx
select no, col, listagg(newstr, ';') within group (order by no, col, idx) from coll
group by no, col;

Check palindrome without using string functions with condition

I have a table EmployeeTable.
If I want only that records where employeename have character of 1 to 5
will be palindrome and there also condition like total character is more then 10 then 4 to 8 if character less then 7 then 2 to 5 and if character less then 5 then all char will be checked and there that are palindrome then only display.
Examples :- neen will be display
neetan not selected
kiratitamara will be selected
I try this something on string function like FOR first case like name less then 5 character long
-- Rows whose first five characters of EmployeeName read the same reversed.
-- Table name corrected from the misspelled "EmaployeeTable" to EmployeeTable.
SELECT SUBSTRING(EmployeeName,1,5),* from EmployeeTable where
REVERSE (SUBSTRING(EmployeeName,1,5))=SUBSTRING(EmployeeName,1,5)
I want to do that without string functions,
Can anyone help me on this?

You need at least SUBSTRING(), I have a solution like this:
(In SQL Server)
-- Counts the positions in the first half of @txt whose character differs from
-- its mirror position; 0 means @txt is a palindrome.
-- T-SQL variables are prefixed with '@' ('#' denotes a temporary table).
DECLARE @txt varchar(max) = 'abcba';

-- One row per character: cNo = 1-based position, cChar = character there.
-- NOTE(review): the walk stops at the first SUBSTRING result that compares
-- equal to ''; a space character presumably also does, because trailing
-- spaces are ignored in comparisons - confirm before using on text with spaces.
WITH CTE (cNo, cChar) AS (
    SELECT 1, SUBSTRING(@txt, 1, 1)
    UNION ALL
    SELECT cNo + 1, SUBSTRING(@txt, cNo + 1, 1)
    FROM CTE
    WHERE SUBSTRING(@txt, cNo + 1, 1) <> ''
)
SELECT COUNT(*)
FROM (
    -- cRevNo is the mirrored position (last character gets 1, and so on)
    SELECT *, ROW_NUMBER() OVER (ORDER BY cNo DESC) AS cRevNo
    FROM CTE t1
    CROSS JOIN (SELECT MAX(cNo) AS strLength FROM CTE) t2
) dt
WHERE dt.cNo <= dt.strLength / 2
  AND dt.cChar <> (SELECT dti.cChar FROM CTE dti WHERE dti.cNo = dt.cRevNo)
-- The CTE recurses once per character; lift the default limit of 100 levels
-- so that inputs longer than 100 characters do not raise an error.
OPTION (MAXRECURSION 0);
The result shows the number of differences; 0 means there are no differences, i.e. the string is a palindrome.
Note :
Current solution is Non-Case-Sensitive for change it to a Case-Sensitive you need to check the strings in a case-sensitive collation like Latin1_General_BIN
You can use this solution as a SVF or something like that.

I don't really understand why you don't want to use string functions in your query, but here is one solution. Compute everything beforehand:
Add Column:
-- Adds two persisted computed columns: [SubString] holds the slice of
-- EmployeeName that is checked, Palindrome holds the same slice reversed.
-- A row is a palindrome candidate when the two columns are equal.
--
-- Fixes over the previous version:
--   * both expressions were missing a closing parenthesis;
--   * SUBSTRING's third argument is a LENGTH, not an end position, so
--     "characters 4 to 8" is (start 4, length 5) and "2 to 5" is (start 2, length 4).
-- NOTE(review): the length thresholds (> 10, > 7) are kept as written; confirm
-- them against the actual requirement.
ALTER TABLE EmployeeTable
ADD [SubString] AS
    SUBSTRING(
        EmployeeName,
        CASE
            WHEN LEN(EmployeeName) > 10 THEN 4
            WHEN LEN(EmployeeName) > 7 THEN 2
            ELSE 1
        END,
        CASE
            WHEN LEN(EmployeeName) > 10 THEN 5  -- characters 4..8
            WHEN LEN(EmployeeName) > 7 THEN 4   -- characters 2..5
            ELSE 5                              -- characters 1..5
        END
    ) PERSISTED
GO
ALTER TABLE EmployeeTable
ADD Palindrome AS
    REVERSE(
        SUBSTRING(
            EmployeeName,
            CASE
                WHEN LEN(EmployeeName) > 10 THEN 4
                WHEN LEN(EmployeeName) > 7 THEN 2
                ELSE 1
            END,
            CASE
                WHEN LEN(EmployeeName) > 10 THEN 5  -- characters 4..8
                WHEN LEN(EmployeeName) > 7 THEN 4   -- characters 2..5
                ELSE 5                              -- characters 1..5
            END
        )
    ) PERSISTED
GO
Then your query will looks like:
-- Rows whose checked slice equals its reverse.
-- Table name corrected from "EmaployeeTable" to EmployeeTable, the table the
-- computed columns are added to.
SELECT * from EmployeeTable
where Palindrome = [SubString]
BUT!
This is not a good idea. Please tell us why you don't want to use string functions.

You could do it by building a list of palindrome words with a recursive query that generates palindromes up to a length of n characters, and then selecting the employees whose name matches one of them. This may be a really inefficient way, but it does the trick.
This is a sample query for Oracle, PostgreSQL should support this feature as well with little differences on syntax. I don't know about other RDBMS.
-- Generates palindrome words with a recursive query and returns the employees
-- whose name is one of the generated words.
-- EmployeeTable: inline sample data (three names).
with EmployeeTable AS (
SELECT 'ADA' AS employeename
FROM DUAL
UNION ALL
SELECT 'IDA' AS employeename
FROM DUAL
UNION ALL
SELECT 'JACK' AS employeename
FROM DUAL
-- letters: one row per character from 'A' to 'Z'
), letters as (
select chr(ascii('A') + rownum - 1) as letter
from dual
connect by ascii('A') + rownum - 1 <= ascii('Z')
-- palindromes: seeds are a NULL word (len 0) and every single letter (len 1)
), palindromes(word, len ) as (
SELECT WORD, LEN
FROM (
select CAST(NULL AS VARCHAR2(100)) as word, 0 as len
from DUAL
union all
select letter as word, 1 as len
from letters
)
union all
-- recursive step: wrap an existing word in the same letter on both sides.
-- len grows by 1 per step while the word gains two characters, so len works
-- as a step counter; "len <= 4" bounds the number of steps.
select l.letter||p.word||l.letter AS WORD, len + 1 AS LEN
from palindromes p
cross join letters l
where len <= 4
)
SEARCH BREADTH FIRST BY word SET order1
CYCLE word SET is_cycle TO 'Y' DEFAULT 'N'
-- keep only the employees whose name appears among the generated words
select *
from EmployeeTable
WHERE employeename IN (
SELECT WORD
FROM palindromes
)

-- Reports whether @cPalindrome (spaces removed) reads the same in both directions.
-- T-SQL variables are prefixed with '@' ('#' denotes a temporary table).
DECLARE @cPalindrome VARCHAR(100) = 'SUBI NO ONIBUS'
SET @cPalindrome = REPLACE(@cPalindrome, ' ', '')
-- iNo counts how many positions, walking from the left, match their mirror
-- position; the walk stops at the first mismatch or at the end of the string.
;WITH tPalindromo (iNo) AS (
    SELECT 1
    WHERE SUBSTRING(@cPalindrome, 1, 1) = SUBSTRING(@cPalindrome, LEN(@cPalindrome), 1)
    UNION ALL
    SELECT iNo + 1
    FROM tPalindromo
    WHERE SUBSTRING(@cPalindrome, iNo + 1, 1) = SUBSTRING(@cPalindrome, LEN(@cPalindrome) - iNo, 1)
      AND LEN(@cPalindrome) > iNo
)
-- Every position matched only when the counter reached the string length.
SELECT IIF(MAX(iNo) = LEN(@cPalindrome), 'PALINDROME', 'NOT PALINDROME')
FROM tPalindromo

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Bigquery SQL to convert single value string to multi value based on separator/prefix - google-bigquery

Consider below approach select id, substr(part, 2) new_val, substr(part, 1, 1) prefix from `project.dataset.table`, unnest(regexp_extract_all(val, r'[+-][^+-]+')) part If applied to sample data in your question - output is

Related

Converting alphanumeric to numeric and vice versa in oracle

Convert String to Tuple in BigQuery

Possible to Search Partial Matched Strings from same table?

Find way for gathering data and replace with values from another table

Check palindrome without using string functions with condition

Categories

Resources