Oracle Pivot on column larger than 4000 characters - sql

-- Builds one comma-separated list of values per (id, type) and then pivots
-- the ids into columns, giving one output row per type.
SELECT type, user1, user2
FROM (
    SELECT
        id,
        type,
        -- Order by the aggregated column itself. "ORDER BY 1" inside
        -- WITHIN GROUP is the constant 1, not a column position, so the
        -- order of the list elements was arbitrary.
        LISTAGG(value, ',') WITHIN GROUP (ORDER BY value) AS value
    FROM (
        -- The sample rows are already distinct, so UNION ALL skips the
        -- de-duplication step that plain UNION performs.
        SELECT 'user1' AS id, 'BMW' AS value, 'CAR' AS type FROM dual UNION ALL
        SELECT 'user1' AS id, 'Audi' AS value, 'CAR' AS type FROM dual UNION ALL
        SELECT 'user2' AS id, 'Honda' AS value, 'CAR' AS type FROM dual UNION ALL
        SELECT 'user1' AS id, 'Dell' AS value, 'COMPUTER' AS type FROM dual UNION ALL
        SELECT 'user1' AS id, 'Sony' AS value, 'COMPUTER' AS type FROM dual UNION ALL
        SELECT 'user2' AS id, 'HP' AS value, 'COMPUTER' AS type FROM dual
    )
    GROUP BY id, type
)
PIVOT (MAX(value) FOR id IN ('user1' user1, 'user2' user2));
Is there a way of using the above example if the LISTAGG is to be larger than 4000 characters?
PIVOT doesn't seem to work on a CLOB column. I tried wrapping XMLAGG in .getClobVal(); this gives me a list longer than 4000 characters, but I cannot pivot on it.

Related

Regex to get 00 if the string has less digits

I have a column that is giving me output like 'ABC2001', 'ABC100145', 'ABC009282', 'ABC1901'
I want to change this column value so that '00' is inserted between the letters and the number if the number has fewer than 6 digits. Something like -
COL_A
------------
ABC2001
ABC100145
ABC009282
ABC1901
Expected output
COL_B
------------
ABC002001
ABC100145
ABC009282
ABC001901
How to use regex for this ? Currently I am using
-- Asker's attempt: the first 3 characters of COL_A, followed by COL_A with
-- every run of non-digit characters removed, left-padded with '0' to length 6.
SELECT SUBSTR(COL_A,1,3)||LPAD(REGEXP_REPLACE(COL_A,'\D+'),6,'0') FROM TAB
and it is giving me output like -
ABC210073
ABC210073
You do not need (slow) regular expressions and can use simple string functions:
-- col_b = the 3-character prefix of col_a followed by the rest of col_a,
-- left-padded with '0' to 6 characters.
SELECT
    t.col_a,
    CONCAT(
        SUBSTR(t.col_a, 1, 3),
        LPAD(SUBSTR(t.col_a, 4), 6, '0')
    ) AS col_b
FROM table_name t;
Which, for your sample data:
-- Sample data taken from the question: one string column, four rows.
CREATE TABLE table_name (col_a) AS
    SELECT 'ABC2001' FROM DUAL
    UNION ALL
    SELECT 'ABC100145' FROM DUAL
    UNION ALL
    SELECT 'ABC009282' FROM DUAL
    UNION ALL
    SELECT 'ABC1901' FROM DUAL;
Outputs:
COL_A
COL_B
ABC2001
ABC002001
ABC100145
ABC100145
ABC009282
ABC009282
ABC1901
ABC001901
db<>fiddle here
Just for fun, for different prefixes:
usual string functions: trim/lpad/substr:
-- Splits col_a into a prefix (everything before the trailing digits) and the
-- trailing number, left-pads the number with '0' to 6 characters and glues
-- the two parts back together as col_b.
WITH table_name (col_a) AS (
    SELECT 'ABC2001' FROM DUAL UNION ALL
    SELECT 'ABC100145' FROM DUAL UNION ALL
    SELECT 'ABC009282' FROM DUAL UNION ALL
    SELECT 'ABC1901' FROM DUAL UNION ALL
    -- other different prefixes:
    SELECT 'ABC2001' FROM dual UNION ALL
    SELECT 'AB100145' FROM dual UNION ALL
    SELECT 'A-BC9282' FROM dual UNION ALL
    SELECT 'A8C2374' FROM dual UNION ALL
    SELECT '7x-ABC32129' FROM dual UNION ALL
    SELECT '123ABC8942' FROM dual
)
SELECT v.*, prefix || num AS col_b
FROM (
    SELECT
        col_a,
        -- Stripping the trailing digits leaves the prefix.
        RTRIM(col_a, '0123456789') AS prefix,
        -- When col_a consists of digits only, RTRIM returns NULL (Oracle's
        -- empty string) and LENGTH(NULL) is NULL; NVL(..., 0) keeps the start
        -- position valid so such values are padded instead of becoming NULL.
        LPAD(
            SUBSTR(col_a, 1 + NVL(LENGTH(RTRIM(col_a, '0123456789')), 0)),
            6,
            '0'
        ) AS num
    FROM table_name
) v
;
DBFiddle
using regex functions:
-- Regex variant: prepend five zeros to the trailing number, then strip the
-- surplus leading zeros so that six digits remain where possible.
WITH table_name (col_a) AS (
    SELECT 'ABC2001' FROM DUAL
    UNION ALL SELECT 'ABC100145' FROM DUAL
    UNION ALL SELECT 'ABC009282' FROM DUAL
    UNION ALL SELECT 'ABC1901' FROM DUAL
    -- other different prefixes:
    UNION ALL SELECT 'ABC2001' FROM dual
    UNION ALL SELECT 'AB100145' FROM dual
    UNION ALL SELECT 'A-BC9282' FROM dual
    UNION ALL SELECT 'A8C2374' FROM dual
    UNION ALL SELECT '7x-ABC32129' FROM dual
    UNION ALL SELECT '123ABC8942' FROM dual
)
SELECT
    p.col_a,
    -- Step 2: drop the leading zeros that precede the last six digits.
    REGEXP_REPLACE(p.padded, '0*(\d{6})$', '\1') AS col_b
FROM (
    -- Step 1: put '00000' in front of the trailing run of digits.
    SELECT
        col_a,
        REGEXP_REPLACE(col_a, '(\d+)$', '00000\1') AS padded
    FROM table_name
) p
;
DBFiddle
Regex solution for padding the numbers to their maximum length, i.e. when the maximum number length is not known in advance (not hard-coded as 6):
-- Same two-step regex padding, but the target width is the length of the
-- longest trailing number found in table_name instead of a fixed 6.
SELECT
    measured.*,
    REGEXP_REPLACE(
        -- Prefix the trailing number with max_num_length zeros ...
        REGEXP_REPLACE(
            measured.col_a,
            '(\d+)$',
            LPAD('0', measured.max_num_length, '0') || '\1'
        ),
        -- ... then keep only the last max_num_length digits.
        '0*(\d{' || measured.max_num_length || '})$',
        '\1'
    ) AS col_b
FROM (
    SELECT
        src.*,
        MAX(LENGTH(REGEXP_SUBSTR(src.col_a, '\d+$'))) OVER () AS max_num_length
    FROM table_name src
) measured
;
DBFiddle

Count distinct letters in a string in bigquery

I have a string column in BigQuery like:
-- Five sample strings, one per row.
SELECT 'A'
UNION ALL
SELECT 'ab'
UNION ALL
SELECT 'abc'
UNION ALL
SELECT 'aa'
UNION ALL
SELECT 'aab'
I would like to count the number of distinct characters in every row of the column, in this case the results would be:
1
2
3
1
2
Can this be done in BigQuery? How?
How about this (assuming you don't want to differentiate between uppercase and lowercase)...
-- Counts the distinct letters a-z (case-insensitive) in each value by removing
-- those letters from the alphabet and measuring how much the alphabet shrank.
WITH data AS (
    SELECT 'A' AS val UNION ALL
    SELECT 'ab' UNION ALL
    SELECT 'abc' UNION ALL
    SELECT 'aa' UNION ALL
    SELECT 'aab'
)
SELECT
    val,
    26 - LENGTH(
        REGEXP_REPLACE(
            'abcdefghijklmnopqrstuvwxyz',
            -- Only the letters of val go into the character class, so
            -- characters such as '-', ']', '^' or '\' cannot alter or break
            -- the pattern. The leading '0' never matches the alphabet; it
            -- just keeps the class non-empty when val contains no letters.
            CONCAT('[0', REGEXP_REPLACE(LOWER(val), r'[^a-z]', ''), ']'),
            ''
        )
    ) AS distinct_letter_count
FROM data;
A simple approach is to use the SPLIT to convert your string to an array and UNNEST to convert the resulting array to a table. You may then use COUNT and DISTINCT to determine the number of unique characters as shown below:
-- For every row, split the string into single characters and count how many
-- different characters there are.
WITH my_data AS (
    SELECT 'A' AS col
    UNION ALL SELECT 'ab'
    UNION ALL SELECT 'abc'
    UNION ALL SELECT 'aa'
    UNION ALL SELECT 'aab'
)
SELECT
    col,
    (
        SELECT COUNT(*)
        FROM (
            -- One row per distinct character of col.
            SELECT ch
            FROM UNNEST(SPLIT(col, '')) AS ch
            GROUP BY ch
        )
    ) AS n
FROM my_data;
or simply
-- Same count, written with COUNT(DISTINCT ...) directly over the split string.
WITH my_data AS (
    SELECT col
    FROM UNNEST(['A', 'ab', 'abc', 'aa', 'aab']) AS col
)
SELECT
    col,
    (SELECT COUNT(DISTINCT ch) FROM UNNEST(SPLIT(col, '')) AS ch) AS cnt
FROM my_data;
Like the previous one, but using COUNT with DISTINCT:
-- Expands every string into one row per character, then counts the distinct
-- characters per original string value.
WITH my_data AS (
    SELECT 'A' AS col
    UNION ALL SELECT 'ab'
    UNION ALL SELECT 'abc'
    UNION ALL SELECT 'aa'
    UNION ALL SELECT 'aab'
)
SELECT
    d.col,
    COUNT(DISTINCT ch)
FROM my_data AS d
CROSS JOIN UNNEST(SPLIT(d.col, '')) AS ch
GROUP BY d.col
If the data is not very large, I would rather use a user-defined function to simplify the string manipulation across different columns:
-- JavaScript UDF returning the number of distinct characters in x
-- (case-sensitive); returns NULL for a NULL input.
CREATE TEMP FUNCTION
get_unique_char_count(x STRING)
RETURNS INT64
LANGUAGE js AS r"""
  // A NULL argument arrives as null; return it instead of throwing.
  if (x == null) {
    return null;
  }
  // Building the Set from the string iterates by Unicode code point, so
  // characters outside the BMP count once rather than as two surrogates.
  // No undeclared (implicitly global) variable is needed.
  return new Set(x).size;
""";
WITH
  result AS (
    SELECT 'A' AS val
    UNION ALL SELECT 'ab'
    UNION ALL SELECT 'abc'
    UNION ALL SELECT 'aa'
    UNION ALL SELECT 'aab'
  )
SELECT
  val,
  get_unique_char_count(val) AS unique_char_count
FROM
  result
RESULT:

how to get the missing values in SQL query when using in clause

Suppose I have the following query :
-- Returns the rows whose value is one of the three listed literals.
select value
from table
where value in ('abc', 'cde', 'efg');
If only 'abc' is populated in the table,
I want to be able to see which value is missing in the result set,
so the results looks like :
cde
efg
You can use UNION ALL to get a resultset with all the values that you want:
-- One row per value that should be looked up.
SELECT 'abc' AS value FROM dual
UNION ALL
SELECT 'cde' FROM dual
UNION ALL
SELECT 'efg' FROM dual
(you may omit FROM dual depending on your database).
And with NOT EXISTS get all the values from the above resultset that do not appear in the table:
-- Lists the wanted values that have no matching row in tablename.
WITH wanted AS (
    SELECT 'abc' AS value FROM dual
    UNION ALL
    SELECT 'cde' FROM dual
    UNION ALL
    SELECT 'efg' FROM dual
)
SELECT w.value
FROM wanted w
WHERE NOT EXISTS (
    SELECT 1
    FROM tablename t
    WHERE t.value = w.value
)

unpivot query join to other tables

I have a query like below
-- Turns the five tag columns of table_one into rows: one row per tag column,
-- with the column's content in "value" and the source column in "_tag_".
with t as (
    select
        ID,
        name,
        tag,
        tag_1,
        tag_2,
        tag_3,
        tag_4,
        location
    from table_one
)
select *
from t
unpivot (
    value for _tag_ in (tag, tag_1, tag_2, tag_3, tag_4)
)
Now, I want to join 3 other tables table1, table2, table3 to the above, I need to select other columns example col1, col2, col3 from those tables. Any idea on how to proceed with that.
I would use a lateral join in Oracle 12C+:
-- Unpivot via a lateral (CROSS APPLY) subquery: for every row of t, emit one
-- row per tag column, carrying id and name along.
select u.*
from t
cross apply (
    select t.id, t.name, t.tag from dual
    union all
    select t.id, t.name, t.tag_1 from dual
    union all
    select t.id, t.name, t.tag_2 from dual
    union all
    select t.id, t.name, t.tag_3 from dual
    union all
    select t.id, t.name, t.tag_4 from dual
) u;
You can then join to u as you would anything else:
-- Template, not runnable as written: ". . ." stands for the additional
-- columns to select, "x" for the table being joined, and "?" for the
-- columns used in the join condition.
select u.*, . . .
from t cross apply
(select id, name, tag from dual union all
select id, name, tag_1 from dual union all
select id, name, tag_2 from dual union all
select id, name, tag_3 from dual union all
select id, name, tag_4 from dual
) u join
x
on u.? = x.?;
In Oracle 11, you can do something similar if you make the unpivot a subquery or CTE.

What's the best way of re-using classification rules for multiple queries within big query standard SQL?

I'm using Big Query to analyse Google Analytics data.
I need to classify visits dependent on whether they visit particular URLs that indicate they were in the booking process or purchased etc.
There is a long list of URLs that represent each step and hence it would be advantageous to include the classifications within a view and re-use with appropriate joins for whatever query requires the classification.
I have the following view that seems to do what I need:
-- View body: one row per PAGE hit with the visitor and visit ids, the
-- lower-cased page path, a classification derived from that path, and the
-- table suffix exposed as "date".
-- The braces ({path-here}, {project-id}, {data-id}) and the dotted line are
-- placeholders from the question, not literal SQL.
SELECT
fullVisitorId,
visitID,
LOWER(h.page.pagePath) AS path,
-- Classification rules: exact match on '/' first, then prefix matches.
CASE
WHEN
LOWER(h.page.pagePath) = '/' THEN '/'
WHEN
LOWER(h.page.pagePath) LIKE '{path-here}%' OR
.... .... ....
ELSE 'other'
END
AS path_classification,
_TABLE_SUFFIX AS date
FROM
`{project-id}.{data-id}.ga_sessions_*`, UNNEST(hits) AS h
WHERE
-- Keep only tables whose suffix contains eight consecutive digits.
REGEXP_CONTAINS(_TABLE_SUFFIX, r'[0-9]{8}')
AND
h.type = 'PAGE'
I'm wondering if there's a simpler way of achieving this that doesn't require selecting from a pre-existing table as this doesn't seem necessary to define the classifications. I get the feeling that it's possible to use something more straight forward, but I'm not sure how to do it.
Does anyone know how to put these definitions into a view without querying a table within the view?
Let's consider simple example:
#standardSQL
-- Baseline: the classification rules are written inline as a CASE expression.
WITH yourTable AS (
  SELECT 1 AS id, '123' AS path
  UNION ALL SELECT 2, '234'
  UNION ALL SELECT 3, '345'
  UNION ALL SELECT 4, '456'
)
SELECT
  t.id,
  t.path,
  CASE
    WHEN t.path = '123' THEN 'a'
    WHEN t.path = '234' THEN 'b'
    WHEN t.path = '345' THEN 'c'
    ELSE 'other'
  END AS path_classification
FROM yourTable AS t
ORDER BY t.id
Above can be refactored into below
#standardSQL
-- Same classification, but the rules are read from the rules view: they are
-- collected into a single array and looked up per row.
WITH yourTable AS (
  SELECT 1 AS id, '123' AS path
  UNION ALL SELECT 2, '234'
  UNION ALL SELECT 3, '345'
  UNION ALL SELECT 4, '456'
)
SELECT
  t.id,
  t.path,
  COALESCE(
    -- rule whose cpath equals this row's path
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath = t.path LIMIT 1),
    -- otherwise the fallback rule, stored with a NULL cpath
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
  ) AS path_classification
FROM yourTable AS t
CROSS JOIN (
  SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
  FROM `project.dataset.rules`
) AS r
ORDER BY t.id
which relies on rules view that is defined as below
#standardSQL
-- Rules view: one row per (path, rule) pair; the NULL path is the fallback.
SELECT '123' AS path, 'a' AS rule
UNION ALL
SELECT '234', 'b'
UNION ALL
SELECT '345', 'c'
UNION ALL
SELECT NULL, 'other'
As you can see all classification rules are only in rules view!
You can play around with this approach using the query below:
#standardSQL
-- Self-contained version: both the data and the rules are inline CTEs.
WITH yourTable AS (
  SELECT 1 AS id, '123' AS path
  UNION ALL SELECT 2, '234'
  UNION ALL SELECT 3, '345'
  UNION ALL SELECT 4, '456'
),
rules AS (
  SELECT '123' AS path, 'a' AS rule
  UNION ALL SELECT '234', 'b'
  UNION ALL SELECT '345', 'c'
  UNION ALL SELECT NULL, 'other'
)
SELECT
  t.id,
  t.path,
  COALESCE(
    -- exact match on the rule path
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath = t.path LIMIT 1),
    -- fallback rule (NULL cpath)
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
  ) AS path_classification
FROM yourTable AS t
CROSS JOIN (
  SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
  FROM rules
) AS r
ORDER BY t.id
this can be further "simplified" by moving ARRAY_AGG inside view as below
#standardSQL
-- Rules view that already returns all rules as a single array column.
WITH rule_rows AS (
  SELECT '123' AS path, 'a' AS rule
  UNION ALL SELECT '234', 'b'
  UNION ALL SELECT '345', 'c'
  UNION ALL SELECT NULL, 'other'
)
SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
FROM rule_rows
In this case final query is as simple as below
#standardSQL
-- Final query: the rules view supplies the array, so only the lookup remains.
SELECT
  t.id,
  t.path,
  COALESCE(
    -- exact match on the rule path
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath = t.path LIMIT 1),
    -- fallback rule (NULL cpath)
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
  ) AS path_classification
FROM yourTable AS t
CROSS JOIN rules AS r
ORDER BY t.id
Depending on your specific rules, the above can/should be adjusted and optimized accordingly - but I hope this gives you the main direction
Q in comment: does your solution enable the use of matching with the LIKE keyword or matching with regex?
Original question was - What's the … way of re-using classification rules for multiple queries within big query standard SQL?
So above examples in my initial answer just show you how to make this happen (focus on “reuse”)
How you will use it (matching with the LIKE keyword or matching with regex) is totally up to you!
See example below
Take a look at path_classification_exact_match vs path_classification_like_match vs path_classification_regex_match
#standardSQL
-- Compares three ways of matching a path against the same rule set:
-- exact equality, LIKE (substring) and regular expression.
WITH yourTable AS (
  SELECT 1 AS id, '123' AS path
  UNION ALL SELECT 2, '234'
  UNION ALL SELECT 3, '345'
  UNION ALL SELECT 4, '456'
  UNION ALL SELECT 5, '234abc'
  UNION ALL SELECT 6, '345bcd'
  UNION ALL SELECT 7, '456cde'
),
rules AS (
  SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
  FROM (
    SELECT '123' AS path, 'a' AS rule
    UNION ALL SELECT '234', 'b'
    UNION ALL SELECT '345', 'c'
    UNION ALL SELECT NULL, 'other'
  )
)
SELECT
  t.id,
  t.path,
  -- Exact match: the rule path must equal the row's path.
  COALESCE(
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath = t.path LIMIT 1),
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
  ) AS path_classification_exact_match,
  -- LIKE match: the rule path may appear anywhere inside the row's path.
  COALESCE(
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE t.path LIKE CONCAT('%',rr.cpath,'%') LIMIT 1),
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
  ) AS path_classification_like_match,
  -- Regex match: the rule path is used as a regular expression.
  COALESCE(
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE REGEXP_CONTAINS(t.path, rr.cpath) LIMIT 1),
    (SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
  ) AS path_classification_regex_match
FROM yourTable AS t
CROSS JOIN rules AS r
ORDER BY t.id
Output is:
id path path_classification_exact_match path_classification_like_match path_classification_regex_match
1 123 a a a
2 234 b b b
3 345 c c c
4 456 other other other
5 234abc other b b
6 345bcd other c c
7 456cde other other other
Hope this helps :o)
It sounds like you may be interested in WITH clauses, which let you compose queries without having to use subqueries. For example,
#standardSQL
-- Chained CTEs: Sales holds the raw rows, ItemTotals sums the price per sku.
WITH Sales AS (
  SELECT 1 AS sku, 3.14 AS price
  UNION ALL SELECT 2, 1.00
  UNION ALL SELECT 3, 9.99
  UNION ALL SELECT 2, 0.90
  UNION ALL SELECT 1, 3.56
),
ItemTotals AS (
  SELECT
    s.sku,
    SUM(s.price) AS total
  FROM Sales AS s
  GROUP BY s.sku
)
SELECT
  it.sku,
  it.total
FROM ItemTotals AS it;
If you want to compose expressions, you can use CREATE TEMP FUNCTION statements to provide "macro-like" functionality:
#standardSQL
-- SQL temp function used as a reusable "macro": true when the lower-cased
-- argument is one of the listed cheese names.
CREATE TEMP FUNCTION LooksLikeCheese(s STRING) AS (
  LOWER(s) IN ('gouda', 'gruyere', 'havarti')
);
WITH samples AS (
  SELECT 'spam' AS s1, 'ham' AS s2
  UNION ALL SELECT 'havarti', 'crackers'
  UNION ALL SELECT 'gruyere', 'ice cream'
)
SELECT
  samples.s1,
  LooksLikeCheese(samples.s1) AS s1_is_cheese,
  samples.s2,
  LooksLikeCheese(samples.s2) AS s2_is_cheese
FROM samples;