Just continue from the answer for my previous question.
For each id, I want to return all rows from table b whenever the values in table b for that id differ in any way from the values in the array stored for the same id in table a.
-- Sample data: a holds one array of values per id,
-- b holds values as one row per (id, value).
WITH a AS (
  SELECT 1 AS id, ['123', 'abc', '456', 'qaz', 'uqw'] AS value UNION ALL
  SELECT 2, ['123', 'wer', 'thg', '10', '200'] UNION ALL
  SELECT 3, ['200'] UNION ALL
  SELECT 4, NULL UNION ALL
  SELECT 5, ['140']
),
b AS (
  SELECT 1 AS id, '123' AS value UNION ALL
  SELECT 1, 'abc' UNION ALL
  SELECT 1, '456' UNION ALL
  SELECT 1, 'qaz' UNION ALL
  SELECT 1, 'uqw' UNION ALL
  SELECT 2, '123' UNION ALL
  SELECT 2, 'wer' UNION ALL
  SELECT 2, '10' UNION ALL
  SELECT 3, NULL UNION ALL
  SELECT 4, 'wer' UNION ALL
  SELECT 4, '234' UNION ALL
  SELECT 5, '140' UNION ALL
  SELECT 5, '121'
)
-- Keep b's columns for every id whose partition has at least one joined row
-- without a b value; rows whose b.id is NULL are dropped.
SELECT * EXCEPT (flag)
FROM (
  SELECT
    b.*,
    -- Per id: number of joined rows where the b side is missing.
    COUNTIF(b.value IS NULL) OVER (PARTITION BY id) AS flag
  FROM a
  LEFT JOIN a.value  -- array path in FROM: one row per array element
  FULL OUTER JOIN b
    USING (id, value)
)
WHERE flag > 0
  AND id IS NOT NULL
It works well for all ids except 5.
In my case I need to return all values if there is any difference.
In the example, the array for id 5 in table a contains only one value, '140', while table b has two rows for id 5. So in this case all rows for id 5 from table b must also appear in the expected output.
How do I need to modify this query to get what I want?
UPDATED
This seems to work for me, but I cannot be 100% sure.
-- a and b are not defined in this fragment; presumably they are the
-- sample CTEs of the preceding query.
SELECT * EXCEPT (flag)
FROM (
  SELECT
    b.*,
    -- Counts, per id, the joined rows where exactly one of b.value / a.value
    -- is NULL (IS NULL never yields NULL, so != acts as an exclusive OR).
    COUNTIF((b.value IS NULL) != (a.value IS NULL)) OVER (PARTITION BY id) AS flag
  FROM a
  LEFT JOIN a.value
  FULL OUTER JOIN b
    USING (id, value)
)
WHERE flag > 0
  AND id IS NOT NULL
#standardSQL
-- Returns every table_b row whose id is selected by the subquery: ids where
-- the ordered, comma-joined b values differ from the ordered, comma-joined
-- elements of a.value. NULLs are rendered as the text 'NULL' on both sides.
SELECT *
FROM table_b
WHERE id IN (
  SELECT id
  FROM table_a AS a
  INNER JOIN table_b AS b
    USING (id)
  GROUP BY id
  HAVING
    STRING_AGG(COALESCE(b.value, 'NULL') ORDER BY b.value)
    != COALESCE(
      ANY_VALUE((
        SELECT STRING_AGG(COALESCE(value, 'NULL') ORDER BY value)
        FROM a.value
      )),
      'NULL'
    )
)
I'm using Big Query to analyse Google Analytics data.
I need to classify visits dependent on whether they visit particular URLs that indicate they were in the booking process or purchased etc.
There is a long list of URLs that represent each step and hence it would be advantageous to include the classifications within a view and re-use with appropriate joins for whatever query requires the classification.
I have the following view that seems to do what I need:
-- View body: one row per hit of type 'PAGE', carrying the visitor/visit ids,
-- the lower-cased page path, a classification label derived from that path,
-- and the wildcard table suffix exposed as "date".
SELECT
fullVisitorId,
visitID,
LOWER(h.page.pagePath) AS path,
-- WHEN branches are tried top to bottom; unmatched paths get 'other'.
CASE
WHEN
LOWER(h.page.pagePath) = '/' THEN '/'
WHEN
-- '{path-here}' and the dotted line below are placeholders from the
-- question, not runnable SQL.
LOWER(h.page.pagePath) LIKE '{path-here}%' OR
.... .... ....
ELSE 'other'
END
AS path_classification,
_TABLE_SUFFIX AS date
FROM
-- The comma join with UNNEST(hits) produces one row per element of hits.
`{project-id}.{data-id}.ga_sessions_*`, UNNEST(hits) AS h
WHERE
-- NOTE(review): the pattern is unanchored, so any suffix that merely
-- contains eight consecutive digits passes — confirm whether ^...$ is intended.
REGEXP_CONTAINS(_TABLE_SUFFIX, r'[0-9]{8}')
AND
h.type = 'PAGE'
I'm wondering if there's a simpler way of achieving this that doesn't require selecting from a pre-existing table as this doesn't seem necessary to define the classifications. I get the feeling that it's possible to use something more straight forward, but I'm not sure how to do it.
Does anyone know how to put these definitions into a view without querying a table within the view?
Let's consider simple example:
#standardSQL
-- Baseline: the classification rules are hard-coded in the query's CASE.
WITH yourTable AS (
  SELECT 1 AS id, '123' AS path
  UNION ALL SELECT 2, '234'
  UNION ALL SELECT 3, '345'
  UNION ALL SELECT 4, '456'
)
SELECT
  id,
  path,
  CASE
    WHEN path = '123' THEN 'a'
    WHEN path = '234' THEN 'b'
    WHEN path = '345' THEN 'c'
    ELSE 'other'
  END AS path_classification
FROM yourTable
ORDER BY id
Above can be refactored into below
#standardSQL
-- Same classification, but the rules are read from `project.dataset.rules`
-- and collected into a single array that is cross-joined to every row.
WITH yourTable AS (
  SELECT 1 AS id, '123' AS path
  UNION ALL SELECT 2, '234'
  UNION ALL SELECT 3, '345'
  UNION ALL SELECT 4, '456'
)
SELECT
  id,
  path,
  COALESCE(
    -- First rule whose cpath equals this row's path ...
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath = path LIMIT 1),
    -- ... otherwise the rule stored with a NULL cpath.
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath IS NULL LIMIT 1)
  ) AS path_classification
FROM yourTable
CROSS JOIN (
  SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
  FROM `project.dataset.rules`
) AS rule_set
ORDER BY id
which relies on rules view that is defined as below
#standardSQL
-- One row per classification rule (path -> rule label);
-- the last row has a NULL path and the label 'other'.
SELECT '123' AS path, 'a' AS rule
UNION ALL SELECT '234', 'b'
UNION ALL SELECT '345', 'c'
UNION ALL SELECT NULL, 'other'
As you can see all classification rules are only in rules view!
You can play around this approach with below :
#standardSQL
-- Self-contained variant: sample rows and the rule list are both inline CTEs.
WITH yourTable AS (
  SELECT 1 AS id, '123' AS path
  UNION ALL SELECT 2, '234'
  UNION ALL SELECT 3, '345'
  UNION ALL SELECT 4, '456'
),
rules AS (
  SELECT '123' AS path, 'a' AS rule
  UNION ALL SELECT '234', 'b'
  UNION ALL SELECT '345', 'c'
  UNION ALL SELECT NULL, 'other'
)
SELECT
  id,
  path,
  COALESCE(
    -- Rule whose cpath equals the row's path, else the NULL-cpath rule.
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath = path LIMIT 1),
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath IS NULL LIMIT 1)
  ) AS path_classification
FROM yourTable
CROSS JOIN (
  SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
  FROM rules
) AS rule_set
ORDER BY id
this can be further "simplified" by moving ARRAY_AGG inside view as below
#standardSQL
-- Rules view variant that returns a single row: all rules packed into one
-- array of (cpath, crule) structs.
SELECT
  ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
FROM (
  SELECT '123' AS path, 'a' AS rule
  UNION ALL SELECT '234', 'b'
  UNION ALL SELECT '345', 'c'
  UNION ALL SELECT NULL, 'other'
)
In this case final query is as simple as below
#standardSQL
-- yourTable and rules are not defined in this fragment; rules is read as a
-- relation exposing one array column named rules.
SELECT
  id,
  path,
  COALESCE(
    -- Rule whose cpath equals the row's path, else the NULL-cpath rule.
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath = path LIMIT 1),
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath IS NULL LIMIT 1)
  ) AS path_classification
FROM yourTable
CROSS JOIN rules AS rule_set
ORDER BY id
Depending on your specific rules, the above can (and should) be adjusted and optimized accordingly, but I hope this gives you the main direction.
Q in comment: does your solution enable the use of matching with the LIKE keyword or matching with regex?
Original question was - What's the … way of re-using classification rules for multiple queries within big query standard SQL?
So above examples in my initial answer just show you how to make this happen (focus on “reuse”)
How you will use it (matching with the LIKE keyword or matching with regex) is totally up to you!
See example below
Take a look at path_classification_exact_match vs path_classification_like_match vs path_classification_regex_match
#standardSQL
-- Compares three ways of matching a row's path against the shared rule list:
-- equality, LIKE with the rule wrapped in % wildcards, and REGEXP_CONTAINS.
WITH yourTable AS (
  SELECT 1 AS id, '123' AS path
  UNION ALL SELECT 2, '234'
  UNION ALL SELECT 3, '345'
  UNION ALL SELECT 4, '456'
  UNION ALL SELECT 5, '234abc'
  UNION ALL SELECT 6, '345bcd'
  UNION ALL SELECT 7, '456cde'
),
rules AS (
  SELECT
    ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
  FROM (
    SELECT '123' AS path, 'a' AS rule
    UNION ALL SELECT '234', 'b'
    UNION ALL SELECT '345', 'c'
    UNION ALL SELECT NULL, 'other'
  )
)
SELECT
  id,
  path,
  -- Each column falls back to the NULL-cpath rule when nothing matches.
  COALESCE(
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath = path LIMIT 1),
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath IS NULL LIMIT 1)
  ) AS path_classification_exact_match,
  COALESCE(
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE path LIKE CONCAT('%',entry.cpath,'%') LIMIT 1),
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath IS NULL LIMIT 1)
  ) AS path_classification_like_match,
  COALESCE(
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE REGEXP_CONTAINS(path, entry.cpath) LIMIT 1),
    (SELECT entry.crule FROM UNNEST(rule_set.rules) AS entry WHERE entry.cpath IS NULL LIMIT 1)
  ) AS path_classification_regex_match
FROM yourTable
CROSS JOIN rules AS rule_set
ORDER BY id
Output is:
id path path_classification_exact_match path_classification_like_match path_classification_regex_match
1 123 a a a
2 234 b b b
3 345 c c c
4 456 other other other
5 234abc other b b
6 345bcd other c c
7 456cde other other other
Hope this helps :o)
It sounds like you may be interested in WITH clauses, which let you compose queries without having to use subqueries. For example,
#standardSQL
-- Chained CTEs: Sales supplies sample rows, ItemTotals sums price per sku.
WITH Sales AS (
  SELECT 1 AS sku, 3.14 AS price
  UNION ALL SELECT 2, 1.00
  UNION ALL SELECT 3, 9.99
  UNION ALL SELECT 2, 0.90
  UNION ALL SELECT 1, 3.56
),
ItemTotals AS (
  SELECT
    sku,
    SUM(price) AS total
  FROM Sales
  GROUP BY sku
)
SELECT
  sku,
  total
FROM ItemTotals;
If you want to compose expressions, you can use CREATE TEMP FUNCTION statements to provide "macro-like" functionality:
#standardSQL
-- TRUE when the lower-cased input is one of the listed cheese names.
CREATE TEMP FUNCTION LooksLikeCheese(s STRING) AS (
  LOWER(s) IN ('gouda', 'gruyere', 'havarti')
);

-- Applies the function to both columns of three sample rows.
SELECT
  s1,
  LooksLikeCheese(s1) AS s1_is_cheese,
  s2,
  LooksLikeCheese(s2) AS s2_is_cheese
FROM (
  SELECT 'spam' AS s1, 'ham' AS s2
  UNION ALL SELECT 'havarti', 'crackers'
  UNION ALL SELECT 'gruyere', 'ice cream'
);
I'm currently working on a report parameter whose list of values depends on another parameter.
I have come up with this idea; is there any possible way for this to work?
-- Idea sketched by the question: expose value list A when :PM_Parameter1 is
-- 'A', otherwise value list B.
-- A: values of columnA1..columnA3 combined with UNION (UNION already removes
-- duplicates, so the DISTINCT keywords add nothing).
WITH A AS (
SELECT DISTINCT columnA1 FROM Table1
UNION SELECT DISTINCT columnA2 FROM Table1
UNION SELECT DISTINCT columnA3 FROM Table1)
-- NOTE(review): a statement takes a single WITH clause; additional named
-- subqueries are comma-separated after the first, so this second WITH does
-- not parse.
WITH B AS (SELECT DISTINCT columnB1 FROM Table1
UNION SELECT DISTINCT columnB2 FROM Table1
UNION SELECT DISTINCT columnB3 FROM Table1)
-- NOTE(review): CASE is a scalar expression and cannot be used as the table
-- reference of FROM; this is the part the question asks how to express.
Select * from CASE WHEN (:PM_Parameter1 = 'A')
THEN A
ELSE B
END;
Assuming this is Oracle SQL, you can use an EXISTS condition to check the parameter value, and then combine the sets using UNION.
Try playing with this SQL:
-- Two candidate row sets, each gated by an EXISTS probe against dual;
-- a set contributes rows only when its probe condition holds.
SELECT *
FROM (
    SELECT 'A' FROM dual
    UNION
    SELECT 'B' FROM dual
)
WHERE EXISTS (
    SELECT 'Y'
    FROM dual
    WHERE 'parameter' = 'parameter'
)
UNION
SELECT *
FROM (
    SELECT 'X' FROM dual
    UNION
    SELECT 'Y' FROM dual
)
WHERE EXISTS (
    SELECT 'Y'
    FROM dual
    WHERE 'parameter' != 'parameter'
)
If you swap the two conditions ('parameter' = 'parameter' and 'parameter' != 'parameter'), the query returns the other row set.
I am sure this can be optimized again, hope it works out for you.
I'm building a query to show average and variance from many columns.
To get the average I use this:
-- For rows with cd_durabilidade = 1: all columns plus [media], the average
-- of that row's ten peca_N columns (stacked into one column, then averaged).
SELECT
    *,
    (
        SELECT AVG(pecas.c)
        FROM (
            SELECT peca_1
            UNION ALL SELECT peca_2
            UNION ALL SELECT peca_3
            UNION ALL SELECT peca_4
            UNION ALL SELECT peca_5
            UNION ALL SELECT peca_6
            UNION ALL SELECT peca_7
            UNION ALL SELECT peca_8
            UNION ALL SELECT peca_9
            UNION ALL SELECT peca_10
        ) AS pecas(c)
    ) AS [media]
FROM Durabilidade
WHERE cd_durabilidade = 1
The result is:
Now I need a new column with VAR(media) comparing each row with first row.
Any idea?
I think this is a case where cross apply is appropriate. I am assuming that you want the variance of the values as calculated by the var() function:
-- For rows with cd_durabilidade = 1: every Durabilidade column plus
--   [media] - AVG() over that row's ten peca_N values
--   varval  - VAR() over the same ten values
-- d.* is used rather than *, because * would also expand the APPLY's own
-- avgval/varval columns and emit them a second time.
SELECT
    d.*,
    stats.avgval AS [media],
    stats.varval
FROM Durabilidade AS d
CROSS APPLY (
    SELECT
        AVG(v.val) AS avgval,
        VAR(v.val) AS varval
    -- Unpivot the ten columns of the current row into a one-column set.
    FROM (VALUES
        (d.peca_1),
        (d.peca_2),
        (d.peca_3),
        (d.peca_4),
        (d.peca_5),
        (d.peca_6),
        (d.peca_7),
        (d.peca_8),
        (d.peca_9),
        (d.peca_10)
    ) AS v(val)
) AS stats
WHERE d.cd_durabilidade = 1
Something like this?
-- Wraps the per-row average query as derived table x, then applies
-- VAR(media) per group.
-- NOTE(review): with GROUP BY, every column returned by * must appear in the
-- GROUP BY list; the column*_from_durabilidade names below are placeholders
-- for that list.
SELECT *,
VAR(media) AS [variance]
FROM
(
-- Inner query: all Durabilidade columns plus [media], the average of the
-- row's ten peca_N columns, for rows with cd_durabilidade = 1.
SELECT *,
(SELECT AVG(t.c)
FROM (
SELECT peca_1 UNION ALL
SELECT peca_2 UNION ALL
SELECT peca_3 UNION ALL
SELECT peca_4 UNION ALL
SELECT peca_5 UNION ALL
SELECT peca_6 UNION ALL
SELECT peca_7 UNION ALL
SELECT peca_8 UNION ALL
SELECT peca_9 UNION ALL
SELECT peca_10
) t(c)
) as [media]
from Durabilidade
where cd_durabilidade = 1
) x
GROUP BY
column1_from_durabilidade
,column2_from_durabilidade
--etc
-- NOTE(review): media is itself a grouping column, so each group holds one
-- distinct media value and VAR(media) cannot measure spread between rows —
-- confirm this matches the intent.
,media
Here's my data:
-- Lists each three-character prefix that is shared by more than one code.
WITH first_three AS (
    SELECT 'AAAA' AS code FROM dual
    UNION ALL SELECT 'BBBA' FROM dual
    UNION ALL SELECT 'BBBB' FROM dual
    UNION ALL SELECT 'BBBC' FROM dual
    UNION ALL SELECT 'CCCC' FROM dual
    UNION ALL SELECT 'CCCD' FROM dual
    UNION ALL SELECT 'FFFF' FROM dual
    UNION ALL SELECT 'GFFF' FROM dual
)
SELECT SUBSTR(code, 1, 3) AS r1
FROM first_three
GROUP BY SUBSTR(code, 1, 3)
HAVING COUNT(*) > 1
This query returns the three-character prefixes that meet the criteria. Now, how do I select from this to get the desired results? Or is there another way?
Desired Results
BBBA
BBBB
BBBC
CCCC
CCCD
-- Tags every code with the number of rows sharing its first three
-- characters, then keeps the codes whose prefix occurs more than once.
WITH code_frequency AS (
    SELECT
        code,
        COUNT(*) OVER (PARTITION BY SUBSTR(code, 1, 3)) AS frequency
    FROM table_name
)
SELECT code
FROM code_frequency
WHERE frequency > 1
-- Keeps each row for which a row with a different code but the same
-- three-character prefix exists. The CTE body is elided in the original.
WITH first_three AS (
...
)
SELECT f1.*
FROM first_three f1
WHERE EXISTS (
    SELECT 1
    FROM first_three f2
    WHERE SUBSTR(f2.code, 1, 3) = SUBSTR(f1.code, 1, 3)
      AND f2.code <> f1.code
)
-- Returns each res whose three-character prefix occurs on more than one row.
SELECT res
FROM (
    SELECT
        res,
        COUNT(*) OVER (PARTITION BY SUBSTR(res, 1, 3)) cn
    FROM table_name
)
WHERE cn > 1;