Join two subqueries with ON or USING - sql

I have two queries I need to LEFT JOIN the first one with the second one. The purpose is to wrap all of this inside of something else bigger. I got both the first and second queries working alone but cannot get them to join.
First Query:
SELECT *
FROM (
SELECT Source as system, DT as ts, Status as statusCode
FROM (
(SELECT 'SOURCE1' Source FROM Dual
UNION SELECT 'SOURCE2' FROM Dual
UNION SELECT 'SOURCE3' FROM Dual
UNION SELECT 'SOURCE4' FROM Dual
) system
CROSS JOIN (
SELECT
TO_DATE('09-30-2013','MM-DD-YYYY') - 1 + LEVEL dt
FROM dual
CONNECT BY
LEVEL <= ( TO_DATE('10/05/2013','MM/DD/YYYY')
- TO_DATE('09/30/2013','MM/DD/YYYY')) + 1
) ts
CROSS JOIN (
SELECT 'O' Status FROM Dual
UNION SELECT 'C' FROM Dual
) statusCode
)--For some reason cannot name this so need to wrap in another select *
)Duals
Second Query: (there would be a LEFT JOIN) between here
LEFT JOIN
Was tried
Select * FROM(
SELECT myTable1.system, TO_CHAR(maxResults.ts,'YYYY-MM-DD') as ts, myTable1.statusCode
FROM (
SELECT table_id, MAX(ts) as ts
FROM myTable1_history
WHERE ts BETWEEN TO_TIMESTAMP('2013-09-29','yyyy-mm-dd') AND TO_TIMESTAMP('2013-10-06','yyyy-mm-dd')
GROUP BY table_id )maxResults
JOIN myTable1
ON maxResults.table_id = myTable1.table_id
WHERE myTable1.statusCode = 'C'
UNION ALL
SELECT myTable1.system as "system", TO_CHAR(myTable1.ts,'YYYY-MM-DD') as "ts", 'O' as "statusCode"
FROM myTable1
WHERE myTable1.ts BETWEEN TO_TIMESTAMP('2013-09-29','yyyy-mm-dd') AND TO_TIMESTAMP('2013-10-06','yyyy-mm-dd')
--AND myTable1.statusCode = 'O'
)Records
and
USING (system, ts, statusCode)
I tried just sticking in a LEFT JOIN in the middle of the two queries but didn't work (I am probably doing it wrong) as shown
EDIT: Added the JOIN and USING as example of what was not working, receiving "invalid table name"

This is a guess, assuming you want to join on all columns?
(SELECT * FROM (
SELECT system, ts, statuscode
FROM (SELECT Source as system, DT as ts, Status as statusCode
FROM ((SELECT 'SOURCE1' Source FROM Dual
UNION SELECT 'SOURCE2' FROM Dual
UNION SELECT 'SOURCE3' FROM Dual
UNION SELECT 'SOURCE4' FROM Dual
) system CROSS JOIN (SELECT TO_DATE('09-30-2013','MM-DD-YYYY') - 1 + LEVEL dt
FROM dual
CONNECT BY
LEVEL <= ( TO_DATE('10/05/2013','MM/DD/YYYY')
- TO_DATE('09/30/2013','MM/DD/YYYY')) + 1
) ts CROSS JOIN (SELECT 'O' Status FROM Dual
UNION SELECT 'C' FROM Dual) statusCode
)
))duals LEFT JOIN
(Select * FROM(
SELECT myTable1.system, TO_CHAR(maxResults.ts,'YYYY-MM-DD') as ts, myTable1.statusCode
FROM (
SELECT table_id, MAX(ts) as ts
FROM myTable1_history
WHERE ts BETWEEN TO_TIMESTAMP('2013-09-29','yyyy-mm-dd') AND TO_TIMESTAMP('2013-10-06','yyyy-mm-dd')
GROUP BY table_id )maxResults
JOIN myTable1
ON maxResults.table_id = myTable1.table_id
WHERE myTable1.statusCode = 'C'
UNION ALL
SELECT myTable1.system, TO_CHAR(myTable1.ts,'YYYY-MM-DD') as ts, 'O' as statusCode
FROM myTable1
WHERE myTable1.ts BETWEEN TO_TIMESTAMP('2013-09-29','yyyy-mm-dd') AND TO_TIMESTAMP('2013-10-06','yyyy-mm-dd')
--AND myTable1.statusCode = 'O'
) Records ON duals.system = records.system AND duals.ts = records.ts AND duals.statusCode = records.statusCode

Related

What's the best way of re-using classification rules for multiple queries within big query standard SQL?

I'm using Big Query to analyse Google Analytics data.
I need to classify visits dependent on whether they visit particular URLs that indicate they were in the booking process or purchased etc.
There is a long list of URLs that represent each step and hence it would be advantageous to include the classifications within a view and re-use with appropriate joins for whatever query requires the classification.
I have the following view that seems to do what I need:
SELECT
fullVisitorId,
visitID,
LOWER(h.page.pagePath) AS path,
CASE
WHEN
LOWER(h.page.pagePath) = '/' THEN '/'
WHEN
LOWER(h.page.pagePath) LIKE '{path-here}%' OR
.... .... ....
ELSE 'other'
END
AS path_classification,
_TABLE_SUFFIX AS date
FROM
`{project-id}.{data-id}.ga_sessions_*`, UNNEST(hits) AS h
WHERE
REGEXP_CONTAINS(_TABLE_SUFFIX, r'[0-9]{8}')
AND
h.type = 'PAGE'
I'm wondering if there's a simpler way of achieving this that doesn't require selecting from a pre-existing table as this doesn't seem necessary to define the classifications. I get the feeling that it's possible to use something more straight forward, but I'm not sure how to do it.
Does anyone know how to put these definitions into a view without querying a table within the view?
Let's consider simple example:
#standardSQL
WITH yourTable AS (
SELECT 1 AS id, '123' AS path UNION ALL
SELECT 2, '234' UNION ALL
SELECT 3, '345' UNION ALL
SELECT 4, '456'
)
SELECT
id,
path,
CASE path
WHEN '123' THEN 'a'
WHEN '234' THEN 'b'
WHEN '345' THEN 'c'
ELSE 'other'
END AS path_classification
FROM yourTable
ORDER BY id
Above can be refactored into below
#standardSQL
WITH yourTable AS (
SELECT 1 AS id, '123' AS path UNION ALL
SELECT 2, '234' UNION ALL
SELECT 3, '345' UNION ALL
SELECT 4, '456'
)
SELECT
id,
path,
IFNULL(
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath = path LIMIT 1),
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
) AS path_classification
FROM yourTable,
(SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
FROM `project.dataset.rules`) AS r
ORDER BY id
which relies on rules view that is defined as below
#standardSQL
SELECT '123' AS path, 'a' AS rule UNION ALL
SELECT '234', 'b' UNION ALL
SELECT '345', 'c' UNION ALL
SELECT NULL, 'other'
As you can see all classification rules are only in rules view!
You can play around this approach with below :
#standardSQL
WITH yourTable AS (
SELECT 1 AS id, '123' AS path UNION ALL
SELECT 2, '234' UNION ALL
SELECT 3, '345' UNION ALL
SELECT 4, '456'
),
rules AS (
SELECT '123' AS path, 'a' AS rule UNION ALL
SELECT '234', 'b' UNION ALL
SELECT '345', 'c' UNION ALL
SELECT NULL, 'other'
)
SELECT
id,
path,
IFNULL(
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath = path LIMIT 1),
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
) AS path_classification
FROM yourTable,
(SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
FROM rules) AS r
ORDER BY id
this can be further "simplified" by moving ARRAY_AGG inside view as below
#standardSQL
SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
FROM (
SELECT '123' AS path, 'a' AS rule UNION ALL
SELECT '234', 'b' UNION ALL
SELECT '345', 'c' UNION ALL
SELECT NULL, 'other'
)
In this case final query is as simple as below
#standardSQL
SELECT
id,
path,
IFNULL(
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath = path LIMIT 1),
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
) AS path_classification
FROM yourTable, rules AS r
ORDER BY id
Depends on your specific rules - above can /should be adjusted/optimized respectively - but I hope this gives you a main direction
Q in comment: does your solution enable the use of matching with the LIKE keyword or matching with regex?
Original question was - What's the … way of re-using classification rules for multiple queries within big query standard SQL?
So above examples in my initial answer just show you how to make this happen (focus on “reuse”)
How you will use it (matching with the LIKE keyword or matching with regex) is totally up to you!
See example below
Take a look at path_classification_exact_match vs path_classification_like_match vs path_classification_regex_match
#standardSQL
WITH yourTable AS (
SELECT 1 AS id, '123' AS path UNION ALL
SELECT 2, '234' UNION ALL
SELECT 3, '345' UNION ALL
SELECT 4, '456' UNION ALL
SELECT 5, '234abc' UNION ALL
SELECT 6, '345bcd' UNION ALL
SELECT 7, '456cde'
),
rules AS (
SELECT ARRAY_AGG(STRUCT<cpath STRING, crule STRING>(path, rule)) AS rules
FROM (
SELECT '123' AS path, 'a' AS rule UNION ALL
SELECT '234', 'b' UNION ALL
SELECT '345', 'c' UNION ALL
SELECT NULL, 'other'
)
)
SELECT
id,
path,
IFNULL(
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath = path LIMIT 1),
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
) AS path_classification_exact_match,
IFNULL(
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE path LIKE CONCAT('%',rr.cpath,'%') LIMIT 1),
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
) AS path_classification_like_match,
IFNULL(
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE REGEXP_CONTAINS(path, rr.cpath) LIMIT 1),
( SELECT rr.crule FROM UNNEST(r.rules) AS rr WHERE rr.cpath IS NULL LIMIT 1)
) AS path_classification_regex_match
FROM yourTable, rules AS r
ORDER BY id
Output is:
id path path_classification_exact_match path_classification_like_match path_classification_regex_match
1 123 a a a
2 234 b b b
3 345 c c c
4 456 other other other
5 234abc other b b
6 345bcd other c c
7 456cde other other other
Hope this helps :o)
It sounds like you may be interested in WITH clauses, which let you compose queries without having to use subqueries. For example,
#standardSQL
WITH Sales AS (
SELECT 1 AS sku, 3.14 AS price UNION ALL
SELECT 2 AS sku, 1.00 AS price UNION ALL
SELECT 3 AS sku, 9.99 AS price UNION ALL
SELECT 2 AS sku, 0.90 AS price UNION ALL
SELECT 1 AS sku, 3.56 AS price
),
ItemTotals AS (
SELECT sku, SUM(price) AS total
FROM Sales
GROUP BY sku
)
SELECT sku, total
FROM ItemTotals;
If you want to compose expressions, you can use CREATE TEMP FUNCTION statements to provide "macro-like" functionality:
#standardSQL
CREATE TEMP FUNCTION LooksLikeCheese(s STRING) AS (
LOWER(s) IN ('gouda', 'gruyere', 'havarti')
);
SELECT
s1,
LooksLikeCheese(s1) AS s1_is_cheese,
s2,
LooksLikeCheese(s2) AS s2_is_cheese
FROM (
SELECT 'spam' AS s1, 'ham' AS s2 UNION ALL
SELECT 'havarti' AS s1, 'crackers' AS s2 UNION ALL
SELECT 'gruyere' AS s1, 'ice cream' AS s2
);

Can I have dynamic FROM Clause in SQL?

I'm currently working with my report parameter list of value that is dependent in another parameter.
I have come up with this idea, is there any possible way to for this to work?
WITH A AS (
SELECT DISTINCT columnA1 FROM Table1
UNION SELECT DISTINCT columnA2 FROM Table1
UNION SELECT DISTINCT columnA3 FROM Table1)
WITH B AS (SELECT DISTINCT columnB1 FROM Table1
UNION SELECT DISTINCT columnB2 FROM Table1
UNION SELECT DISTINCT columnB3 FROM Table1)
Select * from CASE WHEN (:PM_Parameter1 = 'A')
THEN A
ELSE B
END;
Assuming this is Oracle SQL, you can use a the EXISTS function to check for the parameter value, then combine the sets using UNION.
Try playing with this SQL:
select * from
(
select 'A' from dual
union
select 'B' from dual
)
where exists
(SELECT 'Y'
FROM dual
where 'parameter' = 'parameter'
)
union
select * from
(
select 'X' from dual
union
select 'Y' from dual
)
where exists
(SELECT 'Y'
FROM dual
where 'parameter' != 'parameter'
)
If you reverse both the conditions 'parameter' = 'parameter' and 'parameter' != 'parameter', it will return two different row sets.
I am sure this can be optimized again, hope it works out for you.

Finding sequence in data and grouping by it

Data in Phone_number column of my Temp_table looks like this
1234560200
1234560201
1234560202
2264540300
2264540301
2264540302
2264540303
2264540304
2264540305
2264540306
I want it to find sequence of last 4 digits and and find First and Last number of sequence of it. For eg.
There is sequence of first 3 rows as 0200, 0201, 0202, so First = 0200 and Last = 0202
Final Output of this query should be
First Last
0200 0202
0300 0306
I tried below query, but not sure about this approach.
WITH get_nxt_range AS
(
select substr(a.PHONE_NUMBER,7,4) range1,
LEAD(substr(a.PHONE_NUMBER,7,4)) OVER (ORDER BY a.PHONE_NUMBER ) nxt_range
from Temp_table a
)
SELECT range1,nxt_range FROM get_nxt_range
WHERE nxt_range = range1 +1
ORDER BY range1
One method to get sequences is to use the difference of row numbers approach. This works in your case as well:
select substr(phone_number, 1, 6),
min(substr(phone_number, 7, 4)), max(substr(phone_number, 7, 4))
from (select t.*,
(row_number() over (order by phone_number) -
row_number() over (partition by substr(phone_number, 1, 6) order by phone_number)
) as grp
from temp_table t
) t
group by substr(phone_number, 1, 6), grp;
I think something like this might work:
select
min (substr (phone_number, -4, 4)) as first,
max (substr (phone_number, -4, 4)) as last
from temp_table
group by
substr (phone_number, -4, 2)
SELECT DISTINCT
COALESCE(
first_in_sequence,
LAG( first_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS first_in_sequence,
COALESCE(
last_in_sequence,
LAG( last_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS last_in_sequence
FROM (
SELECT phone_number,
CASE phone_number
WHEN LAG( phone_number ) OVER ( ORDER BY phone_number ) + 1
THEN NULL
ELSE phone_number
END AS first_in_sequence,
CASE phone_number
WHEN LEAD( phone_number ) OVER ( ORDER BY phone_number ) - 1
THEN NULL
ELSE phone_number
END AS last_in_sequence
FROM temp_table
);
Update:
CREATE TABLE phone_numbers ( phone_number ) AS
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual;
SELECT MIN( phone_number ) AS first_in_sequence,
MAX( phone_number ) AS last_in_sequence
FROM (
SELECT phone_number,
phone_number - ROW_NUMBER() OVER ( ORDER BY phone_number ) AS grp
FROM phone_numbers
)
GROUP BY grp;
Output:
FIRST_IN_SEQUENCE LAST_IN_SEQUENCE
----------------- ----------------
2264540300 2264540306
1234560200 1234560202
If 1234560201 1234560203 1234560204 are two instances then this should work:
with tt as (
select substr(PHONE_NUMBER,7,4) id from Temp_table
),
t as (
select
t1.id,
case when t3.id is null then 1 else 0 end start,
case when t2.id is null then 1 else 0 end "end"
from tt t1
-- no next adjacent element - we have an end of interval
left outer join tt t2 on t2.id - 1 = t1.id
-- not previous adjacent element - we have a start of interval
left outer join tt t3 on t3.id + 1 = t1.id
-- select starts and ends only
where t2.id is null or t3.id is null
)
-- find nearest end record for each start record (it may be the same record)
select t1.id, (select min(id) from t where id >= t1.id and "end" = 1)
from t t1
where t1.start = 1
I see guys already have answered for your question.
I just want to propose my variant how resolve this task:
with list_num (phone_number) as (
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual)
select root as from_value,
max(phone_number) keep (dense_rank last order by lvl) as to_value
from
(select phone_number, level as lvl, CONNECT_BY_ROOT phone_number as root
from
(select phone_number,
decode(phone_number-lag (phone_number) over(order by phone_number),1,1,0) as start_value
from list_num) b
connect by nocycle phone_number = prior phone_number + 1
start with start_value = 0)
group by root
having count(1) > 1
If you need only last 4 numbers just substr it.
substr(root,7,4) as from_value,
substr(max(phone_number) keep (dense_rank last order by lvl),7,4) as to_value
Thanks.

Get Var() and AVG() from many columns

I'm building a query to show average and variance from many columns.
To get the average I use this:
SELECT *,
(SELECT AVG(t.c)
FROM (
SELECT peca_1 UNION ALL
SELECT peca_2 UNION ALL
SELECT peca_3 UNION ALL
SELECT peca_4 UNION ALL
SELECT peca_5 UNION ALL
SELECT peca_6 UNION ALL
SELECT peca_7 UNION ALL
SELECT peca_8 UNION ALL
SELECT peca_9 UNION ALL
SELECT peca_10
) t(c)
) as [media]
from Durabilidade
where cd_durabilidade = 1
The result is:
Now I need a new column with VAR(media) comparing each row with first row.
Any idea?
I think this is a case where cross apply is appropriate. I am assuming that you want the variance of the values as calculated by the var() function:
SELECT *, t.avgval as [media], t.varval
from Durabilidade d cross apply
(select avg(t.val) as avgval, var(t.val) as varval
from (select d.peca_1 union all
select d.peca_2 union all
select d.peca_3 union all
select d.peca_4 union all
select d.peca_5 union all
select d.peca_6 union all
select d.peca_7 union all
select d.peca_8 union all
select d.peca_9 union all
select d.peca_10
) t(val) -- t(val) to work
) t
where cd_durabilidade = 1
Something like this?
SELECT *,
VAR(media) AS [variance]
FROM
(
SELECT *,
(SELECT AVG(t.c)
FROM (
SELECT peca_1 UNION ALL
SELECT peca_2 UNION ALL
SELECT peca_3 UNION ALL
SELECT peca_4 UNION ALL
SELECT peca_5 UNION ALL
SELECT peca_6 UNION ALL
SELECT peca_7 UNION ALL
SELECT peca_8 UNION ALL
SELECT peca_9 UNION ALL
SELECT peca_10
) t(c)
) as [media]
from Durabilidade
where cd_durabilidade = 1
) x
GROUP BY
column1_from_durabilidade
,column2_from_durabilidade
--etc
,media

How to select and join on a query with a table

I think this is just a syntax issue for me on Join but I am trying to Join a query to an existing table
select *
from
(
(select 'Source1' Source from Dual
union select 'Source2' from Dual
union select 'Source3' from Dual
union select 'Source4' from Dual
)
cross join (
select 'O' Status from Dual
union select 'C' from Dual
)
cross join (
SELECT
TO_DATE('09/30/2013','mm/dd/yyyy') - 1 + LEVEL dt
FROM dual
CONNECT BY
LEVEL <= ( TO_DATE('10/05/2013','mm/dd/yyyy')
- TO_DATE('09/30/2013','mm/dd/yyyy')) + 1
)
) as x
left join
(
select myTable.mySource
from myTable
) as y
on y.err_sts_cd = x.Status
You should be able to just remove most of the grouping parens...
select *
from
(select 'Source1' Source from Dual
union select 'Source2' from Dual
union select 'Source3' from Dual
union select 'Source4' from Dual
) s
cross join (
select 'O' Status from Dual
union select 'C' from Dual
) x
cross join (
SELECT
TO_DATE('09/30/2013','mm/dd/yyyy') - 1 + LEVEL dt
FROM dual
CONNECT BY
LEVEL <= ( TO_DATE('10/05/2013','mm/dd/yyyy')
- TO_DATE('09/30/2013','mm/dd/yyyy')) + 1
) d
left join
myTable y
on y.err_sts_cd = x.Status
There is no y.err_sts_cd
left join
(
select myTable.mySource
from myTable
) as y
on y.err_sts_cd = x.Status
You need to either join using y.mySource or actually select err_sts_cd from myTable, aledgedly:
select myTable.mySource, myTable.err_sts_cd
from myTable