Query to find if an aggregate string contains certain numbers - sql

I am working on Big Query Standard SQL. I have a data table like shown below (using ; as separator):
id;operation
107327;-1,-1,-1,-1,5,-1,0,2,-1
108296;-1,6,2,-1,-1,-1
690481;0,-1,-1,-1,5
102643;5,-1,-1,-1,-1,-2,2,3,-1,0,-1,-1,-1,-1,-1,-1
103171;0,5
789481;0,-1,5
I would like to take id that only contains operation 0,5 or 0,-1,5 so the result will show:
690481
103171
789481

Below is for BigQuery Standard SQL
#standardSQL
-- Keep only the rows whose comma-separated `operation` list is made up
-- exclusively of the values 0, -1 and 5.
SELECT *
FROM `project.dataset.table`
WHERE NOT EXISTS (
  -- A single token outside the allowed set disqualifies the row.
  SELECT 1
  FROM UNNEST(SPLIT(operation, ',')) AS token
  WHERE token NOT IN ('0', '-1', '5')
)
You can test and play with the above using the sample data from your question, as in the example below
#standardSQL
-- Self-contained demo: the CTE stands in for the real table and carries the
-- sample rows from the question.
WITH `project.dataset.table` AS (
  SELECT 107327 AS id, '-1,-1,-1,-1,5,-1,0,2,-1' AS operation
  UNION ALL SELECT 108296, '-1,6,2,-1,-1,-1'
  UNION ALL SELECT 690481, '0,-1,-1,-1,5'
  UNION ALL SELECT 102643, '5,-1,-1,-1,-1,-2,2,3,-1,0,-1,-1,-1,-1,-1,-1'
  UNION ALL SELECT 103171, '0,5'
  UNION ALL SELECT 789481, '0,-1,5'
)
SELECT *
FROM `project.dataset.table`
WHERE NOT EXISTS (
  -- A single token outside the allowed set disqualifies the row.
  SELECT 1
  FROM UNNEST(SPLIT(operation, ',')) AS token
  WHERE token NOT IN ('0', '-1', '5')
)
with output

I think regular expression does what you want:
-- Keeps rows whose entire operation string is "0", followed by zero or more
-- "-1" entries, followed by "5" (comma-separated), e.g. '0,5' or '0,-1,-1,5'.
select t.*
from t
where regexp_contains(operation, '^0,(-1,)*5$');
If you want matches to rows that contain only 0, -1, or 5, you would use:
where regexp_contains(operation, '^((0|-1|5),)*(0|-1|5)$');

Related

Is there a BigQuery version of isnumeric

I need to test if a field is numeric or not using standard SQL in BigQuery.
The example below works and is similar to what I have done in Cognos using TRANSLATE('mystring','1234567890.','') but it's not very elegant.
-- Removes every digit and the decimal point from the literal; if nothing is
-- left (length 0) the literal consisted only of those characters.
SELECT
IF(LENGTH(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE('1234.56','1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'.',''))=0,
'A number',
'Not a number')
You can use SAFE_CAST to try casting to a number. SAFE_CAST casts similar to CAST, but if casting fails, instead of erring null is returned.
For example you can do:
-- SAFE_CAST yields NULL instead of raising an error when the text cannot be
-- converted; wrapped in SELECT so the example runs as a statement.
SELECT SAFE_CAST('1234567890' AS FLOAT64);
which will return 1.23456789E9
Thanks for both suggestions, both work a treat and I have gone for the SAFE_CAST option as it runs a fraction quicker.
#standardSQL
-- Classify each sample string by whether it can be cast to FLOAT64.
WITH `project.dataset.table` AS (
  SELECT col
  FROM UNNEST([
    '1234.56',
    '1234.',
    '1234',
    '.56',
    '1234..56',
    'a1234.56'
  ]) AS col
)
SELECT
  col,
  -- SAFE_CAST returns NULL on a failed conversion, so a non-NULL result
  -- means the text parsed as a number.
  IF(SAFE_CAST(col AS FLOAT64) IS NOT NULL, 'A number', 'Not a number')
FROM `project.dataset.table`
but its not very elegant
Below examples for BigQuery Standard SQL
#standardSQL
-- Two regex-based ways to classify a string as numeric, shown side by side
-- on the sample values in the CTE.
WITH `project.dataset.table` AS (
SELECT '1234.56' col UNION ALL
SELECT '1234.' col UNION ALL
SELECT '1234' col UNION ALL
SELECT '.56' col UNION ALL
SELECT '1234..56' col UNION ALL
SELECT 'a1234.56'
)
SELECT
col,
-- Strip-and-measure: removes every digit and dot and checks that nothing is
-- left. It does not validate structure, so '1234..56' still counts as a number.
IF(LENGTH(REGEXP_REPLACE(col, r'[\d.]', '')) = 0, 'A number', 'Not a number') ,
-- Structural check: digits with at most one decimal point and at least one
-- digit. The dot is escaped so it matches only a literal '.'; unescaped it
-- matches any character and lets values such as '1234a56' through.
IF(REGEXP_CONTAINS(col, r'^(?:\d+\.?\d*|\.\d+)$'), 'A number', 'Not a number')
FROM `project.dataset.table`
I think that we could use the translate function to replace every digit from 0 to 9
with 0 (let's call the result String_1) and then compare it to a string (let's call it String_2) made of as many 0s as the length of String_1.
-- Left side: NUM_BU with every digit 0-9 replaced by '0'.
(translate(src.NUM_BU , '0123456789', '0000000000'))
=
-- Right side: a run of '0' characters padded to the same length as the left side.
-- NOTE(review): on engines that treat '' as NULL (e.g. Oracle) rpad('', ...) would
-- return NULL and the comparison could never be true — confirm the target engine.
rpad('', length((translate(src.NUM_BU , '0123456789', '0000000000'))), '0')

Extract number or string after string in BigQuery

I have several 1.000 URLs and want to extract some values from the URL parameters.
Here some examples from the DB:
["www.xxx.com?uci=6666&rci=fefw"]
["www.xxx.com?uci=61"]
["www.xxx.com?rci=62&uci=5536"]
["www.xxx.com?uci=6666&utm_source=XXX"]
["www.xxx.com?pccst=TEST%20sTESTg"]
["www.xxx.com?pccst=TEST2%20s&uci=1"]
["www.xxx.com?uci=1pccst=TEST42rt24&rci=2"]
How can I extract the value of the parameter UCI. It is always a digit number (don’t know the exact length).
I tried it with REGEXP_EXTRACT. But I didn't succeed:
REGEXP_EXTRACT(URL, '(uci)\=[0-9]+') AS UCI_extract
And I also want to extract the value of the parameter pccst. It can be every character and I don`t know the exact length. But it always ends with “ or ? or &
I tried it also with REGEXP_EXTRACT but didn't succeed:
REGEXP_EXTRACT(URL, r'pccst\=(.*)(\"|\&|\?)') AS pccst_extract
I am really not the REGEX expert.
So would be great if someone could help me.
Thanks a lot in advance,
Peter
You can adapt this solution
#standardSQL
# Extract query parameters from a URL as ARRAY in BigQuery; standard-sql; 2018-04-08
# #see http://www.pascallandau.com/bigquery-snippets/extract-url-parameters-array/
WITH examples AS (
  -- Inline sample rows: one struct per URL.
  SELECT
    sample.id,
    sample.query
  FROM UNNEST([
    STRUCT(1 AS id, 'www.xxx.com?uci=6666&rci=fefw' AS query),
    STRUCT(2 AS id, 'www.xxx.com?uci=1pccst%20TEST42rt24&rci=2' AS query),
    STRUCT(3 AS id, 'www.xxx.com?pccst=TEST2%20s&uci=1' AS query)
  ]) AS sample
)
SELECT
  id,
  query,
  -- Every "key=value" pair that follows a '?' or '&'.
  REGEXP_EXTRACT_ALL(query,r'(?:\?|&)((?:[^=]+)=(?:[^&]*))') AS params,
  -- Only the part before '=' of each pair.
  REGEXP_EXTRACT_ALL(query,r'(?:\?|&)(?:([^=]+)=(?:[^&]*))') AS keys,
  -- Only the part after '=' of each pair, up to the next '&'.
  REGEXP_EXTRACT_ALL(query,r'(?:\?|&)(?:(?:[^=]+)=([^&]*))') AS values
FROM examples
Below example for BigQuery Standard SQL
#standardSQL
-- Pull the `uci` and `pccst` parameter values out of each sample URL.
WITH `project.dataset.table` AS (
  SELECT url
  FROM UNNEST([
    "www.xxx.com?uci=6666&rci=fefw",
    "www.xxx.com?uci=61",
    "www.xxx.com?rci=62&uci=5536",
    "www.xxx.com?uci=6666&utm_source=XXX",
    "www.xxx.com?pccst=TEST%20sTESTg",
    "www.xxx.com?pccst=TEST2%20s&uci=1",
    "www.xxx.com?uci=1&pccst=TEST42rt24&rci=2"
  ]) AS url
)
SELECT
  url,
  -- The parameter name must directly follow '?' or '&'; the lazy capture
  -- stops at the next '&' or at the end of the string. NULL when absent.
  REGEXP_EXTRACT(url, r'[?&]uci=(.*?)(?:$|&)') AS uci,
  REGEXP_EXTRACT(url, r'[?&]pccst=(.*?)(?:$|&)') AS pccst
FROM `project.dataset.table`
result is
Row url uci pccst
1 www.xxx.com?pccst=TEST%20sTESTg null TEST%20sTESTg
2 www.xxx.com?pccst=TEST2%20s&uci=1 1 TEST2%20s
3 www.xxx.com?uci=1&pccst=TEST42rt24&rci=2 1 TEST42rt24
4 www.xxx.com?uci=61 61 null
5 www.xxx.com?rci=62&uci=5536 5536 null
6 www.xxx.com?uci=6666&rci=fefw 6666 null
7 www.xxx.com?uci=6666&utm_source=XXX 6666 null
Also, below is an option that parses out all key-value pairs, so that you can then dynamically select the ones you need
#standardSQL
-- Break each URL's query string into an array of (key, value) structs so that
-- any parameter can be selected afterwards.
WITH `project.dataset.table` AS (
SELECT "www.xxx.com?uci=6666&rci=fefw" url UNION ALL
SELECT "www.xxx.com?uci=61" UNION ALL
SELECT "www.xxx.com?rci=62&uci=5536" UNION ALL
SELECT "www.xxx.com?uci=6666&utm_source=XXX" UNION ALL
SELECT "www.xxx.com?pccst=TEST%20sTESTg" UNION ALL
SELECT "www.xxx.com?pccst=TEST2%20s&uci=1" UNION ALL
-- '&' restored between uci and pccst; without it the pair parses as
-- key 'uci' / value '1pccst'.
SELECT "www.xxx.com?uci=1&pccst=TEST42rt24&rci=2"
)
SELECT url,
ARRAY(
SELECT AS STRUCT
-- Key: everything before the first '='.
SPLIT(kv, '=')[SAFE_OFFSET(0)] AS key,
-- Value: everything after the first '=', so values that themselves contain
-- '=' are kept whole. NULL when the pair has no '='.
REGEXP_EXTRACT(kv, r'^[^=]*=(.*)$') AS value
-- The query string is whatever follows the first '?' (up to any '#').
-- Locating it by '?' rather than by host length keeps this correct for URLs
-- that carry a scheme or a path. URLs without '?' yield an empty array.
FROM UNNEST(SPLIT(REGEXP_EXTRACT(url, r'\?([^#]*)'), '&')) AS kv
) AS key_value_pair
FROM `project.dataset.table`

Using a case statement as an if statement

I am attempting to create an IF statement in BigQuery. I have built a concept that will work but it does not select the data from a table, I can only get it to display 1 or 0
Example:
SELECT --AS STRUCT
CASE
WHEN (
Select Count(1) FROM ( -- If the records are the same, then return = 0, if the records are not the same then > 1
Select Distinct ESCO, SOURCE, LDCTEXT, STATUS,DDR_DATE, TempF, HeatingDegreeDays, DecaTherms
from `gas-ddr.gas_ddr_outbound.LexingtonDDRsOutbound_onchange_Prior_Filtered`
Except Distinct
Select Distinct ESCO, SOURCE, LDCTEXT, STATUS,DDR_DATE, TempF, HeatingDegreeDays, DecaTherms
from `gas-ddr.gas_ddr_outbound.LexingtonDDRsOutbound_onchange_Latest_Filtered`
)
)= 0
THEN
(Select * from `gas-ddr.gas_ddr_outbound.LexingtonDDRsOutbound_onchange_Latest`) -- This Does not
work Scalar subquery cannot have more than one column unless using SELECT AS
STRUCT to build STRUCT values at [16:4] END
-- Returns 1 when the set difference computed below is empty, otherwise 0.
SELECT --AS STRUCT
CASE
WHEN (
Select Count(1) FROM ( -- Counts distinct rows that are in Prior_Filtered but not in Latest_Filtered: 0 means none, otherwise the count is >= 1
Select Distinct ESCO, SOURCE, LDCTEXT, STATUS,DDR_DATE, TempF, HeatingDegreeDays, DecaTherms
from `gas-ddr.gas_ddr_outbound.LexingtonDDRsOutbound_onchange_Prior_Filtered`
Except Distinct
Select Distinct ESCO, SOURCE, LDCTEXT, STATUS,DDR_DATE, TempF, HeatingDegreeDays, DecaTherms
from `gas-ddr.gas_ddr_outbound.LexingtonDDRsOutbound_onchange_Latest_Filtered`
)
)= 0
THEN 1 --- This does work
Else
0
END
How can I Get this query to return results from an existing table?
Your question is still a little generic, so my answer is generic as well - it just mimics your use case to the extent that I can reverse engineer it from your comments
So, in below code - project.dataset.yourtable mimics your table ; whereas
project.dataset.yourtable_Prior_Filtered and project.dataset.yourtable_Latest_Filtered mimic your respective views
#standardSQL
-- Demo: return every row of the table only when the "latest" view holds at
-- least one `cols` value that the "prior" view lacks; otherwise return nothing.
WITH `project.dataset.yourtable` AS (
  -- Stand-in for the real table.
  SELECT 'aaa' AS cols, 'prior' AS filter
  UNION ALL
  SELECT 'bbb' AS cols, 'latest' AS filter
),
`project.dataset.yourtable_Prior_Filtered` AS (
  -- Stand-in for the "prior" view.
  SELECT cols
  FROM `project.dataset.yourtable`
  WHERE filter = 'prior'
),
`project.dataset.yourtable_Latest_Filtered` AS (
  -- Stand-in for the "latest" view.
  SELECT cols
  FROM `project.dataset.yourtable`
  WHERE filter = 'latest'
),
check AS (
  -- One-row flag: TRUE when Latest minus Prior is non-empty.
  SELECT EXISTS (
    SELECT DISTINCT cols FROM `project.dataset.yourtable_Latest_Filtered`
    EXCEPT DISTINCT
    SELECT DISTINCT cols FROM `project.dataset.yourtable_Prior_Filtered`
  ) AS changed
)
SELECT t.*
FROM `project.dataset.yourtable` AS t
INNER JOIN check
  ON check.changed
the result is
Row cols filter
1 aaa prior
2 bbb latest
if you changed your table to
WITH `project.dataset.yourtable` AS (
SELECT 'aaa' cols, 'prior' filter UNION ALL
SELECT 'aaa' cols, 'latest' filter
) ......
the result will be
Row cols filter
Query returned zero records.
I hope this gives you right direction
Added more explanations:
I can be wrong - but per your question - it looks like you have one table project.dataset.yourtable and two views project.dataset.yourtable_Prior_Filtered and project.dataset.yourtable_Latest_Filtered which present state of your table prior and after some event
So, first three CTE in the answer above just mimic those table and views which you described in your question.
They are here so you can see concept and can play with it without any extra work before adjusting this to your real use-case.
For your real use-case you should omit them and use your real table and view names and whatever columns they have.
So the query for you to play with is:
#standardSQL
-- Return every row of the table only when Latest_Filtered holds at least one
-- `cols` value that Prior_Filtered lacks; otherwise return no rows.
WITH check AS (
  -- One-row flag: TRUE when Latest minus Prior is non-empty.
  SELECT EXISTS (
    SELECT DISTINCT cols FROM `project.dataset.yourtable_Latest_Filtered`
    EXCEPT DISTINCT
    SELECT DISTINCT cols FROM `project.dataset.yourtable_Prior_Filtered`
  ) AS changed
)
SELECT t.*
FROM `project.dataset.yourtable` AS t
INNER JOIN check
  ON check.changed
It should be a very simple IF statement in any language.
Unfortunately NO! it cannot be done with just simple IF and if you see it fit you can submit a feature request to BigQuery team for whatever you think makes sense

use SUM on certain conditions

I have a script that extracts transactions and their details from a database. But my users complain that the file size being generated is too large, and so they asked for certain transactions to be just summed up/consolidated instead if they are of a certain classification, say Checking Accounts. That means there should only be one line in the result set named "Checking" which contains the sum of all transactions under Checking Accounts. Is there a way for an SQL script to go like:
CASE
WHEN Acct_class = 'Checking'
then sum(tran_amount)
ELSE tran_amount
END
I already have the proper GROUP BY and ORDER BY statements, but I can't seem to get my desired output. There is still more than one "Checking" line in the result set. Any ideas would be very much appreciated.
Try This,
Select sum(tran_amount) From tran_amount Where Acct_class = 'Checking'
You can try to achieve this using UNION ALL
SELECT tran_amount, .... FROM table WHERE NOT Acct_class = 'Checking'
UNION ALL
SELECT SUM(tran_amount), .... FROM table WHERE Acct_class = 'Checking' GROUP BY Acct_class, ...;
hi you can try below sql
-- One output row per account_class; what the row shows depends on the class.
SELECT
    account_class,
    CASE
        -- saving: comma-separated list of every transaction detail, ordered by emp_name
        WHEN account_class = 'saving'
            THEN LISTAGG(trans_detail, ',') WITHIN GROUP (ORDER BY emp_name)
        -- checking: only the total of the transactions, rendered as text
        WHEN account_class = 'checking'
            THEN TO_CHAR(SUM(trans_detail))
    END AS trans_det
FROM emp
GROUP BY account_class;
Or, if your desired output is getting either the sum, either the actual column value based on another column value, the solution would be to use an analytical function to get the sum together with the actual value:
-- Every row shows its own tran_amount, except 'Checking' rows, which show the
-- total of their acct_class partition instead. The window SUM does not
-- collapse rows, so the row count is unchanged.
with amounts_with_class_total as (
    select
        acct_class,
        tran_amount,
        sum(tran_amount) over (partition by acct_class) as tran_amount_sum
    from YOUR_TABLE
)
select
    decode(acct_class, 'Checking', tran_amount_sum, tran_amount)
from amounts_with_class_total
You can try something like the following, by keeping single rows for some classes, and aggregating for some others:
-- Collapses all 'Checking' rows into one summed row while leaving every other
-- row on its own: the CASE in GROUP BY gives 'Checking' rows a shared NULL key
-- and every other row its own id as the key.
with test (id, class, amount) as
(
select 1, 'a' , 100 from dual union all
select 2, 'a' , 100 from dual union all
select 3, 'Checking', 100 from dual union all
select 4, 'Checking', 100 from dual union all
select 5, 'c' , 100 from dual union all
select 6, 'c' , 100 from dual union all
select 7, 'c' , 100 from dual union all
select 8, 'd' , 100 from dual
)
select sum(amount), class
from test
group by case
when class = 'Checking' then null /* aggregates elements of class 'Checking' */
else id /* keeps elements of other classes not aggregated */
end,
class

How do I combine 2 records with a single field into 1 row with 2 fields (Oracle 11g)?

Here's a sample data
record1: field1 = test2
record2: field1 = test3
The actual output I want is
record1: field1 = test2 | field2 = test3
I've looked around the net but can't find what I'm looking for. I can use a custom function to get it in this format but I'm trying to see if there's a way to make it work without resorting to that.
thanks a lot
You need to use pivot:
-- Turn the two sample rows into a single row with one column per id value.
WITH t (id, d) AS (
    SELECT 1, 'field1 = test2' FROM dual
    UNION ALL
    SELECT 2, 'field1 = test3' FROM dual
)
SELECT *
FROM t
PIVOT (
    MAX(d)
    FOR id IN (1, 2)
)
If you don't have the id field you can generate it, but you will have XML type:
-- Same pivot when the rows have no id: number them first, then pivot on the
-- generated id. PIVOT XML is used because the IN list is a subquery.
WITH t (d) AS (
    SELECT 'field1 = test2' FROM dual
    UNION ALL
    SELECT 'field1 = test3' FROM dual
),
t1 (id, d) AS (
    -- Assign a sequential id in order of d.
    SELECT
        ROW_NUMBER() OVER (ORDER BY d),
        d
    FROM t
)
SELECT *
FROM t1
PIVOT XML (
    MAX(d)
    FOR id IN (SELECT id FROM t1)
)
There are several ways to approach this - google pivot rows to columns. Here is one set of answers: http://www.dba-oracle.com/t_converting_rows_columns.htm