Get Var() and AVG() from many columns - sql

I'm building a query to show average and variance from many columns.
To get the average I use this:
-- Per-row mean of the ten peca_* readings: the columns are unpivoted into a
-- single-column derived table and averaged by a correlated scalar subquery.
SELECT
    *,
    (
        SELECT AVG(pecas.valor)
        FROM (
            VALUES
                (peca_1), (peca_2), (peca_3), (peca_4), (peca_5),
                (peca_6), (peca_7), (peca_8), (peca_9), (peca_10)
        ) AS pecas (valor)
    ) AS [media]
FROM Durabilidade
WHERE cd_durabilidade = 1
The result is:
Now I need a new column with VAR(media) comparing each row with first row.
Any idea?

I think this is a case where cross apply is appropriate. I am assuming that you want the variance of the values as calculated by the var() function:
-- Per-row mean and variance across the ten peca_* columns.
-- CROSS APPLY evaluates the aggregate once per Durabilidade row; the inner
-- derived table unpivots that row's ten columns into a single column (val).
-- d.* is selected instead of a bare *, so the applied columns (avgval, varval)
-- are not emitted a second time next to [media] and varval.
SELECT
    d.*,
    stats.avgval AS [media],
    stats.varval
FROM Durabilidade AS d
CROSS APPLY (
    SELECT
        AVG(pecas.val) AS avgval,
        VAR(pecas.val) AS varval
    FROM (
        SELECT d.peca_1 UNION ALL
        SELECT d.peca_2 UNION ALL
        SELECT d.peca_3 UNION ALL
        SELECT d.peca_4 UNION ALL
        SELECT d.peca_5 UNION ALL
        SELECT d.peca_6 UNION ALL
        SELECT d.peca_7 UNION ALL
        SELECT d.peca_8 UNION ALL
        SELECT d.peca_9 UNION ALL
        SELECT d.peca_10
    ) AS pecas (val)  -- names the unpivoted column so AVG/VAR can reference it
) AS stats
WHERE d.cd_durabilidade = 1

Something like this?
-- Sketch: wraps the per-row average query in a derived table (x) and applies
-- VAR() to its [media] column, grouping by the table's columns.
-- column1_from_durabilidade / column2_from_durabilidade are placeholders for
-- the real column names of Durabilidade.
-- NOTE(review): media is itself in the GROUP BY, so each group holds a single
-- media value; confirm VAR() over such a group yields a useful result.
-- NOTE(review): SELECT * together with GROUP BY presumably requires every
-- column of x to be listed in the GROUP BY — verify before use.
SELECT *,
VAR(media) AS [variance]
FROM
(
-- Inner query: the original per-row average over peca_1 .. peca_10.
SELECT *,
(SELECT AVG(t.c)
FROM (
SELECT peca_1 UNION ALL
SELECT peca_2 UNION ALL
SELECT peca_3 UNION ALL
SELECT peca_4 UNION ALL
SELECT peca_5 UNION ALL
SELECT peca_6 UNION ALL
SELECT peca_7 UNION ALL
SELECT peca_8 UNION ALL
SELECT peca_9 UNION ALL
SELECT peca_10
) t(c)
) as [media]
from Durabilidade
where cd_durabilidade = 1
) x
GROUP BY
column1_from_durabilidade
,column2_from_durabilidade
--etc
,media

Related

MS SQL Convert rows to columns with Pivot

in MS SQL
I have following table
I would like to convert it to :
I had a look at pivot table function, but could not get it work correctly.
Any advice?
You could try this:
-- Pivots the quantity of each stock code into one column per age bucket
-- (Age_1 .. Age_10); buckets with no rows come back as NULL.
WITH data (STOCKCODE, QTY, AGE) AS (
    SELECT v.STOCKCODE, v.QTY, v.AGE
    FROM (
        VALUES
            ('AIRFIL01', 3, 1),
            ('AIRFIL01', 8, 2),
            ('AIRFIL05', 4, 1),
            ('AIRFIL05', 14, 2),
            ('AIRPRE01', 4, 1),
            ('AIRPRE01', 24, 2),
            ('AIRSUS01', 1, 2),
            ('ALARM01', 1, 1),
            ('ALARM01', 6, 2),
            ('ALARM01', 7, 10),
            ('ALARM05', 2, 1),
            ('ANTROL01', 5, 2)
    ) AS v (STOCKCODE, QTY, AGE)
),
-- Turn the numeric age into the label that names the pivoted column.
labelled AS (
    SELECT STOCKCODE, QTY, CONCAT('Age_', AGE) AS comment
    FROM data
)
SELECT *
FROM labelled
PIVOT (
    SUM(QTY)
    FOR comment IN (
        [Age_1], [Age_2], [Age_3], [Age_4], [Age_5],
        [Age_6], [Age_7], [Age_8], [Age_9], [Age_10]
    )
) AS p

Count distinct letters in a string in bigquery

I have a string column in BigQuery like:
-- Sample input: one string per row.
SELECT 'A'
UNION ALL SELECT 'ab'
UNION ALL SELECT 'abc'
UNION ALL SELECT 'aa'
UNION ALL SELECT 'aab'
I would like to count the number of distinct characters in every row of the column, in this case the results would be:
1
2
3
1
2
Can this be done in BigQuery? How?
How about this (assuming you don't want to differentiate between uppercase and lowercase)...
-- Counts distinct letters case-insensitively: every letter that occurs in val
-- is deleted from the 26-letter alphabet, and the count is 26 minus the length
-- of what remains. Only the letters a-z are counted.
WITH data AS (
    SELECT 'A' AS `val` UNION ALL
    SELECT 'ab' UNION ALL
    SELECT 'abc' UNION ALL
    SELECT 'aa' UNION ALL
    SELECT 'aab'
)
SELECT
    `val`,
    -- The lower-cased value becomes the body of a regex character class.
    26 - LENGTH(
        REGEXP_REPLACE(
            'abcdefghijklmnopqrstuvwxyz',
            CONCAT('[', LOWER(`val`), ']'),
            ''
        )
    )
FROM `data`;
A simple approach is to use the SPLIT to convert your string to an array and UNNEST to convert the resulting array to a table. You may then use COUNT and DISTINCT to determine the number of unique characters as shown below:
-- Distinct characters per row: split each string into characters, de-duplicate
-- them in a subquery, and count what is left.
WITH my_data AS (
    SELECT 'A' AS col UNION ALL
    SELECT 'ab' UNION ALL
    SELECT 'abc' UNION ALL
    SELECT 'aa' UNION ALL
    SELECT 'aab'
)
SELECT
    col,
    (
        SELECT COUNT(*)
        FROM (
            SELECT DISTINCT ch
            FROM UNNEST(SPLIT(col, '')) AS ch
        )
    ) AS n
FROM my_data;
or simply
-- Same result in one step: COUNT(DISTINCT ...) over the characters of each
-- string, evaluated as a correlated scalar subquery per row.
with my_data as (
  select 'A' as col
  union all select 'ab'
  union all select 'abc'
  union all select 'aa'
  union all select 'aab'
)
select
  col,
  (select count(distinct ch) from unnest(split(col, '')) as ch) as cnt
from my_data;
Like previous but using COUNT with DISTINCT
-- Joins each row to its own characters and aggregates per string.
-- Grouping by col means rows with the same col value share one output row.
WITH my_data AS (
    SELECT 'A' AS col UNION ALL
    SELECT 'ab' UNION ALL
    SELECT 'abc' UNION ALL
    SELECT 'aa' UNION ALL
    SELECT 'aab'
)
SELECT
    col,
    COUNT(DISTINCT ch)
FROM my_data
CROSS JOIN UNNEST(SPLIT(col, '')) AS ch
GROUP BY col
If the data is not huge, I would rather use a user-defined function to simplify the string manipulation across different columns:
-- Returns the number of distinct characters in x, or NULL when x is NULL.
CREATE TEMP FUNCTION
get_unique_char_count(x STRING)
RETURNS INT64
LANGUAGE js AS r"""
  // A SQL NULL arrives as a JavaScript null; return null instead of throwing
  // on a method call against it.
  if (x == null) {
    return null;
  }
  // Iterating a string yields whole code points, so a character outside the
  // Basic Multilingual Plane is counted once rather than as two surrogates.
  const uniqueChars = new Set(x);
  return uniqueChars.size;
""";
WITH
  result AS (
    SELECT 'A' AS val
    UNION ALL SELECT 'ab'
    UNION ALL SELECT 'abc'
    UNION ALL SELECT 'aa'
    UNION ALL SELECT 'aab'
  )
SELECT
  val,
  get_unique_char_count(val) AS unique_char_count
FROM
  result
RESULT:

How filter rows by matched values using BigQuery?

I have a table in BigQuery
-- Sample table: one row per (big_id, temp_id, names).
SELECT *
FROM UNNEST([
    STRUCT(1 AS big_id, 1 AS temp_id, '101' AS names),
    (1, 1, 'z3Awwer'),
    (1, 1, 'gA1sd03'),
    (1, 2, 'z3Awwer'),
    (1, 2, 'gA1sd03'),
    (1, 3, 'gA1sd03'),
    (1, 3, 'sAs10sdf4'),
    (1, 4, 'sAs10sdf4'),
    (1, 5, 'Adf105'),
    (2, 1, 'A1sdf02'),
    (2, 1, '345A103'),
    (2, 2, '345A103'),
    (2, 2, 'A1sd04'),
    (2, 3, 'A1sd04'),
    (2, 4, '6_0Awe105')
])
I want to drop a temp_id when all of its names are included in some other temp_id within the same big_id partition. For example, I do not need the rows where temp_id = 2, because all names of temp_id = 2 are included in temp_id = 1. Conversely, I need to keep all rows of temp_id = 1, because its set of names covers the names of temp_id = 2.
So expected output:
-- Expected result: the rows that remain after the covered temp_ids are removed.
SELECT *
FROM UNNEST([
    STRUCT(1 AS big_id, 1 AS temp_id, '101' AS names),
    (1, 1, 'z3Awwer'),
    (1, 1, 'gA1sd03'),
    (1, 3, 'gA1sd03'),
    (1, 3, 'sAs10sdf4'),
    (1, 5, 'Adf105'),
    (2, 1, 'A1sdf02'),
    (2, 1, '345A103'),
    (2, 2, '345A103'),
    (2, 2, 'A1sd04'),
    (2, 4, '6_0Awe105')
])
How can I make it using BigQuery?
Below is for BigQuery Standard SQL
#standardsql
-- Collapse the table to one row per (big_id, temp_id) holding that group's names as an array.
with temp as (
select big_id, temp_id, array_agg(names) names
from `project.dataset.table`
group by big_id, temp_id
)
-- Outer query: keep the surviving groups and expand their arrays back to one row per name.
select big_id, temp_id, names
from (
-- any_value(names) is t1's array; it is the same on every row of a (big_id, temp_id) group.
select big_id, temp_id, any_value(names) names
from (
-- Pair every group (t1) with every group (t2) of the same big_id.
select t1.*,
-- flag: the number of t1 names also found in t2 equals the size of t1's array,
-- i.e. t2 (a different temp_id) covers t1.
-- NOTE(review): assumes names are not repeated within one temp_id — confirm.
( select count(1)
from t1.names name
join t2.names name
using(name)
where t1.temp_id != t2.temp_id
) = array_length(t1.names) as flag
from temp t1
join temp t2
using (big_id)
)
group by big_id, temp_id
-- Keep only the groups that no other group covers.
-- NOTE(review): two temp_ids with identical name sets appear to cover each other,
-- so both would be dropped — confirm this is acceptable.
having countif(flag) = 0
), unnest(names) names
If you apply the above to the sample data from your question, the output is:

I need some SQL help trying to find what is not in a table

I need some help. I tried this and it does not work.
I selected all the data I needed and tried to say what's not in
the original file.
Could someone show me the correct way?
-- Asker's attempt (does not produce the wanted list).
-- The outer query reads rows of acknowledgement and then requires their
-- shipment NOT to be in acknowledgement, so no row it reads can pass the filter.
SELECT *
FROM acknowledgement t1
where t1.st01 = '110'
-- The shipment numbers to check (list abbreviated with "...").
and (t1.shipment ='S640D14268424' or t1.Shipment ='S640D14268924' or
t1.Shipment ='S640D14268925' or t1.Shipment ='S646D14261190' or
t1.Shipment ='S646I14265886' or t1.Shipment ='S640D14268423' ...)
AND [shipment]
NOT IN(
SELECT [shipment] FROM acknowledgement
)
Any help would be useful.
You are looking for something like:
-- Lists the shipment numbers from the literal list that have no matching row
-- in acknowledgement. The literal list is the driving set, so values that are
-- absent from the table can be reported.
-- A VALUES constructor replaces the UNION chain (no de-duplication pass is
-- needed for distinct literals).
SELECT numbers_to_look.Shipment
FROM (
    VALUES
        ('S640D14268424'),
        ('S640D14268924'),
        ('S640D14268925'),
        ('S646D14261190'),
        ('S646I14265886'),
        ('S640D14268423'),
        ('S646D14269951'),
        ('S646D14269939'),
        ('S646D14269034'),
        ('S646D14269962'),
        ('S646D14269953'),
        ('S646D14271620'),
        ('S646D14269030'),
        ('S646D14269941'),
        ('S646D14269251'),
        ('S646D14273089'),
        ('S646D14272388'),
        ('S646D14273197'),
        ('S646D14273399'),
        ('S640D14273543'),
        ('S640D14272952'),
        ('S640D14272953'),
        ('S640D14276996'),
        ('S640D14277005'),
        ('S640D14277006'),
        ('S646D148047394'),
        ('S640D14277004'),
        ('S646D158049311'),
        ('S646D158049791'),
        ('S646D158049797'),
        ('S646D158049806'),
        ('S646D158049781'),
        ('S646D158049557'),
        ('S646D158049064'),
        ('S646D158049561'),
        ('S646D158049794'),
        ('S646D158049362'),
        ('S646D158049361'),
        ('S646D158049792'),
        ('S646D158049808'),
        ('S646D158049788'),
        ('S646D158049365'),
        ('S646D158049800'),
        ('S646D158049790'),
        ('S646D158049799'),
        ('S646D158049803'),
        ('S646D158049784'),
        ('S646D158049522'),
        ('S646D158049340'),
        ('S646D158049796'),
        ('S646D158049687'),
        ('S646D158049491'),
        ('S646D158049499'),
        ('S646D158049484'),
        ('S646D158049486'),
        ('S646D158049494')
) AS numbers_to_look (Shipment)
-- NOT EXISTS is unaffected by NULL shipment values in acknowledgement, so the
-- extra "shipment > ''" guard that NOT IN needed is no longer required.
WHERE NOT EXISTS (
    SELECT 1
    FROM acknowledgement AS a
    WHERE a.[shipment] = numbers_to_look.Shipment
)
A better way to do this is to create a temp table to store the numbers you want to check.

cross-dbms way to check if string is numeric

Ok, I have this field: code varchar(255). It contains some values used in our export routine like
DB84
DB34
3567
3568
I need to select only auto-generated (fully numeric) fields
WHERE is_numeric(table.code)
is_numeric() checks if code field contains only positive digits.
Can you propose anything that will work both under mysql 5.1 and oracle 10g?
Below are three separate implementations, one each for SQL Server, MySQL and Oracle. None of them uses (or can use) the same approach, so there doesn't seem to be a cross-DBMS way to do it.
For MySQL and Oracle, only the simple integer test is shown; for SQL Server, the full numeric test is shown.
For SQL Server:
Note that isnumeric('.') returns 1, but '.' cannot actually be converted to float. Some text like '1e6' cannot be converted to numeric directly, but you can convert it to float first and then to numeric.
-- Builds a list of test strings and, for each one, shows isnumeric(x), a
-- digits-only integer conversion (SimpleInt), and a float conversion that is
-- only attempted after a chain of LIKE checks has rejected malformed input.
;with tmp(x) as (
select 'db01' union all select '1' union all select '1e2' union all
select '1234' union all select '' union all select null union all
select '1.2e4' union all select '1.e10' union all select '0' union all
select '1.2e+4' union all select '1.e-10' union all select '1e--5' union all
select '.' union all select '.123' union all select '1.1.23' union all
select '-.123' union all select '-1.123' union all select '--1' union all
select '---1.1' union all select '+1.123' union all select '++3' union all
select '-+1.123' union all select '1 1' union all select '1e1.3' union all
select '1.234' union all select 'e4' union all select '+.123' union all
select '1-' union all select '-3e-4' union all select '+3e-4' union all
select '+3e+4' union all select '-3.2e+4' union all select '1e1e1' union all
select '-1e-1-1')
select x, isnumeric(x),
-- SimpleInt: converted only when x is non-empty and contains no non-digit character.
case when x not like '%[^0-9]%' and x >'' then convert(int, x) end as SimpleInt,
-- Full numeric test: the WHEN branches are checked in order; the first match yields NULL.
case
when x is null or x = '' then null -- blanks
when x like '%[^0-9e.+-]%' then null -- non valid char found
when x like 'e%' or x like '%e%[e.]%' then null -- e cannot be first, and cannot be followed by e/.
when x like '%e%_%[+-]%' then null -- nothing must come between e and +/-
when x='.' or x like '%.%.%' then null -- no more than one decimal, and not the decimal alone
when x like '%[^e][+-]%' then null -- no more than one of either +/-, and it must be at the start
-- two signs are rejected unless the second one directly follows an e
when x like '%[+-]%[+-]%' and not x like '%[+-]%e[+-]%' then null
else convert(float,x)
end
-- Sorted by the isnumeric result (column 2), then by SimpleInt (column 3).
from tmp order by 2, 3
For MySQL
-- Test fixture: one candidate string per row.
create table tmp(x varchar(100));
insert into tmp (x)
select 'db01' union all select '1' union all select '1e2' union all
select '1234' union all select '' union all select null union all
select '1.2e4' union all select '1.e10' union all select '0' union all
select '1.2e+4' union all select '1.e-10' union all select '1e--5' union all
select '.' union all select '.123' union all select '1.1.23' union all
select '-.123' union all select '-1.123' union all select '--1' union all
select '---1.1' union all select '+1.123' union all select '++3' union all
select '-+1.123' union all select '1 1' union all select '1e1.3' union all
select '1.234' union all select 'e4' union all select '+.123' union all
select '1-' union all select '-3e-4' union all select '+3e-4' union all
select '+3e+4' union all select '-3.2e+4' union all select '1e1e1' union all
select '-1e-1-1';
-- SimpleInt is the numeric value when x consists only of digits, otherwise NULL.
-- The x <> '' guard stops the empty string from being reported as 0: it contains
-- no non-digit character, so the regexp test alone would accept it.
select x,
case when x <> '' and x not regexp '[^0-9]' then x*1 end as SimpleInt
from tmp order by SimpleInt
For Oracle
-- Simple integer test: yields the number only when col contains no non-digit
-- character. The NOT is essential; without it the branch fires for exactly
-- the non-numeric strings.
case when not REGEXP_LIKE(col, '[^0-9]') then col*1 end