pivot multi-level nested fields in bigquery - google-bigquery

My bq table schema:
Continuing this post: bigquery pivoting with nested field
I'm trying to flatten this table. I would like to unnest the timeseries.data fields, i.e. the final number of rows should be equal to the total length of timeseries.data arrays. I would also like to add annotation.properties.key with certain value as additional columns, and annotation.properties.value as its value. So in this case, it would be the "margin" column. However the following query gives me error: "Unrecognized name: data". But after the last FROM, I did already: unnest(timeseries.data) as data.
flow_timestamp, channel_name, number_of_digits, timestamp, value, margin
2019-10-31 15:31:15.079674 UTC, channel_1, 4, 2018-02-28T02:00:00, 50, 0.01
query:
SELECT
flow_timestamp, timeseries.channel_name,
( SELECT MAX(IF(channel_properties.key = 'number_of_digits', channel_properties.value, NULL))
FROM UNNEST(timeseries.channel_properties) AS channel_properties
),
data.timestamp ,data.value
,(with subq as (select * from unnest(data.annotation))
select max(if (properties.key = 'margin', properties.value, null))
from (
select * from unnest(subq.properties)
) as properties
) as margin
FROM my_table
left join unnest(timeseries.data) as data
WHERE DATE(flow_timestamp) between "2019-10-28" and "2019-11-02"
order by flow_timestamp

Try below
#standardSQL
SELECT
flow_timestamp,
timeseries.channel_name,
( SELECT MAX(IF(channel_properties.key = 'number_of_digits', channel_properties.value, NULL))
FROM UNNEST(timeseries.channel_properties) AS channel_properties
) AS number_of_digits,
item.timestamp,
item.value,
( SELECT MAX(IF(prop.key = 'margin', prop.value, NULL))
FROM UNNEST(item.annotation) AS annot, UNNEST(annot.properties) prop
) AS margin
FROM my_table
LEFT JOIN UNNEST(timeseries.data) item
WHERE DATE(flow_timestamp) BETWEEN '2019-10-28' AND '2019-11-02'
ORDER BY flow_timestamp
Below is a little less verbose version of the same solution, but I usually prefer above as it simpler to maintain
#standardSQL
SELECT
flow_timestamp,
timeseries.channel_name,
( SELECT MAX(IF(key = 'number_of_digits', value, NULL))
FROM UNNEST(timeseries.channel_properties) AS channel_properties
) AS number_of_digits,
timestamp,
value,
( SELECT MAX(IF(key = 'margin', value, NULL))
FROM UNNEST(annotation), UNNEST(properties)
) AS margin
FROM my_table
LEFT JOIN UNNEST(timeseries.data)
WHERE DATE(flow_timestamp) BETWEEN '2019-10-28' AND '2019-11-02'
ORDER BY flow_timestamp

Related

how to delete repeated values in UNION query using count

Hello I have been trying to delete a repeated value on the following UNION query with the following results (image). How can I filter out the value LW_ID=8232 with AANTALLN =0. I need to find a way taht if in the first query AANTALLN >0 is found, then on the second part of the union query not insert it again. Thanks "
With LESEENHEIDLOOPBAAN as (
SELECT
LE_AGENDA_FK,
LE_CODE,
LE_ID,
LE_KLAS_FK,
LE_KLASPARTITIE_FK,
LE_OMSCHRIJVING,
LE_VERANDERDDOOR,
LE_VERANDERDOP,
Count(LH_ID) As AantalLln
FROM
LESEENHEID
INNER JOIN LOOPBAANLESEENHEID on (LH_LESEENHEID_FK = LE_ID)
INNER JOIN LOOPBAAN ON (LH_LOOPBAAN_FK = LB_ID)
WHERE
(
'2022/09/28' BETWEEN LB_VAN
AND LB_TOT
)
AND (
LE_ID in (8277, 8276, 8232)
)
GROUP BY
LE_AGENDA_FK,
LE_CODE,
LE_ID,
LE_KLAS_FK,
LE_KLASPARTITIE_FK,
LE_OMSCHRIJVING,
LE_VERANDERDDOOR,
LE_VERANDERDOP
),
LESEENHEIDLOOPBAANNULL AS (
SELECT
LE_AGENDA_FK,
LE_CODE,
LE_ID,
LE_KLAS_FK,
LE_KLASPARTITIE_FK,
LE_OMSCHRIJVING,
LE_VERANDERDDOOR,
LE_VERANDERDOP,
0 As AantalLln
FROM
LESEENHEID
where
LE_ID in (8277, 8276, 8232)
and EXISTS (
SELECT
*
FROM
LESEENHEIDLOOPBAAN
)
)
SELECT
*
FROM
LESEENHEIDLOOPBAAN
UNION
SELECT
*
FROM
LESEENHEIDLOOPBAANNULL ROWS 1000
Try this out using ROW_NUMBER:
SELECT * FROM (
SELECT
ROW_NUMBER () OVER (PARTITION BY LW_ID ORDER BY AANTALLN DESC) AS RN
,* FROM
(
SELECT * FROM
LESEENHEIDLOOPBAAN
UNION
SELECT
*
FROM
LESEENHEIDLOOPBAANNULL ROWS 1000
)
)
) WHERE RN = 1
This way you eliminate the duplicates.

Modify T-SQL Query to include/exclude colums depending on other columns value

I have the below query, that I need some help modifying. In the below Query I get the number of columns that are not null and the percentage:
SELECT COUNT(v.col) as num_not_null, COUNT(v.col) * 1.0 / COUNT(*) * 100 as percent_not_null, COUNT(*) as toltalColsNeedsFilled
FROM EFP_EmploymentUser t
CROSS APPLY (VALUES (t.ITAdvicedFirst),
(t.ITAdvicedSecond),
(t.ITDepartmentDone),
(t.CFOAdvicedFirst),
(t.CFOInfoProvided),
(t.CFOAdvicedSecond),
(t.CFODone),
(t.EconomyAdviced),
(t.EconomyDone),
(t.AcademyAdviced),
(t.AcademyDone),
(t.PublicatorAdviced),
(t.PublicatorDone),
(t.PortraitAdviced),
(t.PortraitDone),
(t.WhoIsWhoAdviced),
(t.WhoIsWhoDone),
(t.BogportalAdviced),
(t.BogportalDone),
(t.KeyCardAdviced),
(t.KeyCardDone) ) v(col)
WHERE ID = '19';
This returns in the case of ID 19:
num_not_null percent_not_null toltalColsNeedsFilled
5 23.809523809500 21
But I need to check if the following columns in the same table (Publicator,Bogportal,Academy) are filled with value 'yes', and depending on that I need to include or exclude som of the columns from my above query:
i.e.: IF Academy = YES then include t.AcademyAdviced & t.AcademyDone
IF Publicator= YES then include t.PublicatorDone & t.PortraitAdviced
IF Bogportal = YES then include t.BogportalAdviced & t.BogportalDone
Can anyone help me how to modifying the query to achive this? :-)
Best Regards
Stig
You can use UNION ALL and WHERE predicates to decide which columns to add to the unpivot:
SELECT COUNT(v.col) as num_not_null, COUNT(v.col) * 1.0 / COUNT(*) * 100 as percent_not_null, COUNT(*) as toltalColsNeedsFilled
FROM EFP_EmploymentUser t
CROSS APPLY (
SELECT * FROM
( VALUES (t.ITAdvicedFirst),
(t.ITAdvicedSecond),
(t.ITDepartmentDone),
(t.CFOAdvicedFirst),
(t.CFOInfoProvided),
(t.CFOAdvicedSecond),
(t.CFODone),
(t.EconomyAdviced),
(t.EconomyDone),
(t.PortraitAdviced),
(t.PortraitDone),
(t.WhoIsWhoAdviced),
(t.WhoIsWhoDone),
(t.KeyCardAdviced),
(t.KeyCardDone) ) v(col)
UNION ALL
SELECT *
FROM (VALUES (t.AcademyAdviced), (t.AcademyDone) ) v(col)
WHERE t.Academy = 'YES'
UNION ALL
SELECT *
FROM (VALUES (t.PublicatorDone), (t.PortraitAdviced) ) v(col)
WHERE t.Publicator = 'YES'
UNION ALL
SELECT *
FROM (VALUES (t.BogportalAdviced), (t.BogportalDone ) ) v(col)
WHERE t.Bogportal = 'YES'
) v
WHERE t.ID = '19';

ORACLE SQL Pivot Issue

I am trying to pivot a sql result. I need to do this all in the one query. The below is telling me invalid identifier for header_id. I am using an Oracle database.
Code
Select * From (
select ppd.group_id,g.group_name, ct.type_desc,ht.hos_cat_descr
from item_history ih, item ci, contract ppd,
header ch, group g, cd_std_type ct, cd_hos h,
cd_std_hospital_cat ht
where ih.item_id = ci.item_id
and ih.header_id = ch.header_id
and ci.hos_id = h.hos_id
and ih.item_id = ci.item_id
and ch.user_no = ppd.user_no
and ppd.group_id = g.group_id
and ch.header_type = ct.header_type_id
and ci.hos_id = h.hos_id
and h.cat_id = ht.cat_id
)
Pivot
(
count(distinct header_id) as Volume
For hos_cat_descr IN ('A')
)
Your inner query doesn't have header_id in its projection, so the pivot clause doesn't have that column available to use. You need to add it, either as:
Select * From (
select ppd.group_id,g.group_name, ct.type_desc,ht.hos_cat_descr,ih.header_id
---------------------------------------------------------------^^^^^^^^^^^^^
from ...
)
Pivot
(
count(distinct header_id) as Volume
For hos_cat_descr IN ('A')
)
or:
Select * From (
select ppd.group_id,g.group_name, ct.type_desc,ht.hos_cat_descr,ch.header_id
---------------------------------------------------------------^^^^^^^^^^^^^
from ...
)
Pivot
(
count(distinct header_id) as Volume
For hos_cat_descr IN ('A')
)
It doesn't really matter which, since those two values must be equal as they are part of a join condition.
You could achieve the same thing with simpler aggregation instead of a pivot, but presumably you are doing more work in the pivot really.

Getting Number of Common Values from 2 comma-seperated strings

I have a table that contains comma-separated values in a column In Postgres.
ID PRODS
--------------------------------------
1 ,142,10,75,
2 ,142,87,63,
3 ,75,73,2,58,
4 ,142,2,
Now I want a query where I can give a comma-separated string and it will tell me the number of matches between the input string and the string present in the row.
For instance, for input value ',142,87,', I want the output like
ID PRODS No. of Match
------------------------------------------------------------------------
1 ,142,10,75, 1
2 ,142,87,63, 2
3 ,75,73,2,58, 0
4 ,142,2, 1
Try this:
SELECT
*,
ARRAY(
SELECT
*
FROM
unnest(string_to_array(trim(both ',' from prods), ','))
WHERE
unnest = ANY(string_to_array(',142,87,', ','))
)
FROM
prods_table;
Output is:
1 ,142,10,75, {142}
2 ,142,87,63, {142,87}
3 ,75,73,2,58, {}
4 ,142,2, {142}
Add the cardinality(anyarray) function to the last column to get just a number of matches.
And consider changing your database design.
Check This.
select T.*,
COALESCE(No_of_Match,'0')
from TT T Left join
(
select ID,count(ID) No_of_Match
from (
select ID,unnest(string_to_array(trim(t.prods, ','), ',')) A
from TT t)a
Where A in ('142','87')
group by ID
)B
On T.Id=b.id
Demo Here
OutPut
If you install the intarray extension, this gets quite easy:
select id, prods, cardinality(string_to_array(trim(prods, ','), ',')::int[] & array[142,87])
from bad_design;
Otherwise it's a bit more complicated:
select bd.id, bd.prods, m.matches
from bad_design bd
join lateral (
select bd.id, count(v.p) as matches
from unnest(string_to_array(trim(bd.prods, ','), ',')) as l(p)
left join (
values ('142'),('87') --<< these are your input values
) v(p) on l.p = v.p
group by bd.id
) m on m.id = bd.id
order by bd.id;
Online example: http://rextester.com/ZIYS97736
But you should really fix your data model.
with data as
(
select *,
unnest(string_to_array(trim(both ',' from prods), ',') ) as v
from myTable
),
counts as
(
select id, count(t) as c from data
left join
( select unnest(string_to_array(',142,87,', ',') ) as t) tmp on tmp.t = data.v
group by id
order by id
)
select t1.id, t1.prods, t2.c as "No. of Match"
from myTable t1
inner join counts t2 on t1.id = t2.id;

Complex Query Limited SQL Knowledge

Im trying to write what I believe is a rather complicated SQL query but Im working with limited SQL knowledge. I have this query:
SELECT
analysisvalue.analysisid,
heatname,
analysistime,
sampletype,
grade,
productid,
element,
value
FROM
dbo.AnalysisValue
INNER JOIN
dbo.CAnalysis
ON
dbo.AnalysisValue.AnalysisID = dbo.CAnalysis.AnalysisID
WHERE
heatname = 'A7M0066'
ORDER BY
analysisvalue.analysisid ASC,
element
and this query:
SELECT
*
FROM
S_analysis
WHERE
heat_no = 'A7M0066'
I need to modify the second query to also include * from S_analysis WHERE heat_no = 'A7M0066' but also all records including those without that heat_no where analysis_datetime is between the analysis time of the last record from the first query and two hours that follow that time.
The S_analysis table structure for the second query is:
analysis_datetime(datetime, not null)
heat_no(varchar(7), not null)
comment(varchar(40), null)
Then about 30 other columns of type (real, null)
The table structure of AnalysisValue in the first query is:
AnalysisID(int, not null)
Element(char(9), not null)
Value(real, null)
The table structure of CAnalysis in the first query is:
AnalysisID(PK, int, not null)
HeatName(char(7), null)
AnalysisTime(datetime, null)
SampleType(char(5), null)
Grade(char(4), null)
ProductID(char(14), null)
Try this (never used sql-server, but it should produce your expected results)
SELECT
*
FROM
S_analysis
CROSS JOIN
(
SELECT TOP 1
analysistime
FROM
dbo.CAnalysis
WHERE
heatname = 'A7M0066'
ORDER BY
analysistime DESC ) c
WHERE
heat_no = 'A7M0066'
OR ( heat_no IS NULL AND s.analysis_datetime BETWEEN c.analysistime AND DATEADD(hh, 2, c.analysistime ) )
Since you need only top row from your 1st query .You can use Cross join with the 2nd query to get the desired result
;with cte as
(SELECT
analysisvalue.analysisid,
heatname,
analysistime,
sampletype,
grade,
productid,
element,
value
FROM
dbo.AnalysisValue
INNER JOIN
dbo.CAnalysis
ON
dbo.AnalysisValue.AnalysisID = dbo.CAnalysis.AnalysisID
WHERE
heatname = 'A7M0066'
)
SELECT
*
FROM
S_analysis s
CROSS JOIN (SELECT TOP 1 analysistime
FROM cte
ORDER BY analysisid desc
) c
WHERE s.heat_no = 'A7M0066' OR
(s.analysis_datetime BETWEEN c.analysistime AND DATEADD(hh, 2, c.analysistime ))