BigQuery Unexpected error "(" when creating TEMP TABLE - google-bigquery

I got Syntax error: Unexpected "(" at [2:1] while trying to create a temp table in BigQuery. When I remove the "(", I get Syntax error: Expected end of input but got identifier "string" at [3:17]. How do I correct this in BigQuery?
CREATE TABLE #PercentPopulationVaccinated -- "#name" is SQL Server temp-table syntax; BigQuery reads "#" as the start of a comment, so the parser sees CREATE TABLE followed directly by "(" at [2:1]
(
continent string,
location string,
date datetime,
population int64,
new_vaccinations int64,
AccumVaccinations int64
) -- no ";" here, so the INSERT below is not parsed as a separate statement
INSERT INTO #PercentPopulationVaccinated -- same "#" prefix problem as on the CREATE TABLE line
SELECT dea.continent, dea.location, dea.date, dea.population,vac.new_vaccinations
, SUM(vac.new_vaccinations) OVER (PARTITION BY dea.location ORDER BY dea.location,dea.date) AS AccumVaccinations -- running total of new_vaccinations per location
FROM `portfolio-project-356519.Covid.CovidDeaths` dea
JOIN `portfolio-project-356519.Covid.CovidVaccinations` vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL -- no ";" before the final SELECT either
SELECT *, (AccumVaccinations/population)*100 -- running total as a percentage of population
FROM #PercentPopulationVaccinated

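This is the right way to create a temporary table in BigQuery, based on the code you shared. Use CREATE TEMP TABLE without the # prefix (BigQuery treats # as the start of a comment), and terminate each statement with a semicolon.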
-- Temporary table holding, per location and date, the running total of
-- vaccinations. BigQuery uses CREATE TEMP TABLE (no "#" prefix) and requires
-- ";" between the statements of a multi-statement query.
CREATE TEMP TABLE PercentPopulationVaccinated
(
  continent STRING,
  location STRING,
  date DATETIME,
  population INT64,
  new_vaccinations INT64,
  AccumVaccinations INT64
);

-- Explicit column list so the insert does not depend on the table's column order.
INSERT INTO PercentPopulationVaccinated
  (continent, location, date, population, new_vaccinations, AccumVaccinations)
SELECT
  dea.continent,
  dea.location,
  dea.date,
  dea.population,
  vac.new_vaccinations,
  -- Running total per location. Ordering by date alone is enough because
  -- location is constant inside each partition.
  SUM(vac.new_vaccinations) OVER (
    PARTITION BY dea.location
    ORDER BY dea.date
  ) AS AccumVaccinations
FROM `portfolio-project-356519.Covid.CovidDeaths` AS dea
INNER JOIN `portfolio-project-356519.Covid.CovidVaccinations` AS vac
  ON dea.location = vac.location
  AND dea.date = vac.date
WHERE dea.continent IS NOT NULL;

-- SAFE_DIVIDE yields NULL instead of a division-by-zero error when
-- population is 0; the result column is named instead of left anonymous.
SELECT
  continent,
  location,
  date,
  population,
  new_vaccinations,
  AccumVaccinations,
  SAFE_DIVIDE(AccumVaccinations, population) * 100 AS PercentVaccinated
FROM PercentPopulationVaccinated;

Related

Clause is invalid in views, inline functions, derived tables, subqueries, and common table expressions

Here is the query
-- CTE that computes a running total of new vaccinations per location.
-- This is the failing snippet from the question (SQL Server syntax).
WITH PopVSVac (continent, location, date, population, new_vaccinations, RollingPeopleVaccinated) AS
(
SELECT
DEATHS.continent, DEATHS.location, DEATHS.date,
DEATHS.population, VAC.new_vaccinations,
SUM(CAST(VAC.new_vaccinations AS INT)) OVER (PARTITION BY DEATHS.location
ORDER BY DEATHS.location, DEATHS.date) AS RollingPeopleVaccinated
FROM
PortfolioProject1..CovidDeaths AS DEATHS
JOIN
PortfolioProject1..CovidVaccinations AS VAC ON DEATHS.location = VAC.location
AND DEATHS.date = VAC.date
WHERE
DEATHS.continent IS NOT NULL
-- This ORDER BY is the clause the quoted error calls invalid in common table
-- expressions; SQL Server only accepts it there together with TOP, OFFSET or FOR XML.
ORDER BY
2, 3
)
-- NOTE(review): no statement follows the CTE in this snippet; a WITH clause has
-- to be followed by the query that uses it, which would explain
-- "incorrect syntax near ')'" -- confirm against the full original script.
If I add OFFSET 0 ROWS after "ORDER BY 2, 3", I get the error
"Incorrect syntax near ')'".
I also tried adding "TOP 1000 *" next to SELECT, but then PopVSVac gives the error 'PopVSVac has more columns than were specified in the column list'.
Please help.

Syntax error: Expected keyword AS but got "("

I have been getting the error message when trying to write the CTE SQL code.
-- Failing snippet from the question: running total of new vaccinations per location.
-- The parenthesized column list after the CTE name is what triggers
-- 'Expected keyword AS but got "("' in this post.
WITH PopulationVancine (Date, Location, population, continent, new_vacinations, RollingPeopleVaccinated)
AS (
SELECT Death.date,Death.location, Death.population, Death.continent, vacine.new_vaccinations, SUM (vacine.new_vaccinations) OVER (PARTITION BY Death.location order by Death.location, Death.date) AS RollingPeopleVaccinated
FROM `my-data-project-96387.PortfolioProjectSamp.CovidDeath` as Death
INNER JOIN `my-data-project-96387.PortfolioProjectSamp.CovidVacination` as vacine
ON Death.location = vacine.location
AND Death.date = vacine.date
WHERE Death.continent is not null
)
Select *
From PopulationVancine
I would appreciate your help with this error message.
I was working on the same project and got this error too, even though I followed the script. I removed the column list (Date, Location, population, continent, new_vacinations, RollingPeopleVaccinated) after the CTE name and it worked, with the same results as the script. Try this:
-- Running total of new vaccinations per location. No column list follows the
-- CTE name, so the output column names come from the SELECT list itself.
WITH PopulationVancine AS (
  SELECT
    d.date,
    d.location,
    d.population,
    d.continent,
    v.new_vaccinations,
    SUM(v.new_vaccinations) OVER (
      PARTITION BY d.location
      ORDER BY d.location, d.date
    ) AS RollingPeopleVaccinated
  FROM `my-data-project-96387.PortfolioProjectSamp.CovidDeath` AS d
  INNER JOIN `my-data-project-96387.PortfolioProjectSamp.CovidVacination` AS v
    ON d.location = v.location
    AND d.date = v.date
  WHERE d.continent IS NOT NULL
)
SELECT *
FROM PopulationVancine
Emmanuel Korli's answer worked brilliantly. Maybe it's something specific for Big Query or Alex's work, but the query does work without the first
"(Date, Location, population, continent, new_vacinations, RollingPeopleVaccinated)"
In my case it looks like this:
-- Running total of vaccinations per location, then that total as a
-- percentage of the population.
WITH PopVsVac AS (
  SELECT
    dea.continent,
    dea.location,
    dea.date,
    dea.population,
    vac.new_vaccinations,
    SUM(vac.new_vaccinations) OVER (
      PARTITION BY dea.location
      ORDER BY dea.location, dea.date
    ) AS rollout_vaccines
  FROM COVID.covid_deaths AS dea
  INNER JOIN COVID.covid_vaccinations AS vac
    ON dea.location = vac.location
    AND dea.date = vac.date
  WHERE dea.continent IS NOT NULL
)
SELECT
  *,
  (rollout_vaccines / population) * 100 AS percent_vaccinated
FROM PopVsVac
-- The sort belongs on the outermost query: an ORDER BY inside the CTE does not
-- guarantee the order of the final result. Columns are named instead of using
-- positions 2, 3 (location, date).
ORDER BY
  location,
  date

Aggregate column values into a list produces an error

The following query
-- Sample rows of (date, queue name, SN).
-- "#name" denotes a local temporary table, which is created with CREATE TABLE.
-- DECLARE ... TABLE is only valid for table variables, whose names start with "@".
CREATE TABLE #SNH
(
    dt date,
    QueueName varchar(10),
    SN varchar(10)
);

INSERT INTO #SNH (Dt, QueueName, SN)
VALUES ('2001-04-04', 'Queue01', 'Q01SN01'),
       ('2001-04-05', 'Queue01', 'Q01SN01'),
       ('2001-04-06', 'Queue01', 'Q01SN01'),
       ('2001-04-04', 'Queue02', 'Q02SN01'),
       ('2001-04-05', 'Queue02', 'Q02SN01'),
       ('2001-04-06', 'Queue02', 'Q02SN02');

-- Sample rows of (date, queue name).
CREATE TABLE #QH
(
    DT date,
    QueueName varchar(10)
);

INSERT INTO #QH (DT, QueueName)
VALUES ('2001-04-04', 'Queue01'),
       ('2001-04-05', 'Queue01'),
       ('2001-04-06', 'Queue01'),
       ('2001-04-04', 'Queue02'),
       ('2001-04-05', 'Queue02'),
       ('2001-04-06', 'Queue02');

-- One row per distinct (queue, SN) pair with the queue's first and last date.
-- The window functions compute min/max per queue; DISTINCT collapses the
-- repeated rows produced by the join on QueueName.
SELECT DISTINCT
    q.QueueName AS clnQueueName,
    MIN(q.Dt) OVER (PARTITION BY q.QueueName) AS clnStartDate,
    MAX(q.Dt) OVER (PARTITION BY q.QueueName) AS clnEndDate,
    s.SN
FROM #QH AS q
LEFT JOIN #SNH AS s
    ON s.QueueName = q.QueueName;
returns this output:
clnQueueName
clnStartDate
clnEndDate
SN
Queue01
2001-04-04
2001-04-06
Q01SN01
Queue02
2001-04-04
2001-04-06
Q02SN01
Queue02
2001-04-04
2001-04-06
Q02SN02
which I'm aiming to aggregate into a comma separated list with
SELECT DISTINCT
q.QueueName clnQueueName, -- plain (non-aggregated) column next to STRING_AGG with no GROUP BY: this is the column Msg 8120 complains about
MIN(q.Dt) OVER (PARTITION BY q.QueueName) clnStartDate,
MAX(q.Dt) OVER (PARTITION BY q.QueueName) clnEndDate,
STRING_AGG(s.SN,',') -- aggregate that is meant to build the comma-separated SN list
FROM
#QH q
LEFT JOIN
#SNH s ON s.QueueName = q.QueueName AND s.Dt = q.Dt -- unlike the first query, this join also matches on the date
as follows
clnQueueName
clnStartDate
clnEndDate
SN
Queue01
2001-04-04
2001-04-06
Q01SN01
Queue02
2001-04-04
2001-04-06
Q02SN01,Q02SN02
Instead I get:
Msg 8120, Level 16, State 1, Line 36
Column '#QH.QueueName' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause
Sorry, I don't get it.
#QH.QueueName isn't even mentioned in the SELECT, only q.QueueName.
What am I missing here?
As per the docs, a group by clause is required if the string_agg is not the only item being selected. Once grouping correctly you no longer need window functions or distinct.
To obtain only distinct values of SN you need to group twice, the first time in a sub-query (in this case a CTE) to get distinct values of SN and the second time with string_agg to get distinct values of QueueName.
-- Step 1: group by (queue, SN) so each SN value appears once per queue,
-- keeping the first and last date seen for that pair.
WITH cte AS (
    SELECT
        q.QueueName AS clnQueueName,
        MIN(q.Dt) AS clnStartDate,
        MAX(q.Dt) AS clnEndDate,
        s.SN
    FROM #QH AS q
    LEFT JOIN #SNH AS s
        ON s.QueueName = q.QueueName
        AND s.Dt = q.Dt
    GROUP BY
        q.QueueName,
        s.SN
)
-- Step 2: group by queue and concatenate its distinct SN values.
-- WITHIN GROUP fixes the order of the list; without it STRING_AGG may
-- return the values in any order.
SELECT
    clnQueueName,
    MIN(clnStartDate) AS clnStartDate,
    MAX(clnEndDate) AS clnEndDate,
    STRING_AGG(SN, ',') WITHIN GROUP (ORDER BY SN) AS SN
FROM cte
GROUP BY clnQueueName;
Returns:
clnQueueName
clnStartDate
clnEndDate
SN
Queue01
2001-04-04
2001-04-06
Q01SN01
Queue02
2001-04-04
2001-04-06
Q02SN01,Q02SN02

Syntax error: Expected end of input but got keyword INSERT at [11:1] error in bigquery

Syntax error: Expected end of input but got keyword INSERT at [11:1] error in bigquery
create table percentpopulationvaccinated
(
continent string,
Location string,
Date datetime,
population numeric,
new_vaccinations numeric,
peoplevaccinated numeric
) -- no ";" here: the parser expects the statement to end, hence "Expected end of input but got keyword INSERT"
insert into percentpopulationvaccinated
select
dea.continent, dea.location, dea.date, dea.population,
new_vaccinations,
sum(vac.new_vaccinations) over (partition by dea.location order by dea.location, dea.date) as peoplevaccinated -- running total of new_vaccinations per location
from
my-protfolio-324718.sql_code.covid_deaths dea -- NOTE(review): hyphenated project id is unquoted here; confirm whether backticks are needed
join
my-protfolio-324718.sql_code.covid_vac vac on dea.location = vac.location
and dea.date = vac.date -- no ";" before the final select either
select
*,
(peoplevaccinated / population) * 100
from
percentpopulationvaccinated
The CREATE TABLE is one statement, the INSERT INTO is the second, and the final SELECT is the third. In BigQuery you must separate them with a semicolon (;) so the interpreter can run each statement in order.

Keep getting "Cannot access field item on a value with type ARRAY<STRUCT<hitNumber INT64, time INT64, hour INT64, ...>>"

I'm copying and pasting the code from the Google Analytics Cookbook:
-- Query as quoted from the cookbook: sum of hit numbers divided by hit count, per product SKU.
-- NOTE(review): the square-bracket table reference looks like BigQuery legacy SQL;
-- the typographic quotes mark placeholders to be replaced, not valid identifiers.
-- NOTE(review): the outer SELECT uses the prefix "one", which is not defined as an alias in this query.
SELECT one.hits.item.productSku AS ProductSku, ( sum_of_hit_number / total_hits ) AS avg_hit_number
FROM (
SELECT hits.item.productSku, SUM(hits.hitNumber) AS sum_of_hit_number
FROM [‘GA Dataset Name’ ]
WHERE hits.item.productSku IS NOT NULL
AND totals.transactions>=1
GROUP BY hits.item.productSku ) AS ‘Alias_Name_1’
JOIN (
SELECT hits.item.productSku, COUNT( fullVisitorId ) AS total_hits
FROM [‘GA Dataset Name’ ]
WHERE hits.item.productSku IS NOT NULL
AND totals.transactions>=1
GROUP BY hits.item.productSku ) AS ‘Alias_Name_2’
ON Alias_Name_1.hits.item.productSku = Alias_Name_2.hits.item.productSku;
but I edited it to this:
SELECT one.hits.item.productSku AS ProductSku, ( sum_of_hit_number / total_hits ) AS avg_hit_number -- NOTE(review): "one" is not an alias defined below (the subqueries are ProductSKU_Item and product2_item)
FROM (
SELECT hits.item.productSku, SUM(hits.hitNumber) AS sum_of_hit_number -- per the quoted error, hits is an ARRAY<STRUCT<...>>, so hits.item cannot be read without unnesting hits first
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20170801`
WHERE hits.item.productSku IS NOT NULL
AND totals.transactions>=1
GROUP BY hits.item.productSku ) AS ProductSKU_Item
JOIN (
SELECT hits.item.productSku, COUNT( fullVisitorId ) AS total_hits -- same array-field access problem as in the first subquery
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20170801`
WHERE hits.item.productSku IS NOT NULL
AND totals.transactions>=1
GROUP BY hits.item.productSku ) AS product2_item
ON ProductSKU_Item.hits.item.productSku = product2_item.hits.item.productSku;
and I'm still getting an error that says Cannot access field item on a value with type ARRAY<STRUCT<hitNumber INT64, time INT64, hour INT64, ...>> at [5:13] but when I try the UNNEST function, it still isn't working and I'm not sure what I'm doing wrong.
The error which I made with unnest was to try to unnest hits.item which is a struct. hits is the array which needs to be unnested. Try this:
-- Per product SKU: sum of hit numbers divided by the number of hits, for
-- sessions with at least one transaction. hits is an array column, so each
-- session row is expanded into one row per hit before field access.
SELECT
  hit.item.productSku,
  SUM(hit.hitNumber) / COUNT(fullVisitorId) AS avg_hit_number
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20170801` AS sessions
CROSS JOIN UNNEST(sessions.hits) AS hit
WHERE hit.item.productSku IS NOT NULL
  AND totals.transactions >= 1
GROUP BY hit.item.productSku