Joining two queries that have INNER JOINS in them - sql

I'm trying to join these two SQL queries together. My data is at https://policevideorequests.cartodb.com/tables/seattle_police_govqa_audit_trails which has a Postgresql SQL API.
SELECT
t1.customer_id, t1.c,
t2.customer_name, t2.customer_email, t2.customer_email_domain
FROM
(SELECT
a.customer_id, count(a.customer_id) as c
FROM
(SELECT customer_id, reference_no
FROM seattle_police_govqa_audit_trails
WHERE customer_id NOT IN (5, 0, -1)
GROUP BY customer_id, reference_no) a
GROUP BY
a.customer_id
ORDER BY
count(a.customer_id) DESC) t1
INNER JOIN
(SELECT DISTINCT
customer_id,
INITCAP(LOWER(SUBSTRING(new_value FROM 'Dear (.*?):</div>'))) as customer_name,
LOWER(SUBSTRING(new_value FROM 'login:<b>(.*?)</b>')) as customer_email,
LOWER(SUBSTRING(new_value FROM 'login:<b>.*?#(.*?)</b>')) as customer_email_domain
FROM
seattle_police_govqa_audit_trails
WHERE
SUBSTRING(new_value FROM 'Dear (.*?):</div>') IS NOT NULL) t2 ON t1.customer_id = t2.customer_id
ORDER BY
t1.c DESC
SELECT DISTINCT
t1.new_value as requester_type, t2.customer_id
FROM
(SELECT
reference_no, new_value
FROM
seattle_police_govqa_audit_trails
WHERE
action_desc = 154) t1
INNER JOIN
(SELECT
reference_no, customer_id
FROM
seattle_police_govqa_audit_trails
WHERE
customer_id NOT IN (0, -1, 5)) t2 ON t1.reference_no = t2.reference_no
My attempt at joining the two:
SELECT t1.customer_id,t3.requester_typer,t1.c,t2.customer_name,t2.customer_email,t2.customer_email_domain,t2.customer_email_domain_tld FROM (SELECT a.customer_id,count(a.customer_id) as c FROM (SELECT customer_id, reference_no FROM seattle_police_govqa_audit_trails WHERE customer_id NOT IN (5,0,-1) GROUP BY customer_id,reference_no) a GROUP BY a.customer_id ORDER BY count(a.customer_id) DESC) t1
INNER JOIN (SELECT DISTINCT customer_id,INITCAP(LOWER(SUBSTRING(new_value FROM 'Dear (.*?):</div>'))) as customer_name,LOWER(SUBSTRING(new_value FROM 'login:<b>(.*?)</b>')) as customer_email, LOWER(SUBSTRING(new_value FROM 'login:<b>.*?#(.*?)</b>')) as customer_email_domain, LOWER(SUBSTRING(new_value FROM 'login:<b>.*?#.*?\.(.*?)</b>')) as customer_email_domain_tld FROM seattle_police_govqa_audit_trails WHERE SUBSTRING(new_value FROM 'Dear (.*?):</div>') IS NOT NULL) t2
ON t1.customer_id = t2.customer_id ORDER BY t1.c DESC
INNER JOIN (SELECT DISTINCT t1.new_value as requester_type,t2.customer_id FROM (SELECT reference_no,new_value FROM seattle_police_govqa_audit_trails WHERE action_desc = 154) t1
INNER JOIN (SELECT reference_no,customer_id FROM seattle_police_govqa_audit_trails WHERE customer_id NOT IN (0,-1,5)) t2
ON t1.reference_no = t2.reference_no) as t3
ON t2.customer_id = t3.customer_id
I get the error "syntax error near INNER"

The problem in your SQL query is that you tried to keep ORDER BY in the middle. The ORDER BY clause must be moved all the way to the back of the query, because ordering is applied to the entire query, not to its parts.

Try this:
-- Per-customer request counts, joined to the contact details parsed out of the
-- audit trail (t2) and to the requester type (t3).
-- The single ORDER BY sits at the very end of the statement, and the select
-- list uses t3.requester_type, which is the alias t3 actually exposes.
SELECT
    t1.customer_id,
    t3.requester_type,
    t1.c,
    t2.customer_name,
    t2.customer_email,
    t2.customer_email_domain,
    t2.customer_email_domain_tld
FROM (
    -- One row per customer: count of its distinct (customer_id, reference_no) pairs.
    -- No ORDER BY here; the final ORDER BY determines the output order.
    SELECT
        a.customer_id,
        count(a.customer_id) AS c
    FROM (
        SELECT customer_id, reference_no
        FROM seattle_police_govqa_audit_trails
        WHERE customer_id NOT IN (5, 0, -1)
        GROUP BY customer_id, reference_no
    ) a
    GROUP BY a.customer_id
) t1
INNER JOIN (
    -- Contact details extracted from new_value with regular expressions.
    SELECT DISTINCT
        customer_id,
        INITCAP(LOWER(SUBSTRING(new_value FROM 'Dear (.*?):</div>'))) AS customer_name,
        LOWER(SUBSTRING(new_value FROM 'login:<b>(.*?)</b>')) AS customer_email,
        LOWER(SUBSTRING(new_value FROM 'login:<b>.*?#(.*?)</b>')) AS customer_email_domain,
        LOWER(SUBSTRING(new_value FROM 'login:<b>.*?#.*?\.(.*?)</b>')) AS customer_email_domain_tld
    FROM seattle_police_govqa_audit_trails
    WHERE SUBSTRING(new_value FROM 'Dear (.*?):</div>') IS NOT NULL
) t2
    ON t1.customer_id = t2.customer_id
INNER JOIN (
    -- Requester type per customer: rows with action_desc = 154 matched to the
    -- customer rows that share the same reference_no.
    -- Inner aliases are rt / cust so they do not shadow the outer t1 / t2.
    SELECT DISTINCT
        rt.new_value AS requester_type,
        cust.customer_id
    FROM (
        SELECT reference_no, new_value
        FROM seattle_police_govqa_audit_trails
        WHERE action_desc = 154
    ) rt
    INNER JOIN (
        SELECT reference_no, customer_id
        FROM seattle_police_govqa_audit_trails
        WHERE customer_id NOT IN (0, -1, 5)
    ) cust
        ON rt.reference_no = cust.reference_no
) AS t3
    ON t2.customer_id = t3.customer_id
ORDER BY t1.c DESC

Related

Snowflake: most efficient way to join two tables that use the WITH clause instead of sub-queries

Here's the code I've written. I'm just wondering if there's any way to make it more efficient. I'm self-joining each of two separate tables, and then I want to join the two results to each other:
SELECT
T1.CITY,
T1.TIMESTAMP,
T1.VALUE AS PARTICULATE_LEVEL,
T2.VALUE AS POLLUTION_LEVEL
FROM
(
WITH PARTICULATE_DATA (CITY,TIMESTAMP, VALUE) AS
(
SELECT LOCATION,
TIMESTAMP,
(MICRO_VALUE * 0.097) AS VALUE
FROM
CONSOLIDATED_TABLE
)
select a.*,
max(b.VALUE) as MAX_PAR_PREVIOUS_24_HOURS,
from PARTICULATE_DATA as a
left join PARTICULATE_DATA as b
on a.CITY = b.CITY and b.TIMESTAMP between dateadd(day, -1, a.TIMESTAMP) and a.TIMESTAMP
group by 1,2,3,
order by 1,2
) T1
INNER JOIN
(
WITH POLLUTION_DATA (CITY,TIMESTAMP, VALUE) AS
(
SELECT LOCATION,
TIMESTAMP,
(VALUE ) AS VALUE
FROM
CONSOLIDATED_TABLE_2
)
select a.*,
max(b.VALUE) as MAX_POLLUTION_LEVEL_PREVIOUS_24_HOURS,
from POLLUTION_DATA as c
left join POLLUTION_DATA as d
on a.CITY = b.CITY and b.TIMESTAMP between dateadd(day, -1, a.TIMESTAMP) and a.TIMESTAMP
group by 1,2,3,
order by 1,2
) T2
ON T1.CITY = T2.CITY
Well, there are several things happening here that are a waste of time:
The two ORDER BYs should not be present. And if you must self-join, the DATEADD should be moved into the CTE to improve that section:
-- Self-join variant with the 24-hour lower bound precomputed once per row
-- inside the CTE (ts_m1day) instead of inside the join condition.
-- The CTE column list names all four selected columns, and the outer query
-- reads a.city because the CTE renames location to city.
WITH particulate_data (city, timestamp, ts_m1day, value) AS (
    SELECT
        location,
        timestamp,
        dateadd(day, -1, timestamp) AS ts_m1day,
        micro_value * 0.097 AS value
    FROM consolidated_table
)
SELECT
    a.city,
    a.timestamp,
    a.value,
    max(b.value) AS max_par_previous_24_hours
FROM particulate_data AS a
LEFT JOIN particulate_data AS b
    ON a.city = b.city
    AND b.timestamp BETWEEN a.ts_m1day AND a.timestamp
GROUP BY
    a.city,
    a.timestamp,
    a.value
Then, on the join between T1 and T2 you are only joining on CITY, so you will get the T2 rows for EVERY T1 timestamp. It may also be the case that the timestamps of these two data sources do not overlap.
But if you roll the timestamp up (truncate it) to the day/date, you can aggregate those values with MAX and thus avoid the self-join. That gives a much simpler window: the max value within each "day", versus the max over the last 24 hours of each record.
How I would write this SQL:
-- Daily maximum per city from each source, joined on city AND day so each
-- particulate row meets only the pollution row for the same day.
-- The final select reads max_par / max_pollution_level, the columns the CTEs
-- actually expose.
WITH particulate_data AS (
    SELECT
        location AS city,
        timestamp::date AS day,
        max(micro_value * 0.097) AS max_par
    FROM consolidated_table
    GROUP BY
        location,
        timestamp::date
), pollution_data AS (
    SELECT
        location AS city,
        timestamp::date AS day,
        max(value) AS max_pollution_level
    FROM consolidated_table_2
    GROUP BY
        location,
        timestamp::date
)
SELECT
    t1.city,
    t1.day,
    t1.max_par AS particulate_level,
    t2.max_pollution_level AS pollution_level
FROM particulate_data AS t1
INNER JOIN pollution_data AS t2
    ON t1.city = t2.city
    AND t1.day = t2.day

How to use columns from the left table in where clause in the right select statement in SQL Server join statement

I want to join a table with the output of select statement where I need to use a column last_order_date from Table1 in the WHERE clause in the select statement.
For example, see this screenshot:
I tried the following code:
SELECT
*
FROM
Table1 t1
LEFT JOIN
(SELECT prod_id, SUM(sales) sales_sum
FROM Table2
WHERE transaction_date BETWEEN t1.last_order_date AND CAST(GETDATE()-1 AS DATE)
GROUP BY prod_id) t2
ON t1.prod_id = t2.prod_id
Apparently, the problem is that I can't use t1.last_order_date in the WHERE clause on the right-hand side of the join. Can anyone help?
You can try to use OUTER APPLY
-- OUTER APPLY evaluates the derived table once per Table1 row, so the inner
-- query may reference t1 columns (which a plain joined subquery cannot).
-- Table1 rows for which the inner query returns nothing are kept, with NULLs.
SELECT *
FROM
Table1 t1
OUTER APPLY
(
-- sales summed per product, restricted to the window that starts at this
-- row's last_order_date and ends yesterday
SELECT t2.prod_id, SUM(t2.sales) sales_sum
FROM Table2 t2
WHERE t2.transaction_date BETWEEN t1.last_order_date AND CAST(GETDATE()-1 AS DATE)
AND t1.prod_id = t2.prod_id
GROUP BY t2.prod_id
) t2
Judging from your expected result, I think you can also use a LEFT JOIN like this:
-- Joins Table2 directly, with both the date window and the prod_id match in
-- the ON clause, so Table1 rows without matching sales are still returned.
SELECT t1.prod_id,
MIN(t1.last_order_date) last_order_date,
-- ISNULL turns the NULL sales of unmatched rows into 0 before summing
SUM(ISNULL(t2.sales,0)) sales
FROM
Table1 t1
LEFT JOIN Table2 t2
ON t2.transaction_date BETWEEN t1.last_order_date
AND CAST(GETDATE()-1 AS DATE)
AND t1.prod_id = t2.prod_id
GROUP BY t1.prod_id

Achieve same results without common table expression

My SQL code produces the quantity shipped per month, along with the per-month minimum, maximum and standard deviation. I have done it with a CTE, but now I want to do it without a CTE.
;WITH QTY_T AS(
SELECT
YEAR(SHIP_DATE) [Year],
MONTH(SHIP_DATE) [Month],
T1.PLANT AS PLANTS,
WC AS W_C,
T2.SHIP_TO AS SHIP_TO,
T1.PARTS AS PARTS,
SUM([QTY_MII]) AS [QTY_MONTH]
FROM TABLE1 T1
INNER JOIN
TABLE2 T2
ON
T2.OBD = T1.OBD
INNER JOIN
TABLE3 T3
ON T1.OBD=T3.OBD AND T1.ITEM = T3.ITEM AND T1.PLANT = T3.PLANT
INNER JOIN
TABLE4 T4
ON T3.SHIP_LBL = T4.HU_CODE AND T4.STATUS ='SHIPPED'AND T4.PLANT = T3.PLANT
GROUP BY
T1.PLANT,WC,SHIP_TO,T1.PARTS,YEAR(SHIP_DATE),MONTH(SHIP_DATE)
) SELECT
PLANTS,W_C,PARTS,SHIP_TO,
ROUND(AVG(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH,
ROUND(MIN(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH_MIN,
ROUND(MAX(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH_MAX,
ROUND(AVG(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH_AVG,
ROUND(STDEV(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH_STD
FROM QTY_T
GROUP BY PLANTS,W_C,QTY_T.SHIP_TO,QTY_T.PARTS
You can use a subquery (derived table) instead of the CTE:
-- Same aggregation as the CTE version, with the monthly roll-up moved into a
-- derived table (QTY_T).
SELECT
    PLANTS,
    W_C,
    PARTS,
    SHIP_TO,
    ROUND(AVG(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH,
    ROUND(MIN(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH_MIN,
    ROUND(MAX(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH_MAX,
    ROUND(AVG(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH_AVG,
    ROUND(STDEV(QTY_MONTH),3) AS QTY_SHIPPED_PER_MONTH_STD
FROM (
    -- One row per PLANT / WC / SHIP_TO / PARTS / year / month with the summed quantity.
    SELECT
        YEAR(SHIP_DATE) AS [Year],
        MONTH(SHIP_DATE) AS [Month],
        T1.PLANT AS PLANTS,
        WC AS W_C,
        T2.SHIP_TO AS SHIP_TO,
        T1.PARTS AS PARTS,
        SUM([QTY_MII]) AS [QTY_MONTH]
    FROM TABLE1 T1
    INNER JOIN TABLE2 T2
        ON T2.OBD = T1.OBD
    INNER JOIN TABLE3 T3
        ON T1.OBD = T3.OBD AND T1.ITEM = T3.ITEM AND T1.PLANT = T3.PLANT
    INNER JOIN TABLE4 T4
        ON T3.SHIP_LBL = T4.HU_CODE AND T4.STATUS = 'SHIPPED' AND T4.PLANT = T3.PLANT
    -- PLANT exists in T1, T3 and T4, so it must be qualified here.
    GROUP BY
        T1.PLANT, WC, T2.SHIP_TO, T1.PARTS, YEAR(SHIP_DATE), MONTH(SHIP_DATE)
) AS QTY_T
GROUP BY PLANTS, W_C, QTY_T.SHIP_TO, QTY_T.PARTS

How to create a temp table in PostgreSQL?

I'm trying to use temp table to simplify my query. At the beginning I used WITH, which was not recognized if I'm not joining each table specifically. What's the best way to approach this query? what's wrong with this syntax?
For the account that purchased the most (in total over their lifetime as a customer) standard_qty paper, how many accounts still had more in total purchases?
create temp table t1 as (
SELECT
a.id as account_id,
SUM(o.standard_qty) as all_std_qty
FROM
accounts a
JOIN orders o ON (a.id = o.account_id)
GROUP BY
1
order by
2 desc
limit
1
)
create temp table t2 as (
SELECT
a.id as account_id,
SUM(o.total) as total_purchases
FROM
accounts a
JOIN orders o ON (a.id = o.account_id)
GROUP BY
1
)
create temp table t3 as (
SELECT
t1.account_id,
t2.total_purchases as total_pur FROM
t1
JOIN t2
ON (t1.account_id = t2.account_id)
)
SELECT
count(a.id) as count_ids
FROM
accounts a
JOIN orders o ON (a.id = o.account_id)
WHERE
o.total > t3.total_pur
I think the problem is that you reference t3 in the WHERE clause without ever joining it. Please try the query below:
-- Counts the accounts whose lifetime total purchases exceed those of the
-- account that bought the most standard_qty paper.
WITH t1 AS (
    -- the single account with the largest lifetime standard_qty
    SELECT
        a.id AS account_id,
        SUM(o.standard_qty) AS all_std_qty
    FROM accounts a
    JOIN orders o ON (a.id = o.account_id)
    GROUP BY a.id
    ORDER BY all_std_qty DESC
    LIMIT 1
), t2 AS (
    -- lifetime total purchases of every account
    SELECT
        a.id AS account_id,
        SUM(o.total) AS total_purchases
    FROM accounts a
    JOIN orders o ON (a.id = o.account_id)
    GROUP BY a.id
), t3 AS (
    -- lifetime total purchases of the top standard_qty account (one row)
    SELECT
        t1.account_id,
        t2.total_purchases AS total_pur
    FROM t1
    JOIN t2 ON (t1.account_id = t2.account_id)
)
-- t3 has exactly one row, so the CROSS JOIN just attaches its threshold to
-- every account. Comparing per-account totals (t2), not individual orders,
-- against that threshold answers "how many accounts had more in total".
SELECT
    count(t2.account_id) AS count_ids
FROM t2
CROSS JOIN t3
WHERE t2.total_purchases > t3.total_pur

How to find the max value without subquery

To get the below result set I wrote following SQL:
SELECT t1.FilmName,
t2.CountryName,
t1.FilmRunTimeMinutes
FROM Film as t1
INNER JOIN country as t2 on t1.FilmCountryId = t2.CountryID
WHERE t1.FilmRunTimeMinutes = ( SELECT max(t2.FilmRunTimeMinutes)
FROM film as t2
WHERE t2.FilmCountryId = t1.FilmCountryId
)
ORDER BY FilmRunTimeMinutes DESC
I read this Link and tried the same method, but I could not get it to work. So how can I get the same result set using a LEFT OUTER JOIN?
Film table has those columns:
FilmId --PK
FilmName
FilmCountryId --FK
FilmRunTimeMinutes
Country table has those columns:
CountryId --PK
CountryName
Thanks in advance.
use Row_Number window function
-- TOP 1 WITH TIES keeps every row that ties with the first row on the ORDER BY
-- list. Ordering the inner query by ROW_NUMBER() alone makes each country's
-- rn = 1 row tie, so one longest film per country is returned. A second sort
-- key in that ORDER BY would narrow the ties to the globally longest film only,
-- so the display ordering is applied in the outer query instead.
SELECT
    longest.FilmName,
    longest.CountryName,
    longest.FilmRunTimeMinutes
FROM (
    SELECT TOP 1 WITH TIES
        t1.FilmName,
        t2.CountryName,
        t1.FilmRunTimeMinutes
    FROM Film AS t1
    INNER JOIN country AS t2
        ON t1.FilmCountryId = t2.CountryID
    ORDER BY ROW_NUMBER() OVER (PARTITION BY t1.FilmCountryId ORDER BY t1.FilmRunTimeMinutes DESC)
) AS longest
ORDER BY longest.FilmRunTimeMinutes DESC;
or use CTE/Sub-Select
-- Numbers the films within each country by descending run time (Rn), then
-- keeps only the first film (Rn = 1) of every country.
WITH cte
AS (SELECT t1.FilmName,
t2.CountryName,
t1.FilmRunTimeMinutes,
Rn = Row_number() OVER(partition BY FilmCountryId ORDER BY FilmRunTimeMinutes DESC)
FROM Film AS t1
INNER JOIN country AS t2
ON t1.FilmCountryId = t2.CountryID)
-- SELECT * also returns the helper column Rn
SELECT *
FROM cte
WHERE Rn = 1
ORDER BY FilmRunTimeMinutes DESC
if you really want left join approach then
-- Anti-join: t3 looks for a film from the same country with a strictly longer
-- run time; rows where no such film exists (t3.FilmID IS NULL) are the longest
-- films of their country.
SELECT t1.FilmName,
t2.CountryName,
t1.FilmRunTimeMinutes
FROM Film AS t1
INNER JOIN country AS t2
ON t1.FilmCountryId = t2.CountryID
LEFT JOIN Film AS t3
ON t3.FilmCountryId = t2.CountryID
AND t3.FilmRunTimeMinutes > t1.FilmRunTimeMinutes
WHERE t3.FilmID IS NULL
ORDER BY FilmRunTimeMinutes DESC
Try This
-- Ranks films inside each country by descending run time and keeps the top one.
-- The window orders by t1.FilmRunTimeMinutes (the column lives on Film, not on
-- country), and the final ORDER BY is on the outer query because SQL Server
-- does not allow ORDER BY inside a CTE without TOP / OFFSET.
;WITH Q
AS
(
    SELECT
        RN = ROW_NUMBER() OVER (PARTITION BY t1.FilmCountryId ORDER BY t1.FilmRunTimeMinutes DESC),
        t1.FilmName,
        t2.CountryName,
        t1.FilmRunTimeMinutes
    FROM Film AS t1
    INNER JOIN country AS t2
        ON t1.FilmCountryId = t2.CountryID
)
SELECT
    *
FROM Q
WHERE RN = 1
ORDER BY FilmRunTimeMinutes DESC