Combine 3 big tables with 2 joins - sql

I have three tables using complex. I run them separately without problems. Here are the queries:
TABLE A:
select
maxxx.id_demande_diffusion AS ID_DIFFUSION,
maxxx.id_notification as ID_NOTIFICATION,
maxxx.cd_organisation_client as ID_ENTITE,
maxxx.cod_entrep as ID_ENTITE_GARANTE,
maxxx.cd_canal as CD_CANAL,
maxxx.id_demande_diffusion_originale as ID_DIFFUSION_PARENT,
maxxx.ref_maquette as REF_MAQUETTE,
maxxx.qualification_canal as QUALIFICATION_CANAL,
maxxx.ger_id_pli as ID_PLI_GER,
case when maxxx.typ_mvt="S" then 1 else 0 end AS TOP_SUPP,
case when maxxx.typ_mvt = "S" then to_date(substr(maxxx.dt_capt, 1, 11)) else null end AS DT_SUPP,
minnn.typ_mvt as MIN_MVT,
maxxx.typ_mvt as MAX_MVT,
case when minnn.typ_mvt = 'C' then 'C' else 'M' end as TYP_MVT
from
(select s.id_demande_diffusion, s.dt_capt, s.typ_mvt from ${use_database}.pz_send_demande_diffusion as s
join
(select id_demande_diffusion, min(dt_capt) as dtmin from ${use_database}.pz_send_demande_diffusion group by id_demande_diffusion) as minn
on minn.id_demande_diffusion=s.id_demande_diffusion and s.dt_capt=minn.dtmin ) as minnn
join
(select s.id_demande_diffusion, s.typ_mvt, s.id_notification, s.dt_capt, s.cd_organisation_client, s.cod_entrep, s.cd_canal, s.id_demande_diffusion_originale,
s.ref_maquette, s.qualification_canal, s.ger_id_pli from ${use_database}.pz_send_demande_diffusion as s
join
(select id_demande_diffusion, max(dt_capt) as dtmax from ${use_database}.pz_send_demande_diffusion group by id_demande_diffusion) as maxx
on s.id_demande_diffusion=maxx.id_demande_diffusion and s.dt_capt=maxx.dtmax)as maxxx
on minnn.id_demande_diffusion=maxxx.id_demande_diffusion;
TABLE B:
select
maxxx.id_notification as ID_NOTIFICATION,
maxxx.cd_type_destinataire as CD_TYPE_DESTINATAIRE,
case when maxxx.cd_type_destinataire = "IDGRC" then maxxx.destinataire else null end AS ID_PERSONNE,
case when maxxx.cd_type_destinataire = "MAIL" then maxxx.destinataire else null end AS EMAIL_DESTINATAIRE,
case when maxxx.cd_type_destinataire = "SMS" then maxxx.destinataire else null end AS NUM_TEL_DESTINATAIRE,
maxxx.cd_type_evenement,
maxxx.cd_type_notification,
maxxx.cd_type_destinataire_source AS CD_TYPE_DEST_SOURCE,
case when maxxx.cd_type_destinataire_source = "IDGRC" then maxxx.destinataire_source when maxxx.cd_type_destinataire_source = "IDGRC|IDGRC" then substr(maxxx.destinataire_source, 1, locate("|", maxxx.destinataire_source)-1) else null end AS ID_PERS_DEST_SOURCE,
case when maxxx.cd_type_destinataire_source = "SIGMA" or maxxx.cd_type_destinataire_source = "CUBA" then maxxx.destinataire_source else null end AS REF_EXT_DEST_SOURCE,
case when maxxx.cd_type_destinataire_source = "MAIL" then maxxx.destinataire_source else null end AS EMAIL_DEST_SOURCE,
case when maxxx.cd_type_destinataire_source = "SMS" then maxxx.destinataire_source else null end AS NUM_TEL_DEST_SOURCE,
case when maxxx.cd_type_destinataire_source = "IDGRC|IDGRC" then substr(maxxx.destinataire_source, locate("|", maxxx.destinataire_source)+1, length(maxxx.destinataire_source)) end AS ID_PERSONNE_DEST_SOURCE_2
from
(select n.id_notification, n.destinataire, n.cd_type_evenement, n.cd_type_notification, n.destinataire_source, n.cd_type_destinataire, n.cd_type_destinataire_source from ${use_database}.pz_send_notification as n
join
(select id_notification, max(dt_capt) as dtmax from ${use_database}.pz_send_notification group by id_notification) as maxx
on n.id_notification=maxx.id_notification and n.dt_capt=maxx.dtmax) as maxxx;
TABLE C:
select
maxxx.id_communication AS ID_COMMUNICATION,
maxxx.cd_sa as CD_SYS_DIFFUSEUR,
maxxx.type_conteneur as CD_TYPE_CONTENEUR
from
(select n.id_communication, n.cd_sa, n.type_conteneur from ${use_database}.pz_send_comm_retour as n
join
(select id_communication, max(dt_capt) as dtmax from ${use_database}.pz_send_comm_retour group by id_communication) as maxx
on n.id_communication=maxx.id_communication and n.dt_capt=maxx.dtmax) as maxxx;
Is there anyway to combine the three of the using one join and one left join ?
I want o have something like
SELECT * FROM (TABLE A join TABLE B ON A.ID_NOTIFICATION=B.ID_NOTIFICATION) AS TMP LEFT JOIN TABLE C ON TMP.ID_DIFFUSION=C.ID_COMMUNICATION;
I have been trying but it always fails because of missing or misplaced parenthesis.
Thank you!

I have no idea if your queries are correct, but common table expressions (CTE) can frequently be used to keep result set joins clean.
WITH TABLE_A AS
( SELECT
FROM ...
...
),
TABLE_B AS
( SELECT
FROM ...
...
),
TABLE_C AS
( SELECT
FROM ...
...
)
SELECT *
FROM TABLE_A TA
JOIN TABLE_B TB
ON TA.Key_Field = TB.Key_Field
JOIN TABLE_C TC
ON TB.Key_Field = TC.Key_Field

Related

Need help joining table without pulling in all the data

I am looking to join three tables but only pull in data from two of them. The third table I just want to reference one column in a case statement.
SELECT c.Client_ID,
c.Last_Name,
c.First_Name,
d.Sched_Time,
CASE
WHEN d.Eventstatus = 1 THEN d.Time_In
ELSE NULL
END AS Time_In,
CASE
WHEN d.Eventstatus = 1 THEN d.Time_Out
ELSE NULL
END AS Time_Out,
CASE
WHEN d.Eventstatus = 1 THEN (d.Time_Out - d.Time_In) * 60 * 24
ELSE NULL
END AS Mins,
CASE
WHEN d.Eventstatus = 1 THEN (Round(d.Sched_Duration/15,1))
ELSE NULL
END AS Units,
CASE
WHEN d.Eventstatus = 1 THEN 'Completed'
WHEN d.Eventstatus = 10 THEN 'Cancelled: No Show'
ELSE TO_CHAR(d.Eventstatus)
END AS Appt_Status,
CASE
WHEN d.SRVID = 37913 THEN 'OASAS - Toxicology Rapid Screen'
ELSE TO_CHAR(d.SRVID)
END AS Program,
/* Here is where I want to reference the other table. I want it be WHEN d.Eventstatus = 1 AND cd.customfieldvaule != 'CSE' THEN 'Y' */
CASE
WHEN d.Eventstatus = 1 THEN 'Y'
ELSE 'N'
END AS Billable,
CASE
WHEN d.ProvidersID = 112906 THEN 'Some Name'
ELSE TO_CHAR(d.ProvidersID)
END AS Provider,
CASE
WHEN d.Activity_Desc = '<b>OASAS</b> <i>(Group)</i>' THEN 'WGACS'
WHEN d.Activity_Desc = '<b>OASAS</b> <i>(Individual)</i>' THEN 'WGACS'
ELSE d.Activity_Desc
END AS Location_ID
FROM Clients c
JOIN DAILY_LOG_DATA d
ON c.SHISID = d.SHISID
JOIN Client_Custom_Data cd
ON d.SHISID = cd.SHISID
WHERE d.SRVID IN (37913, 36186, 36185, 36180, 36179, 36168, 36167, 36182, 36181, 36173, 36172, 36177, 36176, 36175, 36174, 36178, 36184, 36183, 36188, 36187)
AND c.Last_Name != 'TEST'
AND d.Sched_Time >= TO_DATE('2020-09-28 01:00:00', 'YYYY/MM/DD HH:MI:SS')
AND d.Sched_Time < TO_DATE('2020-10-04 12:59:00', 'YYYY/MM/DD HH:MI:SS')
The Client_Custom_Data table is the one I where I do not want to pull in all the data. Let me know if I need to explain this a little better.
So when I try to join in the 3rd table I get duplicate rows. This is what I tried.
FROM Clients c
JOIN DAILY_LOG_DATA d
ON c.SHISID = d.SHISID
OUTER Apply
(SELECT cd.customfieldvalue FROM Client_Custom_Data cd
WHERE d.SHISID = cd.SHISID)
But even joining the 3rd table, I am still unsure how to use it in a case statement up above where I want to reference the 3rd table. I want it be WHEN d.Eventstatus = 1 AND cd.customfieldvaule != 'CSE' THEN 'Y
you can achieve this by selecting the values you want or with a sub query like this
SELECT
c.*,
d.*,
(SELECT case_field FROM Client_Custom_Data cd where d.SHISID = cd.SHISID) as case_field
FROM Clients c
JOIN DAILY_LOG_DATA d
ON c.SHISID = d.SHISID
FROM Clients c
JOIN DAILY_LOG_DATA d
ON c.SHISID = d.SHISID
OUTER APPLY
(
SELECT case_field FROM Client_Custom_Data cd
where d.SHISID = cd.SHISID
) A

Cannot use an aggregate or a subquery

I'm trying to group this case expression but unfortunately I'm getting an error.
select da.order_id, osl.itemid, sum(case when osl.sku = '00005' then 0 when osl.sku = '00006' then 0 else osl.price * sil.quantity end) merchcost, sum(sil.merchUnitCost * sil.quantity) cogs, sum(sil.quantity) quantity,
CASE
WHEN EXISTS (SELECT * FROM fcat.dbo.subscriptionskus a where a.itemId = osl.itemid) then 1
WHEN EXISTS (SELECT * FROM foam.dbo.subscriptionprogram b where b.orderStateLineId = osl.orderStateLine_id ) then 1
else 0
END as isAdmin,
CASE
WHEN EXISTS (SELECT * FROM fcat.dbo.subscriptionskus a where a.itemId = osl.itemid) then CAST('subscriptionskus' as varchar(MAX))
WHEN EXISTS (SELECT * FROM foam.dbo.subscriptionprogram b where b.orderStateLineId = osl.orderStateLine_id ) then CAST('subscriptionprogram' as varchar(MAX))
else 'NotListed'
END as SubTable
from #dispatchAmounts da
inner join orderstates os on os.order_id = da.order_id
inner join orderstatelines osl on osl.orderState_id = os.orderState_id
inner join shippingIntentLines sil on sil.orderStateLine_id = osl.orderStateLine_id and da.shippingIntent_nbr = sil.shippingIntent_nbr
where da.code = 'merch' and sil.quantity > 0
group by da.order_id, osl.itemid, CASE
WHEN EXISTS (SELECT * FROM fcat.dbo.subscriptionskus a where a.itemId = osl.itemid) then 1
WHEN EXISTS (SELECT * FROM foam.dbo.subscriptionprogram b where b.orderStateLineId = osl.orderStateLine_id ) then 1
else 0
END,CASE
WHEN EXISTS (SELECT * FROM fcat.dbo.subscriptionskus a where a.itemId = osl.itemid) then CAST('subscriptionskus' as varchar(MAX))
WHEN EXISTS (SELECT * FROM foam.dbo.subscriptionprogram b where b.orderStateLineId = osl.orderStateLine_id ) then CAST('subscriptionprogram' as varchar(MAX))
else 'NotListed'
END
ERROR:
Cannot use an aggregate or a subquery in an expression used for the group by list of a GROUP BY clause.
Is there anyway I can make this work?
I have a couple of suggestions. You are using select * in your case statements , but the columns referenced by '*' are not in your group by clause. To add them would be a bad way to fix the problem. Instead use ' select 1 ' and see what happens.
Second recommendation is to just not use the when exists rather use left join using sub queries and use those in your case statements.
So what I did is.
1. Removed the Case Statement on the Group By.
2. Modified the SubQueries on the Case Statement. Followed #Shiv Sidhu recommendation.
Thanks for everyone.

How to convert this Query to HiveSQL?

I am getting an error when running this query in Databricks. How to convert the following SQL Query to HiveSQL:
%sql
-- To clear table if it already exists
DROP TABLE IF EXISTS air_d;
-- Create temp table syntax
CREATE TEMP VIEW air_d AS
select *,airport_1+'-'+airport_2 as route,cast(citymarketid_1 as varchar)+cast(citymarketid_2 as varchar) as city_route
from
(select year, quarter, tkcarrier, nonstopmiles, OriginCityMarketID, DestCityMarketID, Origin, Dest, passengers, mktfare, itingeotype, bulkfare, mktmilesflown, #car_airlineid.car_flag, car_airlineid.airlineid,
case when t1.citymarketnmesrt < t2.citymarketnmesrt then origin else dest end as airport_1,
case when t1.citymarketnmesrt < t2.citymarketnmesrt then dest else origin end as airport_2,
case when t1.citymarketnmesrt < t2.citymarketnmesrt then origincitymarketid else destcitymarketid end as citymarketid_1,
case when t1.citymarketnmesrt < t2.citymarketnmesrt then destcitymarketid else origincitymarketid end as citymarketid_2
from air_db
LEFT OUTER JOIN aptcty as t1 on air_db.originairportid=t1.airportid
LEFT OUTER JOIN aptcty as t2 on air_db.destairportid=t2.airportid
LEFT OUTER JOIN #car_airlineid on air_db.tkcarrierairlineid=#car_airlineid.airlineid
where (Year>=2003 and Year<=2018) or (Year=2019 and Quarter in (1,2)) and OriginCountry = 'US' and DestCountry = 'US') as new_table;
This is the error:
Error in SQL statement: ParseException: mismatched input 'from'
expecting (line 4, pos 0)
Here is the answer:
%sql
create table air_d from (
select *,airport_1 +'-'+ airport_2 as route, string(citymarketid_1) + string(citymarketid_2) as city_route from
(
select year, quarter, tkcarrier, nonstopmiles, OriginCityMarketID, DestCityMarketID, Origin, Dest, passengers, mktfare, itingeotype, bulkfare, mktmilesflown, t3.car_flag, t3.airlineid,
case when t1.citymarketnmesrt < t2.citymarketnmesrt then origin else dest end as airport_1,
case when t1.citymarketnmesrt < t2.citymarketnmesrt then dest else origin end as airport_2,
case when t1.citymarketnmesrt < t2.citymarketnmesrt then origincitymarketid else destcitymarketid end as citymarketid_1,
case when t1.citymarketnmesrt < t2.citymarketnmesrt then destcitymarketid else origincitymarketid end as citymarketid_2
from (
(select * from air_db  ) t0
left join (select * from aptcty ) t1 on t0.originairportid=t1.airportid
left join (select * from aptcty ) t2 on t0.destairportid=t2.airportid
left join (select * from car_airlineid) t3 on t0.tkcarrierairlineid=t3.airlineid )
where ( (Year>=2003 and Year<=2018) or (Year=2019 and Quarter in (1,2))) and OriginCountry = 'US' and DestCountry = 'US'
))

Using a CASE WHEN statement and an IN (SELECT...FROM) subquery

I'm trying to create a temp table and build out different CASE WHEN logic for two different medications. In short I have two columns of interest for these CASE WHEN statements; procedure_code and ndc_code. There are only 3 procedure codes that I need, but there are about 20 different ndc codes. I created a temp.ndcdrug1 temp table with these ndc codes for medication1 and temp.ndcdrug2 for the ndc codes for medication2 instead of listing out each ndc code individually. My query looks like this:
CREATE TABLE temp.flags AS
SELECT DISTINCT a.userid,
CASE WHEN (procedure_code = 'J7170' OR ndc_code in (select ndc_code from temp.ndcdrug1)) THEN 'Y' ELSE 'N' END AS Drug1,
CASE WHEN (procedure_code = 'J7205' OR procedure_code = 'C9136' OR ndc_code in (select ndc_code from temp.ndcdrug2)) THEN 'Y' ELSE 'N' END AS Drug2,
CASE WHEN (procedure_code = 'J7170' AND procedure_code = 'J7205') THEN 'Y' ELSE 'N' END AS Both
FROM table1 a
LEFT JOIN table2 b
ON a.userid = b.userid
WHERE...
AND...
When I run this, it returns: org.apache.spark.sql.AnalysisException: IN/EXISTS predicate sub-queries can only be used in a Filter.
I could list these ndc_code values out individually, but there are a lot of them so wanted a more efficient way of going about this. Is there a way to use a sub select query like this when writing out CASE WHEN's?
Query.
CREATE TABLE temp.flags AS
SELECT DISTINCT a.userid,
CASE WHEN (
procedure_code = 'J7170' OR
(select min('1') from temp.ndcdrug1 m where m.ndc_code = a.ndc_code) = '1'
) THEN 'Y' ELSE 'N' END AS Drug1,
CASE WHEN (
procedure_code = 'J7205' OR
procedure_code = 'C9136' OR
(select min('1') from temp.ndcdrug2 m where m.ndc_code = a.ndc_code) = '1'
) THEN 'Y' ELSE 'N' END AS Drug2,
CASE WHEN (procedure_code = 'J7170' AND procedure_code = 'J7205')
THEN 'Y' ELSE 'N' END AS Both
FROM table1 a
LEFT JOIN table2 b
ON a.userid = b.userid
WHERE...
AND...

multiple errors in using with function in select statement

I'm having some problems with my select statement. When I try to execute it, it gives 3 errors
Must specify table to select from
An object or column name is missing or empty for SELECT INFO statements. verify each column
has a name. For other statements, look for empty alias names. Aliases defined...
Column name or number of supplied values does not match table definition.
WITH A AS
(SELECT ROW_NUMBER() OVER (ORDER BY
CASE
WHEN #pOrderBy = 'SortByName' THEN colPortAgentVendorNameVarchar
WHEN #pOrderBy = 'SortByCOuntry' THEN colCountryNameVarchar
WHEN #pOrderBy = 'SortByCity' THEN colCityNameVarchar
END,
colPortAgentVendorNameVarchar
) xRow, A.*
FROM
(
SELECT DISTINCT
V.colPortAgentVendorIDInt,
colPortAgentVendorNameVarchar = RTRIM(LTRIM(V.colPortAgentVendorNameVarchar)),
C.colCountryNameVarchar,
Y.colCityNameVarchar,
V.colContactNoVarchar,
V.colFaxNoVarchar,
V.colEmailToVarchar,
V.colWebsiteVarchar,
BR.colBrandIdInt,
PR.colPriorityTinyint,
colBrandCodeVarchar = RTRIM(LTRIM(BR.colBrandCodeVarchar))
FROM dbo.TblVendorPortAgent V
LEFT JOIN TblCountry C ON C.colCountryIDInt = V.colCountryIDInt
LEFT JOIN TblCity Y ON Y.colCityIDInt = V.colCityIDInt
LEFT JOIN tblBrandAirportPortAgent PR ON PR.colPortAgentVendorIDInt = V.colPortAgentVendorIDInt
AND PR.colIsActiveBit = 1
LEFT JOIN TblBrand BR ON BR.colBrandIdInt = PR.colBrandIdInt
AND BR.colIsActiveBit = 1
WHERE V.colIsActiveBit = 1
AND V.colPortAgentVendorNameVarchar LIKE '%'+ #pPortAgentVendor +'%'
AND (PR.colBrandIdInt = #pBrandID OR #pBrandID = 0)
) A
)
INSERT INTO #tempPortAgent
SELECT A.*, IsWithContract = CASE WHEN colContractIdInt IS NULL THEN CAST(0 AS BIT) ELSE CAST(1 AS BIT) END FROM A LEFT JOIN TblContractPortAgent B ON A.colPortAgentVendorIDInt = B.colPortAgentVendorIDInt
AND B.colIsActiveBit = 1
;WITH QQ AS
(
SELECT ROW_NUMBER() OVER(PARTITION BY Q.colPortAgentVendorIDInt ORDER BY xRow,
ContractStatusOrder,
colDateCreatedDate DESC
)ContractRow,*
FROM
(
SELECT DISTINCT GG.*, B.colContractIdInt, B.colContractStatusVarchar, B.colDateCreatedDate ,
ContractStatusOrder = CASE WHEN B.colContractStatusVarchar = 'Approved' THEN 1 ELSE '2' END
FROM #tempPortAgent GG LEFT JOIN TblContractPortAgent B ON GG.colPortAgentVendorIDInt = B.colPortAgentVendorIDInt
AND B.colIsActiveBit = 1
) Q
)SELECT * INTO #tempPortAgentWithContract
SELECT * FROM #tempPortAgentWithContract
I don't really know where its showing, because the errors are saying that these are inside the select statements inside.
1- Use Select Into instead of Insert into select
SELECT A.*, IsWithContract = CASE WHEN colContractIdInt IS NULL THEN CAST(0 AS BIT) ELSE CAST(1 AS BIT) END
Into #tempPortAgent
FROM A
LEFT JOIN TblContractPortAgent B ON A.colPortAgentVendorIDInt = B.colPortAgentVendorIDInt
AND B.colIsActiveBit = 1
2- SELECT * INTO #tempPortAgentWithContract is incorrect syntax. you must use following format:
SELECT * INTO #tempPortAgentWithContract From QQ