PostgreSQL CROSS JOIN query issue - sql

I would like to improve the following query:
-- Monthly citation counts for taxon '30092', zero-filled for months that
-- have no citations.
WITH calendar AS (
    -- One row per month start, from 2015-01-01 through 2020-12-01.
    SELECT d
    FROM generate_series(
        '2015-01-01'::timestamp,
        '2020-12-01'::timestamp,
        interval '1 month'
    ) d
)
SELECT
    c.d::date AS ord_date,
    n.id,
    coalesce(nb_cit, 0) AS nb_cit
FROM (
    SELECT DISTINCT t.id
    FROM "data".taxonomie t
    WHERE id = '30092'
) n
CROSS JOIN calendar c
LEFT JOIN (
    -- Citations per month for the same hard-coded taxon.
    SELECT
        date_trunc('month', i2.date_d) AS mon,
        count(c2.id) AS nb_cit
    FROM "data".inventaire i2
    INNER JOIN "data".citation c2
        ON i2.id = c2.id_inv
    WHERE c2.id_taxo = '30092'
    GROUP BY mon
) s
    ON c.d = s.mon
It works when I hard-code an id (in both the FROM subquery and the LEFT JOIN subquery), but the idea is to get the result for all ids.
If I don't set this identifier, I get the same counts whatever the identifier is.
I think I have to take the "count(c2.id)" out of the left join, but I can't figure out how to do it.
Help would be welcome !
Best Regards,
Mathias

I think you just want to aggregate by the id as well in the subquery:
-- Monthly citation counts for every taxon, zero-filled for (taxon, month)
-- pairs that have no citations.
WITH calendar AS (
    -- One row per month start, from 2015-01-01 through 2020-12-01.
    -- Defined here so the query runs on its own.
    SELECT d
    FROM generate_series(
        '2015-01-01'::timestamp, '2020-12-01'::timestamp, interval '1 month'
    ) d
)
SELECT c.d::date AS ord_date,
       n.id,
       coalesce(s.nb_cit, 0) AS nb_cit
FROM (SELECT DISTINCT t.id FROM "data".taxonomie t) n
CROSS JOIN calendar c
LEFT JOIN (
    -- Counts per (taxon, month). Grouping by id_taxo and joining on it below
    -- is what keeps each taxon's counts separate.
    SELECT c2.id_taxo,
           date_trunc('month', i2.date_d) AS mon,
           count(c2.id) AS nb_cit
    FROM "data".inventaire i2
    JOIN "data".citation c2
        ON i2.id = c2.id_inv
    GROUP BY c2.id_taxo, mon
) s
    ON c.d = s.mon
   AND n.id = s.id_taxo;

Related

Getting the closest date to a given date in BigQuery with no subqueries

Trying to get back to the currency conversion rate at the moment the purchase happened, so the revenue calculation is exact.
BigQuery doesn't support correlated subqueries of this kind, so neither of the following two queries works there (both do work on the source database):
Query 1
-- For each bs_j row, fetch rfbcurr from the most recent exchange-rate change
-- created strictly before the row's own `created`, for the row's currency.
SELECT
    b.id,
    j.id,
    j.currency,
    j.created,
    j.status,
    j.pricev,
    j.price,
    (
        -- Correlated lookup: newest matching change wins (ORDER BY ... LIMIT 1).
        SELECT erc.rfbcurr
        FROM test.exchangerates_exchangeratechange erc
        JOIN test.exchangerates_exchangerate er
            ON er.id = erc.exchangerateid
        WHERE erc.created < j.created
          AND er.currencycode = j.currency
        ORDER BY erc.created DESC
        LIMIT 1
    ) AS exchangerate
FROM transferz-st.test.bs_b b
JOIN transferz-st.test.bs_j j
    ON j.bid = b.id;
Query 2
-- Join-based variant of the same lookup: every rate change for the row's
-- currency is joined in, then HAVING keeps only the newest change created
-- strictly before j.created.
SELECT
    b.id,
    j.id,
    j.currency,
    j.created,
    j.status,
    j.pricev,
    j.price,
    j.currency,
    erc.rfbcurr
FROM b
JOIN j
    ON j.bid = b.id
JOIN exchangerate er
    ON er.currencycode = j.currency
JOIN exchangeratechange erc
    ON erc.exchangerateid = er.id
GROUP BY
    b.id,
    j.id,
    j.currency,
    j.created,
    j.status,
    j.pricev,      -- must name the same column as the select list
    j.price,
    erc.rfbcurr,
    erc.id
HAVING
    erc.id = (
        SELECT erc2.id
        FROM exchangeratechange erc2
        JOIN exchangerate er2
            -- same key relationship as the outer erc/er join
            ON er2.id = erc2.exchangerateid
        WHERE erc2.created < j.created
          AND er2.currencycode = j.currency
        ORDER BY erc2.created DESC
        LIMIT 1
    );
Could anybody suggest a solution?
You can try using a JOIN to bring in the currency rates. Use LEAD() to get a date range:
-- Turn each rate change into a validity window per currency, then range-join
-- each purchase onto the window it falls in. This avoids a correlated
-- subquery.
WITH rate_windows AS (
    SELECT
        erc.rfbcurr,
        er.currencycode,
        -- The change timestamp lives on the rate-change table (erc), which is
        -- the column the original correlated lookup compares against.
        erc.created,
        -- NULL for the newest change of a currency, i.e. an open-ended window.
        LEAD(erc.created) OVER (
            PARTITION BY er.currencycode
            ORDER BY erc.created
        ) AS next_created
    FROM test.exchangerates_exchangeratechange erc
    JOIN test.exchangerates_exchangerate er
        ON er.id = erc.exchangerateid
)
SELECT
    -- Both ids are aliased because the result cannot carry two columns
    -- named `id`.
    b.id AS b_id,
    j.id AS j_id,
    j.currency,
    j.created,
    j.status,
    j.pricev,
    j.price,
    rw.rfbcurr AS exchange_rate
FROM transferz-st.test.bs_b b
JOIN transferz-st.test.bs_j j
    ON j.bid = b.id
-- Inner join: purchases with no earlier rate change are dropped. Switch to
-- LEFT JOIN to keep them with a NULL exchange_rate.
JOIN rate_windows rw
    ON rw.currencycode = j.currency
   -- Strictly-before semantics, matching `erc.created < j.created`: a purchase
   -- stamped exactly at a change time still uses the previous rate.
   AND rw.created < j.created
   AND (rw.next_created IS NULL OR j.created <= rw.next_created);

Count with row_number function SQL CTE

I have the CTEs below, which work perfectly, but I want to count "cl.memb_dim_id" by "cl.post_date" and I am not sure how to do that. When I add the count function I get an error that highlights the row_number expression, so I assume I can't combine its ORDER BY with a GROUP BY?
-- Claims posted inside the reporting window, reduced to one row per claim
-- number: the row ranked first when ordered by post_date descending.
WITH
dates AS (
    -- Reporting window bounds; both ends are inclusive in the join below.
    SELECT
        to_date('01-jan-2017') AS startdate,
        to_date('02-jan-2017') AS enddate
    FROM dual
),
claims AS (
    SELECT DISTINCT
        cl.memb_dim_id,
        row_number() OVER (
            PARTITION BY cl.Claim_number
            ORDER BY cl.post_date DESC
        ) AS uniquerow,
        cl.Claim_number,
        cl.post_date,
        ct.claim_type,
        ap.claim_status_desc,
        dc.company_desc,
        dff.io_flag_desc,
        pr.product_desc,
        cl.prov_dim_id,
        cl.prov_type_dim_id
    FROM dw.fact_claim cl
    -- The single-row window is attached by range, and the fact-table filters
    -- ride along in the same inner-join condition.
    INNER JOIN dates d
        ON  cl.post_date >= d.startdate
        AND cl.post_date <= d.enddate
        AND cl.provider_par_dim_id IN ('2')
        AND cl.processing_status_dim_id = '1'
        AND cl.company_dim_id IN (
            '581','585','586','589','590','591','588','592',
            '594','601','602','603','606','596','598','597',
            '579','599','578','577','573','574','576','575'
        )
    LEFT JOIN dw.DIM_CLAIM_STATUS ap
        ON cl.claim_status_dim_id = ap.claim_status_dim_id
    -- The claim-type restriction stays in ON: it limits which rows get a
    -- claim_type value without removing claims from the result.
    LEFT JOIN dw.dim_claim_type ct
        ON  cl.claim_type_dim_id = ct.claim_type_dim_id
        AND cl.claim_type_dim_id IN ('1','2','6','7')
    LEFT JOIN dw.DIM_COMPANY dc
        ON cl.company_dim_id = dc.company_dim_id
    LEFT JOIN dw.DIM_IO_FLAG dff
        ON cl.io_flag_dim_id = dff.io_flag_dim_id
    LEFT JOIN dw.dim_product pr
        ON cl.product_dim_id = pr.product_dim_id
)
SELECT *
FROM claims
WHERE uniquerow = '1'
First, does this work?
count(cl.memb_dim_id) over (partition by cl.Claim_number, cl.post_date) as cnt,
Second, it is strange to be using analytic functions with select distinct.

Error in on clause comparison - Big Query

The following big query code gives the following error.
-- Per-day, per-property report of booked rooms and available rooms
-- (BigQuery legacy SQL: bracketed table names, FLATTEN, DATEDIFF).
select
selected_date date,
pp.name property,
bb.bookings bb,
av.available vailable,
from
-- v: one row per day starting at 2017-10-01. RPAD builds a filler string whose
-- length is derived from the day count up to CURRENT_DATE(); SPLIT + FLATTEN
-- turn it into rows (presumably one per character); ROW_NUMBER() numbers them
-- so DATE_ADD can offset the start date by pos - 1 days.
(SELECT DATE(DATE_ADD(TIMESTAMP("2017-10-01"), pos - 1, "DAY")) AS selected_date
FROM (
SELECT ROW_NUMBER() OVER() AS pos, *
FROM (FLATTEN((
SELECT SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2017-10-01")), '.'),'') AS h
FROM (SELECT NULL)),h
)))) v
cross join
-- pp: distinct property names, excluding names containing 'test'.
(select p.name name from [roomsproperties.properties] p where p.name not like '%test%' group by name) as pp
left join
-- bb: rooms booked per property and stay (checkin, checkout), excluding
-- 'test' properties and the listed booking statuses.
(select sum(b.rooms) bookings,
p.name property,
b.checkin checkin,
b.checkout checkout
from [bookings.bookings] b
left join [roomsproperties.rooms] r on r.id = b.room_id
left join [roomsproperties.properties] p on p.id = r.property_id
where p.name not like '%test%'
and b.status not in('Rejected', 'Cancelled - By customer', 'OTP Not Varified')
group by property,checkin,checkout
-- NOTE: the BETWEEN range test in the ON clause below is not an '=' comparison;
-- the error quoted after this query says ON must be an AND of '=' comparisons.
) as bb on pp.name = bb.property and (v.selected_date between bb.checkin and bb.checkout)
left join
-- av: total room quantity per property and room creation date.
(select sum(r.quantity) available,
p.name property,
date(r.created_at) date
from [roomsproperties.rooms] r
left join [roomsproperties.properties] p on p.id = r.property_id
group by property, date
-- NOTE: the '>=' comparison below is likewise not an '=' comparison.
) av on pp.name = av.property and v.selected_date >= av.date
The error is,
Error: ON clause must be AND of = comparisons of one field name from each table, with all field names prefixed with table name. Consider using Standard SQL
Can anyone help?
You should try:
(select ...) as bb on pp.name = bb.property
-- Range test moved out of ON; note that filtering in WHERE also drops
-- date/property rows that have no matching booking.
WHERE v.selected_date between bb.checkin and bb.checkout
and:
(select ...) as av on pp.name = av.property
WHERE v.selected_date >= av.date

SQL - Using Subquery with Aggregate Function

I'm trying to return the last time entry posted for a particular client, and the case (matter) number associated to that entry. The relationship is one client has many matters, and one matter has many time entries.
I have the code below, but it obviously returns all the matters and not just the one associated to the time entry. I understand why, but tie myself in knots when trying to correct it. Any help much appreciated.
-- Latest time-entry date for each (client, matter) pair among clients whose
-- STATUS_CODE is 'Targ'. Because MATTER_NUMBER is a grouping column, every
-- matter of a client produces its own row.
SELECT
    c.CLIENT_CODE,
    c.CLIENT_NAME,
    c.OPEN_DATE,
    mp.EMPLOYEE_NAME,
    MAX(tt.TRAN_DATE) AS [Last Time],
    m.MATTER_NUMBER
FROM HBM_CLIENT c
JOIN HBM_MATTER m
    ON m.CLIENT_UNO = c.CLIENT_UNO
LEFT JOIN TAT_TIME tt
    ON tt.MATTER_UNO = m.MATTER_UNO
LEFT JOIN HBM_PERSNL mp
    ON mp.EMPL_UNO = c.RESP_EMPL_UNO
WHERE c.STATUS_CODE = 'Targ'
GROUP BY
    c.CLIENT_CODE,
    c.CLIENT_NAME,
    c.OPEN_DATE,
    mp.EMPLOYEE_NAME,
    m.MATTER_NUMBER
ORDER BY OPEN_DATE
Completely untested but in the right direction
-- Last time entry posted for a client, with the matter it was posted to.
-- The MAX() lookup is correlated on the client rather than on the matter, so
-- only the matter carrying the client's overall latest entry is returned.
-- If several entries share that latest date, each of them yields a row.
select
    c.CLIENT_CODE,
    c.CLIENT_NAME,
    c.OPEN_DATE,
    mp.EMPLOYEE_NAME,
    tt.TRAN_DATE as [Last Time],
    m.MATTER_NUMBER
from
    HBM_CLIENT c
    join HBM_MATTER m on
        m.CLIENT_UNO = c.CLIENT_UNO
    join TAT_TIME tt on
        tt.MATTER_UNO = m.MATTER_UNO AND
        tt.TRAN_DATE = (
            -- latest entry across all of this client's matters
            select max(tt2.TRAN_DATE)
            from HBM_MATTER m2
            join TAT_TIME tt2 on
                tt2.MATTER_UNO = m2.MATTER_UNO
            where m2.CLIENT_UNO = c.CLIENT_UNO)
    left join HBM_PERSNL mp on
        mp.EMPL_UNO = c.RESP_EMPL_UNO
where
    m.CLIENT_UNO = ? and
    c.STATUS_CODE = 'Targ'
One way to do this is using row_number(). I think the following will do what you want:
-- Latest time entry per client and the matter it belongs to, for clients
-- whose STATUS_CODE is 'Targ'.
select c.CLIENT_CODE, c.CLIENT_NAME, c.OPEN_DATE, mp.EMPLOYEE_NAME,
       -- TRAN_DATE is read through the derived table's alias (m); the inner
       -- alias tt is not visible at this level.
       m.TRAN_DATE as [Last Time], m.MATTER_NUMBER
from HBM_CLIENT c join
     (select m.*, tt.TRAN_DATE,
             -- Rank each client's (matter, time entry) rows, newest first.
             row_number() over (partition by m.CLIENT_UNO
                                order by tt.TRAN_DATE desc
                               ) as seqnum
      from HBM_MATTER m LEFT JOIN
           TAT_TIME tt
           ON tt.MATTER_UNO = m.MATTER_UNO
     ) m
     -- seqnum = 1 keeps only the top-ranked row per client.
     ON m.CLIENT_UNO = c.CLIENT_UNO and m.seqnum = 1 left join
     HBM_PERSNL mp
     on mp.EMPL_UNO = c.RESP_EMPL_UNO
where c.STATUS_CODE = 'Targ';
I don't think you need the group by, unless the other joins create duplicates.

simplifying query that has multiple WITH and multiple subqueries

It is bothering me that for a simple query, I have to write out so many sub-selects and WITH statements.
The question is: are there basic guidelines on how to simplify queries that have subqueries?
Here's my query:
-- Per client: specimen source and received date of the earliest and latest
-- accession rows, plus MLIS_DATE_ESTABLISHED from D_CLIENT.
-- cte_min: accession rows whose received_date equals the client's minimum
-- received_date (matched back to the fact table via accession_daily_key).
WITH cte_min
AS (SELECT a.client_id,
a.specimen_source,
a.received_date
FROM f_accession_daily a
JOIN (SELECT DISTINCT f.client_id,
f.received_date,
f.accession_daily_key
FROM F_ACCESSION_DAILY f
JOIN (SELECT CLIENT_ID,
Min(received_date) MinRecDate
FROM F_ACCESSION_DAILY
GROUP BY CLIENT_ID) i
ON f.CLIENT_ID = i.CLIENT_ID
AND f.RECEIVED_DATE = i.MinRecDate) b
ON a.ACCESSION_DAILY_KEY = b.ACCESSION_DAILY_KEY),
-- cte_max: same shape as cte_min, using the client's maximum received_date.
cte_max
AS (SELECT a.client_id,
a.specimen_source,
a.received_date
FROM f_accession_daily a
JOIN (SELECT DISTINCT f.client_id,
f.received_date,
f.accession_daily_key
FROM F_ACCESSION_DAILY f
JOIN (SELECT CLIENT_ID,
Max(received_date) MaxRecDate
FROM F_ACCESSION_DAILY
GROUP BY CLIENT_ID) i
ON f.CLIENT_ID = i.CLIENT_ID
AND f.RECEIVED_DATE = i.MaxRecDate) b
ON a.ACCESSION_DAILY_KEY = b.ACCESSION_DAILY_KEY),
-- cte_est: distinct (client_id, MLIS_DATE_ESTABLISHED) pairs from D_CLIENT rows
-- with REC_ACTIVE_FLG = 1 and a non-null established date.
cte_est
AS (SELECT DISTINCT client_id,
MLIS_DATE_ESTABLISHED
FROM D_CLIENT
WHERE REC_ACTIVE_FLG = 1
AND MLIS_DATE_ESTABLISHED IS NOT NULL)
-- Final projection: the fact table drives the client list; DISTINCT collapses
-- the one-row-per-accession fan-out produced by joining on client_id only.
SELECT DISTINCT f.client_id,
cmin.specimen_source,
cmin.received_date,
cmax.specimen_source,
cmax.received_date,
cest.MLIS_DATE_ESTABLISHED
FROM F_ACCESSION_DAILY f
LEFT JOIN cte_max cmax
ON cmax.CLIENT_ID = f.CLIENT_ID
LEFT JOIN cte_min cmin
ON cmin.CLIENT_ID = f.CLIENT_ID
LEFT JOIN cte_est cest
ON cest.CLIENT_ID = f.CLIENT_ID
I am not asking necessarily for you to do the simplification yourself (although I would be very grateful for this), rather I am asking for general guidelines/directions on re-writing this query to be more elegant.
Does this look any better?
-- Per client: specimen source and received date of the first and last
-- accession, plus the client's established date, in a single pass over
-- F_ACCESSION_DAILY.
;WITH minmax AS (
    SELECT client_id, specimen_source, received_date,
           -- RMin = 1 marks the client's earliest row, RMax = 1 the latest;
           -- accession_daily_key breaks ties on received_date.
           RMin = row_number() over (partition by Client_id
                                     order by received_date, accession_daily_key),
           RMax = row_number() over (partition by Client_id
                                     order by received_date desc, accession_daily_key desc)
    FROM F_ACCESSION_DAILY
)
SELECT f.client_id,
       -- Each CASE matches at most one row per client, so max() simply picks
       -- that row's value.
       max(case when f.rmin = 1 then f.specimen_source end) AS min_specimen_source,
       max(case when f.rmin = 1 then f.received_date end)   AS min_received_date,
       max(case when f.rmax = 1 then f.specimen_source end) AS max_specimen_source,
       max(case when f.rmax = 1 then f.received_date end)   AS max_received_date,
       D.MLIS_DATE_ESTABLISHED
FROM minmax f
LEFT JOIN D_CLIENT D
       -- Correlate on client_id; without it every client is paired with every
       -- qualifying D_CLIENT row.
       ON D.CLIENT_ID = f.client_id
      AND D.REC_ACTIVE_FLG = 1
      AND D.MLIS_DATE_ESTABLISHED IS NOT NULL
WHERE 1 in (f.rmin, f.rmax)
GROUP BY f.client_id, D.MLIS_DATE_ESTABLISHED
That is 50 lines reporting 5 values, and in all of that only two tables are referenced.
In the first CTE you have 4 joins (or virtual joins) to the same table, with no other table involved, to report 3 columns. I don't know the key, so I cannot conclude whether it can be reduced.
If a CTE is not referenced more than once, it does not result in fewer lines of code.
For one, this CTE can be replaced with less code:
cte_est
AS (SELECT DISTINCT client_id,
MLIS_DATE_ESTABLISHED
FROM D_CLIENT
WHERE REC_ACTIVE_FLG = 1
AND MLIS_DATE_ESTABLISHED IS NOT NULL)
...
cest.MLIS_DATE_ESTABLISHED
...
LEFT JOIN cte_est cest
ON cest.CLIENT_ID = f.CLIENT_ID
reduces to
D_CLIENT.MLIS_DATE_ESTABLISHED
...
LEFT JOIN D_CLIENT
ON D_CLIENT.CLIENT_ID = f.CLIENT_ID
AND D_CLIENT.REC_ACTIVE_FLG = 1
AND D_CLIENT.MLIS_DATE_ESTABLISHED IS NOT NULL
While I am not sure if everyone would consider this simpler and/or easier to read, this is how I would do it:
-- Per client: specimen source and received date of the earliest and latest
-- accession rows, plus MLIS_DATE_ESTABLISHED from D_CLIENT.
WITH
cte_MaxMinRecvd As
(
    -- One row per client with its first and last received_date.
    SELECT CLIENT_ID,
           Min(received_date) MinRecDate,
           Max(received_date) MaxRecDate
    FROM F_ACCESSION_DAILY
    GROUP BY CLIENT_ID
)
, cte_MaxMinDaily As
(
    -- Every accession row tagged with its client's min/max dates. Columns are
    -- listed explicitly because SELECT * over this join would expose CLIENT_ID
    -- twice, and a CTE cannot contain duplicate column names.
    SELECT f.ACCESSION_DAILY_KEY,
           f.RECEIVED_DATE,
           i.MinRecDate,
           i.MaxRecDate
    FROM F_ACCESSION_DAILY f
    JOIN cte_MaxMinRecvd i ON f.CLIENT_ID = i.CLIENT_ID
)
, cte_min AS
(
    -- Accession rows received on the client's earliest date.
    SELECT a.client_id,
           a.specimen_source,
           a.received_date
    FROM F_ACCESSION_DAILY a
    WHERE EXISTS(
        SELECT *
        FROM cte_MaxMinDaily f
        WHERE f.RECEIVED_DATE = f.MinRecDate
          AND a.ACCESSION_DAILY_KEY = f.ACCESSION_DAILY_KEY
    )
)
, cte_max AS
(
    -- Accession rows received on the client's latest date (compares against
    -- MaxRecDate, not MinRecDate).
    SELECT a.client_id,
           a.specimen_source,
           a.received_date
    FROM f_accession_daily a
    WHERE EXISTS(
        SELECT *
        FROM cte_MaxMinDaily f
        WHERE f.RECEIVED_DATE = f.MaxRecDate
          AND a.ACCESSION_DAILY_KEY = f.ACCESSION_DAILY_KEY
    )
)
SELECT DISTINCT
       f.client_id,
       cmin.specimen_source,
       cmin.received_date,
       cmax.specimen_source,
       cmax.received_date,
       cest.MLIS_DATE_ESTABLISHED
FROM F_ACCESSION_DAILY f
LEFT JOIN cte_max cmax ON cmax.CLIENT_ID = f.CLIENT_ID
LEFT JOIN cte_min cmin ON cmin.CLIENT_ID = f.CLIENT_ID
LEFT JOIN D_CLIENT cest ON cest.CLIENT_ID = f.CLIENT_ID
                       AND cest.REC_ACTIVE_FLG = 1
                       AND cest.MLIS_DATE_ESTABLISHED IS NOT NULL
Mainly what I did was to
Turn most of the subqueries into CTEs, where applicable,
Merge the Min and Max subqueries together, and
Change the DISTINCT subqueries into EXISTS subqueries, which can be simpler (and usually perform better)
Ooops, I also got rid of the cte_est CTE as Blam suggested..