Related
In the following query, the BETWEEN date-time literals are repeated in multiple places, and I need to replace them with two variables named start_date and end_date. I tried multiple methods and had no luck. Please answer with a runnable query if you can. Thanks in advance.
-- Admissions report (Oracle). Each named subquery below is LEFT JOINed onto
-- "admission" in the final SELECT via the patient_pomr_id key.
-- The same timestamp range literal pair is repeated in the encounter, admission,
-- ip_create_admission, vitals and leaves subqueries.
WITH encounter
-- Encounters created inside the reporting window; patient_pomr_id is exposed as encounter_number.
AS (SELECT patient_pomr_id AS encounter_number,
patient_id AS umrn,
doctor_id,
doctor_name
FROM eh_pomr.ehpom_patient_pomr
WHERE created_on BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'),
chief_complain
-- Chief complaints, unfiltered; matched to admissions by patient_pomr_id.
AS (SELECT chief_complain,
patient_pomr_id
FROM eh_pomr.ehpom_chief_complain),
admission
-- Driving row set: rows with direct_admission IS NULL and is_from_er != 1,
-- created inside the reporting window.
AS (SELECT admitted_date,
patient_id,
ADMISSION_ID,
admission_type AS encounter_type,
patient_pomr_id,
hospital_id,
clinic_name
FROM ad_request.admlm_admission
WHERE direct_admission IS NULL
AND is_from_er != 1
AND created_date BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'),
ip_create_admission
-- Admitting diagnosis; dbms_lob.Substr(lob, 2000, 1) takes at most 2000 units
-- of the LOB starting at position 1.
AS (SELECT patientpomr,
dbms_lob.Substr(admitting_diagnosis, 2000, 1) diagnosis
FROM eh_ip.ehip_create_admission
WHERE created_on BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'),
discharge
-- Active discharge rows (isactive = 1); the numeric dischargevia code is mapped to a label.
AS (SELECT CASE
WHEN dischargevia = 1 THEN 'Private Vehicle'
WHEN dischargevia = 2 THEN 'Ambulatory'
WHEN dischargevia = 3 THEN 'Other'
ELSE ' Unknown'
END AS dischargevia,
pomrid,
modifiedon AS discharge_date,
conditionondischarge AS discharge_speciality
FROM eh_ndischarge.ehipd_dischargedetails
WHERE isactive = 1),
death
-- Cause of death, truncated the same way as the admitting diagnosis above.
AS (SELECT dbms_lob.Substr(underlying_cause, 2000, 1) cause_of_death,
patientpomr
FROM eh_ip.ehip_death_detail),
empi
-- Patient identity lookup: mrn -> id_number.
AS (SELECT id_number,
mrn
FROM rf_empi.emred_patients),
vitals
-- Flow-sheet values for records whose flow_sheet_code is 'vitals'.
-- This subquery is not joined in the final SELECT (its columns are commented out there).
-- The WHERE filter on FS.time_stamp rejects rows where FS is NULL, so the LEFT JOIN
-- behaves like an inner join.
AS (SELECT PR.id,
PR.patient_pomr_id,
FS.field_code,
FS.value
FROM eh_commmon.ehcom_patient_record PR
left join eh_commmon.ehcom_flow_sheet_data FS
ON PR.id = FS.patient_record_id
WHERE PR.flow_sheet_code = 'vitals'
AND FS.time_stamp BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'),
leaves
-- Medical leaves with leave_status = 5, linked to admissions through ML.VISIT_ID = ADM.ADMISSION_ID.
-- The WHERE conditions on ML.created_date and ML.REQUESTING_DAYS reject rows where ML is NULL,
-- so the LEFT JOIN to ML behaves like an inner join.
AS (SELECT requesting_days,
visit_id,
ADM.PATIENT_POMR_ID
FROM ad_request.admlm_med_leave_final_print FP
left join ad_request.admlm_medical_leave ML
ON FP.request_id = ML.request_id
LEFT JOIN AD_REQUEST.ADMLM_ADMISSION ADM
ON ML.VISIT_ID = ADM.ADMISSION_ID
WHERE FP.leave_status = 5
AND ML.created_date BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'
AND ML.REQUESTING_DAYS IS NOT NULL)
-- Final projection: one row per distinct combination of the selected columns.
SELECT DISTINCT encounter.encounter_number,
admission.encounter_type,
empi.id_number AS Patient_National_ID,
admission.patient_id AS umrn,
admission.admitted_date,
admission.hospital_id,
admission.clinic_name AS admission_speciality,
chief_complain.chief_complain,
leaves.requesting_days AS Duration_of_leave,
encounter.doctor_id,
encounter.doctor_name,
ip_create_admission.diagnosis,
discharge.dischargevia,
discharge.discharge_date,
discharge_speciality,
-- clinic_name is projected twice: as admission_speciality above and as clinic here.
admission.clinic_name AS clinic,
death.cause_of_death
-- VITALS.field_code,
-- VITALS.value
FROM admission
left join empi
ON admission.patient_id = empi.mrn
left join encounter
ON admission.patient_pomr_id = encounter.encounter_number
left join ip_create_admission
ON admission.patient_pomr_id = ip_create_admission.patientpomr
--admission_request_number with adt
left join discharge
ON admission.patient_pomr_id = discharge.pomrid
left join death
ON admission.patient_pomr_id = death.patientpomr
left join chief_complain
ON admission.patient_pomr_id = chief_complain.patient_pomr_id
left join leaves
ON admission.patient_pomr_id = leaves.PATIENT_POMR_ID
I tried wrapping the query in BEGIN and END blocks with the DECLARE keyword, but had no luck. Also, is there a special way to assign a variable with the INTO keyword when it is needed for a BETWEEN condition?
Include yet another CTE (I'm calling it dates) and cross-join it in every other CTE that uses these values. Something like this:
-- Pattern: define the date range once in a one-row CTE and cross-join it wherever it is needed.
-- Only the first two CTEs are spelled out; the trailing "AS .." stands for the rest of the query.
WITH
dates (start_date, end_date) -- new CTE: a single row holding both range bounds
AS (SELECT timestamp '2022-08-01 00:00:00',
timestamp '2022-08-30 00:00:00'
FROM dual),
encounter
AS (SELECT patient_pomr_id AS encounter_number,
patient_id AS umrn,
doctor_id,
doctor_name
FROM eh_pomr.ehpom_patient_pomr
CROSS JOIN dates d -- dates has exactly one row, so the cross join adds its columns without multiplying rows
WHERE created_on BETWEEN d.start_date AND d.end_date), -- the bounds replace the repeated literals
chief_complain
AS ..
This is from MSSQL; you can try converting it to Oracle SQL.
-- T-SQL fragment: two optional datetime parameters (local variables and parameters
-- are prefixed with @ in T-SQL), both defaulting to NULL.
@dateFrom datetime = null,
@dateTo datetime = null,
-- DATEADD(D, 0, DATEDIFF(D, 0, x)) converts x to a whole-day count and back, which
-- drops the time-of-day part. The two expressions below are the lower and upper
-- operands of an "... AND ..." range condition whose left-hand side is not shown here.
DATEADD(D, 0, DATEDIFF(D, 0, @dateFrom))
AND DATEADD(D, 0, DATEDIFF(D, 0, @dateTo))
I'm using PostgreSQL 9.5.19, DBeaver 6.3.4
I have a table where each row holds a user's name, a place he attended, and the time when he was there.
I need to select all pairs of places where any user was (if a user was at place a and place b, I need a row like this: user, place a, place b, time at place a, time at place b).
The ponds table:
-- One row per visit: which user was at which place, and when.
create table example.example (
    tm         timestamp not null,  -- time of the visit
    place_name varchar   not null,  -- place that was visited
    user_name  varchar   not null   -- user who made the visit
);
Some sample data:
-- Sample visits for three users; user_2 has a single visit and therefore no place pair.
-- The column list is spelled user-first, and each tuple is ordered to match it.
insert into example.example
    (user_name, place_name, tm)
values
    ('user_1', 'place_1', '2020-02-25 00:00:19.000'),
    ('user_1', 'place_2', '2020-03-25 00:00:19.000'),
    ('user_2', 'place_1', '2020-02-25 00:00:19.000'),
    ('user_3', 'place_1', '2020-03-25 00:00:19.000'),
    ('user_3', 'place_2', '2020-02-25 00:00:19.000');
I'm trying this script:
-- For every user and every ordered pair of different places (earlier visit -> later visit),
-- report the latest time at the first place and the earliest time at the second place.
-- Both visits must fall inside the same time window.
select
    earlier.user_name,
    earlier.place_name as r1_place,
    max(earlier.tm)    as r1_tm,
    later.place_name   as r2_place,
    min(later.tm)      as r2_tm
from example.example as earlier
inner join example.example as later
    on  earlier.user_name = later.user_name
    and earlier.tm < later.tm
    and earlier.place_name <> later.place_name
where earlier.tm between '2020-02-25 00:00:00' and '2020-03-25 15:00:00'
  and later.tm   between '2020-02-25 00:00:00' and '2020-03-25 15:00:00'
group by
    earlier.user_name,
    earlier.place_name,
    later.place_name
It seems to give me the right result, but it runs really slowly.
Can I optimize it somehow?
I would suggest trying indexes. For this query:
-- Same place-pair query as in the question, written against the real table
-- (example.example) and the question's time window.
-- Two composite indexes are recommended, one per reference to the table:
--   (tm, user_name, place_name)   for the range filter on t
--   (user_name, tm, place_name)   for the per-user lookup on t2
select t.user_name, t.place_name as r1_place, max(t.tm) as r1_tm,
       t2.place_name as r2_place, min(t2.tm) as r2_tm
from example.example t join
     example.example t2
     on t.user_name = t2.user_name and
        t.tm < t2.tm and
        t.place_name <> t2.place_name
where t.tm between '2020-02-25 00:00:00' and '2020-03-25 15:00:00' and
      t2.tm between '2020-02-25 00:00:00' and '2020-03-25 15:00:00'
group by t.user_name, t.place_name, t2.place_name
I would suggest an index on (tm, user_name, place_name) and on (user_name, tm, place_name) -- yes, both, one for each reference.
A colleague helped me rewrite it with window functions:
-- For each visit, look up the same user's next visit in time order and keep only the
-- transitions where the place actually changes; seconds_diff is the gap between the two visits.
with visits as (
    select
        e.user_name,
        e.place_name,
        e.tm,
        lead(e.place_name) over (partition by e.user_name order by e.tm asc) as next_place_name,
        lead(e.tm)         over (partition by e.user_name order by e.tm asc) as next_tm
    from example.example as e
)
select
    v.*,
    extract(epoch from (v.next_tm - v.tm)) as seconds_diff
from visits as v
where v.next_place_name is not null          -- the user's last visit has no successor
  and v.next_tm is not null
  and v.place_name <> v.next_place_name      -- skip repeated visits to the same place
;
When I run the below query, I get the error "Invalid column reference: cnt". Any suggestions would be great !!
-- Query that raises "Invalid column reference: cnt".
-- cnt is an alias defined in this level's SELECT list, and the WHERE clause on the last
-- line is evaluated before that aggregate exists, so the alias cannot be referenced there.
select count(customer) as cnt from (
-- NOTE(review): the inner SELECT projects customer but groups by evar71; presumably both
-- refer to the same column. The second and third projected columns have no alias.
select customer, concat(visid, lowid), count(name)
from tab1 where date_time between '2017-05-01 00:00:00' and '2017-05-31 23:59:59' and name in ('payment: Complete', 'check: Complete')
group by evar71, concat(visid, lowid)) t1
where cnt > 1;
Another way to do it.
-- Filter on the aggregate with HAVING instead of WHERE: HAVING is evaluated after
-- aggregation, so count(customer) is available to it.
-- The inner query groups by the column it actually projects (customer), and its
-- computed columns are given names.
select count(customer) as cnt from (
    select customer, concat(visid, lowid) as ids, count(name) as name_cnt
    from tab1
    where date_time between '2017-05-01 00:00:00' and '2017-05-31 23:59:59'
      and name in ('payment: Complete', 'check: Complete')
    group by customer, concat(visid, lowid)) t1
having count(customer) > 1;
A WHERE filter is applied before aggregation,
which is why where cnt > 1 does not work. The HAVING keyword introduces a condition on aggregates; it works as a filter after aggregation.
-- Skeleton showing where each kind of filter belongs; "..." stands for the FROM clause.
select count(customer) cnt
...
where rows_filter_condition_here --row filter, applied before aggregation
having count(customer) > 1 --filter on aggregation results
order by cnt desc --runs after aggregation, so the cnt alias is usable here
I think hive prefers aliases in the group by. In addition, several column aliases are not correct:
-- Count the (customer, visitor key) groups that have more than one matching event in May 2017.
-- Every computed column in the inner query is named so the outer query can filter on it.
select count(customer) as cnt
from (
    select
        customer,
        concat(visid, lowid) as visitor_key,
        count(name)          as name_cnt
    from tab1
    where date_time >= '2017-05-01'
      and date_time <  '2017-06-01'     -- half-open range covers all of May
      and name in ('payment: Complete', 'check: Complete')
    group by customer, visitor_key
) per_customer
where name_cnt > 1;
I have a few queries that I would like to combine into ONE query in order to not have to call out to the server multiple times.
An example of the queries I am using:
-- Four separate round trips: the same count over two tables and two date windows.
-- Both bounds of every window are exclusive.

-- memberEmails, first window
select count(*) as mailCount1
from [WebContact].[dbo].[memberEmails]
where contactdatetime > '01/01/06' and contactdatetime < '02/01/06'

-- otherEmails, first window
select count(*) as mailCount2
from [WebContact].[dbo].[otherEmails]
where contactdatetime > '01/01/06' and contactdatetime < '02/01/06'

-- memberEmails, second window
select count(*) as mailCount3
from [WebContact].[dbo].[memberEmails]
where contactdatetime > '02/01/06' and contactdatetime < '03/01/06'

-- otherEmails, second window
select count(*) as mailCount4
from [WebContact].[dbo].[otherEmails]
where contactdatetime > '02/01/06' and contactdatetime < '03/01/06'
etc etc...
So as the examples above, only thing that changes are:
The FROM (memberEmails & otherEmails)
The > & < months (01/01/06, 02/01/06 | 02/01/06, 03/01/06 | etc...)
Is this possible to do with a single query?
First, use group by and just use two queries:
-- Member e-mail count per calendar month: one row per (year, month).
select year(contactdatetime) as yyyy, month(contactdatetime) as mm, count(*) as cnt
from [WebContact].[dbo].[memberEmails]
group by year(contactdatetime), month(contactdatetime);
and:
-- Other e-mail count per calendar month: one row per (year, month).
select year(contactdatetime) as yyyy, month(contactdatetime) as mm, count(*) as cnt
from [WebContact].[dbo].[otherEmails]
group by year(contactdatetime), month(contactdatetime);
Then, if you like, you can combine these into a single query:
-- Monthly counts from both tables side by side.
-- The full outer join keeps months present in only one table; COALESCE fills the
-- missing side's year/month from the other side and its count with 0.
select coalesce(me.yyyy, oe.yyyy) as yyyy, coalesce(me.mm, oe.mm) as mm,
       coalesce(me.cnt, 0) as memberemailcnt,
       coalesce(oe.cnt, 0) as otheremailcnt
from (select year(contactdatetime) as yyyy, month(contactdatetime) as mm, count(*) as cnt
      from [WebContact].[dbo].[memberEmails]
      group by year(contactdatetime), month(contactdatetime)
     ) me full outer join
     (select year(contactdatetime) as yyyy, month(contactdatetime) as mm, count(*) as cnt
      from [WebContact].[dbo].[otherEmails]
      group by year(contactdatetime), month(contactdatetime)
     ) oe
     on me.yyyy = oe.yyyy and me.mm = oe.mm;
A full outer join is not necessary if both tables have data for all months.
-- Collect the monthly counts of both tables in one table variable (table variables
-- are declared with an @ prefix), then return them as a single result set.
declare @emailCount table(tablename varchar(20), year int, month int, qty int)

-- Monthly counts for memberEmails.
insert into @emailCount (tablename, year, month, qty)
select 'memberEmails', year(contactdatetime), month(contactdatetime), count(*)
from [WebContact].[dbo].[memberEmails]
group by year(contactdatetime), month(contactdatetime)

-- Monthly counts for otherEmails.
insert into @emailCount (tablename, year, month, qty)
select 'otherEmails', year(contactdatetime), month(contactdatetime), count(*)
from [WebContact].[dbo].[otherEmails]
group by year(contactdatetime), month(contactdatetime)

select tablename, year, month, qty from @emailCount
Add WHERE clause if needed to restrict date ranges. (edit- simplified to use year() and month() functions.)
I haven't checked the syntax or performance, but you can do something like this:
-- Compute the four counts as labelled rows, then pivot the labels into columns so the
-- result is a single row: mailCount1 .. mailCount4.
WITH cte AS (
    SELECT COUNT(*) AS countvalue, 'mailCount1' AS description
    FROM [WebContact].[dbo].[memberEmails]
    WHERE contactdatetime > '01/01/06' AND contactdatetime < '02/01/06'

    UNION ALL

    SELECT COUNT(*), 'mailCount2'
    FROM [WebContact].[dbo].[otherEmails]
    WHERE contactdatetime > '01/01/06' AND contactdatetime < '02/01/06'

    UNION ALL

    SELECT COUNT(*), 'mailCount3'
    FROM [WebContact].[dbo].[memberEmails]
    WHERE contactdatetime > '02/01/06' AND contactdatetime < '03/01/06'

    UNION ALL

    SELECT COUNT(*), 'mailCount4'
    FROM [WebContact].[dbo].[otherEmails]
    WHERE contactdatetime > '02/01/06' AND contactdatetime < '03/01/06'
)
SELECT
    piv.mailCount1,
    piv.mailCount2,
    piv.mailCount3,
    piv.mailCount4
FROM cte
-- Each label occurs exactly once, so MAX simply picks that label's single count.
PIVOT (MAX(countvalue) FOR description IN (mailCount1, mailCount2, mailCount3, mailCount4)) AS piv;
Hope this helps..
This query is working but it seems to take longer time than usual to retrieve the data. Is there a better solution to optimize this query? I need to get all PRD_ID from T1 and T2 even if there is no match with S1 and S2.
-- Products of region 'CA' matched against SERVICE_1 and products of region 'IL' matched
-- against SERVICE_2, combined with UNION ALL. The LEFT JOINs keep every product even
-- when no service row matches inside the date range.
-- NOTE(review): a comma appears to be missing between T.PRD_ID and T.AMOUNT.
-- NOTE(review): the inner SELECTs project PRD_CODE but not AMOUNT, so T.AMOUNT is not
-- available from T as written.
SELECT DISTINCT T.PRD_ID T.AMOUNT, T.DATE, T.REGION
FROM
(
-- Branch 1: region 'CA' products with their SERVICE_1 order dates.
SELECT DISTINCT T1.PRD_ID, T1.PRD_CODE, S1.ORDER_DATE AS DATE, T1.REGION
FROM
(
(SELECT PRD_ID, PRD_CODE,AMOUNT,REGION
FROM PRODUCT
WHERE REGION='CA') T1
LEFT JOIN SERVICE_1 S1
ON S1.PRD_ID = T1.PRD_ID
AND S1.PRD_CODE= T1.PRD_CODE
AND S1.AMT = T1.AMOUNT
-- The date range is part of the join condition, so products without a match are kept.
AND S1.ORDER_DATE >= '01/01/2015'
AND S1.ORDER_DATE <= '02/28/2015'
)
UNION ALL
-- Branch 2: region 'IL' products with their SERVICE_2 calendar dates.
SELECT DISTINCT T2.PRD_ID, T2.PRD_CODE, S2.ACCT_CALENDAR_DT AS DATE, T2.REGION
FROM
(
(SELECT PRD_ID, PRD_CODE,AMOUNT,REGION
FROM PRODUCT
WHERE REGION='IL') T2
LEFT JOIN SERVICE_2 S2
ON S2.PRD_ID = T2.PRD_ID
AND S2.PRD_CODE= T2.PRD_CODE
AND S2.AMT = T2.AMOUNT
-- NOTE(review): these literals use a different format ('YYYYMMDD') than branch 1 ('MM/DD/YYYY').
AND S2.ACCT_CALENDAR_DT >= '20150101'
AND S2.ACCT_CALENDAR_DT <= '20150228'
)
) T
-- NOTE(review): ORDER_DATE is not a column of T; both branches expose the date under the alias DATE.
ORDER BY REGION, ORDER_DATE DESC, PRD_ID
I can't see why you need all these (3!) levels of nested tables. The following should be equivalent:
-- Flattened version: PRODUCT is joined directly and the region filter moves to WHERE.
-- The date bounds stay in the ON clause so products without a matching service row survive.

-- Region 'CA' products against SERVICE_1.
SELECT DISTINCT
    p_ca.PRD_ID,
    p_ca.PRD_CODE,
    svc1.ORDER_DATE AS DATE,
    p_ca.REGION
FROM PRODUCT p_ca
LEFT JOIN SERVICE_1 svc1
    ON  svc1.PRD_ID     = p_ca.PRD_ID
    AND svc1.PRD_CODE   = p_ca.PRD_CODE
    AND svc1.AMT        = p_ca.AMOUNT
    AND svc1.ORDER_DATE >= DATE '2015-01-01'    -- was '01/01/2015'
    AND svc1.ORDER_DATE <= DATE '2015-02-28'    -- was '02/28/2015'
WHERE p_ca.REGION = 'CA'

-- UNION ALL is enough: the two halves carry different REGION values, so no row
-- can appear in both and no cross-branch de-duplication is needed.
UNION ALL

-- Region 'IL' products against SERVICE_2.
SELECT DISTINCT
    p_il.PRD_ID,
    p_il.PRD_CODE,
    svc2.ACCT_CALENDAR_DT AS DATE,
    p_il.REGION
FROM PRODUCT p_il
LEFT JOIN SERVICE_2 svc2
    ON  svc2.PRD_ID           = p_il.PRD_ID
    AND svc2.PRD_CODE         = p_il.PRD_CODE
    AND svc2.AMT              = p_il.AMOUNT
    AND svc2.ACCT_CALENDAR_DT >= DATE '2015-01-01'
    AND svc2.ACCT_CALENDAR_DT <= DATE '2015-02-28'
WHERE p_il.REGION = 'IL'

ORDER BY REGION, DATE DESC, PRD_ID ;
or:
-- Variant that filters PRODUCT by region inside a derived table and emits the region
-- as a constant, so REGION does not have to be carried through the join.

-- Region 'CA' products against SERVICE_1.
SELECT DISTINCT
    p_ca.PRD_ID,
    p_ca.PRD_CODE,
    svc1.ORDER_DATE AS DATE,
    'CA' AS REGION
FROM (
    SELECT PRD_ID, PRD_CODE, AMOUNT
    FROM PRODUCT
    WHERE REGION = 'CA'
) p_ca
LEFT JOIN SERVICE_1 svc1
    ON  svc1.PRD_ID     = p_ca.PRD_ID
    AND svc1.PRD_CODE   = p_ca.PRD_CODE
    AND svc1.AMT        = p_ca.AMOUNT
    AND svc1.ORDER_DATE >= DATE '2015-01-01'
    AND svc1.ORDER_DATE <= DATE '2015-02-28'

UNION ALL

-- Region 'IL' products against SERVICE_2.
SELECT DISTINCT
    p_il.PRD_ID,
    p_il.PRD_CODE,
    svc2.ACCT_CALENDAR_DT AS DATE,
    'IL' AS REGION
FROM (
    SELECT PRD_ID, PRD_CODE, AMOUNT
    FROM PRODUCT
    WHERE REGION = 'IL'
) p_il
LEFT JOIN SERVICE_2 svc2
    ON  svc2.PRD_ID           = p_il.PRD_ID
    AND svc2.PRD_CODE         = p_il.PRD_CODE
    AND svc2.AMT              = p_il.AMOUNT
    AND svc2.ACCT_CALENDAR_DT >= DATE '2015-01-01'
    AND svc2.ACCT_CALENDAR_DT <= DATE '2015-02-28'

ORDER BY REGION, DATE DESC, PRD_ID ;