How to optimize selection of pairs from one column of the table? - sql

I'm using PostgreSQL 9.5.19, DBeaver 6.3.4
I have a table where each row holds a user's name, a place the user visited, and the time of the visit.
I need to select all pairs of places that any user visited (if a user was at place a and place b, I need a row like this: user, place a, place b, time at place a, time at place b).
The table definition:
-- Visit log: one row per user, place and time of the visit.
create table example.example (
    tm         timestamp not null,  -- time the user was at the place
    place_name varchar   not null,  -- place that was visited
    user_name  varchar   not null   -- user who visited
);
Some sample data:
-- Sample visits: user_1 and user_3 each appear at two places, user_2 at one.
insert into example.example
    (tm, place_name, user_name)
values
    ('2020-02-25 00:00:19.000', 'place_1', 'user_1'),
    ('2020-03-25 00:00:19.000', 'place_2', 'user_1'),
    ('2020-02-25 00:00:19.000', 'place_1', 'user_2'),
    ('2020-03-25 00:00:19.000', 'place_1', 'user_3'),
    ('2020-02-25 00:00:19.000', 'place_2', 'user_3');
I'm trying this script:
-- For each user, pair every visit (r1) with every strictly later visit to a
-- different place (r2), both inside the reporting window. Per
-- (user, r1 place, r2 place) report the latest r1 time and the earliest r2
-- time among the joined rows.
select
    earlier.user_name,
    earlier.place_name as r1_place,
    max(earlier.tm)    as r1_tm,
    later.place_name   as r2_place,
    min(later.tm)      as r2_tm
from example.example as earlier
inner join example.example as later
    on  later.user_name = earlier.user_name
    and later.tm > earlier.tm
    and later.place_name <> earlier.place_name
where earlier.tm >= '2020-02-25 00:00:00'
  and earlier.tm <= '2020-03-25 15:00:00'
  and later.tm >= '2020-02-25 00:00:00'
  and later.tm <= '2020-03-25 15:00:00'
group by
    earlier.user_name,
    earlier.place_name,
    later.place_name
It seems to give me the right result, but it runs really slowly.
Can I optimize it somehow?

I would suggest trying indexes. For this query:
-- The pairing query the index suggestion refers to, written against the
-- question's table. (The placeholder name schema.table cannot be parsed:
-- TABLE is a reserved word.) The time window matches the question's, so the
-- query returns rows for the sample data.
--   t  = the earlier visit (r1), t2 = a later visit to a different place (r2)
select t.user_name,
       t.place_name as r1_place,
       max(t.tm) as r1_tm,
       t2.place_name as r2_place,
       min(t2.tm) as r2_tm
from example.example t join
     example.example t2
     on t.user_name = t2.user_name and
        t.tm < t2.tm and
        t.place_name <> t2.place_name
where t.tm between '2020-02-25 00:00:00' and '2020-03-25 15:00:00' and
      t2.tm between '2020-02-25 00:00:00' and '2020-03-25 15:00:00'
group by t.user_name, t.place_name, t2.place_name
I would suggest an index on (tm, user_name, place_name) and on (user_name, tm, place_name) -- yes, both, one for each reference.

A colleague helped me write a version that uses a window function:
-- Consecutive-visit pairs: attach to every visit the same user's next visit
-- (ordered by time) and keep the pair only when the place changed.
with visit_with_next as (
    select
        src.user_name,
        src.place_name,
        src.tm,
        lead(src.place_name) over (partition by src.user_name order by src.tm asc) as next_place_name,
        lead(src.tm)         over (partition by src.user_name order by src.tm asc) as next_tm
    from example.example as src
)
select
    v.user_name,
    v.place_name,
    v.tm,
    v.next_place_name,
    v.next_tm,
    extract(epoch from (v.next_tm - v.tm)) as seconds_diff  -- gap between the two visits, in seconds
from visit_with_next as v
where v.next_place_name is not null   -- drops each user's last visit (no successor)
  and v.next_tm is not null
  and v.place_name <> v.next_place_name
;

Related

Inserting parameters to SQL query in Oracle SQL

In the following query, the same BETWEEN date-time bounds are repeated in multiple places, and I need to replace them with two variables named start_date and end_date. I tried multiple methods and had no luck. Please answer with a runnable query if you can. Thanks in advance.
-- Admission report: starts from the admission CTE and LEFT JOINs patient
-- identity, encounter, diagnosis, discharge, death, chief complaint and
-- medical-leave details onto it.
-- The same timestamp window (2022-08-01 .. 2022-08-30) is hard-coded in the
-- encounter, admission, ip_create_admission, vitals and leaves CTEs.
-- encounter: records created inside the window, keyed by patient_pomr_id.
WITH encounter
AS (SELECT patient_pomr_id AS encounter_number,
patient_id AS umrn,
doctor_id,
doctor_name
FROM eh_pomr.ehpom_patient_pomr
WHERE created_on BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'),
-- chief_complain: all complaints, no date filter.
chief_complain
AS (SELECT chief_complain,
patient_pomr_id
FROM eh_pomr.ehpom_chief_complain),
-- admission: driving row set of the report.
-- NOTE(review): "is_from_er != 1" also drops rows where is_from_er is NULL - confirm this is intended.
admission
AS (SELECT admitted_date,
patient_id,
ADMISSION_ID,
admission_type AS encounter_type,
patient_pomr_id,
hospital_id,
clinic_name
FROM ad_request.admlm_admission
WHERE direct_admission IS NULL
AND is_from_er != 1
AND created_date BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'),
-- ip_create_admission: first 2000 characters of the admitting diagnosis LOB.
ip_create_admission
AS (SELECT patientpomr,
dbms_lob.Substr(admitting_diagnosis, 2000, 1) diagnosis
FROM eh_ip.ehip_create_admission
WHERE created_on BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'),
-- discharge: active discharge rows, with the numeric dischargevia code mapped to a label.
discharge
AS (SELECT CASE
WHEN dischargevia = 1 THEN 'Private Vehicle'
WHEN dischargevia = 2 THEN 'Ambulatory'
WHEN dischargevia = 3 THEN 'Other'
ELSE ' Unknown'
END AS dischargevia,
pomrid,
modifiedon AS discharge_date,
conditionondischarge AS discharge_speciality
FROM eh_ndischarge.ehipd_dischargedetails
WHERE isactive = 1),
-- death: first 2000 characters of the underlying-cause LOB.
death
AS (SELECT dbms_lob.Substr(underlying_cause, 2000, 1) cause_of_death,
patientpomr
FROM eh_ip.ehip_death_detail),
-- empi: maps mrn to id_number.
empi
AS (SELECT id_number,
mrn
FROM rf_empi.emred_patients),
-- vitals: not referenced by the final SELECT (its columns are commented out there).
-- NOTE(review): the WHERE filter on FS.time_stamp rejects the NULL-extended
-- rows, so this LEFT JOIN behaves like an inner join.
vitals
AS (SELECT PR.id,
PR.patient_pomr_id,
FS.field_code,
FS.value
FROM eh_commmon.ehcom_patient_record PR
left join eh_commmon.ehcom_flow_sheet_data FS
ON PR.id = FS.patient_record_id
WHERE PR.flow_sheet_code = 'vitals'
AND FS.time_stamp BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'),
-- leaves: medical leaves with leave_status = 5 created inside the window.
-- NOTE(review): the WHERE filters on ML columns make the LEFT JOIN to ML
-- behave like an inner join.
leaves
AS (SELECT requesting_days,
visit_id,
ADM.PATIENT_POMR_ID
FROM ad_request.admlm_med_leave_final_print FP
left join ad_request.admlm_medical_leave ML
ON FP.request_id = ML.request_id
LEFT JOIN AD_REQUEST.ADMLM_ADMISSION ADM
ON ML.VISIT_ID = ADM.ADMISSION_ID
WHERE FP.leave_status = 5
AND ML.created_date BETWEEN timestamp '2022-08-01 00:00:00' AND
timestamp '2022-08-30 00:00:00'
AND ML.REQUESTING_DAYS IS NOT NULL)
-- Final projection; DISTINCT collapses rows that are identical across all selected columns.
SELECT DISTINCT encounter.encounter_number,
admission.encounter_type,
empi.id_number AS Patient_National_ID,
admission.patient_id AS umrn,
admission.admitted_date,
admission.hospital_id,
admission.clinic_name AS admission_speciality,
chief_complain.chief_complain,
leaves.requesting_days AS Duration_of_leave,
encounter.doctor_id,
encounter.doctor_name,
ip_create_admission.diagnosis,
discharge.dischargevia,
discharge.discharge_date,
discharge_speciality,
admission.clinic_name AS clinic,
death.cause_of_death
-- VITALS.field_code,
-- VITALS.value
FROM admission
left join empi
ON admission.patient_id = empi.mrn
left join encounter
ON admission.patient_pomr_id = encounter.encounter_number
left join ip_create_admission
ON admission.patient_pomr_id = ip_create_admission.patientpomr
--admission_request_numbrer with adt
left join discharge
ON admission.patient_pomr_id = discharge.pomrid
left join death
ON admission.patient_pomr_id = death.patientpomr
left join chief_complain
ON admission.patient_pomr_id = chief_complain.patient_pomr_id
left join leaves
ON admission.patient_pomr_id = leaves.PATIENT_POMR_ID
I tried wrapping the query in a BEGIN ... END block with DECLARE keywords but had no luck. Also, is there a special way to assign a variable using the INTO keyword when it is needed for a BETWEEN condition?
Include one more CTE (I'm calling it dates) and cross-join it in the other CTEs that use these values. Something like this:
-- Sketch of the refactoring: the two window bounds are defined once in the
-- dates CTE (a single row selected from dual) and every CTE that needs them
-- CROSS JOINs dates instead of repeating the timestamp literals.
WITH
dates (start_date, end_date) --> this is new CTE
AS (SELECT timestamp '2022-08-01 00:00:00',
timestamp '2022-08-30 00:00:00'
FROM dual),
encounter
AS (SELECT patient_pomr_id AS encounter_number,
patient_id AS umrn,
doctor_id,
doctor_name
FROM eh_pomr.ehpom_patient_pomr
CROSS JOIN dates d --> it is used here
WHERE created_on BETWEEN d.start_date AND d.end_date), --> like this
chief_complain
-- The remaining CTEs and the final SELECT are elided here.
AS ..
This is from MSSQL; you can try converting it to Oracle SQL:
-- T-SQL fragment (parameter declarations plus the two BETWEEN bounds).
-- Variables and parameters take the @ sigil; # would denote a temp table.
@dateFrom datetime = null,
@dateTo datetime = null,
-- DATEADD(D, 0, DATEDIFF(D, 0, x)) truncates x to midnight of its day.
DATEADD(D, 0, DATEDIFF(D, 0, @DateFrom))
AND DATEADD(D, 0, DATEDIFF(D, 0, @DateTo))

Date difference between two locations in same table with one date column

A tag is physically placed at a client location and moves between places. I need to find how long it stayed at each location. For example, if a tag is placed in location 1 at 10:00 am and moved to location 2 at 10:15, the time difference is 15 minutes. Here is the sample data I have:
-- Tag sightings: the location (Loc) a tag was recorded at, and when.
CREATE TABLE #Tagm (
    tagname      varchar(10),
    created_date datetime,
    Loc          int
);

INSERT INTO #Tagm (tagname, created_date, Loc)
VALUES
    ('AC1', '2018-07-01 09:35:37.370', 56),
    ('AC1', '2018-07-01 10:35:37.370', 64),
    ('AC1', '2018-07-01 10:55:37.370', 84),
    ('AC1', '2018-07-01 11:55:37.370', 76);
I tried this, but it gives me the total time across all locations:
-- One row per tag: minutes between its first and last sighting
-- (a single total, not broken down by location).
SELECT
    tagname,
    DATEDIFF(MINUTE, MIN(created_date), MAX(created_date)) AS totaltime
FROM #Tagm
GROUP BY tagname
the result i am looking for is shown below
Any help will be appreciated
Because you mentioned it is possible to have the same location several times in a row, you need to find the true start and end of the time at that location. You can do that with LAG, similar to one of the other answers. After finding the true start and end, you can take the difference. This could be done with fewer Common Table Expressions or as a subquery, but I have split it like this so the logic is easier to follow.
I also added a second tagname and the use case that location doesn't change to both.
-- Tag sightings, extended with a second tag and with repeated sightings at
-- the same location (AC1 at 56, AC2 at 64).
CREATE TABLE #Tagm (
    tagname      varchar(10),
    created_date datetime,
    Loc          int
);

INSERT INTO #Tagm (tagname, created_date, Loc)
VALUES
    ('AC1', '2018-07-01 09:35:37.370', 56),
    ('AC1', '2018-07-01 09:40:37.370', 56),
    ('AC1', '2018-07-01 10:35:37.370', 64),
    ('AC1', '2018-07-01 10:55:37.370', 84),
    ('AC1', '2018-07-01 11:55:37.370', 76);

INSERT INTO #Tagm (tagname, created_date, Loc)
VALUES
    ('AC2', '2018-08-01 09:35:37.370', 56),
    ('AC2', '2018-08-01 09:40:37.370', 64),
    ('AC2', '2018-08-01 10:35:37.370', 64),
    ('AC2', '2018-08-01 10:55:37.370', 84),
    ('AC2', '2018-08-01 11:55:37.370', 76);
-- Time spent per stay: a stay starts at a sighting whose location differs
-- from the tag's previous sighting (or that has no previous sighting), and
-- ends at the start of the tag's next stay.
;WITH sightings AS (
    -- every sighting together with the location of the tag's previous sighting
    SELECT
        tagname,
        created_date,
        Loc,
        LAG(Loc) OVER (PARTITION BY tagname ORDER BY created_date) AS PrevLoc
    FROM #Tagm
),
stay_starts AS (
    -- keep only the first sighting of each stay
    SELECT
        tagname,
        created_date,
        Loc
    FROM sightings
    WHERE PrevLoc IS NULL
       OR PrevLoc <> Loc
)
SELECT
    cur.tagname,
    cur.Loc,
    cur.created_date      AS StartDateTime,
    MIN(nxt.created_date) AS EndDateTime,   -- NULL for a tag's last stay
    DATEDIFF(MINUTE, cur.created_date, MIN(nxt.created_date)) AS TotalTime
FROM stay_starts AS cur
LEFT JOIN stay_starts AS nxt
    ON  nxt.tagname = cur.tagname
    AND nxt.created_date > cur.created_date
GROUP BY
    cur.tagname,
    cur.Loc,
    cur.created_date
ORDER BY
    cur.tagname,
    cur.created_date
I think you just want lead():
-- Minutes from each sighting to the same tag's next sighting
-- (NULL for a tag's last sighting, which has no successor).
SELECT
    t.tagname,
    DATEDIFF(
        MINUTE,
        t.created_date,
        LEAD(t.created_date, 1) OVER (PARTITION BY t.tagname ORDER BY t.created_date)
    ) AS totaltime
FROM #Tagm AS t;
-- Minutes between each sighting and the same tag's next sighting.
-- Rows are numbered per tag, newest first, so the next sighting in time is
-- the row whose rn is one lower. Numbering per tag (and joining on tagname)
-- keeps one tag's sightings from being paired with another tag's.
-- The result column is named minutes because DATEDIFF(mi, ...) returns minutes.
with numbered as (
    select
        row_number() over (partition by tagname order by created_date desc) as rn,
        created_date,
        tagname,
        loc
    from #Tagm
)
select
    cur.loc,
    cur.created_date,
    cur.tagname,
    DATEDIFF(mi, cur.created_date, nxt.created_date) as minutes  -- NULL for a tag's last sighting
from numbered as cur
left join numbered as nxt
    on  nxt.tagname = cur.tagname
    and nxt.rn = cur.rn - 1
order by cur.tagname, cur.created_date

COUNT DISTINCT(column) slows the query 20X

I have this query which works fine and is fast (about 1 seconds execution time):
-- Per non-empty comment, over rows whose CLOSE_TIME differs from
-- '1970-01-01 00:00:00.000': number of tickets and number of non-NULL LOGIN
-- values (not de-duplicated), most frequent comment first.
SELECT
    COUNT(ticket) AS times_appears,
    COUNT(LOGIN)  AS number_of_accounts,
    comment
FROM mt4_trades
WHERE COMMENT <> ''
  AND CLOSE_TIME <> '1970-01-01 00:00:00.000'
GROUP BY comment
ORDER BY COUNT(ticket) DESC
but as soon as I change the second line to:
-- DISTINCT counts each LOGIN value once per comment group instead of once per row.
,COUNT(DISTINCT LOGIN) AS number_of_accounts
the query slows down by a factor of 20.
Is DISTINCT so slow that it affects the whole query, or am I missing something here?
After some research I found out that it is sometimes better to use a subquery than COUNT(DISTINCT column).
So this is my query, which is 20 times faster than the one in my question:
-- Variant that avoids COUNT(DISTINCT login): for each comment group, the
-- distinct logins are collected in a correlated derived table and then counted.
SELECT
    COUNT(mtt.ticket) AS times_appears,
    (
        SELECT COUNT(temp.login)
        FROM (
            SELECT DISTINCT src.login
            FROM mt4_trades AS src
            WHERE src.COMMENT = mtt.COMMENT
              AND src.CLOSE_TIME <> '1970-01-01 00:00:00.000'
        ) AS temp
    ) AS number_of_accounts,
    mtt.comment
FROM mt4_trades AS mtt
WHERE mtt.COMMENT <> ''
  AND mtt.CLOSE_TIME <> '1970-01-01 00:00:00.000'
GROUP BY mtt.comment
ORDER BY COUNT(mtt.ticket) DESC
@Raphaël-Althau Thanks for the helpful URL hint.
---- ticket count, irrespective of login
-- Counts tickets per comment and joins in the number of distinct logins per
-- comment, which is computed once in a derived table.
-- The derived table carries the alias t because the outer query refers to
-- t.comment and t.number_of_accounts.
Select mtt.comment
      ,t.number_of_accounts
      ,Count(mtt.ticket) As times_appears
From mt4_trades As mtt With (Nolock)
Join
(
    -- distinct logins per comment
    Select d.comment
          ,Count(d.login) As number_of_accounts
    From (
        Select Distinct
               src.login
              ,src.comment
        From mt4_trades As src With (Nolock)
        Where src.comment <> ''
          And src.CLOSE_TIME <> '1970-01-01 00:00:00.000'
    ) As d
    Group By d.comment
) As t On mtt.comment = t.comment
Where mtt.comment <> ''
  And mtt.CLOSE_TIME <> '1970-01-01 00:00:00.000'
Group By mtt.comment
        ,t.number_of_accounts
---- ticket count with respect to login
-- De-duplicates (ticket, login, comment) combinations first, then counts the
-- non-NULL tickets and logins of the remaining rows per comment.
SELECT
    d.comment,
    COUNT(d.ticket) AS times_appears,
    COUNT(d.login)  AS number_of_accounts
FROM (
    SELECT DISTINCT
        src.ticket,
        src.login,
        src.comment
    FROM mt4_trades AS src WITH (NOLOCK)
    WHERE src.comment <> ''
      AND src.CLOSE_TIME <> '1970-01-01 00:00:00.000'
) AS d
GROUP BY d.comment

Display Only One Record That May Or May Not Have Children

I've been stuck on this issue for a while now. I'm really close I think, but there's something I'm missing.
A Transaction can have zero or many TransactionErrors. I am trying to display all Transactions only once, and I'm also trying to display only the latest error message if there is one.
-- Transactions created in the window, with their error messages.
-- A transaction with several TransactionError rows is returned once per
-- error; this is the duplication the question is about.
-- Transaction is a reserved word in T-SQL, so every reference is bracketed.
SELECT [Transaction].[TransactionID]
,[FileName]
,[DestinationSystem]
,[CreatedOn]
,LEFT([TransactionError].[ErrorMessage], 300) AS LatestErrorMessage --Gets only the first 300 characters of the error message
FROM [WM01DB].[dbo].[Transaction]
INNER JOIN SourceSystem ON SourceSystem.SourceSystemId = [Transaction].SourceSystemId
LEFT JOIN TransactionError ON TransactionError.TransactionId = [Transaction].TransactionId
WHERE [Transaction].CreatedOn >= '2014-08-01 00:00:00.000'
AND [Transaction].CreatedOn < '2014-09-02 00:00:00.000'
ORDER BY [CreatedOn], [Transaction].[TransactionID]
When I run this query, I get most of the results I want, but I get duplicate transactions because these transactions have multiple TransactionErrors. It looks like this...
TransactionID FileName DestinationSystem CreatedOn LatestErrorMessage
18124 201408131541517937_DC_TEST_3339376-4.1.xml TEST 2014-08-18 18:31:19.993 U_BOL and Tracking Number are blank
18124 201408131541517937_DC_TEST_3339376-4.1.xml TEST 2014-08-18 18:31:19.993 FRT_CHG_TYPE is blank
18125 201408111521484448_DC_TEST_3339375-2.1.xml TEST 2014-08-19 16:04:58.467 NULL
18126 201408111521484448_DC_TEST_3339375-2.1.xml TEST 2014-08-19 16:09:00.467 NULL
Ugh... Bad looking code block...
As you can see, there are duplicate TransactionIDs as demonstrated with 18124. I would like 18124 to display only once with the latest error message. The only way to get the latest error message would be to use the latest TransactionErrorID for a particular TransactionID...
Please help! :(
I have a similar solution to Krishnraj Rana. However, I think that you need to avoid having the row-number filter in the WHERE clause because that will make it behave like an inner join:
-- Latest error per transaction: number each transaction's errors newest
-- first (highest TransactionErrorId = 1) and LEFT JOIN only the row numbered 1.
-- The id = 1 test sits in the ON clause so transactions without errors are kept.
; with Errors as
(SELECT [TransactionId]   -- needed by the join below
, [ErrorMessage]
, Row_Number() over (Partition By TransactionId order by TransactionErrorId Desc) as id
FROM TransactionError
)
SELECT [Transaction].[TransactionID]
,[FileName]
,[DestinationSystem]
,[CreatedOn]
,LEFT(Errors.[ErrorMessage], 300) AS LatestErrorMessage --Gets only the first 300 characters of the error message
FROM [WM01DB].[dbo].[Transaction]
INNER JOIN SourceSystem ON SourceSystem.SourceSystemId = [Transaction].SourceSystemId
LEFT JOIN Errors ON Errors.TransactionId = [Transaction].TransactionId
and Errors.id = 1
WHERE [Transaction].CreatedOn >= '2014-08-01 00:00:00.000'
AND [Transaction].CreatedOn < '2014-09-02 00:00:00.000'
ORDER BY [CreatedOn], [Transaction].[TransactionID]
You can achieve it by using ROW_NUMBER() with PARTITION BY clause like this -
-- Latest error per transaction using ROW_NUMBER().
-- A window function (or its alias) cannot be filtered in the WHERE clause of
-- the query that computes it, so the numbering is done in a derived table and
-- SrNo = 1 is applied one level up. Errors are ordered by TransactionErrorId
-- descending so that row 1 is the newest error; a transaction without errors
-- still yields one row (SrNo = 1) with a NULL message.
SELECT ranked.[TransactionID]
,ranked.[FileName]
,ranked.[DestinationSystem]
,ranked.[CreatedOn]
,ranked.LatestErrorMessage
,ranked.SrNo
FROM (
    SELECT [Transaction].[TransactionID]
    ,[FileName]
    ,[DestinationSystem]
    ,[CreatedOn]
    ,LEFT([TransactionError].[ErrorMessage], 300) AS LatestErrorMessage --Gets only the first 300 characters of the error message
    ,ROW_NUMBER() OVER (
        PARTITION BY [Transaction].[TransactionID]
        ORDER BY [TransactionError].[TransactionErrorId] DESC
    ) AS SrNo
    FROM [WM01DB].[dbo].[Transaction]
    INNER JOIN SourceSystem ON SourceSystem.SourceSystemId = [Transaction].SourceSystemId
    LEFT JOIN TransactionError ON TransactionError.TransactionId = [Transaction].TransactionId
    WHERE [Transaction].CreatedOn >= '2014-08-01 00:00:00.000'
    AND [Transaction].CreatedOn < '2014-09-02 00:00:00.000'
) AS ranked
WHERE ranked.SrNo = 1
ORDER BY ranked.[CreatedOn]
,ranked.[TransactionID]
-- Latest error per transaction via a derived table.
-- Within a transaction's partition the rows differ only by their error, so
-- the numbering orders by TransactionErrorId descending; rn = 1 is then the
-- newest error rather than an arbitrary one.
SELECT A.[TransactionID]
,A.[FileName]
,A.[DestinationSystem]
,A.[CreatedOn]
,A.LatestErrorMessage
FROM (
    SELECT [Transaction].[TransactionID]
    ,[FileName]
    ,[DestinationSystem]
    ,[CreatedOn]
    ,LEFT([TransactionError].[ErrorMessage], 300) AS LatestErrorMessage --Gets only the first 300 characters of the error message
    ,ROW_NUMBER() OVER (
        PARTITION BY [Transaction].[TransactionID]
        ORDER BY [TransactionError].[TransactionErrorId] DESC
    ) rn
    FROM [WM01DB].[dbo].[Transaction]
    INNER JOIN SourceSystem ON SourceSystem.SourceSystemId = [Transaction].SourceSystemId
    LEFT JOIN TransactionError ON TransactionError.TransactionId = [Transaction].TransactionId
    -- row filter on the transaction date belongs in WHERE, not in the join condition
    WHERE [Transaction].CreatedOn >= '2014-08-01 00:00:00.000'
    AND [Transaction].CreatedOn < '2014-09-02 00:00:00.000'
)A
WHERE A.rn = 1
ORDER BY A.[CreatedOn], A.[TransactionID]
Also using row_number() but picking last TransactionErrorId as requested (and assuming at least SQL Server 2005):
-- Latest error per transaction, picking the highest TransactionErrorId.
-- Column names follow the question's schema (DestinationSystem, SourceSystemId).
with x as (
    select
        t.[TransactionId],
        [FileName],
        [DestinationSystem],
        [CreatedOn],
        e.[ErrorMessage],
        -- rn = 1 marks each transaction's newest error; a transaction without
        -- errors gets a single row (rn = 1) with a NULL ErrorMessage
        row_number() over (
            partition by t.[TransactionId]
            order by e.[TransactionErrorId] desc
        ) rn
    from
        [wm01db].[dbo].[Transaction] t
    inner join
        [dbo].[SourceSystem] s
        on t.SourceSystemId = s.SourceSystemId
    left outer join
        [dbo].[TransactionError] e
        on e.TransactionId = t.TransactionId
    where
        t.CreatedOn >= '2014-08-01 00:00:00.000' and
        t.CreatedOn < '2014-09-02 00:00:00.000'
) select
    [TransactionId],
    [FileName],
    [DestinationSystem],
    [CreatedOn],
    left([ErrorMessage], 300) as LastErrorMessage  -- first 300 characters only
from
    x
where
    rn = 1
order by
    [CreatedOn],
    [TransactionId] ;

rewriting group by in subselect

I have two queries that return different results when the WHERE condition is moved into the inner SELECT.
-- Variant 1: build the distinct (caller, destination) pairs inside the time
-- window first, then keep destination '1234' and count its pairs.
SELECT
    destination_number,
    COUNT(*) AS cnt
FROM (
    SELECT
        caller_id_number,
        destination_number
    FROM Final1
    WHERE start_stamp > '2013-01-01 00:00:00'
      AND start_stamp < '2013-01-26 00:00:00'
    GROUP BY
        caller_id_number,
        destination_number
)
WHERE destination_number = '1234'
GROUP BY destination_number
count result: 294636
-- Variant 2: filter to destination '1234' inside the subquery, build the
-- distinct (caller, destination) pairs, then count them.
SELECT
    destination_number,
    COUNT(*) AS cnt
FROM (
    SELECT
        caller_id_number,
        destination_number
    FROM Final1
    WHERE destination_number = '1234'
      AND start_stamp > '2013-01-01 00:00:00'
      AND start_stamp < '2013-01-26 00:00:00'
    GROUP BY
        caller_id_number,
        destination_number
)
GROUP BY destination_number
count result: 310627
I would like to know why the count changes and which query is correct.