SQL select students logins - sql

I have a table STUDENT_LAST_LOGIN, which contains data about students last logins.
ID STUDENT_ID DATE TIME
1 A 2020-02-01 12:00 15 MIN
2 B 2020-02-02 12:00 45 MIN
3 C 2020-02-03 12:00 25 MIN
In addition there is STUDENT_LOGIN table, which contains data about students all logins.
ID STUDENT_ID DATE TIME
1 A 2020-02-01 12:00 15 MIN
4 A 2020-01-01 14:00 33 MIN
2 B 2020-02-02 12:00 45 MIN
5 B 2020-01-02 13:30 47 MIN
10 B 2020-01-03 13:30 27 MIN
6 B 2020-01-02 10:00 44 MIN
3 C 2020-02-03 12:00 25 MIN
7 C 2020-01-03 10:00 12 MIN
8 C 2020-01-03 18:00 56 MIN
9 C 2020-01-04 12:00 88 MIN
As a result I need to get something like this:
STUDENT_ID LAST_LOGIN LAST_LOGIN_ONE_MONTH_AGO TIME TIME_ONE_MONTH_AGO
A 2020-02-01 12:00 2020-01-01 14:00 15 min 33 min
B 2020-02-02 12:00 2020-01-02 13:30 15 min 47 min
C 2020-02-03 12:00 2020-01-03 18:00 25 min 56 min
Can you help me write this?

SELECT LAST_LOGIN, LAST_LOGIN_ONE_MONTH_AGO, S_L.TIME, S_L.TIME_ONE_MONTH_AGO
FROM STUDENT_LAST_LOGIN S_L_L
INNER JOIN STUDENT_LOGIN S_L on S_L_L.id = S_L.id
where S_L_L.date < DATEADD(month, -1, GETDATE())
you need to write your query something like this.

You need to use the windows function as follows:
SELECT * FROM
(SELECT SLL.STUDENT_ID,
SLL.DATE LAST_LOGIN,
SL.DATE LAST_LOGIN_ONE_MONTHE_AGO,
SLL.TIME,
SL.TIME TIME_ONE_MONTH_AGO,
ROW_NUMBER() OVER (PARTITION BY SLL.STUDENT_ID ORDER BY SL.DATE DESC NULLS LAST) AS RN
FROM STUDENT_LAST_LOGIN SLL LEFT JOIN STUDENT_LOGIN SL
ON SL.STUDENT_ID = SLL.STUDENT_ID
AND TRUNC(SL.DATE) = ADD_MONTHS(TRUNC(SLL.DATE),-1)
)
WHERE RN = 1

I can only speculate that you want the most recent login and then the most recent login from the calendar month before that. I would suggest conditional aggregation:
select sll.student_id,
max(case when month_seqnum = 1 then last_login end),
max(case when month_seqnum = 2 then last_login end),
max(case when month_seqnum = 1 then time end),
max(case when month_seqnum = 2 then time end)
from (select sll.*,
row_number() over (partition by student_id, to_char(date, 'YYYY-MM')
order by date desc
) as seqnum,
dense_rank() over (partition by student_id order by to_char(date, 'YYYY-MM')) as month_seqnum
from student_last_login sll
) sll
where month_seqnum in (1, 2) and seqnum = 1
group by student_id;
I think this returns the values that you specify.

Related

Add a counting condition into dense_rank window Function SQL

I have a function that counts how many times you've visited and if you have converted or not.
What I'd like is for the dense_rank to re-start the count, if there has been a conversion:
SELECT
uid,
channel,
time,
conversion,
dense_rank() OVER (PARTITION BY uid ORDER BY time asc) as visit_order
FROM table
current table output:
this customer (uid) had a conversion at visit 18 and now I would want the visit_order count from dense_rank to restart at 0 for the same customer until it hits the next conversion that is non-null.
See this (I do not like "try this" 😉):
SELECT
id,
ts,
conversion,
-- SC,
ROW_NUMBER() OVER (PARTITION BY id,SC) R
FROM (
SELECT
id,
ts,
conversion,
-- COUNT(conversion) OVER (PARTITION BY id, conversion=0 ORDER BY ts ) CC,
SUM(CASE WHEN conversion=1 THEN 1000 ELSE 1 END) OVER (PARTITION BY id ORDER BY ts ) - SUM(CASE WHEN conversion=1 THEN 1000 ELSE 1 END) OVER (PARTITION BY id ORDER BY ts )%1000 SC
FROM sample
ORDER BY ts
) x
ORDER BY ts;
DBFIDDLE
output:
id
ts
conversion
R
1
2022-01-15 10:00:00
0
1
1
2022-01-16 10:00:00
0
2
1
2022-01-17 10:00:00
0
3
1
2022-01-18 10:00:00
1
1
1
2022-01-19 10:00:00
0
2
1
2022-01-20 10:00:00
0
3
1
2022-01-21 10:00:00
0
4
1
2022-01-22 10:00:00
0
5
1
2022-01-23 10:00:00
0
6
1
2022-01-24 10:00:00
0
7
1
2022-01-25 10:00:00
1
1
1
2022-01-26 10:00:00
0
2
1
2022-01-27 10:00:00
0
3

How to create a start and end date with no gaps from one date column and to sum a value within the dates

I am new SQL coding using in SQL developer.
I have a table that has 4 columns: Patient ID (ptid), service date (dt), insurance payment amount (insr_amt), out of pocket payment amount (op_amt). (see table 1 below)
What I would like to do is (1) create two columns "start_dt" and "end_dt" using the "dt" column where if there are no gaps in the date by the patient ID then populate the start and end date with the first and last date by patient ID, however if there is a gap in service date within the patient ID then to create the separate start and end date rows per patient ID, along with (2) summing the two payment amounts by patient ID with in the one set of start and end date visits (see table 2 below).
What would be the way to run this using SQL code in SQL developer?
Thank you!
Table 1:
Ptid
dt
insr_amt
op_amt
A
1/1/2021
30
20
A
1/2/2021
30
10
A
1/3/2021
30
10
A
1/4/2021
30
30
B
1/6/2021
10
10
B
1/7/2021
20
10
C
2/1/2021
15
30
C
2/2/2021
15
30
C
2/6/2021
60
30
Table 2:
Ptid
start_dt
end_dt
total_insr_amt
total_op_amt
A
1/1/2021
1/4/2021
120
70
B
1/6/2021
1/7/2021
30
20
C
2/1/2021
2/2/2021
30
60
C
2/6/2021
2/6/2021
60
30
You didn't mention the specific database so this solution works in PostgreSQL. You can do:
select
ptid,
min(dt) as start_dt,
max(dt) as end_dt,
sum(insr_amt) as total_insr_amt,
sum(op_amt) as total_op_amt
from (
select *,
sum(inc) over(partition by ptid order by dt) as grp
from (
select *,
case when dt - interval '1 day' = lag(dt) over(partition by ptid order by dt)
then 0 else 1 end as inc
from t
) x
) y
group by ptid, grp
order by ptid, grp
Result:
ptid start_dt end_dt total_insr_amt total_op_amt
----- ---------- ---------- -------------- -----------
A 2021-01-01 2021-01-04 120 70
B 2021-01-06 2021-01-07 30 20
C 2021-02-01 2021-02-02 30 60
C 2021-02-06 2021-02-06 60 30
See running example at DB Fiddle 1.
EDIT for Oracle
As requested, the modified query that works in Oracle is:
select
ptid,
min(dt) as start_dt,
max(dt) as end_dt,
sum(insr_amt) as total_insr_amt,
sum(op_amt) as total_op_amt
from (
select x.*,
sum(inc) over(partition by ptid order by dt) as grp
from (
select t.*,
case when dt - 1 = lag(dt) over(partition by ptid order by dt)
then 0 else 1 end as inc
from t
) x
) y
group by ptid, grp
order by ptid, grp
See running example at db<>fiddle 2.

Add first and last date of a sequence

I am working on a database which have a huge collection of rows. I want to update it so repeated records will be deleted. Now, I have a date column in table and I want to convert it into startDate and endDate. Please check:
id | date | price | minutes | prefixId | sellerId | routeTypeId
1234 2020-01-01 0.123 0 1 1 1
1235 2020-01-04 0.123 0 1 1 1
1236 2020-01-05 0.123 123 1 1 1
1237 2020-01-06 0.123 31 1 1 1
1238 2020-01-07 0.123 23 1 1 1
1239 2020-01-08 0.130 41 1 2 1
1240 2020-01-09 0.130 0 1 1 1
What I am looking for is:
id | startDate | endDate | price | minutes | prefixId | sellerId | routeTypeId
1234 2020-01-01 2020-01-01 0.123 0 1 1 1
1235 2020-01-04 2020-01-07 0.123 0 1 1 1
1239 2020-01-08 2020-01-08 0.130 41 1 2 1
1240 2020-01-09 2020-01-09 0.130 0 1 2 2
Dates will be considered in a series if price, prefixId, sellerId, routeTypeId will remain same with previous row and date column generates a series (without any gap between dates. So, 2020-01-01, 2020-01-2, 2020-01-10 are two different series for example)
This is a gaps-and-islands problem. You can use lag() and a cumulative sum:
select price, prefixId, sellerId, routeTypeId,
min(minutes),
min(date), max(date)
from (select t.*,
sum(case when prev_date = date - interval '1 day' then 0 else 1 end) over (order by date) as grp
from (select t.*,
lag(date) over (partition by price, prefixId, sellerId, routeTypeId order by date) as prev_date
from t
) t
) t
group by grp, price, prefixId, sellerId, routeTypeId
This is a "Gaps & Islands" problem. You can do it using:
select
min(id) as id,
min(date) as start_date,
max(date) as end_date,
min(price) as price,
...
from (
select *,
sum(inc) over(order by id) as grp
from (
select *,
case when price = lag(price) over(order by id)
and date = lag(date) over(
partition by price, prefixId, sellerId, routeTypeId
order by id)
+ interval '1 day'
then 0 else 1 end as inc
from t
) x
) y
group by grp

SQL How Long Have Incomplete Records Been Outstanding Broken Down By Day

I have asked a similar question to this SQL Daily Outstanding Sales, Rolling Aggregate? however I now need to understand, by day, how long sales have been outstanding. This will be grouped into 24 hours (or day) intervals to a max of 72hours. An example of the output from the Dataset is below, Timeoutstanding would be within a group by however Date needs to be any date, even if it was not within the DataSet which can be seen with 2020-01-02 on the second row
Date TimeOutstanding VolumeOutstanding
2020-01-01 Under24Hour 1
2020-01-02 Under48Hour 1
2020-01-03 Under24Hour 3
2020-01-03 Under72Hour 1
2020-01-04 Under48Hour 3
2020-01-05 Under72Hour 2
2020-01-05 Over72Hour 1
DataSet
SaleID Date Outcome
1 2020-01-01 New
1 2020-01-01 Complete
2 2020-01-01 New
3 2020-01-03 New
4 2020-01-03 New
5 2020-01-03 New
2 2020-01-04 Complete
5 2020-01-04 Complete
3 2020-01-06 Complete
4 2020-01-07 Complete
First, you can summarize each salesid with the new and completed date. Then, generate the dates that you care about. And use conditional aggregation:
select d.date,
count(*) as num_open,
sum(case when new_date >= dateadd(day, -1, d) then 1 else 0
end) as within_1day,
sum(case when new_date < dateadd(day, -1, d) and new_date >= dateadd(day, -3, d then 1 else 0
end) as within_1_3days_ago,
. . .
from (select distinct date from t) d left join
(select salesid, min(date) as new_date,
nullif(max(date), min(date)) as completed_date
from t
group by salesid
) t
on new_date <= date and (date < completed_date or completed_date is null)
group by d.date

MsSql Compare specific datetimes in sequence based on ID

I have a table where we store our data from a call and it looks like this:
CallID Arrive_Seq DateTime ActivitytypeID
1 1 2018-01-01 05:00:00 1
1 2 2018-01-01 05:00:01 2
1 3 2018-01-01 06:00:00 21
1 4 2018-01-01 06:00:01 28
1 5 2018-01-01 06:00:02 13
1 6 2018-01-01 06:00:03 22
1 7 2018-01-01 06:00:05 29
1 8 2018-01-01 06:05:00 21
1 9 2018-01-01 06:05:01 28
1 10 2018-01-01 06:05:02 13
1 11 2018-01-01 06:05:03 22
1 12 2018-01-01 06:07:45 29
Now I want to select the datediff between ActivitytypeID 21 and 29 in the arrive_sew order. In this example they occur twice (on arrive_seq 3,8 and 7,12). This order is not specific and ActivitytypeID can occur both more and less times in the sequence but they are always connected with eachother. Think of it as ActivitytypeID 21 = 'call started' AND ActivitytypeID = 29 'Call ended'.
In the example the answer whould be:
SELECT DATEDIFF (SECOND, '2018-01-01 06:00:00', '2018-01-01 06:00:05') = 5 -- Compares datetime of arrive_seq 3 and 7
AND
SELECT DATEDIFF (SECOND, '2018-01-01 06:00:05', '2018-01-01 06:07:45') = 460 -- Compares datetime of arrive_seq 21 and 29
Total duration = 465
I have tried with this code but it doesn't work all the time due to row# can change based on arrive_seq and ActivitytypeID
;WITH CallbackDuration AS (
SELECT ROW_NUMBER() OVER(ORDER BY a.time_stamp ASC) AS RowNumber, DATEDIFF(second, a.time_stamp, b.time_stamp) AS 'Duration'
FROM Table a
JOIN Table b on a.call_id = b.call_id
WHERE a.call_id = 1 AND a.activity_type = 21 AND b.activity_type = 29
GROUP BY a.time_stamp, b.time_stamp,a.call_id)
SELECT SUM(Duration) AS 'Duration' FROM CallbackDuration WHERE RowNumber in (1,5,9)
I think this is what you want:
select
call_start,
call_end,
datediff (second, call_start, call_end) as duration
from
(
select
call_timestamp as call_end,
lag(call_timestamp) over (partition by call_id order by call_timestamp) as call_start,
activity_type as call_end_activity,
lag (activity_type) over (partition by call_id order by call_timestamp) as call_start_activity
from
call_log
where
activity_type in (21, 29)
) x
where
call_start_activity = 21;
Result:
call_start call_end duration
----------------------- ----------------------- -----------
2018-01-01 06:00:00.000 2018-01-01 06:00:05.000 5
2018-01-01 06:05:00.000 2018-01-01 06:07:45.000 165
(2 rows affected)
Note that the time of the second call is based on your sample data with start time 2018-01-01 06:05:00
This query seems to return your expected result
declare #x int = 21
declare #y int = 29
;with cte(CallID, Arrive_Seq, DateTime, ActivitytypeID) as (
select
a, b, cast(c as datetime), d
from (values
(1,1,'2018-01-01 05:00:00',1)
,(1,2,'2018-01-01 05:00:01',2)
,(1,3,'2018-01-01 06:00:00',21)
,(1,4,'2018-01-01 06:00:01',28)
,(1,5,'2018-01-01 06:00:02',13)
,(1,6,'2018-01-01 06:00:03',22)
,(1,7,'2018-01-01 06:00:05',29)
,(1,8,'2018-01-01 06:05:00',21)
,(1,9,'2018-01-01 06:05:01',28)
,(1,10,'2018-01-01 06:05:02',13)
,(1,11,'2018-01-01 06:05:03',22)
,(1,12,'2018-01-01 06:07:45',29)
) t(a,b,c,d)
)
select
sum(ss)
from (
select
*, ss = datediff(ss, DateTime, lead(datetime) over (order by Arrive_Seq))
, rn = row_number() over (order by Arrive_Seq)
from
cte
where
ActivitytypeID in (#x, #y)
) t
where
rn % 2 = 1