Postgresql Using Limit with Order by without select and where case - sql

So I want to get the latest 2 rows of the person_id and check if the deposit field is decreased.
Here is what I have done so far;
The customer table is;
person_id employee_id deposit ts
101 201 44 2021-09-30 10:12:19+00
101 201 47 2021-09-30 09:12:19+00
101 201 65 2021-09-29 09:12:19+00
100 200 21 2021-09-29 10:12:19+00
104 203 54 2021-09-27 10:12:19+00
and as a result I want is;
person_id employee_id deposit ts pre_deposit pre_ts
101 201 44 2021-09-30 10:12:19+00 47 2021-09-30 09:12:19+00
I don't want to get deposit:65 row because I just want to check the last 2 rows. 47 > 44 so. I need to compare only the last 2 rows. if the deposit decreased in any other rows, I simply don't care.
SELECT person_id,
employee_id,
deposit,
ts,
lag(deposit) over client_window as pre_deposit,
lag(ts) over client_window as pre_ts
FROM customer
WINDOW client_window as (partition by person_id order by ts)
ORDER BY person_id , ts
so it returns a table with the following results;
person_id employee_id deposit ts pre_deposit pre_ts
101 201 44 2021-09-30 10:12:19+00 47 2021-09-30 09:12:19+00
101 201 47 2021-09-30 09:12:19+00 65 null
100 200 21 2021-09-29 10:12:19+00 null 2021-09-29 09:12:19+00
104 203 54 2021-09-27 10:12:19+00 null null
but if I do the following;
SELECT person_id,
employee_id,
deposit,
ts,
lag(deposit) over client_window as pre_deposit,
lag(ts) over client_window as pre_ts
FROM customer
WINDOW client_window as (partition by person_id order by ts limit 2)
this query doesn't work, because it throws an error as;
ERROR: syntax error at or near "limit"
LINE 11: limit 2
so how can I limit to compare the last 2 rows?
where pre_deposit > deposit

You can do:
with
data as (
select person_id, employee_id, deposit, ts,
row_number() over(partition by person_id order by ts desc) as rn
from customer
)
select a.*,
b.deposit as pre_deposit,
b.ts as pre_ts
from data a
left join data b on a.person_id = b.person_id and b.rn = 2
where a.rn = 1
Result:
person_id employee_id deposit ts rn pre_deposit pre_ts
---------- ------------ -------- ------------------------- --- ------------ ------------------------
100 200 21 2021-09-29T10:12:19.000Z 1 null null
101 201 44 2021-09-30T10:12:19.000Z 1 47 2021-09-30T09:12:19.000Z
104 203 54 2021-09-27T10:12:19.000Z 1 null null
See running example at DB Fiddle.

You are pretty close. Use this brilliant distinct on on your first query slightly modified and you are there.
select distinct on (person_id) *
from
(
select person_id, employee_id, deposit, ts,
lead(deposit) over w as pre_deposit,
lead(ts) over w as pre_ts
from customer
window w as (partition by person_id order by ts desc)
) t
where pre_deposit > deposit
order by person_id, ts desc;
SQL Fiddle here.

Related

Select the first record after the last but one X

I'm trying to get the first BEG_PERIOD date immediately after the last but one record of X (DEF_ENDING) of each user (USER_ID).
So I have this:
USER_ID
BEG_PERIOD
END_PERIOD
DEF_ENDING
159
01-07-2022
31-07-2022
X
159
25-09-2022
15-10-2022
X
159
01-11-2022
13-11-2022
159
14-11-2022
21-12-2022
X
159
01-01-2023
30-01-2023
X
414
01-04-2022
31-05-2022
X
414
01-07-2022
30-09-2022
414
01-10-2022
01-12-2022
X
480
01-07-2022
30-06-2022
480
01-07-2022
30-08-2022
X
480
02-09-2022
01-11-2022
X
503
15-03-2022
16-06-2022
X
503
19-07-2022
23-07-2022
503
24-07-2022
31-10-2022
503
01-11-2022
21-12-2022
X
The dates I need are the ones in bold
Can you help me?
I tried this but I only get the latest dates :(
SELECT
p.USER_ID,
p.BEG_PERIOD
FROM
PERIODS p
INNER JOIN PERIODS p2 ON
p.USER_ID = p2.USER_ID
AND
p.BEG_PERIOD = (
SELECT
MAX( BEG_PERIOD )
FROM
PERIODS
WHERE
PERIODS.USER_ID = p.USER_ID
)
WHERE
p.USER_ID > 10
This should work based on the sample data:
with data as (
select *,
sum(case when DEF_ENDING = 'X' then 1 end)
over (partition by USER_ID order by BEG_PERIOD desc) as grp
from PERIODS
)
select
USER_ID,
min(BEG_PERIOD) as BEG_PERIOD,
min(END_PERIOD) as END_PERIOD,
min(DEF_ENDING) as DEF_ENDING
from data
where grp = 1
group by USER_ID;
If you can't rely on the two dates being minimums then:
with data as (
select *,
sum(case when DEF_ENDING = 'X' then 1 end)
over (partition by USER_ID order by BEG_PERIOD desc) as grp
from PERIODS
), data2 as (
select *,
row_number() over (partition by USER_ID order by BEG_PERIOD) as rn
from data
where grp = 1
)
select *
from data2
where rn = 1;
This can also be done entirely via subqueries if that's more appropriate at the level of your class:
select USER_ID, min(BEG_PERIOD), min(END_PERIOD), min(DEF_ENDING)
from periods p1
where p1.BEG_PERIOD > (
select max(BEG_PERIOD)
from periods p2
where p2.USER_ID = p1.USER_ID and p2.DEF_ENDING = 'X'
and exists (
select 1
from periods p3
where p3.USER_ID = p2.USER_ID and p3.DEF_ENDING = 'X'
and p3.BEG_PERIOD > p2.BEG_PERIOD
)
)
group by USER_ID;
Try the following using the ROW_NUMBER and `LAG' window functions:
/* this to assign row numbers only for rows where def_ending = 'X' */
with order_def_ending as
(
select *,
case def_ending when 'X' then
row_number() over (partition by user_id order by
case def_ending when 'X' then 1 else 2 end,
end_period desc)
else null end rn,
lag(def_ending, 1, def_ending) over (partition by user_id order by end_period) pde /* previous end_period value */
from yourTbl
),
lag_rn as
(
select *,
lag(rn) over (partition by user_id order by end_period) prn /* previous row_number value */
from order_def_ending
)
select user_id, beg_period, end_period, def_ending
from lag_rn
where (
prn = 2 or /* when there are multiple rows with def_ending = 'X' */
(prn = 1 and rn is null) /* when there is only one row with def_ending = 'X' */
) and pde = 'X' /* ensure that the previous value of def_ending is = 'X' */
order by user_id, end_period
See demo
I think, this works on SQL server 2008
with periods as(
select USER_ID, cast(BEG_PERIOD as date)BEG_PERIOD,cast(END_PERIOD as date)END_PERIOD,DEF_ENDING
from (values
(159,'01-07-2022','31-07-2022','X')
,(159,'25-09-2022','15-10-2022','X')
,(159,'01-11-2022','13-11-2022',null)
,(159,'14-11-2022','21-12-2022','X')
,(159,'01-01-2023','30-01-2023','X')
,(414,'01-04-2022','31-05-2022','X')
,(414,'01-07-2022','30-09-2022',null)
,(414,'01-10-2022','01-12-2022','X')
,(480,'01-07-2022','30-06-2022',null)
,(480,'01-07-2022','30-08-2022','X')
,(480,'02-09-2022','01-11-2022','X')
,(503,'15-03-2022','16-06-2022','X')
,(503,'19-07-2022','23-07-2022',null)
,(503,'24-07-2022','31-10-2022',null)
,(503,'01-11-2022','21-12-2022','X')
)t(USER_ID, BEG_PERIOD, END_PERIOD, DEF_ENDING)
)
,cte as (
select *
,(select sum(case when def_ending='X' then 1 else 0 end)
from periods t2 where t2.user_id=t1.USER_ID and t2.BEG_PERIOD>=t1.BEG_PERIOD
) N -- last but one has N=2, all next N=1 (reverse order of counts)
from periods t1
)
select *
,(select min(t2.BEG_PERIOD)
from cte t2 where t2.user_id=t1.USER_ID and t2.N=1
) LastButOne -- first after last but one with N=1
from cte t1
Result
USER_ID
BEG_PERIOD
END_PERIOD
DEF_ENDING
N
LastButOne
159
2022-07-01
2022-07-31
X
4
2023-01-01
159
2022-09-25
2022-10-15
X
3
2023-01-01
159
2022-11-01
2022-11-13
NULL
2
2023-01-01
159
2022-11-14
2022-12-21
X
2
2023-01-01
159
2023-01-01
2023-01-30
X
1
2023-01-01
414
2022-04-01
2022-05-31
X
2
2022-07-01
414
2022-07-01
2022-09-30
NULL
1
2022-07-01
414
2022-10-01
2022-12-01
X
1
2022-07-01
480
2022-07-01
2022-06-30
NULL
2
2022-09-02
480
2022-07-01
2022-08-30
X
2
2022-09-02
480
2022-09-02
2022-11-01
X
1
2022-09-02
503
2022-03-15
2022-06-16
X
2
2022-07-19
503
2022-07-19
2022-07-23
NULL
1
2022-07-19
503
2022-07-24
2022-10-31
NULL
1
2022-07-19
503
2022-11-01
2022-12-21
X
1
2022-07-19
About Parallel Data Warehouse,
as mentioned here, Non-PDW versions of SQL Server before 2012 do not support the ORDER BY clause with aggregate functions like MIN.
Windowing function support was considerably extended in 2012, compared with the basic implementation available starting with SQL Server 2005. The extensions were made available in Parallel Data Warehouse before being incorporated in the box product.

Snowflake SQL - Count Distinct Users within descending time interval

I want to count the distinct amount of users over the last 60 days, and then, count the distinct amount of users over the last 59 days, and so on and so forth.
Ideally, the output would look like this (TARGET OUTPUT)
Day Distinct Users
60 200
59 200
58 188
57 185
56 180
[...] [...]
where 60 days is the max total possible distinct users, and then 59 would have a little less and so on and so forth.
my query looks like this.
select
count(distinct (case when datediff(day,DATE,current_date) <= 60 then USER_ID end)) as day_60,
count(distinct (case when datediff(day,DATE,current_date) <= 59 then USER_ID end)) as day_59,
count(distinct (case when datediff(day,DATE,current_date) <= 58 then USER_ID end)) as day_58
FROM Table
The issue with my query is that This outputs the data by column instead of by rows (like shown below) AND, most importantly, I have to write out this logic 60x for each of the 60 days.
Current Output:
Day_60 Day_59 Day_58
209 207 207
Is it possible to write the SQL in a way that creates the target as shown initially above?
Using below data in CTE format -
with data_cte(dates,userid) as
(select * from values
('2022-05-01'::date,'UID1'),
('2022-05-01'::date,'UID2'),
('2022-05-02'::date,'UID1'),
('2022-05-02'::date,'UID2'),
('2022-05-03'::date,'UID1'),
('2022-05-03'::date,'UID2'),
('2022-05-03'::date,'UID3'),
('2022-05-04'::date,'UID1'),
('2022-05-04'::date,'UID1'),
('2022-05-04'::date,'UID2'),
('2022-05-04'::date,'UID3'),
('2022-05-04'::date,'UID4'),
('2022-05-05'::date,'UID1'),
('2022-05-06'::date,'UID1'),
('2022-05-07'::date,'UID1'),
('2022-05-07'::date,'UID2'),
('2022-05-08'::date,'UID1')
)
Query to get all dates and count and distinct counts -
select dates,count(userid) cnt, count(distinct userid) cnt_d
from data_cte
group by dates;
DATES
CNT
CNT_D
2022-05-01
2
2
2022-05-02
2
2
2022-05-03
3
3
2022-05-04
5
4
2022-05-05
1
1
2022-05-06
1
1
2022-05-08
1
1
2022-05-07
2
2
Query to get difference of date from current date
select dates,datediff(day,dates,current_date()) ddiff,
count(userid) cnt,
count(distinct userid) cnt_d
from data_cte
group by dates;
DATES
DDIFF
CNT
CNT_D
2022-05-01
45
2
2
2022-05-02
44
2
2
2022-05-03
43
3
3
2022-05-04
42
5
4
2022-05-05
41
1
1
2022-05-06
40
1
1
2022-05-08
38
1
1
2022-05-07
39
2
2
Get records with date difference beyond a certain range only -
include clause having
select datediff(day,dates,current_date()) ddiff,
count(userid) cnt,
count(distinct userid) cnt_d
from data_cte
group by dates
having ddiff<=43;
DDIFF
CNT
CNT_D
43
3
3
42
5
4
41
1
1
39
2
2
38
1
1
40
1
1
If you need to prefix 'day' to each date diff count, you can
add and outer query to previously fetched data-set and add the needed prefix to the date diff column as following -
I am using CTE syntax, but you may use sub-query given you will select from table -
,cte_1 as (
select datediff(day,dates,current_date()) ddiff,
count(userid) cnt,
count(distinct userid) cnt_d
from data_cte
group by dates
having ddiff<=43)
select 'day_'||to_char(ddiff) days,
cnt,
cnt_d
from cte_1;
DAYS
CNT
CNT_D
day_43
3
3
day_42
5
4
day_41
1
1
day_39
2
2
day_38
1
1
day_40
1
1
Updated the answer to get distinct user count for number of days range.
A clause can be included in the final query to limit to number of days needed.
with data_cte(dates,userid) as
(select * from values
('2022-05-01'::date,'UID1'),
('2022-05-01'::date,'UID2'),
('2022-05-02'::date,'UID1'),
('2022-05-02'::date,'UID2'),
('2022-05-03'::date,'UID5'),
('2022-05-03'::date,'UID2'),
('2022-05-03'::date,'UID3'),
('2022-05-04'::date,'UID1'),
('2022-05-04'::date,'UID6'),
('2022-05-04'::date,'UID2'),
('2022-05-04'::date,'UID3'),
('2022-05-04'::date,'UID4'),
('2022-05-05'::date,'UID7'),
('2022-05-06'::date,'UID1'),
('2022-05-07'::date,'UID8'),
('2022-05-07'::date,'UID2'),
('2022-05-08'::date,'UID9')
),cte_1 as
(select datediff(day,dates,current_date()) ddiff,userid
from data_cte), cte_2 as
(select distinct ddiff from cte_1 )
select cte_2.ddiff,
(select count(distinct userid)
from cte_1 where cte_1.ddiff <= cte_2.ddiff) cnt
from cte_2
order by cte_2.ddiff desc
DDIFF
CNT
47
9
46
9
45
9
44
8
43
5
42
4
41
3
40
1
You can do unpivot after getting your current output.
sample one.
select
*
from (
select
209 Day_60,
207 Day_59,
207 Day_58
)unpivot ( cnt for days in (Day_60,Day_59,Day_58));

cumulative count returning more rows than expected

base_table
eom account_id closings checkouts
2018-11-01 1 21 147
2018-12-01 1 20 214
calendar_table
month account_id
2020-11-01 1
2014-04-01 1
Based on two tables, above, I would like to create a month-by-month cumulative closings and checkouts.
The calendar_table contains the months the account id is active. Thus, it is used as the main table (in the from clause).
with
base_table as (
select eom, account_id, closings, checkouts
from base_table bt
where account_id in (3,30,122,152,161,179)
)
,calendar_table as (
select ct.month, c.external_id as account_id
from calendar_table ct
left join customers c
on c.id = ct.organization_id
where account_id in (3,30,122,152,161,179)
)
,cumulative_table as (
select ct."month"
,list.account_id
,coalesce(bt.closings,0) as closings
,coalesce(sum(closings) OVER (PARTITION BY list.account_id
ORDER BY ct."month"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),0)
as cum_closings
,coalesce(bt.checkouts,0) as checkouts
,coalesce(sum(checkouts) OVER (PARTITION BY list.account_id
ORDER BY ct."month"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),0)
AS cum_checkouts
from calendar_table ct
cross join (select distinct account_id from base_table) list
left join base_table bt
on bt.account_id = list.account_id and bt.eom = ct.month
)
select *
from cumulative_table
The query above returns a cumulative table that contains duplications, probably because of the cross join.
month account_id closings cum_closings checkouts cum_checkouts
01/11/17 1 20 20 282 282
01/11/17 1 20 40 282 564
01/11/17 1 20 60 282 846
01/12/17 1 17 77 346 1192
01/12/17 1 17 94 346 1538
01/12/17 1 17 111 346 1884
I expect the query to return one month per account id.
month account_id closings cum_closings checkouts cum_checkouts
01/11/17 1 20 20 282 282
01/12/17 1 17 37 346 628
You can do more simple :
WITH list AS
( select bt.eom, bt.account_id, bt.closings, bt.checkouts
, ct.month, ct.organization_id
from base_table bt
inner join calendar_table ct
on ct.account_id = bt.account_id
where bt.account_id in (3,30,122,152,161,179)
)
select l.month, c.external_id as account_id
, coalesce(l.closings,0) as closings
, coalesce( sum(l.closings) OVER (PARTITION BY l.account_id
ORDER BY l."month"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
, 0
) as cum_closings
, coalesce(l.checkouts,0) as checkouts
, coalesce( sum(l.checkouts) OVER (PARTITION BY l.account_id
ORDER BY l."month"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
, 0
) AS cum_checkouts
from list as l
left join customers c
on c.id = l.organization_id ;

How do I get the time elapsed between flags for every user?

The table below represents user logins (i.e LogAction_INT = 1 is login, LogAction_INT = 0 is logout). What is the best approach to sum the time elapsed between a user's login and logout (session). Ideally I need a total of time spent per user. Everything I can think of includes while loops and it's too complex.
ID User_ID LogDate_DT LogAction_INT
1940 18 2019-04-01 13:15:06.027 1
1941 18 2019-04-01 13:47:39.010 0
1942 18 2019-04-01 15:48:46.453 1
1943 18 2019-04-01 15:54:47.520 0
1944 68 2019-04-02 15:09:20.460 1
1945 68 2019-04-02 15:53:11.223 0
1946 86 2019-04-03 12:48:14.340 1
1947 86 2019-04-03 14:49:55.400 0
1948 80 2019-04-04 08:54:48.157 1
1949 86 2019-04-04 15:26:51.917 1
1950 86 2019-04-04 15:27:53.030 0
1951 86 2019-04-04 15:28:00.920 1
1952 86 2019-04-04 15:28:30.243 0
1953 86 2019-04-04 15:28:35.490 1
1954 86 2019-04-04 15:53:41.700 0
1955 68 2019-04-04 15:54:07.720 1
1956 80 2019-04-04 16:15:55.200 0
I expect to have something like:
User TotalSessionTime
---- -----------------
18 04:45
68 10:02
80 06:12
You can enumerate each of the types and then use conditional aggregation or a join:
select user_id, seqnum,
datediff(second, min(LogDate_DT), max(LogDate_DT)) as diff_seconds
from (select t.*,
row_number() over (partition by user_id, LogAction_INT order by id) as seqnum
from t
) t
group by user_id, seqnum;
You can then sum these by user:
select user_id, sum(diff_seconds)
from (select user_id, seqnum,
datediff(second, min(LogDate_DT), max(LogDate_DT)) as diff_seconds
from (select t.*,
row_number() over (partition by user_id, LogAction_INT order by id) as seqnum
from t
) t
group by user_id, seqnum;
) t
group by user_id;
The issue with this type of problem is that the ins and outs don't usually match up quite so cleanly. That makes this a much harder problem.
In supported versions of SQL Server, I would do this using lag().
if it is always in pair, you can use row_number() to generate a running no and then group every 2 rows as 1
; with cte as
(
select *, grp = (row_number() over (partition by User_ID order by ID) - 1) / 2
from your_table
)
cte2 as
(
select User_ID, elapsed = datediff(second, min(LogDate_DT), max(LogDate_DT))
from cte
group by User_ID, grp
)
select User_ID, sum(elapsed)
from cte2
group by User_ID

SQL: Get date when cumulative sum reaches a mark

I have a table in the following format:
APP_iD| Date | Impressions
113 2015-01-01 10
113 2015-01-02 5
113 2015-01-03 50
113 2015-01-04 35
113 2015-01-05 30
113 2015-01-06 75
Now, I need to know the date when cumulative SUM of those impressions crossed 65/100/150 and so on.
I tried using CASE WHEN statement:
CASE WHEN SUM(impressions) >100
THEN date
but it doesn't sum the data across the column. It just does checks against the individual row.
My final result should look like:
APP_iD | Date_65 | Date_100 | Date_150
113 2015-01-03 2015-01-04 2015-01-06
Does anyone know how to do this?
Is this even possible?
Use sum() over() to get the running sum and check for the required values with a case expression. Finally aggregate the results to get one row per each app_id.
select app_id,max(dt_65),max(dt_100),max(dt_150)
from (
select app_id
,case when sum(impressions) over(partition by app_id order by dt) between 65 and 99 then dt end dt_65
,case when sum(impressions) over(partition by app_id order by dt) between 100 and 149 then dt end dt_100
,case when sum(impressions) over(partition by app_id order by dt) >= 150 then dt end dt_150
from t) x
group by app_id
with c as (
select
app_id, date,
sum(impressions) over (partition by app_id order by date) as c
from t
)
select app_id, s65.date, s100.date, s150.date
from
(
select distinct on (app_id) app_id, date
from c
where c >= 65 and c < 100
order by app_id, date
) s65
left join
(
select distinct on (app_id) app_id, date
from c
where c >= 100 and c <150
order by app_id, date
) s100 using (app_id)
left join
(
select distinct on (app_id) app_id, date
from c
where c >= 150
order by app_id, date
) s150 using (app_id)
;
app_id | date | date | date
--------+------------+------------+------------
113 | 2015-01-03 | 2015-01-04 | 2015-01-06
Without the pivot:
select distinct on (app_id, break) app_id, break, date
from (
select *,
case
when c < 100 then 65
when c < 150 then 100
else 150
end as break
from (
select
app_id, date,
sum(impressions) over (partition by app_id order by date) as c
from t
) t
where c >= 65
) t
order by app_id, break, date
;
app_id | break | date
--------+-------+------------
113 | 65 | 2015-01-03
113 | 100 | 2015-01-04
113 | 150 | 2015-01-06
You can try this for desired result.
with t as (select app_id, date, sum(Impressions)
over (partition by app_id order by date) AS s from tbl)
select app_id,
min(date_65) AS date_65 ,
min(date_100) AS date_100,
min(date_150) AS date_150
-- more columns to observe other sum of Impressions
from
(select app_id,
CASE WHEN (s >= 65 and s < 100) THEN date END AS date_65,
CASE WHEN (s >= 100 and s < 150) THEN date END AS date_100,
CASE WHEN (s >= 150 ) THEN date END AS date_150
-- more cases to observe other sum of Impressions
from t) q
group by q.app_id
if you want to observe more sum of Impressions, just add more conditions