Time series group by day and kind - sql

I create a table using the command below:
-- Time-series table: one row per recorded session.
CREATE TABLE IF NOT EXISTS stats (
    id           INTEGER   NOT NULL PRIMARY KEY AUTOINCREMENT,  -- surrogate key
    session_kind INTEGER   NOT NULL,                            -- kind code (0, 1, 2 in the sample data)
    ts           TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP   -- filled in by the database when omitted
);
I insert some time series data using the command below:
-- ?1 is the session kind; id and ts are left to their column defaults.
INSERT INTO stats (session_kind)
VALUES (?1);
Some time after having executed several times the insert command, I have some time series data as below:
id session_kind ts
-----------------------------------------
1 0 2020-04-18 12:59:51 // day 1
2 1 2020-04-19 12:59:52 // day 2
3 0 2020-04-19 12:59:53
4 1 2020-04-19 12:59:54
5 0 2020-04-19 12:59:55
6 2 2020-04-19 12:59:56
7 2 2020-04-19 12:59:57
8 2 2020-04-19 12:59:58
9 2 2020-04-19 12:59:59
10 0 2020-04-20 12:59:51 // day 3
11 1 2020-04-20 12:59:52
12 0 2020-04-20 12:59:53
13 1 2020-04-20 12:59:54
14 0 2020-04-20 12:59:55
15 2 2020-04-20 12:59:56
16 2 2020-04-20 12:59:57
17 2 2020-04-20 12:59:58
18 2 2020-04-21 12:59:59 // day 4
I would like a command that groups my data by date, from the most recent day to the oldest, and shows the count of each session_kind, as below (I don't want to pass any parameters to this command):
0 1 2 ts
-------------------------
0 0 1 2020-04-21 // day 4
3 2 3 2020-04-20 // day 3
2 2 4 2020-04-19 // day 2
1 0 0 2020-04-18 // day 1
How can I group my data as above?

You can do conditional aggregation:
-- One row per calendar day, newest first, with one count column per session kind.
-- In SQLite a comparison evaluates to 1 or 0, so SUM(<comparison>) counts the
-- rows for which the comparison holds.
SELECT
    SUM(session_kind = 0) AS session_kind_0,
    SUM(session_kind = 1) AS session_kind_1,
    SUM(session_kind = 2) AS session_kind_2,
    DATE(ts)              AS ts_day
FROM stats               -- the table created in the question
GROUP BY DATE(ts)
ORDER BY ts_day DESC;
If you want something dynamic, then it might be simpler to put the results in rows rather than columns:
-- One row per (day, session kind) pair that actually occurs, newest day first.
-- New session kinds show up as extra rows, so the query never needs editing.
SELECT
    DATE(ts)     AS ts_day,
    session_kind,
    COUNT(*)     AS cnt
FROM stats       -- the table created in the question
GROUP BY
    DATE(ts),
    session_kind
ORDER BY
    ts_day DESC,
    session_kind;

If I understand correctly, you just want to sum the values:
-- Portable conditional aggregation: each CASE yields 1 for the wanted kind and
-- 0 otherwise, so the SUM is a per-day count of that kind.
-- The session kinds in the question's data are 0, 1 and 2, the timestamp column
-- is ts and the table is stats.
SELECT
    DATE(ts) AS ts_day,
    SUM(CASE WHEN session_kind = 0 THEN 1 ELSE 0 END) AS cnt_0,
    SUM(CASE WHEN session_kind = 1 THEN 1 ELSE 0 END) AS cnt_1,
    SUM(CASE WHEN session_kind = 2 THEN 1 ELSE 0 END) AS cnt_2
FROM stats
GROUP BY DATE(ts)
ORDER BY ts_day DESC;  -- most recent day first, as requested
You can also simplify this:
-- Shorter form: in SQLite a comparison evaluates to 1 or 0, so it can be summed
-- directly. Kinds 0, 1 and 2, column ts and table stats come from the question.
SELECT
    DATE(ts)              AS ts_day,
    SUM(session_kind = 0) AS cnt_0,
    SUM(session_kind = 1) AS cnt_1,
    SUM(session_kind = 2) AS cnt_2
FROM stats
GROUP BY DATE(ts)
ORDER BY ts_day DESC;  -- most recent day first, as requested

Related

Add a counting condition into dense_rank window Function SQL

I have a function that counts how many times you've visited and if you have converted or not.
What I'd like is for the dense_rank to re-start the count, if there has been a conversion:
-- Numbers each uid's visits in chronological order. The numbering runs over the
-- whole partition and never restarts, which is what the question wants to change.
SELECT
    uid,
    channel,
    time,
    conversion,
    DENSE_RANK() OVER (
        PARTITION BY uid
        ORDER BY time ASC
    ) AS visit_order
FROM table
current table output:
This customer (uid) had a conversion at visit 18, and I now want the visit_order count from dense_rank to restart at 0 for the same customer until it reaches the next non-null conversion.
See this (I do not like "try this" 😉):
-- Restart the visit counter at every conversion.
-- conversion_grp is the running number of conversions seen so far for the id,
-- current row included, so each conversion row opens a new group and gets R = 1.
SELECT
    id,
    ts,
    conversion,
    -- ORDER BY ts makes the numbering deterministic within each group.
    ROW_NUMBER() OVER (PARTITION BY id, conversion_grp ORDER BY ts) AS R
FROM (
    SELECT
        id,
        ts,
        conversion,
        -- A plain running count of conversions; unlike a "1000 per conversion,
        -- modulo 1000" encoding it has no limit on the number of rows between
        -- two conversions.
        SUM(CASE WHEN conversion = 1 THEN 1 ELSE 0 END)
            OVER (PARTITION BY id ORDER BY ts) AS conversion_grp
    FROM sample
) x
ORDER BY ts;
DBFIDDLE
output:
id
ts
conversion
R
1
2022-01-15 10:00:00
0
1
1
2022-01-16 10:00:00
0
2
1
2022-01-17 10:00:00
0
3
1
2022-01-18 10:00:00
1
1
1
2022-01-19 10:00:00
0
2
1
2022-01-20 10:00:00
0
3
1
2022-01-21 10:00:00
0
4
1
2022-01-22 10:00:00
0
5
1
2022-01-23 10:00:00
0
6
1
2022-01-24 10:00:00
0
7
1
2022-01-25 10:00:00
1
1
1
2022-01-26 10:00:00
0
2
1
2022-01-27 10:00:00
0
3

SQL status changes with start and end dates

This is a table of user statuses over the period of 9/1/2021 to 9/10/2021. 1 means "active." 0 means "canceled."
date
user
status
9/1/2021
1
1
9/1/2021
2
0
9/1/2021
3
1
9/2/2021
1
1
9/2/2021
2
1
9/2/2021
3
1
9/3/2021
1
0
9/3/2021
2
1
9/3/2021
3
1
9/4/2021
1
0
9/4/2021
2
1
9/4/2021
3
1
9/5/2021
1
0
9/5/2021
2
1
9/5/2021
3
0
9/6/2021
1
1
9/6/2021
2
1
9/6/2021
3
0
9/7/2021
1
1
9/7/2021
2
1
9/7/2021
3
0
9/8/2021
1
0
9/8/2021
2
1
9/8/2021
3
1
9/9/2021
1
0
9/9/2021
2
1
9/9/2021
3
1
9/10/2021
1
1
9/10/2021
2
0
9/10/2021
3
1
I want to get the start and end date for each user's active and canceled periods during this time. I know this involves a window function, but I can't quite figure out how to do it. This is my desired output:
user
status
start date
end date
1
1
9/1/2021
9/2/2021
1
0
9/3/2021
9/5/2021
1
1
9/6/2021
9/7/2021
1
0
9/8/2021
9/9/2021
1
1
9/10/2021
9/10/2021
2
0
9/1/2021
9/1/2021
2
1
9/2/2021
9/9/2021
2
0
9/10/2021
9/10/2021
3
1
9/1/2021
9/4/2021
3
0
9/5/2021
9/7/2021
3
1
9/8/2021
9/10/2021
Updated
Here is an example:fiddle
Updated query,
-- Collapse each run of consecutive rows with the same status into one period.
-- Within a user, the overall row number minus the per-status row number stays
-- constant as long as the status does not change, so (usr, status, island_id)
-- identifies one period.
-- NOTE(review): assumes at most one row per usr and dt -- confirm against the data.
WITH numbered AS (
    SELECT
        usr,
        status,
        dt,
        ROW_NUMBER() OVER (PARTITION BY usr ORDER BY dt)
      - ROW_NUMBER() OVER (PARTITION BY usr, status ORDER BY dt) AS island_id
    FROM TABLE1
)
SELECT
    usr,
    status,
    MIN(dt) AS start_date,
    MAX(dt) AS end_date
FROM numbered
GROUP BY
    usr,
    status,
    island_id
ORDER BY
    usr,
    start_date;
I was able to figure it out.
The key components are filtering for when the current status does not equal the previous status. This indicates a date when the status of the user changes.
When you filter for these rows, you can just use the LEAD() window function and subtract 1 day to get the end date for that status.
-- Step 1: attach to every row the user's previous status and the user's own
-- last recorded date.
WITH win AS (
    SELECT
        usr,
        dt,
        status,
        LAG(status) OVER (PARTITION BY usr ORDER BY dt) AS prev_status,
        -- Computed here, before the WHERE below removes rows, so it is the
        -- user's true last date rather than the last status-change date.
        MAX(dt) OVER (PARTITION BY usr) AS last_dt
    FROM subs
)
-- Step 2: keep only the rows where the status changes (or the user's first row).
-- Window functions run after WHERE, so LEAD sees the next status change.
SELECT
    usr,
    status,
    dt AS start_date,
    COALESCE(
        -- The period ends the day before the next change.
        -- NOTE(review): assumes one row per user per day with no gaps -- confirm.
        LEAD(dt) OVER (PARTITION BY usr ORDER BY dt) - INTERVAL '1 day',
        -- The last period of a user ends on that user's last date, not on the
        -- latest date found anywhere in the table.
        last_dt
    ) AS end_date
FROM win
WHERE status <> prev_status
   OR prev_status IS NULL
ORDER BY
    usr,
    start_date;

SQL cumulative sum until a flag value and resetting the sum

I'm still learning SQL and I'm trying to figure out a problem that I wasn't able to solve. So my problem is that I'm trying to select a table(let say Expense), ordered by date and in the table I have a column named Charged and I want to add charges to be cumulative(This part I figured out). However after that I have another column that will be acting as a flag called PayOut. When the PayOut value is 1 I want the summation of Charged(SumValue) to reset to zero. How would I do this? Here is what I have tried and the current output I get and what output I want. Note: I saw some posts using CTE's but wasn't the same scenario and more complex.
-- Asker's attempt: shows 0 on PayOut rows, but the running sum itself is taken
-- over the whole table, so it carries on from the earlier total afterwards.
SELECT
    ex.date,
    ex.Charged,
    CASE
        WHEN ex.PayOut = 1 THEN 0
        ELSE SUM(ex.Charged) OVER (ORDER BY ex.date)
    END AS SumValue,
    ex.PayOut
FROM Expense AS ex
ORDER BY ex.date ASC
The data looks like this
Date Charged PayOut
01/10/2018 10 0
01/20/2018 5 0
01/30/2018 3 0
02/01/2018 0 1
02/11/2018 12 0
02/21/2018 15 0
Output I get
Date Charged PayOut SumValue
01/10/2018 10 0 10
01/20/2018 5 0 15
01/30/2018 3 0 18
02/01/2018 0 1 0
02/11/2018 12 0 30
02/21/2018 15 0 45
Output Wanted
Date Charged PayOut SumValue
01/10/2018 10 0 10
01/20/2018 5 0 15
01/30/2018 3 0 18
02/01/2018 0 1 0
02/11/2018 12 0 12
02/21/2018 15 0 27
Just create group from your PayOut Column and use it as a partition in OVER
-- Inline copy of the sample data from the question.
WITH Expense AS (
    SELECT CAST('01/10/2018' AS DATE) AS Date, 10 AS Charged, 0 AS PayOut
    UNION ALL
    SELECT CAST('01/20/2018' AS DATE), 5, 0
    UNION ALL
    SELECT CAST('01/30/2018' AS DATE), 3, 0
    UNION ALL
    SELECT CAST('02/01/2018' AS DATE), 0, 1
    UNION ALL
    SELECT CAST('02/11/2018' AS DATE), 12, 0
    UNION ALL
    SELECT CAST('02/21/2018' AS DATE), 15, 0
),
-- The running total of the PayOut flag goes up by one at every payout, so it
-- labels the stretch of rows between two payouts.
payout_groups AS (
    SELECT
        e.date,
        e.Charged,
        e.PayOut,
        SUM(e.PayOut) OVER (ORDER BY e.date) AS PayOutGroup
    FROM Expense AS e
)
-- Running sum of Charged, restarted inside each payout group.
SELECT
    g.date,
    g.Charged,
    g.PayOut,
    g.PayOutGroup,
    SUM(g.Charged) OVER (
        PARTITION BY g.PayOutGroup
        ORDER BY g.date
    ) AS SumValue
FROM payout_groups AS g

SQL: How do I modify this query to select unique by hour

(Looking for a better title)
Hello I have the query below
-- Bounds of the reporting window. T-SQL local variables start with @.
DECLARE @cdt  varchar(23);
DECLARE @cdt2 varchar(23);
SET @cdt  = '2016-01-18 00:00:00.000';
SET @cdt2 = '2016-01-26 00:00:00.000';

-- One output row per hour number taken from spt_values (rows of type 'p',
-- limited here to 0-11). Each project column is the number of distinct agents
-- with a call in that hour of day inside the window; BETWEEN includes both bounds.
SELECT
    spt.number AS [Hour of Day],
    (SELECT COUNT(DISTINCT AgentId)
     FROM history t2
     WHERE DATEPART(HOUR, t2.calldatetime) = spt.number
       AND projectid IN (5)
       AND calldatetime BETWEEN @cdt AND @cdt2) AS [Project 5 ],
    (SELECT COUNT(DISTINCT AgentId)
     FROM history t2
     WHERE DATEPART(HOUR, t2.calldatetime) = spt.number
       AND projectid IN (124)
       AND calldatetime BETWEEN @cdt AND @cdt2) AS [Project 124],
    (SELECT COUNT(DISTINCT AgentId)
     FROM history t2
     WHERE DATEPART(HOUR, t2.calldatetime) = spt.number
       AND projectid IN (576)
       AND calldatetime BETWEEN @cdt AND @cdt2) AS [Project 576]
FROM master..spt_values spt
WHERE spt.number BETWEEN 0 AND 11 AND spt.type = 'p'
GROUP BY spt.number
ORDER BY spt.number
I now need to select a unique number per hour rather than a distinct amount overall.
For instance, if I run this with "select distinct(Agentid), rest of query here", it will give me a count of agentids, independent of the cases. How do I express "WHEN AGENTID is unique"?
I copied examples from the original question
Project id Datetime Agentid
---------- ----------------------- ---------
5 11-23-2015 09:00:00.000 12
5 11-23-2015 10:00:00.000 12
6 11-23-2015 11:00:00.000 12
1 11-23-2015 12:00:00.000 3
3 11-23-2015 13:00:00.000 4
124 11-23-2015 14:00:00.000 7
124 11-23-2015 15:00:00.000 9
124 11-23-2015 16:00:00.000 10
576 11-23-2015 17:00:00.000 10
576 11-23-2015 18:00:00.000 44
576 11-23-2015 19:00:00.000 69
etc 11-23-2015 20:00:00.000 23
Expected output (Ignore the incorrect counts, assume they are correct from above^):
Datetime 5 124 576
------------- --- --- ---
09:00 - 09:59 0 4 5
10:00 - 10:59 4 3 1
11:00 - 11:59 5 2 1
12:00 - 12:59 1 1 1
13:00 - 13:59 6 1 1
14:00 - 14:59 6 1 1
15:00 - 15:59 7 1 2
16:00 - 16:59 8 1 3
17:00 - 17:59 9 1 3
18:00 - 18:59 1 1 2
19:00 - 19:59 12 1 0
20:00 - 20:59 0 0 0
so far
Hour of Day Project 5 Project 124 Project 576
0 0 0 0
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
7 0 0 0
8 0 0 0
9 0 0 0
10 0 0 0
11 0 0 0
I'm pretty sure you need to do this with subqueries:
-- Distinct agents per hour of day, one column per project of interest.
-- spt_values rows of type 'P' are used as a list of integers; 0-23 covers every
-- hour of the day, so afternoon and evening hours are reported too.
-- The LEFT JOIN keeps hours without any matching call (all counts 0).
SELECT
    spt.number AS [Hour of Day],
    -- CASE yields NULL for other projects, and COUNT(DISTINCT ...) ignores NULLs.
    COUNT(DISTINCT CASE WHEN t2.projectId = 5   THEN t2.AgentId END) AS [Project 5 ],
    COUNT(DISTINCT CASE WHEN t2.projectId = 124 THEN t2.AgentId END) AS [Project 124],
    COUNT(DISTINCT CASE WHEN t2.projectId = 576 THEN t2.AgentId END) AS [Project 576]
FROM master..spt_values AS spt
LEFT JOIN YourTable AS t2
    ON DATEPART(HOUR, t2.yourdatetime) = spt.number
   AND t2.projectId IN (5, 124, 576)
WHERE spt.number BETWEEN 0 AND 23
  AND spt.type = 'P'
GROUP BY spt.number
ORDER BY spt.number;
Here is the table used by these queries:
-- Local temporary table holding the sample data; the queries that follow read
-- from #wt. (A #-prefixed name is a temporary table and is created with
-- CREATE TABLE; DECLARE ... TABLE is only for @-prefixed table variables.)
CREATE TABLE #wt (
    projectid varchar(4) NOT NULL,  -- text, because the sample data contains the value 'etc'
    edate     datetime   NOT NULL,
    agentid   int        NOT NULL
);
If you want to get the counts by time and project, use this query:
-- Number of rows for every distinct (edate, projectid) combination.
SELECT
    edate,
    projectid,
    COUNT(*) AS nentries
FROM #wt
GROUP BY
    edate,
    projectid;
I haven't dealt with bucketing the dates by hour; that is a separate issue.
To get a tabular result set as you have shown:
-- Turn the per-project row counts into one column per listed project.
SELECT
    edate,
    [5]   AS [Project 5],
    [124] AS [Project 124],
    [576] AS [Project 576]
FROM (
    -- Only rows whose projectid passes ISNUMERIC are kept and cast to int.
    SELECT
        edate,
        CAST(projectid AS int) AS projectid
    FROM #wt
    WHERE ISNUMERIC(projectid) = 1
) AS s
PIVOT (
    COUNT(projectid)
    FOR projectid IN ([5], [124], [576])
) AS p;
Here is the result set for the PIVOT query using the above data:
However, you have to specify the projects of interest in the query. If you want to have an arbitrary number of projects and get columns for each one, that is going to require dynamic SQL to construct the PIVOT query.
@Tab Alleman: I added some data to illustrate the conditions that will test your scenario. Here is the result set with the same PIVOT query:

Counting number of rows grouped by date and hour

I am tracking customer store entry data in Microsoft SQL Server 2008 R2 that looks something like this:
DoorID DateTimeStamp EntryType
1 2013-09-02 09:01:16.000 IN
1 2013-09-02 09:04:09.000 IN
1 2013-09-02 10:19:29.000 IN
1 2013-09-02 10:19:30.000 IN
1 2013-09-02 10:19:32.000 OUT
1 2013-09-02 10:26:36.000 IN
1 2013-09-02 10:26:40.000 OUT
I don't want to count the OUT rows, just IN.
I believe that it needs to be grouped on Date, and DoorID, then get the hours totals.
I would like it to come out like this.
Date DoorID HourOfDay TotalInPersons
2013-09-02 1 0 0
2013-09-02 1 1 0
2013-09-02 1 2 0
2013-09-02 1 3 0
2013-09-02 1 4 0
2013-09-02 1 5 0
2013-09-02 1 6 0
2013-09-02 1 7 0
2013-09-02 1 8 0
2013-09-02 1 9 2
2013-09-02 1 10 3
2013-09-02 1 11 0
2013-09-02 1 12 0
2013-09-02 1 13 0
2013-09-02 1 14 0
2013-09-02 1 15 0
2013-09-02 1 16 0
2013-09-02 1 17 0
2013-09-02 1 18 0
2013-09-02 1 19 0
2013-09-02 1 20 0
2013-09-02 1 21 0
2013-09-02 1 22 0
2013-09-02 1 23 0
-- IN entries per date, door and hour of day. Hours without any IN row do not
-- appear in the result.
SELECT
    CONVERT(DATE, DateTimeStamp)  AS [Date],
    DoorID,
    DATEPART(HOUR, DateTimeStamp) AS HourOfDay,
    COUNT(*)                      AS TotalInPersons
FROM dbo.tablename
WHERE EntryType = 'IN'
GROUP BY
    CONVERT(DATE, DateTimeStamp),
    DoorID,
    DATEPART(HOUR, DateTimeStamp)
ORDER BY
    [Date],
    DoorID,
    HourOfDay;
Of course if you need all hours, even where no rows are represented, here is one solution (which limits the output for any day only to the doors that have at least one IN entry on that day):
-- hrs: the 24 smallest numbers of type 'P' in spt_values, used as hours of the day.
;WITH hrs AS
(
    SELECT TOP (24) number AS hour_of_day
    FROM Master..spt_values
    WHERE type = N'P'
    ORDER BY number
),
-- door_days: every (door, date) pair that has at least one IN entry.
door_days AS
(
    SELECT DISTINCT
        DoorID,
        CONVERT(DATE, DateTimeStamp) AS [Date]
    FROM dbo.tablename
    WHERE EntryType = 'IN'
)
-- Every door/date pair is combined with all 24 hours, then IN rows are attached.
-- COUNT(t.EntryType) ignores the NULLs of unmatched hours, so those hours show 0.
SELECT
    dd.[Date],
    dd.DoorID,
    hrs.hour_of_day   AS HourOfDay,
    COUNT(t.EntryType) AS TotalInPersons
FROM door_days AS dd
CROSS JOIN hrs
LEFT JOIN dbo.tablename AS t
    ON CONVERT(DATE, t.DateTimeStamp) = dd.[Date]
   AND t.DoorID = dd.DoorID
   AND DATEPART(HOUR, t.DateTimeStamp) = hrs.hour_of_day
   AND t.EntryType = 'IN'
GROUP BY
    dd.[Date],
    dd.DoorID,
    hrs.hour_of_day
ORDER BY
    dd.[Date],
    dd.DoorID,
    hrs.hour_of_day;
How about something like this:
-- IN entries per date, door and hour of day (hours without entries are omitted).
SELECT
    CAST(DateTimeStamp AS DATE)   AS [Date],   -- bracketed: Date is also a type name
    DoorID,
    DATEPART(HOUR, DateTimeStamp) AS HourOfDay,
    COUNT(*)                      AS TotalInPersons
FROM StoreTable
WHERE EntryType = 'IN'
GROUP BY
    CAST(DateTimeStamp AS DATE),
    DoorID,
    DATEPART(HOUR, DateTimeStamp)
-- Without an ORDER BY the row order is not guaranteed.
ORDER BY
    [Date],
    DoorID,
    HourOfDay;
This should work. I guessed on how you would pull DoorID and TotalPersons, but the overall logic is correct
-- IN entries per date, hour of day and door.
-- The sample data has no per-row head count column: every IN row is one person,
-- so the rows themselves are counted.
SELECT
    CONVERT(date, DateTimeStamp) AS [Date],
    DATEPART(hh, DateTimeStamp)  AS HourOfDay,
    DoorID,
    COUNT(*)                     AS TotalPersons
FROM yourtable
WHERE EntryType = 'IN'
GROUP BY
    CONVERT(date, DateTimeStamp),
    DATEPART(hh, DateTimeStamp),
    DoorID
-- DoorID is part of the sort key so rows of the same hour come out in a fixed order.
ORDER BY
    CONVERT(date, DateTimeStamp),
    DATEPART(hh, DateTimeStamp),
    DoorID;