How do I select the most frequent value for a specific month and display this value as well as the amount of times it occurs? - sql

I am struggling with a TSQL query and I'm all out of googling, so naturally I figured I might as well ask on SO.
Please keep in mind that I just began trying to learn SQL a few weeks back and I'm not really sure what rules there are and how you can and can not write your queries / sub-queries.
This is what I have so far:
Edit: Updated with DDL that should help create an example, also commented out unnecessary "Client"-column.
CREATE TABLE NumberTable
(
Number varchar(20),
Date date
);
INSERT INTO NumberTable (Number, Date)
VALUES
('55512345', '2015-01-01'),
('55512345', '2015-01-01'),
('55512345', '2015-01-01'),
('55545678', '2015-01-01'),
('55512345', '2015-02-01'),
('55523456', '2015-02-01'),
('55523456', '2015-02-01'),
('55534567', '2015-03-01'),
('55534567', '2015-03-01'),
('55534567', '2015-03-01'),
('55534567', '2015-03-01'),
('55545678', '2015-03-01'),
('55545678', '2015-04-01')
DECLARE
--#ClientNr AS int,
#FromDate AS date,
#ToDate AS date
--SET #ClientNr = 11111
SET #FromDate = '2015-01-01'
SET #ToDate = DATEADD(yy, 1, #FromDate)
SELECT
YEAR(Date) AS [Year],
MONTH(Date) AS [Month],
COUNT(Number) AS [Total Count]
FROM
NumberTable
WHERE
--Client = #ClientNr
Date BETWEEN #FromDate AND #ToDate
AND Number IS NOT NULL
AND Number NOT IN ('888', '144')
GROUP BY MONTH(Date), YEAR(Date)
ORDER BY [Year], [Month]
With this I am getting the Year, Month and Total Count.
I'm happy with only getting the top 1 most called number and count each month, but showing top 5 is preferable.
Heres an example of how I would like the table to look in the end (having the months formatted as JAN, FEB etc instead of numbers is not really important, but would be a nice bonus):
╔══════╦═══════╦═════════════╦═══════════╦══════════╦═══════════╦══════════╗
║ Year ║ Month ║ Total Count ║ #1 Called ║ #1 Count ║ #2 Called ║ #2 Count ║
╠══════╬═══════╬═════════════╬═══════════╬══════════╬═══════════╬══════════╣
║ 2016 ║ JAN ║ 80431 ║ 555-12345 ║ 45442 ║ 555-94564 ║ 17866 ║
╚══════╩═══════╩═════════════╩═══════════╩══════════╩═══════════╩══════════╝
I was told this was "easily" done with a sub-query, but I'm not so sure...

Interesting one this, I believe you can do it with a CTE and PIVOT but this is off the top of my head... This may not work verbatim
WITH Rollup_CTE
AS
(
SELECT Client,MONTH(Date) as Month, YEAR(Date) as Year, Number, Count(0) as Calls, ROW_NUMBER() OVER (PARTITION BY Client,MONTH(Date) as SqNo, YEAR(Date), Number ORDER BY COUNT(0) DESC)
from NumberTable
WHERE Number IS NOT NULL AND Number NOT IN ('888', '144')
GROUP BY Client,MONTH(Date), YEAR(Date), Number
)
SELECT * FROM Rollup_CTE Where SqNo <=5
You may then be able to pivot the data as you wish using PIVOT

artm's query corrected (PARTITION) and the last step (pivoting) simplified.
with data AS
(select '2016-01-01' as called, '111' as number
union all select '2016-01-01', '111'
union all select '2016-01-01', '111'
union all select '2016-01-01', '222'
union all select '2016-01-01', '222'
union all select '2016-01-05', '111'
union all select '2016-01-05', '222'
union all select '2016-01-05', '222')
, ordered AS (
select called
, number
, count(*) cnt
, ROW_NUMBER() OVER (PARTITION BY called ORDER BY COUNT(*) DESC) rnk
from data
group by called, number)
select called, total = sum(cnt)
, n1= max(case rnk when 1 then number end)
, cnt1=max(case rnk when 1 then cnt end)
, n2= max(case rnk when 2 then number end)
, cnt2=max(case rnk when 2 then cnt end)
from ordered
group by called
EDIT Using setup provided by OP
WITH ordered AS(
-- compute order
SELECT
[Year] = YEAR(Date)
, [Month] = MONTH(Date)
, number
, COUNT(*) cnt
, ROW_NUMBER() OVER (PARTITION BY YEAR(Date), MONTH(Date) ORDER BY COUNT(*) DESC) rnk
FROM NumberTable
WHERE Date BETWEEN #FromDate AND #ToDate
AND Number IS NOT NULL
AND Number NOT IN ('888', '144')
GROUP BY YEAR(Date), MONTH(Date), number
)
-- pivot by order
SELECT [Year], [Month]
, total = sum(cnt)
, n1 = MAX(case rnk when 1 then number end)
, cnt1 = MAX(case rnk when 1 then cnt end)
, n2 = MAX(case rnk when 2 then number end)
, cnt2 = MAX(case rnk when 2 then cnt end)
-- n3, cnt3, ....
FROM ordered
GROUP BY [Year], [Month];

This query help you:
IF OBJECT_ID('tempdb..#Test','U') IS NOT NULL DROP TABLE #Test;
CREATE TABLE #Test(Number INT NOT NULL)
INSERT INTO #Test(Number)
VALUES(1),(2),(3),(1)
SELECT TOP 1 WITH TIES
Number
FROM (
SELECT DISTINCT
Number
, COUNT(*) OVER(PARTITION BY Number) AS cnt
FROM #Test) AS T
ORDER BY cnt DESC
I have used TOP 1 WITH TIES for case when max count exists for several values.

Try this, doesn't have to be CTE but I used it to populate data, you can extend it to include 3rd, 4th etc.
;with data AS
(select '2016-01-01' as called, '111' as number
union all select '2016-01-01', '111'
union all select '2016-01-01', '111'
union all select '2016-01-01', '222'
union all select '2016-01-01', '222')
, ordered AS (
select called
, number
, count(*) cnt
, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) rnk
from data
group by called, number)
SELECT distinct *
FROM (SELECT DATENAME(month, called) mnth FROM ordered) AS mnth,
(SELECT number MostCalledNumber FROM ordered WHERE rnk = 1) AS MostCalledNumber,
(SELECT cnt MostCalledTimes FROM ordered WHERE rnk = 1) AS MostCalledTimes,
(SELECT number SecondMostCalledNumber FROM ordered WHERE rnk = 2) AS SecondMostCalledNumber,
(SELECT cnt SecondMostCalledTimes FROM ordered WHERE rnk = 2) AS SecondMostCalledTimes

Related

SQL - Return count of consecutive days where value was unchanged

I have a table like
date
ticker
Action
'2022-03-01'
AAPL
BUY
'2022-03-02'
AAPL
SELL.
'2022-03-03'
AAPL
BUY.
'2022-03-01'
CMG
SELL.
'2022-03-02'
CMG
HOLD.
'2022-03-03'
CMG
HOLD.
'2022-03-01'
GPS
SELL.
'2022-03-02'
GPS
SELL.
'2022-03-03'
GPS
SELL.
I want to do a group by ticker then count all the times that Actions have sequentially been the value that they are as of the last date, here it's 2022-03-03. ie for this example table it'd be like;
ticker
NumSequentialDaysAction
AAPL
0
CMG
1
GPS
2
Fine to pass in 2022-03-03 as a value, don't need to figure that out on the fly.
Tried something like this
---Table Creation---
CREATE TABLE UserTable
([Date] DATETIME2, [Ticker] varchar(5), [Action] varchar(5))
;
INSERT INTO UserTable
([Date], [Ticker], [Action])
VALUES
('2022-03-01' , 'AAPL' , 'BUY'),
('2022-03-02' , 'AAPL' , 'SELL'),
('2022-03-03' , 'AAPL' , 'BUY'),
('2022-03-01' , 'CMG' , 'SELL'),
('2022-03-02' , 'CMG' , 'HOLD'),
('2022-03-03' , 'CMG' , 'HOLD'),
('2022-03-01' , 'GPS' , 'SELL'),
('2022-03-02' , 'GPS' , 'SELL'),
('2022-03-03' , 'GPS' , 'SELL')
;
---Attempted Solution---
I'm thinking that I need to do a sub query to get the last value and join on itself to get the matching values. Then apply a window function, ordered by date to see that the proceeding value is sequential.
WITH CTE AS (SELECT Date, Ticker, Action,
ROW_NUMBER() OVER (PARTITION BY Ticker, Action ORDER BY Date) as row_num
FROM UserTable)
SELECT Ticker, COUNT(DISTINCT Date) as count_of_days
FROM CTE
WHERE row_num = 1
GROUP BY Ticker;
WITH CTE AS (SELECT Date, Ticker, Action,
DENSE_RANK() OVER (PARTITION BY Ticker ORDER BY Action,Date) as rank
FROM table)
SELECT Ticker, COUNT(DISTINCT Date) as count_of_days
FROM CTE
WHERE rank = 1
GROUP BY Ticker;
You can do this with the help of the LEAD function like so. You didn't specify which RDBMS you're using. This solution works in PostgreSQL:
WITH "withSequential" AS (
SELECT
ticker,
(LEAD("Action") OVER (PARTITION BY ticker ORDER BY date ASC) = "Action") AS "nextDayIsSameAction"
FROM UserTable
)
SELECT
ticker,
SUM(
CASE
WHEN "nextDayIsSameAction" IS TRUE THEN 1
ELSE 0
END
) AS "NumSequentialDaysAction"
FROM "withSequential"
GROUP BY ticker
Here is a way to do this using gaps and islands solution.
Thanks for sharing the create and insert scripts, which helps to build the solution quickly.
dbfiddle link.
https://dbfiddle.uk/rZLDTrNR
with data
as (
select date
,ticker
,action
,case when lag(action) over(partition by ticker order by date) <> action then
1
else 0
end as marker
from usertable
)
,interim_data
as (
select *
,sum(marker) over(partition by ticker order by date) as grp_val
from data
)
,interim_data2
as (
select *
,count(*) over(partition by ticker,grp_val) as NumSequentialDaysAction
from interim_data
)
select ticker,NumSequentialDaysAction
from interim_data2
where date='2022-03-03'
Another option, you could use the difference between two row_numbers approach as the following:
select [Ticker], count(*)-1 NumSequentialDaysAction -- you could use (distinct) to remove duplicate rows
from
(
select *,
row_number() over (partition by [Ticker] order by [Date]) -
row_number() over (partition by [Ticker], [Action] order by [Date]) grp
from UserTable
where [date] <= '2022-03-03'
) RN_Groups
/* get only rows where [Action] = last date [Action] */
where [Action] = (select top 1 [Action] from UserTable T
where T.[Ticker] = RN_Groups.[Ticker] and [date] <= '2022-03-03'
order by [Date] desc)
group by [Ticker], [Action], grp
See demo

SQL group by - How to get new records that did not exist in any previous month

I have the following data.
DECLARE #Table TABLE(
RequestID varchar(100)
, PersonID varchar(100)
, StartDate DATE
, EndDate DATE
, StatusType VARCHAR(150)
)
INSERT INTO #Table
SELECT '445566', 'Person2', '9/13/2022', '9/16/2022', 'TransactionStarted'
UNION ALL
SELECT '445566', 'Person2', '9/13/2022', '9/16/2022', 'TransactionEnded'
UNION ALL
SELECT '112233', 'Person2', '8/13/2022', '8/16/2022', 'TransactionStarted'
UNION ALL
SELECT '112233', 'Person2', '8/13/2022', '8/16/2022', 'TransactionEnded'
UNION ALL
SELECT '556677', 'Person1', '8/10/2022', '8/12/2022', 'TransactionStarted'
UNION ALL
SELECT '556677', 'Person1', '8/10/2022', '8/12/2022', 'TransactionEnded'
I get the data out like this, and it works well
SELECT COUNT(Distinct RequestID) TotalCountPerPerson, DATENAME(mm, StartDate) [SRMonthName], MONTH(StartDate) [SRMonthNumber], YEAR(StartDate) [SRYear]
FROM #Table
GROUP BY MONTH(StartDate), DATENAME(mm,StartDate), YEAR(StartDate)
ORDER BY YEAR(StartDate), MONTH(StartDate)
But, I need an additional point of data that I am having trouble figuring out how to get. In the sample data, You can see that "Person 2" and "Person1" were both "new" to the system in August. I need a way to add them to the query so that I can get Total Count and New Count per month and year. Anyone help with the query?
SELECT 2 TotalCountPerPerson, 2 NewThisMonth, 'August'[SRMonthName], 8 [SRMonthNumber], 2022 [SRYear]
UNION ALL
SELECT 1, 0 NewThisMonth,'September', 9, 2022
You may use a self left join as the following:
SELECT COUNT(Distinct T.PersonID) TotalCountPerPerson,
COUNT(Distinct CASE WHEN D.PersonID IS NULL THEN T.PersonID END) NewThisMonth,
DATENAME(mm, T.StartDate) [SRMonthName],
MONTH(T.StartDate) [SRMonthNumber], YEAR(T.StartDate) [SRYear]
FROM #Table T LEFT JOIN #Table D
ON T.PersonID = D.personID AND T.StartDate > D.StartDate
GROUP BY MONTH(T.StartDate), DATENAME(mm,T.StartDate), YEAR(T.StartDate)
ORDER BY YEAR(T.StartDate), MONTH(T.StartDate)
Also, you may use EXISTS operator as the following:
WITH CTE AS
(
SELECT *, CASE WHEN NOT EXISTS
(SELECT 1 FROM #Table D WHERE D.PersonID = T.PersonID AND D.StartDate<T.StartDate)
THEN T.PersonID
END AS ISNEW
FROM #Table T
)
SELECT COUNT(Distinct PersonID) TotalCountPerPerson,
COUNT(Distinct ISNEW) NewThisMonth,
DATENAME(mm, StartDate) [SRMonthName],
MONTH(StartDate) [SRMonthNumber], YEAR(StartDate) [SRYear]
FROM CTE
GROUP BY MONTH(StartDate), DATENAME(mm,StartDate), YEAR(StartDate)
ORDER BY YEAR(StartDate), MONTH(StartDate);
See a demo.

How to get the validity date range of a price from individual daily prices in SQL

I have some prices for the month of January.
Date,Price
1,100
2,100
3,115
4,120
5,120
6,100
7,100
8,120
9,120
10,120
Now, the o/p I need is a non-overlapping date range for each price.
price,from,To
100,1,2
115,3,3
120,4,5
100,6,7
120,8,10
I need to do this using SQL only.
For now, if I simply group by and take min and max dates, I get the below, which is an overlapping range:
price,from,to
100,1,7
115,3,3
120,4,10
This is a gaps-and-islands problem. The simplest solution is the difference of row numbers:
select price, min(date), max(date)
from (select t.*,
row_number() over (order by date) as seqnum,
row_number() over (partition by price, order by date) as seqnum2
from t
) t
group by price, (seqnum - seqnum2)
order by min(date);
Why this works is a little hard to explain. But if you look at the results of the subquery, you will see how the adjacent rows are identified by the difference in the two values.
SELECT Lag.price,Lag.[date] AS [From], MIN(Lead.[date]-Lag.[date])+Lag.[date] AS [to]
FROM
(
SELECT [date],[Price]
FROM
(
SELECT [date],[Price],LAG(Price) OVER (ORDER BY DATE,Price) AS LagID FROM #table1 A
)B
WHERE CASE WHEN Price <> ISNULL(LagID,1) THEN 1 ELSE 0 END = 1
)Lag
JOIN
(
SELECT [date],[Price]
FROM
(
SELECT [date],Price,LEAD(Price) OVER (ORDER BY DATE,Price) AS LeadID FROM [#table1] A
)B
WHERE CASE WHEN Price <> ISNULL(LeadID,1) THEN 1 ELSE 0 END = 1
)Lead
ON Lag.[Price] = Lead.[Price]
WHERE Lead.[date]-Lag.[date] >= 0
GROUP BY Lag.[date],Lag.[price]
ORDER BY Lag.[date]
Another method using ROWS UNBOUNDED PRECEDING
SELECT price, MIN([date]) AS [from], [end_date] AS [To]
FROM
(
SELECT *, MIN([abc]) OVER (ORDER BY DATE DESC ROWS UNBOUNDED PRECEDING ) end_date
FROM
(
SELECT *, CASE WHEN price = next_price THEN NULL ELSE DATE END AS abc
FROM
(
SELECT a.* , b.[date] AS next_date, b.price AS next_price
FROM #table1 a
LEFT JOIN #table1 b
ON a.[date] = b.[date]-1
)AA
)BB
)CC
GROUP BY price, end_date

How to find latest status of the day in SQL Server

I have a SQL Server question that I'm trying to figure out at work:
There is a table with a status field which can contain a status called "Participate." I am only trying to find records if the latest status of the day is "Participate" and only if the status changed on the same day from another status to "Participate."
I don't want any records where the status was already "Participate." It must have changed to that status on the same day. You can tell when the status was changed by the datetime field ChangedOn.
In the sample below I would only want to bring back ID 1880 since the status of "Participated" has the latest timestamp. I would not bring back ID 1700 since the last record is "Other," and I would not bring back ID 1600 since "Participated" is the only status of that day.
ChangedOn Status ID
02/01/17 15:23 Terminated 1880
02/01/17 17:24 Participated 1880
02/01/17 09:00 Other 1880
01/31/17 01:00 Terminated 1700
01/31/17 02:00 Participated 1700
01/31/17 03:00 Other 1700
01/31/17 02:00 Participated 1600
I was thinking of using a Window function, but I'm not sure how to get started on this. It's been a few months since I've written a query like this so I'm a bit out of practice.
Thanks!
You can use window functions for this:
select t.*
from (select t.*,
row_number() over (partition by cast(ChangedOn as date)
order by ChangedOn desc
) as seqnum,
sum(case when status <> 'Participate' then 1 else 0 end) over (partition by cast(ChangedOn as date)) as num_nonparticipate
from t
) t
where (seqnum = 1 and ChangedOn = 'Participate') and
num_nonparticipate > 0;
Can you check this?
WITH sample_table(ChangedOn,Status,ID)AS(
SELECT CONVERT(DATETIME,'02/01/2017 15:23'),'Terminated',1880 UNION ALL
SELECT '02/01/2017 17:24','Participated',1880 UNION ALL
SELECT '02/01/2017 09:00','Other',1880 UNION ALL
SELECT '01/31/2017 01:00','Terminated',1700 UNION ALL
SELECT '01/31/2017 02:00','Participated',1700 UNION ALL
SELECT '01/31/2017 03:00','Other',1700 UNION ALL
SELECT '01/31/2017 02:00','Participated',1600
)
SELECT ID FROM (
SELECT *
,ROW_NUMBER()OVER(PARTITION BY ID,CONVERT(VARCHAR,ChangedOn,112) ORDER BY ChangedOn) AS rn
,COUNT(0)OVER(PARTITION BY ID,CONVERT(VARCHAR,ChangedOn,112)) AS cnt
,CASE WHEN Status<>'Participated' THEN 1 ELSE 0 END AS ss
,SUM(CASE WHEN Status!='Participated' THEN 1 ELSE 0 END)OVER(PARTITION BY ID,CONVERT(VARCHAR,ChangedOn,112)) AS OtherStatusCnt
FROM sample_table
) AS t WHERE t.rn=t.cnt AND t.Status='Participated' AND t.OtherStatusCnt>0
--Return:
1880
try this with other sample data,
declare #t table(ChangedOn datetime,Status varchar(50),ID int)
insert into #t VALUES
('02/01/17 15:23', 'Terminated' ,1880)
,('02/01/17 17:24', 'Participated' ,1880)
,('02/01/17 09:00', 'Other' ,1880)
,('01/31/17 01:00', 'Terminated' ,1700)
,('01/31/17 02:00', 'Participated' ,1700)
,('01/31/17 03:00', 'Other' ,1700)
,('01/31/17 02:00', 'Participated' ,1600)
;
WITH CTE
AS (
SELECT *
,row_number() OVER (
PARTITION BY id
,cast(ChangedOn AS DATE) ORDER BY ChangedOn DESC
) AS seqnum
FROM #t
)
SELECT *
FROM cte c
WHERE seqnum = 1
AND STATUS = 'Participated'
AND EXISTS (
SELECT id
FROM cte c1
WHERE seqnum > 1
AND c.id = c1.id
)
2nd query,this is better
here CTE is same
SELECT *
FROM cte c
WHERE seqnum = 1
AND STATUS = 'Participated'
AND EXISTS (
SELECT id
FROM cte c1
WHERE STATUS != 'Participated'
AND c.id = c1.id
)

SQL Server 2008 calculating data difference when we have only one date column

I have a date column Order_date and I am looking for ways to calculate the date difference between customer last order date and his recent previous ( previous form last) order_date ....
Example
Customer : 1, 2 , 1 , 1
Order_date: 01/02/2007, 02/01/2015, 06/02/2014, 04/02/2015
As you can see customer # 1 has three orders.
I want to know the date difference between his recent order date (04/02/2015) and his recent previous (06/02/2014).
For SQL Server 2012 & 2014 you could use LAG with a DATEDIFF to see the number of days between them.
For older versions, a CTE would probably be your best bet:
;WITH CTE AS
(
SELECT CustomerID,
Order_Date,
rn = ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY Order_Date DESC)
)
SELECT c1.CustomerID,
DATEDIFF(d, c1.Order_Date, c2.Order_Date)
FROM CTE c1
INNER JOIN CTE c2 ON c2.rn = c1.rn + 1
In SQL Server 2012+, you can use lag() to get the difference between any two dates:
select t.*,
datediff(day, lag(order_date) over (partition by customer order by order_date),
order_date) as days_dff
from table t;
If you have an older version, you can do something similar with correlated subqueries or outer apply.
EDIT:
If you just want the difference between the two most recent dates, use conditional aggregation instead:
select customer,
datediff(day, max(case when seqnum = 2 then order_date end),
max(case when seqnum = 1 then order_date end)
) as MostRecentDiff
from (select t.*,
row_number() over (partition by customer order by order_date desc) as seqnum
from table t
) t
group by customer;
If you're using SQL Server 2008 or later, you can try CROSS APPLY.
SELECT [customers].[customer_id], DATEDIFF(DAY, MIN([recent_orders].[order_date]), MAX([recent_orders].[order_date])) AS [elapsed]
FROM [customers]
CROSS APPLY (
SELECT TOP 2 [order_date]
FROM [orders]
WHERE ([orders].[customer_id] = [customers].[customer_id])
) [recent_orders]
GROUP BY [customers].[customer_id]
SELECT DATEDIFF(DAY, Y.PrevLastOrderDate, Y.LastOrderDate) AS PreviousDays
FROM
(
SELECT X.LastOrderDate
, (SELECT MAX(OrderDate) FROM dbo.Orders SO WHERE SO.CustomerID=1 AND SO.OrderDate < X.LastOrderDate) AS PrevLastOrderDate
FROM
(
select MAX(OrderDate) AS LastOrderDate
FROM dbo.Orders O
WHERE O.CustomerID=1
)X
)Y
drop table #Invoices
create table #Invoices ( OrderId int , OrderDate datetime )
insert into #Invoices (OrderId , OrderDate )
select 101, '01/01/2001' UNION ALL Select 202, '02/02/2002' UNION ALL Select 303, '03/03/2003'
UNION ALL Select 808, '08/08/2008' UNION ALL Select 909, '09/09/2009'
;
WITH
MyCTE /* http://technet.microsoft.com/en-us/library/ms175972.aspx */
( OrderId,OrderDate,ROWID) AS
(
SELECT
OrderId,OrderDate
, ROW_NUMBER() OVER ( ORDER BY OrderDate ) as ROWID
FROM
#Invoices inv
)
SELECT
OrderId,OrderDate
,(Select Max(OrderDate) from MyCTE innerAlias where innerAlias.ROWID = (outerAlias.ROWID-1) ) as PreviousOrderDate
,
[MyDiff] =
CASE
WHEN (Select Max(OrderDate) from MyCTE innerAlias where innerAlias.ROWID = (outerAlias.ROWID-1) ) iS NULL then 0
ELSE DATEDIFF (mm, OrderDate , (Select Max(OrderDate) from MyCTE innerAlias where innerAlias.ROWID = (outerAlias.ROWID-1) ) )
END
, ROWIDMINUSONE = (ROWID-1)
, ROWID as ROWID_SHOWN_FOR_KICKS , OrderDate as OrderDateASecondTimeForConvenience
FROM
MyCTE outerAlias
ORDER BY outerAlias.OrderDate Desc , OrderId