SQL find average time difference between rows for a given category - sql

I browsed SO but could not quite find the exact answer or maybe it was for a different language.
Let's say I have a table, where each row is a record of a trade:
trade_id customer trade_date
1 A 2013-05-01 00:00:00
2 B 2013-05-01 10:00:00
3 A 2013-05-02 00:00:00
4 A 2013-05-05 00:00:00
5 B 2013-05-06 12:00:00
I would like to have the average time between trades, in days or fraction of days, for each customer, and the number of days since last trade. So for instance for customer A, time between trades 1 and 3 is 1 day and between trades 3 and 4 is 3 days, for an average of 2. So the end table would look like something like this (assuming today it's the 2013-05-10):
customer avg_time_btw_trades time_since_last_trade
A 2.0 5.0
B 5.08 3.5
If a customer has only got 1 trade I guess NULL is fine as output.
Not even sure SQL is the best way to do this (I am working with SQL server), but any help is appreciated!

SELECT
customer,
DATEDIFF(second, MIN(trade_date), MAX(trade_date)) / (NULLIF(COUNT(*), 1) - 1) / 86400.0,
DATEDIFF(second, MAX(trade_date), GETDATE() ) / 86400.0
FROM
yourTable
GROUP BY
customer
http://sqlfiddle.com/#!6/eb46e/7
EDIT: Added final field that I didn't notice, apologies.

The following SQL script uses your data and gives the expected results.
DECLARE #temp TABLE
( trade_id INT,
customer CHAR(1),
trade_date DATETIME );
INSERT INTO #temp VALUES (1, 'A', '20130501');
INSERT INTO #temp VALUES (2, 'B', '20130501 10:00');
INSERT INTO #temp VALUES (3, 'A', '20130502');
INSERT INTO #temp VALUES (4, 'A', '20130505');
INSERT INTO #temp VALUES (5, 'B', '20130506 12:00');
DECLARE #getdate DATETIME
-- SET #getdate = getdate();
SET #getdate = '20130510';
SELECT s.customer
, AVG(s.days_btw_trades) AS avg_time_between_trades
, CAST(DATEDIFF(hour, MAX(s.trade_date), #getdate) AS float)
/ 24.0 AS time_since_last_trade
FROM (
SELECT CAST(DATEDIFF(HOUR, t2.trade_date, t.trade_date) AS float)
/ 24.0 AS days_btw_trades
, t.customer
, t.trade_date
FROM #temp t
LEFT JOIN #temp t2 ON t2.customer = t.customer
AND t2.trade_date = ( SELECT MAX(t3.trade_date)
FROM #temp t3
WHERE t3.customer = t.customer
AND t3.trade_date < t.trade_date)
) s
GROUP BY s.customer

You need a date difference between every trade and average them.
select
a.customer
,avg(datediff(a.trade_date, b.trade_date))
,datediff(now(),max(a.trade_date))
from yourTable a, yourTable b
where a.customer = b.customer
and b.trade_date = (
select max(trade_date)
from yourTable c
where c.customer = a.customer
and a.trade_date > c.trade_date)
#gets the one earlier date for every trade
group by a.customer

Just for grins I added a solution that would use CTE's. You could probably use a temp table if the first query is too large. I used #MatBailie creation script for the table:
CREATE TABLE customer_trades (
id INT IDENTITY(1,1),
customer_id INT,
trade_date DATETIME,
PRIMARY KEY (id),
INDEX ix_user_trades (customer_id, trade_date)
)
INSERT INTO
customer_trades (
customer_id,
trade_date
)
VALUES
(1, '2013-05-01 00:00:00'),
(2, '2013-05-01 10:00:00'),
(1, '2013-05-02 00:00:00'),
(1, '2013-05-05 00:00:00'),
(2, '2013-05-06 12:00:00')
;
;WITH CTE as(
select customer_id, trade_date, datediff(hour,trade_date,ISNULL(LEAD(trade_date,1) over (partition by customer_id order by trade_date),GETDATE())) Trade_diff
from customer_trades
)
, CTE2 as
(SELECT customer_id, trade_diff, LAST_VALUE(trade_diff) OVER(Partition by customer_id order by trade_date) Curr_Trade from CTE)
SELECT Customer_id, AVG(trade_diff) AV, Max(Curr_Trade) Curr_Trade
FROM CTE2
GROUP BY customer_id

Related

How to display data on separate rows in one row

I have a table with the following setup
ID InOut_Status InOut_Datetime
1 IN 9/12/2017 8:00
2 IN 9/12/2017 10:00
1 OUT 9/12/2017 1:00
2 OUT 9/12/2017 3:00
I want to be able to see both status and date on the same row vs separate rows for example
ID In_Status In_Datetime Out_Status Out_Datetime
1 IN 9/12/2017 8:00 OUT 9/12/2017 1:00
2 IN 9/12/2017 10:00 OUT 9/12/2017 3:00
I would like to return all columns. I just provided a few for example. I also would like to show only the most recent Datetime for each ID and if the user hasn't checked out, I would like for the Out_Datetime to be blank.
Any assistance would be greatly appreciated. Thanks.
You can use self join:
SELECT *
FROM
(
SELECT ins.id
, ins.InOut_Datetime as in_time
, outs.InOut_Datetime as out_time
, row_number() over (partition by ins.id order by ins.InOut_Datetime desc) as ranking
FROM table ins
LEFT JOIN table outs
ON ins.id = outs.id
AND outs.InOut_Status = 'OUT'
AND outs.InOut_Datetime > ins.InOut_Datetime
WHERE ins.InOut_Status = 'IN'
and ins.InOut_Datetime > DATEADD(day, -1, GETDATE())
) t
WHERE t.ranking = 1
Updated query to :
get logins within last 24 hours
get the latest login of a user only
show out time only if it's later than in time
You need to left join, however, you want to limit the join to the first record returned in descending order using a sub query.
SELECT * FROM
(
SELECT
ID,InOut_Status,InOutDateTime,
CheckOutInstanceDescending = ROW_NUMBER() OVER(PARTITION BY ClockOut.ID ORDER BY ClockOut.InOutDateTime DESC)
FROM
MyTable ClockIn
LEFT OUTER JOIN MyTable ClockOut ON ClockOut.ID=ClockIn.ID
WHERE
ClockIn.InOut_Status='IN'
)AS Combined
WHERE
Combined.CheckOutInstanceDescending=1
A simple pivot would fix this the easiest:
https://technet.microsoft.com/en-us/library/ms177410(v=sql.105).aspx. You can run this in SSMS 2008 or higher(I wrote it in SQL 2016).
DECLARE #Temp TABLE (Id INT, InOut_Status VARCHAR(8), InOut_Datetime DATETIME)
INSERT INTO #Temp (Id, InOut_Status, InOut_Datetime) VALUES (1, 'IN', '9-12-2017 8:00'), (2, 'IN', '9-12-2017 10:00'),(1, 'OUT', '9-12-2017 13:00'),(2, 'OUT', '9-12-2017 16:00'),(3, 'IN', '9-12-2017 06:00')
SELECT
pvt.Id
, 'IN' AS In_Status
, pvt.[In]
, 'OUT' AS Out_Status
, pvt.OUT
From #Temp
PIVOT(MAX(InOut_Datetime) FOR InOut_Status IN ([In], [OUT])) AS pvt

Any one help me to solve this i try my best but did not solve this?

ItemName Price CreatedDateTime
New Card 50.00 2014-05-26 19:17:09.987
Recharge 110.00 2014-05-26 19:17:12.427
Promo 90.00 2014-05-27 16:17:12.427
Membership 70.00 2014-05-27 16:17:12.427
New Card 50.00 2014-05-26 19:20:09.987
Out Put : Need a query which Sum the sale of Current hour and
sale of item which have maximum sale in that hour in breakdownofSale
Column.
Hour SaleAmount BreakDownOfSale
19 210 Recharge
16 160 Promo
This should do it
create table #t
(
ItemName varchar(50),
Price decimal(18,2),
CreatedDateTime datetime
);
set dateformat ymd;
insert into #t values('New Card', 50.00, '2014-05-26 19:17:09.987');
insert into #t values('Recharge', 110.00, '2014-05-26 19:17:12.427');
insert into #t values('Promo', 90.00, '2014-05-27 16:17:12.427');
insert into #t values('Membership', 70.00, '2014-05-27 16:17:12.427');
insert into #t values('New Card', 50.00, '2014-05-26 19:20:09.987');
with cte as
(
select datepart(hh, CreatedDateTime) as [Hour],
ItemName,
Price,
sum(Price) over (partition by datepart(hh, CreatedDateTime)) SaleAmount,
ROW_NUMBER() over (partition by datepart(hh, CreatedDateTime) order by Price desc) rn
from #t
)
select Hour,
SaleAmount,
ItemName
from cte
where rn = 1
Though i am not clear with the question, based on your desired output, you may use the query as below.
SELECT DATEPART(HOUR,CreatedDateTime) AS Hour, sum(Price) AS Price, ItemName AS BreakDownOfSale from TableName WHERE BY ItemName,DATEPART(HOUR,CreatedDateTime)
Replace table name and column name with the actual one.
Hope this helps!
Here is the sample query.
You can use SQL Server Windows functions to get the result you need.
DECLARE #Table TABLE
(
ItemName NVARCHAR(40),
Price DECIMAL(10,2),
CreatedDatetime DATETIME
)
-- Fill table.
INSERT INTO #Table
( ItemName, Price, CreatedDatetime )
VALUES
( N'New Card' , 50.00 , '2014-05-26 19:17:09.987' ),
( N'Recharge' , 110.00 , '2014-05-26 19:17:12.427' ) ,
( N'Promo' , 90.00 , '2014-05-27 16:17:12.427' ) ,
( N'Membership' , 70.00 , '2014-05-27 16:17:12.427' ) ,
( N'New Card' , 50.00 , '2014-05-26 19:20:09.987' )
-- Check record(s).
SELECT * FROM #Table
-- Get record(s) in required way.
;WITH T1 AS
(
SELECT
DATEPART(HOUR, T.CreatedDatetime) AS Hour,
CONVERT(DATE, T.CreatedDatetime) AS Date,
T.ItemName AS BreakDownOfSales,
-- Date and hour both will give unique record(s)
SUM(Price) OVER (PARTITION BY CONVERT(DATE, T.CreatedDatetime), DATEPART(HOUR, CreatedDateTime)) AS SaleAmount,
ROW_NUMBER() OVER(PARTITION BY CONVERT(DATE, T.CreatedDatetime), DATEPART(HOUR, T.CreatedDatetime) ORDER BY T.Price DESC) AS RN
FROM
#Table T
)
SELECT
T1.Date ,
T1.Hour ,
T1.SaleAmount,
T1.BreakDownOfSales
FROM
T1
WHERE T1. RN = 1
ORDER BY
T1.Hour
Check this simple solution, Please convert it to SQL Server Query.
This will give you perfect result even if you have multiple date data.
SELECT HOUR(CreatedDateTime), SUM(Price),
(SELECT itemname FROM t it WHERE HOUR(ot.CreatedDateTime) = HOUR(it.CreatedDateTime) AND
DATE(ot.CreatedDateTime) = DATE(it.CreatedDateTime)
GROUP BY itemname
ORDER BY price DESC
LIMIT 1
) g
FROM t ot
GROUP BY HOUR(CreatedDateTime);

Count previous consecutive rows in SQL Server

I have attendance data list which is showing below. Now I am trying to find data by a specific date range (01/05/2016 ā€“ 07/05/2016) with total Present Column, Total Present Column will be calculated from previous present data (P). Suppose today is 04/05/2016. If a person has 01,02,03,04 status ā€˜pā€™ then it will show date 04-05-2016 total present 4.
Could you help me to find total present from this result set.
You can check this example, which have logic to calculate previous sum value.
declare #t table (employeeid int, datecol date, status varchar(2) )
insert into #t values (10001, '01-05-2016', 'P'),
(10001, '02-05-2016', 'P'),
(10001, '03-05-2016', 'P'),
(10001, '04-05-2016', 'P'),
(10001, '05-05-2016', 'A'),
(10001, '06-05-2016', 'P'),
(10001, '07-05-2016', 'P'),
(10001, '08-05-2016', 'L'),
(10002, '07-05-2016', 'P'),
(10002, '08-05-2016', 'L')
--select * from #t
select * ,
SUM(case when status = 'P' then 1 else 0 end) OVER (PARTITION BY employeeid ORDER BY employeeid, datecol
ROWS BETWEEN UNBOUNDED PRECEDING
AND current row)
from
#t
Another twist of the same thing via cte (as you written SQLSERVER2012, this below solution only work in Sqlserver 2012 and above)
;with cte as
(
select employeeid , datecol , ROW_NUMBER() over(partition by employeeid order by employeeid, datecol) rowno
from
#t where status = 'P'
)
select t.*, cte.rowno ,
case when ( isnull(cte.rowno, 0) = 0)
then LAG(cte.rowno) OVER (ORDER BY t.employeeid, t.datecol)
else cte.rowno
end LagValue
from #t t left join cte on t.employeeid = cte.employeeid and t.datecol = cte.datecol
order by t.employeeid, t.datecol
You could use a subquery to calculate TotalPresent for each row:
SELECT
main.EmployeeID,
main.[Date],
main.[Status],
(
SELECT SUM(CASE WHEN t.[Status] = 'P' THEN 1 ELSE 0 END)
FROM [TableName] t
WHERE t.EmployeeID = main.EmployeeID AND t.[Date] <= main.[Date]
) as TotalPresent
FROM [TableName] main
ORDER BY
main.EmployeeID,
main.[Date]
Here I used subquery to count the sum of records that have the same EmployeeID and date is less or equal to the date of current row. If status of the record is 'P', then 1 is added to the sum, otherwise 0, which counts only records that have status P.
Interesting question, this should work:
select *
, (select count(retail) from p g
where g.date <= p.date and g.id = p.id and retail = 'P')
from p
order by ID, Date;
So I believe I understand correctly. You would like to count the occurences of P per ID datewise.
This makes a lot of sense. That is why the first occurrence of ID2 was L and the Total is 0. This query will count P status for each occurrence, pause at non-P for each ID.
Here is an example

How to get date difference statistics by group with TSQL

I have a table with columns of product and sold date, and want to query the statistics of sold interval of each product group(max interval, min interval ...) , is there any good advice to make it, appreciate~
Prod SaleDate
-------------------
A 2013-02-05
D 2013-02-24
B 2013-03-01
A 2013-03-12
D 2013-03-22
A 2013-04-03
D 2013-04-08
. . .
Sold interval means days interval between two adjacent date.
Sold interval of A:
DATEDIFF(d, '2013-02-05', '2013-03-12')
DATEDIFF(d, '2013-03-12', '2013-04-03')
...
Sold interval of D:
DATEDIFF(d, '2013-02-24', '2013-03-22')
DATEDIFF(d, '2013-03-22', '2013-04-08')
and I want get the average, max and min value of sold interval.
Prod IntervalAvg IntervalMax IntervalMin
-----------------------------------------------------
A xxx xxx xxx
B xxx xxx xxx
C
. . .
Thanks Kahn's answer give me a hint. I re-implement my code for sql server 2000 by "left outer join".
DECLARE #DATA TABLE (Prod CHAR(1), SaleDate SMALLDATETIME)
INSERT INTO #DATA VALUES ('A','2013-02-05')
INSERT INTO #DATA VALUES ('D','2013-02-24')
INSERT INTO #DATA VALUES ('B','2013-03-01')
INSERT INTO #DATA VALUES ('A','2013-03-12')
INSERT INTO #DATA VALUES ('D','2013-03-22')
INSERT INTO #DATA VALUES ('A','2013-04-03')
INSERT INTO #DATA VALUES ('D','2013-04-08')
SELECT
t.Prod
, MAX(t.Interval) IntervalMax
, MIN(t.Interval) IntervalMin
, AVG(t.Interval) IntervalAvg
FROM
(
SELECT t1.*, DATEDIFF(dd, MAX(t2.SaleDate), t1.SaleDate) Interval
FROM #DATA t1
LEFT OUTER JOIN #DATA t2 ON t1.Prod = t2.Prod AND t1.SaleDate > t2.SaleDate
GROUP BY t1.Prod, t1.SaleDate
)t
GROUP BY t.Prod
ORDER BY t.Prod
Here's one way that should work:
-- Test data
DECLARE #DATA TABLE (Prod CHAR(1), SaleDate DATE)
INSERT INTO #DATA VALUES ('A','2013-02-05')
,('D','2013-02-24')
,('B','2013-03-01')
,('A','2013-03-12')
,('D','2013-03-22')
,('A','2013-04-03')
,('D','2013-04-08')
-- Actual query
;WITH CTE AS
(SELECT D.*, CA.NextSaleDate
, DATEDIFF(DD, SaleDate, NextSaleDate) DDiff
FROM #DATA D
OUTER APPLY (SELECT MIN(SaleDate) NextSaleDate FROM #DATA B WHERE B.Prod = D.Prod AND B.SaleDate > D.SaleDate) CA)
SELECT DISTINCT Prod, AvgInterval, MaxInterval, MinInterval
FROM CTE C
CROSS APPLY (SELECT AVG(DDiff) AvgInterval, MAX(DDiff) MaxInterval, MIN(DDiff) MinInterval FROM CTE B WHERE B.Prod = C.Prod) CA

Results of multiple queries with aggregates combined

I have 2 seperate select statements, using aggregate functions in each. I would like to be able to take the results and combine them.
table_a
id int
entered_date datetime (holds utc stamp)
balance money
group_id int
table_b
id int
entered_date date
balance money
transaction_type int
query 1:
select convert(date,entered_date), sum(balance) as earned
from table_a
where group_id in (1, 2, 3, 4)
group by convert(date,entered_Date)
query 2:
select convert(date,entered_date), sum(balance) as spent
where transaction_type = 2
group by convert(date,entered_Date)
results:
query 1:
2012-05-13, 5000
2012-05-14, 12000
...
query 2:
2012-05-13, 9000
2012-05-14, 55856
...
I would like to return one row for each record without using temp tables. The result set should have a date, then earned vs. spent. I have a report running using union to get the totals and that is fine, but i need to generate a result set with 1 record and a earned vs against line. Any help with this is appreciated.
Try:
;With AllDates AS
(
select convert(date,entered_date) As EnteredDate
from table_a
where group_id in (1, 2, 3, 4)
group by convert(date,entered_Date)
UNION
select convert(date,entered_date)
from table_b
where transaction_type = 2
group by convert(date,entered_Date)
)
, AllEarned AS (
select convert(date,entered_date) AS EnteredDate, sum(balance) as Earned
from table_a
where group_id in (1, 2, 3, 4)
group by convert(date,entered_Date)
)
, AllSpent AS (
select convert(date,entered_date) AS EnteredDate, sum(balance) as Spent
from table_b
where transaction_type = 2
group by convert(date,entered_Date)
)
SELECT
d.EnteredDate, e.Earned, s.Spent
FROM AllDates d
LEFT OUTER JOIN AllEarned e ON d.EnteredDate=e.EnteredDate
LEFT OUTER JOIN AllSpent s ON d.EnteredDate=s.EnteredDate
ORDER BY 1,2,3
You can combine these using logic, assuming that both are from the same table
(the second query is missing the from statement):
select convert(date,entered_date),
sum(case when group_id in (1, 2, 3, 4) then balance end) as earned,
sum(case when transaction_type = 2 then balance end) as spend
from table_a
group by convert(date,entered_Date)
SELECT
CASE WHEN a.a_date IS NULL THEN b.a_date ELSE a.a_date END as a_data,
a.earned,
b.spent
FROM
(select
convert(date,entered_date) as a_date,
sum(balance) as earned
from table_a
where group_id in (1, 2, 3, 4)
group by entered_Date) A
FULL OUTER JOIN
(select
convert(date,entered_date) as a_date,
sum(balance) as spent
from table_a
where transaction_type = 2
group by entered_Date) B
ON A.a_date=b.a_date
Or using FULL OUTER JOIN if there are data that don't meet both conditions. And using CASE WHEN a.a_date IS NULL THEN b.a_date ELSE a.a_date END as a_data
Assuming earned amounts are from table_a and spent amounts are from table_b,
; WITH a AS (
select entered_date=convert(date,entered_date), balance as earned, 0 AS spent
from table_a
where group_id in (1, 2, 3, 4)
UNION ALL
select entered_date=convert(date,entered_date), 0 AS earned, balance as spent
from table_b
where transaction_type = 2
)
SELECT entered_date
, earned=SUM(earned)
, spent=SUM(spent)
FROM a
GROUP BY entered_date