SQL Server : remove duplicates and add columns - sql

I have a table that contains duplicate records. This is what the table looks like:
ID Date Status ModifiedBy
------------------------------------------
1 1/2/2019 10:29 Assigned(0) xyz
1 1/2/2019 12:21 Pending(1) abc
1 1/4/2019 11:42 Completed(5)abc
1 1/20/2019 2:45 Closed(8) pqr
2 9/18/2018 10:05 Assigned(0) xyz
2 9/18/2018 11:15 Pending(1) abc
2 9/21/2018 11:15 Completed(5)abc
2 10/7/2018 2:46 Closed(8) pqr
What I want to do is keep, for each ID, the row with the minimum date value, and also add two additional columns: PendingStartDate and PendingEndDate.
PendingStartDate: date when ID went into pending status
PendingEndDate: date when ID went from pending status to any other status
So my final output should look like this
ID AuditDate Status ModifiedBy PendingStartDate PendingEndDate
---------------------------------------------------------------------------
1 1/2/2019 10:29 Assigned(0) xyz 1/2/2019 12:21 1/4/2019 11:42
2 9/18/2018 10:05 Assigned(0) abc 9/18/2018 11:15 9/21/2018 11:15
Any help as to how to do this is appreciated.
Thanks

I think you want conditional aggregation:
-- One output row per id.
--   auditdate / status / modifiedBy : values taken from the earliest row of the id
--   pendingStartDate                : earliest timestamp of a row whose status starts with 'Pending'
--   pendingEndDate                  : timestamp of the row that follows a 'Pending' row
WITH ordered AS (
    -- Number the rows of each id chronologically and look one row ahead.
    SELECT
        t.*,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY date) AS seqnum,
        LEAD(date)   OVER (PARTITION BY id ORDER BY date) AS next_date
    FROM t
)
SELECT
    id,
    MIN(date)                                              AS auditdate,
    MAX(CASE WHEN seqnum = 1 THEN status END)              AS status,
    MAX(CASE WHEN seqnum = 1 THEN modifiedBy END)          AS modifiedBy,
    MIN(CASE WHEN status LIKE 'Pending%' THEN date END)      AS pendingStartDate,
    MAX(CASE WHEN status LIKE 'Pending%' THEN next_date END) AS pendingEndDate
FROM ordered
GROUP BY id;

please try this:
-- Sample data held in a table variable. A table variable must be named with
-- a leading @; "DECLARE #Tab TABLE" is not valid T-SQL.
-- NOTE(review): the date literals are month/day/year strings and rely on the
-- session's date format to convert to DATETIME - confirm for your server.
DECLARE @Tab TABLE (Id int, [Date] DATETIME, [Status] varchar(25), ModifiedBy varchar(10));

INSERT INTO @Tab (Id, [Date], [Status], ModifiedBy)
SELECT 1,'1/2/2019 10:29','Assigned(0)','xyz' UNION ALL
SELECT 1,'1/2/2019 11:29','Started(0)','xyz' UNION ALL
SELECT 1,'1/2/2019 12:21','Pending(1)','abc' UNION ALL
SELECT 1,'1/2/2019 12:21','In-Progress(1)','abc' UNION ALL
SELECT 1,'1/4/2019 11:42','Completed(5)','abc' UNION ALL
SELECT 1,'1/20/2019 2:45','Closed(8)','pqr' UNION ALL
SELECT 2,'9/18/2018 10:05','Assigned(0)','xyz' UNION ALL
SELECT 2,'9/18/2018 11:15','Pending(1)','abc' UNION ALL
SELECT 2,'9/21/2018 11:15','Completed(5)','abc' UNION ALL
SELECT 2,'10/7/2018 2:46','Closed(8)','pqr';

-- cte : keep only the Assigned / Pending / Completed rows, so that the row
--       following 'Assigned(0)' is the Pending row; its date is exposed as
--       pendingStartDate.
-- cte2: look one more row ahead, so the Assigned row also sees the date of
--       the row after Pending (the Completed row) as pendingEndDate.
;WITH cte AS
(
    SELECT *, LEAD([Date]) OVER (PARTITION BY Id ORDER BY [Date]) AS pendingStartDate
    FROM @Tab
    WHERE [Status] IN ('Assigned(0)','Pending(1)','Completed(5)')
)
, cte2 AS
(
    SELECT *, LEAD(pendingStartDate) OVER (PARTITION BY Id ORDER BY [Date]) AS pendingEndDate
    FROM cte
)
-- Only the Assigned row of each id carries both look-ahead values.
SELECT * FROM cte2 WHERE [Status] = 'Assigned(0)'
As you mentioned in the comments, I have included a few extra states between Assigned, Pending and Completed.

Related

Get users attendance entry and exit in one row SQL Server

I have a table with all entries for employees. I need to get all the working hours and the entry and exit time of the user in one record.
The table is like this:
How can I do that, and also handle the case where some entries or exits are missing? For example, in some odd cases an employee will have an entry with no exit.
Assuming that the ins and outs line up (that is, are strictly interleaved), you can use lead() and some filtering:
-- Pair every log row with the timestamp of the same employee's next log row,
-- then keep only the IN rows so each result row is one IN/next-row pair.
WITH paired AS (
    SELECT
        t.*,
        LEAD(datetime) OVER (PARTITION BY empid ORDER BY datetime) AS next_datetime
    FROM t
)
SELECT
    p.empId,
    CONVERT(date, p.datetime)                             AS date,
    p.datetime                                            AS timein,
    p.next_datetime                                       AS timeout,
    -- minutes / 60.0 yields fractional (decimal) hours
    DATEDIFF(minute, p.datetime, p.next_datetime) / 60.0  AS decimal_hours
FROM paired AS p
WHERE p.entrytype = 'IN';
Note that this formats the duration as decimal hours rather than as a time. That part does not seem relevant to the actual question and just complicates the query.
This adds LEAD entrytype to make sure there is a corresponding OUT row. Also, it divides the date difference in minutes by 60.0 (added decimal)
-- Same pairing as a plain LEAD over the log, but the type of the next row is
-- fetched as well so that only IN rows directly followed by an OUT row are kept.
WITH paired AS (
    SELECT
        t.*,
        LEAD(datetime)  OVER (PARTITION BY empid ORDER BY datetime) AS next_datetime,
        LEAD(entrytype) OVER (PARTITION BY empid ORDER BY datetime) AS next_entrytype
    FROM t
)
SELECT
    p.empId                                               AS EmpID,
    CAST(p.datetime AS date)                              AS [Day],
    p.datetime                                            AS [Timein],
    p.next_datetime                                       AS [Timeout],
    DATEDIFF(minute, p.datetime, p.next_datetime) / 60.0  AS TotalHours
FROM paired AS p
WHERE p.entrytype = 'IN'
  AND p.next_entrytype = 'Out';
Using Row_number to identify IN and OUT related to which employee:
-- RN1 numbers each employee's IN rows and OUT rows separately, in time order,
-- so the n-th IN and the n-th OUT of an employee share the same RN1 and are
-- folded into one row by the GROUP BY.
SELECT EMPID, CAST([DATEUPDT] AS DATE) AS Date,
MAX(CASE WHEN ENTRYTYPE = 'IN' THEN CAST([DATEUPDT] AS TIME) END) AS TIMEIN,
MAX(CASE WHEN ENTRYTYPE = 'OUT' THEN CAST([DATEUPDT] AS TIME) END) AS TIMEOUT,
-- Divide by 60.0 (not 60): integer division would truncate the duration to
-- whole hours, e.g. 7h11m would be reported as 7.
ABS(DATEDIFF(MINUTE, MAX(CASE WHEN ENTRYTYPE = 'OUT' THEN CAST([DATEUPDT] AS TIME) END), MAX(CASE WHEN ENTRYTYPE = 'IN' THEN CAST([DATEUPDT] AS TIME) END)))/60.0 AS DURATION
FROM
(
SELECT A.*,
ROW_NUMBER() OVER(PARTITION BY EMPID, [ENTRYTYPE] ORDER BY [DATEUPDT]) RN1
FROM EMPLOYEE_LOG A
) X
GROUP BY EMPID, RN1, CAST([DATEUPDT] AS DATE)
ORDER BY EMPID, RN1;
You can also "sessionize" in SQL Server - by using OLAP queries: With a counter that is at 1 when a new session begins and at 0 otherwise
WITH
-- Inline sample data: one row per clock event.
input(id, empid, dttime, entrytype) AS (
    SELECT v.id, v.empid, CAST(v.dttime AS DATETIME), v.entrytype
    FROM (VALUES
        (1, 125, '2020-08-13 08:10:00.000', 'IN'),
        (1, 157, '2020-08-13 08:01:00.000', 'IN'),
        (1, 125, '2020-08-13 15:21:00.000', 'OUT'),
        (1, 125, '2020-08-13 15:24:00.000', 'IN'),
        (1, 125, '2020-08-13 17:24:00.000', 'OUT'),
        (1, 157, '2020-08-13 15:01:00.000', 'OUT'),
        (1, 125, '2020-08-14 08:10:00.000', 'IN'),
        (1, 157, '2020-08-14 08:01:00.000', 'IN'),
        (1, 125, '2020-08-14 15:21:00.000', 'OUT'),
        (1, 125, '2020-08-14 15:24:00.000', 'IN'),
        (1, 125, '2020-08-14 17:24:00.000', 'OUT'),
        (1, 157, '2020-08-14 15:01:00.000', 'OUT')
    ) AS v(id, empid, dttime, entrytype)
),
-- Running count of IN events per employee: every IN starts a new session id,
-- and the rows after it (up to the next IN) inherit that id.
with_session AS (
    SELECT
        id,
        empid,
        dttime,
        entrytype,
        SUM(CASE WHEN entrytype = 'IN' THEN 1 ELSE 0 END)
            OVER (PARTITION BY empid ORDER BY dttime) AS sessid
    FROM input
)
-- Collapse each session to one row holding its IN time, OUT time and length.
SELECT
    id,
    empid,
    sessid,
    CAST(MAX(CASE WHEN entrytype = 'IN'  THEN dttime END) AS DATE) AS day,
    CAST(MAX(CASE WHEN entrytype = 'IN'  THEN dttime END) AS TIME) AS indtm,
    CAST(MAX(CASE WHEN entrytype = 'OUT' THEN dttime END) AS TIME) AS outdtm,
    CAST(
        MAX(CASE WHEN entrytype = 'OUT' THEN dttime END)
      - MAX(CASE WHEN entrytype = 'IN'  THEN dttime END)
        AS TIME
    ) AS totalhours
FROM with_session
GROUP BY
    id,
    empid,
    sessid
ORDER BY
    id,
    [day],      -- the fourth select-list column
    empid,
    sessid
;
-- out id | empid | sessid | day | indtm | outdtm | totalhours
-- out ----+-------+--------+------------+----------+----------+------------
-- out 1 | 125 | 1 | 2020-08-13 | 08:10:00 | 15:21:00 | 07:11:00
-- out 1 | 125 | 2 | 2020-08-13 | 15:24:00 | 17:24:00 | 02:00:00
-- out 1 | 157 | 1 | 2020-08-13 | 08:01:00 | 15:01:00 | 07:00:00
-- out 1 | 125 | 3 | 2020-08-14 | 08:10:00 | 15:21:00 | 07:11:00
-- out 1 | 125 | 4 | 2020-08-14 | 15:24:00 | 17:24:00 | 02:00:00
-- out 1 | 157 | 2 | 2020-08-14 | 08:01:00 | 15:01:00 | 07:00:00

Get max date for each from either of 2 columns

I have a table like below
AID BID CDate
-----------------------------------------------------
1 2 2018-11-01 00:00:00.000
8 1 2018-11-08 00:00:00.000
1 3 2018-11-09 00:00:00.000
7 1 2018-11-15 00:00:00.000
6 1 2018-12-24 00:00:00.000
2 5 2018-11-02 00:00:00.000
2 7 2018-12-15 00:00:00.000
And I am trying to get a result set as follows
ID MaxDate
-------------------
1 2018-12-24 00:00:00.000
2 2018-12-15 00:00:00.000
Each value in the id columns(AID,BID) should return the max of CDate .
ex: in the case of 1, its max CDate is 2018-12-24 00:00:00.000 (here 1 appears under BID)
in the case of 2 , max date is 2018-12-15 00:00:00.000 . (here 2 is under AID)
I tried the following.
1.
-- Asker's first attempt (kept as posted).
-- The derived table "a" computes the max CDate per (AID, BID) pair; the outer
-- query joins back on either id matching plus an equal date, and then groups
-- by the (AID, BID) pair again - so the result is keyed by pair, not by a
-- single id value.
select
g.AID,g.BID,
max(g.CDate) as 'LastDate'
from dbo.TT g
inner join
(select AID,BID,max(CDate) as maxdate
from dbo.TT
group by AID,BID)a
on (a.AID=g.AID or a.BID=g.BID)
and a.maxdate=g.CDate
group by g.AID,g.BID
and 2.
-- Asker's second attempt (kept as posted).
-- Computes the max CDate per AID with a window function and keeps the rows
-- whose CDate equals that max. Only AID is partitioned on; BID is not
-- considered by this query.
SELECT
AID,
CDate
FROM (
SELECT
*,
max_date = MAX(CDate) OVER (PARTITION BY [AID])
FROM dbo.TT
) AS s
WHERE CDate= max_date
Please suggest a 3rd solution.
You can assemble the data in a table expression first; computing the max for each value is then simple. For example:
-- Stack the AID and BID columns into a single id column, then take the
-- latest cdate for each id value.
WITH all_ids AS (
    SELECT aid AS id, cdate FROM t
    UNION ALL
    SELECT bid AS id, cdate FROM t
)
SELECT
    id,
    MAX(cdate)
FROM all_ids
GROUP BY id
You seem to only care about values that are in both columns. If this interpretation is correct, then:
-- Latest cdate per id, restricted to ids that occur in BOTH the aid and the
-- bid column. Each branch tags its rows with the column they came from:
-- rows from aid carry is_a = 1, rows from bid carry is_b = 1. (The bid branch
-- must set is_b, otherwise max(is_b) = 1 can never hold and nothing is returned.)
select id, max(cdate)
from ((select aid as id, cdate, 1 as is_a, 0 as is_b
       from t
      ) union all
      (select bid as id, cdate, 0 as is_a, 1 as is_b
       from t
      )
     ) ab
group by id
having max(is_a) = 1 and max(is_b) = 1;

Prepare data at ID,Month level in SQL

I have a table that is something like this:
ID Date
1 10/04/2015
1 28/04/2015
1 14/07/2015
1 30/07/2015
1 30/08/2015
2 10/04/2016
2 28/04/2016
2 14/05/2016
2 30/05/2016
but i am trying to achieve like:
ID Date
1 28/04/2015
1 30/07/2015
1 30/08/2015
2 28/04/2016
2 30/05/2016
Could you please help me .
Try this:
-- Keep the latest date of every (id, year, month) group.
-- The year is part of the partition so that the same calendar month in
-- different years is not merged into one group.
select * from (
    select id,
           [Date],
           row_number() over (
               partition by id, datepart(year, [date]), datepart(month, [date])
               order by [Date] desc
           ) [rn]
    from (
        select id,
               -- date conversion; style 103 = British/French (dd/mm/yyyy), matching the sample data
               convert(date, [date], 103) [Date]
        from #MyTable
    ) a
) b where rn = 1
I don't really get the logic of expected result but the query below would work.
-- Keep rows whose day-of-month is 28 or later.
SELECT *
FROM yourTable
WHERE DATEPART(day, [Date]) >= 28;
See How to get Day, Month and Year Part from DateTime in Sql Server

get all record with minimum date in every month but the latest time

Let say I have this kind of data:
Date Category Amount
01/10/2014 20:04 2 12212
01/11/2014 0:00 3 11043.38
01/11/2014 16:03 2 12082
01/11/2014 16:32 3 110.43
01/12/2014 20:41 2 12196
01/12/2014 20:42 3 103.22
31/12/2014 14:20 2 12440
31/12/2014 14:21 3 104.25
I wish to get below result:
Date Category Amount
01/10/2014 20:04 2 12212
01/11/2014 16:03 2 12082
01/11/2014 16:32 3 110.43
01/12/2014 20:41 2 12196
01/12/2014 20:42 3 103.22
So far, I am able to make this query:
-- Asker's attempt: join each row to the max date of its category per
-- (month, year). Column aliases are only allowed in the select list, not
-- inside GROUP BY, so the grouping expressions are written without them.
select t.date, t.Category, t.Amount
from mytable t
inner join (
    select Category, MONTH(date) MONTHH, YEAR(date) YEARR, max(date) as MaxDate
    from mytable
    group by Category, MONTH(date), YEAR(date)
) tm on t.date = tm.MaxDate and t.Category = tm.Category
But it returns a wrong result if there is more than 1 date in 1 month. This is the result:
Date Category Amount
01/10/2014 20:04 2 12212
01/11/2014 16:03 2 12082
01/11/2014 16:32 3 110.43
31/12/2014 14:20 2 12440
31/12/2014 14:21 3 104.25
Could anyone help please? thanks
Use dense_rank() and row_number():
-- seqnum   = 1 : the row falls on the earliest day present in its year/month.
-- seqnum_d = 1 : the row is the latest one of its category on that day.
-- Category is part of the seqnum_d partition so that every category keeps its
-- own latest row; without it only a single row per day would survive.
select t.*
from (select t.*,
             dense_rank() over (partition by year(date), month(date)
                                order by day(date)) as seqnum,
             row_number() over (partition by category, year(date), month(date), day(date)
                                order by date desc) as seqnum_d
      from mytable t
     ) t
where seqnum = 1 and seqnum_d = 1;
Try This
-- For every (month, year, category) pick the row on the earliest calendar day
-- and, within that day, the latest time. Ordering by the full datetime
-- ascending would instead return the earliest time of that day.
;WITH CTE
AS
(
    SELECT
        RN = ROW_NUMBER() OVER(
                 PARTITION BY MONTH([Date]), YEAR([Date]), Category
                 ORDER BY CAST([Date] AS date) ASC, [Date] DESC),
        *
    FROM YourTable
)
SELECT
    Date,
    Category,
    Amount
FROM CTE
WHERE RN = 1
Do not use reserved words as column names (e.g. Date).
No need to group, you are not using any summation/averaging:
select *
from tda t
where not exists(
    -- not exists: same date, later time, cat is same
    -- (t must be the LATEST row of its category on that day)
    select 1
    from tda b
    where cast(t.dDate as date) = cast(b.dDate as date)
    and t.ddate < b.ddate
    and t.cat = b.cat
)
and not exists(
    -- not exists: same year and month, earlier day
    select 1
    from tda b
    where year(b.ddate) = year(t.ddate)
    and month(b.ddate) = month(t.ddate)
    and day(b.ddate) < day(t.ddate)
)
to get (reformat the date to your liking):
ddate Cat Amount
2014-10-01T20:04:00Z 2 12212
2014-11-01T16:03:00Z 2 12082
2014-11-01T16:32:00Z 3 110.43
2014-12-01T20:41:00Z 2 12196
2014-12-01T20:42:00Z 3 103.22
by
-- Sample table and rows used by the query above.
CREATE TABLE tda (
    [ddate]  datetime,
    [Cat]    int,
    [Amount] numeric(10,2)
);

INSERT INTO tda (dDate, Cat, Amount)
VALUES ('2014/10/01 20:04', 2, 12212),
       ('2014/11/01 0:00', 3, 11043.38),
       ('2014/11/01 16:03', 2, 12082),
       ('2014/11/01 16:32', 3, 110.43),
       ('2014/12/01 20:41', 2, 12196),
       ('2014/12/01 20:42', 3, 103.22),
       ('2014/12/31 14:20', 2, 12440),
       ('2014/12/31 14:21', 3, 104.25);

Getting most recent distinct records

Considering the following table:
User CreatedDateTime Quantity
----- ----------------- --------
Jim 2012-09-19 01:00 1
Jim 2012-09-19 02:00 5
Jim 2012-09-19 03:00 2
Bob 2012-09-19 02:00 2
Bob 2012-09-19 03:00 9
Bob 2012-09-19 05:00 1
What query would return the most recent rows (as defined by CreatedDateTime) for each User, so that we could determine the associated Quantity.
i.e. the following records
User CreatedDateTime Quantity
----- ----------------- --------
Jim 2012-09-19 03:00 2
Bob 2012-09-19 05:00 1
We thought that we could simply GROUP BY User and CreatedDateTime and add a HAVING CreatedDateTime = MAX(CreatedDateTime). Of course this does not work, because Quantity is not available following the GROUP BY.
Since you are using SQL Server, you can use Window Function on this.
-- Most recent row per user.
-- ROW_NUMBER() is 1 for the newest row of every user; TOP (1) WITH TIES keeps
-- every row that ties on that ordering value, i.e. exactly the rows numbered 1.
SELECT TOP (1) WITH TIES
    [User],
    CreatedDateTime,
    Quantity
FROM tableName
ORDER BY ROW_NUMBER() OVER (PARTITION BY [User] ORDER BY CreatedDateTime DESC)
SQLFiddle Demo
-- Most recent row per user: number each user's rows newest-first and keep
-- the row numbered 1.
SELECT
    ranked.[User],
    ranked.CreatedDateTime,
    ranked.Quantity
FROM (
    SELECT
        [User],
        CreatedDateTime,
        Quantity,
        ROW_NUMBER() OVER (PARTITION BY [User] ORDER BY CreatedDateTime DESC) AS rn
    FROM dbo.table_name
) AS ranked
WHERE ranked.rn = 1;
If you do not have the ability to use windowing functions, then you can use a sub-query:
-- Without window functions: compute each user's latest CreatedDateTime, then
-- join back to pick up the Quantity stored on that row.
WITH latest AS (
    SELECT
        [user],
        MAX(CreatedDateTime) AS mxdate
    FROM yourtable
    GROUP BY [user]
)
SELECT
    src.[user],
    latest.mxdate,
    src.quantity
FROM yourtable AS src
INNER JOIN latest
    ON latest.[user] = src.[user]
   AND latest.mxdate = src.CreatedDateTime
see SQL Fiddle with Demo
-- Most recent row per user via a correlated subquery.
-- USER is a reserved word in SQL Server, so the column is bracketed.
SELECT DISTINCT
    o.[User],
    o.CreatedDateTime,
    o.Quantity
FROM
    YourTable AS o
WHERE
    o.CreatedDateTime =
        (SELECT MAX(t.CreatedDateTime) FROM YourTable AS t WHERE t.[User] = o.[User])
-- The max is looked up per user (correlated). An uncorrelated IN over all
-- users' maxima would also return a row whose timestamp merely equals some
-- OTHER user's latest timestamp; and SELECT * cannot be combined with GROUP BY.
select t.* from <table_name> t where t.CreatedDateTime = (select max(m.CreatedDateTime) from <table_name> m where m.[user] = t.[user]);