Query to summarize rows from table with version history

Query to summarize rows from table with version history - sql

I'm looking for help with a SQL query. Below are the details.
Database: Microsoft SQL Server 2016
Data Table:
It's a "version history" table with 3 columns: version number, effective date, and end date.
The version number with an end_dt of 12/31/9999 is considered the "active" version number.
Users can "restore" prior versions and make them active again.
version_number
eff_dt
end_dt
0
2021-04-13 18:03:26.483
2021-04-16 18:35:06.367
1
2021-04-16 18:35:06.370
2021-04-19 20:45:38.993
1
2021-04-19 20:45:38.997
2021-05-06 16:00:59.990
2
2021-05-06 16:00:59.990
2021-05-06 16:13:03.997
3
2021-05-06 16:13:04.000
2021-05-06 16:17:23.127
4
2021-05-06 16:17:23.130
2021-05-06 16:52:45.250
4
2021-05-06 16:52:45.253
2021-05-11 15:36:25.283
4
2021-05-11 15:36:25.283
2021-05-14 15:52:50.843
5
2021-05-14 15:52:50.847
2021-05-20 17:14:55.860
4
2021-05-20 17:14:55.863
2021-05-20 17:14:55.867
1
2021-05-20 17:14:55.870
9999-12-31 00:00:00.000
Desired Output:
A query to display a consolidated version history where consecutive entries in the version history table are displayed as a single row encompassing the entire date range the version was active.
version_number
eff_dt
end_dt
0
2021-04-13 18:03:26.483
2021-04-16 18:35:06.367
1
2021-04-16 18:35:06.370
2021-05-06 16:00:59.990
2
2021-05-06 16:00:59.990
2021-05-06 16:13:03.997
3
2021-05-06 16:13:04.000
2021-05-06 16:17:23.127
4
2021-05-06 16:17:23.130
2021-05-14 15:52:50.843
5
2021-05-14 15:52:50.847
2021-05-20 17:14:55.860
4
2021-05-20 17:14:55.863
2021-05-20 17:14:55.867
1
2021-05-20 17:14:55.870
9999-12-31 00:00:00.000
Question:
How would one write a SQL statement to generate the Desired Output based on the Data Table?
SQL Script to create sample data:
CREATE TABLE #t1(
[version_number] [int] NULL,
[eff_dt] [datetime] NOT NULL,
[end_dt] [datetime] NOT NULL
)
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (1, CAST(N'2021-05-20T17:14:55.870' AS DateTime), CAST(N'9999-12-31T00:00:00.000' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (5, CAST(N'2021-05-14T15:52:50.847' AS DateTime), CAST(N'2021-05-20T17:14:55.860' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (4, CAST(N'2021-05-20T17:14:55.863' AS DateTime), CAST(N'2021-05-20T17:14:55.867' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (4, CAST(N'2021-05-11T15:36:25.283' AS DateTime), CAST(N'2021-05-14T15:52:50.843' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (4, CAST(N'2021-05-06T16:52:45.253' AS DateTime), CAST(N'2021-05-11T15:36:25.283' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (4, CAST(N'2021-05-06T16:17:23.130' AS DateTime), CAST(N'2021-05-06T16:52:45.250' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (3, CAST(N'2021-05-06T16:13:04.000' AS DateTime), CAST(N'2021-05-06T16:17:23.127' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (2, CAST(N'2021-05-06T16:00:59.990' AS DateTime), CAST(N'2021-05-06T16:13:03.997' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (1, CAST(N'2021-04-19T20:45:38.997' AS DateTime), CAST(N'2021-05-06T16:00:59.990' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (1, CAST(N'2021-04-16T18:35:06.370' AS DateTime), CAST(N'2021-04-19T20:45:38.993' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (0, CAST(N'2021-04-13T18:03:26.483' AS DateTime), CAST(N'2021-04-16T18:35:06.367' AS DateTime))
GO

You can achieve this in two steps, using successive common table expressions (cte). Firstly, you need a consecutive ranking number within your data. On the basis of this you can then do a recursive cte, looking at the version_number of consecutive rows (necessarily one apart). This allows us to create a "batch" number: if the version_number is the same, then we take the previous batch number, if it is different, we increment the previous batch number by one. Finally we need a simple min and max on the dates grouping by the batch number. The result looks like this:
declare #t1 TABLE (
[version_number] [int] NULL,
[eff_dt] [datetime] NOT NULL,
[end_dt] [datetime] NOT NULL
);
INSERT #t1 ([version_number], [eff_dt], [end_dt])
VALUES
(1, CAST(N'2021-05-20T17:14:55.870' AS DateTime), CAST(N'9999-12-31T00:00:00.000' AS DateTime)),
(5, CAST(N'2021-05-14T15:52:50.847' AS DateTime), CAST(N'2021-05-20T17:14:55.860' AS DateTime)),
(4, CAST(N'2021-05-20T17:14:55.863' AS DateTime), CAST(N'2021-05-20T17:14:55.867' AS DateTime)),
(4, CAST(N'2021-05-11T15:36:25.283' AS DateTime), CAST(N'2021-05-14T15:52:50.843' AS DateTime)),
(4, CAST(N'2021-05-06T16:52:45.253' AS DateTime), CAST(N'2021-05-11T15:36:25.283' AS DateTime)),
(4, CAST(N'2021-05-06T16:17:23.130' AS DateTime), CAST(N'2021-05-06T16:52:45.250' AS DateTime)),
(3, CAST(N'2021-05-06T16:13:04.000' AS DateTime), CAST(N'2021-05-06T16:17:23.127' AS DateTime)),
(2, CAST(N'2021-05-06T16:00:59.990' AS DateTime), CAST(N'2021-05-06T16:13:03.997' AS DateTime)),
(1, CAST(N'2021-04-19T20:45:38.997' AS DateTime), CAST(N'2021-05-06T16:00:59.990' AS DateTime)),
(1, CAST(N'2021-04-16T18:35:06.370' AS DateTime), CAST(N'2021-04-19T20:45:38.993' AS DateTime)),
(0, CAST(N'2021-04-13T18:03:26.483' AS DateTime), CAST(N'2021-04-16T18:35:06.367' AS DateTime));
with rowdata as
(
SELECT version_number, eff_dt, end_dt,
ROW_NUMBER() OVER(ORDER BY eff_dt) rn
FROM #t1
),
cte_recursive as
(
SELECT 1 as batchno, rn, version_number, eff_dt, end_dt
FROM rowdata
WHERE version_number = 0
UNION ALL
SELECT CASE WHEN rec.version_number = rd.version_number
THEN rec.batchno
ELSE rec.batchno + 1
END,
rd.rn, rd.version_number, rd.eff_dt, rd.end_dt
FROM cte_recursive rec
INNER JOIN rowdata rd on rec.rn = rd.rn - 1
)
SELECT
version_number, min(eff_dt) as eff_dt, max(end_dt) as end_dt
FROM cte_recursive
GROUP BY version_number, batchno
A couple of points to note. I prefer to use table variables to temporary tables (has a slight advantage that they don't need to be deleted!). Secondly you can insert multiple values separated by commas, as I have shown (no need for multiple inserts).
To help you understand how the recursive element works, we begin by a simple select which is the base case, in this case selecting where version_number is 0. We then build up from that by joining to the recursive part where rn (the value returned by ROW_NUMBER()) is one greater than the value we already have. We simply need to check for a difference in the version_number between our old value and the new row, to decide if the batch number needs incrementing or not.
You may find it helpful to run these queries one at a time, to help you understand what is happening (for example just run the sub-select that includes the row_number()).
BTW it was good of you to add the create statements.

This is easily accomplished using window functions. It's a variation on the gaps and islands problem.
The premise is to identify the islands of consecutive values of the version_number. The first CTE uses lag to compare the current row value to the previous row value and marks the start of the Next Group when the values are different. The second CTE uses sum as a window function to produce a running total of the groups. This provides each group of like version_numbers with its own sequential value.
The final select is then able to group by the version_number and its sequential group number, using the min and max dates for each.
Note also that using windows function and hitting the source table just once will also be significantly more efficient than a recursive solution.
with ng as (
select *, case when Lag(version_number) over(order by end_dt) = version_number then 0 else 1 end as ng
from #t1
), grp as (
select *, Sum(ng) over(order by end_dt) as grp
from ng
)
select version_number, Min(eff_dt) eff_dt, Max(end_dt) end_dt
from grp
group by version_number, grp
order by eff_dt

you can use this Query :
DROP TABLE #t1
CREATE TABLE #t1(
[version_number] [int] NULL,
[eff_dt] [datetime] NOT NULL,
[end_dt] [datetime] NOT NULL
)
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (1, CAST(N'2021-05-20T17:14:55.870' AS DateTime), CAST(N'9999-12-31T00:00:00.000' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (5, CAST(N'2021-05-14T15:52:50.847' AS DateTime), CAST(N'2021-05-20T17:14:55.860' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (4, CAST(N'2021-05-20T17:14:55.863' AS DateTime), CAST(N'2021-05-20T17:14:55.867' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (4, CAST(N'2021-05-11T15:36:25.283' AS DateTime), CAST(N'2021-05-14T15:52:50.843' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (4, CAST(N'2021-05-06T16:52:45.253' AS DateTime), CAST(N'2021-05-11T15:36:25.283' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (4, CAST(N'2021-05-06T16:17:23.130' AS DateTime), CAST(N'2021-05-06T16:52:45.250' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (3, CAST(N'2021-05-06T16:13:04.000' AS DateTime), CAST(N'2021-05-06T16:17:23.127' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (2, CAST(N'2021-05-06T16:00:59.990' AS DateTime), CAST(N'2021-05-06T16:13:03.997' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (1, CAST(N'2021-04-19T20:45:38.997' AS DateTime), CAST(N'2021-05-06T16:00:59.990' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (1, CAST(N'2021-04-16T18:35:06.370' AS DateTime), CAST(N'2021-04-19T20:45:38.993' AS DateTime))
GO
INSERT #t1 ([version_number], [eff_dt], [end_dt]) VALUES (0, CAST(N'2021-04-13T18:03:26.483' AS DateTime), CAST(N'2021-04-16T18:35:06.367' AS DateTime))
GO
SELECT DISTINCT t.[version_number] , eff_Table.eff_dt , end_Table.end_dt FROM #t1 t INNER JOIN
(SELECT t.version_number, t.eff_dt
,ROW_NUMBER() OVER (PARTITION BY t.version_number ORDER BY t.eff_dt) AS FirstID
FROM #t1 t ) AS eff_Table ON t.version_number = eff_Table.version_number
INNER JOIN (
SELECT t.version_number,
t.end_dt
,ROW_NUMBER() OVER (PARTITION BY t.version_number ORDER BY t.end_dt desc) AS SecondID
FROM #t1 t ) AS end_Table ON end_Table.version_number = t.version_number
WHERE eff_Table.FirstID = 1 AND end_Table.SecondID = 1

Related

How to improve my double select query that takes too long to execute

My table has 2 date fields: start and end. I have millions of records with random durations.
For every date covered by my table I want to know the count of records.
I hope there is a different and cleaner way to approach my problem because I've come up with a solution that will take forever to compute, which is:
First create an intermediary table with the dates
DROP TABLE IF EXISTS time_points;
SELECT DISTINCT start_date AS time_point
INTO time_points
FROM data
UNION
SELECT DISTINCT end_date FROM data
; -- Totals 8.2 million records
Then create a request using both tables
DROP TABLE IF EXISTS results;
SELECT time_points.time_point,
(SELECT COUNT(*)
FROM data
WHERE start_date <= time_points.time_point AND end_date >= time_points.time_point
) AS n
INTO results
FROM time_points
;
Now I am waiting. When I run the second query with TOP(10), it takes 30 seconds. So it would take about 6 months to get my results. I would hope I could get them in half a day maybe.
-> Sample data
-> here is a sample of data
CREATE TABLE [dbo].[data_table](
[start_date] [datetime] NULL,
[end_date] [datetime] NULL
) ON [PRIMARY]
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-08T11:07:24.000' AS DateTime), CAST(N'2021-12-08T11:28:41.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-08T11:09:55.000' AS DateTime), CAST(N'2021-12-08T12:18:21.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-14T12:30:01.000' AS DateTime), CAST(N'2021-12-14T12:38:40.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-19T11:17:46.000' AS DateTime), CAST(N'2021-12-19T12:20:48.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-31T16:34:07.000' AS DateTime), CAST(N'2021-12-31T16:39:14.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-09T07:17:56.000' AS DateTime), CAST(N'2021-12-09T07:35:31.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-14T07:52:21.000' AS DateTime), CAST(N'2021-12-14T07:59:02.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-08T07:27:11.000' AS DateTime), CAST(N'2021-12-08T07:30:01.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-01T08:01:58.000' AS DateTime), CAST(N'2021-12-01T08:06:14.000' AS DateTime))
GO
INSERT [dbo].[data_table] ([start_date], [end_date]) VALUES (CAST(N'2021-12-16T07:19:46.000' AS DateTime), CAST(N'2021-12-16T07:22:46.000' AS DateTime))
GO
-> Sample result
DROP TABLE IF EXISTS time_points;
SELECT DISTINCT start_date AS time_point
INTO time_points
FROM data_table
UNION
SELECT DISTINCT end_date FROM data_table
;
DROP TABLE IF EXISTS results;
SELECT time_points.time_point,
(SELECT COUNT(*)
FROM data_table
WHERE start_date <= time_points.time_point AND end_date >= time_points.time_point
) AS n
INTO results
FROM time_points
;
SELECT * FROM results
;
(only 2 lines of result:)
time_point n
2021-12-08 11:07:24.000 1
2021-12-08 11:09:55.000 2

SQL : 12 hr shift function from 7am - 7am

I need to query all production records by 12 hr. shift. 7am-7am. if the date is after midnight and before 7am it's actually the previous day shift. In the below example I need to make them all 2022-01-01 like the last column. If I query by 2022-01-01 I don't get all the rows. Can I use a function for this to compare the time and make it the previous day?
declare #temp table
(
Emp_id int,
Time datetime,
shiftDate date
);
insert into #temp values (1, '2022-01-01 08:10:00:000', '2022-01-01')
insert into #temp values (1, '2022-01-01 10:21:00:000', '2022-01-01')
insert into #temp values (1, '2022-01-01 13:10:00:000', '2022-01-01')
insert into #temp values (1, '2022-01-01 22:22:00:000', '2022-01-01')
insert into #temp values (1, '2022-01-02 02:15:00:000', '2022-01-01')
insert into #temp values (1, '2022-01-02 04:22:00:000', '2022-01-01')
insert into #temp values (1, '2022-01-02 06:18:00:000', '2022-01-01')
insert into #temp values (1, '2022-01-02 06:55:00:000', '2022-01-01')
select * from #temp
select * from #temp
where convert(date, [time]) = '2022-01-01'

Partition the date into a weeks from a given date to the last date in the record

I wanted to count the time gap between two rows for the same id if the second is less than an hour after the first, and partition the count for the week.
Suppose given date with time is 2020-07-01 08:00
create table #Temp (
Id integer not null,
Time datetime not null
);
insert into #Temp values (1, '2020-07-01 08:00');
insert into #Temp values (1, '2020-07-01 08:01');
insert into #Temp values (1, '2020-07-01 08:06');
insert into #Temp values (1, '2020-07-01 08:30');
insert into #Temp values (1, '2020-07-08 09:35');
insert into #Temp values (1, '2020-07-15 16:10');
insert into #Temp values (1, '2020-07-15 16:20');
insert into #Temp values (1, '2020-07-17 06:40');
insert into #Temp values (1, '2020-07-17 06:41');
insert into #Temp values (2, '2020-07-01 08:30');
insert into #Temp values (2, '2020-07-01 09:26');
insert into #Temp values (2, '2020-07-01 10:25');
insert into #Temp values (2, '2020-07-09 08:30');
insert into #Temp values (2, '2020-07-09 09:26');
insert into #Temp values (2, '2020-07-09 10:25');
insert into #Temp values (3, '2020-07-21 08:30');
insert into #Temp values (3, '2020-07-21 09:26');
insert into #Temp values (3, '2020-07-21 10:25');
The week should extend up to the last date in the record. Here, the last date is
2020-07-21 10:25
Have to transform the output from this piece of code and divide the duration weekly.
select Id, sum(datediff(minute, Time, next_ts)) as duration_minutes
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from #Temp t
) t
where datediff(minute, Time, next_ts) < 60
group by Id;
Output:
id duration_minutes
1 41
2 230
3 115
The desired output should divide this duration on a weekly basis,
like Week 1, Week 2, Week 3, and so on.
Desired Output:
If the
start date is 2020-07-01 08:00
end date is 2020-07-21 10:25
id | Week 1 | Week 2 | Week 3
--------------------------------------
1 | 30 | 0 | 11
2 | 115 | 115 | 0
3 | 0 | 0 | 115
similarly, if the
start date is 2020-07-08 08:00
id | Week 1 | Week 2
---------------------------
1 | 11 | 0
2 | 115 | 0
3 | 0 | 115

Is this what you want?
select Id,
1 + datediff(second, '2020-07-01 06:00', time) / (24 * 60 * 60 * 7) as week_num,
sum(datediff(minute, Time, next_ts)) as duration_minutes
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from Temp t
) t
where datediff(minute, Time, next_ts) < 60
group by Id, datediff(second, '2020-07-01 06:00', time) / (24 * 60 * 60 * 7)
order by id, week_num;
Here is a db<>fiddle.

I am not able to understand the logic behind the week periods. Anyone, in the example below I am using the following code to set the week:
'Week ' + CAST(DENSE_RANK() OVER (ORDER BY DATEDIFF(DAY, #FirstDate, next_ts) / 7) AS VARCHAR(12))
You can adjust it to ignore the ours, be more precise or something else to match your real requirements.
Apart from that, you just need to perform a dynamic PIVOT. Here is the full working example:
DROP TABLE IF EXISTS #Temp;
create table #Temp (
Id integer not null,
Time datetime not null
);
insert into #Temp values (1, '2020-07-01 08:00');
insert into #Temp values (1, '2020-07-01 08:01');
insert into #Temp values (1, '2020-07-01 08:06');
insert into #Temp values (1, '2020-07-01 08:30');
insert into #Temp values (1, '2020-07-08 09:35');
insert into #Temp values (1, '2020-07-15 16:10');
insert into #Temp values (1, '2020-07-15 16:20');
insert into #Temp values (1, '2020-07-17 06:40');
insert into #Temp values (1, '2020-07-17 06:41');
insert into #Temp values (2, '2020-07-01 08:30');
insert into #Temp values (2, '2020-07-01 09:26');
insert into #Temp values (2, '2020-07-01 10:25');
insert into #Temp values (2, '2020-07-09 08:30');
insert into #Temp values (2, '2020-07-09 09:26');
insert into #Temp values (2, '2020-07-09 10:25');
insert into #Temp values (3, '2020-07-21 08:30');
insert into #Temp values (3, '2020-07-21 09:26');
insert into #Temp values (3, '2020-07-21 10:25');
DROP TABLE IF EXISTS #TEST
CREATE TABLE #TEST
(
[ID] INT
,[week_day] VARCHAR(12)
,[time_in_minutes] BIGINT
)
DECLARE #FirstDate DATE;
SELECT #FirstDate = MIN(Time)
FROM #Temp
INSERT INTO #TEST
select id
,'Week ' + CAST(DENSE_RANK() OVER (ORDER BY DATEDIFF(DAY, #FirstDate, next_ts) / 7) AS VARCHAR(12))
,datediff(minute, Time, next_ts)
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from #Temp t
) t
where datediff(minute, Time, next_ts) < 60
DECLARE #columns NVARCHAR(MAX);
SELECT #columns = STUFF
(
(
SELECT ',' + QUOTENAME([week_day])
FROM
(
SELECT DISTINCT CAST(REPLACE([week_day], 'Week ', '') AS INT)
,[week_day]
FROM #TEST
) DS ([rowID], [week_day])
ORDER BY [rowID]
FOR XML PATH(''), TYPE
).value('.', 'VARCHAR(MAX)')
,1
,1
,''
);
DECLARE #DanymicSQL NVARCHAR(MAX);
SET #DanymicSQL = N'
SELECT [ID], ' + #columns + '
FROM #TEST
PIVOT
(
SUM([time_in_minutes]) FOR [week_day] IN (' + #columns + ')
) PVT';
EXEC sp_executesql #DanymicSQL;

How Result using cross apply, can achieve using any other joins like inner,left,right join in sql server

This is my SQL script with sample data
CREATE TABLE [dbo].[Employee]
(
[ID] [INT] IDENTITY(1,1) NOT NULL,
[Name] [VARCHAR](100) NULL
) ON [PRIMARY]
GO
CREATE TABLE [dbo].[LoginEntry]
(
[ID] [INT] IDENTITY(1,1) NOT NULL,
[LoginTime] [DATETIME] NULL,
[EmpID] [INT] NULL,
[GateNumber] [VARCHAR](50) NULL
) ON [PRIMARY]
GO
ALTER TABLE Employee
ADD CONSTRAINT Pk_Employee PRIMARY KEY (Id)
GO
ALTER TABLE LoginEntry
ADD CONSTRAINT Fk_LoginEntry_Employee
FOREIGN KEY (EmpId) REFERENCES Employee(Id)
GO
SET IDENTITY_INSERT [dbo].[Employee] ON
GO
INSERT [dbo].[Employee] ([ID], [Name])
VALUES (1, N'Employee 1'), (2, N'Employee 2'), (3, N'Employee 3'),
(4, N'Employee 4'), (5, N'Employee 5'), (6, N'Employee 6')
GO
SET IDENTITY_INSERT [dbo].[Employee] OFF
GO
SET IDENTITY_INSERT [dbo].[LoginEntry] ON
GO
INSERT [dbo].[LoginEntry] ([ID], [LoginTime], [EmpID], [GateNumber])
VALUES (1, CAST(N'2014-10-24 08:00:00.000' AS DateTime), 1, N'Gate 1'),
(2, CAST(N'2014-10-24 09:00:00.000' AS DateTime), 1, N'Gate 1'),
(3, CAST(N'2014-10-24 10:00:00.000' AS DateTime), 1, N'Gate 2'),
(4, CAST(N'2014-10-24 08:00:00.000' AS DateTime), 2, N'Gate 1'),
(5, CAST(N'2014-10-24 09:00:00.000' AS DateTime), 2, N'Gate 1'),
(6, CAST(N'2014-10-24 10:00:00.000' AS DateTime), 2, N'Gate 2'),
(7, CAST(N'2014-10-24 08:00:00.000' AS DateTime), 3, N'Gate 1'),
(8, CAST(N'2014-10-24 09:00:00.000' AS DateTime), 3, N'Gate 1'),
(9, CAST(N'2014-10-24 10:00:00.000' AS DateTime), 3, N'Gate 2'),
(10, CAST(N'2014-10-24 08:00:00.000' AS DateTime), 4, N'Gate 1'),
(11, CAST(N'2014-10-24 09:00:00.000' AS DateTime), 4, N'Gate 1'),
(19, CAST(N'2014-10-24 08:00:00.000' AS DateTime), 5, N'Gate 1'),
(20, CAST(N'2014-10-24 09:00:00.000' AS DateTime), 5, N'Gate 1'),
(21, CAST(N'2014-10-24 10:00:00.000' AS DateTime), 5, N'Gate 2'),
(22, CAST(N'2014-10-24 08:00:00.000' AS DateTime), 6, N'Gate 1'),
(23, CAST(N'2014-10-24 09:00:00.000' AS DateTime), 6, N'Gate 1'),
(24, CAST(N'2014-10-24 10:00:00.000' AS DateTime), 6, N'Gate 2')
SET IDENTITY_INSERT [dbo].[LoginEntry] OFF
GO
SELECT
e.ID, dt.EmpId, Name, LoginTime
FROM
Employee e
CROSS APPLY
(SELECT TOP 1
l.ID, l.LoginTime, l.EmpId
FROM
LoginEntry l
WHERE
l.EmpId = e.id) dt
GO
The result I get:
ID EmpId Name LoginTime
-----------------------------------------------
1 1 Employee 1 2014-10-24 08:00:00.000
2 2 Employee 2 2014-10-24 08:00:00.000
3 3 Employee 3 2014-10-24 08:00:00.000
4 4 Employee 4 2014-10-24 08:00:00.000
5 5 Employee 5 2014-10-24 08:00:00.000
6 6 Employee 6 2014-10-24 08:00:00.000
I am Expecting the same result using Joins(inner,right,left,full) in sql server i tried my luck but couldn't, pls can any one help me out thanks in advance

First, your query is incomplete. When you use TOP 1 without an ORDER BY you never have a guarantee which one it will pick. New data, concurrent processes, re-indexing, software patches, the time of day, they all can cause the result to change.
So, it should be something like...
SELECT
e.ID,dt.EmpId,Name,LoginTime
FROM
Employee e
CROSS APPLY
(
SELECT TOP 1
l.ID
,l.LoginTime
,l.EmpId
FROM
LoginEntry l
WHERE
l.EmpId=e.id
ORDER BY
l.LoginTime DESC -- Will cause TOP 1 to pick the most recent value (per employee)
)
dt
As for doing it with joins, doing the TOP 1 (or greatest-n-per-group, for which your n is 1), is longer, messier, and slower. So I won't go in to that.
But you can use ROW_NUMBER() to do the TOP 1 part, and then use a JOIN to associate that result with your main table...
WITH
ordered_logins AS
(
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY EmpID ORDER BY LoginTime DESC, ID DESC) AS row_ordinal
FROM
LoginEntry
)
SELECT
e.ID, l.EmpId, e.Name, l.LoginTime
FROM
Employee e
LEFT JOIN
ordered_logins l
ON l.EmpID = e.ID
AND l.row_oridnal = 1
The ROW_NUMBER() assigns every row a value from 1 upwards (per EmpID - the partition clause). It's order by loginTime descending, so the latest logins are first, and just in case two logins have the exact same time, it's secondarily ordered by ID desc.
Then the LEFT JOIN only picks the rows numbered 1 (the latest logins) and if there are no logins gives NULLs instead (so that the employee record is Not discarded due to a lack of join).
Note: the LEFT JOIN equivalent for APPLY is to use OUTER APPLY instead of CROSS APPLY.

Just for the sake of showing one more way to solve the problem, the "longer, messier, and slower" JOIN method that (I thought) MatBailie didn't show would look like this:
SELECT TOP (1) WITH TIES
e.ID
,l.EmpID
,e.Name
,l.LoginTime
FROM
dbo.Employee AS e
JOIN
dbo.LoginEntry AS l
ON
l.EmpID = e.ID
ORDER BY
ROW_NUMBER() OVER (PARTITION BY e.ID ORDER BY l.LoginTime DESC, ID DESC)
The ROW_NUMBER in the ORDER BY clause takes all of the logins for each Employee ID and numbers them by LoginTime with the most recent first (and using the LoginEntry ID as tie breaker, and thanks for that touch, Mat).
Then, the SELECT TOP (1) WITH TIES does its thing. The WITH TIES bit means that it selects the number one result from each PARTITION BY group in the ORDER BY clause.

you can use window function row_number()
with cte as(
SELECT e.ID,l.EmpId,Name,l.LoginTime, row_number() over(partition by e.ID order by l.LoginTime) as rn
FROM Employee e join LoginEntry l on l.EmpId=e.id
) select ID,EmpId,Name,LoginTime from cte where rn=1
demo link

TSQL - Delete All Rows Except 1 Per Group

Let's say I have 5 workcenters (Workcenter 1, Workcenter 2, Workcenter 3, Workcenter 4, Workcenter 5)
Each workcenter has several rows of notes that are ordered by the date the data was entered. I would like to delete all rows per workcenter except the row of data that was entered last.
If my columns are: ID | Workcenter | Note | Log_Date
How would I go about doing this?
My code is only giving me the most current note entry for the entire table, but I want one per workcenter.
This is what I have right now:
DELETE FROM #Table
WHERE ID NOT IN (SELECT TOP 1 ID FROM #Table
GROUP BY Workcenter, ID
ORDER BY Log_Date DESC)

try this:
delete t1 from table t1
where not exists
(select 1 from
(select workcenter,max(log_date)as log_date from table group by workcenter) t2
where t1.workcenter = t2.workcenter and t1.log_date = t2.log_date
)
use exists subquery to get the max log_date for each workcenter and then connect them to the table.

using CTE we can achieve this:
;WITH cte AS
(SELECT ROW_NUMBER() OVER (PARTITION BY name ORDER BY createdate DESC ) AS rowno, * FROM workgroups)
DELETE FROM cte WHERE rowno !=1;
CREATE TABLE workgroups(id INT IDENTITY(1,1),name VARCHAR(50), createdate DATETIME DEFAULT GETDATE())
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (1, N'workgroup1', CAST(0x0000A60F011F7840 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (2, N'workgroup1', CAST(0x0000A60F011F7F8E AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (3, N'workgroup1', CAST(0x0000A60F011F8728 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (4, N'workgroup2', CAST(0x0000A60F011F92B9 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (5, N'workgroup2', CAST(0x0000A60F011F97C0 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (6, N'workgroup3', CAST(0x0000A60F011FA443 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (7, N'workgroup3', CAST(0x0000A60F011FA73B AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (8, N'workgroup3', CAST(0x0000A60F011FA9FB AS DateTime))
SELECT ROW_NUMBER() OVER (PARTITION BY name ORDER BY createdate DESC ) AS rowno, * FROM workgroups
;WITH cte AS
(SELECT ROW_NUMBER() OVER (PARTITION BY name ORDER BY createdate DESC ) AS rowno, * FROM workgroups)
DELETE FROM cte WHERE rowno !=1;

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Query to summarize rows from table with version history - sql

Related

How to improve my double select query that takes too long to execute

SQL : 12 hr shift function from 7am - 7am

Partition the date into a weeks from a given date to the last date in the record

How Result using cross apply, can achieve using any other joins like inner,left,right join in sql server

TSQL - Delete All Rows Except 1 Per Group

Categories

Resources