Select one row per specific time - sql

I have a table that looks like this:
ID UserID DateTime TypeID
1 1 1/1/2010 10:00:00 1
2 2 1/1/2010 10:01:50 1
3 1 1/1/2010 10:02:50 1
4 1 1/1/2010 10:03:50 1
5 1 1/1/2010 11:00:00 1
6 2 1/1/2010 11:00:50 1
I need to query all users where their typeID is 1, but have only one row per 15 mins
For example, the result should be:
1 1 1/1/2010 10:00:00 1
2 2 1/1/2010 10:01:50 1
5 1 1/1/2010 11:00:00 1
6 2 1/1/2010 11:00:50 1
IDs 3 & 4 are not shown because 15 min haven't been passed since the last record for the specific userID.
IDs 1 & 5 are shown because 15 minutes has been passed for this specific userID
Same as for IDs 2 & 6.
How can I do it?
Thanks

Try this:
select * from
(
select ID, UserID,
Max(DateTime) as UpperBound,
Min(DateTime) as LowerBound,
TypeID
from the_table
where TypeID=1
group by ID,UserID,TypeID
) t
where datediff(mi,LowerBound,UpperBound)>=15
EDIT: SINCE MY ABOVE ATTEMPT WAS WRONG, I'm adding one more approach using a Sql table-valued Function that does not require recursion, since, understandable, it's a big concern.
Step 1: Create a table-type as follows (LoginDate is the DateTime column in Shay's example - DateTime name conflicts with a SQL data type and I think it's wise to avoid these conflicts)
CREATE TYPE [dbo].[TVP] AS TABLE(
[ID] [int] NOT NULL,
[UserID] [int] NOT NULL,
[LoginDate] [datetime] NOT NULL,
[TypeID] [int] NOT NULL
)
GO
Step 2: Create the following Function:
CREATE FUNCTION [dbo].[fnGetLoginFreq]
(
-- notice: TVP is the type (declared above)
#TVP TVP readonly
)
RETURNS
#Table_Var TABLE
(
-- This will be our result set
ID int,
UserId int,
LoginTime datetime,
TypeID int,
RowNumber int
)
AS
BEGIN
--We will insert records in this table as we go through the rows in the
--table passed in as parameter and decide that we should add an entry because
--15' had elapsed between logins
DECLARE #temp table
(
ID int,
UserId int,
LoginTime datetime,
TypeID int
)
-- seems silly, but is not because we need to add a row_number column to help
-- in our iteration and table-valued paramters cannot be modified inside the function
insert into #Table_var
select ID,UserID,Logindate,TypeID,row_number() OVER(ORDER BY UserID,LoginDate) AS [RowNumber]
from #TVP order by UserID asc,LoginDate desc
declare #Index int,#End int,#CurrentLoginTime datetime, #NextLoginTime datetime, #CurrentUserID int , #NextUserID int
select #Index=1,#End=count(*) from #Table_var
while(#Index<=#End)
begin
select #CurrentLoginTime=LoginTime,#CurrentUserID=UserID from #Table_var where RowNumber=#Index
select #NextLoginTime=LoginTime,#NextUserID=UserID from #Table_var where RowNumber=(#Index+1)
if(#CurrentUserID=#NextUserID)
begin
if( abs(DateDiff(mi,#CurrentLoginTime,#NextLoginTime))>=15)
begin
insert into #temp
select ID,UserID,LoginTime,TypeID
from #Table_var
where RowNumber=#Index
end
END
else
bEGIN
insert into #temp
select ID,UserID,LoginTime,TypeID
from #Table_var
where RowNumber=#Index and UserID=#CurrentUserID
END
if(#Index=#End)--last element?
begin
insert into #temp
select ID,UserID,LoginTime,TypeID
from #Table_var
where RowNumber=#Index and not
abs((select datediff(mi,#CurrentLoginTime,max(LoginTime)) from #temp where UserID=#CurrentUserID))<=14
end
select #Index=#Index+1
end
delete from #Table_var
insert into #Table_var
select ID, UserID ,LoginTime ,TypeID ,row_number() OVER(ORDER BY UserID,LoginTime) AS 'RowNumber'
from #temp
return
END
Step 3: Give it a spin
declare #TVP TVP
INSERT INTO #TVP
select ID,UserId,[DateType],TypeID from Shays_table where TypeID=1 --AND any other date restriction you want to add
select * from fnGetLoginFreq(#TVP) order by LoginTime asc
My tests returned this:
ID UserId LoginTime TypeID RowNumber
2 2 2010-01-01 10:01:50.000 1 3
4 1 2010-01-01 10:03:50.000 1 1
5 1 2010-01-01 11:00:00.000 1 2
6 2 2010-01-01 11:00:50.000 1 4

How about this, it's fairly straight forward and gives you the result you need:
SELECT ID, UserID, [DateTime], TypeID
FROM Users
WHERE Users.TypeID = 1
AND NOT EXISTS (
SELECT TOP 1 1
FROM Users AS U2
WHERE U2.ID <> Users.ID
AND U2.UserID = Users.UserID
AND U2.[DateTime] BETWEEN DATEADD(MI, -15, Users.[DateTime]) AND Users.[DateTime]
AND U2.TypeID = 1)
The NOT EXISTS restricts to only show records that have no record within 15minutes before them, so you will see the first record in a block rather than one every 15mins.
Edit: Since you want to see one every 15mins this should do without using recursion:
SELECT Users.ID, Users.UserID, Users.[DateTime], Users.TypeID
FROM
(
SELECT MIN(ID) AS ID, UserID,
DATEADD(minute, DATEDIFF(minute,0,[DateTime]) / 15 * 15, 0) AS [DateTime]
FROM Users
GROUP BY UserID, DATEADD(minute, DATEDIFF(minute,0,[DateTime]) / 15 * 15, 0)
) AS Dates
INNER JOIN Users AS Users ON Users.ID = Dates.ID
WHERE Users.TypeID = 1
AND NOT EXISTS (
SELECT TOP 1 1
FROM
(
SELECT MIN(ID) AS ID, UserID,
DATEADD(minute, DATEDIFF(minute,0,[DateTime]) / 15 * 15, 0) AS [DateTime]
FROM Users
GROUP BY UserID, DATEADD(minute, DATEDIFF(minute,0,[DateTime]) / 15 * 15, 0)
) AS Dates2
INNER JOIN Users AS U2 ON U2.ID = Dates2.ID
WHERE U2.ID <> Users.ID
AND U2.UserID = Users.UserID
AND U2.[DateTime] BETWEEN DATEADD(MI, -15, Users.[DateTime]) AND Users.[DateTime]
AND U2.TypeID = 1
)
ORDER BY Users.DateTime
If this doesn't work please post more sample data so that I can see what is missing.
Edit2 same as directly above but just using CTE now instead for improved readability and help improve maintainability, also I improved it to highlighted where you would also restrict the Dates table by whatever DateTime range that you would be restricting to the main query:
WITH Dates(ID, UserID, [DateTime])
AS
(
SELECT MIN(ID) AS ID, UserID,
DATEADD(minute, DATEDIFF(minute,0,[DateTime]) / 15 * 15, 0) AS [DateTime]
FROM Users
WHERE Users.TypeID = 1
--AND Users.[DateTime] BETWEEN #StartDateTime AND #EndDateTime
GROUP BY UserID, DATEADD(minute, DATEDIFF(minute,0,[DateTime]) / 15 * 15, 0)
)
SELECT Users.ID, Users.UserID, Users.[DateTime], Users.TypeID
FROM Dates
INNER JOIN Users ON Users.ID = Dates.ID
WHERE Users.TypeID = 1
--AND Users.[DateTime] BETWEEN #StartDateTime AND #EndDateTime
AND NOT EXISTS (
SELECT TOP 1 1
FROM Dates AS Dates2
INNER JOIN Users AS U2 ON U2.ID = Dates2.ID
WHERE U2.ID <> Users.ID
AND U2.UserID = Users.UserID
AND U2.[DateTime] BETWEEN DATEADD(MI, -15, Users.[DateTime]) AND Users.[DateTime]
AND U2.TypeID = 1
)
ORDER BY Users.DateTime
Also as a performance note, whenever dealing with something that might end up being recursive like this potentially could be (from other answers), you should straight away be considering if you are able to restrict the main query by a date range in general even if it's a whole year or longer range

You can use a recursive CTE for this though I would also evaluate a cursor if the result set is at all large as it may work out more efficient.
I've left out the ID column in my answer. If you really need it it would be possible to add it. It just makes the anchor part of the recursive CTE a bit more unwieldy.
DECLARE #T TABLE
(
ID INT PRIMARY KEY,
UserID INT,
[DateTime] DateTime,
TypeID INT
)
INSERT INTO #T
SELECT 1,1,'20100101 10:00:00', 1 union all
SELECT 2,2,'20100101 10:01:50', 1 union all
SELECT 3,1,'20100101 10:02:50', 1 union all
SELECT 4,1,'20100101 10:03:50', 1 union all
SELECT 5,1,'20100101 11:00:00', 1 union all
SELECT 6,2,'20100101 11:00:50', 1;
WITH RecursiveCTE
AS (SELECT UserID,
MIN([DateTime]) As [DateTime],
1 AS TypeID
FROM #T
WHERE TypeID = 1
GROUP BY UserID
UNION ALL
SELECT UserID,
[DateTime],
TypeID
FROM (
--Can't use TOP directly
SELECT T.*,
rn = ROW_NUMBER() OVER (PARTITION BY T.UserID ORDER BY
T.[DateTime])
FROM #T T
JOIN RecursiveCTE R
ON R.UserID = T.UserID
AND T.[DateTime] >=
DATEADD(MINUTE, 15, R.[DateTime])) R
WHERE R.rn = 1)

Related

Double record when both conditions are met SQL

I have a record in my table:
What I need is to create a column with order state: '1' if order was created, '0' if order was cancelled.
So for this example, when there was both creation and cancellation I need two states. The final table should be:
How can I do this?
I think you can simply do a UNION like this:
select OrderCreateDate, OrderCancelDate, ReportDate, 1 as OrderState
from your_table
where orderCreateDate is not null
union all
select OrderCreateDate, OrderCancelDate, ReportDate, 0 as OrderState
from your_table
where orderCancelDate is not null
One way to do this is to join your table multiple times with a constraint on the join to limit your result set; this is an easy way to pivot your data, but it can affect performance.
DECLARE #a TABLE (id INT, createdate date,canceldate date,reportdate DATE)
INSERT INTO #a (id, createdate, canceldate, reportdate)
VALUES (
1, -- id - int
GETDATE(), -- createdate - date
GETDATE(), -- canceldate - date
GETDATE() -- reportdate - date
)
INSERT INTO #a (id, createdate, canceldate, reportdate)
VALUES (
2, -- id - int
GETDATE(), -- createdate - date
null, -- canceldate - date
GETDATE() -- reportdate - date
)
SELECT a.id,a.createdate,a.canceldate,a.reportdate,CASE WHEN a1.id IS NOT NULL THEN '1' ELSE 0 END AS 'createdInd'
,CASE WHEN a2.id IS NOT NULL THEN '1' ELSE 0 END AS 'CancelledInd'
FROM #a a
LEFT JOIN #a a1 ON a.id = a1.id AND a1.createdate IS NOT NULL
LEFT JOIN #a a2 ON a.id = a2.id AND a2.canceldate IS NOT NULL
id createdate canceldate reportdate createdInd CancelledInd
1 2021-04-07 2021-04-07 2021-04-07 1 1
2 2021-04-07 NULL 2021-04-07 1 0
Join to the table a query that returns the values 1 and 0:
SELECT t.*, s.OrderState
FROM tablename AS t
INNER JOIN (SELECT 1 AS OrderState UNION ALL SELECT 0) AS s
ON (s.OrderState = 1 AND t.OrderCreateDate IS NOT NULL)
OR (s.OrderState = 0 AND t.OrderCancelDate IS NOT NULL)

SQL query when result is empty

I have a table like this
USER itemnumber datebought (YYYYmmDD)
a 1 20160101
b 2 20160202
c 3 20160903
d 4 20160101
Now I have to show the total number of items bought by each user after date 20160202 (2 february 2016)
I used
SELECT USER, COUNT(itemnumber)<br/>
FROM TABLE<br/>
WHERE datebought >= 20160202<br/>
GROUP BY USER<br>
It gives me results
b 1
c 1
but I want like this
a 0
b 1
c 1
d 0
Please tell me what is the most quick method / efficient method to do that ?
Try like this,
DECLARE #table TABLE
(
[USER] VARCHAR(1),
itemnumber INT,
datebought DATE
)
INSERT INTO #TABLE VALUES
('a',1,'20160101'),
('b',2,'20160202'),
('b',2,'20160202'),
('b',2,'20160202'),
('c',3,'20160903'),
('d',4,'20160101')
SELECT *
FROM #TABLE
SELECT [USER],
Sum(CASE
WHEN datebought >= '20160202' THEN 1
ELSE 0
END) AS ITEMCOUNT
FROM #TABLE
GROUP BY [USER]
Use this
SELECT USER, COUNT(itemnumber)
FROM TABLE
WHERE datebought >= 20160202
GROUP BY USER
Though this query won't be a good idea for the large amount of data:
SELECT USER, COUNT(itemnumber)
FROM TABLE
WHERE datebought >= 20160202
GROUP BY USER
UNION
SELECT DISTINCT USER, 0
FROM TABLE
WHERE datebought < 20160202
USE tempdb
GO
DROP TABLE test1
CREATE TABLE test1(a NVARCHAR(10), ino INT, datebought INT)
INSERT INTO dbo.test1
( a, ino, datebought )
VALUES ( 'a' , 1 , 20160101)
INSERT INTO dbo.test1
( a, ino, datebought )
VALUES ( 'b' , 2 , 20160202)
INSERT INTO dbo.test1
( a, ino, datebought )
VALUES ( 'c' , 3 , 20160903)
INSERT INTO dbo.test1
( a, ino, datebought )
VALUES ( 'd' , 4 , 20160101)
SELECT * FROM dbo.test1
SELECT a, COUNT(ino) OVER(PARTITION BY a) FROM dbo.test1
WHERE datebought>=20160202
UNION ALL
SELECT a, 0 FROM dbo.test1
WHERE datebought<20160202
ORDER BY a

Find conflicted date intervals using SQL

Suppose I have following table in Sql Server 2008:
ItemId StartDate EndDate
1 NULL 2011-01-15
2 2011-01-16 2011-01-25
3 2011-01-26 NULL
As you can see, this table has StartDate and EndDate columns. I want to validate data in these columns. Intervals cannot conflict with each other. So, the table above is valid, but the next table is invalid, becase first row has End Date greater than StartDate in the second row.
ItemId StartDate EndDate
1 NULL 2011-01-17
2 2011-01-16 2011-01-25
3 2011-01-26 NULL
NULL means infinity here.
Could you help me to write a script for data validation?
[The second task]
Thanks for the answers.
I have a complication. Let's assume, I have such table:
ItemId IntervalId StartDate EndDate
1 1 NULL 2011-01-15
2 1 2011-01-16 2011-01-25
3 1 2011-01-26 NULL
4 2 NULL 2011-01-17
5 2 2011-01-16 2011-01-25
6 2 2011-01-26 NULL
Here I want to validate intervals within a groups of IntervalId, but not within the whole table. So, Interval 1 will be valid, but Interval 2 will be invalid.
And also. Is it possible to add a constraint to the table in order to avoid such invalid records?
[Final Solution]
I created function to check if interval is conflicted:
CREATE FUNCTION [dbo].[fnIntervalConflict]
(
#intervalId INT,
#originalItemId INT,
#startDate DATETIME,
#endDate DATETIME
)
RETURNS BIT
AS
BEGIN
SET #startDate = ISNULL(#startDate,'1/1/1753 12:00:00 AM')
SET #endDate = ISNULL(#endDate,'12/31/9999 11:59:59 PM')
DECLARE #conflict BIT = 0
SELECT TOP 1 #conflict = 1
FROM Items
WHERE IntervalId = #intervalId
AND ItemId <> #originalItemId
AND (
(ISNULL(StartDate,'1/1/1753 12:00:00 AM') >= #startDate
AND ISNULL(StartDate,'1/1/1753 12:00:00 AM') <= #endDate)
OR (ISNULL(EndDate,'12/31/9999 11:59:59 PM') >= #startDate
AND ISNULL(EndDate,'12/31/9999 11:59:59 PM') <= #endDate)
)
RETURN #conflict
END
And then I added 2 constraints to my table:
ALTER TABLE dbo.Items ADD CONSTRAINT
CK_Items_Dates CHECK (StartDate IS NULL OR EndDate IS NULL OR StartDate <= EndDate)
GO
and
ALTER TABLE dbo.Items ADD CONSTRAINT
CK_Items_ValidInterval CHECK (([dbo].[fnIntervalConflict]([IntervalId], ItemId,[StartDate],[EndDate])=(0)))
GO
I know, the second constraint slows insert and update operations, but it is not very important for my application.
And also, now I can call function fnIntervalConflict from my application code before inserts and updates of data in the table.
Something like this should give you all overlaping periods
SELECT
*
FROM
mytable t1
JOIN mytable t2 ON t1.EndDate>t2.StartDate AND t1.StartDate < t2.StartDate
Edited for Adrians comment bellow
This will give you the rows that are incorrect.
Added ROW_NUMBER() as I didnt know if all entries where in order.
-- Testdata
declare #date datetime = '2011-01-17'
;with yourTable(itemID, startDate, endDate)
as
(
SELECT 1, NULL, #date
UNION ALL
SELECT 2, dateadd(day, -1, #date), DATEADD(day, 10, #date)
UNION ALL
SELECT 3, DATEADD(day, 60, #date), NULL
)
-- End testdata
,tmp
as
(
select *
,ROW_NUMBER() OVER(order by startDate) as rowno
from yourTable
)
select *
from tmp t1
left join tmp t2
on t1.rowno = t2.rowno - 1
where t1.endDate > t2.startDate
EDIT:
As for the updated question:
Just add a PARTITION BY clause to the ROW_NUMBER() query and alter the join.
-- Testdata
declare #date datetime = '2011-01-17'
;with yourTable(itemID, startDate, endDate, intervalID)
as
(
SELECT 1, NULL, #date, 1
UNION ALL
SELECT 2, dateadd(day, 1, #date), DATEADD(day, 10, #date),1
UNION ALL
SELECT 3, DATEADD(day, 60, #date), NULL, 1
UNION ALL
SELECT 4, NULL, #date, 2
UNION ALL
SELECT 5, dateadd(day, -1, #date), DATEADD(day, 10, #date),2
UNION ALL
SELECT 6, DATEADD(day, 60, #date), NULL, 2
)
-- End testdata
,tmp
as
(
select *
,ROW_NUMBER() OVER(partition by intervalID order by startDate) as rowno
from yourTable
)
select *
from tmp t1
left join tmp t2
on t1.rowno = t2.rowno - 1
and t1.intervalID = t2.intervalID
where t1.endDate > t2.startDate
declare #T table (ItemId int, IntervalID int, StartDate datetime, EndDate datetime)
insert into #T
select 1, 1, NULL, '2011-01-15' union all
select 2, 1, '2011-01-16', '2011-01-25' union all
select 3, 1, '2011-01-26', NULL union all
select 4, 2, NULL, '2011-01-17' union all
select 5, 2, '2011-01-16', '2011-01-25' union all
select 6, 2, '2011-01-26', NULL
select T1.*
from #T as T1
inner join #T as T2
on coalesce(T1.StartDate, '1753-01-01') < coalesce(T2.EndDate, '9999-12-31') and
coalesce(T1.EndDate, '9999-12-31') > coalesce(T2.StartDate, '1753-01-01') and
T1.IntervalID = T2.IntervalID and
T1.ItemId <> T2.ItemId
Result:
ItemId IntervalID StartDate EndDate
----------- ----------- ----------------------- -----------------------
5 2 2011-01-16 00:00:00.000 2011-01-25 00:00:00.000
4 2 NULL 2011-01-17 00:00:00.000
Not directly related to the OP, but since Adrian's expressed an interest. Here's a table than SQL Server maintains the integrity of, ensuring that only one valid value is present at any time. In this case, I'm dealing with a current/history table, but the example can be modified to work with future data also (although in that case, you can't have the indexed view, and you need to write the merge's directly, rather than maintaining through triggers).
In this particular case, I'm dealing with a link table that I want to track the history of. First, the tables that we're linking:
create table dbo.Clients (
ClientID int IDENTITY(1,1) not null,
Name varchar(50) not null,
/* Other columns */
constraint PK_Clients PRIMARY KEY (ClientID)
)
go
create table dbo.DataItems (
DataItemID int IDENTITY(1,1) not null,
Name varchar(50) not null,
/* Other columns */
constraint PK_DataItems PRIMARY KEY (DataItemID),
constraint UQ_DataItem_Names UNIQUE (Name)
)
go
Now, if we were building a normal table, we'd have the following (Don't run this one):
create table dbo.ClientAnswers (
ClientID int not null,
DataItemID int not null,
IntValue int not null,
Comment varchar(max) null,
constraint PK_ClientAnswers PRIMARY KEY (ClientID,DataItemID),
constraint FK_ClientAnswers_Clients FOREIGN KEY (ClientID) references dbo.Clients (ClientID),
constraint FK_ClientAnswers_DataItems FOREIGN KEY (DataItemID) references dbo.DataItems (DataItemID)
)
But, we want a table that can represent a complete history. In particular, we want to design the structure such that overlapping time periods can never appear in the database. We always know which record was valid at any particular time:
create table dbo.ClientAnswerHistories (
ClientID int not null,
DataItemID int not null,
IntValue int null,
Comment varchar(max) null,
/* Temporal columns */
Deleted bit not null,
ValidFrom datetime2 null,
ValidTo datetime2 null,
constraint UQ_ClientAnswerHistories_ValidFrom UNIQUE (ClientID,DataItemID,ValidFrom),
constraint UQ_ClientAnswerHistories_ValidTo UNIQUE (ClientID,DataItemID,ValidTo),
constraint CK_ClientAnswerHistories_NoTimeTravel CHECK (ValidFrom < ValidTo),
constraint FK_ClientAnswerHistories_Clients FOREIGN KEY (ClientID) references dbo.Clients (ClientID),
constraint FK_ClientAnswerHistories_DataItems FOREIGN KEY (DataItemID) references dbo.DataItems (DataItemID),
constraint FK_ClientAnswerHistories_Prev FOREIGN KEY (ClientID,DataItemID,ValidFrom)
references dbo.ClientAnswerHistories (ClientID,DataItemID,ValidTo),
constraint FK_ClientAnswerHistories_Next FOREIGN KEY (ClientID,DataItemID,ValidTo)
references dbo.ClientAnswerHistories (ClientID,DataItemID,ValidFrom),
constraint CK_ClientAnswerHistory_DeletionNull CHECK (
Deleted = 0 or
(
IntValue is null and
Comment is null
)),
constraint CK_ClientAnswerHistory_IntValueNotNull CHECK (Deleted=1 or IntValue is not null)
)
go
That's a lot of constraints. The only way to maintain this table is through merge statements (see examples below, and try to reason about why yourself). We're now going to build a view that mimics that ClientAnswers table defined above:
create view dbo.ClientAnswers
with schemabinding
as
select
ClientID,
DataItemID,
ISNULL(IntValue,0) as IntValue,
Comment
from
dbo.ClientAnswerHistories
where
Deleted = 0 and
ValidTo is null
go
create unique clustered index PK_ClientAnswers on dbo.ClientAnswers (ClientID,DataItemID)
go
And we have the PK constraint we originally wanted. We've also used ISNULL to reinstate the not null-ness of the IntValue column (even though the check constraints already guarantee this, SQL Server is unable to derive this information). If we're working with an ORM, we let it target ClientAnswers, and the history gets automatically built. Next, we can have a function that lets us look back in time:
create function dbo.ClientAnswers_At (
#At datetime2
)
returns table
with schemabinding
as
return (
select
ClientID,
DataItemID,
ISNULL(IntValue,0) as IntValue,
Comment
from
dbo.ClientAnswerHistories
where
Deleted = 0 and
(ValidFrom is null or ValidFrom <= #At) and
(ValidTo is null or ValidTo > #At)
)
go
And finally, we need the triggers on ClientAnswers that build this history. We need to use merge statements, since we need to simultaneously insert new rows, and update the previous "valid" row to end date it with a new ValidTo value.
create trigger T_ClientAnswers_I
on dbo.ClientAnswers
instead of insert
as
set nocount on
;with Dup as (
select i.ClientID,i.DataItemID,i.IntValue,i.Comment,CASE WHEN cah.ClientID is not null THEN 1 ELSE 0 END as PrevDeleted,t.Dupl
from
inserted i
left join
dbo.ClientAnswerHistories cah
on
i.ClientID = cah.ClientID and
i.DataItemID = cah.DataItemID and
cah.ValidTo is null and
cah.Deleted = 1
cross join
(select 0 union all select 1) t(Dupl)
)
merge into dbo.ClientAnswerHistories cah
using Dup on cah.ClientID = Dup.ClientID and cah.DataItemID = Dup.DataItemID and cah.ValidTo is null and Dup.Dupl = 0 and Dup.PrevDeleted = 1
when matched then update set ValidTo = SYSDATETIME()
when not matched and Dup.Dupl=1 then insert (ClientID,DataItemID,IntValue,Comment,Deleted,ValidFrom)
values (Dup.ClientID,Dup.DataItemID,Dup.IntValue,Dup.Comment,0,CASE WHEN Dup.PrevDeleted=1 THEN SYSDATETIME() END);
go
create trigger T_ClientAnswers_U
on dbo.ClientAnswers
instead of update
as
set nocount on
;with Dup as (
select i.ClientID,i.DataItemID,i.IntValue,i.Comment,t.Dupl
from
inserted i
cross join
(select 0 union all select 1) t(Dupl)
)
merge into dbo.ClientAnswerHistories cah
using Dup on cah.ClientID = Dup.ClientID and cah.DataItemID = Dup.DataItemID and cah.ValidTo is null and Dup.Dupl = 0
when matched then update set ValidTo = SYSDATETIME()
when not matched then insert (ClientID,DataItemID,IntValue,Comment,Deleted,ValidFrom)
values (Dup.ClientID,Dup.DataItemID,Dup.IntValue,Dup.Comment,0,SYSDATETIME());
go
create trigger T_ClientAnswers_D
on dbo.ClientAnswers
instead of delete
as
set nocount on
;with Dup as (
select d.ClientID,d.DataItemID,t.Dupl
from
deleted d
cross join
(select 0 union all select 1) t(Dupl)
)
merge into dbo.ClientAnswerHistories cah
using Dup on cah.ClientID = Dup.ClientID and cah.DataItemID = Dup.DataItemID and cah.ValidTo is null and Dup.Dupl = 0
when matched then update set ValidTo = SYSDATETIME()
when not matched then insert (ClientID,DataItemID,Deleted,ValidFrom)
values (Dup.ClientID,Dup.DataItemID,1,SYSDATETIME());
go
Obviously, I could have built a simpler table (not a join table), but this is my standard go-to example (albeit it took me a while to reconstruct it - I forgot the set nocount on statements for a while). But the strength here is that, the base table, ClientAnswerHistories is incapable of storing overlapping time ranges for the same ClientID and DataItemID values.
Things get more complex when you need to deal with temporal foreign keys.
Of course, if you don't want any real gaps, then you can remove the Deleted column (and associated checks), make the not null columns really not null, modify the insert trigger to do a plain insert, and make the delete trigger raise an error instead.
I've always taken a slightly different approach to the design if I have data that is never to have overlapping intervals... namely don't store intervals, but only start times. Then, have a view that helps with displaying the intervals.
CREATE TABLE intervalStarts
(
ItemId int,
IntervalId int,
StartDate datetime
)
CREATE VIEW intervals
AS
with cte as (
select ItemId, IntervalId, StartDate,
row_number() over(partition by IntervalId order by isnull(StartDate,'1753-01-01')) row
from intervalStarts
)
select c1.ItemId, c1.IntervalId, c1.StartDate,
dateadd(dd,-1,c2.StartDate) as 'EndDate'
from cte c1
left join cte c2 on c1.IntervalId=c2.IntervalId
and c1.row=c2.row-1
So, sample data might look like:
INSERT INTO intervalStarts
select 1, 1, null union
select 2, 1, '2011-01-16' union
select 3, 1, '2011-01-26' union
select 4, 2, null union
select 5, 2, '2011-01-26' union
select 6, 2, '2011-01-14'
and a simple SELECT * FROM intervals yields:
ItemId | IntervalId | StartDate | EndDate
1 | 1 | null | 2011-01-15
2 | 1 | 2011-01-16 | 2011-01-25
3 | 1 | 2011-01-26 | null
4 | 2 | null | 2011-01-13
6 | 2 | 2011-01-14 | 2011-01-25
5 | 2 | 2011-01-26 | null

How to create a stored procedure to find cliques in the table of connections between users

Loooking for a way to retrieve community from a large dataset I came across an article about the algorithm which seems to be apropriate for large datasets. Anyway the data is stored two tables: users (nodes) and connections and I would like to retrieve the communities by pure sql queries without help of custom applications (I'm using SQL Server 2008).
The algorithm to retrieve the cliques is the following:
Read the graph G
Generate set neighbors(v) for every vertex of G
for each vertex v of G
call recursive_find_cliques(v, neighbors(v))
end for
Function recursive_find_cliques(x, n)
for each vertex t ∈ n by ascending order calculate set sigma
if sigma is not empty
extend x with t
call recursive_find_cliques(x, sigma)
end if
end for
where sigma is the set of vertices that could constitute triangles with v and its neighbors.
I already created a stored procedure which returns a table of neighbors of selected node but so far I haven't delat with sql functions and advanced queries so the question is the following:
Does anyone know how to rewrite the
algorithm above in sql in order to get
the set of cliques? As the question
might be a little abstract, I may
point out that the main problem is to
create a recursive function
(recursive_find_cliques(x, n)) which
takes a table (n) as an argument).
Thank you!
EDIT:
Here is sthe stored procedure created so far:
CREATE PROCEDURE [dbo].[Peamc_Test]
AS
BEGIN
SET XACT_ABORT ON
BEGIN TRAN
SET NOCOUNT ON;
CREATE TABLE #Users
(
UserId int NOT NULL,
userLabel varchar(50) PRIMARY KEY NOT NULL,
Observed bit NOT NULL
)
CREATE TABLE #Neighbors
(
UserId int NOT NULL,
userLabel varchar(50) NOT NULL PRIMARY KEY,
Retrieved bit NOT NULL
)
CREATE TABLE #ConnectedVertices
(
UserId int NOT NULL,
userLabel varchar(50) NOT NULL PRIMARY KEY,
)
CREATE TABLE #Cliques
(
CliqueId int NOT NULL,
UserId varchar(50) NOT NULL,
)
DECLARE #UsersCount int
DECLARE #ii int
DECLARE #User varchar(50)
DECLARE #NeighborsCount int
INSERT INTO #Users(UserId, userLabel, Observed) SELECT user_id, userLabel, 0 FROM dbo.test_users WHERE user_id IS NOT NULL
SELECT #UsersCount = COUNT(*) FROM #Users
SELECT #ii = 1
WHILE #ii <= #UsersCount
BEGIN
--select user
SELECT TOP 1 #User = userLabel FROM #Users WHERE Observed = 0 ORDER BY UserId
UPDATE #Users SET Observed = 1 WHERE userLabel = #User
--Get user's neighbors
DELETE FROM #Neighbors
INSERT INTO #Neighbors(UserId, userLabel, Retrieved)
SELECT u.user_id, t2.neighbor, 0 FROM ( SELECT CALLING_NEIGHBORS.neighbor FROM ( SELECT mc.calling_party AS neighbor FROM monthly_connections_test mc WHERE mc.called_party = #User) AS CALLING_NEIGHBORS INNER JOIN (SELECT mc.called_party AS neighbor FROM monthly_connections_test mc WHERE mc.calling_party = #User) AS CALLED_NEIGHBORS ON CALLING_NEIGHBORS.neighbor = CALLED_NEIGHBORS.neighbor) AS t2 INNER JOIN test_users u ON t2.neighbor = u.userLabel
SELECT #NeighborsCount = COUNT(*) FROM #Neighbors
SELECT #ii = #ii + 1
--HERE the function recursive_find_cliques has to search for cliques and insert the found ones in #cliques
END
SELECT * FROM #Cliques
END
It does'not return anything yet as it is not finished. It though retrieves all neighbors for the currently selected nodes and the next step is to implement recursive_find_cliques function.
I realised that my first answer only works when each clique has at least one user who is not referred to by any others in that clique. In other words, closed cliques like A-B, B-C, C-A will not be found.
Here is a solution which solves this. Again we have users with IDs, now 1..20. There are several cases of neighbouring relations that need to be handled:
Compared to the simple case, it is harder to find a unique starter for each clique.
We achieve this with a little sleight of hand:
Reorder the neighbours so that for all references A-B, A is less than B, ignoring any A=B.
From these, remove any A-X references if there are any X-A, which could cause a loop. This will never remove references to A completely because X-A remains and A-X will be added in the recursion.
The resultant set are the 'starting' users and we use them to prime the CTE:
-- Get all pairs, where UserA < UserB, dropping any A=B or B=A
WITH LRNeighbours(A, B) AS (
SELECT
Neighbours.UserA, Neighbours.UserB
FROM
Neighbours
WHERE
Neighbours.UserA < Neighbours.UserB
UNION ALL
SELECT DISTINCT
Neighbours.UserB, Neighbours.UserA
FROM
Neighbours
WHERE
Neighbours.UserA > Neighbours.UserB
),
-- Isolate those that are not referred to by a higher numbered key
Starters(userid) AS (
SELECT DISTINCT
A
FROM
LRNeighbours
WHERE
A NOT IN (
SELECT
B
FROM
LRNeighbours
)
),
-- The recursive Common Table Expression
cliques(userid, clique) AS (
-- Number starters 1..N
SELECT
userid, ROW_NUMBER() OVER(ORDER BY userid) AS clique
FROM
Starters
UNION ALL
-- Recurse, adding users referred by siblings, avoiding starters themselves
SELECT
B, clique
FROM
LRNeighbours INNER JOIN
cliques ON
LRNeighbours.A = cliques.userid
AND B NOT IN (
SELECT
userid
FROM
starters
)
)
SELECT DISTINCT
clique, userid
FROM
cliques
ORDER BY
clique, userid
Results:
1 1
1 2
2 3
2 4
3 5
3 6
3 7
3 8
4 9
4 10
4 11
4 12
4 13
5 14
5 15
5 16
5 17
5 18
5 19
5 20
CREATE TABLE [dbo].[Users](
[UserID] [int] IDENTITY(1,1) NOT NULL,
[UserName] [varchar](50) NOT NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[Neighbours](
[UserA] [int] NOT NULL,
[UserB] [int] NOT NULL
) ON [PRIMARY]
Users populated with 1..8 and Neighbours
UserA UserB
1 2
2 3
4 5
4 6
5 7
7 8
Then:
WITH cliques(userid, clique) AS (
SELECT
userid, ROW_NUMBER() OVER(ORDER BY userid) AS clique
FROM
Users
WHERE
users.UserID NOT IN (
SELECT
UserB
FROM
Neighbours
)
UNION ALL
SELECT
Neighbours.UserB, clique
FROM
neighbours
INNER JOIN cliques
ON Neighbours.UserA = cliques.userid
)
SELECT
clique, cliques.userid
FROM
cliques
ORDER BY
clique, userid
Result:
clique userid
1 1
1 2
1 3
2 4
2 5
2 6
2 7
2 8
See : Recursive Queries Using Common Table Expressions
I've added a two LABELS and two GOTO statements
CREATE PROCEDURE [dbo].[Peamc_Test]
AS
BEGIN
SET XACT_ABORT ON
BEGIN TRAN
SET NOCOUNT ON;
CREATE TABLE #Users
(
UserId int NOT NULL,
userLabel varchar(50) PRIMARY KEY NOT NULL,
Observed bit NOT NULL
)
CREATE TABLE #Neighbors
(
UserId int NOT NULL,
userLabel varchar(50) NOT NULL PRIMARY KEY,
Retrieved bit NOT NULL
)
CREATE TABLE #ConnectedVertices
(
UserId int NOT NULL,
userLabel varchar(50) NOT NULL PRIMARY KEY,
)
CREATE TABLE #Cliques
(
CliqueId int NOT NULL,
UserId varchar(50) NOT NULL,
)
DECLARE #UsersCount int
DECLARE #ii int
DECLARE #User varchar(50)
DECLARE #NeighborsCount int
INSERT INTO #Users(UserId, userLabel, Observed) SELECT user_id, userLabel, 0 FROM dbo.test_users WHERE user_id IS NOT NULL
SELECT #UsersCount = COUNT(*) FROM #Users
SELECT #ii = 1
WHILE #ii <= #UsersCount
BEGIN
--select user
SELECT TOP 1 #User = userLabel FROM #Users WHERE Observed = 0 ORDER BY UserId
UPDATE #Users SET Observed = 1 WHERE userLabel = #User
--Get user's neighbors
DELETE FROM #Neighbors
INSERT INTO #Neighbors(UserId, userLabel, Retrieved)
SELECT u.user_id, t2.neighbor, 0 FROM ( SELECT CALLING_NEIGHBORS.neighbor FROM ( SELECT mc.calling_party AS neighbor FROM monthly_connections_test mc WHERE mc.called_party = #User) AS CALLING_NEIGHBORS INNER JOIN (SELECT mc.called_party AS neighbor FROM monthly_connections_test mc WHERE mc.calling_party = #User) AS CALLED_NEIGHBORS ON CALLING_NEIGHBORS.neighbor = CALLED_NEIGHBORS.neighbor) AS t2 INNER JOIN test_users u ON t2.neighbor = u.userLabel
SELECT #NeighborsCount = COUNT(*) FROM #Neighbors
SELECT #ii = #ii + 1
GOTO Clique_Find
--HERE the function recursive_find_cliques has to search for cliques and insert the found ones in #cliques
--------------------
Clique_Return:
--------------------
END
SELECT * FROM #Cliques
END
--------------------
Clique_Find:
--------------------
-- Code goes here
-- Code goes here
-- Code goes here
-- Code goes here
-- Code goes here
-- Code goes here
GOTO Clique_Return

How to delete when the parameter varies by group without looping? (T-SQL)

Imagine I have these columns in a table:
id int NOT NULL IDENTITY PRIMARY KEY,
instant datetime NOT NULL,
foreignId bigint NOT NULL
For each group (grouped by foreignId) I want to delete all the rows which are 1 hour older than the max(instant). Thus, for each group the parameter is different.
Is it possible without looping?
Yep, it's pretty straightforward. Try this:
DELETE mt
FROM MyTable AS mt
WHERE mt.instant <= DATEADD(hh, -1, (SELECT MAX(instant)
FROM MyTable
WHERE ForeignID = mt.ForeignID))
Or this:
;WITH MostRecentKeys
AS
(SELECT ForeignID, MAX(instant) AS LatestInstant
FROM MyTable)
DELETE mt
FROM MyTable AS mt
JOIN MostRecentKeys mrk ON mt.ForeignID = mrt.ForeignID
AND mt.Instant <= DATEADD(hh, -1, mrk.LatestInstant)
DELETE
FROM mytable
FROM mytable mto
WHERE instant <
(
SELECT DATEADD(hour, -1, MAX(instant))
FROM mytable mti
WHERE mti.foreignid = mto.foreignid
)
Note double FROM clause, it's on purpose, otherwise you won't be able to alias the table you're deleting from.
The sample data to check:
DECLARE #mytable TABLE
(
id INT NOT NULL PRIMARY KEY,
instant DATETIME NOT NULL,
foreignID INT NOT NULL
)
INSERT
INTO #mytable
SELECT 1, '2009-22-07 10:00:00', 1
UNION ALL
SELECT 2, '2009-22-07 09:30:00', 1
UNION ALL
SELECT 3, '2009-22-07 08:00:00', 1
UNION ALL
SELECT 4, '2009-22-07 10:00:00', 2
UNION ALL
SELECT 5, '2009-22-07 08:00:00', 2
UNION ALL
SELECT 6, '2009-22-07 07:30:00', 2
DELETE
FROM #mytable
FROM #mytable mto
WHERE instant <
(
SELECT DATEADD(hour, -1, MAX(instant))
FROM #mytable mti
WHERE mti.foreignid = mto.foreignid
)
SELECT *
FROM #mytable
1 2009-07-22 10:00:00.000 1
2 2009-07-22 09:30:00.000 1
4 2009-07-22 10:00:00.000 2
I'm going to assume when you say '1 hour older than the max(instant)' you mean '1 hour older than the max(instant) for that foreignId'.
Given that, there's almost certainly a more succinct way than this, but it will work:
DELETE
TableName
WHERE
DATEADD(hh, 1, instant) < (SELECT MAX(instant)
FROM TableName T2
WHERE T2.foreignId = TableName.foreignId)
The inner subquery is called a 'correlated subquery', if you want to look for more info. The way it works is that for each row under consideration by the outer query, it is the foreignId of that row that gets referenced by the subquery.