Can I write this query without CURSORs? - sql

I am trying to find out the number of events that happened within a threshold time of timestamps found in another table for the same category. What is the fastest way to vary delta (in the case given below, delta is 5 minutes)? I just tested an approach using cursor (set a variable to 5 and then keep incrementing and executing the same query) but it is taking 10 seconds for each iteration. In my actual data, number of rows in #EVENTS is approximately equal to 100K and #CHANGES is about 500K.
My tables are as follows:
CREATE TABLE #EVENTS(Category varchar(20), Timestamp datetime)
GO
INSERT INTO #EVENTS VALUES('A', '2013-01-23 05:02:00.000')
INSERT INTO #EVENTS VALUES('A', '2013-01-23 05:04:00.000')
INSERT INTO #EVENTS VALUES('B', '2013-01-23 05:03:00.000')
INSERT INTO #EVENTS VALUES('B', '2013-01-21 05:02:00.000')
GO
CREATE TABLE #CHANGES(Category varchar(10), Timestamp datetime)
GO
INSERT INTO #CHANGES VALUES('A', '2013-01-23 05:00:00.000')
INSERT INTO #CHANGES VALUES('B', '2013-01-21 05:05:00.000')
SELECT *
FROM
(
SELECT X.Category, X.Timestamp, Y.Timestamp BeforeT, DATEADD(MINUTE, 5, Y.Timestamp) AfterT
FROM #EVENTS X, #CHANGES Y
WHERE X.Category = Y.Category
) X
WHERE X.Timestamp BETWEEN BeforeT AND AfterT
DROP TABLE #CHANGES
DROP TABLE #EVENTS
GO

Is this what you are looking for? It does a cross join to a CTE that defines the deltas:
with deltas as (
select 5 as delta union all
select 10 union all
select 20
)
SELECT *
FROM (SELECT e.Category, e.Timestamp, c.Timestamp BeforeT,
DATEADD(MINUTE, deltas.delta, c.Timestamp) AfterT,
deltas.delta
FROM #EVENTS e join
#CHANGES c
on e.Category = c.Category cross join
deltas
) X
WHERE X.Timestamp BETWEEN BeforeT AND AfterT
I also fixed your aliases. Queries read much better when the aliases are related to the underlying table name.

Related

Sql Server While Loop with Changing Condition

I have a User Table in my database that contains two fields
user_id
manager_id
I am trying to construct a query to list all of the manager_ids that are associated with a user_id in a hierarchical structure.
So if i give a user_id, i will get that users manager, followed by that persons manager all the way to the very top.
So far i have tried but it doesnt give what i need:
WITH cte(user_id, manager_id) as (
SELECT user_id, manager_id
FROM user
WHERE manager_id=#userid
UNION ALL
SELECT u.user_id, u.manager_id,
FROM user u
INNER JOIN cte c on e.manager_id = c.employee_id
)
INSERT INTO #tbl (manager_id)
select user_id, manager_id from cte;
If anyone can point me in the right direction that would be great.
I thought about a While loop but this may not be very efficient and im not too sure how to implement that.
OP asked for a while loop, and while (ha, pun) this may not be the best way... Ask and you shall receive. (:
Here is sample data I created (in the future, please provide this):
CREATE TABLE #temp (userID int, managerID int)
INSERT INTO #temp VALUES (1, 3)
INSERT INTO #temp VALUES (2, 3)
INSERT INTO #temp VALUES (3, 7)
INSERT INTO #temp VALUES (4, 6)
INSERT INTO #temp VALUES (5, 7)
INSERT INTO #temp VALUES (6, 9)
INSERT INTO #temp VALUES (7, 10)
INSERT INTO #temp VALUES (8, 10)
INSERT INTO #temp VALUES (9, 10)
INSERT INTO #temp VALUES (10, 12)
INSERT INTO #temp VALUES (11, 12)
INSERT INTO #temp VALUES (12, NULL)
While Loop:
CREATE TABLE #results (userID INT, managerID INT)
DECLARE #currentUser INT = 1 -- Would be your parameter!
DECLARE #maxUser INT
DECLARE #userManager INT
SELECT #maxUser = MAX(userID) FROM #temp
WHILE #currentUser <= #maxUser
BEGIN
SELECT #userManager = managerID FROM #temp WHERE userID = #currentUser
INSERT INTO #results VALUES (#currentUser, #userManager)
SET #currentUser = #userManager
END
SELECT * FROM #results
DROP TABLE #temp
DROP TABLE #results
Get rid of this column list in your CTE declaration that has nothing to do with the columns you are actually selecting in the CTE:
WITH cte(employee_id, name, reports_to_emp_no, job_number) as (
Just make it this:
WITH cte as (
I recommend recursive solution:
WITH Parent AS
(
SELECT * FROM user WHERE user_id=#userId
UNION ALL
SELECT T.* FROM user T
JOIN Parent P ON P.manager_id=T.user_id
)
SELECT * FROM Parent
To see demo, run following:
SELECT * INTO #t FROM (VALUES (1,NULL),(2,1),(3,2),(4,1)) T(user_id,manager_id);
DECLARE #userId int = 3;
WITH Parent AS
(
SELECT * FROM #t WHERE user_id=#userId
UNION ALL
SELECT T.* FROM #t T
JOIN Parent P ON P.manager_id=T.user_id
)
SELECT * FROM Parent

Counting number of transactions within past 1 hour on a particular user

Is there any way how to (in the best case, without using cursor) count number of transactions that the same user made in previous 1 hour.
That means that for this table
CREATE TABLE #TR (PK INT, TR_DATE DATETIME, USER_PK INT)
INSERT INTO #TR VALUES (1,'2018-07-31 06:02:00.000',10)
INSERT INTO #TR VALUES (2,'2018-07-31 06:36:00.000',10)
INSERT INTO #TR VALUES (3,'2018-07-31 06:55:00.000',10)
INSERT INTO #TR VALUES (4,'2018-07-31 07:10:00.000',10)
INSERT INTO #TR VALUES (5,'2018-07-31 09:05:00.000',10)
INSERT INTO #TR VALUES (6,'2018-07-31 06:05:00.000',11)
INSERT INTO #TR VALUES (7,'2018-07-31 06:55:00.000',11)
INSERT INTO #TR VALUES (8,'2018-07-31 07:10:00.000',11)
INSERT INTO #TR VALUES (9,'2018-07-31 06:12:00.000',12)
The result should be:
The solution could be something like: COUNT(*) OVER (PARTITION BY USER_PK ORDER BY TR_DATE ROWS BETWEEN ((WHERE DATEADD(HH,-1,PRECENDING.TR_DATE) > CURRENT ROW.TR_DATE) AND CURRENT ROW ...but I know that ROWS BETWEEN can not be used like that...
I am guessing SQL Server based on the syntax. In SQL Server, you can use apply:
select t.*, tr2.result
from #tr tr outer apply
(select count(*) as result
from #tr tr2
where tr2.user_id = tr.user_id and
tr2.tr_date > dateadd(hour, -1, tr.date) and
tr2.tr_date <= tr.tr_date
) tr2;
SELECT USER_PK, COUNT(*) AS TransactionCount
FROM #TR
WHERE DATEDIFF(MINUTE, TR_DATE, GETDATE()) <= 60
AND DATEDIFF(MINUTE, TR_DATE, GETDATE()) >= 0
GROUP BY USER_PK
You can change GETDATE() with whatever you want, but they need to have the same value

INSERT INTO with SELECT using OUTPUT

I want to store the pt_id into my temporary table by using OUTPUT, however I'm not inserting pt_id into ct_tarf, what should I do?
I get the following error:
The multi-part identifier 'pt_id' could not be bound.
Query:
DECLARE #tbl_ids_tarf TABLE (pt_id INT, pt_id_tarf INT)
INSERT INTO dbo.ct_tarf(tr_id_serv, tr_name, tr_money)
OUTPUT pt_id, inserted.tr_id INTO #tbl_ids_tarf(ptr_id, ptr_id_tarf)
SELECT
pt_id_serv, pt_name, pt_money
FROM
dbo.opr_prop_tar
WHERE
pt_id_tarf
SQL2008+:
Assuming that you want to insert into #tbl_ids_tarf (which is target of OUTPUT ... INTO clause) values from columns that are not returned by inserted or deleted virtual tables then one solution is to use MERGE statement instead of INSERT:
DECLARE #Target TABLE (
Col1 INT
)
INSERT #Target VALUES (1), (2);
DECLARE #Output TABLE (Col1 INT, ColB INT);
;WITH Source
AS (
SELECT *
FROM (VALUES (10, 100), (20, 200), (30, 300)) x(ColA, ColB)
)
MERGE INTO #Target x
USING Source y ON x.Col1 = y.ColA
WHEN NOT MATCHED BY TARGET THEN
INSERT (Col1) VALUES (y.ColA)
OUTPUT inserted.Col1, y.ColB INTO #Output (Col1, ColB);
-- ^-- y.ColB isn't returned by inserted or deleted virtual tables.
-- inserted and deleted are based on `#Target` table [variable]
SELECT * FROM #Output;
/*
Col1 ColB
----------- -----------
10 100
20 200
30 300
*/
You have several issues with your query - column naming is one, an incomplete WHERE clause is another, and a missing Inserted. prefix is the third.
Try this:
DECLARE #tbl_ids_tarf TABLE (pt_id INT, pt_id_tarf INT)
INSERT INTO dbo.ct_tarf(tr_id_serv, tr_name, tr_money)
OUTPUT inserted.pt_id, inserted.tr_id INTO #tbl_ids_tarf(pt_id, pt_id_tarf)
-- ********* ******************
SELECT
pt_id_serv, pt_name, pt_money
FROM
dbo.opr_prop_tar
WHERE
pt_id_tarf --........

Returning a set of the most recent rows from a table

I'm trying to retrieve the latest set of rows from a source table containing a foreign key, a date and other fields present. A sample set of data could be:
create table #tmp (primaryId int, foreignKeyId int, startDate datetime,
otherfield varchar(50))
insert into #tmp values (1, 1, '1 jan 2010', 'test 1')
insert into #tmp values (2, 1, '1 jan 2011', 'test 2')
insert into #tmp values (3, 2, '1 jan 2013', 'test 3')
insert into #tmp values (4, 2, '1 jan 2012', 'test 4')
The form of data that I'm hoping to retrieve is:
foreignKeyId maxStartDate otherfield
------------ ----------------------- -------------------------------------------
1 2011-01-01 00:00:00.000 test 2
2 2013-01-01 00:00:00.000 test 3
That is, just one row per foreignKeyId showing the latest start date and associated other fields - the primaryId is irrelevant.
I've managed to come up with:
select t.foreignKeyId, t.startDate, t.otherField from #tmp t
inner join (
select foreignKeyId, max(startDate) as maxStartDate
from #tmp
group by foreignKeyId
) s
on t.foreignKeyId = s.foreignKeyId and s.maxStartDate = t.startDate
but (a) this uses inner queries, which I suspect may lead to performance issues, and (b) it gives repeated rows if two rows in the original table have the same foreignKeyId and startDate.
Is there a query that will return just the first match for each foreign key and start date?
Depending on your sql server version, try the following:
select *
from (
select *, rnum = ROW_NUMBER() over (
partition by #tmp.foreignKeyId
order by #tmp.startDate desc)
from #tmp
) t
where t.rnum = 1
If you wanted to fix your attempt as opposed to re-engineering it then
select t.foreignKeyId, t.startDate, t.otherField from #tmp t
inner join (
select foreignKeyId, max(startDate) as maxStartDate, max(PrimaryId) as Latest
from #tmp
group by foreignKeyId
) s
on t.primaryId = s.latest
would have done the job, assuming PrimaryID increases over time.
Qualms about inner query would have been laid to rest as well assuming some indexes.

Computing difference in rows for all except consecutive days?

I have a table as follows. I want to compute the difference in dates (in seconds) between consecutive rows according to the following:
If the dates differ by more than a day, then we go ahead and compute the difference
If the dates differ by more than a day and there are consecutive days with the value 84600 for the second date, then I want to first combine the dates before taking a difference
I am currently doing a self-join to handle the first case but am not sure if there is a good way to handle the second case. Any suggestion?
The following also gives an example:
CREATE TABLE #TEMP(Person VARCHAR(100), StartTime Datetime, TotalSeconds INT)
INSERT INTO #TEMP VALUES('A', '2013-02-20', 49800); -- We want to take the difference with the next row in this case
INSERT INTO #TEMP VALUES('A', '2013-02-25', 3000); -- Before taking the difference, I want to first merge the next four rows because 5th March is followed by three days with the value 86400
INSERT INTO #TEMP VALUES('A', '2013-03-05', 2100);
INSERT INTO #TEMP VALUES('A', '2013-03-06', 86400);
INSERT INTO #TEMP VALUES('A', '2013-03-07', 86400);
INSERT INTO #TEMP VALUES('A', '2013-03-08', 86400);
INSERT INTO #TEMP VALUES('A', '2013-03-09', 17100);
INSERT INTO #TEMP VALUES('B', '2012-04-24', 22500);
INSERT INTO #TEMP VALUES('B', '2012-04-26', 600);
INSERT INTO #TEMP VALUES('B', '2012-04-27', 10500);
INSERT INTO #TEMP VALUES('B', '2012-04-29', 41400);
INSERT INTO #TEMP VALUES('B', '2012-05-04', 86100);
SELECT *
FROM #TEMP
DROP TABLE #TEMP
The following handles the second case:
select Person, MIN(StartTime) as StartTime, MAX(StartTime) as maxStartTime
from (SELECT *,
dateadd(d, - ROW_NUMBER() over (partition by person order by StartTime), StartTime) as thegroup
FROM #TEMP t
) t
group by Person, thegroup
It groups all the time periods for a person, with consecutive dates collapsing into a single period (with a begin and end time). The trick is to assign a sequence number, using row_number() and then take the difference from StartTime. This difference is constant for a group of consecutive dates -- hence the outer group by.
You can use a with statement to put this into your query and then get the difference that you desire between consecutive rows.