This is a new version of my question, since it seems to be confusing. Sorry. I figured it out. See the code if you're interested. Notes to solve are in there. Thanks for your help!
I got it to work this far, but OriginationL (L is for Little and B is for Big) is not correct. It's taking the correct date but not the matching Origination.
-- Sample data: one row per load tag; a job (JobNumber) can have several tags.
CREATE TABLE MyTable
(
    LoadTagID       INT,
    EnteredDateTime DATETIME,
    JobNumber       VARCHAR(50),
    Origination     VARCHAR(50)
);

-- Explicit column list so the insert does not depend on column order.
-- JobNumber is VARCHAR(50), so its values are written as string literals
-- instead of relying on an implicit int -> varchar conversion.
-- Datetime literals use the ISO 8601 'YYYY-MM-DDThh:mm:ss' form, which SQL Server
-- parses the same way regardless of the session's language / DATEFORMAT.
INSERT INTO MyTable (LoadTagID, EnteredDateTime, JobNumber, Origination)
VALUES
    (1,   '2015-02-09T00:00:00', '11111',  'Here'),
    (2,   '2015-02-09T00:00:00', '22222',  'There'),
    (3,   '2016-03-09T00:00:00', '11111',  'Outside'),
    (4,   '2016-08-09T00:00:00', '12578',  'Anywhere'),
    (252, '2017-06-29T00:00:00', '12345',  'Here'),
    (253, '2017-08-01T00:00:00', '99999',  'There'),
    (254, '2017-08-04T00:00:00', '12345',  'Outside'),
    (255, '2017-08-09T00:00:00', '12345',  'Anywhere'),
    (256, '2017-08-10T00:00:00', '99999',  'Anywhere'),
    (257, '2017-08-10T00:00:00', '123456', 'Anywhere'),
    (258, '2017-08-11T00:00:00', '123456', 'Over Yonder'),
    (259, '2017-08-13T00:00:00', '99999',  'Under The Bridge');
-- Uncomment to inspect the sample data:
-- SELECT * FROM MyTable

-- Work table for the MAX ("big") side: one row per job.
CREATE TABLE #LTTB1
(
    LoadTagID       VARCHAR(50),
    JobNumber       VARCHAR(50),
    EnteredDateTime VARCHAR(50),
    Origination     VARCHAR(50)
);

-- Work table for the MIN ("little") side: one row per job.
CREATE TABLE #LTTB2
(
    LoadTagID       VARCHAR(50),
    JobNumber       VARCHAR(50),
    EnteredDateTime VARCHAR(50),
    Origination     VARCHAR(50)
);

-- Combined result: the "L"-suffixed columns hold the little (MIN) values,
-- the unsuffixed columns hold the big (MAX) values.
CREATE TABLE #LTTB3
(
    LoadTagIDL       VARCHAR(50),
    JobNumberL       VARCHAR(50),
    EnteredDateTimeL VARCHAR(50),
    OriginationL     VARCHAR(50),
    LoadTagID        VARCHAR(50),
    JobNumber        VARCHAR(50),
    EnteredDateTime  VARCHAR(50),
    Origination      VARCHAR(50)
);
-- Latest load tag per job within the last 10 days.
--
-- The previous version aggregated every column independently
-- (MAX(LoadTagID), MAX(EnteredDateTime), MAX(Origination)), so the values could come
-- from different rows: MAX(Origination) is simply the alphabetically greatest text in
-- the group, not the Origination of the newest row. ROW_NUMBER() picks one whole row
-- per JobNumber instead, so all four columns belong together.
INSERT INTO #LTTB1 (LoadTagID, JobNumber, EnteredDateTime, Origination)
SELECT
    newest.LoadTagID,
    newest.JobNumber,
    newest.EnteredDateTime,
    newest.Origination
FROM (
    SELECT
        LoadTagID,
        JobNumber,
        EnteredDateTime,
        Origination,
        -- LoadTagID breaks ties between rows entered at the same instant.
        ROW_NUMBER() OVER (
            PARTITION BY JobNumber
            ORDER BY EnteredDateTime DESC, LoadTagID DESC
        ) AS rn
    FROM MyTable
    -- Gets the last 10 days. Comparing the bare column against midnight of the cutoff
    -- day matches the original CONVERT(Date, ...) >= CONVERT(Date, ...) test while
    -- leaving EnteredDateTime unwrapped (index-friendly).
    WHERE EnteredDateTime >= CAST(DATEADD(DAY, -10, GETDATE()) AS DATE)
) AS newest
WHERE newest.rn = 1;
-- Earliest load tag per job within the last 60 days.
--
-- Why the old MIN/MAX version returned the "wrong" Origination: each aggregate is
-- evaluated on its own, so MIN(EnteredDateTime) and MAX(Origination) (or MIN) are
-- taken from whichever rows happen to hold those extreme values. For text that means
-- alphabetical order, unrelated to the date. Selecting the complete first row per
-- JobNumber with ROW_NUMBER() keeps LoadTagID, EnteredDateTime and Origination
-- together.
INSERT INTO #LTTB2 (LoadTagID, JobNumber, EnteredDateTime, Origination)
SELECT
    oldest.LoadTagID,
    oldest.JobNumber,
    oldest.EnteredDateTime,
    oldest.Origination
FROM (
    SELECT
        LoadTagID,
        JobNumber,
        EnteredDateTime,
        Origination,
        -- LoadTagID breaks ties between rows entered at the same instant.
        ROW_NUMBER() OVER (
            PARTITION BY JobNumber
            ORDER BY EnteredDateTime ASC, LoadTagID ASC
        ) AS rn
    FROM MyTable
    -- Goes further back (60 days) in case a job runs long.
    WHERE EnteredDateTime >= CAST(DATEADD(DAY, -60, GETDATE()) AS DATE)
) AS oldest
WHERE oldest.rn = 1;
-- Put each job's MIN-side row (#LTTB2) next to its MAX-side row (#LTTB1),
-- matched on JobNumber, then show the combined rows.
INSERT INTO #LTTB3
SELECT
    little.LoadTagID       AS LoadTagIDL,
    little.JobNumber       AS JobNumberL,
    little.EnteredDateTime AS EnteredDateTimeL,
    little.Origination     AS OriginationL,
    big.LoadTagID,
    big.JobNumber,
    big.EnteredDateTime,
    big.Origination
FROM #LTTB2 AS little          -- MIN
INNER JOIN #LTTB1 AS big       -- MAX
    ON little.JobNumber = big.JobNumber;

SELECT *
FROM #LTTB3;
So for JobNumber 12345, 6/29 is correct, but it should be "Here" and not "Anywhere".
For 99999 everything is correct but for 8/1 it should be "There" and not Anywhere. That seems to be the middle value in the set. I'm so confused.
Does anyone know why it's grabbing that value? Thank you.
-- Rows holding the smallest and the largest LoadTagID in the table.
-- The bounds are computed once in a derived table and joined back.
SELECT t.*
FROM mytable AS t
INNER JOIN (
    SELECT
        MIN(LoadTagID) AS lowest_id,
        MAX(LoadTagID) AS highest_id
    FROM mytable
) AS bounds
    ON t.LoadTagID = bounds.lowest_id
    OR t.LoadTagID = bounds.highest_id;
query according to the output you want
-- Reduced sample table used by this answer.
CREATE TABLE MyTable
(
    LoadTagID   INT,
    Date        Date,
    Job         INT,
    Origination VARCHAR(20)
);

-- Explicit column list, and ISO 'YYYY-MM-DD' literals instead of 'm/d/yy':
-- the short form is interpreted according to the session's DATEFORMAT / language
-- and two-digit-year cutoff, the ISO form is not.
INSERT INTO MyTable (LoadTagID, Date, Job, Origination)
VALUES
    (252, '2017-06-29', 12345, 'Here'),
    (253, '2017-08-01', 99999, 'There'),
    (254, '2017-08-04', 12345, 'Outside'),
    (255, '2017-08-08', 12345, 'Anywhere');
--SELECT * FROM MyTable
-- #Table1: the row with the lowest LoadTagID in MyTable.
SELECT *
INTO #Table1
FROM MyTable
WHERE LoadTagID = (SELECT MIN(LoadTagID) FROM MyTable);

-- #Table2: the row with the highest LoadTagID in MyTable.
SELECT *
INTO #Table2
FROM MyTable
WHERE LoadTagID = (SELECT MAX(LoadTagID) FROM MyTable);

-- #T3: both rows stacked (lowest first in the UNION ALL, then highest).
SELECT *
INTO #T3
FROM (
    SELECT * FROM #Table1
    UNION ALL
    SELECT * FROM #Table2
) AS stacked;

-- Keep only the stacked rows whose Job also appears in #Table1
-- (an outer join filtered on "IS NOT NULL" is an inner join).
SELECT
    both_rows.Date,
    both_rows.Job,
    both_rows.LoadTagID,
    both_rows.Origination
FROM #T3 AS both_rows
INNER JOIN #Table1 AS lowest
    ON lowest.Job = both_rows.Job;
-- Latest load tag per job within the last 10 days.
--
-- Uses ROW_NUMBER() to take one complete row per JobNumber. Aggregating each column
-- separately (MAX(LoadTagID), MAX(EnteredDateTime), MAX(Origination)) can combine
-- values from different rows; MAX(Origination) in particular is the alphabetically
-- greatest text, not the Origination of the newest row.
INSERT INTO #LTTB1 (LoadTagID, JobNumber, EnteredDateTime, Origination)
SELECT
    newest.LoadTagID,
    newest.JobNumber,
    newest.EnteredDateTime,
    newest.Origination
FROM (
    SELECT
        LoadTagID,
        JobNumber,
        EnteredDateTime,
        Origination,
        -- LoadTagID breaks ties between rows entered at the same instant.
        ROW_NUMBER() OVER (
            PARTITION BY JobNumber
            ORDER BY EnteredDateTime DESC, LoadTagID DESC
        ) AS rn
    FROM MyTable
    -- Gets the last 10 days; the column is left unwrapped so an index on it can be used.
    WHERE EnteredDateTime >= CAST(DATEADD(DAY, -10, GETDATE()) AS DATE)
) AS newest
WHERE newest.rn = 1;
-- First row per job (ordered by EnteredDateTime) among rows from the last 60 days.
INSERT INTO #LTTB2
SELECT
    ranked.LoadTagID,
    ranked.JobNumber,
    ranked.EnteredDateTime,
    ranked.Origination
FROM (
    SELECT
        LoadTagID,
        JobNumber,
        EnteredDateTime,
        Origination,
        ROW_NUMBER() OVER (PARTITION BY JobNumber ORDER BY EnteredDateTime) AS seq
    FROM MyTable
    WHERE CONVERT(Date, EnteredDateTime) >= CONVERT(Date, GETDATE() - 60)
) AS ranked
WHERE ranked.seq = 1
-- Combine the MIN-side row (#LTTB2) and MAX-side row (#LTTB1) of every job.
INSERT INTO #LTTB3
SELECT
    little.LoadTagID       AS LoadTagIDL,
    little.JobNumber       AS JobNumberL,
    little.EnteredDateTime AS EnteredDateTimeL,
    little.Origination     AS OriginationL,
    big.LoadTagID,
    big.JobNumber,
    big.EnteredDateTime,
    big.Origination
FROM #LTTB2 AS little          -- MIN
INNER JOIN #LTTB1 AS big       -- MAX
    ON little.JobNumber = big.JobNumber;

-- Source rows, then the combined result, for comparison.
SELECT *
FROM MyTable;

SELECT *
FROM #LTTB3;
--I hope, your prob has been solved now..
Related
I need to pivot a table as shown below, using the column "channel" and grouping by Units.
Actual table:
The result I need is shown below
I'm not an expert in pivoting and unpivoting concepts; I'm trying the query below to achieve the above result.
-- Net usage per reading: channel 1 minus channel 2, with both channel values shown.
--
-- PIVOT implicitly groups by every column of its source that is neither the aggregated
-- column (usage_value) nor the pivot column (channel). Pivoting the raw table therefore
-- also groups by any column that is not listed in the outer SELECT, which splits
-- channel 1 and channel 2 onto separate rows whenever such a column differs between
-- them. The derived table limits the source to exactly the columns that should take
-- part in the grouping.
-- NOTE(review): if one of the listed columns (e.g. channel_ID) itself differs between
-- channel 1 and channel 2 rows, drop it from the derived table as well.
SELECT
    [service_point_ID],
    ISNULL([1], 0) - ISNULL([2], 0) AS net_usage_value,
    [units],
    [1],
    [2],
    [channel_ID],
    [date],
    [time],
    [is_estimate],
    [UTC_offset],
    [import_history_id]
FROM (
    SELECT
        [service_point_ID],
        [usage_value],
        [units],
        [channel],
        [channel_ID],
        [date],
        [time],
        [is_estimate],
        [UTC_offset],
        [import_history_id]
    FROM #temp1
) AS SourceTable
PIVOT (SUM(usage_value) FOR channel IN ([1], [2])) AS PivotTable
If I execute this query I'm getting the below result
The same logic is achieved in R; reference link: "Pivot using multiple columns".
Here is the SQL fiddle for this one
-- Sample readings: two channels (1 and 2) for each unit (kvarh, kwh).
CREATE TABLE #temp1
(
    Service_point_ID VARCHAR(10)   NULL,
    usage_value      DECIMAL(18,6) NULL,
    units            VARCHAR(10)   NULL,
    [date]           DATE          NULL,
    [time]           TIME          NULL,
    channel          VARCHAR(2)    NULL,
    [Channel_ID]     VARCHAR(2)    NULL,
    is_estimate      VARCHAR(2)    NULL,
    UTC_Offset       VARCHAR(20)   NULL
)

INSERT INTO #temp1
    (Service_point_ID, usage_value, units, [date], [time], channel, [Channel_ID], is_estimate, UTC_Offset)
VALUES
    ('123', 1.000000, 'kvarh', '2017-01-01', '0015', '1', '11', 'A', '-500'),
    ('123', 0.200000, 'kvarh', '2017-01-01', '0015', '2', '11', 'A', '-500'),
    ('123', 0.200000, 'kwh',   '2017-01-01', '0015', '1', '11', 'A', '-500'),
    ('123', 0.400000, 'kwh',   '2017-01-01', '0015', '2', '11', 'A', '-500')
Any help is much appreciated.
This is solution using pivot function:
-- Table variable holding the sample readings.
-- Table variables are declared with an "@" prefix; "declare #table table(...)" is not
-- valid T-SQL ("#" names are temp tables created with CREATE TABLE / SELECT INTO).
declare @table table(
    service_point_id int,
    usage_value float,
    units varchar(10),
    [date] date,
    [time] char(4),
    channel int,
    channel_id int,
    is_estimate char(1),
    utc_offset int,
    import_history int,
    datecreated datetime
)

--example data you provided
insert into @table
    (service_point_id, usage_value, units, [date], [time], channel, channel_id,
     is_estimate, utc_offset, import_history, datecreated)
values
    (123, 1,   'kvarh', '2017-01-01', '0015', 1, 11, 'A', -500, 317, '2018-03-20 10:32:42.817'),
    (123, 0.2, 'kwh',   '2017-01-01', '0015', 1, 33, 'A', -500, 317, '2018-03-20 10:32:42.817'),
    (123, 0.3, 'kvarh', '2017-01-01', '0015', 2, 11, 'A', -500, 317, '2018-03-20 10:32:42.817'),
    (123, 0.4, 'kwh',   '2017-01-01', '0015', 2, 33, 'A', -500, 317, '2018-03-20 10:32:42.817')

--pivot query that does the work: only one column is aggregated, so the pivot stays
--simple. Every column other than usage_value and channel acts as a grouping column;
--in the sample data they are identical for both channels of a unit, so each unit
--collapses to one row.
select *, [1] - [2] as [net_usage_value]
from (select * from @table) as [t]
pivot (
    max(usage_value)
    for channel in ([1], [2])
) as [a]
-- Pivot channel 1 / channel 2 into columns, then collapse the pivoted rows with an
-- explicit GROUP BY so both channel values land on one row per group.
-- Fixes the original select list, which did not parse: the comma separating
-- [service_point_ID] from the next item had ended up inside the SUM( call.
SELECT
    [service_point_ID],
    SUM(ISNULL([1], 0) - ISNULL([2], 0)) AS net_usage_value,
    [units],
    SUM(ISNULL([1], 0)) AS [1],
    SUM(ISNULL([2], 0)) AS [2],
    [channel_ID],
    [date],
    [time],
    [is_estimate],
    [UTC_offset],
    [import_history_id]
FROM #temp1 AS SourceTable
PIVOT (SUM(usage_value) FOR channel IN ([1], [2])) AS PivotTable
GROUP BY
    [service_point_ID],
    [units],
    [channel_ID],
    [date],
    [time],
    [is_estimate],
    [UTC_offset],
    [import_history_id]
Inner join will out perform the pivot syntax. SQL Server pivot vs. multiple join
-- Sketch of the join-based alternative to PIVOT: alias "a" is restricted to channel 1
-- rows and alias "b" to channel 2 rows of the same service point and units, so the
-- subtraction yields channel 1 minus channel 2.
-- "other columns" is a placeholder to be replaced with the real column list; the
-- statement does not run as written.
-- Because this is an inner join, a reading that exists on only one channel produces
-- no output row.
select a.usage_value - b.usage_value as net_usage_value , other columns
from #temp1 a inner join #temp1 b on a.service_point_id = b.service_point_id
and a.units = b.units
and a.channel = 1
and b.channel = 2
gets around the group by as well.
Please help me to solve the following issue .
Consider that I have two tables in a database:
1.employee 2.Details
In employee table data will be
eid ename level
1 x 9th
2 y 10th
In Address Table data will be
AId eid location Adreess_type
1 1 india permananet
2 1 US Temporary
3 2 Japan permananet
4 2 China Temporary
I need output in the below format
eid ename fulllocation
1 X INDIA -US
2 y Japan-CHINA
Try this:
-- One row per employee with all of their locations joined by '-'.
-- Changes from the original:
--   * the employee name column is "ename" (there is no "name" column in employee);
--   * ename is added to GROUP BY so the query also runs with ONLY_FULL_GROUP_BY;
--   * ORDER BY a.AId inside GROUP_CONCAT fixes the order of the concatenated
--     locations (by address id) instead of leaving it up to the engine.
SELECT
    e.eid,
    e.ename,
    GROUP_CONCAT(a.location ORDER BY a.AId SEPARATOR '-') AS fulllocation
FROM
    employee AS e
    INNER JOIN address AS a
        ON e.eid = a.eid
GROUP BY
    e.eid,
    e.ename
-- Same result via a derived table: concatenate the locations per eid first,
-- then attach the employee columns.
SELECT
    e.eid,
    e.ename,
    loc.fulllocation
FROM employee AS e
INNER JOIN (
    SELECT
        eid,
        GROUP_CONCAT(location SEPARATOR '-') AS fulllocation
    FROM Address
    GROUP BY eid
) AS loc
    ON e.eid = loc.eid
Note that GROUP_CONCAT has a length limitation: the result is truncated to the value of the group_concat_max_len system variable (1024 bytes by default). Check the documentation for how to raise it if needed.
-- Table variables standing in for the employee and address tables.
-- Table variables take an "@" prefix; "DECLARE #t1 TABLE" is not valid T-SQL.
DECLARE @t1 TABLE
(
    eid int NOT NULL,
    ename varchar(50),
    level varchar(50)
)
DECLARE @t2 TABLE
(
    aid int NOT NULL,
    eid int,
    location varchar(50),
    address_type varchar(50)
)
INSERT INTO @t1 SELECT 1, 'x', '9th'
INSERT INTO @t1 SELECT 2, 'y', '10th'
INSERT INTO @t2 SELECT 1, 1, 'india', 'permanent'
INSERT INTO @t2 SELECT 2, 1, 'US', 'temporary'
INSERT INTO @t2 SELECT 3, 2, 'Japan', 'permanent'
INSERT INTO @t2 SELECT 4, 2, 'China', 'temporary'

-- Show the inputs.
SELECT * FROM @t1
SELECT * FROM @t2

-- One row per employee with the distinct locations concatenated as "a-b".
-- The inner correlated subquery builds "-a-b" with FOR XML PATH(''); TYPE/.value()
-- returns it as nvarchar(max), STUFF(..., 1, 1, '') removes the leading '-', and
-- ISNULL/LTRIM/RTRIM turn a missing list into an empty, trimmed string.
-- NOTE: DISTINCT removes repeated locations; the FOR XML subquery has no ORDER BY,
-- so the order of the locations inside the string is not guaranteed.
SELECT emp.eid, emp.ename, loc.fullLocation
FROM @t1 AS emp
INNER JOIN (
    SELECT grp.eid, COUNT(*) AS noofrecs
    , fullLocation = LTRIM(RTRIM(ISNULL(STUFF(
        (
            SELECT DISTINCT '-' + CAST(addr.location AS nvarchar(max))
            FROM @t2 AS addr
            WHERE grp.eid = addr.eid
            FOR XML PATH (''), TYPE).value('.', 'nvarchar(max)'
        ), 1, 1, ''), '')))
    FROM @t2 AS grp
    GROUP BY grp.eid
) AS loc
    ON emp.eid = loc.eid
-- Table variables standing in for the employee and address tables.
-- Table variables take an "@" prefix; "DECLARE #t1 TABLE" is not valid T-SQL.
DECLARE @t1 TABLE
(
    eid int NOT NULL,
    ename varchar(50),
    level varchar(50)
)
DECLARE @t2 TABLE
(
    aid int NOT NULL,
    eid int,
    location varchar(50),
    address_type varchar(50)
)
INSERT INTO @t1 SELECT 1, 'x', '9th'
INSERT INTO @t1 SELECT 2, 'y', '10th'
INSERT INTO @t2 SELECT 1, 1, 'india', 'permanent'
INSERT INTO @t2 SELECT 2, 1, 'US', 'temporary'
INSERT INTO @t2 SELECT 3, 2, 'Japan', 'permanent'
INSERT INTO @t2 SELECT 4, 2, 'China', 'temporary'

-- Show the inputs.
SELECT * FROM @t1
SELECT * FROM @t2

-- One row per employee with its locations joined by '-'.
-- Changes from the original:
--   * separator is the requested '-' (was '_ '); it is a single character, so
--     STUFF(..., 1, 1, '') now removes exactly the leading separator instead of
--     leaving a stray leading space;
--   * ORDER BY A.aid makes the order of the locations deterministic (address id order,
--     which in the sample data puts the permanent address first).
SELECT b.eid, b.ename
, STUFF((SELECT '-' + A.location
         FROM @t2 A
         WHERE A.eid = B.eid
         ORDER BY A.aid
         FOR XML PATH('')), 1, 1, '') AS fulllocation
FROM @t1 B
GROUP BY b.eid, b.ename
In order to preserve the order of the locations, you could work along
-- Permanent location first, then temporary, joined by '-'.
-- Each address type is reached through its own join to Address; the type filters
-- are spelled exactly as the values appear in the question's data.
SELECT
    e.eid,
    e.ename,
    CONCAT_WS('-', perm_addr.location, temp_addr.location) AS fulllocation
FROM Employee AS e
INNER JOIN Address AS perm_addr
    ON perm_addr.eid = e.eid
INNER JOIN Address AS temp_addr
    ON temp_addr.eid = e.eid
WHERE perm_addr.address_type = 'permananet'
  AND temp_addr.address_type = 'Temporary'
;
See it in action: SQL Fiddle.
Please comment if this requires adjustment or further detail.
I have below SQL query, am trying to insert data into test table, but I got another requirement that I need to insert the employee number along with his/her name.
Example
firstname : 71853-osama
My question is, How I can insert two values into one attribute
I tried this
badgeno +'-'+ convert(nvarchar(100),cEmpname) as cEmpname
but it didn't work
-- Copy active payper rows into PT, skipping badge numbers already present
-- in PT.ThirdPartyId. The SELECT items line up positionally with the INSERT columns.
INSERT INTO PT
(
    [FirstName],
    [LastName],
    [FirmID],
    [Note],
    [City],
    [ThirdPartyId],
    [RegisteredBy],
    [Registered],
    [LastUpdatedBy],
    [LastUpdated]
)
SELECT DISTINCT
    CONVERT(nvarchar(100), src.cEmpname)     AS cEmpname,      -- FirstName
    CONVERT(nvarchar(100), src.cJobTitle)    AS cJobTitle,     -- LastName
    '2'                                      AS FirmID,
    CONVERT(nvarchar(500), src.sort1)        AS sort1,         -- Note
    CONVERT(nvarchar(255), src.cnationality) AS cnationality,  -- City
    src.badgeno                              AS ThirdPartyId,
    'admin'                                  AS RegisteredBy,
    CURRENT_TIMESTAMP                        AS Registered,
    'admin'                                  AS LastUpdatedBy,
    CURRENT_TIMESTAMP                        AS LastUpdated
FROM [TrailBlazerNG].[dbo].[payper] AS src
WHERE src.lactive = '1'
  AND NOT EXISTS (
        SELECT 1
        FROM PT
        WHERE src.badgeno = PT.ThirdPartyId
      )
Try below.
-- Copy active payper rows into PT with FirstName stored as "<badgeno>-<name>",
-- skipping badge numbers already present in PT.ThirdPartyId.
--
-- [LastName] is restored to the column list: the SELECT returns ten items, so a
-- nine-column list makes the INSERT fail (select list has more items than the
-- insert list).
-- badgeno is converted to nvarchar before concatenation; with a numeric badgeno,
-- "badgeno + '-' + name" is treated as arithmetic and fails to convert the text.
-- NOTE: "+" yields NULL when either operand is NULL, so a row with a NULL badgeno or
-- cEmpname gets a NULL FirstName.
-- NOTE(review): the combined value can be longer than 100 characters; confirm it fits
-- PT.FirstName.
INSERT INTO PT
(
    [FirstName],
    [LastName],
    [FirmID],
    [Note],
    [City],
    [ThirdPartyId],
    [RegisteredBy],
    [Registered],
    [LastUpdatedBy],
    [LastUpdated]
)
SELECT DISTINCT
    CONVERT(nvarchar(100), badgeno) + '-' + CONVERT(nvarchar(100), cEmpname) AS cEmpname
    ,CONVERT(nvarchar(100), cJobTitle) AS cJobTitle
    ,'2' AS FirmID
    ,CONVERT(nvarchar(500), sort1) AS sort1
    ,CONVERT(nvarchar(255), cnationality) AS cnationality
    ,badgeno AS 'ThirdPartyId'
    ,'admin' AS RegisteredBy
    ,CURRENT_TIMESTAMP AS Registered
    ,'admin' AS LastUpdatedBy
    ,CURRENT_TIMESTAMP AS LastUpdated
FROM [TrailBlazerNG].[dbo].[payper]
WHERE lactive = '1'
  AND NOT EXISTS (SELECT 1 FROM PT WHERE payper.badgeno = PT.ThirdPartyId)
I have a TransactionMaster table in SQL Server 2012 that has a unique TransactionID. The same TransactionID will be available in the LowTransaction, MediumTransaction and HighTransaction tables.
For each TransactionID in TransactionMaster, I need to display one StatusMessage. The StatusMessage may come from any of the 3 tables - based on date formulated from CRTDTEC and CRTTIME columns..
What is the best way in SQL Server 2012 to select the StatusMessage corresponding to latest date?
Note: CRTDTEC Format - YYMMDD and CRTTIME Format - HHMMSS
CODE
-- Sample data as table variables. Table variables take an "@" prefix;
-- "DECLARE #Name TABLE" is not valid T-SQL.
-- CRTDTEC / CRTTIME hold the creation date and time as digit strings
-- (8-digit dates and 6-digit HHMMSS times in the rows below).
DECLARE @TransactionMaster TABLE (TransactionID INT)
DECLARE @LowTransaction TABLE (TransactionID INT, StatusMessage VARCHAR(80), CRTDTEC VARCHAR(8), CRTTIME VARCHAR(6))
DECLARE @MediumTransaction TABLE (TransactionID INT, StatusMessage VARCHAR(80), CRTDTEC VARCHAR(8), CRTTIME VARCHAR(6))
DECLARE @HighTransaction TABLE (TransactionID INT, StatusMessage VARCHAR(80), CRTDTEC VARCHAR(8), CRTTIME VARCHAR(6))

INSERT INTO @TransactionMaster VALUES (1)
INSERT INTO @TransactionMaster VALUES (2)
INSERT INTO @TransactionMaster VALUES (3)

-- Transaction 1: Low and Medium share the date; Low has the later time.
INSERT INTO @LowTransaction VALUES (1,'1 Low','20131213','235959')
INSERT INTO @MediumTransaction VALUES (1,'1','20131213','235900')
INSERT INTO @HighTransaction VALUES (1,'1 High','20111213','235959')

-- Transaction 2: present in the Low table only.
INSERT INTO @LowTransaction VALUES (2,'2 Low','20111213','235959')

-- Transaction 3: High has the latest date.
INSERT INTO @LowTransaction VALUES (3,'3 Low','20111213','235959')
INSERT INTO @MediumTransaction VALUES (3,'3 Medium','20111213','235959')
INSERT INTO @HighTransaction VALUES (3,'3 High','20140101','235959')
Expected Result
(1,'1 Low','20131213','235959')
(2,'2 Low','20111213','235959')
(3,'3 High','20140101','235959')
The simplest way would probably be
-- Latest status per master transaction: stack the three status tables, then for each
-- master row take the single newest stacked row (date first, time as tie-breaker).
-- Table variables are referenced with "@" (the "#" forms were not declared and are
-- not valid for table variables). The columns are listed explicitly so TransactionID
-- is returned once rather than twice (TM.TransactionID plus CA.*).
;WITH TransConsolidated
     AS (SELECT TransactionID, StatusMessage, CRTDTEC, CRTTIME
         FROM   @LowTransaction
         UNION ALL
         SELECT TransactionID, StatusMessage, CRTDTEC, CRTTIME
         FROM   @MediumTransaction
         UNION ALL
         SELECT TransactionID, StatusMessage, CRTDTEC, CRTTIME
         FROM   @HighTransaction)
SELECT TM.TransactionID,
       CA.StatusMessage,
       CA.CRTDTEC,
       CA.CRTTIME
FROM   @TransactionMaster TM
       -- CROSS APPLY drops master rows that have no status row in any of the tables.
       CROSS APPLY (SELECT TOP 1 TC.StatusMessage,
                                 TC.CRTDTEC,
                                 TC.CRTTIME
                    FROM   TransConsolidated TC
                    WHERE  TC.TransactionID = TM.TransactionID
                    ORDER  BY TC.CRTDTEC DESC,
                              TC.CRTTIME DESC) CA
Or another possibility (if there is a covering index ordered by TransactionID on all tables) would be to merge join all four tables involved
-- Latest status per master transaction using a single MAX over a sortable string.
-- For every candidate row the string CRTDTEC + CRTTIME + StatusMessage is built; since
-- date and time come first, the greatest string belongs to the newest row. The pieces
-- are then cut back out by position.
-- Layout of MaxRow (relies on CRTDTEC always being 8 and CRTTIME 6 characters, as in
-- the sample data): 1-8 date, 9-14 time, 15 onwards status message. The original
-- started the message at position 16 and therefore lost its first character.
-- Table variables are referenced with "@".
;WITH CTE
     AS (SELECT TM.TransactionID,
                MAX(CA.CRTDTEC + CA.CRTTIME + CA.StatusMessage) AS MaxRow
         FROM   @TransactionMaster TM
                LEFT MERGE JOIN @LowTransaction LT
                  ON LT.TransactionID = TM.TransactionID
                LEFT MERGE JOIN @MediumTransaction MT
                  ON MT.TransactionID = TM.TransactionID
                LEFT MERGE JOIN @HighTransaction HT
                  ON HT.TransactionID = TM.TransactionID
                -- Unpivot the three joined rows into one candidate row each; rows from
                -- a table with no match are all NULL and are ignored by MAX.
                CROSS APPLY (SELECT LT.*
                             UNION ALL
                             SELECT MT.*
                             UNION ALL
                             SELECT HT.*) CA
         GROUP  BY TM.TransactionID)
SELECT TransactionID,
       SUBSTRING(MaxRow, 1, 8)   AS CRTDTEC,
       SUBSTRING(MaxRow, 9, 6)   AS CRTTIME,
       SUBSTRING(MaxRow, 15, 80) AS StatusMessage
FROM   CTE
I guess you could do something like this...
-- Latest status per transaction: stack the three status tables, number the rows of
-- each transaction from newest to oldest, keep number 1.
-- CRTTIME is part of the ordering: the date alone ties for transaction 1
-- (Low and Medium are both 20131213), which left the winner undefined.
-- Table variables are referenced with "@".
SELECT TransactionID, StatusMessage, CRTDTEC, CRTTIME
FROM
(
    SELECT *,
           rn = ROW_NUMBER() OVER (PARTITION BY TransactionID
                                   ORDER BY CRTDTEC DESC, CRTTIME DESC)
    FROM
    (
        SELECT * FROM @LowTransaction
        UNION ALL
        SELECT * FROM @MediumTransaction
        UNION ALL
        SELECT * FROM @HighTransaction
    ) q
) q2
WHERE rn = 1
Result Set
TransactionID StatusMessage CRTDTEC CRTTIME
1 1 Low 20131213 235959
2 2 Low 20111213 235959
3 3 High 20140101 235959
I am facing a conceptual problem that I am having a hard time overcoming. I am hoping the SO folks can help me overcome it with a nudge in the right direction.
I am in the process of doing some ETL work with the source data being very similar and very large. I am loading it into a table that is intended for replication and I only want the most basic of information in this target table.
My source table looks something like this:
I need my target table to reflect it as such:
As you can see I didn't duplicate the InTransit status where it was duplicated in the source table. The steps I am trying to figure out how to achieve are
Get any new distinct rows entered since the last time the query ran. (Easy)
For each TrackingId I need to check if each new status is already the most recent status in the target and if so disregard otherwise go ahead and insert it. Which this means I have to also start at the earliest of the new statuses and go from there. (I have no *(!#in clue how I'll do this)
Do this every 15 minutes so that statuses are kept very recent so step #2 must be performant.
My source table could easily consist of 100k+ rows but having the need to run this every 15 minutes requires me to make sure this is very performant thus why I am really trying to avoid cursors.
Right now the only way I can see to do this is using a CLR sproc but I think there may be better ways thus I am hoping you guys can nudge me in the right direction.
I am sure I am probably leaving something out that you may need so please let me know what info you may need and I'll happily provide.
Thank you in advance!
EDIT:
OK, I wasn't explicit enough in my question. My source table is going to contain multiple tracking IDs. It may be up to 100k+ rows containing multiple TrackingIds and multiple statuses for each TrackingId. I have to update the target table as above for each individual tracking ID, but my source will be an amalgam of TrackingIds.
Here's a solution without self-joins:
-- First row of every run of consecutive equal statuses for one tracking id
-- (no self-join needed).
--   rn  : position of the row in the overall date order
--   rns : position of the row among rows with the same status
-- Inside one unbroken run of a status both counters advance together, so rn - rns is
-- constant for that run. The difference alone does not identify a run, though: runs of
-- different statuses can share the same difference, so the run key is
-- (status, rn - rns). rnn = 1 then marks the first row of each run.
-- Also fixed: the column is trackingId (was misspelled "tackingId") and the parameter
-- is the variable @id.
WITH    q AS
        (
        SELECT  *,
                ROW_NUMBER() OVER (ORDER BY statusDate) AS rn,
                ROW_NUMBER() OVER (PARTITION BY status ORDER BY statusDate) AS rns
        FROM    tracking
        WHERE   trackingId = @id
        ),
        qs AS
        (
        SELECT  *,
                ROW_NUMBER() OVER (PARTITION BY status, rn - rns ORDER BY statusDate) AS rnn
        FROM    q
        )
SELECT  *
FROM    qs
WHERE   rnn = 1
ORDER BY
        statusDate
Here's a script to check:
-- Self-contained check of the run-detection query.
-- @tracking is a table variable ("@" prefix; "DECLARE #tracking TABLE" is not valid).
DECLARE @tracking TABLE
        (
        id INT NOT NULL PRIMARY KEY,
        trackingId INT NOT NULL,
        status INT,
        statusDate DATETIME
        )

-- Ten rows for tracking id 1, one per day; the status sequence is
-- 1, 2, 2, 2, 3, 3, 4, 2, 2, 1.
INSERT
INTO    @tracking
SELECT  1, 1, 1, DATEADD(d, 1, '2010-01-01')
UNION ALL
SELECT  2, 1, 2, DATEADD(d, 2, '2010-01-01')
UNION ALL
SELECT  3, 1, 2, DATEADD(d, 3, '2010-01-01')
UNION ALL
SELECT  4, 1, 2, DATEADD(d, 4, '2010-01-01')
UNION ALL
SELECT  5, 1, 3, DATEADD(d, 5, '2010-01-01')
UNION ALL
SELECT  6, 1, 3, DATEADD(d, 6, '2010-01-01')
UNION ALL
SELECT  7, 1, 4, DATEADD(d, 7, '2010-01-01')
UNION ALL
SELECT  8, 1, 2, DATEADD(d, 8, '2010-01-01')
UNION ALL
SELECT  9, 1, 2, DATEADD(d, 9, '2010-01-01')
UNION ALL
SELECT  10, 1, 1, DATEADD(d, 10, '2010-01-01')

-- rn counts rows in date order, rns counts rows per status; rn - rns is constant
-- within one unbroken run of a status. The run key must include status: in this data
-- rows 5-6 (status 3) and rows 8-9 (status 2) both have rn - rns = 4, so partitioning
-- by the difference alone merges them and drops row 8.
-- With the (status, rn - rns) key the result is rows 1, 2, 5, 7, 8, 10.
;
WITH    q AS
        (
        SELECT  *,
                ROW_NUMBER() OVER (ORDER BY statusDate) AS rn,
                ROW_NUMBER() OVER (PARTITION BY status ORDER BY statusDate) AS rns
        FROM    @tracking
        ),
        qs AS
        (
        SELECT  *,
                ROW_NUMBER() OVER (PARTITION BY status, rn - rns ORDER BY statusDate) AS rnn
        FROM    q
        )
SELECT  *
FROM    qs
WHERE   rnn = 1
ORDER BY
        statusDate
Here you go. I'll let you clean it up and do optimizations. one of the sub queries can go into a view and the messy date comparison can be cleaned up. If you're using SQL 2008 R2 then use CAST as DATE instead.
-- @tbl1 = source statuses, @tbl2 = target statuses already loaded.
-- Both are table variables ("@" prefix; "declare #tbl1 table" is not valid T-SQL).
-- Apart from the sigils the query logic is unchanged.
declare @tbl1 table(
 id int, Trackingid int, Status varchar(50), StatusDate datetime
)
declare @tbl2 table(
 id int, Trackingid int, Status varchar(50), StatusDate datetime
)

----Source data
insert into @tbl1 (id, trackingid, status, statusdate) values(1,1,'PickedUp','10/01/10 1:00') --
insert into @tbl1 (id, trackingid, status, statusdate) values(2,1,'InTransit','10/02/10 1:00') --
insert into @tbl1 (id, trackingid, status, statusdate) values(8,1,'InTransit','10/02/10 3:00')
insert into @tbl1 (id, trackingid, status, statusdate) values(4,1,'Delayed','10/03/10 1:00')
insert into @tbl1 (id, trackingid, status, statusdate) values(5,1,'InTransit','10/03/10 1:01')
insert into @tbl1 (id, trackingid, status, statusdate) values(6,1,'AtDest','10/03/10 2:00')
insert into @tbl1 (id, trackingid, status, statusdate) values(7,1,'Deliv','10/03/10 3:00') --
insert into @tbl1 (id, trackingid, status, statusdate) values(3,2,'InTransit','10/03/10 1:00')
insert into @tbl1 (id, trackingid, status, statusdate) values(9,2,'AtDest','10/04/10 1:00')
insert into @tbl1 (id, trackingid, status, statusdate) values(10,2,'Deliv','10/04/10 1:05')
insert into @tbl1 (id, trackingid, status, statusdate) values(11,1,'Delayed','10/02/10 2:05')

----Target data
insert into @tbl2 (id, trackingid, status, statusdate) values(1,1,'PickedUp','10/01/10 1:00')
insert into @tbl2 (id, trackingid, status, statusdate) values(2,1,'InTransit','10/02/10 1:00')
insert into @tbl2 (id, trackingid, status, statusdate) values(3,1,'Deliv','10/03/10 3:00')

-- Candidate rows "d": every source row, numbered (RN) by statusdate within its
-- tracking id and calendar day (the CAST(STR(YEAR)/STR(MONTH)/STR(DAY)) expression
-- strips the time part).
select d.* from
(
 select
 * ,
 ROW_NUMBER() OVER(PARTITION BY trackingid, CAST((STR( YEAR( statusdate ) ) + '/' +STR( MONTH(statusdate ) ) + '/' +STR( DAY( statusdate ) )) AS DATETIME) ORDER BY statusdate) AS 'RN'
 from @tbl1
) d
where
-- 1) Drop a row when the next row (RN + 1) of the same tracking id and day carries
--    the same status, i.e. keep only the last row of a same-day run of equal statuses.
not exists
(
 select RN from
 (
 select
 * ,
 ROW_NUMBER() OVER(PARTITION BY trackingid, CAST((STR( YEAR( statusdate ) ) + '/' +STR( MONTH(statusdate ) ) + '/' +STR( DAY( statusdate ) )) AS DATETIME) ORDER BY statusdate) AS 'RN'
 from @tbl1
 )f where f.RN = d.RN + 1 and d.status = f.status and f.trackingid = d.trackingid and
 CAST((STR( YEAR( f.statusdate ) ) + '/' +STR( MONTH(f.statusdate ) ) + '/' +STR( DAY( f.statusdate ) )) AS DATETIME) =
 CAST((STR( YEAR( d.statusdate ) ) + '/' +STR( MONTH(d.statusdate ) ) + '/' +STR( DAY( d.statusdate ) )) AS DATETIME)
)
-- 2) Drop rows that already exist in the target (same tracking id, date and status).
and
not exists
(
 select 1 from @tbl2 t2
 where (t2.trackingid = d.trackingid
 and t2.statusdate = d.statusdate
 and t2.status = d.status)
)
-- 3) Keep the row only if it differs from the target's most recent status for the
--    tracking id, or from the target's most recent status at or before this row's date
--    (either test also passes when the target has no such row).
and (
 not exists
 (
 select 1 from
 (
 select top 1 * from @tbl2 t2
 where t2.trackingid = d.trackingid
 order by t2.statusdate desc
 ) g
 where g.status = d.status
 )
 or not exists
 (
 select 1 from
 (
 select top 1 * from @tbl2 t2
 where t2.trackingid = d.trackingid
 and t2.statusdate <= d.statusdate
 order by t2.statusdate desc
 ) g
 where g.status = d.status
 )
)
order by trackingid,statusdate
How well this performs will depend on indexes, and particularly if you are targeting a single TrackingID at a time, but this is one way to use a CTE and self-join to obtain the desired results:
-- Sample status history for one shipment.
CREATE TABLE #foo
(
    TrackingID INT,
    [Status]   VARCHAR(32),
    StatusDate SMALLDATETIME
);

INSERT INTO #foo (TrackingID, [Status], StatusDate) VALUES (1, 'PickedUp',  '2010-10-01 08:15');
INSERT INTO #foo (TrackingID, [Status], StatusDate) VALUES (1, 'InTransit', '2010-10-02 03:07');
INSERT INTO #foo (TrackingID, [Status], StatusDate) VALUES (1, 'InTransit', '2010-10-02 10:28');
INSERT INTO #foo (TrackingID, [Status], StatusDate) VALUES (1, 'Delayed',   '2010-10-03 09:52');
INSERT INTO #foo (TrackingID, [Status], StatusDate) VALUES (1, 'InTransit', '2010-10-03 20:09');
INSERT INTO #foo (TrackingID, [Status], StatusDate) VALUES (1, 'AtDest',    '2010-10-04 13:42');
INSERT INTO #foo (TrackingID, [Status], StatusDate) VALUES (1, 'Deliv',     '2010-10-04 17:05');

-- numbered: rows of tracking id 1 in date order.
-- paired:   each row next to the row that immediately follows it (NULLs for the last).
-- A row is returned when it has no following row or the following row has a
-- different status.
WITH numbered AS
(
    SELECT
        TrackingID,
        [Status],
        StatusDate,
        ROW_NUMBER() OVER (ORDER BY [StatusDate]) AS ab
    FROM #foo
    WHERE TrackingID = 1
),
paired AS
(
    SELECT
        cur.TrackingID,
        cur.ab         AS leftrow,
        nxt.ab         AS rightrow,
        cur.[Status]   AS leftstatus,
        cur.StatusDate AS leftstatusdate,
        nxt.[Status]   AS rightstatus,
        nxt.StatusDate AS rightstatusdate
    FROM numbered AS cur
    LEFT OUTER JOIN numbered AS nxt
        ON nxt.ab - 1 = cur.ab
)
SELECT
    ROW_NUMBER() OVER (ORDER BY [leftstatusdate]) AS Id,
    TrackingID,
    leftstatus     AS [Status],
    leftstatusdate AS [StatusDate]
FROM paired
WHERE rightrow IS NULL
   OR (leftrow = rightrow - 1 AND leftstatus <> rightstatus)
ORDER BY [StatusDate];
GO
DROP TABLE #foo;
If you need to support multiple TrackingIDs in the same query:
-- Sample status history for two shipments with overlapping timestamps.
CREATE TABLE #foo
(
    TrackingID INT,
    [Status] VARCHAR(32),
    StatusDate SMALLDATETIME
);
INSERT #foo SELECT 1, 'PickedUp', '2010-10-01 08:15';
INSERT #foo SELECT 1, 'InTransit', '2010-10-02 03:07';
INSERT #foo SELECT 1, 'InTransit', '2010-10-02 10:28';
INSERT #foo SELECT 1, 'Delayed', '2010-10-03 09:52';
INSERT #foo SELECT 1, 'InTransit', '2010-10-03 20:09';
INSERT #foo SELECT 1, 'AtDest', '2010-10-04 13:42';
INSERT #foo SELECT 1, 'Deliv', '2010-10-04 17:05';
INSERT #foo SELECT 2, 'InTransit', '2010-10-02 10:28';
INSERT #foo SELECT 2, 'Delayed', '2010-10-03 09:52';
INSERT #foo SELECT 2, 'InTransit', '2010-10-03 20:09';
INSERT #foo SELECT 2, 'AtDest', '2010-10-04 13:42';

-- src:     rows numbered in date order *within each TrackingID*.
-- realsrc: each row next to the following row of the same TrackingID.
-- A row is returned when it has no following row or the following row has a
-- different status.
--
-- The numbering must be partitioned by TrackingID. With one global sequence the row
-- numbered ab + 1 can belong to another tracking id (the two shipments share
-- timestamps here); the join then finds no partner, rightrow is NULL, and the row is
-- returned even though the next row of its own tracking id has the same status.
WITH src AS
(
    SELECT
        TrackingID,
        [Status],
        StatusDate,
        ab = ROW_NUMBER() OVER (PARTITION BY TrackingID ORDER BY [StatusDate])
    FROM #foo
),
realsrc AS
(
    SELECT
        a.TrackingID,
        leftrow = a.ab,
        rightrow = b.ab,
        leftstatus = a.[Status],
        leftstatusdate = a.StatusDate,
        rightstatus = b.[Status],
        rightstatusdate = b.StatusDate
    FROM src AS a
    LEFT OUTER JOIN src AS b
        ON a.ab = b.ab - 1
        AND a.TrackingID = b.TrackingID
)
SELECT
    Id = ROW_NUMBER() OVER (ORDER BY TrackingID, [leftstatusdate]),
    TrackingID,
    [Status] = leftstatus,
    [StatusDate] = leftstatusdate
FROM
    realsrc
WHERE
    rightrow IS NULL
    OR (leftrow = rightrow - 1 AND leftstatus <> rightstatus)
ORDER BY
    TrackingID,
    [StatusDate];
GO
DROP TABLE #foo;
If this is SQL 2005 then you can use ROW_NUMBER with a sub query or CTE:
If the dataset is really huge though and performance is an issue then one of the above that got pasted while I was trying to get the code block to work could well be more efficient.
/**
 * This is just to create a sample table to use in the test query.
 * @test is a table variable ("@" prefix; "DECLARE #test TABLE" is not valid T-SQL).
 * The rows are distinct, so UNION ALL is used: the duplicate-removal step of plain
 * UNION adds nothing here.
 **/
DECLARE @test TABLE(ID INT, TrackingID INT, Status VARCHAR(20), StatusDate DATETIME)
INSERT @test
SELECT 1,1,'PickedUp', '01 jan 2010 08:00' UNION ALL
SELECT 2,1,'InTransit', '01 jan 2010 08:01' UNION ALL
SELECT 3,1,'InTransit', '01 jan 2010 08:02' UNION ALL
SELECT 4,1,'Delayed', '01 jan 2010 08:03' UNION ALL
SELECT 5,1,'InTransit', '01 jan 2010 08:04' UNION ALL
SELECT 6,1,'AtDest', '01 jan 2010 08:05' UNION ALL
SELECT 7,1,'Deliv', '01 jan 2010 08:06'

/**
 * This would be the select code to exclude the duplicate entries.
 * Sorting desc in row_number would get latest instead of first.
 *
 * NOTE(review): the numbering is per (TrackingID, Status), so only the first
 * occurrence of each status is kept for a tracking id. A status that comes back
 * after a different one (ID 5, 'InTransit' after 'Delayed') is dropped as well,
 * not just immediately repeated rows. Confirm that this is the intended behaviour.
 **/
;WITH n AS
(
    SELECT ID,
           TrackingID,
           Status,
           StatusDate,
           --For each Status for a tracking ID number by ID (could use date but 2 may be the same)
           ROW_NUMBER() OVER(PARTITION BY TrackingID, Status ORDER BY ID) AS [StatusNumber]
    FROM @test
)
SELECT ID,
       TrackingID,
       Status,
       StatusDate
FROM n
WHERE StatusNumber = 1
ORDER BY ID
I think this example will do what you're looking for:
-- Source of raw status events.
CREATE TABLE dbo.srcStatus
(
    Id         INT IDENTITY(1,1),
    TrackingId INT         NOT NULL,
    [Status]   VARCHAR(10) NOT NULL,
    StatusDate DATETIME    NOT NULL
);

-- Target with the same shape; it starts out empty.
CREATE TABLE dbo.tgtStatus
(
    Id         INT IDENTITY(1,1),
    TrackingId INT         NOT NULL,
    [Status]   VARCHAR(10) NOT NULL,
    StatusDate DATETIME    NOT NULL
);

-- Events for tracking ids 1 and 2, inserted one statement at a time in the
-- original order (Id is an IDENTITY column).
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (1, 'PickedUp',  '10/1/2010 8:15 AM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (1, 'InTransit', '10/2/2010 3:07 AM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (1, 'InTransit', '10/2/2010 10:28 AM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (2, 'PickedUp',  '10/1/2010 8:15 AM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (2, 'InTransit', '10/2/2010 3:07 AM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (2, 'Delayed',   '10/2/2010 10:28 AM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (1, 'Delayed',   '10/3/2010 9:52 AM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (1, 'InTransit', '10/3/2010 8:09 PM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (1, 'AtDest',    '10/4/2010 1:42 PM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (1, 'Deliv',     '10/4/2010 5:05 PM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (2, 'InTransit', '10/3/2010 9:52 AM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (2, 'InTransit', '10/3/2010 8:09 PM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (2, 'AtDest',    '10/4/2010 1:42 PM');
INSERT INTO dbo.srcStatus (TrackingId, [Status], StatusDate) VALUES (2, 'Deliv',     '10/4/2010 5:05 PM');
-- Insert into dbo.tgtStatus the first row of every run of consecutive equal statuses
-- per TrackingId, skipping rows the target already contains.
--
-- Fix: in cteTrackingIdsWorkingSet the row number used to be ordered by
-- src.StatusDate, which is constant inside a (TrackingId, src.rownum) partition, so
-- which joined src2 row received rownum = 1 was arbitrary. It is now ordered by
-- src2.rownum, so id2 is always the NEAREST later row with a different status. All
-- rows of one run then share the same id2, which the next step relies on.
WITH    cteSrcTrackingIds
          -- Tracking ids that occur in the source.
          AS ( SELECT DISTINCT
                        TrackingId
               FROM     dbo.srcStatus
             ),
        cteAllTrackingIds
          -- Source rows plus the existing target rows of those tracking ids
          -- (UNION removes rows present in both).
          AS ( SELECT   TrackingId ,
                        [Status] ,
                        StatusDate
               FROM     dbo.srcStatus
               UNION
               SELECT   tgtStatus.TrackingId ,
                        tgtStatus.[Status] ,
                        tgtStatus.StatusDate
               FROM     cteSrcTrackingIds
                        INNER JOIN dbo.tgtStatus ON cteSrcTrackingIds.TrackingId = tgtStatus.TrackingId
             ),
        cteAllTrackingIdsWithRownums
          -- Position of every row in date order within its tracking id.
          AS ( SELECT   TrackingId ,
                        [Status] ,
                        StatusDate ,
                        ROW_NUMBER() OVER ( PARTITION BY TrackingId ORDER BY StatusDate ) AS rownum
               FROM     cteAllTrackingIds
             ),
        cteTrackingIdsWorkingSet
          -- Pair every row (id) with all later rows of the same tracking id whose status
          -- differs; rownum = 1 marks the nearest of them (id2). id2 is NULL when no
          -- later row has a different status.
          AS ( SELECT   src.rownum AS [id] ,
                        src2.rownum AS [id2] ,
                        src.TrackingId ,
                        src.[Status] ,
                        src.StatusDate ,
                        ROW_NUMBER() OVER ( PARTITION BY src.TrackingId,
                                            src.rownum ORDER BY src2.rownum ) AS rownum
               FROM     cteAllTrackingIdsWithRownums AS [src]
                        LEFT OUTER JOIN cteAllTrackingIdsWithRownums AS [src2] ON src.TrackingId = src2.TrackingId
                                                              AND src.rownum < src2.rownum
                                                              AND src.[Status] != src2.[Status]
             ),
        cteTrackingIdsSubset
          -- Rows that share the same "next different row" (id2) form one run;
          -- number them by position so rownum = 1 is the first row of the run.
          AS ( SELECT   id ,
                        TrackingId ,
                        [Status] ,
                        StatusDate ,
                        ROW_NUMBER() OVER ( PARTITION BY TrackingId, id2 ORDER BY id ) AS rownum
               FROM     cteTrackingIdsWorkingSet
               WHERE    rownum = 1
             )
    INSERT  INTO dbo.tgtStatus
            ( TrackingId ,
              [status] ,
              StatusDate
            )
            SELECT  cteTrackingIdsSubset.TrackingId ,
                    cteTrackingIdsSubset.[status] ,
                    cteTrackingIdsSubset.StatusDate
            FROM    cteTrackingIdsSubset
                    -- Anti-join: keep only rows with no identical row in the target.
                    LEFT OUTER JOIN dbo.tgtStatus ON cteTrackingIdsSubset.TrackingId = tgtStatus.TrackingId
                                                     AND cteTrackingIdsSubset.[status] = tgtStatus.[status]
                                                     AND cteTrackingIdsSubset.StatusDate = tgtStatus.StatusDate
            WHERE   cteTrackingIdsSubset.rownum = 1
                    AND tgtStatus.id IS NULL
            ORDER BY cteTrackingIdsSubset.TrackingId ,
                    cteTrackingIdsSubset.StatusDate;