SELECT latest entry of versioned SQL text field - sql

I have a log table where one of the fields is a filename. These filenames are versioned with a suffix at the end of filename. Say we made file SampleName.xml but later had to revise this -- the new version would appear in the log as SampleName_V2.xml (and this could continue increasing indefinitely, but the most I've seen is V8).
I need a way to SELECT every entry in this log, but only keep the entry with the latest version number on the filename.
I feel like there's got to be an easy answer to this, but I've been trying to think of it all day and can't come to it.
Anyone have any ideas?
EDIT: We do have a DateTime field in every row as well, if that helps.

Here is something that will do the job for you. The idea is to use a temp table that also holds the file names without the _v suffix.
I’ve probably made this more complex than needed, but you’ll be able to see the point.
-- Demo fixture: one row per logged file.
--   Original    : file name exactly as logged (may carry a _vN suffix)
--   WO_Version  : the same name with the _vN suffix removed (groups all revisions)
--   Last_Update : timestamp of the log entry
-- Drop the table only if an earlier run left it behind, so the first run of
-- this script does not fail on a missing object.
IF OBJECT_ID('tempdb..#TmpResults') IS NOT NULL
    DROP TABLE #TmpResults;

CREATE TABLE #TmpResults
(
    Original    nvarchar(100),
    WO_Version  nvarchar(100),
    Last_Update datetime
);

-- NOTE(review): these date literals are parsed using the session's date
-- format setting — confirm month/day order if it matters for your data.
INSERT INTO #TmpResults
    (Original, WO_Version, Last_Update)
VALUES
    ('file1.xml',    'file1.xml', '01/01/2013'),
    ('file2.xml',    'file2.xml', '02/01/2013'),
    ('file2_v2.xml', 'file2.xml', '03/01/2013'),
    ('file3.xml',    'file3.xml', '01/01/2013'),
    ('file3_v2.xml', 'file3.xml', '01/02/2013'),
    ('file3_v3.xml', 'file3.xml', '01/03/2013'),
    ('file4.xml',    'file4.xml', '05/01/2013'),
    ('file5.xml',    'file5.xml', '06/01/2013'),
    ('file5_v2.xml', 'file5.xml', '06/02/2013'),
    ('file5_v3.xml', 'file5.xml', '06/03/2013'),
    ('file5_v4.xml', 'file5.xml', '06/04/2013');
-- Latest logged revision of every file.
-- Rows are grouped by WO_Version (the file name without its _vN suffix) and
-- only the newest row of each group is kept. Ranking inside the group
-- guarantees that Last_FileVersion comes from the same file as Last_Update;
-- looking the name up by timestamp alone can return a different file that
-- happens to share that timestamp.
SELECT
    ranked.WO_Version,
    ranked.Last_Update,
    ranked.Original AS Last_FileVersion
FROM
(
    SELECT
        T.WO_Version,
        T.Last_Update,
        T.Original,
        -- Original is only a tie-breaker so the result is deterministic
        -- when two revisions carry the same timestamp.
        ROW_NUMBER() OVER (PARTITION BY T.WO_Version
                           ORDER BY T.Last_Update DESC, T.Original DESC) AS rn
    FROM #TmpResults AS T
) AS ranked
WHERE ranked.rn = 1;
Here is the SELECT query you can use to fill the temp table with SELECT INTO
-- Derives WO_Version (the file name without its version suffix) for every
-- row of OriginalDataTable.
-- A version suffix is "_v" followed by one or more digits immediately before
-- the extension, e.g. file2_v2.xml -> file2.xml. Names without such a suffix
-- are returned unchanged.
SELECT
    src.Original_File_Name,
    CASE
        -- first_non_digit > 1 means the base name ends in at least one digit;
        -- the two characters before those digits (read backwards) must be "v_".
        WHEN ver.first_non_digit > 1
         AND LOWER(SUBSTRING(parts.reversed_base, ver.first_non_digit, 2)) = 'v_'
            THEN LEFT(parts.base_name, LEN(parts.base_name) - ver.first_non_digit - 1)
        ELSE parts.base_name
    END + parts.extension AS WO_Version,
    src.Last_Update
FROM OriginalDataTable AS src
CROSS APPLY
(
    -- Length of the extension including its dot; 0 when the name has no dot.
    SELECT CHARINDEX('.', REVERSE(src.Original_File_Name)) AS ext_len
) AS dot
CROSS APPLY
(
    SELECT
        LEFT(src.Original_File_Name, LEN(src.Original_File_Name) - dot.ext_len)          AS base_name,
        REVERSE(LEFT(src.Original_File_Name, LEN(src.Original_File_Name) - dot.ext_len)) AS reversed_base,
        RIGHT(src.Original_File_Name, dot.ext_len)                                       AS extension
) AS parts
CROSS APPLY
(
    -- Position of the first non-digit in the reversed base name,
    -- i.e. the number of trailing digits + 1 (0 if the name is all digits).
    SELECT PATINDEX('%[^0-9]%', parts.reversed_base) AS first_non_digit
) AS ver;

I think this will give you the result:
-- Returns a single row: the filename of the most recent log entry in the
-- whole table (newest datetime_field first). It does not return one row per file.
-- NOTE(review): `table` looks like a placeholder for the real log table name — confirm.
SELECT TOP(1) filename
FROM table
ORDER BY datetime_field DESC
If you are sure that your version number is in the order of _V1 to _V8 this will help you
-- Returns the filename carrying the highest single-digit version suffix
-- (_V1 .. _V9 directly before the extension).
-- A name without a suffix counts as version 1, so it no longer breaks the
-- integer conversion, and the sort is descending so the highest version
-- (not the lowest) comes first.
-- NOTE(review): `table` looks like a placeholder for the real log table name — confirm.
SELECT TOP (1) filename
FROM table
ORDER BY
    CASE
        WHEN filename LIKE '%[_][Vv][0-9].%'
            -- +2 skips the "_V" so SUBSTRING lands on the digit itself.
            THEN CAST(SUBSTRING(filename,
                                PATINDEX('%[_][Vv][0-9].%', filename) + 2,
                                1) AS INT)
        ELSE 1
    END DESC
UPDATED
I am suggesting another method, which gives you every file name at its latest version.
-- One row per logical file: the most recently logged revision.
-- Rows are partitioned by the file name with its _V<n> suffix removed (plus
-- the extension), then ranked newest-first by date_field; rno = 1 is the
-- latest revision.
-- Recognised suffixes are _V followed by one or two digits directly before a dot.
-- NOTE(review): `table` looks like a placeholder for the real log table name — confirm.
;WITH cte AS
(
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY
                -- Base name: everything before the version suffix, or before
                -- the extension when there is no suffix.
                CASE
                    WHEN v.suffix_start IS NOT NULL
                        THEN LEFT(filename, v.suffix_start - 1)
                    ELSE LEFT(filename, LEN(filename) - CHARINDEX('.', REVERSE(filename)))
                END,
                -- Extension (with dot), so a.xml and a.txt stay separate files.
                RIGHT(filename, CHARINDEX('.', REVERSE(filename)))
            ORDER BY date_field DESC
        ) AS rno,
        filename
    FROM table
    CROSS APPLY
    (
        -- Start position of the "_V<n>" suffix, or NULL when there is none.
        SELECT COALESCE(
                   NULLIF(PATINDEX('%[_][Vv][0-9].%', filename), 0),
                   NULLIF(PATINDEX('%[_][Vv][0-9][0-9].%', filename), 0)
               ) AS suffix_start
    ) AS v
)
SELECT rno, filename
FROM cte
WHERE rno = 1

Related

How to use a special while loop in tsql, do while numeric

I'm loading some quite nasty data through Azure data factory
This is how the data looks after being loaded, consisting of 2 parts:
1. Metadata of a test
2. Actual measurements of the test -> the measurement is numeric
Imagine I have about 10 such 'packages' of 1. Metadata + 2. Measurements.
What I would like it to be / what I'm looking for is the following:
The number column with 1,2,.... is what I'm looking for!
Imagine my screenshot could go no further but this goes along until id=10
I guess a while loop is necessary here...
Query before:
SELECT Field1 FROM Input
Query after:
SELECT GeneratedId, Field1 FROM Input
Thanks a lot in advance!
EDIT: added a hint:
Here is a solution, this requires SQL-SERVER 2012 or later.
Start by getting an Id column on your data. If you can do this previous to the script that would be even better, but if not, try something like this...
-- Staging copy of Input.Field1 with an auto-generated Id column, so that rows
-- can later be addressed by position (used as StartId / EndId of a package).
CREATE TABLE #InputTable (
Id INT IDENTITY(1, 1), -- numbering starts at 1 and increases by 1 per inserted row
TestData NVARCHAR(MAX) )
-- NOTE(review): there is no ORDER BY on this SELECT, so the Id sequence presumably
-- relies on Input being read in its original load order — confirm.
INSERT INTO #InputTable (TestData)
SELECT Field1 FROM Input
Now create a query to get the GeneratedId of each package as well as the Id where they start and end. You can do this by getting all the records LIKE 'title%' since that is the first record of each package, then using ROW_NUMBER, Id, and LEAD for the GeneratedId, StartId, and EndId respectively.
-- One row per package. A package starts at every row whose TestData begins
-- with 'title'.
--   GeneratedId : running number of the package, in Id order
--   StartId     : Id of the package's first row
--   EndId       : Id of the next package's first row (NULL for the last package)
SELECT
    ROW_NUMBER() OVER (ORDER BY header.Id)    AS GeneratedId,
    header.Id                                 AS StartId,
    LEAD(header.Id) OVER (ORDER BY header.Id) AS EndId
FROM #InputTable AS header
WHERE header.TestData LIKE 'title%'
Lastly, join this to the input in order to get all the records, with the correct GeneratedId.
-- Tags every input row with the GeneratedId of the package it belongs to.
-- A row belongs to a package when its Id lies in [StartId, EndId); the last
-- package has no EndId and therefore takes every remaining row.
;WITH package AS
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY header.Id)    AS GeneratedId,
        header.Id                                 AS StartId,
        LEAD(header.Id) OVER (ORDER BY header.Id) AS EndId
    FROM #InputTable AS header
    WHERE header.TestData LIKE 'title%'
)
SELECT
    package.GeneratedId,
    i.TestData
FROM package
INNER JOIN #InputTable AS i
    ON  i.Id >= package.StartId
    AND (i.Id < package.EndId OR package.EndId IS NULL)

Reconcile Rows using sql

I am trying to set up a process to reconcile a table based on specific constraints. (SQL Server)
Table contains the following cols,
Start Time
End Time
Status
Hours
Note
My logic is the following
starting at row 2
if Start Time(row 2) = End time( row 1 ) and status(row2)=status(row1)
then
hours = hours(row1)+hours(2)
move to next row
Any tips would be greatly appreciated on how I should approach this problem.
Thanks
Your question is unclear, but the following should contain the elements that should help you achieve what you really want (please, edit the question like already suggested):
Setup
-- Sample table for the reconciliation example; RowNo fixes the row order.
-- (Uncomment the next line to reset between runs.)
-- DROP TABLE DatesAndTimes
CREATE TABLE DatesAndTimes
(
    RowNo     INT IDENTITY(1, 1) NOT NULL,
    StartTime DATETIME2,
    EndTime   DATETIME2,
    Status    INT,
    Hours     INT
)
GO

-- Rows 1-2 and rows 3-4 are contiguous (EndTime = next StartTime) with equal Status.
INSERT INTO DatesAndTimes
    (StartTime, EndTime, Status, Hours)
VALUES
    ('20170101', '20170102', 1, 5),
    ('20170102', '20170103', 1, 6),
    ('20170104', '20170105', 2, 4),
    ('20170105', '20170107', 2, 3),
    ('20170110', '20170111', 3, 2)
Test
-- Dry run: show the data, apply the reconciliation inside a transaction,
-- show the result, then roll everything back.
select * from DatesAndTimes
begin tran
-- cte pairs every row with the EndTime, Status and Hours of the row before it (by RowNo).
-- NOTE(review): the TOP 100 PERCENT ... ORDER BY below is presumably not honoured
-- by the optimizer; the "previous row" is defined by the ORDER BY inside each
-- LAG(...) OVER (...) — confirm against the SQL Server ORDER BY documentation.
;with cte as (
select TOP 100 PERCENT RowNo, StartTime, EndTime, Status, Hours,
LAG(EndTime) OVER (ORDER BY RowNo) PrevEndTime,
LAG(Status) OVER (ORDER BY RowNo) PrevStatus,
LAG(Hours) OVER (ORDER BY RowNo) PrevHours
from DatesAndTimes
order by RowNo
)
-- When a row starts exactly where the previous row ended and has the same Status,
-- its Hours become Hours + the previous row's Hours; otherwise Hours are unchanged.
-- Only the immediately preceding row is added (no running total over longer chains).
update Dest
SET Dest.Hours = (
CASE WHEN C.StartTime = C.PrevEndTime AND C.Status = C.PrevStatus THEN C.Hours + C.PrevHours
ELSE C.Hours
END)
from cte AS C
join DatesAndTimes Dest ON Dest.RowNo = C.RowNo
select * from DatesAndTimes
rollback
begin tran .. rollback are there because I do not want to actually update the initial data in the table. They should be dropped when really doing the update.
LAG is a SQL Server 2012+ function that gives access to values from a preceding row ("behind" the current one). Its counterpart LEAD reads from a following row; in SQL Server the offset argument cannot be negative.
TOP 100 PERCENT .. ORDER BY is there in an attempt to ensure the order of the UPDATE. Although processing usually follows the clustered index or the insert order of the records, that is not guaranteed. It is not the cleverest way to order (ORDER BY is not allowed in a CTE without TOP, so this looks like a hack to me).

SQL: Dedupe table data and manipulate merged data

I have an SQL table with:
Id INT, Name NVARCHAR(MAX), OldName NVARCHAR(MAX)
There are multiple duplicates in the name column.
I would like to remove these duplicates, keeping only one master copy of 'Name'. When the dedupe happens I want to concatenate the old names into the OldName field.
E.G:
Dave | Steve
Dave | Will
Would become
Dave | Steve, Will
After merging.
I know how to de-dupe data using something like:
-- Lists the surplus rows of every (OrderNo, item) group: rows are numbered
-- within their group and everything after the first one is returned.
SELECT ranked.*
FROM
(
    SELECT
        t.*,
        ROW_NUMBER() OVER (PARTITION BY OrderNo, item ORDER BY OrderNo) AS rn
    FROM #temp1 AS t
) AS ranked
WHERE ranked.rn > 1
But not sure how to update the new 'master' record whilst I am at it.
This is really too complicated to do in a single update, because you need to update and delete rows.
-- Collapses duplicate names in sqltable into one row per name, with all old
-- names concatenated as "a, b, c".
--
-- Step 1: build the merged rows in a permanent work table (_temp), so the data
--         survives if anything goes wrong before step 2 completes.
-- Step 2: replace the contents of sqltable with the merged rows, atomically.
SET XACT_ABORT ON;  -- any error inside the transaction rolls it back

SELECT
    n.name,
    -- FOR XML PATH('') concatenates ', <oldname>' for every row of this name;
    -- STUFF(..., 1, 2, '') removes the leading ', '.
    STUFF((SELECT ', ' + t2.oldname
           FROM sqltable AS t2
           WHERE t2.name = n.name
           FOR XML PATH (''), TYPE
          ).value('/', 'nvarchar(max)'
          ), 1, 2, '') AS oldnames
INTO _temp
FROM (SELECT DISTINCT name FROM sqltable) AS n;

-- TRUNCATE and INSERT run in one transaction so the table is never left empty.
-- NOTE(review): Id is not supplied here; this presumably relies on Id being an
-- IDENTITY or nullable column — confirm, or carry MIN(id) through _temp.
BEGIN TRANSACTION;

TRUNCATE TABLE sqltable;

INSERT INTO sqltable (name, oldname)
SELECT name, oldnames
FROM _temp;

COMMIT TRANSACTION;
Of course, test, test, test before deleting the old table (copy it for safe keeping). This doesn't use a temporary table. That way, if something happens -- like a server reboot -- before the insert is finished, you still have all the data.
Your question doesn't specify what to do with the id column. You can add min(id) or max(id) to the _temp if you want to use one of those values.

How to select values by date field (not as simple as it sounds)

I have a table called tblMK The table contains a date time field.
What I wish to do is create a query which will each time, select the 2 latest entries (by the datetime column) and then get the date difference between them and show only that.
How would I go about creating this expression? This doesn't necessarily need to be a query; it could be a view/function/procedure or whatever works. I have created a function called getdatediff which receives two dates and returns a string that says (x days y hours z minutes); basically that will be the calculated field. So how would I go about doing this?
Edit: I need to each time select 2 and 2 and so on until the oldest one. There will always be an even amount of rows.
Use only sql like this:
-- Demo: three rows one day apart, then the day difference between the two
-- most recent dt values.
CREATE TABLE t1 (c1 integer, dt datetime);

INSERT INTO t1
VALUES
    (1, GETDATE()),
    (2, DATEADD(day, 1, GETDATE())),
    (3, DATEADD(day, 2, GETDATE()));

-- The derived table keeps only the two newest rows; MIN/MAX then pick the
-- older and the newer of that pair.
SELECT DATEDIFF(day, MIN(latest.dt), MAX(latest.dt)) AS diff_of_dates
FROM
(
    SELECT TOP 2 dt
    FROM t1
    ORDER BY dt DESC
) AS latest;
sql fiddle
On MySQL use limit clause
-- MySQL: elapsed time, in seconds, between the two most recent updated_at values.
-- TIMESTAMPDIFF is used because subtracting DATETIME values directly converts
-- them to numbers and does not yield a real time difference.
SELECT TIMESTAMPDIFF(SECOND, MIN(a.updated_at), MAX(a.updated_at)) AS diff_seconds
FROM (
    SELECT updated_at
    FROM mytable
    ORDER BY updated_at DESC
    LIMIT 2
) AS a
Thanks guys, I found the solution. Please ignore the additional columns; they are for my db:
-- Pairs every tblMK row with the row directly before it for the same parit
-- (ordered by taarich) and reports the time between the two.
-- NOTE(review): the column names parit and hulia are inferred from the join
-- and partition clauses (the posted text also had "part" and "Julia", which
-- look like phone autocorrect) — confirm against the table.
-- NOTE(review): SQL Server requires a schema prefix on scalar functions;
-- dbo is assumed for getdatediff — confirm.
;WITH numbered AS (
    SELECT
        parit,
        taarich,
        hulia,
        mesirakabala,
        ROW_NUMBER() OVER (PARTITION BY parit ORDER BY taarich) AS rowno
    FROM tblMK
)
SELECT
    a.rowno - 1 AS prev_rowno,
    a.parit,
    a.hulia,
    b.taarich AS taarich_kabala,
    a.taarich,
    a.mesirakabala,
    dbo.getdatediff(b.taarich, a.taarich) AS due
FROM numbered AS a
LEFT JOIN numbered AS b
    ON  b.parit = a.parit
    AND b.rowno = a.rowno - 1
-- Drops the first row of each parit (it has no predecessor to pair with).
WHERE b.taarich IS NOT NULL
ORDER BY a.parit, a.taarich
Sorry about any mistakes I might have made; I'm on my smartphone.

SQL Server 2008 CTE And CONTAINSTABLE Statement - Why the error?

I am testing out moving our database from SQL Server 2005 to 2008. We use CTE's for paging.
When using full-text CONTAINSTABLE, the CTE will not run and generates an error.
Here's my non-working code-
-- Repro from the question: paging CTE that numbers matching users newest-first
-- by CreateDate and returns row numbers 0 to 25.
-- The asker reports that this fails with a "severe error" on SQL Server 2008
-- when the CONTAINSTABLE predicate is present.
WITH results AS (
SELECT ROW_NUMBER() over (ORDER BY GBU.CreateDate DESC ) as rowNum,
GBU.UserID,
NULL AS DistanceInMiles
FROM User GBU WITH (NOLOCK)
WHERE 1=1
AND GBU.CountryCode IN (SELECT [Value] FROM fn_Split('USA',','))
-- Full-text filter: keeps users whose key is returned by the search for 'COMPASS'.
AND GBU.UserID IN (SELECT [KEY] FROM CONTAINSTABLE(VW_GBU_Search, *, 'COMPASS'))
)
SELECT * from results
WHERE rowNum BETWEEN 0 and 25
If I comment out the CONTAINSTABLE line, the statement executes. If I only run the SELECT statement (not the WITH), the statement executes fine.
The un-helpful error I get on this is:
Msg 0, Level 11, State 0, Line 0 A
severe error occurred on the current
command. The results, if any, should
be discarded. Msg 0, Level 20, State
0, Line 0 A severe error occurred on
the current command. The results, if
any, should be discarded.
Any suggestions?
Appears to be a bug. See http://connect.microsoft.com/SQLServer/feedback/ViewFeedback.aspx?FeedbackID=426981
Sounds like the fix should be in the next MSSQL SP.
Assuming the other answers are correct, and that the underlying issue is a bug, since you aren't referencing RANK from CONTAINSTABLE, perhaps a query something like the following would be a workaround, where "ID" is the ID column in VW_GBU_Search (untested)?
-- Suggested workaround (marked untested by its author): same paging CTE, but
-- the full-text filter uses a CONTAINS predicate on VW_GBU_Search instead of
-- CONTAINSTABLE.
-- NOTE(review): assumes ID is the key column of VW_GBU_Search that corresponds
-- to UserID — confirm.
;WITH results AS (
SELECT ROW_NUMBER() OVER (ORDER BY GBU.CreateDate DESC ) AS rowNum,
GBU.UserID,
NULL AS DistanceInMiles
FROM User GBU WITH (NOLOCK)
WHERE 1=1
AND GBU.CountryCode IN (SELECT [Value] FROM fn_Split('USA',','))
AND GBU.UserID IN (SELECT ID FROM VW_GBU_Search WHERE CONTAINS(*, 'COMPASS'))
)
SELECT * FROM results
WHERE rowNum BETWEEN 0 AND 25
Also, why do you have the "1=1" clause? Can you eliminate it?
I banged my head against the wall on this problem for hours; here is a workaround:
ASSUME: A table in database called
Items ( ItemId int PK, Content varchar(MAX) ),
which has a fulltext index already applied.
GO
-- Full-text search over the items table, returned as a table variable so the
-- CONTAINSTABLE call is isolated from any outer CTE.
--
-- Parameters:
--   @FreeText : full-text search condition passed to CONTAINSTABLE.
-- Returns one row per hit:
--   Relevance : CONTAINSTABLE rank of the hit
--   ItemId    : key of the matching item
--   Content   : text of the matching item
--
-- NOTE(review): the accompanying text calls the table "Items" while the query
-- reads from SearchableItems — confirm which name is correct.
CREATE FUNCTION udf_SearchItemsTable (@FreeText nvarchar(4000))
RETURNS @SearchHits TABLE
(
    Relevance int,
    ItemId    int,
    Content   varchar(MAX)
)
AS
BEGIN
    INSERT @SearchHits (Relevance, ItemId, Content)
    SELECT
        Results.[Rank] AS Relevance,
        Items.ItemId   AS ItemId,
        Items.Content  AS Content
    FROM SearchableItems AS Items
    INNER JOIN CONTAINSTABLE(SearchableItems, *, @FreeText) AS Results
        -- Join on ItemId, the key column used in the select list above.
        ON Results.[Key] = Items.ItemId;

    RETURN;
END
GO
...
GO
-- Returns one page of full-text search hits, most relevant first.
--
-- Parameters:
--   @SearchText  : full-text search condition, forwarded to udf_SearchItemsTable.
--   @StartRowNum : 1-based row number of the first hit to return.
--   @MaxRows     : maximum number of hits to return.
-- Returns:
--   ItemId, Content of the hits ranked @StartRowNum .. @StartRowNum + @MaxRows - 1.
CREATE FUNCTION udf_SearchItems (@SearchText nvarchar(4000), @StartRowNum int, @MaxRows int)
RETURNS @SortedItems TABLE
(
    ItemId  int,
    Content varchar(MAX)
)
AS
BEGIN
    WITH Matches AS
    (
        SELECT
            -- ItemId is only a tie-breaker so paging is stable when ranks are equal.
            ROW_NUMBER() OVER (ORDER BY Hits.Relevance DESC, Hits.ItemId) AS RowNum,
            Hits.ItemId,
            Hits.Content
        FROM udf_SearchItemsTable(@SearchText) AS Hits
    )
    -- A multi-statement function must fill its return table; a bare SELECT
    -- cannot return rows to the caller.
    INSERT @SortedItems (ItemId, Content)
    SELECT
        Matches.ItemId,
        Matches.Content
    FROM Matches
    -- Half-open range: exactly @MaxRows rows (BETWEEN would return one extra).
    WHERE Matches.RowNum >= @StartRowNum
      AND Matches.RowNum <  @StartRowNum + @MaxRows;

    RETURN;
END
GO
select * from udf_SearchItems('some free text stuff', 10, 20)