Update a column with it's concatenated previous value? - sql

Hello dear Stackoverflow SQL gurus.
Using this simple data model:
create table test(Id INT, Field1 char(1), Field2 varchar(max));
insert into test (id, Field1) values (1, 'a');
insert into test (id, Field1) values (2, 'b');
insert into test (id, Field1) values (3, 'c');
insert into test (id, Field1) values (4, 'd');
I'm able to update Field2 with Field1 and Field2 concatenated previous value in a simple TSQL anonymous block like this :
BEGIN
DECLARE #CurrentId INT;
DECLARE #CurrentField1 char(1);
DECLARE #Field2 varchar(max) = NULL;
DECLARE cur CURSOR FOR
SELECT id, Field1
FROM test
ORDER BY id;
OPEN cur
FETCH NEXT FROM cur INTO #CurrentId, #CurrentField1;
WHILE ##FETCH_STATUS = 0
BEGIN
SET #Field2 = CONCAT(#Field2, #CurrentId, #CurrentField1);
UPDATE test
SET Field2 = #Field2
WHERE Id = #CurrentId;
FETCH NEXT FROM cur INTO #CurrentId, #CurrentField1;
END
CLOSE cur;
DEALLOCATE cur;
END
GO
Giving me the desired result:
select * from test;
Id Field1 Field2
1 a 1a
2 b 1a2b
3 c 1a2b3c
4 d 1a2b3c4d
I want to achieved the same result with a single UPDATE command to avoid CURSOR.
I thought it was possible with the LAG() function:
UPDATE test set Field2 = NULL; --reset data
UPDATE test
SET Field2 = NewValue.NewField2
FROM (
SELECT CONCAT(Field2, Id, ISNULL(LAG(Field2,1) OVER (ORDER BY Id), '')) AS NewField2,
Id
FROM test
) NewValue
WHERE test.Id = NewValue.Id;
But this give me this:
select * from test;
Id Field1 Field2
1 a 1
2 b 2
3 c 3
4 d 4
Field2 is not correctly updated with Id+Field1+(previous Field2).
The update result is logic to me because when the LAG() function re-select the value in the table this value is not yet updated.
Do you think their is a way to do this with a single SQL statement?

One method is with a recursive Common Table Expression (rCTE) to iterate through the data. This assumes that all values of Id are sequential:
WITH rCTE AS(
SELECT Id,
Field1,
CONVERT(varchar(MAX),CONCAT(ID,Field1)) AS Field2
FROM dbo.test
WHERE ID = 1
UNION ALL
SELECT t.Id,
t.Field1,
CONVERT(varchar(MAX),CONCAT(r.Field2,t.Id,t.Field1)) AS Field2
FROM dbo.test t
JOIN rCTe r ON t.id = r.Id + 1)
SELECT *
FROM rCTe;
If they aren't sequential, you can use a CTE to row number the rows first:
WITH RNs AS(
SELECT Id,
Field1,
ROW_NUMBER() OVER (ORDER BY ID) AS RN
FROM dbo.Test),
rCTE AS(
SELECT Id,
Field1,
CONVERT(varchar(MAX),CONCAT(ID,Field1)) AS Field2,
RN
FROM RNs
WHERE ID = 1
UNION ALL
SELECT RN.Id,
RN.Field1,
CONVERT(varchar(MAX),CONCAT(r.Field2,RN.Id,RN.Field1)) AS Field2,
RN.RN
FROM RNs RN
JOIN rCTe r ON RN.RN = r.RN + 1)
SELECT Id,
Field1,
Field2
FROM rCTe;

Unfortunately, SQL Server does not (yet) support string_agg() as a window function.
Instead, you can use cross apply to calculate the values:
select t.*, t2.new_field2
from test t cross apply
(select string_agg(concat(id, field1), '') within group (order by id) as new_field2
from test t2
where t2.id <= t.id
) t2;
For an update:
with toupdate as (
select t.*, t2.new_field2
from test t cross apply
(select string_agg(concat(id, field1), '') within group (order by id) as new_field2
from test t2
where t2.id <= t.id
) t2
)
update toupdate
set field2 = new_field2;
Here is a db<>fiddle.
Note: This works for small tables, but it would not be optimal on large tables. But then again, on large tables, the string would quickly become unwieldy.

Related

SQL Server : stretch values to emptly cells

I have problem with my table which look like this:
TABLE_XY
Dog
NULL
NULL
NULL
NULL
NULL
NULL
cat
NULL
NULL
NULL
Frog
..
I need from my table
dog
dog
dog
dog
dog
dog
cat
cat
cat
cat
cat
frog
frog
…
Do you know script which will stretch values to empty cells work my same like in excel. I cannot do manually because I have over 1 million rows..
My opinion is > UPDATE column name when exist value extend down to next..
Thank for help
Assuming you have a column that specifies each row, you can do this using apply:
update t
set t.col = tt.col
from yourtable t cross apply
(select top 1 t2.*
from yourtable t2
where t2.id < t.id and t2.col is not null
order by t2.id desc
) tt
where t.col is null;
If you have a column to order these results by
with CTE as
(
select Column1, row_number() over (order by OrderingColumn) as rn
from TableXYZ
)
select coalesce (x1.Column1, x2.Column1) as Column1
from CTE x1
left join CTE x2
on x2.rn = x1.rn+1
Assuming you have a column (let's say id) for ordering, you can execute the following query:
UPDATE table t
SET t.value = (
SELECT value FROM table WHERE id = (
SELECT MAX(id) FROM table WHERE id <= t.id AND value IS NOT NULL
)
)
WHERE t.value IS NULL;
The following solution will work, but the only caveat is that it might take a while to process all the million rows
declare #tempTable table
(
Animal varchar(100)
)
insert into #tempTable
(Animal)
values
('Dog'),
(NULL),
(NULL),
('Cow'),
(NULL),
(NULL),
(NULL),
('cat'),
(NULL),
('Sheep'),
(NULL),
('Frog'),
(null),
('Buffalo')
declare #animal varchar(100)
declare #notNullAnimal varchar(100)
declare animalCursor cursor for
select animal
from #tempTable
open animalCursor
while (1=1)
begin
fetch next from animalCursor into #animal
if(#animal is not null)
begin
set #notNullAnimal = #animal
end
else
begin
if(##FETCH_STATUS <> 0) break;
update #tempTable
set Animal = #notNullAnimal
where current of animalCursor
end
if(##FETCH_STATUS <> 0) break;
end
close animalCursor
deallocate animalCursor
select * from #tempTable
I have a SSIS solution for this problem which is part of the SQL Server family.
It's pretty easy.
I used a SQL Statement to generate your sample and you can see it as animal in the data viewer.
The work is done in a very short script component. Code below (Note the setup of variable outside of the ProcessInputRow):

SQL - populate new column according to data in row above

I need to populate a new column in a table known as RowType, where if the ID column contains the same ID value as the one above RowType is populated with 'D', if the value is new then RowType is populate with 'H', how would the SQL code look to be able to do this?
I.e should look something like below:
RowType (to be populated), ID (already there)
H, 1
D, 1
D, 1
H, 2
D, 2
H, 3
D, 3
D, 3
Thanks
You can use Row_Number and case
select *, RowType = case when Row_Number() over (partition by id order by id) = 1 then 'H' else 'D' End from #yourid
Your input table:
create table #yourId (id int)
insert into #yourid (id) values
(1)
,(1)
,(1)
,(2)
,(2)
,(3)
,(3)
,(3)
Use ROW_NUMER concept :
CREATE TABLE #table(Id INT)
INSERT INTO #table(Id)
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 2 UNION ALL
SELECT 2 UNION ALL
SELECT 3 UNION ALL
SELECT 3 UNION ALL
SELECT 3
SELECT CASE WHEN RowType = 1 THEN 'H' ELSE 'D' END RowType , Id
FROM
(
SELECT ROW_NUMBER() OVER (PARTITION BY Id ORDER BY id) RowType , Id
FROM #table
) A
Please try...
UPDATE tableName
SET RowType = CASE
WHEN ( ID = LAG( ID ) OVER ( ORDER BY ID ) ) THEN 'D'
ELSE 'H'
END
If you have any questions or comments, then please feel free to post a Comment accordingly.
Further Reading
https://learn.microsoft.com/en-us/sql/t-sql/functions/lag-transact-sql (for information on LAG()).
It may not be the best solution, however it can point you somewhere, and it works.
Go through the code carfuly and make sure you understand this.
create table yourTable (RowType char, id int)
insert into yourTable (RowType, id) values
('',1)
,('',1)
,('',1)
,('',2)
,('',2)
,('',3)
,('',3)
,('',3)
select
row_number() over (order by id) as rowNumber,
RowType,
id
into #tempTable
from yourTable
declare #maxRow int = (select max(rowNumber) from #tempTable)
declare #currentRow int = 1
while (#currentRow <= #maxRow)
begin
if (#currentRow = 1)
begin
update #tempTable
set RowType = 'H'
where rowNumber = #currentRow
end
else
begin
if (select id from #tempTable where rowNumber = #currentRow) = (select id from #tempTable where rowNumber = #currentRow - 1)
begin
update #tempTable
set RowType = 'D'
where rowNumber = #currentRow
end
else
begin
update #tempTable
set RowType = 'H'
where rowNumber = #currentRow
end
end
set #currentRow = #currentRow +1
end
-- update data in actual table, you can do below if only those two columns exist in table !!!
delete from yourTable
-- insert into table from updated temp table
insert into yourTable
select RowType, ID
from #tempTable
select * from yourTable
select * from #tempTable
-- drop temp table
drop table #tempTable

Delete repeated Ids from Table - Performance Improvement

I have a table with repeated codes, I need to clean the table removing the repeated, but having at least one left of then in the table.
My table is this:
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
1 Value2 Value3
2 Value4 Value5
I need something like this: (It doesn't matter which row is left, just to have at least one)
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
2 Value4 Value5
I have this query, but the performance is awful
SELECT MemberFirmId, FriendlyFunctionCode
INTO #ToDeleteRepeated
FROM [dbo].[FirmFunction]
GROUP BY MemberFirmId, FriendlyFunctionCode
HAVING COUNT(1) > 1
DECLARE #Code VARCHAR(100), #Desc VARCHAR(250)
WHILE ((SELECT COUNT(1) FROM #ToDeleteRepeated) > 0)
BEGIN
SELECT TOP 1 #Code = FriendlyFunctionCode FROM #ToDeleteRepeated
WHILE ((SELECT COUNT(1) FROM [FirmFunction] WHERE FriendlyFunctionCode = #Code) > 0)
BEGIN
SELECT TOP 1 #Desc = FunctionLevel3Desc FROM [FirmFunction] WHERE FriendlyFunctionCode = #Code
DELETE FROM [FirmFunction] WHERE FriendlyFunctionCode = #Code AND FunctionLevel3Desc = #Desc
END
END
Any suggestions?
WITH CTE AS (SELECT MemberFirmId, FriendlyFunctionCode,
ROW_NUMBER() over (PARTITION by FriendlyFunctionCode ORDER BY FriendlyFunctionCode ) AS RN
FROM [dbo].[FirmFunction]
)
DELETE CTE WHERE CTE.RN >1
Delete using CTE with row_number()
;with cte as (
select *, row_number() over(partition by friendlyfunctioncode order by memberfirmid) rn
from deletingtable)
delete from cte where rn > 1
This executes with below execution plan:
Table/Clustered index scan --> sort(if no index) --> segment --> Sequence Project --> Filter and then delete,
If it has proper index on FriendlyFunctionCode it executes faster in single scan
You could use a windowing function like this. Saves having to use a cursor (which don't perform well in SQL Server). You can run the inner select on it's own to see what it's doing with the row number.
Test Data
CREATE TABLE #TestData (FriendlyFunctionCode int, MemberFirmId nvarchar(10), FunctionLevel3Desc nvarchar(10))
INSERT INTO #TestData
VALUES
(1,'Value1','Value2')
,(1,'Value2','Value3')
,(2,'Value4','Value5')
Query
SELECT
a.FriendlyFunctionCode
,a.MemberFirmId
,a.FunctionLevel3Desc
INTO #SavedData
FROM
(
SELECT
FriendlyFunctionCode
,MemberFirmId
,FunctionLevel3Desc
,ROW_NUMBER() OVER(PARTITION BY FriendlyFunctionCode ORDER BY FriendlyFunctionCode) RowNum
FROM #TestData
) a
WHERE a.RowNum = 1
TRUNCATE TABLE #TestData
INSERT INTO #TestData (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
SELECT
FriendlyFunctionCode
,MemberFirmId
,FunctionLevel3Desc
FROM #SavedData
DROP TABLE #SavedData
Result
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
2 Value4 Value5
You can just use MAX and group on the FunctionCode.
SELECT
FriendlyFunctionCode,
MAX(MemberFirmId) as MemberFirmId,
MAX(FunctionLevel3Desc) as FuncationLevel3Desc
INTO #StagingTable
FROM
FirmFunction
GROUP BY
FriendlyFunctionCode
Then Truncate Your Table, and select back into it... or just create a table all together and insert the distinct (max) records into it.
TRUNCATE TABLE FirmFunction
INSERT INTO FirmFunction (FriendlyFunctionCode,MemberFirmId,FunctionLevel3Desc)
SELECT * FROM #StagingTable
This is less safe than creating a table FirmFunction2 for example with the same schema as your original and then just inserting into it, then renaming it....
SELECT TOP 1 INTO FirmFunction2 FROM FirmFunction WHERE 1=0
INSERT INTO FirmFunction2 (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
SELECT
FriendlyFunctionCode,
MAX(MemberFirmId) as MemberFirmId,
MAX(FunctionLevel3Desc) as FuncationLevel3Desc
INTO #StagingTable
FROM
FirmFunction
GROUP BY
FriendlyFunctionCode
Then you can check the date in FirmFunction2 and if you are satisfied... rename it after dropping the other table.

Insert rows in table while maintaining IDs

TABLEA
MasterCategoryID MasterCategoryDesc
1 Housing
1 Housing
1 Housing
2 Car
2 Car
2 Car
3 Shop
TABLEB
ID Description
1 Home
2 Home
3 Plane
4 Car
INSERT into TableA
(
[MasterCategoryID]
[MasterCategoryDesc]
)
Select
case when (Description) not in (select MasterCategoryDesc from TableA)
then (select max(MasterCategoryID)+1 from TableA)
else (select top 1 MasterCategoryID from TableA where MasterCategoryDesc = Description)
end as [MasterCategoryID]
,Description as MasterCategoryDesc
from TableB
I want to enter rows using SQL/Stored Procedure from tableB to tableA. for example when inserting first row 'Home' it does not exist in MastercategoryDesc therefore will insert '4' in MasterCategoryID. Second row should keep the '4' again in MasterCategoryID.
The code below does it however after the first row the MastercategoryID remains the same for all rows. I Dont know how to keep track of ids while inserting the new rows.
p.s. Pls do not reply by saying i need to use IDENTITY() index. I have to keep the table structure the same and cannot change it. thanks
Create a new table your_table with fields x_MasterCategoryDesc ,x_SubCategoryDesc
Insert all your values in that table and the run the below SP.
CREATE PROCEDURE x_experiment
AS
BEGIN
IF object_id('TEMPDB..#TABLES') IS NOT NULL
BEGIN
DROP TABLE #TABLES
END
DECLARE #ROWCOUNT INT
DECLARE #ROWINDEX INT =0,
#MasterCategoryDesc VARCHAR(256),
#SubCategoryDesc VARCHAR(256)
select IDENTITY(int,1,1) as ROWID,*
into #TABLES
From your_table
SELECT #ROWCOUNT=COUNT(*) from #TABLES --where ROWID between 51 and 100
WHILE (#ROWINDEX<#ROWCOUNT)
BEGIN
set #ROWINDEX=#ROWINDEX+1
Select
#MasterCategoryDesc=x_MasterCategoryDesc,
#SubCategoryDesc=x_SubCategoryDesc
from #TABLES t
where rowid = #ROWINDEX
INSERT into Table1
([MasterCategoryID], [MasterCategoryDesc], [SubCategoryDesc], [SubCategoryID])
select TOP 1
case when #MasterCategoryDesc not in (select [MasterCategoryDesc] from Table1)
then (select max([MasterCategoryID])+1 from Table1)
else (select distinct max([MasterCategoryID]) from Table1
where [MasterCategoryDesc]=#MasterCategoryDesc
group by [MasterCategoryID])
end as [MasterCategoryID]
,#MasterCategoryDesc as [MasterCategoryDesc]
,#SubCategoryDesc as [SubCategoryDesc]
,case when #SubCategoryDesc not in (select [SubCategoryDesc] from Table1)
then (select max([SubCategoryID])+1 from Table1 )
else (select max([SubCategoryID]) from Table1
where [SubCategoryDesc]=#SubCategoryDesc
group by [SubCategoryID])
end as [SubCategoryID]
from Table1
END
select * from Table1 order by MasterCategoryID
END
GO
exec x_experiment --SP Execute
SQL FIDDLE
Use a CURSOR to do the work. The cursor loops through each row of TableA and the MasterCategoryID increases if it is not found in TableB. This happens before the next row of TableA is loaded into the cursor ...
DECLARE #ID int
DECLARE #Description VARCHAR(MAX)
DECLARE my_cursor CURSOR FOR
SELECT ID, Description FROM TableB
OPEN my_cursor
FETCH NEXT FROM my_cursor
INTO #ID, #Description
WHILE ##FETCH_STATUS = 0
BEGIN
INSERT into TableA(MasterCategoryID, MasterCategoryDesc)
SELECT CASE WHEN #Description NOT IN (SELECT MasterCategoryDesc FROM TableA)
THEN (SELECT MAX(MasterCategoryID)+1 FROM TableA)
ELSE (SELECT TOP 1 MasterCategoryID
FROM TableA
WHERE MasterCategoryDesc = #Description)
END AS MasterCategoryID, Description as MasterCategoryDesc
FROM TableB
WHERE ID = #ID
FETCH NEXT FROM my_cursor
INTO #ID, #Description
END
Your data structure leaves something to be desired. You shouldn't have a master id column that has repeated values.
But you can still do what you want:
INSERT into TableA ([MasterCategoryID], [MasterCategoryDesc])
Select coalesce(a.MasterCategoryId,
amax.maxid + row_number() over (partition by (a.MasterCategoryId) order by b.id)
),
coalesce(a.MasterCategoryDesc, b.desc)
from TableB b left outer join
(select desc, max(MasterCaegoryId) as maxid
from TableA a
group by desc
) a
on b.desc = a.desc left outer join
(select max(MasterCategoryID) as maxid
from TableA
) amax
The idea is to take the information from the master table when it is available. When not available, then MasterCategoryId will be NULL. A new id is calculated, using row_number() to generate sequential numbers. These are then added to the previous maximum id.

T-Sql count string sequences over multiple rows

How can I find subsets of data over multiple rows in sql?
I want to count the number of occurrences of a string (or number) before another string is found and then count the number of times this string occurs before another one is found.
All these strings can be in random order.
This is what I want to achieve:
I have one table with one column (columnx) with data like this:
A
A
B
C
A
B
B
The result I want from the query should be like this:
2 A
1 B
1 C
1 A
2 B
Is this even possible in sql or would it be easier just to write a little C# app to do this?
Since, as per your comment, you can add a column that will unambiguously define the order in which the columnx values go, you can try the following query (provided the SQL product you are using supports CTEs and ranking functions):
WITH marked AS (
SELECT
columnx,
sortcolumn,
grp = ROW_NUMBER() OVER ( ORDER BY sortcolumn)
- ROW_NUMBER() OVER (PARTITION BY columnx ORDER BY sortcolumn)
FROM data
)
SELECT
columnx,
COUNT(*)
FROM marked
GROUP BY
columnx,
grp
ORDER BY
MIN(sortcolumn)
;
You can see the method in work on SQL Fiddle.
If sortcolumn is an auto-increment integer column that is guaranteed to have no gaps, you can replace the first ROW_NUMBER() expression with just sortcolumn. But, I guess, that cannot be guaranteed in general. Besides, you might indeed want to sort on a timestamp instead of an integer.
I dont think you can do it with a single select.
You can use AdventureWorks cursor:
create table my_Strings
(
my_string varchar(50)
)
insert into my_strings values('A'),('A'),('B'),('C'),('A'),('B'),('B') -- this method will only work on SQL Server 2008
--select my_String from my_strings
declare #temp_result table(
string varchar(50),
nr int)
declare #myString varchar(50)
declare #myLastString varchar(50)
declare #nr int
set #myLastString='A' --set this with the value of your FIRST string on the table
set #nr=0
DECLARE string_cursor CURSOR
FOR
SELECT my_string as aux_column FROM my_strings
OPEN string_cursor
FETCH NEXT FROM string_cursor into #myString
WHILE ##FETCH_STATUS = 0 BEGIN
if (#myString = #myLastString) begin
set #nr=#nr+1
set #myLastString=#myString
end else begin
insert into #temp_result values (#myLastString, #nr)
set #myLastString=#myString
set #nr=1
end
FETCH NEXT FROM string_cursor into #myString
END
insert into #temp_result values (#myLastString, #nr)
CLOSE string_cursor;
DEALLOCATE string_cursor;
select * from #temp_result
Result:
A 2
B 1
C 1
A 1
B 2
Try this :
;with sample as (
select 'A' as columnx
union all
select 'A'
union all
select 'B'
union all
select 'C'
union all
select 'A'
union all
select 'B'
union all
select 'B'
), data
as (
select columnx,
Row_Number() over(order by (select 0)) id
from sample
) , CTE as (
select * ,
Row_Number() over(order by (select 0)) rno from data
) , result as (
SELECT d.*
, ( SELECT MAX(ID)
FROM CTE c
WHERE NOT EXISTS (SELECT * FROM CTE
WHERE rno = c.rno-1 and columnx = c.columnx)
AND c.ID <= d.ID) AS g
FROM data d
)
SELECT columnx,
COUNT(1) cnt
FROM result
GROUP BY columnx,
g
Result :
columnx cnt
A 2
B 1
C 1
A 1
B 2