SQL Server: stretch values down into empty cells

I have a problem with my table, which looks like this:
TABLE_XY
Dog
NULL
NULL
NULL
NULL
NULL
NULL
cat
NULL
NULL
NULL
Frog
..
This is what I need from my table:
dog
dog
dog
dog
dog
dog
cat
cat
cat
cat
cat
frog
frog
…
Do you know a script that will stretch the values down into the empty cells, the same way fill-down works in Excel? I cannot do it manually because I have over 1 million rows.
My idea is something like: UPDATE the column so that wherever a value exists, it is extended down until the next value appears.
Thanks for the help.
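For reference, a small setup matching the sample data might look like this (the names yourtable, id and col are only illustrative; the answers below assume some such ordering column exists):
-- illustrative only: the question's sample data with an explicit ordering column
CREATE TABLE yourtable (
    id  int IDENTITY(1,1) PRIMARY KEY,
    col varchar(10) NULL
);

INSERT INTO yourtable (col) VALUES
('Dog'), (NULL), (NULL), (NULL), (NULL), (NULL), (NULL),
('cat'), (NULL), (NULL), (NULL),
('Frog');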

Assuming you have a column that specifies the ordering of the rows (id in the query below), you can do this using apply:
update t
set t.col = tt.col
from yourtable t cross apply
(select top 1 t2.*
from yourtable t2
where t2.id < t.id and t2.col is not null
order by t2.id desc
) tt
where t.col is null;
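As a side note, on SQL Server 2012 or later the same fill-down can also be written with a window function instead of a correlated subquery. This is only a sketch, assuming the same yourtable / id / col names as above:
-- the running MAX of the ids of non-NULL rows gives, for every row, the id of the
-- nearest preceding row that has a value; join back to pick up that value
WITH grouped AS (
    SELECT id, col,
           MAX(CASE WHEN col IS NOT NULL THEN id END)
               OVER (ORDER BY id ROWS UNBOUNDED PRECEDING) AS src_id
    FROM yourtable
)
SELECT g.id,
       s.col AS filled_col
FROM grouped g
LEFT JOIN yourtable s ON s.id = g.src_id
ORDER BY g.id;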

If you have a column to order these results by, you can use a CTE with row_number():
with CTE as
(
    select Column1, row_number() over (order by OrderingColumn) as rn
    from TableXYZ
)
select coalesce(x1.Column1, x2.Column1) as Column1
from CTE x1
left join CTE x2
    on x2.rn = x1.rn - 1  -- x2 is the previous row, so its value is pulled down into a NULL directly below it

Assuming you have a column (let's say id) for ordering, you can execute the following query:
UPDATE t
SET t.value = (
    SELECT value FROM yourtable WHERE id = (
        SELECT MAX(id) FROM yourtable WHERE id <= t.id AND value IS NOT NULL
    )
)
FROM yourtable t
WHERE t.value IS NULL;

The following solution will work, but the caveat is that it might take a while to process all of the million-plus rows.
declare @tempTable table
(
    Animal varchar(100)
)

insert into @tempTable (Animal)
values
    ('Dog'),
    (NULL),
    (NULL),
    ('Cow'),
    (NULL),
    (NULL),
    (NULL),
    ('cat'),
    (NULL),
    ('Sheep'),
    (NULL),
    ('Frog'),
    (NULL),
    ('Buffalo')

declare @animal varchar(100)
declare @notNullAnimal varchar(100)

declare animalCursor cursor for
    select Animal
    from @tempTable

open animalCursor

while (1 = 1)
begin
    fetch next from animalCursor into @animal

    if (@animal is not null)
    begin
        set @notNullAnimal = @animal
    end
    else
    begin
        if (@@FETCH_STATUS <> 0) break;

        update @tempTable
        set Animal = @notNullAnimal
        where current of animalCursor
    end

    if (@@FETCH_STATUS <> 0) break;
end

close animalCursor
deallocate animalCursor

select * from @tempTable

I have an SSIS solution for this problem (SSIS is part of the SQL Server family).
It's pretty easy.
I used a SQL statement to generate your sample, and you can see it as animal in the data viewer.
The work is done in a very short script component; the key point is that the variable is set up outside of ProcessInputRow.

Related

Update a column with its concatenated previous value?

Hello dear Stackoverflow SQL gurus.
Using this simple data model:
create table test(Id INT, Field1 char(1), Field2 varchar(max));
insert into test (id, Field1) values (1, 'a');
insert into test (id, Field1) values (2, 'b');
insert into test (id, Field1) values (3, 'c');
insert into test (id, Field1) values (4, 'd');
I'm able to update Field2 with the Id and Field1 values concatenated onto the previous row's Field2 in a simple T-SQL anonymous block like this:
BEGIN
    DECLARE @CurrentId INT;
    DECLARE @CurrentField1 char(1);
    DECLARE @Field2 varchar(max) = NULL;

    DECLARE cur CURSOR FOR
        SELECT id, Field1
        FROM test
        ORDER BY id;

    OPEN cur
    FETCH NEXT FROM cur INTO @CurrentId, @CurrentField1;

    WHILE @@FETCH_STATUS = 0
    BEGIN
        SET @Field2 = CONCAT(@Field2, @CurrentId, @CurrentField1);

        UPDATE test
        SET Field2 = @Field2
        WHERE Id = @CurrentId;

        FETCH NEXT FROM cur INTO @CurrentId, @CurrentField1;
    END

    CLOSE cur;
    DEALLOCATE cur;
END
GO
Giving me the desired result:
select * from test;
Id Field1 Field2
1 a 1a
2 b 1a2b
3 c 1a2b3c
4 d 1a2b3c4d
I want to achieve the same result with a single UPDATE command, to avoid the CURSOR.
I thought it was possible with the LAG() function:
UPDATE test set Field2 = NULL; --reset data
UPDATE test
SET Field2 = NewValue.NewField2
FROM (
SELECT CONCAT(Field2, Id, ISNULL(LAG(Field2,1) OVER (ORDER BY Id), '')) AS NewField2,
Id
FROM test
) NewValue
WHERE test.Id = NewValue.Id;
But this gives me this:
select * from test;
Id Field1 Field2
1 a 1
2 b 2
3 c 3
4 d 4
Field2 is not correctly updated with Id + Field1 + (previous Field2).
The result makes sense to me, because when the LAG() function re-selects the value from the table, that value has not been updated yet.
Do you think there is a way to do this with a single SQL statement?
One method is with a recursive Common Table Expression (rCTE) to iterate through the data. This assumes that all values of Id are sequential:
WITH rCTE AS(
SELECT Id,
Field1,
CONVERT(varchar(MAX),CONCAT(ID,Field1)) AS Field2
FROM dbo.test
WHERE ID = 1
UNION ALL
SELECT t.Id,
t.Field1,
CONVERT(varchar(MAX),CONCAT(r.Field2,t.Id,t.Field1)) AS Field2
FROM dbo.test t
JOIN rCTe r ON t.id = r.Id + 1)
SELECT *
FROM rCTe;
If they aren't sequential, you can use a CTE to row number the rows first:
WITH RNs AS(
SELECT Id,
Field1,
ROW_NUMBER() OVER (ORDER BY ID) AS RN
FROM dbo.Test),
rCTE AS(
SELECT Id,
Field1,
CONVERT(varchar(MAX),CONCAT(ID,Field1)) AS Field2,
RN
FROM RNs
WHERE ID = 1
UNION ALL
SELECT RN.Id,
RN.Field1,
CONVERT(varchar(MAX),CONCAT(r.Field2,RN.Id,RN.Field1)) AS Field2,
RN.RN
FROM RNs RN
JOIN rCTe r ON RN.RN = r.RN + 1)
SELECT Id,
Field1,
Field2
FROM rCTe;
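One usage note on either recursive query: SQL Server stops a recursive CTE after 100 recursions by default, so for tables with more than about 100 rows the final SELECT needs the limit lifted, for example:
-- same rCTE definitions as above; only the final SELECT changes
SELECT Id,
       Field1,
       Field2
FROM rCTE
OPTION (MAXRECURSION 0);   -- 0 removes the recursion limit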
Unfortunately, SQL Server does not (yet) support string_agg() as a window function.
Instead, you can use cross apply to calculate the values:
select t.*, t2.new_field2
from test t cross apply
(select string_agg(concat(id, field1), '') within group (order by id) as new_field2
from test t2
where t2.id <= t.id
) t2;
For an update:
with toupdate as (
select t.*, t2.new_field2
from test t cross apply
(select string_agg(concat(id, field1), '') within group (order by id) as new_field2
from test t2
where t2.id <= t.id
) t2
)
update toupdate
set field2 = new_field2;
Note: This works for small tables, but it would not be optimal on large tables. But then again, on large tables, the string would quickly become unwieldy.

How to use a field of a table as the parameter of the CONTAINS function?

I am trying to use a field of a table as the parameter of the CONTAINS function, but I don't know how to do it.
I am trying this:
SELECT * FROM tableA AS t1 WITH(UPDLOCK), TableB AS t2
WHERE CONTAINS(t1.FieldX, '"' + t2.FieldY + '"')
AND t2.ID IN(1,2,3,4,5);
However, I get an error saying that a ")" is expected before the first "+".
How can I do it?
You could do it without CONTAINS and full-text search, using the LIKE operator instead:
select * from tableA as t1 with(UPDLOCK), TableB as t2
where t1.FieldX like '%"'+t2.FieldY+'"%'
and t2.ID IN(1,2,3,4,5);
The function you are looking for is CHARINDEX (or PATINDEX):
where CHARINDEX('"' + t2.FieldY + '"', t1.FieldX) <> 0
I'm not sure whether you need the surrounding '"'.
If you want to use wildcards, then use the PATINDEX function instead.
Let me know if this works.
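Spelled out as a full query, that condition might look like this (a sketch only, keeping the question's aliases and rewriting the comma-style join as an explicit CROSS JOIN):
SELECT *
FROM tableA AS t1 WITH (UPDLOCK)
CROSS JOIN TableB AS t2
WHERE CHARINDEX('"' + t2.FieldY + '"', t1.FieldX) <> 0   -- drop the '"' wrapping if FieldX does not store quoted values
  AND t2.ID IN (1, 2, 3, 4, 5);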
You can't do this in one query with SQL Server full text. You're essentially trying to run a different full text query against each row.
You would have to actually run a separate query for each row like this:
-- put all tableA.ID values in a table variable so we can loop through them
declare @tableARowsToSearch table (ID int)

INSERT @tableARowsToSearch
SELECT ID FROM tableA WITH (UPDLOCK)

declare @fullTextResults table (ID int, FieldX varchar(max), ...)

-- for each tableA.ID...
declare @currentID int, @fullTextCondition nvarchar(4000)

set @currentID = (SELECT TOP 1 ID FROM @tableARowsToSearch ORDER BY ID)

while (@currentID is not null) begin
    -- construct the full text condition based on TableB.FieldY
    set @fullTextCondition = (
        SELECT t2.FieldY FROM tableA AS t1 WITH (UPDLOCK), TableB AS t2
        WHERE t1.ID = @currentID
        AND t2.ID IN (1,2,3,4,5)
    )

    -- run the full text query against one row in tableA
    INSERT @fullTextResults
    SELECT t1.ID, t1.FieldX, ... FROM tableA AS t1 WITH (UPDLOCK)
    WHERE t1.ID = @currentID
    AND CONTAINS(t1.FieldX, @fullTextCondition)

    set @currentID = (SELECT TOP 1 ID FROM @tableARowsToSearch WHERE ID > @currentID ORDER BY ID)
end
This is likely going to be very slow. You're better off using LIKE (see Tan_Blaytan's answer) or consider redesigning your tables.

How to combine the values of the same field from several rows into one string in a one-to-many select?

Imagine the following two tables:
create table MainTable (
MainId integer not null, -- This is the index
Data varchar(100) not null
)
create table OtherTable (
MainId integer not null, -- MainId, Name combined are the index.
Name varchar(100) not null,
Status tinyint not null
)
Now I want to select all the rows from MainTable, while combining all the rows that match each MainId from OtherTable into a single field in the result set.
Imagine the data:
MainTable:
1, 'Hi'
2, 'What'
OtherTable:
1, 'Fish', 1
1, 'Horse', 0
2, 'Fish', 0
I want a result set like this:
MainId, Data, Others
1, 'Hi', 'Fish=1,Horse=0'
2, 'What', 'Fish=0'
What is the most elegant way to do this?
(Don't worry about the comma being in front or at the end of the resulting string.)
There is no really elegant way to do this in Sybase. Here is one method, though:
select
mt.MainId,
mt.Data,
Others = stuff((
max(case when seqnum = 1 then ','+Name+'='+cast(status as varchar(255)) else '' end) +
max(case when seqnum = 2 then ','+Name+'='+cast(status as varchar(255)) else '' end) +
max(case when seqnum = 3 then ','+Name+'='+cast(status as varchar(255)) else '' end)
), 1, 1, '')
from MainTable mt
left outer join
(select
ot.*,
row_number() over (partition by MainId order by status desc) as seqnum
from OtherTable ot
) ot
on mt.MainId = ot.MainId
group by
mt.MainId, mt.Data
That is, it enumerates the values in the second table. It then does conditional aggregation to get each value, using the stuff() function to handle the extra comma. The above works for the first three values. If you want more, then you need to add more clauses.
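For readers on SQL Server rather than Sybase: since SQL Server 2017 the same result is usually produced with STRING_AGG. A minimal sketch against the tables above (not Sybase syntax):
SELECT mt.MainId,
       mt.Data,
       (SELECT STRING_AGG(CONCAT(ot.Name, '=', ot.Status), ',')
               WITHIN GROUP (ORDER BY ot.Status DESC)
        FROM OtherTable ot
        WHERE ot.MainId = mt.MainId) AS Others
FROM MainTable mt;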
Well, here is how I implemented it in Sybase 13.x. This code has the advantage of not being limited to a fixed number of Names.
create proc CombineOtherStatus   -- proc name not given in the original; any name will do
as
declare
    @MainId int,
    @Name varchar(100),
    @Status tinyint

create table #OtherTable (
    MainId int not null,
    CombStatus varchar(250) not null
)

declare OtherCursor cursor for
    select
        MainId, Name, Status
    from
        OtherTable

open OtherCursor

fetch OtherCursor into @MainId, @Name, @Status

while (@@sqlstatus = 0) begin -- run until there are no more rows
    if exists (select 1 from #OtherTable where MainId = @MainId) begin
        update #OtherTable
        set CombStatus = CombStatus + ',' + @Name + '=' + convert(varchar, @Status)
        where
            MainId = @MainId
    end else begin
        insert into #OtherTable (MainId, CombStatus)
        select
            MainId = @MainId,
            CombStatus = @Name + '=' + convert(varchar, @Status)
    end

    fetch OtherCursor into @MainId, @Name, @Status
end

close OtherCursor

select
    mt.MainId,
    mt.Data,
    ot.CombStatus
from
    MainTable mt
    left join #OtherTable ot
        on mt.MainId = ot.MainId

Insert rows in table while maintaining IDs

TABLEA
MasterCategoryID MasterCategoryDesc
1 Housing
1 Housing
1 Housing
2 Car
2 Car
2 Car
3 Shop
TABLEB
ID Description
1 Home
2 Home
3 Plane
4 Car
INSERT into TableA
(
[MasterCategoryID],
[MasterCategoryDesc]
)
Select
case when (Description) not in (select MasterCategoryDesc from TableA)
then (select max(MasterCategoryID)+1 from TableA)
else (select top 1 MasterCategoryID from TableA where MasterCategoryDesc = Description)
end as [MasterCategoryID]
,Description as MasterCategoryDesc
from TableB
I want to insert rows from TableB into TableA using SQL or a stored procedure. For example, when inserting the first row, 'Home' does not exist in MasterCategoryDesc, so it should insert 4 as the MasterCategoryID. The second row ('Home' again) should keep that same 4 in MasterCategoryID.
The code above almost does it; however, after the first row the MasterCategoryID remains the same for all rows. I don't know how to keep track of the ids while inserting the new rows.
P.S. Please do not reply by saying I need to use an IDENTITY column. I have to keep the table structure the same and cannot change it. Thanks.
Create a new table your_table with the fields x_MasterCategoryDesc and x_SubCategoryDesc.
Insert all your values into that table and then run the SP below.
CREATE PROCEDURE x_experiment
AS
BEGIN
    IF object_id('TEMPDB..#TABLES') IS NOT NULL
    BEGIN
        DROP TABLE #TABLES
    END

    DECLARE @ROWCOUNT INT
    DECLARE @ROWINDEX INT = 0,
            @MasterCategoryDesc VARCHAR(256),
            @SubCategoryDesc VARCHAR(256)

    select IDENTITY(int,1,1) as ROWID, *
    into #TABLES
    From your_table

    SELECT @ROWCOUNT = COUNT(*) from #TABLES --where ROWID between 51 and 100

    WHILE (@ROWINDEX < @ROWCOUNT)
    BEGIN
        set @ROWINDEX = @ROWINDEX + 1

        Select
            @MasterCategoryDesc = x_MasterCategoryDesc,
            @SubCategoryDesc = x_SubCategoryDesc
        from #TABLES t
        where rowid = @ROWINDEX

        INSERT into Table1
            ([MasterCategoryID], [MasterCategoryDesc], [SubCategoryDesc], [SubCategoryID])
        select TOP 1
            case when @MasterCategoryDesc not in (select [MasterCategoryDesc] from Table1)
                 then (select max([MasterCategoryID]) + 1 from Table1)
                 else (select distinct max([MasterCategoryID]) from Table1
                       where [MasterCategoryDesc] = @MasterCategoryDesc
                       group by [MasterCategoryID])
            end as [MasterCategoryID]
            , @MasterCategoryDesc as [MasterCategoryDesc]
            , @SubCategoryDesc as [SubCategoryDesc]
            , case when @SubCategoryDesc not in (select [SubCategoryDesc] from Table1)
                   then (select max([SubCategoryID]) + 1 from Table1)
                   else (select max([SubCategoryID]) from Table1
                         where [SubCategoryDesc] = @SubCategoryDesc
                         group by [SubCategoryID])
              end as [SubCategoryID]
        from Table1
    END

    select * from Table1 order by MasterCategoryID
END
GO
exec x_experiment --SP Execute
Use a CURSOR to do the work. The cursor loops through each row of TableB, and the MasterCategoryID is increased when the description is not found in TableA. This happens before the next row of TableB is loaded into the cursor ...
DECLARE @ID int
DECLARE @Description VARCHAR(MAX)

DECLARE my_cursor CURSOR FOR
    SELECT ID, Description FROM TableB

OPEN my_cursor

FETCH NEXT FROM my_cursor
INTO @ID, @Description

WHILE @@FETCH_STATUS = 0
BEGIN
    INSERT into TableA (MasterCategoryID, MasterCategoryDesc)
    SELECT CASE WHEN @Description NOT IN (SELECT MasterCategoryDesc FROM TableA)
                THEN (SELECT MAX(MasterCategoryID) + 1 FROM TableA)
                ELSE (SELECT TOP 1 MasterCategoryID
                      FROM TableA
                      WHERE MasterCategoryDesc = @Description)
           END AS MasterCategoryID, Description as MasterCategoryDesc
    FROM TableB
    WHERE ID = @ID

    FETCH NEXT FROM my_cursor
    INTO @ID, @Description
END

CLOSE my_cursor
DEALLOCATE my_cursor
Your data structure leaves something to be desired. You shouldn't have a master id column that has repeated values.
But you can still do what you want:
INSERT into TableA ([MasterCategoryID], [MasterCategoryDesc])
Select coalesce(a.maxid,
                amax.maxid + row_number() over (partition by a.maxid order by b.id)
       ),
       coalesce(a.MasterCategoryDesc, b.[Description])
from TableB b left outer join
     (select MasterCategoryDesc, max(MasterCategoryID) as maxid
      from TableA
      group by MasterCategoryDesc
     ) a
     on b.[Description] = a.MasterCategoryDesc cross join
     (select max(MasterCategoryID) as maxid
      from TableA
     ) amax
The idea is to take the information from the master table when it is available. When not available, then MasterCategoryId will be NULL. A new id is calculated, using row_number() to generate sequential numbers. These are then added to the previous maximum id.
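One practical caveat that applies to every MAX(MasterCategoryID) + 1 approach on this page: if two sessions run the insert at the same time, they can read the same maximum and hand out duplicate ids. A hedged sketch of one way to serialize the insert, assuming an exclusive table lock is acceptable for the duration of the statement:
BEGIN TRANSACTION;

-- WITH (TABLOCKX) holds an exclusive lock on TableA until COMMIT, so only one
-- session at a time can compute MAX(MasterCategoryID) + 1 and insert
INSERT INTO TableA WITH (TABLOCKX) ([MasterCategoryID], [MasterCategoryDesc])
SELECT (SELECT MAX(MasterCategoryID) FROM TableA) + 1,
       'Plane';   -- single illustrative row; in practice use the full SELECT from one of the answers above

COMMIT TRANSACTION;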

T-Sql count string sequences over multiple rows

How can I find subsets of data over multiple rows in SQL?
I want to count the number of occurrences of a string (or number) until a different string is found, and then count how many times that next string occurs before yet another one appears.
The strings can appear in any order.
This is what I want to achieve:
I have one table with one column (columnx) with data like this:
A
A
B
C
A
B
B
The result I want from the query should be like this:
2 A
1 B
1 C
1 A
2 B
Is this even possible in SQL, or would it be easier to just write a little C# app to do it?
Since, as per your comment, you can add a column that will unambiguously define the order in which the columnx values go, you can try the following query (provided the SQL product you are using supports CTEs and ranking functions):
WITH marked AS (
SELECT
columnx,
sortcolumn,
grp = ROW_NUMBER() OVER ( ORDER BY sortcolumn)
- ROW_NUMBER() OVER (PARTITION BY columnx ORDER BY sortcolumn)
FROM data
)
SELECT
columnx,
COUNT(*)
FROM marked
GROUP BY
columnx,
grp
ORDER BY
MIN(sortcolumn)
;
If sortcolumn is an auto-increment integer column that is guaranteed to have no gaps, you can replace the first ROW_NUMBER() expression with just sortcolumn. But, I guess, that cannot be guaranteed in general. Besides, you might indeed want to sort on a timestamp instead of an integer.
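For completeness, that simplification would look roughly like this (a sketch that assumes sortcolumn really is gap-free):
WITH marked AS (
    SELECT
        columnx,
        sortcolumn,
        grp = sortcolumn
              - ROW_NUMBER() OVER (PARTITION BY columnx ORDER BY sortcolumn)
    FROM data
)
SELECT
    columnx,
    COUNT(*)
FROM marked
GROUP BY
    columnx,
    grp
ORDER BY
    MIN(sortcolumn);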
I don't think you can do it with a single select.
You can use a cursor:
create table my_Strings
(
    my_string varchar(50)
)

insert into my_strings values ('A'),('A'),('B'),('C'),('A'),('B'),('B') -- multi-row VALUES requires SQL Server 2008 or later

--select my_string from my_strings

declare @temp_result table(
    string varchar(50),
    nr int)

declare @myString varchar(50)
declare @myLastString varchar(50)
declare @nr int

set @myLastString = 'A' -- set this to the value of the FIRST string in your table
set @nr = 0

DECLARE string_cursor CURSOR
FOR
SELECT my_string as aux_column FROM my_strings

OPEN string_cursor
FETCH NEXT FROM string_cursor into @myString

WHILE @@FETCH_STATUS = 0 BEGIN
    if (@myString = @myLastString) begin
        set @nr = @nr + 1
        set @myLastString = @myString
    end else begin
        insert into @temp_result values (@myLastString, @nr)
        set @myLastString = @myString
        set @nr = 1
    end
    FETCH NEXT FROM string_cursor into @myString
END

insert into @temp_result values (@myLastString, @nr)

CLOSE string_cursor;
DEALLOCATE string_cursor;

select * from @temp_result
Result:
A 2
B 1
C 1
A 1
B 2
Try this :
;with sample as (
select 'A' as columnx
union all
select 'A'
union all
select 'B'
union all
select 'C'
union all
select 'A'
union all
select 'B'
union all
select 'B'
), data
as (
select columnx,
Row_Number() over(order by (select 0)) id
from sample
) , CTE as (
select * ,
Row_Number() over(order by (select 0)) rno from data
) , result as (
SELECT d.*
, ( SELECT MAX(ID)
FROM CTE c
WHERE NOT EXISTS (SELECT * FROM CTE
WHERE rno = c.rno-1 and columnx = c.columnx)
AND c.ID <= d.ID) AS g
FROM data d
)
SELECT columnx,
COUNT(1) cnt
FROM result
GROUP BY columnx,
g
Result :
columnx cnt
A 2
B 1
C 1
A 1
B 2