How to complete and fill in gaps between dates in SQL? - sql

I have data in Redshift that I'm aggregating to the Year-Quarter level i.e. number of items by Year-Quarter
I need to show a continuous trend and hence I need to fill-in the gaps in Year-Quarter. The picture below should give a clearer idea of my current data and desired output.
How can I achieve this in Redshift SQL?

A query like this should do the trick:
-- Sample data: one row per year-quarter that has items; yq is encoded as YYYYQ.
create table test (yq int, items int);
insert into test
values (20201, 10), (20204, 15), (20213, 25), (20222, 30);

-- quarters: every YYYYQ value from the smallest to the largest yq in test.
-- Stepping from Q4 adds 7 (e.g. 20204 -> 20211); any other quarter adds 1.
with recursive quarters(q) as (
    select min(yq) as q
    from test
    union all
    select case when right(q::text, 1) = '4' then q + 7 else q + 1 end as q
    from quarters
    where q < (select max(yq) from test)
)
-- Keep every generated quarter; when a quarter has no source row, carry the
-- most recent non-null items value forward.
select
    qr.q as yq,
    coalesce(t.items, lag(t.items ignore nulls) over (order by qr.q)) as items
from quarters qr
left join test t
    on t.yq = qr.q
order by qr.q;
It uses a recursive CTE to generate the quarters range needed, right joins this with the source data, and then uses a LAG() window function to populate the items if the value is NULL.

This is known as forward filling values:
-- Forward-fill example (T-SQL): list every year-quarter between the first and
-- last YQ in #Temp and, where a quarter has no row, show the items value of
-- the most recent earlier quarter that has one.
CREATE TABLE #Temp
(
    [YQ] nvarchar(5),   -- 'YYYYQ'
    [items] int
)
INSERT INTO #Temp ([YQ], [items])
VALUES ('20201', 10), ('20204', 15), ('20213', 25), ('20222', 30)
---------------------------------------------------------------------------------
DECLARE @start int, @end int, @starty int, @endy int
SELECT @start = 1, @end = 4                      -- quarter numbers
-- First and last year present in the data (the year is the first 4 characters).
SELECT @starty = MIN(LEFT(YQ, 4)), @endy = MAX(LEFT(YQ, 4)) FROM #Temp
;WITH cte1(y) AS    -- one row per year from @starty to @endy
(
    SELECT @starty AS y
    UNION ALL
    SELECT y + 1
    FROM cte1
    WHERE y < @endy
)
, cte2(n) AS        -- one row per quarter, @start..@end
(
    SELECT @start AS n
    UNION ALL
    SELECT n + 1
    FROM cte2
    WHERE n < @end
)
SELECT t1.YQ AS 'Year-Quarter',
       CASE WHEN t2.items IS NULL
            -- Latest earlier quarter with a value (not the largest earlier
            -- value, which would be wrong whenever items decreases).
            THEN (SELECT TOP 1 p.items
                  FROM #Temp AS p
                  WHERE p.items IS NOT NULL AND p.YQ < t1.YQ
                  ORDER BY p.YQ DESC)
            ELSE t2.items
       END AS '# Items'
FROM
(
    SELECT CAST(cte1.y AS nvarchar(4)) + CAST(cte2.n AS nvarchar(1)) AS YQ
    FROM cte1
    CROSS JOIN cte2
) t1
LEFT JOIN #Temp t2 ON t2.YQ = t1.YQ
-- Trim the generated grid to the range actually covered by the data.
WHERE t1.YQ >= (SELECT MIN(YQ) FROM #Temp)
  AND t1.YQ <= (SELECT MAX(YQ) FROM #Temp)
ORDER BY t1.YQ, t2.items

Related

SQL Recursive CTE replace statement too slow

I have a recursive CTE that replaces multiple values from an expression, but it is too slow when there are many expressions.
-- Source rows: one expression per IdExpresion.
CREATE TABLE #table1(IdExpresion INT, expresion VARCHAR(MAX))
-- Search/replacement pairs, keyed by the IdExpresion they apply to.
CREATE TABLE #table2(IdExpresion INT, searchExpresion VARCHAR(50), replacementExpresion VARCHAR(50))
INSERT INTO #table1(IdExpresion, expresion)
VALUES(1, 'Mary had a little lamb'),
(2, 'The new student, student_name has the following grades Math - math_grade, Science - Science_grade')
INSERT INTO #table2(IdExpresion, searchExpresion, replacementExpresion)
VALUES(1, 'lamb','dog'),
(2, 'student_name','Joe Smith'),
(2, 'math_grade','A'),
(2, 'Science_grade','B+')
-- Recursive CTE: the anchor emits every expression at lvl 1.
;WITH cte(IdExpresion, expresion, lvl) AS
(
SELECT t1.IdExpresion, t1.expresion, 1
FROM #table1 t1
UNION ALL
-- Recursive member: each current row is joined to every pair of the same
-- IdExpresion whose search text still occurs in it (CHARINDEX > 0), and a new
-- row is emitted with that one pair replaced, at lvl + 1.
SELECT cte.IdExpresion, REPLACE(cte.expresion, t2.searchExpresion, t2.replacementExpresion), cte.lvl + 1
FROM cte
INNER JOIN #table2 t2
ON cte.IdExpresion = t2.IdExpresion
AND CHARINDEX(t2.searchExpresion, cte.expresion) > 0
)
-- Keep, per IdExpresion, the rows at the highest lvl reached; DISTINCT
-- collapses identical expressions among them.
SELECT DISTINCT c2.expresion
FROM (SELECT IdExpresion, MAX(lvl) AS lvl
FROM cte
GROUP BY IdExpresion) c1
INNER JOIN cte c2
ON c1.IdExpresion = c2.IdExpresion
AND c1.lvl = c2.lvl
-- MAXRECURSION 0 removes the recursion-depth limit.
OPTION (MAXRECURSION 0);
Anyone have any advice? I am using SQL Server by the way
Not sure if any more performant, but here is a brute force approach just for fun.
Already +1 LukStorm's answer, I suspect that is the way to go.
Example
-- Step 1: serialize #Table1 to one XML string (<row IdExpresion=".." expresion=".."/>).
-- A leading space is prepended and every space is then followed by
-- '<IdExpresion>|||', so each word carries the id of the row it belongs to.
Declare @S varchar(max) = (
    Select IdExpresion,
           expresion = replace(' ' + expresion, ' ', concat(' ', IdExpresion, '|||'))
    From #Table1
    For XML Raw
)

-- Step 2: apply the pairs from #table2 to the string; a pair only matches
-- words tagged with its own IdExpresion.
Select @S = replace(@S, concat(IdExpresion, '|||', searchExpresion), replacementExpresion)
From #table2

-- Step 3: shred the XML back into rows and strip the remaining id tags.
Select IdExpresion = B.i.value('@IdExpresion', 'int')
      ,expresion   = ltrim(replace(B.i.value('@expresion', 'varchar(max)'),
                                   B.i.value('@IdExpresion', 'varchar(25)') + '|||',
                                   ''))
From (Select x = Cast(@S as xml).query('.')) as A
Cross Apply x.nodes('row') AS B(i)
Returns
IdExpresion expresion
1 Mary had a little dog
2 The new student, Joe Smith has the following grades Math - A, Science - B+
You could add another CTE to it that gets a row_number for each replacement, partitioned by the IdExpresion.
Then in the recursive CTE, instead of counting up, count down till there's no match with the replacement row_number.
The last entry in the CTE, that had all replacements, will have Lvl 0 then.
-- SEARCH numbers the replacement pairs 1..k within each IdExpresion.
;WITH SEARCH AS (
    SELECT
        IdExpresion,
        row_number() over (partition by IdExpresion order by searchExpresion) as rn,
        searchExpresion, replacementExpresion
    FROM #table2
), CTE(IdExpresion, expresion, lvl) AS
(
    -- Anchor: start each expression at lvl = number of pairs it has.
    -- LEFT JOIN + COUNT(t2.IdExpresion) gives lvl 0 to an expression with no
    -- pairs, so it is returned unchanged instead of being dropped.
    SELECT t1.IdExpresion, t1.expresion, count(t2.IdExpresion)
    FROM #table1 t1
    LEFT JOIN #table2 t2 ON t2.IdExpresion = t1.IdExpresion
    GROUP BY t1.IdExpresion, t1.expresion
    UNION ALL
    -- Recursive member: apply pair number lvl, then count down.
    SELECT c.IdExpresion, REPLACE(c.expresion, s.searchExpresion, s.replacementExpresion), c.lvl - 1
    FROM CTE c
    JOIN SEARCH s
      ON s.IdExpresion = c.IdExpresion AND s.rn = c.lvl
)
-- lvl 0 is the row on which every pair has been applied.
SELECT IdExpresion, expresion
FROM CTE
WHERE lvl = 0
OPTION (MAXRECURSION 0);
This way, each REPLACE is only done once per IdExpresion.
And that without having to use CHARINDEX.
You could also replace that SEARCH cte with a temporary table.
One that has the records from #table2 with that row_number.
This has the benefit that with a table you can add a compound index.
On a large table it should speed up the recursive join to the replacements.
Test on rextester here
-- Replacement pairs numbered 1..k per IdExpresion, materialized so that the
-- recursive join can seek on the (IdExpresion, rn) primary key.
CREATE TABLE #tmpSearch (
    IdExpresion INT,
    rn INT,
    searchExpresion VARCHAR(50),
    replacementExpresion VARCHAR(50),
    primary key (IdExpresion, rn));

insert into #tmpSearch (IdExpresion, rn, searchExpresion, replacementExpresion)
select
    IdExpresion,
    row_number() over (partition by IdExpresion order by searchExpresion) as rn,
    searchExpresion,
    replacementExpresion
from #table2;

;WITH CTE(IdExpresion, expresion, lvl) AS
(
    -- Anchor: lvl starts at the highest pair number for the expression.
    -- LEFT JOIN + COALESCE gives lvl 0 to an expression with no pairs, so it
    -- is returned unchanged instead of being dropped.
    SELECT t1.IdExpresion, t1.expresion, COALESCE(max(s.rn), 0)
    FROM #table1 t1
    LEFT JOIN #tmpSearch s ON s.IdExpresion = t1.IdExpresion
    GROUP BY t1.IdExpresion, t1.expresion
    UNION ALL
    -- Recursive member: apply pair number lvl, then count down.
    SELECT c.IdExpresion, REPLACE(c.expresion, s.searchExpresion, s.replacementExpresion), c.lvl - 1
    FROM CTE c
    JOIN #tmpSearch s
      ON s.IdExpresion = c.IdExpresion AND s.rn = c.lvl
)
-- lvl 0 is the row on which every pair has been applied.
SELECT IdExpresion, expresion
FROM CTE
WHERE lvl = 0
OPTION (MAXRECURSION 0);
Good day,
Here is another solution. Please check whether it fits your needs. This solution does not use any loop, just a simple dynamic query.
-- Builds one "SELECT <id>, REPLACE(REPLACE(...t1.expresion...)) FROM table1"
-- statement per expression, glues them with UNION ALL and executes the result.
DECLARE @SQLString nvarchar(MAX);
-- do not make mistake, this is simple CTE and not a recursive CTE (no Loop)
;With MyCTE as (
    select R
    From table1 t1
    CROSS APPLY (
        -- First STRING_AGG: 'REPLACE(REPLACE(...' with one REPLACE per pair.
        -- Second STRING_AGG: the matching "'search','replacement')" argument lists.
        -- Embedded single quotes are doubled so that the values stay inside
        -- their string literals in the generated statement; the NVARCHAR(MAX)
        -- conversions avoid STRING_AGG's 8000-byte limit for non-MAX inputs.
        SELECT R = 'SELECT ' + CONVERT (NVARCHAR(MAX),t1.IdExpresion) + ' as IdExpresion,'
                 + STRING_AGG (CONVERT (NVARCHAR(MAX),'REPLACE'),'(') + '(' + 't1.expresion,'''
                 + STRING_AGG(CONVERT (NVARCHAR(MAX),
                                  REPLACE(t2.searchExpresion, '''', '''''')
                                  + ''','''
                                  + REPLACE(t2.replacementExpresion, '''', '''''')) , '''),''')
                 + ''') as expresion FROM table1 t1 where t1.IdExpresion = ' + CONVERT (NVARCHAR(MAX),t1.IdExpresion)
        from table2 t2
        where t2.IdExpresion = t1.IdExpresion
    ) C
)
-- An expression with no pairs yields R = NULL, which STRING_AGG skips, so it
-- does not appear in the output.
SELECT @SQLString = STRING_AGG(R,'
UNION ALL
')
FROM MyCTE
--PRINT @SQLString
EXECUTE sp_executesql @SQLString
GO
Note! I recommend to execute some tests to confirm that this solves all cases
Note! I am using the function STRING_AGG which was added to SQL Server 2017. In older version you can get the exact same solution using FOR XML statement.
Since we don't have the real DDL+DML we cannot really discuss performance, but the difference between the execution plans of the two solutions is 10% versus 90%. (In general, you should also check the IO and time statistics in production before choosing your solution.)
So... here is the execution plans image (the upper query is my dynamic SQL solution and the lower one is LukStorms' solution using a recursive CTE = loop)

SQL String join to table

Given few strings as
SET #Codes1 = 3,4
SET #Codes2 = 1
SET #Codes3 = --empty
Table -- TblCode
Id Code
1 A
2 B
3 C
4 D
How to convert the #Codes1, #Codes2, #Codes3 with join to the table TblCode so it returns the following output :
1. #Codes1 = CD
2. #Codes2 = A
3. #Codes3 = --empty
Note that the concatenation for the output is without the comma.
PS - This is a small example to a much larger and complex data set. Kindly ignore any wrongful design pattern here.
You can try this. I added the answer just for #Codes1, but it works with #Codes2 and #Codes3 too.
-- Lookup table: numeric Id -> Code.
DECLARE @TblCode TABLE (Id INT, Code VARCHAR(2))
INSERT INTO @TblCode (Id, Code)
VALUES (1, 'A'),
       (2, 'B'),
       (3, 'C'),
       (4, 'D')
DECLARE @Codes1 VARCHAR(10) = '3,4'
DECLARE @Codes2 VARCHAR(10) = '1'
DECLARE @Codes3 VARCHAR(10) = NULL
DECLARE @CodesOut VARCHAR(10) = ''
-- CTE_1 appends a trailing comma so that every id is followed by a delimiter.
;WITH CTE_1 AS (
    SELECT CODE = @Codes1 + ','
)
, CTE_2 AS -- Splits the text into rows
(
    -- Each step peels off the text before the first comma as ID and keeps the
    -- rest in CODE; the recursion stops once CODE is empty.
    SELECT RIGHT(CTE_1.CODE, LEN(CTE_1.CODE) - CHARINDEX(',', CTE_1.CODE)) CODE,
           SUBSTRING(CTE_1.CODE, 0, CHARINDEX(',', CTE_1.CODE)) ID,
           CHARINDEX(',', CTE_1.CODE) AS CI
    FROM CTE_1
    UNION ALL
    SELECT RIGHT(CTE_2.CODE, LEN(CTE_2.CODE) - CHARINDEX(',', CTE_2.CODE)) CODE,
           SUBSTRING(CTE_2.CODE, 0, CHARINDEX(',', CTE_2.CODE)) ID,
           CHARINDEX(',', CTE_2.CODE) AS CI
    FROM CTE_2
    WHERE LEN(CTE_2.CODE) > 0
)
-- Append the Code of every split id to @CodesOut (no separator).
-- NOTE(review): there is no ORDER BY, so the concatenation order is whatever
-- order the rows are produced in.
SELECT @CodesOut = @CodesOut + C.Code
FROM CTE_2
INNER JOIN @TblCode C ON CTE_2.ID = C.Id
SELECT @CodesOut
Result:
CD
You can use a recursive CTE. Here is one method:
-- Replaces every Id found in @codes with its Code, one lookup row per
-- recursion step, then removes the commas.
-- Expects a TblCode(Id, Code) table and a string variable @codes such as '3,4'.
-- NOTE(review): REPLACE matches substrings, so Id 1 also matches inside '10'
-- or '13'; safe only while no Id is a substring of another Id.
with c as (
    -- Number the lookup rows 1..n so the recursion can visit them in turn.
    select t.Id, t.Code, row_number() over (order by t.Id) as seqnum
    from TblCode t
),
cte as (
    select cast(@codes as varchar(max)) as str,
           cast(replace(@codes, cast(c.Id as varchar(20)), c.Code) as varchar(max)) as newstr,
           1 as lev
    from c
    where c.seqnum = 1
    union all
    -- The casts keep the column types identical in the anchor and recursive
    -- members, which a recursive CTE requires.
    select cte.str,
           cast(replace(cte.newstr, cast(c.Id as varchar(20)), c.Code) as varchar(max)),
           cte.lev + 1
    from cte join
         c
         on c.seqnum = cte.lev + 1
)
-- The deepest level has had every lookup row applied.
select top (1) replace(newstr, ',', '') as newstr
from cte
order by lev desc;
If there is an error in the syntax, set up a SQL Fiddle or Rextester or something similar so it can be fixed.

SQL Select Concat between 2 numbers

I work with SQL Server 2012 and need a concatenate between 2 different columns.
eg:
3 and 7 = 34567
or 1 and 4 = 1234
or 2 and 2 = 2
When I use the Concat Function, I am just able to Concate the first and the last number. But I need the numbers between, too.
Try this query. Here firstcolumn =3 and secondcolumn=7
-- For each row of tablename, concatenates the integers from firstcolumn to
-- secondcolumn (inclusive), e.g. 3 and 7 -> '34567'.
SELECT t.Id,
       STUFF((SELECT '' + CAST(a.n AS VARCHAR(50)) [text()]
              FROM (SELECT DISTINCT n = number
                    FROM master..[spt_values]
                    WHERE type = 'P'   -- the plain 0..2047 number rows
                      AND number >= t.firstcolumn AND number <= t.secondcolumn
                   ) a
              ORDER BY a.n             -- concatenate in ascending order
              FOR XML PATH(''), TYPE)
             .value('.','NVARCHAR(MAX)'),1,0,'') List_Output
FROM tablename t
There are multiple ways to generate sequences in SQL Server. Here is a simple one that doesn't need a numbers table:
-- Numbers: the integers 1..2000, produced by numbering the rows of
-- sys.all_objects (no dedicated numbers table needed).
WITH Numbers AS
(
    SELECT TOP (2000)
           ROW_NUMBER() OVER (ORDER BY o.object_id) AS n
    FROM sys.all_objects AS o
    ORDER BY n
)
SELECT num.n
FROM Numbers AS num
WHERE num.n >= 3
  AND num.n <= 7
Here's a recursive query that will go from start to end recursively and generate the string you want or an INTEGER value:
-- Walks from @start to @end with a recursive CTE and accumulates the values
-- both as a string ('34567') and as an integer (34567).
DECLARE @start INT = 3
DECLARE @end INT = 7
DECLARE @int_value INT = 0
DECLARE @str_value VARCHAR(100) = '';
WITH rec AS (
    SELECT @start AS val
    UNION ALL
    SELECT val + 1
    FROM rec
    WHERE val < @end
)
-- NOTE(review): the *10 accumulation assumes single-digit values, and there is
-- no ORDER BY, so the result depends on the rows arriving in ascending order.
SELECT @str_value = CONCAT(@str_value, val),
       @int_value = @int_value * 10 + val
FROM rec
SELECT @str_value, @int_value
This is Itzik's style
-- Builds a row source by repeatedly cross joining a 2-row set
-- (n0 = 2 rows, n1 = 4, n2 = 16, n3 = 256), numbers the rows, and
-- concatenates the numbers between @from and @to.
declare @values varchar(100) = '', @from int, @to int
select @from = 3, @to = 7
;WITH
n0 AS (SELECT 0 AS number UNION ALL SELECT 0),
n1 AS (SELECT 0 AS number FROM n0 AS a CROSS JOIN n0 AS b),
n2 AS (SELECT 0 AS number FROM n1 AS a CROSS JOIN n1 AS b),
n3 AS (SELECT 0 AS number FROM n2 AS a CROSS JOIN n2 AS b)
-- ltrim() here turns the numeric sno into a string for concatenation.
select @values = @values + ltrim(sno)
from (select row_number() over (order by number) as sno from n3) as t
where sno between @from and @to
select @values as [values]
Thank you for the Answer.
I am going to use the answer from @Mukesh Kalgude.
So, my full query is as follows:
-- For each SwitchProfilePairs row, concatenates up to 7 numbers taken from
-- master..spt_values into List_Output.
select
DayFrom,DayTo,
STUFF((SELECT TOP 7'' + CAST( n AS VARCHAR(50)) [text()]
FROM (SELECT DISTINCT n = number
FROM master..[spt_values]
-- NOTE(review): the second predicate compares the outer DayTo with the
-- constant 7; nothing in this filter compares number with DayTo.
WHERE number >= DayFrom AND DayTo <= 7
)a
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,0,'') List_Output
from SwitchProfilePairs
The result is
dayFrom = 1 day To = 1 But the List_Output is 1234567
Try this using SUBSTRING() function(Fiddle example):
--Declare sample table
DECLARE @T TABLE (id int identity, numCol1 int, numCol2 int)
--Add some values
INSERT @T (numCol1, numCol2)
VALUES (3, 7), (1, 4), (2, 2)
--Actual Query: cut the digits numCol1..numCol2 out of the literal '123456789'
SELECT id,
       numCol1,
       numCol2,
       SUBSTRING('123456789', numCol1, numCol2 - numCol1 + 1) Number
FROM @T
Above query works only with single digit numbers. Modified version (below) to work with numbers like 34, 78
-- Uses only the first digit of numCol1 and the last digit of numCol2 as the
-- bounds of the slice taken from '123456789' (e.g. 34 and 78 -> '345678').
-- Expects the table variable @T (id, numCol1, numCol2) declared in the same batch.
SELECT *,
       SUBSTRING('123456789',
                 CONVERT(int, LEFT(numCol1, 1)),
                 CONVERT(int, RIGHT(numCol2, 1)) - CONVERT(int, LEFT(numCol1, 1)) + 1) YourNumber
FROM @T
Note: Number column is returning a string, can be converted to an int using convert() function

SQL replace from list

I'm trying to figure out how I can replace a string using data from another table
I have a table that looks like this:
Id Translation
1 Peter
2 Sandra
3 Olga
Now I want to select all and replace the translations using a list that looks like this:
Original New
e #
r ?
lg *%
So that the select list looks like this:
Id Translation
1 P#t#?
2 Sand?a
3 O*%a
So, for each translation, I need to have a REPLACE(Translation,Original,New).
Or in other words: I need to go through every "Translation" in my first list and make another loop in my replacement table to see what to replace
Bear in mind that the first list has 25'000 rows and the second has 50'000, so I can't just type it by hand :)
EDIT
Just to clarify:
The Original and New from my look up table can be both letters and words so the table can looks like this:
Original New
one two
three fifty
sun moon
To do this in one query, you need to use a recursive CTE. Something like:
-- Applies every (original, new) pair from translations to each string in
-- totranslate, one pair per recursion step.
with trans as (
    -- Number the pairs 1..cnt so the recursion can visit them in turn.
    select tr.original, tr.new,
           row_number() over (order by tr.original) as seqnum,
           count(*) over () as cnt
    from translations tr
),
t as (
    -- Anchor: apply pair 1. seqnum is the number of the last pair applied.
    select tt.id, tt.string,
           cast(replace(tt.string, trans.original, trans.new) as nvarchar(max)) as replaced,
           trans.seqnum, trans.cnt
    from totranslate tt join
         trans
         on trans.seqnum = 1
    union all
    -- Recursive member: apply the next pair to the text accumulated so far
    -- (t.replaced), not to the untouched original string.
    select t.id, t.string,
           cast(replace(t.replaced, trans.original, trans.new) as nvarchar(max)),
           trans.seqnum, t.cnt
    from t join
         trans
         on trans.seqnum = t.seqnum + 1
)
-- The row whose last applied pair is pair number cnt has had every pair applied.
-- With an empty translations table no rows are returned.
select t.id, t.string, t.replaced
from t
where t.seqnum = t.cnt
-- One recursion level per pair: lift the default limit of 100 levels.
option (maxrecursion 0);
You can use a UDF:
-- Returns @Str with the (Original, New) pairs of dbo.Mappings applied to it.
-- NOTE(review): this relies on the variable-assignment SELECT visiting the
-- rows in ORDER BY Pos order - confirm on your SQL Server version.
CREATE FUNCTION [dbo].[Translate]
(
    -- Add the parameters for the function here
    @Str nvarchar(max)
)
RETURNS nvarchar(max)
AS
BEGIN
    DECLARE @Result nvarchar(max) = @Str;
    SELECT @Result = replace(@Result, Original, New)
    from dbo.Mappings
    order BY Pos;
    RETURN @Result;
END
Here I assumed that the table containing the translations is called dbo.Mappings and that, besides the Original and New columns, it has another column Pos int, which determines the order in which the translations are applied (to address the problems mentioned by @Thorsten Kettner in the comments).
Also with recursive cte:
-- Sample data: the strings to rewrite ...
DECLARE @translations TABLE
    (
      Id INT ,
      Translation NVARCHAR(20)
    )
INSERT INTO @translations
VALUES  ( 1, 'Peter' ),
        ( 2, 'Sandra' ),
        ( 3, 'Olga' )
-- ... and the replacement pairs to apply to each of them.
DECLARE @replacements TABLE
    (
      Original VARCHAR(2) ,
      New VARCHAR(2)
    )
INSERT INTO @replacements
VALUES  ( 'e', '#' ),
        ( 'r', '?' ),
        ( 'lg', '*%' );
-- cte1: every translation paired with every replacement, numbered 1..n per Id
--       (ORDER BY (SELECT 1) leaves the numbering order unspecified).
-- cte2: applies replacement 1, then recursively replacement rn + 1 to the
--       text produced so far.
WITH cte1 AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY id ORDER BY (SELECT 1)) rn
              FROM @translations CROSS JOIN @replacements),
     cte2 AS (SELECT Id, rn, REPLACE(Translation, Original, New) AS NTranslation
              FROM cte1
              WHERE rn = 1
              UNION ALL
              SELECT c2.Id, c2.rn + 1, REPLACE(c2.NTranslation, c1.Original, c1.New)
              FROM cte1 c1
              JOIN cte2 c2 ON c2.Id = c1.Id AND c2.rn + 1 = c1.rn)
-- The row whose rn equals the number of replacements has had all of them applied.
SELECT * FROM cte2
WHERE rn = (SELECT COUNT(*) FROM @replacements)
ORDER BY Id
EDIT:
-- cte1: every translations row paired with every Property row; Property.Id is
-- exposed as Old, and the pairs are numbered per t.id (ORDER BY (SELECT 1)
-- leaves the numbering order unspecified).
WITH cte1 AS (SELECT t.*, p.Id AS Old, p.Code, ROW_NUMBER() OVER (PARTITION BY t.id ORDER BY (SELECT 1)) rn
FROM translations t CROSS JOIN Property p),
-- cte2: applies pair 1 to Trans, then recursively pair rn + 1 to the text
-- produced so far.
cte2 AS (SELECT Id, rn, REPLACE(Trans, Old, Code) AS NTranslation
FROM cte1
WHERE rn = 1
UNION ALL
SELECT c2.Id, c2.rn + 1, REPLACE(c2.NTranslation, c1.Old, c1.Code)
FROM cte1 c1
JOIN cte2 c2 ON c2.Id = c1.Id AND c2.rn + 1 = c1.rn)
-- Keep the row whose rn equals the number of Property rows.
SELECT * FROM cte2
WHERE rn = (SELECT COUNT(*) FROM Property)
ORDER BY Id
Here is something I worked out that will allow you to replace multiple characters with one specified string.
[Split2] is stolen from https://blogs.msdn.microsoft.com/amitjet/2009/12/11/convert-comma-separated-string-to-table-4-different-approaches/
USE <Your Database>
GO
-- Splits a comma-separated string into rows.
--   @strString : the delimited input (up to 4000 characters)
-- Returns one row per element: RID is the 0-based position, Value the element
-- text (empty elements are returned as empty strings).
-- The recursive CTE runs under the default MAXRECURSION of 100, so an input
-- with more than about 100 commas raises an error.
CREATE FUNCTION [dbo].[Split2]
(
    @strString varchar(4000)
)
RETURNS @Result TABLE
(
    RID INT IDENTITY(0,1) Primary Key
    ,Value varchar(4000)
)
AS
BEGIN
    -- start/stop = first character of an element / position of the comma that
    -- ends it (0 when the element runs to the end of the string).
    WITH StrCTE(start, stop) AS
    (
        SELECT 1, CHARINDEX(',' , @strString )
        UNION ALL
        SELECT stop + 1, CHARINDEX(',' , @strString , stop + 1)
        FROM StrCTE
        WHERE stop > 0
    )
    INSERT INTO @Result (Value)
    SELECT SUBSTRING(@strString , start, CASE WHEN stop > 0 THEN stop - start ELSE 4000 END) AS stringValue
    FROM StrCTE
    ORDER BY start   -- assign RID in the order the elements appear in the input
    RETURN
END
GO
USE <Your Database>
GO
-- Replaces every element of a comma-separated list with one replacement string.
--   @MyString : the text to clean
--   @RepChars : comma-separated list of substrings to replace
--   @NewChars : the text that replaces each of them
-- Returns @MyString after the elements have been replaced one by one, in list order.
CREATE FUNCTION [dbo].[MultiReplace]
(
    @MyString varchar(MAX)
    ,@RepChars varchar(4000)
    ,@NewChars varchar(4000)
)
RETURNS varchar(MAX)
AS
BEGIN
    -- Split the list once instead of calling dbo.split2 on every iteration.
    DECLARE @Parts TABLE (RID int PRIMARY KEY, Value varchar(4000))
    INSERT INTO @Parts (RID, Value)
    SELECT RID, Value
    FROM dbo.split2 ( @RepChars )

    DECLARE @CurRow int = 0
    DECLARE @MaxRow int
    SELECT @MaxRow = MAX(RID)
    FROM @Parts

    WHILE @CurRow <= @MaxRow
    BEGIN
        SELECT @MyString = REPLACE(@MyString, Value, @NewChars)
        FROM @Parts
        WHERE RID = @CurRow
        SET @CurRow = @CurRow + 1
    END
    RETURN (@MyString);
END
GO
In this example I replace each character with no space
-- Example call: each comma-separated element of the second argument is replaced with the empty string.
SELECT [dbo].[MultiReplace]('6th month 2016-06 (test / requested)',',1st,2nd,3rd,4th,5th,6th,0,1,2,3,4,5,6,7,8,9,(,),/,-,+, ','')
Result:
monthtestrequested
I hope this is useful for you.

initialize and increment variable inside cte query sqlserver 2008

I am using SQL Server 2008, and I want to initialize and increment a variable (@NUMTwo) at the same time in the second part of my query (the "Problem line").
I am creating a cte query.
Is this possible , if yes then please let me know.
following is a sample example.I hope i am clear.
-- Asker's non-working attempt, kept as posted.
-- Target table for the rows produced by the CTE below.
CREATE table #TempTable
(
childProductID INT,parentProductID INT,productModel varchar(50),[Num2] VARCHAR(100)
)
DECLARE #NUMTwo INT = 0
WITH tableR AS
(
-- First Part
SELECT childProductID = null,parentProductID=null,productModel from Products where productid in (#a),[Num2] = convert(varchar(100), '')
UNION ALL
--Second Part
SELECT e.childProductID,e.parentProductID,prd.productModel FROM ProductIncludes AS e
-- The line below is the one the asker flags: it tries to build '1.<n>' while
-- incrementing the variable in the same expression.
,[Num2] = convert(varchar(100),'1.' + #NUMTwo+=1 ) -- Problem line
INNER JOIN Products AS PRD ON e.childProductID = PRD.productID
WHERE parentProductID in (#a)
)
INSERT INTO #TempTable(childProductID,parentProductID,productModel,[Num2])
SELECT childProductID,parentProductID,productModel,[Num2]
END
SELECT * FROM #TempTable
You need to "initialize" a column in the anchor part of the query, and then "increment" this column in the recursive part.
Something like
-- The counter lives in a column: the anchor member seeds [N] from @NUMTwo and
-- each recursive step emits [N] + 1, until [N] reaches 10.
DECLARE @NUMTwo INT = 0
;WITH Test AS (
    SELECT [Num2] = convert(varchar(MAX), ''),
           @NUMTwo [N]
    UNION ALL
    SELECT [Num2] = '1.' + convert(varchar(MAX), [N] + 1),
           [N] + 1
    FROM Test
    WHERE [N] < 10
)
SELECT *
FROM Test
SQL Fiddle DEMO
If the parameter #NUMTwo is just for numbering rows you can use the ROW_NUMBER() OVER(...) instead of it like so:
WITH tableR AS
(
SELECT childProductID = NULL, parentProductID = NULL,
productModel, NUMTwo = CAST('0' AS VARCHAR(10))
FROM Products
WHERE
productid in (#a),
[Num2] = convert(varchar(100), '')
UNION ALL
SELECT e.childProductID, e.parentProductID,
prd.productModel,
NUMTwo = '1.' +
CAST( ROW_NUMBER() OVER(ORDER BY (SELECT 0)) AS VARCHAR(10))
FROM ProductIncludes AS e
INNER JOIN Products AS PRD ON e.childProductID = PRD.productID
WHERE parentProductID in (#a)
)