SQL multiplying rows in select - sql

I would like to select some rows multiple-times, depending on the column's value.
Source table
Article | Count
===============
A | 1
B | 4
C | 2
Wanted result
Article
===============
A
B
B
B
B
C
C
Any hints or samples, please?

You could use:
SELECT m.Article
FROM mytable m
CROSS APPLY (VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) AS s(n)
WHERE s.n <= m.[Count];
LiveDemo
Note: CROSS APLLY with any tally table. Here values up to 10.
Related: What is the best way to create and populate a numbers table?

You could also use a recursive CTE which works with numbers > 10 (here up to 1000):
With NumberSequence( Number ) as
(
Select 0 as Number
union all
Select Number + 1
from NumberSequence
where Number BETWEEN 0 AND 1000
)
SELECT Article
FROM ArticleCounts
CROSS APPLY NumberSequence
WHERE Number BETWEEN 1 AND [Count]
ORDER BY Article
Option (MaxRecursion 0)
Demo
A number-table will certainly be the best option.
http://sqlperformance.com/2013/01/t-sql-queries/generate-a-set-2

Please check following SQL script
Before executing the SELECT statement, note that I used a user function which is used to simulate a numbers table
You can find the sql codes of numbers table in SQL Server at referred tutorial
----create table myTempTbl (Article varchar(10), Count int)
--insert into myTempTbl select 'A',1
--insert into myTempTbl select 'B',4
--insert into myTempTbl select 'C',2
select t.*
from myTempTbl t
cross apply dbo.NumbersTable(1,100,1) n
where n.i <= t.Count
order by t.Article

one more CTE
with cte_t as (
select c as c, 1 as i
from mytable
group by c
union all
select t.c, ctet.i + 1
from mytable t
join cte_t ctet
on ctet.c = t.c
and ctet.i < t.i
)
select cte_t.c
from cte_t
order by cte_t.c

Can obtain the output using simple WHILE LOOP
DECLARE #table TABLE
(ID int ,Article varchar(5),[Count] int)
INSERT INTO #table
(ID,Article,Count)
VALUES
(1,'A',1),(2,'B',4),(3,'C',2)
DECLARE #temp TABLE
(Article varchar(5))
DECLARE #Cnt1 INT
DECLARE #Cnt2 INT
DECLARE #Check INT
DECLARE #max INT
SET #max =0
SET #Cnt1 = (SELECT Count(Article) FROM #table)
WHILE (#max < #Cnt1)
BEGIN
SET #max = #max +1
SET #Cnt2 = (SELECT [Count] FROM #table WHERE ID =#max)
SET #Check =(SELECT [Count] FROM #table WHERE ID =#max)
WHILE (#Cnt2 > 0)
BEGIN
INSERT INTO #temp
SELECT Article FROM #table WHERE [Count] =#Check
SET #Cnt2 = #Cnt2 -1
END
END
SELECT * FROM #temp

Related

I want to create series data in SQL

I want to create series data in SQL for my testing purpose.
Ex.
Product1
Product2
Product3
.
.
.
Product1000
I want to create 1000 records like above manner in SQL.
Can anybody suggest me the solution ?
Thank you
You can do that via recursive way if you are working with SQL Server:
with series as (
select 1 as id
union all
select id+1
from series
where id < 1000
)
select concat('Product', id)
from series s
option (maxrecursion 1000);
Just another option using an ad-hoc tally table
Example
Select Top 1000 Product=concat('Product',Row_Number() Over (Order By (Select NULL)))
From master..spt_values n1
Returns
Product1
Product2
Product3
Product4
...
Product998
Product999
Product1000
Here you go:
DECLARE #temp TABLE(
Products VARCHAR(15)
);
DECLARE #counter INT
SET #counter = 1
WHILE (#counter <= 1000)
BEGIN
INSERT INTO #temp
VALUES('Product'+CONVERT(VARCHAR(5),#counter));
SET #counter = #counter + 1
END
SELECT * FROM #temp;
This should get you started.
DECLARE #table table (Product varchar(200))
DECLARE #cnt INT; SET #cnt = 0
WHILE #cnt <=10
BEGIN
SET #cnt = #cnt + 1
insert into #table
SELECT 'Product' +CAST(#cnt as varchar(150))
END
select * from #table
A set-based CTE can generate test data quickly. Even better would be a tally (a.k.a.) numbers table.
WITH
t10 AS (SELECT n FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t(n))
,t1k AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num FROM t10 AS a CROSS JOIN t10 AS b)
SELECT 'Product' + CAST(num AS varchar(10))
FROM t1k;

How to effectively split grouped records into batches

For each group in table I need to split that group into specific amount of records (batches) and mark each record in batch with according batch id.
Right now, my implementation based on cursors is IMHO clumsy. It takes 1 minute to split set of 10 000 rows which is, needless to say, very slow. Any clues how to make that work faster?
Here is test script.
-- Needed to generate big data
DECLARE #Naturals TABLE (ID INT)
INSERT INTO #Naturals (ID)
VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
DECLARE #TestData TABLE
(
LINK INT,
F_House INT,
F_Batch UNIQUEIDENTIFIER
)
INSERT INTO #TestData (LINK, F_House)
SELECT ROW_NUMBER() OVER (order by T1.ID), ROW_NUMBER() OVER (order by T1.ID) % 5
FROM
#Naturals T1
CROSS JOIN #Naturals T2
CROSS JOIN #Naturals T3
CROSS JOIN #Naturals T4
--CROSS JOIN #Naturals T5 -- that would give us 100 000
-- Finished preparing Data (10 000 rows)
SELECT 'Processing:', COUNT(*) FROM #TestData
DECLARE #batchSize INT -- That would be amount of rows in each batch
SET #batchSize = 50
IF OBJECT_ID('tempdb..#G') IS NOT NULL -- Split set of data into groups. We need to create batches in each group.
DROP TABLE #G
SELECT
buf.F_House, COUNT(*) AS GroupCount
INTO #G
FROM #TestData buf
GROUP BY buf.F_House -- That logic could be tricky one. Right now simplifying
DECLARE #F_House INT -- That would be group key
DECLARE db_cursor CURSOR FOR
SELECT F_House
FROM #G
ORDER BY F_House
OPEN db_cursor FETCH NEXT FROM db_cursor INTO #F_House
WHILE ##FETCH_STATUS = 0
BEGIN
PRINT 'Processing house group: ' + CAST(#F_House AS VARCHAR(10))
-- For each group let's create batches
WHILE EXISTS (SELECT 1 FROM #TestData AS itmds
WHERE itmds.F_House = #F_House
AND itmds.F_Batch IS NULL
)
BEGIN
DECLARE #batchLink UNIQUEIDENTIFIER
SET #batchLink = NEWID()
UPDATE itmds
SET itmds.F_Batch = #batchLink
FROM #TestData AS itmds
WHERE itmds.F_House = #F_House
AND itmds.F_Batch IS NULL
AND itmds.LINK IN
(
SELECT TOP (#batchSize)
sub.LINK
FROM #TestData sub
WHERE sub.F_House = #F_House
AND sub.F_Batch IS NULL
)
END
FETCH NEXT FROM db_cursor INTO #F_House
END
CLOSE db_cursor
DEALLOCATE db_cursor
SELECT
buf.F_House, COUNT(distinct F_Batch) AS BatchCountInHouse
FROM #TestData buf
GROUP BY buf.F_House
ORDER BY buf.F_House
Expected output (considering batchsize = 50)
10 000 rows / 5 houses = 2000 rows/house
2000 rows/house / 50(batchSize) = 40 batches/house
This is set based avoiding a cursor. The assigned F_Batch is a BIGINT:
;with baseRowNum as
(
SELECT LINK, F_House,
-- row number per F_House
Row_Number() Over (PARTITION BY F_House ORDER BY LINK) AS rn
FROM #TestData
)
SELECT *,
-- combine F_House & group number into a unique result
F_House * 10000 +
-- start a new sub group for every F_House or after #batchSize rows
Sum(CASE WHEN rn % #batchSize = 1 THEN 1 ELSE 0 END)
Over (ORDER BY F_House, rn
ROWS Unbounded Preceding) AS F_Batch
FROM baseRowNum
If you really need a UNIQUEINDENTIFIER you can join back:
;with baseRowNums as
(
SELECT LINK, F_House,
-- row number per F_House
Row_Number() Over (PARTITION BY F_House ORDER BY LINK) AS rn
FROM #TestData
)
,batchNums as
(
SELECT *,
-- combine F_House & group number into a unique result
F_House * 10000 +
-- start a new sub group for every F_House or after #batchSize rows
Sum(CASE WHEN rn % #batchSize = 1 THEN 1 ELSE 0 END)
Over (ORDER BY F_House, rn
ROWS Unbounded Preceding) AS F_Batch
FROM baseRowNums
)
,GUIDs as
(
select F_Batch, MAX(newid()) as GUID
from batchNums
group by F_Batch
)
-- select * from
--from batchNums join GUIDs
-- on batchNums.F_Batch = GUIDs.F_Batch
select F_House, GUID, count(*)
from batchNums join GUIDs
on batchNums.F_Batch = GUIDs.F_Batch
group by F_House, GUID
order by F_House, count(*) desc
See Fiddle.
I would use an inner looping inside of a looping referencing a grouping level.
Then you can iterate through from the grouping down into the BatchGrouping. However as you pointed out speed is an issue with table variables and CTE's for that reason I in this case tested with a tempdb # table. This way I could index after the insert and optimize performance. I can run a million rows of aggregation logic in about 16 seconds. I consider that acceptable performance. But my Dev Box is an I7 6700, with 16 gigs of DDR4, and an SSD. Performance times may vary based on hardware obviously.
--Make up some fake data for example
DECLARE
#Start INT = 1
, #End INT = 100000
;
SET NOCOUNT ON;
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
DROP TABLE tempdb..#Temp
CREATE Table #Temp (Id INT, Grp int, Val VARCHAR(8), BatchGroup int)
WHILE #Start <= #End
BEGIN
INSERT INTO #Temp (Id, Grp, Val)
VALUES (#Start, CAST(RAND() * 8 AS INT) + 1, LEFT(NEWID(), 8))
SELECT #Start += 1;
END
CREATE CLUSTERED INDEX IX_Temp_Grp ON #Temp(Grp, BatchGroup)
--Determine Batch Size You want for groupings
DECLARE #BatchSize INT = 1000;
--Let's randomly mess with groupings
DECLARE #X INT = 1
WHILE #X <= 4
BEGIN
; WITH x AS
(
SELECT TOP (#BatchSize * 4)
Id
, Grp
, Val
FROM #Temp
WHERE Grp = CAST(RAND() * 8 AS INT) + 1
)
UPDATE x
SET Grp = CAST(RAND() * 8 AS INT) + 1
SELECT #X += 1
END
DECLARE
#CurrentGroup INT = 1
, #CurrentBatch INT = 1
WHILE #CurrentGroup <= (SELECT MAX(Grp) FROM #Temp) -- Exists (SELECT 1 FROM #Temp WHERE BatchGroup IS NULL)
BEGIN
WHILE EXISTS (SELECT 1 FROM #Temp WHERE Grp = #CurrentGroup AND BatchGroup IS NULL)
BEGIN
; WITH x AS
(
SELECT TOP (#BatchSize) *
FROM #Temp
WHERE Grp = #CurrentGroup
AND BatchGroup IS NULL
)
update x
SET BatchGroup = #CurrentBatch
SELECT #CurrentBatch += 1;
END
SELECT #CurrentBatch = 1
SELECT #CurrentGroup += 1;
END
--Proof
Select
Grp
, COUNT(DISTINCT Id)
, COUNT(DISTINCT BatchGroup)
From #Temp
GROUP BY Grp
Actually, I've tried NTILE() with cursors and it's quite fast(I mean its faster then 1 minute for 10 000 rows).
10 000 rows for 0 seconds.
100 000 rows for 3 seconds.
1 000 000 rows for 34 seconds.
10 000 000 rows for 6 minutes
Linear grow in complexity which is nice.
-- Needed to generate big data
DECLARE #Naturals TABLE (ID INT)
INSERT INTO #Naturals (ID)
VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
DECLARE #TestData TABLE
(
LINK INT,
F_House INT,
F_Batch UNIQUEIDENTIFIER
)
INSERT INTO #TestData (LINK, F_House)
SELECT ROW_NUMBER() OVER (order by T1.ID), ROW_NUMBER() OVER (order by T1.ID) % 5
FROM
#Naturals T1
CROSS JOIN #Naturals T2
CROSS JOIN #Naturals T3
CROSS JOIN #Naturals T4
--CROSS JOIN #Naturals T5 -- that would give us 100 000
-- Finished preparing Data (10 000 rows)
SELECT 'Processing:', COUNT(*) FROM #TestData
DECLARE #batchSize INT -- That would be amount of rows in each batch
SET #batchSize = 50
IF OBJECT_ID('tempdb..#G') IS NOT NULL -- Split set of data into groups. We need to create batches in each group.
DROP TABLE #G
SELECT
buf.F_House, COUNT(*) AS GroupCount
INTO #G
FROM #TestData buf
GROUP BY buf.F_House -- That logic could be tricky one. Right now simplifying
DECLARE #F_House INT -- That would be group key
DECLARE db_cursor CURSOR FOR
SELECT F_House
FROM #G
ORDER BY F_House
OPEN db_cursor FETCH NEXT FROM db_cursor INTO #F_House
WHILE ##FETCH_STATUS = 0
BEGIN
PRINT 'Processing house group: ' + CAST(#F_House AS VARCHAR(10))
DECLARE #rowsInGroup INT
SELECT #rowsInGroup = COUNT(*) FROM #TestData
WHERE F_House = #F_House
IF OBJECT_ID('tempdb..#TileBatch') IS NOT NULL
DROP TABLE #TileBatch
SELECT
T.[NTile], NEWID() AS F_Batch
INTO #TileBatch
FROM
(
SELECT distinct
NTILE(#rowsInGroup / #batchSize) OVER (ORDER BY LINK) AS [NTile]
from
#TestData
WHERE F_House = #F_House
) T
UPDATE D
SET D.F_Batch = B.F_Batch
FROM
#TestData D
INNER JOIN
(
SELECT
*, NTILE(#rowsInGroup / #batchSize) OVER (ORDER BY LINK) AS [NTile]
from
#TestData
WHERE F_House = #F_House
) DT ON D.LINK = DT.LINK
INNER JOIN
#TileBatch B ON DT.[NTile] = B.[NTile]
WHERE D.F_House = #F_House
FETCH NEXT FROM db_cursor INTO #F_House
END
CLOSE db_cursor
DEALLOCATE db_cursor
SELECT
buf.F_House, COUNT(distinct F_Batch) AS BatchCountInHouse
FROM #TestData buf
GROUP BY buf.F_House
ORDER BY buf.F_House

Inserting individual values into table based on a number

Here is my problem: I have a stored procedure in SQL Server 2012 which should do the following thing.
I will pass an input parameter #Range, and the stored procedure should insert values into a table starting from 0 to #Range-1.
CREATE PROC MyExample
(#Range INT)
AS
BEGIN
// Suppose the value of #Range is 100
// So I should do INSERT into MyTable Values(0,1,2,3,4,5,6,......99)
END
Any idea how to achieve this?
You can use while loop as below:
Declare #Index AS INT=0
WHILE #Index<#Range
BEGIN
INSERT into MyTable Values(#Index)
SET #Index=#Index+1
END
I am thinking your teacher may suspect why you use cte when you just learn a loop
CREATE PROC MyExample
(
#Range INT,
)
AS
BEGIN
;WITH numbers AS
(
SELECT 0 AS Value WHERE #Range >= 0 -- Validate the #Range value too, try 0 or negative values
UNION ALL SELECT Value + 1 FROM numbers WHERE Value + 1 < #Range
)
INSERT INTO MyTable
SELECT * FROM numbers
OPTION (MAXRECURSION 0)
END
And here is a set based approach:
CREATE PROC MyExample
(
#Range INT,
)
AS
BEGIN
INSERT INTO MyTable (Number)
SELECT TOP (#Range) ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) - 1
FROM sys.objects s1
CROSS JOIN sys.objects s2
END
(Based on this SO post)
CREATE PROC MyExample
(
#Range INT,
)
AS
BEGIN
declare #RANGE_COUNT int
select #RANGE_COUNT =#Range
//Suppose the value of #Range is 100
while #RANGE_COUNT<>0
begin
//So I should do INSERT into MyTable Values(0,1,2,3,4,5,6,......99)
INSERT into MyTable Values(#Range)
set #RANGE_COUNT = RANGE_COUNT -1
end
END
Using tally table technique:
DECLARE #range INT = 100
SELECT TOP(#range) -1 + ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS rn
FROM
(VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t1(n) CROSS JOIN --10
(VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t2(n) CROSS JOIN --100
(VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t3(n) --1000
--...continue to cover all possible #range values

Select non-existing rows

Let say I have a table:
ColumnA ColumnB
---------------------------------
1 10.75
4 1234.30
6 2000.99
How can I write a SELECT query that will result in the following:
ColumnA ColumnB
---------------------------------
1 10.75
2 0.00
3 0.00
4 1234.30
5 0.00
6 2000.99
You can use a CTE to create a list of numbers from 1 to the maximum value in your table:
; with numbers as
(
select max(ColumnA) as nr
from YourTable
union all
select nr - 1
from numbers
where nr > 1
)
select nr.nr as ColumnA
, yt.ColumnB
from numbers nr
left join
YourTable yt
on nr.nr = yt.ColumnA
order by
nr.nr
option (maxrecursion 0)
See it working at SQL Fiddle.
Please try:
declare #min int, #max int
select #min=MIN(ColumnA), #max=MAX(ColumnA) from tbl
select
distinct number ColumnA,
isnull(b.ColumnB, 0) ColumnB
from
master.dbo.spt_values a left join tbl b on a.number=b.ColumnA
where number between #min and #max
Create a TallyTable (or NumbersTable) - see this question: What is the best way to create and populate a numbers table?
With that table create an insert statement:
INSERT INTO YourTable (ColumnA, ColumnB)
SELECT Number FROM NumberTable
WHERE
NOT EXISTS (SELECT 1 FROM YourTable WHERE NumberTable.Number = YourTable.ColumnA)
-- Adjust this value or calculate it with a query to the maximum of the source table
AND NumberTable.Number < 230130
DECLARE #t TABLE (ID INT,Val DECIMAL(10,2))
INSERT INTO #t (ID,Val) VALUES (1,10.75)
INSERT INTO #t (ID,Val) VALUES (4,6.75)
INSERT INTO #t (ID,Val) VALUES (7,4.75)
declare #MinNo int
declare #MaxNo int
declare #IncrementStep int
set #MinNo = 1
set #MaxNo = 10
set #IncrementStep = 1
;with C as
(
select #MinNo as Num
union all
select Num + #IncrementStep
from C
where Num < #MaxNo
)
select Num,
CASE WHEN Val IS NOT NULL THEN Val ELSE 0.00 END AS NUMBER
from C
LEFT JOIN #t t
ON t.ID = c.Num
You could use a number-table or following trick to generate a sequence which you can LEFT OUTER JOIN with your table. I assume you want to determine the boundaries dynamically:
WITH Seq AS
(
SELECT TOP ((SELECT Max(ColumnA)FROM Table1) - (SELECT Min(ColumnA) FROM Table1) + 1)
Num = (SELECT Min(ColumnA) FROM Table1)+ Row_number() OVER (ORDER BY [object_id]) -1
FROM sys.all_objects)
SELECT ColumnA = Seq.Num,
ColumnB = COALESCE(t.ColumnB ,0.00)
FROM Seq
LEFT OUTER JOIN Table1 t
ON Seq.Num = t.ColumnA
Demo with your sample.
Worth reading: http://www.sqlperformance.com/2013/01/t-sql-queries/generate-a-set-1
I have my collect of table functions like these.
create function dbo.GetNumbers(#Start int, #End int)
returns #Items table
(
Item int
)
as
begin
while (#Start <= #End)
begin
insert into #Items
values (#Start)
set #Start = #Start + 1
end
return
end
Then I can use it to left join to my data table and every value will be there.
declare #min int, #max int
set #min = 10
set #max = 20
select gn.Item
from dbo.GetNumbers(#min, #max) gn
I have similar table functions for date ranges, times, timezones, etc.

How to find missing id in the table

I have column looks like below
SID101
SID102
SID103
SID105
SID107
In the above criteria i need to find missed SID numbers. SID104 and SID 106 are missed while ordering.
How can i find the missed id numbers.Could any one help me finding it.
Thanks in advance.
If your table contains gaps with length more than 1 item, you can use this query:
declare #t table(s varchar(20))
insert #t values ('SID101'),('SID102'),('SID103'),('SID105'),('SID108');
with cte as
(
select substring(t.s, 4, len(t.s)) [i]
from #t t
)
select 'SID' + cast(m.number as varchar(20))
from master..spt_values m
left join cte c on c.i = m.number
where [Type] = 'P'
and m.number >= (select min(i) from cte)
and m.number <= (select max(i) from cte)
and c.i is null
Output:
-----------------------
SID104
SID106
SID107
Something like this should work:
DECLARE #i INT;
SET #i = 100;
CREATE TABLE #idsToCheck (checkId varchar(100));
WHILE (#i < 200)
BEGIN
INSERT INTO #idsToCheck VALUES ('SID' + CONVERT(varchar(100), #i));
SET #i = #i + 1;
END
SELECT * FROM #idsToCheck itc
LEFT OUTER JOIN MainTable mt ON itc.checkId = mt.realId
WHERE mt.realId = NULL
DROP TABLE #idsToCheck
... where MainTable is your table containing the SID101, SID102, etc. column values, and MainTable.realId is the column containing those IDs. Modify the #i initial value and number in the while loop condition based on which SIDs you want to check from/to.
It's difficult. With
SELECT COUNT(*),MAX(CAST(REPLACE(y.name,'SID','') AS INT)) AS col_max FROM
sys.objects x INNER JOIN sys.columns y ON x.object_id=y.object_id
WHERE x.name='<TABLE_NAME>'
you should know, how much columns are missing (i.e. COUNT(*) is 5 and col_max is 107)
When you have a table, which contains only one column with all possible IDs from 1 to max (i.e. 100,101,102,103,104,...,132) then you could do
SELECT * FROM (
SELECT CAST(REPLACE(y.name,'SID','') AS INT) AS col_id FROM
sys.objects x INNER JOIN sys.columns y ON x.object_id=y.object_id
WHERE x.name='<TABLE_NAME>'
) a
RIGHT JOIN <TABLE_IDS> b ON a.col_id=b.id
WHERE a.col_id IS NULL AND b.id<=(
SELECT MAX(CAST(REPLACE(y.name,'SID','') AS INT)) AS col_max FROM
sys.objects x INNER JOIN sys.columns y ON x.object_id=y.object_id
WHERE x.name='<TABLE_NAME>'
)
EDIT: sorry, I've seen just now, that these values aren't column names, but values. My solution will find missing column names
 Declare #St int
declare #end int
set #st = CAST( (select RIGHT( max(data),4) from orderno)as int)
set #end = CAST( (select RIGHT( min(data),4) from orderno)as int)
create table #temp(data int)
while(#St <= #end )
begin
insert into #temp values(#St)
set #St = #St +1
end
select * from orderno
select * from #temp
select data from #temp where data not in (select cast(RIGHT(data,4))
declare #t table(s varchar(20))
insert #t values ('SID101'),('SID102'),('SID103'),('SID105'),('SID107');
with cte as
(
select substring(t.s, 4, len(t.s)) [i]
from #t t
)
select 'SID' + cast(t1.i + 1 as varchar(20))
from cte t1
join cte t2 on t2.i > t1.i
and not exists(
select 1
from cte c3
where c3.i > t1.i and c3.i < t2.i
)
where t2.i <> t1.i + 1
Output:
-----------------------
SID104
SID106