create range using with cte in sql - sql

I have a table with only a single column, which has values as follows:
-- Sample data: one integer column holding the range boundaries.
-- A local temp table (#name) has to be created with CREATE TABLE; DECLARE is
-- only valid for @-prefixed table variables.
create table #numberrange
(
    num int
);

insert into #numberrange (num)
values
    (24),
    (29),
    (34),
    (39),
    (44),
    (49),
    (54),
    (59),
    (64),
    (69),
    (74),
    (99);
Now i want to show the result like as follow
24 24-29
29 30-34
34 35-39
39 40-44
44 45-49
49 50-54
54 55-59
59 60-64
64 65-69
69 70-74
74 75-99
99
i have tried with sql cte function but not able to get the desired result.

You can use this CTE query. The query builds the range from the last record so that the last record has empty range and the first one has a range.
-- Pairs every num with the next higher num and renders the pair as a
-- 'low-high' label. The recursion starts at the largest num (which has no
-- successor, so its label is NULL) and walks down to the smallest one.
;with numbered as
(
    -- sortorder: 1 = smallest num, counting upwards
    select num,
           row_number() over (order by num) as sortorder
    from #numberrange
),
cte as
(
    -- Anchor: the row with the highest sortorder. ORDER BY is not allowed on
    -- a member of a UNION ALL, so the last row is picked by its position.
    select num,
           sortorder,
           convert(varchar(21), null) as rangevalue
    from numbered
    where sortorder = (select count(*) from #numberrange)
    union all
    -- Recursive step: x is the row directly below the one already in cte.
    -- The very first range starts at its own num; every later range starts
    -- one above it, so neighbouring ranges do not overlap (24-29, 30-34, ...).
    select x.num,
           x.sortorder,
           convert(varchar(21),
                   convert(varchar(10), case when x.sortorder = 1 then x.num else x.num + 1 end)
                   + '-'
                   + convert(varchar(10), cte.num))
    from numbered x
    inner join cte
        on cte.sortorder = x.sortorder + 1
)
select num, rangevalue
from cte
order by num
-- One recursion level per row: lift the default limit of 100 levels.
option (maxrecursion 0);

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Build a random-sized test table: ID is a dense 1..n sequence and
-- Description is a random-length (0-29 character) prefix of a GUID string.
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t;

SELECT TOP (ABS(CHECKSUM(NewId())) % 1000)
       ROW_NUMBER() OVER (ORDER BY name) AS ID,
       LEFT(CAST(NEWID() AS NVARCHAR(100)), ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects;

-- Local variables use the @ sigil (# is reserved for temp tables).
DECLARE @maxGroupSize INT;
SET @maxGroupSize = 100;

;WITH t AS (
    SELECT
        *,
        LEN(Description) AS DescriptionLength,
        SUM(LEN(Description)) OVER (ORDER BY ID) AS [RunningLength],
        -- Integer division buckets rows by which multiple of @maxGroupSize
        -- the running total has reached. A row that straddles a multiple is
        -- counted in full in the next bucket, so a bucket's own sum can
        -- exceed @maxGroupSize.
        SUM(LEN(Description)) OVER (ORDER BY ID) / @maxGroupSize AS GroupID
    FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID;
I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed sample set: ten rows with consecutive ids 1..10, one short text each.
create table data
(
    id          int,
    description nvarchar(20)
);

insert into data (id, description)
values
    ( 1, 'qmlsdkjfqmsldk'),
    ( 2, 'mldskjf'),
    ( 3, 'qmsdlfkqjsdm'),
    ( 4, 'fmqlsdkfq'),
    ( 5, 'qdsfqsdfqq'),
    ( 6, 'mds'),
    ( 7, 'qmsldfkqsjdmfqlkj'),
    ( 8, 'qdmsl'),
    ( 9, 'mqlskfjqmlkd'),
    (10, 'qsdqfdddffd');
Solution
For every recursion step, a case expression evaluates (r.group_running_length + len(d.description) <= @group_max_length) to decide whether the previous group must be extended or a new group must be started.
Set group target size to 40 to better fit the sample data.
-- Upper bound for the summed description length of a single group.
declare @group_max_length int = 40;

with rcte as
(
    -- Anchor: the row with id = 1 opens group 1.
    select d.id,
           d.description,
           len(d.description) as description_length,
           len(d.description) as running_length,
           1                  as group_id,
           len(d.description) as group_running_length
    from data d
    where d.id = 1
    union all
    -- Recursive step: take the row with the next id. If it still fits into
    -- the current group, the group is extended; otherwise the row opens a
    -- new group and the group running length restarts at this row's length.
    select d.id,
           d.description,
           len(d.description),
           r.running_length + len(d.description),
           case
               when r.group_running_length + len(d.description) <= @group_max_length
                   then r.group_id
               else r.group_id + 1
           end,
           case
               when r.group_running_length + len(d.description) <= @group_max_length
                   then r.group_running_length + len(d.description)
               else len(d.description)
           end
    from rcte r
    join data d
        on d.id = r.id + 1
)
select r.id,
       r.description,
       r.description_length,
       r.running_length,
       r.group_id,
       r.group_running_length,
       -- The largest running length inside a group is that group's total.
       -- A window aggregate avoids re-evaluating the recursive CTE per row.
       max(r.group_running_length) over (partition by r.group_id) as group_sum
from rcte r
order by r.id
-- One recursion level per row: lift the default limit of 100 levels.
option (maxrecursion 0);
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

ABC sorting T-SQL

I have a select output with this column, that has about 318 rows
skiped_times
-----------
967
967
244
153
125
116
116
116
116
116
116
98
94
89
73
73
72
66
61
60
60
60
60
60
55
48
...
The sum of this column is about 6,100. I want to add another column that sorts these values into 3 groups: for example, group A will contain the first 20 % of the rows of counted values, group B the next 30 %, and group C the rest.
I hope you understand my question
Use window functions:
-- Assigns each row to group A / B / C by its rank position:
-- first 20 % of the rows -> A, next 30 % (up to the 50 % mark) -> B, rest -> C.
select t.*,
       (case when seqnum < num * 0.2 then 'A'
             when seqnum < num * 0.5 then 'B'
             else 'C'
        end) as grp
from (select t.*,
             -- rank(): tied values share a position and therefore a group
             rank() over (order by skiped_times desc) as seqnum,
             -- total number of rows, repeated on every row
             count(*) over () as num
      from t
     ) t;
Note: The use of rank() means that values with ties are placed in the same group. If they can split between groups, then use row_number().
EDIT:
If you want this by the sum of values -- well, first you should be clear in the question, not in a comment. You can use the same idea:
-- Assigns each row to group A / B / C by its share of the column total:
-- running sum below 20 % of the total -> A, below 50 % -> B, rest -> C.
select t.*,
       (case when running_sum < total * 0.2 then 'A'
             when running_sum < total * 0.5 then 'B'
             else 'C'
        end) as grp
from (select t.*,
             -- The default window frame includes all peers of the current
             -- row, so tied values share one running sum (and one group).
             sum(skiped_times) over (order by skiped_times desc) as running_sum,
             sum(skiped_times) over () as total
      from t
     ) t;
Oh my friend.. You gave me a good training for this question.
Here is a solution. I hope this works for you.
-- Row-count cut-offs: group A is the first 20 % of the rows, group B the
-- next 30 % (i.e. everything up to the 50 % mark), group C the remainder.
-- Local variables use the @ sigil (# is reserved for temp tables).
Declare @Percent20 int
Declare @Percent50 int

set @Percent20 = (select count(skiped_times) from TABLE_NAME) * 0.20
select @Percent20

set @Percent50 = (select count(skiped_times) from TABLE_NAME) * 0.50
select @Percent50

select
    result.skiped_times,
    (Case
        -- value is among the highest 20 % of rows
        when result.skiped_times in (select top (@Percent20) skiped_times
                                     from TABLE_NAME
                                     order by skiped_times desc)
            then 'A'
        -- not in A, but still among the highest 50 % of rows
        when result.skiped_times in (select top (@Percent50) skiped_times
                                     from TABLE_NAME
                                     order by skiped_times desc)
            then 'B'
        else 'C'
     end
    ) as Per
from (
    select s.skiped_times
    from TABLE_NAME s
) as result
order by result.skiped_times desc

SQL Select first missing value in a series

I was asked the following: I have a table (lets call it tbl) and it has one column of type int (let call it num) and it had sequential numbers in it:
num
---
1
2
4
5
6
8
9
11
Now you need to write a query that returns the first missing number (in this example that answer would be 3).
here's my answer (works):
-- One value that follows an existing num but is itself absent from tbl.
select top (1) cur.num + 1
from tbl as cur
where cur.num + 1 not in (select existing.num from tbl as existing)
After writing this, I was asked: what if tbl contained 10 million records - how would you improve performance (because obviously my inner query would cause a full table scan)?
My thoughts were about an index in on the num field and doing a not exists. but I would love to hear some alternatives.
In SQL Server 2012+, I would simply use lead():
-- First missing number: the value right after the smallest num whose
-- successor in the table is not num + 1.
select top 1 num + 1 as first_missing
from (select num,
             lead(num) over (order by num) as next_num
      from tbl
     ) t
where next_num <> num + 1
-- without ORDER BY, TOP 1 could return any gap instead of the first one
order by num;
However, I suspect that your version would have the best performance if you have an index on tbl(num). The not exists version is worth testing:
-- One value whose predecessor exists in tbl but which is itself absent.
select top (1) cur.num + 1
from tbl as cur
where not exists (select *
                  from tbl as nxt
                  where nxt.num = cur.num + 1);
The only issue is getting the first number. You are not guaranteed that the table is read "in order". So, this will return one number. With an index (or better yet, clustered index) on num, the following should be fast and guaranteed to return the first number:
-- Smallest value whose predecessor exists in tbl but which is itself absent.
select top (1) cur.num + 1
from tbl as cur
where not exists (select *
                  from tbl as nxt
                  where nxt.num = cur.num + 1)
order by cur.num;
Here is another approach using ROW_NUMBER:
SQL Fiddle
-- RN is the distance between a value and its 1-based position in sorted
-- order; it turns positive at the first row that sits behind a gap, and
-- num - RN is then that row's position.
;with offsets as (
    select src.num,
           src.num - row_number() over (order by src.num) as RN
    from tbl as src
)
select top (1) o.num - o.RN
from offsets as o
where o.RN > 0
order by o.num
With a proper INDEX on num, here is the stats under a million row test harness.
Original - NOT IN : CPU time = 296 ms, elapsed time = 289 ms
wewesthemenace : CPU time = 0 ms, elapsed time = 0 ms
notulysses(NOT EXISTS): CPU time = 687 ms, elapsed time = 679 ms.
All the above answers are accurate. Alternatively, we can create a table, similar to a date dimension, that holds all sequential numbers from 1 to n.
-- Fill the helper table with the consecutive numbers 1..13, then show it.
insert into sequenceNumber
values (1), (2), (3), (4), (5), (6), (7),
       (8), (9), (10), (11), (12), (13);

select * from sequenceNumber;
RESULT:
1
2
3
4
5
6
7
8
9
10
11
12
13
Now we can use below query to find out missing number.
-- First missing number: the smallest value of the helper sequence that has
-- no matching row in tbl (the unmatched side of the left join is NULL).
select top 1 a.num as first_missing
from sequenceNumber a
left join tbl b
    on b.num = a.num
where b.num is null
order by a.num
Hope it will be useful.
You can try with not exists:
-- Smallest num + 1 that does not itself occur in tbl.
select top (1) cur.num + 1
from tbl as cur
where not exists (select 1
                  from tbl as nxt
                  where nxt.num = cur.num + 1)
order by cur.num
SQLFiddle
Or use a row_number:
-- r is each value's 1-based position in sorted order; the first row whose
-- position differs from its value marks the gap, and that position is returned.
;with ranked as (
    select num,
           row_number() over (order by num) as r
    from tbl
)
select top (1) ranked.r
from ranked
where ranked.r <> ranked.num
order by ranked.num
SQLFiddle
from my side also one example
-- test data
-- test data
-- (table variables use the @ sigil; # is reserved for temp tables)
DECLARE @Sequence AS TABLE ( Num INT );

INSERT INTO @Sequence ( Num )
VALUES ( 1 ),
       ( 2 ),
       ( 4 ),
       ( 5 ),
       ( 6 ),
       ( 8 ),
       ( 9 ),
       ( 11 );

--Final query
-- RN enumerates 1..n (n = row count); the smallest RN that is not present
-- as a Num is reported. Returns no row when 1..n are all present.
SELECT TOP 1
       S.RN AS [Missing]
FROM ( SELECT RN = ROW_NUMBER() OVER ( ORDER BY Num )
       FROM @Sequence
     ) AS S
LEFT JOIN @Sequence AS S2
    ON S.RN = S2.Num
WHERE S2.Num IS NULL
ORDER BY S.RN

Get Starting and ending of a Dataset in SQL Server 2008

I need to extract the starting and ending points of a data set from a table. For Ex if data is like:
1
5
10
15
20
40
45
50
55
60
65
70
Now the 2 data sets are 1 - 20 and 40 - 70. So the Data will always be sequential and the difference between points in a single dataset will max be 7. So the resultant query should give me 3 columns:
1. 5 15
2. 45 65
i.e second and second last point in the dataset.
Is it possible to do this without using a cursor or a for loop? Please post a query if you can.
I tried doing is using over and partition by but no luck
If I understand you properly, this returns what you're asking for.
-- Sample data: two runs of values (1-20 and 40-70).
-- (table variables use the @ sigil; # is reserved for temp tables)
DECLARE @tmp TABLE
(
    numVal INT PRIMARY KEY
);

INSERT @tmp (numVal)
VALUES ( 1 ), ( 5 ), ( 10 ), ( 15 ), ( 20 ),
       ( 40 ), ( 45 ), ( 50 ), ( 55 ), ( 60 ), ( 65 ), ( 70 );

-- breaks: the last value of every run, i.e. each value that has no larger
-- value within the next 7; breakGroup numbers the runs in ascending order.
WITH breaks AS
(
    SELECT t.numVal AS breakMax,
           ROW_NUMBER() OVER (ORDER BY t.numVal) AS breakGroup
    FROM @tmp t
    WHERE NOT EXISTS
          (
              SELECT NULL
              FROM @tmp t1
              WHERE t1.numVal > t.numVal
                AND t1.numVal <= t.numVal + 7
          )
)
SELECT v.breakGroup,
       MIN(v.numVal) AS secondNum,
       MAX(v.numVal) AS secondLastNum
FROM (
         SELECT t.numVal,
                br.breakGroup,
                -- ar: position counted from the start of the run
                ROW_NUMBER() OVER (PARTITION BY br.breakGroup ORDER BY t.numVal)      AS ar,
                -- dr: position counted from the end of the run
                ROW_NUMBER() OVER (PARTITION BY br.breakGroup ORDER BY t.numVal DESC) AS dr
         FROM @tmp t
         -- a value belongs to the first run whose last value is >= the value
         CROSS APPLY
         (
             SELECT TOP 1 b.breakGroup
             FROM breaks b
             WHERE b.breakMax >= t.numVal
             ORDER BY b.breakGroup
         ) br
     ) v
-- keep only the second and the second-last value of each run
WHERE v.ar = 2
   OR v.dr = 2
GROUP BY v.breakGroup
ORDER BY v.breakGroup;

SQL Server query for finding the sum of 4 consecutive values

Can somebody help me in finding the sum of 4 consecutive values i.e rolling sum of last 4 values.
Like:
VALUE SUM
1 NULL
2 NULL
3 NULL
4 10
5 14
6 18
7 22
8 26
9 30
10 34
11 38
12 42
13 46
14 50
15 54
16 58
17 62
18 66
19 70
20 74
21 78
22 82
23 86
24 90
25 94
26 98
27 102
28 106
29 110
30 114
31 118
32 122
33 126
34 130
35 134
36 138
37 142
38 146
Thanks,
-- Sum of the four largest values. An aggregate cannot take a SELECT as its
-- argument, so the TOP 4 rows are exposed through a derived table first.
select sum(last_four.Value)
from (select top 4 Value
      from [table]
      order by Value desc) as last_four
or, perhaps
-- Sum of the values in the top 4 of a consecutive integer sequence.
-- An aggregate is not allowed directly in WHERE, so MAX comes from a scalar
-- subquery; "> max - 4" keeps exactly max-3 .. max.
select sum(value)
from [Table]
where Value > (select max(Value) from [Table]) - 4
I haven't actually tried either of those- and can't at the moment, but they should get you pretty close.
Quick attempt, which gets the results you've posted in your question (except the 1st 3 rows are not NULL). Assumes that VALUE field is unique and in ascending order:
-- Create test TABLE with 38 values in
-- Create test TABLE with 38 values in
-- (variables and table variables use the @ sigil; # is reserved for temp tables)
DECLARE @T TABLE (Value INTEGER);
DECLARE @Counter INTEGER;
SET @Counter = 1;
WHILE (@Counter <= 38)
BEGIN
    INSERT @T VALUES (@Counter);
    SET @Counter = @Counter + 1;
END;

-- This gives the results
-- For every row: sum of the (up to) four largest values that are <= its own.
SELECT t1.Value, x.Val
FROM @T t1
OUTER APPLY (SELECT SUM(last_four.Value)
             FROM (SELECT TOP 4 t2.Value
                   FROM @T t2
                   WHERE t2.Value <= t1.Value
                   ORDER BY t2.Value DESC) last_four) AS x(Val)
ORDER BY t1.Value
At the very least, you should see the kind of direction I was heading in.
Assuming ID can give you the last 4 rows.
-- Adds up the [SUM] column of the four rows with the highest ID.
;WITH last_four AS
(
    SELECT TOP (4) [SUM]
    FROM myTable
    ORDER BY ID DESC
)
SELECT SUM(last_four.[SUM])
FROM last_four
Each time you query it, it will read the last 4 rows.
If this is wrong (e.g. you want the sum of each consecutive 4 rows), then please give sample output
Following would work if your Value column is sequential
-- q generates 1..38. For consecutive integers the sum of a value and its
-- three predecessors is 4 * Value - 6; the first three rows have no full
-- window and yield NULL.
;WITH q (Value) AS
(
    SELECT 1
    UNION ALL
    SELECT q.Value + 1
    FROM q
    WHERE q.Value < 38
)
SELECT q.Value,
       CASE
           WHEN q.Value >= 4 THEN q.Value * 4 - 6
       END
FROM q
otherwise you might use something like this
-- q generates 1..38 as stand-in data; Sequential numbers the rows densely
-- so that each row can be joined to its three predecessors by position.
;WITH q (Value) AS
(
    SELECT 1
    UNION ALL
    SELECT q.Value + 1
    FROM q
    WHERE q.Value < 38
),
Sequential (ID, Value) AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY Value) AS ID,
           Value
    FROM q
)
-- Any missing predecessor makes the whole sum NULL (first three rows).
SELECT s1.Value,
       s1.Value + s2.Value + s3.Value + s4.Value AS [SUM]
FROM Sequential s1
LEFT OUTER JOIN Sequential s2 ON s2.ID = s1.ID - 1
LEFT OUTER JOIN Sequential s3 ON s3.ID = s1.ID - 2
LEFT OUTER JOIN Sequential s4 ON s4.ID = s1.ID - 3
Note that the table q in the examples is a stub for your actual table. The actual statement then becomes
-- Sequential numbers the rows of YourTable densely by Value so that each
-- row can be joined to its three predecessors by position.
;WITH Sequential (ID, Value) AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY Value) AS ID,
           Value
    FROM YourTable
)
-- Any missing predecessor makes the whole sum NULL (first three rows).
SELECT s1.Value,
       s1.Value + s2.Value + s3.Value + s4.Value AS [SUM]
FROM Sequential s1
LEFT OUTER JOIN Sequential s2 ON s2.ID = s1.ID - 1
LEFT OUTER JOIN Sequential s3 ON s3.ID = s1.ID - 2
LEFT OUTER JOIN Sequential s4 ON s4.ID = s1.ID - 3