Get Interval Percentage - sql

Please consider this Table:
FileName FileSize
----------------------------
1 33
2 198
3 10
4 127
5 85
6 23
7 105
8 158
9 78
10 90
and I want to produce a result like this:
FileSize Percentage
--------------------------------
1-50 30%
50-150 50%
150-200 20%
How can I group the rows and calculate percentages based on intervals?
Thanks

For a query without bounds table you can create a table inline using VALUES constructor like below
-- Percentage of files per size interval; the intervals are supplied inline via a VALUES constructor.
-- Intervals are half-open [LowNumber, HighNumber): a FileSize equal to HighNumber falls in the next interval.
-- Files matching no interval are dropped by the inner join and therefore are not part of the denominator.
select
    cast(I.LowNumber as varchar(11)) + ' - ' + cast(I.HighNumber as varchar(11)) as FileSize,
    -- count(*) is the per-interval row count; sum(count(*)) over () is the total over all intervals
    count(*) * 100.00 / sum(count(*)) over () as percentage
from TblFile as F
inner join (values (1, 50), (50, 150), (150, 200)) as I (LowNumber, HighNumber)
    on F.FileSize >= I.LowNumber and F.FileSize < I.HighNumber
-- Grouping (rather than SELECT DISTINCT) keeps "order by I.LowNumber" legal: with DISTINCT,
-- SQL Server requires every ORDER BY item to appear in the select list.
group by I.LowNumber, I.HighNumber
order by I.LowNumber
Your query should look like
-- Percentage of files per size interval, with the intervals read from TblInterval.
-- Intervals are half-open [LowNumber, HighNumber); files matching no interval are excluded
-- from both the counts and the denominator by the inner join.
select
    cast(I.LowNumber as varchar(11)) + ' - ' + cast(I.HighNumber as varchar(11)) as FileSize,
    -- one aggregate per interval instead of a window count per joined row followed by DISTINCT
    count(*) * 100.00 / sum(count(*)) over () as percentage
from TblFile as F
inner join TblInterval as I
    on F.FileSize >= I.LowNumber and F.FileSize < I.HighNumber
group by I.LowNumber, I.HighNumber
-- explicit ordering so the intervals come back lowest first on every run
order by I.LowNumber
Explanation:
Ideally you should take a set-based approach and store the range values in a table. This allows faster processing and gives you a single place to maintain the limits, outside the procedure. This is also in line with the dependency injection principle.
For inline anonymous tables use VALUES constructor. More on this at this msdn link
PS: Insert scripts for table
-- Optional interval table (left commented out; only needed for the TblInterval variant of the query).
--create table tblInterval (LowNumber Int, HighNumber Int)
--insert into tblInterval (LowNumber, HighNumber) values
--(1,50),(50,150),(150,200)

-- Sample data from the question: one row per file with its size.
create table tblFile (fileName int, fileSize int);

-- Explicit column list so the insert keeps working if columns are added or reordered.
insert into tblFile (fileName, fileSize) values
 ( 1 ,33)
,( 2 ,198 )
,( 3 ,10 )
,( 4 ,127 )
,( 5 ,85 )
,( 6 ,23 )
,( 7 ,105 )
,( 8 ,158 )
,( 9 ,78 )
,( 10,90 );
Assuming you have a table like below
TblInterval
LowNumber HighNumber
1 50
50 150
150 200

You can use case statement to make the file size range then get percentage by the count like this
-- Label each file with its size range, then report every range's share of all rows.
-- CASE takes the first matching branch, so 50 is labelled '1-50' and 150 is labelled '50-150';
-- sizes outside 1..200 get a NULL label and form their own group.
select
    labelled.size_range as FileSize,
    -- integer arithmetic: the percentage is truncated to a whole number
    count(*) * 100 / sum(count(*)) over () as Percentage
from (
    select
        case
            when FileSize between 1 and 50 then '1-50'
            when FileSize between 50 and 150 then '50-150'
            when FileSize between 150 and 200 then '150-200'
        end as size_range
    from MyTable
) as labelled
group by labelled.size_range

-- Sample data: one row per file with its size.
CREATE TABLE #A (
    FILENAME INT,
    FILESIZE INT
);

INSERT INTO #A (FILENAME, FILESIZE)
VALUES
    (1, 33),
    (2, 198),
    (3, 10),
    (4, 127),
    (5, 85),
    (6, 23),
    (7, 105),
    (8, 158),
    (9, 78),
    (10, 90);

-- Share of all rows per size range, listed smallest range first.
-- CASE takes the first matching branch, so 50 lands in '1-50' and 150 in '50-150'.
SELECT
    B.RANGE,
    -- integer arithmetic: the percentage is truncated to a whole number
    COUNT(*) * 100 / SUM(COUNT(*)) OVER () AS PERCENTAGE
FROM (
    SELECT
        CASE
            WHEN FILESIZE BETWEEN 1 AND 50 THEN '1-50'
            WHEN FILESIZE BETWEEN 50 AND 150 THEN '50-150'
            WHEN FILESIZE BETWEEN 150 AND 200 THEN '150-200'
        END AS RANGE
    FROM #A
) AS B
GROUP BY B.RANGE
-- the labels are strings, so they are mapped to an explicit rank to sort numerically
ORDER BY
    CASE B.RANGE
        WHEN '1-50' THEN 1
        WHEN '50-150' THEN 2
        WHEN '150-200' THEN 3
    END;
output
RANGE PERCENTAGE
1-50 30
50-150 50
150-200 20

You can use AVG together with partition by:
-- Percentage of files per size range, computed as 100 * AVG(membership indicator).
-- Every file is paired with every range; the indicator is 100 when the file falls in the
-- range and 0 otherwise, so its average over the range's partition is the percentage.
-- (Averaging FileSize itself would return the mean file size of the range, not a percentage.)
-- Files outside every range appear in no range row but still count towards the denominator.
select distinct
    b.[range],
    avg(case when t.FileSize between b.low_size and b.high_size then 100.0 else 0.0 end)
        over (partition by b.[range]) as percentage
from mytable as t
cross join (
    values
        ('1-50',    1,   50),
        ('50-150',  51,  150),  -- 50 belongs to '1-50', so this range starts at 51
        ('150-200', 151, 200)   -- 150 belongs to '50-150', so this range starts at 151
) as b ([range], low_size, high_size)

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Build a random test table: at most 999 rows (also bounded by the row count of sys.objects),
-- each with a description of 0-29 characters cut from a freshly generated GUID string.
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects
-- Local variables use the @ sigil (# denotes temp tables and is not valid here).
DECLARE @maxGroupSize INT
SET @maxGroupSize = 100
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
-- GroupID is the integer quotient of the overall running length by the maximum size.
-- This does not cap the sum inside a group: the first row of a group may begin before
-- the multiple of @maxGroupSize that its running total crosses.
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/@maxGroupSize AS GroupID
FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rCTE) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed sample set: ten consecutively numbered rows with short descriptions.
CREATE TABLE data (
    id          INT,
    description NVARCHAR(20)
);

INSERT INTO data (id, description)
VALUES
    (1,  'qmlsdkjfqmsldk'),
    (2,  'mldskjf'),
    (3,  'qmsdlfkqjsdm'),
    (4,  'fmqlsdkfq'),
    (5,  'qdsfqsdfqq'),
    (6,  'mds'),
    (7,  'qmsldfkqsjdmfqlkj'),
    (8,  'qdmsl'),
    (9,  'mqlskfjqmlkd'),
    (10, 'qsdqfdddffd');
Solution
For every recursion step, a case expression evaluates (r.group_running_length + len(d.description) <= @group_max_length) to decide whether the previous group must be extended or a new group must be started.
Set group target size to 40 to better fit the sample data.
-- Splits the rows of "data" (walked in id order) into consecutive groups whose summed
-- description length never exceeds @group_max_length, unless a single row alone exceeds it.
-- Requires ids to be consecutive and to start at 1: the recursion joins on id = previous id + 1.
declare @group_max_length int = 40;
with rcte as
(
    -- anchor member: the first row opens group 1
    select d.id,
           d.description,
           len(d.description) as description_length,
           len(d.description) as running_length,
           1 as group_id,
           len(d.description) as group_running_length
    from data d
    where d.id = 1
    union all
    -- recursive member: take the next row and either extend the current group or open a new one
    select d.id,
           d.description,
           len(d.description),
           r.running_length + len(d.description),
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_id
             else r.group_id + 1
           end,
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_running_length + len(d.description)
             else len(d.description) -- a new group restarts the group running length
           end
    from rcte r
    join data d
      on d.id = r.id + 1
)
select r.id,
       r.description,
       r.description_length,
       r.running_length,
       r.group_id,
       r.group_running_length,
       -- the group's total is the largest running length reached inside the group;
       -- a window aggregate avoids referencing (and re-evaluating) the recursive CTE per row
       max(r.group_running_length) over (partition by r.group_id) as group_sum
from rcte r
order by r.id
-- lift the default limit of 100 recursion levels so inputs with more than 101 rows do not fail
option (maxrecursion 0);
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

Running Total added to Next partition

-- Sample data for the question: values per group; Bal is the column to be computed.
-- Table variables and scalar variables use the @ sigil.
declare @Temp table
(
Grp int,
Bal float,
[Value] float
)
-- Opening amount that every group's balance starts from.
declare @Amt float =1000;
-- Bal is not supplied, so it is NULL for every row.
Insert into @Temp(Grp,[Value])
Values(1,10),(1,5),(1,15)
,(2,20),(2,5),(2,15)
,(3,50),(3,50)
select Grp,@Amt as Amount,Value,Bal from @Temp
Required Output:
Grp Amount Value Bal
1 1000 10 1000
1 1000 5 1000
1 1000 15 1000
2 1000 20 1030 ---(10+5+15)
2 1000 5 1030
2 1000 15 1030
3 1000 50 1070 ---- (20+5+15)
3 1000 50 1070
The balance is calculated by adding the total of 'Value' for group 1 to the balance of group 2, adding the total of group 2 to the balance of group 3, and so on.
I know how to calculate the running total but I can't as sums are added to next partition.
Please help to get required result efficiently. I am using SQL Server 2017
One method is outer apply:
-- Balance per row = opening amount + total Value of all earlier groups.
-- Amount is not a column of the table; it is the @Amt variable declared with the sample data.
select t.Grp,
       @Amt as Amount,
       t.[Value],
       @Amt + coalesce(prev.value, 0) as Bal  -- the first group has no earlier rows: sum is NULL
from @Temp t outer apply
     (select sum(e.[Value]) as value
      from @Temp e
      where e.Grp < t.Grp
     ) prev;
It is possibly more efficient to use aggregation and a running sum:
-- Balance per row = opening amount + total Value of all earlier groups, computed by
-- aggregating once per group and taking a running sum over the group totals.
select t.Grp,
       @Amt as Amount,
       t.[Value],
       (@Amt + tt.running_value) as Bal
from @Temp t join
     (select g.Grp,
             -- running total up to and including this group, minus this group's own total
             sum(sum(g.[Value])) over (order by g.Grp rows unbounded preceding) - sum(g.[Value]) as running_value
      from @Temp g
      group by g.Grp
     ) tt
     on t.Grp = tt.Grp;
Unfortunately, SQL Server doesn't fully support range window frames, so I don't think there is a convenient way to do this only with window functions. But the group by will probably have much better performance.

SQL Running Total Grouped By Limit

I am trying to determine how to group records together based on the cumulative total of the Qty column so that the group size doesn't exceed 50. The desired grouping is given in the Group column of the sample data below.
Is there a way to accomplish this in SQL (specifically SQL Server 2012)?
Thank you for any assistance.
ID Qty Group
1 10 1
2 20 1
3 30 2 <- 60 greater than 50 so new group
4 40 3
5 2 3
6 3 3
7 10 4
8 25 4
9 15 4
10 5 5
You can use CTE to achieve the goal.
If a single item exceeds a Qty of 50, it is still assigned a group of its own.
-- Assigns consecutive rows to groups whose cumulative Qty does not exceed 50.
-- Relies on the identity column producing consecutive IDs starting at 1:
-- the recursion joins each row to its predecessor on ID = previous ID + 1.
DECLARE @Data TABLE (ID int identity(1,1) primary key, Qty int)
INSERT @Data (Qty) VALUES (10), (20), (30), (40), (2), (3), (10), (25), (15), (5)
;WITH cte AS
(
    -- Anchor: the first row opens group 1
    SELECT ID, Qty, 1 AS [Group], Qty AS RunningTotal FROM @Data WHERE ID = 1
    UNION ALL
    SELECT data.ID, data.Qty,
        -- The group limits to 50 Qty
        CASE WHEN cte.RunningTotal + data.Qty > 50 THEN cte.[Group] + 1 ELSE cte.[Group] END,
        -- Reset the running total for each new group
        data.Qty + CASE WHEN cte.RunningTotal + data.Qty > 50 THEN 0 ELSE cte.RunningTotal END
    FROM @Data data INNER JOIN cte ON data.ID = cte.ID + 1
)
SELECT ID, Qty, [Group] FROM cte
-- Lift the default limit of 100 recursion levels so more than 101 rows can be processed
OPTION (MAXRECURSION 0)
The following query gives you most of what you want. One more self-join of the result would compute the group sizes:
-- For each group G (running total of Qty integer-divided by 50), returns the last ID of
-- the group together with the running total of Qty up to and including that ID.
select last_row.ID, last_row.G, sum(b.Qty) as Total
from (
    -- last ID of every group
    select max(running.ID) as ID, running.G
    from (
        -- running total per row via an inequality self-join, bucketed by integer division
        select a.ID, sum(b.Qty) / 50 as G
        from T as a
        inner join T as b
            on a.ID >= b.ID  -- the join condition must be in ON: SQL Server rejects JOIN without it
        group by a.ID
    ) as running
    group by running.G
) as last_row
inner join T as b
    on last_row.ID >= b.ID
-- G is selected un-aggregated, so it has to be part of the grouping key
group by last_row.ID, last_row.G
order by last_row.ID
ID G Total
---------- ---------- ----------
2 0 30
3 1 60
8 2 140
10 3 160
The two important tricks:
Use a self-join with an inequality to get running totals
Use integer division to calculate group numbers.
I discuss this and other techniques on my canonical SQL page.
You need to create a stored procedure for this.
If you have Group column in your database then you have to take care about it while inserting a new record by fetching the max Group value and its sum of Qty column otherwise if you want Group column as computed in select statement then you have to code stored procedure accordingly.

grouping results based on time diff in sql

I have results like this
TimeDiffMin | OrdersCount
10 | 2
12 | 5
09 | 6
20 | 15
27 | 11
I would like the following
TimeDiffMin | OrdersCount
05 | 0
10 | 8
15 | 5
20 | 15
25 | 0
30 | 11
So you can see that i want the grouping of every 5 minutes and show the total order count in those 5 minutes. eg. 0-5 minutes 0 orders, 5-10 minutes 8 orders
any help would be appreciated.
current query:
-- Number of cancelled orders per minute of delay between order placement and the
-- cancellation note, for orders placed on or after 2016-12-01 and cancelled within 100 minutes.
SELECT
    DATEDIFF(minute, AO.OrderDate, AON.CreatedDate) AS TimeDifferenceInMinutes,
    COUNT(AO.OrderID) AS NumberOfOrders
FROM AC_Orders AS AO
INNER JOIN AC_OrderNotes AS AON
    ON AO.OrderID = AON.OrderID
WHERE AON.Comment LIKE '%has been cancelled.'  -- only notes that record a cancellation
    AND DATEDIFF(minute, AO.OrderDate, AON.CreatedDate) <= 100
    AND AO.OrderDate >= '2016-12-01'
GROUP BY DATEDIFF(minute, AO.OrderDate, AON.CreatedDate)
Now, if you are open to a TVF.
I use this UDF to create dynamic Date/Time Ranges. You supply the range and increment
-- Sums OrdersCount into 5-minute buckets, including empty buckets.
-- Table variables use the @ sigil.
Declare @YourTable table (TimeDiffMin int,OrdersCount int)
Insert Into @YourTable (TimeDiffMin,OrdersCount) values
(10, 2),
(12, 5),
( 9, 6),
(20,15),
(27,11)
-- Each bucket is half-open [R1, R2) and is reported under its upper bound R2,
-- so a value exactly on a boundary (e.g. 10) is counted in the bucket that starts there.
Select TimeDiffMin = cast(A.R2 as int)
      ,OrdersCount = isnull(sum(B.OrdersCount),0)  -- buckets without rows sum to NULL; show 0
 From (Select R1=RetVal,R2=RetVal+5 From [dbo].[udf-Range-Number](0,25,5)) A
 Left Join (
            -- Your Complicated Query
            Select TimeDiffMin,OrdersCount From @YourTable
           ) B on B.TimeDiffMin >= A.R1 and B.TimeDiffMin < A.R2
 Group By A.R1,A.R2
 Order By A.R2
Returns
TimeDiffMin OrdersCount
5 0
10 6
15 7
20 0
25 15
30 11
The UDF if interested
-- Inline table-valued function returning an arithmetic sequence.
--   @R1   first value of the sequence
--   @R2   upper end of the range
--   @Incr step between consecutive values; must be non-zero (it is used as a divisor)
-- Returns RetSeq (1-based position) and RetVal (@R1, @R1 + @Incr, ...): one row for @R1
-- plus one row for each of the cast((@R2-@R1)/@Incr as int) steps.
-- Parameters use the @ sigil.
CREATE FUNCTION [dbo].[udf-Range-Number] (@R1 money,@R2 money,@Incr money)
Returns Table
Return (
    -- cte0: number of steps; cte1: 10 seed rows; cte2: 1..M numbered from up to 10^8 crossed rows
    with cte0(M)   As (Select cast((@R2-@R1)/@Incr as int)),
         cte1(N)   As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
         cte2(N)   As (Select Top (Select M from cte0) Row_Number() over (Order By (Select NULL)) From cte1 a,cte1 b,cte1 c,cte1 d,cte1 e,cte1 f,cte1 g,cte1 h )
    Select RetSeq=1,RetVal=@R1 Union All Select N+1,(N*@Incr)+@R1
    From cte2
)
-- Max 100 million observations
-- Select * from [dbo].[udf-Range-Number](0,4,0.25)
You can do this using a derived table to first build up your time difference windows and then joining from that to sum up all the Orders that fall within that window.
-- Sums OrdersCount into fixed-width time windows, including empty windows.
-- Table variables and scalar variables use the @ sigil.
declare @t table(TimeDiffMin int
                ,OrdersCount int
                );
insert into @t (TimeDiffMin, OrdersCount) values
 (10, 2)
,(12, 5)
,( 9, 6)
,(20,15)
,(27,11);
declare @Increment int = 5; -- Set your desired time windows here.
with n(n)
as
(   -- Select 10 rows to start with:
    select n from(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as n(n)
),n2 as
(   -- CROSS APPLY these 10 rows to get 10*10=100 rows we can use to generate incrementing ROW_NUMBERs.  Use more CROSS APPLYs to get more rows:
    select (row_number() over (order by (select 1))-1) * @Increment as StartMin
          ,(row_number() over (order by (select 1))) * @Increment as EndMin
    from n                  -- 10 rows
        cross apply n n2    -- 100 rows
        --cross apply n n3  -- 1000 rows
        --cross apply n n4  -- 10000 rows
)
-- Each window is half-open [StartMin, EndMin) and is reported under its upper bound EndMin.
select m.EndMin as TimeDiffMin
      ,isnull(sum(t.OrdersCount),0) as OrdersCount  -- windows without rows sum to NULL; show 0
from n2 as m
    left join @t t
        on(t.TimeDiffMin >= m.StartMin
            and t.TimeDiffMin < m.EndMin
            )
where m.EndMin <= 30    -- Filter as required
group by m.EndMin
order by m.EndMin
Query result:
TimeDiffMin OrdersCount
5 0
10 6
15 7
20 0
25 15
30 11

How to take column value count

Using SQL Server 2000
Table1
Column1
20
30
40
20
40
30
30
I want take a count like this
20 - 2
30 - 3
40 - 2
In case if the column value 20 or 30 or 40 is not available, it should display 20 - 0 or 30 - 0 or 40 - 0.
For example
Column1
20
30
20
30
30
Expected output
20 - 2
30 - 3
40 - 0
I will get only 20, 30. 40. No more value will come.
How to make a query
Need help
-- Number of rows per distinct value. Uses the question's Table1/Column1 names:
-- "table" is a reserved keyword and cannot be used as an unquoted table name.
-- count(Column1) counts non-NULL values only.
select Column1, count(Column1) from Table1 group by Column1
EDIT : ( after your edit)
-- Temp table holding every value that must appear in the result, even with a count of 0.
CREATE TABLE #table1 (
    numbers int
)

INSERT INTO #table1 (numbers) VALUES (20)
INSERT INTO #table1 (numbers) VALUES (30)
INSERT INTO #table1 (numbers) VALUES (40)

-- The actual values as stored in the source table.
SELECT src.[num]
FROM [DavidCard].[dbo].[sssssss] AS src

-- Left join from the expected values so unmatched ones are kept;
-- count(num) ignores the NULLs of unmatched rows and therefore yields 0 for them.
SELECT
    expected.numbers,
    COUNT(actual.num)
FROM #table1 AS expected
LEFT JOIN [sssssss] AS actual
    ON expected.numbers = actual.num
GROUP BY expected.numbers
SQL Query 101:
-- Row count for every distinct Column1 value, smallest value first.
SELECT
    t.Column1,
    COUNT(*)
FROM dbo.YourTable AS t
GROUP BY t.Column1
ORDER BY t.Column1
Update: if you want to get a list of possible values, and their potential count (or 0) in another table, you need two tables, basically - one with all the possible values, one with the actual values - and a LEFT OUTER JOIN to put them together - something like:
-- Count per possible value, reporting 0 for values that never occur in dbo.YourTable.
-- The left join keeps every possible value; COUNT over the right-hand column skips the
-- NULLs of unmatched rows, so it already returns 0 for them.
SELECT
    possible.Column1,
    COUNT(actual.Column1)
FROM (
    -- the fixed list of values that must always be reported
    SELECT 20 AS Column1
    UNION ALL
    SELECT 30
    UNION ALL
    SELECT 40
) AS possible
LEFT JOIN dbo.YourTable AS actual
    ON actual.Column1 = possible.Column1
GROUP BY possible.Column1
ORDER BY possible.Column1