SQL Server : data between specific range - sql

I have a data which is something like this
stories value
--------------------------
0 2194940472.78964
1 1651820586.1447
2 627935051.75
3 586994698.4272
4 89132137.57
5 134608008
6 40759564
7 0
8 0
10 0
11 0
12 0
13 26060602
17 0
18 0
19 84522335
20 316478066.045
24 0
I want to sum the values per range of stories.
This is the output I expect:
stories value
0-3 125201021
4-7 215453123
8-12 453121545
12-max(numstories) 21354322
I tried this but not able to figure it out what is wrong
-- Asker's attempt. The inner query groups by a.NumStories, so each CASE branch
-- sums the rows of a single story count; the outer query groups by NumStories
-- again, so the result has one row per story count instead of one per range.
select t.NumStories, SUM(t.bldnvalue)
from
(select
a.NumStories,
case
when a.NumStories between 0 and 3 then sum(a.BldgValue)
when a.NumStories between 4 and 7 then sum(a.BldgValue)
when a.NumStories between 8 and 12 then sum(a.BldgValue)
when a.NumStories between 13 and max(a.NumStories) then sum(a.BldgValue)
end as bldnvalue
from
dbo.EDM_CocaCola_Coca_Cola_Company_1_1 a
group by
a.NumStories) t
group by
t.NumStories
With this query I am getting this output
NumStories value
-------------------------------
0 2194940472.78964
3 586994698.4272
12 0
6 40759564
7 0
1 1651820586.1447
24 0
18 0
10 0
4 89132137.57
19 84522335
13 26060602
5 134608008
2 627935051.75
17 0
11 0
20 316478066.045
8 0

I like this kind of result, so I tried the bin concept. I think the only issue would be with your max bin. I don't understand how you got your output sums: the first record's value is '2,194,940,472.78964', which is already bigger than your value for the 0-3 bin.
-- Sample data: one row per story count with its total value.
IF OBJECT_ID('tempdb..#Test') IS NOT NULL
    DROP TABLE #Test;

CREATE TABLE #Test (
    Stories int,
    Value   float
);

INSERT INTO #Test (Stories, Value)
VALUES
    (0,  2194940472.78964),
    (1,  1651820586.1447),
    (2,  627935051.75),
    (3,  586994698.4272),
    (4,  89132137.57),
    (5,  134608008),
    (6,  40759564),
    (7,  0),
    (8,  0),
    (10, 0),
    (11, 0),
    (12, 0),
    (13, 26060602),
    (17, 0),
    (18, 0),
    (19, 84522335),
    (20, 316478066.045),
    (24, 0);

-- Bin definitions: a label plus the inclusive story range it covers.
IF OBJECT_ID('tempdb..#Bins') IS NOT NULL
    DROP TABLE #Bins;

CREATE TABLE #Bins (
    Label varchar(20),
    [Min] int,
    [Max] int
);

INSERT INTO #Bins (Label, [Min], [Max])
VALUES
    ('0-3',      0,  3),
    ('4-7',      4,  7),
    ('8-12',     8,  12),
    ('13 - Max', 13, 999999999); -- open-ended bin expressed with a very large upper bound

-- Attach each story count to the bin whose range contains it and total per bin.
-- Labels sort as text, so '13 - Max' comes before '4-7'.
SELECT
    b.Label,
    SUM(t.Value) AS Value
FROM #Test AS t
INNER JOIN #Bins AS b
    ON t.Stories BETWEEN b.[Min] AND b.[Max]
GROUP BY b.Label
ORDER BY b.Label;
Output:
Label Value
-------------------- ----------------------
0-3 5061690809.11154
13 - Max 427061003.045
4-7 264499709.57
8-12 0

Just build the grouping string first that you want and group by that variable.
-- Sum BldgValue per story range.
-- The range label is computed once per row in CROSS APPLY and then used as the
-- grouping key; SQL Server does not accept a positional "group by 1".
-- Rows whose NumStories matches no branch (NULL or negative) fall into a NULL label.
select
    b.stories,
    sum(a.BldgValue) as value
from
    dbo.EDM_CocaCola_Coca_Cola_Company_1_1 a
    cross apply (
        values (
            case
                when a.NumStories between 0 and 3 then '0-3'
                when a.NumStories between 4 and 7 then '4-7'
                when a.NumStories between 8 and 12 then '8-12'
                when a.NumStories >= 13 then '13-max'
            end
        )
    ) b (stories)
group by
    b.stories;
If you really want to print the actual maximum too, you can build the "13-max" label from a subquery such as (SELECT MAX(NumStories) FROM dbo.EDM_CocaCola_Coca_Cola_Company_1_1), cast to varchar and concatenated onto '13-'.

You can try this:
-- One SELECT per story range, stacked with UNION ALL (the ranges are disjoint,
-- so no de-duplication is needed). Column names follow the table used in the
-- question (NumStories, BldgValue). A range with no rows yields NULL, not 0.
SELECT '0-3' AS stories,
       SUM(BldgValue) AS value
FROM dbo.EDM_CocaCola_Coca_Cola_Company_1_1
WHERE NumStories BETWEEN 0 AND 3
UNION ALL
SELECT '4-7' AS stories,
       SUM(BldgValue) AS value
FROM dbo.EDM_CocaCola_Coca_Cola_Company_1_1
WHERE NumStories BETWEEN 4 AND 7
UNION ALL
SELECT '8-12' AS stories,
       SUM(BldgValue) AS value
FROM dbo.EDM_CocaCola_Coca_Cola_Company_1_1
WHERE NumStories BETWEEN 8 AND 12
UNION ALL
SELECT '13-max' AS stories,
       SUM(BldgValue) AS value
FROM dbo.EDM_CocaCola_Coca_Cola_Company_1_1
WHERE NumStories >= 13;

Here is solution with CTE that should work for any data set, without copying the code.
-- Buckets rows into consecutive, equally sized ranges of stories and sums value per range.
declare @YourTable table (stories int, value money);
declare @GroupMemberCount int = 4; -- number of story values covered by each range

insert @YourTable (stories, value)
values (0,5),(1,10),(2,11),(3,7),(4,18),(5,13),(7,15);

with cte as
(
    -- One row per range: range i starts at min(stories) + i * @GroupMemberCount.
    select
        c.stories + v.i * @GroupMemberCount as FirstGroupMember,
        c.stories + v.i * @GroupMemberCount + @GroupMemberCount - 1 as LastGroupMember,
        cast(c.stories + v.i * @GroupMemberCount as varchar(50))
            + '-'
            + cast(c.stories + v.i * @GroupMemberCount + @GroupMemberCount - 1 as varchar(50)) as GroupName
    from (select min(stories) as stories from @YourTable) c
    cross join (values (0),(1),(2),(3),(4) /* and so on */) v(i)
    -- Keep a range only if some row lies at or beyond its first member.
    -- The step here must be the same @GroupMemberCount used to build the ranges.
    where exists (
        select *
        from @YourTable yt
        where yt.stories >= c.stories + v.i * @GroupMemberCount
    )
)
select c.GroupName, sum(yt.value) as value
from cte c
join @YourTable yt
    on yt.stories between c.FirstGroupMember and c.LastGroupMember
group by c.GroupName;

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Asker's attempt (logic unchanged; only the variable sigil is restored to @).
-- Builds a random-sized #t of (ID, Description) rows from sys.objects.
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects
DECLARE @maxGroupSize INT
SET @maxGroupSize = 100
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
-- GroupID is the running total integer-divided by the maximum size, so a group
-- boundary falls wherever the running total crosses a multiple of @maxGroupSize.
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/@maxGroupSize AS GroupID
FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed sample rows: an id and a description whose length is what gets grouped.
CREATE TABLE data (
    id          int,
    description nvarchar(20)
);

INSERT INTO data (id, description)
VALUES
    (1,  'qmlsdkjfqmsldk'),
    (2,  'mldskjf'),
    (3,  'qmsdlfkqjsdm'),
    (4,  'fmqlsdkfq'),
    (5,  'qdsfqsdfqq'),
    (6,  'mds'),
    (7,  'qmsldfkqsjdmfqlkj'),
    (8,  'qdmsl'),
    (9,  'mqlskfjqmlkd'),
    (10, 'qsdqfdddffd');
Solution
For every recursion step, a case expression evaluates (r.group_running_length + len(d.description) <= @group_max_length) to decide whether the previous group is extended or a new group is started.
Set group target size to 40 to better fit the sample data.
-- Walks the rows in id order and assigns a group_id so that the summed
-- description length of each group stays within @group_max_length.
declare @group_max_length int = 40;

with rcte as
(
    -- Anchor: the row with id = 1 opens group 1.
    select d.id,
           d.description,
           len(d.description) as description_length,
           len(d.description) as running_length,
           1 as group_id,
           len(d.description) as group_running_length
    from data d
    where d.id = 1
    union all
    -- Recursive step: fetch the row with the next id (ids must be contiguous,
    -- since the join is on r.id + 1).
    select d.id,
           d.description,
           len(d.description),
           r.running_length + len(d.description),
           -- Stay in the current group while the row still fits, otherwise open a new one.
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_id
             else r.group_id + 1
           end,
           -- Length accumulated inside the current group; restarts with a new group.
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_running_length + len(d.description)
             else len(d.description)
           end
    from rcte r
    join data d
      on d.id = r.id + 1
)
select r.id,
       r.description,
       r.description_length,
       r.running_length,
       r.group_id,
       r.group_running_length,
       gs.group_sum
from rcte r
-- The group total is the largest in-group running length reached in that group.
cross apply ( select max(r2.group_running_length) as group_sum
              from rcte r2
              where r2.group_id = r.group_id ) gs -- group sum
order by r.id
option (maxrecursion 0); -- one recursion level per row: lift the default limit of 100
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

How to get conditional SUM?

I am trying to get a conditional sum based on another column. For example, suppose I have this dataset:
ID Date Type Total
-----------------------
5 12/16/2019 0 7
5 12/16/2019 1 0
5 12/17/2019 0 7
5 12/17/2019 1 7
5 12/18/2019 0 7
5 12/18/2019 1 0
5 12/19/2019 0 7
5 12/19/2019 1 7
5 12/20/2019 0 7
5 12/20/2019 1 7
5 12/23/2019 0 7
5 12/24/2019 0 7
5 12/25/2019 0 7
5 12/26/2019 0 7
5 12/27/2019 0 7
If there is a type of 1 then I only want that data for that data, else if there is only 0 then I want that data for that date.
So for 12/16/2019 I would want the value 0. For 12/23/2019 - 12/27/2019 I would want the value 7.
You can use row_number() :
-- Number the rows of each (id, date) pair with the highest type first,
-- then keep only the first row of each pair.
with ranked as (
    select src.*,
           row_number() over (partition by id, date order by type desc) as seq
    from table src
)
select ranked.*
from ranked
where ranked.seq = 1;
A simple ROW_NUMBER can handle this quite easily. I changed some of the column names because reserved words are just painful to work with.
-- Sample data with renamed columns (Date -> SomeDate, Type -> MyType).
declare @Something table
(
ID int
, SomeDate Date
, MyType int
, Total int
)
insert @Something (ID, SomeDate, MyType, Total) values
(5, '12/16/2019', 0, 7)
, (5, '12/16/2019', 1, 0)
, (5, '12/17/2019', 0, 7)
, (5, '12/17/2019', 1, 7)
, (5, '12/18/2019', 0, 7)
, (5, '12/18/2019', 1, 0)
, (5, '12/19/2019', 0, 7)
, (5, '12/19/2019', 1, 7)
, (5, '12/20/2019', 0, 7)
, (5, '12/20/2019', 1, 7)
, (5, '12/23/2019', 0, 7)
, (5, '12/24/2019', 0, 7)
, (5, '12/25/2019', 0, 7)
, (5, '12/26/2019', 0, 7)
, (5, '12/27/2019', 0, 7)
-- Keep one row per (ID, SomeDate): the type 1 row when it exists, else the type 0 row.
select ID
, SomeDate
, MyType
, Total
from
(
select ID
, SomeDate
, MyType
, Total
-- Descending order puts MyType = 1 first; ascending would always pick type 0.
, RowNum = ROW_NUMBER()over(partition by ID, SomeDate order by MyType desc)
from @Something
) x
where x.RowNum = 1
You can do this with simple aggregation . . . well, and case:
-- Per (id, date): report the highest type and the total of the type 1 row;
-- when the pair has no type 1 row the inner MAX is NULL and MAX(total) is used.
SELECT
    id,
    date,
    MAX(type),
    COALESCE(
        MAX(CASE WHEN type = 1 THEN total END),
        MAX(total)
    ) AS total
FROM t
GROUP BY
    id,
    date;
This formulation is assuming that you have only types 0 and 1 and at most one of each type on each day for a given id.

skip consecutive rows after specific value

Note: I have a working query, but am looking for optimisations to use it on large tables.
Suppose I have a table like this:
id session_id value
1 5 7
2 5 1
3 5 1
4 5 12
5 5 1
6 5 1
7 5 1
8 6 7
9 6 1
10 6 3
11 6 1
12 7 7
13 8 1
14 8 2
15 8 3
I want the id's of all rows with value 1 with one exception:
skip groups with value 1 that directly follow a value 7 within the same session_id.
Basically I would look for groups of value 1 that directly follow a value 7, limited by the session_id, and ignore those groups. I then show all the remaining value 1 rows.
The desired output showing the id's:
5
6
7
11
13
I took some inspiration from this post and ended up with this code:
-- Sample data; id is an identity column, so it follows insert order.
declare @req_data table (
id int primary key identity,
session_id int,
value int
)
insert into @req_data(session_id, value) values (5, 7)
insert into @req_data(session_id, value) values (5, 1) -- preceded by value 7 in same session, should be ignored
insert into @req_data(session_id, value) values (5, 1) -- ignore this one too
insert into @req_data(session_id, value) values (5, 12)
insert into @req_data(session_id, value) values (5, 1) -- preceded by value != 7, show this
insert into @req_data(session_id, value) values (5, 1) -- show this too
insert into @req_data(session_id, value) values (5, 1) -- show this too
insert into @req_data(session_id, value) values (6, 7)
insert into @req_data(session_id, value) values (6, 1) -- preceded by value 7 in same session, should be ignored
insert into @req_data(session_id, value) values (6, 3)
insert into @req_data(session_id, value) values (6, 1) -- preceded by value != 7, show this
insert into @req_data(session_id, value) values (7, 7)
insert into @req_data(session_id, value) values (8, 1) -- new session_id, show this
insert into @req_data(session_id, value) values (8, 2)
insert into @req_data(session_id, value) values (8, 3)
-- Returns the ids of value = 1 rows, except runs of 1s that directly follow a 7
-- within the same session.
select id
from (
-- A run of 1s is skipped if any of its rows is flagged (its first row follows a 7).
select session_id, id, max(skip) over (partition by grp) as 'skip'
from (
select tWithGroups.*,
-- Difference of row numbers: constant within a run of consecutive equal values.
( row_number() over (partition by session_id order by id) - row_number() over (partition by value order by id) ) as grp
from (
select session_id, id, value,
case
-- Order by id: ordering by session_id inside a session_id partition ties on
-- every row, which leaves the "previous row" undefined.
when lag(value) over (partition by session_id order by id) = 7
then 1
else 0
end as 'skip'
from @req_data
) as tWithGroups
) as tWithSkipField
where tWithSkipField.value = 1
) as tYetAnotherOutput
where skip != 1
order by id
This gives the desired result, but with 4 select blocks I think it's way too inefficient to use on large tables.
Is there a cleaner, faster way to do this?
The following should work well for this.
-- Returns value = 1 rows whose most recent non-1 value in the same session is not 7.
WITH
cte_ControlValue AS (
SELECT
rd.id, rd.session_id, rd.value,
-- Running MAX of BinVal carries the latest non-NULL BinVal forward in id order;
-- bytes 5-8 of it are the last non-1 value. 999 stands in when there is none yet.
ControlValue = ISNULL(CAST(SUBSTRING(MAX(bv.BinVal) OVER (PARTITION BY rd.session_id ORDER BY rd.id), 5, 4) AS INT), 999)
FROM
@req_data rd
-- BinVal = id (4 bytes) followed by value (4 bytes); NULLIF makes it NULL for value = 1.
CROSS APPLY ( VALUES (CAST(rd.id AS BINARY(4)) + CAST(NULLIF(rd.value, 1) AS BINARY(4))) ) bv (BinVal)
)
SELECT
cv.id, cv.session_id, cv.value
FROM
cte_ControlValue cv
WHERE
cv.value = 1
AND cv.ControlValue <> 7;
Results...
id session_id value
----------- ----------- -----------
5 5 1
6 5 1
7 5 1
11 6 1
13 8 1
Edit: How and why it works...
The basic premise is taken from Itzik Ben-Gan's "The Last non NULL Puzzle".
Essentially, we are relying on two different behaviors that most people don't usually think about...
1) NULL + anything = NULL.
2) You can CAST or CONVERT an INT into a fixed length BINARY data type and it will continue to sort as an INT (as opposed to sorting like a text string).
This is easier to see when the intermittent steps are added to the query in the CTE...
-- Same computation with every intermediate step exposed as its own column.
SELECT
rd.id, rd.session_id, rd.value,
-- id and value packed as 4 + 4 bytes; NULL when value = 1
bv.BinVal,
-- latest non-NULL BinVal so far within the session, in id order
SmearedBinVal = MAX(bv.BinVal) OVER (PARTITION BY rd.session_id ORDER BY rd.id),
-- last 4 bytes of the smeared value converted back to INT
SecondHalfAsINT = CAST(SUBSTRING(MAX(bv.BinVal) OVER (PARTITION BY rd.session_id ORDER BY rd.id), 5, 4) AS INT),
-- same as above, with 999 substituted when no non-1 value has been seen yet
ControlValue = ISNULL(CAST(SUBSTRING(MAX(bv.BinVal) OVER (PARTITION BY rd.session_id ORDER BY rd.id), 5, 4) AS INT), 999)
FROM
@req_data rd
CROSS APPLY ( VALUES (CAST(rd.id AS BINARY(4)) + CAST(NULLIF(rd.value, 1) AS BINARY(4))) ) bv (BinVal)
Results...
id session_id value BinVal SmearedBinVal SecondHalfAsINT ControlValue
----------- ----------- ----------- ------------------ ------------------ --------------- ------------
1 5 7 0x0000000100000007 0x0000000100000007 7 7
2 5 1 NULL 0x0000000100000007 7 7
3 5 1 NULL 0x0000000100000007 7 7
4 5 12 0x000000040000000C 0x000000040000000C 12 12
5 5 1 NULL 0x000000040000000C 12 12
6 5 1 NULL 0x000000040000000C 12 12
7 5 1 NULL 0x000000040000000C 12 12
8 6 7 0x0000000800000007 0x0000000800000007 7 7
9 6 1 NULL 0x0000000800000007 7 7
10 6 3 0x0000000A00000003 0x0000000A00000003 3 3
11 6 1 NULL 0x0000000A00000003 3 3
12 7 7 0x0000000C00000007 0x0000000C00000007 7 7
13 8 1 NULL NULL NULL 999
14 8 2 0x0000000E00000002 0x0000000E00000002 2 2
15 8 3 0x0000000F00000003 0x0000000F00000003 3 3
Looking at the BinVal column, we see an 8-byte hex value for all rows where [value] is not 1, and NULLs where [value] = 1. The first 4 bytes are the id (used for ordering) and the second 4 bytes are [value] (used to set the "previous non-1 value", or to set the whole thing to NULL).
The 2nd step is to "smear" the non-NULL values into the NULLs using the window framed MAX function, partitioned by session_id and ordered by id.
The 3rd step is to parse out the last 4 bytes and convert them back to an INT data type (SecondHalfAsINT) and deal with any nulls that result from not having any non-1 preceding value (ControlValue).
Since we can't reference a windowed function in the WHERE clause, we have to throw the query into a CTE (a derived table would work just as well) so that we can use the new ControlValue in the where clause.
-- For each row, find the id of the latest earlier row in the same session whose
-- value is not 1, then keep value = 1 rows where that earlier value is not 7.
SELECT CRow.id
FROM @req_data AS CRow
CROSS APPLY (SELECT MAX(id) AS id FROM @req_data PRev WHERE PRev.Id < CRow.id AND PRev.session_id = CRow.session_id AND PRev.value <> 1 ) MaxPRow
LEFT JOIN @req_data AS PRow ON MaxPRow.id = PRow.id
-- No earlier non-1 row gives a NULL PRow.value, treated as 1 so the row is kept.
WHERE CRow.value = 1 AND ISNULL(PRow.value,1) <> 7
You can use the following query:
-- grp = running count, per session in id order, of rows whose value is not 1;
-- a run of 1s therefore shares grp with the non-1 row that precedes it.
-- COALESCE gives 0 to rows that come before any non-1 row in their session.
select id, session_id, value,
coalesce(sum(case when value <> 1 then 1 end)
over (partition by session_id order by id), 0) as grp
from @req_data
to get:
id session_id value grp
----------------------------
1 5 7 1
2 5 1 1
3 5 1 1
4 5 12 2
5 5 1 2
6 5 1 2
7 5 1 2
8 6 7 1
9 6 1 1
10 6 3 2
11 6 1 2
12 7 7 1
13 8 1 0
14 8 2 1
15 8 3 2
So, this query detects islands of consecutive 1 records that belong to the same group, as specified by the first preceding row with value <> 1.
You can use a window function once more to detect all 7 islands. If you wrap this in a second cte, then you can finally get the desired result by filtering out all 7 islands:
-- Step 1: label each row with the island it belongs to (see grp below).
;with session_islands as (
select id, session_id, value,
-- running count of non-1 rows per session; 0 before the first non-1 row
coalesce(sum(case when value <> 1 then 1 end)
over (partition by session_id order by id), 0) as grp
from @req_data
), islands_with_7 as (
-- Step 2: count the 7s inside each (session_id, grp) island.
select id, grp, value,
count(case when value = 7 then 1 end)
over (partition by session_id, grp) as cnt_7
from session_islands
)
-- Step 3: keep the 1s that sit in an island without a 7.
select id
from islands_with_7
where cnt_7 = 0 and value = 1

include zeroes in count for same table

I have table with two columns, one int and another varchar column
SeqId status
int varchar(50)
The status column has 10 statuses, lets say status1, status2, status3, ... status10
I want to write a query to find range of SeqId where status7, status8, status9 count(*) is zero.
Table data,
SeqId Status
1 status1
2 status2
3 status3
4 status4
5 status5
6 status6
7 status7
8 status8
9 status9
10 status10
11 status1
12 status2
13 status3
14 status4
15 status5
16 status9
17 status2
18 status7
19 status3
20 status5
......
1000 status6
My 1st desired output
When I use range between 15 and 20,
Incorrect query where right now zeroes are not included,
-- Asker's query: only rows inside the SeqId range are grouped, so a status with
-- no rows in that range produces no group and never shows up with a count of 0.
Select status, count(*)
from table1
where seqId between 15 and 20
group by status
**Status Count**
status1 0
status2 1
status3 1
status4 0
status5 2
status6 0
status7 1
status8 0
status9 1
status10 0
Next if possible, I want to find a range where all these 3 statuses, 8, 9, and 10, are zero.
You can use a case statement to turn this into a boolean problem. So:
-- flag is 0 for rows whose status is status8, status9 or status10, and 1 otherwise.
select
    t.*,
    (
        case
            when status in ('status8', 'status9', 'status10') then 0
            else 1
        end
    ) as flag
from t;
You now want to find the longest sequence of zeros. This is a gaps-and-islands problem. One solution is a difference of row numbers to define groups:
-- Gaps-and-islands: the overall row number minus the per-flag row number is
-- constant within a run of consecutive rows that share the same flag, so
-- (grp, flag) identifies each run. The longest run is returned with its first
-- and last id.
-- NOTE(review): flag = 0 marks rows whose status IS one of the three listed
-- statuses, so this filter returns the longest run of those statuses; confirm
-- whether flag = 1 (rows without them) is the intended filter.
select top 1 min(id), max(id), count(*) as length
from (select t.*,
(row_number() over (order by id) - row_number() over (partition by flag order by id)) as grp
from (select t.*, (case when status in ('status8', 'status9', 'status10') then 0 else 1 end) as flag
from t
) t
) t
where flag = 0
group by grp, flag
order by count(*) desc;
Assuming you have a status table:
-- Every status from the lookup table, with its row count inside the requested
-- SeqId range; statuses without rows in the range get 0 through COALESCE.
SELECT A.STATUS, COALESCE(B.COUNT, 0) AS COUNT FROM STATUS_TABLE A
LEFT JOIN (SELECT STATUS, COUNT(*) AS COUNT FROM TABLE1
           WHERE SEQID BETWEEN 15 AND 20 -- count only the range asked about
           GROUP BY STATUS) B
ON A.STATUS = B.STATUS
-- Count rows per status inside the SeqId range, including statuses with no rows.
-- The distinct status list drives the query. The range filter sits in the ON
-- clause: in a WHERE clause it would discard the unmatched (zero-count) statuses.
Select statusVal.Status as status, count(base.seqId)
from ( select distinct Status from table1
) statusVal
left join table1 base
on base.Status = statusVal.Status
and base.seqId between 15 and 20
group by statusVal.Status
all zero
-- SeqIds of all rows, minus the SeqIds of rows whose Status is in the list.
-- The dots are a placeholder for the status values to exclude.
select SeqId from data
except
select SeqId from data where Status in ( ..... )
For your first question, a generic t-sql solution will be
-- Sample data: the first 20 rows of the question's table.
create table #t (id int, [status] varchar(12))
go
insert into #t (id, status) values
(1 ,'status1')
, (2 ,'status2')
, (3 ,'status3')
, (4 ,'status4')
, (5 ,'status5')
, (6 ,'status6')
, (7 ,'status7')
, (8 ,'status8')
, (9 ,'status9')
, (10 ,'status10')
, (11 ,'status1')
, (12 ,'status2')
, (13 ,'status3')
, (14 ,'status4')
, (15 ,'status5')
, (16 ,'status9')
, (17 ,'status2')
, (18 ,'status7')
, (19 ,'status3')
, (20 ,'status5')
go
-- c: every status that occurs anywhere in the table.
; WITH c as
(
select distinct [status]
from #t
)
-- c2: row count per status inside the id range.
, c2 as ( select t.[status], cnt=count(*)
from #t t
where t.id between 15 and 20
group by t.[Status])
-- One row per status; statuses with no row in the range get cnt = 0.
select c.[status], cnt = coalesce(c2.cnt, 0)
from c
left join c2
on c2.[status] = c.[status];
For your second question, @Gordon Linoff provides an excellent solution, but I think there is a typo in his solution. The where clause should be
where flag = 1 -- instead of 0
-- Per-status row count for ids 15 to 20, zero-filled, ordered by the numeric
-- suffix of the status name (the characters from position 7 onward).
with all_status as (
    select distinct [status]
    from #t
),
in_range as (
    select [status], count(*) as CNT
    from #t
    where id between 15 and 20
    group by [status]
)
select
    cast(substring(a.[status], 7, 2) as int),
    a.[status],
    isnull(r.CNT, 0)
from all_status a
left join in_range r
    on a.[status] = r.[status]
order by cast(substring(a.[status], 7, 2) as int)

Get Last Value Before zero

I Have this table in SQL Server:
Cell1 Cell2 Cell3 Cell4 Cell5 Cell6 vbzero
3 65 **12** 0 0 0
12 4 5 1 6 8
10 0 0 0 0 0
1 90 0 0 0 0
I want to get the last value before the zeros. For example, row one should return 12; row two should return 8, because row two doesn't contain a zero; row three should return 10, and so on.
How can I do this in SQL Server?
Try something like ...
-- NULLIF turns each 0 into NULL; COALESCE then returns the first non-NULL
-- argument, so listing the columns from last to first gives the last non-zero value.
SELECT
    COALESCE(
        NULLIF(Column3, 0),
        NULLIF(Column2, 0),
        NULLIF(Column1, 0)
    )
FROM TableName
Test Data
-- Sample rows from the question, held in a table variable.
DECLARE @TABLE TABLE
(Cell1 INT,Cell2 INT,Cell3 INT,Cell4 INT,Cell5 INT,Cell6 INT)
INSERT INTO @TABLE (Cell1, Cell2, Cell3, Cell4, Cell5, Cell6) VALUES
(3 , 65 , 12, 0 , 0 , 0),
(12, 4 , 5 , 1 , 6 , 8),
(10, 0 , 0 , 0 , 0 , 0),
(1 , 90 , 0 , 0 , 0 , 0)
Query
-- Last non-zero cell per row: zeros become NULL through NULLIF, and COALESCE
-- returns the first non-NULL argument when scanning from Cell6 back to Cell1.
-- A row whose cells are all 0 returns NULL.
SELECT COALESCE(
NULLIF(Cell6, 0) , NULLIF(Cell5, 0) , NULLIF(Cell4, 0)
,NULLIF(Cell3, 0) , NULLIF(Cell2, 0) , NULLIF(Cell1, 0)
) AS LastNonZero
FROM @TABLE
Result
12
8
10
90