I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Build a random sample: at most 999 rows from sys.objects, each with a
-- description of random length (0-29 characters taken from a NEWID() string).
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects

-- Local variables use the @ sigil in T-SQL; a leading # names a temp table.
DECLARE @maxGroupSize INT
SET @maxGroupSize = 100

-- t: per-row length, running total of lengths ordered by ID, and a GroupID
-- obtained by integer-dividing that running total by @maxGroupSize.
-- This is the bucketing the question is about: a group holds every row whose
-- running total falls in the same multiple-of-@maxGroupSize band, so the row
-- that crosses into the band contributes its full length and the group's own
-- sum can exceed @maxGroupSize.
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/@maxGroupSize AS GroupID
FROM #t
)
-- SumOfGroup: total description length of all rows sharing a GroupID.
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rCTE) is one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed sample set: ten rows (ids 1 through 10), each with a short description.
CREATE TABLE data
(
    id          INT,
    description NVARCHAR(20)
);

INSERT INTO data (id, description)
VALUES
    ( 1, 'qmlsdkjfqmsldk'),
    ( 2, 'mldskjf'),
    ( 3, 'qmsdlfkqjsdm'),
    ( 4, 'fmqlsdkfq'),
    ( 5, 'qdsfqsdfqq'),
    ( 6, 'mds'),
    ( 7, 'qmsldfkqsjdmfqlkj'),
    ( 8, 'qdmsl'),
    ( 9, 'mqlskfjqmlkd'),
    (10, 'qsdqfdddffd');
Solution
For every recursion step, a case expression evaluates (r.group_running_length + len(d.description) <= @group_max_length) to decide whether the previous group must be extended or a new group must be started.
Set group target size to 40 to better fit the sample data.
-- Maximum total description length allowed in one group.
-- Local variables use the @ sigil in T-SQL; a leading # names a temp table.
declare @group_max_length int = 40;

with rcte as
(
    -- Anchor: the row with id = 1 opens group 1.
    select d.id,
           d.description,
           len(d.description) as description_length,
           len(d.description) as running_length,
           1                  as group_id,
           len(d.description) as group_running_length
    from data d
    where d.id = 1
    union all
    -- Recursive step: fetch the row with the next id (ids must be contiguous)
    -- and either extend the current group or open a new one.
    select d.id,
           d.description,
           len(d.description),
           r.running_length + len(d.description),
           -- group_id: stay in the group while the row still fits, otherwise advance
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_id
             else r.group_id + 1
           end,
           -- group_running_length: accumulate within the group, restart on a new group
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_running_length + len(d.description)
             else len(d.description)
           end
    from rcte r
    join data d
      on d.id = r.id + 1
)
select r.id,
       r.description,
       r.description_length,
       r.running_length,
       r.group_id,
       r.group_running_length,
       -- The running length never decreases inside a group, so its maximum is
       -- the group's total. A window aggregate avoids referencing (and thus
       -- re-evaluating) the recursive CTE a second time.
       max(r.group_running_length) over (partition by r.group_id) as group_sum
from rcte r
order by r.id
-- The default recursion limit is 100 levels; lift it so inputs with more than
-- about 100 rows do not abort. Recursion still stops when no next id exists.
option (maxrecursion 0);
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).
I have a table this way:
ID ATTR_NAME SUB_ATTR_NAME VALUE
1 ATTR-1 SUB-ATTR-1 23
2 ATTR-1 SUB-ATTR-2 32
3 ATTR-1 SUB-ATTR-3 25
4 ATTR-1 SUB-ATTR-4 28
5 ATTR-2 SUB-ATTR-1 78
6 ATTR-2 SUB-ATTR-2 45
7 ATTR-2 SUB-ATTR-3 48
8 ATTR-2 SUB-ATTR-4 41
9 ATTR-3 SUB-ATTR-1 47
10 ATTR-3 SUB-ATTR-2 12
11 ATTR-3 SUB-ATTR-3 16
12 ATTR-3 SUB-ATTR-4 18
But using SQL, I want a table this way:
SUB-ATTR-1 SUB-ATTR-2 SUB-ATTR-3 SUB-ATTR-4
ATTR-1 23 32 25 28
ATTR-2 78 45 48 41
ATTR-3 47 12 16 18
Please help I am a newbie to SQL
Sample Data
-- Recreate the #Temp sample table: three ATTR_NAME values, each with four
-- SUB_ATTR_NAME values and an integer VALUE.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp

CREATE TABLE #Temp
(
    ID            INT,
    ATTR_NAME     VARCHAR(20),
    SUB_ATTR_NAME VARCHAR(20),
    VALUE         INT
)

INSERT INTO #Temp (ID, ATTR_NAME, SUB_ATTR_NAME, VALUE)
VALUES
    ( 1, 'ATTR-1', 'SUB-ATTR-1', 23),
    ( 2, 'ATTR-1', 'SUB-ATTR-2', 32),
    ( 3, 'ATTR-1', 'SUB-ATTR-3', 25),
    ( 4, 'ATTR-1', 'SUB-ATTR-4', 28),
    ( 5, 'ATTR-2', 'SUB-ATTR-1', 78),
    ( 6, 'ATTR-2', 'SUB-ATTR-2', 45),
    ( 7, 'ATTR-2', 'SUB-ATTR-3', 48),
    ( 8, 'ATTR-2', 'SUB-ATTR-4', 41),
    ( 9, 'ATTR-3', 'SUB-ATTR-1', 47),
    (10, 'ATTR-3', 'SUB-ATTR-2', 12),
    (11, 'ATTR-3', 'SUB-ATTR-3', 16),
    (12, 'ATTR-3', 'SUB-ATTR-4', 18)

-- Show the loaded rows.
SELECT ID, ATTR_NAME, SUB_ATTR_NAME, VALUE
FROM #Temp
Using Dynamic Sql
-- Dynamic pivot: the output columns are derived from the distinct
-- SUB_ATTR_NAME values found in #Temp, so new values need no code change.
-- Local variables use the @ sigil in T-SQL; #Temp is a genuine temp table.
DECLARE @Sql  nvarchar(max),
        @Col  nvarchar(max),
        @Col2 nvarchar(max)

-- @Col: comma-separated, bracket-quoted column names for the PIVOT IN list.
-- TYPE + .value() returns the concatenation as plain text, so characters such
-- as & < > in a name are not turned into XML entities.
SELECT @Col = STUFF((SELECT DISTINCT ', ' + QUOTENAME(SUB_ATTR_NAME)
                     FROM #Temp
                     FOR XML PATH (''), TYPE).value('.', 'nvarchar(max)'), 1, 1, '')

-- @Col2: the outer select list, one "MAX([col]) AS [col]" per pivot column.
SELECT @Col2 = STUFF((SELECT DISTINCT ', ' + 'MAX( ' + QUOTENAME(SUB_ATTR_NAME) + ' ) AS ' + QUOTENAME(SUB_ATTR_NAME)
                      FROM #Temp
                      FOR XML PATH (''), TYPE).value('.', 'nvarchar(max)'), 1, 1, '')

-- SRC passes every #Temp column (including ID) into PIVOT, so the pivot
-- output still has one row per source row; the outer MAX ... GROUP BY
-- ATTR_NAME collapses those rows into one per attribute.
SET @Sql = '
SELECT ATTR_NAME,' + @Col2 + ' FROM
(
SELECT * FROM #Temp
)AS SRC
PIVOT
(
SUM(VALUE) FOR SUB_ATTR_NAME IN (' + @Col + ')
)AS PVT
GROUP BY ATTR_NAME'

-- Print the generated statement for inspection, then run it.
PRINT @Sql
EXEC (@Sql)
Result
ATTR_NAME SUB-ATTR-1 SUB-ATTR-2 SUB-ATTR-3 SUB-ATTR-4
----------------------------------------------------------
ATTR-1 23 32 25 28
ATTR-2 78 45 48 41
ATTR-3 47 12 16 18
Demo :http://rextester.com/SURJ9296
You need to group your table by ATTR_NAME; then you can sum VALUE separately for each SUB_ATTR_NAME:
-- Static pivot via conditional aggregation: one SUM(CASE ...) per
-- SUB_ATTR_NAME value, grouped by ATTR_NAME.
-- Aliases containing a hyphen must be bracket-quoted.
select attr_name,
       sum(case when SUB_ATTR_NAME = 'SUB-ATTR-1' then VALUE else 0 end) as [SUB-ATTR-1],
       sum(case when SUB_ATTR_NAME = 'SUB-ATTR-2' then VALUE else 0 end) as [SUB-ATTR-2],
       sum(case when SUB_ATTR_NAME = 'SUB-ATTR-3' then VALUE else 0 end) as [SUB-ATTR-3],
       sum(case when SUB_ATTR_NAME = 'SUB-ATTR-4' then VALUE else 0 end) as [SUB-ATTR-4]
from #Temp -- replace with your own table name
group by attr_name
I've put together what I view to be overly complicated SQL to get to what I'm after. I'm hoping for insight into a quicker and less complicated method.
What I'm after is the ability to assign an ID to groups of data where there are common groups of data across two columns.
For example I have the following subset of data:
CustID PartID RplcID
28 4 4
28 4 16
28 4 17
28 16 4
28 16 16
28 16 17
28 17 4
28 17 16
28 17 17
I want to create an ID for CustID=28 where there is overlap in the RplcID and PartID. So in this example, PartID 4, 16, 17 all have RplcIDs in common (4, 16, 17). As such, all of these pairs should have the same ID.
The method I'm using works (and is faster with temp tables instead of solely using CTEs) except for large datasets this thing is S-L-O-W. I'm sure there's a more efficient method out there and hoping someone can lend their expertise.
I'm outlining my current approach for as much clarity into my muddled thinking as possible.
STEP 1
Generate a temporary ID using DENSE_RANK() partitioned by CustID and ordered by RplcID.
RowID CustID PartID RplcID
1 28 16 16
1 28 17 16
1 28 4 16
2 28 16 17
2 28 17 17
2 28 4 17
3 28 16 4
3 28 17 4
3 28 4 4
STEP 2:
Then use these results and aggregate the PartIDs by using XML to create a comma separated string with which to group by.
RowID CustID RplcID PartIDS
4 28 16 16,17,4
4 28 17 16,17,4
4 28 4 16,17,4
STEP 3:
And finally split out these groups using the assigned ID by parsing the XML.
RowID CustID PartID RplcID
4 28 16 16
4 28 16 17
4 28 16 4
4 28 17 16
4 28 17 17
4 28 17 4
4 28 4 16
4 28 4 17
4 28 4 4
And the entirety of the SQL:
-- Make the script re-runnable: SELECT ... INTO fails if the target exists.
IF OBJECT_ID('tempdb..#tmp')  IS NOT NULL DROP TABLE #tmp
IF OBJECT_ID('tempdb..#tmp2') IS NOT NULL DROP TABLE #tmp2
IF OBJECT_ID('tempdb..#tmp3') IS NOT NULL DROP TABLE #tmp3

-- Sample data held in a table variable.
-- Table variables use the @ sigil; the #tmp* objects below are temp tables.
DECLARE @Parts TABLE
(
CustID VARCHAR(10),
PartID VARCHAR(10),
RplcID VARCHAR(10)
)
Insert Into @Parts (CustID, PartID, RplcID) VALUES
('26','19','93'),('26','19','63'),
('26','31','93'),('26','31','63'),('26','32','93'),('26','32','63'),('26','33','93'),('26','33','63'),('26','34','93'),
('26','34','63'),('26','35','93'),('26','35','63'),('26','36','93'),('26','36','63'),('26','37','93'),('26','37','63'),
('26','38','93'),('26','38','63'),('26','39','93'),('26','39','63'),('27','40','95'),('27','41','94'),
('27','41','95'),('27','42','94'),('27','42','95'),('27','43','94'),('27','43','95'),('27','44','94'),('27','44','95'),
('27','45','94'),('27','45','95'),('27','46','94'),('27','46','95'),('27','47','94'),('27','47','95'),('27','48','94'),
('27','48','95'),('27','49','94'),('27','49','95'),('27','50','94'),('27','50','95'),('27','17','94'),('27','17','95'),
('27','51','94'),('27','51','95'),('27','52','94'),('27','52','95'),('27','53','94'),('27','53','95'),('27','54','94'),
('27','54','95'),('27','33','94'),('27','33','95'),('27','55','94'),('27','55','95'),('27','34','94'),('27','34','95'),
('27','56','94'),('27','56','95'),('27','35','94'),('27','35','95'),('27','57','94'),('27','57','95'),('27','58','94'),
('27','58','95'),('27','59','94'),('27','59','95'),('27','37','94'),('27','37','95'),('27','60','94'),('27','60','95'),
('27','61','94'),('27','61','95'),('27','62','94'),('27','62','95'),('27','63','94'),('27','63','95'),('27','64','94'),
('27','64','95'),('27','3','96'),('27','3','97'),('27','3','98'),('27','3','99'),('27','3','100'),('28','4','4'),
('28','4','16'),('28','4','17'),('28','16','4'),('28','16','16'),('28','16','17'),('28','17','4'),('28','17','16'),
('28','17','17')
;

--Step 1: Create the initial ID
-- r -> r1 links rows of the same customer sharing a RplcID; r1 -> r2 links
-- rows of the same customer sharing a PartID. The rank is taken per customer
-- over r2.RplcID.
-- The column is PartID, as declared in @Parts (there is no BuyID column).
SELECT DISTINCT DENSE_RANK()
OVER(
partition BY r.CustID
ORDER BY r2.RplcID) AS RowID,
r.CustID,
r.PartID,
r2.RplcID
INTO #tmp
FROM @Parts r
JOIN @Parts r1
ON r.CustID = r1.CustID
AND r.RplcID = r1.RplcID
JOIN @Parts r2
ON r.CustID = r2.CustID
AND r1.PartID = r2.PartID

--Step 2: Group the PartIDs
-- For each (CustID, RowID, RplcID), concatenate the PartIDs from step 1 into
-- one comma-separated string, then rank on (CustID, PartIDs) so identical
-- PartID lists receive the same RowID.
SELECT DENSE_RANK()
OVER(
ORDER BY CustID, PartIDs) AS RowID,
*
INTO #tmp2
FROM (SELECT CustID,
Rtrim(RplcID) RplcID,
Stuff((SELECT ',' + Rtrim(PartID)
FROM #tmp RSLT2
WHERE RSLT2.ROWID = RSLT.ROWID
AND RSLT2.CustID = RSLT.CustID
FOR xml path('')), 1, 1, '') [PartIDs]
FROM #tmp RSLT
GROUP BY RSLT.CustID,
RSLT.ROWID,
RSLT.RplcID)A

--Step 3: Using the grouped PartIDs, split the strings using XML and assign RowID
-- Each comma-separated list is converted to <r>...</r> elements and shredded
-- back into one row per PartID, carrying the RowID from step 2.
SELECT RowID,
CustID,
PartID,
RplcID
INTO #tmp3
FROM (SELECT RowID,
CustID,
n.r.value('.','varchar(10)') AS PartID,
RplcID
FROM #tmp2
CROSS APPLY(SELECT Cast('<r>' + Replace(PartIDs, ',', '</r><r>')
+ '</r>' AS XML)) AS S(xmlcol)
CROSS APPLY s.xmlcol.nodes('r') AS n(r))A
Order by RowID

-- Inspect the result for customer 28.
Select * from #tmp3 where CustID='28'
Select distinct PartID
from #tmp3
where CustID='28'
Select distinct RplcID
from #tmp3
where CustID='28'
TABLEA contains the data, while TABLEB contains the search criteria
Here is a SQL Fiddle with the data
Tables
TABLEA
visited_states_time
AL= Alabama,2, AK=Alaska,5
AR=Arkansas,6
AZ=Arizona,10
CA=California, 10,CT=Connecticut,20
TABLEB
CRITERIA
AL
HI
CA
CT
AK
Desired Result
visited_states ................................... total_time_spent
AL= Alabama, AK=Alaska ............................ 7
CA=California, CT=Connecticut................... 30
That's a terrible data model. Also, you didn't state the matching condition for TABLEB: should a row qualify if any of its states matches, or only if all of them do?
Since we need to split each row up (to sum() the times) and then recombine the pieces, you can use:
-- Oracle SQL*Plus transcript (the leading "SQL>" and line numbers are prompts).
-- Purpose: split each comma-separated visited_states_time string into its
-- state codes and time values, keep the states listed in tableb, and sum the
-- times per original string.
--
-- v:     numbers each tablea row (rownum r), wraps the string in a leading and
--        trailing comma, and computes "fields" as the number of commas in the
--        original string plus one.
--        NOTE(review): a string with no comma at all would presumably yield a
--        NULL field count - confirm against real data.
-- model: one partition per row (r), dimensioned by f starting at 0.
--        * state[f], for f = 0, 2, 4, ... up to fields-1: the trimmed text
--          between the (f+1)-th comma and the (f/2+1)-th '=' sign.
--        * visited_states_time[any]: copy of the string held in cell 0.
--        * total_time_spent[any]: the text between the (f+2)-th and (f+3)-th
--          commas.
-- outer: keeps cells whose state appears in tableb.criteria, groups by the
--        comma-wrapped string, sums total_time_spent, and strips the wrapping
--        commas for display.
SQL> with v as (select rownum r,
2 ','||visited_states_time||',' visited_states_time,
3 length(
4 regexp_replace(visited_states_time, '[^,]', '')
5 )+1 fields
6 from tablea)
7 select trim(both ',' from visited_states_time) visited_states_time,
8 sum(total_time_spent) total_time_spent
9 from (select *
10 from v
11 model
12 partition by (r)
13 dimension by (0 as f)
14 measures (visited_states_time, cast('' as varchar2(2)) state,
15 0 as total_time_spent, fields)
16 rules (
17 state[for f from 0 to fields[0]-1 increment 2]
18 = trim(
19 substr(visited_states_time[0],
20 instr(visited_states_time[0], ',', 1, cv(f)+1)+1,
21 instr(visited_states_time[0], '=', 1, (cv(f)/2)+1)
22 - instr(visited_states_time[0], ',', 1, cv(f)+1)-1
23 )),
24 visited_states_time[any]= visited_states_time[0],
25 total_time_spent[any]
26 = substr(visited_states_time[0],
27 instr(visited_states_time[0], ',', 1, (cv(f)+2))+1,
28 instr(visited_states_time[0], ',', 1, (cv(f)+3))
29 - instr(visited_states_time[0], ',', 1, (cv(f)+2))-1
30 )
31 ))
32 where state in (select criteria from tableb)
33 group by visited_states_time;
VISITED_STATES_TIME TOTAL_TIME_SPENT
------------------------------------- ----------------
CA=California, 10,CT=Connecticut,20 30
AL=Alabama,2, AK=Alaska,5 7
But seriously, rewrite that data model to store the values separately to start with.