I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Rebuild the random sample table: a random number (0-999) of rows taken from
-- sys.objects, each with a sequential ID and a random string of 0-29 characters.
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t;

SELECT TOP (ABS(CHECKSUM(NEWID())) % 1000)
    ROW_NUMBER() OVER (ORDER BY name) AS ID,
    LEFT(CAST(NEWID() AS NVARCHAR(100)), ABS(CHECKSUM(NEWID())) % 30) AS Description
INTO #t
FROM sys.objects;

-- Local variables use the @ sigil; a leading # is reserved for temp tables.
DECLARE @maxGroupSize INT;
SET @maxGroupSize = 100;

-- GroupID is the running total integer-divided by the maximum size, so a row is
-- bucketed by where its running total ENDS. A row that straddles a bucket
-- boundary brings its whole length into the next bucket, which is why
-- SumOfGroup can exceed @maxGroupSize with this approach.
WITH t AS (
    SELECT
        ID,
        Description,
        LEN(Description) AS DescriptionLength,
        SUM(LEN(Description)) OVER (ORDER BY ID) AS [RunningLength],
        SUM(LEN(Description)) OVER (ORDER BY ID) / @maxGroupSize AS GroupID
    FROM #t
)
SELECT
    ID,
    Description,
    DescriptionLength,
    RunningLength,
    GroupID,
    SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID;
I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed ten-row fixture: id is the processing order, description is the text
-- whose length is accumulated.
CREATE TABLE data
(
    id          INT,
    description NVARCHAR(20)
);

INSERT INTO data (id, description)
VALUES
    (1,  'qmlsdkjfqmsldk'),
    (2,  'mldskjf'),
    (3,  'qmsdlfkqjsdm'),
    (4,  'fmqlsdkfq'),
    (5,  'qdsfqsdfqq'),
    (6,  'mds'),
    (7,  'qmsldfkqsjdmfqlkj'),
    (8,  'qdmsl'),
    (9,  'mqlskfjqmlkd'),
    (10, 'qsdqfdddffd');
Solution
In every recursion step, a case expression evaluates (r.group_running_length + len(d.description) <= @group_max_length) to decide whether the current group is extended or a new group is started.
Set group target size to 40 to better fit the sample data.
-- Greedy grouping: walk the rows in id order and start a new group whenever
-- adding the next description would push the current group past the maximum.
-- Local variables use the @ sigil; a leading # is reserved for temp tables.
declare @group_max_length int = 40;

with rcte as
(
    -- Anchor: the row with id = 1 opens group 1.
    select d.id,
           d.description,
           len(d.description) as description_length,
           len(d.description) as running_length,
           1                  as group_id,
           len(d.description) as group_running_length
    from data d
    where d.id = 1
    union all
    -- Recursive step: fetch the row with the next id (ids must be contiguous)
    -- and either extend the current group or open a new one.
    select d.id,
           d.description,
           len(d.description),
           r.running_length + len(d.description),
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_id
             else r.group_id + 1
           end,
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_running_length + len(d.description)
             else len(d.description)
           end
    from rcte r
    join data d
      on d.id = r.id + 1
)
select r.id,
       r.description,
       r.description_length,
       r.running_length,
       r.group_id,
       r.group_running_length,
       -- The group total equals the largest running length reached inside the
       -- group. A window aggregate avoids referencing (and re-evaluating) the
       -- recursive CTE a second time.
       max(r.group_running_length) over (partition by r.group_id) as group_sum
from rcte r
order by r.id
-- One recursion level is used per row; lift the default limit of 100 levels so
-- inputs with more rows do not abort. The recursion still ends when no next id exists.
option (maxrecursion 0);
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).
I have data that looks like this
Name XX YY
alpha 10 77
beta 10 90
alpha 20 72
beta 20 91
alpha 30 75
beta 30 94
alpha 40 76
beta 40 95
If I use
-- All score rows, sorted by name and then by XX within each name.
SELECT *
FROM scores
ORDER BY Name, XX
I will get
Name XX YY
alpha 10 77
alpha 20 72
alpha 30 75
alpha 40 76
beta 10 90
beta 20 91
beta 30 94
beta 40 95
At the moment, I'm retrieving the data in this form and using some C coding to format it like
Name xx=10 xx=20 xx=30 xx=40
alpha 77 72 75 76
beta 90 91 94 95
Assuming that there will always be entries for 10, 20, 30 and 40 for every name, is something like this possible in SQL without creating a new table (as was done in the question "SQL Reformatting table columns")?
You need to use PIVOT to get your desired results. Before pivoting, the values in column XX need to be prefixed so that the final column names match your expected output.
-- Pivot YY into one column per XX value. XX is rewritten to 'XX=<value>' first
-- so that the pivoted column names come out as XX=10, XX=20, ...
SELECT
    PP.Name,
    PP.[XX=10],
    PP.[XX=20],
    PP.[XX=30],
    PP.[XX=40]
FROM
(
    SELECT
        Name,
        -- Explicit length: do not rely on the default length of an unsized VARCHAR.
        'XX=' + CAST(XX AS VARCHAR(25)) AS XX,
        YY
    FROM your_table
) AS P
PIVOT
(
    SUM(YY)
    FOR XX IN ([XX=10], [XX=20], [XX=30], [XX=40])
) AS PP;
Output-
Name XX=10 XX=20 XX=30 XX=40
alpha 77 72 75 76
beta 90 91 94 95
The same output can also be achieved with the following query:
-- Pivot on the raw XX values, then rename the pivoted columns in the select list.
SELECT
    pvt.Name,
    pvt.[10] AS [XX=10],
    pvt.[20] AS [XX=20],
    pvt.[30] AS [XX=30],
    pvt.[40] AS [XX=40]
FROM
(
    SELECT
        Name,
        XX,
        YY
    FROM your_table
) AS src
PIVOT
(
    SUM(YY)
    FOR XX IN ([10], [20], [30], [40])
) AS pvt
You could use dynamic PIVOT to achieve the same result which would be more robust,
-- Sample data for the dynamic pivot.
CREATE TABLE #table1 (Name VARCHAR(100), XX INT, YY INT);

INSERT INTO #table1 (Name, XX, YY)
VALUES
    ('alpha', 10, 77),
    ('beta',  10, 90),
    ('alpha', 20, 72),
    ('beta',  20, 91),
    ('alpha', 30, 75),
    ('beta',  30, 94),
    ('alpha', 40, 76),
    ('beta',  40, 95);

-- Local variables use the @ sigil; a leading # is reserved for temp tables.
DECLARE @pvt        NVARCHAR(MAX);  -- "[10], [20], ..."                 -> PIVOT IN list
DECLARE @dynamicCol NVARCHAR(MAX);  -- "[10] AS [XX=10], [20] AS ..."    -> select list

-- Build both lists with FOR XML PATH and an explicit ORDER BY so the column
-- order is deterministic. QUOTENAME brackets every value so data can never
-- break out of the identifier position in the generated statement.
SELECT @pvt = STUFF((
        SELECT ', ' + QUOTENAME(CAST(XX AS NVARCHAR(25)))
        FROM #table1
        GROUP BY XX
        ORDER BY XX
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 2, '');

SELECT @dynamicCol = STUFF((
        SELECT ', ' + QUOTENAME(CAST(XX AS NVARCHAR(25)))
             + ' AS ' + QUOTENAME('XX=' + CAST(XX AS NVARCHAR(25)))
        FROM #table1
        GROUP BY XX
        ORDER BY XX
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 2, '');

-- With no rows both lists are NULL; skip execution instead of running a NULL statement.
IF @pvt IS NOT NULL
BEGIN
    DECLARE @sql NVARCHAR(MAX) = N'
SELECT [Name], ' + @dynamicCol + N'
FROM #table1 AS a
PIVOT
(
    SUM([YY])
    FOR [XX] IN (' + @pvt + N')
) AS PIV;';

    EXEC sys.sp_executesql @sql;
END;
The result is as below,
Name XX=10 XX=20 XX=30 XX=40
alpha 77 72 75 76
beta 90 91 94 95
Another solution with case
-- Conditional aggregation: one SUM per target column; rows with a different XX
-- contribute 0 to that column.
SELECT
    Name,
    SUM(CASE WHEN XX = '10' THEN YY ELSE 0 END) AS 'xx=10',
    SUM(CASE WHEN XX = '20' THEN YY ELSE 0 END) AS 'xx=20',
    SUM(CASE WHEN XX = '30' THEN YY ELSE 0 END) AS 'xx=30',
    SUM(CASE WHEN XX = '40' THEN YY ELSE 0 END) AS 'xx=40'
FROM temp_0
GROUP BY Name
I've put together what I view to be overly complicated SQL to get to what I'm after. I'm hoping for insight into a quicker and less complicated method.
What I'm after is the ability to assign an ID to groups of rows that share common values across two columns.
For example I have the following subset of data:
CustID PartID RplcID
28 4 4
28 4 16
28 4 17
28 16 4
28 16 16
28 16 17
28 17 4
28 17 16
28 17 17
I want to create an ID for CustID=28 where there is overlap in the RplcID and PartID. So in this example, PartID 4, 16, 17 all have RplcIDs in common (4, 16, 17). As such, all of these pairs should have the same ID.
The method I'm using works (and is faster with temp tables instead of solely using CTEs) except for large datasets this thing is S-L-O-W. I'm sure there's a more efficient method out there and hoping someone can lend their expertise.
I'm outlining my current approach for as much clarity into my muddled thinking as possible.
STEP 1
Generate a temporary ID using DENSE_RANK() partitioned by CustID, ordered by RplcID.
RowID CustID PartID RplcID
1 28 16 16
1 28 17 16
1 28 4 16
2 28 16 17
2 28 17 17
2 28 4 17
3 28 16 4
3 28 17 4
3 28 4 4
STEP 2:
Then use these results and aggregate the PartIDs by using XML to create a comma separated string with which to group by.
RowID CustID RplcID PartIDS
4 28 16 16,17,4
4 28 17 16,17,4
4 28 4 16,17,4
STEP 3:
And finally split out these groups using the assigned ID by parsing the XML.
RowID CustID PartID RplcID
4 28 16 16
4 28 16 17
4 28 16 4
4 28 17 16
4 28 17 17
4 28 17 4
4 28 4 16
4 28 4 17
4 28 4 4
And the entirety of the SQL:
-- Sample data: (customer, part, replacement part) triples.
-- Table variables use the @ sigil; a leading # is reserved for temp tables.
DECLARE @Parts TABLE
(
    CustID VARCHAR(10),
    PartID VARCHAR(10),
    RplcID VARCHAR(10)
);

INSERT INTO @Parts (CustID, PartID, RplcID) VALUES
('26','19','93'),('26','19','63'),
('26','31','93'),('26','31','63'),('26','32','93'),('26','32','63'),('26','33','93'),('26','33','63'),('26','34','93'),
('26','34','63'),('26','35','93'),('26','35','63'),('26','36','93'),('26','36','63'),('26','37','93'),('26','37','63'),
('26','38','93'),('26','38','63'),('26','39','93'),('26','39','63'),('27','40','95'),('27','41','94'),
('27','41','95'),('27','42','94'),('27','42','95'),('27','43','94'),('27','43','95'),('27','44','94'),('27','44','95'),
('27','45','94'),('27','45','95'),('27','46','94'),('27','46','95'),('27','47','94'),('27','47','95'),('27','48','94'),
('27','48','95'),('27','49','94'),('27','49','95'),('27','50','94'),('27','50','95'),('27','17','94'),('27','17','95'),
('27','51','94'),('27','51','95'),('27','52','94'),('27','52','95'),('27','53','94'),('27','53','95'),('27','54','94'),
('27','54','95'),('27','33','94'),('27','33','95'),('27','55','94'),('27','55','95'),('27','34','94'),('27','34','95'),
('27','56','94'),('27','56','95'),('27','35','94'),('27','35','95'),('27','57','94'),('27','57','95'),('27','58','94'),
('27','58','95'),('27','59','94'),('27','59','95'),('27','37','94'),('27','37','95'),('27','60','94'),('27','60','95'),
('27','61','94'),('27','61','95'),('27','62','94'),('27','62','95'),('27','63','94'),('27','63','95'),('27','64','94'),
('27','64','95'),('27','3','96'),('27','3','97'),('27','3','98'),('27','3','99'),('27','3','100'),('28','4','4'),
('28','4','16'),('28','4','17'),('28','16','4'),('28','16','16'),('28','16','17'),('28','17','4'),('28','17','16'),
('28','17','17')
;

-- Step 1: Create the initial ID.
-- r1 finds rows of the same customer sharing r's replacement; r2 expands to
-- every replacement of those parts. The ID ranks r2.RplcID within a customer.
-- The table defines PartID (there is no BuyID column), so PartID is used throughout.
SELECT DISTINCT DENSE_RANK()
                  OVER(
                    PARTITION BY r.CustID
                    ORDER BY r2.RplcID) AS RowID,
                r.CustID,
                r.PartID,
                r2.RplcID
INTO   #tmp
FROM   @Parts r
       JOIN @Parts r1
         ON r.CustID = r1.CustID
            AND r.RplcID = r1.RplcID
       JOIN @Parts r2
         ON r.CustID = r2.CustID
            AND r1.PartID = r2.PartID;

-- Step 2: Group the PartIDs.
-- Collapse the PartIDs of each (CustID, RowID) into one comma-separated string
-- and rank the distinct (CustID, PartIDs) combinations to get the shared ID.
SELECT DENSE_RANK()
         OVER(
           ORDER BY CustID, PartIDs) AS RowID,
       *
INTO   #tmp2
FROM   (SELECT CustID,
               RTRIM(RplcID) RplcID,
               STUFF((SELECT ',' + RTRIM(PartID)
                      FROM   #tmp RSLT2
                      WHERE  RSLT2.RowID = RSLT.RowID
                             AND RSLT2.CustID = RSLT.CustID
                      FOR XML PATH('')), 1, 1, '') [PartIDs]
        FROM   #tmp RSLT
        GROUP  BY RSLT.CustID,
                  RSLT.RowID,
                  RSLT.RplcID) A;

-- Step 3: Using the grouped PartIDs, split the strings using XML and assign RowID.
-- Each comma-separated list is turned into <r>..</r> elements and shredded
-- back into one row per PartID.
SELECT RowID,
       CustID,
       PartID,
       RplcID
INTO   #tmp3
FROM   (SELECT RowID,
               CustID,
               n.r.value('.', 'varchar(10)') AS PartID,
               RplcID
        FROM   #tmp2
               CROSS APPLY (SELECT CAST('<r>' + REPLACE(PartIDs, ',', '</r><r>')
                                        + '</r>' AS XML)) AS S(xmlcol)
               CROSS APPLY S.xmlcol.nodes('r') AS n(r)) A
ORDER  BY RowID;

-- Inspect the result for one customer.
SELECT * FROM #tmp3 WHERE CustID = '28';

SELECT DISTINCT PartID
FROM   #tmp3
WHERE  CustID = '28';

SELECT DISTINCT RplcID
FROM   #tmp3
WHERE  CustID = '28';
I am trying to transpose data. The number of columns is not fixed (i.e. the selected ShiftNames are not fixed). Here is my input data.
Date_time ShiftName Consumption
28-07-2016 Shift 1 20
28-07-2016 Shift 2 21
28-07-2016 Shift 3 22
29-07-2016 Shift 1 30
29-07-2016 Shift 2 31
29-07-2016 Shift 3 32
30-07-2016 Shift 1 40
30-07-2016 Shift 2 41
30-07-2016 Shift 3 42
And the output will be like this
Shift 1 Shift 2 Shift 3 Date_Time
20 21 22 28-07-2016
30 31 32 29-07-2016
40 41 42 30-07-2016
You can do this with a pivot. Here is an example:
Test data:
-- Test data: one consumption value per date and shift.
-- "DECLARE #temp TABLE" is not valid T-SQL (DECLARE needs an @ name). A temp
-- table is created instead so that queries reading FROM #temp resolve; unlike
-- a table variable, a temp table is also visible to dynamic SQL.
CREATE TABLE #temp
(
    Date_time   VARCHAR(100),  -- kept as text in dd-mm-yyyy form, as in the sample
    ShiftName   VARCHAR(100),
    Consumption INT
);

INSERT INTO #temp (Date_time, ShiftName, Consumption)
VALUES
    ('28-07-2016', 'Shift 1', 20),
    ('28-07-2016', 'Shift 2', 21),
    ('28-07-2016', 'Shift 3', 22),
    ('29-07-2016', 'Shift 1', 30),
    ('29-07-2016', 'Shift 2', 31),
    ('29-07-2016', 'Shift 3', 32),
    ('30-07-2016', 'Shift 1', 40),
    ('30-07-2016', 'Shift 2', 41),
    ('30-07-2016', 'Shift 3', 42);
Pivot:
-- One row per Date_time, one column per shift; SUM is the aggregate PIVOT requires.
SELECT
    pvt.Date_time,
    pvt.[Shift 1],
    pvt.[Shift 2],
    pvt.[Shift 3]
FROM
(
    SELECT Date_time, ShiftName, Consumption
    FROM #temp
) AS sourceTable
PIVOT
(
    SUM(Consumption)
    FOR ShiftName IN ([Shift 1], [Shift 2], [Shift 3])
) AS pvt
Result:
Date_time Shift 1 Shift 2 Shift 3
28-07-2016 20 21 22
29-07-2016 30 31 32
30-07-2016 40 41 42
Reference:
Using PIVOT and UNPIVOT
Since the ShiftName values are dynamic, use this dynamic query:
-- Dynamic pivot: build the shift column list from the data, then execute the
-- generated statement. Local variables use the @ sigil; # is for temp tables.
DECLARE @cols  AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX);

-- Comma-separated, bracket-quoted list of the distinct shift names.
-- QUOTENAME keeps data values from breaking out of the identifier position.
SELECT @cols = STUFF((
        SELECT DISTINCT ',' + QUOTENAME(ShiftName)
        FROM #temp
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

-- Explicit column lists: every source column that is not aggregated or spread
-- becomes a grouping column of the PIVOT, so only Date_time is passed through.
SET @query = N'SELECT Date_time, ' + @cols + N'
FROM
(
    SELECT Date_time, ShiftName, Consumption
    FROM #temp
) x
PIVOT
(
    SUM(Consumption)
    FOR ShiftName IN (' + @cols + N')
) p;';

-- @query is NULL when #temp is empty; executing NULL runs nothing.
EXECUTE sys.sp_executesql @query;