GROUP BY with summing null values to every group - sql

I want to group values and sum them up by a category value - or none.
For example, I have the following table:
+-------+----------+
| value | category |
+-------+----------+
| 99 | A |
| 42 | A |
| 76 | [NULL] |
| 66 | B |
| 10 | C |
| 13 | [NULL] |
| 27 | C |
+-------+----------+
My desired result should look like this:
+-------+----------+
| sum | category |
+-------+----------+
| 230 | A |
| 155 | B |
| 126 | C |
| 89 | [NULL] |
+-------+----------+
I tried a group by category but obviously this doesn't bring up the right numbers.
Any ideas?
I'm using SQL Server 2012.
EDIT:
Ok, as requested, I can explain my intents and give my query so far, although that is not very helpful I think.
I need to sum all values for each given category AND add to each of those sums the sum of all values without a category [=> NULL].
So in my example, I would sum
99 + 42 + 76 + 13 = 230 for category A
66 + 76 + 13 = 155 for category B
10 + 27 + 76 + 13 = 126 for category C
76 + 13 = 89 for no category
I hope that gives you an idea of my goal.
Query so far:
-- Plain per-category totals; rows whose category is NULL form a group of their own.
SELECT
    SUM(t.[value]),
    t.[category]
FROM [mytable] AS t
GROUP BY t.[category]

First calculate the sum of nulls then add it to each group:
-- Sample data. In T-SQL a table variable name must start with @.
DECLARE @t TABLE
(
    value    INT,
    category CHAR(1)
);

INSERT INTO @t (value, category)
VALUES (99, 'A'),
       (42, 'A'),
       (76, NULL),
       (66, 'B'),
       (10, 'C'),
       (13, NULL),
       (27, 'C');

-- Version 1: compute the total of the uncategorised rows once, then add it to
-- the total of every named category. The NULL group itself is filtered out.
WITH null_total AS (
    -- An aggregate without GROUP BY always returns exactly one row; COALESCE
    -- keeps the addition below from becoming NULL when no category is NULL.
    SELECT COALESCE(SUM(value), 0) AS s
    FROM @t
    WHERE category IS NULL
)
SELECT t.category,
       SUM(t.value) + n.s AS [sum]
FROM @t AS t
CROSS JOIN null_total AS n
WHERE t.category IS NOT NULL
GROUP BY t.category, n.s;

-- Another version: per-row window sums (own category total + total of the
-- NULL-category rows), collapsed to one row per category with DISTINCT.
WITH cte AS (
    SELECT category,
           SUM(value) OVER (PARTITION BY category)
           + SUM(CASE WHEN category IS NULL THEN value ELSE 0 END) OVER () AS value
    FROM @t
)
SELECT DISTINCT category, value
FROM cte
WHERE category IS NOT NULL;

If you want to add the NULL values to all the groups, then do something like:
-- Per-category totals, with the NULL group's total added to every named category.
with cte as (
      select category, sum(value) as sumv
      from t
      group by category
     )
select cte.category,
       (cte.sumv +
        -- `category` must be qualified: both cte and ctenull expose that column.
        -- COALESCE needs a fallback argument; 0 covers the case where no
        -- NULL-category group exists and the left join finds no match.
        (case when cte.category is not null then coalesce(ctenull.sumv, 0) else 0 end)
       ) as sumWithNulls
from cte left join
     cte ctenull
     on ctenull.category is null;  -- or should that be `= '[NULL]'`?
This seems like a strange operation.
EDIT:
You can almost do this with window functions:
-- Adds the NULL group's total to every group through an unpartitioned window
-- sum taken over the grouped rows. The NULL group receives its own total a
-- second time here.
SELECT
    category,
    SUM(value)
        + SUM(CASE WHEN category IS NULL THEN SUM(value) ELSE 0 END) OVER ()
        AS sumWithNulls
FROM t
GROUP BY category;
The problem is that NULLs get over counted for that category. So:
-- Per-category totals; named categories additionally receive the NULL group's
-- total, while the NULL group keeps just its own total.
select category,
       (sum(value) +
        (case when category is not null
              -- window sum over the grouped rows: picks up only the NULL group's total
              then sum(case when category is null then sum(value) else 0 end) over ()
              else 0
         end)  -- this closing parenthesis was missing in the original
       ) as sumWithNulls
from t
group by category;

You want to get the sum of the NULL category and add it to the value of the other (non-null) categories:
-- Sample data held in a table variable (variables must be prefixed with @).
DECLARE @Table1 TABLE (Value int, Category varchar(1));
DECLARE @NullCategorySum int;

INSERT INTO @Table1
    (Value, Category)
VALUES
    (99, 'A'),
    (42, 'A'),
    (76, NULL),
    (66, 'B'),
    (10, 'C'),
    (13, NULL),
    (27, 'C');

-- Total of the uncategorised rows; 0 when there are none, so the addition
-- below never turns a category total into NULL.
SELECT @NullCategorySum = COALESCE(SUM(Value), 0)
FROM @Table1
WHERE Category IS NULL;

-- Named categories get their own total plus the NULL total. The NULL group
-- gets only its own total: ELSE 0 is required, because a CASE without ELSE
-- yields NULL and SUM(...) + NULL is NULL.
SELECT SUM(t1.Value)
       + CASE
             WHEN t1.Category IS NOT NULL THEN @NullCategorySum
             ELSE 0
         END AS SumValue,
       t1.Category
FROM @Table1 t1
GROUP BY t1.Category;
This outputs
SumValue Category
89 NULL
230 A
155 B
126 C

Maybe you just missed using the SUM built-in function? This should work:
-- Total of value for each category.
SELECT SUM(src.value) AS [sum],
       src.category
FROM [YourTableHere] AS src
GROUP BY src.category
Edit: Ah, I see what you are doing now. I was able to do it by joining a 2nd query with just the NULL sum, so the NULL sum comes back with every row. Then you can just add it in the final step.
-- MainSet: one row per named category plus a (0, NULL) placeholder row.
-- JustNulls: a single row holding the total of the NULL-category rows.
-- Every MainSet row is paired with that single row and the two sums are added.
SELECT
    MainSet.[sum] + JustNulls.[sum] AS [sum],
    MainSet.category
FROM (
    SELECT SUM(X.value) AS [sum], X.category
    FROM [YourTableHere] X
    WHERE X.category IS NOT NULL
    GROUP BY X.category
    -- the placeholder row cannot duplicate a grouped row, so no dedup is needed
    UNION ALL
    SELECT 0, NULL
) MainSet
CROSS JOIN (
    -- No GROUP BY here: an ungrouped aggregate always returns one row, and
    -- COALESCE turns "no NULL-category rows" into 0 so the sums stay non-NULL.
    SELECT COALESCE(SUM(Y.value), 0) AS [sum]
    FROM [YourTableHere] Y
    WHERE Y.category IS NULL
) JustNulls

Similar to levelonehuman's answer, but it might be a little faster:
-- Total for the rows without a mimeType; 0 when there are none so that the
-- per-type sums below do not become NULL.
declare @countNull int = (select coalesce(sum(textUniqueWordCount), 0) from docSVsys where mimeType is null);

-- One row per mimeType (own total + NULL total), followed by the NULL row itself.
select mimeType, sum(isnull(textUniqueWordCount, 0)) + @countNull as [sum]
from docSVsys
where mimeType is not null
group by mimeType
-- the two branches cannot produce the same row, so skip the UNION dedup step
union all
select null, @countNull;

Related

How to update one row that has max value in column (SQL Server)

I am trying to update the Rows that have the Max score with the value of 'Yes' in the Text1 column.
This is straightforward except when there are multiple rows with the max score.
If they have the same score, I just want to select the top row to have the value 'Yes'. Only one row with identical Vendor IDs should have the 'Yes' value.
-- Flags every Suppliers row whose VCScore equals the highest VCScore of its
-- Vendor; when several rows tie for the maximum, all of them are updated.
UPDATE s
SET Text1 = 'Yes'
FROM Suppliers AS s
INNER JOIN (
    SELECT Vendor, MAX(VCScore) AS MaxVCScore
    FROM Suppliers
    GROUP BY Vendor
) AS best
    ON  best.Vendor = s.Vendor
    AND best.MaxVCScore = s.VCScore
I do not want to use TOP 1 because that will only update one row in the whole table. I instead want only one row for each Vendor to be updated. (Vendor can be identical which is what I am trying to fix.) I cannot add a Group By clause to the Update statement as I would like to group by Vendor but that is incorrect syntax.
-- Number each vendor's rows from highest to lowest VCScore and flag row 1.
-- NOTE(review): assumes Suppliers has a unique key column named pkey - replace
-- it with the table's real key.
with t as (
    select pkey,
           -- pkey as tie-breaker makes the chosen row deterministic when scores tie
           row_number() over (partition by Vendor order by VCScore desc, pkey) as rn
    from Suppliers
)
update s
set Text1 = 'Yes'
from Suppliers s   -- the original read `supplier`, which is not the table in the question
join t on s.pkey = t.pkey and t.rn = 1;
Here's one way you may go about this using a Common Table Expression (CTE). The following may be run in SSMS:
-- Demo data in table variables (table variable names must start with @).
DECLARE @Suppliers table ( Vendor varchar(20), VCScore int, Text1 varchar(3), SupplierPK int IDENTITY (1,1) );
INSERT INTO @Suppliers ( Vendor, VCScore ) VALUES
( 'Vendor1', 85 ), ( 'Vendor1', 85 ), ( 'Vendor1', 85 ), ( 'Vendor2', 65 ), ( 'Vendor2', 65 );

DECLARE @Vendor table ( Vendor varchar(20), VCScore int );
INSERT INTO @Vendor ( Vendor, VCScore ) VALUES
( 'Vendor1', 85 ), ( 'Vendor1', 25 ), ( 'Vendor1', 45 ), ( 'Vendor2', 45 ), ( 'Vendor2', 65 );

-- Keep only supplier rows whose score equals the vendor's maximum score in
-- @Vendor, number them per vendor by SupplierPK, and flag the first one.
WITH cte AS (
    SELECT
        Vendor,
        Text1,
        SupplierPK,
        ROW_NUMBER() OVER ( PARTITION BY Vendor ORDER BY SupplierPK ) AS RowNo
    FROM @Suppliers AS s
    OUTER APPLY (
        SELECT MAX ( VCScore ) AS MaxVCScore FROM @Vendor AS v WHERE v.Vendor = s.Vendor
    ) AS x
    WHERE
        s.VCScore = x.MaxVCScore
)
UPDATE cte
SET
    Text1 = 'Yes'
WHERE
    cte.RowNo = 1;

-- Show the outcome.
SELECT Vendor, VCScore, Text1, SupplierPK FROM @Suppliers ORDER BY Vendor, SupplierPK;
Returns
+---------+---------+-------+------------+
| Vendor | VCScore | Text1 | SupplierPK |
+---------+---------+-------+------------+
| Vendor1 | 85 | Yes | 1 |
| Vendor1 | 85 | NULL | 2 |
| Vendor1 | 85 | NULL | 3 |
| Vendor2 | 65 | Yes | 4 |
| Vendor2 | 65 | NULL | 5 |
+---------+---------+-------+------------+
I am making the assumption that you have a primary key value that can be sorted in your Suppliers table.
I recommend using an updatable CTE:
-- Number each vendor's rows by VCScore, highest first, then set Text1 on the
-- row numbered 1 by updating through the CTE.
WITH ranked AS (
    SELECT
        s.*,
        ROW_NUMBER() OVER (PARTITION BY Vendor ORDER BY VCScore DESC) AS seqnum
    FROM Suppliers AS s
)
UPDATE ranked
SET Text1 = 'Yes'
WHERE seqnum = 1;
Note that no JOIN is needed.

How can I transpose a table and make aggregation at the same time in SQL Server?

I am a beginner with SQL. I wrote code for pivoting my table (from column view to row view). It works fine.
My query takes a 2 rows x 195 columns and converts it into a 195 rows x 3 columns.
-- Turns each selected row into (Key, Value) pairs via JSON, then pivots back to
-- one row per column name with one value column per time_index bucket.
select Element   = src.[Key]
      ,New       = max(case when src.time_index = 1 then src.value end)
      -- bracket-quoted alias: the 'string' = expression alias form is deprecated
      ,[Current] = max(case when src.time_index >= 2 then src.value end)
from (
    select A.[time_index]
          ,B.*
    from (select * from ifrs17.output_bba where id in (618830,618686)) A
    cross apply (
        -- serialise the current row as one JSON object and read it back as key/value pairs
        select [Key]
              ,Value
        from OpenJson( (select A.* for json path, without_array_wrapper) )
        where [Key] not in ('time_index')
    ) B
) src   -- distinct alias; the original reused A for both the inner and outer derived table
group by src.[Key]
In the query you can read "(618830,618686)". It corresponds to the extraction of two lines.
time_index
legal_entity_code
cohort
...
...
1
AAA
50
...
...
2
BBB
55
...
...
TO
Element
time_index_1
time_index_2 and more
legal_entity_code
AAA
BBB
cohort
50
55
...
...
...
...
...
...
I would like now to add a third line (or even more) in my input table and make a sum by time_index. (Example: if two lines have time_index = 1, these two lines should be aggregated. Meaning that the values are aggregated and the characters flagged as "NULL".)
time_index
legal_entity_code
cohort
...
...
1
AAA
50
...
...
2
BBB
55
...
...
2
CCC
45
...
...
to
Element
time_index_1
time_index_2 and more
legal_entity_code
AAA
"NULL"
cohort
50
100
...
...
...
...
...
...
How can I proceed?
Here's an attempt.
But summing the numbers is kinda problematic if you don't know their datatypes.
In the test below there's a TRY_CAST to a decimal.
And I doubt you want to sum the id's.
-- Test table and rows.
create table yourtable (
 id int primary key,
 time_index int,
 legal_entity_code varchar(30),
 cohort decimal(2,0),
 other1 nvarchar(5)
);

insert into yourtable
(id, time_index, legal_entity_code, cohort, other1) values
  (618830, 1, 'AAA', 50, 'X1234')
, (618686, 2, 'BBB', 55, 'Y5678' )
, (618144, 2, 'CCC', 45, 'Y5678' );  -- terminator required: the next statement starts with WITH

with DATA as (
  select * from yourtable
  where id in (618830, 618686, 618144)
)
select
  [Key] AS [Element]
, [TimeIndex1] = max(case when time_index = 1 then value end)
, CASE
    -- ids are listed, not summed
    WHEN [Key] IN ('id')
    THEN string_agg(case when time_index >= 2 then value end,',')
    -- a single distinct value is shown as-is
    WHEN count(distinct case when time_index >= 2 then value end) = 1
    THEN max(case when time_index >= 2 then value end)
    -- every value of this key is numeric: sum them
    WHEN sum(1-isnumeric(value)) = 0 -- numbers
    THEN cast(sum(case when time_index >= 2 then numeric_value end) as nvarchar(30))
    -- anything else (differing non-numeric values) falls through to NULL
  END AS [TimeIndex2Plus]
from (
   select [time_index], B.*
   , try_cast(value as decimal(20)) numeric_value
   from DATA AS A
   cross apply (
      -- one (Key, Value) pair per column of the current row
      select [Key], [Value]
      from OpenJson( (Select A.* For JSON Path, Without_Array_Wrapper ) )
      where [Key] not in ('time_index')
   ) B
) C
Group By [Key];
GO
Element | TimeIndex1 | TimeIndex2Plus
:---------------- | :--------- | :-------------
cohort | 50 | 100
id | 618830 | 618686,618144
legal_entity_code | AAA | null
other1 | X1234 | Y5678
db<>fiddle here

How to calculate average in SQL?

lets say I have the following table:
**FOOD** | **AMOUNT**
Bread | 2
Banana | 5
Pizza | 4
Apple | 57
Mandarin| 9
Orange | 8
Final result:
Bread | Percentage Of Total
Banana | percentage of total
etc
etc
I tried it in every single way, but couldn't find a solution. I hope someone can help me.
Using ANSI SQL (and SQL Server supports this syntax), you can do:
-- Per-food total and its share of the grand total.
select food,
       sum(amount) as total_amount,
       -- * 1.0 forces decimal arithmetic (integer / integer truncates on SQL Server);
       -- NULLIF turns a zero grand total into NULL, avoiding a divide-by-zero error
       sum(amount) * 1.0 / nullif(sum(sum(amount)) over (), 0) as proportion_of_total
from t
group by food;
Note: Some databases do integer division, so you may need to convert to a floating point or fixed point type.
We can also try like below-
-- Sample data in a table variable (the name must start with @).
DECLARE @tbl AS TABLE
(
    food VARCHAR(15)
    ,amount INT
);

INSERT INTO @tbl (food, amount) VALUES
 ('bread', 2)
,('banana', 5)
,('pizza', 4)
,('apple', 57)
,('mandarin', 9)
,('orange', 8);

-- Window sums are computed on every row; DISTINCT collapses them to one row per food.
SELECT
    DISTINCT
     food
    ,SUM(amount) OVER() TotalAmount
    ,SUM(amount) OVER (PARTITION BY food) PerFoodTotal
    -- 100. (a decimal literal) keeps the division from being integer division
    ,CAST(SUM(amount) OVER (PARTITION BY food) * 100. / (SUM(amount) OVER()) AS DECIMAL(10,2)) [Percentage Of Total]
FROM @tbl;
OUTPUT
food TotalAmount PerFoodTotal Percentage Of Total
--------------- ----------- ------------ ---------------------------------------
apple 85 57 67.06
banana 85 5 5.88
bread 85 2 2.35
mandarin 85 9 10.59
orange 85 8 9.41
pizza 85 4 4.71
(6 row(s) affected)
You can try something like this:
-- Sample data in a table variable (the name must start with @).
declare @tbl as table (
    food varchar(15)
    ,amount int
);

insert into @tbl (food, amount) values
 ('bread', 2)
,('banana', 5)
,('pizza', 4)
,('apple', 57)
,('mandarin', 9)
,('orange', 8);

-- Grand total on its own.
select SUM(amount) from @tbl;

-- Per-food amount, its percentage of the grand total, and the grand total.
select
     food
    ,SUM(amount) as [food amount]
    -- both operands are cast to numeric so the division is not integer division
    ,(SUM(cast(amount as numeric(18,2))) / (select sum(cast(amount as numeric(18,2))) from @tbl)) * 100 as [Percentage Of Total]
    ,(select sum(amount) from @tbl) as total
from @tbl
group by food;
Here is a way of getting the PercentageOfTotal, assuming that the sum of all amounts will not be 0:
-- Grand total of AMOUNT, computed once (scalar variables must be prefixed with @).
DECLARE @total INT = (SELECT SUM(AMOUNT) FROM Table1);

-- Each row's AMOUNT as a percentage of the grand total, to two decimals.
-- NULLIF yields NULL where a zero total would raise a divide-by-zero error.
SELECT FOOD,
       CAST(CAST(100 * AMOUNT AS DECIMAL(18,2)) / NULLIF(@total, 0) AS DECIMAL(18,2)) AS PercentageOfTotal
FROM Table1;
SQL Fiddle
MS SQL Server 2014 Schema Setup:
-- Genre names.
CREATE TABLE MusicGenres (
    name varchar(10)
);

INSERT INTO MusicGenres (name)
VALUES
    ('Pop'),
    ('Techno'),
    ('Trance'),
    ('trap'),
    ('Hardcore'),
    ('Electro');

-- Songs, each tagged with a genre name.
CREATE TABLE Table2 (
    SongID      int,
    MusicGenres varchar(10)
);

INSERT INTO Table2 (SongID, MusicGenres)
VALUES
    (1,  'Hardcore'),
    (2,  'Hardcore'),
    (3,  'Pop'),
    (4,  'Trap'),
    (5,  'Hardcore'),
    (6,  'Pop'),
    (7,  'Electro'),
    (8,  'Electro'),
    (9,  'Pop'),
    (10, 'Pop'),
    (11, 'Pop');
Query 1:
-- Song count per genre (genres without songs count 0) and each genre's
-- percentage of all counted songs.
SELECT s1.name
  , s1.recCount
    -- 100.0 forces decimal arithmetic; the original cast the total to
    -- decimal(5,2), which overflows once the total reaches 1000.
    -- NULLIF returns NULL where a zero total would raise a divide-by-zero error.
  , CAST( 100.0 * s1.recCount / NULLIF( SUM(s1.recCount) OVER(), 0 ) AS decimal(7,4) ) AS pct
FROM (
  SELECT m.name
    , count(t.SongID) AS recCount
  FROM MusicGenres m
  LEFT OUTER JOIN Table2 t ON m.name = t.MusicGenres
  GROUP BY m.name
) s1
Could be shortened to
-- Same result without the derived table: the window SUM runs over the grouped counts.
SELECT m.name
  , count(t.SongID) AS recCount
    -- 100.0 forces decimal arithmetic; the original cast the total to
    -- decimal(5,2), which overflows once the total reaches 1000.
    -- NULLIF returns NULL where a zero total would raise a divide-by-zero error.
  , CAST( 100.0 * count(t.SongID) / NULLIF( SUM(count(t.SongID)) OVER(), 0 ) AS decimal(7,4) ) AS pct
FROM MusicGenres m
LEFT OUTER JOIN Table2 t ON m.name = t.MusicGenres
GROUP BY m.name
Results:
| name | recCount | pct |
|----------|----------|---------|
| Electro | 2 | 18.1818 |
| Hardcore | 3 | 27.2727 |
| Pop | 5 | 45.4545 |
| Techno | 0 | 0 |
| Trance | 0 | 0 |
| trap | 1 | 9.0909 |

SQL query create cross column

I have this table
customer | product | quantity
-------------------------------
CLI01 | A | 10
CLI01 | B | 20
CLI02 | A | 31
CLI03 | A | 10
CLI03 | C | 12
and I want to create in SQL Server this output:
customer | crossProduct | quantity
-----------------------------------
CLI01 | A+B | 30
CLI02 | Only A | 31
CLI03 | A+C | 22
Thanks in advance
Niko
If you only care about two products, then this is simple aggregation:
-- One row per customer: a label built from the smallest and largest product
-- value (or a fixed label when more than two distinct products exist) and the
-- summed quantity.
SELECT
    customer,
    CASE
        WHEN COUNT(DISTINCT product) > 2 THEN 'Lots of Products'
        WHEN MIN(product) = MAX(product) THEN 'Only ' + MIN(product)
        ELSE MIN(product) + '+' + MAX(product)
    END AS crossproduct,
    SUM(quantity)
FROM t
GROUP BY customer;
If you care about more than two products, then you'll need to do aggregation string concatenation. That is a bit painful in SQL Server. Start by Googling "sql server aggregate string concatenation".
This is a sample:
----- Test Data ----------
-- Table variable names must start with @.
DECLARE @TestData TABLE (customer VARCHAR(10), product VARCHAR(10), quantity INT);
INSERT INTO @TestData (customer, product, quantity)
SELECT 'CLI01','A',10 UNION ALL
SELECT 'CLI01','B',20 UNION ALL
SELECT 'CLI02','A',31 UNION ALL
SELECT 'CLI03','A',10 UNION ALL
SELECT 'CLI03 ','C',12;
----- Query -------------
-- c.product holds the customer's products joined as 'A+B+'; LEFT(..., LEN - 1)
-- drops the trailing '+'. A single-product customer gets the 'Only ' prefix.
SELECT t.customer
      ,CASE WHEN COUNT(DISTINCT t.product) = 1 THEN 'Only ' ELSE '' END
       + LEFT(c.product, LEN(c.product) - 1) AS Product
      ,SUM(t.quantity) AS quantity
FROM @TestData AS t
CROSS APPLY (
    SELECT a.product + '+'
    FROM @TestData AS a
    WHERE a.customer = t.customer
    ORDER BY a.product   -- without ORDER BY the concatenation order is undefined
    FOR XML PATH('')
) c(product)
GROUP BY t.customer, c.product
ORDER BY t.customer;
customer Product quantity
CLI01 A+B 30
CLI02 Only A 31
CLI03 A+C 22
Have you tried using stuff? This will give you what you need. Works with as many products as necessary, from sql 2008 onwards.
-- Sample table and rows.
CREATE TABLE x (customer VARCHAR (20), product CHAR(1), quantity INT );

INSERT INTO x (customer, product, quantity)
VALUES( 'CLI01', 'A', 10),
( 'CLI01', 'B', 20),
( 'CLI02', 'A', 31),
( 'CLI03', 'A', 10),
( 'CLI03', 'C', 12);

-- Per customer: the products joined with '+' and the summed quantity.
-- The inner query emits '+A+B'; STUFF(..., 1, 1, '') removes the leading '+'.
SELECT x1.customer, x3.Products, SUM(x1.quantity) AS quantity
FROM x x1
CROSS APPLY ( SELECT Products = STUFF( (SELECT '+' + x2.product AS [text()]
                                        FROM x x2
                                        WHERE x2.customer = x1.customer
                                        ORDER BY x2.product   -- fixes the otherwise undefined concatenation order
                                        FOR XML PATH ('') ), 1, 1, '') ) x3
GROUP BY x1.customer, x3.Products;

How to pivot rows to columns with known max number of columns

I have a table structured as such:
Pricing_Group
GroupID | QTY
TestGroup1 | 1
TestGroup1 | 2
TestGroup1 | 4
TestGroup1 | 8
TestGroup1 | 22
TestGroup2 | 2
TestGroup3 | 2
TestGroup3 | 5
What I'm looking for is a result like this:
Pricing_Group
GroupID | QTY1 | QTY2 | QTY3 | QTY4 | QTY5
TestGroup1 | 1 | 2 | 4 | 8 | 22
TestGroup2 | 2 | NULL | NULL | NULL | NULL
TestGroup3 | 2 | 5 | NULL | NULL | NULL
Note that there can only ever be a maximum of 5 different quantities for a given GroupID, there's just no knowing what those 5 quantities will be.
This seems like an application of PIVOT, but I can't quite wrap my head around the syntax that would be required for an application like this.
Thanks for taking the time to look into this!
Perfect case for pivot and you don't need a CTE:
-- Sample data in a table variable (the name must start with @).
Declare @T Table (GroupID varchar(10) not null,
                  QTY int);

Insert Into @T (GroupID, QTY)
Values ('TestGroup1', 1),
       ('TestGroup1', 2),
       ('TestGroup1', 4),
       ('TestGroup1', 8),
       ('TestGroup1', 22),
       ('TestGroup2', 2),
       ('TestGroup3', 2),
       ('TestGroup3', 5);

-- Label each group's quantities QTY1, QTY2, ... in ascending QTY order, then
-- pivot those labels into columns.
Select GroupID, [QTY1], [QTY2], [QTY3], [QTY4], [QTY5]
From (Select GroupID, QTY,
             -- explicit length: do not rely on the default length of a bare varchar
             RowID = 'QTY' + Cast(ROW_NUMBER() Over (Partition By GroupID Order By QTY) as varchar(10))
      From @T) As Pvt
Pivot (Min(QTY)
       For RowID In ([QTY1], [QTY2], [QTY3], [QTY4], [QTY5])
      ) As Pvt2;
You can pivot on a generated rank;
-- Rank each group's quantities in ascending order, then pivot ranks 1-5 into columns.
;WITH ranked AS (
    SELECT
        RANK() OVER (PARTITION BY GroupID ORDER BY GroupID, QTY) AS [rank],
        GroupID,
        QTY
    FROM THE_TABLE
)
SELECT GroupID, [1], [2], [3], [4], [5]
FROM ranked
PIVOT (
    MAX(QTY)
    FOR [rank] IN ([1], [2], [3], [4], [5])
) AS pvt
>>
GroupID 1 2 3 4 5
----------------------------------------
TestGroup1 1 2 4 8 22
TestGroup2 2 NULL NULL NULL NULL
TestGroup3 2 5 NULL NULL NULL
You can also use case statement to perform the pivot:
-- Sample data in a table variable (the name must start with @).
declare @t table ( GroupID varchar(25), QTY int);

insert into @t (GroupID, QTY)
values ('TestGroup1', 1),
       ('TestGroup1', 2),
       ('TestGroup1', 4),
       ('TestGroup1', 8),
       ('TestGroup1', 22),
       ('TestGroup2', 2),
       ('TestGroup3', 2),
       ('TestGroup3', 5);

-- Number each group's quantities in ascending order ...
;with cte_Stage (r, GroupId, QTY)
as ( select row_number() over(partition by GroupId order by QTY ),
            GroupId,
            QTY
     from @t
   )
-- ... and spread positions 1-5 across columns with conditional aggregation.
-- QTYX sums whatever lies beyond the fifth position.
select GroupId,
       [QTY1] = sum(case when r = 1 then QTY else null end),
       [QTY2] = sum(case when r = 2 then QTY else null end),
       [QTY3] = sum(case when r = 3 then QTY else null end),
       [QTY4] = sum(case when r = 4 then QTY else null end),
       [QTY5] = sum(case when r = 5 then QTY else null end),
       [QTYX] = sum(case when r > 5 then QTY else null end)
from cte_Stage
group
by GroupId;