How to calculate average in SQL? - sql

Let's say I have the following table:
**FOOD** | **AMOUNT**
Bread | 2
Banana | 5
Pizza | 4
Apple | 57
Mandarin| 9
Orange | 8
Final result:
Bread | Percentage Of Total
Banana | percentage of total
etc
etc
I tried it in every single way, but couldn't find a solution. I hope someone can help me.

Using ANSI SQL (and SQL Server supports this syntax), you can do:
-- Per-food total and its share of the grand total.
-- The inner sum(amount) is the per-food total; sum(...) over () then adds
-- those per-food totals across all groups to give the grand total.
select food, sum(amount),
       -- * 1.0 forces decimal arithmetic: with integer amounts, engines that
       -- do integer division would otherwise truncate every ratio to 0.
       sum(amount) * 1.0 / sum(sum(amount)) over () as proportion_of_total
from t
group by food;
Note: Some databases do integer division, so you may need to convert to a floating point or fixed point type.

We can also try like below-
-- Sample data in a table variable (T-SQL variable names must start with @).
DECLARE @tbl AS TABLE
(
    food VARCHAR(15)
    ,amount INT
)

INSERT INTO @tbl (food, amount) VALUES
 ('bread', 2)
,('banana', 5)
,('pizza', 4)
,('apple', 57)
,('mandarin', 9)
,('orange', 8)

-- The window functions produce one value per input row; DISTINCT collapses
-- the result to one row per food.
SELECT
    DISTINCT
    food
    ,SUM(amount) OVER () TotalAmount
    ,SUM(amount) OVER (PARTITION BY food) PerFoodTotal
    -- Multiplying by 100. (a decimal literal) avoids integer division.
    ,CAST(SUM(amount) OVER (PARTITION BY food) * 100. / (SUM(amount) OVER ()) AS DECIMAL(10,2)) [Percentage Of Total]
FROM @tbl
OUTPUT
food TotalAmount PerFoodTotal Percentage Of Total
--------------- ----------- ------------ ---------------------------------------
apple 85 57 67.06
banana 85 5 5.88
bread 85 2 2.35
mandarin 85 9 10.59
orange 85 8 9.41
pizza 85 4 4.71
(6 row(s) affected)

You can try something like this:
-- Sample data in a table variable (T-SQL variable names must start with @).
declare @tbl as table (
    food varchar(15)
    ,amount int
)

insert into @tbl (food, amount) values
 ('bread', 2)
,('banana', 5)
,('pizza', 4)
,('apple', 57)
,('mandarin', 9)
,('orange', 8)

-- Grand total on its own, for reference.
select SUM(amount) from @tbl

-- Per-food amount, its percentage of the grand total, and the grand total.
select
    food
    ,SUM(amount) as [food amount]
    -- Both operands are cast to numeric so the ratio is not truncated by integer division.
    ,(SUM(cast(amount as numeric(18,2))) / (select sum(cast(amount as numeric(18,2))) from @tbl)) * 100 as [Percentage Of Total]
    ,(select sum(amount) from @tbl) as total
from @tbl
group by food

Here is a way of getting the PercentageOfTotal, assuming that the sum of all amounts is not 0:
-- Grand total of AMOUNT, computed once (T-SQL variable names must start with @).
DECLARE @total INT = (SELECT SUM(AMOUNT) FROM Table1)

-- Each row's AMOUNT as a percentage of the grand total, rounded to 2 decimals.
-- NULLIF turns a zero total into NULL, so the query yields NULL percentages
-- instead of failing with a divide-by-zero error.
SELECT FOOD,
       CAST((CAST((100 * AMOUNT) AS DECIMAL (18,2)) / NULLIF(@total, 0)) AS DECIMAL(18,2)) AS PercentageOfTotal
FROM Table1

SQL Fiddle
MS SQL Server 2014 Schema Setup:
CREATE TABLE MusicGenres (name varchar(10)) ;
INSERT INTO MusicGenres (name)
VALUES ('Pop'),('Techno'),('Trance'),('trap'),('Hardcore'),('Electro') ;
CREATE TABLE Table2 (SongID int, MusicGenres varchar(10)) ;
INSERT INTO Table2 (SongID, MusicGenres)
VALUES (1,'Hardcore')
,(2,'Hardcore')
,(3,'Pop')
,(4,'Trap')
,(5,'Hardcore')
,(6,'Pop')
,(7,'Electro')
,(8,'Electro')
,(9,'Pop')
,(10,'Pop')
,(11,'Pop')
;
Query 1:
SELECT s1.name
, s1.recCount
, ( s1.recCount / CAST( ( SUM(recCount) OVER() ) AS decimal(5,2) ) )*100 AS pct
FROM (
SELECT m.name
, count(t.SongID) AS recCount
FROM MusicGenres m
LEFT OUTER JOIN Table2 t ON m.name = t.MusicGenres
GROUP BY m.name
) s1
Could be shortened to
SELECT m.name
, count(t.SongID) AS recCount
, ( count(t.SongID) / CAST( ( SUM(count(t.SongID)) OVER() ) AS decimal(5,2) )
)*100 AS pct
FROM MusicGenres m
LEFT OUTER JOIN Table2 t ON m.name = t.MusicGenres
GROUP BY m.name
Results:
| name | recCount | pct |
|----------|----------|---------|
| Electro | 2 | 18.1818 |
| Hardcore | 3 | 27.2727 |
| Pop | 5 | 45.4545 |
| Techno | 0 | 0 |
| Trance | 0 | 0 |
| trap | 1 | 9.0909 |

Related

In T-SQL, What is the best way to find % of male customers by area

Suppose I have a table with area, customer, and customer's sex info, and I want to find out the % of male customers in each area. What's the best way to come up with that?
create table temp(area_id varchar(10),customer_id varchar(10),customer_sex varchar(10))
insert into temp select 1,1,'male'
insert into temp select 1,1,'male'
insert into temp select 1,1,'female'
insert into temp select 1,1,'female'
insert into temp select 2,1,'male'
insert into temp select 2,1,'female'
insert into temp select 2,1,'female'
insert into temp select 3,1,'male'
insert into temp select 3,1,'female'
insert into temp select 4,1,'male'
insert into temp select 5,1,'female'
select * from temp
The result should be like below:
; WITH x AS
(
select
area_id
, count(*) AS total_customers
, SUM(CASE WHEN customer_sex = 'male' THEN 1 ELSE 0 END) AS total_male_customers
FROM temp
GROUP BY area_id
)
SELECT
area_id
, total_customers
, total_male_customers
, CASE WHEN total_male_customers > 0 THEN CAST( (total_male_customers * 100.0) / total_customers AS DECIMAL(6,2)) ELSE 0 END AS Male_percentage
From x
Group by and case will provide your results:
SELECT area_id, count(customer_id) as Total_Customers, Total_Male_Customers = sum(case when customer_sex = 'male' then 1 else 0 end),
Format(sum(case when customer_sex = 'male' then 1 else 0 end)/(count(customer_id)*1.0),'P') as MaleCustomers
FROM dbo.temp
GROUP BY area_id
HAVING sum(case when customer_sex = 'male' then 1 else 0 end) > 0
FORMAT is fine for a smaller dataset; on larger ones it has performance issues, so there you can do the multiplication yourself and concatenate the % symbol.
Output as below:
+---------+-----------------+----------------------+---------------+
| area_id | Total_Customers | Total_Male_Customers | MaleCustomers |
+---------+-----------------+----------------------+---------------+
| 1 | 4 | 2 | 50.00 % |
| 2 | 3 | 1 | 33.33 % |
| 3 | 2 | 1 | 50.00 % |
| 4 | 1 | 1 | 100.00 % |
+---------+-----------------+----------------------+---------------+
Use IIF (SQL Server 2012+), otherwise CASE: group by area and compute sum of males / count of all customers * 100.
Adding + 0.0 makes the sum of males and the count of all customers float or decimal rather than integer, so the division gives the correct result.
select area_id,count(customer_id) [Total Customers],
sum(iif(customer_sex='male',1,0)) [Total Males],
cast(cast(((sum(iif(customer_sex='male',1,0)) + 0.0) / (count(customer_sex) + 0.0)) * 100 as decimal(18,1)) as varchar(10)) + '%' [percentage of males]
from temp
group by area_id
This will do:
select x.area_id, x.total, x.m, cast(CONVERT(DECIMAL(10,2), x.m * 100.0 / x.total) as nvarchar(max)) + '%'
from
(
select t.area_id, count(1) total, sum(iif(t.customer_sex = 'male', 1, 0)) m
from #temp t
group by t.area_id
)x

SQL query create cross column

I have this table
customer | product | quantity
-------------------------------
CLI01 | A | 10
CLI01 | B | 20
CLI02 | A | 31
CLI03 | A | 10
CLI03 | C | 12
and I want to create in SQL Server this output:
customer | crossProduct | quantity
-----------------------------------
CLI01 | A+B | 30
CLI02 | Only A | 31
CLI03 | A+C | 22
Thanks in advance
Niko
If you only care about two products, then this is simple aggregation:
select customer,
(case when count(distinct product) > 2 then 'Lots of Products'
when min(product) = max(product) then 'Only ' + min(product)
else min(product) + '+' + max(product)
end) as crossproduct,
sum(quantity)
from t
group by customer;
If you care about more than two products, then you'll need to do aggregation string concatenation. That is a bit painful in SQL Server. Start by Googling "sql server aggregate string concatenation".
This is a sample:
----- Test Data ----------
-- Table variable names must start with @ in T-SQL.
DECLARE @TestData TABLE (customer VARCHAR(10),product VARCHAR(10),quantity INT)
INSERT INTO @TestData (customer, product, quantity)
SELECT 'CLI01','A',10 UNION ALL
SELECT 'CLI01','B',20 UNION ALL
SELECT 'CLI02','A',31 UNION ALL
SELECT 'CLI03','A',10 UNION ALL
SELECT 'CLI03','C',12   -- stray trailing space removed from the customer literal
----- Query -------------
-- c.product is the customer's product list built by FOR XML PATH(''), each
-- item followed by '+'; LEFT(..., LEN(...)-1) strips the trailing '+'.
-- Customers with a single distinct product get the 'Only ' prefix.
SELECT customer,
       CASE WHEN COUNT(DISTINCT t.product)=1 THEN 'Only ' ELSE '' END + LEFT(c.product,LEN(c.product)-1) AS Product,
       SUM(quantity) AS quantity
FROM @TestData AS t
CROSS APPLY (SELECT a.product+'+' FROM @TestData AS a WHERE a.customer=t.customer FOR XML PATH('')) c(product)
GROUP BY customer,c.product
ORDER BY t.customer
customer Product quantity
CLI01 A+B 30
CLI02 Only A 31
CLI03 A+C 22
Have you tried using stuff? This will give you what you need. Works with as many products as necessary, from sql 2008 onwards.
CREATE TABLE x (customer VARCHAR (20), product CHAR(1), quantity INT )
INSERT INTO x
VALUES( 'CLI01', 'A', 10),
( 'CLI01', 'B', 20),
( 'CLI02', 'A', 31),
( 'CLI03', 'A', 10),
( 'CLI03', 'C', 12)
SELECT x1.customer, x3.Products, SUM(x1.quantity)
FROM x x1
CROSS APPLY ( SELECT Products = STUFF( (select '+' + product AS [text()]
FROM x x2
WHERE x2.customer = x1.customer
FOR XML PATH ('') ), 1, 1,'') ) x3
GROUP BY x1.customer, x3.Products

GROUP BY with summing null values to every group

I want to group values and sum them up by a category value - or none.
For example, I have the following table:
+-------+----------+
| value | category |
+-------+----------+
| 99 | A |
| 42 | A |
| 76 | [NULL] |
| 66 | B |
| 10 | C |
| 13 | [NULL] |
| 27 | C |
+-------+----------+
My desired result should look like this:
+-------+----------+
| sum | category |
+-------+----------+
| 230 | A |
| 155 | B |
| 126 | C |
| 89 | [NULL] |
+-------+----------+
I tried a group by category but obviously this doesn't bring up the right numbers.
Any ideas?
I'm using SQL Server 2012.
EDIT:
Ok, as requested, I can explain my intents and give my query so far, although that is not very helpful I think.
I need to sum all value for the given categories AND add the sum of all values without a category [=> NULL]
So in my example, I would sum
99 + 42 + 76 + 13 = 230 for category A
66 + 76 + 13 = 155 for category B
10 + 27 + 76 + 13 = 126 for category C
76 + 13 = 89 for no category
I hope that gives you an idea of my goal.
Query so far:
SELECT SUM([value]), [category]
FROM [mytable]
GROUP BY [category]
First calculate the sum of nulls then add it to each group:
-- Sample data in a table variable (T-SQL variable names must start with @).
DECLARE @t TABLE
    (
      value INT ,
      category CHAR(1)
    )

INSERT INTO @t (value, category)
VALUES  ( 99, 'A' ),
        ( 42, 'A' ),
        ( 76, NULL ),
        ( 66, 'B' ),
        ( 10, 'C' ),
        ( 13, NULL ),
        ( 27, 'C' )

-- cte yields exactly one row: the total of all rows without a category.
-- COALESCE keeps it at 0 when there are no such rows, so the addition below
-- does not turn every group sum into NULL.
;with cte as (select coalesce(sum(value), 0) as s from @t where category is null)
select category, sum(value) + s
from @t
cross join cte           -- attach the uncategorised total to every row
where category is not null
group by category, s
Another version:
-- For every row: its category total plus the total of all uncategorised rows.
-- @t is the value/category table variable (T-SQL variable names start with @);
-- it must be declared earlier in the same batch.
;WITH cte AS (
    SELECT category,
           SUM(value) OVER (PARTITION BY category)
           + SUM(CASE WHEN category IS NULL THEN value ELSE 0 END) OVER () AS value
    FROM @t
)
-- DISTINCT collapses the per-row results to one row per category.
SELECT DISTINCT * FROM cte WHERE category IS NOT NULL
If you want to add the NULL values to all the groups, then do something like:
-- cte: one row per category (including the NULL category) with its sum.
with cte as (
      select category, sum(value) as sumv
      from t
      group by category
     )
-- Join every category row to the NULL-category row and add its sum to the
-- non-NULL categories only (the NULL row keeps its own sum).
select cte.category,
       (cte.sumv +
        -- category must be qualified: both cte and ctenull expose it.
        -- coalesce needs a fallback argument; 0 covers "no NULL-category rows".
        (case when cte.category is not null then coalesce(ctenull.sumv, 0) else 0 end)
       ) as sumWithNulls
from cte left join
     cte ctenull
     on ctenull.category is null  -- or should that be `= '[NULL]'`?
This seems like a strange operation.
EDIT:
You can almost do this with window functions:
select category,
(sum(value) +
sum(case when category is null then sum(value) else 0 end) over ()
) as sumWithNulls
from t
group by category;
The problem is that NULLs get over counted for that category. So:
-- Per-category sum plus the NULL-category sum, added only to non-NULL
-- categories so the NULL group is not counted twice.
select category,
       (sum(value) +
        (case when category is not null
              -- windowed sum over the grouped rows picks out the NULL group's total
              then sum(case when category is null then sum(value) else 0 end) over ()
              else 0
         end)   -- closing parenthesis of the case expression was missing
       ) as sumWithNulls
from t
group by category;
You want to get the sum of the NULL category and add it to the value of the other (non-null) categories:
-- Sample data and a scalar holding the total of the uncategorised rows
-- (T-SQL variable names must start with @).
DECLARE @Table1 TABLE (Value int, Category varchar(1))
DECLARE @NullCategorySum int

INSERT INTO @Table1
(Value, Category)
VALUES
(99, 'A'),
(42, 'A'),
(76, NULL),
(66, 'B'),
(10, 'C'),
(13, NULL),
(27, 'C')

-- COALESCE keeps the variable at 0 when no row has a NULL category.
SELECT @NullCategorySum = COALESCE(SUM(Value), 0)
FROM @Table1
WHERE Category IS NULL

-- Add the uncategorised total to every real category. ELSE 0 is required:
-- without it the CASE yields NULL for the NULL category and its SumValue
-- would come out as NULL instead of its own total.
SELECT SUM(t1.Value)
       + CASE
             WHEN Category IS NOT NULL THEN @NullCategorySum
             ELSE 0
         END
       AS SumValue, Category
FROM @Table1 t1
GROUP BY Category
This outputs
SumValue Category
89 NULL
230 A
155 B
126 C
Maybe you just missed using the SUM built-in function? This should work:
SELECT
SUM(value) AS [sum], category
FROM
[YourTableHere]
GROUP BY category
Edit: Ah, I see what you are doing now. I was able to do it by joining a 2nd query with just the NULL sum, so the NULL sum comes back with every row. Then you can just add it in the final step.
SELECT
MainSet.sum + JustNulls.sum AS [sum], MainSet.category
FROM
(SELECT SUM(X.value) AS [sum], X.category FROM [YourTableHere] X
WHERE X.category IS NOT NULL GROUP BY category
UNION SELECT 0, NULL) MainSet
FULL JOIN
(SELECT SUM(Y.value) AS [sum], Y.category FROM [YourTableHere] Y
WHERE Y.category IS NULL GROUP BY category) JustNulls ON 1=1
Similar to levelonehuman's answer, but might be a little faster:
-- Total of the rows with no mimeType, computed once (T-SQL variable names
-- must start with @). isnull keeps it at 0 when there are no such rows, so
-- the addition below never turns a group sum into NULL.
declare @countNull int = (select isnull(sum(textUniqueWordCount), 0) from docSVsys where mimeType is null);

-- One row per real mimeType with the NULL total added in ...
select mimeType, sum(isnull(textUniqueWordCount, 0)) + @countNull as [sum]
from docSVsys
where mimeType is not null
group by mimeType
-- ... plus one row for the NULL group itself. The two branches can never
-- produce the same row, so union all avoids a pointless de-duplication step.
union all
select null, @countNull;

Which product sales is Increasing year wise?

Table name is SALES
**PROD_ID** **YEAR** **QUANTITY**
P1 2012 50
P1 2013 40
P1 2014 30
P2 2012 20
P2 2013 30
P2 2014 40
Output should be P2 but how..?
How about this?
select prod_id
from sales
group by prod_id
having (sum(case when year = 2014 then quantity else 0 end) >
sum(case when year = 2012 then quantity else 0 end)
);
A slightly complex way to accomplish this with ctes.
Fiddle with sample data
with diff as (
select prod_id ,
case when quantity - nvl(lag(quantity) over(partition by prod_id order by yr),0) > 0
then 1 else 0 end as df
from sales
)
,totdiff as (select prod_id, sum(df) totdf from diff group by prod_id)
, totals as (select prod_id, count(*) cnt from sales group by prod_id)
select d.prod_id
from totdiff d join totals t on t.prod_id = d.prod_id and d.totdf = t.cnt
Edit: as suggested by @shawnt00 in the comments, the query could be simplified to
with diff as (
select prod_id ,
case when quantity - nvl(lag(quantity) over(partition by prod_id order by yr),0) > 0
then 1 else 0 end as df
from sales
)
select prod_id
from diff
group by prod_id
having count(*) = sum(df)
This question can be approached with 2 steps
First, create a column to calculate difference of current year sales from previous year using lag function from windows, and then another column to calculate the distinct number of years for each PROD_ID
Second, Group the data using a group by clause on PROD_ID and filter the correct products only if all the distinct years had a positive sales compared to last year.
Data Table -
+---------+------+-------+
| PROD_ID | Year | Sales |
+---------+------+-------+
| P1 | 2012 | 50 |
| P1 | 2013 | 40 |
| P1 | 2014 | 30 |
| P2 | 2012 | 20 |
| P2 | 2013 | 30 |
| P2 | 2014 | 40 |
+---------+------+-------+
Query -
select PROD_ID
from
(
select
PROD_ID, sales,
sales - LAG(sales,1,0) over (partition by PROD_ID order by year asc) as diff,
count(year) over (partition by PROD_ID) as num_of_years
from sales
) inner_tbl
group by PROD_ID,num_of_years
having SUM(CASE WHEN diff > 0 THEN 1 ELSE 0 END) = num_of_years
Inner query output -
+---------+--------+------+--------------+
| PROD_ID | sales | diff | num_of_years |
+---------+--------+------+--------------+
| P1 | 50 | 50 | 3 |
| P1 | 40 | -10 | 3 |
| P1 | 30 | -10 | 3 |
| P2 | 20 | 20 | 3 |
| P2 | 30 | 10 | 3 |
| P2 | 40 | 10 | 3 |
+---------+--------+------+--------------+
Final output -
+---------+
| PROD_ID |
+---------+
| P2 |
+---------+
I know it's a very old question; posting an answer since I was able to solve it in a different way.
create table sales (prod_id varchar(10), yr int, quantity int);
insert into sales values ('P1',2012 , 50);
insert into sales values ('P1', 2013, 40);
insert into sales values ('P1', 2014, 30);
insert into sales values ('P2', 2012, 20);
insert into sales values ('P2', 2013, 30);
insert into sales values ('P2', 2014, 40);
with next_year_sales as
(
select s.prod_id, s.yr, nvl(s1.yr,0) as prev_yr, s.quantity, nvl(s1.quantity,0) as prev_qty from sales s
left outer join sales s1 on s.prod_id = s1.prod_id and s.yr = s1.yr+1
),
flag_high_sales as
(
select prod_id, yr, case when prev_yr=0 then 1 when quantity > prev_qty then 1 else 0 end as flag from next_year_sales A
)
select prod_id, min(flag) from flag_high_sales group by prod_id having min(flag)=1;
I can think of 3 ways of doing it :
select a.prod_id from
(
select
prod_id,
CASE WHEN quantity > coalesce(lag(quantity) over(partition by prod_id order by year asc),0) THEN 1 ELSE 0 END as val
FROM
sales
) a
group by a.prod_id
having sum(a.val) = count(prod_id)
;
-- Products whose quantity rose every year: the smallest year-over-year
-- difference must be strictly positive.
select a.prod_id from
(
    select
        prod_id,
        -- difference to the previous year's quantity; the first year is
        -- compared against 0 because lag() returns NULL there
        quantity - coalesce(lag(quantity) over(partition by prod_id order by year asc),0) as val
    FROM
        sales
) a
group by a.prod_id
-- > 0 rather than >= 0: a year with unchanged quantity is not an increase,
-- which matches the strict comparison used by the sibling queries.
having min(a.val) > 0
;
select a.prod_id from
(
select
prod_id,
year - dense_rank() over(partition by prod_id order by quantity asc) as cal
FROM
sales
) a
group by a.prod_id
having count(distinct cal)=1
;
The following solution uses CTE and OVER CLAUSE -
WITH Sales_CTE
AS (
SELECT n1.Prod_ID AS n1Prod_ID
,COUNT(n1.Year) OVER (PARTITION BY n1.Prod_ID) AS #CountYn1
,COUNT(n2.Year) OVER (PARTITION BY n1.Prod_ID) AS #CountYn2
FROM #Q2 n1
LEFT JOIN #Q2 n2 ON n1.Prod_ID = n2.Prod_ID
AND (n1.Year + 1) = n2.Year
AND n1.Quantity < n2.Quantity
)
SELECT DISTINCT n1Prod_ID AS [Product ID]
FROM Sales_CTE
WHERE #CountYn1 = (#CountYn2 + 1);

Select invoices based on quantity needed

I have a table that looks like this:
+---------------+---------------+------------------+--------------+
| InvoiceNumber | ProductNumber | ReceivedQuantity | ReceivedDate |
+---------------+---------------+------------------+--------------+
| INV001 | P001 | 500 | 09/01/2015 |
| INV002 | P001 | 600 | 09/02/2015 |
| INV003 | P001 | 700 | 09/03/2015 |
+---------------+---------------+------------------+--------------+
When a product is ordered, the system needs to know which invoices to take it from: first in, first out.
For example I need 1000 quantity of product number P001. It should select the following invoices. It does not display the last invoice since 500 + 600 is already sufficient quantity
+---------------+---------------+------------------+--------------+
| InvoiceNumber | ProductNumber | ReceivedQuantity | ReceivedDate |
+---------------+---------------+------------------+--------------+
| INV001 | P001 | 500 | 09/01/2015 |
| INV002 | P001 | 600 | 09/02/2015 |
+---------------+---------------+------------------+--------------+
I can replicate this by making a cursor and looping through the table but looking for the best way to achieve this. Any nudge to the right direction would help a lot.
I think you can use a query like this:
;WITH t As (
SELECT *
, ROW_NUMBER() OVER (ORDER BY ReceivedDate, InvoiceNumber) As RowNo
FROM yourTable
), firstOverflow AS (
SELECT TOP(1)
t1.RowNo
FROM t t1
LEFT JOIN
t t2 ON t1.ProductNumber = t2.ProductNumber AND t1.ReceivedDate >= t2.ReceivedDate
GROUP BY t1.RowNo, t1.InvoiceNumber, t1.ProductNumber, t1.ReceivedQuantity, t1.ReceivedDate
HAVING SUM(t2.ReceivedQuantity) >= 1000
ORDER BY SUM(t2.ReceivedQuantity) - 1000)
SELECT *
FROM t
JOIN
firstOverflow ON t.RowNo <= firstOverflow.RowNo;
A better solution is this:
-- Quantity that has to be covered (T-SQL variable names must start with @).
DECLARE @value int = 1000;

-- t : invoices numbered in first-in-first-out order.
WITH t AS (
    SELECT *
         , ROW_NUMBER() OVER (ORDER BY ReceivedDate, InvoiceNumber) AS seq
    FROM yourTable
),
-- s : each invoice with the running total of its product up to and including it.
s AS (
    SELECT t.InvoiceNumber, t.ProductNumber, t.ReceivedQuantity, t.ReceivedDate, SUM(tt.ReceivedQuantity) AS currentTotal
    FROM t
    LEFT JOIN
        t tt ON t.ProductNumber = tt.ProductNumber AND t.seq >= tt.seq
    GROUP BY t.InvoiceNumber, t.ProductNumber, t.ReceivedQuantity, t.ReceivedDate
),
-- st : seq = 1 marks the first invoice whose running total reaches @value.
-- The CASE must use >= (not >) to agree with the final filter below;
-- otherwise an invoice that reaches @value exactly is skipped and the next
-- one is returned instead. Rows below @value get NULL, which SQL Server
-- orders last under DESC.
st AS (
    SELECT *
         , ROW_NUMBER() OVER (ORDER BY (CASE WHEN s.currentTotal >= @value THEN -currentTotal ELSE NULL END) DESC) AS seq
    FROM s
)
-- Every invoice that is still short of @value ...
SELECT st.InvoiceNumber, st.ProductNumber, st.ReceivedQuantity, st.ReceivedDate
FROM st
WHERE currentTotal < @value
UNION ALL
-- ... plus the single invoice that completes it.
SELECT st.InvoiceNumber, st.ProductNumber, st.ReceivedQuantity, st.ReceivedDate
FROM st
WHERE currentTotal >= @value AND st.seq = 1;
Try this query and give some feedback:
-- Sample invoices in a table variable (T-SQL variable names must start with @).
DECLARE @table TABLE (InvoiceNumber nvarchar(100),
                      ProductNumber nvarchar(100),
                      ReceivedQuantity int)

INSERT INTO @table (InvoiceNumber, ProductNumber, ReceivedQuantity) VALUES ('inv001', 'p001', 500)
INSERT INTO @table (InvoiceNumber, ProductNumber, ReceivedQuantity) VALUES ('inv002', 'p001', 600)
INSERT INTO @table (InvoiceNumber, ProductNumber, ReceivedQuantity) VALUES ('inv003', 'p001', 600)
INSERT INTO @table (InvoiceNumber, ProductNumber, ReceivedQuantity) VALUES ('inv004', 'p001', 600)
SQL 2012:
-- Invoices needed to cover 1000 units, first in first out by InvoiceNumber.
-- @table is the sample table variable (T-SQL variable names start with @).
SELECT v.* FROM
(
    SELECT t.*,
           -- running total per product up to and including this invoice;
           -- ROWS framing keeps it a strict row-by-row running total
           SUM(ReceivedQuantity) OVER (PARTITION BY ProductNumber
                                       ORDER BY InvoiceNumber
                                       ROWS UNBOUNDED PRECEDING) AS sum
    FROM @table t
) v
-- Keep an invoice while the total BEFORE it is still short of 1000. This
-- includes the invoice that crosses the threshold; filtering on sum <= 1000
-- would drop it and return too little stock (only 500 for the sample data).
WHERE sum - ReceivedQuantity < 1000
SQL 2008:
-- Invoices needed to cover 1000 units, first in first out by InvoiceNumber,
-- without windowed SUM: the running total comes from a self-join.
-- @table is the sample table variable (T-SQL variable names start with @).
SELECT v.* FROM
(
    SELECT
        a.InvoiceNumber
        , a.ProductNumber
        , a.ReceivedQuantity
        -- total of this invoice and every earlier invoice of the same product
        , SUM(b.ReceivedQuantity) AS sum
    FROM
        @table a
        INNER JOIN @table b
            ON a.InvoiceNumber >= b.InvoiceNumber AND a.ProductNumber = b.ProductNumber
    GROUP BY
        a.InvoiceNumber
        , a.ProductNumber
        , a.ReceivedQuantity
) v
-- Keep an invoice while the total BEFORE it is still short of 1000, so the
-- invoice that crosses the threshold is included (sum <= 1000 would drop it).
WHERE sum - ReceivedQuantity < 1000