Age intervals as row statistics with SQL - sql

I'm trying to create this summary statistic table, counting the number of individuals in each category
Agegroup | All | Female | Male
------------------------------------------------
All | 560594 | 34324 | 234244
< 20 | 4324 | 545 | 3456
20 - 30 | 76766 | 3424 | 32428
30 - 40 | 36766 | 764 | 82427
40 - 50 | 46766 | 4324 | 72422
50 - 60 | 66766 | 3424 | 52424
> 60 | 76766 | 43424 | 12423
from this table
PersonID | Age | Sex
----------------------------
A | 43 | F
B | 22 | F
C | 65 | M
D | 33 | F
E | 28 | M
Is this even possible "in one go" with SQL? I experimented with this, but it's not really coming together..
-- Counts people per age bracket in a single pass over Persons.
-- Brackets are half-open (lower bound inclusive, upper bound exclusive) so an
-- age that sits exactly on a boundary, e.g. 30, is counted in one column only.
-- BETWEEN is inclusive on both ends, so BETWEEN 20 AND 30 together with
-- BETWEEN 30 AND 40 would count age 30 twice.
SELECT SUM(CASE WHEN Age < 20 THEN 1 ELSE 0 END) AS [Under 20],
       SUM(CASE WHEN Age >= 20 AND Age < 30 THEN 1 ELSE 0 END) AS [20-30],
       SUM(CASE WHEN Age >= 30 AND Age < 40 THEN 1 ELSE 0 END) AS [30-40]
FROM Persons

I believe the below is the simplest way to achieve this, and also get the row back even if there are no people within that age range. Also since Sex only has 2 possible values you can use NULLIF instead of the case expression.
-- One output row per age bracket (plus an 'All' row) with total, female and
-- male head-counts as columns.
-- The brackets are an inline VALUES table; the LEFT JOIN keeps a bracket in the
-- result with zero counts even when no person falls into it.
SELECT [Agegroup] = AgeRange.Name,
       [All]      = COUNT(Person.PersonID),
       -- NULLIF turns the other sex into NULL and COUNT ignores NULLs.
       -- This relies on Sex holding only 'F' or 'M'.
       [Female]   = COUNT(NULLIF(Person.Sex, 'M')),
       [Male]     = COUNT(NULLIF(Person.Sex, 'F'))
FROM (VALUES
        (0,  1000, 'All'),
        (0,  20,   '< 20'),
        (20, 30,   '20 - 30'),
        (30, 40,   '30 - 40'),
        (40, 50,   '40 - 50'),
        (50, 60,   '50 - 60'),
        (60, 1000, '> 60')
     ) AgeRange (MinValue, MaxValue, Name)
LEFT JOIN Person
       ON Person.Age >= AgeRange.MinValue   -- lower bound inclusive
      AND Person.Age <  AgeRange.MaxValue   -- upper bound exclusive
GROUP BY AgeRange.Name, AgeRange.MinValue, AgeRange.MaxValue
-- MaxValue DESC places the 0..1000 'All' row ahead of the 0..20 row.
ORDER BY AgeRange.MinValue, AgeRange.MaxValue DESC
Example on SQL Fiddle

Maybe something like this:
-- Sample data for the query that follows.
-- DECLARE ... TABLE only accepts an @-prefixed table variable; a #-prefixed
-- name is a local temporary table and is created with CREATE TABLE. The name
-- #T is kept because the query below reads from #T.
CREATE TABLE #T (PersonID VARCHAR(5), Age INT, Sex VARCHAR(5));

INSERT INTO #T (PersonID, Age, Sex)
VALUES
    ('A', 43, 'F'),
    ('B', 22, 'F'),
    ('C', 65, 'M'),   -- stray trailing space removed from the key
    ('D', 33, 'F'),
    ('E', 28, 'M');
SQL
-- Result: one 'All' row for the whole table, followed by one row per age
-- bracket, each with total, female and male head-counts.
SELECT
    'All' AS Agegroup,
    COUNT(*) AS [All],
    SUM(CASE WHEN tbl.Sex = 'F' THEN 1 ELSE 0 END) AS Female,
    SUM(CASE WHEN tbl.Sex = 'M' THEN 1 ELSE 0 END) AS Male
FROM
    #T AS tbl
UNION ALL
SELECT
    tbl.Agegroup,
    COUNT(*) AS [All],
    SUM(CASE WHEN tbl.Sex = 'F' THEN 1 ELSE 0 END) AS Female,
    SUM(CASE WHEN tbl.Sex = 'M' THEN 1 ELSE 0 END) AS Male
FROM
    (
        -- Label every person with a bracket.
        -- Brackets are half-open: lower bound inclusive, upper bound exclusive.
        -- BETWEEN is inclusive on both ends, so chained BETWEEN 0 AND 20 /
        -- BETWEEN 20 AND 30 would label age 20 as '< 20', age 30 as '20 - 30',
        -- and so on, because CASE takes the first matching branch.
        SELECT
            CASE
                WHEN t.Age >= 0  AND t.Age < 20 THEN '< 20'
                WHEN t.Age >= 20 AND t.Age < 30 THEN '20 - 30'
                WHEN t.Age >= 30 AND t.Age < 40 THEN '30 - 40'
                WHEN t.Age >= 40 AND t.Age < 50 THEN '40 - 50'
                WHEN t.Age >= 50 AND t.Age < 60 THEN '50 - 60'
                -- Age 60 belongs to the last bracket under the half-open rule.
                WHEN t.Age >= 60 THEN '> 60'
            END AS Agegroup,
            t.Age,
            t.Sex
        FROM
            #T AS t
    ) AS tbl
GROUP BY
    tbl.Agegroup

Your best pattern would be to create an age range table (or virtual table as in the example below) and join to it, then pivot the results to get your results into a columnar form.
-- Joins each person to every age range that contains their age, then pivots
-- the sex values into columns so that each range becomes one row.
SELECT
    range AS AgeGroup,
    m     AS Male,
    F     AS Female,
    m + f AS [all]
FROM
    (
        -- One row per (range, person) pair; a range with no people still
        -- yields a row because of the LEFT JOIN.
        SELECT PersonID, range, sex
        FROM
            (
                -- Virtual table of range label, lower bound and upper bound.
                SELECT 'all' AS range, 0 AS minval, 200 AS maxval
                UNION SELECT '<20', 0, 19
                UNION SELECT '20-29', 20, 29
                -- etc....
            ) ranges
            LEFT JOIN yourtable t
                ON t.age BETWEEN minval AND maxval
    ) src
PIVOT
    (
        COUNT(personid) FOR sex IN ([m], [f])
    ) p

try this:
-- One row per age bracket with total, female and male head-counts.
-- The original version first collapsed Persons to one row per Sex and then
-- joined only the (Sex, bracket) pairs that had a non-zero count, so the final
-- COUNT reported how many sexes occur in a bracket (at most 2), not how many
-- people. Here every person is labelled with a bracket and counted directly.
;WITH Age_range AS (
    -- Bracket labels; sort_order fixes the display order of the rows.
    SELECT '<20'     AS age, 1 AS sort_order UNION ALL
    SELECT '20 - 30' AS age, 2 UNION ALL
    SELECT '30 - 40' AS age, 3 UNION ALL
    SELECT '40 - 50' AS age, 4 UNION ALL
    SELECT '50 - 60' AS age, 5 UNION ALL
    SELECT '>60'     AS age, 6
),
person_bracket AS (
    -- One row per person: their sex and the label of the bracket they fall in.
    -- CASE takes the first matching branch, so each upper bound is exclusive.
    SELECT [Sex],
           CASE
               WHEN [Age] < 20  THEN '<20'
               WHEN [Age] < 30  THEN '20 - 30'
               WHEN [Age] < 40  THEN '30 - 40'
               WHEN [Age] < 50  THEN '40 - 50'
               WHEN [Age] < 60  THEN '50 - 60'
               WHEN [Age] >= 60 THEN '>60'
           END AS age
    FROM Persons
)
SELECT A.age,
       COUNT(CASE WHEN C.Sex IN ('M', 'F') THEN 1 END) AS [All],
       COUNT(CASE WHEN C.Sex = 'F' THEN 1 END) AS Female,
       COUNT(CASE WHEN C.Sex = 'M' THEN 1 END) AS Male
FROM Age_range A
-- LEFT JOIN keeps brackets that contain nobody; their counts come out as 0.
LEFT JOIN person_bracket C
       ON A.age = C.age
GROUP BY A.age, A.sort_order
ORDER BY A.sort_order
SQL Fiddle Demo

-- One SELECT per output row, glued together with UNION ALL.
-- Each column separator is a single leading comma; the original had '.' and a
-- doubled comma, which does not parse.
-- COUNT(case ...) is used instead of SUM(case ...) so that a group with no
-- matching rows reports 0 rather than NULL.
select 'All' as [Age Group]
, count(*) as [All]
, count(case Sex when 'F' then 1 end) as Female
, count(case Sex when 'M' then 1 end) as Male
from Persons
union all
select '< 20' as [Age Group]
, count(*) as [All]
, count(case Sex when 'F' then 1 end) as Female
, count(case Sex when 'M' then 1 end) as Male
from Persons
where Age < 20
union all
select '20 - 30' as [Age Group]
, count(*) as [All]
, count(case Sex when 'F' then 1 end) as Female
, count(case Sex when 'M' then 1 end) as Male
from Persons
-- lower bound inclusive, upper bound exclusive
where 20 <= Age and Age < 30
union all
...

Related

Count average with multiple conditions

I'm trying to create a query which allows to categorize the average percentage for specific data per month.
Here's how my dataset presents itself:
Date
Name
Group
Percent
2022-01-21
name1
gr1
5.2
2022-01-22
name1
gr1
6.1
2022-01-26
name1
gr1
4.9
2022-02-01
name1
gr1
3.2
2022-02-03
name1
gr1
8.1
2022-01-22
name2
gr1
36.1
2022-01-25
name2
gr1
32.1
2022-02-10
name2
gr1
35.8
...
...
...
...
And here's what I want to obtain with my query (based on what I showed of the table):
Month
<=25%
25<_<=50%
50<_<=75%
75<_<=100%
01
1
1
0
0
02
1
1
0
0
...
...
...
...
...
The result needs to:
Be ordered by month
Have the average use for each name counted and categorized
So far I know how to get the average of the Percent value per Name:
-- Average Percent per Name, restricted to group 'gr1'.
-- GROUP is a reserved keyword, so the column of that name must be written as a
-- backtick-quoted identifier.
SELECT Name,
       AVG(Percent)
FROM `table`
WHERE `Group` = 'gr1'
GROUP BY Name
and how to count iterations of Percent in the categories created for the query:
-- Per month, counts the rows of group 'gr1' whose Percent falls in each
-- quarter-range. This counts individual rows, not per-name averages.
-- GROUP is a reserved keyword, so the column of that name is backtick-quoted.
SELECT EXTRACT(MONTH FROM Date) AS Month,
       COUNT(CASE WHEN Percent <= 25 AND `Group` = 'gr1' THEN Name END) `_25`,
       COUNT(CASE WHEN Percent > 25 AND Percent <= 50 AND `Group` = 'gr1' THEN Name END) `_50`,
       COUNT(CASE WHEN Percent > 50 AND Percent <= 75 AND `Group` = 'gr1' THEN Name END) `_75`,
       COUNT(CASE WHEN Percent > 75 AND Percent <= 100 AND `Group` = 'gr1' THEN Name END) `_100`
FROM `table`
GROUP BY Month
ORDER BY Month
but this counts all iterations of every name where I want the average of those values.
I've been struggling to figure out how to combine the two queries or to create a new one that answers my need.
I'm working with the BigQuery service from Google Cloud
This query produces the needed result, based on your example. So basically this combines your 2 queries using subquery, where the subquery is responsible to calculate AVG grouped by Name, Month and Group, and the outer query is for COUNT and "categorization"
-- Per month, counts how many names have an average Percent in each
-- quarter-range, for group 'gr1' only.
SELECT
  Month,
  COUNT(CASE
      WHEN avg_percent <= 25 THEN Name
    END) AS _25,
  COUNT(CASE
      WHEN avg_percent > 25
    AND avg_percent <= 50 THEN Name
    END) AS _50,
  COUNT(CASE
      WHEN avg_percent > 50
    AND avg_percent <= 75 THEN Name
    END) AS _75,
  COUNT(CASE
      WHEN avg_percent > 75
    AND avg_percent <= 100 THEN Name
    END) AS _100
FROM
  (
    -- Average Percent per (month, name).
    -- The group filter is a row filter, so it belongs in WHERE (applied before
    -- grouping) rather than HAVING; the group column then no longer needs to
    -- be part of the GROUP BY. GROUP is a reserved keyword, hence the backticks.
    SELECT
      EXTRACT(MONTH FROM Date) AS Month,
      Name,
      AVG(Percent) AS avg_percent
    FROM
      table1
    WHERE `Group` = 'gr1'
    GROUP BY Month, Name
  ) AS namegr
GROUP BY Month
-- The result is required to be ordered by month.
ORDER BY Month
This is the result:
Month
_25
_50
_75
_100
1
1
1
0
0
2
1
1
0
0
See also Fiddle (BUT on MySql) - http://sqlfiddle.com/#!9/16c5882/9
You can use this query to Group By Month and each Name
-- One row per (month, name) with a '1'/'0' flag per quarter-range showing
-- where that name's average Percent for the month falls.
SELECT CONCAT(EXTRACT(MONTH FROM Date), ', ', Name) AS DateAndName,
CASE
WHEN AVG(Percent) <= 25 THEN '1'
ELSE '0'
END AS '<=25%',
CASE
WHEN AVG(Percent) > 25 AND AVG(Percent) <= 50 THEN '1'
ELSE '0'
END AS '25<_<=50%',
CASE
WHEN AVG(Percent) > 50 AND AVG(Percent) <= 75 THEN '1'
ELSE '0'
END AS '50<_<=75%',
CASE
WHEN AVG(Percent) > 75 AND AVG(Percent) <= 100 THEN '1'
ELSE '0'
END AS '75<_<=100%'
from DataTable /*change to your table name*/
group by EXTRACT(MONTH FROM Date), Name
-- Sort on the numeric month and the name rather than on the concatenated
-- string: as text, '10, ...' would sort before '2, ...'.
order by EXTRACT(MONTH FROM Date), Name
It gives the following result:
DateAndName
<=25%
25<_<=50%
50<_<=75%
75<_<=100%
1, name1
1
0
0
0
1, name2
0
1
0
0
2, name1
1
0
0
0
2, name2
0
1
0
0

How to group Ages with case

Does anyone know how I can group ages with the CASE statement?
I got the ages 14,15,16,17,18,19,20,21,22,23,24,25. I want groups like <18,19,20,>21
I started like:
-- NOTE(review): the asker's fragment, kept as written; it does not parse.
-- It opens a simple CASE (Case age when <value>) but then supplies predicates
-- (BETWEEN 14 AND 18, >=21) where a value is expected, and its branches mix
-- the aggregate sum(age) with the bare column age.
Case age when BETWEEN 14 AND 18 THEN sum(age)
when >=21 THEN sum(age)
ELSE age END as age_groups
But i think aggregate functions don't work in CASE statements, at least it brings up an error.
CASE statement would work, you have a syntax error, your query would be:
-- Sums the ages per age group. The group label is a searched CASE repeated in
-- SELECT and GROUP BY.
-- Every branch must yield the same type: the labels are strings, so the ELSE
-- branch casts age to a string too. Returning the bare integer would force the
-- string labels to be converted to a number.
SELECT CASE WHEN age BETWEEN 14 AND 18 THEN '14 - 18'
            WHEN age >= 21 THEN '>=21'
            ELSE CAST(age AS varchar(10)) END AS age_groups,
       SUM(age)
FROM YOUR_TABLE
GROUP BY CASE WHEN age BETWEEN 14 AND 18 THEN '14 - 18'
              WHEN age >= 21 THEN '>=21'
              ELSE CAST(age AS varchar(10)) END
Use a CTE to calculate the age_groups, and then you can normally sum the ages for each group.
-- The CTE labels every row with its age group; the outer query sums the ages
-- per label, so the CASE expression is written only once.
WITH age_groups AS (
    SELECT Age,
           -- All branches return strings: the ELSE branch casts age so it does
           -- not clash with the string labels of the other branches.
           CASE WHEN age BETWEEN 14 AND 18 THEN '14 - 18'
                WHEN age >= 21 THEN '>=21'
                ELSE CAST(age AS varchar(10)) END AS age_group
    FROM YOUR_TABLE
)
SELECT age_group, SUM(Age)
FROM age_groups
GROUP BY age_group
Another neat alternative is to place the CASE inside a CROSS APPLY (VALUES ...) so you can then refer to it in other parts of the query without repetition:
-- Computes the age-group label once per row inside CROSS APPLY (VALUES ...),
-- so SELECT and GROUP BY can both refer to it as v.age_group.
-- Reads from the base table: no age_groups object is defined in this statement.
SELECT
    v.age_group,
    SUM(ag.Age)
FROM YOUR_TABLE ag
CROSS APPLY (VALUES (
    CASE WHEN ag.age BETWEEN 14 AND 18 THEN '14 - 18'
         WHEN ag.age >= 21 THEN '>=21'
         ELSE CAST(ag.age AS varchar(10)) END
) ) v(age_group)
GROUP BY v.age_group
You can use Case inside Sum():
-- One row with a head-count column per age group.
-- The first group uses <= 18 so that age 18 is counted; with < 18, = 19, = 20
-- and > 20 it would fall into no column at all.
-- Count(Case ...) reports 0 for an empty group, where Sum(Case ...) gives NULL.
select
Count(Case when age <= 18 then 1 end) '<=18',
Count(Case when age = 19 then 1 end) '19',
Count(Case when age = 20 then 1 end) '20',
Count(Case when age > 20 then 1 end) '>20'
from myTable;
DBFiddle demo is here
EDIT: If you meant to get the results vertically, still keep it simple:
-- Same counts as one row per age group.
-- UNION ALL is used because the four rows carry distinct labels, so there is
-- nothing to de-duplicate. The first group uses <= 18 so that age 18 is
-- counted, and the last label is written '>20' to match the condition.
select '<=18' as ageGroup, Count(Case when age <= 18 then 1 end) total from myTable
union all
select '19', Count(Case when age = 19 then 1 end) from myTable
union all
select '20', Count(Case when age = 20 then 1 end) from myTable
union all
select '>20', Count(Case when age > 20 then 1 end) from myTable;

Group the number of clients by age in ranges ("under 30" - "30 - 39" ...) group by products they have bought using Oracle SQL

Edited !!!!
Sorry I've been explaining myself wrong. So I tried to get an output like this which groups the number of clients by age in ranges ("under 30" - "30 - 39" ...) by products.
range
product1
product2
...
productn
under 30
272(clients)
250
30 to 39
380
310
40 to 49
410
450
...
...
...
Total
...
...
...
...
I have 3 tables
| main | | products | |clients |
| -------- | |---------| |--------- |
| main_id | |prod_id | | client_id |
| client_id | |prod_cat | | birth_date|
| product_id| |... | |... |
| ... |
Products
| prod_id || prod_cat || ... |
| -------- || -------- ||---- |
| 1 || Apple || |
| 2 || Tv || |
|... || ... ||... |
and here is what I've tried
-- The asker's attempt, kept as written: per product category, count the
-- clients in each age range.
-- NOTE(review): age_average is introduced only as a select-list alias on the
-- last select line, yet the CASE expressions above it use it as if it were a
-- column. The labels after each count (under 30, 30 to 39, ...) are unquoted
-- multi-word aliases.
select p.prod_cat ProdCat,
count(case when age_average < 30 then 1 end) under 30,
count(case when age_average between 30 and 39 then 1 end) 30 to 39,
count(case when age_average between 40 and 49 then 1 end) 40 to 49,
count(case when age_average between 50 and 59 then 1 end) 50 to 59,
count(case when age_average > 60 then 1 end) over 60,
round((months_between(sysdate,c.birth_date)/12)) age_average
from main m
inner join clients c on m.client_id = c.client_id
inner join products p on m.prod_id = p.prod_id
group by p.prod_cat
order by age_average
I got the error
> ORA-00904: "age_average": invalid id
There's no age_average column in any of those tables, so you'll have to calculate it first, and then re-use it in the rest of the query. Or, use it directly (the calculation, I mean) in every CASE.
Also, you'll have to enclose labels into double quotes; names you used are invalid.
-- temp: average client age per product category, rounded to one decimal.
with temp as
  (select p.prod_cat prodcat,
          round(avg(months_between(sysdate, c.birth_date) / 12), 1) age_average
   from main m
   inner join clients c on m.client_id = c.client_id
   inner join products p on m.prod_id = p.prod_id
   group by p.prod_cat
  )
-- Buckets the averages. The ranges are half-open (lower bound inclusive, upper
-- bound exclusive) because age_average is fractional: with BETWEEN 30 AND 39 /
-- BETWEEN 40 AND 49 a value such as 39.5 matches no range, and "> 60" would
-- leave exactly 60 uncounted.
-- Labels are double-quoted because they contain spaces or start with a digit.
select prodcat,
       count(case when age_average < 30 then 1 end) "under 30",
       count(case when age_average >= 30 and age_average < 40 then 1 end) "30 to 39",
       count(case when age_average >= 40 and age_average < 50 then 1 end) "40 to 49",
       count(case when age_average >= 50 and age_average < 60 then 1 end) "50 to 59",
       count(case when age_average >= 60 then 1 end) "over 60",
       max(age_average) age_average
from temp
-- rollup adds a grand-total row in which prodcat is null.
group by rollup(prodcat);
You seem to want the counts in each age range for the products, with the products in columns and the rows being the age range.
You need to list each product (or product category) separately. The idea is:
-- One row per age range, one column per product category.
-- Age is compared in months (years * 12) so that fractional years are handled
-- without gaps; CASE takes the first matching branch.
-- Each category needs its own uniquely named column, and the category column
-- is products.prod_cat.
select (case when months_between(sysdate, c.birth_date) < 30 * 12 then 'under 30'
             when months_between(sysdate, c.birth_date) < 40 * 12 then '30 to 39'
             when months_between(sysdate, c.birth_date) < 50 * 12 then '40 to 49'
             when months_between(sysdate, c.birth_date) < 60 * 12 then '50 to 59'
             else 'over 60'
        end) as age_range,
       sum(case when p.prod_cat = 'product1' then 1 else 0 end) as product1,
       sum(case when p.prod_cat = 'product2' then 1 else 0 end) as product2
from main m join
     clients c
     on m.client_id = c.client_id join
     products p
     on m.prod_id = p.prod_id
group by (case when months_between(sysdate, c.birth_date) < 30 * 12 then 'under 30'
               when months_between(sysdate, c.birth_date) < 40 * 12 then '30 to 39'
               when months_between(sysdate, c.birth_date) < 50 * 12 then '40 to 49'
               when months_between(sysdate, c.birth_date) < 60 * 12 then '50 to 59'
               else 'over 60'
          end)
-- The latest birth date belongs to the youngest range, so that range comes first.
order by max(c.birth_date) desc;
Here is my solution
-- One row per product category with the number of clients in each age range.
-- Age is months_between(sysdate, birth_date) / 12, which is fractional, so the
-- ranges are half-open (lower bound inclusive, upper bound exclusive): this
-- leaves no gap for values such as 39.5 and counts exactly 60 under "over 60".
select p.prod_cat,
       count(case when (months_between(sysdate, c.birth_date) / 12) < 30 then 1 end) "under 30",
       count(case when (months_between(sysdate, c.birth_date) / 12) >= 30
                   and (months_between(sysdate, c.birth_date) / 12) < 40 then 1 end) "30 to 39",
       count(case when (months_between(sysdate, c.birth_date) / 12) >= 40
                   and (months_between(sysdate, c.birth_date) / 12) < 50 then 1 end) "40 to 49",
       count(case when (months_between(sysdate, c.birth_date) / 12) >= 50
                   and (months_between(sysdate, c.birth_date) / 12) < 60 then 1 end) "50 to 59",
       count(case when (months_between(sysdate, c.birth_date) / 12) >= 60 then 1 end) "over 60"
from main m
inner join clients c on m.client_id = c.client_id
inner join products p on m.prod_id = p.prod_id
group by p.prod_cat

Creating Range Buckets of column

I have a base table named test_table on which I calculate an age, and the calculation works as expected.
Here is the query I am using for age calculation.
-- Returns acol together with age: the DATEDIFF, in hours, between the date
-- derived from epoch_time and the current timestamp.
-- NOTE(review): epoch_time is presumably in milliseconds, since it is divided
-- by 1000 before being added as seconds to 1970-01-01. Confirm.
SELECT
    acol,
    DATEDIFF(
        hour,
        CONVERT(
            DATEADD('SECOND', (epoch_time)/1000, DATE '1970-01-01'),
            DATE
        ),
        CURRENT_TIMESTAMP()
    ) AS age
FROM test_table;
But now I want to create different range of age column which will be displayed as a result by modifying the existing query.
Range would be :
1000-2000
2000-3000
3000-4000
ACOL AGE
MAG 1168
MAG 2168
MAG 3168
MAG 1100
MAG 2168
PNB 1672
MUM 1600
MUM 2696
MUM 3696
MUM 1696
Result after successful query has to look like below table.
ACOL 1000-2000 2000-3000 3000-4000
MAG 2 2 1
PNB 1 0 0
MUM 2 1 1
I know that we can do this using select case but not able to build the working query.
can anyone help me in this.
thanks in advance.
You can use conditional aggregation with a CASE expression:
-- Per ACOL, counts how many rows have an AGE in each range.
-- Each matching row contributes 1 (not its AGE), because the expected output
-- is a count per range. Every range is half-open, with an explicit inclusive
-- lower bound and an exclusive upper bound.
SELECT t.ACOL,
       SUM(CASE WHEN t.AGE >= 1000 AND t.AGE < 2000 THEN 1 ELSE 0 END) AS `1000-2000`,
       SUM(CASE WHEN t.AGE >= 2000 AND t.AGE < 3000 THEN 1 ELSE 0 END) AS `2000-3000`,
       SUM(CASE WHEN t.AGE >= 3000 AND t.AGE < 4000 THEN 1 ELSE 0 END) AS `3000-4000`
FROM
(
    -- AGE is the hour difference between the date derived from epoch_time and now.
    SELECT ACOL,
           DATEDIFF(HOUR, CONVERT(DATEADD(SECOND, (epoch_time)/1000, DATE '1970-01-01'), DATE), CURRENT_TIMESTAMP()) AS AGE
    FROM test_table
) t
GROUP BY t.ACOL
-- Per ACOL, counts how many rows have an AGE in each half-open range.
-- COUNT ignores the NULL produced for rows outside the range.
SELECT t.ACOL,
       COUNT(CASE WHEN t.AGE >= 1000 AND t.AGE < 2000 THEN t.AGE ELSE NULL END) AS `1000-2000`,
       COUNT(CASE WHEN t.AGE >= 2000 AND t.AGE < 3000 THEN t.AGE ELSE NULL END) AS `2000-3000`,
       COUNT(CASE WHEN t.AGE >= 3000 AND t.AGE < 4000 THEN t.AGE ELSE NULL END) AS `3000-4000`
FROM (
    -- AGE is the hour difference between the date derived from epoch_time and now.
    SELECT ACOL,
           DATEDIFF(hour, CONVERT(DATEADD('SECOND', (epoch_time)/1000, DATE '1970-01-01'), DATE), CURRENT_TIMESTAMP()) AS AGE
    FROM test_table
) t   -- the derived table is given an alias; many engines require one
GROUP BY t.ACOL
Modified the from clause...
Working ...
:)

selecting male and female seats based on age

I have a passenger_information table where the columns are like booking_id, gender_id, passenger_name and passenger_age. I need to select the number of adults seats both male and female booked and also the child seats both male and female.
The gender_id table defines 24 as male and 25 as female. To differentiate between child and adults seats we use passenger_age where the age for the child should not exceed 11 years.
I am trying to write a query to get the details based on booking_id but failing miserably.
Can some one help me out with this?
Something like this:
-- Per booking, counts seats by gender and age class.
-- gender_id 24 = male, 25 = female; a passenger aged 11 or under is a child.
select
booking_id
, sum(case when gender_id = 24 and passenger_age > 11 then 1 else 0 end) adult_male
, sum(case when gender_id = 25 and passenger_age > 11 then 1 else 0 end) adult_female
, sum(case when gender_id = 24 and passenger_age <= 11 then 1 else 0 end) child_male
-- female children are gender_id 25; testing 24 here would repeat child_male
, sum(case when gender_id = 25 and passenger_age <= 11 then 1 else 0 end) child_female
from passenger_information
group by booking_id
Try using Group by AND UNION.
-- Seat counts per booking and gender as rows: adult seats (age over 11) in the
-- first branch, child seats (age 11 or under) in the second.
-- Any gender_id other than 24 is labelled as female.
SELECT
    booking_id,
    CASE
        WHEN gender_id = 24 THEN 'Adult - Male'
        ELSE 'Adult - Female'
    END AS 'Gender',
    COUNT(booking_id)
FROM passenger_information
WHERE passenger_age > 11
GROUP BY booking_id, gender_id

UNION

SELECT
    booking_id,
    CASE
        WHEN gender_id = 24 THEN 'Child - Male'
        ELSE 'Child - Female'
    END AS 'Gender',
    COUNT(booking_id)
FROM passenger_information
WHERE passenger_age <= 11
GROUP BY booking_id, gender_id