SQL to find course popularity from a survey - sql

I am trying to write an SQL query that displays the course popularity,
in descending order.
Course popularity is measured in points, which are determined as follows. For every survey:
a. if the votes difference > 10% of total votes, the more popular course gets 1 point, and the less popular course gets 0 points
b. if the votes difference <= 10% of total votes, each course gets 0.5
point
course_id
course_name
faculty
1001
economics_101
business
1002
algebra_101
math
1003
geometry_101
math
1004
management_101
business
1005
marketing_101
business
1006
physics_101
science
survey_id
option_a
option_b
votes_a
votes_b
2001
economics_101
geometry_101
61
34
2002
algebra_101
economics_101
31
68
2003
marketing_101
management_101
11
72
2005
management_101
algebra_101
43
54
2004
geometry_101
marketing_101
48
46
Result achieved so far:
course
popularity
economics_101
4
management_101
2
algebra_101
2
marketing_101
1
geometry_101
1
[NULL]
0
I managed to join it so far, would be great to have inputs on optimizing this query:
-- Totals the points each course earns across all surveys, highest first.
-- x: one (course, points) row per survey side found in b.
WITH x AS
(
-- b: scores every row of a, giving the points for option_a and for option_b.
WITH b AS
(
-- a: every course LEFT JOINed to the surveys where it is option_a, then again
-- to the surveys where it is option_b. A survey whose two options both match a
-- course therefore appears twice in a, and a course with no match on one side
-- contributes a row whose survey columns are all NULL.
WITH a as
(
select * from course c
LEFT JOIN survey s
on c.course_name = s.option_a
UNION ALL
select * from course c
LEFT JOIN survey s
on c.course_name = s.option_b
)
SELECT a.*,
-- NOTE(review): every selected column of a is in the GROUP BY, so each group is
-- presumably a single row and this SUM is just votes_a + votes_b - confirm.
SUM(votes_a+votes_b) as total_votes,
-- option_a: 1 when it leads by more than 10% of the total, 0.5 when option_b
-- does not lead by more than 10% either, otherwise 0. NULL votes fall to ELSE 0.
CASE WHEN (a.votes_a - a.votes_b) > (0.1*SUM(votes_a+votes_b)) THEN 1
WHEN (a.votes_b - a.votes_a) <= (0.1*SUM(votes_a+votes_b)) THEN 0.5
ELSE 0
END AS 'Popularity_a',
-- option_b: the same rule with the two sides swapped.
CASE WHEN (a.votes_b - a.votes_a) > (0.1*SUM(votes_a+votes_b)) THEN 1
WHEN (a.votes_a - a.votes_b) <= (0.1*SUM(votes_a+votes_b)) THEN 0.5
ELSE 0
END AS 'Popularity_b'
FROM
a
GROUP BY
a.course_name ,
a.course_id,
a.faculty ,
a.survey_id ,
a.option_a ,
a.option_b ,
a.votes_a ,
a.votes_b
)
-- Unpivot b into (course, pop) rows. Both halves read every row of b, so a
-- survey that reached a twice is scored twice here. No cx column is selected.
SELECT b.option_a as course,
b.Popularity_a as pop
FROM b
LEFT JOIN
course cx
ON b.option_a = cx.course_name
UNION ALL
SELECT b.option_b as course ,
b.Popularity_b as pop
FROM b
LEFT JOIN
course cx
ON b.option_b = cx.course_name
)
-- Rows of b built from unmatched courses carry NULL option names, so they are
-- totalled under a NULL course.
select
x.course ,
sum (x.pop) as popularity
from x
GROUP BY
x.course
order by popularity desc

Use UNION ALL to extract all courses and the respective points they get from the table survey and aggregate to get the popularity.
Then join to course:
-- Score every survey from both sides, total the scores per course, then attach
-- the totals to the full course list so courses without surveys still appear.
WITH survey_scores AS (
    -- option_a's share of each survey: 1.0 for a lead of more than 10% of all
    -- votes, 0.0 when option_b has such a lead, otherwise 0.5.
    SELECT
        s.option_a AS course_name,
        CASE
            WHEN s.votes_a - s.votes_b > 0.1 * (s.votes_a + s.votes_b) THEN 1.0
            WHEN s.votes_b - s.votes_a > 0.1 * (s.votes_a + s.votes_b) THEN 0.0
            ELSE 0.5
        END AS points
    FROM survey s
    UNION ALL
    -- option_b's share, scored with the mirrored comparison.
    SELECT
        s.option_b,
        CASE
            WHEN s.votes_b - s.votes_a > 0.1 * (s.votes_a + s.votes_b) THEN 1.0
            WHEN s.votes_a - s.votes_b > 0.1 * (s.votes_a + s.votes_b) THEN 0.0
            ELSE 0.5
        END
    FROM survey s
),
course_totals AS (
    SELECT
        ss.course_name,
        SUM(ss.points) AS total_points
    FROM survey_scores ss
    GROUP BY ss.course_name
)
SELECT
    c.*,
    -- Courses that never appear in a survey have no total; report them as 0.
    COALESCE(ct.total_points, 0) AS popularity
FROM course c
LEFT JOIN course_totals ct
    ON ct.course_name = c.course_name
ORDER BY popularity DESC;
See the demo.

Related

SUM GROUP Multiple Sub Querys

I have a query that returns the correct information; however, the data is not displayed grouped and summed, possibly because of the joins and the way the query is constructed.
However, if I change the order of the joins, I end up with duplicated values, because the last joined table, benefitdistribution, has multiple rows per PLU.
This query gives me the correct results. How would I go about summing its output without creating a view and then summing the view?
-- Sales totals per department and parent group for the filtered movements,
-- together with the discount totals joined in from benefitdistribution.
SELECT dbo.plumovement.deptno,
dbo.departmentdata.NAME AS DeptName,
dbo.groupdata.no AS GroupNo,
dbo.groupdata.NAME AS GroupName,
Sum(dbo.plumovement.qty) AS SalesQty,
Sum(dbo.plumovement.value) AS SalesValue,
Sum(dbo.plumovement.netvalue) AS NetValue,
Sum(dbo.plumovement.cost) AS Cost,
Sum(dbo.plumovement.vatvalue) AS VatValue,
Max(dbo.departmentdata.targetgp) AS TargetGP,
-- isNull(DistributionTotals.discountno, - 1) AS DiscountNo,
-- The discount totals are output as plain (non-aggregated) columns, which is
-- why they also have to appear in the GROUP BY below.
isNull(DistributionTotals.DiscountQty, 0) AS BenefitQty,
isNull(DistributionTotals.DiscountValue, 0) AS DiscountValue,
-- NOTE(review): the guard tests Sum(cost) but the divisor is Sum(netvalue);
-- a zero net value with a non-zero cost is not guarded - confirm intent.
CASE
WHEN Sum(plumovement.cost) = 0 THEN 100
ELSE
(
( ( Sum(plumovement.netvalue) ) + ( Sum(plumovement.cost) ) ) / ( Sum(plumovement.netvalue) ) * 100 )
END AS [GP]
FROM dbo.plumovement
LEFT OUTER JOIN dbo.departmentdata
ON dbo.plumovement.deptno = dbo.departmentdata.no
LEFT OUTER JOIN dbo.groupdata
ON dbo.departmentdata.parentgroup = dbo.groupdata.no
-- Discount totals per (txnid, pluno, discountno). discountno is grouped on but
-- not selected, so one (txnid, pluno) pair can return several rows here.
LEFT OUTER JOIN (SELECT txnid,
pluno,
-- discountno,
Sum(qty) AS DiscountQty,
Sum(value) AS DiscountValue
FROM dbo.benefitdistribution
GROUP BY txnid,
pluno,
discountno) AS DistributionTotals
ON DistributionTotals.txnid = plumovement.txnid
AND DistributionTotals.pluno = plumovement.pluno
WHERE ( dbo.plumovement.movetype = 0 )
AND ( dbo.plumovement.termno > 0 )
AND ( dbo.plumovement.consolidatedtermno = 0 )
AND ( dbo.plumovement.processed = 1 )
AND ( plumovement.date BETWEEN '2022-10-01' AND '2022-11-01' )
-- Grouping by DiscountQty and DiscountValue yields one output row per distinct
-- pair of discount totals within a department, not one row per department.
GROUP BY dbo.groupdata.no,
dbo.groupdata.NAME,
dbo.plumovement.deptno,
dbo.departmentdata.NAME,
DistributionTotals.DiscountQty,
DistributionTotals.DiscountValue
Out putted results:
deptno
DeptName
GroupNo
GroupName
SalesQty
SalesValue
NetValue
Cost
VatValue
TargetGP
BenefitQty
DiscountValue
GP
1
Draught Beer + Cider
1
DRINK
-16
164.9
137.4167
-15.191
27.4833
40
0
0
88.94
1
Draught Beer + Cider
1
DRINK
-1
4.3373
3.6144
-0.8682
0.7229
40
1
0.1627
75.97
1
Draught Beer + Cider
1
DRINK
-6
25.65
21.375
-5.2092
4.275
40
2
0.45
75.62
1
Draught Beer + Cider
1
DRINK
-1
3.2442
2.7035
-0.8682
0.5407
40
2
1.2558
67.88
1
Draught Beer + Cider
1
DRINK
-4
7
5.8334
-5.1318
1.1666
40
2
3.5
12.02
1
Draught Beer + Cider
1
DRINK
-4
17.1
14.25
-3.4728
2.85
40
4
0.9
75.62
1
Draught Beer + Cider
1
DRINK
-4
7
5.8333
-5.1318
1.1667
40
4
7
12.02
9
Red Wine
1
DRINK
-1
-16.45
-13.7083
-1.4532
-2.7417
20
1
22
110.6
10
Rose Wine
1
DRINK
-1
3.55
2.9583
-0.7351
0.5917
45
0
0
75.15
10
Rose Wine
1
DRINK
-1
4.5058
3.7548
-1.4699
0.751
45
2
1.7442
60.85
To confirm there should be 1 line for each unique DeptNo for example
deptno
DeptName
GroupNo
GroupName
SalesQty
SalesValue
NetValue
Cost
VatValue
TargetGP
BenefitQty
DiscountValue
GP
1
Draught Beer + Cider
1
DRINK
-36
229.2315
191.0263
-35.873
38.2052
80
15
13.2685
79.00
The TargetGP does not need summing, as it is just the single value of "departmentdata.targetgp", as shown in the example.
The GP, however, needs to be calculated from the summed results.
It should be around 75-80, off the top of my head.
Use a CTE:
-- One row per department: the CTE reproduces the original per-discount rows,
-- and the outer query rolls them up and recomputes GP from the rolled-up sums.
WITH cte AS (
    SELECT pm.deptno,
           dd.NAME                           AS DeptName,
           gd.no                             AS GroupNo,
           gd.NAME                           AS GroupName,
           Sum(pm.qty)                       AS SalesQty,
           Sum(pm.value)                     AS SalesValue,
           Sum(pm.netvalue)                  AS NetValue,
           Sum(pm.cost)                      AS Cost,
           Sum(pm.vatvalue)                  AS VatValue,
           Max(dd.targetgp)                  AS TargetGP,
           IsNull(dt.DiscountQty, 0)         AS BenefitQty,
           IsNull(dt.DiscountValue, 0)       AS DiscountValue
    FROM dbo.plumovement AS pm
    LEFT OUTER JOIN dbo.departmentdata AS dd
        ON pm.deptno = dd.no
    LEFT OUTER JOIN dbo.groupdata AS gd
        ON dd.parentgroup = gd.no
    -- Discount totals per (txnid, pluno). discountno is deliberately NOT in the
    -- GROUP BY: it is not selected, and grouping on it returned several rows
    -- per (txnid, pluno), which duplicated the joined movement rows.
    LEFT OUTER JOIN (SELECT txnid,
                            pluno,
                            Sum(qty)   AS DiscountQty,
                            Sum(value) AS DiscountValue
                     FROM dbo.benefitdistribution
                     GROUP BY txnid,
                              pluno) AS dt
        ON dt.txnid = pm.txnid
       AND dt.pluno = pm.pluno
    WHERE pm.movetype = 0
      AND pm.termno > 0
      AND pm.consolidatedtermno = 0
      AND pm.processed = 1
      -- NOTE(review): BETWEEN is inclusive, so 2022-11-01 itself is included;
      -- confirm whether the range is meant to end before that date.
      AND pm.date BETWEEN '2022-10-01' AND '2022-11-01'
    -- NOTE(review): grouping on the discount totals means movements that share
    -- identical totals contribute them once, as in the question's expected
    -- figures; confirm this is the intended way to count discounts.
    GROUP BY gd.no,
             gd.NAME,
             pm.deptno,
             dd.NAME,
             dt.DiscountQty,
             dt.DiscountValue
)
SELECT deptno,
       DeptName,
       GroupNo,
       GroupName,
       Sum(SalesQty)      AS SalesQty,
       Sum(SalesValue)    AS SalesValue,
       Sum(NetValue)      AS NetValue,
       Sum(Cost)          AS Cost,
       Sum(VatValue)      AS VatValue,
       -- A department has a single target, so take it rather than summing it.
       Max(TargetGP)      AS TargetGP,
       Sum(BenefitQty)    AS BenefitQty,
       Sum(DiscountValue) AS DiscountValue,
       -- GP is a ratio, so it is recomputed from the summed values instead of
       -- adding up per-row percentages. NULLIF avoids a divide-by-zero error
       -- when the net value totals zero (GP is then NULL).
       CASE
           WHEN Sum(Cost) = 0 THEN 100
           ELSE ( Sum(NetValue) + Sum(Cost) ) / NULLIF(Sum(NetValue), 0) * 100
       END                AS GP
FROM cte
GROUP BY deptno,
         DeptName,
         GroupNo,
         GroupName;

How to sum and count in grouping in sql

I have tables like below.
I would like to group the rows and count them based on their score.
customer score
A 10
A 20
B 30
B 40
C 50
C 60
First, I would like to sum the score by customer.
This is achieved with GROUP BY.
customer score
A 30
B 70
C 110
Second, I would like to count the customers by binning their summed scores into the following bands.
I couldn't figure out how to count after grouping.
band count
0-50 1
50-100 1
100- 0
Are there any way to achieve this?
Thanks
You could use a union approach:
-- Total the score per customer, then count how many customers fall in each band.
WITH cte AS (
    -- SUM, not COUNT: the bands apply to each customer's total score, not to
    -- the number of rows the customer has.
    SELECT customer, SUM(score) AS score
    FROM yourTable
    GROUP BY customer
)
-- Each branch always returns exactly one row, so empty bands show a count of 0.
-- position exists only to keep the bands in ascending order.
SELECT '0-50' AS band, COUNT(*) AS count, 0 AS position FROM cte WHERE score <= 50
UNION ALL
SELECT '50-100', COUNT(*), 1 FROM cte WHERE score > 50 AND score <= 100
UNION ALL
SELECT '100-', COUNT(*), 2 FROM cte WHERE score > 100
ORDER BY position;
Try the following with case expression
-- Total the score per customer, label each total with its band once, then
-- count the customers per band.
select
    banded.band,
    count(*) as count
from (
    select
        -- The first matching branch wins, so a total of exactly 50 is '0-50'
        -- and exactly 100 is '50-100'; totals matching no branch get NULL.
        case
            when val.score >= 0 and val.score <= 50 then '0-50'
            when val.score >= 50 and val.score <= 100 then '50-100'
            when val.score >= 100 then '100-'
        end as band
    from (
        select
            customer,
            sum(score) as score
        from myTable
        group by customer
    ) val
) banded
group by banded.band
I would recommend two levels of aggregation but with a left join:
-- Band definitions are listed up front and LEFT JOINed to the per-customer
-- totals, so a band with no customers is still returned with a count of 0.
with bands as (
    -- Lower bound inclusive, upper bound exclusive; a NULL hi means open-ended.
    select 0 as lo, 50 as hi, '0-50' as band from dual
    union all
    select 50 as lo, 100 as hi, '50-100' as band from dual
    union all
    select 100 as lo, null as hi, '100+' as band from dual
),
customer_totals as (
    select customer, sum(score) as score
    from myTable
    group by customer
)
select b.band, count(c.customer)
from bands b
left join customer_totals c
    on c.score >= b.lo
   and (b.hi is null or c.score < b.hi)
group by b.band;
This structure also suggests that the bands can be stored in a separate reference table. That can be quite handy, if you intend to reuse these across different queries or over time.

Calculation of points if there are 2 student with same rank in sql server

Rank_Table
ID Rank
1 1
2 1
3 3
4 3
5 5
Price
No Points
1 10
2 9
3 8
4 7
5 6
Expected Output
ID Rank Points
1 1 9.5
2 1 9.5
3 3 7.5
4 3 7.5
5 5 6
Rank 2 is not present, so the points for 1st and 2nd place are summed and shared equally among the tied students,
e.g. (10 + 9) / 2 = 9.5
When I join the 2 table like
-- Pairs each student with the Price row whose No equals the student's ID, so
-- students who share a Rank still receive different Points values.
select *
from Rank_table a join
Price b
on a.ID = b.No
I am getting the output as
ID Rank No Points
1 1 1 10
2 1 2 9
3 3 3 8
4 3 4 7
5 5 5 6
This seems to be quite a simple requirement, simply using AVG and an OVER clause.
-- Demo: students who share a rank each receive the average of the points
-- attached to the places they jointly occupy.
CREATE TABLE [Rank] (ID int, [Rank] int);
CREATE TABLE Price ([No] int, Points int);
GO
INSERT INTO [Rank] (ID, [Rank])
VALUES
    (1, 1),
    (2, 1),
    (3, 3),
    (4, 3),
    (5, 5);
INSERT INTO Price ([No], Points)
VALUES
    (1, 10),
    (2, 9),
    (3, 8),
    (4, 7),
    (5, 6);
GO
SELECT R.ID,
       R.[Rank],
       -- Points is an int, so convert before averaging to keep the fraction.
       -- decimal(10,2) is used because decimal(2,0) raises an arithmetic
       -- overflow as soon as a Points value reaches 100.
       AVG(CONVERT(decimal(10,2), P.Points)) OVER (PARTITION BY R.[Rank]) AS Points
FROM [Rank] R
JOIN Price P ON R.ID = P.[No];
GO
DROP TABLE [Rank];
DROP TABLE Price;
You need simple JOIN :
-- Each student gets the average of the Points held by everyone sharing the
-- same Rank. Column names follow the posted tables: Rank_Table(ID, Rank) and
-- Price(No, Points).
select rn.*,
       -- Convert first so the int Points average keeps its fraction.
       avg(convert(decimal(10,2), p.Points)) over (partition by rn.[Rank]) as points
from Rank_Table rn
inner join Price p
    on p.[No] = rn.ID;
-- Average the points per rank, then join the averages back to every student.
SELECT R.*,
       AA.pts AS POINTS
FROM rank_table R
INNER JOIN (SELECT a.[rank],
                   -- Multiply by 1.0 first: Sum(int) / Count(*) is integer
                   -- division and would turn 9.5 into 9.
                   Sum(b.points) * 1.0 / Count(*) AS pts
            FROM rank_table a
            JOIN price b
                ON a.id = b.no
            GROUP BY a.[rank]) AA
    ON R.[rank] = AA.[rank];
You can calculate AVG at rank level and then join back to Rank_Table like in this working demo
-- Compute the average points once per rank, then give that average to every
-- student holding the rank.
select
    R.*,
    T.points
from Rank_table R
inner join (
    select
        a.rank,
        -- Cast before averaging so the int points keep their fraction.
        avg(cast(b.points as decimal(10,2))) as points
    from Rank_table a
    inner join Price b
        on a.ID = b.No
    group by a.rank
) T
    on T.rank = R.rank
Hmmm. You seem to want a non-equijoin based on the "next" rank as well as the rank in each row. Unfortunately, SQL Server 2008 doesn't support lead(), but you can use apply:
-- Each student averages the points for the places from their own rank up to,
-- but not including, the next rank that is actually present.
select rt.id, rt.rank, avg(p.points * 1.0) as points
from rank_table rt
outer apply (
    -- The next rank in use; NULL for the last-placed group.
    select min(rt2.rank) as next_rank
    from rank_table rt2
    where rt2.rank > rt.rank
) nxt
left join price p
    -- NOTE(review): for the last-placed group this takes every remaining
    -- price row, which assumes price has no more rows than there are
    -- students - confirm.
    on p.no >= rt.rank
   and (p.no < nxt.next_rank or nxt.next_rank is null)
group by rt.id, rt.rank;

Pivot SQL with Rank

Basically, I have the following query, and I am trying to return only one pair of ranks per claim from it:
-- Ranks the reserve values of each claim by time and pairs every ranked row
-- with the row ranked immediately after it.
WITH numbered_rows
as (
SELECT Claim,
reserve,
time,
-- NOTE(review): partitions by ClaimNumber, but the derived table t below only
-- exposes Claim, time and reserve - confirm the intended column.
RANK() OVER (PARTITION BY ClaimNumber ORDER BY time asc) as 'Rank'
FROM (
-- One row per (Claim, Reserve), carrying the latest time recorded for it.
SELECT cc.Claim,
MAX(csd.time) as time,
csd.reserve
FROM ClaimData csd WITH (NOLOCK)
JOIN Core cc WITH (NOLOCK)
on cc.ClaimID = csd.ClaimID
GROUP BY cc.Claim, csd.Reserve
) as t
)
-- Comma join with the conditions in WHERE: cur is the lower-ranked (earlier)
-- row and prev the next-ranked row of the same claim, so a claim with three
-- ranks returns two pairs.
select *
from numbered_rows cur, numbered_rows prev
where cur.Claim= prev.Claim
and cur.Rank = prev.Rank -1
The results set I get is the following:
Claim reserve Time Rank Claim reserve Time Rank
--------------------------------------------------------------------
11 0 12/10/2012 1 11 15000 5/30/2013 2
34 2000 1/21/2013 1 34 750 1/31/2013 2
34 750 1/31/2013 2 34 0 3/31/2013 3
07 800000 5/9/2013 1 07 0 5/10/2013 2
But I only want to see the following (with the Claim 34 row that ends at Rank 2 removed, because it is not the highest):
Claim reserve Time Rank Claim reserve Time Rank
--------------------------------------------------------------------
11 0 12/10/2012 1 11 15000 5/30/2013 2
34 750 1/31/2013 2 34 0 3/31/2013 3
07 800000 5/9/2013 1 07 0 5/10/2013 2
I think you can do this by reversing your logic: order by time DESC, switch cur and prev in your final select, change -1 to +1, and then limit prev.rank to 1, which ensures that you only include the latest two results for each claim:
-- Returns, for each claim, its latest reserve row (prev) alongside the row
-- immediately before it (cur).
WITH numbered_rows AS
(   SELECT  Claim,
            reserve,
            time,
            -- Newest first, so rank 1 is always the latest row of the claim.
            -- Partitioned by Claim, the only claim column the derived table
            -- exposes.
            [Rank] = RANK() OVER (PARTITION BY Claim ORDER BY time DESC)
    FROM    (   -- One row per (Claim, Reserve) with the latest time seen.
                SELECT  cc.Claim,
                        [Time] = MAX(csd.time),
                        csd.reserve
                FROM    ClaimData AS csd WITH (NOLOCK)
                        INNER JOIN Core AS cc WITH (NOLOCK)
                            ON cc.ClaimID = csd.ClaimID
                GROUP BY cc.Claim, csd.Reserve
            ) t
)
SELECT  *
FROM    numbered_rows AS prev
        INNER JOIN numbered_rows AS cur
            ON cur.Claim = prev.Claim
            AND cur.[Rank] = prev.[Rank] + 1
-- Keeping only prev rank 1 leaves exactly one pair per claim.
WHERE   prev.[Rank] = 1;

How to combine Date periods in SQL and create a composite timeline

In an Oracle table, each family (unique id) can have several people (unique id) in different relationships over a date range. I would like to create a timeline that gives the FamilyType for each time period, based on the combination of relationships in that period. An example is given below for a particular family.
P1|-----Head---------------------------------------|
P2|--Partner--------------|
P3|---Child----------------------|
P4|---Child------------|
|=Single=|=Couple=|=Family=======|=SingleParent==|
Table has columns
FamilyId, PersonId, Relationship, StartDate, EndDate
Each | is a date (no time portion). The data guarantees that on a given date
* there will always be one person who is Head.
* There can be 0 or 1 Partner.
* There can be 0 or n child.
The rules are
* If there is only a Head the FamilyType is Single
* If there is a Head and a Partner the FamilyType is Couple
* If there is a Head , a Partner and 1 or more Children the FamilyType is Family
* If there is a Head and 1 or more Children the FamilyType is SingleParent
People can join or leave from a family on any date.
And people can change relationships. So following scenarios are possible
P1|----------Head--------------------|
P2|----partner------------|---Head--------|
P3|---Child----------------------|
P4|--Child-----------------|
|=Single=|=Couple=|=Family=======|=SingleParent==|
P1|----------Head--------------------|
P2|----partner------------|---Head--------|
P3|---Child----------------------|
P4|--Child-----------------|
p5|---Partner-----|
|=Single=|=Couple=|=Family=======================|
How can this be done using SQL in Oracle 11GR2 (working using SQL only and not using procedural code). I am trying to evaluate whether this is best done in SQL or C#. As a curiosity answer specific for SQL Server 2012 is also good to have.
The result should be rows with StartDate, EndDate and FamilyType.
you could do something like this:
-- Builds one row per family per day, derives the family state for that day,
-- then collapses consecutive days with the same state into date ranges.
with family_ranges(familyid, min_start, max_end, curr_date)
as (-- Recursive day generator. Days are kept as Julian day numbers so that no
    -- date arithmetic happens inside the factored subquery.
    select familyid,
           min(startdate),
           max(enddate),
           to_number(to_char(min(startdate), 'j'))
    from family
    group by familyid
    union all
    select familyid, min_start, max_end, curr_date+1
    from family_ranges
    where curr_date < to_number(to_char(max_end,'j')))
select familyid,
       to_date(min(curr_date), 'j') fromdate,
       to_date(max(curr_date), 'j') todate,
       state
from (select familyid,
             curr_date,
             state,
             -- Island key: within one unbroken run of a state both the day
             -- number and the row number rise by 1, so their difference stays
             -- constant. A state that returns later gets a different key and
             -- is reported as a separate range instead of being merged.
             curr_date - row_number() over (partition by familyid, state
                                            order by curr_date) island
      from (select familyid,
                   curr_date,
                   case when head = 'Y' and partner = 'Y' and child = 'Y' then 'Family'
                        when head = 'Y' and partner = 'Y' then 'Couple'
                        when head = 'Y' and child = 'Y' then 'SingleParent'
                        when head = 'Y' then 'Single'
                   end state
            from (select f.familyid, d.curr_date, f.relationship
                  from family_ranges d
                  inner join family f
                     on f.familyid = d.familyid
                    and to_date(d.curr_date,'j') between f.startdate and f.enddate)
            -- One row per family and day, with a 'Y' flag per relationship
            -- present on that day.
            pivot (
              max('Y')
              for relationship in ('Head' as head, 'Partner' as partner, 'Child' as child)
            )))
group by familyid, state, island
order by familyid, fromdate;
Forgive the nonsense with the date->Julian conversion; it works around a bug in 11.2.0.1-3 where date arithmetic fails in factored subqueries.
The factored subquery part gets us the list of dates that the family spans. We then join it back to family to work out who was in the family on each day.
-- Excerpt of the full query: family_ranges is the day list built by its WITH
-- clause. Returns one row per family member present on each generated day.
select f.familyid, d.curr_date, f.relationship
from family_ranges d
inner join family f
on f.familyid = d.familyid
and to_date(d.curr_date,'j') between f.startdate and f.enddate;
now we pivot this to get a simple Y/N list
SQL> with family_ranges(familyid, min_start, max_end, curr_date)
2 as (select familyid,
3 min(startdate),
4 max(enddate),
5 to_number(to_char(min(startdate), 'j'))
6 from family
7 group by familyid
8 union all
9 select familyid, min_start, max_end, curr_date+1
10 from family_ranges
11 where curr_date < to_number(to_char(max_end,'j')))
12 select familyid, to_date(curr_date,'j') curr_date, head, partner, child
13 from (select f.familyid, d.curr_date, f.relationship
14 from family_ranges d
15 inner join family f
16 on f.familyid = d.familyid
17 and to_date(d.curr_date,'j') between f.startdate and f.enddate)
18 pivot (
19 max('Y')
20 for relationship in ('Head' as head, 'Partner' as partner, 'Child' as child)
21 );
FAMILYID CURR_DATE H P C
---------- --------- - - -
1 09-NOV-12 Y
1 11-NOV-12 Y
1 13-NOV-12 Y
1 23-NOV-12 Y
2 23-NOV-12 Y
2 28-NOV-12 Y Y
2 29-NOV-12 Y Y
1 30-NOV-12 Y Y
1 01-DEC-12 Y Y
1 03-DEC-12 Y Y
2 18-DEC-12 Y Y Y
2 20-DEC-12 Y Y Y
Then it's a simple CASE to get your required string from the rules, and a GROUP BY to get the date ranges.
SQL> with family_ranges(familyid, min_start, max_end, curr_date)
2 as (select familyid,
3 min(startdate),
4 max(enddate),
5 to_number(to_char(min(startdate), 'j'))
6 from family
7 group by familyid
8 union all
9 select familyid, min_start, max_end, curr_date+1
10 from family_ranges
11 where curr_date < to_number(to_char(max_end,'j')))
12 select familyid, min(curr_date) fromdate, max(curr_date) todate, state
13 from (select familyid, to_date(curr_date,'j') curr_date,
14 case when head = 'Y' and partner = 'Y' and child = 'Y' then 'Family'
15 when head = 'Y' and partner = 'Y' then 'Couple'
16 when head = 'Y' and child = 'Y' then 'SingleParent'
17 when head = 'Y' then 'Single'
18 end state
19 from (select f.familyid, d.curr_date, f.relationship
20 from family_ranges d
21 inner join family f
22 on f.familyid = d.familyid
23 and to_date(d.curr_date,'j') between f.startdate and f.enddate)
24 pivot (
25 max('Y')
26 for relationship in ('Head' as head, 'Partner' as partner, 'Child' as child)
27 ))
28 group by familyid, state
29 order by familyid, fromdate;
FAMILYID FROMDATE TODATE STATE
---------- --------- --------- ------------
1 05-NOV-12 24-NOV-12 Single
1 25-NOV-12 14-DEC-12 Couple
1 15-DEC-12 24-JAN-13 Family
1 25-JAN-13 13-FEB-13 SingleParent
2 05-NOV-12 24-NOV-12 Single
2 25-NOV-12 14-DEC-12 Couple
2 15-DEC-12 13-FEB-13 Family
fiddle: http://sqlfiddle.com/#!4/484b5/1