Running Distinct Count with a Partition

Running Distinct Count with a Partition - sql

I'd like a running distinct count with a partition by year for the following data:
-- Rebuild the temporary fact table used by the queries that follow.
DROP TABLE IF EXISTS #FACT;

CREATE TABLE #FACT
(
    "Year"  INT,
    "Month" INT,
    "Acc"   VARCHAR(5)
);

-- Sample rows; duplicates within a month are intentional.
INSERT INTO #FACT ("Year", "Month", "Acc")
VALUES
    (2015, 1, 'A'),
    (2015, 1, 'B'),
    (2015, 1, 'B'),
    (2015, 1, 'C'),
    (2015, 2, 'D'),
    (2015, 2, 'E'),
    (2015, 3, 'E'),
    (2016, 1, 'A'),
    (2016, 1, 'A'),
    (2016, 2, 'B'),
    (2016, 2, 'C');

-- Show what was loaded.
SELECT
    "Year",
    "Month",
    "Acc"
FROM #FACT;
The following returns the correct answer but is there a more concise way that is also performant?
-- Running per-year total of the monthly distinct-account counts.
WITH ranked_acc AS
(
    -- Dense-rank the accounts inside each (Year, Month); the highest rank in a
    -- month equals the number of distinct accounts in that month.
    SELECT
        f."Year",
        f."Month",
        DENSE_RANK() OVER (PARTITION BY f."Year", f."Month" ORDER BY f."Acc") AS DenseR
    FROM #FACT AS f
),
month_totals AS
(
    -- Collapse to one row per (Year, Month) carrying that month's top rank.
    SELECT
        r."Year",
        r."Month",
        MAX(r.DenseR) AS RunningTotal
    FROM ranked_acc AS r
    GROUP BY
        r."Year",
        r."Month"
)
-- Accumulate the monthly figures, restarting at every year.
SELECT
    m."Year",
    m."Month",
    SUM(m.RunningTotal) OVER (PARTITION BY m."Year" ORDER BY m."Month") AS X
FROM month_totals AS m
ORDER BY
    m."Year",
    m."Month";
The above returns the following - the answer should also return exactly the same table:

If you want a running count of distinct accounts:
-- Row-level running count of accounts: each account is counted once, on the
-- first row (by year, month) on which it appears, and the count accumulates
-- within each year in month order.
-- The account column of #FACT is "Acc" (see the CREATE TABLE), so it is
-- referenced by that name here.
SELECT
    f.*,
    SUM(CASE WHEN f.seqnum = 1 THEN 1 ELSE 0 END)
        OVER (PARTITION BY f."Year" ORDER BY f."Month") AS cume_distinct_acc
FROM (
    SELECT
        f.*,
        -- seqnum = 1 marks the first occurrence of the account overall.
        ROW_NUMBER() OVER (PARTITION BY f."Acc" ORDER BY f."Year", f."Month") AS seqnum
    FROM #FACT AS f
) AS f;
This counts each account during the first month when it appears.
EDIT:
Oops. The above doesn't aggregate by year and month and then start over for each year. Here is the correct solution:
-- One row per (year, month) with the cumulative number of distinct accounts
-- seen so far in that year.
-- The account column of #FACT is "Acc" (see the CREATE TABLE), so it is
-- referenced by that name here.
SELECT
    f."Year",
    f."Month",
    -- Inner SUM: accounts first seen in this month; outer windowed SUM:
    -- running total of those counts, restarting at every year.
    SUM(SUM(CASE WHEN f.seqnum = 1 THEN 1 ELSE 0 END))
        OVER (PARTITION BY f."Year" ORDER BY f."Month") AS cume_distinct_acc
FROM (
    SELECT
        f.*,
        -- seqnum = 1 marks the first occurrence of the account within its year.
        ROW_NUMBER() OVER (PARTITION BY f."Acc", f."Year" ORDER BY f."Month") AS seqnum
    FROM #FACT AS f
) AS f
GROUP BY
    f."Year",
    f."Month"
ORDER BY
    f."Year",
    f."Month";
And, SQL Fiddle isn't working but the following is an example:
-- Self-contained example: the sample rows are supplied inline through VALUES.
WITH fact AS (
    SELECT
        v.yyyy,
        v.mm,
        v.account
    FROM (
        VALUES
            (2015, 1, 'A'),
            (2015, 1, 'B'),
            (2015, 1, 'B'),
            (2015, 1, 'C'),
            (2015, 2, 'D'),
            (2015, 2, 'E'),
            (2015, 3, 'E'),
            (2016, 1, 'A'),
            (2016, 1, 'A'),
            (2016, 2, 'B'),
            (2016, 2, 'C')
    ) AS v (yyyy, mm, account)
),
first_seen AS (
    -- seqnum = 1 marks the first month in which an account shows up in a year.
    SELECT
        f.*,
        ROW_NUMBER() OVER (PARTITION BY f.account, f.yyyy ORDER BY f.mm) AS seqnum
    FROM fact AS f
)
-- Count the first appearances per month, then accumulate them within the year.
SELECT
    s.yyyy,
    s.mm,
    SUM(SUM(CASE WHEN s.seqnum = 1 THEN 1 ELSE 0 END))
        OVER (PARTITION BY s.yyyy ORDER BY s.mm) AS cume_distinct_acc
FROM first_seen AS s
GROUP BY
    s.yyyy,
    s.mm
ORDER BY
    s.yyyy,
    s.mm;

Demo Here:
-- Distinct accounts per (Year, Month), followed by a running sum of those
-- monthly counts that restarts at every year.
-- Column names follow the #FACT definition ("Year", "Month", "Acc").
;WITH cte AS (
    SELECT
        "Year",
        "Month",
        COUNT(DISTINCT "Acc") AS cnt
    FROM #FACT
    GROUP BY
        "Year",
        "Month"
)
SELECT
    "Year",
    "Month",
    -- "Year" is already the partition key, so ordering by "Month" alone suffices.
    SUM(cnt) OVER (PARTITION BY "Year" ORDER BY "Month" ROWS UNBOUNDED PRECEDING) AS x
FROM cte
-- Explicit ordering makes the result deterministic.
ORDER BY
    "Year",
    "Month";

Related

sql that finds records within 3 days of a condition being met

I am trying to find all records that exist within a date range prior to an event occurring. In the table below, I want to pull all records that are 3 days or less before the switch field changes from 0 to 1, ordered by date and partitioned by product. My solution does not work: it includes the first record, which should be skipped because it is outside the 3-day window. I am scanning a table with millions of records; is there a way to reduce the complexity/cost while still getting my desired results?
http://sqlfiddle.com/#!18/eebe7
-- Sample table: one row per product per day with the switch state on that day.
CREATE TABLE productlist
(
    [product]   varchar(13),
    [switch]    int,
    [switchday] date
);

INSERT INTO productlist ([product], [switch], [switchday])
VALUES
    ('a', 0, '2019-12-28'),
    ('a', 0, '2020-01-02'),
    ('a', 1, '2020-01-03'),
    ('a', 0, '2020-01-06'),
    ('a', 0, '2020-01-07'),
    ('a', 1, '2020-01-09'),
    ('a', 1, '2020-01-10'),
    ('a', 1, '2020-01-11'),
    ('b', 1, '2020-01-01'),
    ('b', 0, '2020-01-02'),
    ('b', 0, '2020-01-03'),
    ('b', 1, '2020-01-04');
my solution:
-- Attempt from the question: list switch=0 rows that lie close to a 0 -> 1 transition.
-- switches: tag every switch=0 row whose next row (per product, by switchday) has switch=1.
with switches as (
SELECT
*,
case when lead(switch) over (partition by product order by switchday)=1
and switch=0 then 'first day switch'
else null end as leadswitch
from productlist
),
-- switchdays: keep only the tagged rows.
switchdays as (
select * from switches
where leadswitch='first day switch'
)
select pl.*
,'lead'
from productlist pl
-- NOTE(review): the join requires pl.switchday = ss.switchday, so the datediff
-- below is always 0 for matched rows. Because this is a LEFT JOIN and nothing
-- filters on ss, every switch=0 row of productlist is returned whether or not
-- it matched.
left join switchdays ss
on pl.product=ss.product
and pl.switchday = ss.switchday
and datediff(day, pl.switchday, ss.switchday)<=3
where pl.switch=0
desired output, capturing records that occur within 3 days of a switch going from 0 to 1, for each product, ordered by date:
product switch switchday
a 0 2020-01-02 lead
a 0 2020-01-06 lead
a 0 2020-01-07 lead
b 0 2020-01-02 lead
b 0 2020-01-03 lead

If I understand correctly, you can just use lead() twice:
-- Keep switch=0 rows for which one of the next two rows (per product, ordered
-- by switchday) has switch = 1.
WITH looked_ahead AS (
    SELECT
        pl.*,
        LEAD(pl.switch)    OVER (PARTITION BY pl.product ORDER BY pl.switchday) AS next_switch_1,
        LEAD(pl.switch, 2) OVER (PARTITION BY pl.product ORDER BY pl.switchday) AS next_switch_2
    FROM productlist AS pl
)
SELECT la.*
FROM looked_ahead AS la
WHERE la.switch = 0
  AND (la.next_switch_1 = 1 OR la.next_switch_2 = 1);
Here is a db<>fiddle.
EDIT (based on comment):
-- Keep switch=0 rows whose next switch=1 day (same product) is at most 3 days away.
SELECT pl.*
FROM (
    SELECT
        pl.*,
        -- Ordering by switchday DESC makes the default window frame cover the
        -- current row and all later days, so MIN(...) is the earliest
        -- switch=1 day on or after this row.
        MIN(CASE WHEN pl.switch = 1 THEN pl.switchday END)
            OVER (PARTITION BY pl.product ORDER BY pl.switchday DESC) AS next_switch_1_day
    FROM productlist AS pl
) AS pl
WHERE pl.switch = 0
  -- 3-day window as stated in the question; the desired output includes
  -- 2020-01-06, which is 3 days before the 2020-01-09 switch.
  AND pl.next_switch_1_day <= DATEADD(day, 3, pl.switchday);

Making a pivot table group by users

I want to see user statistics, so I wrote this query:
-- Per partner and product manager: item counts and a percentage figure.
-- prod_man is built as "<meno> <initial>." where the initial is the first two
-- letters of priezvisko when they are one of Sz/Gy/Ny/Zs/Cs, otherwise the
-- first letter only.
SELECT l.partner AS Partner ,
bu.meno||' '||decode(substr(bu.priezvisko, 1, 2), 'Sz',
substr(bu.priezvisko, 1, 2), 'Gy',
substr(bu.priezvisko, 1, 2), 'Ny',
substr(bu.priezvisko, 1, 2), 'Zs',
substr(bu.priezvisko, 1, 2), 'Cs',
substr(bu.priezvisko, 1, 2),
substr(bu.priezvisko, 1, 1))
||'.' AS prod_man -- Hungarian surnames may start with a two-letter digraph
, SUM(CASE
WHEN o.pocet!=0 THEN 1
ELSE 0
END) AS obj_pocet -- number of rows with a non-zero pocet
, SUM(CASE
WHEN o.pocet=0 OR o.p_del+o.p_del_dod>=o.pocet THEN 1
ELSE 0
END) AS nedod_pocet -- number of rows with pocet = 0 or p_del + p_del_dod >= pocet
, ROUND(SUM(CASE
WHEN o.pocet=0 OR o.p_del+o.p_del_dod>=o.pocet THEN 1
ELSE 0
END)/count(*), 3) * 100 AS "%" -- share of such rows among all rows of the group, as a percentage
FROM obj_odb_o o
JOIN obj_odb_l l ON o.rid_o=l.rid
JOIN sklad_karta sk ON sk.id=o.kod_id
-- NOTE(review): "sk.id.prod_man" looks like a typo for a single column of
-- sklad_karta (e.g. sk.id_prod_man or sk.prod_man) - confirm against the schema.
JOIN bartex_users bu ON bu.id=sk.id.prod_man
WHERE l.partner in (325,
326)
-- The GROUP BY repeats the prod_man expression from the SELECT list verbatim.
GROUP BY l.partner
, bu.meno||' '||decode(substr(bu.priezvisko, 1, 2), 'Sz',
substr(bu.priezvisko, 1, 2), 'Gy',
substr(bu.priezvisko, 1, 2), 'Ny',
substr(bu.priezvisko, 1, 2), 'Zs',
substr(bu.priezvisko, 1, 2), 'Cs',
substr(bu.priezvisko, 1, 2),
substr(bu.priezvisko, 1, 1))
||'.'
It's working. Here is the result:
But I want to make a pivot by Months (last 6 months)...
-- Pivot skeleton from the question; the inner SELECT list is still a placeholder.
-- MONTHS: 7 rows (LEVEL 1..7) - the current month and the six before it - each
-- with the label used as a pivot column name.
WITH MONTHS AS
(
SELECT ADD_MONTHS(TRUNC(SYSDATE,'MONTH'),-LEVEL+1) AS MONTH,
DECODE(LEVEL,1,'Akt_mesiac','minuly_mesiac'||(LEVEL-1)) AS MONTH_NAME FROM DUAL CONNECT BY LEVEL <=7)
SELECT
partner,
prod_man,
-- NOTE(review): '%' in single quotes is a string literal, not a reference to
-- a column named "%".
'%',
NVL(Akt_mesiac,0) AS Akt_mesiac,
NVL(minuly_mesiac1,0) AS minuly_mesiac1,
NVL(minuly_mesiac2,0) AS minuly_mesiac2,
NVL(minuly_mesiac3,0) AS minuly_mesiac3,
NVL(minuly_mesiac4,0) AS minuly_mesiac4,
NVL(minuly_mesiac5,0) AS minuly_mesiac5,
NVL(minuly_mesiac6,0) AS minuly_mesiac6
FROM (
SELECT
-- my query - HERE I HAVE PROBLEM HERE
FROM MONTHS M
JOIN obj_odb_l l ON M.MONTH=TRUNC(l.datum_p,'MONTH')
) PIVOT
-- The pivot aggregates a column named CNT, which the placeholder SELECT above
-- would have to produce.
( SUM(CNT)
FOR MONTH_NAME IN
('Akt_mesiac' AS Akt_mesiac,
'minuly_mesiac1' AS minuly_mesiac1,
'minuly_mesiac2' AS minuly_mesiac2,
'minuly_mesiac3' AS minuly_mesiac3,
'minuly_mesiac4' AS minuly_mesiac4,
'minuly_mesiac5' AS minuly_mesiac5,
'minuly_mesiac6' AS minuly_mesiac6)
);
Table: obj_odb_l l ->date column -> l.datum_p -> trunc(l.datum_p,'MONTH')
How can I make a pivot table ?

Consider adding the month expression, TRUNC(l.datum_p,'MONTH'), to the aggregate query above. Then include that query as another CTE in the pivot query and join it to MONTHS in the pivot's data source.
-- Pivot the monthly percentage of each (partner, prod_man) into one column per
-- month label.
WITH MONTHS AS (
    -- 7 rows (LEVEL 1..7): the current month and the six before it, each with
    -- the label that becomes a pivot column.
    SELECT ADD_MONTHS(TRUNC(SYSDATE,'MONTH'),-LEVEL+1) AS MONTH
         , DECODE(LEVEL,1,'Akt_mesiac','minuly_mesiac'||(LEVEL-1)) AS MONTH_NAME
    FROM DUAL CONNECT BY LEVEL <=7
)
, AGG AS (
    -- SAME AGGREGATE QUERY WITH TRUNC(l.datum_p,'MONTH') ADDED TO SELECT AND GROUP BY
    -- POSSIBLY ADD WHERE CONDITION FOR LAST SIX MONTHS (IF DATA GOES BACK YEARS)
)
SELECT *
FROM (
    SELECT AGG.partner
         , AGG.prod_man
         , AGG.obj_pocet
         , AGG.nedod_pocet
         -- A column named % must be referenced as a double-quoted identifier;
         -- single quotes would make it a string literal.
         , AGG."%" AS PCT -- AVOID SPECIAL CHARS AS NAME
         , M.MONTH_NAME
    FROM MONTHS M
    INNER JOIN AGG
        ON M.MONTH = AGG.MONTH -- NEW FIELD USED FOR JOIN
)
PIVOT
( SUM(PCT) -- ONLY PIVOTS ONE NUM AT A TIME
  FOR MONTH_NAME IN
  ('Akt_mesiac' AS Akt_mesiac,
   'minuly_mesiac1' AS minuly_mesiac1,
   'minuly_mesiac2' AS minuly_mesiac2,
   'minuly_mesiac3' AS minuly_mesiac3,
   'minuly_mesiac4' AS minuly_mesiac4,
   'minuly_mesiac5' AS minuly_mesiac5,
   'minuly_mesiac6' AS minuly_mesiac6)
);

When changing "NLS_CALENDAR" parameter from Gregorian to Persian,it has no result?

I have the following table to calculate the trend of inventory in the warehouse.
-- Inventory movements: one row per registration, with a signed quantity.
create table TREND_WAREHOUSE
(
  id           NUMBER(30) not null,
  reg_date     NUMBER(30),   -- numeric date, e.g. 13930527 (Persian calendar per the question)
  org_id       NUMBER(30),
  goods_id     NUMBER(30),
  qty          NUMBER(30),
  reg_datetime TIMESTAMP(6)
);  -- terminator added so the INSERT below starts a new statement

insert into TREND_WAREHOUSE (id, reg_date, org_id, goods_id, qty, reg_datetime)
values (1, 13930527, 10, 1, 100, to_timestamp('18-08-2014 00:00:00.000000', 'dd-mm-yyyy hh24:mi:ss.ff'));
insert into TREND_WAREHOUSE (id, reg_date, org_id, goods_id, qty, reg_datetime)
values (2, 13930609, 10, 1, 20, to_timestamp('31-08-2014 14:03:44.689000', 'dd-mm-yyyy hh24:mi:ss.ff'));
insert into TREND_WAREHOUSE (id, reg_date, org_id, goods_id, qty, reg_datetime)
values (3, 13940902, 10, 1, -20, to_timestamp('23-11-2015 00:00:00.000000', 'dd-mm-yyyy hh24:mi:ss.ff'));
insert into TREND_WAREHOUSE (id, reg_date, org_id, goods_id, qty, reg_datetime)
values (4, 13940902, 10, 1, 100, to_timestamp('23-11-2015 00:00:00.000000', 'dd-mm-yyyy hh24:mi:ss.ff'));
insert into TREND_WAREHOUSE (id, reg_date, org_id, goods_id, qty, reg_datetime)
values (5, 13940902, 10, 1, 300, to_timestamp('23-11-2015 00:00:00.000000', 'dd-mm-yyyy hh24:mi:ss.ff'));
commit;
The "reg_date" column stores the date in the Persian calendar, and I want to use it for calculating the inventory trend.
here is my query to calculate it in Gregorian Calendar and it works successfully
-- Monthly average of the daily running inventory total (Gregorian variant).
-- date_inventory_range: first day of the earliest month and last day of the
-- latest month found in the data.
-- NOTE(review): SUBSTR is applied to a TIMESTAMP column, which goes through an
-- implicit string conversion; presumably this depends on the session NLS
-- format settings - confirm.
WITH date_inventory_range AS
(SELECT TRUNC(MIN(date_inventory), 'MONTH') AS first_date_inventory,
ADD_MONTHS(TRUNC(MAX(date_inventory), 'MONTH'), 1) - 1 AS last_date_inventory
FROM (SELECT W.ORG_ID,
W.GOODS_ID,
TO_DATE(TO_CHAR(CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE),
'YYYYMMDD'/*,
'NLS_CALENDAR=PERSIAN'*/),
'YYYYMMDD') date_inventory,
SUM(W.QTY) amount
FROM TREND_WAREHOUSE W
GROUP BY W.ORG_ID,
W.GOODS_ID,
CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE)
ORDER BY 3)),
-- all_dates: one row per calendar day between the two bounds above.
all_dates AS
(SELECT first_date_inventory + LEVEL - 1 AS date_inventory
FROM date_inventory_range
CONNECT BY LEVEL <= 1 + last_date_inventory - first_date_inventory),
-- dense_data: running sum of the daily amounts per (org_id, goods_id) over
-- every day of all_dates; the partitioned outer join supplies a row for days
-- without movements.
dense_data AS
(SELECT s.org_id,
s.goods_id,
TRUNC(d.date_inventory, 'MONTH') AS mnth,
NVL(SUM(s.amount) OVER(PARTITION BY s.org_id,
s.goods_id ORDER BY d.date_inventory),
0) AS total_amount
FROM all_dates d
LEFT OUTER JOIN (SELECT W.ORG_ID,
W.GOODS_ID,
CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE) date_inventory,
SUM(W.QTY) amount
FROM TREND_WAREHOUSE W
WHERE W.Org_Id = 10
and W.Goods_Id = 1
GROUP BY W.ORG_ID,
W.GOODS_ID,
CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE)
ORDER BY 3) s PARTITION BY(s.org_id, s.goods_id)
ON s.date_inventory = d.date_inventory)
-- Final result: average of the daily running total within each month.
SELECT org_id,
goods_id,
TO_CHAR(mnth, 'YYYY-MM') AS month,
AVG(total_amount) AS month_avg
FROM dense_data
GROUP BY org_id, goods_id, mnth
ORDER BY org_id, goods_id, mnth;
But when I change the "NLS_CALENDAR" parameter to Persian, the result is zero for all of the months.
-- Monthly average of the daily running inventory total (Persian variant).
-- date_inventory_range: first day of the earliest month and last day of the
-- latest month, computed from dates re-rendered with NLS_CALENDAR=PERSIAN.
-- NOTE(review): only this CTE applies NLS_CALENDAR=PERSIAN; the subquery joined
-- in dense_data still uses the plain CAST, so the two date_inventory values
-- are computed differently and the join on date_inventory compares them.
WITH date_inventory_range AS
(SELECT TRUNC(MIN(date_inventory), 'MONTH') AS first_date_inventory,
ADD_MONTHS(TRUNC(MAX(date_inventory), 'MONTH'), 1) - 1 AS last_date_inventory
FROM (SELECT W.ORG_ID,
W.GOODS_ID,
TO_DATE(TO_CHAR(CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE),
'YYYYMMDD',
'NLS_CALENDAR=PERSIAN'),
'YYYYMMDD') date_inventory,
SUM(W.QTY) amount
FROM TREND_WAREHOUSE W
GROUP BY W.ORG_ID,
W.GOODS_ID,
CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE)
ORDER BY 3)),
-- all_dates: one row per day between the two bounds above.
all_dates AS
(SELECT first_date_inventory + LEVEL - 1 AS date_inventory
FROM date_inventory_range
CONNECT BY LEVEL <= 1 + last_date_inventory - first_date_inventory),
-- dense_data: running sum of the daily amounts per (org_id, goods_id) over
-- every day of all_dates; NVL turns a missing sum into 0.
dense_data AS
(SELECT s.org_id,
s.goods_id,
TRUNC(d.date_inventory, 'MONTH') AS mnth,
NVL(SUM(s.amount) OVER(PARTITION BY s.org_id,
s.goods_id ORDER BY d.date_inventory),
0) AS total_amount
FROM all_dates d
LEFT OUTER JOIN (SELECT W.ORG_ID,
W.GOODS_ID,
CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE) date_inventory,
SUM(W.QTY) amount
FROM TREND_WAREHOUSE W
WHERE W.Org_Id = 10
and W.Goods_Id = 1
GROUP BY W.ORG_ID,
W.GOODS_ID,
CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE)
ORDER BY 3) s PARTITION BY(s.org_id, s.goods_id)
ON s.date_inventory = d.date_inventory)
-- Final result: average of the daily running total within each month.
SELECT org_id,
goods_id,
TO_CHAR(mnth, 'YYYY-MM') AS month,
AVG(total_amount) AS month_avg
FROM dense_data
GROUP BY org_id, goods_id, mnth
ORDER BY org_id, goods_id, mnth;
thanks for your guidance.

Because you need to replace CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE) with
TO_DATE(TO_CHAR(CAST(SUBSTR(W.REG_DATETIME, 1, 10) AS DATE),
'YYYYMMDD',
'NLS_CALENDAR=PERSIAN'),
'YYYYMMDD')
for each of the four occurrences: not only in the SELECT list, but also in the GROUP BY list.

How to make a specific group by (window like function)

Below you can see the table and context.
I want to get 3 groups from the context in the table, so i want to group by ABDC_IDENT but when the DATE_RANK order breaks as you can see in the data after DATE_RANK 11 comes 1,2 (because it is the group B) then it continues to rank up for the group A (the order by is by VARIOUS_DATES).
What i want to get is 3 groups, the first is group A rank 1 to 11, the second group is B rank 1,2 and the third group should be the group A but ranks from 12 to 21. I hope this is clear for everyone.
I'm currently experimenting with ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW; any idea is welcome, and maybe it can be done some other way. Cheers and thanks.
Here is my fiddle, so you can build it easy for yourself:
-- Sample data: identifier, date, and a per-identifier running rank.
CREATE TABLE Table1
(
    ABDC_IDENT    varchar(5),
    VARIOUS_DATES date,
    DATE_RANKS    int
);

INSERT INTO Table1 (ABDC_IDENT, VARIOUS_DATES, DATE_RANKS)
VALUES
    -- first run of A
    ('A', '31.12.2010', 1),
    ('A', '31.01.2011', 2),
    ('A', '28.02.2011', 3),
    ('A', '31.03.2011', 4),
    ('A', '29.04.2011', 5),
    ('A', '31.05.2011', 6),
    ('A', '30.06.2011', 7),
    ('A', '29.07.2011', 8),
    ('A', '31.08.2011', 9),
    ('A', '30.09.2011', 10),
    ('A', '31.10.2011', 11),
    -- run of B
    ('B', '30.11.2011', 1),
    ('B', '30.12.2011', 2),
    -- second run of A, ranks continue from 12
    ('A', '31.01.2012', 12),
    ('A', '29.02.2012', 13),
    ('A', '30.03.2012', 14),
    ('A', '30.04.2012', 15),
    ('A', '31.05.2012', 16),
    ('A', '29.06.2012', 17),
    ('A', '31.07.2012', 18),
    ('A', '31.08.2012', 19),
    ('A', '28.09.2012', 20),
    ('A', '31.10.2012', 21);
The desired result would be then inserted into another table
Table2
GROUP_ABC | MIN_DATE | MAX_DATE |
A |31.12.2010 | 31.10.2011 |
B |30.11.2011 | 30.12.2011 |
C |31.01.2012 | 31.10.2012 |

I think you can use convert format 104 to handle the date strings.
Does this work?
-- Gaps-and-islands: the global row number (by date) minus DATE_RANKS gives the
-- value grp, which together with ABDC_IDENT identifies one consecutive run.
-- Each run is then labelled A, B, C, ... in order of its first date.
select
    substring('ABCDEF', row_number() over (order by min(VARIOUS_DATES)), 1) as GROUP_ABC,
    min(VARIOUS_DATES) as MIN_DATE,
    max(VARIOUS_DATES) as MAX_DATE
from (
    select
        ABDC_IDENT,
        convert(date, VARIOUS_DATES, 104) as VARIOUS_DATES,  -- comma was missing here
        row_number() over (order by convert(date, VARIOUS_DATES, 104)) - DATE_RANKS as grp
    from Table1
) data
group by ABDC_IDENT, grp
or:
-- Same gaps-and-islands idea in two steps: the inner query builds one row per
-- run with its first and last date, and the outer query labels the runs
-- A, B, C, ... in order of MIN_DATE.
select
    substring('ABCDEF', row_number() over (order by MIN_DATE), 1) as GROUP_ABC,
    MIN_DATE,
    MAX_DATE
from (
    select
        ABDC_IDENT as GROUP_ABC,
        min(VARIOUS_DATES) as MIN_DATE,
        max(VARIOUS_DATES) as MAX_DATE
    from (
        select
            ABDC_IDENT,
            convert(date, VARIOUS_DATES, 104) as VARIOUS_DATES,  -- comma was missing here
            row_number()
                over (order by convert(date, VARIOUS_DATES, 104)) - DATE_RANKS as grp
        from Table1
    ) data
    group by ABDC_IDENT, grp
) t

doesn't sort correctly with ties

I'm trying to use row_number to calculate median, lower quartile, and upper quartile for a box plot chart. However, my row_number sort is off because of ties.
Here is some sample data:
-- Sample grades: one row per person per course date.
CREATE TABLE EStats
(
    PersonID   VARCHAR(30) NOT NULL,
    Grade      VARCHAR(25) NOT NULL,
    CourseDate Date        NOT NULL
);

INSERT INTO EStats (PersonID, Grade, CourseDate)
VALUES
    -- 2010 course
    ('100', '91', '2010-03-01'),
    ('101', '96', '2010-03-01'),
    ('102', '88', '2010-03-01'),
    ('103', '92', '2010-03-01'),
    ('104', '81', '2010-03-01'),
    ('105', '85', '2010-03-01'),
    ('106', '91', '2010-03-01'),
    ('107', '89', '2010-03-01'),
    ('108', '99', '2010-03-01'),
    ('109', '88', '2010-03-01'),
    -- 2011 course
    ('110', '81', '2011-03-02'),
    ('111', '77', '2011-03-02'),
    ('112', '88', '2011-03-02'),
    ('113', '76', '2011-03-02'),
    ('114', '69', '2011-03-02'),
    ('115', '70', '2011-03-02'),
    ('116', '75', '2011-03-02'),
    ('117', '88', '2011-03-02'),
    ('118', '76', '2011-03-02'),
    -- 2012 course
    ('119', '95', '2012-03-01'),
    ('120', '96', '2012-03-01'),
    ('121', '90', '2012-03-01'),
    ('122', '80', '2012-03-01'),
    ('123', '85', '2012-03-01'),
    ('124', '94', '2012-03-01'),
    ('125', '89', '2012-03-01'),
    ('126', '97', '2012-03-01'),
    ('127', '94', '2012-03-01'),
    ('128', '72', '2012-03-01'),
    ('129', '88', '2012-03-01'),
    ('130', '91', '2012-03-01');
Here is one of my inner queries that shows the sort not working:
-- Number the grades of each year twice: once ascending, once descending.
-- NOTE(review): ORDER BY Grade alone does not determine the order of rows with
-- equal grades, so RowAsc and RowDesc are not guaranteed to mirror each other
-- when ties exist; a unique tie-breaker (e.g. PersonID) in both ORDER BY
-- clauses would make the numbering deterministic.
SELECT
CourseDate,
Grade,
ROW_NUMBER() OVER (
PARTITION BY LEFT(CourseDate, 4)
ORDER BY Grade ASC) AS RowAsc,
ROW_NUMBER() OVER (
PARTITION BY LEFT(CourseDate, 4)
ORDER BY Grade DESC) AS RowDesc
FROM EStats
Notice that for CourseDate 2010-03-01 the RowAsc does this:
10
9
8
6
7
5
3
4
2
1
However, I need all of the rows to have a number in sequential order so that I can calculate median in the case where an even amount of numbers exists. (Rank and dense_rank don't work because of the "gaps" they leave).
Actually, below is the entire thing. Again, I'm trying to calculate the median, lower quartile, upper quartile, min, and max for a box plot chart. ANY help is really appreciated!
-- Box-plot statistics per CourseDate: median (Q3), lower quartile (Q2), upper
-- quartile (Q4), min and max.
-- NOTE(review): Grade is declared VARCHAR(25) in the EStats DDL, so ORDER BY
-- Grade, MIN(Grade) and MAX(Grade) operate on text rather than numbers - confirm
-- that this is intended.
-- Q3: median - average of the rows where the ascending and descending row
-- numbers meet (equal, or differing by one).
WITH Q3 AS
(
SELECT
CourseDate,
AVG(CAST(Grade AS Numeric)) AS Median
FROM
(
SELECT
CourseDate,
Grade,
ROW_NUMBER() OVER (
PARTITION BY LEFT(CourseDate, 4)
ORDER BY Grade ASC) AS RowAsc,
ROW_NUMBER() OVER (
PARTITION BY LEFT(CourseDate, 4)
ORDER BY Grade DESC) AS RowDesc
FROM EStats
)x
WHERE
RowAsc IN (RowDesc, RowDesc - 1, RowDesc + 1)
GROUP BY CourseDate
--ORDER BY CourseDate
),
-- Q2: lower quartile - the same middle-row technique applied only to rows
-- whose Grade is below the median of their CourseDate.
Q2 AS
(
SELECT
x.CourseDate,
AVG(CAST(Grade AS Numeric)) AS LowerQuartile
FROM
(
SELECT
Estats.CourseDate,
Estats.Grade,
ROW_NUMBER() OVER (
PARTITION BY LEFT(EStats.CourseDate, 4)
ORDER BY Grade ASC) AS RowAsc,
ROW_NUMBER() OVER (
PARTITION BY LEFT(Estats.CourseDate, 4)
ORDER BY Grade DESC) AS RowDesc
FROM EStats JOIN Q3 on EStats.CourseDate = Q3.CourseDate
WHERE EStats.Grade < Q3.Median
)x
WHERE
RowAsc IN (RowDesc, RowDesc - 1, RowDesc + 1)
GROUP BY x.CourseDate
),
-- Q4: upper quartile - the same technique applied to rows whose Grade is above
-- the median.
Q4 AS
(
SELECT
x.CourseDate,
AVG(CAST(Grade AS Numeric)) AS UpperQuartile
FROM
(
SELECT
Estats.CourseDate,
Estats.Grade,
ROW_NUMBER() OVER (
PARTITION BY LEFT(EStats.CourseDate, 4)
ORDER BY Grade ASC) AS RowAsc,
ROW_NUMBER() OVER (
PARTITION BY LEFT(Estats.CourseDate, 4)
ORDER BY Grade DESC) AS RowDesc
FROM EStats JOIN Q3 on EStats.CourseDate = Q3.CourseDate
WHERE EStats.Grade > Q3.Median
)x
WHERE
RowAsc IN (RowDesc, RowDesc - 1, RowDesc + 1)
GROUP BY x.CourseDate
)
-- Combine the three statistics and add min/max of Grade per CourseDate.
SELECT Q3.CourseDate, Q3.Median AS Median, Q2.LowerQuartile, Q4.UpperQuartile, MIN(EStats.Grade) AS Min, MAX(EStats.Grade) AS Max
FROM Q3
JOIN Q2 ON Q3.CourseDate = Q2.CourseDate
JOIN Q4 ON Q3.CourseDate = Q4.CourseDate
JOIN EStats ON Q3.CourseDate = EStats.CourseDate
GROUP BY Q3.CourseDate, Q3.Median, Q2.LowerQuartile, Q4.UpperQuartile
ORDER BY Q3.CourseDate

Try this to get the median:
-- Median of t.col: number the rows in col order, then average the single
-- middle row (odd row count) or the two middle rows (even row count).
WITH numbered AS (
    SELECT
        t.*,
        ROW_NUMBER() OVER (ORDER BY col) AS seqnum,
        COUNT(*) OVER () AS totnum
    FROM t
)
SELECT
    AVG(
        CASE
            -- odd row count: exactly one middle row
            WHEN 2 * seqnum = totnum + 1 THEN col
            -- even row count: the two rows around the middle
            WHEN 2 * seqnum IN (totnum, totnum + 2) THEN col
        END
    )
FROM numbered
It looks arcane, but the idea is to do the average that you want for the even numbers and the single value for the others. If using SQL Server, recall that it uses integer division. You can actually simplify the above to:
select avg(case when seqnum*2 in (totnum, totnum+1, totnum+2) then col end)
This works because an odd total cnt just matches totnum+1 and the even matches the other two values.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Running Distinct Count with a Partition - sql

Demo Here: ;with cte as ( SELECT yearr, monthh, count(distinct acc) as cnt FROM #fact GROUP BY yearr, monthh ) SELECT yearr ,monthh ,sum(cnt) over (Partition by yearr order by yearr, monthh rows unbounded preceding ) as x FROM cte

Related

sql that finds records within 3 days of a condition being met

Making a pivot table group by users

When changing "NLS_CALENDAR" parameter from Gregorian to Persian,it has no result?

How to make a specific group by (window like function)

doesn't sort correctly with ties

Categories

Resources