SQLServer Query Windowing - sql

I have a table like below
-- Sample input: four groups (group_rank 1-4), each holding row_rank 1-5.
select sample_rows.group_rank,
       sample_rows.row_rank
from (values
        (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
        (2, 1), (2, 2), (2, 3), (2, 4), (2, 5),
        (3, 1), (3, 2), (3, 3), (3, 4), (3, 5),
        (4, 1), (4, 2), (4, 3), (4, 4), (4, 5)
     ) as sample_rows (group_rank, row_rank)
I want to break row_rank further based upon size. If my size is 2, split the row_rank further like below. Output third column should be like below
-- Expected result for a split size of 2: batch_number keeps counting upwards
-- across groups, and a batch never contains rows from two different groups.
select expected_rows.group_rank,
       expected_rows.row_rank,
       expected_rows.batch_number
from (values
        (1, 1,  1), (1, 2,  1), (1, 3,  2), (1, 4,  2), (1, 5,  3),
        (2, 1,  4), (2, 2,  4), (2, 3,  5), (2, 4,  5), (2, 5,  6),
        (3, 1,  7), (3, 2,  7), (3, 3,  8), (3, 4,  8), (3, 5,  9),
        (4, 1, 10), (4, 2, 10), (4, 3, 11), (4, 4, 11), (4, 5, 12)
     ) as expected_rows (group_rank, row_rank, batch_number)
As the split size is 2,
first two rows within the 1st group_rank get the 1st batch number,
third and fourth rows within the 1st group_rank get the 2nd batch number,
fifth row within the 1st group_rank gets the 3rd batch number,
first two rows within the 2nd group_rank get the 4th batch number,
third and fourth rows within the 2nd group_rank get the 5th batch number,
fifth row within the 2nd group_rank gets the 6th batch number
... and so on ..
As I vary the split size, the batch numbers should grow or shrink accordingly.
Please provide me sql server TSQL query to do this.
Thanks,

Sounds like simple math to me, with a little 0-based/1-based voodoo at least:
-- @batch_size is an int variable (e.g. DECLARE @batch_size int = 2); assumes row_rank runs 1..n without gaps inside each group_rank
dense_rank() over (order by group_rank, (row_rank - 1) / @batch_size)
SQL Fiddle demo
What this does is:
row_rank - 1: make the row rank inside each group 0-based
/ @batch_size: divide by the "batch size" you want (integer division at its best), which gives the 0-based chunk index inside the group
dense_rank() over (order by group_rank, ...): number the distinct (group_rank, chunk index) pairs 1, 2, 3, ... across the entire set, so a batch never spans two groups
no + 1 needed: dense_rank() already starts at 1, which matches your output

Related

Calculate Total Runs For Each Over and I Also Want Sum of First and Second Over. Table Structure is Given Below

**Source Table: Cricket_Scores**
Over Balls Runs
1 1 1
1 2 2
1 3 4
1 4 0
1 5 1
1 6 2
2 1 3
2 2 1
2 3 1
2 4 4
2 5 6
2 6 0
And I Want Output Should be Like Below: -
Over Runs
1 10
2 25
10 Runs: - (Total Runs of first 6 balls which makes an One over) and
25 Runs: - (Total Runs of first 6 balls + Second 6 balls)
OR
Something Like Below: -
Over Runs
1 10
2 15
10 Runs: - (Total Runs of first 6 balls which makes an One over) and
15 Runs: - (Total Runs of Second 6 balls)
How to Write a Query In Oracle/SQL Developer.
GROUP BY the overs and SUM the runs to get the runs-per-over:
-- Runs scored in each over: one output row per distinct over.
SELECT   cs.over,
         SUM(cs.runs) AS runs_per_over
FROM     cricket_scores cs
GROUP BY cs.over
Then use an analytic function to get the running total:
-- Running total of runs after each over.
-- The inner SUM(runs) is the per-over total produced by GROUP BY; the outer
-- analytic SUM accumulates those totals in over order. After grouping, each
-- over value occurs once, so the default window frame cannot merge ties.
SELECT   over,
         SUM(SUM(runs)) OVER (ORDER BY over) AS total_runs
FROM     cricket_scores
GROUP BY over
-- Without this the row order of the result is unspecified.
ORDER BY over
Which, for the sample data:
-- Sample data: two overs of six balls each, with the runs scored off every ball.
CREATE TABLE cricket_scores (over, balls, runs) AS
    -- first over
    SELECT 1, 1, 1 FROM dual UNION ALL
    SELECT 1, 2, 2 FROM dual UNION ALL
    SELECT 1, 3, 4 FROM dual UNION ALL
    SELECT 1, 4, 0 FROM dual UNION ALL
    SELECT 1, 5, 1 FROM dual UNION ALL
    SELECT 1, 6, 2 FROM dual UNION ALL
    -- second over
    SELECT 2, 1, 3 FROM dual UNION ALL
    SELECT 2, 2, 1 FROM dual UNION ALL
    SELECT 2, 3, 1 FROM dual UNION ALL
    SELECT 2, 4, 4 FROM dual UNION ALL
    SELECT 2, 5, 6 FROM dual UNION ALL
    SELECT 2, 6, 0 FROM dual;
Outputs:
OVER TOTAL_RUNS
1 10
2 25
fiddle

How can the complete hierarchy of the parent child relation ship be shown in big query

Some background ---
I have two tables
One - table lists all the entities in the system ,
the other specifies the relationship between the entities
Ask --
The ask is looking at the tables can we chart out relationship for each of the child entity to the parent.
-- What I have done
-- rell: one row per direct child -> parent link.
CREATE TEMP TABLE rell AS
SELECT child_id, parent_id
FROM UNNEST(ARRAY<STRUCT<child_id INT64, parent_id INT64>>[
  (3, 2),
  (2, 1),
  (4, 1),
  (6, 2),
  (14, 6),
  (15, 14),
  (7, 8),
  (8, 5),
  (9, 10),
  (11, 12)
]);

-- mapp: every entity in the system together with its type ('app' or 'ci').
CREATE TEMP TABLE mapp AS
SELECT item_id, type
FROM UNNEST(ARRAY<STRUCT<item_id INT64, type STRING>>[
  (1, 'app'),
  (2, 'ci'),
  (3, 'ci'),
  (4, 'ci'),
  (5, 'app'),
  (6, 'ci'),
  (7, 'ci'),
  (8, 'ci'),
  (9, 'app'),
  (10, 'ci'),
  (11, 'ci'),
  (14, 'ci'),
  (15, 'ci'),
  (12, 'ci')
]);
The listing above shows that 'mapp' holds all the entities (those of type 'app' are the final parents) and that the 'rell' table holds the relations.
Can I have the output of something like below
original_child final_parent path
4 1 4>1
3 1 3>2>1
7 5 7>8>5
14 1 14>6>2>1
15 1 15>14>6>2>1
11 12 11>12
2 1 2>1
8 5 8>5
6 1 6>2>1
OK, so after much struggling, searching the internet, and trying out multiple options, here is what I have come up with. It took a lot of time to understand the details, but I think I have found a solution. Maybe it will save people the trouble that I went through. I will try to explain as I go.
-- Script state.
-- steps counts the passes made by the loop further down; table_holder carries
-- one entry per tracked child: the child itself, the highest ancestor reached
-- so far, and the path walked to get there.
DECLARE steps INT64 DEFAULT 1;
DECLARE table_holder ARRAY<
  STRUCT<
    original_child INT64,
    latest_parent  INT64,
    path           STRING
  >
>;

-- Dummy data.
-- rell: one row per direct child -> parent link.
CREATE TEMP TABLE rell AS
SELECT child_id, parent_id
FROM UNNEST(ARRAY<STRUCT<child_id INT64, parent_id INT64>>[
  (3, 2),
  (2, 1),
  (4, 1),
  (6, 2),
  (14, 6),
  (15, 14),
  (7, 8),
  (8, 5),
  (9, 10),
  (11, 12)
]);

-- mapp: every entity in the system together with its type ('app' or 'ci').
CREATE TEMP TABLE mapp AS
SELECT item_id, type
FROM UNNEST(ARRAY<STRUCT<item_id INT64, type STRING>>[
  (1, 'app'),
  (2, 'ci'),
  (3, 'ci'),
  (4, 'ci'),
  (5, 'app'),
  (6, 'ci'),
  (7, 'ci'),
  (8, 'ci'),
  (9, 'app'),
  (10, 'ci'),
  (11, 'ci'),
  (14, 'ci'),
  (15, 'ci'),
  (12, 'ci')
]);
-- Seed: one entry per non-'app' item that has a direct parent, with the
-- two-element path "child>parent".
SET table_holder = (
  SELECT ARRAY_AGG(
           STRUCT(
             a.item_id AS original_child,
             b.parent_id AS latest_parent,
             CONCAT(CAST(a.item_id AS STRING), ">", CAST(b.parent_id AS STRING)) AS path
           )
         )
  FROM mapp a
  INNER JOIN rell b
    ON a.item_id = b.child_id
  WHERE a.type != 'app'
);

-- Climb one level per pass. Stop as soon as no entry's latest_parent has a
-- parent of its own, so hierarchies of any depth are resolved completely
-- instead of being cut off after a fixed number of passes.
-- The cap on steps is only a runaway guard: cyclic data in rell would
-- otherwise keep the loop going forever. Raise it if deeper trees are expected.
WHILE steps < 100
  AND EXISTS (
    SELECT 1
    FROM UNNEST(table_holder) a
    INNER JOIN rell b
      ON a.latest_parent = b.child_id
  )
DO
  SET table_holder = (
    SELECT ARRAY_AGG(
             STRUCT(
               a.original_child AS original_child,
               -- entries that already reached the top keep their current parent
               COALESCE(b.parent_id, a.latest_parent) AS latest_parent,
               -- CONCAT yields NULL when there is no parent row, so COALESCE
               -- leaves a finished path untouched
               COALESCE(CONCAT(a.path, ">", CAST(b.parent_id AS STRING)), a.path) AS path
             )
           )
    FROM UNNEST(table_holder) a
    LEFT JOIN rell b
      ON a.latest_parent = b.child_id
  );
  SET steps = steps + 1;
END WHILE;

SELECT original_child, latest_parent, path
FROM UNNEST(table_holder);
Arrays and struct have been utilised as they are easier to play with. and bigquery scripting has been used for looping. Runaway condition can be increased if people expect many levels.
Here is the final output
original_child final_parent path
4 1 4>1
3 1 3>2>1
7 5 7>8>5
14 1 14>6>2>1
15 1 15>14>6>2>1
11 12 11>12
2 1 2>1
8 5 8>5
6 1 6>2>1
Hope it helps someone down the line for similar exercise.

Oracle Sql SUM MAX

I have following scenario:
ID Campus Credit_Hr
===== ====== ====
1 MIC 3
1 Warrens 4
1 Online 3
1 Online 3
2 MIC 5
2 Warrens 3
2 Online 6
3 Online 3
3 Online 3
3 West 2
4 Warrens 3
4 MIC 3
4 West 7
5 Online 3
5 West 3
5 East 3
Warrens and MIC are major campus. So, when Warrens and MIC has equal credit hr, like in ID 4, chose either Warrens / MIC
For ID 1: Warrens > MIC , chose Warrens though sum(Online) = 6 and is greater
For ID 2: MIC> Warrens, chose MIC
For ID 3: no major campus (Warrens/MIC), so choose the max credit hr. Here sum(Online) is the maximum, so choose Online
For ID 5: West / East /Online all are minor campus, so chose any of them.
There are more than 50 campuses in real.
Assign information about MAJOR campuses, then use this column for ordering, in addition to the sum of hours:
dbfiddle demo
-- For every id keep one campus: major campuses (MIC, Warrens) win over all
-- others; within the same tier the campus with the larger credit-hour total wins.
with campus_totals as (
    -- tag each row with its campus tier (1 = major, 2 = other) and the
    -- credit-hour total of its id/campus pair
    select id,
           campus,
           case when campus in ('MIC', 'Warrens') then 1 else 2 end as major,
           sum(credit_hr) over (partition by id, campus) as sm
    from   t
),
ranked as (
    -- best candidate per id gets rn = 1
    select ct.*,
           row_number() over (partition by id order by major, sm desc) as rn
    from   campus_totals ct
)
select *
from   ranked
where  rn = 1
If all you need is to select max credit hours for each ID, but in such a way that if credit hours exist for 'MIC' or 'Warrens' for a given ID, then all other campuses for the same ID should be ignored, then the most efficient way is to use the FIRST aggregate function, like so:
-- For each id, report the largest single credit_hr value, looking only at
-- major-campus rows (MIC, Warrens) whenever the id has any, and at all of
-- its rows otherwise.
with
  sample_data (id, campus, credit_hr) as (
    select 1, 'MIC',     3 from dual union all
    select 1, 'Warrens', 4 from dual union all
    select 1, 'Online',  3 from dual union all
    select 1, 'Online',  3 from dual union all
    select 2, 'MIC',     5 from dual union all
    select 2, 'Warrens', 3 from dual union all
    select 2, 'Online',  6 from dual union all
    select 3, 'Online',  3 from dual union all
    select 3, 'Online',  3 from dual union all
    select 3, 'West',    2 from dual union all
    select 4, 'Warrens', 3 from dual union all
    select 4, 'MIC',     3 from dual union all
    select 4, 'West',    7 from dual union all
    select 5, 'Online',  3 from dual union all
    select 5, 'West',    3 from dual union all
    select 5, 'East',    3 from dual
  )
select   id,
         max(credit_hr) keep (
           dense_rank first
           -- major campuses sort first (0), every other campus after them (1)
           order by case when campus in ('MIC', 'Warrens') then 0 else 1 end
         ) as max_hr
from     sample_data
group by id
order by id
;
ID MAX_HR
----- ------------------
1 4
2 5
3 3
4 3
5 3
You can also modify the query (add more columns) to show whether the max was from a main campus (that is, if that ID had ANY credit hours from one of the major campuses), and/or to show which campus had the max hours for that ID (or one of the campuses, if there was a tie for most hours).

Flag individuals that share common features with Oracle SQL

Consider the following table:
ID Feature
1 1
1 2
1 3
2 3
2 4
2 6
3 5
3 10
3 12
4 12
4 18
5 10
5 30
I would like to group the individuals based on overlapping features. If two of these groups again have overlapping features, I would consider both as one group. This process should be repeated until there are no overlapping features between groups. The result of this procedure on the table above would be:
ID Feature Flag
1 1 A
1 2 A
1 3 A
2 3 A
2 4 A
2 6 A
3 5 B
3 10 B
3 12 B
4 12 B
4 18 B
5 10 B
5 30 B
So actually the problem I am trying to solve is finding connected components in a graph. Here [1,2,3] is the graph with ID 1 (see https://en.wikipedia.org/wiki/Connectivity_(graph_theory)). The problem is equivalent to this problem, however I would like to solve it with Oracle SQL.
Here is one way to do this, using a hierarchical ("connect by") query. The first step is to extract the initial relationships from the base data; the hierarchical query is built on the result from this first step. I added one more row to the inputs to illustrate a node that is a connected component by itself.
You marked the connected components as A and B - of course, that won't work if you have, say, 30,000 connected components. In my solution, I use the minimum node name as the marker for each connected component.
-- Connected components: ids that share a feature, directly or through a chain
-- of other ids, receive the same id_group (the smallest id in the component).
with
  sample_data (id, feature) as (
    select 1,  1 from dual union all
    select 1,  2 from dual union all
    select 1,  3 from dual union all
    select 2,  3 from dual union all
    select 2,  4 from dual union all
    select 2,  6 from dual union all
    select 3,  5 from dual union all
    select 3, 10 from dual union all
    select 3, 12 from dual union all
    select 4, 12 from dual union all
    select 4, 18 from dual union all
    select 5, 10 from dual union all
    select 5, 30 from dual union all
    select 6, 40 from dual
  ),
  -- Every pair of ids sharing at least one feature, in BOTH directions.
  -- Each id also pairs with itself, so an id that shares nothing with anyone
  -- still shows up as a component of its own.
  initial_rel (id_base, id_linked) as (
    select distinct s1.id, s2.id
    from   sample_data s1
           join sample_data s2 on s1.feature = s2.feature
  )
-- There is no START WITH, so every link is a starting point. Links are
-- followed in both directions: following them only from the lower to the
-- higher id would split two ids that are joined solely through a higher id
-- (e.g. 1-3 and 2-3 would leave 2 in a group of its own). NOCYCLE stops the
-- walk from going round in circles on the two-way links.
-- NOTE(review): the walk enumerates paths, so very dense components may be
-- slow; measure on real data.
select   id_linked as id,
         min(connect_by_root(id_base)) as id_group
from     initial_rel
connect by nocycle prior id_linked = id_base
group by id_linked
order by id_group, id
;
Output:
ID ID_GROUP
------- ----------
1 1
2 1
3 3
4 3
5 3
6 6
Then, if you need to add the ID_GROUP as a FLAG to the base data, you can do so with a trivial join.

Contiguous Group By

I have the following table:
-- Full contents of mytable, listed in id order.
SELECT   mt.*
FROM     mytable mt
ORDER BY mt.id;
id name code time
1 A 111 1
2 A 111 2
3 A 888 3
4 A 888 4
5 A 888 5
6 A 888 6
7 A 888 7
8 A 111 8
9 A 111 9
10 A 111 10
I need to get a result like this:
name code times_between
A 111 1,2
A 888 3,7
A 111 8,10
Is it possible to group by "chunks"?
I need to make a distinction based on time, so I can't just group by name,code and get the first and last element only.
One way is this:
-- "Gaps and islands": rows with the same (name, code) that are adjacent in id
-- order share the same difference between their overall position and their
-- position within that (name, code). The difference grows whenever other rows
-- come in between, so it labels each contiguous chunk.
with the_table (id, name, code, time) as (
    select  1, 'A', 111,  1 union all
    select  2, 'A', 111,  2 union all
    select  3, 'A', 888,  3 union all
    select  4, 'A', 888,  4 union all
    select  5, 'A', 888,  5 union all
    select  6, 'A', 888,  6 union all
    select  7, 'A', 888,  7 union all
    select  8, 'A', 111,  8 union all
    select  9, 'A', 111,  9 union all
    select 10, 'A', 111, 10
),
chunked as (
    select name,
           code,
           time,
           id,
           row_number() over (order by id)
         - row_number() over (partition by name, code order by id) as grp
    from   the_table
)
select   name,
         code,
         -- first and last time of the chunk, named as in the requested output
         min(time) || ',' || max(time) as times_between
from     chunked
group by name, code, grp
order by min(id)
(I forgot, and can't find or remember, the name of the technique that creates the groups grp.)