selecting specific occurrences from a group SQL - sql

Data set looks like
id statusid statusdate
100 22 04/12/2016
100 22 04/14/2016
100 25 04/16/2016
100 25 04/17/2016
100 25 04/19/2016
100 22 04/22/2016
100 22 05/14/2016
100 27 05/19/2016
100 27 06/14/2016
100 25 06/18/2016
100 22 07/14/2016
100 22 07/18/2016
The task is to select the first time each status was logged, and the number of unique times each status was logged.
Example :
For Status 22
First time status date: 04/12/2016
Last time Status first date: 07/14/2016
Number of Unique times it went to that status: 3

This is more complicated than it looks. I assume you want the unique consecutive times for a given status.
Use a difference of row numbers approach to classify consecutive rows into groups. Then get the row numbers in those groups. Finally aggregate to get the first day in the first and last group and the number of distinct groups.
-- Per statusid: the first date of its earliest consecutive run, the first date
-- of its latest consecutive run, and how many separate runs it had.
-- grp (row-number difference) stays constant within one consecutive run of a status.
select statusid
,max(case when rn_grp_asc=1 then statusdate end) as first_time_first_status
,max(case when rn_grp_desc=1 then statusdate end) as last_time_first_status
,count(distinct grp) as unique_times_in_status
from (select t.*
-- rn_grp_asc = 1: earliest row of the earliest run for this status
,row_number() over(partition by statusid order by grp,statusdate) as rn_grp_asc
-- rn_grp_desc = 1: earliest row of the latest run for this status
,row_number() over(partition by statusid order by grp desc,statusdate) as rn_grp_desc
from (select t.*,row_number() over(order by statusdate)
-row_number() over(partition by statusid order by statusdate) as grp
from tbl t
) t
) t
group by statusid

-- Per (id, statusid, Status) for ID 100: first date of the earliest run,
-- first date of the latest run, and the number of distinct runs.
with runs as (
    -- grp = overall row number minus per-status row number, both ordered by Systemdate
    select ID,
           StatusID,
           Status,
           StatusDate,
           row_number() over (partition by id order by Systemdate)
             - row_number() over (partition by id, statusid order by Systemdate) as grp
    from t
    where ID = 100
),
ranked as (
    select runs.*,
           row_number() over (partition by id, statusid order by grp, statusdate) as rn_grp_asc,
           row_number() over (partition by id, statusid order by grp desc, statusdate) as rn_grp_desc
    from runs
)
select ID,
       statusid,
       Status,
       max(case when rn_grp_asc = 1 then statusdate end) as frst_time_first_status,
       max(case when rn_grp_desc = 1 then statusdate end) as last_time_first_status,
       count(distinct grp) as unique_times_in_status
from ranked
group by id, statusid, Status
Above query returns the correct results, but the following query does not.
-- Per (id, statusid, Status): first date of the earliest run, first date of the
-- latest run, and the number of distinct runs (grp = row-number difference).
-- In this variant the ID = 100 filter sits in the outermost query, so both layers
-- of row_number() are computed over every row of t before the filter is applied.
-- NOTE(review): every window here partitions by id, so the placement of the filter
-- alone should not change the numbering; ties on Systemdate would make the
-- ordering non-deterministic -- confirm against the data.
select ID,
statusid,
Status,
max(case when rn_grp_asc=1 then statusdate end) as frst_time_first_status,
max(case when rn_grp_desc=1 then statusdate end) as last_time_first_status,
count(distinct grp) as unique_times_in_status
from ( select t.*,
row_number() over(partition by id,statusid order by grp,statusdate) as rn_grp_asc,
row_number() over(partition by id,statusid order by grp desc,statusdate) as rn_grp_desc
from ( Select ID,
StatusID,
Status,
StatusDate,
row_number() over(partition by id order by Systemdate)-row_number() over(partition by id,statusid order by Systemdate) as grp
from t
) t
) t
-- filter applied after the window functions above have been evaluated
where ID=100
group by id,statusid,Status

Related

How to get columns from multiple rows in a single row in SQL

I want to get 2 columns col_a and col_b's values for min and max of some other column. For example:
id
last_updated
col_a
col_b
1
2021-01-01
abc
xyz
1
2021-01-02
abc_0
xyz_0
1
2021-01-03
abc_1
xyz_1
1
2021-01-04
abc_2
xyz_2
2
2021-01-01
abc
xyz
2
2021-01-01
abc
xyz
...
I want to get the result:
|1|abc|abc_2|xyz|xyz_2|
That is the result of grouping by id, and getting the values of these columns while putting the condition of min and max on some other column(last_updated).
I came up with the following query:
-- Latest and earliest last_updated for each id.
SELECT
    id,
    MAX(last_updated),
    MIN(last_updated)
FROM my_table
GROUP BY id
This gives me the id and min and max dates but not the other 2 columns. I'm not sure how to get the values for the other 2 columns for both dates in same query.
You can use MIN and MAX analytical function as follows:
-- For each id, return col_a / col_b from the rows that carry the id's
-- earliest and latest last_updated values.
WITH bounds AS (
    -- Attach the per-id minimum and maximum last_updated to every row
    SELECT t.*,
           MIN(last_updated) OVER (PARTITION BY id) AS mindt,
           MAX(last_updated) OVER (PARTITION BY id) AS maxdt
    FROM your_table t
)
SELECT id,
       MAX(CASE WHEN last_updated = mindt THEN col_a END) AS min_col_a,
       MAX(CASE WHEN last_updated = maxdt THEN col_a END) AS max_col_a,
       MAX(CASE WHEN last_updated = mindt THEN col_b END) AS min_col_b,
       MAX(CASE WHEN last_updated = maxdt THEN col_b END) AS max_col_b
FROM bounds
GROUP BY id
We can use ROW_NUMBER, twice, to find the first and last rows, as ordered by last_updated, for each id group of records. Then, aggregate by id and pivot out columns for the various col_a and col_b values.
-- For each id, pivot col_a / col_b of the first and last rows (by last_updated)
-- into a single output row.
select
    id,
    max(case when rn_min = 1 then col_a end) as col_a_min,
    max(case when rn_max = 1 then col_a end) as col_a_max,
    max(case when rn_min = 1 then col_b end) as col_b_min,
    max(case when rn_max = 1 then col_b end) as col_b_max
from (
    -- rn_min = 1 is the earliest row per id, rn_max = 1 the latest
    select yt.*,
           row_number() over (partition by id order by last_updated) as rn_min,
           row_number() over (partition by id order by last_updated desc) as rn_max
    from yourTable yt
) ranked
group by id;
Not the neatest solution but demonstrates another way to obtain the data you want. We join the table on itself as we normally want data from 2 rows, then we use cross apply to restrict it to first and last.
-- Pair each id's latest row (T1) with its earliest row (T2) via a self-join.
-- The CROSS APPLY is correlated on T1.id so that every id is compared only with
-- its own min/max dates; an uncorrelated subquery would let one id's dates match
-- another id's rows and return spurious pairs.
select T1.id, T2.col_a, T1.col_a, T2.col_b, T1.col_b
from #my_table T1
inner join #my_table T2 on T1.id = T2.id
cross apply (
select max(M.last_updated) MaxLastUpdated, min(M.last_updated) MinLastUpdated
from #my_table M
where M.id = T1.id
) X
where T1.last_updated = X.MaxLastUpdated and T2.last_updated = X.MinLastUpdated;
With the sample data provided, this appears to perform worse than the row_number() solution. The fastest solution is the one using analytical functions.
-- Per id: min/max last_updated plus col_a / col_b taken from the latest and
-- earliest rows, obtained with FIRST_VALUE over both sort directions.
WITH edges AS (
    SELECT
        *,
        FIRST_VALUE(col_a) OVER (PARTITION BY id ORDER BY last_updated DESC) AS col_aMax,
        FIRST_VALUE(col_a) OVER (PARTITION BY id ORDER BY last_updated ASC)  AS col_aMin,
        FIRST_VALUE(col_b) OVER (PARTITION BY id ORDER BY last_updated DESC) AS col_bMax,
        FIRST_VALUE(col_b) OVER (PARTITION BY id ORDER BY last_updated ASC)  AS col_bMin
    FROM my_table
)
SELECT
    id,
    MAX(last_updated) last_updatedMax,
    MIN(last_updated) last_updatedMin,
    MAX(col_aMax) col_aMax,
    MAX(col_aMin) col_aMin,
    MAX(col_bMax) col_bMax,
    MAX(col_bMin) col_bMin
FROM edges
GROUP BY id

Selecting rows that have row_number more than 1

I have a table as following (using bigquery):
id
year
month
sales
row_number
111
2020
11
1000
1
111
2020
12
2000
2
112
2020
11
3000
1
113
2020
11
1000
1
Is there a way in which I can select rows that have row numbers more than one?
For example, my desired output is:
id
year
month
sales
row_number
111
2020
11
1000
1
111
2020
12
2000
2
I don't want to just exclusively select rows with row_number = 2 but also row_number = 1 as well.
The original code block I used for the first table result is:
-- Monthly sales per id, with rows numbered within each id.
-- NOTE(review): ORDER BY id inside PARTITION BY id ties on every row, so the
-- order of the numbering within an id is not guaranteed.
SELECT
id,
year,
month,
SUM(sales) AS sales,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
FROM
table
GROUP BY
id, year, month
You can use window functions:
-- Keep only the rows whose id occurs more than once; the helper count column
-- is dropped from the output.
with counted as (
    select t.*,
           count(*) over (partition by id) as cnt
    from t
)
select counted.* except (cnt)
from counted
where cnt > 1;
As applied to your aggregation query:
-- Monthly sales per id, restricted to ids that have more than one monthly row.
-- cnt is the number of (year, month) groups for the id; it is removed from the output.
SELECT iym.* EXCEPT (cnt)
FROM (SELECT id, year, month,
SUM(sales) as sales,
ROW_NUMBER() OVER (Partition by id ORDER BY id ASC) AS row_number,
-- no ORDER BY needed: the count covers the whole id partition
COUNT(*) OVER (Partition by id) AS cnt
FROM table
GROUP BY id, year, month
) iym
WHERE cnt > 1;
You can wrap your query as in below example
-- Template: wrap an existing query and keep every row of any id that has at
-- least one row with row_number > 1. The helper flag is removed from the output.
SELECT * EXCEPT (flag)
FROM (
    SELECT *,
           COUNTIF(row_number > 1) OVER (PARTITION BY id) > 0 AS flag
    FROM (YOUR_ORIGINAL_QUERY)
)
WHERE flag
so it can look as
-- Monthly sales per id, keeping all rows of ids that reach row_number > 1.
WITH monthly AS (
    SELECT id,
           year,
           month,
           SUM(sales) AS sales,
           ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
    FROM table
    GROUP BY id, year, month
),
flagged AS (
    -- flag is true for every row of an id that has some row numbered above 1
    SELECT *,
           COUNTIF(row_number > 1) OVER (PARTITION BY id) > 0 AS flag
    FROM monthly
)
SELECT * EXCEPT (flag)
FROM flagged
WHERE flag
so when applied to sample data in your question - it will produce below output
Try this:
-- Monthly sales per id, keeping only ids for which a row numbered 2 exists.
WITH monthly AS (
    SELECT
        id,
        year,
        month,
        SUM(sales) AS sales,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
    FROM table
    GROUP BY id, year, month
)
SELECT *
FROM monthly cur
WHERE EXISTS (
    SELECT 1
    FROM monthly other
    WHERE other.id = cur.id
      AND other.row_number = 2
)
This is a straightforward use of an EXISTS subquery.
This is what I use, it's similar to #ElapsedSoul answer but from my understanding for static list "IN" is better than using "EXISTS" but I'm not sure if the performance difference, if any, is significant:
Difference between EXISTS and IN in SQL?
-- Monthly sales per id, keeping only ids that have some row numbered above 1.
with monthly as (
    select id,
           year,
           month,
           sum(sales) as sales,
           row_number() over (partition by id order by id asc) as ROW_NUM
    from table
    group by id, year, month
)
select *
from monthly
where id in (
    select id
    from monthly
    where ROW_NUM > 1
);

Grouping multiple lines onto one in Oracle SQL

How can I get the data set in Image 1 to look like the data in Image 2? Basically, rather than having each purchase on its own line, I want to group by Name and have all of that person's purchases on one line. They can buy a max of 5 items, and my database is about 30 million lines' worth of purchases.
P.S The date order is not important
You can use row_number() and conditional aggregation:
-- One row per name with up to five purchases spread across item_N / date_N columns.
-- seqnum numbers each person's purchases by date; slots beyond a person's last
-- purchase come back as NULL.
-- NOTE(review): "date" is used here as the column name from the question; if the
-- real column has that name it must be quoted in Oracle -- confirm the actual name.
select name,
max(case when seqnum = 1 then item end) as item_1,
max(case when seqnum = 1 then date end) as date_1,
max(case when seqnum = 2 then item end) as item_2,
max(case when seqnum = 2 then date end) as date_2,
max(case when seqnum = 3 then item end) as item_3,
max(case when seqnum = 3 then date end) as date_3,
max(case when seqnum = 4 then item end) as item_4,
max(case when seqnum = 4 then date end) as date_4,
max(case when seqnum = 5 then item end) as item_5,
max(case when seqnum = 5 then date end) as date_5
from (select t.*,
row_number() over (partition by name order by date asc) as seqnum
from t
) t
group by name;
You can use PIVOT with row_number as follows:
-- One row per name with up to five purchases pivoted into columns
-- (P1_ITEM, P1_PURCHASE_DATE, ... P5_ITEM, P5_PURCHASE_DATE).
-- Each aggregate carries an alias because PIVOT with more than one aggregate
-- needs them to produce distinct column names.
-- NOTE(review): every column of your_table other than item_purchased,
-- date_purchased and rn acts as an implicit grouping key -- confirm that only
-- name remains.
Select * from
(select t.*,
row_number() over (partition by name order by date_purchased) rn
from your_table t
) PIVOT
(Max(item_purchased) as item, max(date_purchased) as purchase_date
For rn in (1 as p1, 2 as p2, 3 as p3, 4 as p4, 5 as p5));

SQL Server : create group of N rows each and give group number for each group

I want to create a SQL query that SELECT a ID column and adds an extra column to the query which is a group number as shown in the output below.
Each group consists of 3 rows and should have the MIN(ID) as a GroupID for each group. The order by should be ASC on the ID column.
ID GroupNr
------------
100 100
101 100
102 100
103 103
104 103
105 103
106 106
107 106
108 106
I've tried solutions with ROW_NUMBER() and DENSE_RANK(). And also this query:
-- Minimum ID over a sliding window made of the current row and the two rows
-- before it (ordered by ID).
SELECT
    *,
    MIN(ID) OVER (ORDER BY ID ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS Groupnr
FROM Table
ORDER BY ID ASC
Use row_number() to enumerate the rows, arithmetic to assign the group and then take the minimum of the id:
-- Assign rows (ordered by ID) to buckets of three and label every row with the
-- smallest ID in its bucket.
SELECT t.*, MIN(ID) OVER (PARTITION BY grp) as groupnumber
FROM (SELECT t.*,
-- zero-based row position divided by 3 (integer division) gives the bucket number
( (ROW_NUMBER() OVER (ORDER BY ID) - 1) / 3) as grp
FROM Table t
) t
ORDER BY ID ASC;
It is possible to do this without a subquery, but the logic is rather messy:
-- Label each row with the first id of its bucket of three, using only window
-- functions: the zero-based position within the bucket says how far back to look.
select t.*,
       case (row_number() over (order by id) - 1) % 3
           when 2 then lag(id, 2) over (order by id)  -- third row of the bucket
           when 1 then lag(id, 1) over (order by id)  -- second row of the bucket
           else id                                    -- first row of the bucket
       end as groupnumber
from table t
order by id;
Assuming you want the lowest value in the group, and they are always groups of 3, rather than the NTILE (as Saravantn suggests, which splits the data into that many even(ish) groups), you could use a couple of window functions:
-- Self-contained example: bucket nine sample IDs into groups of three and
-- return each ID with the lowest ID of its bucket.
SELECT G.ID,
       MIN(G.ID) OVER (PARTITION BY G.Grp) AS GroupNr
FROM (
    -- zero-based row position divided by 3 gives the bucket number
    SELECT V.ID,
           (ROW_NUMBER() OVER (ORDER BY V.ID) - 1) / 3 AS Grp
    FROM (VALUES (100), (101), (102),
                 (103), (104), (105),
                 (106), (107), (108)) V(ID)
) G;
-- Pair every ID with the smallest ID of its bucket of three by joining the
-- numbered rows to the per-bucket minimum.
WITH numbered AS (
    -- one-based bucket number for each row, three rows per bucket
    SELECT ID,
           (ROW_NUMBER() OVER (ORDER BY ID) - 1) / 3 + 1 AS GroupNr
    FROM Table
),
bucket_min AS (
    SELECT MIN(ID) AS ID, GroupNr
    FROM numbered
    GROUP BY GroupNr
)
SELECT member.ID, bucket_min.ID
FROM bucket_min
INNER JOIN numbered member
    ON bucket_min.GroupNr = member.GroupNr

How to get the last inserted value for every id in SQL Server 2008?

I have to count the unique status from multiple values. Here is my table example
Id Status OrderId
-------------------
1 1 43
2 2 43
3 1 44
Desired output
It should give the count(status) for Status '1' is 1 and Status '2' is 1. But when using count its giving 2 for status '1'.
You have to do
count(DISTINCT status)
instead of
count(status)
to get
unique status from multiple values.
EDIT:
If you want to get (not count) the Status value of the last inserted record for every OrderId, then you can do:
-- Status of the most recently inserted row (highest Id) for every OrderId.
WITH ranked AS (
    SELECT Id,
           Status,
           OrderId,
           ROW_NUMBER() OVER (PARTITION BY OrderId ORDER BY Id DESC) AS rn
    FROM mytable
)
SELECT Status
FROM ranked
WHERE ranked.rn = 1
If you want to get the last status for each order:
-- Latest row (highest Id) for each OrderID.
-- The row number is aliased as rn so the outer filter can reference it.
with cte as(select *, row_number()
over(partition by OrderID order by Id desc) as rn from TableName)
select * from cte where rn = 1
Or:
-- Latest row (highest Id) for each OrderID, using a derived table.
-- The row number is aliased as rn so the outer filter can reference it.
select * from (select *, row_number()
over(partition by OrderID order by Id desc) as rn from TableName) t
where rn = 1