Group sequential repeated values sqlite - sql

I have data that repeated sequentially..
A
A
A
B
B
B
A
A
A
I need to group them like this
A
B
A
What is the best approach to do so using sqlite?

Assuming that you have a column that defines the ordering of the rows, say id, you can address this gaps-and-island problem with window functions:
-- Gaps-and-islands: within one run of equal values, the overall row position
-- and the position among rows sharing that value advance together, so their
-- difference is constant for the run and serves as a grouping key.
WITH numbered AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (ORDER BY id) AS overall_pos,
        ROW_NUMBER() OVER (PARTITION BY col ORDER BY id) AS pos_within_value
    FROM mytable AS src
)
SELECT
    col,
    COUNT(*) AS cnt,
    MIN(id) AS first_id,
    MAX(id) AS last_id
FROM numbered
GROUP BY col, overall_pos - pos_within_value
ORDER BY MIN(id)
I added a few columns to the resultset that give more information about the content of each group.

If you have defined a column that defines the order of the rows, like an id, you can use window function LEAD():
-- Keep a row only when the next row (in id order) holds a different value,
-- i.e. the last row of every run. LEAD's default of '' stands in for the
-- missing successor of the final row (assumes col is never the empty string).
WITH with_next AS (
    SELECT
        col,
        LEAD(col, 1, '') OVER (ORDER BY id) AS following_col
    FROM tablename
)
SELECT col
FROM with_next
WHERE following_col <> col
See the demo.
Results:
| col |
| --- |
| A |
| B |
| A |

Related

Distinct particular field in select query

I have table with below sample values.
|Id|Keyword|insertedon|
|:-|:------|:---------|
|1 | abcd | 13/12/20 |
|2 | cdef | 14/12/20 |
|3 | abcd | 14/12/20 |
|4 | defg | 14/12/20 |
In the above table i need distinct values of keywords order by insertedon desc order.
I need recent top 5 results.
Expected Result:
defc
abcd
cdef
Please let me know how to achieve this.
You get the top 5 results with TOP(5) in SQL Server. You'd order the keywords by their last insertedon date:
-- Compute each keyword's most recent insertedon first, then take the five
-- keywords with the newest such date.
SELECT TOP (5) latest.keyword
FROM (
    SELECT
        keyword,
        MAX(insertedon) AS last_insertedon
    FROM mytable
    GROUP BY keyword
) AS latest
ORDER BY latest.last_insertedon DESC;
If you are looking for latest entries based on insertedon column, you can find using the group by clause, something like this:
-- Latest insertedon per keyword, newest first.
-- The aggregate is given a name so the sort key is explicit instead of the
-- positional "order by 2", which silently changes meaning if the select list
-- is reordered.
select
    keyword,
    max(insertedon) as last_insertedon
from table  -- placeholder: substitute the real table name
group by keyword
order by last_insertedon desc
You can just use select distinct:
-- One row per keyword value; grouping with no aggregates yields the same
-- rows as SELECT DISTINCT.
SELECT keyword
FROM t
GROUP BY keyword;
If you wanted a full row, you could use row_number():
-- One complete row per keyword. Rows are numbered within each keyword in
-- NEWID() order, so which of a keyword's rows receives seqnum = 1 is arbitrary.
WITH numbered AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY keyword ORDER BY NEWID()) AS seqnum
    FROM t AS src
)
SELECT numbered.*
FROM numbered
WHERE seqnum = 1;
EDIT:
For the edited version, you can use:
-- Take the five most recently inserted rows, then collapse repeated keywords
-- among those five (so fewer than five keywords may be returned).
WITH newest_rows AS (
    SELECT TOP (5) keyword
    FROM t
    ORDER BY insertedon DESC
)
SELECT DISTINCT keyword
FROM newest_rows
Give a row number based on the descending order of the date column and then select the row wth row number 1.
Query
-- Number each keyword's rows from newest to oldest (id breaks ties between
-- rows with the same insertedon), then keep the newest row per keyword.
-- The CTE must select [keyword] (the outer query reads it) and needs a FROM
-- clause; both were missing.
;with cte as(
    select
        [keyword],
        [rn] = row_number() over(
            partition by [keyword]
            order by [insertedon] desc, [id] desc
        )
    from [your_table_name]  -- placeholder: the question does not name the table
)
select [keyword]
from cte
where [rn] = 1;
You can use the analytical functions as follows:
-- rn = 1 keeps each keyword's most recent row; dr <= 5 keeps only rows whose
-- insertedon is among the five most recent distinct insertedon values.
WITH ranked AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY keyword ORDER BY insertedon DESC) AS rn,
        DENSE_RANK() OVER (ORDER BY insertedon DESC) AS dr
    FROM t AS src
)
SELECT ranked.*
FROM ranked
WHERE dr <= 5
  AND rn = 1;

PostgreSQL group by column with aggregate

I need to group by id and select the task with min/max seq as start and end
id | task | seq
----+------+-----
1 | aaa | 1
1 | bbb | 2
1 | ccc | 3
SELECT
id,
CASE WHEN seq = MIN(seq) THEN task AS start,
CASE WHEN seq = MAX(seq) THEN task AS end
FROM table
GROUP BY id;
But this results in
ERROR: column "seq" must appear in the GROUP BY clause or be used in an aggregate function
But I do not want group by seq
One method uses arrays:
-- Per id: aggregate the tasks into an array ordered by seq (ascending for the
-- start, descending for the end) and take the first element of each array.
SELECT
    id,
    (ARRAY_AGG(task ORDER BY seq))[1]      AS start_task,
    (ARRAY_AGG(task ORDER BY seq DESC))[1] AS end_task
FROM table
GROUP BY id;
Another method uses window functions with SELECT DISTINCT:
-- Every row carries its id's first task in ascending and in descending seq
-- order; DISTINCT then collapses the identical rows to one per id.
SELECT DISTINCT
    id,
    FIRST_VALUE(task) OVER seq_ascending  AS start_task,
    FIRST_VALUE(task) OVER seq_descending AS end_task
FROM t
WINDOW
    seq_ascending  AS (PARTITION BY id ORDER BY seq),
    seq_descending AS (PARTITION BY id ORDER BY seq DESC);
You can use window functions with a derived table:
-- For every id, return the rows holding the lowest or the highest seq,
-- together with those two seq values (exposed as start and "end").
WITH seq_bounds AS (
    SELECT
        id,
        task,
        seq,
        MIN(seq) OVER (PARTITION BY id) AS min_seq,
        MAX(seq) OVER (PARTITION BY id) AS max_seq
    FROM the_table
)
SELECT
    id,
    task,
    min_seq AS start,
    max_seq AS "end"
FROM seq_bounds
WHERE seq = min_seq
   OR seq = max_seq
One option here would be to use ROW_NUMBER along with aggregation and pivoting logic:
-- Number each id's rows by seq in both directions, then pivot per id: the row
-- numbered 1 ascending supplies start, the row numbered 1 descending supplies end.
SELECT
    id,
    MAX(task) FILTER (WHERE rn_min = 1) AS start,
    MAX(task) FILTER (WHERE rn_max = 1) AS end
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY seq)      AS rn_min,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY seq DESC) AS rn_max
    FROM yourTable AS src
) AS numbered
GROUP BY id;
Demo

Select SUM and column with max

I looking best or simplest way to SELECT type, user_with_max_value, SUM(value) GROUP BY type. Table look similar
type | user | value
type1 | 1 | 100
type1 | 2 | 200
type2 | 1 | 50
type2 | 2 | 10
And result look:
type1 | 2 | 300
type2 | 1 | 60
Use window functions:
-- Per type: total of value, plus the user holding the highest value.
-- seqnum = 1 marks the highest-value row of each type. All rows must reach
-- the aggregation so that sum(value) covers the whole type; filtering on
-- seqnum = 1 beforehand would reduce the sum to the maximum value alone.
select type,
       max(case when seqnum = 1 then user end) as user_with_max_value,
       sum(value) as total_value
from (select t.*,
             row_number() over (partition by type order by value desc) as seqnum
      from t
     ) t
group by type;
Some databases have functionality for an aggregation function that returns the first value. One method without a subquery using standard SQL is:
-- Each row carries its type's highest-value user and its type's total;
-- DISTINCT collapses those identical rows to one per type.
SELECT DISTINCT
    type,
    FIRST_VALUE(user) OVER (PARTITION BY type ORDER BY value DESC) AS user,
    SUM(value) OVER (PARTITION BY type)
FROM t;
You can use window function :
-- Per type: the highest-value row (seq = 1) with the type's total value.
-- The user column is selected as well, since the requested output is
-- (type, user with the max value, sum); it was missing from the inner query.
select t.*
from (select t.type,
             t.user,
             row_number() over (partition by type order by value desc) as seq,
             sum(value) over (partition by type) as value
      from table t  -- placeholder: substitute the real table name
     ) t
where seq = 1;
Try below query.
It will help you.
-- Per type: the user whose value is the maximum, and the total of value.
-- max(user) would return the highest user id rather than the user holding
-- the highest value (2 instead of 1 for type2 in the sample data), so the
-- per-type maximum is joined back to find its owner. MIN(t.user) picks the
-- lowest user id when several users tie for the maximum.
SELECT s.type, MIN(t.user) AS user_with_max_value, s.total_value
FROM (SELECT type, MAX(value) AS max_value, SUM(value) AS total_value
      FROM table1
      GROUP BY type) s
INNER JOIN table1 t
    ON t.type = s.type
   AND t.value = s.max_value
GROUP BY s.type, s.total_value
use analytical functions
-- Sample table mirroring the question's (type, user, value) data.
create table poo2
(
    thetype  varchar(5),
    theuser  int,
    thevalue int
);

-- Explicit column list so the insert does not depend on column order.
insert into poo2 (thetype, theuser, thevalue)
values ('type1', 1, 100),
       ('type1', 2, 200),
       ('type2', 1, 50),
       ('type2', 2, 10);

-- r = 1 is the highest-value row of each type; mysum is the type's total,
-- computed over all rows of the type before the r = 1 filter is applied.
select thetype, theuser, mysum
from
(
    select thetype,
           theuser,
           row_number() over (partition by thetype order by thevalue desc) as r,
           sum(thevalue) over (partition by thetype) as mysum
    from poo2
) ilv
where r = 1;

SQL Server : create group of N rows each and give group number for each group

I want to create a SQL query that SELECT a ID column and adds an extra column to the query which is a group number as shown in the output below.
Each group consists of 3 rows and should have the MIN(ID) as a GroupID for each group. The order by should be ASC on the ID column.
ID GroupNr
------------
100 100
101 100
102 100
103 103
104 103
105 103
106 106
107 106
108 106
I've tried solutions with ROW_NUMBER() and DENSE_RANK(). And also this query:
SELECT
*, MIN(ID) OVER (ORDER BY ID ASC ROWS 2 PRECEDING) AS Groupnr
FROM
Table
ORDER BY
ID ASC
Use row_number() to enumerate the rows, arithmetic to assign the group and then take the minimum of the id:
-- Rows are numbered in ID order starting at 0; integer division by 3 puts
-- positions 0-2 in group 0, 3-5 in group 1, and so on. The lowest ID of each
-- group is then reported as the group number.
-- The inner query needs the alias t on the table, because its select list
-- refers to t.*.
SELECT t.*, MIN(ID) OVER (PARTITION BY grp) AS groupnumber
FROM (SELECT t.*,
             ((ROW_NUMBER() OVER (ORDER BY ID) - 1) / 3) AS grp
      FROM Table t  -- placeholder: substitute the real table name
     ) t
ORDER BY ID ASC;
It is possible to do this without a subquery, but the logic is rather messy:
-- Position within each block of three is row_number % 3: 1 = first row,
-- 2 = second, 0 = third. The first row's id is the group number, so the
-- second row looks back one row and the third row looks back two.
SELECT
    t.*,
    CASE ROW_NUMBER() OVER (ORDER BY id) % 3
        WHEN 0 THEN LAG(id, 2) OVER (ORDER BY id)
        WHEN 2 THEN LAG(id, 1) OVER (ORDER BY id)
        ELSE id
    END AS groupnumber
FROM table t
ORDER BY id;
Assuming you want the lowest value in the group, and they are always groups of 3, rather than the NTILE (as Saravantn suggests, which splits the data into that many even(ish) groups), you could use a couple of window functions:
-- Inline sample IDs are numbered from 0 in ID order and integer-divided by 3
-- to form groups of three; each row then reports its group's lowest ID.
SELECT
    G.ID,
    MIN(G.ID) OVER (PARTITION BY G.Grp) AS GroupNr
FROM (
    SELECT
        V.ID,
        (ROW_NUMBER() OVER (ORDER BY V.ID) - 1) / 3 AS Grp
    FROM (VALUES (100), (101), (102),
                 (103), (104), (105),
                 (106), (107), (108)) AS V (ID)
) AS G;
-- Numbered: every ID with the sequence number of its block of three rows.
-- GroupMin: the lowest ID of each block, which is the requested GroupNr.
-- The second output column is labelled GroupNr (both columns were named ID),
-- the numbering is written once instead of twice, and the result is ordered
-- by ID ascending as the question requires.
WITH Numbered AS (
    SELECT ID, (ROW_NUMBER() OVER (ORDER BY ID) - 1) / 3 + 1 AS GroupSeq
    FROM Table  -- placeholder: substitute the real table name
),
GroupMin AS (
    SELECT GroupSeq, MIN(ID) AS GroupNr
    FROM Numbered
    GROUP BY GroupSeq
)
SELECT n.ID, g.GroupNr
FROM Numbered AS n
INNER JOIN GroupMin AS g
    ON g.GroupSeq = n.GroupSeq
ORDER BY n.ID ASC;

Need to delete duplicate records from the table using row_number()

I am having a table test having data as follows and I want to delete the trsid 124 and I have millions entry in my DB it is just a scenarion. Concept is to delete the duplicate entry from the table
--------------------------------------------
TrsId | ID | Name |
--------------------------------------------
123 | 1 | ABC |
124 | 1 | ABC |
I am trying something like
delete from test
select T.* from
(
select ROW_NUMBER() over (partition by ID order by name) as r,
Trsid,
ID,
name
from test
) t
where r = 2
Even if I update the query which is Ok for me
update test set id=NULL
select T.* from
(
select ROW_NUMBER() over (partition by ID order by name) as r,
Trsid,
ID,
name
from test
) t
where r = 2
But if i run both this query it deletes all the records from table test. And if i update it update both the records.
I dont know what I am doing wrong here
-- Number the rows within each ID (ordered by name) and delete, through the
-- CTE, every row numbered after the first.
WITH numbered AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY ID ORDER BY name) AS Row
    FROM test
)
DELETE FROM numbered
WHERE Row >= 2
Use the below query.
-- Rows sharing the same ID and NAME are numbered by TrsId; deleting through
-- the CTE removes every row after the first, so the lowest TrsId of each
-- (ID, NAME) pair is kept.
;WITH ranked_rows AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY src.ID, src.NAME ORDER BY src.TrsId) AS Rno
    FROM YourTable AS src
)
DELETE FROM ranked_rows
WHERE Rno >= 2
-- Template, not runnable as written: replace <col1,col2,col3..coln> with the
-- columns that define a duplicate and <your table> with the table name.
-- Rows are numbered within each group of identical column values; the CTE
-- exposes only the rows numbered above 1.
WITH cte_DUP AS (
SELECT * FROM (
select <col1,col2,col3..coln>, row_number()
over(partition by <col1,col2,col3..coln>
order by <col1,col2,col3..coln> ) rownumber
from <your table> ) AB WHERE rownumber > 1)
-- The CTE already keeps only rownumber > 1, so this predicate repeats that filter.
DELETE FROM cte_DUP WHERE ROWNUMBER > 1
To find duplicate records we can write like below query,
-- Lists the duplicate rows: rows sharing the same (a, b) are numbered by NAME
-- and every row after the first of its group is returned.
-- b is omitted from the ORDER BY because it is constant within a partition
-- on (a, b); the alias is referenced with the same casing it was declared
-- with so the query also runs under a case-sensitive collation.
;WITH dup_val AS (
    SELECT a,
           b,
           ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY NAME) AS [RANK]
    FROM table_name
)
SELECT *
FROM dup_val
WHERE [RANK] > 1;