PostgreSQL order in recursive query - sql

I am trying to get the result of my query in its natural order, but I am failing.
-- Sample data: F1 holds a comma-separated list of integers for each id.
CREATE TABLE Tab2 (
    id INT,
    F1 VARCHAR(100)
);

INSERT INTO Tab2 (id, F1)
VALUES
    (1, '10,56,657,34,767,71'),
    (3, '1,5487,27,9'),
    (4, '11,13,37,2'),
    (2, '12,6,65,8,67,22,70,5');
-- Recursively split the comma-separated F1 column into one row per item.
WITH RECURSIVE etc (id, DataItem, F1) AS (
-- Anchor member: the first item of each list plus the text that follows it.
(SELECT id,
LEFT(F1, strpos(concat(F1, ','), ',')-1) AS Part,
overlay(F1 placing '' from 1 for strpos(concat(F1, ','),',')) AS Remainder
FROM Tab2
--ORDER BY Remainder
)
UNION ALL
-- Recursive member: peel the next item off the remainder while any text is left.
(SELECT id,
LEFT(F1, strpos(concat(F1, ','), ',')-1),
overlay(F1 placing '' from 1 for strpos(concat(F1, ','),','))
FROM etc e
WHERE F1 > ''
--ORDER BY Dataitem
)
)
-- NOTE: the window is ordered by id, which is also its partition key, so all rows of a
-- partition tie and num is not guaranteed to follow the item's position in the string.
SELECT id, row_number() over(partition BY id ORDER BY id) num, DataItem from etc ORDER BY id;
http://sqlfiddle.com/#!15/b0ccc6/89/0
Where is my mistake?

If I understand your query correctly you are trying to get all elements from your (badly designed) comma separated string. There is no need to use a recursive query for that.
You can convert the string to array which can then be "unnested" into rows. Using the option with ordinality will also return the index of each element in the array which can be used in an order by to preserve the original order of the items in the string.
-- Turn f1 into an array and expand it to one row per element;
-- WITH ORDINALITY adds each element's position within the array.
SELECT
    src.id,
    item.num,
    item.dataitem
FROM tab2 AS src
CROSS JOIN unnest(string_to_array(src.f1, ',')) WITH ORDINALITY AS item(dataitem, num)
ORDER BY
    src.id,
    item.num;
Online example

Assuming that you want to get DataItem in the order in which it appears in the comma-separated string, you can add another field to serve as an "index" (in the example below it is rowno).
For example:
id, dataitem
1, 10
1, 56
1, 657
...
1, 71
2, 12
...
2, 5
etc.
See:
-- Split F1 recursively while carrying a counter (rowno) that records each item's position.
WITH RECURSIVE etc (id, rowno, DataItem, F1) AS (
    -- Anchor: position 1, the first item, and the text after the first comma.
    SELECT
        id,
        1,
        LEFT(F1, strpos(concat(F1, ','), ',') - 1),
        overlay(F1 placing '' from 1 for strpos(concat(F1, ','), ','))
    FROM Tab2

    UNION ALL

    -- Recursive step: take the next item from the remainder while any text is left.
    SELECT
        prev.id,
        prev.rowno + 1,
        LEFT(prev.F1, strpos(concat(prev.F1, ','), ',') - 1),
        overlay(prev.F1 placing '' from 1 for strpos(concat(prev.F1, ','), ','))
    FROM etc AS prev
    WHERE prev.F1 > ''
)
SELECT
    id,
    DataItem
FROM etc
ORDER BY
    id,
    rowno;
SqlFiddle (after changes)

Related

Query to Find maximum possible combinations between two columns

The target is to create all possible combinations of joining the two columns. every article of the first column ('100','101','102','103') must be in the combination result.
Sample Code
-- Sample data: which supplier can deliver which article.
CREATE TABLE basis (
    article  INTEGER,
    supplier VARCHAR(10)
);

INSERT INTO basis (article, supplier)
VALUES
    (100, 'A'),
    (101, 'A'),
    (101, 'B'),
    (101, 'C'),
    (102, 'D'),
    (103, 'B');
Result set
combination_nr;article;supplier
1;100;'A'
1;101;'A'
1;102;'D'
1;103;'B'
2;100;'A'
2;101;'B'
2;102;'D'
2;103;'B'
3;100;'A'
3;101;'C'
3;102;'D'
3;103;'B'
Suppose we add one more row for article 102 with supplier 'A'; then our result set will look like this.
Also, according to the calculations given below, we would now have 24 result rows (6 combinations x 4 articles):
1;100;'A'
1;101;'A'
1;102;'A'
1;103;'B'
2;100;'A'
2;101;'A'
2;102;'D'
2;103;'B'
3;100;'A'
3;101;'B'
3;102;'A'
3;103;'B'
4;100;'A'
4;101;'B'
4;102;'D'
4;103;'B'
5;100;'A'
5;101;'C'
5;102;'A'
5;103;'B'
6;100;'A'
6;101;'C'
6;102;'D'
6;103;'B'
Already tried code
I have tried different cross joins, but they always return more rows than my expected result sets.
-- Attempt: pair every distinct supplier with every distinct article (a full Cartesian product).
-- NOTE(review): this reads from basis2, while the sample DDL creates basis — confirm the table name.
SELECT article, supplier
FROM (SELECT DISTINCT supplier FROM basis2) AS t1
CROSS JOIN (SELECT DISTINCT article FROM basis2) AS t2;
Calculations:
article 100: 1 supplier ('A')
article 101: 3 suppliers ('A','B','C')
article 102: 1 supplier ('D')
article 103: 1 supplier ('B')
unique articles: 4 (100,101,102,103)
1x3x1x1 x 4 = 12 (combination rows)
You can do what you want using a recursive CTE. It is easier to put the combinations in single rows rather than across multiple rows:
-- seqnum numbers the distinct articles consecutively in article order.
with b as (
    select b.*, dense_rank() over (order by article) as seqnum
    from basis b
),
-- Grow one string per combination: start from every supplier of the first article
-- and append every supplier of the next article in turn.
cte as (
    select cast(concat(article, ':', supplier) as varchar(max)) as suppliers, seqnum
    from b
    where seqnum = 1
    union all
    select concat(cte.suppliers, ',', b.article, ':', b.supplier), b.seqnum
    from cte
    inner join b
        on b.seqnum = cte.seqnum + 1
)
-- Only strings that reached the last article are complete combinations.
select row_number() over (order by suppliers), suppliers
from (
    select cte.*, max(seqnum) over () as max_seqnum
    from cte
) complete
where seqnum = max_seqnum;
For your particular result set, you can unroll the string:
-- seqnum numbers the distinct articles consecutively in article order.
with b as (
      select b.*, dense_rank() over (order by article) as seqnum
      from basis b
     ),
-- One 'article:supplier,article:supplier,...' string per combination, built by
-- appending every supplier of the next article to each string built so far.
     cte as (
      select convert(varchar(max), concat(article, ':', supplier)) as suppliers, seqnum
      from b
      where seqnum = 1
      union all
      select concat(cte.suppliers, ',', concat(article, ':', supplier)), b.seqnum
      from cte join
           b
           on b.seqnum = cte.seqnum + 1
     )
-- Number the complete combinations, then split each string back into rows.
select seqnum,
       left(s.value, charindex(':', s.value) - 1) as article,
       -- charindex takes (what to find, where to search); remove everything up to and
       -- including the ':' so that only the supplier remains
       stuff(s.value, 1, charindex(':', s.value), '') as supplier
from (select row_number() over (order by suppliers) as seqnum, suppliers
      from (select cte.*, max(seqnum) over () as max_seqnum
            from cte
           ) cte
      where seqnum = max_seqnum
     ) cte cross apply
     string_split(suppliers, ',') s;
Here is a db<>fiddle.

How to get last record from Master-Details tables

I have a table that has 3 columns.
-- Self-referencing table: Detail_ID, when set, holds the ID of another row of myTable.
create table myTable
(
ID int Primary key,
Detail_ID int references myTable(ID) null, -- nullable reference to myTable.ID (self-reference)
Master_Value varchar(50) -- references to master table
)
this table has the follow records:
-- Two chains of records; each row's Detail_ID is the ID of the row before it.
insert into myTable (ID, Detail_ID, Master_Value)
values
    (100, null, 'aaaa'),
    (101, 100,  'aaaa'),
    (102, 101,  'aaaa'),
    (103, 102,  'aaaa'),  -- last record of the 'aaaa' chain
    (200, null, 'bbbb'),
    (201, 200,  'bbbb'),
    (202, 201,  'bbbb')   -- last record of the 'bbbb' chain
The records are linked to each other through the ID and Detail_ID columns.
I need to select the last record for each Master_Value. The expected output is as follows:
lastRecordID Master_Value Path
202 bbbb 200=>201=>202
103 aaaa 100=>101=>102=>103
tips:
The records are not listed in order in the table.
I cannot use max(ID), because the data is not sorted (the ID column
may have been updated manually).
attempts:
I was able to prepare the following query, and it works well:
-- Walk every chain from its root row (Detail_ID is null), tracking the depth (RowOrder)
-- and the '=>'-separated list of IDs visited so far (Path).
with Q as
(
select ID ,Detail_ID, Master_Value , 1 RowOrder, CAST(id as varchar(max)) [Path] from myTable where Detail_ID is null
union all
select R.id,R.Detail_ID , r.Master_Value , (q.RowOrder + 1) RowOrder , (q.[Path]+'=>'+CAST(r.id as varchar(max))) [Path] from myTable R inner join Q ON Q.ID=R.Detail_ID --where r.Dom_ID_RowType=1010
)
select * into #q from Q
-- Greatest depth reached for each Master_Value.
select Master_Value, MAX(RowOrder) lastRecord into #temp from #Q group by Master_Value
-- Return the row found at that greatest depth for each Master_Value.
select
q.ID lastRecordID,
q.Master_Value,
q.[Path]
from #temp t
join #q q on q.RowOrder = t.lastRecord
where
q.Master_Value = t.Master_Value
But I need a simpler way (a single select) and a more efficient method.
Can anyone help me?
One method uses a correlated subquery to get the last value (which is how I interpreted your question):
-- Last record of each chain: a row that no other row with the same master_value
-- points to through its detail_id.
select t.*
from mytable t
where not exists (select 1
                  from mytable t2
                  where t2.master_value = t.master_value and
                        t2.detail_id = t.id
                 );
This returns rows that are not referred to by another row.
For the path, you need a recursive CTE:
-- Build the ID path of every chain, starting at its root row (detail_id is null),
-- then keep only the deepest row of each master_value, i.e. its last record.
with cte as (
      select master_value, id as first_id, id as child_id,
             convert(varchar(max), id) as path, 1 as lev
      from mytable t
      where detail_id is null
      union all
      -- '=>' is the separator used in the requested Path output (e.g. 200=>201=>202)
      select cte.master_value, cte.first_id, t.id, concat(cte.path, '=>', t.id), cte.lev + 1
      from cte join
           mytable t
           on t.detail_id = cte.child_id and t.master_value = cte.master_value
     )
select cte.*
from (select cte.*, max(lev) over (partition by master_value) as max_lev
      from cte
     ) cte
where max_lev = lev;
Here is a db<>fiddle.

Select first occurrence of list item in table

I have a list like this example:
abc, efg, rty
and a table with following data:
1 abcd
2 efgh
3 abcd
4 rtyu
5 efgh
Now I want to find, for each list item, the first row in the table whose value starts with that item. My expected result is:
1 abcd
2 efgh
4 rtyu
This is a complete script to do the job
-- Table variables are declared with an @ prefix in T-SQL.
-- @v_List holds the prefixes to look for.
Declare @v_List Table
(
    Text nvarchar(100)
)
-- @v_Data holds the rows to search; Number defines which row comes first.
Declare @v_Data Table
(
    Number int,
    Text nvarchar(100)
)
Insert Into @v_List values(N'abc')
Insert Into @v_List values(N'efg')
Insert Into @v_List values(N'rty')
Insert Into @v_Data values(1, N'abcd')
Insert Into @v_Data values(2, N'efgh')
Insert Into @v_Data values(3, N'abcd')
Insert Into @v_Data values(4, N'rtyu')
Insert Into @v_Data values(5, N'efgh')
;with CTE as
(
    -- Number the rows matching each prefix by ascending Number.
    Select D.Number,
           D.Text,
           ROW_NUMBER() OVER (PARTITION BY L.Text Order By D.Number) as Row_No
    From @v_Data D
    Join @v_List L
      On D.Text like L.Text + '%'
)
-- Keep the first matching row of every prefix, in a deterministic order.
Select CTE.Number,
       CTE.Text
From CTE
Where CTE.Row_No = 1
Order By CTE.Number
-- Tag each row with the index of the list item its Val starts with (0 = no match),
-- then return the rows holding the smallest Id of every non-zero tag.
select *
from TableName
where Id in (
    select min(tagged.Id)
    from (
        select
            Id,
            case
                when Val like 'abc%' then 1
                when Val like 'efg%' then 2
                when Val like 'rty%' then 3
                else 0
            end as temp
        from TableName
    ) tagged
    where tagged.temp > 0
    group by tagged.temp
)
You can use a windowed ROW_NUMBER to generate a sequential number by each different value, then just display the first one only.
-- Number the rows of every Value by ascending ID, then keep row 1 of the values of interest.
;WITH NumberedRows AS (
    SELECT
        src.ID,
        src.Value,
        ROW_NUMBER() OVER (PARTITION BY src.Value ORDER BY src.ID) AS RowNumber
    FROM YourTable AS src
)
SELECT
    nr.ID,
    nr.Value
FROM NumberedRows AS nr
WHERE nr.RowNumber = 1
  AND nr.Value IN ('abcd', 'efgh', 'rtyu')
For SQL Server I prefer this version, which does not require a subquery:
-- TOP ... WITH TIES keeps every row tied on the ORDER BY expression, i.e. every
-- Value's row number 1 (its lowest-ID row).
SELECT TOP (1) WITH TIES
    ID,
    Value
FROM yourTable
WHERE Value LIKE 'abc%'
   OR Value LIKE 'efg%'
   OR Value LIKE 'rty%'
ORDER BY ROW_NUMBER() OVER (PARTITION BY Value ORDER BY ID);
-- Load the sample rows into a temporary table.
SELECT
    src.[id],
    src.[name]
INTO #temp
FROM (
    VALUES
        (1, 'abcd'),
        (2, 'efgh'),
        (3, 'abcd'),
        (4, 'rtyu'),
        (5, 'efgh')
) AS src([id], [name])
You can use min and group by function
-- Lowest id for each distinct name.
SELECT
    MIN(id),
    name
FROM #temp
GROUP BY name
You may use this, there are so many ways to achieve this, use whichever suits you better.
using subquery
-- Number each col's rows by ascending id in a derived table and keep the first one.
select
    numbered.id,
    numbered.col
from (
    select
        id,
        col,
        row_number() over (partition by col order by id) as slno
    from yourtable
) as numbered
where numbered.slno = 1
using cte
-- Number each col's rows by ascending id, then keep the first row of every col.
-- The source is named yourtable because TABLE is a reserved word and cannot be used
-- as an unquoted table name.
; with cte as (
select row_number() over (partition by col order by id) as Slno, id, col from yourtable)
select id, col from cte where slno=1
using min
-- Lowest id for each distinct col. The source is named yourtable because TABLE is a
-- reserved word and cannot be used as an unquoted table name.
select Min(id) , col from yourtable group by col
Note:-
In the end of any above mentioned query you may apply your where clause to filter your records as needed.

Compare groups have same binding keys

I have samples as below in 2 different tables. I need to write SQL that checks whether the same source keys are bound together in both tables. The binding key values themselves do not match between the two tables.
In below example, in table 1 there are 3 binding keys 1, 2 & 3. Binding key 1 has 3 members attached to it ABC, XYZ, & QBC. Similarly binding key 2 & 3 each has got 2 source keys attached to them.
In table 2, binding key 99 has same 3 keys attached which are same as table 1(both count and keys are identical) whereas binding key 78 has got the same count as table 1's binding key 2 but they source keys are different. binding key 64 has 1 source key and binding key 65 has 1.
table 1:
==============================
Binding Key|source Key
1|ABC
1|XYZ
1|QBC
2|xxx
2|yyy
3|uuu
3|ddd
Table 2:
==========================
Binding Key|source Key
99|XYZ
99|QBC
99|ABC
78|xxx
78|QQQ
64|uuu
65|ddd
The expected output identifies the source keys of the groups that do not match in either count or source key members.
Expected Output:
===========================
xxx
yyy
uuu
ddd
QQQ
Many Thanks!!
I found a solution. It uses the listagg function to concatenate the strings within each group and then compares the results. An example SQL statement is shown below.
-- Build one comma-separated member list per group in each table, then report the
-- groups whose list has no identical counterpart in the other table.
-- NOTE(review): the lists are ordered by a pos column — confirm that both tables have it
-- and that it yields the same member order on both sides.
SELECT g1.grp, g1.list
FROM (SELECT grp,
             ListAgg(elmnt, ',') WITHIN GROUP (ORDER BY pos) AS list
      FROM table1
      GROUP BY grp) g1
WHERE g1.list NOT IN (SELECT ListAgg(elmnt, ',') WITHIN GROUP (ORDER BY pos) AS list
                      FROM table2
                      GROUP BY grp)
UNION
SELECT g2.grp, g2.list
FROM (SELECT grp,
             ListAgg(elmnt, ',') WITHIN GROUP (ORDER BY pos) AS list
      FROM table2
      GROUP BY grp) g2
WHERE g2.list NOT IN (SELECT ListAgg(elmnt, ',') WITHIN GROUP (ORDER BY pos) AS list
                      FROM table1
                      GROUP BY grp);
This query gives desired output:
-- t1 / t2: add pos, the position of each element inside its group when sorted by elmnt.
with t1 as ( select row_number() over (partition by grp order by elmnt) pos,
grp, elmnt from table1 ),
t2 as ( select row_number() over (partition by grp order by elmnt) pos,
grp, elmnt from table2 ),
-- tx1 / tx2: attach to every row the comma-separated list of all elements of its group.
tx1 as (select pos, grp grp1, elmnt,
listagg(elmnt, ',') within group (order by pos)
over (partition by grp) list
from t1),
tx2 as (select pos, grp grp2, elmnt,
listagg(elmnt, ',') within group (order by pos)
over (partition by grp) list
from t2)
-- Rows pair up only when both the element and its whole group list are equal;
-- a null grp1 or grp2 marks an element that has no such partner in the other table.
select distinct elmnt
from (select * from tx1 full join tx2 using (list, elmnt))
where grp1 is null or grp2 is null;
You could easily change it to show lists, just replace distinct elmnt with distinct list. The difference between your answer and my query is listagg in analytic version and filtered full join instead of union combined with two not in clauses.
The first two subqueries (t1 and t2) only add the pos column, which you did not present in the original question ;-) This can probably also be done with the minus operator.
Test data and output:
-- Test data for the first table: three groups.
CREATE TABLE table1 (
    grp   NUMBER(3),
    elmnt VARCHAR2(5)
);
INSERT INTO table1 (grp, elmnt) VALUES (1, 'ABC');
INSERT INTO table1 (grp, elmnt) VALUES (1, 'XYZ');
INSERT INTO table1 (grp, elmnt) VALUES (1, 'QBC');
INSERT INTO table1 (grp, elmnt) VALUES (2, 'xxx');
INSERT INTO table1 (grp, elmnt) VALUES (2, 'yyy');
INSERT INTO table1 (grp, elmnt) VALUES (3, 'uuu');
INSERT INTO table1 (grp, elmnt) VALUES (3, 'ddd');
-- Test data for the second table: four groups with different group numbers.
CREATE TABLE table2 (
    grp   NUMBER(3),
    elmnt VARCHAR2(5)
);
INSERT INTO table2 (grp, elmnt) VALUES (99, 'XYZ');
INSERT INTO table2 (grp, elmnt) VALUES (99, 'QBC');
INSERT INTO table2 (grp, elmnt) VALUES (99, 'ABC');
INSERT INTO table2 (grp, elmnt) VALUES (78, 'xxx');
INSERT INTO table2 (grp, elmnt) VALUES (78, 'QQQ');
INSERT INTO table2 (grp, elmnt) VALUES (64, 'uuu');
INSERT INTO table2 (grp, elmnt) VALUES (65, 'ddd');
ELMNT
-----
uuu
QQQ
yyy
ddd
xxx

How to select top 3 values from each group in a table with SQL which have duplicates [duplicate]

This question already has answers here:
Select top 10 records for each category
(14 answers)
Closed 5 years ago.
Assume we have a table which has two columns, one column contains the names of some people and the other column contains some values related to each person. One person can have more than one value. Each value has a numeric type. The question is we want to select the top 3 values for each person from the table. If one person has less than 3 values, we select all the values for that person.
The issue can be solved if there are no duplicates in the table by the query provided in this article Select top 3 values from each group in a table with SQL . But if there are duplicates, what is the solution?
For example, if for one name John, he has 5 values related to him. They are 20,7,7,7,4. I need to return the name/value pairs as below order by value descending for each name:
-----------+-------+
| name | value |
-----------+-------+
| John | 20 |
| John | 7 |
| John | 7 |
-----------+-------+
Only 3 rows should be returned for John even though there are three 7s for John.
In many modern DBMS (e.g. Postgres, Oracle, SQL-Server, DB2 and many others), the following will work just fine. It uses CTEs and ranking function ROW_NUMBER() which is part of the latest SQL standard:
-- Number each name's rows from the highest value down, then keep the first three.
WITH ranked AS (
    SELECT
        name,
        value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM t
)
SELECT
    name,
    value,
    rn
FROM ranked
WHERE rn <= 3
ORDER BY
    name,
    rn;
Without CTE, only ROW_NUMBER():
-- Same ranking as a derived table: number each name's rows from the highest value
-- down and keep the first three.
SELECT
    ranked.name,
    ranked.value,
    ranked.rn
FROM (
    SELECT
        name,
        value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM t
) ranked
WHERE ranked.rn <= 3
ORDER BY
    ranked.name,
    ranked.rn;
Tested in:
Postgres
Oracle
SQL-Server
In MySQL and other DBMS that do not have ranking functions, one has to use either derived tables, correlated subqueries or self-joins with GROUP BY.
The (tid) is assumed to be the primary key of the table:
-- Emulate a row number without window functions: for each row, count the rows of the
-- same name that have a larger value, or the same value and a tid that is not greater
-- (which includes the row itself). tid breaks ties between equal values.
SELECT t.tid, t.name, t.value, -- self join and GROUP BY
COUNT(*) AS rn
FROM t
JOIN t AS t2
ON t2.name = t.name
AND ( t2.value > t.value
OR t2.value = t.value
AND t2.tid <= t.tid
)
GROUP BY t.tid, t.name, t.value
-- Keep only the rows whose computed position is within the first three.
HAVING COUNT(*) <= 3
ORDER BY name, rn ;
-- Same position calculation written as a correlated subquery: rn is the number of rows
-- of the same name with a larger value, or an equal value and a tid that is not greater.
SELECT t.tid, t.name, t.value, rn
FROM
( SELECT t.tid, t.name, t.value,
( SELECT COUNT(*) -- inline, correlated subquery
FROM t AS t2
WHERE t2.name = t.name
AND ( t2.value > t.value
OR t2.value = t.value
AND t2.tid <= t.tid
)
) AS rn
FROM t
) AS t
-- Keep only the rows whose computed position is within the first three.
WHERE rn <= 3
ORDER BY name, rn ;
Tested in MySQL
I was going to downvote the question. However, I realized that it might really be asking for a cross-database solution.
Assuming you are looking for a database independent way to do this, the only way I can think of uses correlated subqueries (or non-equijoins). Here is an example:
-- rank = number of distinct values of the same person that are >= this row's value,
-- so equal values share a rank; DISTINCT then collapses identical (personid, val, rank) rows.
-- NOTE(review): rank is also the name of a window function and may need quoting as an
-- alias on some engines — verify on the target DBMS.
select distinct t.personid, val, rank
from (select t.*,
(select COUNT(distinct val) from t t2 where t2.personid = t.personid and t2.val >= t.val
) as rank
from t
) t
where rank in (1, 2, 3)
However, each database that you mention (and I note, Hadoop is not a database) has a better way of doing this. Unfortunately, none of them are standard SQL.
Here is an example of it working in SQL Server:
-- Inline sample data: one person with five values, one of them duplicated.
with t as (
select 1 as personid, 5 as val union all
select 1 as personid, 6 as val union all
select 1 as personid, 6 as val union all
select 1 as personid, 7 as val union all
select 1 as personid, 8 as val
)
-- rank = number of distinct values of the same person that are >= this row's value;
-- DISTINCT collapses identical (personid, val, rank) rows.
select distinct t.personid, val, rank
from (select t.*,
(select COUNT(distinct val) from t t2 where t2.personid = t.personid and t2.val >= t.val
) as rank
from t
) t
where rank in (1, 2, 3);
Using GROUP_CONCAT and FIND_IN_SET you can do that.Check SQLFIDDLE.
-- For each row, build the comma-separated list of its name's values in descending order,
-- cut that list down to its first three entries, and keep the row if its value is in it.
-- NOTE(review): every row holding a value that appears in the list matches, so tied
-- values may yield more than three rows per name — confirm this is acceptable.
SELECT *
FROM tbl t
WHERE FIND_IN_SET(t.value,(SELECT
SUBSTRING_INDEX(GROUP_CONCAT(t1.value ORDER BY VALUE DESC),',',3)
FROM tbl t1
WHERE t1.name = t.name
GROUP BY t1.name)) > 0
ORDER BY t.name,t.value desc
If your result set is not too large, you can write a stored procedure (or an anonymous PL/SQL block) for this problem that iterates over the result set and finds the biggest three values using a simple comparison algorithm.
Try this -
-- Sample data: five values for a single name, including a three-way tie.
CREATE TABLE #list (
    [name]  [varchar](100) NOT NULL,
    [value] [int]          NOT NULL
)
INSERT INTO #list
VALUES
    ('John', 20),
    ('John', 7),
    ('John', 7),
    ('John', 7),
    ('John', 4);

-- Number each name's rows from the highest value down and keep the first three.
WITH cte AS (
    SELECT
        NAME,
        value,
        ROW_NUMBER() OVER (PARTITION BY NAME ORDER BY (value) DESC) AS RN
    FROM #list
)
SELECT
    NAME,
    value
FROM cte
WHERE RN <= 3
ORDER BY value DESC
This works for MS SQL. It should be workable in any other SQL dialect that can assign row numbers in a GROUP BY or OVER clause (or equivalent).
-- Drop any leftover scratch table so the script can be rerun.
if object_id('tempdb..#Data') is not null
    drop table #Data;
GO
create table #data (
    name  varchar(25),
    value integer
);
GO
set nocount on;
-- Sample data: three names, with duplicate values for John.
insert into #data
values
    ('John', 20),
    ('John', 7),
    ('John', 7),
    ('John', 7),
    ('John', 5),
    ('Jack', 5),
    ('Jane', 30),
    ('Jane', 21),
    ('John', 5),
    ('John', -1),
    ('John', -1),
    ('Jane', 18);
set nocount off;
GO
-- Number each name's rows from the highest value down and keep the first three.
with D as (
    select
        name,
        Value,
        row_number() over (partition by name order by value desc) as rn
    from #Data
)
select
    Name,
    Value
from D
where RN <= 3
order by
    Name,
    Value desc
Name Value
Jack 5
Jane 30
Jane 21
Jane 18
John 20
John 7
John 7