Query to Find maximum possible combinations between two columns - sql

The target is to create all possible combinations of joining the two columns. every article of the first column ('100','101','102','103') must be in the combination result.
Sample Code
-- Sample data: one row per (article, supplier) pair that is available.
CREATE TABLE basis (
    article  INTEGER,
    supplier VARCHAR(10)
);

INSERT INTO basis (article, supplier)
VALUES
    (100, 'A'),
    (101, 'A'),
    (101, 'B'),
    (101, 'C'),
    (102, 'D'),
    (103, 'B');
Result set
combination_nr;article;supplier
1;100;'A'
1;101;'A'
1;102;'D'
1;103;'B'
2;100;'A'
2;101;'B'
2;102;'D'
2;103;'B'
3;100;'A'
3;101;'C'
3;102;'D'
3;103;'B'
Suppose we add one more row for article 102 with supplier 'A'. The result set would then look like this.
According to the calculation given below, we would now have 24 result rows (1x3x2x1 = 6 combinations x 4 articles):
1;100;'A'
1;101;'A'
1;102;'A'
1;103;'B'
2;100;'A'
2;101;'A'
2;102;'D'
2;103;'B'
3;100;'A'
3;101;'B'
3;102;'A'
3;103;'B'
4;100;'A'
4;101;'B'
4;102;'D'
4;103;'B'
5;100;'A'
5;101;'C'
5;102;'A'
5;103;'B'
6;100;'A'
6;101;'C'
6;102;'D'
6;103;'B'
Already tried code
I have tried different cross joins, but they always return more rows than my expected result set.
-- Attempt from the question: pairs every distinct supplier with every
-- distinct article, regardless of whether that pair exists in the table.
-- NOTE(review): reads from basis2 while the sample DDL creates basis — confirm the table name.
SELECT
    all_articles.article,
    all_suppliers.supplier
FROM (SELECT DISTINCT supplier FROM basis2) AS all_suppliers
CROSS JOIN (SELECT DISTINCT article FROM basis2) AS all_articles;
Calculations:
article 100: 1 supplier ('A')
article 101: 3 suppliers ('A','B','C')
article 102: 1 supplier ('D')
article 103: 1 supplier ('B')
unique articles: 4 (100,101,102,103)
1x3x1x1 x 4 = 12 (combination rows)

You can do what you want using a recursive CTE. It is easier to put the combinations in single rows rather than across multiple rows:
-- Builds every combination that picks exactly one supplier per article and
-- returns each combination as one comma-separated 'article:supplier' string.
with numbered as (
    -- Give each distinct article a consecutive position 1..N.
    select basis.*, dense_rank() over (order by article) as seqnum
    from basis
),
combos as (
    -- Anchor: one partial combination per supplier of the first article.
    select convert(varchar(max), concat(n.article, ':', n.supplier)) as suppliers,
           n.seqnum
    from numbered n
    where n.seqnum = 1
    union all
    -- Step: extend each partial combination with every supplier of the next article.
    select concat(c.suppliers, ',', concat(n.article, ':', n.supplier)),
           n.seqnum
    from combos c
    join numbered n
      on n.seqnum = c.seqnum + 1
)
-- Keep only the strings that reached the last article, i.e. complete combinations.
select row_number() over (order by done.suppliers), done.suppliers
from (select c.*, max(c.seqnum) over () as max_seqnum
      from combos c
     ) done
where done.seqnum = done.max_seqnum;
For your particular result set, you can unroll the string:
-- Builds every combination that picks exactly one supplier per article and
-- returns one row per (combination number, article, supplier).
with b as (
    -- Give each distinct article a consecutive position 1..N.
    select b.*, dense_rank() over (order by article) as seqnum
    from basis b
),
cte as (
    -- Anchor: one partial combination per supplier of the first article.
    select convert(varchar(max), concat(article, ':', supplier)) as suppliers, seqnum
    from b
    where seqnum = 1
    union all
    -- Step: extend each partial combination with every supplier of the next article.
    select concat(cte.suppliers, ',', concat(article, ':', supplier)), b.seqnum
    from cte join
         b
         on b.seqnum = cte.seqnum + 1
)
select seqnum,
       -- Text before the ':' is the article.
       left(s.value, charindex(':', s.value) - 1) as article,
       -- Remove everything up to and including the ':' to leave the supplier.
       -- CHARINDEX takes (expressionToFind, expressionToSearch); with the
       -- arguments reversed it returns 0 and STUFF removes nothing.
       stuff(s.value, 1, charindex(':', s.value), '') as supplier
from (-- Number the complete combinations (those that reached the last article).
      select row_number() over (order by suppliers) as seqnum, suppliers
      from (select cte.*, max(seqnum) over () as max_seqnum
            from cte
           ) cte
      where seqnum = max_seqnum
     ) cte cross apply
     string_split(suppliers, ',') s;
Here is a db<>fiddle.

Related

Group by with gap in date sequence ("gaps and islands")

I am trying to solve a "gaps and islands" by date issue I'm facing (kudos to Gordon Linoff helping me identify this issue). I want to group the below table by person, office and job while respecting order by person,from_date. consider the table below:
-- Sample data for the gaps-and-islands question.
-- A table variable must be named with '@'; 'declare #temp table' is not valid
-- T-SQL ('#' names are temporary tables created with CREATE TABLE).
-- The variable is only visible inside the batch that declares it.
declare @temp table (
    person    varchar(25),
    office    varchar(25),
    job       varchar(25),
    from_date date,
    to_date   date
);

-- NOTE(review): the date literals are kept as written; how 'm/d/yyyy' strings
-- are parsed depends on the session's language/DATEFORMAT settings — confirm.
insert into @temp (person, office, job, from_date, to_date) values ('jon','ny','programmer','1/1/2020','1/3/2020');
insert into @temp (person, office, job, from_date, to_date) values ('jon','ny','programmer','1/4/2020','1/5/2020');
insert into @temp (person, office, job, from_date, to_date) values ('jon','dc','programmer','1/6/2020','1/7/2020');
insert into @temp (person, office, job, from_date, to_date) values ('jon','ny','programmer','1/8/2020','1/9/2020');
insert into @temp (person, office, job, from_date, to_date) values ('lou','ny','programmer','1/1/2020','1/3/2020');
insert into @temp (person, office, job, from_date, to_date) values ('lou','ny','programmer','1/4/2020','1/5/2020');
insert into @temp (person, office, job, from_date, to_date) values ('lou','dc','programmer','1/6/2020','1/7/2020');
insert into @temp (person, office, job, from_date, to_date) values ('lou','ny','programmer','1/8/2020','1/9/2020');
the intended output is
This is a type of gaps-and-islands problem. If there are no gaps in the dates, the simplest solution is the difference of row numbers:
-- Gaps-and-islands via the difference of two row numbers.
-- seqnum   numbers all of a person's rows in date order.
-- seqnum_2 numbers the rows of one (person, office, job) in date order.
-- While a person stays in the same office/job both counters advance together,
-- so (seqnum - seqnum_2) is constant; when another office/job intervenes only
-- seqnum advances, so the difference changes and a new island starts.
-- (Partitioning seqnum_2 by person, office instead makes the difference 0 for
-- every sample row and merges the two separate 'ny' stints.)
select person, office, job,
       min(from_date) as from_date,
       max(to_date) as to_date
from (select t.*,
             row_number() over (partition by person order by from_date) as seqnum,
             row_number() over (partition by person, office, job order by from_date) as seqnum_2
      from t
     ) t
group by person, office, job, (seqnum - seqnum_2)
This is a general solution:
-- General gaps-and-islands solution: an island starts at a row without a
-- preceder and ends at the earliest later row without a follower.
WITH preceders_and_followers AS (
    SELECT
        b.person,
        b.office,
        b.job,
        b.from_date,
        b.to_date,
        -- 1 when another row of the same person/office/job covers the day
        -- before this row starts; NULL otherwise.
        CASE
            WHEN EXISTS (
                SELECT 1
                FROM ora$ptt_tmp c
                WHERE c.person = b.person
                  AND c.office = b.office
                  AND c.job = b.job
                  AND ( b.from_date - 1 BETWEEN c.from_date AND c.to_date )
            ) THEN 1
        END AS has_preceder,
        -- 1 when another row of the same person/office/job covers the day
        -- after this row ends; NULL otherwise.
        CASE
            WHEN EXISTS (
                SELECT 1
                FROM ora$ptt_tmp c
                WHERE c.person = b.person
                  AND c.office = b.office
                  AND c.job = b.job
                  AND ( b.to_date + 1 BETWEEN c.from_date AND c.to_date )
            ) THEN 1
        END AS has_follower
    FROM ora$ptt_tmp b
    ORDER BY
        b.person,
        b.office,
        b.job
)
SELECT DISTINCT
    pf1.person,
    pf1.office,
    pf1.job,
    pf1.from_date,
    (
        -- Earliest end date, at or after this start, of a row with no follower.
        SELECT MIN(pf2.to_date)
        FROM preceders_and_followers pf2
        WHERE pf2.person = pf1.person
          AND pf2.office = pf1.office
          AND pf2.job = pf1.job
          AND pf2.to_date >= pf1.from_date
          AND pf2.has_follower IS NULL
    ) to_date
FROM preceders_and_followers pf1
WHERE pf1.has_preceder IS NULL
ORDER BY
    pf1.person,
    pf1.from_date,
    pf1.office,
    pf1.job;

Find all records within x units of each other

I have a table like this:
-- Sample table: idx is the key, value is the number to cluster on.
CREATE TABLE t (
    idx   integer PRIMARY KEY,
    value integer
);

INSERT INTO t (idx, value)
VALUES (1, 1), (2, 2), (3, 3), (4, 6), (5, 7), (6, 12);
I would like to return all the groups of records where the values are within 2 of each other, with an associated group label as a new column by which to identify them.
I thought perhaps a recursive query might be suitable...but my sql-fu is lacking.
You can use a recursive CTE:
-- Walks the rows in idx order and carries a group label (grp) forward:
-- a row starts a new group when its value exceeds the current label by more than 2.
with recursive ordered as (
    select t.*, row_number() over (order by idx) as seqnum
    from t
),
walk as (
    -- Anchor: the first row opens the first group, labelled with its own value.
    select o.idx, o.value, o.value as grp,
           o.seqnum, 1 as lev
    from ordered o
    where o.seqnum = 1
    union all
    -- Step: move to the next row and either keep or replace the group label.
    select o.idx, o.value,
           (case when o.value > w.grp + 2 then o.value else w.grp end),
           o.seqnum, 1 + w.lev
    from walk w
    join ordered o
      on o.seqnum = w.seqnum + 1
)
select *
from walk;
Here is a db<>fiddle. Note that this added a row with the value of "4" to show that the first four rows are split into two groups.
I assume you want to group rows so that any two values in each group may differ only by at most 2. Then you are right, recursive query is the solution. In each level of recursion the bounds of new group are precomputed. Groups are disjoint so finally join original table with computed group number and group by this number. Db fiddle here.
-- Computes disjoint value ranges [minv, minv + 2], one per recursion level,
-- then labels the rows of t by the range their value falls into.
with recursive r (minv, maxv, level) as (
    -- Anchor: the first range starts at the smallest value.
    select min(t.value), min(t.value) + 2, 1
    from t
    union all
    -- Step: the next range starts at the smallest value above the current range.
    select minv, maxv, level
    from (
        select t.value as minv,
               t.value + 2 as maxv,
               r.level + 1 as level,
               -- Order by t.value explicitly: inside the window clause a bare
               -- "minv" resolves to the input column r.minv (constant here),
               -- not to the output alias, so rn = 1 would be an arbitrary row.
               row_number() over (order by t.value) as rn
        from r
        join t on t.value > r.maxv
    ) x
    where x.rn = 1
)
select r.level
     , format('ids from %s to %s', min(t.idx), max(t.idx)) as id_label
     , format('values from %s to %s', min(t.value), max(t.value)) as value_label
from t
join r on t.value between r.minv and r.maxv
group by r.level
order by r.level;
(The inner query in the recursive part is just to limit number of newly added rows only to one. Simpler clause select min(t.value), min(t.value) + 2 is not possible because aggregation functions are not allowed in recursive part, analytic function is workaround.)

Swap two adjacent rows of a column in sql

I'm trying to solve this following problem:
Write a sql query to swap two adjacent rows in a column of a table.
Input table
Name Id
A 1
B 2
C 3
D 4
E 5
Output table
Name Id
A 2
B 1
C 4
D 3
E 5
Description: 1 is associated with A and 2 with B; swap them so that 1 is now associated with B and 2 with A. Do the same for C and D. Since E doesn't have a pair, leave it as it is.
Note:- This may be solved using CASE Statements, but I am trying for a generalized solution, Say currently it is only 5 rows, it may be 10,20 etc..
Eg:
SELECT
*,CASE WHEN Name = A then 2 ELSEIF Name = B then 1 etc...
FROM YourTable
You can use window functions to solve this.
on MySQL (>= 8.0):
-- Even-numbered rows take the previous row's Name, odd-numbered rows take the
-- next row's Name; a last row without a partner keeps its own Name.
SELECT
    numbered.ID,
    COALESCE(
        CASE
            WHEN numbered.rn % 2 = 0 THEN LAG(numbered.Name) OVER (ORDER BY numbered.ID)
            ELSE LEAD(numbered.Name) OVER (ORDER BY numbered.ID)
        END,
        numbered.Name
    ) AS Name
FROM (
    SELECT ID, Name, ROW_NUMBER() OVER (ORDER BY ID) AS rn
    FROM table_name
) numbered
demo on dbfiddle.uk
on SQL-Server:
-- Even-numbered rows take the previous row's Name, odd-numbered rows take the
-- next row's Name; a last row without a partner keeps its own Name.
WITH numbered AS (
    SELECT ID, Name, ROW_NUMBER() OVER (ORDER BY ID) AS rn
    FROM table_name
)
SELECT
    n.ID,
    ISNULL(
        CASE
            WHEN n.rn % 2 = 0 THEN LAG(n.Name) OVER (ORDER BY n.ID)
            ELSE LEAD(n.Name) OVER (ORDER BY n.ID)
        END,
        n.Name
    ) AS Name
FROM numbered n
demo on dbfiddle.uk
If you have sql-server, you can try this.
-- Sample data in a table variable. Table variables are named with '@';
-- 'DECLARE #YourTable TABLE' is not valid T-SQL.
DECLARE @YourTable TABLE (Name VARCHAR(10), Id INT);

INSERT INTO @YourTable (Name, Id) VALUES
('A', 1),
('B', 2),
('C', 3),
('D', 4),
('E', 5);

-- Number the rows by Name, then join each row to its partner:
-- even RN pairs with RN - 1, odd RN pairs with RN + 1.
WITH CTE AS (
    SELECT *, ROW_NUMBER() OVER (ORDER BY Name) AS RN FROM @YourTable
)
-- A row without a partner (odd row count) keeps its own Id.
SELECT T1.Name, ISNULL(T2.Id, T1.Id) Id
FROM CTE T1
LEFT JOIN CTE T2 ON T1.RN + CASE WHEN T1.RN % 2 = 0 THEN -1 ELSE 1 END = T2.RN
ORDER BY T1.Name;
Result:
Name Id
---------- -----------
A 2
B 1
C 4
D 3
E 5
You didn't specify your DBMS, but the following is standard ANSI SQL.
You can use a values() clause to provide the mapping of the IDs and then join against that:
-- Explicit mapping in both directions; ids without a mapping stay unchanged.
with id_map (source_id, target_id) as (
    values (1, 2), (2, 1)
)
select src.name,
       coalesce(swap.target_id, src.id) as mapped_id
from the_table src
left join id_map swap
       on swap.source_id = src.id
order by name;
Alternatively if you only want to specify the mapping once for one direction, you can use this:
-- Each pair is listed once; the CASE applies it in whichever direction matches.
with id_map (source_id, target_id) as (
    values (1, 2)
)
select src.name,
       case
           when src.id = swap.source_id then swap.target_id
           when src.id = swap.target_id then swap.source_id
           else src.id
       end as mapped_id
from the_table src
left join id_map swap
       on src.id = swap.source_id
       or src.id = swap.target_id
order by name;
Online example: https://rextester.com/FBFH52231

SQL Server CTE doesn't Join Properly

The data for this project contains two columns with semicolon-delimited strings. These are actually ordered pairs. So, for example, in: "a;b;c", "x;y;z", 'a' is paired with 'x'. The goal for our query is to create a table where this relationship is clearly represented one row at a time.
Here is a script to re-create the sample data:
-- Re-creates the sample tables and rows for the question.
DROP TABLE IF EXISTS dbo.sampleData;
DROP TABLE IF EXISTS dbo.lookupCPT;
GO
-- sDelimQty and sDelimCPT hold ';'-delimited lists whose items pair up by position.
CREATE TABLE dbo.sampleData
(
numRow bigint IDENTITY(1,1) NOT NULL CONSTRAINT PK_numRow PRIMARY KEY,
sDelimQty varchar(MAX) NULL,
sDelimCPT varchar(MAX) NULL
);
CREATE TABLE dbo.lookupCPT
(
numRow bigint IDENTITY(1,1) NOT NULL CONSTRAINT PK_numRowCPT PRIMARY KEY,
sCPTCode varchar(10) NULL,
decCPTRate decimal(16,2) NULL
);
GO
-- numRow is an IDENTITY column, so inserting explicit values fails unless
-- IDENTITY_INSERT is ON. Only one table per session may have it ON at a time,
-- hence the ON/OFF pair around each table's inserts.
SET IDENTITY_INSERT dbo.lookupCPT ON;
INSERT [dbo].[lookupCPT] ([numRow], [sCPTCode], [decCPTRate])
VALUES (1, N'123', CAST(4.00 AS Decimal(16, 2)));
INSERT [dbo].[lookupCPT] ([numRow], [sCPTCode], [decCPTRate])
VALUES (2, N'456', CAST(5.00 AS Decimal(16, 2)));
INSERT [dbo].[lookupCPT] ([numRow], [sCPTCode], [decCPTRate])
VALUES (3, N'789', CAST(7.00 AS Decimal(16, 2)));
SET IDENTITY_INSERT dbo.lookupCPT OFF;

SET IDENTITY_INSERT dbo.sampleData ON;
INSERT [dbo].[sampleData] ([numRow], [sDelimQty], [sDelimCPT])
VALUES (1, N'1;2', N'123;789');
INSERT [dbo].[sampleData] ([numRow], [sDelimQty], [sDelimCPT])
VALUES (2, N'3', N'456');
SET IDENTITY_INSERT dbo.sampleData OFF;
We attempted to accomplish this using common table expressions:
-- Attempt from the question: splits both delimited columns and joins the
-- pieces back on numRow only, so every quantity of a row meets every CPT
-- code of that row.
WITH Qty_CTE (numRowQ, Qty) AS
(
    SELECT src.numRow, piece.value
    FROM sampleData AS src
    CROSS APPLY STRING_SPLIT(src.sDelimQty, ';') AS piece
),
CPT_CTE (numRowC, CPT) AS
(
    SELECT src.numRow, piece.value
    FROM sampleData AS src
    CROSS APPLY STRING_SPLIT(src.sDelimCPT, ';') AS piece
)
SELECT *
FROM sampleData
INNER JOIN CPT_CTE AS c
    ON c.numRowC = sampleData.numRow
INNER JOIN Qty_CTE AS q
    ON q.numRowQ = sampleData.numRow
However, doing this doubles the amount of rows in our output:
q1
But, if we remove either one of the two joins, it returns correctly:
q2
Any ideas? Thanks very much
After all the helpful answers, below is the final solution. Cheers!
-- Pairs the n-th quantity with the n-th CPT code of the same sampleData row
-- and prices it from lookupCPT.
-- RN is numbered per source row (PARTITION BY numRow) rather than across the
-- whole result, so an uneven list or an unmatched code in one row cannot
-- shift the pairing of every later row.
-- NOTE(review): ORDER BY (SELECT NULL) relies on STRING_SPLIT returning items
-- in list order, which is not a documented guarantee; where available, the
-- enable_ordinal argument of STRING_SPLIT gives an explicit position.
WITH Qty_CTE (numRowQ, Qty, RN) AS
(
    SELECT
        s.numRow, q.value,
        ROW_NUMBER() OVER (PARTITION BY s.numRow ORDER BY (SELECT NULL)) AS RN
    FROM
        sampleData s
    CROSS APPLY
        STRING_SPLIT(s.sDelimQty, ';') q
),
CPT_CTE (numRowC, CPT, CPTRate, RN) AS
(
    SELECT
        p.numRow, p.CPT, l.decCPTRate AS CPTRate, p.RN
    FROM
        (
            -- Number the codes before the lookup join so that a code missing
            -- from lookupCPT does not renumber the codes after it.
            SELECT
                s.numRow, c.value AS CPT,
                ROW_NUMBER() OVER (PARTITION BY s.numRow ORDER BY (SELECT NULL)) AS RN
            FROM
                sampleData s
            CROSS APPLY
                STRING_SPLIT(s.sDelimCPT, ';') c
        ) p
    JOIN
        lookupCPT l ON p.CPT = l.sCPTCode
)
SELECT
    numRow, sDelimCPT, sDelimQty, CPT, CPTRate, Qty, CPTRate * Qty as Total
FROM
    sampleData
JOIN
    CPT_CTE c on c.numRowC = sampleData.numRow
JOIN
    Qty_CTE q on q.numRowQ = sampleData.numRow AND c.RN = q.RN
If your STRING_SPLIT function preserves the order of the items, then this will work.
-- Pairs the n-th quantity with the n-th CPT code of the same sampleData row.
-- RN restarts for every source row (PARTITION BY numRow); a single global
-- numbering only lines up while every row has equally long lists.
-- NOTE(review): ORDER BY (SELECT NULL) relies on STRING_SPLIT returning items
-- in list order, which is not a documented guarantee.
WITH Qty_CTE (numRowQ, Qty, RN) AS
(
    SELECT s.numRow, q.value,
           ROW_NUMBER() OVER (PARTITION BY s.numRow ORDER BY (SELECT NULL)) AS RN
    FROM sampleData s
    CROSS APPLY STRING_SPLIT(s.sDelimQty, ';') q
),
CPT_CTE (numRowC, CPT, RN) AS
(
    SELECT s.numRow, c.value,
           ROW_NUMBER() OVER (PARTITION BY s.numRow ORDER BY (SELECT NULL)) AS RN
    FROM sampleData s
    CROSS APPLY STRING_SPLIT(s.sDelimCPT, ';') c
)
SELECT * FROM sampleData
JOIN CPT_CTE c on c.numRowC = sampleData.numRow
JOIN Qty_CTE q on q.numRowQ = sampleData.numRow AND c.RN = q.RN
Both your examples q1 and q2 work as expected. Qty_CTE has a cardinality of 3 (Record 1 appears twice, record 2 appears once), so does CPT_CTE.
sampleData has a cardinality of 2 (each row appears once only)
Since you're joining on the PK, sampleData x Qty_CTE x CPT_CTE should return 5 records, which it does (1x2x2 records for numrow 1 and 1x1x1 record for numrow 2). If you remove either Qty_CTE or CPT_CTE it should return 3 records, which it does (1x1x1 record for numrow 2 and 1x2 records for numrow 1).
We could propose a solution based on an expected result, if you had one.

How to select top 3 values from each group in a table with SQL which have duplicates [duplicate]

This question already has answers here:
Select top 10 records for each category
(14 answers)
Closed 5 years ago.
Assume we have a table which has two columns, one column contains the names of some people and the other column contains some values related to each person. One person can have more than one value. Each value has a numeric type. The question is we want to select the top 3 values for each person from the table. If one person has less than 3 values, we select all the values for that person.
The issue can be solved if there are no duplicates in the table by the query provided in this article Select top 3 values from each group in a table with SQL . But if there are duplicates, what is the solution?
For example, if for one name John, he has 5 values related to him. They are 20,7,7,7,4. I need to return the name/value pairs as below order by value descending for each name:
-----------+-------+
| name | value |
-----------+-------+
| John | 20 |
| John | 7 |
| John | 7 |
-----------+-------+
Only 3 rows should be returned for John even though there are three 7s for John.
In many modern DBMS (e.g. Postgres, Oracle, SQL-Server, DB2 and many others), the following will work just fine. It uses CTEs and ranking function ROW_NUMBER() which is part of the latest SQL standard:
-- Rank each person's values from highest to lowest and keep the first three.
-- ROW_NUMBER() gives tied values distinct ranks, so at most three rows per name survive.
WITH ranked AS (
    SELECT
        name,
        value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM t
)
SELECT name, value, rn
FROM ranked
WHERE rn <= 3
ORDER BY name, rn;
Without CTE, only ROW_NUMBER():
-- Same ranking as a derived table instead of a CTE: number each person's
-- values from highest to lowest and keep ranks 1 to 3.
SELECT ranked.name, ranked.value, ranked.rn
FROM (
    SELECT
        name,
        value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM t
) ranked
WHERE ranked.rn <= 3
ORDER BY ranked.name, ranked.rn;
Tested in:
Postgres
Oracle
SQL-Server
In MySQL and other DBMS that do not have ranking functions, one has to use either derived tables, correlated subqueries or self-joins with GROUP BY.
The (tid) is assumed to be the primary key of the table:
-- Self join + GROUP BY: a row's rank is the number of rows of the same name
-- that sort at or before it (higher value, or equal value and tid not larger).
SELECT cur.tid, cur.name, cur.value,
       COUNT(*) AS rn
FROM t AS cur
JOIN t AS peer
  ON peer.name = cur.name
 AND ( peer.value > cur.value
       OR ( peer.value = cur.value AND peer.tid <= cur.tid )
     )
GROUP BY cur.tid, cur.name, cur.value
HAVING COUNT(*) <= 3
ORDER BY name, rn ;
-- Correlated subquery: rn counts the rows of the same name that sort at or
-- before the current row (higher value, or equal value and tid not larger).
SELECT ranked.tid, ranked.name, ranked.value, ranked.rn
FROM (
    SELECT cur.tid, cur.name, cur.value,
           ( SELECT COUNT(*)
             FROM t AS peer
             WHERE peer.name = cur.name
               AND ( peer.value > cur.value
                     OR ( peer.value = cur.value AND peer.tid <= cur.tid )
                   )
           ) AS rn
    FROM t AS cur
) AS ranked
WHERE ranked.rn <= 3
ORDER BY ranked.name, ranked.rn ;
Tested in MySQL
I was going to downvote the question. However, I realized that it might really be asking for a cross-database solution.
Assuming you are looking for a database independent way to do this, the only way I can think of uses correlated subqueries (or non-equijoins). Here is an example:
-- rank = number of distinct values of the same person that are >= this value,
-- so tied values share a rank; DISTINCT then collapses the ties to one row.
select distinct ranked.personid, ranked.val, ranked.rank
from (select cur.*,
             (select COUNT(distinct peer.val)
              from t peer
              where peer.personid = cur.personid
                and peer.val >= cur.val
             ) as rank
      from t cur
     ) ranked
where ranked.rank between 1 and 3
However, each database that you mention (and I note, Hadoop is not a database) has a better way of doing this. Unfortunately, none of them are standard SQL.
Here is an example of it working in SQL Server:
-- Self-contained demo: inline sample rows, then the distinct-rank query.
with t as (
    select v.personid, v.val
    from (values (1, 5), (1, 6), (1, 6), (1, 7), (1, 8)) v (personid, val)
)
-- rank = number of distinct values of the same person that are >= this value.
select distinct ranked.personid, ranked.val, ranked.rank
from (select cur.*,
             (select COUNT(distinct peer.val)
              from t peer
              where peer.personid = cur.personid
                and peer.val >= cur.val
             ) as rank
      from t cur
     ) ranked
where ranked.rank between 1 and 3;
Using GROUP_CONCAT and FIND_IN_SET you can do that.Check SQLFIDDLE.
-- For each row, build the comma-separated list of the three highest values of
-- the same name and keep the row when its value appears in that list.
-- The match is by value, so every row carrying a listed value is returned.
SELECT *
FROM tbl cur
WHERE FIND_IN_SET(
          cur.value,
          ( SELECT SUBSTRING_INDEX(
                       GROUP_CONCAT(peer.value ORDER BY peer.value DESC),
                       ',',
                       3)
            FROM tbl peer
            WHERE peer.name = cur.name
            GROUP BY peer.name )
      ) > 0
ORDER BY cur.name, cur.value DESC
If your result set is not too large, you can write a stored procedure (or an anonymous PL/SQL block) that iterates over the result set and finds the biggest three values with a simple comparison algorithm.
Try this -
-- Temp table with the sample rows from the question.
CREATE TABLE #list (
    [name]  [varchar](100) NOT NULL,
    [value] [int] NOT NULL
)

INSERT INTO #list
VALUES
    ('John', 20),
    ('John', 7),
    ('John', 7),
    ('John', 7),
    ('John', 4);

-- Number each name's rows from highest to lowest value and keep the first three.
WITH ranked AS (
    SELECT
        NAME,
        value,
        ROW_NUMBER() OVER (PARTITION BY NAME ORDER BY (value) DESC) AS RN
    FROM #list
)
SELECT NAME, value
FROM ranked
WHERE RN <= 3
ORDER BY value DESC
This works for MS SQL. It should be workable in any other SQL dialect that can assign row numbers within a group via an OVER clause (or equivalent).
-- Start clean: drop the temp table if an earlier run left it behind.
IF OBJECT_ID('tempdb..#Data') IS NOT NULL
    DROP TABLE #Data;
GO
CREATE TABLE #data (
    name  varchar(25),
    value integer
);
GO
-- Sample rows, loaded with the per-statement row-count messages switched off.
SET NOCOUNT ON;
INSERT INTO #data VALUES
    ('John', 20),
    ('John', 7),
    ('John', 7),
    ('John', 7),
    ('John', 5),
    ('Jack', 5),
    ('Jane', 30),
    ('Jane', 21),
    ('John', 5),
    ('John', -1),
    ('John', -1),
    ('Jane', 18);
SET NOCOUNT OFF;
GO
-- Number each name's rows from highest to lowest value and keep the first three.
WITH ranked AS (
    SELECT
        name,
        Value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM #Data
)
SELECT Name, Value
FROM ranked
WHERE rn <= 3
ORDER BY Name, Value DESC
Name Value
Jack 5
Jane 30
Jane 21
Jane 18
John 20
John 7
John 7