Get permutations of ordered sets of N values - sql

I have a table that consists of a set of codes for an item. Each code's group is defined by group_id. The tables are defined as follows:
-- Codes attached to an item; every code belongs to exactly one group (group_id).
CREATE TABLE item_code (
    id       int PRIMARY KEY NOT NULL IDENTITY (1,1),
    item_id  int DEFAULT NULL,
    group_id int NOT NULL,
    code     varchar(50) NOT NULL
);

-- Code groups. [order] is the position of the group inside a combination.
-- ORDER is a reserved keyword in T-SQL, so the column name must be bracket-quoted.
CREATE TABLE groups (
    id      int PRIMARY KEY NOT NULL IDENTITY (1,1),
    name    varchar(50) NOT NULL,
    [order] int NOT NULL
);
For each item_id in the table, I need to select 1 code from each group_id ordered by the group's order. For example:
-- groups.id is an IDENTITY column: explicit ids are only accepted while
-- IDENTITY_INSERT is ON for the table. [order] is bracket-quoted because
-- ORDER is a reserved keyword.
SET IDENTITY_INSERT groups ON;

INSERT INTO groups (id, name, [order])
VALUES
    (1, 'one',   10),
    (2, 'two',   20),
    (3, 'three', 30);

SET IDENTITY_INSERT groups OFF;

-- Item 99 has codes in all three groups; item 100 has a single code.
INSERT INTO item_code (item_id, group_id, [code])
VALUES
    (99,  1, 'code1-1'),
    (99,  1, 'code1-2'),
    (99,  2, 'code2-1'),
    (99,  2, 'code2-2'),
    (99,  3, 'code3-1'),
    (100, 1, 'another-code');
would result in the set:
item_id code_combination
99 "code1-1"
99 "code1-2"
99 "code2-1"
99 "code2-2"
99 "code3-1"
99 "code1-1, code2-1"
99 "code1-1, code2-2"
99 "code1-2, code2-1"
99 "code1-2, code2-2"
99 "code1-1, code3-1"
99 "code1-2, code3-1"
99 "code2-1, code3-1"
99 "code2-2, code3-1"
99 "code1-1, code2-1, code3-1"
99 "code1-2, code2-1, code3-1"
99 "code1-1, code2-2, code3-1"
99 "code1-2, code2-2, code3-1"
100 "another-code"
The order of the actual results does not matter. I included a row for item_id == 100 just to show that results for all item_id should be included.
What I've done so far:
I've built a CTE that gets combinations of codes, but it does not respect item_id, groups or order, and that's where I'm stuck:
-- Starting point from the question: builds comma-separated combinations of codes
-- by always appending a code with a higher id. It deliberately does not yet
-- take item_id, group or group order into account.
;WITH cte ( combination, curr ) AS (
    -- Anchor: every single code is a combination on its own.
    SELECT CAST(ic.code AS VARCHAR(MAX)), ic.id
    FROM item_code ic
    UNION ALL
    -- Recursive step: append any code whose id is greater than the last one used.
    -- The code is appended at full width (the column is varchar(50)), so long
    -- codes are not cut off.
    SELECT CAST( c.combination + ',' + ic.code AS VARCHAR(MAX) ), ic.id
    FROM item_code ic
    INNER JOIN
        cte c
        ON ( c.curr < ic.id )
)
SELECT combination FROM cte;
UPDATE: I have a slightly more complicated schema than what I originally posted, and have built the schema in this fiddle. The idea is the same, it's just that "order" is defined on a different table.

Adding a little more to your recursive cte, expanding the final join conditions, as well as some additional columns:
-- Builds, per item, every combination that takes at most one code from each
-- group, with the codes listed in ascending group [order].
-- level is the number of codes appended after the first one.
;with cte as (
    -- Anchor: each code on its own (level 0).
    select
        ic.id
      , ic.item_id
      , ic.group_id
      , g.[order]
      , level = 0
      , combination = cast(ic.code as varchar(max))
    from item_code ic
    inner join groups g
        on ic.group_id = g.id
    union all
    -- Recursive step: extend a combination with a code of the same item whose
    -- group sorts strictly after the group of the last code added. The strict
    -- [order] comparison alone guarantees one code per group and no duplicate
    -- combinations, independent of the order in which rows were inserted.
    select
        ic.id
      , ic.item_id
      , ic.group_id
      , g.[order]
      , level = c.level + 1
        -- appended at full width: code is varchar(50) and must not be truncated
      , combination = cast(c.combination + ',' + ic.code as varchar(max))
    from item_code ic
    inner join groups g
        on ic.group_id = g.id
    inner join cte c
        on c.item_id = ic.item_id
       and c.[order] < g.[order]
)
select
    id
  , item_id
  , group_id
  , [order]
  , level
  , combination
from cte
order by item_id, level, combination;
rextester demo: http://rextester.com/PJC44281
returns:
+----+---------+----------+-------+-------+-------------------------+
| id | item_id | group_id | order | level | combination |
+----+---------+----------+-------+-------+-------------------------+
| 1 | 99 | 1 | 10 | 0 | code1-1 |
| 2 | 99 | 1 | 10 | 0 | code1-2 |
| 3 | 99 | 2 | 20 | 0 | code2-1 |
| 4 | 99 | 2 | 20 | 0 | code2-2 |
| 5 | 99 | 3 | 30 | 0 | code3-1 |
| 3 | 99 | 2 | 20 | 1 | code1-1,code2-1 |
| 4 | 99 | 2 | 20 | 1 | code1-1,code2-2 |
| 5 | 99 | 3 | 30 | 1 | code1-1,code3-1 |
| 3 | 99 | 2 | 20 | 1 | code1-2,code2-1 |
| 4 | 99 | 2 | 20 | 1 | code1-2,code2-2 |
| 5 | 99 | 3 | 30 | 1 | code1-2,code3-1 |
| 5 | 99 | 3 | 30 | 1 | code2-1,code3-1 |
| 5 | 99 | 3 | 30 | 1 | code2-2,code3-1 |
| 5 | 99 | 3 | 30 | 2 | code1-1,code2-1,code3-1 |
| 5 | 99 | 3 | 30 | 2 | code1-1,code2-2,code3-1 |
| 5 | 99 | 3 | 30 | 2 | code1-2,code2-1,code3-1 |
| 5 | 99 | 3 | 30 | 2 | code1-2,code2-2,code3-1 |
| 6 | 100 | 1 | 10 | 0 | another-code |
+----+---------+----------+-------+-------+-------------------------+

Related

Generate multiple record from existing records based on interval columns [from and to]

I have 2 types of score [M,B] in column 3, if a type is M, then the score is either an S[scored] or SB[bonus scored] in column 6. Every interval [from_hrs - to_hrs] for a type B must have a corresponding SB for type M, thus, an interval for a type B cannot have a score of S for a type M. I have several records that were unfortunately captured as seen in the table below.
-- Score intervals captured per person.
-- S_TYPE is the score type ('M' or 'B'), FROM_HRS/TO_HRS delimit the interval,
-- SCORE holds the score code.
CREATE TABLE SCORE_TBL
(
    ID          int IDENTITY(1,1) PRIMARY KEY,
    PERSONID_FK int         NOT NULL,
    S_TYPE      varchar(50) NULL,
    FROM_HRS    int         NULL,
    TO_HRS      int         NULL,
    SCORE       varchar(50) NULL
);

-- Rows as they were originally captured; ID is generated by the IDENTITY column.
INSERT INTO SCORE_TBL
    (PERSONID_FK, S_TYPE, FROM_HRS, TO_HRS, SCORE)
VALUES
    (1, 'M',  0, 20, 'S'),
    (1, 'B',  6,  8, 'B'),
    (2, 'B',  0,  2, 'B'),
    (2, 'M',  0, 20, 'S'),
    (2, 'B', 10, 13, 'B'),
    (2, 'B', 18, 20, 'B'),
    (2, 'M', 13, 18, 'S');
| ID | PERSONID_FK |S_TYPE| FROM_HRS | TO_HRS | SCORE |
|----|-------------|------|----------|--------|-------|
| 1 | 1 | M | 0 | 20 | S |
| 2 | 1 | B | 6 | 8 | B |
| 3 | 2 | B | 0 | 2 | B |
| 4 | 2 | M | 0 | 20 | S |
| 5 | 2 | B | 10 | 13 | B |
| 6 | 2 | B | 18 | 20 | B |
| 7 | 2 | M | 13 | 18 | S |
I want the data to look like this
| ID | PERSONID_FK |S_TYPE| FROM_HRS | TO_HRS | SCORE |
|----|-------------|------|----------|--------|-------|
| 1 | 1 | M | 0 | 6 | S |
| 2 | 1 | M | 6 | 8 | SB |
| 3 | 1 | B | 6 | 8 | B |
| 4 | 1 | M | 8 | 20 | S |
| 5 | 2 | B | 0 | 2 | B |
| 6 | 2 | M | 0 | 2 | SB |
| 7 | 2 | M | 2 | 10 | S |
| 8 | 2 | B | 10 | 13 | B |
| 9 | 2 | M | 10 | 13 | SB |
| 10 | 2 | M | 13 | 18 | S |
| 11 | 2 | B | 18 | 20 | B |
| 12 | 2 | M | 18 | 20 | SB |
Any ideas on how to generate this data with a SQL Server SELECT statement? Visually, this is what I am trying to get.
Tricky part here is that interval might need to be split in several pieces like 0..20 for person 2.
Window functions to the rescue. This query illustrates what you need to do:
WITH
-- deltas: one row per person and boundary hour, holding the net change in the
-- number of open 'S' and 'B' intervals at that hour (+1 at a start, -1 at an end).
deltas AS (
    SELECT
        events.personid_fk,
        events.hrs,
        SUM(events.delta_s) AS delta_s,
        SUM(events.delta_b) AS delta_b
    FROM (
        -- interval starts
        SELECT
            personid_fk,
            from_hrs AS hrs,
            CASE WHEN score = 'S' THEN 1 ELSE 0 END AS delta_s,
            CASE WHEN score = 'B' THEN 1 ELSE 0 END AS delta_b
        FROM score_tbl
        UNION ALL
        -- interval ends
        SELECT
            personid_fk,
            to_hrs AS hrs,
            CASE WHEN score = 'S' THEN -1 ELSE 0 END AS delta_s,
            CASE WHEN score = 'B' THEN -1 ELSE 0 END AS delta_b
        FROM score_tbl
    ) AS events
    GROUP BY
        events.personid_fk,
        events.hrs
),
-- running: turns consecutive boundary hours into [from_hrs, to_hrs) pieces and
-- keeps a running count of the S and B intervals open during each piece.
running AS (
    SELECT
        personid_fk,
        hrs AS from_hrs,
        LEAD(hrs)    OVER (PARTITION BY personid_fk ORDER BY hrs) AS to_hrs,
        SUM(delta_s) OVER (PARTITION BY personid_fk ORDER BY hrs) AS running_s,
        SUM(delta_b) OVER (PARTITION BY personid_fk ORDER BY hrs) AS running_b
    FROM deltas
)
-- Pieces covered by at least one S interval become 'M' rows: 'SB' while a
-- B interval is also open, plain 'S' otherwise.
SELECT
    personid_fk,
    'M' AS s_type,
    from_hrs,
    to_hrs,
    CASE WHEN running_b > 0 THEN 'SB' ELSE 'S' END AS score
FROM running
WHERE running_s > 0
UNION ALL
-- B rows are passed through unchanged.
SELECT
    personid_fk,
    s_type,
    from_hrs,
    to_hrs,
    score
FROM score_tbl
WHERE s_type = 'B'
ORDER BY
    personid_fk,
    from_hrs;
Step by step:
deltas is union of two passes on score_tbl - one for start and one for end of score/bonus interval, creating a timeline of +1/-1 events
running calculates running total of deltas over time, yielding split intervals where score/bonus are active
final query just converts score codes and unions bonus intervals (which are passed unchanged)
SQL Fiddle here.

SQL recursive query same table

I have a situation where I have to extract data from a non well designed database.
I have two tables
tableA
+----+----------+-----+---------+
| ID | NAME | AGE | UNIT_ID |
+----+----------+-----+---------+
| 1 | Brown | 25 | 50 |
| 2 | White | 27 | 100 |
| 3 | Gilmour | 24 | 150 |
+----+----------+-----+---------+
tableB
+-----+----------+--------+--------+
| ID | DESC | ID_LV1 | ID_LV2 |
+-----+----------+--------+--------+
| 20 | Unit_20 | 20 | 40 |
| 40 | Unit_40 | 40 | 50 |
| 50 | Unit_50 | 100 | 40 |
| 100 | Unit_100 | 100 | 50 |
| 150 | Unit_150 | 50 | 20 |
+-----+----------+--------+--------+
ID_LV1 and ID_LV2 are linked to ID of the same table (tableB)
The goal is to run a query and get these results:
+----+----------+-----+-----------+-----------+-----------+
| ID | NAME | AGE | UNIT_DESC | LV1_DESC | LV2_DESC |
+----+----------+-----+-----------+-----------+-----------+
| 1 | Brown | 25 | Unit_50 | Unit_100 | Unit_40 |
| 2 | White | 27 | Unit_100 | Unit_100 | Unit_50 |
| 3 | Gilmour | 24 | Unit_150 | Unit_50 | Unit_20 |
+----+----------+-----+-----------+-----------+-----------+
My SQL is pretty rusty. The SQL server that I'm working with doesn't allow me to create views.
My last chance is to import in excel and run a vlookup :-)!
You can achieve your desired result simply using inner join
-- People; UNIT_ID points at a row in tableB.
create table tableA (ID int, NAME varchar(100), AGE int, UNIT_ID int);

-- Column lists are spelled out so the inserts keep working if columns are
-- added to, or reordered in, the table.
insert into tableA (ID, NAME, AGE, UNIT_ID)
values
    (1, 'Brown',   25,  50),
    (2, 'White',   27, 100),
    (3, 'Gilmour', 24, 150);

-- Units; ID_LV1 and ID_LV2 reference ID of this same table.
-- The description column is called DESCRIPTION because DESC is a reserved keyword.
create table tableB (ID INT, DESCRIPTION VARCHAR(100), ID_LV1 INT, ID_LV2 INT);

insert into tableB (ID, DESCRIPTION, ID_LV1, ID_LV2)
values
    ( 20, 'Unit_20',   20, 40),
    ( 40, 'Unit_40',   40, 50),
    ( 50, 'Unit_50',  100, 40),
    (100, 'Unit_100', 100, 50),
    (150, 'Unit_150',  50, 20);
Query:
-- Resolves each person's unit and that unit's two parent levels to their
-- descriptions by joining tableB three times under different aliases.
-- Each description gets its own column name so the result set is unambiguous.
select
    a.ID,
    a.Name,
    a.Age,
    b.Description     as UNIT_DESC,
    b_lv1.Description as LV1_DESC,
    b_lv2.Description as LV2_DESC
from tableA a
inner join tableB b     on a.UNIT_ID = b.ID
inner join tableB b_lv1 on b.ID_LV1  = b_lv1.ID
inner join tableB b_lv2 on b.ID_LV2  = b_lv2.ID
order by a.ID;
Output:
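| ID | Name    | Age | Description (unit) | Description (LV1) | Description (LV2) |
|----|---------|-----|--------------------|-------------------|-------------------|
| 1  | Brown   | 25  | Unit_50            | Unit_100          | Unit_40           |
| 2  | White   | 27  | Unit_100           | Unit_100          | Unit_50           |
| 3  | Gilmour | 24  | Unit_150           | Unit_50           | Unit_20           |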
db<>fiddle here

Leaderboard toplist with current userid in center - SQL query

I have the following table:
+---------+------------+----------+-------+
| userId | campaignId | countryId| points|
+---------+------------+----------+-------+
| 10 | 1 | 101 | 72 |
| 3 | 1 | 101 | 30 |
| 6 | 1 | 101 | 72 |
| 4 | 1 | 101 | 49 |
| 1 | 1 | 101 | 53 |
| 8 | 1 | 101 | 67 |
| 5 | 1 | 101 | 6 |
| 7 | 1 | 101 | 87 |
| 2 | 1 | 101 | 41 |
| 11 | 1 | 101 | 76 |
| 9 | 1 | 101 | 50 |
+---------+------------+----------+-------+
I have already created a leaderboard toplist with a query like this:
-- Paged leaderboard for a single campaign, best score first.
-- Expects @campaignId, @page (1-based) and @limit (page size) to be declared.
-- [table] is a placeholder for the real table name; TABLE is a reserved
-- keyword, so it has to be bracket-quoted.
select
    rank() over (order by T.points desc) as rowRank,
    T.UserID,
    T.points
from [table] as T
where T.campaignId = @campaignId
-- OFFSET ... FETCH is part of ORDER BY and cannot be used without it; UserID
-- breaks ties so consecutive pages neither repeat nor skip rows.
order by T.points desc, T.UserID
offset (@page - 1) * @limit rows
fetch next @limit rows only;
Above query returns a regular toplist from top to bottom.
However, next requirement is to create a leaderboard toplist that returns current user Id rank + the above 2 ranked users + the 2 below ranked users, in total 5 users should be listed with current user in the center.
So additional input parameters would be:
declare @userId int = 8;          -- current user the leaderboard should centre on
declare @maxTopLimit int = 2;     -- include 2 users ranked above the current user
declare @maxBottomLimit int = 2;  -- include 2 users ranked below the current user
The leaderboard returned should look like this with userId 8 in the center
+---------+------------+----------+-------+---------+
| userId | campaignId | countryId| points| rowRank |
+---------+------------+----------+-------+---------+
| 11 | 1 | 101 | 76 | 3 |
| 10 | 1 | 101 | 72 | 4 |
| 8 | 1 | 101 | 67 | 5 | <-- current user
| 9 | 1 | 101 | 50 | 6 |
| 2 | 1 | 101 | 49 | 7 |
+---------+------------+----------+-------+---------+
How do I write a SQL query that behaves like this?
Move the ranking results in a subquery or common table expression.
with cte_rank as (...)
Select the target user.
from cte_rank cr where cr.UserId = @userId
Join the target row with all rows in the defined interval.
join cte_rank cr2 on cr2.RowRank >= cr.RowRank - @before and cr2.RowRank <= cr.RowRank + @after
Select all rows from the interval.
select cr2.*
Sample data
-- Points collected by each user within a campaign and country.
create table CampaignPoints
(
    UserId     int,
    CampaignId int,
    CountryId  int,
    Points     int
);

-- Sample data from the question (11 users, one campaign, one country).
insert into CampaignPoints
    (UserId, CampaignId, CountryId, Points)
values
    (10, 1, 101, 72),
    ( 3, 1, 101, 30),
    ( 6, 1, 101, 72),
    ( 4, 1, 101, 49),
    ( 1, 1, 101, 53),
    ( 8, 1, 101, 67),
    ( 5, 1, 101,  6),
    ( 7, 1, 101, 87),
    ( 2, 1, 101, 41),
    (11, 1, 101, 76),
    ( 9, 1, 101, 50);
Solution
declare @userId int = 8;  -- user the leaderboard window is centred on
declare @before int = 2;  -- how many rank positions above the user to include
declare @after  int = 2;  -- how many rank positions below the user to include

-- cte_rank: every row with its rank inside its own campaign (highest points first).
-- RANK() gives tied users the same rank and leaves a gap after them.
with cte_rank as
(
    select cp.UserId,
           cp.CampaignId,
           cp.CountryId,
           cp.Points,
           rank() over (partition by cp.CampaignId
                        order by cp.Points desc) as RowRank
    from CampaignPoints cp
)
-- cr is the target user's row; cr2 are all rows of the same campaign whose
-- rank lies within [rank - @before, rank + @after] of the target's rank.
select cr2.UserId,
       cr2.CampaignId,
       cr2.CountryId,
       cr2.Points,
       cr2.RowRank
from cte_rank cr
join cte_rank cr2
  on cr2.CampaignId = cr.CampaignId
 and cr2.RowRank >= cr.RowRank - @before
 and cr2.RowRank <= cr.RowRank + @after
where cr.UserId = @userId
order by cr2.CampaignId, cr2.RowRank;
Result
UserId CampaignId CountryId Points RowRank
------ ---------- --------- ------ -------
10 1 101 72 3
6 1 101 72 3
8 1 101 67 5
1 1 101 53 6
9 1 101 50 7
Fiddle to see things in action.

Record batching on bases of running total values by specific number (FileSize wise batching)

We are dealing with large recordset and are currently using NTILE() to get the range of FileIDs and then using FileID column in BETWEEN clause to get specific records set. Using FileID in BETWEEN clause is a mandatory requirement from Developers. So, we cannot have random FileIDs in one batch, it has to be incremental.
As per new requirement, we have to make range based on FileSize column, e.g. 100 GB per batch.
For example:
Batch 1 : 1 has 100 size So ID: 1 record only.
Batch 2 : FileIds 2,3,4,5 total 80 GB, which is < 100 GB, so FileId 6 (120 GB) has to be included as well (batch total 200 GB, running total 300 GB)
Batch 3 : 7 ID has > 100 so 1 record only
And so on…
Below is my sample code, but it is not giving the expected result:
-- Sample files: FileId is the key used for BETWEEN ranges, FileSize is the size in GB.
CREATE TABLE zFiles
(
    FileId   INT,
    FileSize INT
)

INSERT INTO dbo.zFiles (FileId, FileSize)
VALUES
    ( 1, 100),
    ( 2,  20),
    ( 3,  20),
    ( 4,  30),
    ( 5,  10),
    ( 6, 120),
    ( 7, 400),
    ( 8,  50),
    ( 9, 100),
    (10,  60),
    (11,  40),
    (12,   5),
    (13,  20),
    (14,  95),
    (15,  40)
-- FLOAT on purpose: RunningTotal / @intBatchSize must not be an integer
-- division, otherwise CEILING would have nothing to round up.
DECLARE @intBatchSize FLOAT = 100;

-- Attempt from the question: numbers batches by the 100 GB bucket that the
-- running total falls into. The running total is never reset at a batch
-- boundary, which is why the batches differ from the expected result.
SELECT
    y.FileID,
    y.FileSize,
    y.RunningTotal,
    DENSE_RANK() OVER (ORDER BY CEILING(y.RunningTotal / @intBatchSize)) AS Batch
FROM (
    SELECT
        i.FileID,
        i.FileSize,
        -- cumulative size of all files up to and including this FileID
        RunningTotal = SUM(i.FileSize) OVER (ORDER BY i.FileID)
    FROM dbo.zFiles AS i WITH (NOLOCK)
) AS y
ORDER BY y.FileID;
Result:
+--------+----------+--------------+-------+
| FileID | FileSize | RunningTotal | Batch |
+--------+----------+--------------+-------+
| 1 | 100 | 100 | 1 |
| 2 | 20 | 120 | 2 |
| 3 | 20 | 140 | 2 |
| 4 | 30 | 170 | 2 |
| 5 | 10 | 180 | 2 |
| 6 | 120 | 300 | 3 |
| 7 | 400 | 700 | 4 |
| 8 | 50 | 750 | 5 |
| 9 | 100 | 850 | 6 |
| 10 | 60 | 910 | 7 |
| 11 | 40 | 950 | 7 |
| 12 | 5 | 955 | 7 |
| 13 | 20 | 975 | 7 |
| 14 | 95 | 1070 | 8 |
| 15 | 40 | 1110 | 9 |
+--------+----------+--------------+-------+
Expected Result:
+--------+---------------+---------+
| FileID | FileSize (GB) | BatchNo |
+--------+---------------+---------+
| 1 | 100 | 1 |
| 2 | 20 | 2 |
| 3 | 20 | 2 |
| 4 | 30 | 2 |
| 5 | 10 | 2 |
| 6 | 120 | 2 |
| 7 | 400 | 3 |
| 8 | 50 | 4 |
| 9 | 100 | 4 |
| 10 | 60 | 5 |
| 11 | 40 | 5 |
| 12 | 5 | 6 |
| 13 | 20 | 6 |
| 14 | 95 | 6 |
| 15 | 40 | 7 |
+--------+---------------+---------+
We can achieve this if somehow we can reset the running total once it gets over 100. We can write a loop to have this result, but for that we need to go record by record, which is time consuming.
Please somebody help us on this?
You need to do this with a recursive CTE:
-- Walks the files in fileid order, carrying the size of the current batch.
-- A file that would push the batch above @intBatchSize starts a new batch.
-- Expects @intBatchSize to be declared by the caller and fileid to be a
-- gap-free sequence starting at 1.
with cte as (
    -- Anchor: the first file opens batch 1.
    select z.fileid, z.filesize, z.filesize as batch_filesize, 1 as batchnum
    from zfiles z
    where z.fileid = 1
    union all
    -- Recursive step: visit the next file and either extend or restart the batch.
    select z.fileid, z.filesize,
           (case when cte.batch_filesize + z.filesize > @intBatchSize
                 then z.filesize
                 else cte.batch_filesize + z.filesize
            end),
           (case when cte.batch_filesize + z.filesize > @intBatchSize
                 then cte.batchnum + 1
                 else cte.batchnum
            end)
    from cte join
         zfiles z
         on z.fileid = cte.fileid + 1
)
select fileid, filesize, batch_filesize, batchnum
from cte
-- One recursion level is used per file; the default limit of 100 levels would
-- abort the query on larger tables, so the limit is lifted.
option (maxrecursion 0);
Note: I realize that fileid probably is not a sequence. You can create a sequence using row_number() in a CTE, to make this work.
There is a technical reason why running sums don't work for this. Essentially, any given fileid needs to know the breaks before it.
A small modification of the above answer by Gordon Linoff gave the expected result: a new batch is started only once the current batch has already reached the batch size.
DECLARE @intBatchSize INT = 100  -- target batch size, same unit as filesize

-- Walks the files in fileid order, carrying the size of the current batch.
-- A batch is closed only after it has reached @intBatchSize, so the file that
-- crosses the threshold still belongs to the batch it fills up.
-- Expects fileid to be a gap-free sequence starting at 1.
;WITH cte as (
    -- Anchor: the first file opens batch 1.
    select z.fileid, z.filesize, z.filesize as batch_filesize, 1 as batchnum
    from zfiles z
    where z.fileid = 1
    union all
    -- Recursive step: if the previous batch is already full, this file starts
    -- the next one; otherwise it is added to the current batch.
    select z.fileid, z.filesize,
           (case when cte.batch_filesize >= @intBatchSize
                 then z.filesize
                 else cte.batch_filesize + z.filesize
            end),
           (case when cte.batch_filesize >= @intBatchSize
                 then cte.batchnum + 1
                 else cte.batchnum
            end)
    from cte join
         zfiles z
         on z.fileid = cte.fileid + 1
)
select fileid, filesize, batch_filesize, batchnum
from cte
-- One recursion level is used per file; the default limit of 100 levels would
-- abort the query on larger tables, so the limit is lifted.
option (maxrecursion 0);

Show missing rows with 0 values to maintain the order

I have a table with a Name column that its values are either 'A', 'B' or 'C'. They come in order ( A, B, C, A, B, C, ...) however, sometimes a Name might be missing (A, B,[missing C] A, B, C, ...). I want a query that gives me all of Names in order without any missing name. The Value for missing names must be 0.
PS: The table is in a Netezza database and it gets truncated and reloaded with fresh data each time by an SSIS package. What we know is that there is also an ID column with a value between 1 and 27. But the number of rows after each truncation and loading could be different. The table I want does not need the ID column, but if it had, it would be from 1 to 27, meaning that the 'table I want' must always have 27 rows.
I would recommend fixing this in the source SSIS package, but I think the following will work in Netezza (for versions that support the WITH clause). Note that recursion is not used, which I believe isn't supported by Netezza.
If the WITH clause isn't supported, then some other source of a numeric sequence could be used (e.g. row_number()).
setup:
-- Sparse sample of the source table: only a few ids are present.
CREATE TABLE TableHave
(
    Name  varchar(1),
    ID    int,
    Value decimal(5,2)
)
;

INSERT INTO TableHave (Name, ID)
VALUES
    ('A',  1),
    ('A',  4),
    ('A',  7),
    ('C', 21),
    ('B', 23),
    ('A', 25)
;

-- Intentionally unfiltered: gives every sample row a distinct, id-derived Value.
UPDATE TableHave
SET Value = id*1.12;
Query:
;WITH
-- Digits: the ten decimal digits, used as the building block for a number table.
Digits AS (
    SELECT 0 AS digit UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL
    SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
),
-- Tally: the numbers 0..999, built without recursion by cross joining Digits.
Tally AS (
    SELECT
        ones.digit
        + tens.digit * 10
        + hundreds.digit * 100
        -- + thousands.digit * 1000
        as num
    FROM Digits ones
    CROSS JOIN Digits tens
    CROSS JOIN Digits hundreds
    -- CROSS JOIN Digits thousands (keep adding more if needed)
)
select
      d.id
    , d.name
      -- ids that are missing from TableHave have no match in the left join; show 0 for them
    , coalesce(t.value, 0) as value
from (
    -- d: the complete, gap-free sequence of ids with the A, B, C name cycle
    select
          num + 1 as id
        , case when num % 3 = 1 then 'B'
               when num % 3 = 2 then 'C'
               else 'A'
          end as name
    from Tally
    -- num is zero-based (num = id - 1). Round the highest existing id up to the
    -- end of its A/B/C triple so the last cycle is always complete.
    where num <= (select ((max(id) - 1) / 3) * 3 + 2 from TableHave)
) d
left join TableHave t on d.id = t.id
order by d.id
result:
+----+------+-------+
| id | name | value |
+----+------+-------+
| 1 | A | 1.12 |
| 2 | B | 0 |
| 3 | C | 0 |
| 4 | A | 4.48 |
| 5 | B | 0 |
| 6 | C | 0 |
| 7 | A | 7.84 |
| 8 | B | 0 |
| 9 | C | 0 |
| 10 | A | 0 |
| 11 | B | 0 |
| 12 | C | 0 |
| 13 | A | 0 |
| 14 | B | 0 |
| 15 | C | 0 |
| 16 | A | 0 |
| 17 | B | 0 |
| 18 | C | 0 |
| 19 | A | 0 |
| 20 | B | 0 |
| 21 | C | 23.52 |
| 22 | A | 0 |
| 23 | B | 25.76 |
| 24 | C | 0 |
| 25 | A | 28.00 |
| 26 | B | 0 |
| 27 | C | 0 |
+----+------+-------+
A running example (on SQL Server) is available here http://rextester.com/VXB89713