SQL Server query to extract all rows - sql

I've two database tables, one called "Headers" and one called "Rows".
The structure is:
Header: IDPK | Description
Row: IDPK | IDPK_Header | Item_ID | Qty
I need to do a query that says: "From a Header, IDPK find another header that have the same number of rows and the same item ID and quantity".
For example:
Header Rows
IDPK Description IDPK Item_ID Qty
1 'Test1' 1 'A' 10
1 'Test1' 2 'B' 20
2 'Test2' 3 'A' 10
2 'Test2' 4 'B' 20
3 'Test3' 5 'A' 5
3 'Test3' 6 'B' 20
4 'Test4' 7 'A' 10
Header Test1 match Test2 but not Test3 and Test4
The problem is that the number of rows must be exactly the same. I try with ALL operator but without luck.
How I can do the query with an eye for the performance? The two tables can be very huge (~500.000 records).

Assuming there are no duplicates:
with r as (
select r.*, count(*) over (partition by idpk_header) as num_items
from rows r
)
select r1.idpk_header, r2.idpk_header
from r r1 join
r r2
on r1.item_id = r1.item_id and r2.qty = r1.qty and r2.num_items = r1.num_items
group by r1.idpk_header, r2.idpk_header, r1.num_items
having count(*) = r1.num_items;
Basically, this does a self-join on the items, so you only get matches. The on validates that the two have the same number of items. And the having guarantees that all match.
Note: This version returns each match of the header to itself. That is a nice check. You can of course filter this out in the on or a where clause.
If you do have duplicate items, you can simply replace r with:
select idpk_header, item_id, sum(qty) as qty,
count(*) over (partition by idpk_header) as num_items
from rows r
group by idpk_header, item_id;

I woul suggest using a forxml query in order to create the list of items per IDPK. Next I would search for matching item lists and quantities. See following example:
DECLARE #Headers TABLE(
IDPK INT,
Description NVARCHAR(100)
)
DECLARE #Rows TABLE(
IDPK INT,
ITEMID NVARCHAR(1),
Qty INT
)
INSERT INTO #Headers VALUES
(1, 'Test1'),
(2, 'Test2'),
(3, 'Test3'),
(4, 'Test4'),
(5, 'Test5')
INSERT INTO #Rows VALUES
(1, 'A', 10),
(1, 'B', 20),
(2, 'A', 10),
(2, 'B', 20),
(3, 'A', 5 ),
(3, 'B', 20),
(4, 'C', 10),
(5, 'A', 10),
(5, 'C', 20)
;
WITH cteHeaderRows AS(
SELECT IDPK
,ItemIDs=STUFF(
(
SELECT ',' + CAST(ITEMID AS VARCHAR(MAX))
FROM #Rows t2
WHERE t2.IDPK = t1.IDPK
ORDER BY ITEMID, QTY
FOR XML PATH('')
),1,1,''
)
,Qtys=STUFF(
(
SELECT ',' + CAST(Qty AS VARCHAR(MAX))
FROM #Rows t2
WHERE t2.IDPK = t1.IDPK
ORDER BY ITEMID, QTY
FOR XML PATH('')
),1,1,''
)
FROM #Rows t1
GROUP BY IDPK
),
cteFilter AS(
SELECT h1.IDPK AS IDPK1, h2.IDPK AS IDPK2
FROM cteHeaderRows h1
JOIN cteHeaderRows h2 ON h1.IDPK != h2.IDPK AND h1.ItemIDs = h2.ItemIDs AND h2.Qtys = h1.Qtys
)
SELECT DISTINCT h.IDPK, h.Description, r.ItemID, r.Qty
FROM #Headers h
JOIN cteFilter f ON f.IDPK1 = h.IDPK
JOIN #Rows r ON r.IDPK = f.IDPK1
ORDER BY 1,3,4

Related

SQL Group By specific column with nullable

Let's say I have this data in my table A:
group_id
type
active
1
A
true
1
B
false
1
C
true
2
null
false
3
B
true
3
C
false
I want to create a query which return the A row if exists (without the type column), else return a row with active false.
For this specific table the result will be:
group_id
active
1
true
2
false
3
false
How can I do this ?
I'm assuming I have to use a GROUP BY but I can't find a way to do it.
Thank you
This is a classic row_number problem, generate a row number based on your ordering criteria, then select just the first row in each grouping.
declare #MyTable table (group_id int, [type] char(1), active bit);
insert into #MyTable (group_id, [type], active)
values
(1, 'A', 1),
(1, 'B', 0),
(1, 'C', 1),
(2, null, 0),
(3, 'B', 1),
(3, 'C', 0);
with cte as (
select *
, row_number() over (
partition by group_id
order by case when [type] = 'A' then 1 else 0 end desc, active asc
) rn
from #MyTable
)
select group_id, active
from cte
where rn = 1
order by group_id;
Returns:
group_id
active
1
1
2
0
3
0
Note: Providing the DDL+DML as I have shown makes it much easier for people to assist.
This should do it. We select all the distinct group_ids and then join our table back to that. There is an ISNULL function that will insert the 'false' when 'A' type records are not found.
DECLARE #tableA TABLE (
group_id int
, [type] nchar(1)
, active nvarchar(10)
);
INSERT INTO #tableA (group_id, [type], active)
VALUES
(1, 'A', 'true')
, (1,'B','false')
, (1,'C', 'false')
, (2, null, 'false')
, (3, 'B', 'true')
, (3, 'C', 'false')
;
SELECT
gid.group_id
, ISNULL(a.active,'false') as active
FROM (SELECT DISTINCT group_id FROM #tableA) as gid
LEFT OUTER JOIN #tableA as a
ON a.group_id = gid.group_id
AND a.type = 'A'

Find and classify sequential patterns for a distinct group T-SQL

I need help finding and classifying sequential patterns for each distinct key.
From the data I have, I need to create a new table that contains the key and a pattern identifier that belongs to that key.
From the example below the pattern is as follows:
Key #1 and #3 have the values 1, 2 and 3. The Key #3 has the values 8,
9 and 10. When a distinct pattern exists for a key I.E (1, 2, 3) I
need to create an entry on the table for the key # and that specific
pattern (1, 2, 3)
Data:
key value
1 1
1 2
1 3
2 8
2 9
2 10
3 1
3 2
3 3
Expected Output:
key pattern
1 1
2 2
3 1
Fiddle:
http://sqlfiddle.com/#!6/4fe39
Example table:
CREATE TABLE yourtable
([key] int, [value] int)
;
INSERT INTO yourtable
([key], [value])
VALUES
(1, 1),
(1, 2),
(1, 3),
(2, 8),
(2, 9),
(2, 10),
(3, 1),
(3, 2),
(3, 3)
;
You can concatenate the values together in several ways. The traditional method in SQL Server uses for xml:
select k.key,
stuff( (select ',' + cast(t.id as varchar(255))
from t
where k.key = t.key
for xml path ('')
order by t.id
), 1, 1, ''
) as ids
from (select distinct key from t) k;
You can convert this to a unique number using a CTE/subquery:
with cte as (
select k.key,
stuff( (select ',' + cast(t.id as varchar(255))
from t
where k.key = t.key
for xml path ('')
order by t.id
), 1, 1, ''
) as ids
from (select distinct key from t) k
)
select cte.*, dense_rank() over (order by ids) as ids_id
from cte;

T-SQL Summation

I'm trying to create result set with 3 columns. Each column coming from the summation of 1 Column of Table A but grouped by different ID's. Here's an overview of what I wanted to do..
Table A
ID Val.1
1 4
1 5
1 6
2 7
2 8
2 9
3 10
3 11
3 12
I wanted to create something like..
ROW SUM.VAL.1 SUM.VAL.2 SUM.VAL.3
1 15 21 33
I understand that I can not get this using UNION, I was thinking of using CTE but not quite sure with the logic.
You need conditional Aggregation
select 1 as Row,
sum(case when ID = 1 then Val.1 end),
sum(case when ID = 2 then Val.1 end),
sum(case when ID = 3 then Val.1 end)
From yourtable
You may need dynamic cross tab or pivot if number of ID's are not static
DECLARE #col_list VARCHAR(8000)= Stuff((SELECT ',sum(case when ID = '+ Cast(ID AS VARCHAR(20))+ ' then [Val.1] end) as [val.'+Cast(ID AS VARCHAR(20))+']'
FROM Yourtable
GROUP BY ID
FOR xml path('')), 1, 1, ''),
#sql VARCHAR(8000)
exec('select 1 as Row,'+#col_list +'from Yourtable')
Live Demo
I think pivoting the data table will yield the desired result.
IF OBJECT_ID('tempdb..#TableA') IS NOT NULL
DROP TABLE #TableA
CREATE TABLE #TableA
(
RowNumber INT,
ID INT,
Value INT
)
INSERT #TableA VALUES (1, 1, 4)
INSERT #TableA VALUES (1, 1, 5)
INSERT #TableA VALUES (1, 1, 6)
INSERT #TableA VALUES (1, 2, 7)
INSERT #TableA VALUES (1, 2, 8)
INSERT #TableA VALUES (1, 2, 9)
INSERT #TableA VALUES (1, 3, 10)
INSERT #TableA VALUES (1, 3, 11)
INSERT #TableA VALUES (1, 3, 12)
-- https://msdn.microsoft.com/en-us/library/ms177410.aspx
SELECT RowNumber, [1] AS Sum1, [2] AS Sum2, [3] AS Sum3
FROM
(
SELECT RowNumber, ID, Value
FROM #TableA
) a
PIVOT
(
SUM(Value)
FOR ID IN ([1], [2], [3])
) AS p
This technique works if the ids you are seeking are constant, otherwise I imagine some dyanmic-sql would work as well if changing ids are needed.
https://msdn.microsoft.com/en-us/library/ms177410.aspx

Common Table Expression to traverse down hierarchy

The Structure
I have 2 tables that link to each other. One is a set of values and a nullable foreign key that points to the Id of the other table, which contains 2 foreign keys back to the other table.
HierarchicalTable
Id LeftId RightId SomeValue
1 1 2 some value
2 3 4 top level in tree
3 5 6 incorrect hierarchy 1
4 7 8 incorrect result top level
IntermediateTable
Id SomeValue HierarchicalTableId
1 some value NULL
2 value NULL
3 NULL 1
4 value NULL
5 incorrect result 1 NULL
6 incorrect result 3 NULL
7 incorrect result 3 NULL
8 NULL 3
Each table points down the hierarchy. Here is this structure graphed out for the Hierarchical Table records 1 & 2 and their IntermediateTable values:
(H : HierarchicalTable, I : IntermediateTable)
H-2
/ \
I-3 I-4
/
H-1
/ \
I-1 I-2
The Problem
I need to be able to send in an Id for a given HierarchicalTable and get all the HierarchicalTable records below it. So, for the structure above, if I pass 1 into a query, I should just get H-1 (and from that, I can load the related IntermediateTable values). If I pass 2, I should get H-2 and H-1 (and, again, use those to load the relevant IntermediateTable values).
The Attempts
I've tried using a CTE, but there are a few main things that are different from the examples I've seen:
In my structure, the objects point down to their children, instead of up to their parent
I have the Id of the top object, not the Id of the bottom object.
My hierarchy is split across 2 tables. This shouldn't be a big issue once I understand the algorithm to find the results I need, but this could be causing additional confusion for me.
If I run this query:
declare #TargetId bigint = 2
;
with test as (
select h.*
from dbo.hierarchicaltable h
inner join dbo.intermediatetable i
on (h.leftid = i.id or h.rightid = i.id)
union all
select h.*
from dbo.hierarchicaltable h
where h.id = #TargetId
)
select distinct *
from test
I get all 4 records in the HierarchicalTable, instead of just records 1 & 2. I'm not sure if what I want is possible to do with a CTE.
Try this:
I'm build entire tree with both tables, then filter (only hierarchicaltable records).
DECLARE #HierarchicalTable TABLE(
Id INT,
LeftId INT,
RightId INT,
SomeValue VARCHAR(MAX)
)
INSERT INTO #HierarchicalTable
VALUES
(1, 1, 2, 'some value '),
(2, 3, 4, 'top level in tree '),
(3, 5, 6, 'incorrect hierarchy 1 '),
(4, 7, 8, 'incorrect result top level')
DECLARE #IntermediateTable TABLE(
Id INT,
SomeValue VARCHAR(MAX),
HierarchicalTableId INT
)
INSERT INTO #IntermediateTable
VALUES
(1, 'some value' ,NULL ),
(2, 'value ' ,NULL ),
(3, NULL ,1 ),
(4, 'value ' ,NULL ),
(5, 'incorrect result 1' ,NULL ),
(6, 'incorrect result 3' ,NULL ),
(7, 'incorrect result 3' ,NULL ),
(8, NULL ,3 )
DECLARE #TargetId INT = 2;
WITH CTE AS (
SELECT Id AS ResultId, LeftId, RightId, NULL AS HierarchicalTableId
FROM #HierarchicalTable
WHERE Id = #TargetId
UNION ALL
SELECT C.Id AS ResultId, C.LeftId, C.RightId, NULL AS HierarchicalTableId
FROM #HierarchicalTable C
INNER JOIN CTE P ON P.HierarchicalTableId = C.Id
UNION ALL
SELECT NULL AS ResultId, NULL AS LeftId, NULL AS RightId, C.HierarchicalTableId
FROM #IntermediateTable C
INNER JOIN CTE P ON P.LeftId = C.Id OR P.RightId = C.Id
)
SELECT *
FROM CTE
WHERE ResultId IS NOT NULL

Matching multiple key/value pairs in SQL

I have metadata stored in a key/value table in SQL Server. (I know key/value is bad, but this is free-form metadata supplied by users, so I can't turn the keys into columns.) Users need to be able to give me an arbitrary set of key/value pairs and have me return all DB objects that match all of those criteria.
For example:
Metadata:
Id Key Value
1 a p
1 b q
1 c r
2 a p
2 b p
3 c r
If the user says a=p and b=q, I should return object 1. (Not object 2, even though it also has a=p, because it has b=p.)
The metadata to match is in a table-valued sproc parameter with a simple key/value schema. The closest I have got is:
select * from [Objects] as o
where not exists (
select * from [Metadata] as m
join #data as n on (n.[Key] = m.[Key])
and n.[Value] != m.[Value]
and m.[Id] = o.[Id]
)
My "no rows exist that don't match" is an attempt to implement "all rows match" by forming its contrapositive. This does eliminate objects with mismatching metadata, but it also returns objects with no metadata at all, so no good.
Can anyone point me in the right direction? (Bonus points for performance as well as correctness.)
; WITH Metadata (Id, [Key], Value) AS -- Create sample data
(
SELECT 1, 'a', 'p' UNION ALL
SELECT 1, 'b', 'q' UNION ALL
SELECT 1, 'c', 'r' UNION ALL
SELECT 2, 'a', 'p' UNION ALL
SELECT 2, 'b', 'p' UNION ALL
SELECT 3, 'c', 'r'
),
data ([Key], Value) AS -- sample input
(
SELECT 'a', 'p' UNION ALL
SELECT 'b', 'q'
),
-- here onwards is the actual query
data2 AS
(
-- cnt is to count no of input rows
SELECT [Key], Value, cnt = COUNT(*) OVER()
FROM data
)
SELECT m.Id
FROM Metadata m
INNER JOIN data2 d ON m.[Key] = d.[Key] AND m.Value= d.Value
GROUP BY m.Id
HAVING COUNT(*) = MAX(d.cnt)
The following SQL query produces the result that you require.
SELECT *
FROM #Objects m
WHERE Id IN
(
-- Include objects that match the conditions:
SELECT m.Id
FROM #Metadata m
JOIN #data d ON m.[Key] = d.[Key] AND m.Value = d.Value
-- And discount those where there is other metadata not matching the conditions:
EXCEPT
SELECT m.Id
FROM #Metadata m
JOIN #data d ON m.[Key] = d.[Key] AND m.Value <> d.Value
)
Test schema and data I used:
-- Schema
DECLARE #Objects TABLE (Id int);
DECLARE #Metadata TABLE (Id int, [Key] char(1), Value char(2));
DECLARE #data TABLE ([Key] char(1), Value char(1));
-- Data
INSERT INTO #Metadata VALUES
(1, 'a', 'p'),
(1, 'b', 'q'),
(1, 'c', 'r'),
(2, 'a', 'p'),
(2, 'b', 'p'),
(3, 'c', 'r');
INSERT INTO #Objects VALUES
(1),
(2),
(3),
(4); -- Object with no metadata
INSERT INTO #data VALUES
('a','p'),
('b','q');