concatenate row data of a column based on group - sql

Thank you to anyone who can help. I am looking for help creating an SQL script to do the following.
Have the following table
REF 1
REF 2
Ref 3
wt
91112
43111
14928-001
2
91112
43111
14928-002
5
91113
42555
14940-001
3
91112
43111
14928-003
4
91114
42556
14941-001
1
where the desired result would look like the following
REF 1
REF 2
REF 3
WT
91112
43111
14928-001/002/003
11
91113
42555
14940-001
3
91114
42556
14941-001
1
I am stumped at how to get 14928-001/002/003 as a merged value
this is for T-SQL
Thank you,

If you were using SQL Server 2017 or later, you could use the STRING_AGG function. But since you are using SQL Server 2014, you have to use the FOR XML trick.
I have a solution. It isn't elegant, and there is most likely a better way to do this, but here goes.
First, I set up a table variable and put your data in it. I then used a common table expression and some grouping to create an intermediate result:
-- Sample data held in a table variable (T-SQL table variables are named with a leading @).
DECLARE @Data TABLE
(
    REF1 INT         NOT NULL,
    REF2 INT         NOT NULL,
    REF3 VARCHAR(20) NOT NULL,
    WT   INT         NOT NULL
);

INSERT INTO @Data (REF1, REF2, REF3, WT)
VALUES (91112, 43111, '14928-001', 2),
       (91112, 43111, '14928-002', 5),
       (91113, 42555, '14940-001', 3),
       (91112, 43111, '14928-003', 4),
       (91114, 42556, '14941-001', 1);

-- Step1 splits REF3 at its first '-' into a prefix (REF3_LEFT) and a suffix (REF3_RIGHT).
-- Every REF3 value must contain a '-', otherwise LEFT() receives a negative length.
WITH Step1 AS
(
    SELECT REF1,
           REF2,
           REF3,
           LEFT(REF3, CHARINDEX('-', REF3) - 1) AS REF3_LEFT,
           SUBSTRING(REF3, CHARINDEX('-', REF3) + 1, LEN(REF3) - CHARINDEX('-', REF3)) AS REF3_RIGHT,
           WT
    FROM @Data
)
-- Intermediate result: one row per (REF1, REF2, REF3_LEFT) with the summed weight and
-- the '/'-prefixed, sorted list of suffixes built with the FOR XML PATH('') technique.
SELECT p.REF1,
       p.REF2,
       p.REF3_LEFT,
       (SELECT '/' + sub.REF3_RIGHT
        FROM Step1 AS sub
        WHERE sub.REF1 = p.REF1
          AND sub.REF2 = p.REF2
          -- Correlate on the prefix as well, so that a REF1/REF2 pair with several
          -- prefixes does not mix the suffixes of different prefixes together.
          AND sub.REF3_LEFT = p.REF3_LEFT
        ORDER BY sub.REF3_RIGHT
        FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)') AS suffixes,
       SUM(p.WT) AS WT
FROM Step1 AS p
GROUP BY p.REF1, p.REF2, p.REF3_LEFT
ORDER BY p.REF1, p.REF2;
The result of this looks like the following:
REF1
REF2
REF3_LEFT
suffixes
WT
91112
43111
14928
/001/002/003
11
91113
42555
14940
/001
3
91114
42556
14941
/001
1
This is very close... all we need to do now is replace the leading / of suffixes with a - and then concatenate it to REF3_LEFT, and we will have it. So I will make another common table expression from the first and get the result you have shown above:
-- Sample data held in a table variable (T-SQL table variables are named with a leading @).
DECLARE @Data TABLE
(
    REF1 INT         NOT NULL,
    REF2 INT         NOT NULL,
    REF3 VARCHAR(20) NOT NULL,
    WT   INT         NOT NULL
);

INSERT INTO @Data (REF1, REF2, REF3, WT)
VALUES (91112, 43111, '14928-001', 2),
       (91112, 43111, '14928-002', 5),
       (91113, 42555, '14940-001', 3),
       (91112, 43111, '14928-003', 4),
       (91114, 42556, '14941-001', 1);

-- Step1 splits REF3 at its first '-' into a prefix (REF3_LEFT) and a suffix (REF3_RIGHT).
-- Every REF3 value must contain a '-', otherwise LEFT() receives a negative length.
WITH Step1 AS
(
    SELECT REF1,
           REF2,
           REF3,
           LEFT(REF3, CHARINDEX('-', REF3) - 1) AS REF3_LEFT,
           SUBSTRING(REF3, CHARINDEX('-', REF3) + 1, LEN(REF3) - CHARINDEX('-', REF3)) AS REF3_RIGHT,
           WT
    FROM @Data
),
-- Step2 aggregates per (REF1, REF2, REF3_LEFT): summed weight plus the sorted,
-- '/'-prefixed suffix list (e.g. '/001/002/003') built with FOR XML PATH('').
Step2 AS
(
    SELECT p.REF1,
           p.REF2,
           p.REF3_LEFT,
           (SELECT '/' + sub.REF3_RIGHT
            FROM Step1 AS sub
            WHERE sub.REF1 = p.REF1
              AND sub.REF2 = p.REF2
              -- Correlate on the prefix too, so suffixes of different prefixes are never mixed.
              AND sub.REF3_LEFT = p.REF3_LEFT
            ORDER BY sub.REF3_RIGHT
            FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)') AS suffixes,
           SUM(p.WT) AS WT
    FROM Step1 AS p
    GROUP BY p.REF1, p.REF2, p.REF3_LEFT
)
-- Drop the leading '/' of the suffix list and glue it to the prefix with '-'.
SELECT REF1,
       REF2,
       CONCAT(REF3_LEFT, '-', STUFF(suffixes, 1, 1, '')) AS REF3,
       WT
FROM Step2
ORDER BY REF1, REF2;
And this is the result:
REF1
REF2
REF3
WT
91112
43111
14928-001/002/003
11
91113
42555
14940-001
3
91114
42556
14941-001
1
That is the desired output you have shown. Obviously you do not need the table variable, since you already have the table and data; it should be easy for you to replace the table variable with the actual table name.

Related

SQL Server query to extract all rows

I've two database tables, one called "Headers" and one called "Rows".
The structure is:
Header: IDPK | Description
Row: IDPK | IDPK_Header | Item_ID | Qty
I need to write a query that says: "Given a Header IDPK, find the other headers that have the same number of rows with the same item IDs and quantities".
For example:
Header Rows
IDPK Description IDPK Item_ID Qty
1 'Test1' 1 'A' 10
1 'Test1' 2 'B' 20
2 'Test2' 3 'A' 10
2 'Test2' 4 'B' 20
3 'Test3' 5 'A' 5
3 'Test3' 6 'B' 20
4 'Test4' 7 'A' 10
Header Test1 match Test2 but not Test3 and Test4
The problem is that the number of rows must be exactly the same. I tried the ALL operator, but without luck.
How I can do the query with an eye for the performance? The two tables can be very huge (~500.000 records).
Assuming there are no duplicates:
-- r: every row plus the number of rows that belong to its header.
with r as (
    select r.idpk_header,
           r.item_id,
           r.qty,
           count(*) over (partition by r.idpk_header) as num_items
    from rows r
)
-- Pair up headers row by row: a pair qualifies when the item, the quantity and the
-- header's row count all agree. The HAVING clause keeps only pairs in which every
-- row of the header found a partner. Each header also pairs with itself.
select r1.idpk_header, r2.idpk_header
from r r1 join
     r r2
     -- Compare r2's item with r1's item (comparing r1.item_id with itself is always
     -- true and would let rows with different items match).
     on r2.item_id = r1.item_id and r2.qty = r1.qty and r2.num_items = r1.num_items
group by r1.idpk_header, r2.idpk_header, r1.num_items
having count(*) = r1.num_items;
Basically, this does a self-join on the items, so you only get matches. The on validates that the two have the same number of items. And the having guarantees that all match.
Note: This version returns each match of the header to itself. That is a nice check. You can of course filter this out in the on or a where clause.
If you do have duplicate items, you can simply replace r with:
-- Collapse duplicate items first (one row per header and item, quantities summed),
-- then count how many distinct items each header has.
select agg.idpk_header,
       agg.item_id,
       agg.qty,
       count(*) over (partition by agg.idpk_header) as num_items
from (select idpk_header, item_id, sum(qty) as qty
      from rows r
      group by idpk_header, item_id
     ) agg;
I would suggest using a FOR XML query to create the list of items per IDPK. Next, I would search for matching item lists and quantities. See the following example:
-- Sample data in table variables (T-SQL table variables are named with a leading @).
DECLARE @Headers TABLE
(
    IDPK        INT,
    Description NVARCHAR(100)
);

DECLARE @Rows TABLE
(
    IDPK   INT,          -- IDPK of the header the row belongs to
    ITEMID NVARCHAR(1),
    Qty    INT
);

INSERT INTO @Headers (IDPK, Description)
VALUES (1, 'Test1'),
       (2, 'Test2'),
       (3, 'Test3'),
       (4, 'Test4'),
       (5, 'Test5');

INSERT INTO @Rows (IDPK, ITEMID, Qty)
VALUES (1, 'A', 10),
       (1, 'B', 20),
       (2, 'A', 10),
       (2, 'B', 20),
       (3, 'A', 5 ),
       (3, 'B', 20),
       (4, 'C', 10),
       (5, 'A', 10),
       (5, 'C', 20);

-- cteHeaderRows: one row per header with two comma-separated lists, the item ids and
-- the quantities. Both lists use the same sort order (ITEMID, Qty) so that position n
-- of one list belongs to position n of the other.
WITH cteHeaderRows AS
(
    SELECT t1.IDPK,
           -- NVARCHAR keeps the ITEMID values intact (the column is NVARCHAR).
           ItemIDs = STUFF((SELECT ',' + CAST(t2.ITEMID AS NVARCHAR(MAX))
                            FROM @Rows AS t2
                            WHERE t2.IDPK = t1.IDPK
                            ORDER BY t2.ITEMID, t2.Qty
                            FOR XML PATH('')), 1, 1, ''),
           Qtys = STUFF((SELECT ',' + CAST(t2.Qty AS VARCHAR(MAX))
                         FROM @Rows AS t2
                         WHERE t2.IDPK = t1.IDPK
                         ORDER BY t2.ITEMID, t2.Qty
                         FOR XML PATH('')), 1, 1, '')
    FROM @Rows AS t1
    GROUP BY t1.IDPK
),
-- cteFilter: pairs of different headers whose item list and quantity list are identical.
cteFilter AS
(
    SELECT h1.IDPK AS IDPK1, h2.IDPK AS IDPK2
    FROM cteHeaderRows AS h1
    INNER JOIN cteHeaderRows AS h2
        ON h1.IDPK <> h2.IDPK
       AND h1.ItemIDs = h2.ItemIDs
       AND h1.Qtys = h2.Qtys
)
-- Rows of every header that has at least one matching header. DISTINCT is needed
-- because a header that matches several others appears once per match in cteFilter.
SELECT DISTINCT h.IDPK, h.Description, r.ItemID, r.Qty
FROM @Headers AS h
INNER JOIN cteFilter AS f ON f.IDPK1 = h.IDPK
INNER JOIN @Rows AS r ON r.IDPK = f.IDPK1
ORDER BY h.IDPK, r.ItemID, r.Qty;

T-SQL Calculate the max value from an Alpha Numeric key

I have a customers table which has an Alphanumeric key consisting of 5 letters and 3 numbers.
I'm trying to calculate the next 3 digit number in sequence for each 5 letters for example:
Example Key
ALPHA001
ALPHA002
NUMBE001
NUMBE002
NUMBE003
PREST001
PREST002
PREST003
PREST004
PREST005
From the list of keys above i'd like to return the maximum of each unique 5 letter key. i.e.
Returned Values
ALPHA002
NUMBE003
PREST005
First of all: do not store more than one value within one column. You should store the key and the running number in separate columns and combine them just for display purposes...
Try this
-- Mock-up data. A name starting with # denotes a temporary table, which must be
-- created with CREATE TABLE (DECLARE ... TABLE is only valid for @table variables).
-- A temporary table stays visible to later batches of the same session.
IF OBJECT_ID('tempdb..#mockupTable') IS NOT NULL
    DROP TABLE #mockupTable;

CREATE TABLE #mockupTable
(
    ID      INT IDENTITY,
    YourKey VARCHAR(100)
);

INSERT INTO #mockupTable (YourKey)
VALUES ('ALPHA001')
      ,('ALPHA002')
      ,('NUMBE001')
      ,('NUMBE002')
      ,('NUMBE003')
      ,('PREST001')
      ,('PREST002')
      ,('PREST003')
      ,('PREST004')
      ,('PREST005');

-- Number the keys within each 5-character prefix, highest 3-digit number first,
-- and keep only the first key of every prefix.
WITH cte AS
(
    SELECT ID,
           YourKey,
           ROW_NUMBER() OVER (PARTITION BY LEFT(YourKey, 5)
                              ORDER BY CAST(RIGHT(YourKey, 3) AS INT) DESC) AS PartitionedRowNumber
    FROM #mockupTable
)
SELECT ID, YourKey, PartitionedRowNumber
FROM cte
WHERE PartitionedRowNumber = 1
ORDER BY ID;
The result
ID Key
2 ALPHA002
5 NUMBE003
10 PREST005
You can use row_number():
-- Returns, for every alphabetic prefix, the row with the highest key.
-- [table] and col are placeholders for the real table and key column.
-- TOP (1) WITH TIES keeps every row whose row number is 1, i.e. one row per prefix.
select top (1) with ties t.*
from [table] t
order by row_number() over (
             -- patindex gives the position of the first digit, so the prefix ends one
             -- character earlier. Appending '0' guarantees a digit is found, which
             -- keeps the length non-negative for keys that contain no digit.
             partition by left(col, patindex('%[0-9]%', col + '0') - 1)
             order by col desc);
If the letters are fixed then just use left() :
-- Partition on the first five characters of col; within each partition the row with the highest col gets row number 1.
order by row_number() over (partition by left(col, 5) order by col desc);
I'm trying to calculate the next 3 digit number in sequence for each 5
letters
This should do it:
-- Next key per 5-character prefix: highest 3-digit number + 1, re-padded to three digits.
-- The numeric part is converted to INT explicitly, so the maximum is a numeric maximum
-- and the + 1 does not depend on an implicit string-to-number conversion.
SELECT CONCAT(LEFT(k, 5), FORMAT(MAX(CAST(RIGHT(k, 3) AS INT)) + 1, '000')) AS next_key
FROM (VALUES
    ('ALPHA001'),
    ('ALPHA002'),
    ('NUMBE001'),
    ('NUMBE002'),
    ('NUMBE003'),
    ('PREST001'),
    ('PREST002'),
    ('PREST003'),
    ('PREST004'),
    ('PREST005')
) AS tests (k)
GROUP BY LEFT(k, 5);
You can do this with GROUP BY and MAX:
-- Split every key into its prefix and numeric sequence first, then take the highest
-- sequence per prefix, add 1 and left-pad it with zeros to three digits.
SELECT parsed.KeyPrefix,
       CONCAT(parsed.KeyPrefix,
              RIGHT(CONCAT('000', MAX(parsed.KeySequence) + 1), 3)) AS NextKey
FROM (
    SELECT LEFT(t.ExampleKey, 5) AS KeyPrefix,
           CONVERT(INT, RIGHT(t.ExampleKey, 3)) AS KeySequence
    FROM (VALUES
        ('ALPHA001'), ('ALPHA002'), ('NUMBE001'), ('NUMBE002'), ('NUMBE003'),
        ('PREST001'), ('PREST002'), ('PREST003'), ('PREST004'), ('PREST005')
    ) AS t (ExampleKey)
) AS parsed
GROUP BY parsed.KeyPrefix;
The key operations being:
Get number part of key: RIGHT(ExampleKey, 3)
Convert this to an integer: CONVERT(INT, <output from 1>)
Find the max for the key type and add 1: MAX(<output from 2>) + 1
Pad this with zeros: RIGHT(CONCAT('000', <output from 3>), 3)
Concatenate with the original prefix: CONCAT(LEFT(ExampleKey, 5), <output from 4>)
I would, however, highly recommend storing this in two columns and using a computed column to combine them:
-- Stores the key as two separate columns; the combined display key is derived from them.
CREATE TABLE dbo.T
(
KeyPrefix CHAR(5) NOT NULL, -- 5-character prefix of the key
KeySequence INT NOT NULL, -- running number within the prefix
-- Computed column: the prefix followed by the sequence left-padded with zeros;
-- RIGHT(..., 3) keeps the last three characters of the padded number.
TKey AS CONCAT(KeyPrefix, RIGHT(CONCAT('000', KeySequence), 3))
);
Then your query becomes much simpler:
-- Work out the next sequence per prefix once, then derive the display key from it.
SELECT nxt.KeyPrefix,
       nxt.NextSequence AS KeySequence,
       CONCAT(nxt.KeyPrefix, RIGHT(CONCAT('000', nxt.NextSequence), 3)) AS TKey
FROM (
    SELECT t.KeyPrefix,
           MAX(t.KeySequence) + 1 AS NextSequence
    FROM (VALUES
        ('ALPHA', 1), ('ALPHA', 2), ('NUMBE', 1), ('NUMBE', 2), ('NUMBE', 3),
        ('PREST', 1), ('PREST', 2), ('PREST', 3), ('PREST', 4), ('PREST', 5)
    ) AS t (KeyPrefix, KeySequence)
    GROUP BY t.KeyPrefix
) AS nxt;
Although worth noting that you would never actually need to reconstruct the key as I have done above in the column TKey, you just need the max keysequence.
Use this query.
GO
-- Highest key per alphabetic prefix, read from #mockupTable (which this snippet does
-- not create). The position of the first digit splits each key into prefix (everything
-- before it) and number (everything from it onwards). Appending '0' to the key
-- guarantees PATINDEX finds a digit, so keys without digits do not produce a negative length.
;WITH cte AS
(
    SELECT ROW_NUMBER() OVER (
               PARTITION BY LEFT(YourKey, PATINDEX('%[0-9]%', YourKey + '0') - 1)
               ORDER BY CAST(SUBSTRING(YourKey, PATINDEX('%[0-9]%', YourKey + '0'), LEN(YourKey)) AS INT) DESC
           ) AS rr,
           YourKey
    FROM #mockupTable
)
SELECT YourKey FROM cte WHERE rr = 1;
GO
GO

Find and classify sequential patterns for a distinct group T-SQL

I need help finding and classifying sequential patterns for each distinct key.
From the data I have, I need to create a new table that contains the key and a pattern identifier that belongs to that key.
From the example below the pattern is as follows:
Key #1 and #3 have the values 1, 2 and 3. Key #2 has the values 8,
9 and 10. When a distinct pattern exists for a key, i.e. (1, 2, 3), I
need to create an entry in the table for the key # and that specific
pattern (1, 2, 3).
Data:
key value
1 1
1 2
1 3
2 8
2 9
2 10
3 1
3 2
3 3
Expected Output:
key pattern
1 1
2 2
3 1
Fiddle:
http://sqlfiddle.com/#!6/4fe39
Example table:
-- Example table: one row per ([key], [value]) pair. Both names are bracket-quoted
-- because they collide with T-SQL keywords.
CREATE TABLE yourtable
(
    [key]   int,
    [value] int
);

-- Keys 1 and 3 share the values 1, 2, 3; key 2 has 8, 9, 10.
INSERT INTO yourtable ([key], [value])
VALUES (1, 1), (1, 2), (1, 3),
       (2, 8), (2, 9), (2, 10),
       (3, 1), (3, 2), (3, 3);
You can concatenate the values together in several ways. The traditional method in SQL Server uses for xml:
-- One row per key with the comma-separated, sorted list of its ids.
-- t and id are placeholders for the real table and value column.
-- [key] is bracket-quoted because KEY is a reserved word in T-SQL.
select k.[key],
       stuff( (select ',' + cast(t.id as varchar(255))
               from t
               where k.[key] = t.[key]
               -- ORDER BY has to come before FOR XML; it fixes the order of the list
               -- so that equal sets of ids always produce the same string.
               order by t.id
               for xml path ('')
              ), 1, 1, ''
            ) as ids
from (select distinct [key] from t) k;
You can convert this to a unique number using a CTE/subquery:
-- cte: one row per key with the comma-separated, sorted list of its ids.
-- t and id are placeholders for the real table and value column.
-- [key] is bracket-quoted because KEY is a reserved word in T-SQL.
with cte as (
      select k.[key],
             stuff( (select ',' + cast(t.id as varchar(255))
                     from t
                     where k.[key] = t.[key]
                     -- ORDER BY has to come before FOR XML; it fixes the order of the
                     -- list so that equal sets of ids always produce the same string.
                     order by t.id
                     for xml path ('')
                    ), 1, 1, ''
                  ) as ids
      from (select distinct [key] from t) k
     )
-- Keys with the same list get the same dense_rank, which serves as the pattern number.
select cte.[key], cte.ids, dense_rank() over (order by cte.ids) as ids_id
from cte;

T-SQL Summation

I'm trying to create result set with 3 columns. Each column coming from the summation of 1 Column of Table A but grouped by different ID's. Here's an overview of what I wanted to do..
Table A
ID Val.1
1 4
1 5
1 6
2 7
2 8
2 9
3 10
3 11
3 12
I wanted to create something like..
ROW SUM.VAL.1 SUM.VAL.2 SUM.VAL.3
1 15 21 33
I understand that I can not get this using UNION, I was thinking of using CTE but not quite sure with the logic.
You need conditional Aggregation
-- Conditional aggregation: each SUM only sees the rows of one ID (the CASE yields NULL
-- for all other rows, and SUM ignores NULLs). The column name Val.1 contains a dot, so
-- it must be bracket-quoted; unquoted it is parsed as column 1 of an object named Val.
select 1 as [Row],
       sum(case when ID = 1 then [Val.1] end) as [SUM.VAL.1],
       sum(case when ID = 2 then [Val.1] end) as [SUM.VAL.2],
       sum(case when ID = 3 then [Val.1] end) as [SUM.VAL.3]
from yourtable;
You may need dynamic cross tab or pivot if number of ID's are not static
-- Dynamic cross tab: builds one "sum(case when ID = n ...)" column per distinct ID found
-- in Yourtable and executes the generated statement.
-- Local variables are named with a leading @. NVARCHAR(MAX) avoids truncating the
-- generated statement when there are many IDs.
DECLARE @col_list NVARCHAR(MAX),
        @sql      NVARCHAR(MAX);

-- ID is numeric and is cast to text here, so the generated column list cannot contain
-- anything but digits in the places where ID is inserted.
SELECT @col_list = STUFF((SELECT ',sum(case when ID = ' + CAST(ID AS VARCHAR(20))
                                 + ' then [Val.1] end) as [val.' + CAST(ID AS VARCHAR(20)) + ']'
                          FROM Yourtable
                          GROUP BY ID
                          ORDER BY ID
                          FOR XML PATH('')), 1, 1, '');

-- Note the blank before "from": the column list ends in "]" and must not run into the keyword.
SET @sql = N'select 1 as Row,' + @col_list + N' from Yourtable';

-- @sql is NULL when Yourtable has no rows; there is nothing to run in that case.
IF @sql IS NOT NULL
    EXEC sys.sp_executesql @sql;
Live Demo
I think pivoting the data table will yield the desired result.
-- Start from a clean temporary table so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#TableA') IS NOT NULL
BEGIN
    DROP TABLE #TableA;
END;

CREATE TABLE #TableA
(
    RowNumber INT,
    ID        INT,
    Value     INT
);

-- All sample rows share RowNumber 1, so the pivot below produces a single output row.
INSERT INTO #TableA (RowNumber, ID, Value)
VALUES (1, 1, 4), (1, 1, 5), (1, 1, 6),
       (1, 2, 7), (1, 2, 8), (1, 2, 9),
       (1, 3, 10), (1, 3, 11), (1, 3, 12);

-- PIVOT reference: https://msdn.microsoft.com/en-us/library/ms177410.aspx
-- Turns the ID values 1, 2 and 3 into columns holding SUM(Value) per RowNumber.
SELECT p.RowNumber,
       p.[1] AS Sum1,
       p.[2] AS Sum2,
       p.[3] AS Sum3
FROM (SELECT RowNumber, ID, Value FROM #TableA) AS a
PIVOT (SUM(Value) FOR ID IN ([1], [2], [3])) AS p;
This technique works if the IDs you are seeking are constant; otherwise, I imagine some dynamic SQL would work as well if the IDs can change.
https://msdn.microsoft.com/en-us/library/ms177410.aspx

SQL Coalesce with missing values

I have two tables, master and child. The master's primary key MM is an INT. The child table has a compound key of two columns and value column:
MM (INT)
POS (INT, values 1-32)
VV (INT, values 1-9)
Sample master table data:
(1, other data)
(2, other data)
(3, other data)
Sample child table data
(1, 1,2)
(1, 2,2)
(1, 4,1)
(1,15,1)
(2, 4,5)
(2, 5,3)
(2,31,7)
(3,3,1)
(4,18,2)
(4,19,5)
For a report I would like to de-normalize the data with an output like this:
(1,'22010000000000010000000000000000')
(2,'00053000000000000000000000000070')
(3,'00100000000000000000000000000000')
(4,'00000000000000000025000000000000')
I was thinking of using a select query with coalesce like this, but the output is not exactly what I want:
(1,'22110')
(2,'537')
(3,'1')
(4,'25')
How do I fill in the missing data with zeros?
One way I can think to do this uses a decimal value with a precision of 32 and sum() and then convert back to a zero-padded string:
-- Turns each child row into a 32-digit number that has the digit vv at position pos
-- (vv followed by 32 - pos zeros), adds the numbers up per mm and pads the sum back to
-- 32 characters with leading zeros.
-- Uses the child columns mm, pos and vv; only the needed columns are selected so that
-- the computed column does not clash with an existing column name.
select c.mm,
       right(replicate('0', 32) + cast(sum(c.pos_val) as varchar(32)), 32) as vals
from (select mm,
             cast(cast(vv as varchar(32)) + replicate('0', 32 - pos) as decimal(32, 0)) as pos_val
      from child
     ) c
group by c.mm;
EDIT:
The above isn't generalizable (say, above 38 characters or to use letters as well as digits). Here is a more generalizable, but longer version:
-- Builds the 32-character string position by position: for every pos the MAX picks the
-- value stored at that position, or '0' when the mm has no row for it ('0' sorts below
-- every other digit). All 32 positions are spelled out so the statement runs as is.
-- Uses the child columns mm, pos and vv.
select c.mm,
       (max(case when c.pos = 1 then c.valc else '0' end) +
        max(case when c.pos = 2 then c.valc else '0' end) +
        max(case when c.pos = 3 then c.valc else '0' end) +
        max(case when c.pos = 4 then c.valc else '0' end) +
        max(case when c.pos = 5 then c.valc else '0' end) +
        max(case when c.pos = 6 then c.valc else '0' end) +
        max(case when c.pos = 7 then c.valc else '0' end) +
        max(case when c.pos = 8 then c.valc else '0' end) +
        max(case when c.pos = 9 then c.valc else '0' end) +
        max(case when c.pos = 10 then c.valc else '0' end) +
        max(case when c.pos = 11 then c.valc else '0' end) +
        max(case when c.pos = 12 then c.valc else '0' end) +
        max(case when c.pos = 13 then c.valc else '0' end) +
        max(case when c.pos = 14 then c.valc else '0' end) +
        max(case when c.pos = 15 then c.valc else '0' end) +
        max(case when c.pos = 16 then c.valc else '0' end) +
        max(case when c.pos = 17 then c.valc else '0' end) +
        max(case when c.pos = 18 then c.valc else '0' end) +
        max(case when c.pos = 19 then c.valc else '0' end) +
        max(case when c.pos = 20 then c.valc else '0' end) +
        max(case when c.pos = 21 then c.valc else '0' end) +
        max(case when c.pos = 22 then c.valc else '0' end) +
        max(case when c.pos = 23 then c.valc else '0' end) +
        max(case when c.pos = 24 then c.valc else '0' end) +
        max(case when c.pos = 25 then c.valc else '0' end) +
        max(case when c.pos = 26 then c.valc else '0' end) +
        max(case when c.pos = 27 then c.valc else '0' end) +
        max(case when c.pos = 28 then c.valc else '0' end) +
        max(case when c.pos = 29 then c.valc else '0' end) +
        max(case when c.pos = 30 then c.valc else '0' end) +
        max(case when c.pos = 31 then c.valc else '0' end) +
        max(case when c.pos = 32 then c.valc else '0' end)
       ) as vals
from (select mm, pos, cast(vv as varchar(255)) as valc
      from child
     ) c
group by c.mm;
I should note that if you want to handle a master with no children, then use a left join. That aspect of the problem seems less interesting than combining the values in the appropriate positions.
Try it like this
-- Sample data in table variables (T-SQL table variables are named with a leading @).
DECLARE @master TABLE (MM INT, OtherData VARCHAR(100));
INSERT INTO @master (MM, OtherData)
VALUES (1, 'Other Data 1')
      ,(2, 'Other Data 2')
      ,(3, 'Other Data 3');

DECLARE @child TABLE (MM INT, POS INT, VV INT);
INSERT INTO @child (MM, POS, VV)
VALUES (1, 1,2)
      ,(1, 2,2)
      ,(1, 4,1)
      ,(1,15,1)
      ,(2, 4,5)
      ,(2, 5,3)
      ,(2,31,7)
      ,(3,3,1)
      ,(4,18,2)
      ,(4,19,5);

-- One CTE to get the numbers 1 to 32. The 8 x 4 cross join yields exactly 32 rows
-- without depending on the contents of any catalog view.
WITH Numbers(Nr) AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0)) AS a (n)
    CROSS JOIN (VALUES (0),(0),(0),(0)) AS b (n)
)
-- Another CTE to get the distinct MMs that have child rows, with the master data where it exists.
,MMs AS
(
    SELECT c.MM
          ,m.OtherData
    FROM @child AS c
    LEFT JOIN @master AS m ON c.MM = m.MM
    GROUP BY c.MM, m.OtherData
)
-- The CROSS JOIN with Numbers creates 32 rows per MM. Positions that have a child row
-- carry its value; COALESCE puts a zero in every other position.
,Masked AS
(
    SELECT MMs.MM
          ,MMs.OtherData
          ,Numbers.Nr
          ,COALESCE(c1.VV, 0) AS Val
    FROM MMs
    CROSS JOIN Numbers
    LEFT JOIN @child AS c1 ON c1.MM = MMs.MM AND c1.POS = Numbers.Nr
)
-- The final SELECT uses FOR XML PATH('') to turn the 32 rows back into one string.
-- ORDER BY Nr is required: without it the order of the digits is not guaranteed.
SELECT MMs.MM
      ,MMs.OtherData
      ,(
        SELECT Masked.Val AS [*]
        FROM Masked
        WHERE Masked.MM = MMs.MM
        ORDER BY Masked.Nr
        FOR XML PATH('')
       ) AS Vals
FROM MMs
ORDER BY MMs.MM;
The result
1 22010000000000100000000000000000
2 00053000000000000000000000000070
3 00100000000000000000000000000000
4 00000000000000000250000000000000