Building Matrix via SQL - sql

I am using two query from a schema which counts companies, but I am struggling how to combine them give Columns as Country and Industry as row and give the company counts accordingly.
select g.simpleindustrydescription, count(c.companyid) as companycount from ciqcompany c
join ciqsimpleindustry g on g.simpleIndustryid = c.simpleIndustryid
join ciqbusinessdescription b on b.companyid = c.companyid
group by g.simpleindustrydescription
select g.country, count(c.companyid) as companycount from ciqcompany c
join ciqcountrygeo g on g.countryid = c.countryid
join ciqbusinessdescription b on b.companyid = c.companyid
group by g.country
Expected Output:
Country A Country B Country C
Industry A 5 5 6
Industry B 3 3 4
Industry C 4 8 6

Due to lack of real example data, here a simple pivot example basing on some dummy records:
CREATE TABLE #tCountry
(
ID INT
,Name NVARCHAR(100)
);
INSERT INTO #tCountry VALUES (1, 'Country A'), (2, 'Country B'), (3, 'Country C');
CREATE TABLE #tIndustry
(
ID INT
,Name NVARCHAR(100)
);
INSERT INTO #tIndustry VALUES (1, 'Industry A'), (2, 'Industry B'), (3, 'Industry C');
CREATE TABLE #tMapping
(
ID INT
,CountryID INT
,IndustryID INT
,Name NVARCHAR(100)
);
INSERT INTO #tMapping VALUES (1, 1, 1, 'Country A Industry A - 1'), (2, 1, 1, 'Country A Industry A - 2');
INSERT INTO #tMapping VALUES (3, 1, 2, 'Country A Industry B - 1'), (4, 1, 2, 'Country A Industry b - 2'), (5, 1, 2, 'Country A Industry b - 3');
INSERT INTO #tMapping VALUES (6, 2, 1, 'Country B Industry A - 1');
DECLARE #lCountries NVARCHAR(max) = N'';
DECLARE #stmt NVARCHAR(MAX) = N'';
SELECT #lCountries += N', ' + QUOTENAME(CountryName)
FROM(
SELECT DISTINCT tc.Name CountryName
FROM #tCountry tc
JOIN #tMapping tm ON tm.CountryID = tc.ID
) x;
SELECT #lCountries = STUFF(#lCountries, 1, 2, '');
SELECT #stmt = 'SELECT *
FROM
(
SELECT ti.Name IndustryName, tc.Name CountryName, COUNT(*) MappingCounter
FROM #tMapping tm
JOIN #tCountry tc ON tm.CountryID = tc.ID
JOIN #tIndustry ti ON tm.IndustryID = ti.ID
GROUP BY ti.Name, tc.Name
) t
PIVOT (MAX(t.MappingCounter) FOR CountryName in (' + #lCountries + ')) AS x';
EXEC sp_executesql #stmt
Anyways, if you are dealing with an unknown number of countries, you might want to extend this example a little and use dynamic SQL in order to build the pivot statement. Got an example somewhere, but would have to search for it...
Result of my example:
IndustryName Country A Country B Country C
Industry A 2 1 NULL
Industry B 3 NULL NULL

Related

SQL LEFT JOIN to many categories

Suppose the following easy scenario, where a product row gets connected to one primary category, subcategory, and sub-subcategory.
DECLARE #PRODUCTS TABLE (ID int, DESCRIPTION varchar(50), CAT varchar(30), SUBCAT varchar(30), SUBSUBCAT varchar(30));
INSERT #PRODUCTS (ID, DESCRIPTION, CAT, SUBCAT, SUBSUBCAT) VALUES
(1, 'NIKE MILLENIUM', '1', '10', '100'),
(2, 'NIKE CORTEZ', '1', '12', '104'),
(3, 'ADIDAS PANTS', '2', '27', '238'),
(4, 'PUMA REVOLUTION 5', '3', '35', '374'),
(5, 'SALOMON SHELTER CS', '4', '15', '135'),
(6, 'NIKE EBERNON LOW', '2', '14', '157');
DECLARE #CATS TABLE (ID int, DESCR varchar(100));
INSERT #CATS (ID, DESCR) VALUES
(1, 'MEN'),
(2, 'WOMEN'),
(3, 'UNISEX'),
(4, 'KIDS'),
(5, 'TEENS'),
(6, 'BACK TO SCHOOL');
DECLARE #SUBCATS TABLE (ID int, DESCR varchar(100));
INSERT #SUBCATS (ID, DESCR) VALUES
(10, 'FOOTWEAR'),
(12, 'OUTERWEAR'),
(14, 'SWIMWEAR'),
(15, 'HOODIES'),
(27, 'CLOTHING'),
(35, 'SPORTS');
DECLARE #SUBSUBCATS TABLE (ID int, DESCR varchar(100));
INSERT #SUBSUBCATS (ID, DESCR) VALUES
(100, 'RUNNING'),
(104, 'ZIP TOPS'),
(135, 'FLEECE'),
(157, 'BIKINIS'),
(238, 'PANTS'),
(374, 'JOGGERS');
SELECT prod.ID,
prod.DESCRIPTION,
CONCAT(cat1.DESCR, ' > ', cat2.DESCR, ' > ', cat3.DESCR) AS CATEGORIES
FROM #PRODUCTS AS prod
LEFT JOIN #CATS AS cat1 ON cat1.ID = prod.CAT
LEFT JOIN #SUBCATS AS cat2 ON cat2.ID = prod.SUBCAT
LEFT JOIN #SUBSUBCATS AS cat3 ON cat3.ID = prod.SUBSUBCAT;
Now suppose that the foreign keys on #PRODUCTS table aren't just indices to their respective tables. They are comma-separated indices to more than one categories, subcategories, and sub-subcategories like here.
DECLARE #PRODUCTS TABLE (ID int, DESCRIPTION varchar(50), CAT varchar(30), SUBCAT varchar(30), SUBSUBCAT varchar(30));
INSERT #PRODUCTS (ID, DESCRIPTION, CAT, SUBCAT, SUBSUBCAT) VALUES
(1, 'NIKE MILLENIUM', '1, 2', '10, 12', '100, 135'),
(2, 'NIKE CORTEZ', '1, 5', '12, 15', '104, 374'),
(3, 'ADIDAS PANTS', '2, 6', '27, 35', '238, 374');
DECLARE #CATS TABLE (ID int, DESCR varchar(100));
INSERT #CATS (ID, DESCR) VALUES
(1, 'MEN'),
(2, 'WOMEN'),
(3, 'UNISEX'),
(4, 'KIDS'),
(5, 'TEENS'),
(6, 'BACK TO SCHOOL');
DECLARE #SUBCATS TABLE (ID int, DESCR varchar(100));
INSERT #SUBCATS (ID, DESCR) VALUES
(10, 'FOOTWEAR'),
(12, 'OUTERWEAR'),
(14, 'SWIMWEAR'),
(15, 'HOODIES'),
(27, 'CLOTHING'),
(35, 'SPORTS');
DECLARE #SUBSUBCATS TABLE (ID int, DESCR varchar(100));
INSERT #SUBSUBCATS (ID, DESCR) VALUES
(100, 'RUNNING'),
(104, 'ZIP TOPS'),
(135, 'FLEECE'),
(157, 'BIKINIS'),
(238, 'PANTS'),
(374, 'JOGGERS');
SELECT prod.ID,
prod.DESCRIPTION
--CONCAT(cat1.DESCR, ' > ', cat2.DESCR, ' > ', cat3.DESCR) AS CATEGORIES
FROM #PRODUCTS AS prod
--LEFT JOIN #CATS AS cat1 ON cat1.ID = prod.CAT
--LEFT JOIN #SUBCATS AS cat2 ON cat2.ID = prod.SUBCAT
--LEFT JOIN #SUBSUBCATS AS cat3 ON cat3.ID = prod.SUBSUBCAT;
In this case I want to achieve the following:
Be able to retrieve the respective names of the cats, subcats, sub-subcats, ie. for cats '1, 2' be able to retrieve their names (I tried LEFT JOIN #CATS AS cat1 ON cat1.ID IN prod.CAT but it doesn't work)
Create triplets of the corresponding cats, subcats, sub-subcats, ie. for
cats '1, 2'
subcats '12, 17'
sub-subcats '239, 372'
(after retrieving the appropriate names) create pipe-separated category routes like name of cat 1 > name of subcat 12 > name of sub-subcat 239 | name of cat 2 > name of subcat 17 > name of sub-subcat 372
So, for a row like (1, 'NIKE MILLENIUM', '1, 2', '10, 12', '100, 135'),
I would like to get the following result
ID
DESCRIPTION
CATEGORIES
1
NIKE MILLENIUM
MEN > FOOTWEAR > RUNNING # WOMEN > OUTERWEAR > FLEECE (I had to use # as the delimiter of the two triplets because pipe messed with the table's columns)
In case the user stupidly stores more cat IDs than subcat IDs, or sub-subcat IDs, the query should just match the ones that have a corresponding position match, ie for
cats '1, 2'
subcats '12'
sub-subcats '239, 372'
it should just create one triplet, like name of 1 > name of 12 > name of 239
STRING_SPLIT() does not promise to return the values in a specific order, so it won't work in this case as ordinal position matters.
Use OPENJSON() split the string into separate rows to ensure the values are returned in the same order.
OPENJSON() also returns a key field, so you can join on the row number within each grouping. You'll want an INNER JOIN since your requirement is that all values in that "column" must exist.
Use STUFF() to assemble the various cat>subcat>subsubcat values.
DECLARE #PRODUCTS TABLE (ID int, DESCRIPTION varchar(50), CAT varchar(30), SUBCAT varchar(30), SUBSUBCAT varchar(30));
INSERT #PRODUCTS (ID, DESCRIPTION, CAT, SUBCAT, SUBSUBCAT) VALUES
(1, 'NIKE MILLENIUM', '1, 2', '10, 12', '100, 135'),
(2, 'NIKE CORTEZ', '1, 5', '12, 15', '104, 374'),
(3, 'ADIDAS PANTS', '2, 6, 1', '27, 35, 10', '238, 374, 100'),
(4, 'JOE THE PLUMBER JEANS', '1, 5', '27', '238, 374');
DECLARE #CATS TABLE (ID int, DESCR varchar(100));
INSERT #CATS (ID, DESCR) VALUES
(1, 'MEN'),
(2, 'WOMEN'),
(3, 'UNISEX'),
(4, 'KIDS'),
(5, 'TEENS'),
(6, 'BACK TO SCHOOL');
DECLARE #SUBCATS TABLE (ID int, DESCR varchar(100));
INSERT #SUBCATS (ID, DESCR) VALUES
(10, 'FOOTWEAR'),
(12, 'OUTERWEAR'),
(14, 'SWIMWEAR'),
(15, 'HOODIES'),
(27, 'CLOTHING'),
(35, 'SPORTS');
DECLARE #SUBSUBCATS TABLE (ID int, DESCR varchar(100));
INSERT #SUBSUBCATS (ID, DESCR) VALUES
(100, 'RUNNING'),
(104, 'ZIP TOPS'),
(135, 'FLEECE'),
(157, 'BIKINIS'),
(238, 'PANTS'),
(374, 'JOGGERS');
;
with prod as (
SELECT p.ID,
p.DESCRIPTION
--CONCAT(cat1.DESCR, ' > ', cat2.DESCR, ' > ', cat3.DESCR) AS CATEGORIES
, c.value as CatId
, c.[key] as CatKey
, sc.value as SubCatId
, sc.[key] as SubCatKey
, ssc.value as SubSubCatId
, ssc.[key] as SubSubCatKey
FROM #PRODUCTS p
cross apply OPENJSON(CONCAT('["', REPLACE(cat, ', ', '","'), '"]')) c
cross apply OPENJSON(CONCAT('["', REPLACE(subcat, ', ', '","'), '"]')) sc
cross apply OPENJSON(CONCAT('["', REPLACE(subsubcat, ', ', '","'), '"]')) ssc
where c.[key] = sc.[key]
and c.[key] = ssc.[key]
)
, a as (
select p.ID
, p.DESCRIPTION
, c.DESCR + ' > ' + sc.DESCR + ' > ' + ssc.DESCR as CATEGORIES
, p.CatKey
from prod p
inner join #CATS c on c.ID = p.CatId
inner join #SUBCATS sc on sc.ID = p.SubCatId
inner join #SUBSUBCATS ssc on ssc.ID = p.SubSubCatId
)
select DISTINCT ID
, DESCRIPTION
, replace(STUFF((SELECT distinct ' | ' + a2.CATEGORIES
from a a2
where a.ID = a2.ID
FOR XML PATH(''))
,1,2,''), '>', '>') CATEGORIES
from a
Totally separate answer because of the change to older technology. I think my original answer is still good for folks using current SQL Server versions, so I don't want to remove it.
I don't remember where I got the function. When I found it today it was named split_delimiter. I changed the name, added some comments, and incorporated the ability to have a delimiter that is more than one character long.
CREATE FUNCTION [dbo].[udf_split_string](#delimited_string VARCHAR(8000), #delimiter varchar(10))
RETURNS TABLE AS
RETURN
WITH cte10(num) AS ( -- 10 rows
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
, cte100(num) AS ( -- 100 rows
SELECT 1
FROM cte10 t1, cte10 t2
)
, cte10000(num) AS ( -- 10000 rows
SELECT 1
FROM cte100 t1, cte100 t2
)
, cte1(num) AS ( -- 1 row per character
SELECT TOP (ISNULL(DATALENGTH(#delimited_string), 0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM cte10000
)
, cte2(num) AS ( -- locations of strings
SELECT 1
UNION ALL
SELECT t.num + len(replace(#delimiter, ' ', '_'))
FROM cte1 t
WHERE SUBSTRING(#delimited_string, t.num, len(replace(#delimiter, ' ', '_'))) = #delimiter
)
, cte3(num, [len]) AS (
SELECT t.num
, ISNULL(NULLIF(CHARINDEX(#delimiter, #delimited_string, t.num), 0) - t.num, 8000)
FROM cte2 t
)
SELECT [Key] = ROW_NUMBER() OVER (ORDER BY t.num)
, [Value] = SUBSTRING(#delimited_string, t.num, t.[len])
FROM cte3 t;
GO
DECLARE #PRODUCTS TABLE (ID int, DESCRIPTION varchar(50), CAT varchar(30), SUBCAT varchar(30), SUBSUBCAT varchar(30));
INSERT #PRODUCTS (ID, DESCRIPTION, CAT, SUBCAT, SUBSUBCAT) VALUES
(1, 'NIKE MILLENIUM', '1, 2', '10, 12', '100, 135'),
(2, 'NIKE CORTEZ', '1, 5', '12, 15', '104, 374'),
(3, 'ADIDAS PANTS', '2, 6, 1', '27, 35, 10', '238, 374, 100'),
(4, 'JOE THE PLUMBER JEANS', '1, 5', '27', '238, 374');
DECLARE #CATS TABLE (ID int, DESCR varchar(100));
INSERT #CATS (ID, DESCR) VALUES
(1, 'MEN'),
(2, 'WOMEN'),
(3, 'UNISEX'),
(4, 'KIDS'),
(5, 'TEENS'),
(6, 'BACK TO SCHOOL');
DECLARE #SUBCATS TABLE (ID int, DESCR varchar(100));
INSERT #SUBCATS (ID, DESCR) VALUES
(10, 'FOOTWEAR'),
(12, 'OUTERWEAR'),
(14, 'SWIMWEAR'),
(15, 'HOODIES'),
(27, 'CLOTHING'),
(35, 'SPORTS');
DECLARE #SUBSUBCATS TABLE (ID int, DESCR varchar(100));
INSERT #SUBSUBCATS (ID, DESCR) VALUES
(100, 'RUNNING'),
(104, 'ZIP TOPS'),
(135, 'FLEECE'),
(157, 'BIKINIS'),
(238, 'PANTS'),
(374, 'JOGGERS');
;
with prod as (
SELECT p.ID,
p.DESCRIPTION
, c.value as CatId
, c.[key] as CatKey
, sc.value as SubCatId
, sc.[key] as SubCatKey
, ssc.value as SubSubCatId
, ssc.[key] as SubSubCatKey
FROM #PRODUCTS p
cross apply dbo.udf_split_string(cat, ', ') c
cross apply dbo.udf_split_string(subcat, ', ') sc
cross apply dbo.udf_split_string(subsubcat, ', ') ssc
where c.[key] = sc.[key]
and c.[key] = ssc.[key]
)
, a as (
select p.ID
, p.DESCRIPTION
, c.DESCR + ' > ' + sc.DESCR + ' > ' + ssc.DESCR as CATEGORIES
, p.CatKey
from prod p
inner join #CATS c on c.ID = p.CatId
inner join #SUBCATS sc on sc.ID = p.SubCatId
inner join #SUBSUBCATS ssc on ssc.ID = p.SubSubCatId
)
select DISTINCT ID
, DESCRIPTION
, replace(STUFF((SELECT distinct ' | ' + a2.CATEGORIES
from a a2
where a.ID = a2.ID
FOR XML PATH(''))
,1,2,''), '>', '>') CATEGORIES
from a
Well that should do work, i changed your character ">" for "-" just for see the data more simple.
the design of your tables is not perfect but the first try almost never is.
select mainp.ID, mainp.DESCRIPTION, stuff(ppaths.metapaths, len(ppaths.metapaths),1,'') metalinks
from #PRODUCTS mainp
cross apply(
select
(select
c.DESCR + '-' + sc.DESCR + '-' + sbc.DESCR + '|'
from #PRODUCTS p
cross apply (select row_number() over(order by Value) id, Value from split(p.CAT, ','))cat_ids
inner join #cats c on c.ID = cat_ids.Value
cross apply (select row_number() over(order by Value) id, Value from split(p.SUBCAT, ','))subcat_ids
inner join #SUBCATS sc on sc.ID = subcat_ids.Value
and subcat_ids.id = subcat_ids.id
cross apply (select row_number() over(order by Value) id, Value from split(p.SUBSUBCAT, ','))subsubcat_ids
inner join #SUBSUBCATS sbc on sbc.ID = subsubcat_ids.Value
and subsubcat_ids.id = subcat_ids.id
where p.id = mainp.ID
for xml path('')) metapaths
) ppaths
the link for split function
https://desarrolladores.me/2014/03/sql-server-funcion-split-para-dividir-un-string/

Getting Paged Distinct Records Using SQL (Not Duplicate)

Following #mdb's answer to apply pagination using SQL SERVER, I find it hard to retrieve distinct records when the main table is joined to other tables for a one-to-many relationship, i.e, A person has many addresses.
Use case, suppose I want to retrieve all persons which has an address in New York given tables #temp_person and #temp_addresses, I would join them on PersonID and OwnerID.
The problem arises when there are multiple addresses for a person, the result set contains duplicate records.
To make it clearer, here's a sample query with data:
Sample Data:
create table #temp_person (
PersonID int not null,
FullName varchar(max) not null
)
create table #temp_addresses(
AddressID int identity not null,
OwnerID int not null,
Address1 varchar(max),
City varchar(max)
)
insert into #temp_person
values
(1, 'Sample One'),
(2, 'Sample Two'),
(3, 'Sample Three')
insert into #temp_addresses (OwnerID, Address1, City)
values
(1, 'Somewhere East Of', 'New York'),
(1, 'Somewhere West Of', 'New York'),
(2, 'blah blah blah', 'Atlantis'),
(2, 'Address2 Of Sample Two', 'New York'),
(2, 'Address3 Of Sample Two', 'Nowhere City'),
(3, 'Address1 Of Sample Three', 'New York'),
(3, 'Address2 Of Sample Three', 'Seattle')
--drop table #temp_addresses, #temp_person
Pagination Query:
SELECT
(
CAST( RowNum as varchar(MAX) )
+ '/'
+ CAST(TotalCount as varchar(MAX))
) as ResultPosition
, PersonID
, FullName
FROM (
SELECT DISTINCT
ROW_NUMBER() OVER(ORDER BY p.FullName ASC) as RowNum
, p.PersonID
, p.FullName
, Count(1) OVER() as TotalCount
FROM #temp_person p
LEFT JOIN #temp_addresses a
ON p.PersonID = a.OwnerID
WHERE City = 'New York'
) as RowConstrainedResult
WHERE RowNum > 0 AND RowNum <= 3
ORDER BY RowNum
Expected Results:
ResultPosition PersonID FullName
1/3 1 Sample One
2/3 2 Sample Two
3/3 3 Sample Three
Actual Results:
ResultPosition PersonID FullName
1/4 1 Sample One
2/4 1 Sample One
3/4 3 Sample Three
As you can see, the inner query is returning multiple records due to the join with #temp_addresses.
Is there a way we could only return unique records by PersonID?
UPDATE:
Actual use case is for an "Advanced Search" functionality where the user can search using different filters, i.e, name, firstname, last name, birthdate, address, etc.. The <WHERE_CLAUSE> and <JOIN_STATEMENTS> in the query are added dynamically so GROUP BY is not applicable here.
Also, please address the "Pagination" scheme for this question. That is, I want to retrieve only N number of results from Start while also retrieving the total count of the results as if they are not paged. i.e, I retrieve only 25 rows out of a total of 500 results.
Just do group by PersonID and no need to use subquery
SELECT
cast(row_number() over (order by (select 1)) as varchar(max)) +'/'+
cast(Count(1) OVER() as varchar(max)) ResultPosition,
p.PersonID,
max(p.FullName) FullName
FROM #temp_person p
LEFT JOIN #temp_addresses a ON p.PersonID = a.OwnerID
WHERE City = 'New York'
group by p.PersonID
EDIT : I would use CTE for the pagination
;with cte as
(
SELECT
row_number() over(order by (select 1)) rn,
cast(row_number() over (order by (select 1)) as varchar(max)) +'/'+
cast(Count(1) OVER() as varchar(max)) ResultPosition,
p.PersonID,
max(p.FullName) FullName
FROM #temp_person p
LEFT JOIN #temp_addresses a ON p.PersonID = a.OwnerID
WHERE City = 'New York'
group by p.PersonID
)
select * from cte
where rn > 0 and rn <= 2
Result:
ResultPosition PersonID FullName
1/3 1 Sample One
2/3 2 Sample Two
3/3 3 Sample Three
You need to have distinct rows before using ROW_NUMBER().
If you will filter by City, there are no need to use LEFT JOIN. Use INNER JOIN instead.
select ResultPosition = cast(row_number() over (order by (r.PersonID)) as varchar(max)) +'/'+ cast(Count(r.PersonID) OVER() as varchar(max)), *
from(
SELECT distinct p.PersonID,
p.FullName
FROM #temp_person p
JOIN #temp_addresses a ON
p.PersonID = a.OwnerID
WHERE City = 'New York') r
EDIT:
Considering pagination
declare #page int =1, #rowsPage int = 25
select distinct position, ResultPosition = cast(position as varchar(10)) + '/' + cast(count(*) OVER() as varchar(10)), *
from(
SELECT position = DENSE_RANK () over (order by p.PersonID),
p.PersonID,
p.FullName
FROM #temp_person p
LEFT JOIN #temp_addresses a ON
p.PersonID = a.OwnerID
WHERE City = 'New York'
) r
where position between #rowsPage*(#page-1)+1 and #rowsPage*#page
Geoman Yabes, Check if this help... Gives results expected in your example and you can have pagination using RowNum:-
SELECT *
FROM
(SELECT ROW_NUMBER() OVER(ORDER BY RowConstrainedResult.PersonId ASC) As RowNum,
Count(1) OVER() As TotalRows,
RowConstrainedResult.PersonId,
RowConstrainedResult.FullName
FROM (
SELECT
RANK() OVER(PARTITION BY p.PersonId ORDER BY a.Address1 ASC) as Ranking
, p.PersonID
, p.FullName
FROM #temp_person p
INNER JOIN #temp_addresses a ON p.PersonID = a.OwnerID WHERE City = 'New York'
) as RowConstrainedResult WHERE Ranking = 1) Filtered
Where RowNum > 0 And RowNum <= 4
Sample Data:
insert into #temp_person
values
(1, 'Sample One'),
(2, 'Sample Two'),
(3, 'Sample Three'),
(4, 'Sample 4'),
(5, 'Sample 5'),
(6, 'Sample 6')
insert into #temp_addresses (OwnerID, Address1, City)
values
(1, 'Somewhere East Of', 'New York'),
(1, 'Somewhere West Of', 'New York'),
(2, 'blah blah blah', 'Atlantis'),
(2, 'Address2 Of Sample Two', 'New York'),
(2, 'Address3 Of Sample Two', 'Nowhere City'),
(3, 'Address1 Of Sample Three', 'New York'),
(3, 'Address2 Of Sample Three', 'Seattle'),
(4, 'Address1 Of Sample 4', 'New York'),
(4, 'Address1 Of Sample 4', 'New York 2'),
(5, 'Address1 Of Sample 5', 'New York'),
(6, 'Address1 Of Sample 6', 'New York')

Identifying/comparing sets of rows within groups

I have a matter which seemed simple to solve but now I find it troublesome.
In simplification - I need to find a way to identify unique sets of rows within groups defined by another column. In basic example the source table contains only two columns:
routeID nodeID nodeName
1 1 a
1 2 b
2 1 a
2 2 b
3 1 a
3 2 b
4 1 a
4 2 c
5 1 a
5 2 c
6 1 a
6 2 b
6 3 d
7 1 a
7 2 b
7 3 d
So, the routeID column refers to set of nodes which define a route.
What I need to do is to somehow group the routes, so that there will be only one unique sequence of nodes for one routeID.
In my actual case I tried to use window function to add columns which help to identify nodes sequence, but I still have no idea how to get those unique sequences and group routes.
As a final effect I want to get only unique routes - for example routes 1,2 and 3 aggregated to one route.
Do you have any idea how to help me ?
EDIT:
The other table which I would like to join with the one from the example may look like that:
journeyID nodeID nodeName routeID
1 1 a 1
1 2 b 1
2 1 a 1
2 2 b 1
3 1 a 4
3 2 c 4
...........................
...........................
You can try this idea:
DECLARE #DataSource TABLE
(
[routeID] TINYINT
,[nodeID] TINYINT
,[nodeName] CHAR(1)
);
INSERT INTO #DataSource ([routeID], [nodeID], [nodeName])
VALUES ('1', '1', 'a')
,('1', '2', 'b')
,('2', '1', 'a')
,('2', '2', 'b')
,('3', '1', 'a')
,('3', '2', 'b')
,('4', '1', 'a')
,('4', '2', 'c')
,('5', '1', 'a')
,('5', '2', 'c')
,('6', '1', 'a')
,('6', '2', 'b')
,('6', '3', 'd')
,('7', '1', 'a')
,('7', '2', 'b')
,('7', '3', 'd');
SELECT DS.[routeID]
,nodes.[value]
,ROW_NUMBER() OVER (PARTITION BY nodes.[value] ORDER BY [routeID]) AS [rowID]
FROM
(
-- getting unique route ids
SELECT DISTINCT [routeID]
FROM #DataSource DS
) DS ([routeID])
CROSS APPLY
(
-- for each route id creating CSV list with its node ids
SELECT STUFF
(
(
SELECT ',' + [nodeName]
FROM #DataSource DSI
WHERE DSI.[routeID] = DS.[routeID]
ORDER BY [nodeID]
FOR XML PATH(''), TYPE
).value('.', 'VARCHAR(MAX)')
,1
,1
,''
)
) nodes ([value]);
The code will give you this output:
So, you simple need to filter by rowID = 1. Of course, you can change the code as you like in order to satisfy your bussness criteria (for example showing no the first route ID with same nodes, but the last).
Also, ROW_NUMBER function cannot be used directly in the WHERE clause, so you need to wrap the code before filtering:
WITH DataSource AS
(
SELECT DS.[routeID]
,nodes.[value]
,ROW_NUMBER() OVER (PARTITION BY nodes.[value] ORDER BY [routeID]) AS [rowID]
FROM
(
-- getting unique route ids
SELECT DISTINCT [routeID]
FROM #DataSource DS
) DS ([routeID])
CROSS APPLY
(
-- for each route id creating CSV list with its node ids
SELECT STUFF
(
(
SELECT ',' + [nodeName]
FROM #DataSource DSI
WHERE DSI.[routeID] = DS.[routeID]
ORDER BY [nodeID]
FOR XML PATH(''), TYPE
).value('.', 'VARCHAR(MAX)')
,1
,1
,''
)
) nodes ([value])
)
SELECT DS2.*
FROM DataSource DS1
INNER JOIN #DataSource DS2
ON DS1.[routeID] = DS2.[routeID]
WHERE DS1.[rowID] = 1;
ok, let's use some recursion to create a complete node list for each routeID
First of all let's populate source table and journeyes tale
-- your source
declare #r as table (routeID int, nodeID int, nodeName char(1))
-- your other table
declare #j as table (journeyID int, nodeID int, nodeName char(1), routeID int)
-- temp results table
declare #routes as table (routeID int primary key, nodeNames varchar(1000))
;with
s as (
select *
from (
values
(1, 1, 'a'),
(1, 2, 'b'),
(2, 1, 'a'),
(2, 2, 'b'),
(3, 1, 'a'),
(3, 2, 'b'),
(4, 1, 'a'),
(4, 2, 'c'),
(5, 1, 'a'),
(5, 2, 'c'),
(6, 1, 'a'),
(6, 2, 'b'),
(6, 3, 'd'),
(7, 1, 'a'),
(7, 2, 'b'),
(7, 3, 'd')
) s (routeID, nodeID, nodeName)
)
insert into #r
select *
from s
;with
s as (
select *
from (
values
(1, 1, 'a', 1),
(1, 2, 'b', 1),
(2, 1, 'a', 1),
(2, 2, 'b', 1),
(3, 1, 'a', 4),
(3, 2, 'c', 4)
) s (journeyID, routeID, nodeID, nodeName)
)
insert into #j
select *
from s
now let's exctract routes:
;with
d as (
select *, row_number() over (partition by r.routeID order by r.nodeID desc) n2
from #r r
),
r as (
select d.*, cast(nodeName as varchar(1000)) Names, cast(0 as bigint) i2
from d
where nodeId=1
union all
select d.*, cast(r.names + ',' + d.nodeName as varchar(1000)), r.n2
from d
join r on r.routeID = d.routeID and r.nodeId=d.nodeId-1
)
insert into #routes
select routeID, Names
from r
where n2=1
table #routes will be like this:
routeID nodeNames
1 'a,b'
2 'a,b'
3 'a,b'
4 'a,c'
5 'a,c'
6 'a,b,d'
7 'a,b,d'
an now the final output:
-- the unique routes
select MIN(r.routeID) routeID, nodeNames
from #routes r
group by nodeNames
-- the unique journyes
select MIN(journeyID) journeyID, r.nodeNames
from #j j
inner join #routes r on j.routeID = r.routeID
group by nodeNames
output:
routeID nodeNames
1 'a,b'
4 'a,c'
6 'a,b,d'
and
journeyID nodeNames
1 'a,b'
3 'a,c'

COUNT of different tables in GROUP

I have a table that holds a list of classes available at a school. Each class can have a number of sessions. And each class can have pupils assigned to it.
What I need to do is get a count of all sessions for each class, as well as the number of students attending the class. I have done the first bit, but if I join to the pupil allocation table, my counts will be wrong.
I have conjured up some fake SQL that you can use.
I'm stuck with efficiently getting a count from the Pupil link table.
DECLARE #Class TABLE
(
ClassID INT NOT NULL,
ClassName VARCHAR(20) NOT NULL
)
INSERT INTO #Class VALUES (1, 'English')
INSERT INTO #Class VALUES (2, 'Maths')
DECLARE #ClassSession TABLE
(
ClassSessionID INT NOT NULL,
ClassID INT NOT NULL,
Description VARCHAR(100) NOT NULL
)
INSERT INTO #ClassSession VALUES (1, 1, 'Basic English')
INSERT INTO #ClassSession VALUES (2, 1, 'Advanced English')
INSERT INTO #ClassSession VALUES (3, 1, 'Amazing English')
INSERT INTO #ClassSession VALUES (4, 2, 'Basic English')
INSERT INTO #ClassSession VALUES (5, 2, 'Basic English')
DECLARE #ClassPupil TABLE
(
ClassPupilID INT NOT NULL,
ClassID INT NOT NULL,
PupilID INT NOT NULL -- FK to the Pupils table.
)
INSERT INTO #ClassPupil VALUES (1, 1, 1000)
INSERT INTO #ClassPupil VALUES (2, 1, 1001)
INSERT INTO #ClassPupil VALUES (3, 1, 1002)
INSERT INTO #ClassPupil VALUES (4, 1, 1003)
INSERT INTO #ClassPupil VALUES (5, 1, 1004)
INSERT INTO #ClassPupil VALUES (6, 2, 1005)
INSERT INTO #ClassPupil VALUES (7, 2, 1006)
INSERT INTO #ClassPupil VALUES (8, 2, 1007)
SELECT ClassName, COUNT(*) AS Sessions, '??' AS NumerOfPupils
FROM #Class c
INNER JOIN #ClassSession cs
ON cs.ClassID = c.ClassID
GROUP BY c.ClassID, c.ClassName
It can maybe be done with a sub query? Is that the best way?
You have two independent dimensions for each class. You need to aggregat them separately:
SELECT c.ClassName, cs.Sessions, cp.Pupils
FROM #Class c INNER JOIN
(SELECT ClassId, COUNT(*) as sessions
FROM #ClassSession cs
GROUP BY ClassId
) cs
ON cs.ClassID = c.ClassID INNER JOIN
(SELECT ClassId, COUNT(*) as pupils
FROM #ClassPupil cp
GROUP BY ClassId
) cp
ON cp.ClassId = c.ClassId;
Another method is to use CROSS APPLY to get the count of pupils:
SELECT
ClassName, COUNT(*) AS Sessions, cp.NumberOfPupils
FROM #Class c
INNER JOIN #ClassSession cs
ON cs.ClassID = c.ClassID
CROSS APPLY (
SELECT COUNT(*) AS NumberOfPupils
FROM #ClassPupil
WHERE
ClassID = c.ClassID
) cp
GROUP BY c.ClassID, c.ClassName, cp.NumberOfPupils
SELECT ClassName, COUNT(distinct cs.ClassSessionID) AS Sessions, /*'??'*/ count( distinct cp.PupilID) AS NumerOfPupils
FROM #Class c
INNER JOIN #ClassSession cs
ON cs.ClassID = c.ClassID
inner join #ClassPupil cp on c.ClassID=cp.ClassID
GROUP BY /*c.ClassID,*/ c.ClassName
count(distinct...) solves (works around) the problem.
Generally, this is (A -> B, C) issue.

Summarize the list into a comma-separated string

This is the current result that can be changed from day to day
(int) (nvarchar)
Number Grade
--------------
1 a
1 c
2 a
2 b
2 c
3 b
3 a
What I need help is to achieve this result below.
Number Grade
-----------------
1 a, c
2 a, b, c
3 b, a
Use:
declare #t table(Number int, Grade varchar)
insert #t values(1, 'a'), (1, 'c'), (2, 'a'), (2, 'b'), (2, 'c'),
(3, 'b'), (3, 'a')
select t1.Number
, stuff((
select ',' + Grade
from #t t2
where t2.Number = t1.Number
for xml path(''), type
).value('.', 'varchar(max)'), 1, 1, '') [values]
from #t t1
group by t1.Number
You'll need to replace dbo.tablename with your actual table. Also I'm assuming you're using SQL Server 2005 or better - always useful to specify.
SELECT Number, Grades = STUFF((
SELECT N', ' + Grade FROM dbo.tablename
WHERE Number = x.Number
FOR XML PATH(''),
TYPE).value(N'./text()[1]', N'nvarchar(max)'), 1, 2, N'')
FROM dbo.tablename AS x
GROUP BY Number;
In SQL Server 2017 and Azure SQL Database, you can use the new aggregation function STRING_AGG(), which is a lot tidier in this case:
SELECT Number, Grades = STRING_AGG(Grade, N', ')
FROM dbo.tablename
GROUP BY Number;