Summarize the list into a comma-separated string - sql

This is the current result that can be changed from day to day
(int) (nvarchar)
Number Grade
--------------
1 a
1 c
2 a
2 b
2 c
3 b
3 a
What I need help with is achieving the result below.
Number Grade
-----------------
1 a, c
2 a, b, c
3 b, a

Use:
-- Sample data: one row per (Number, Grade) pair.
-- A table variable needs the @ sigil, and varchar gets an explicit length
-- (a bare varchar in a declaration means varchar(1)).
declare @t table(Number int, Grade varchar(10))
insert @t (Number, Grade) values (1, 'a'), (1, 'c'), (2, 'a'), (2, 'b'), (2, 'c'),
(3, 'b'), (3, 'a')
-- One output row per Number. The correlated FOR XML PATH('') subquery
-- concatenates that Number's grades as ', a, c'; STUFF then removes the
-- leading two-character separator, giving 'a, c' as requested.
-- There is no column that records the desired order, so the order of the
-- grades inside each list is not guaranteed.
select t1.Number
, stuff((
select ', ' + Grade
from @t t2
where t2.Number = t1.Number
for xml path(''), type
).value('.', 'varchar(max)'), 1, 2, '') [values]
from @t t1
group by t1.Number

You'll need to replace dbo.tablename with your actual table. Also I'm assuming you're using SQL Server 2005 or better - always useful to specify.
-- Build one comma-separated list of grades per Number.
-- The inner query emits N', <grade>' for every row sharing the outer
-- Number; FOR XML PATH('') glues the pieces together, .value() extracts the
-- text as nvarchar(max), and STUFF removes the leading N', '.
SELECT
    x.Number,
    STUFF(
        (
            SELECT N', ' + g.Grade
            FROM dbo.tablename AS g
            WHERE g.Number = x.Number
            FOR XML PATH(''), TYPE
        ).value(N'./text()[1]', N'nvarchar(max)'),
        1, 2, N''
    ) AS Grades
FROM dbo.tablename AS x
GROUP BY x.Number;
In SQL Server 2017 and Azure SQL Database, you can use the new aggregation function STRING_AGG(), which is a lot tidier in this case:
-- SQL Server 2017+ / Azure SQL Database: STRING_AGG joins each group's
-- grades with N', ' directly.
SELECT
    t.Number,
    STRING_AGG(t.Grade, N', ') AS Grades
FROM dbo.tablename AS t
GROUP BY t.Number;

Related

Find duplicate sets of data grouped by foreign key

How can I check whether the above table contains duplicate groups of rows based on id? For example, here the first two rows of id 1 match two rows of id 2, but id 2 also has a third row which does not match any row of id 1, so it is not a duplicate. There can be any number of ids.
I tried it to do with the group by and string_agg but it didn't work.
Here what I tried:
-- Asker's attempt (kept as asked, only made syntactically valid).
-- It groups on the value columns only, so it reports 'Same.' as soon as any
-- single row repeats anywhere in the table; it never compares the complete
-- set of rows belonging to one id with the set belonging to another id.
declare @t2 Table( m1 int, m2 int,n varchar(50),n2 varchar(50), id int)
insert into @t2 (m1, m2, n, n2, id) values(3,1,'c','',1),(2,1,'s','o',1),(2,1,'s','o',2),(3,1,'c','',2),(3,1,'f','',2)
if exists( SELECT *
FROM @t2
GROUP BY m1,m2,n,n2
HAVING COUNT(*) > 1)
begin
select 'Same.'
end
else
begin
select 'not found'
end
Any help here will be great.
Thanks
Thanks Iptr As per your solution in comment I am posting the same here:
-- Sample data: ids 1, 4 and 10 carry the same two rows; id 2 has those two
-- rows plus an extra one; id 5 has only one of them.
declare @t2 table(m1 int, m2 int, n varchar(5), n2 varchar(5), id int);
insert into @t2(m1, m2, n, n2, id)
values
(3, 1, 'c', '', 1),
(2, 1, 's', 'o', 1),
(2, 1, 's', 'o', 2),
(3, 1, 'c', '', 2),
(3, 1, 'f', '', 2),
(3, 1, 'c', '', 4),
(2, 1, 's', 'o', 4),
(3, 1, 'c', '', 10),
(2, 1, 's', 'o', 10),
(3, 1, 'c', '', 5);
-- Approach 1: pairwise comparison.
-- idcnt is the number of rows each id has. Two ids are paired on identical
-- rows and identical idcnt; the pair is a duplicate set when the number of
-- matched rows equals idcnt, i.e. every row of one id has a partner in the
-- other. Assumes the rows are unique within one id.
-- a.id < b.id lists each pair once (use <> to list it in both directions).
--if exists(select a.id from(.. having count(*) = a.idcnt)
select a.id, b.id
from
(
select *, count(*) over (partition by id) as idcnt
from @t2
) as a
join
(
select *, count(*) over (partition by id) as idcnt
from @t2
) as b on a.id < b.id and a.m1 = b.m1 and a.m2 = b.m2 and a.n = b.n and a.n2 = b.n2 and a.idcnt = b.idcnt
group by a.id, b.id, a.idcnt
having count(*) = a.idcnt;
-- Approach 2: fingerprint per id.
-- Each id's rows are serialized, in a fixed order, into one JSON string;
-- ids with the same string have identical row sets. Every output row is the
-- comma-separated list of ids that share one fingerprint.
--if exists(select j.j from (.. having count(*) > 1;)
select string_agg(i.id, ',')
from
(
select distinct id
from @t2
) as i
cross apply
(
select r.m1, r.m2, r.n, r.n2
from @t2 as r
where r.id = i.id
order by r.m1, r.m2, r.n, r.n2
for json path
) as j(j)
group by j.j
having count(*) > 1;
You can count how many different ids for each set of rows. If the count is more than one, then there are duplicates. For example:
-- For every distinct (m1, m2, n, n2) row, count how many different ids
-- contain it and keep only the rows shared by more than one id.
select s.m1, s.m2, s.n, s.n2, s.cnt
from
(
    select m1, m2, n, n2, count(distinct id) as cnt
    from t
    group by m1, m2, n, n2
) as s
where s.cnt > 1

Building Matrix via SQL

I am using two queries against a schema that count companies, but I am struggling to combine them so that the countries become columns, the industries become rows, and each cell holds the corresponding company count.
-- Company count per industry.
SELECT
    ind.simpleindustrydescription,
    COUNT(c.companyid) AS companycount
FROM ciqcompany AS c
INNER JOIN ciqsimpleindustry AS ind
    ON ind.simpleIndustryid = c.simpleIndustryid
INNER JOIN ciqbusinessdescription AS b
    ON b.companyid = c.companyid
GROUP BY ind.simpleindustrydescription;

-- Company count per country.
SELECT
    geo.country,
    COUNT(c.companyid) AS companycount
FROM ciqcompany AS c
INNER JOIN ciqcountrygeo AS geo
    ON geo.countryid = c.countryid
INNER JOIN ciqbusinessdescription AS b
    ON b.companyid = c.companyid
GROUP BY geo.country;
Expected Output:
Country A Country B Country C
Industry A 5 5 6
Industry B 3 3 4
Industry C 4 8 6
Due to the lack of real example data, here is a simple pivot example based on some dummy records:
-- Lookup tables plus a mapping table that assigns one company row to a
-- (country, industry) pair.
CREATE TABLE #tCountry
(
ID INT
,Name NVARCHAR(100)
);
INSERT INTO #tCountry (ID, Name) VALUES (1, N'Country A'), (2, N'Country B'), (3, N'Country C');
CREATE TABLE #tIndustry
(
ID INT
,Name NVARCHAR(100)
);
INSERT INTO #tIndustry (ID, Name) VALUES (1, N'Industry A'), (2, N'Industry B'), (3, N'Industry C');
CREATE TABLE #tMapping
(
ID INT
,CountryID INT
,IndustryID INT
,Name NVARCHAR(100)
);
INSERT INTO #tMapping (ID, CountryID, IndustryID, Name) VALUES (1, 1, 1, N'Country A Industry A - 1'), (2, 1, 1, N'Country A Industry A - 2');
INSERT INTO #tMapping (ID, CountryID, IndustryID, Name) VALUES (3, 1, 2, N'Country A Industry B - 1'), (4, 1, 2, N'Country A Industry b - 2'), (5, 1, 2, N'Country A Industry b - 3');
INSERT INTO #tMapping (ID, CountryID, IndustryID, Name) VALUES (6, 2, 1, N'Country B Industry A - 1');
-- @lCountries collects the pivot column list, @stmt the final statement
-- (scalar variables use the @ sigil; # is reserved for temp tables).
DECLARE @lCountries NVARCHAR(MAX) = N'';
DECLARE @stmt NVARCHAR(MAX) = N'';
-- Every country becomes a pivot column, including countries without any
-- mapping (their cells are NULL). QUOTENAME makes names with spaces or
-- brackets safe to use as column identifiers.
SELECT @lCountries += N', ' + QUOTENAME(CountryName)
FROM(
SELECT DISTINCT tc.Name CountryName
FROM #tCountry tc
) x;
-- Strip the leading N', '.
SELECT @lCountries = STUFF(@lCountries, 1, 2, N'');
-- Count the mappings per (industry, country) and pivot the countries into
-- columns. The column list has to be spliced into the text because PIVOT
-- does not accept a variable for it.
SELECT @stmt = N'SELECT *
FROM
(
SELECT ti.Name IndustryName, tc.Name CountryName, COUNT(*) MappingCounter
FROM #tMapping tm
JOIN #tCountry tc ON tm.CountryID = tc.ID
JOIN #tIndustry ti ON tm.IndustryID = ti.ID
GROUP BY ti.Name, tc.Name
) t
PIVOT (MAX(t.MappingCounter) FOR CountryName in (' + @lCountries + N')) AS x';
EXEC sp_executesql @stmt;
Anyways, if you are dealing with an unknown number of countries, you might want to extend this example a little and use dynamic SQL in order to build the pivot statement. Got an example somewhere, but would have to search for it...
Result of my example:
IndustryName Country A Country B Country C
Industry A 2 1 NULL
Industry B 3 NULL NULL

SQL Server recursive self join

I have a simple categories table as with the following columns:
Id
Name
ParentId
So, an infinite amount of Categories can be the child of a category. Take for example the following hierarchy:
I want a simple query that returns the category "Business Laptops" together with a column listing all its parents, comma-separated or similar:
Or take the following example:
Recursive cte to the rescue....
Create and populate sample table (Please save us this step in your future questions):
-- Sample hierarchy: parent_id holds the id of the parent row and is NULL
-- for root categories. Created as a temp table because the statements that
-- follow read from #T.
CREATE TABLE #T
(
id int,
name varchar(100),
parent_id int
)
INSERT INTO #T (id, name, parent_id) VALUES
(1, 'A', NULL),
(2, 'A.1', 1),
(3, 'A.2', 1),
(4, 'A.1.1', 2),
(5, 'B', NULL),
(6, 'B.1', 5),
(7, 'B.1.1', 6),
(8, 'B.2', 5),
(9, 'A.1.1.1', 4),
(10, 'A.1.1.2', 4)
The cte:
-- Walks the hierarchy top-down: a root starts the path with its own name,
-- every child appends ',' + its name to the path of its parent.
;WITH CTE (id, name, path, parent_id) AS
(
    -- anchor: rows without a parent
    SELECT root.id, root.name, root.name, root.parent_id
    FROM #T AS root
    WHERE root.parent_id IS NULL
    UNION ALL
    -- recursive step: children of the rows found so far
    SELECT child.id, child.name, CAST(parent.path + ',' + child.name AS varchar(100)), child.parent_id
    FROM CTE AS parent
    INNER JOIN #T AS child ON child.parent_id = parent.id
)
The query:
-- Return every category together with its root-to-node path.
SELECT c.id, c.name, c.path
FROM CTE AS c
Results:
id name path
1 A A
5 B B
6 B.1 B,B.1
8 B.2 B,B.2
7 B.1.1 B,B.1,B.1.1
2 A.1 A,A.1
3 A.2 A,A.2
4 A.1.1 A,A.1,A.1.1
9 A.1.1.1 A,A.1,A.1.1,A.1.1.1
10 A.1.1.2 A,A.1,A.1.1,A.1.1.2
See online demo on rextester

Identifying/comparing sets of rows within groups

I have a matter which seemed simple to solve but now I find it troublesome.
In simplification - I need to find a way to identify unique sets of rows within groups defined by another column. In basic example the source table contains only two columns:
routeID nodeID nodeName
1 1 a
1 2 b
2 1 a
2 2 b
3 1 a
3 2 b
4 1 a
4 2 c
5 1 a
5 2 c
6 1 a
6 2 b
6 3 d
7 1 a
7 2 b
7 3 d
So, the routeID column refers to set of nodes which define a route.
What I need to do is to somehow group the routes, so that there will be only one unique sequence of nodes for one routeID.
In my actual case I tried to use window function to add columns which help to identify nodes sequence, but I still have no idea how to get those unique sequences and group routes.
As a final effect I want to get only unique routes - for example routes 1,2 and 3 aggregated to one route.
Do you have any idea how to help me ?
EDIT:
The other table which I would like to join with the one from the example may look like that:
journeyID nodeID nodeName routeID
1 1 a 1
1 2 b 1
2 1 a 1
2 2 b 1
3 1 a 4
3 2 c 4
...........................
...........................
You can try this idea:
-- Sample routes: each row is one node of a route; the queries below order
-- a route's nodes by nodeID. Created as a temp table because every query
-- that follows reads from #DataSource.
CREATE TABLE #DataSource
(
[routeID] TINYINT
,[nodeID] TINYINT
,[nodeName] CHAR(1)
);
-- Numeric literals for the TINYINT columns avoid an implicit conversion
-- from strings.
INSERT INTO #DataSource ([routeID], [nodeID], [nodeName])
VALUES (1, 1, 'a')
,(1, 2, 'b')
,(2, 1, 'a')
,(2, 2, 'b')
,(3, 1, 'a')
,(3, 2, 'b')
,(4, 1, 'a')
,(4, 2, 'c')
,(5, 1, 'a')
,(5, 2, 'c')
,(6, 1, 'a')
,(6, 2, 'b')
,(6, 3, 'd')
,(7, 1, 'a')
,(7, 2, 'b')
,(7, 3, 'd');
-- One row per route with the CSV of its node names (ordered by nodeID);
-- rowID numbers the routes that share the same CSV, lowest routeID first.
WITH RouteIds AS
(
    -- the distinct route ids
    SELECT DISTINCT [routeID]
    FROM #DataSource
),
RoutePaths AS
(
    -- CSV of node names for each route id
    SELECT R.[routeID]
          ,STUFF
           (
               (
                   SELECT ',' + N.[nodeName]
                   FROM #DataSource N
                   WHERE N.[routeID] = R.[routeID]
                   ORDER BY N.[nodeID]
                   FOR XML PATH(''), TYPE
               ).value('.', 'VARCHAR(MAX)')
              ,1
              ,1
              ,''
           ) AS [value]
    FROM RouteIds R
)
SELECT P.[routeID]
      ,P.[value]
      ,ROW_NUMBER() OVER (PARTITION BY P.[value] ORDER BY P.[routeID]) AS [rowID]
FROM RoutePaths P;
The code will give you this output:
So, you simply need to filter by rowID = 1. Of course, you can change the code as you like to satisfy your business criteria (for example, showing not the first route ID with the same nodes, but the last).
Also, ROW_NUMBER function cannot be used directly in the WHERE clause, so you need to wrap the code before filtering:
-- Step 1 (RouteValues): one row per route with its ordered node-name CSV.
-- Step 2 (RankedRoutes): number the routes sharing a CSV, lowest routeID first.
-- Step 3: return the node rows of the first route of every distinct CSV.
WITH RouteValues AS
(
    SELECT R.[routeID]
          ,STUFF
           (
               (
                   SELECT ',' + N.[nodeName]
                   FROM #DataSource N
                   WHERE N.[routeID] = R.[routeID]
                   ORDER BY N.[nodeID]
                   FOR XML PATH(''), TYPE
               ).value('.', 'VARCHAR(MAX)')
              ,1
              ,1
              ,''
           ) AS [value]
    FROM #DataSource R
    GROUP BY R.[routeID]
),
RankedRoutes AS
(
    SELECT V.[routeID]
          ,ROW_NUMBER() OVER (PARTITION BY V.[value] ORDER BY V.[routeID]) AS [rowID]
    FROM RouteValues V
)
SELECT D.[routeID], D.[nodeID], D.[nodeName]
FROM #DataSource D
WHERE EXISTS
(
    SELECT 1
    FROM RankedRoutes K
    WHERE K.[routeID] = D.[routeID]
      AND K.[rowID] = 1
);
OK, let's use some recursion to create a complete node list for each routeID.
First of all, let's populate the source table and the journeys table:
-- your source
-- Temp tables are used because the statements that follow read from
-- #r, #j and #routes.
-- your source: one row per node of a route
create table #r (routeID int, nodeID int, nodeName char(1))
-- your other table: journeys, each travelling along one route
create table #j (journeyID int, nodeID int, nodeName char(1), routeID int)
-- temp results table: one row per route with its comma-separated node names
create table #routes (routeID int primary key, nodeNames varchar(1000))

insert into #r (routeID, nodeID, nodeName)
values
(1, 1, 'a'),
(1, 2, 'b'),
(2, 1, 'a'),
(2, 2, 'b'),
(3, 1, 'a'),
(3, 2, 'b'),
(4, 1, 'a'),
(4, 2, 'c'),
(5, 1, 'a'),
(5, 2, 'c'),
(6, 1, 'a'),
(6, 2, 'b'),
(6, 3, 'd'),
(7, 1, 'a'),
(7, 2, 'b'),
(7, 3, 'd')

-- The column list follows the order of the values:
-- journeyID, nodeID, nodeName, routeID.
insert into #j (journeyID, nodeID, nodeName, routeID)
values
(1, 1, 'a', 1),
(1, 2, 'b', 1),
(2, 1, 'a', 1),
(2, 2, 'b', 1),
(3, 1, 'a', 4),
(3, 2, 'c', 4)
Now let's extract the routes:
-- Builds the comma-separated node list of every route and stores it in
-- #routes.
--   numbered: the route nodes plus n2, a per-route counter that is 1 on the
--             node with the highest nodeID (the last node).
--   walk:     starts at nodeID 1 and repeatedly appends the node whose
--             nodeID is one higher, accumulating the names in Names.
-- Only the rows that reached the last node (n2 = 1) hold the full list.
;with
numbered (routeID, nodeID, nodeName, n2) as (
    select src.routeID, src.nodeID, src.nodeName,
           row_number() over (partition by src.routeID order by src.nodeID desc)
    from #r as src
),
walk (routeID, nodeID, nodeName, n2, Names, i2) as (
    select n.routeID, n.nodeID, n.nodeName, n.n2,
           cast(n.nodeName as varchar(1000)),
           cast(0 as bigint)
    from numbered as n
    where n.nodeID = 1
    union all
    select n.routeID, n.nodeID, n.nodeName, n.n2,
           cast(w.Names + ',' + n.nodeName as varchar(1000)),
           w.n2
    from numbered as n
    inner join walk as w
        on w.routeID = n.routeID
       and w.nodeID = n.nodeID - 1
)
insert into #routes (routeID, nodeNames)
select w.routeID, w.Names
from walk as w
where w.n2 = 1
table #routes will be like this:
routeID nodeNames
1 'a,b'
2 'a,b'
3 'a,b'
4 'a,c'
5 'a,c'
6 'a,b,d'
7 'a,b,d'
And now the final output:
-- the unique routes
-- the unique routes: lowest routeID for each distinct node list
select min(rt.routeID) as routeID, rt.nodeNames
from #routes as rt
group by rt.nodeNames
-- the unique journeys: lowest journeyID for each distinct node list
select min(jr.journeyID) as journeyID, rt.nodeNames
from #routes as rt
inner join #j as jr on jr.routeID = rt.routeID
group by rt.nodeNames
output:
routeID nodeNames
1 'a,b'
4 'a,c'
6 'a,b,d'
and
journeyID nodeNames
1 'a,b'
3 'a,c'

Help with a query

Based on the following table
ID Effort Name
-------------------------
1 1 A
2 1 A
3 8 A
4 10 B
5 4 B
6 1 B
7 10 C
8 3 C
9 30 C
I want to check whether the total effort against a name is less than 40 and, if so, add a row with effort = 40 - (Total Effort) for that name. The ID of the new row can be anything. If the total effort is greater than 40, then truncate the data for one of the rows to make the total 40.
So after applying the logic above table will be
ID Effort Name
-------------------------
1 1 A
2 1 A
3 8 A
10 30 A
4 10 B
5 4 B
6 1 B
11 25 B
7 10 C
8 3 C
9 27 C
I was thinking of opening a cursor, keeping a counter of the total effort, and based on the logic insert existing and new rows in another temporary table.
I am not sure if this is an efficient way to deal with this. I would like to learn if there is a better way.
I think the first part could be done this way:
-- Top up every name whose total effort is below 40 with one extra row
-- holding the missing amount.
INSERT INTO tbl(Effort, Name)
SELECT 40 - SUM(Effort), Name
FROM tbl
GROUP BY Name
HAVING SUM(Effort) < 40
The second part is harder. Perhaps you could do something like this instead?
-- Add one balancing row per name whose total effort is not 40: the row is
-- positive when the total is below 40 and negative when it is above.
INSERT INTO tbl(Effort, Name)
SELECT 40 - SUM(Effort), Name
FROM tbl
GROUP BY Name
HAVING SUM(Effort) <> 40
What this does is, rather than making changes to your actual data, adds a row with a negative number for the Name if the total effort is > 40 hours, or a positive value if it is < 40 hours. This seems much safer for your data integrity than messing with the original values.
In SQL Server 2008, this may be done with a single MERGE statement:
-- Sample data; name C deliberately totals more than 40.
DECLARE @efforts TABLE (id INT NOT NULL PRIMARY KEY, effort INT NOT NULL, name CHAR(1))
INSERT
INTO    @efforts (id, effort, name)
VALUES  (1, 1, 'A'),
(2, 1, 'A'),
(3, 8, 'A'),
(4, 10, 'B'),
(5, 4, 'B'),
(6, 1, 'B'),
(7, 10, 'C'),
(8, 3, 'C'),
(9, 30, 'C'),
(10, 60, 'C')
-- State before the adjustment.
SELECT  id, effort, name
FROM    @efforts
ORDER BY
name, id
-- total:  the existing rows plus, for every name whose sum is below 40, one
--         filler row (new id above the current maximum) with the missing
--         amount.
-- source: every row of total with its running totals per name in id order:
--         ce = sum up to and including the row, cp = sum before the row.
;WITH    total AS
(
SELECT  e.id, e.effort, e.name
FROM    @efforts e
UNION ALL
SELECT  ROW_NUMBER() OVER(ORDER BY name) +
(
SELECT  MAX(id)
FROM    @efforts
),
40 - SUM(effort),
name
FROM    @efforts
GROUP BY
name
HAVING  SUM(effort) < 40
),
source AS
(
SELECT  e.id, e.effort, e.name,
(
SELECT  SUM(effort)
FROM    total ep
WHERE   ep.name = e.name
AND ep.id <= e.id
) AS ce,
COALESCE(
(
SELECT  SUM(effort)
FROM    total ep
WHERE   ep.name = e.name
AND ep.id < e.id
), 0) AS cp
FROM    total e
)
MERGE
INTO    @efforts e
USING   source s
ON      e.id = s.id
-- The 40 are already used up before this row: remove it. Testing this
-- first (with >=) keeps a row that starts exactly at 40 from being left
-- behind with effort 0.
WHEN MATCHED AND s.cp >= 40 THEN
DELETE
-- The row crosses the 40 mark: cut it down to what is left (40 - cp,
-- which equals effort + 40 - ce).
WHEN MATCHED AND s.ce > 40 THEN
UPDATE
SET     e.effort = 40 - s.cp
-- Filler rows do not exist in the target yet.
WHEN NOT MATCHED BY TARGET THEN
INSERT  (id, effort, name)
VALUES  (s.id, s.effort, s.name);
-- State after the adjustment: every name totals 40.
SELECT  id, effort, name
FROM    @efforts
ORDER BY
name, id
In SQL Server 2005, you'll need two statements (in one transaction):
-- Sample data; one INSERT per row because SQL Server 2005 has no multi-row
-- VALUES clause.
DECLARE @efforts TABLE (id INT NOT NULL PRIMARY KEY, effort INT NOT NULL, name CHAR(1))
INSERT
INTO    @efforts (id, effort, name)
VALUES  (1, 1, 'A')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (2, 1, 'A')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (3, 8, 'A')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (4, 10, 'B')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (5, 4, 'B')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (6, 1, 'B')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (7, 10, 'C')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (8, 3, 'C')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (9, 30, 'C')
INSERT
INTO    @efforts (id, effort, name)
VALUES  (10, 60, 'C')
-- Statement 1: delete every row whose running total per name (in id order,
-- including the row itself) exceeds 40. The row that crosses the 40 mark is
-- removed as a whole; statement 2 then fills the gap.
-- When this is run against a real table, wrap both statements in one
-- transaction.
;WITH    total AS
(
SELECT  e.id, e.effort, e.name,
COALESCE(
(
SELECT  SUM(effort)
FROM    @efforts ep
WHERE   ep.name = e.name
AND ep.id <= e.id
), 0) AS running_total
FROM    @efforts e
)
DELETE
FROM    total
WHERE   running_total > 40
-- Statement 2: add one filler row (new id above the current maximum) for
-- every name whose remaining total is below 40.
INSERT
INTO    @efforts (id, effort, name)
SELECT  (
SELECT  MAX(id)
FROM    @efforts
) +
ROW_NUMBER() OVER (ORDER BY name),
40 - SUM(effort),
name
FROM    @efforts
GROUP BY
name
HAVING  SUM(effort) < 40
-- State after the adjustment: every name totals 40.
SELECT  id, effort, name
FROM    @efforts
ORDER BY
name, id
This will give you the names that need to be modified:
-- Names whose total effort is still short of 40.
-- [Table] is a placeholder for your real table name; it is bracketed
-- because TABLE is a reserved word.
SELECT Name, SUM(Effort) AS TotalEffort
FROM [Table]
GROUP BY Name
HAVING SUM(Effort) < 40
Select this into a temp table, Add a column for 40 - SUM, then create an insert statement from that. Much better than a cursor.
This will do the first part:
-- Add one top-up row per name whose efforts sum to less than 40; the new
-- row carries the missing amount.
INSERT INTO dbo.Test (Name, Effort)
SELECT s.Name, 40 - s.TotalEffort
FROM (
    SELECT Name, SUM(Effort) AS TotalEffort
    FROM dbo.Test
    GROUP BY Name
) AS s
WHERE s.TotalEffort < 40
And this will do the second part:
-- For every name whose efforts sum to more than 40, reduce the row with the
-- highest ID by the excess so that the name totals exactly 40.
-- AmountToDeduct must be the positive excess (SUM - 40); computing it as
-- 40 - SUM gives a negative number and subtracting that would raise the
-- effort instead of lowering it.
-- Note: if the excess is larger than the effort of that last row, the row
-- ends up negative.
Update a
Set a.Effort = a.Effort - b.AmountToDeduct
From dbo.Test a
Join (
Select t.Name, SUM(t.Effort) - 40 as AmountToDeduct
From dbo.Test t
Group By t.Name
Having SUM(t.Effort) > 40
)b on a.Name = b.Name
Where a.ID = (Select MAX(c.ID)
From dbo.Test c
Where c.Name = a.Name
)