how to make a DISTINCT of STRING_AGG - sql

I am trying to get only DISTINCT values in the OfferDetails column produced by the STRING_AGG function:
OUTER APPLY
(
    -- Builds two ';'-delimited lists from the started type-1 transfers made
    -- by the given caller in the date window: the joined offer ids and the
    -- conference ids. Duplicates are NOT removed here.
    -- NOTE(review): #agentId / #dateFrom / #dateTo are reproduced as found;
    -- they look like variables - confirm the intended sigil.
    SELECT
        STRING_AGG(CAST(co.OfferId AS NVARCHAR(MAX)), ';')      AS OfferDetails,
        STRING_AGG(CAST(ct.ConferenceId AS NVARCHAR(MAX)), ';') AS CustomTransferDetails
    FROM ConferenceTransfer AS ct
    INNER JOIN ConferenceOffer AS co
        ON ct.ConferenceId = co.ConferenceId
    WHERE ct.ConferenceTransferTypeId = 1
      AND ct.DateStarted IS NOT NULL
      AND ct.CallerUserId = #agentId
      AND ct.DateInitiated BETWEEN #dateFrom AND #dateTo
) CallTransfered

I'm not aware of any native way of making string_agg distinct, but you can ignore all but the first instance of each value by using a subquery and row_number to identify the first instance, i.e.
OUTER APPLY
(
    -- De-duplicates OfferDetails only: every row is numbered within its
    -- OfferId, and only row 1 contributes a value. The CASE yields NULL for
    -- the rest, and STRING_AGG skips NULLs.
    SELECT
        STRING_AGG(
            CAST(CASE WHEN c.RowNumber = 1 THEN c.OfferId END AS NVARCHAR(MAX)),
            ';'
        ) AS OfferDetails,
        STRING_AGG(CAST(c.ConferenceId AS NVARCHAR(MAX)), ';') AS CustomTransferDetails
    FROM (
        SELECT
            co.OfferId,
            ct.ConferenceId,
            ROW_NUMBER() OVER (
                PARTITION BY co.OfferId
                ORDER BY ct.ConferenceId
            ) AS RowNumber
        FROM ConferenceTransfer AS ct
        INNER JOIN ConferenceOffer AS co
            ON ct.ConferenceId = co.ConferenceId
        WHERE ct.ConferenceTransferTypeId = 1
          AND ct.DateStarted IS NOT NULL
          AND ct.CallerUserId = #agentId
          AND ct.DateInitiated BETWEEN #dateFrom AND #dateTo
    ) AS c
) CallTransfered
The key part being CASE WHEN c.RowNumber = 1 THEN c.OfferId END, so for anything other than the first instance, you are passing NULL, so the end result will only contain each OfferId once.

You need to group by OfferId, then group again
OUTER APPLY
(
    -- Two-level aggregation: the inner query collapses to one row per
    -- OfferId (carrying that offer's conference-id list), so the outer
    -- STRING_AGG sees each OfferId exactly once.
    SELECT
        STRING_AGG(CAST(c.OfferId AS NVARCHAR(MAX)), ';') AS OfferDetails,
        STRING_AGG(c.CustomTransferDetails, ';')          AS CustomTransferDetails
    FROM (
        SELECT
            co.OfferId,
            STRING_AGG(CAST(ct.ConferenceId AS NVARCHAR(MAX)), ';') AS CustomTransferDetails
        FROM ConferenceTransfer AS ct
        INNER JOIN ConferenceOffer AS co
            ON ct.ConferenceId = co.ConferenceId
        WHERE ct.ConferenceTransferTypeId = 1
          AND ct.DateStarted IS NOT NULL
          AND ct.CallerUserId = #agentId
          AND ct.DateInitiated BETWEEN #dateFrom AND #dateTo
        GROUP BY co.OfferId
    ) AS c
) CallTransfered
This obviously only works for one DISTINCT column. If you need both lists to be distinct, you can re-split the aggregated values, take the distinct ones and re-aggregate them:
OUTER APPLY
(
    -- Makes BOTH lists distinct: aggregate everything once (derived table c,
    -- a single row), then for each list split it back into rows, keep the
    -- DISTINCT values and re-aggregate.
    SELECT
        (
            SELECT STRING_AGG(CAST(d.value AS NVARCHAR(MAX)), ';')
            FROM (
                SELECT DISTINCT s.value
                FROM STRING_SPLIT(c.OfferDetails, ';') AS s
            ) AS d
        ) AS OfferDetails,
        (
            -- Split the column that c actually exposes; ConferenceId is not
            -- visible outside the derived table.
            SELECT STRING_AGG(CAST(d.value AS NVARCHAR(MAX)), ';')
            FROM (
                SELECT DISTINCT s.value
                FROM STRING_SPLIT(c.CustomTransferDetails, ';') AS s
            ) AS d
        ) AS CustomTransferDetails
    FROM (
        -- No GROUP BY here: grouping by OfferId would return one row per
        -- offer, so the APPLY would multiply the outer rows and the
        -- conference ids could not be de-duplicated across offers.
        SELECT
            STRING_AGG(CAST(co.OfferId AS NVARCHAR(MAX)), ';')      AS OfferDetails,
            STRING_AGG(CAST(ct.ConferenceId AS NVARCHAR(MAX)), ';') AS CustomTransferDetails
        FROM ConferenceTransfer AS ct
        INNER JOIN ConferenceOffer AS co
            ON ct.ConferenceId = co.ConferenceId
        WHERE ct.ConferenceTransferTypeId = 1
          AND ct.DateStarted IS NOT NULL
          AND ct.CallerUserId = #agentId
          AND ct.DateInitiated BETWEEN #dateFrom AND #dateTo
    ) AS c
) CallTransfered

Related

Avoid SQL Pivot returning duplicate rows

I have the following SQL script which returns duplicate values in PIVOT. How do I combine those duplicate records into one row?
Please check the image below for the result set.
-- Pivots the summed sale quantity per stock item/location into six month
-- columns [1]..[6].
-- PIVOT groups by every column of z that is neither the pivoted column
-- (months) nor the aggregated one (saleqty), so periodname, which is
-- selected below, yields a separate output row per period.
SELECT *
FROM (SELECT X.stockcode,
X.description,
X.pack,
X.location,
X.lname,
X.qty,
Y.stockcode AS StockCode2,
y.periodname,
Y.months,
Y.saleqty
-- X: quantity on hand per stock item and location.
FROM (SELECT dbo.stock_items.stockcode,
dbo.stock_items.description,
dbo.stock_items.pack,
dbo.stock_loc_info.location,
dbo.stock_locations.lname,
dbo.stock_loc_info.qty
FROM dbo.stock_locations
INNER JOIN dbo.stock_loc_info
ON dbo.stock_locations.locno = dbo.stock_loc_info.location
LEFT OUTER JOIN dbo.stock_items
ON dbo.stock_loc_info.stockcode = dbo.stock_items.stockcode
-- This filter on the outer-joined table discards rows with no matching
-- stock item, so the LEFT OUTER JOIN above behaves like an inner join.
WHERE ( dbo.stock_items.status = 's' )) AS X
-- Y: quantity sold per stock code, month bucket and period name.
LEFT OUTER JOIN (SELECT dbo.dr_invlines.stockcode,
-- Month bucket: 1 when the transaction month equals the current month,
-- 2 for the month before, and so on (wraps modulo 12).
( 12 + Datepart(month, Getdate()) - Datepart(month, dbo.dr_trans.transdate) ) % 12 + 1 AS Months,
Sum(dbo.dr_invlines.quantity) AS SaleQty,
dbo.period_status.periodname
FROM dbo.dr_trans
INNER JOIN dbo.period_status
ON dbo.dr_trans.period_seqno = dbo.period_status.seqno
-- Nested join: stock_items is right-joined to dr_invlines first (first ON),
-- and that result is then left-joined to dr_trans (second ON).
LEFT OUTER JOIN dbo.stock_items AS STOCK_ITEMS_1
RIGHT OUTER JOIN dbo.dr_invlines
ON STOCK_ITEMS_1.stockcode = dbo.dr_invlines.stockcode
ON dbo.dr_trans.seqno = dbo.dr_invlines.hdr_seqno
WHERE ( STOCK_ITEMS_1.status = 'S' )
AND ( dbo.dr_trans.transtype IN ( 1, 2 ) )
-- Only transactions dated within the last six months.
AND ( dbo.dr_trans.transdate >= Dateadd(m, -6, Getdate()) )
GROUP BY dbo.dr_invlines.stockcode,
Datepart(month, dbo.dr_trans.transdate),
dbo.period_status.periodname) AS Y
ON X.stockcode = Y.stockcode) z
PIVOT (Sum(saleqty) FOR [months] IN ([1],[2],[3],[4],[5],[6])) AS pivoted
EDIT: I missed that the root cause of your issue is the inclusion of the periodname column, which causes the perceived duplication. I am leaving this in place as a general solution showing CTE usage, because it could still be useful if you then want to do extra filtering/transformation of your pivot results.
One way is to take the results of the pivot query and run them through a SELECT DISTINCT query.
An example of wrapping your pivot query as a CTE and using it to feed a SELECT DISTINCT is below (please note: untested, but it parses as valid in my SSMS):
-- Wraps the pivot query in a CTE and de-duplicates its rows.
-- The CTE column list must name exactly the columns the pivot returns: the
-- pass-through columns plus one column per pivoted month ([1]..[6]).
-- months and saleqty are consumed by PIVOT and do not appear in its output.
-- NOTE(review): periodname is still carried through, so rows that differ
-- only by period remain distinct; drop it from z if one row per stock
-- item/location is wanted.
WITH PivotResults_CTE (
    stockcode,
    description,
    pack,
    location,
    lname,
    qty,
    StockCode2,
    periodname,
    [1],
    [2],
    [3],
    [4],
    [5],
    [6]
)
AS (
    SELECT
        pivoted.stockcode,
        pivoted.description,
        pivoted.pack,
        pivoted.location,
        pivoted.lname,
        pivoted.qty,
        pivoted.StockCode2,
        pivoted.periodname,
        pivoted.[1],
        pivoted.[2],
        pivoted.[3],
        pivoted.[4],
        pivoted.[5],
        pivoted.[6]
    FROM (
        SELECT
            X.stockcode,
            X.description,
            X.pack,
            X.location,
            X.lname,
            X.qty,
            Y.stockcode AS StockCode2,
            Y.periodname,
            Y.months,
            Y.saleqty
        FROM (
            -- Quantity on hand per stock item and location.
            SELECT
                dbo.stock_items.stockcode,
                dbo.stock_items.description,
                dbo.stock_items.pack,
                dbo.stock_loc_info.location,
                dbo.stock_locations.lname,
                dbo.stock_loc_info.qty
            FROM dbo.stock_locations
            INNER JOIN dbo.stock_loc_info
                ON dbo.stock_locations.locno = dbo.stock_loc_info.location
            LEFT OUTER JOIN dbo.stock_items
                ON dbo.stock_loc_info.stockcode = dbo.stock_items.stockcode
            WHERE (dbo.stock_items.STATUS = 's')
        ) AS X
        LEFT OUTER JOIN (
            -- Quantity sold per stock code, month bucket (1 = current
            -- calendar month) and period name, for the last six months.
            SELECT
                dbo.dr_invlines.stockcode,
                (12 + Datepart(month, Getdate()) - Datepart(month, dbo.dr_trans.transdate)) % 12 + 1 AS Months,
                Sum(dbo.dr_invlines.quantity) AS SaleQty,
                dbo.period_status.periodname
            FROM dbo.dr_trans
            INNER JOIN dbo.period_status
                ON dbo.dr_trans.period_seqno = dbo.period_status.seqno
            -- Nested join kept exactly as in the original query.
            LEFT OUTER JOIN dbo.stock_items AS STOCK_ITEMS_1
                RIGHT OUTER JOIN dbo.dr_invlines
                    ON STOCK_ITEMS_1.stockcode = dbo.dr_invlines.stockcode
                ON dbo.dr_trans.seqno = dbo.dr_invlines.hdr_seqno
            WHERE (STOCK_ITEMS_1.STATUS = 'S')
              AND (dbo.dr_trans.transtype IN (1, 2))
              AND (dbo.dr_trans.transdate >= Dateadd(m, -6, Getdate()))
            GROUP BY
                dbo.dr_invlines.stockcode,
                Datepart(month, dbo.dr_trans.transdate),
                dbo.period_status.periodname
        ) AS Y
            ON X.stockcode = Y.stockcode
    ) z
    PIVOT (Sum(saleqty) FOR [months] IN ([1], [2], [3], [4], [5], [6])) AS pivoted
)
SELECT DISTINCT *
FROM
PivotResults_CTE
;
Also note, your SQL included in the above may look slightly different from your original, but that is only because I ran it through a reformatter to ensure I understood its structure.
In other words, the basic CTE wrapper for your pivot query is:
-- Template only: Field1, Field2, ... and YOUR_PIVOT_QUERY_HERE are
-- placeholders to be replaced. The column list needs one name per column
-- returned by the wrapped query.
WITH PivotResults_CTE (
Field1,
Field2,
...
)
AS (
YOUR_PIVOT_QUERY_HERE
)
-- DISTINCT removes rows that are identical in every column.
SELECT DISTINCT *
FROM
PivotResults_CTE
;

Trying to join two sql statement

I would like to join Query 1 and Query 2 on TripId.
Query 1
-- Vehicle number for every trip that has a matching vehicle.
SELECT
    t.TripId,
    v.VehicleNo
FROM tblTrips AS t
INNER JOIN tblVehicles AS v
    ON t.VehicleId = v.VehicleId
Query 2
-- T1: one row per trip deduction with a display label
-- "<type> - <description> - <amount>".
;WITH T1 AS (
    SELECT
        tblTrips.TripId,
        tblTripDeductions.Amount,
        CONVERT(VARCHAR(400),
                tblDeductionTypes.DeductionType + ' - '
                + tblTripDeductions.Description + ' - '
                + CONVERT(VARCHAR(24), tblTripDeductions.Amount)) AS DeductionFor
    FROM tblTrips
    INNER JOIN tblTripDeductions
        ON tblTrips.TripId = tblTripDeductions.TripId
    INNER JOIN tblDeductionTypes
        ON tblTripDeductions.DeductionId = tblDeductionTypes.DeductionId
)
-- Per trip: total deduction amount plus all labels concatenated.
-- The two unnamed columns ('#' and ' ' + label) are emitted back to back by
-- FOR XML PATH(''); STUFF then removes the leading '#'.
SELECT
    T1.TripId,
    SUM(T1.Amount) AS Amount,
    STUFF((
        SELECT '#', ' ' + CONVERT(varchar(1000), T2.DeductionFor)
        FROM T1 AS T2
        WHERE T1.TripId = T2.TripId
        FOR XML PATH('')
    ), 1, 1, '') AS [Description]
FROM T1
GROUP BY T1.TripId
First query's output is list of TripId and VehicleNo.
Second query's output is list of TripId, Amount and description.
And my desire output is TripId, VehicleNo, amount and description.
The syntax for WITH (Common Table Expressions) allows you to create multiple CTEs.
Using that, you can turn the final part of your Query 2 into a CTE (which I'll name Query2), and your Query 1 can also be made into a CTE (which I'll name Query1).
Then, the final SELECT statement can simply join those two CTEs together.
;
WITH
-- T1: one row per trip deduction with a display label.
T1 AS (
    SELECT
        tblTrips.TripId,
        tblTripDeductions.Amount,
        CONVERT(VARCHAR(400),
                tblDeductionTypes.DeductionType + ' - '
                + tblTripDeductions.Description + ' - '
                + CONVERT(VARCHAR(24), tblTripDeductions.Amount)) AS DeductionFor
    FROM tblTrips
    INNER JOIN tblTripDeductions
        ON tblTrips.TripId = tblTripDeductions.TripId
    INNER JOIN tblDeductionTypes
        ON tblTripDeductions.DeductionId = tblDeductionTypes.DeductionId
)
,
-- Query2: per trip, total deduction amount and concatenated labels.
Query2 AS (
    SELECT
        T1.TripId,
        SUM(T1.Amount) AS Amount,
        STUFF((
            SELECT '#', ' ' + CONVERT(varchar(1000), T2.DeductionFor)
            FROM T1 AS T2
            WHERE T1.TripId = T2.TripId
            FOR XML PATH('')
        ), 1, 1, '') AS [Description]
    FROM T1
    GROUP BY T1.TripId
)
,
-- Query1: vehicle number per trip.
Query1 AS (
    SELECT
        tblTrips.TripId,
        tblVehicles.VehicleNo
    FROM tblTrips
    INNER JOIN tblVehicles
        ON tblTrips.VehicleId = tblVehicles.VehicleId
)
-- Explicit column list so TripId is returned once.
-- The INNER JOIN returns only trips that have at least one deduction.
SELECT
    Query1.TripId,
    Query1.VehicleNo,
    Query2.Amount,
    Query2.[Description]
FROM
    Query1
INNER JOIN
    Query2
        ON Query1.TripId = Query2.TripId
I haven't done anything to check your queries, as the layout that you have used isn't very readable.
Just merge the queries using CTEs (I didn't change/review your code, just formatted it for the sake of readability - the input was pretty hard to read):
-- TripDetails: one row per trip with its display fields.
;WITH TripDetails AS (
    SELECT tblTrips.TripId
         , tblTrips.DestinationDistrictId
         , tblTrips.VehicleId
         , tblTrips.No
         , tblVehicles.VehicleNo
         , tblTrips.CoachNo
         , CONVERT(VARCHAR(24), tblTrips.GoDate, 105) AS GoDate
         , tblTrips.GoTime
         -- Return-leg fields are shown as '-' unless IsCome = 1.
         , CASE WHEN tblTrips.IsCome = 1
                THEN CONVERT(VARCHAR(24), tblTrips.ComeDate, 105)
                ELSE '-'
           END AS ComeDate
         , CASE WHEN tblTrips.IsCome = 1
                THEN tblTrips.ComeTime
                ELSE '-'
           END AS ComeTime
         -- Round trips list the start district again at the end.
         , CASE WHEN tblTrips.IsCome = 1
                THEN (SD.DistrictName + ' - ' + DD.DistrictName + ' - ' + SD.DistrictName)
                ELSE (SD.DistrictName + ' - ' + DD.DistrictName)
           END AS Destination
         , tblSupervisors.Name AS Supervisor
         , tblDrivers.Name AS Driver
         , tblTrips.AdvanceAmount
         , tblTrips.AdvanceDescription
    FROM tblTrips
    INNER JOIN tblSupervisors ON tblTrips.SuperVisorId = tblSupervisors.SupervisorId
    INNER JOIN tblDrivers ON tblTrips.DriverId = tblDrivers.DriverId
    INNER JOIN tblDistricts SD ON tblTrips.StartDistrictId = SD.DistrictId
    INNER JOIN tblDistricts DD ON tblTrips.DestinationDistrictId = DD.DistrictId
    INNER JOIN tblVehicles ON tblTrips.VehicleId = tblVehicles.VehicleId
)
-- Deductions: one row per trip deduction with a display label. It is
-- declared before DeductionTotals because a CTE can only reference CTEs
-- defined earlier in the same WITH list.
, Deductions AS (
    SELECT tblTrips.TripId
         , tblTripDeductions.Amount
         , CONVERT(VARCHAR(400), tblDeductionTypes.DeductionType + ' - ' + tblTripDeductions.Description + ' - ' + CONVERT(VARCHAR(24), tblTripDeductions.Amount)) AS DeductionFor
    FROM tblTrips
    INNER JOIN tblTripDeductions ON tblTrips.TripId = tblTripDeductions.TripId
    INNER JOIN tblDeductionTypes ON tblTripDeductions.DeductionId = tblDeductionTypes.DeductionId
)
-- DeductionTotals: per trip, the summed amount and the concatenated labels.
-- It reads from Deductions, the only CTE that defines Amount/DeductionFor.
, DeductionTotals AS (
    SELECT d.TripId
         , SUM(d.Amount) AS Amount
         , STUFF((
               SELECT '#', ' ' + CONVERT(VARCHAR(MAX), d2.DeductionFor)
               FROM Deductions AS d2
               WHERE d.TripId = d2.TripId
               FOR XML PATH(''))
           , 1, 1, '') AS [Description]
    FROM Deductions AS d
    GROUP BY d.TripId
)
-- The INNER JOIN returns only trips that have at least one deduction.
SELECT td.TripId
     , td.DestinationDistrictId
     , td.VehicleId
     , td.No
     , td.VehicleNo
     , td.CoachNo
     , td.GoDate
     , td.GoTime
     , td.ComeDate
     , td.ComeTime
     , td.Destination
     , td.Supervisor
     , td.Driver
     , td.AdvanceAmount
     , td.AdvanceDescription
     , dt.Amount
     , dt.[Description]
FROM TripDetails AS td
INNER JOIN DeductionTotals AS dt ON td.TripId = dt.TripId

Call one CTE in another CTE

How could I call a CTE in another CTE ?
-- cte1: rows of City whose name matches 'são paulo' under a
-- case-insensitive, accent-insensitive collation.
WITH cte1
AS (
SELECT City.*
FROM City
WHERE (City.CityName COLLATE SQL_Latin1_General_CP1_CI_AI) LIKE 'são paulo'
)
-- cte2: ids of Imovel rows at one specific address.
, cte2
AS (
SELECT Imovel.Imovel_Id
FROM Imovel
WHERE Imovel.Number = 311
AND Imovel.ZIPCode = '30280490'
AND Imovel.Complement = ''
AND Imovel.Street = 'Do furquim'
-- The line below fails: cte1 is referenced without appearing in this
-- query's FROM clause, so cte1.City_Id cannot be resolved.
AND Imovel.City_Id = cte1.City_Id
)
You have to join both like with a normal table:
-- matched_city rows come from cte1: cities whose name matches 'são paulo'
-- under a case- and accent-insensitive collation.
WITH cte1 AS (
    SELECT city.*
    FROM city
    WHERE (city.cityname COLLATE sql_latin1_general_cp1_ci_ai) LIKE 'são paulo'
),
-- cte2: ids of properties at the given address located in one of those
-- cities. cte1 is joined like an ordinary table.
cte2 AS (
    SELECT imovel.imovel_id
    FROM imovel
    INNER JOIN cte1
        ON imovel.city_id = cte1.city_id
    WHERE imovel.number = 311
      AND imovel.zipcode = '30280490'
      AND imovel.complement = ''
      AND imovel.street = 'Do furquim'
)
SELECT *
FROM cte2
Note that I have appended SELECT * FROM cte2, since CTEs cannot "stand" alone; they must be followed by a statement that uses them.

Difference between IN and JOIN

I was wondering what was the difference between the two queries mentioned below, because the first one takes more than 10 seconds to execute on the server and the second one executes in less than one second...
UPDATE - I
Here are the actual queries and their Execution Plans as copied and pasted from SQL Server (as is), sorry for any inconvenience caused by my previous queries... :(
-- Distinct management-plan dates (formatted dd-Mon-yyyy), newest first.
-- The four OR branches are wrapped in ONE parenthesised group: AND binds
-- tighter than OR, so without the group the Deleted / PatientDeptID filters
-- applied only to the last branch.
SELECT REPLACE(CONVERT(VARCHAR(11), m.PlanDate, 106), ' ', '-') AS ManagmentPlanDate
FROM ManagmentPlan m
INNER JOIN Product p ON p.Product_ID = m.ProductID
INNER JOIN Category c ON c.C_ID = p.C_ID
LEFT OUTER JOIN Employee e ON e.emp_no = m.PrescribedBy
LEFT OUTER JOIN dbo.Issue_Stock i ON i.serial_no = m.IssueStockID
INNER JOIN dbo.Units u ON u.U_ID = p.U_ID
WHERE m.Deleted != 1
  AND m.PatientDeptID = #PatientDeptID
  AND (
        -- Get only current admission TP
        ( #PatientID IS NULL
          AND #VisitID IS NULL
          AND m.WardRegNo = #WardRegNo
        )
        OR
        -- Get only current OPD visit TP
        ( #PatientID IS NULL
          AND #WardRegNo IS NULL
          AND VisitID = #VisitID
          AND m.WardRegNo IS NULL
        )
        OR
        -- Get all visits TP
        ( #WardRegNo IS NULL
          AND #VisitID IS NULL
          AND visitid IN ( SELECT id
                           FROM PatientVisit
                           WHERE PatientID = #PatientID )
        )
        OR
        -- Get current OPD visit and current admission TP (both)
        ( #PatientID IS NULL
          AND #VisitID IS NOT NULL
          AND #WardRegNo IS NOT NULL
          AND ( VisitID = #VisitID
                OR m.WardRegNo = #WardRegNo )
        )
      )
GROUP BY REPLACE(CONVERT(VARCHAR(11), m.PlanDate, 106), ' ', '-')
-- The formatted string is cast back to DATETIME so the sort is chronological.
ORDER BY CAST(REPLACE(CONVERT(VARCHAR(11), m.PlanDate, 106), ' ', '-') AS DATETIME) DESC
and
-- Distinct management-plan dates (formatted dd-Mon-yyyy), newest first,
-- using IN filters instead of joins.
-- The four OR branches are wrapped in ONE parenthesised group: AND binds
-- tighter than OR, so without the group the IN filters applied only to the
-- first branch and the Deleted / PatientDeptID filters only to the last.
-- NOTE(review): the IN filters on PrescribedBy and IssueStockID drop rows
-- with no match, unlike a LEFT OUTER JOIN - confirm that is intended.
SELECT REPLACE(CONVERT(VARCHAR(11), m.PlanDate, 106), ' ', '-') AS ManagmentPlanDate
FROM ManagmentPlan m
WHERE m.ProductID IN ( SELECT Product_ID
                       FROM Product
                       WHERE C_ID IN ( SELECT C_ID
                                       FROM Category )
                         AND U_ID IN ( SELECT U_ID
                                       FROM Units ) )
  AND m.PrescribedBy IN ( SELECT Emp_no
                          FROM Employee )
  AND m.IssueStockID IN ( SELECT Serial_No
                          FROM Issue_Stock )
  AND m.Deleted != 1
  AND m.PatientDeptID = #PatientDeptID
  AND (
        -- Get only current admission TP
        ( #PatientID IS NULL
          AND #VisitID IS NULL
          AND m.WardRegNo = #WardRegNo
        )
        OR
        -- Get only current OPD visit TP
        ( #PatientID IS NULL
          AND #WardRegNo IS NULL
          AND VisitID = #VisitID
          AND m.WardRegNo IS NULL
        )
        OR
        -- Get all visits TP
        ( #WardRegNo IS NULL
          AND #VisitID IS NULL
          AND visitid IN ( SELECT id
                           FROM PatientVisit
                           WHERE PatientID = #PatientID )
        )
        OR
        -- Get current OPD visit and current admission TP (both)
        ( #PatientID IS NULL
          AND #VisitID IS NOT NULL
          AND #WardRegNo IS NOT NULL
          AND ( VisitID = #VisitID
                OR m.WardRegNo = #WardRegNo )
        )
      )
GROUP BY REPLACE(CONVERT(VARCHAR(11), m.PlanDate, 106), ' ', '-')
-- The formatted string is cast back to DATETIME so the sort is chronological.
ORDER BY CAST(REPLACE(CONVERT(VARCHAR(11), m.PlanDate, 106), ' ', '-') AS DATETIME) DESC
Although this solved my speed problem, I was just curious as to what exactly the difference between those two queries is, as I thought the first one translates to the second one...
UPDATE - I
As you can see, both queries differ in only the JOINS converted to IN statements...
For one, your first statement retrieves all matching records from both the Products and Category tables whereas your second statement only retrieves all matching rows from Products.
What is the performance difference if you change your first statement to
-- Product columns only; Category is joined purely to restrict (and possibly
-- repeat) product rows by matching CatNo.
SELECT
    p.*
FROM Products AS p
INNER JOIN Category AS c
    ON p.CatNo = c.CatNo
Edit
(as mentioned by Martin) note that the number of rows is only identical for both statements if CatNo is unique in the Category table. The INNER JOIN will return as many records as there are in the Category table whereas the IN statement will return as many records as there are unique CatNo in the Category table.
An in clause filters the rows that come back from product. An inner join adds columns from category to the select statement output.

How can I use Sql to Order By This Statement?

How can I order the list 'widgets_spec' by number of widgets?
-- Per p_c_id (for variation 4): an HTML fragment with one <li> per distinct
-- non-null widgets value. FOR XML PATH('') concatenates the rows, and
-- TYPE + .value() returns the text as nvarchar(max).
SELECT DISTINCT
    m.p_c_id,
    (
        SELECT DISTINCT '<li>' + CONVERT(varchar, widgets) + '<br> '
        FROM dbo.spec_master AS m2
        WHERE m.p_c_id = m2.p_c_id
          AND widgets IS NOT NULL
        FOR XML PATH(''), TYPE
    ).value('.[1]', 'nvarchar(max)') AS widgets_spec
FROM dbo.spec_master AS m
INNER JOIN dbo.ProductVaration AS pv
    ON pv.p_c_id = m.p_c_id
INNER JOIN dbo.Varation AS v
    ON v.varation_id = pv.varation_type_id
WHERE v.varation_id = 4
GROUP BY m.p_c_id
Right now output looks like:
<li>10<br> <li>12<br> <li>15<br> <li>8<br>
When I want it to look like:
<li>8<br> <li>10<br> <li>12<br> <li>15<br>
Thanks for your help.
EDIT: I'm trying to order the internal select statement that concatenates the values.
You do not need both Distinct and Group By. You should use one or the other. In this case, I believe you have to use Group By for it to work.
-- Per p_c_id (for variation 4): an HTML <li> fragment listing the distinct
-- non-null num_of_lights values. GROUP BY de-duplicates the values and
-- ORDER BY sorts on the raw column rather than on its text form.
SELECT
    m.p_c_id,
    (
        SELECT '<li>' + CAST(m2.num_of_lights AS varchar(10)) + '<br /> '
        FROM dbo.spec_master AS m2
        WHERE m.p_c_id = m2.p_c_id
          AND m2.num_of_lights IS NOT NULL
        GROUP BY m2.num_of_lights
        ORDER BY m2.num_of_lights
        FOR XML PATH(''), TYPE
    ).value('.[1]', 'nvarchar(max)') AS numLights_spec
FROM dbo.spec_master AS m
INNER JOIN dbo.ProductVaration AS pv
    ON pv.p_c_id = m.p_c_id
INNER JOIN dbo.Varation AS v
    ON v.varation_id = pv.varation_type_id
WHERE v.varation_id = 4
GROUP BY m.p_c_id
-- Per p_c_id (for variation 4): an HTML <li> fragment listing the distinct
-- non-null num_of_lights values in ascending order.
-- Fixes over the snippet as posted:
--   * FOR XML PATH is restored; without it the scalar subquery returned one
--     row per value and could not be concatenated.
--   * GROUP BY replaces SELECT DISTINCT so ORDER BY may use the raw column;
--     ordering by the varchar conversion put '8' after '15'.
--   * The stray ") As SubA" with no opening parenthesis is removed.
-- NOTE(review): the sort is numeric only if num_of_lights has a numeric
-- type - confirm against the table definition.
SELECT
    m.p_c_id,
    (
        SELECT '<li>' + CONVERT(varchar(30), m2.num_of_lights) + '<br> '
        FROM dbo.spec_master AS m2
        WHERE m.p_c_id = m2.p_c_id
          AND m2.num_of_lights IS NOT NULL
        GROUP BY m2.num_of_lights
        ORDER BY m2.num_of_lights
        FOR XML PATH(''), TYPE
    ).value('.[1]', 'nvarchar(max)') AS numLights_spec
FROM dbo.spec_master AS m
INNER JOIN dbo.ProductVaration AS pv
    ON pv.p_c_id = m.p_c_id
INNER JOIN dbo.Varation AS v
    ON v.varation_id = pv.varation_type_id
WHERE v.varation_id = 4
GROUP BY m.p_c_id
Some of the other answers here won't work, since ordering by the now-varchar num_of_lights will put '8' after '15' as is happening now. You want to order the numLights numerically, which isn't going to happen with those html tags around them. You can add a subselect to your subselect so that you order them, then select them with the tags around them. Example (not tested):
-- Per p_c_id (for variation 4): an HTML <li> fragment listing the distinct
-- non-null num_of_lights values in ascending order.
-- The derived table m2 de-duplicates (p_c_id, num_of_lights) first, so the
-- concatenating query needs no DISTINCT and can ORDER BY the raw column.
-- Fixes over the snippet as posted:
--   * ORDER BY moved out of the derived table, where SQL Server rejects it
--     without TOP and where it would not guarantee output order anyway.
--   * The unclosed "SELECT * FROM (" wrapper is dropped.
-- NOTE(review): the sort is numeric only if num_of_lights has a numeric
-- type - confirm against the table definition.
SELECT
    m.p_c_id,
    (
        SELECT '<li>' + CONVERT(varchar(30), m2.num_of_lights) + '<br> '
        FROM (
            SELECT DISTINCT
                sm.p_c_id,
                sm.num_of_lights
            FROM dbo.spec_master AS sm
            WHERE sm.num_of_lights IS NOT NULL
        ) AS m2
        WHERE m.p_c_id = m2.p_c_id
        ORDER BY m2.num_of_lights
        FOR XML PATH(''), TYPE
    ).value('.[1]', 'nvarchar(max)') AS numLights_spec
FROM dbo.spec_master AS m
INNER JOIN dbo.ProductVaration AS pv
    ON pv.p_c_id = m.p_c_id
INNER JOIN dbo.Varation AS v
    ON v.varation_id = pv.varation_type_id
WHERE v.varation_id = 4
GROUP BY m.p_c_id
Personally, I'd just add the html tags in whatever back-end code is getting the result of the query.