SQL: How to get distinct values out of string_Agg() function? - sql

The following code
SELECT
DISTINCT(p.ID) AS ID
, PIT.Code AS Code
, year(PT.Date) AS Year
FROM fact.PreT PT
INNER JOIN dim.ProdIType PIT
ON PIT.ProdITypeSKey = PT.ProdITypeSKey
INNER JOIN dim.Proudct P
ON P.ProductSKey = pt.ProductSKey
WHERE p.ID = '15'
GROUP BY p.ID, PIT.Code, PT.Year
returns the following:
I have reconfigured my script to add aggregate and group the codes by id and year, however duplicates are spotted. Code and output below:
SELECT
DISTINCT(p.ID) AS ID
, string_agg(PIT.Code, ',') AS Code
, year(PT.Date) AS Year
FROM fact.PreT PT
INNER JOIN dim.ProdIType PIT
ON PIT.ProdITypeSKey = PT.ProdITypeSKey
INNER JOIN dim.Proudct P
ON P.ProductSKey = pt.ProductSKey
WHERE p.ID = '15'
GROUP BY p.ID, PT.Year
Result:
Desired output - distinct and ordered code ascending:
Can someone explain why string_acc is duplicating codes? how should I tackle this issue?

You need to subquery it and group again. Note that DISTINCT is not a function, it acts over the whole resultset, and is the same as grouping by all column.
SELECT
ID
, string_agg(Code, ',') AS Code
, [Year]
FROM (
SELECT
p.ID
, PIT.Code AS Code
, year(PT.Date) AS Year
FROM fact.PreT PT
INNER JOIN dim.ProdIType PIT
ON PIT.ProdITypeSKey = PT.ProdITypeSKey
INNER JOIN dim.Proudct P
ON P.ProductSKey = pt.ProductSKey
WHERE p.ID = '15'
GROUP BY p.ID, year(PT.Date), PIT.Code
) p
GROUP BY p.ID, PT.Year;

Related

Access Subquery On mulitple conditions

This SQL query needs to be done in ACCESS.
I am trying to do a subquery on the total sales, but I want to link the sale to the province AND to product. The below query will work with one or the other: (po.product_name = allp.all_products) AND (p.province = allp.all_province); -- but it will no take both.
I will be including every month into this query, once I can figure out the subquery on with two criteria.
Select
p.province as [Province],
po.product_name as [Product],
all_price
FROM
(purchase_order po
INNER JOIN person p
on p.person_id = po.person_id)
left join
(
select
po1.product_name AS [all_products],
sum(pp1.price) AS [all_price],
p1.province AS [all_province]
from (purchase_order po1
INNER JOIN product pp1
on po1.product_name = pp1.product_name)
INNER JOIN person p1
on po1.person_id = p1.person_id
group by po1.product_name, pp1.price, p1.province
)
as allp
on (po.product_name = allp.all_products) AND (p.province = allp.all_province);
Make the first select sql into a table by giving it an alias and join table 1 to table 2. I don't have your table structure or data to test it but I think this will lead you down the right path:
select table1.*, table2.*
from
(Select
p.province as [Province],
po.product_name as [Product]
--removed this ,all_price
FROM
(purchase_order po
INNER JOIN person p
on p.person_id = po.person_id) table1
left join
(
select
po1.product_name AS [all_products],
sum(pp1.price) AS [all_price],
p1.province AS [all_province]
from (purchase_order po1
INNER JOIN product pp1
on po1.product_name = pp1.product_name)
INNER JOIN person p1
on po1.person_id = p1.person_id
group by po1.product_name, pp1.price, p1.province --check your group by, I dont think you want pp1.price here if you want to aggregate
) as table2 --changed from allp
on (table1.product = table2.all_products) AND (table1.province = table2.all_province);

How to display distinct values based on MAX date in report builder?

I'm quite new to SQL and I hope you can help me.
I'm trying to retrieve unique values from my table based on the latest date where specific users are selected.
This is the data:
Raw Data
And this is what I'm looking to achieve:
Desired Data
I tried to write 2 queries but unfortunately:
My 1st query would display duplicated rows for each company:
SELECT DISTINCT FilteredAppointment.regardingobjectidname ,FilteredAppointment.owneridname ,FilteredAppointment.subject ,MAX(FilteredAppointment.scheduledstart) as Date ,FilteredAppointment.location ,FilteredCcx_member.ccx_mnemonic FROM FilteredAppointment INNER JOIN FilteredAccount ON FilteredAppointment.regardingobjectid = FilteredAccount.accountid INNER JOIN FilteredCcx_member ON FilteredAccount.accountid = FilteredCcx_member.ccx_accountid WHERE FilteredAppointment.statecodename != N'Canceled' AND FilteredAppointment.owneridname IN (N'User1', N'User2', N'User3') GROUP BY FilteredAppointment.regardingobjectidname ,FilteredAppointment.owneridname ,FilteredAppointment.subject ,FilteredAppointment.scheduledstart ,FilteredAppointment.location ,FilteredCcx_member.ccx_mnemonic ORDER BY FilteredAppointment.regardingobjectidname
And my 2nd query would display one row only:
SELECT DISTINCT FilteredAppointment.regardingobjectidname ,FilteredAppointment.owneridname ,FilteredAppointment.subject ,FilteredAppointment.scheduledstart ,FilteredAppointment.location ,FilteredCcx_member.ccx_mnemonic FROM FilteredAppointment INNER JOIN FilteredAccount ON FilteredAppointment.regardingobjectid = FilteredAccount.accountid INNER JOIN FilteredCcx_member ON FilteredAccount.accountid = FilteredCcx_member.ccx_accountid WHERE FilteredAppointment.scheduledstart = (SELECT MAX(FilteredAppointment.scheduledstart) FROM FilteredAppointment WHERE FilteredAppointment.regardingobjectidname = FilteredAppointment.regardingobjectidname) AND FilteredAppointment.statecodename != N'Canceled' AND FilteredAppointment.owneridname IN (N'User1', N'User2', N'User3') GROUP BY FilteredAppointment.regardingobjectidname ,FilteredAppointment.owneridname ,FilteredAppointment.subject ,FilteredAppointment.scheduledstart ,FilteredAppointment.location ,FilteredCcx_member.ccx_mnemonic ORDER BY FilteredAppointment.regardingobjectidname
Try this:-
SELECT distinct a.date, a.company, a.companyID, a.User, a.Location, a.topic
FROM tablename a
inner join
(
Select company, companyID, User, max(date) as recent_date
from
tablename
group by company, companyID, User
) b
on a.date=b.recent_date and a.company=b.company and a.companyID=b.companyID
and a.User=b.User;
I managed to solve the issue - Thank you for the help again!
WITH apptmts AS (SELECT TOP 1 WITH TIES fa.scheduledstart,fa.location,fa.regardingobjectidname,mem.ccx_mnemonic,fa.owneridname,fa.subject FROM FilteredAppointment fa JOIN FilteredAccount acc on fa.regardingobjectid = acc.accountid JOIN FilteredCcx_member mem ON acc.accountid = mem.ccx_accountid WHERE fa.statecodename != N'Canceled' AND fa.owneridname IN (N'User1', N'User2', N'User3') ORDER BY ROW_NUMBER() OVER(PARTITION BY fa.regardingobjectidname ORDER BY fa.scheduledstart DESC) ) SELECT * FROM apptmts ORDER BY scheduledstart DESC

Column 'Pay_records.ActualTime' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause

I am trying to get the TimeCodeTotal per TimeCode. SQL Server doesn't like my current setup and wants me to add PR.Actual time to the GROUP BY.
This is not a solution because adding PR.ActualTime to the GROUP BY changes the results to incorrect values;
I only want to GROUP BY the values provided in the query below.
Is there another way to seperate by timecode without using the OVER (BY PARTITION) clause?
SELECT YEAR(PR.date) AS TimeYear
,SUBSTRING(DATENAME(MONTH,DATEADD(MONTH,MONTH(PR.Date),0)- 1),1,3) AS TimeMonth
,TC.TimeCode
,SUM(PR.ActualTime) OVER (PARTITION BY TC.TimeCode) AS TimeCodeTotal
,SUM(PR.ActualTime) AS MonthTotal
FROM Pay_records PR
INNER JOIN Employee E ON E.employee_no = PR.employee_no
INNER JOIN Dept_Names D ON D.Dept = E.department
INNER JOIN DepartmentTimeCategory DTC ON DTC.Dept = D.Dept
INNER JOIN TimeCategory TMC ON TMC.ID = DTC.TimeCategoryID
INNER JOIN TimeCode TC ON TC.TimeCodeID = TMC.TimeCodeID
WHERE PR.DATE BETWEEN '01/01/2015' AND '02/17/16'
AND D.Dept IN ('02Z103')
AND E.Billable IN (1,0)
GROUP BY YEAR(PR.date), MONTH(PR.date), TC.TimeCode
ORDER BY YEAR(PR.date), MONTH(PR.date)
I think an easy fix is to remove SUM(PR.ActualTime) OVER (PARTITION BY TC.TimeCode) AS TimeCodeTotal from the query, wrap the whole thing in a CTE, and then select everything from the CTE and add a SUM(TimeCodeTotal).
;WITH CTE AS (
SELECT YEAR(PR.date) AS TimeYear
,SUBSTRING(DATENAME(MONTH,DATEADD(MONTH,MONTH(PR.Date),0)- 1),1,3) AS TimeMonth
,TC.TimeCode
,SUM(PR.ActualTime) AS TimeCodeSubTotal
FROM Pay_records PR
INNER JOIN Employee E ON E.employee_no = PR.employee_no
INNER JOIN Dept_Names D ON D.Dept = E.department
INNER JOIN DepartmentTimeCategory DTC ON DTC.Dept = D.Dept
INNER JOIN TimeCategory TMC ON TMC.ID = DTC.TimeCategoryID
INNER JOIN TimeCode TC ON TC.TimeCodeID = TMC.TimeCodeID
WHERE PR.DATE BETWEEN '01/01/2015' AND '02/17/16'
AND D.Dept IN ('02Z103')
AND E.Billable IN (1,0)
GROUP BY YEAR(PR.date)
,MONTH(PR.date)
,TC.TimeCode
)
SELECT TimeYear
, TimeMonth
, TimeCode
, TimeCodeSubTotal
, Sum(TimeCodeSubTotal)
FROM CTE
GROUP BY TimeYear
, TimeMonth
, TimeCode
, TimeCodeSubTotal
ORDER BY YEAR(PR.date)
, MONTH(PR.date)

Get Distinct results of all columns based on MAX DATE of one

Using SQL Server 2012
I have seen a few threads about this topic but I can't find one that involves multiple joins in the query. I can't create a VIEW on this database so the joins are needed.
The Query
SELECT
p.Price
,s.Type
,s.Symbol
, MAX(d.Date) Maxed
FROM AdventDW.dbo.FactPrices p
INNER JOIN dbo.DimSecurityMaster s
ON s.SecurityID = p.SecurityID
INNER JOIN dbo.DimDateTime d
ON
p.DateTimeKey = d.DateTimeKey
GROUP BY p.Price ,
s.Type ,
s.Symbol
ORDER BY s.Symbol
The query works but does not produce distinct results. I am using Order by to validate the results, but it is not required once I get it working. I The result set looks like this.
Price Type Symbol Maxed
10.57 bfus *bbkd 3/31/1989
10.77 bfus *bbkd 2/28/1990
100.74049 cbus 001397AA6 8/2/2005
100.8161 cbus 001397AA6 7/21/2005
The result set I want is
Price Type Symbol Maxed
10.77 bfus *bbkd 2/28/1990
100.74049 cbus 001397AA6 8/2/2005
Here were a few other StackOverflow threads I tried but couldn't get t work with my specific query
How can I SELECT rows with MAX(Column value), DISTINCT by another column in SQL?
SQL Selecting distinct rows from multiple columns based on max value in one column
If you want data for the maximum date, use row_number() rather than group by:
SELECT ts.*
FROM (SELECT p.Price, s.Type, s.Symbol, d.Date,
ROW_NUMBER() OVER (PARTITION BY s.Type, s.Symbol
ORDER BY d.Date DESC
) as seqnum
FROM AdventDW.dbo.FactPrices p INNER JOIN
dbo.DimSecurityMaster s
ON s.SecurityID = p.SecurityID INNER JOIN
dbo.DimDateTime d
ON p.DateTimeKey = d.DateTimeKey
) ts
WHERE seqnum = 1
ORDER BY s.Symbol;
You should use a derived table since you really only want to group the DateTimeKey table to get the MAX date.
SELECT p.Price ,
s.Type ,
s.Symbol ,
tmp.MaxDate
FROM AdventDW.dbo.FactPrices p
INNER JOIN dbo.DimSecurityMaster s ON s.SecurityID = p.SecurityID
INNER JOIN
( SELECT MAX(d.Date) AS MaxDate ,
d.DateTimeKey
FROM dbo.DimDateTime d
GROUP BY d.DateTimeKey ) tmp ON p.DateTimeKey = tmp.DateTimeKey
ORDER BY s.Symbol;
/*
this is your initial select which is fine because this is base from your original criteria,
I cannot ignore this so i'll keep this in-tact. Instead from here i'll create a temp
*/
SELECT
p.Price
, s.Type
, s.Symbol
, MAX(d.Date) Maxed
INTO #tmpT
FROM AdventDW.dbo.FactPrices p
INNER JOIN dbo.DimSecurityMaster s
ON s.SecurityID = p.SecurityID
INNER JOIN dbo.DimDateTime d
ON p.DateTimeKey = d.DateTimeKey
GROUP BY p.Price ,
s.Type ,
s.Symbol
ORDER BY s.Symbol
SELECT innerTable.Price, innerTable.Symbol, innerTable.Type, innerTable.Maxed
FROM (
SELECT
ROW_NUMBER () OVER (PARTITION BY t1.Symbol, t1.Type, t1.Maxed ORDER BY t1.Maxed DESC) as row
, *
FROM #tmpT AS t1
) AS innerTable
WHERE row = 1
DROP TABLE #tmpT

use field in sql join where clause

I am trying to write a crystal report using a sql statement because it runs much much faster. But I am having trouble with some of the linkings. I need to use the result of a link for criteria in subsequent links.
Ok, here is a sample of what my statement looks like:
(The lines marked with ** are the lines in question)
SELECT
Part.PartNum,
Cust.CustNum,
Cust.CustID,
YTD.Qty
FROM
(
SELECT
Pub.Part.PartNum,
Pub.Part.UserChar1 AS CustID
FROM
Pub.Part
) AS Part
LEFT OUTER JOIN (
SELECT
Pub.Customer.CustID,
Pub.Customer.CustNum,
Pub.Customer.Name
FROM
Pub.Customer
WHERE
Pub.Customer.CustID = '1038'
) AS Cust
ON Part.CustID = Cust.CustID
LEFT OUTER JOIN (
SELECT
Pub.OrderDtl.PartNum,
Sum(Pub.OrderDtl.OrderQty) AS Qty
FROM
Pub.OrderHed JOIN Pub.OrderDtl ON
Pub.OrderHed.OrderNum = Pub.OrderDtl.OrderNum
WHERE
**Pub.OrderHed.CustNum = Cust.CustNum AND**
**Pub.OrderDtl.PartNum = Part.PartNum AND**
YEAR(Pub.OrderHed.OrderDate)=YEAR(CURDATE())
GROUP BY
Pub.OrderDtl.PartNum
) AS YTD ON Part.PartNum = YTD.PartNum
Now, I get an error that says:
Part.PartNum cannot be found or is not specified for the query.
I get the same error for Cust.CustNum. Will you help me figure out what I am doing wrong? Thanks!
The problem is that you are using one of the aliases, inside of a sub-query which you cannot do. You will have to do something similar to this:
SELECT Part.PartNum,
Cust.CustNum,
Cust.CustID,
YTD.Qty
FROM
(
SELECT Pub.Part.PartNum,
Pub.Part.UserChar1 AS CustID
FROM Pub.Part
) AS Part
LEFT OUTER JOIN
(
SELECT Pub.Customer.CustID,
Pub.Customer.CustNum,
Pub.Customer.Name
FROM Pub.Customer
WHERE Pub.Customer.CustID = '1038'
) AS Cust
ON Part.CustID = Cust.CustID
LEFT OUTER JOIN
(
SELECT Pub.OrderDtl.PartNum,
Sum(Pub.OrderDtl.OrderQty) AS Qty,
Pub.OrderHed.CustNum
FROM Pub.OrderHed
JOIN Pub.OrderDtl
ON Pub.OrderHed.OrderNum = Pub.OrderDtl.OrderNum
WHERE YEAR(Pub.OrderHed.OrderDate)=YEAR(CURDATE())
GROUP BY Pub.OrderDtl.PartNum, Pub.OrderHed.CustNum
) AS YTD
ON Part.PartNum = YTD.PartNum
AND Cust.CustNum = YTD.CustNum
Looking at your query more, you can actually get rid of two of the subqueries:
SELECT Part.PartNum,
Cust.CustNum,
Cust.CustID,
YTD.Qty
FROM Pub.Part Part
LEFT OUTER JOIN Pub.Customer Cust
ON Part.CustID = Cust.CustID
AND Cust.CustID = '1038'
LEFT OUTER JOIN
(
SELECT d.PartNum,
Sum(d.OrderQty) AS Qty,
h.CustNum
FROM Pub.OrderHed h
JOIN Pub.OrderDtl d
ON h.OrderNum = d.OrderNum
WHERE YEAR(h.OrderDate)=YEAR(CURDATE())
GROUP BY d.PartNum
) AS YTD
ON Part.PartNum = YTD.PartNum
AND Cust.CustNum = YTD.CustNum
This is because you can't access a parent sub-query (cust, part) within another sub-query (YTD)
However, the solution is easy in your case, filter in the ON clause instead:
SELECT
Part.PartNum,
Cust.CustNum,
Cust.CustID,
YTD.Qty
FROM
(
SELECT
Pub.Part.PartNum,
Pub.Part.UserChar1 AS CustID
FROM
Pub.Part
) AS Part
LEFT OUTER JOIN (
SELECT
Pub.Customer.CustID,
Pub.Customer.CustNum,
Pub.Customer.Name
FROM
Pub.Customer
WHERE
Pub.Customer.CustID = '1038'
) AS Cust
ON Part.CustID = Cust.CustID
LEFT OUTER JOIN (
SELECT
Pub.OrderDtl.PartNum,
Sum(Pub.OrderDtl.OrderQty) AS Qty,
Pub.OrderHed.CustNum
FROM
Pub.OrderHed JOIN Pub.OrderDtl ON
Pub.OrderHed.OrderNum = Pub.OrderDtl.OrderNum
WHERE
YEAR(Pub.OrderHed.OrderDate)=YEAR(CURDATE())
GROUP BY
Pub.OrderDtl.PartNum
) AS YTD ON Part.PartNum = YTD.PartNum AND Cust.CustNum = YTD.CustNum