SQL select distinct from a concatenated column - sql-server-2016

This query almost does what I want
SELECT staging.dbo.ITEM_CODES.ITEM_CODE, MAX(dbo.OC_VDAT_AUX.UDL40) AS SAMPLEDATE,
CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)) as LinkID
FROM dbo.OC_VDATA
INNER JOIN dbo.OC_VDAT_AUX ON dbo.OC_VDATA.PARTNO = dbo.OC_VDAT_AUX.PARTNOAUX AND dbo.OC_VDATA.DATETIME = dbo.OC_VDAT_AUX.DATETIMEAUX
INNER JOIN stagingPLM.dbo.ITEM_CODES ON LEFT(dbo.OC_VDATA.PARTNO, 12) = staging.dbo.ITEM_CODES.SPEC_NO
AND LEFT(dbo.OC_VDAT_AUX.PARTNOAUX, 12) = stagingPLM.dbo.ITEM_CODES.SPEC_NO
INNER JOIN stagingPLM.dbo.PLANTS ON dbo.OC_VDATA.UDL1 = staging.dbo.PLANTS.PLANT_CODE
WHERE (CONVERT(DATETIME, dbo.OC_VDAT_AUX.UDL40) > DATEADD(day, - 30, GETDATE()))
GROUP BY CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)),staging.dbo.ITEM_CODES.ITEM_CODE
Sample Table generated by query:
The end result that I am trying to achieve is the latest ITEM_CODE per unique LinkID Note the first and last rows in the table. The last row should not be pulled by the query.
How do I modify this query to make that happen?
I have tried various placements for DISTINCT and sub queries in the select and where statements.

I would do in your case with ROW_NUMBER window function and CTE.
Solution can be like this:
WITH FilterCTE AS
(
SELECT staging.dbo.ITEM_CODES.ITEM_CODE, MAX(dbo.OC_VDAT_AUX.UDL40) AS SAMPLEDATE,
CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)) AS LinkID,
ROW_NUMBER() OVER (PARTITION BY CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)) ORDER BY MAX(dbo.OC_VDAT_AUX.UDL40)) AS RowNumber
FROM dbo.OC_VDATA
INNER JOIN dbo.OC_VDAT_AUX ON dbo.OC_VDATA.PARTNO = dbo.OC_VDAT_AUX.PARTNOAUX AND dbo.OC_VDATA.DATETIME = dbo.OC_VDAT_AUX.DATETIMEAUX
INNER JOIN stagingPLM.dbo.ITEM_CODES ON LEFT(dbo.OC_VDATA.PARTNO, 12) = staging.dbo.ITEM_CODES.SPEC_NO
AND LEFT(dbo.OC_VDAT_AUX.PARTNOAUX, 12) = stagingPLM.dbo.ITEM_CODES.SPEC_NO
INNER JOIN stagingPLM.dbo.PLANTS ON dbo.OC_VDATA.UDL1 = staging.dbo.PLANTS.PLANT_CODE
WHERE (CONVERT(DATETIME, dbo.OC_VDAT_AUX.UDL40) > DATEADD(day, - 30, GETDATE()))
GROUP BY CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)),staging.dbo.ITEM_CODES.ITEM_CODE
)
SELECT *
FROM FilterCTE
WHERE RowNumber = 1

Related

Left Join without duplicate values from the left table when multiple values in right table

The Following query brings back a list of IDs and a list of times. It's bringing back multimple from the Left table because it appears multiple in the right table.
It Currently brings back this:
I want it to bring back the minimum value from the right table for each ID in the left table.
Any help appreciated!
set language british
SELECT dbo.Employee.EmployeeID, SUBSTRING(CONVERT(varchar, ClockTemp.ClockTime, 108), 1, 5) as ClockTime
FROM dbo.Employee LEFT JOIN
(
Select [ClockID]
,[EmployeeID]
,[ClockTypeID]
,[ClockDate]
,[ClockTime]
FROM dbo.Clock
WHERE dbo.Clock.ClockDate = DATEADD(DD, 0, CAST('07-11-2016' AS DATE)) AND Clock.ClockTypeID=1
) As ClockTemp
ON dbo.Employee.EmployeeID = ClockTemp.EmployeeID
ORDER BY dbo.Employee.EmployeeID,clocktemp.ClockTime
Maybe this query would work for you. You do not need to order by Clocktime as the minimum value will be brought through from the MIN aggregate.
I would also change the Clocktime field to TIME if you are using SQL 2008 so that you can avoid the substring convert.
set language british
SELECT dbo.Employee.EmployeeID, SUBSTRING(CONVERT(varchar, MIN(ClockTemp.ClockTime), 108), 1, 5) as ClockTime
FROM dbo.Employee LEFT JOIN
(
Select [ClockID]
,[EmployeeID]
,[ClockTypeID]
,[ClockDate]
,[ClockTime]
FROM dbo.Clock
WHERE dbo.Clock.ClockDate = DATEADD(DD, 0, CAST('07-11-2016' AS DATE)) AND Clock.ClockTypeID=1
) As ClockTemp
ON dbo.Employee.EmployeeID = ClockTemp.EmployeeID
GROUP BY dbo.Employee.EmployeeID
ORDER BY dbo.Employee.EmployeeID
Try that:
WITH minClockTimeCTE AS (
SELECT EmployeeID, min(ClockTime) as ClockTime
FROM ClockTemp
GROUP BY EmployeeID
)
SELECT Employee.EmployeeID, minClockTimeCTE.ClockTime
FROM Employee
LEFT OUTER JOIN minClockTimeCTE
ON Employee.EmployeeID = minClockTimeCTE.EmployeeID
You could change the left join to an outer apply top 1:
SELECT dbo.Employee.EmployeeID,
SUBSTRING(CONVERT(varchar, ClockTemp.ClockTime, 108), 1, 5) as ClockTime
FROM dbo.Employee
OUTER APPLY
(
SELECT TOP 1 [ClockID]
,[EmployeeID]
,[ClockTypeID]
,[ClockDate]
,[ClockTime]
FROM dbo.Clock
WHERE dbo.Clock.ClockDate = DATEADD(DD, 0, CAST('07-11-2016' AS DATE))
AND Clock.ClockTypeID=1
AND dbo.Employee.EmployeeID = ClockTemp.EmployeeID
ORDER BY ClockTime
) As ClockTemp
ORDER BY dbo.Employee.EmployeeID,clocktemp.ClockTime

How do I remove certain duplicates in a complex SQL query

I am writing a query and need it to Remove all duplicates of a.GenUserID but also keep the most recent login date ( that is b.LogDateTime) but this date must be older than 6 months. If there are later dates, they have to be removed.
I hope this makes sense.
SELECT DISTINCT
a.GenUserID,
c.DeletionDate,
b.LogDateTime,
(CASE c.Disabled WHEN 0 THEN 'NO' else 'YES - ARCHIVED' end)
FROM RioReport.dbo.GenUser a
LEFT JOIN dbo.GenUserArchive c on a.GenUserID = c.GenUserID
LEFT JOIN dbo.GenUserAccessHistory b on a.GenUserID = b.ExtraInfo
WHERE(a.Disabled=0 or c.Disabled=0)
AND c.DeletionDate IS NOT NULL
AND ((DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime IS NULL))
ORDER BY a.GenUserID, b.LogDateTime desc
You could add the row_number() information to your query, and wrap that query into an outer query that just takes the records with number 1 from that result:
select *
from (
select a.GenUserID,
c.DeletionDate,
b.LogDateTime,
case c.Disabled when 0 then 'NO' else 'YES - ARCHIVED' end as diabled,
row_number() over (partition by a.GenUserID
order by b.LogDateTime desc) as rn
from RioReport.dbo.GenUser a
inner join dbo.GenUserArchive c
on a.GenUserID = c.GenUserID
left join dbo.GenUserAccessHistory b
on a.GenUserID = b.ExtraInfo
where (a.Disabled=0 or c.Disabled=0)
and c.DeletionDate is not null
and (DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime is null)
)
where rn = 1
order by a.GenUserID
Note that you can turn the first left join into an inner join without any change to the result set, since you have a non-null check on one of its fields. inner join is then preferred, and might give a performance improvement.
If GenUserAccessHistory.LogDateTime is always non-null, then you can avoid the test or b.LogDateTime is null by moving the DateAdd(MM, -6, GetDate()) > b.LogDateTime condition to the appropriate join on clause.
The generated row number will be given in order of descending LogDateTime values, and restart from 1 for every different user.
Alternative without window functions
row_number() and other window functions are supported since SQL Server 2008. In comments you write you cannot use it. If that is the case, here is an alternative using a common table expression (supported since SQL Server 2005):
;with cte as (
select a.GenUserID,
c.DeletionDate,
b.LogDateTime,
case c.Disabled when 0 then 'NO' else 'YES - ARCHIVED' end as disabled,
from RioReport.dbo.GenUser a
inner join dbo.GenUserArchive c
on a.GenUserID = c.GenUserID
left join dbo.GenUserAccessHistory b
on a.GenUserID = b.ExtraInfo
where (a.Disabled=0 or c.Disabled=0)
and c.DeletionDate is not null
and (DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime is null)
)
select *
from cte main
where LogDateTime is null
or not exists (select 1
from cte sub
where sub.GenUserID = main.GenUserID
and sub.LogDateTime > main.LogDateTime)
order by GenUserID
Try with the below query.
;WITH CTE_Group
AS(
SELECT
ROW_NUMBER() OVER (PARTITION BY a.GenUserID ORDER BY b.LogDateTime DESC) as RNO,
a.GenUserID,
c.DeletionDate,
b.LogDateTime,
(CASE c.Disabled WHEN 0 THEN 'NO' else 'YES - ARCHIVED' end) IsArchived
FROM RioReport.dbo.GenUser a
LEFT JOIN dbo.GenUserArchive c on a.GenUserID = c.GenUserID
LEFT JOIN dbo.GenUserAccessHistory b on a.GenUserID = b.ExtraInfo
WHERE(a.Disabled=0 or c.Disabled=0)
AND c.DeletionDate IS NOT NULL
AND ((DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime IS NULL)))
SELECT GenUserID,
DeletionDate,
LogDateTime,
IsArchived
FROM WITH_CTE_Group
WHERE RNO=1
Use cte and window function
;with ctr as (
select a.GenUserID, a.DeletionDate, a.LogDateTime
row_number()over(partition by a.GenUserID order by b.LogDateTime desc) rnk
from RioReport.dbo.GenUser a )
select a.GenUserID, a.DeletionDate, a.LogDateTime,
CASE WHEN DATEDIFF(mm,LogDateTime,getdate())<6 THEN 'NO' else 'YES - ARCHIVED' end)
from ctr a where a.rnk=1

Creating a Month Bucket from Query Result

I have the following query result:
But I want to group the results by number, and categorize the quantity by month, so the resulting output would be:
How can I create a month bucket to pick up the date and make it a column and place the values accordingly. Here is my attempt so far:
select converT(varchar,year(NewShipDate)) + 'M' + MonthBucket,
*
from(
select ItemNumber, des.[Product Family], ItemDescription, des.Country, month(RequestedShipDate) as 'Month', ItemOrderedQuantity
from FS_COLine co
join FS_Item it on co.ItemKey = it.ItemKey
left outer join FIN_PLANCAP_2..ProductDescriptions des on it.ItemNumber collate database_default = des.RICNum
where RequestedShipDate >= convert(datetime, '01/01/2016', 120) and
RequestedShipDate <= convert(datetime, '12/31/2016', 120) and
isNumeric(it.ItemNumber) = 1 and
isNumeric(des.RICNum) = 1
--and it.ItemNumber = 35191305
group by ItemNumber, des.[Product Family], ItemDescription, des.Country, month(RequestedShipDate), ItemOrderedQuantity
order by ItemNumber
)A

I have two tables with common Quote_No and I need to sum Qty in Quote_Items with Required by Date in Table Quotes

I am trying to get the sum of "Qty" in a Table A called "Quote_Items" based on a "Required_by_Date" from Table B called Quotes. Both tables have a common "Quote_No" The required date is one month ago.
I have used the following but it produces a NULL, but I cannot see why
select sum(Qty)
from quotes.Quote_Items_Quantities
left outer join quotes.Quotes on (Quote_Required_by_Date = Qty)
WHERE (DatePart(yy, Quote_Required_by_Date) = DatePart(yy, DateAdd(m,1,getdate()))) and
datepart(m,Quote_Required_by_Date) = datepart(m,dateadd(m,1,getdate()))
Any suggestions what I am doing wrong here.
Try this:
SELECT SUM(i.Qty)
FROM Quote_Items i
JOIN Quotes q on i.Quote_No = q.Quote_No
AND CONVERT(varchar, q.Required_by_Date, 112) = CONVERT(varchar, DATEADD(month, -1, getdate()), 112)
or this (equivalent without using JOIN)
SELECT SUM(i.Qty)
FROM Quote_Items i
WHERE EXISTS(SELECT 1 FROM Quotes WHERE Quote_No = i.Quote_No AND CONVERT(varchar, Required_by_Date, 112) = CONVERT(varchar, DATEADD(month, -1, getdate()), 112))
Your query is producing NULL because of the join condition. You have
on Quote_Required_by_Date = Qty
However, the date is not going to match the quantity. Instead, you need to match on the Quote_No, according to your question:
select sum(Qty)
from quotes.Quote_Items_Quantities qiq left outer join
quotes.Quotes q
on q.Quote_Required_by_Date = qiq.Qty
WHERE (DatePart(yy, Quote_Required_by_Date) = DatePart(yy, DateAdd(m,1,getdate()))) and
datepart(m,Quote_Required_by_Date) = datepart(m,dateadd(m,1,getdate()));
You can also simplify your query by using the month() and year() functions:
select sum(Qty)
from quotes.Quote_Items_Quantities qiq left outer join
quotes.Quotes q
on q.Quote_Required_by_Date = qiq.Qty
WHERE year(Quote_Required_by_Date) = year(DateAdd(m, 1, getdate()) and
month(Quote_Required_by_Date) = month(dateadd(m,1,getdate());
Finally, you mind find it useful to use group by and get the results for many months:
select year(Quote_Required_by_Date), month(Quote_Required_by_Date), sum(Qty)
from quotes.Quote_Items_Quantities qiq left outer join
quotes.Quotes q
on q.Quote_Required_by_Date = qiq.Qty
group by year(Quote_Required_by_Date), month(Quote_Required_by_Date)
order by 1 desc, 2 desc;

SQL subquery question

I have the following SQL
SELECT
Seq.UserSessionSequenceID,
Usr.SessionGuid,
Usr.UserSessionID,
Usr.SiteID,
Seq.Timestamp,
Seq.UrlTitle,
Seq.Url
FROM
tblUserSession Usr
INNER JOIN
tblUserSessionSequence Seq ON Usr.UserSessionID = Seq.UserSessionID
WHERE
(Usr.Timestamp > DATEADD(mi, -45, GETDATE())) AND (Usr.SiteID = 15)
ORDER BY Usr.Timestamp DESC
Pretty simple stuff. There are by nature multiple UserSessionIDs rows in tblUserSessionSequence. I ONLY want to return the latest (top 1) row with unique UserSessionID. How do I do that?
You can use the windowing function ROW_NUMBER to number the rows for each user and select only those rows that have row number 1.
SELECT
UserSessionSequenceID,
SessionGuid,
UserSessionID,
SiteID,
Timestamp,
UrlTitle,
Url
FROM (
SELECT
Seq.UserSessionSequenceID,
Usr.SessionGuid,
Usr.UserSessionID,
Usr.SiteID,
Usr.Timestamp AS UsrTimestamp,
Seq.Timestamp,
Seq.UrlTitle,
Seq.Url,
ROW_NUMBER() OVER (PARTITION BY Usr.UserSessionID
ORDER BY Seq.UserSessionSequenceID DESC) AS rn
FROM
tblUserSession Usr
INNER JOIN
tblUserSessionSequence Seq ON Usr.UserSessionID = Seq.UserSessionID
WHERE
(Usr.Timestamp > DATEADD(mi, -45, GETDATE())) AND (Usr.SiteID = 15)
) T1
WHERE rn = 1
ORDER BY UsrTimestamp DESC
If you're looking to return only a single row in your query (i.e., the ID with the latest timestamp), just change
SELECT
to
SELECT TOP 1
If you're looking to obtain a single row for each UserSessionID, but you want to ensure that you get the one with the latest TimeStamp, that's slightly more complex.
You could do something like this:
SELECT
Seq.UserSessionSequenceID,
Usr.SessionGuid,
Usr.UserSessionID,
Usr.SiteID,
Seq.Timestamp,
Seq.UrlTitle,
Seq.Url
FROM
tblUserSession Usr
INNER JOIN
(SELECT
UserSessionSequenceID,
UserSessionID,
Timestamp,
UrlTitle,
Url,
ROW_NUMBER() over (PARTITION BY UserSessionID ORDER BY UserSessionSequenceID) AS nbr
FROM tblUserSessionSequence) Seq ON Usr.UserSessionID = Seq.UserSessionID AND Seq.nbr = 0
WHERE
(Usr.Timestamp > DATEADD(mi, -45, GETDATE())) AND (Usr.SiteID = 15)
ORDER BY Usr.Timestamp DESC