Simple query to include all columns based upon the MAX of one - sql

I've read through a lot of other articles and at this point I think I'm just beating my head against a wall. How would I replace this statement:
SELECT * FROM EmployeeInformation
I want to see all columns in table EmployeeInformation with ONLY the most recent RateChangeDate. I've tried MAX(RateChangeDate). An acceptable output would replace the current Walters, Rob rows by:
Walters, Rob Senior Tool Designer 29.8462 2011-12-15 00:00:00.000 rob0#adventure-works.com
MSSQL Query

-- Rows whose RateChangeDate equals the single latest date found anywhere in the table.
SELECT ei.*
FROM EmployeeInformation AS ei
WHERE ei.RateChangeDate = (
    SELECT MAX(m.RateChangeDate)
    FROM EmployeeInformation AS m
)
latest per employee:
-- Latest row per employee: the subquery is correlated on the employee key, so each
-- row is compared with that employee's own most recent RateChangeDate.
-- NOTE(review): the key column is written as EmployeeId (the original spelling
-- "emplyeeId" looked like a typo) - confirm against the table definition.
SELECT e.*
FROM EmployeeInformation AS e
WHERE e.RateChangeDate = (
    SELECT MAX(latest.RateChangeDate)
    FROM EmployeeInformation AS latest
    WHERE latest.EmployeeId = e.EmployeeId
)

You can use
-- MAX() without GROUP BY already yields exactly one row, so no TOP 1 is needed.
-- This compares against the table-wide latest date, not a per-employee one.
SELECT *
FROM EmployeeInformation
WHERE RateChangeDate = (SELECT MAX(RateChangeDate)
                        FROM EmployeeInformation)

you can use row_number as below:
-- Row_Number() restarts at 1 for each [Name], newest RateChangeDate first.
-- Ordering by that number and taking top(1) with ties keeps every row that shares
-- the first sort value, i.e. the row numbered 1 in each [Name] partition.
Select top(1) with ties * from EmployeeInformation
Order by Row_Number() over(partition by [Name] order by RateChangeDate Desc)

-- Number each person's rows from newest to oldest rate change, then keep row 1.
WITH EmpInfo AS
(
    SELECT lastname + ', ' + firstname AS fullname,
           jobtitle,
           rate,
           ratechangedate,
           emailaddress,
           -- The window clause is PARTITION BY <expr> ORDER BY <expr>; the original had a stray BY.
           ROW_NUMBER() OVER (PARTITION BY lastname + ', ' + firstname
                              ORDER BY RateChangeDate DESC) AS RN
    --- etc all joins
)
SELECT *
FROM EmpInfo
WHERE RN = 1

You can use correlated subqueries.
Something like this
SELECT t.*
FROM tablename AS t
WHERE t.Ratechangedate = (SELECT MAX(latest.Ratechangedate)
                          FROM tablename AS latest
                          WHERE latest.fullname = t.fullname)
The inner select is correlated with the outer row on fullname: for each row it finds that person's latest Ratechangedate, and the outer query keeps only the row whose date equals it. (An uncorrelated MAX over the whole table would instead return rows only for whoever holds the single latest date in the table.)

Related

Get all items with min values SQL Server

Here's what the table is like:
----------------------------------
EmployeeId Tasks_Count
1 1
2 1
3 2
4 1
5 3
I need a query to get all employees with min tasks count. Result should be like this:
---------------
EmployeeId
1
2
4
The problem is that I am using a subquery to count the tasks. Here's my code:
SELECT *
FROM (SELECT EmployeeId,
COUNT(*) AS Tasks_count
FROM Tasks
INNER JOIN Status ON Tasks.StatusId=Status.Id
WHERE Status.Name != 'Closed'
GROUP BY EmployeeId
ORDER BY Tasks_count DESC) AS Employee_not_closed
WHERE Tasks_count IN (SELECT MIN(Tasks_count)
FROM Employee_not_closed)
Use FETCH FIRST WITH TIES:
-- Standard-SQL row limiting: WITH TIES also returns every row that ties with the
-- last row kept, so all employees sharing the lowest Tasks_Count come back.
-- NOTE(review): SQL Server does not accept FETCH FIRST ... WITH TIES; the equivalent
-- there is SELECT TOP (1) WITH TIES ... ORDER BY Tasks_Count - confirm the target engine.
select EmployeeId
from tablename
order by Tasks_Count
fetch first 1 row with ties
You can try below -
-- Every row whose Tasks_Count matches the smallest Tasks_Count in the table.
SELECT t.*
FROM tablename AS t
WHERE t.Tasks_Count IN (
    SELECT MIN(m.Tasks_Count)
    FROM tablename AS m
)
It can also be done using RANK() function like following.
-- RANK() assigns 1 to every row tied for the lowest Tasks_Count.
;WITH cte AS
(
    SELECT Employeeid,
           RANK() OVER (ORDER BY Tasks_Count) AS rn
    FROM #table
)
SELECT *
FROM cte
WHERE rn = 1
You can use the code below; I have tested it and it works fine.
-- Employees whose Tasks_Count equals the table minimum.
SELECT EmployeeId
FROM StackOverFlow_3
WHERE Tasks_Count IN (
    SELECT MIN(Tasks_Count)
    FROM StackOverFlow_3
)
You can use a join on subquery
-- Join every row to the one-row derived table holding the minimum; only rows at
-- the minimum survive. The column is spelled Tasks_Count as in the question's
-- table (the original answer used task_count, which that table does not have).
SELECT m.EmployeeId
FROM my_table AS m
INNER JOIN
(
    SELECT MIN(Tasks_Count) AS min_task
    FROM my_table
) AS t
    ON t.min_task = m.Tasks_Count

Trying to find duplicate values in TWO rows and TWO columns - SQL Server

Using SQL Server, I'm not a DBA but I can write some general SQL. Been pulling my hair out for about an hour now. Searching I've found several solutions but they all fail due to how GROUP BY works.
I have a table with two columns that I'm trying to check for duplicates:
userid
orderdate
I'm looking for rows that have BOTH userid and orderdate as duplicates. I want to display these rows.
If I use group by, I can't pull any other data, such as the order ID, because it's not in the group by clause.
You could use the grouped query in a subquery:
-- Keep a row when its (userid, orderdate) pair occurs more than once in the table.
SELECT a.*
FROM mytable AS a
WHERE EXISTS (
    SELECT 1
    FROM mytable AS b
    WHERE b.userid = a.userid
      AND b.orderdate = a.orderdate
    GROUP BY b.userid, b.orderdate
    HAVING COUNT(*) > 1
)
You can also use a windowed function:
-- Attach to every row the size of its (UserID, OrderDate) group, then keep groups larger than one.
;WITH CTE AS
(
    SELECT *,
           COUNT(*) OVER (PARTITION BY UserID, OrderDate) AS DupRows
    FROM MyTable
)
SELECT *
FROM CTE
WHERE DupRows > 1
ORDER BY UserID, OrderDate
You can get the duplicates by using the groupby and having. Like so:
-- One output row per duplicated (userid, orderdate) pair, with its occurrence count.
SELECT userid, orderdate, COUNT(*)
FROM yourTable
GROUP BY userid, orderdate
HAVING COUNT(*) > 1
EDIT:
-- Return the full rows for every duplicated (userid, orderdate) pair.
-- The pairs are matched on the real columns rather than on CONCAT(userid, orderdate):
-- the concatenated key has no separator and goes through an implicit string
-- conversion of orderdate, so distinct pairs could collide, and wrapping the
-- columns in a function keeps an index on them from being used for the match.
SELECT t.*
FROM yourTable AS t
INNER JOIN
(
    SELECT userid, orderdate
    FROM yourTable
    GROUP BY userid, orderdate
    HAVING COUNT(*) > 1
) AS dup
    ON dup.userid = t.userid
   AND dup.orderdate = t.orderdate
-- Return every row whose exact (userid, orderdate) pair occurs more than once.
-- The original compared a string key built with CONVERT(varchar(10), orderdate, 103),
-- which keeps only dd/mm/yyyy, while its GROUP BY used the full orderdate; a row on
-- the same day as a duplicated pair but at a different time would have been returned too.
SELECT t.*
FROM myTable AS t
WHERE (SELECT COUNT(*)
       FROM myTable AS d
       WHERE d.userid = t.userid
         AND d.orderdate = t.orderdate) > 1;

How to select the latter row in SQL

I have a result set that looks like this:
As you can see some of the contactID are repeated with same QuestionResponse. And there is one with a different QuestionResponse (the one with red lines).
I want to group this by ContactID, but select the latter row. Eg: In case of ContactID = 78100299, I want to select the row with CreateDate = 17:00:44.907 (or rowNum = 2).
I have tried this:
select
ContactID,
max(QuestionResponse) as QuestionResponse,
max(CreateDate) as CreateDate
from
theResultSet
group by
ContactID
This will NOT work because there could be QuestionResponse 2 and then 1 for the same contactID. In that case the latter one will be the one with response 1 not 2.
Thank you for your help.
I would use ROW_NUMBER() that way:
-- Number the rows of each ContactID from newest to oldest CreateDate; row 1 is the latest.
WITH Query AS
(
    SELECT rowNum,
           ContactID,
           QuestionResponse,
           CreateDate,
           ROW_NUMBER() OVER (PARTITION BY ContactID
                              ORDER BY CreateDate DESC) AS Ordered
    FROM theResultSet
)
SELECT *
FROM Query
WHERE Ordered = 1
Step 1: number the rows within each ContactID group by CreateDate, descending.
Step 2: keep only the rows numbered 1 (that is, filter out every row whose number is <> 1).
This might work if your SQL Engine can handle it...
-- Pair each row with its ContactID's latest CreateDate and keep the rows that carry that date.
SELECT cur.*
FROM theResultSet AS cur
INNER JOIN
(
    SELECT ContactID,
           MAX(CreateDate) AS CreateDate
    FROM theResultSet
    GROUP BY ContactID
) AS latest
    ON latest.ContactID = cur.ContactID
   AND latest.CreateDate = cur.CreateDate
The end result will be all rows from theResultSet where the creation date is the max creation date.
This should work too:
-- Keep, for each ContactID, the row carrying that contact's highest rowNum.
SELECT ContactID,
       QuestionResponse,
       CreateDate
FROM (
    SELECT rowNum,
           ContactID,
           QuestionResponse,
           CreateDate,
           MAX(rowNum) OVER (PARTITION BY ContactID) AS maxrow
    FROM theResultSet
) AS ranked
WHERE rowNum = maxrow

Remove duplicate columns from Query result

Please help me!! Newby with Sql queries
Select *
from(
select EmpID,
sum(IncomeTax) as TaxAmount,
sum(bsalary) as SalaryAmount
from PayrollHistory Pay
group by EmpID
) cumSalary
Right JOIN (
Select PayrollHistory.EmpID,
(select firstName +' '+coalesce(middleInitial,' ')+' '+ lastName
from Employee
where Employee.EmpID=PayrollHistory.EmpID)as name,
PayrollHistory.IncomeTax,
(PayrollHistory.bsalary+sum(ISNULL(Allw.amount,0)))totalTaxableSUM
from PayrollHistory
left join (
select *
from AllowanceHistory
where AllowanceHistory.taxStatus=1
) as Allw
on Allw.EmpID=PayrollHistory.EmpID and Allw.payMonth=PayrollHistory.payMonth
where PayrollHistory.payMonth=3
group by PayrollHistory.EmpID, PayrollHistory.IncomeTax, PayrollHistory.bsalary
) as tbl
on tbl.EmpID =cumSalary.EmpID
The result of the above query contains two EmpID columns that are the same. How can I remove one of them and still get the same result?
Instead of the first Select *, list explicitly the columns that you need, like:
select cumSalary.EmpID,
cumSalary.TaxAmount,
cumSalary.SalaryAmount,
tbl.name,
tbl.IncomeTax,
tbl.totalTaxableSUM
etc.
Use column name selection instead of using * , refer as below
-- Name the columns of each derived table explicitly so EmpID is returned only once.
-- Columns of the right-hand derived table are referenced through its alias tbl;
-- PayrollHistory itself is not in scope in the outer SELECT.
SELECT cumSalary.EmpID,
       cumSalary.TaxAmount,
       cumSalary.SalaryAmount,
       tbl.name,
       tbl.IncomeTax,
       tbl.totalTaxableSUM
FROM (
    -- Per-employee totals over the whole payroll history.
    SELECT EmpID,
           SUM(IncomeTax) AS TaxAmount,
           SUM(bsalary) AS SalaryAmount
    FROM PayrollHistory Pay
    GROUP BY EmpID
) cumSalary
RIGHT JOIN (
    -- Month-3 payroll rows with the employee's display name and taxable total.
    SELECT PayrollHistory.EmpID,
           (SELECT firstName + ' ' + COALESCE(middleInitial, ' ') + ' ' + lastName
            FROM Employee
            WHERE Employee.EmpID = PayrollHistory.EmpID) AS name,
           PayrollHistory.IncomeTax,
           (PayrollHistory.bsalary + SUM(ISNULL(Allw.amount, 0))) totalTaxableSUM
    FROM PayrollHistory
    LEFT JOIN (
        SELECT *
        FROM AllowanceHistory
        WHERE AllowanceHistory.taxStatus = 1
    ) AS Allw
        ON Allw.EmpID = PayrollHistory.EmpID
       AND Allw.payMonth = PayrollHistory.payMonth
    WHERE PayrollHistory.payMonth = 3
    GROUP BY PayrollHistory.EmpID, PayrollHistory.IncomeTax, PayrollHistory.bsalary
) AS tbl
    ON tbl.EmpID = cumSalary.EmpID

Performance tuning of this query?

I have below query to support employee pagination sorted by employee name
SELECT rowNumAlias
,Employee.employeeId
,Employee.NAME
FROM (
SELECT row_number() OVER (
ORDER BY Employee.NAME ASC
) rowNumAlias
,employeeId
,NAME
FROM Employee
) employeeData
INNER JOIN Employee ON Employee.employeeId = employeeData.employeeId
WHERE rowNumAlias BETWEEN ? AND ?
Where parameter rowNumAlias can be any integer number between 1 and 100
This query takes around 7 seconds on my SQL Server database, which has 1 million records. Is there a way I can reduce the query execution time?
You can try like this:
-- Number all employees once, in name order, then keep the requested range.
-- The row number is computed directly over Employee: a scalar subquery correlated
-- on employeeId would see a single row per employee and always return 1.
-- employeeId is added as a tie-breaker so equal names keep a stable position.
SELECT *
FROM (
    SELECT ROW_NUMBER() OVER (ORDER BY Employee.NAME ASC, Employee.employeeId ASC) AS rowNumAlias,
           Employee.employeeId,
           Employee.NAME
    FROM Employee
) e3
WHERE e3.rowNumAlias BETWEEN ? AND ?
You can try to use CTE for this.
-- Number the employees in name order inside the CTE, then page on that number.
-- The CTE already exposes every column the result needs, so the extra join back
-- to Employee is dropped.
-- NOTE(review): dropping the join assumes employeeId is unique and non-null in
-- Employee (e.g. its primary key) - confirm.
-- employeeId also serves as a tie-breaker so equal names keep a stable position.
;WITH employeeData AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY Employee.NAME ASC, Employee.employeeId ASC) AS rowNumAlias,
           employeeId,
           NAME
    FROM Employee
)
SELECT employeeData.rowNumAlias,
       employeeData.employeeId,
       employeeData.NAME
FROM employeeData
WHERE employeeData.rowNumAlias BETWEEN ? AND ?
If you are on SQL Server 2012+ you can use the ORDER BY ... OFFSET ... FETCH ...syntax to do pagination:
-- OFFSET/FETCH paging. EmployeeId is added as a tie-breaker: when the sort key is
-- not unique, rows with equal names can change places between executions and a
-- row may be repeated on, or missing from, consecutive pages.
SELECT EmployeeId, Name
FROM Employee
ORDER BY Employee.Name ASC, Employee.EmployeeId ASC
OFFSET ? ROWS FETCH NEXT ? ROWS ONLY
OFFSET specifies how many rows to skip and FETCH NEXT how many to get. See the documentation.
In earlier versions you should be able to get better performance by using a common table expression (with proper indexes this query looks like it executes in half the time of your original, according to my execution plan):
-- Number the employees in name order inside the CTE, then page on that number.
-- employeeid is added as a tie-breaker so equal names keep a stable position
-- and page boundaries do not shift between executions.
;WITH cte (rownum, employeeid, name) AS (
    SELECT
        rownum = ROW_NUMBER() OVER (ORDER BY employee.name ASC, employee.employeeid ASC),
        employeeid,
        name
    FROM employee
)
SELECT rownum, employeeid, name
FROM cte
WHERE rownum BETWEEN ? AND ?;