Remove duplicate columns from Query result - sql

Please help me!! Newbie with SQL queries.
-- Joins per-employee payroll totals to the payMonth = 3 payroll detail.
-- Both derived tables expose EmpID, so the outer "Select *" returns that column twice.
Select *
from(
-- cumSalary: one row per EmpID, IncomeTax and bsalary summed over all PayrollHistory rows
select EmpID,
sum(IncomeTax) as TaxAmount,
sum(bsalary) as SalaryAmount
from PayrollHistory Pay
group by EmpID
) cumSalary
Right JOIN (
-- tbl: payMonth = 3 rows with the employee's concatenated name and the taxable total
Select PayrollHistory.EmpID,
(select firstName +' '+coalesce(middleInitial,' ')+' '+ lastName
from Employee
where Employee.EmpID=PayrollHistory.EmpID)as name,
PayrollHistory.IncomeTax,
-- bsalary plus the sum of matching taxStatus = 1 allowance amounts (0 when none match)
(PayrollHistory.bsalary+sum(ISNULL(Allw.amount,0)))totalTaxableSUM
from PayrollHistory
left join (
select *
from AllowanceHistory
where AllowanceHistory.taxStatus=1
) as Allw
on Allw.EmpID=PayrollHistory.EmpID and Allw.payMonth=PayrollHistory.payMonth
where PayrollHistory.payMonth=3
group by PayrollHistory.EmpID, PayrollHistory.IncomeTax, PayrollHistory.bsalary
) as tbl
on tbl.EmpID =cumSalary.EmpID
The above query result contains two EmpID columns that are the same. How can I remove one of them and still get the same result?

Instead of the first Select *, specify all the columns that you need, like:
-- Explicit outer column list: EmpID is taken from cumSalary only, so it appears once.
select cumSalary.EmpID,
cumSalary.TaxAmount,
cumSalary.SalaryAmount,
tbl.name,
tbl.IncomeTax,
tbl.totalTaxableSUM
etc.

Select columns by name instead of using *, as shown below:
-- Same join as the question's query, but with an explicit outer column list
-- so that EmpID is returned only once.
-- Outer references must go through the derived-table aliases (cumSalary / tbl);
-- PayrollHistory itself is not in scope at this level.
SELECT
    cumSalary.*,
    tbl.name,
    tbl.IncomeTax,
    tbl.totalTaxableSUM
FROM (
    -- one row per EmpID: totals over every PayrollHistory row
    SELECT
        EmpID,
        SUM(IncomeTax) AS TaxAmount,
        SUM(bsalary) AS SalaryAmount
    FROM PayrollHistory Pay
    GROUP BY EmpID
) cumSalary
RIGHT JOIN (
    SELECT
        PayrollHistory.EmpID,
        -- the ' ' literal must stay on one line; a line break inside the quotes
        -- would put a newline into the name
        (SELECT firstName + ' ' + COALESCE(middleInitial, ' ') + ' ' + lastName
         FROM Employee
         WHERE Employee.EmpID = PayrollHistory.EmpID) AS name,
        PayrollHistory.IncomeTax,
        -- bsalary plus matching taxStatus = 1 allowance amounts (0 when none match)
        (PayrollHistory.bsalary + SUM(ISNULL(Allw.amount, 0))) AS totalTaxableSUM
    FROM PayrollHistory
    LEFT JOIN (
        SELECT *
        FROM AllowanceHistory
        WHERE AllowanceHistory.taxStatus = 1
    ) AS Allw
        ON Allw.EmpID = PayrollHistory.EmpID
       AND Allw.payMonth = PayrollHistory.payMonth
    WHERE PayrollHistory.payMonth = 3
    GROUP BY
        PayrollHistory.EmpID,
        PayrollHistory.IncomeTax,
        PayrollHistory.bsalary
) AS tbl
    ON tbl.EmpID = cumSalary.EmpID

Related

Pulling rows with SQL MAXDATE

I'm trying to run a query from an HR table. I'm bringing in all employees and their dependents who share the same 'Primary_Key_Value'. My statement works, but I'm getting duplicates because some dependents have multiple MED_COV_EFFECTIVE_DATEs. I need to bring in only the latest (MAX) date. When I try to use the MAX(MED_COV_EFFECTIVE_DATE) function, I'm getting errors. Can someone please help me?
-- Distinct coverage rows whose PRIMARY_KEY_VALUE also occurs on a row with a HIRE_DATE.
SELECT DISTINCT
    PRIMARY_KEY_VALUE,
    RECORD_ID,
    LAST_NAME,
    FIRST_NAME,
    DATE_OF_BIRTH,
    HIRE_DATE,
    RELATIONSHIP_CODE,
    MED_COV_EFFECTIVE_DATE,
    SOCIAL_SECURITY_NUMBER
FROM COVERAGE_TABLE AS T1
WHERE T1.PRIMARY_KEY_VALUE IN (
    SELECT T2.PRIMARY_KEY_VALUE
    FROM COVERAGE_TABLE AS T2
    WHERE T2.HIRE_DATE IS NOT NULL
)
ORDER BY
    PRIMARY_KEY_VALUE,
    RECORD_ID
Dang, wasn't thinking about inner select can only return 1 column earlier. Try something like this:
-- Latest MED_COV_EFFECTIVE_DATE per PRIMARY_KEY_VALUE, considering only rows
-- whose HIRE_DATE is set, stored in a temp table.
-- SELECT ... INTO needs a name for every output column, so the aggregate is aliased.
SELECT
    T2.PRIMARY_KEY_VALUE,
    MAX(T2.Med_Cov_Effective_Date) AS Max_Med_Cov_Effective_Date
INTO #MostRecentCoveredKeys
FROM COVERAGE_TABLE AS T2
WHERE T2.HIRE_DATE IS NOT NULL
GROUP BY T2.PRIMARY_KEY_VALUE
This should give you a unique set of Primary_Key_Values.
Or CTE Version:
-- CTE form of the same lookup. The CTE body must be wrapped in parentheses and
-- every CTE column needs a name, so the aggregate is aliased.
-- A CTE is only valid as the prefix of the statement that uses it.
; WITH MostRecentCoveredKeys
AS
(
    SELECT
        T2.PRIMARY_KEY_VALUE,
        MAX(T2.Med_Cov_Effective_Date) AS Max_Med_Cov_Effective_Date
    FROM COVERAGE_TABLE AS T2
    WHERE T2.HIRE_DATE IS NOT NULL
    GROUP BY T2.PRIMARY_KEY_VALUE
)
Then JOIN the original table and cte (or temp table) like so:
-- Coverage rows restricted to the keys found by MostRecentCoveredKeys.
-- Every column is qualified with T1: PRIMARY_KEY_VALUE exists on both sides of
-- the join, so an unqualified reference is rejected as ambiguous.
-- NOTE(review): joining on the key alone still returns every coverage date for a
-- key; to keep only the latest row, also match T1.MED_COV_EFFECTIVE_DATE against
-- the MAX(...) column of mrck (that column needs a name in its definition).
SELECT
    T1.PRIMARY_KEY_VALUE,
    T1.RECORD_ID,
    T1.LAST_NAME,
    T1.FIRST_NAME,
    T1.DATE_OF_BIRTH,
    T1.HIRE_DATE,
    T1.RELATIONSHIP_CODE,
    T1.MED_COV_EFFECTIVE_DATE,
    T1.SOCIAL_SECURITY_NUMBER
FROM COVERAGE_TABLE AS T1
INNER JOIN MostRecentCoveredKeys AS mrck
    ON mrck.PRIMARY_KEY_VALUE = T1.PRIMARY_KEY_VALUE
ORDER BY
    T1.PRIMARY_KEY_VALUE,
    T1.RECORD_ID
-- When using the temp table version, the joined name must include the
-- leading '#': #MostRecentCoveredKeys
-- DROP TABLE #MostRecentCoveredKeys
This query will fetch the latest data:
-- Rows with a HIRE_DATE whose MED_COV_EFFECTIVE_DATE equals the latest such
-- date for their PRIMARY_KEY_VALUE.
SELECT
    cov.PRIMARY_KEY_VALUE,
    cov.RECORD_ID,
    cov.LAST_NAME,
    cov.FIRST_NAME,
    cov.DATE_OF_BIRTH,
    cov.HIRE_DATE,
    cov.RELATIONSHIP_CODE,
    cov.MED_COV_EFFECTIVE_DATE,
    cov.SOCIAL_SECURITY_NUMBER
FROM (
    -- latest coverage date per key
    SELECT
        PRIMARY_KEY_VALUE,
        MAX(MED_COV_EFFECTIVE_DATE) AS MAX_DATE
    FROM COVERAGE_TABLE
    WHERE HIRE_DATE IS NOT NULL
    GROUP BY PRIMARY_KEY_VALUE
) AS latest
INNER JOIN (
    SELECT *
    FROM COVERAGE_TABLE
    WHERE HIRE_DATE IS NOT NULL
) AS cov
    ON latest.PRIMARY_KEY_VALUE = cov.PRIMARY_KEY_VALUE
   AND latest.MAX_DATE = cov.MED_COV_EFFECTIVE_DATE;
-- Numbers each key's rows from newest to oldest coverage date and keeps row 1.
-- PARTITION BY and ORDER BY are separate clauses inside OVER(); there is no
-- comma between them.
;with a as
(
    select
        *
        ,row_number() over (
            partition by PRIMARY_KEY_VALUE
            order by Med_Cov_Effective_Date desc
        ) as rn
    from COVERAGE_TABLE T1
    where HIRE_DATE is not null
)
select *
from a
where rn = 1
order by PRIMARY_KEY_VALUE, RECORD_ID

SQL code looks too complicated

Test Table
-- Sample data: one row per Id, tagged with a store number and a department.
CREATE TABLE Test (
    Id         integer,
    Store_N    varchar(25),
    Department varchar(25)
);

INSERT INTO Test (Id, Store_N, Department)
VALUES
    -- store 1
    (25, '1', 'A'), (67, '1', 'A'), (34, '1', 'A'), (97, '1', 'C'), (21, '1', 'C'), (268, '1', 'B'),
    -- store 2
    (456, '2', 'A'), (349, '2', 'A'), (935, '2', 'B'),
    -- store 3
    (36, '3', 'B'), (637, '3', 'B'), (388, '3', 'B'), (891, '3', 'B'),
    -- store 4
    (344, '4', 'A'), (763, '4', 'A'), (836, '4', 'A');

-- Every row plus a running number within its store.
SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY Store_N ORDER BY Store_N) AS AA
FROM Test;
Result is
I need to exclude all stores which have only one department, and list each distinct department once for every remaining store. The result looks like this
And this is code
-- Distinct (Department, Store_N) pairs for stores that have more than one distinct department.
SELECT DISTINCT TB4.Department, TB4.Store_N
From
(
-- TB4: all Test rows belonging to the stores kept by TB3
SELECT TB0.Store_N, TB0.Department FROM Test TB0
INNER JOIN
(
-- TB3: stores whose number of distinct departments is greater than 1
SELECT TB2.Store_N , Count(*) AS AA1
FROM
(
-- TB2: one row per (Department, Store_N) pair
SELECT DISTINCT TB1.Department , TB1.Store_N
FROM
-- TB1: Test plus a per-store row number (AA); AA is not used by the outer levels
( SELECT * , ROW_NUMBER() OVER( Partition BY Store_N ORDER BY Store_N ) AA
FROM Test ) TB1
) TB2
group by TB2.Store_N
HAVING
COUNT(*) > 1 ) TB3
ON TB0.Store_N = TB3.Store_N
) TB4
Now the question is: how can I simplify this code?
Thank you
You can basically do:
-- One row per (store, department) pair.
SELECT
    store_n,
    department
FROM test
GROUP BY
    store_n,
    department;
But, you want to exclude stores that have only one department, so lets do a count:
-- (store, department) pairs for stores with more than one department.
-- The window count runs after GROUP BY, so it counts departments per store.
SELECT
    per_store.store_n,
    per_store.department
FROM (
    SELECT
        store_n,
        department,
        COUNT(*) OVER (PARTITION BY store_n) AS cnt
    FROM test
    GROUP BY
        store_n,
        department
) AS per_store
WHERE per_store.cnt > 1;
Here is a SQL Fiddle.
You are going a long way round to get the functionality of the "GROUP BY" clause
-- (store, department) pairs for stores that have more than one distinct department.
-- The count has to be taken per store over DISTINCT departments; counting rows per
-- (Department, Store_N) pair would instead keep single-department stores that have
-- several rows and drop departments that have only one row.
SELECT
    t.Store_N,
    t.Department
FROM Test AS t
WHERE t.Store_N IN (
    SELECT Store_N
    FROM Test
    GROUP BY Store_N
    HAVING COUNT(DISTINCT Department) > 1
)
GROUP BY
    t.Store_N,
    t.Department

Sql select distinct row by a columns highest value

I am having an issue trying to select one row per city name. This is the following collection I am getting:
This is my query so far:
-- Population rows between 1000 and 99999 people (inclusive), with the place's id and name.
SELECT
    pl.PlaceId,
    pl.Name,
    pop.NumberOfPeople,
    pop.Year
FROM dbo.Places AS pl
INNER JOIN dbo.Populations AS pop
    ON pop.PlaceId = pl.PlaceId
WHERE pop.NumberOfPeople BETWEEN 1000 AND 99999
I am trying to get it to where it only selects a city one time, but uses the most recent date. So in the above picture, I would only see Abbeville for 2016 and not 2015. I believe I need to do either a group by or do a sub query to flatten the results. If anybody has any advice on how I can handle this, it will be greatly appreciated.
Assuming you are using SQLSERVER,you can use Rownumber
-- Ranks each place name's population rows from newest to oldest year and keeps the newest.
;WITH ranked AS
(
    SELECT
        pl.PlaceId,
        pl.Name,
        pop.NumberOfPeople,
        pop.Year,
        ROW_NUMBER() OVER (PARTITION BY pl.Name ORDER BY pop.Year DESC) AS rownum
    FROM dbo.Places AS pl
    INNER JOIN dbo.Populations AS pop
        ON pop.PlaceId = pl.PlaceId
    WHERE pop.NumberOfPeople >= 1000
      AND pop.NumberOfPeople <= 99999
)
SELECT *
FROM ranked
WHERE rownum = 1
The following query serves the purpose.
-- Self-contained demo: load sample rows into a temp table, then keep the
-- newest YEAR row for each PlaceId.
CREATE TABLE #TEMP_TEST
(
    PlaceId        INT,
    Name           VARCHAR(50),
    NumberOfPeople INT,
    YEAR           INT
);

-- Explicit column list plus a row-constructor VALUES list; the sample rows are
-- all distinct, so no UNION de-duplication is needed.
INSERT INTO #TEMP_TEST (PlaceId, Name, NumberOfPeople, YEAR)
VALUES
    (1, 'Abbeville', 2603, 2016),
    (5, 'Alabester', 32948, 2016),
    (9, 'Aubum', 63118, 2016),
    (1, 'Abbeville', 2402, 2015),
    (5, 'Alabester', 67902, 2017);

SELECT
    PlaceId,
    Name,
    NumberOfPeople,
    YEAR
FROM (
    -- RNO = 1 is the row with the highest YEAR for the PlaceId
    SELECT
        ROW_NUMBER() OVER (PARTITION BY PlaceId ORDER BY YEAR DESC) AS RNO,
        PlaceId,
        Name,
        NumberOfPeople,
        YEAR
    FROM #TEMP_TEST
) AS T
WHERE RNO = 1;

DROP TABLE #TEMP_TEST;

Trying to find duplicate values in TWO rows and TWO columns - SQL Server

Using SQL Server, I'm not a DBA but I can write some general SQL. Been pulling my hair out for about an hour now. Searching I've found several solutions but they all fail due to how GROUP BY works.
I have a table with two columns that I'm trying to check for duplicates:
userid
orderdate
I'm looking for rows that have BOTH userid and orderdate as duplicates. I want to display these rows.
If I use group by, I can't pull any other data, such as the order ID, because it's not in the group by clause.
You could use the grouped query in a subquery:
-- Full rows whose (userid, orderdate) pair occurs more than once in the table.
SELECT *
FROM mytable AS a
WHERE EXISTS (
    SELECT b.userid, b.orderdate
    FROM mytable AS b
    WHERE b.userid = a.userid
      AND b.orderdate = a.orderdate
    GROUP BY b.userid, b.orderdate
    HAVING COUNT(*) > 1
)
You can also use a windowed function:
-- Tags every row with the size of its (UserID, OrderDate) group and keeps
-- the rows whose group has more than one member.
;WITH CTE AS
(
    SELECT
        *,
        COUNT(*) OVER (PARTITION BY UserID, OrderDate) AS DupRows
    FROM MyTable
)
SELECT *
FROM CTE
WHERE DupRows > 1
ORDER BY
    UserID,
    OrderDate
You can get the duplicates by using the groupby and having. Like so:
-- Each duplicated (userid, orderdate) pair together with how many rows share it.
SELECT userid, orderdate, COUNT(*)
FROM yourTable
GROUP BY userid, orderdate
HAVING COUNT(*) > 1
EDIT:
-- Full rows whose (userid, orderdate) pair is duplicated.
-- The two columns are compared directly rather than through a concatenated
-- string key: stringifying orderdate can lose precision (and so match rows
-- that are not real duplicates) and prevents index use on either column.
-- Rows where userid or orderdate is NULL are not matched by the "=" join.
SELECT t.*
FROM yourTable AS t
INNER JOIN (
    SELECT
        userid,
        orderdate
    FROM yourTable
    GROUP BY
        userid,
        orderdate
    HAVING COUNT(*) > 1
) AS dup
    ON dup.userid = t.userid
   AND dup.orderdate = t.orderdate
-- Full rows that share both userid and orderdate with at least one other row.
-- Comparing the raw columns avoids the string key built with
-- CONVERT(varchar(10), orderdate, 103), which keeps only dd/mm/yyyy and would
-- treat same-day rows with different times as duplicates.
SELECT t.*
FROM myTable AS t
WHERE EXISTS (
    SELECT 1
    FROM myTable AS same_key
    WHERE same_key.userid = t.userid
      AND same_key.orderdate = t.orderdate
    -- HAVING without GROUP BY: the matching rows form a single group
    HAVING COUNT(*) > 1
);

Simple query to include all columns based upon the MAX of one

I've read through a lot of other articles and at this point I think I'm just beating my head against a wall. How would I replace this statement:
-- Returns every row and column of EmployeeInformation, with no filter on RateChangeDate.
SELECT * FROM EmployeeInformation
I want to see all columns in table EmployeeInformation with ONLY the most recent RateChangeDate. I've tried MAX(RateChangeDate). An acceptable output would replace the current Walters, Rob rows by:
Walters, Rob Senior Tool Designer 29.8462 2011-12-15 00:00:00.000 rob0#adventure-works.com
MSSQL Query
-- Rows whose RateChangeDate equals the latest RateChangeDate in the whole table.
SELECT *
FROM EmployeeInformation
WHERE RateChangeDate = (
    SELECT MAX(RateChangeDate)
    FROM EmployeeInformation
)
latest per employee:
-- Each employee's row(s) at that employee's own latest RateChangeDate.
-- The inner table is aliased so both sides of the correlation are unambiguous.
-- NOTE(review): assumes the table has an EmployeeId key column (the snippet's
-- "emplyeeId" looks like a misspelling) — TODO confirm the real column name.
SELECT *
FROM EmployeeInformation AS e
WHERE e.RateChangeDate = (
    SELECT MAX(latest.RateChangeDate)
    FROM EmployeeInformation AS latest
    WHERE latest.EmployeeId = e.EmployeeId
)
You can use
-- Rows dated at the table-wide latest RateChangeDate.
-- An ungrouped MAX() already yields exactly one row, so TOP 1 is not needed.
SELECT *
FROM EmployeeInformation
WHERE RateChangeDate = (
    SELECT MAX(RateChangeDate)
    FROM EmployeeInformation
)
you can use row_number as below:
-- Ordering by the per-[Name] row number and taking TOP (1) WITH TIES returns
-- every row numbered 1, i.e. the newest RateChangeDate row for each [Name].
SELECT TOP (1) WITH TIES *
FROM EmployeeInformation
ORDER BY ROW_NUMBER() OVER (
    PARTITION BY [Name]
    ORDER BY RateChangeDate DESC
)
-- Numbers each person's rows from newest to oldest RateChangeDate and keeps the newest.
WITH EmpInfo AS
(
    SELECT
        lastname + ', ' + firstname AS fullname,
        jobtitle,
        rate,
        ratechangedate,
        emailaddress,
        -- OVER takes "PARTITION BY <expr> ORDER BY <expr>"; there is no extra BY between them
        ROW_NUMBER() OVER (
            PARTITION BY lastname + ', ' + firstname
            ORDER BY RateChangeDate DESC
        ) AS RN
    --- etc all joins (the FROM clause and joins of the original query go here)
)
SELECT *
FROM EmpInfo
WHERE RN = 1
You can use nested subqueries.
Something like this
-- All rows for every fullname that has a row dated at the table-wide
-- latest Ratechangedate.
SELECT *
FROM tablename
WHERE fullname IN (
    SELECT fullname
    FROM tablename
    WHERE Ratechangedate IN (
        SELECT MAX(Ratechangedate)
        FROM tablename
    )
)
Inner most select selects your max date column, then it selects the fullname column for this max date and then it selects all rows with this fullname.