Avoid inserting duplicates when working with composite primary key - sql

I'm writing a SQL query that transfers data from an Access DB to a new SQL Server database.
This transfer is a one time only, so I do not have to worry about the code being super generic.
I'm trying to insert data from a couple of tables (including tables from the Access DB), but I get an error about duplicates on the composite primary key.
My problem is that I can't see why my use of distinct does not prevent those duplicates.
The table I want to insert into is constructed as so:
The code I'm trying to execute is as follows:
-- The six statements below copy the [Supplier 1] .. [Supplier 6] column groups of
-- AccessDataMigration_1.dbo.[B-K Data] into CDB2020.dbo.ComponentSupplier, one statement per group.
-- Every select joins Components on the B-K number and Suppliers on the supplier name.
-- NOTE: distinct compares all six selected columns and only applies within a single statement,
-- so it does not by itself guarantee unique key values in the target table.
-- Insert into ComponentSupplier for [Supplier 1] (the only group inserted with Preferred = '1')
insert into CDB2020.dbo.ComponentSupplier
(
Supplier_ID,
BK_ID,
Part_ID,
Datasheet,
LF,
Preferred
)
select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 1 order no], a.[PDF Data 1 sheet link], a.[Supplier 1 LF], '1'
from AccessDataMigration_1.dbo.[B-K Data] as a
inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 1]
-- Insert into ComponentSupplier for [Supplier 2] (Preferred = '0')
insert into CDB2020.dbo.ComponentSupplier
(
Supplier_ID,
BK_ID,
Part_ID,
Datasheet,
LF,
Preferred
)
select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 2 order no], a.[PDF Data 2 sheet link], a.[Supplier 2 LF], '0'
from AccessDataMigration_1.dbo.[B-K Data] as a
inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 2]
-- Insert into ComponentSupplier for [Supplier 3] (Preferred = '0')
insert into CDB2020.dbo.ComponentSupplier
(
Supplier_ID,
BK_ID,
Part_ID,
Datasheet,
LF,
Preferred
)
select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 3 order no], a.[PDF Data 3 sheet link], a.[Supplier 3 LF], '0'
from AccessDataMigration_1.dbo.[B-K Data] as a
inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 3]
-- Insert into ComponentSupplier for [Supplier 4] (Preferred = '0')
insert into CDB2020.dbo.ComponentSupplier
(
Supplier_ID,
BK_ID,
Part_ID,
Datasheet,
LF,
Preferred
)
select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 4 order no], a.[PDF Data 4 sheet link], a.[Supplier 4 LF], '0'
from AccessDataMigration_1.dbo.[B-K Data] as a
inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 4]
-- Insert into ComponentSupplier for [Supplier 5] (Preferred = '0')
insert into CDB2020.dbo.ComponentSupplier
(
Supplier_ID,
BK_ID,
Part_ID,
Datasheet,
LF,
Preferred
)
select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 5 order no], a.[PDF Data 5 sheet link], a.[Supplier 5 LF], '0'
from AccessDataMigration_1.dbo.[B-K Data] as a
inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 5]
-- Insert into ComponentSupplier for [Supplier 6] (Preferred = '0')
insert into CDB2020.dbo.ComponentSupplier
(
Supplier_ID,
BK_ID,
Part_ID,
Datasheet,
LF,
Preferred
)
select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 6 order no], a.[PDF Data 6 sheet link], a.[Supplier 6 LF], '0'
from AccessDataMigration_1.dbo.[B-K Data] as a
inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 6]

The distinct de-duplicates based on all the columns in the select.
The issue could be that you are inserting records with multiple insert..select statements, so rows are only de-duplicated within each statement and never across them. You could try something like this to ensure records are de-duplicated correctly.
The distinct will de-dup within each select, and the union will de-dup across the select statements:
-- Load all six supplier column groups of [B-K Data] into ComponentSupplier in one statement.
-- distinct/union only remove rows that are identical in ALL six columns; two rows that share a
-- key but differ in Part_ID, Datasheet, LF or Preferred would still violate the primary key.
-- row_number() therefore keeps exactly one row per key, preferring the row with Preferred = '1'.
-- NOTE(review): assumes the composite key is (Supplier_ID, BK_ID) -- adjust "partition by" if
-- the key of ComponentSupplier contains other columns.
insert into CDB2020.dbo.ComponentSupplier
(
Supplier_ID,
BK_ID,
Part_ID,
Datasheet,
LF,
Preferred
)
select ranked.Supplier_ID, ranked.BK_ID, ranked.Part_ID, ranked.Datasheet, ranked.LF, ranked.Preferred
from
(
    select u.Supplier_ID, u.BK_ID, u.Part_ID, u.Datasheet, u.LF, u.Preferred,
        -- rn = 1 marks the single row kept for each key; Part_ID is only a tie-breaker.
        row_number() over (partition by u.Supplier_ID, u.BK_ID order by u.Preferred desc, u.Part_ID) as rn
    from
    (
        -- Column names of the whole union come from this first select.
        select distinct s.Supplier_ID as Supplier_ID, c.BK_ID as BK_ID, a.[Supplier 1 order no] as Part_ID, a.[PDF Data 1 sheet link] as Datasheet, a.[Supplier 1 LF] as LF, '1' as Preferred
        from AccessDataMigration_1.dbo.[B-K Data] as a
        inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
        inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 1]
        union
        select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 2 order no], a.[PDF Data 2 sheet link], a.[Supplier 2 LF], '0'
        from AccessDataMigration_1.dbo.[B-K Data] as a
        inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
        inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 2]
        union
        select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 3 order no], a.[PDF Data 3 sheet link], a.[Supplier 3 LF], '0'
        from AccessDataMigration_1.dbo.[B-K Data] as a
        inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
        inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 3]
        union
        select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 4 order no], a.[PDF Data 4 sheet link], a.[Supplier 4 LF], '0'
        from AccessDataMigration_1.dbo.[B-K Data] as a
        inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
        inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 4]
        union
        select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 5 order no], a.[PDF Data 5 sheet link], a.[Supplier 5 LF], '0'
        from AccessDataMigration_1.dbo.[B-K Data] as a
        inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
        inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 5]
        union
        select distinct s.Supplier_ID, c.BK_ID, a.[Supplier 6 order no], a.[PDF Data 6 sheet link], a.[Supplier 6 LF], '0'
        from AccessDataMigration_1.dbo.[B-K Data] as a
        inner join CDB2020.dbo.Components as c on c.BK_ID = a.[B-K no]
        inner join CDB2020.dbo.Suppliers as s on s.Name = a.[Supplier 6]
    ) as u
) as ranked
where ranked.rn = 1
You can use a query like this to find out duplicate records (group by all fields in your composite key):
-- Lists every (Supplier_ID, BK_ID) pair that the [Supplier 6] column group would produce
-- more than once, together with how many times it occurs.
select
    sup.Supplier_ID,
    cmp.BK_ID,
    count(*)
from AccessDataMigration_1.dbo.[B-K Data] as src
inner join CDB2020.dbo.Components as cmp
    on cmp.BK_ID = src.[B-K no]
inner join CDB2020.dbo.Suppliers as sup
    on sup.Name = src.[Supplier 6]
group by
    sup.Supplier_ID,
    cmp.BK_ID
having count(*) > 1

Related

Only display the day with the max profit per store

Only display the day with the max profit per store in SQL Server
For example only {Monday 5929.00 DARIEN BRONX GOODS} should only show for DARIEN BRONX GOODS
What I have so far.
-- Sum of product prices per store and weekday, listed store by store from the
-- highest to the lowest total.
select
    d.WeekDayName,
    sum(p.PRODUCT_PRICE) as [Total Profit],
    st.STORE_NAME as [Store Name]
from dbo.S_ORDER_FACT as f
inner join dbo.DIM_EMPLOYEE as e
    on e.EMPLOYEE_ID = f.ORDER_EMPLOYEEID
inner join dbo.DIM_CUSTOMER as cu
    on cu.CUSTOMERID = f.ORDER_CUSTOMERID
inner join dbo.Dim_Date as d
    on d.DateKey = f.ORDER_DATEID
inner join dbo.DIM_D_STORE as st
    on st.STOREID = f.ORDER_STOREID
inner join dbo.DIM_PRODUCT as p
    on p.PRODUCTID = f.ORDER_PRODUCTID
group by
    st.STORE_NAME,
    d.WeekDayName
order by
    st.STORE_NAME,
    [Total Profit] desc
offset 0 rows ;
[Output in image ]
here is one way using window functions:
-- Best weekday per store: totals are computed per (store, weekday), ranked inside each store
-- by total descending, and only the top-ranked rows are returned.
-- rank() keeps every weekday that ties for the highest total of a store.
select
    ranked.WeekDayName,
    ranked.[Total Profit],
    ranked.[Store Name]
from (
    select
        Dim_Date.WeekDayName,
        sum(dbo.DIM_PRODUCT.PRODUCT_PRICE) as [Total Profit],
        DIM_D_STORE.STORE_NAME as [Store Name],
        rank() over (partition by DIM_D_STORE.STORE_NAME order by sum(dbo.DIM_PRODUCT.PRODUCT_PRICE) desc) as rn
    from
        dbo.DIM_EMPLOYEE
        inner join dbo.S_ORDER_FACT on
            dbo.DIM_EMPLOYEE.EMPLOYEE_ID = dbo.S_ORDER_FACT.ORDER_EMPLOYEEID
        inner join dbo.DIM_CUSTOMER on
            dbo.DIM_CUSTOMER.CUSTOMERID = dbo.S_ORDER_FACT.ORDER_CUSTOMERID
        inner join dbo.Dim_Date on
            dbo.Dim_Date.DateKey = dbo.S_ORDER_FACT.ORDER_DATEID
        inner join dbo.DIM_D_STORE on
            dbo.DIM_D_STORE.STOREID = dbo.S_ORDER_FACT.ORDER_STOREID
        inner join dbo.DIM_PRODUCT on
            dbo.DIM_PRODUCT.PRODUCTID = dbo.S_ORDER_FACT.ORDER_PRODUCTID
    group by
        DIM_D_STORE.STORE_NAME,
        Dim_Date.WeekDayName
-- The derived table needs a non-reserved alias: "table" is a reserved keyword and does not parse.
) as ranked
where ranked.rn = 1
Wouldn't Top 1 do the job for selecting highest profit?
-- Best weekday per store using TOP.
-- A plain "top 1" would return a single row for the whole result, not one per store.
-- "top (1) with ties" returns every row that ties with the first row on the ORDER BY value;
-- ordering by a per-store rank makes that value 1 for the best weekday of each store.
select top (1) with ties
Dim_Date.WeekDayName,
sum(dbo.DIM_PRODUCT.PRODUCT_PRICE) as [Total Profit],
DIM_D_STORE.STORE_NAME as [Store Name]
from
dbo.DIM_EMPLOYEE
inner join dbo.S_ORDER_FACT on
dbo.DIM_EMPLOYEE.EMPLOYEE_ID = dbo.S_ORDER_FACT.ORDER_EMPLOYEEID
inner join dbo.DIM_CUSTOMER on
dbo.DIM_CUSTOMER.CUSTOMERID = dbo.S_ORDER_FACT.ORDER_CUSTOMERID
inner join dbo.Dim_Date on
dbo.Dim_Date.DateKey = dbo.S_ORDER_FACT.ORDER_DATEID
inner join dbo.DIM_D_STORE on
dbo.DIM_D_STORE.STOREID = dbo.S_ORDER_FACT.ORDER_STOREID
inner join dbo.DIM_PRODUCT on
dbo.DIM_PRODUCT.PRODUCTID = dbo.S_ORDER_FACT.ORDER_PRODUCTID
group by
DIM_D_STORE.STORE_NAME,
Dim_Date.WeekDayName
order by
rank() over (partition by DIM_D_STORE.STORE_NAME order by sum(dbo.DIM_PRODUCT.PRODUCT_PRICE) desc)

using sql subtotals

I have the following select statement:
-- One row per distinct client / job / task / booking combination for company '107',
-- showing the booked hours, the sale rate and their product as [Total].
-- The GROUP BY has no aggregates; it only collapses identical combinations.
SELECT
    c.compname AS [Company],
    x.Ressnavn AS [Client],
    a.jobid AS [Job Number],
    a.JobNavn AS [Job Name],
    t.TName AS [Task Name],
    cu.DayDate AS [Booking Date],
    cu.HrsBooked AS [Scheduled Hours],
    tr.Sale AS [Sales Value],
    (cu.HrsBooked) * (tr.Sale) AS [Total]
FROM job AS a
INNER JOIN jobplan AS jp
    ON jp.JobId = a.jobid
INNER JOIN JobDimensions AS z
    ON z.jobid = a.jobid
INNER JOIN Ress AS x
    ON x.RessId = z.custid
INNER JOIN JobPrice AS y
    ON y.JobId = a.Jobid
INNER JOIN task AS t
    ON t.PlanId = jp.PlanId
INNER JOIN JobPriceactivity AS w
    ON w.priceId = y.priceId
INNER JOIN taskres AS tr
    ON tr.TaskId = t.TaskId
INNER JOIN emp AS e
    ON e.EmpId = tr.ResId
INNER JOIN comp AS c
    ON e.compid = c.compid
INNER JOIN CapUsed AS cu
    ON cu.RefId = tr.TaskResId
    AND cu.RefType=1
-- The booking-date window is part of this join condition, exactly as in the original query.
INNER JOIN arpaccount AS ar
    ON e.empname = ar.arpaccname
    AND CAST (cu.DayDate AS DATE) BETWEEN #startdate AND #enddate
WHERE c.compid = '107'
    AND e.EMPID >='2'
    AND cu.HrsBooked > '0'
GROUP BY
    c.compname,
    x.Ressnavn,
    a.jobid,
    a.JobNavn,
    t.TName,
    cu.DayDate,
    cu.HrsBooked,
    tr.Sale,
    y.priceid
That gives me the following output but I would like to add in a subtotal of the column 'Total' at each change in client.
An example of what I would like to try and get to is highlighted in green on the output screen grab.
Can anyone advise on the best way to do this?
Try using GROUP BY ROLLUP:
-- Detail rows plus one subtotal row per client.
-- [Total] must be an aggregate: on a rollup row the detail columns are NULL, so only
-- SUM(hours * rate) can carry the subtotal.
-- All detail columns are wrapped in ONE composite rollup element (note the inner parentheses),
-- so ROLLUP produces just two levels per client: the detail rows and the client subtotal.
-- Listing the columns separately would add a subtotal level for every column.
SELECT
x.Ressnavn AS [Client],
c.compname AS [Company],
a.jobid AS [Job Number],
a.JobNavn AS [Job Name],
t.TName [Task Name],
cu.DayDate AS [Booking Date],
cu.HrsBooked AS [Scheduled Hours],
tr.Sale AS [Sales Value],
SUM(cu.HrsBooked * tr.Sale) AS [Total]
-- (...) stands for the unchanged FROM / JOIN / WHERE part of the question's query.
FROM (...)
GROUP BY
x.Ressnavn,
ROLLUP ( (
c.compname,
a.jobid,
a.JobNavn,
t.TName,
cu.DayDate,
cu.HrsBooked,
y.priceid,
tr.Sale ) )

Multiplying 2 columns in sql but calculation is incorrect?

I have the following select statement:
-- Lists booked hours per company / employee / job / task / booking day together with the
-- sale rate and a computed [Total].
-- NOTE(review): #startdate / #enddate look like externally substituted parameters -- confirm
-- how they are supplied before running this.
SELECT c.compname AS [Company]
,e.empname AS [Employee Name]
,a.jobid AS [Job Number]
,a.JobNavn AS [Job Name]
,t.TName [Task Name]
,cu.DayDate AS [Booking Date]
,cu.HrsBooked AS [Scheduled Hours]
,x.Ressnavn AS [Client]
,tr.Sale
-- Sums HrsBooked over every joined row of the group, then multiplies by the rate.
,SUM(cu.HrsBooked) * (tr.Sale) AS [Total]
FROM job a
INNER JOIN jobplan jp ON jp.JobId = a.jobid
INNER JOIN JobDimensions AS z ON z.jobid = a.jobid
INNER JOIN Ress AS x ON x.RessId = z.custid
INNER JOIN JobPrice AS y ON y.JobId = a.Jobid
INNER JOIN task t ON t.PlanId = jp.PlanId
-- NOTE(review): no column of w is used anywhere in this query. If JobPriceactivity holds
-- several rows per priceId, this join repeats each booking row and inflates
-- SUM(cu.HrsBooked) -- verify against the data.
INNER JOIN JobPriceactivity AS w ON w.priceId = y.priceId
INNER JOIN taskres tr ON tr.TaskId = t.TaskId
INNER JOIN emp e ON e.EmpId = tr.ResId
INNER JOIN comp c ON e.compid = c.compid
INNER JOIN CapUsed AS cu ON cu.RefId = tr.TaskResId
AND cu.RefType = 1
-- The booking-date window is written as part of this join condition.
INNER JOIN arpaccount AS ar ON e.empname = ar.arpaccname
AND CAST(cu.DayDate AS DATE) BETWEEN #startdate
AND #enddate
WHERE e.EMPID >= '2'
AND cu.HrsBooked > '0'
-- cu.HrsBooked and tr.Sale are grouping columns, so within one group they are constant.
GROUP BY c.compname
,e.empname
,a.jobid
,a.JobNavn
,t.TName
,cu.DayDate
,cu.HrsBooked
,x.Ressnavn
,y.priceid
,tr.Sale
But the calculation of my 'SUM' in the select is not correct as highlighted in yellow in the following extract
I was expecting something like the below highlighted in green:
Can anyone highlight where I am going wrong either in my main code or column sum?

Join results of 3 queries

I have 3 working queries, but I don't know how to combine them into one query.
TABLES :
------------------ -------------------------- -----------------
PIECE MCARTEFIDENT MCARTEFIDLIG
------------------ -------------------------- -----------------
ET_LIBELLE : Store MFC_ETABLISSEMENT : STORE MFL_ETABLISSEMENT
GP_NUMERO : TICKET MFC_VALDISPOTHEO MFL_NBPASSAGEAPRES
------------------- --------------------------
--QUERY 1: number of non-NULL GP_NUMERO values in PIECE per store label (et_libelle)
select [et_libelle] AS [STORE NAME],COUNT([GP_NUMERO])
FROM PIECE
LEFT OUTER JOIN ETABLISS ET1 ON gp_etablissement=ET1.ET_ETABLISSEMENT
GROUP BY et_libelle
--QUERY 2: number of non-NULL MFC_VALDISPOTHEO values in MCARTEFIDENT per establishment code
-- Note: [STORE NAME] here is the code MFC_ETABLISSEMENT, not the label used by QUERY 1.
SELECT MFC_ETABLISSEMENT as [STORE NAME], COUNT (MFC_VALDISPOTHEO)
FROM MCARTEFIDENT
LEFT OUTER JOIN ETABLISS ET2 ON MFC_ETABLISSEMENT=ET2.ET_ETABLISSEMENT
GROUP BY MFC_ETABLISSEMENT
--QUERY 3: MFL_NBPASSAGEAPRES from MCARTEFIDLIG per establishment code
-- NOTE(review): MFL_NBPASSAGEAPRES is neither aggregated nor listed in GROUP BY; engines that
-- enforce standard grouping rules reject this -- confirm which aggregate (e.g. a sum) is intended.
SELECT MFL_ETABLISSEMENT AS [STORE NAME], MFL_NBPASSAGEAPRES
FROM MCARTEFIDLIG
LEFT OUTER JOIN ETABLISS ET3 ON MFL_ETABLISSEMENT=ET3.ET_ETABLISSEMENT
GROUP BY MFL_ETABLISSEMENT
The columns of the combined result should be:
[et_libelle] (Query 1),
COUNT([GP_NUMERO]) (Query 1),
COUNT(MFC_VALDISPOTHEO) (Query 2),
MFL_NBPASSAGEAPRES (Query 3)
You could use your 3 queries as subqueries and join them:
-- Combines the three per-store aggregates into one row per store.
-- Every derived table exposes the establishment code as store_code and the joins use it:
-- [STORE NAME] of the first derived table is the label et_libelle, while the other two tables
-- only carry the establishment code, so joining on [STORE NAME] would compare a label with a code.
select a.[STORE NAME], a.count_gp_numero, b.count_mfc_valdispotheo, c.MFL_NBPASSAGEAPRES
from (
    select gp_etablissement as store_code, [et_libelle] AS [STORE NAME], COUNT([GP_NUMERO]) as count_gp_numero
    FROM PIECE
    LEFT OUTER JOIN ETABLISS ET1 ON gp_etablissement=ET1.ET_ETABLISSEMENT
    GROUP BY gp_etablissement, et_libelle
) a
left join (
    -- A column alias cannot carry a table prefix, so it is plain count_mfc_valdispotheo.
    SELECT MFC_ETABLISSEMENT as store_code, COUNT(MFC_VALDISPOTHEO) as count_mfc_valdispotheo
    FROM MCARTEFIDENT
    LEFT OUTER JOIN ETABLISS ET2 ON MFC_ETABLISSEMENT=ET2.ET_ETABLISSEMENT
    GROUP BY MFC_ETABLISSEMENT
) b on b.store_code = a.store_code
left join (
    -- A grouped query may only return grouping columns or aggregates.
    -- NOTE(review): SUM is assumed to be the intended aggregate for MFL_NBPASSAGEAPRES -- confirm.
    SELECT MFL_ETABLISSEMENT AS store_code, SUM(MFL_NBPASSAGEAPRES) AS MFL_NBPASSAGEAPRES
    FROM MCARTEFIDLIG
    LEFT OUTER JOIN ETABLISS ET3 ON MFL_ETABLISSEMENT=ET3.ET_ETABLISSEMENT
    GROUP BY MFL_ETABLISSEMENT
) c on c.store_code = a.store_code
If I had to speculate, my guess would be that you want:
-- One row per establishment with three figures fetched by correlated subqueries.
-- Each computed column is named so that it can be referenced by the caller.
select e.*,
       -- number of PIECE rows of the establishment
       (select count(*)
        from piece p
        where p.gp_etablissement = e.et_etablissement
       ) as piece_count,
       -- number of MCARTEFIDENT rows of the establishment
       (select count(*)
        from MCARTEFIDENT mfc
        where mfc.MFC_ETABLISSEMENT = e.et_etablissement
       ) as mfc_count,
       -- MFL_* columns belong to MCARTEFIDLIG, not MCARTEFIDENT.
       -- sum() guarantees a single value; a bare column would fail as soon as an
       -- establishment has more than one MCARTEFIDLIG row.
       -- NOTE(review): sum is assumed to be the wanted aggregate -- confirm.
       (select sum(mfl.MFL_NBPASSAGEAPRES)
        from MCARTEFIDLIG mfl
        where mfl.MFL_ETABLISSEMENT = e.et_etablissement
       ) as MFL_NBPASSAGEAPRES
from etabliss e;

SQL Server where clause order count > 0

I have a query that works but returns all students, whether they place orders or not. I just want those with orders. The query works fine until I add the where clause. How can I properly write this?
-- Lists every student with order statistics taken from orderX by three correlated subqueries
-- (order count, total attendees, total event hours) and the texts of the lookup tables.
SELECT top 100 percent s.id, s.fname as [First Name], s.lname as [Last Name],
(select count(student_id) from orderX x where x.student_id=s.id) as [Order Count],
(select sum(no_attendees) from orderX x where x.student_id=s.id) as [Attendees / Participants],
(select sum(eventHours) from orderX x where x.student_id=s.id) as [Event Hours],
oc1.text as [Occupation 1], oc2.text as [Occupation 2],
oc3.text as [Occupation 3], s.OccupationOther, s.dateGraduated, s.organization, s.city, s.zip, s.st, s.county,
aud.text as [Preferred Audience], pts.text as [Plans to Share], mr.text as [Main Reason]
FROM student s
left join occupation1 oc1 on s.Occupation1 = oc1.id
left join occupation2 oc2 on s.Occupation2 = oc2.id
left join occupation3 oc3 on s.Occupation3 = oc3.id
left join audience aud on s.audience = aud.id
left join PlanToShare pts on s.PlanToShare = pts.id
left join mainReason mr on s.mainReason = mr.id
-- This is the line the query fails on: [Order Count] is an alias defined in the select list,
-- and a select-list alias cannot be referenced in the where clause.
where [Order Count] > 0
For starters you could replace your where clause by
where (select count(student_id) from orderX x where x.student_id=s.id) > 0
I think no correlation needed here. Find all the aggregates in subquery and "inner" join it (inner because you are anyways going to filter out zero count rows)
-- Students that have at least one order, with their order statistics.
-- The statistics are computed once per student in the derived table x; the inner join
-- drops students without any row in orderX, so no "count > 0" filter is needed.
select top 100 percent s.id,
    s.fname as [First Name],
    s.lname as [Last Name],
    x.order_count as [Order Count],
    x.attendess_participants as [Attendees / Participants],
    -- must match the column name produced by the derived table (event_hours)
    x.event_hours as [Event Hours],
    oc1.text as [Occupation 1],
    oc2.text as [Occupation 2],
    oc3.text as [Occupation 3],
    s.OccupationOther,
    s.dateGraduated,
    s.organization,
    s.city,
    s.zip,
    s.st,
    s.county,
    aud.text as [Preferred Audience],
    pts.text as [Plans to Share],
    mr.text as [Main Reason]
from student s
-- Each occupation column has its own lookup table, as in the question's query.
left join occupation1 oc1 on s.Occupation1 = oc1.id
left join occupation2 oc2 on s.Occupation2 = oc2.id
left join occupation3 oc3 on s.Occupation3 = oc3.id
left join audience aud on s.audience = aud.id
left join PlanToShare pts on s.PlanToShare = pts.id
left join mainReason mr on s.mainReason = mr.id
inner join (
    select student_id,
        count(*) as order_count,
        sum(no_attendees) as attendess_participants,
        sum(eventHours) as event_hours
    from orderX x
    group by student_id
) x on x.student_id = s.id;
You will have to use
where (select count(student_id) from orderX x where x.student_id=s.id) > 0
instead. In sql-server you cannot use aliases in where clauses.
You cannot use a column alias ([Order count]) in the WHERE clause. Either repeat the
(select count(student_id) from orderX x where x.student_id=s.id)
in the WHERE clause as
WHERE (select count(student_id) from orderX x where x.student_id=s.id) > 0
or use a cte.