I am updating records with the following queries:

update tableA set Quantity=
(select count(*) from tableB where ID=x)
where ID=x

update tableA set Quantity=
(select sum(Stock) from tableC where ID=y)
where ID=y
Example (corrected):

All the IDs from tableA are split between two tables, TableB and TableC. I have to update the Quantity field of TableA with the row count from TableB (if the TableA ID is in TableB), or with sum(Stock) from TableC (if the TableA ID is in TableC).

There are 500k IDs to be updated like this. I was wondering how it can be done without having to execute 500k queries.

EDIT: I am fetching the count of rows from TableB; count is not a column of TableB.

Any help will be appreciated, TIA!
The names of your tables and columns are not 100% clear to me from your question, so I'm guessing a little bit; correct as needed. Note that COUNT(*) always returns a value (0 when no rows match, never NULL), so the branch test must use EXISTS rather than IS NOT NULL:

update tablea a set quantity = case
  when exists (select 1 from tableb b where b.id = a.id) then
    (select count(*) from tableb b where b.id = a.id)
  else
    (select sum(stock) from tablec c where c.id = a.id)
end
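Below is a self-contained illustration of the same idea, written with COALESCE/NULLIF instead of CASE; the syntax (declared global temporary tables, the session schema) is DB2: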
declare global temporary table tablea(id int not null, quantity int) with replace on commit preserve rows not logged;
declare global temporary table tableb(id int not null) with replace on commit preserve rows not logged;
declare global temporary table tablec(id int not null, stock int) with replace on commit preserve rows not logged;
insert into session.tablea values (1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0);
insert into session.tableb values 1, 1, 1, 2, 2, 3;
insert into session.tablec values (4, 3), (5, 2), (5, 2), (5, 1), (6, 3), (6, 4);
update session.tableA a
set Quantity=coalesce(
nullif((select count(*) from session.tableb b where b.ID=a.ID), 0)
, (select sum(stock) from session.tablec c where c.ID=a.ID)
);
select * from session.tableA;
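With the sample data above, the final SELECT should return:

ID  QUANTITY
1   3
2   2
3   1
4   3
5   5
6   7

IDs 1-3 take their row counts from tableb (3, 2 and 1 occurrences), while IDs 4-6 fall through to sum(stock) from tablec (3, 2+2+1 = 5, and 3+4 = 7).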
You can use a correlated subquery:

update tableA
set Quantity = (select count(*) from tableB b where b.ID = tableA.ID)
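Since the IDs are split between the two tables, the whole 500k-row workload reduces to two set-based statements. Here is a sketch under that assumption, using the table and column names from the question:

update tableA
set Quantity = (select count(*) from tableB b where b.ID = tableA.ID)
where exists (select 1 from tableB b where b.ID = tableA.ID);

update tableA
set Quantity = (select sum(Stock) from tableC c where c.ID = tableA.ID)
where exists (select 1 from tableC c where c.ID = tableA.ID);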
A related question: is there any difference between the following two statements?
DELETE a
FROM TableA a
JOIN TableB b ON a.Field1 = b.Field1 AND a.Field2 = b.Field2;
vs.
DELETE
FROM TableA
WHERE Field1 IN (
SELECT Field1
FROM TableB
) AND Field2 IN (
SELECT Field2
FROM TableB
);
The logical conditions of the two statements are different.

The first statement deletes a row from TableA if both its Field1 and Field2 match the corresponding columns of a single row in TableB.

The second statement deletes a row from TableA if the value of Field1 exists in Field1 of TableB and the value of Field2 exists in Field2 of TableB - but not necessarily in the same row.

It's easy to see the difference if you change the DELETE to a SELECT.
Here's an example. First, create and populate sample tables (Please save us this step in your future questions):
CREATE TABLE A
(
AInt int,
AChar char(1)
);
CREATE TABLE B
(
BInt int,
BChar char(1)
);
INSERT INTO A (AInt, AChar) VALUES
(1, 'a'), (2, 'a'), (3, 'a'),
(1, 'b'), (2, 'b'), (3, 'b');
INSERT INTO B (BInt, BChar) VALUES
(1, 'a'),
(2, 'b'),
(3, 'c');
The statements (translated to select statements):
SELECT A.*
FROM A
JOIN B
ON AInt = BInt AND AChar = BChar;
SELECT *
FROM A
WHERE AInt IN (
SELECT BInt
FROM B
) AND AChar IN (
SELECT BChar
FROM B
);
Results of the first statement (JOIN):

AInt  AChar
1     a
2     b

Results of the second statement (IN):

AInt  AChar
1     a
2     a
3     a
1     b
2     b
3     b
And you can see a live demo on DB<>Fiddle
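If you need the JOIN semantics in a form that is portable across databases (multi-table DELETE syntax varies), an EXISTS rewrite is the usual equivalent; a sketch:

DELETE FROM TableA
WHERE EXISTS (
    SELECT 1
    FROM TableB b
    WHERE b.Field1 = TableA.Field1
      AND b.Field2 = TableA.Field2
);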
I have the following query:
Original query:
SELECT
cd1.cust_number_id, cd1.cust_number_id, cd1.First_Name, cd1.Last_Name
FROM #Customer_Data cd1
inner join #Customer_Data cd2 on
cd1.Cd_Id <> cd2.Cd_Id
and cd2.cust_number_id <> cd1.cust_number_id
and cd2.First_Name = cd1.First_Name
and cd2.Last_Name = cd1.Last_Name
inner join #Customer c1 on c1.Cust_id = cd1.cust_number_id
inner join #Customer c2 on c2.cust_id = cd2.cust_number_id
WHERE c1.cust_number <> c2.cust_number
I optimized it as follows, but there is an error in my optimization and I can't find it:
Optimized query:
SELECT cd1.cust_number_id, cd1.cust_number_id, cd1.First_Name, cd1.Last_Name
FROM (
SELECT cdResult.cust_number_id, cdResult.First_Name, cdResult.Last_Name,
       COUNT(*) OVER (PARTITION BY cdResult.First_Name, cdResult.Last_Name) AS cnt_name_bday
FROM #Customer_Data cdResult
WHERE cdResult.First_Name IS NOT NULL
AND cdResult.Last_Name IS NOT NULL) AS cd1
WHERE cd1.cnt_name_bday > 1;
Test data:
DECLARE #Customer_Data TABLE
(
Cd_Id INT,
cust_number_id INT,
First_Name NVARCHAR(30),
Last_Name NVARCHAR(30)
)
INSERT #Customer_Data (Cd_Id,cust_number_id,First_Name,Last_Name)
VALUES (1, 22, N'Alex', N'Bor'),
(2, 22, N'Alex', N'Bor'),
(3, 23, N'Alex', N'Bor'),
(4, 24, N'Tom', N'Cruse'),
(5, 25, N'Tom', N'Cruse')
DECLARE #Customer TABLE
(
Cust_id INT,
Cust_number INT
)
INSERT #Customer (Cust_id, Cust_number)
VALUES (22, 022),
(23, 023),
(24, 024),
(25, 025)
The problem is that the original query returns 6 rows (it repeats rows), while the optimized query returns each duplicate only once. How can I make the optimized query repeat rows in the same way?
I would suggest just using window functions:

SELECT cd.cud_customer_id
FROM (SELECT cd.*,
             COUNT(*) OVER (PARTITION BY cud_name, cud_birthday) AS cnt_name_bday
      FROM dbo.customer_data cd
     ) cd
WHERE cnt_name_bday > 1;

(The names here - customer_data, cud_name, cud_birthday - are generic; map them to #Customer_Data, First_Name and Last_Name in your schema.)
Your query is finding duplicates for either name or birthday. You want duplicates with both at the same time.
You can use a single EXISTS:
SELECT cd.cud_customer_id
FROM dbo.customer_data AS cd
WHERE EXISTS (SELECT 1
FROM dbo.customer_data AS c
WHERE c.cud_name = cd.cud_name AND c.cud_birthday = cd.cud_birthday AND c.cud_customer_id <> cd.cud_customer_id
);
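The c.cud_customer_id <> cd.cud_customer_id predicate excludes the current row itself, so a customer is returned only when some other row shares both the name and the birthday.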
I have two tables: the first with all movements in twelve months, and the second with claims registered in the same period. When I run the following query against the first table I get 10 records. Of course, there are other records with a different number of movements (e.g. 7, 23, or 2 movements):
select t.cod_suc
,t.cod_ramo_comercial
,t.Poliza
,t.Item
,t.id_pv
from temp_portafolio_personal_accidents as t
where t.cod_suc = 2
and t.cod_ramo_comercial = 46
and t.Poliza = 50283
and t.Item = 1
and t.id_pv = 788383;
With the second query, for the second table, I have the following results:
select c.cod_suc
,c.cod_ramo_comercial
,c.[No. Policy]
,c.Item
,c.[ID Incident]
,max(c.id_pv) as id_pv
,count(distinct [No. Incident]) as 'Conteo R12'
from #claims as c
where c.[ID Incident] = 343632
group by c.cod_suc
,c.cod_ramo_comercial
,c.[No. Policy]
,c.Item
,c.[ID Incident];
Now I need to update the first table, but only one record. I'm using the following query, but all records are being updated. When I sum the results I get 10, yet there is just one claim, as the second query shows.
update p
set [No. Siniestros R12] = b.[Conteo R12]
from temp_portafolio_personal_accidents p
left join
(select c.cod_suc
,c.cod_ramo_comercial
,c.[No. Policy]
,c.Item
,c.[ID Incident]
,max(c.id_pv) as id_pv
,count(distinct [No. Incident]) as 'Conteo R12'
from
#claims as c
where c.[ID Incident] = 343632
group by c.cod_suc
,c.cod_ramo_comercial
,c.[No. Policy]
,c.Item
,c.[ID Incident]
) b
on p.id_pv = b.id_pv
and p.cod_suc = b.cod_suc
and p.cod_ramo_comercial = b.cod_ramo_comercial
and p.Poliza = b.[No. Policy]
and p.Item = b.Item
where p.id_pv = 788383;
You can use a CTE with a ROW_NUMBER() function to do this. Simple example:
DECLARE #TABLE AS TABLE (Testing INT, Testing2 VARCHAR(55), Testing3 BIT);
INSERT INTO #TABLE VALUES (1, '1', 1);
INSERT INTO #TABLE VALUES (1, '1', 1);
INSERT INTO #TABLE VALUES (1, '1', 1);
INSERT INTO #TABLE VALUES (1, '1', 1);
INSERT INTO #TABLE VALUES (1, '1', 1);
INSERT INTO #TABLE VALUES (1, '1', 1);
INSERT INTO #TABLE VALUES (1, '1', 1);
INSERT INTO #TABLE VALUES (1, '1', 1);
WITH CTE AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY Testing) AS RowID
,Testing
,Testing2
,Testing3
FROM #TABLE
)
UPDATE CTE
SET Testing = 2, Testing2 = '2', Testing3 = 0
WHERE RowID = 1
;
SELECT * FROM #TABLE
;
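Updating the CTE writes through to the underlying table, so the final SELECT should show exactly one row changed to (2, '2', 0) while the other seven rows remain (1, '1', 1). Because all eight rows are identical, which physical row receives RowID = 1 is arbitrary, but exactly one always does.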
I've inherited some fun SQL and am trying to figure out how to eliminate rows with duplicate IDs. Our indexes are stored in a somewhat columnar format, and then we pivot all the rows into one, with the values as different columns.
The below sample returns three rows of unique data, but the IDs are duplicated. I need just two rows with unique IDs (and the other columns that go along with it). I know I'll be losing some data, but I just need one matching row per ID to the query (first, top, oldest, newest, whatever).
I've tried using DISTINCT, GROUP BY, and ROW_NUMBER, but I keep getting the syntax wrong, or using them in the wrong place.
I'm also open to rewriting the query completely in a way that is reusable as I currently have to generate this on the fly (cardtypes and cardindexes are user defined) and would love to be able to create a stored procedure. Thanks in advance!
declare #cardtypes table ([ID] int, [Name] nvarchar(50))
declare #cards table ([ID] int, [CardTypeID] int, [Name] nvarchar(50))
declare #cardindexes table ([ID] int, [CardID] int, [IndexType] int, [StringVal] nvarchar(255), [DateVal] datetime)
INSERT INTO #cardtypes VALUES (1, 'Funny Cards')
INSERT INTO #cardtypes VALUES (2, 'Sad Cards')
INSERT INTO #cards VALUES (1, 1, 'Bunnies')
INSERT INTO #cards VALUES (2, 1, 'Dogs')
INSERT INTO #cards VALUES (3, 1, 'Cat')
INSERT INTO #cards VALUES (4, 1, 'Cat2')
INSERT INTO #cardindexes VALUES (1, 1, 1, 'Bunnies', null)
INSERT INTO #cardindexes VALUES (2, 1, 1, 'playing', null)
INSERT INTO #cardindexes VALUES (3, 1, 2, null, '2014-09-21')
INSERT INTO #cardindexes VALUES (4, 2, 1, 'Dogs', null)
INSERT INTO #cardindexes VALUES (5, 2, 1, 'playing', null)
INSERT INTO #cardindexes VALUES (6, 2, 1, 'poker', null)
INSERT INTO #cardindexes VALUES (7, 2, 2, null, '2014-09-22')
SELECT TOP(100)
[ID] = c.[ID],
[Name] = c.[Name],
[Keyword] = [colKeyword].[StringVal],
[DateAdded] = [colDateAdded].[DateVal]
FROM #cards AS c
LEFT JOIN #cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN #cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
ORDER BY [DateAdded]
Edit:

While both solutions are valid, I ended up using the MAX() solution from @popovitsj as it was easier to implement. The issue of data coming from multiple rows doesn't really factor in for me, as all rows are essentially part of the same record. I will most likely use both solutions depending on my needs.
Here's my updated query (as it didn't quite match the answer):
SELECT TOP(100)
[ID] = c.[ID],
[Name] = MAX(c.[Name]),
[Keyword] = MAX([colKeyword].[StringVal]),
[DateAdded] = MAX([colDateAdded].[DateVal])
FROM #cards AS c
LEFT JOIN #cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN #cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
GROUP BY c.ID
ORDER BY [DateAdded]
You could use MAX or MIN to 'decide' what to display for the other columns of the duplicate rows.
SELECT ID, MAX(Name), MAX(Keyword), MAX(DateAdded)
(...)
GROUP BY ID;
Using the ROW_NUMBER window function along with a CTE will do this pretty well. For example:
;With preResult AS (
SELECT TOP(100)
[ID] = c.[ID],
[Name] = c.[Name],
[Keyword] = [colKeyword].[StringVal],
[DateAdded] = [colDateAdded].[DateVal],
ROW_NUMBER()OVER(PARTITION BY c.ID ORDER BY [colDateAdded].[DateVal]) rn
FROM #cards AS c
LEFT JOIN #cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN #cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
ORDER BY [DateAdded]
)
SELECT * from preResult WHERE rn = 1
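The ORDER BY inside the OVER() clause controls which of the duplicate rows survives; for example, ordering by [colDateAdded].[DateVal] DESC would keep the newest row per ID instead of the oldest.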
I have 2 tables: sets and groups. Both are joined using a 3rd table, set_has_group.
I would like to get sets that have ALL groups that I specify
One way of doing it would be
SELECT column1, column2 FROM sets WHERE
id IN(SELECT set_id FROM set_has_group WHERE group_id = 1)
AND id IN(SELECT set_id FROM set_has_group WHERE group_id = 2)
AND id IN(SELECT set_id FROM set_has_group WHERE group_id = 3)
obviously this is not the most beautiful solution
I've also tried this:
SELECT column1, column2 FROM sets WHERE
id IN(SELECT set_id FROM set_has_group WHERE group_id IN(1,2,3) GROUP BY group_id
HAVING COUNT(*) = 3)
This looks prettier, but it takes forever to execute: while the first query runs in about 200 ms, the second one takes more than 1 minute.
Any idea why that is?
UPDATE:
I've played with this some more and I modified the 2nd query like this
SELECT columns FROM `set` WHERE id IN(
select set_id FROM
(
SELECT set_id FROM set_has_group
WHERE group_id IN(1,2,3)
GROUP BY set_id HAVING COUNT(*) = 3
) as temp
)
that is really fast
It's the same as the 2nd query above, just wrapped in another derived table. Pretty strange.
I am suspecting a small mistyping in the second query: GROUP BY group_id should almost certainly be GROUP BY set_id (the fast rewrite in your update does exactly that).

Still, I am not sure. The second query is probably executed via a full table scan, while the IN in the first one is effectively transformed into an EXISTS. So you can try using EXISTS, for example via a correlated count:
...
where 3 = (select count(*) from set_has_group
where group_id in (1, 2, 3) and set_id = id
group by set_id)
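For reference, the literal EXISTS rewrite of the first query would look like this (a sketch using the table names from the question):

SELECT column1, column2
FROM sets s
WHERE EXISTS (SELECT 1 FROM set_has_group g WHERE g.set_id = s.id AND g.group_id = 1)
  AND EXISTS (SELECT 1 FROM set_has_group g WHERE g.set_id = s.id AND g.group_id = 2)
  AND EXISTS (SELECT 1 FROM set_has_group g WHERE g.set_id = s.id AND g.group_id = 3);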
Assuming SQL Server, here is a working example with a JOIN that should perform better than the IN clauses you are using, as long as your primary and foreign keys are set correctly. I have joined 5 sets to 3 groups, but sets 4 and 5 are not part of group 3 and will not show in the result. Note that this query is not scalable (e.g. searching groups 4, 5, 7, 8 and 13 would require code modifications unless you parse the input parameters into a table variable).
set nocount on
declare #sets table
(
Id INT Identity (1, 1),
Column1 VarChar (50),
Column2 VarChar (50)
)
declare #Set_Has_Group table
(
Set_Id Int,
Group_Id Int
)
insert into @sets values (newid(), newid())
insert into @sets values (newid(), newid())
insert into @sets values (newid(), newid())
insert into @sets values (newid(), newid())
insert into @sets values (newid(), newid())
update @sets set column1 = 'Column1 at Row ' + Convert (varchar, id)
update @sets set column2 = 'Column2 at Row ' + Convert (varchar, id)
insert into @Set_Has_Group values (1, 1)
insert into @Set_Has_Group values (1, 2)
insert into @Set_Has_Group values (1, 3)
insert into @Set_Has_Group values (2, 1)
insert into @Set_Has_Group values (2, 2)
insert into @Set_Has_Group values (2, 3)
insert into @Set_Has_Group values (3, 1)
insert into @Set_Has_Group values (3, 2)
insert into @Set_Has_Group values (3, 3)
insert into @Set_Has_Group values (4, 1)
insert into @Set_Has_Group values (4, 2)
insert into @Set_Has_Group values (5, 1)
insert into @Set_Has_Group values (5, 2)
/* your query with IN */
SELECT column1, column2 FROM @sets WHERE
id IN(SELECT set_id FROM @Set_Has_Group WHERE group_id = 1)
AND id IN(SELECT set_id FROM @Set_Has_Group WHERE group_id = 2)
AND id IN(SELECT set_id FROM @Set_Has_Group WHERE group_id = 3)
/* my query with JOIN */
SELECT * -- Column1, Column2
FROM @sets sets
WHERE 3 = (
    SELECT Count (1)
    FROM @Set_Has_Group Set_Has_Group
    WHERE 1=1
    AND sets.Id = Set_Has_Group.Set_Id
    AND Set_Has_Group.Group_ID IN (1, 2, 3)
    Group by Set_Id
)
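With this sample data, both queries return sets 1, 2 and 3 only; sets 4 and 5 drop out because they belong to groups 1 and 2 but not to group 3.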
Here's a solution that uses a non-correlated subquery and no GROUP BY:
SELECT column1, column2
FROM sets
WHERE id IN (
SELECT g1.set_id FROM set_has_group g1
JOIN set_has_group g2 ON (g1.set_id = g2.set_id)
JOIN set_has_group g3 ON (g1.set_id = g3.set_id)
WHERE g1.group_id = 1 AND g2.group_id = 2 AND g3.group_id = 3);
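Each self-join leg asserts membership in one group, so a set missing any of the three groups produces no row at all; to require additional groups, add one more join per group_id.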