problem with update query - sql

I have a query
-- Sets Col1 to 'Y' on at most 1000 rows of Table_1, taken from the rows where
-- col2 and col3 are both different from 1.
UPDATE Table_1 SET Col1='Y' WHERE ROWID IN (
select ROWID from (
SELECT BUS_ID,
-- Numbers the rows inside each BUS_ID; ordering a partition by its own key
-- leaves the choice of row number 1 within a BUS_ID arbitrary.
row_number() over (partition by BUS_ID order by BUS_ID) dupe_count,
rowid
from Table_1
WHERE col2 <> 1
AND col3 <> 1
-- Shuffle the candidate rows before the ROWNUM cap is applied.
order by dbms_random.value
) ft
-- Keep one row per BUS_ID ...
where ft.dupe_count = 1
-- ... and stop after 1000 of them.
AND ROWNUM <= 1000
);
updates only 1000 rows in table Table_1.
But if i write
-- Same UPDATE as the first statement, with one extra predicate that ties
-- ft.BUS_ID to the row of Table_1 currently being updated.
UPDATE Table_1 SET Col1='Y' WHERE ROWID IN (
select ROWID from (
SELECT BUS_ID,
row_number() over (partition by BUS_ID order by BUS_ID) dupe_count,
rowid
from Table_1
WHERE col2 <> 1
AND col3 <> 1
order by dbms_random.value
) ft
where ft.dupe_count = 1
-- Table_1 here is the table named in the outer UPDATE, so this line makes the
-- subquery depend on the outer row (a correlated subquery).
and Table_1.BUS_ID = ft.BUS_ID
-- NOTE(review): with the correlation above, this cap is applied per outer row
-- rather than once for the whole statement.
AND ROWNUM <= 1000
);
it updates all matching rows of the table irrespective of ROWNUM <= 1000, i.e. if I add
Table_1.BUS_ID = ft.BUS_ID
then it updates all rows that satisfy col2 <> 1 AND col3 <> 1 and ft.dupe_count = 1.
The table is having following structure:
BUS_ID | col1 | col2 | col3
1 | | 0 | 0
2 | | 0 | 0
1 | | 0 | 0
3 | | 1 | 1.
Any idea why this is happening? Please help.

Niraj,
An ordinary (uncorrelated) subquery is evaluated once for the whole statement. A correlated subquery is evaluated once for each row of the outer query. The line Table_1.BUS_ID = ft.BUS_ID makes the subquery in your second update statement correlated. Because it is then evaluated separately for each row, ROWNUM starts over every time, so the ROWNUM <= 1000 predicate is always satisfied.
Regards,
Rob.

Related

SELECT statement that shows continuous data with condition

I consider myself good at SQL but failed at this problem.
I need a SELECT statement that shows all rows above 100 if there are
3 rows or more with 100 next to it.
Given Table "Trend":
| id | volume |
+----+---------+
| 0 | 200 |
| 1 | 90 |
| 2 | 101 |
| 3 | 120 |
| 4 | 200 |
| 5 | 10 |
| 6 | 400 |
I need a SELECT statement to produce:
| 2 | 101 |
| 3 | 120 |
| 4 | 200 |
I suspect that you are after the following logic:
-- Returns the rows above 100 whose 5-row neighbourhood (two rows before, the
-- row itself, two rows after, in id order) contains at least 3 rows above 100.
-- NOTE(review): 3 of 5 rows being above 100 does not require them to be
-- adjacent; e.g. volumes 200, 90, 101, 90, 200 give cnt = 3 for the middle
-- row although it has no neighbour above 100. Verify this matches the intent.
select *
from (
select t.*,
-- cnt: number of rows above 100 inside the 5-row frame centred on this row
sum(case when volume > 100 then 1 else 0 end) over(order by id rows between 2 preceding and 2 following) cnt
from mytable t
) t
where volume > 100 and cnt >= 3
This counts how many values are above 100 in the range made of the two preceding rows, the current row and the next two rows. Then we filter on rows whose window count is 3 or more.
This uses a syntax that most database support (provided that window functions are available). Neater expressions may be available depending on the actual database you are using.
In MySQL:
sum(volume > 100) over(order by id rows between 2 preceding and 2 following) cnt
In Postgres:
count(*) filter(where volume > 100) over(order by id rows between 2 preceding and 2 following) cnt
Or:
sum((volume > 100)::int) over(order by id rows between 2 preceding and 2 following) cnt
This is tricky because you want the original rows . . . I am going to suggest lag() and lead():
-- Keep a row above 100 when it is the last, middle or first member of three
-- consecutive rows (in id order) that are all above 100.
with neighbours as (
    select
        t.id,
        t.volume,
        lag(t.volume, 2)  over (order by t.id) as two_back,
        lag(t.volume, 1)  over (order by t.id) as one_back,
        lead(t.volume, 1) over (order by t.id) as one_ahead,
        lead(t.volume, 2) over (order by t.id) as two_ahead
    from t
)
select id, volume
from neighbours
where volume > 100
  and (
        (two_back > 100 and one_back > 100)      -- row closes the run
     or (one_back > 100 and one_ahead > 100)     -- row sits in the middle
     or (one_ahead > 100 and two_ahead > 100)    -- row opens the run
      );
Another method is to treat this as a gaps-and-islands problem. This makes the solution more generalizable. You can assign a group by counting the number of rows less than or equal to 100 up to each row. Then count the number that are greater than 100 to see if those groups qualify to be in the final results:
-- Gaps-and-islands: rows above 100 that sit between the same two "low" rows
-- share a group; a group qualifies when it holds at least 3 rows above 100.
with islands as (
    select
        t.id,
        t.volume,
        -- every row at or below 100 bumps the running counter, so an unbroken
        -- run of rows above 100 keeps one grp value
        sum(case when t.volume <= 100 then 1 else 0 end) over (order by t.id) as grp
    from t
),
sized as (
    select
        i.id,
        i.volume,
        -- number of rows above 100 in this row's group
        count(case when i.volume > 100 then 1 end) over (partition by i.grp) as cnt
    from islands i
)
select id, volume
from sized
where cnt >= 3
  and volume > 100;
Here is a db<>fiddle with these two approaches.
Key point here is "3 rows or more". MATCH_RECOGNIZE could be used:
-- Returns the rows of the longest streak of 3 or more consecutive rows
-- (in id order) whose volume is above 100. Remove the final
-- ORDER BY ... FETCH FIRST clause to keep every qualifying streak instead.
SELECT *
FROM trend
MATCH_RECOGNIZE (
    ORDER BY id                   -- ordering of a streak
    MEASURES FINAL COUNT(*) AS l  -- count "per" match, repeated on each of its rows
    ALL ROWS PER MATCH            -- get all rows
    PATTERN (a{3,})               -- 3 or more
    DEFINE a AS volume > 100      -- condition of streak: strictly above 100
)
ORDER BY l DESC FETCH FIRST 1 ROWS WITH TIES;
-- choose the group that has the longest streak
The strength of this approach is the PATTERN part, which can be modified to handle different scenarios, such as a{3,5} (between 3 and 5 occurrences) or a{4} (exactly 4 occurrences). More conditions can be defined, which makes it possible to build complex pattern detection.
db<>fiddle demo
Get the min value of volume for all consecutive 3 rows of the table.
Then join to the table and keep only the ones belonging to a group that has min > 100:
-- For every row, look at the 3-row window that starts there (the row and the
-- next two by id). When the smallest volume in a complete window is above
-- 100, all three of its rows are returned; DISTINCT removes rows that belong
-- to several qualifying windows.
with windows as (
    select
        s.id as first_id,
        lead(s.id, 1) over (order by s.id) as second_id,
        lead(s.id, 2) over (order by s.id) as third_id,
        min(s.volume) over (order by s.id rows between current row and 2 following) as lowest_volume
    from Trend s
)
select distinct t.*
from Trend t
inner join windows w
    on t.id = w.first_id
    or t.id = w.second_id
    or t.id = w.third_id
where w.lowest_volume > 100
  -- windows cut short by the end of the table have fewer than three rows
  and w.second_id is not null
  and w.third_id is not null
See the demo for SQL Server, MySql, Postgresql, Oracle, SQLite.
Results:
> id | volume
> -: | -----:
> 2 | 101
> 3 | 120
> 4 | 200
a simplistic approach:
-- Sample data for trying the query:
--CREATE TABLE Trend (id integer, volume integer);
--insert into Trend VALUES
-- (0,200),
-- (1,90),
-- (2,101),
-- (3,120),
-- (4,200),
-- (5,10),
-- (6,400);
--
-- Every branch builds the same triples (t1, t2, t3) of directly adjacent rows
-- (no row with an id in between) that are all above 100, and emits one member
-- of the triple. UNION is used instead of UNION ALL because, in a run longer
-- than three rows, one row belongs to several triples and would otherwise be
-- returned more than once.
SELECT
    t1.id, t1.volume
FROM Trend t1
INNER JOIN Trend t2 ON t2.id>t1.id and t2.volume>100 and not exists (select * from Trend t5 where t5.id between t1.id+1 and t2.id-1)
INNER JOIN Trend t3 ON t3.id>t2.id and t3.volume>100 and not exists (select * from Trend t6 where t6.id between t2.id+1 and t3.id-1)
WHERE t1.volume>100
union
SELECT
    t2.id, t2.volume
FROM Trend t1
INNER JOIN Trend t2 ON t2.id>t1.id and t2.volume>100 and not exists (select * from Trend t5 where t5.id between t1.id+1 and t2.id-1)
INNER JOIN Trend t3 ON t3.id>t2.id and t3.volume>100 and not exists (select * from Trend t6 where t6.id between t2.id+1 and t3.id-1)
WHERE t1.volume>100
union
SELECT
    t3.id, t3.volume
FROM Trend t1
INNER JOIN Trend t2 ON t2.id>t1.id and t2.volume>100 and not exists (select * from Trend t5 where t5.id between t1.id+1 and t2.id-1)
INNER JOIN Trend t3 ON t3.id>t2.id and t3.volume>100 and not exists (select * from Trend t6 where t6.id between t2.id+1 and t3.id-1)
WHERE t1.volume>100

SQLite: Use subquery result in another subquery

I have following table with data
id | COL1
=========
1 | b
2 | z
3 | b
4 | c
5 | b
6 | a
7 | b
8 | c
9 | a
So i know ID of 'z' (ID = 2) in the table and i will call it Z_ID.
I need to retrieve rows between 'a' and 'c' (including 'a' and 'c').
It must be first 'a' that comes after Z_ID.
'c' must come after Z_ID and after 'a' that i found previously.
Result that i am seeking is:
id | COL1
=========
6 | a
7 | b
8 | c
My SELECT looks like this
-- Returns the rows from the first 'a' after id 2 up to the first 'c' that
-- follows that 'a', both ends included.
SELECT *
FROM table
-- lower bound: smallest id of an 'a' row after id 2
WHERE id >= (
SELECT MIN(ID)
FROM table
WHERE COL1 = 'a' AND ID > 2
)
-- upper bound: smallest id of a 'c' row that lies after that 'a' row
AND id <= (
SELECT MIN(ID)
FROM table
WHERE COL1 = 'c'AND ID > 2 and ID > (
-- same lookup as the lower bound, repeated
SELECT MIN(ID)
FROM table
WHERE COL1 = 'a' AND ID > 2
)
)
I am getting the result that i want. But i am concerned about performance because i am using same subquery two times. Is there a way to reuse a result from first subquery?
Maybe there is cleaner way to get the result that i need?
Use a CTE which will return only once the result of the subquery that you use twice:
-- first_a holds, in a single row, the id of the first 'a' row after id 2.
-- It is computed once and used for both ends of the range.
WITH first_a AS (
    SELECT MIN(ID) AS a_id
    FROM tablename
    WHERE ID > 2
      AND COL1 = 'a'
)
SELECT t.*
FROM tablename t
CROSS JOIN first_a a
WHERE t.id BETWEEN a.a_id
               AND (
                    -- first 'c' row located after that 'a' row
                    SELECT MIN(c.ID)
                    FROM tablename c
                    WHERE c.COL1 = 'c'
                      AND c.ID > a.a_id
                   )
In your 2nd query's WHERE clause:
WHERE COL1 = 'c'AND ID > 2 and ID > (...
the condition AND ID > 2 is not needed because the next condition and ID > (... makes sure that ID will be greater than 2 so I don't use it either in my code.
See the demo.
Results:
| id | COL1 |
| --- | ---- |
| 6 | a |
| 7 | b |
| 8 | c |
You can use window functions for this:
-- Returns the rows from the first 'a' after the 'z' row up to and including
-- the first 'c' that follows that 'a'.
select t.*
from (select t.*,
             -- id of the first 'c' located after the chosen 'a'
             min(case when id > min_a_id and col1 = 'c' then id end) over () as min_c_id
      from (select t.*,
                   -- id of the first 'a'; only rows after 'z' reach this level
                   min(case when col1 = 'a' then id end) over () as min_a_id
            from (select t.*,
                         -- id of the 'z' row, repeated on every row
                         min(case when col1 = 'z' then id end) over () as z_id
                  from t
                 ) t
            where id > z_id
           ) t
     ) t
-- <= rather than <: the closing 'c' row is part of the requested result
where id >= min_a_id and id <= min_c_id;

UPDATE Table value from other table like distribution

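I have a table (Table 1) whose rows must be assigned leave types from another table (Table 2) on a first-come, first-served basis: the rows of Table 2 are consumed one after another, each for as many rows of Table 1 as its Total allows.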
Table 1
ATTIME | Absent | LeaveType
-----------------------------
2019-01-01| 1 |
2019-01-02| 1 |
2019-01-03| 1 |
2019-01-04| 1 |
2019-01-05| 1 |
2019-01-06| 1 |
Table 2
LeaveType | Total
-------------------
Casual | 3
Sick | 2
I have achieved this using a cursor, but I want a set-based UPDATE query, or any other option that improves my execution plan.
Final Result will be....
Table 1
ATTIME | Absent | LeaveType
-----------------------------
2019-01-01| 1 | CL
2019-01-02| 1 | CL
2019-01-03| 1 | CL
2019-01-04| 1 | SL
2019-01-05| 1 | SL
2019-01-06| 1 |
In a supported version of SQL Server, you would use a cumulative sum and row_number():
-- Number the table1 rows by attime, compute a running total of table2.total
-- in leavetype order, and give each leave type to the block of row numbers
-- that ends at its running total and is `total` rows long.
with numbered as (
    select
        t1.*,
        row_number() over (order by attime) as seqnum
    from table1 t1
)
update numbered
set leavetype = alloc.leavetype
from (
    select
        t2.*,
        sum(total) over (order by leavetype) as runningtotal
    from table2 t2
) alloc
where numbered.seqnum between alloc.runningtotal - alloc.total + 1
                          and alloc.runningtotal;
In archaic, unsupported versions of SQL Server, the cumulative sum is more cumbersome. One method uses a correlated subquery:
-- Same distribution as the window-function version, for servers without
-- cumulative SUM() OVER (ORDER BY ...): the running total is computed with a
-- correlated subquery instead.
with toupdate as (
    -- give every table1 row a position 1..n in attime order
    select t1.*,
           row_number() over (order by attime) as seqnum
    from table1 t1
)
update toupdate
set leavetype = t2.leavetype
from (select t2.*,
             -- sum of total over this leave type and all that sort before it
             (select sum(tt2.total)
              from table2 tt2
              where tt2.leavetype <= t2.leavetype
             ) as runningtotal
      from table2 t2
     ) t2
-- The numbered rows are exposed as toupdate; the alias t1 exists only inside
-- the CTE and cannot be referenced here.
where toupdate.seqnum between t2.runningtotal + 1 - t2.total and t2.runningtotal;

How to get closest n rows for specific row in table?

I have a table foo with its primary key id and some other columns.
My goal is to find for instance rows with id=3 and id=4 and rows with id=6 and id=7 for row with id=5 - in case I would like to find 2 closest previous and next rows.
In case there is only one or no such rows (e.g. for id=2 there is only previous row) I would like to get only possible ones.
The problem is there can be some rows missing.
Is there a common practice to make such queries?
I would try the following:
-- The two closest rows after the given id.
SELECT *
FROM table
WHERE ? < id
ORDER BY id
LIMIT 2
followed by
-- The two closest rows before the given id. Strictly less than: the row with
-- the given id is not one of its own neighbours.
SELECT * FROM table WHERE id < ? ORDER BY id DESC LIMIT 2
You may be able to combine the above into the following:
-- Two closest rows after the given id plus two closest rows before it.
-- Each half is wrapped in a derived table so that its ORDER BY / LIMIT applies
-- to that half only; on a bare UNION branch they are rejected or applied to
-- the combined result, depending on the engine.
-- The halves cannot overlap (id > ? versus id < ?), so UNION ALL is enough.
SELECT * FROM (SELECT * FROM table WHERE id > ? ORDER BY id ASC LIMIT 2) AS next_rows
UNION ALL
SELECT * FROM (SELECT * FROM table WHERE id < ? ORDER BY id DESC LIMIT 2) AS previous_rows
I think this would fit your description.
-- Returns the rows whose id lies within 2 of #n, excluding #n itself.
-- NOTE(review): this is a range on id values, not a count of rows, so when
-- ids are missing it can return fewer than two neighbours on a side.
Select * from table where id between #n-2 and #n+2 and id <> #n
One way is this:
-- Inline sample data, then: the start row (10) with the 3 rows before it,
-- followed by the 3 rows after it, listed in id order.
with your_table(id) as (
              select 1
    union all select 2
    union all select 4
    union all select 5
    union all select 10
    union all select 11
    union all select 12
    union all select 13
    union all select 14
)
select nearby.*
from (
    -- start row plus the 3 rows before it, hence 3 + 1
    (select lower_half.* from your_table lower_half where lower_half.id <= 10 order by lower_half.id desc limit 3 + 1)
    union all
    -- the 3 rows after the start row
    (select upper_half.* from your_table upper_half where upper_half.id > 10 order by upper_half.id asc limit 3)
) nearby
order by nearby.id
(Here 10 is start point and 3 is n rows you want)
This is a possible solution by numbering all the records and fetching those where row number is 2 rows greater or lower than the selected ID.
create table foo(id int);
insert into foo values (1),(2),(4),(6),(7),(8),(11),(12);
-- using ID = 6
-- Number the rows by id, then keep those whose row number is within 2 of the
-- row number of the selected id (the selected row itself is included).
with numbered as (
    select
        id,
        row_number() over (order by id) as rn
    from foo
)
select id, rn
from numbered
where rn between (select rn from numbered where id = 6) - 2
             and (select rn from numbered where id = 6) + 2;
id | rn
-: | -:
2 | 2
4 | 3
6 | 4
7 | 5
8 | 6
-- using ID = 2
-- Same query for a row near the start of the table: fewer than two rows
-- precede it, so only the rows that exist on that side are returned.
with numbered as (
    select
        id,
        row_number() over (order by id) as rn
    from foo
)
select id, rn
from numbered
where rn between (select rn from numbered where id = 2) - 2
             and (select rn from numbered where id = 2) + 2;
id | rn
-: | -:
1 | 1
2 | 2
4 | 3
6 | 4
dbfiddle here

Any other alternative to write this SQL query

I need to select data based on three conditions:
Find the latest date (StorageDate column) in the table for each ID.
Check whether there is more than one entry for that date (found in the first step) for the same ID (ID column),
and then check whether DuplicateTypeID = 2.
So if table has following data:
ID |StorageDate | DuplicateTypeID
1 |2014-10-22 | 1
1 |2014-10-22 | 2
1 |2014-10-18 | 1
2 |2014-10-12 | 1
3 |2014-10-11 | 1
4 |2014-09-02 | 1
4 |2014-09-02 | 2
Then I should get following results
ID
1
4
I have written following query but it is really slow, I was wondering if anyone has better way to write it.
-- Returns the RecordIDs that have more than one row on their latest
-- StorageDate and a row with DuplicateTypeID = 2 on that date.
SELECT DISTINCT(TD.RecordID)
FROM dbo.MyTable TD
JOIN (
-- PT: RecordIDs with more than one row on their latest StorageDate
SELECT T1.RecordID, T2.MaxDate,COUNT(*) AS RecordCount
FROM MyTable T1 WITH (nolock)
JOIN (
-- T2: latest StorageDate per RecordID
SELECT RecordID, MAX(StorageDate) AS MaxDate
FROM MyTable WITH (nolock)
GROUP BY RecordID)T2
ON T1.RecordID = T2.RecordID AND T1.StorageDate = T2.MaxDate
GROUP BY T1.RecordID, T2.MaxDate
HAVING COUNT(*) > 1
)PT ON TD.RecordID = PT.RecordID AND TD.StorageDate = PT.MaxDate
-- only rows on the latest date reach this filter, because of the join on MaxDate
WHERE TD.DuplicateTypeID = 2
Try this and see how the performance goes:
-- One pass over MyTable: rank each ID's rows by StorageDate (newest first)
-- and count the rows that share the same ID and StorageDate.
;WITH ranked AS (
    SELECT
        ID,
        DuplicateTypeID,
        RANK() OVER (PARTITION BY ID ORDER BY StorageDate DESC) AS date_rank,
        COUNT(ID) OVER (PARTITION BY ID, StorageDate) AS rows_on_date
    FROM MyTable
)
SELECT DISTINCT ID
FROM ranked
WHERE DuplicateTypeID = 2   -- DuplicateTypeID = 2
  AND rows_on_date > 1      -- more than 1 entry for date
  AND date_rank = 1         -- latest date for each ID
You can use the analytic function RANK. Can you try this query?
-- Keep only the rows on each recordId's latest StorageDate (rank 1), then
-- report the recordIds that have more than one such row, at least one of
-- them with duplicatetypeid = 2.
WITH latest AS (
    SELECT
        m.*,
        RANK() OVER (PARTITION BY m.recordId ORDER BY m.[StorageDate] DESC) AS rn
    FROM mytable m
)
SELECT latest.recordId
FROM latest
WHERE latest.rn = 1
GROUP BY latest.recordId
HAVING COUNT(*) > 1
   AND MAX(CASE WHEN latest.duplicatetypeid = 2 THEN 1 ELSE 0 END) = 1