select over partiton by ... order by .. used long time - sql

select a.id
from (select /*+index(test_table, test_index)*/
row_number() over (partition by a, b, c order by d desc) rn,
id
from test_table
) a
where a.rn = 1
test_index(a, b, c, d)
limit access to 500, cost 9s)
How should I fix it

Out of curiosity, how long does this take with a correlated subquery instead?
select t.id
from test_table t
where t.d = (select max(t2.d)
from test_table t2
where t2.a = t.a and t2.b = t.b and t2.c = t.c
);
Or using aggregation?
select max(t.id) keep (dense_rank first order by d desc)
from test_table t
group by a, b, c;

Related

Find the first N nearest points in Bigquery

To find the nearest point and its distance in Bigquery I am using this query
WITH table_a AS (
SELECT id, geom
FROM bqtable
), table_b AS (
SELECT id, geom
FROM bqtable
)
SELECT AS VALUE ARRAY_AGG(STRUCT<id_a STRING,id_b STRING, dist FLOAT64>(a.id,b.id,ST_DISTANCE(a.geom, b.geom)) ORDER BY ST_DISTANCE(a.geom, b.geom) LIMIT 1)[OFFSET(0)]
FROM (SELECT id, geom FROM table_a) a
CROSS JOIN (SELECT id, geom FROM table_b) b
WHERE a.id <> b.id
GROUP BY a.id
How can I modify this query to find the nearest 10 points and their distances?
Thanks!
One method uses ORDER BY, LIMIT, and UNNEST(). Using your approach:
SELECT AS VALUE s
FROM (SELECT ARRAY_AGG(STRUCT<id_a STRING,id_b STRING, dist FLOAT64>(a.id, b.id, ST_DISTANCE(a.geom, b.geom))
ORDER BY ST_DISTANCE(a.geom, b.geom)
LIMIT 10
) as ar
FROM (SELECT id, geom FROM table_a) a CROSS JOIN
(SELECT id, geom FROM table_b) b
WHERE a.id <> b.id
GROUP BY a.id
) ab CROSS JOIN
UNNEST(ab.ar) s;
A simpler method would be
select id_a, id_b, ST_DISTANCE(a.geom, b.geom) as dist
from table_a a cross join
table_b b
where a.id <> b.id
qualify row_number() over (partition by id_a order by dist) <= 10;

How to find duplicate rows if one of two certain columns are equal.. and a third & fourth column are equal?

Let's say I have a table with columns: A, B, C & D
Any two rows are considered a duplicate if:
A, B, C have equal values but not D
or
A, B, D have equal values but not C.
How do I get a set of duplicate rows? Using a CTE is OK.
I think you can do it with union all with the corresponding where conditions.
select * from tablename where a=b and b=c and a<>d
union all
select * from tablename where a=b and b=d and a<>c
Using a self join it's quite easy:
SELECT DISTINCT t1.*
FROM TableName t1
INNER JOIN TableName t2
ON T1.A = T2.A
AND T1.B = T2.B
AND (T1.C = T2.C OR T1.D = T2.D)
Assuming, of course, that if all 4 columns are equal it's a duplicated row as well...
However, if for some strange reason these rows are not considered as duplicates, you can change the conditions in the ON clause to this:
SELECT DISTINCT t1.*
FROM TableName t1
INNER JOIN TableName t2
ON T1.A = T2.A
AND T1.B = T2.B
AND (
(T1.C = T2.C AND T1.D <> T2.D)
OR (T1.C <> T2.C AND T1.D = T2.D)
)
You can use RANK() to detect duplicates without having to select from the table twice :
SELECT s.* FROM (
SELECT t.*,
RANK() OVER(PARTITION BY t.a,t.b,t.c ORDER BY t.d) as d_dif,
RANK() OVER(PARTITION BY t.a,t.b,t.D ORDER BY t.c) as c_dif
FROM YourTable) s
WHERE s.d_dif > 1 or s.c_dif > 1
RANK() as opposed to ROW_NUMBER() deals with duplicates, so if d / c will be the same, both records will get the same rank and won't be selected.

most recent (max) date for every id

a = id
b = date
c = NewestDate
For every a, I have b c
How can I get the newest date for every set of id's (MY 'a' column)? they don't all have the same end date. So I do need a max for all of them.
The data is in myTable,
so far i tried:
select *
into #myTable
from myTable
select
t.a
,t.b
,t.c
,(select max(b) from myTable) as c
from myTable t
left join #myTable t1
on t.a = t1.a and t.b = t1.b
order by a, b
The problem with the above code is that in 'c' it is placed the max date of them all, which is not what I actually want.
EDIT: the problem is now solved with the answer given by Dmitry Poliakov (thanks). Used:
SELECT a,
b,
max(b) OVER (PARTITION BY a) AS c
FROM myTable
ORDER BY a,b
you can select maximum date for each group of ids as
SELECT a,
b,
max(b) OVER (PARTITION BY a) AS c
FROM myTable
ORDER BY a,b
EDIT: one of possible solutions for the second(edited) part of question is
WITH cte AS (
SELECT a,
b,
max(b) OVER (PARTITION BY a) AS c,
d
FROM myTable
)
SELECT t1.a,
t1.b,
t1.c,
t1.d,
t2.d AS e
FROM cte t1
JOIN cte t2 ON t1.a=t2.a AND t1.c=t2.b
DECLARE #updates TABLE (a int,b date,c date)
INSERT INTO #updates VALUES
(1,GETDATE(),GETDATE()+10),
(2,GETDATE()+11,GETDATE()+13),
(2,GETDATE()+11,GETDATE()+14),
(3,GETDATE()+11,GETDATE()+13),
(1,GETDATE()+11,GETDATE()+13);
WITH
cte AS
(
SELECT a, max(c) latest
FROM #updates
GROUP BY a
)
SELECT *
FROM cte INNER JOIN #updates as d ON cte.a=d.a AND cte.latest = d.c
Builds a table to select the records with the newest updates and then joins this new table with your original table to extract all the fields in matching rows

SELECT Row Values WHERE MAX() is Column Value In GROUP BY Query

How can I select like this? Can I create a User defined Aggregate Function
SELECT Max(A),(SELECT TOP 1 FROM TheGroup Where B=Max(A)) FROM MyTable
where MyTable as Shown Below
A B C
--------------
1 2 S
3 4 S
4 5 T
6 7 T
I want a Query Like this
SELECT MAX(A),(B Where A=Max(A)),C FROM MYTable GROUP BY C
I'm Expecting the result as below
MAX(A) Condition C
-----------------------
3 4 S
6 7 T
SELECT A,B,C FROM
(SELECT *, ROW_NUMBER() OVER (PARTITION BY C ORDER BY A DESC) RN FROM MyTable)
WHERE RN = 1
(this query will always return only one row per C value)
OR
WITH CTE_Group AS
(
SELECT C, MAX(A) AS MaxA
FROM MyTable
GROUP BY C
)
SELECT g.MaxA, t.B, g.C
FROM MyTable t
INNER JOIN CTE_Group g ON t.A = g.MaxA AND t.C = g.C
(if there are multiple rows that have same Max(A) value - this query will return all of them)
Try Following Query :
SELECT TABLE1.A , TABLE2.B , TABLE1.C
FROM
(
SELECT MAX(A) AS A,C
FROM MYTable
GROUP BY C
) AS TABLE1
INNER JOIN
(
SELECT *
FROM MYTable
) AS TABLE2 ON TABLE1.A = TABLE2.A
SQLFIDDLE
you can do it by simple join query . join query always run faster then In query . Join query run only one time at the time of execution of the query . we can archive same result by using IN query .
SELECT t1.*
FROM YourTable t1
Left Outer Join YourTable t2 on t1.C=t2.C AND t1.A < t2.A
WHERE t2.A is null
how about this:
SELECT *
FROM MyTable
WHERE A IN (SELECT MAX(A) FROM MyTable GROUP BY C)
SELECT Max(A)
FROM MyTable
Where B=(SELECT Max(A) FROM MyTable)
update:
SELECT *
FROM MyTable
Where B=(SELECT Max(A) FROM MyTable)
update 2:
SELECT DISTINCT A, B
FROM MyTable
Where A=(SELECT Max(A) FROM MyTable GROUP BY C)
update 3:
ok, I think I understand what you're looking for now.. How about this:
SELECT *
FROM MyTable
Where A in (SELECT Max(A) FROM MyTable GROUP BY C)
WITH
cte AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY cola desc) AS Rno,
*
FROM
tbl
)
SELECT top 1
cola,colb
FROM
cte
order by Rno
Then try it:
WITH
cte AS
(
SELECT
ROW_NUMBER() OVER (PARTITION BY col3 ORDER BY col1 desc) AS Rno,
*
FROM
tbl
)
SELECT
col1,col2,col3
FROM
cte
WHERE Rno=1

Oracle subquery top 1 result

I want to get the top 1 row for each unique value of b with the minimum value of c for that particular value of b. Even though there can be more than 1 row with the same min value (just chose the first one)
myTable
a integer (unique)
b integer
c integer
I've tried this query
SELECT t1.*
FROM myTable t1,
(SELECT b,
MIN(c) as c
FROM myTable
GROUP BY b) t2
WHERE t1.b = t2.b
AND t1.c = t2.c
However, in this table it's possible for there to be more than 1 instance of the minimum value of c for a given value of b. The above query generates duplicates under these conditions.
I've got a feeling that I need to use rownum somewhere, but I'm not quite sure where.
You can use ROW_NUMBER:
SELECT *
FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY b ORDER BY c) AS rn
FROM myTable
) AS T1
WHERE rn = 1
To tie-break between the equal c's, you will need to subquery one level further to get the min-a for each group of equal c's per b. (A mouthful!)
select t0.*
FROM myTable t0
inner join (
select t1.b, t1.c, MIN(a) as a
from myTable t1
inner join (
select b, min(c) as c
from myTable
group by b
) t2 on t1.b = t2.b and t1.c = t2.c
group by t1.b, t1.c
) t3 on t3.a = t0.a and t3.b = t0.b and t3.c = t0.c