-- Return the id of the row with the highest d inside each (a, b, c) group:
-- number the rows of every group by d descending and keep number 1.
SELECT ranked.id
FROM (
    SELECT /*+index(test_table, test_index)*/
        id,
        ROW_NUMBER() OVER (PARTITION BY a, b, c ORDER BY d DESC) rn
    FROM test_table
) ranked
WHERE ranked.rn = 1
test_index(a, b, c, d)
limit access to 500, cost 9s)
How should I fix it
Out of curiosity, how long does this take with a correlated subquery instead?
-- Keep every row whose d equals the largest d found in its own (a, b, c) group.
SELECT cur.id
FROM test_table cur
WHERE cur.d = (
    SELECT MAX(grp.d)
    FROM test_table grp
    WHERE grp.a = cur.a
      AND grp.b = cur.b
      AND grp.c = cur.c
);
Or using aggregation?
-- One row per (a, b, c): among the rows ranked first by d descending, take the largest id.
SELECT
    MAX(src.id) KEEP (DENSE_RANK FIRST ORDER BY src.d DESC)
FROM test_table src
GROUP BY
    src.a,
    src.b,
    src.c;
Related
To find the nearest point and its distance in Bigquery I am using this query
-- For each id in table_a, emit one struct (id_a, id_b, dist) describing the closest
-- row of table_b that has a different id.
WITH table_a AS (
  SELECT id, geom
  FROM bqtable
),
table_b AS (
  SELECT id, geom
  FROM bqtable
)
SELECT AS VALUE
  ARRAY_AGG(
    STRUCT<id_a STRING,id_b STRING, dist FLOAT64>(src.id, dst.id, ST_DISTANCE(src.geom, dst.geom))
    ORDER BY ST_DISTANCE(src.geom, dst.geom)
    LIMIT 1
  )[OFFSET(0)]
FROM table_a AS src
CROSS JOIN table_b AS dst
WHERE src.id <> dst.id
GROUP BY src.id
How can I modify this query to find the nearest 10 points and their distances?
Thanks!
One method uses ORDER BY, LIMIT, and UNNEST(). Using your approach:
-- Collect, per table_a id, the 10 closest table_b rows (different id) into an array,
-- then flatten the arrays so each struct becomes its own output row.
SELECT AS VALUE nearest
FROM (
  SELECT
    ARRAY_AGG(
      STRUCT<id_a STRING,id_b STRING, dist FLOAT64>(src.id, dst.id, ST_DISTANCE(src.geom, dst.geom))
      ORDER BY ST_DISTANCE(src.geom, dst.geom)
      LIMIT 10
    ) AS ar
  FROM table_a AS src
  CROSS JOIN table_b AS dst
  WHERE src.id <> dst.id
  GROUP BY src.id
) AS per_point
CROSS JOIN UNNEST(per_point.ar) AS nearest;
A simpler method would be
-- Pair every table_a row with every table_b row that has a different id, then keep
-- the 10 closest pairs per table_a id.
-- The inputs only expose id and geom, so id_a / id_b must be projected from a.id / b.id,
-- and the window is written against the underlying expressions rather than the aliases.
select a.id as id_a,
       b.id as id_b,
       ST_DISTANCE(a.geom, b.geom) as dist
from table_a a cross join
     table_b b
where a.id <> b.id
qualify row_number() over (partition by a.id order by ST_DISTANCE(a.geom, b.geom)) <= 10;
Let's say I have a table with columns: A, B, C & D
Any two rows are considered a duplicate if:
A, B, C have equal values but not D
or
A, B, D have equal values but not C.
How do I get a set of duplicate rows? Using a CTE is OK.
I think you can do it with union all with the corresponding where conditions.
-- A duplicate is defined by comparing a row with OTHER rows, so each branch checks
-- for the existence of a partner row instead of comparing columns within one row.
-- Branch 1: some other row has the same a, b, c but a different d.
select t.*
from tablename t
where exists (select 1
              from tablename o
              where o.a = t.a and o.b = t.b and o.c = t.c and o.d <> t.d)
union all
-- Branch 2: some other row has the same a, b, d but a different c.
-- A row that satisfies both branches is listed twice; use UNION to list it once.
select t.*
from tablename t
where exists (select 1
              from tablename o
              where o.a = t.a and o.b = t.b and o.d = t.d and o.c <> t.c)
Using a self join it's quite easy:
-- Self-join that pairs rows agreeing on A and B plus at least one of C or D;
-- DISTINCT collapses the repeated t1 rows produced by multiple partners.
-- NOTE(review): nothing in the ON clause keeps a row from pairing with itself, so any
-- row with non-NULL A, B and C (or D) matches on its own and is returned.
SELECT DISTINCT t1.*
FROM TableName t1
INNER JOIN TableName t2
ON T1.A = T2.A
AND T1.B = T2.B
AND (T1.C = T2.C OR T1.D = T2.D)
Assuming, of course, that if all 4 columns are equal it's a duplicated row as well...
However, if for some strange reason these rows are not considered as duplicates, you can change the conditions in the ON clause to this:
-- Rows that agree with some other row on A and B and on exactly one of C / D
-- (the other of the two columns must differ).
SELECT DISTINCT cur.*
FROM TableName cur
INNER JOIN TableName other
    ON  other.A = cur.A
    AND other.B = cur.B
    AND (
            (other.C = cur.C AND other.D <> cur.D)
         OR (other.D = cur.D AND other.C <> cur.C)
        )
You can use RANK() to detect duplicates without having to select from the table twice :
-- Flags rows that rank after the first row of their (a, b, c) group when ordered by d,
-- or after the first row of their (a, b, d) group when ordered by c.
-- RANK() gives tied rows the same rank, so rows equal on all four columns share a rank.
-- Note: the rank-1 row(s) of each group are not returned by the filter below.
SELECT s.* FROM (
    SELECT t.*,
           RANK() OVER (PARTITION BY t.a, t.b, t.c ORDER BY t.d) AS d_dif,
           RANK() OVER (PARTITION BY t.a, t.b, t.d ORDER BY t.c) AS c_dif
    FROM YourTable t  -- the alias is required: the select list refers to t.*
) s
WHERE s.d_dif > 1 OR s.c_dif > 1
Unlike ROW_NUMBER(), RANK() assigns tied rows the same rank, so if two records have the same d (or c) within a partition, both get the same rank and neither is selected on account of that tie.
a = id
b = date
c = NewestDate
For every a, I have b c
How can I get the newest date for every set of ids (my 'a' column)? They don't all have the same end date, so I need a separate max for each of them.
The data is in myTable,
so far i tried:
-- Step 1: copy myTable into the temp table #myTable.
SELECT *
INTO #myTable
FROM myTable

-- Step 2: list a, b, c from myTable next to a second column named c that holds the
-- maximum b of the whole table (the subquery is not correlated to the outer row).
SELECT
    src.a,
    src.b,
    src.c,
    (SELECT MAX(b) FROM myTable) AS c
FROM myTable src
LEFT JOIN #myTable cpy
    ON  src.a = cpy.a
    AND src.b = cpy.b
ORDER BY a, b
The problem with the above code is that column 'c' is filled with the maximum date across all ids, which is not what I actually want.
EDIT: the problem is now solved with the answer given by Dmitry Poliakov (thanks). Used:
-- Every row keeps its own a and b; c carries the largest b seen for that a.
SELECT
    a,
    b,
    MAX(b) OVER (PARTITION BY a) AS c
FROM myTable
ORDER BY
    a,
    b
You can select the maximum date for each group of ids as follows:
-- Window aggregate: c is the maximum b within the rows sharing the same a,
-- repeated on every row of that group.
SELECT src.a,
       src.b,
       MAX(src.b) OVER (PARTITION BY src.a) AS c
FROM myTable src
ORDER BY src.a, src.b
EDIT: one possible solution for the second (edited) part of the question is:
-- cte: each row of myTable plus c = the largest b for its a.
WITH cte AS (
    SELECT
        a,
        b,
        MAX(b) OVER (PARTITION BY a) AS c,
        d
    FROM myTable
)
-- Join each row to the row(s) of the same a whose b equals that maximum,
-- exposing their d as e.
SELECT
    cur.a,
    cur.b,
    cur.c,
    cur.d,
    newest.d AS e
FROM cte cur
JOIN cte newest
    ON  newest.a = cur.a
    AND newest.b = cur.c
-- Sample data held in a table variable. T-SQL table variables are declared with the
-- @ prefix; "#" names are temp tables and cannot be used with DECLARE ... TABLE.
DECLARE @updates TABLE (a int, b date, c date);

INSERT INTO @updates (a, b, c) VALUES
(1,GETDATE(),GETDATE()+10),
(2,GETDATE()+11,GETDATE()+13),
(2,GETDATE()+11,GETDATE()+14),
(3,GETDATE()+11,GETDATE()+13),
(1,GETDATE()+11,GETDATE()+13);

-- cte: the latest c for every a.
WITH cte AS (
    SELECT a, MAX(c) AS latest
    FROM @updates
    GROUP BY a
)
-- Join back to the source rows so all columns of the matching row(s) are returned.
SELECT *
FROM cte
INNER JOIN @updates AS d
    ON  cte.a = d.a
    AND cte.latest = d.c
Builds a table to select the records with the newest updates and then joins this new table with your original table to extract all the fields in matching rows
How can I select like this? Can I create a user-defined aggregate function?
SELECT Max(A),(SELECT TOP 1 FROM TheGroup Where B=Max(A)) FROM MyTable
where MyTable as Shown Below
A B C
--------------
1 2 S
3 4 S
4 5 T
6 7 T
I want a Query Like this
SELECT MAX(A),(B Where A=Max(A)),C FROM MYTable GROUP BY C
I'm Expecting the result as below
MAX(A) Condition C
-----------------------
3 4 S
6 7 T
-- Number the rows of each C group by A descending and keep the first one,
-- i.e. the row holding the largest A for that C.
SELECT A, B, C
FROM (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY C ORDER BY A DESC) AS RN
    FROM MyTable
) AS ranked  -- a derived table must carry an alias in SQL Server
WHERE RN = 1
(this query will always return only one row per C value)
OR
-- CTE_Group: the largest A for every C.
WITH CTE_Group AS (
    SELECT
        C,
        MAX(A) AS MaxA
    FROM MyTable
    GROUP BY C
)
-- Pick up B from the row(s) of the same C whose A equals that maximum.
SELECT
    grp.MaxA,
    src.B,
    grp.C
FROM CTE_Group grp
INNER JOIN MyTable src
    ON  src.A = grp.MaxA
    AND src.C = grp.C
(if there are multiple rows that have same Max(A) value - this query will return all of them)
Try Following Query :
-- TABLE1: the largest A for every C. TABLE2: the source rows, used to fetch B.
-- The join must match on C as well as A; matching on A alone would also pick up
-- rows from other C groups that happen to carry the same A value.
SELECT TABLE1.A, TABLE2.B, TABLE1.C
FROM
(
    SELECT MAX(A) AS A, C
    FROM MYTable
    GROUP BY C
) AS TABLE1
INNER JOIN MYTable AS TABLE2
    ON  TABLE1.A = TABLE2.A
    AND TABLE1.C = TABLE2.C
SQLFIDDLE
You can do it with a simple join query. A join query always runs faster than an IN query: the join runs only once when the query is executed. We can achieve the same result by using an IN query.
-- Anti-join: look for a row of the same C with a larger A; rows for which no such
-- partner exists (the outer-joined side is NULL) are kept.
SELECT cur.*
FROM YourTable cur
LEFT OUTER JOIN YourTable bigger
    ON  bigger.C = cur.C
    AND bigger.A > cur.A
WHERE bigger.A IS NULL
how about this:
-- Keep the rows whose A is the maximum A of their OWN C group.
-- The subquery is correlated on C; an uncorrelated IN over all group maxima would also
-- return a row whose A merely equals the maximum of some other group.
-- Note: rows with a NULL C never satisfy m.C = t.C and are therefore not returned.
SELECT t.*
FROM MyTable t
WHERE t.A = (SELECT MAX(m.A)
             FROM MyTable m
             WHERE m.C = t.C)
-- Largest A among the rows whose B equals the table-wide maximum of A.
SELECT MAX(t.A)
FROM MyTable t
WHERE t.B = (
    SELECT MAX(m.A)
    FROM MyTable m
)
update:
-- All rows whose B equals the table-wide maximum of A.
SELECT t.*
FROM MyTable t
WHERE t.B = (
    SELECT MAX(m.A)
    FROM MyTable m
)
update 2:
-- Distinct (A, B) pairs taken from the rows holding the maximum A of their C group.
-- The subquery is correlated on C so it yields exactly one value per outer row; a
-- GROUP BY C subquery compared with "=" returns one row per group and fails as soon
-- as the table contains more than one C value.
SELECT DISTINCT t.A, t.B
FROM MyTable t
WHERE t.A = (SELECT MAX(m.A)
             FROM MyTable m
             WHERE m.C = t.C)
update 3:
ok, I think I understand what you're looking for now.. How about this:
-- Rows whose A is the largest A within their own C group.
-- Correlating on C prevents a row from qualifying just because its A equals the
-- maximum of a different group, which an uncorrelated IN (... GROUP BY C) allows.
-- Note: rows with a NULL C never satisfy grp.C = cur.C and are therefore not returned.
SELECT cur.*
FROM MyTable cur
WHERE cur.A = (SELECT MAX(grp.A)
               FROM MyTable grp
               WHERE grp.C = cur.C)
-- cte: every row of tbl with a sequence number assigned by cola descending.
WITH cte AS (
    SELECT ROW_NUMBER() OVER (ORDER BY cola DESC) AS Rno,
           *
    FROM tbl
)
-- The row numbered first is the one with the largest cola.
SELECT TOP 1
       cola,
       colb
FROM cte
ORDER BY Rno
Then try it:
-- cte: rows of tbl numbered within each col3 group by col1 descending.
WITH cte AS (
    SELECT ROW_NUMBER() OVER (PARTITION BY col3 ORDER BY col1 DESC) AS Rno,
           *
    FROM tbl
)
-- Rno = 1 is the row with the largest col1 of its col3 group.
SELECT col1,
       col2,
       col3
FROM cte
WHERE Rno = 1
I want to get the top 1 row for each unique value of b, namely the row with the minimum value of c for that particular value of b. There can be more than one row with the same minimum value; in that case, just choose the first one.
myTable
a integer (unique)
b integer
c integer
I've tried this query
-- Rows of myTable whose c equals the smallest c of their b group.
-- Every row tied on that minimum is returned.
SELECT src.*
FROM myTable src
INNER JOIN (
    SELECT b,
           MIN(c) AS c
    FROM myTable
    GROUP BY b
) lowest
    ON  src.b = lowest.b
    AND src.c = lowest.c
However, in this table it's possible for there to be more than 1 instance of the minimum value of c for a given value of b. The above query generates duplicates under these conditions.
I've got a feeling that I need to use rownum somewhere, but I'm not quite sure where.
You can use ROW_NUMBER:
-- One row per b: the row with the smallest c.
-- a is unique, so adding it to the window ORDER BY makes the choice among rows tied
-- on c deterministic (the lowest a wins) instead of engine-dependent.
SELECT *
FROM (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY b ORDER BY c, a) AS rn
    FROM myTable
) AS T1
WHERE rn = 1
To tie-break between the equal c's, you will need to subquery one level further to get the min-a for each group of equal c's per b. (A mouthful!)
-- Step 1: the smallest c for every b.
WITH min_c_per_b AS (
    SELECT b,
           MIN(c) AS c
    FROM myTable
    GROUP BY b
),
-- Step 2: among the rows sitting at that minimum, the smallest a per (b, c).
first_a_per_min AS (
    SELECT cand.b,
           cand.c,
           MIN(a) AS a
    FROM myTable cand
    INNER JOIN min_c_per_b lo
        ON  cand.b = lo.b
        AND cand.c = lo.c
    GROUP BY cand.b, cand.c
)
-- Step 3: fetch the full row identified by (a, b, c).
SELECT pick.*
FROM myTable pick
INNER JOIN first_a_per_min chosen
    ON  chosen.a = pick.a
    AND chosen.b = pick.b
    AND chosen.c = pick.c