Find the first N nearest points in Bigquery - sql

To find the nearest point and its distance in Bigquery I am using this query
-- For every point, return the single closest other point and the distance to it.
WITH
  table_a AS (
    SELECT id, geom
    FROM bqtable
  ),
  table_b AS (
    SELECT id, geom
    FROM bqtable
  )
SELECT AS VALUE
  -- Collect every candidate pair for a.id, keep only the nearest one,
  -- then unwrap the one-element array into a single struct.
  ARRAY_AGG(
    STRUCT<id_a STRING, id_b STRING, dist FLOAT64>(
      a.id,
      b.id,
      ST_DISTANCE(a.geom, b.geom)
    )
    ORDER BY ST_DISTANCE(a.geom, b.geom)
    LIMIT 1
  )[OFFSET(0)]
FROM table_a AS a
CROSS JOIN table_b AS b
WHERE a.id <> b.id  -- never pair a point with itself
GROUP BY a.id
How can I modify this query to find the nearest 10 points and their distances?
Thanks!

One method uses ORDER BY, LIMIT, and UNNEST(). Using your approach:
-- For each point a.id, collect its 10 closest other points into an array,
-- then flatten the arrays so every neighbour becomes its own output row.
SELECT AS VALUE neighbour
FROM (
  SELECT
    ARRAY_AGG(
      STRUCT<id_a STRING, id_b STRING, dist FLOAT64>(
        a.id,
        b.id,
        ST_DISTANCE(a.geom, b.geom)
      )
      ORDER BY ST_DISTANCE(a.geom, b.geom)
      LIMIT 10
    ) AS nearest
  FROM table_a AS a
  CROSS JOIN table_b AS b
  WHERE a.id <> b.id  -- never pair a point with itself
  GROUP BY a.id
) AS per_point
CROSS JOIN UNNEST(per_point.nearest) AS neighbour;
A simpler method would be
-- Pair every point with every other point, then keep the 10 closest pairs per
-- source point. id_a / id_b are output aliases: both tables only expose `id`.
select a.id as id_a,
       b.id as id_b,
       ST_DISTANCE(a.geom, b.geom) as dist
from table_a a cross join
     table_b b
where a.id <> b.id  -- never pair a point with itself
-- The window is written against the source columns rather than the select-list
-- aliases, so it does not depend on alias resolution inside the OVER clause.
qualify row_number() over (partition by a.id
                           order by ST_DISTANCE(a.geom, b.geom)) <= 10;

Related

Removing duplicate values from a column in SQL

I have two tables A (group_id, id, subject) and B (id, date). Below is the joint table of tables A and B on id. I have tried using distinct and partition to remove the duplicates in group_id(field) only, but no luck:
My code:
-- Attempt from the question: join A to a numbered copy of B and keep only the
-- rows numbered 1, listing the result by date, newest first.
select
a.group_id, a.id, a.subject, b.date
from
A a
inner join
(select
b.*,
-- NOTE: the question describes the tables as A (group_id, id, subject) and
-- B (id, date). This subquery reads only B, so group_id is not a column that
-- is in scope for the PARTITION BY below.
row_number() over (partition by group_id order by date asc) as seqnum
from
B b) b on a.id = b.id and seqnum = 1
order by
date desc;
I got this error when I ran the code:
Partitioning can not be used stand-alone in query near 'partition by group_id order by date asc) as seqnum from B' at line 1
This is my expected result:
Thank you in advance!
It looks like you want the earliest date for each row in the table you show. Your question mentions two tables, but you only show one.
I recommend a correlated subquery in most databases:
-- Keep each row of b whose date equals the earliest date found in its group_id.
SELECT b.*
FROM b
WHERE b.date = (
    SELECT MIN(same_group.date)
    FROM b AS same_group
    WHERE same_group.group_id = b.group_id
);
I see. You need to join first and then use row_number():
-- Join first so that group_id (from A) and date (from B) are both available,
-- number the rows of each group by date, then keep the first row per group.
WITH ab AS (
    SELECT
        a.group_id,
        a.id,
        a.subject,
        b.date,
        ROW_NUMBER() OVER (PARTITION BY a.group_id ORDER BY b.date) AS seqnum
    FROM A a
    JOIN B b
        ON a.id = b.id
)
SELECT ab.*
FROM ab
WHERE seqnum = 1
ORDER BY date DESC;
You are almost there. But the column that you try to use to partition (ie group_id) comes from table a, which is not available in the subquery.
You would need to JOIN and assign the row number in a subquery, and then filter in the outer query.
-- Join A and B first, number the rows of each group_id by date (earliest = 1),
-- then filter on that number in the outer query.
select *
from (
    select
        a.group_id,
        a.id,
        a.subject,
        b.date,
        row_number() over (partition by a.group_id order by b.date asc) as seqnum
    from a
    inner join b on a.id = b.id  -- single ON keyword; the duplicated one was a syntax error
) ranked                         -- most engines require an alias on a derived table
where seqnum = 1
order by date desc;
Another way to achieve your goal, though it may not be the most efficient one:
-- Compute the earliest date per group in a helper subquery, then join it back
-- to the A/B join to pick out the rows carrying that date.
select
    A.group_id,
    A.id,
    B.Date,
    A.subject
from A
inner join B
    on A.Id = B.Id
inner join (
    -- one row per group_id with its minimum date
    select
        A.Group_id,
        min(B.Date) as Date
    from A
    inner join B
        on A.Id = B.Id
    group by A.group_id
) as supportTable
    on  A.group_id = supportTable.group_id
    and B.Date = supportTable.Date

WITH clause in HIVE

In the code below, temp1 is used twice. Will Hive execute the select query in temp1 twice, or just once?
-- temp1 is defined once and referenced by both agg_tbl and class_tbl below.
-- NOTE(review): the snippet ends after the CTE list; the main query that would
-- consume agg_tbl / class_tbl is not shown.
with temp1 as (  -- a CTE needs the AS keyword between its name and its body
    select distinct
        b as b,
        f,
        t1.id as id  -- no trailing comma before FROM
    from table1 t1
    join table2 t2 on (t1.id = t2.id)
),
agg_tbl as (
    -- NOTE(review): abc and tbl.t are not produced by the inner query as
    -- written (it yields cnt, b, f) -- presumably placeholders; confirm.
    select
        max(abc) as maxabc,
        tbl.t
    from (
        select
            count(*) as cnt, b, f
        from temp1
        group by b, f
    ) tbl
    group by tbl.t
),
class_tbl as (
    -- NOTE(review): bp is not among the columns temp1 selects (b, f, id) --
    -- presumably a placeholder; confirm.
    select
        case
            when bp = 1 then 'abc'
            when bp = 2 then 'xyz'
        end as class,
        bp
    from (
        select
            count(b) as bcount, bp
        from temp1
        group by bp
    ) tbl
)
If that is the case, then using temporary tables will make sense.
Thanks.

How do I count three different distinct values and group on an ID in MS-Access?

So I know MS-Access does not allow SELECT COUNT(DISTINCT....) FROM ..., but I am trying to find a more viable alternative to the usual standard of
SELECT COUNT(*) FROM (SELECT DISTINCT Name FROM table1)
My problem is I am trying to do three separate Count functions and group them on ID. If I use the method above, it is giving me the total unique value count for the whole table instead of the total count for only the value of ID. I tried doing
(SELECT COUNT(*) FROM (SELECT DISTINCT Name FROM table1 as T2
WHERE T2.ColumnA = T1.ColumnA)) As MyVal
FROM table1 as T1
but it tells me I need to specify a value for T1.ColumnA.
The SQL query I am trying to accomplish is this:
-- Target result: per ID, the number of distinct values in each of three columns.
SELECT ID,  -- comma was missing between ID and the first aggregate
COUNT(DISTINCT ColumnA) as CA,
COUNT(DISTINCT ColumnB) as CB,
COUNT(DISTINCT ColumnC) as CC
FROM table1
GROUP BY ID
Any ideas?
You can use subqueries. Assuming you have a table where each id occurs once:
-- One output row per row of the driving table; each column counts the distinct
-- values of one source column among the table1 rows sharing that id.
-- GROUP BY is used inside each subquery to de-duplicate the values.
SELECT
    (SELECT COUNT(*)
     FROM (SELECT src.columnA
           FROM table1 AS src
           WHERE src.id = t.id
           GROUP BY src.columnA
          ) AS distinct_a
    ) AS num_a,
    (SELECT COUNT(*)
     FROM (SELECT src.columnB
           FROM table1 AS src
           WHERE src.id = t.id
           GROUP BY src.columnB
          ) AS distinct_b
    ) AS num_b,
    (SELECT COUNT(*)
     FROM (SELECT src.columnC
           FROM table1 AS src
           WHERE src.id = t.id
           GROUP BY src.columnC
          ) AS distinct_c
    ) AS num_c
FROM <table with ids> AS t;
I'm not sure if you'll think this is "viable".
EDIT:
This makes it even more complicated . . . it suggests that MS Access doesn't support correlation clauses more than one level deep (might you consider switching to another database?).
In any case, the brute force way:
-- Per-id distinct counts without correlated subqueries: each derived table
-- first de-duplicates (id, column) pairs, then counts the pairs per id.
-- The three per-id results are joined on id.
select a.id, a.numA, b.numB, c.numC
from ((select id, count(*) as numA
       from (select id, columnA
             from table1 t1
             group by id, columnA
            ) as da
       group by id  -- required: id is selected next to an aggregate
      ) as a inner join
      (select id, count(*) as numB
       from (select id, columnB
             from table1 t1
             group by id, columnB
            ) as db
       group by id
      ) as b
      on a.id = b.id
     ) inner join
     (select id, count(*) as numC
      from (select id, columnC
            from table1 t1
            group by id, columnC
           ) as dc
      group by id
     ) as c
     on c.id = a.id;

Select group with maximum value of aggregate function

I have this query
-- Sum col1 per group, sort the sums descending and keep the first row only.
select dca_sif, maxup  -- must match the inner alias (was misspelled "maxupp")
from (select d.sifra dca_sif, sum(col1) as maxup
      from tableD d join tablev v on d.id = v.d_id
      join tableT t on v.id = t.v_id
      group by d.sifra  -- group by the same column that is selected
      order by maxup desc
     )
where rownum = 1;  -- keeps a single row, so groups tied on maxup are dropped
This returns only the first value, but if several groups share the same maxup, how can I return all of them?
If I delete rownum=1, it returns all groups and their corresponding maxup.
I want to return max(sum(col1)), but this causes an error. The database is Oracle.
You can try something like this:
-- Return every group whose sum equals the largest per-group sum.
-- A ROWNUM = 1 filter would keep a single row and drop tied groups, so the
-- maximum is computed in a scalar subquery and compared in HAVING instead.
SELECT d.sifra AS dca_sif, SUM(col1) AS maxup
FROM tableD d
JOIN tablev v ON d.id = v.d_id
JOIN tableT t ON v.id = t.v_id
GROUP BY d.sifra
HAVING SUM(col1) = (SELECT MAX(SUM(col1))  -- Oracle allows one level of nested aggregates with GROUP BY
                    FROM tableD d2
                    JOIN tablev v2 ON d2.id = v2.d_id
                    JOIN tableT t2 ON v2.id = t2.v_id
                    GROUP BY d2.sifra);
This might be helpful to you.
You can combine your select with the same select, but without the rownum limitation, and then join the two on the maximum value. There are many tables in your statement, so it is difficult for me to test, but this should work:
-- qry1: the single top row of the per-group sums (the maximum sum).
-- qry2: the per-group sums for all groups.
-- Joining on the sum keeps every group that ties with the maximum; the outer
-- query reports how many such groups there are and the total of their sums.
SELECT COUNT(qry2.dca_sif) || ' groups', SUM(qry2.maxup2)
FROM (SELECT dca_sif, maxup AS maxup1
      FROM (SELECT d.sifra dca_sif, SUM(col1) AS maxup
            FROM tabled d
            JOIN tablev v ON d.id = v.d_id
            JOIN tablet t ON v.id = t.v_id
            GROUP BY d.sifra
            ORDER BY maxup DESC)
      WHERE ROWNUM = 1) qry1
JOIN (SELECT d.sifra dca_sif, SUM(col1) AS maxup2
      FROM tabled d
      JOIN tablev v ON d.id = v.d_id
      JOIN tablet t ON v.id = t.v_id
      GROUP BY d.sifra) qry2
  ON qry1.maxup1 = qry2.maxup2;
-- Rank the per-group sums from largest to smallest. RANK() gives tied sums the
-- same rank, so filtering on rank 1 returns every group that shares the maximum.
SELECT dca_sif, maxup
FROM (SELECT a.*,
             RANK() OVER (ORDER BY maxup DESC) AS rnk  -- alias no longer shadows the function name
      FROM (SELECT d.sifra dca_sif, SUM(col1) AS maxup
            FROM tableD d JOIN tablev v ON d.id = v.d_id
            JOIN tableT t ON v.id = t.v_id
            GROUP BY d.sifra  -- group by the same column that is selected
           ) a)
WHERE rnk = 1;

SELECT Row Values WHERE MAX() is Column Value In GROUP BY Query

How can I select like this? Can I create a User defined Aggregate Function
SELECT Max(A),(SELECT TOP 1 FROM TheGroup Where B=Max(A)) FROM MyTable
where MyTable as Shown Below
A B C
--------------
1 2 S
3 4 S
4 5 T
6 7 T
I want a Query Like this
SELECT MAX(A),(B Where A=Max(A)),C FROM MYTable GROUP BY C
I'm Expecting the result as below
MAX(A) Condition C
-----------------------
3 4 S
6 7 T
-- Number the rows of each C group by A, largest first, and keep row 1.
SELECT A, B, C
FROM (SELECT *,
             ROW_NUMBER() OVER (PARTITION BY C ORDER BY A DESC) AS RN
      FROM MyTable
     ) AS ranked  -- SQL Server (the question uses TOP) requires an alias on a derived table
WHERE RN = 1
(this query will always return only one row per C value)
OR
-- Compute each group's maximum A, then join back to MyTable to pick up the
-- rows that carry that maximum.
WITH group_max AS (
    SELECT
        C,
        MAX(A) AS MaxA
    FROM MyTable
    GROUP BY C
)
SELECT
    gm.MaxA,
    src.B,
    gm.C
FROM MyTable AS src
INNER JOIN group_max AS gm
    ON  src.C = gm.C
    AND src.A = gm.MaxA
(if there are multiple rows that have same Max(A) value - this query will return all of them)
Try Following Query :
-- TABLE1 holds the maximum A per C; TABLE2 is the base table, joined back to
-- fetch the B value of the row that carries that maximum.
SELECT TABLE1.A, TABLE2.B, TABLE1.C
FROM
(
    SELECT MAX(A) AS A, C
    FROM MYTable
    GROUP BY C
) AS TABLE1
INNER JOIN MYTable AS TABLE2
    ON  TABLE1.A = TABLE2.A
    AND TABLE1.C = TABLE2.C  -- match within the same group; joining on A alone
                             -- also pairs rows from other groups with an equal A
SQLFIDDLE
You can do it with a simple join query. A join query always runs faster than an IN query, because the join is evaluated only once when the query executes. We can achieve the same result by using an IN query.
-- Anti-join: keep a row only when no other row in the same C group has a larger A.
select cur.*
from YourTable as cur
left outer join YourTable as bigger
    on  bigger.C = cur.C
    and bigger.A > cur.A
where bigger.A is null
how about this:
-- Rows whose A is the maximum of their own C group.
-- The subquery is correlated on C: comparing against the maxima of all groups
-- would also return a row whose A merely equals another group's maximum.
SELECT *
FROM MyTable t
WHERE t.A = (SELECT MAX(m.A)
             FROM MyTable m
             WHERE m.C = t.C)
-- Largest A among the rows whose B equals the table-wide maximum of A.
select max(t.A)
from MyTable as t
where t.B = (
    select max(m.A)
    from MyTable as m
)
update:
-- All rows whose B equals the table-wide maximum of A.
select *
from MyTable as t
where t.B = (
    select max(m.A)
    from MyTable as m
)
update 2:
-- Distinct (A, B) pairs of the rows whose A is the maximum of their C group.
-- The subquery is correlated on C so it yields exactly one value per outer
-- row; with GROUP BY C it returned one row per group, which `=` cannot accept.
SELECT DISTINCT t.A, t.B
FROM MyTable t
WHERE t.A = (SELECT MAX(m.A)
             FROM MyTable m
             WHERE m.C = t.C)
update 3:
ok, I think I understand what you're looking for now.. How about this:
-- Rows whose A is the maximum within their own C group.
-- Correlating on C prevents a row from matching the maximum of a different group.
SELECT *
FROM MyTable t
WHERE t.A IN (SELECT MAX(m.A)
              FROM MyTable m
              WHERE m.C = t.C)
-- Number every row of tbl by cola, largest first, then return only the first one.
WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY cola DESC) AS Rno,
        *
    FROM tbl
)
SELECT TOP 1
    cola,
    colb
FROM numbered
ORDER BY Rno
Then try it:
-- Number the rows inside each col3 group by col1, largest first,
-- and keep the first row of every group.
WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY col3 ORDER BY col1 DESC) AS Rno,
        *
    FROM tbl
)
SELECT
    col1,
    col2,
    col3
FROM numbered
WHERE Rno = 1