PostgreSQL: Combining these two queries - sql

My first query returns the first 10 comments, whose parent_id's are null
-- Ids of the ten most upvoted root comments (no parent) in one thread.
SELECT
    comment_id
FROM comments
WHERE parent_id IS NULL
  AND thread_id = $1
ORDER BY upvoted DESC
FETCH FIRST 10 ROWS ONLY
How would I use each comment id, to perform a recursive query?
My current method is by storing the returned comment_id's in an array
[1, 2, 3, 4, 10, 14, 15, 18, 19, 20]
and then using a for loop to perform a recursive query on each id
// Recursive query text: starts from the single comment bound to $1 and keeps
// adding rows of comments whose parent_id equals a comment_id already collected.
var query =
`With RECURSIVE cte AS
(
SELECT * FROM comments WHERE comment_id = $1
UNION
SELECT t.*
From comments t
JOIN cte rt ON rt.comment_id = t.parent_id
)
SELECT * FROM cte`;
// Issues the recursive query once for every id stored in `array`.
// NOTE(review): `int i` and `array.size()` are not JavaScript syntax; presumably
// this loop is pseudo-code — confirm before reuse.
for(int i = 0; i < array.size(); i++){
client.query(query, array[i])
. . .
}
So I am wondering how I could do this in a single query instead of storing the ids in an array and then recursive querying each individual id?

You can use your first statement as the non-recursive part of your recursive query. Because that statement needs its own ORDER BY (and FETCH FIRST), you have to wrap it in parentheses:
-- Complete comment trees for the ten most upvoted root comments of a thread.
WITH RECURSIVE cte AS (
    -- Anchor: the top-10 root comments. The extra parentheses keep
    -- ORDER BY / FETCH FIRST attached to this branch instead of the UNION.
    (
        SELECT *
        FROM comments
        WHERE thread_id = $1
          AND parent_id IS NULL
        ORDER BY upvoted DESC
        FETCH FIRST 10 ROWS ONLY
    )
    UNION
    -- Recursive step: comments whose parent is already part of the result.
    SELECT t.*
    FROM comments t
    JOIN cte rt ON rt.comment_id = t.parent_id
)
SELECT *
FROM cte;
Alternatively you can do that in a separate CTE:
-- Same result as a single recursive CTE, but the root selection gets its own name.
WITH RECURSIVE
root_nodes AS (
    -- Ten most upvoted parentless comments of the thread.
    SELECT *
    FROM comments
    WHERE parent_id IS NULL
      AND thread_id = $1
    ORDER BY upvoted DESC
    FETCH FIRST 10 ROWS ONLY
),
cte AS (
    -- Anchor: the selected roots.
    SELECT *
    FROM root_nodes
    UNION
    -- Step: children of any comment collected so far.
    SELECT child.*
    FROM comments child
    JOIN cte parent ON parent.comment_id = child.parent_id
)
SELECT *
FROM cte;
Note the recursive keyword belongs to the WITH even when the first CTE is not the recursive one.

Related

Linked lists: query first and last element of chained lists stored in SQL table

I have an SQL table with "lines" representing elements of chained lists.
I could for example have the following records:
(id, previous_id)
------------------
(1, NULL)
(2, NULL)
(3, 2)
(4, 3)
(5, NULL)
(6, 4)
(7, 5)
We have 3 lists in this table:
(1,)
(2,3,4,6)
(5,7)
I would like to find the last element of each list and the number of elements in the list.
The query I am looking for would output:
last, len
1, 1
6, 4
7, 2
Is this possible in SQL?
You can use a recursive CTE:
-- Walk every list backwards from its tail, then count the visited rows per tail.
WITH RECURSIVE cte AS (
    -- Anchor: tails, i.e. rows that no other row references as previous_id.
    SELECT tail.previous_id AS id, tail.id AS last
    FROM lines tail
    WHERE NOT EXISTS (
        SELECT 1
        FROM lines nxt
        WHERE nxt.previous_id = tail.id
    )
    UNION ALL
    -- Step: move to the predecessor row while carrying the tail id along.
    SELECT cur.previous_id, cte.last
    FROM cte
    JOIN lines cur ON cur.id = cte.id
)
SELECT cte.last, COUNT(*)
FROM cte
GROUP BY cte.last;
Here is a db<>fiddle.
-- Walk every list forward from its head, tracking the current tail and length.
WITH RECURSIVE cte AS (
    -- Anchor: heads (rows without a predecessor) start with length 1.
    SELECT id AS first, id AS last, 1 AS len
    FROM lines
    WHERE previous_id IS NULL
    UNION ALL
    -- Step: append the row that points back at the current tail.
    SELECT walk.first, nxt.id, walk.len + 1
    FROM cte walk
    JOIN lines nxt ON nxt.previous_id = walk.last
)
-- Per head, keep only the row with the greatest len.
SELECT DISTINCT ON (first)
       last, len  -- add "first" to this list to return the head as well
FROM cte
ORDER BY first, len DESC;
db<>fiddle here
Produces your result exactly.
If you also want the first element, as your title states, it is readily available: add the first column to the select list.
Here is an implementation in Microsoft SQL Server 2016 db<>fiddle
-- Walk every list forward from its head and report head, tail and length.
WITH chain AS (
    -- Anchor: heads (rows without a predecessor) start with length 1.
    SELECT l.id AS [first],
           l.id AS [last],
           1    AS [len]
    FROM lines AS l
    WHERE l.previous_id IS NULL
    UNION ALL
    -- Step: append the row that points back at the current tail.
    SELECT c.[first],
           l.id,
           c.[len] + 1 AS [len]
    FROM chain AS c
    JOIN lines AS l ON l.previous_id = c.[last]
),
result AS (
    -- Rank the rows of each list, longest first. No DISTINCT here: rn is
    -- unique within a partition, so DISTINCT could never remove a row.
    SELECT c.[first],
           c.[last],
           c.[len],
           ROW_NUMBER() OVER (PARTITION BY c.[first] ORDER BY c.[len] DESC) AS rn
    FROM chain AS c
)
SELECT r.[first],
       r.[last],
       r.[len]
FROM result AS r
WHERE r.rn = 1
ORDER BY r.[first];

select N-1 records for update

I have a query where I want to update n-1 records from result set. Can this be done without loops?
If my query is like this:
-- ids from the given list that occur more than once in data, with their row count.
-- The alias "e" was never defined, so the column is referenced as plain id.
with cte(id, count)
as
(
    select id, count(*) as count
    from data
    where id in (multiple values)  -- placeholder for the actual id list
    group by id
    having count(*) > 1
)
Now I want to update the rows in another table with the resulting id's but only any n-1 rows for each id value from the above query. Something like this:
-- Sketch of the intended update, not valid syntax as written:
-- for each id found in cte, change all but one of the data2 rows with that id.
update top( count-1 or n-1) from data2
inner join cte on data2.id = cte.id
set somecolumn = 'some value'
where id in (select id from cte)
The id column is not unique. There are multiple rows with the same id values in table data 2.
This query will do what you want. It uses two CTEs; the first generates the list of eligible id values to update, and the second generates row numbers for id values in data2 which match those in the first CTE. The second CTE is then updated if the row number is greater than 1 (so only n-1 rows get updated):
-- Update all but one data2 row for every listed id that is duplicated in data.
with cte(id, count) as (
    -- Eligible ids: listed ids occurring more than once in data.
    select id, count(*) as count
    from data
    where id in (2, 3, 4, 6, 7)
    group by id
    having count(*) > 1
),
cte2 as (
    -- Number the data2 rows of each eligible id in random order.
    -- newid() is evaluated for every row; rand() without a seed yields one
    -- value for the whole query and would therefore not shuffle anything.
    select d.id, d.somecolumn,
           row_number() over (partition by d.id order by newid()) as rn
    from data2 d
    join cte on cte.id = d.id
)
-- Row 1 of each id stays untouched, so n-1 rows per id are updated.
update cte2
set somecolumn = 'some value'
where rn > 1
Note I've chosen to order row numbers randomly, you might have some other scheme for deciding which n-1 values you want to update (e.g. ordered by id, or ...).
Is this what you're looking for? The CTE identifies ALL of the source rows, but the WHERE clause in the UPDATE statement limits the updates to n-1.
-- Numbers the rows of data and updates otherTable rows joined to every
-- numbered row except number 1. <whatever> / <whateverElse> are placeholders.
-- NOTE(review): ROW_NUMBER() has no PARTITION BY, so the numbering runs across
-- all of data rather than restarting per id — confirm this really yields
-- "n-1 rows per id".
WITH cte AS
(
SELECT
id,
ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS RowNum
FROM data
)
UPDATE t
SET t.<whatever> = <whateverElse>
FROM
otherTable AS t
JOIN
cte AS c
ON t.id = c.id
WHERE
c.RowNum > 1;
I believe this would work just fine
-- Sets somecolumn on every data row whose id (from the given list) occurs more than once.
-- NOTE(review): this updates all matching rows of data, not n-1 rows per id of data2.
;with cte(id, count)
as
(
    -- The alias "e" was never defined, so the column is referenced as plain id.
    select id, count(*) as count
    from data
    where id in (multiple values)  -- placeholder for the actual id list
    group by id
    having count(*) > 1
)
update data
set somecolumn = 'some value'
from data join cte on cte.id = data.id
;

Postgres - Combining these two queries

My first query returns the first 10 message ids:
-- Ten newest root messages in which $1 is either creator or recipient.
SELECT *
FROM message
WHERE parent_id IS NULL
  AND $1 IN (creator_id, recipient_id)
ORDER BY date_posted DESC
FETCH FIRST 10 ROWS ONLY
[1, 2, 4, 6, 10, 14, 17, 18, 19, 23]
Using each message_id, my second query gets the message_id with the MAX value of each of their Linear trees:
-- Collect message $1 and all of its descendants, then keep the row(s) whose
-- parent_id equals the largest parent_id in that tree.
WITH RECURSIVE recursetree AS (
    SELECT *
    FROM message
    WHERE message_id = $1
    UNION ALL
    SELECT child.*
    FROM message child
    JOIN recursetree parent ON parent.message_id = child.parent_id
)
SELECT *
FROM recursetree
WHERE parent_id IS NOT DISTINCT FROM (
    SELECT MAX(parent_id)
    FROM recursetree
)
Combining these two queries only gets one row which is the max value of the linear tree of the last message_id of 23. How would I get all rows?
-- All trees of the ten newest root messages of $1 in one CTE; the final filter
-- compares against a single MAX(parent_id) computed over every tree at once.
WITH RECURSIVE cte AS (
    (
        SELECT *
        FROM message
        WHERE parent_id IS NULL
          AND $1 IN (creator_id, recipient_id)
        ORDER BY date_posted DESC
        FETCH FIRST 10 ROWS ONLY
    )
    UNION
    SELECT child.*
    FROM message child
    JOIN cte parent ON parent.message_id = child.parent_id
)
SELECT *
FROM cte
WHERE parent_id IS NOT DISTINCT FROM (
    SELECT MAX(parent_id)
    FROM cte
)
If you want to get the last message of each,
I think you should extract data without children.
-- Messages that no other message names as its parent (anti-join).
SELECT m.*
FROM message AS m
LEFT JOIN message AS child
    ON child.parent_id = m.message_id
WHERE child.message_id IS NULL

How to join two tables with the same number of rows in SQLite?

I have almost the same problem as described in this question. I have two tables with the same number of rows, and I would like to join them together one by one.
The tables are ordered, and I would like to keep this order after the join, if it is possible.
There is a rowid based solution for MSSql, but in SQLite rowid can not be used if the table is coming from a WITH statement (or RECURSIVE WITH).
It is guaranteed that the two tables have the exact same number of rows, but this number is not known beforehand. It is also important to note, that the same element may occur more than twice. The results are ordered, but none of the columns are unique.
Example code:
-- Question's example: two three-row CTEs joined on rowid.
WITH
table_a (n) AS (
    SELECT 2 UNION ALL
    SELECT 4 UNION ALL
    SELECT 5
),
table_b (s) AS (
    SELECT 'valuex' UNION ALL
    SELECT 'valuey' UNION ALL
    SELECT 'valuez'
)
SELECT
    table_a.n,
    table_b.s
FROM table_a
LEFT JOIN table_b
    ON (table_a.rowid = table_b.rowid)
The result I would like to achieve is:
(2, 'valuex'),
(4, 'valuey'),
(5, 'valuez')
SQLFiddle: http://sqlfiddle.com/#!5/9eecb7/6888
This is quite complicated in SQLite -- because you are allowing duplicates. But you can do it. Here is the idea:
Summarize the table by the values.
For each value, get the count and offset from the beginning of the values.
Then use a join to associate the values and figure out the overlap.
Finally use a recursive CTE to extract the values that you want.
The following code assumes that n and s are ordered -- as you specify in your question. However, it would work (with small modifications) if another column specified the ordering.
You will notice that I have included duplicates in the sample data:
-- Pairs the rows of table_a and table_b by position when values may repeat.
-- Sample data with duplicate values on both sides.
WITH table_a (n) AS (
SELECT 2 UNION ALL
SELECT 4 UNION ALL
SELECT 4 UNION ALL
SELECT 4 UNION ALL
SELECT 5
),
table_b (s) AS (
SELECT 'valuex' UNION ALL
SELECT 'valuey' UNION ALL
SELECT 'valuey' UNION ALL
SELECT 'valuez' UNION ALL
SELECT 'valuez'
),
-- One row per distinct n: how often it occurs (a_cnt) and how many table_a
-- rows have a smaller n (a_offset).
a as (
select a.n, count(*) as a_cnt,
(select count(*) from table_a a2 where a2.n < a.n) as a_offset
from table_a a
group by a.n
),
-- Same summary for the distinct s values of table_b.
b as (
select b.s, count(*) as b_cnt,
(select count(*) from table_b b2 where b2.s < b.s) as b_offset
from table_b b
group by b.s
),
-- Joins an n group to an s group when their ranges [offset, offset + cnt - 1]
-- overlap; offset is the larger of the two starts and cnt the overlap size.
ab as (
select a.*, b.*,
max(a.a_offset, b.b_offset) as offset,
min(a.a_offset + a.a_cnt, b.b_offset + b.b_cnt) - max(a.a_offset, b.b_offset) as cnt
from a join
b
on a.a_offset + a.a_cnt - 1 >= b.b_offset and
a.a_offset <= b.b_offset + b.b_cnt - 1
),
-- Repeats every ab row cnt times by counting ind from 1 up to cnt.
cte as (
select n, s, offset, cnt, 1 as ind
from ab
union all
select n, s, offset, cnt, ind + 1
from cte
where ind < cnt
)
select n, s
from cte
order by n, s;
Here is a DB Fiddle showing the results.
I should note that this would be much simpler in almost any other database, using window functions (or perhaps variables in MySQL).
Since the tables are ordered, you can add row_id values by comparing n values.
But still the best way in order to get better performance would be inserting the ID values while creating the tables.
http://sqlfiddle.com/#!5/9eecb7/7014
-- Give every value an id equal to the number of values <= it in its own list,
-- then pair the two lists on that id.
WITH
table_a_a (n, id) AS (
    WITH table_a (n) AS (
        SELECT 2 UNION ALL
        SELECT 4 UNION ALL
        SELECT 5
    )
    SELECT
        cur.n,
        (SELECT COUNT(1) FROM table_a prior WHERE prior.n <= cur.n) AS id
    FROM table_a cur
),
table_b_b (n, id) AS (
    WITH table_b (n) AS (
        SELECT 'valuex' UNION ALL
        SELECT 'valuey' UNION ALL
        SELECT 'valuez'
    )
    SELECT
        cur.n,
        (SELECT COUNT(1) FROM table_b prior WHERE prior.n <= cur.n) AS id
    FROM table_b cur
)
SELECT
    table_a_a.n,
    table_b_b.n
FROM table_a_a
INNER JOIN table_b_b
    ON table_a_a.id = table_b_b.id
or convert the input set to comma separated list and try like this:
http://sqlfiddle.com/#!5/9eecb7/7337
-- Splits two comma-separated strings into numbered elements and pairs them by number.
-- table_b: the seed row (id 0, NULL element) holds the whole list in remainder;
-- each step emits the text before the first comma as element and keeps the rest.
-- NOTE(review): SUBSTR is called with start position 0 — presumably this relies
-- on the engine returning one character fewer than the requested length; confirm.
WITH RECURSIVE table_b( id,element, remainder ) AS (
SELECT 0,NULL AS element, 'valuex,valuey,valuz,valuz' AS remainder
UNION ALL
SELECT id+1,
CASE
WHEN INSTR( remainder, ',' )>0 THEN
SUBSTR( remainder, 0, INSTR( remainder, ',' ) )
ELSE
remainder
END AS element,
CASE
WHEN INSTR( remainder, ',' )>0 THEN
SUBSTR( remainder, INSTR( remainder, ',' )+1 )
ELSE
NULL
END AS remainder
FROM table_b
-- A NULL remainder (set once no comma is left) ends the recursion.
WHERE remainder IS NOT NULL
),
-- table_a: same splitting logic applied to the list '2,4,5,7'.
table_a( id,element, remainder ) AS (
SELECT 0,NULL AS element, '2,4,5,7' AS remainder
UNION ALL
SELECT id+1,
CASE
WHEN INSTR( remainder, ',' )>0 THEN
SUBSTR( remainder, 0, INSTR( remainder, ',' ) )
ELSE
remainder
END AS element,
CASE
WHEN INSTR( remainder, ',' )>0 THEN
SUBSTR( remainder, INSTR( remainder, ',' )+1 )
ELSE
NULL
END AS remainder
FROM table_a
WHERE remainder IS NOT NULL
)
-- Pair elements with equal id; the IS NOT NULL filter drops the id 0 seed rows.
SELECT table_b.element, table_a.element FROM table_b, table_a WHERE table_a.element IS NOT NULL and table_a.id = table_b.id;
SQL
-- Pair rows by rank, where a row's rank is the number of rows in its own
-- table whose ordering value is <= its own.
SELECT
    a1.n,
    b1.s
FROM table_a AS a1
LEFT JOIN table_b AS b1
    ON (SELECT COUNT(*) FROM table_b AS b2 WHERE b2.s <= b1.s)
     = (SELECT COUNT(*) FROM table_a AS a2 WHERE a2.n <= a1.n)
Explanation
The query simply counts the number of rows up until the current one for each table (based on the ordering column) and joins on this value.
Demo
See SQL Fiddle demo.
Assumptions
A single column is used for the ordering in each table. (But the query could easily be modified to allow multiple ordering columns.)
The ordering values in each table are unique.
The values in the ordering column aren't necessarily the same between the two tables.
It is known that table_a contains either the same or more rows than table_b. (If this isn't the case then a FULL OUTER JOIN would need to be emulated since SQLite doesn't provide one.)
No further changes to the table structure are allowed. (If they are, it would be more efficient to have pre-populated columns for the ordering).
Either way...
Use something like
-- CTEs that carry an explicit rowid column, so the join key is real data.
WITH
v_table_a (n, rowid) AS (
    SELECT 2, 1 UNION ALL
    SELECT 4, 2 UNION ALL
    SELECT 5, 3
),
v_table_b (s, rowid) AS (
    SELECT 'valuex', 1 UNION ALL
    SELECT 'valuey', 2 UNION ALL
    SELECT 'valuez', 3
)
SELECT
    v_table_a.n,
    v_table_b.s
FROM v_table_a
LEFT JOIN v_table_b
    ON (v_table_a.rowid = v_table_b.rowid);
for "virtual" tables (with WITH or without),
-- Recursive CTEs that generate three rows each, counting rowid from 1 to 3.
WITH RECURSIVE
vr_table_a (n, rowid) AS (
    VALUES (2, 1)
    UNION ALL
    SELECT n + 2, rowid + 1
    FROM vr_table_a
    WHERE rowid < 3
),
vr_table_b (s, rowid) AS (
    VALUES ('I', 1)
    UNION ALL
    SELECT s || 'I', rowid + 1
    FROM vr_table_b
    WHERE rowid < 3
)
SELECT
    vr_table_a.n,
    vr_table_b.s
FROM vr_table_a
LEFT JOIN vr_table_b
    ON (vr_table_a.rowid = vr_table_b.rowid);
for "virtual" tables using recursive WITHs (in this example the values are different from yours, but I guess you get the point) and
-- Physical tables joined on the rowid each row received at insert time.
CREATE TABLE p_table_a (n INT);
INSERT INTO p_table_a (n)
VALUES (2), (4), (5);

CREATE TABLE p_table_b (s VARCHAR(6));
INSERT INTO p_table_b (s)
VALUES ('valuex'), ('valuey'), ('valuez');

SELECT
    p_table_a.n,
    p_table_b.s
FROM p_table_a
LEFT JOIN p_table_b
    ON (p_table_a.rowid = p_table_b.rowid);
for physical tables.
I'd be careful with the last one though. A quick test shows, that the numbers of rowid are a) reused -- when some rows are deleted and others are inserted, the inserted rows get the rowids from the old rows (i.e. rowid in SQLite isn't unique past the lifetime of a row, whereas e.g. Oracle's rowid AFAIR is) -- and b) corresponds to the order of insertion. But I don't know and didn't find a clue in the documentation, if that's guaranteed or is subject to change in other/future implementations. Or maybe it's just a mere coincidence in my test environment.
(In general, the physical order of rows may change, even within the same database using the same DBMS, as a result of some reorganization, so it is not a good thing to rely on. Nor is it guaranteed that a query returns its result ordered by physical position in the table: it might use the order of some index instead, or a partial result ordered some other way may influence the output order. Consider designing your tables with common (sort) keys in corresponding rows, to order by and to join on.)
You can create temp tables to hold the CTE rows, then JOIN them on SQLite's rowid column.
-- Copy each CTE into a temp table, then join the two temp tables on rowid.
CREATE TEMP TABLE temp_a (n integer);
CREATE TEMP TABLE temp_b (n VARCHAR(255));

WITH table_a (n) AS (
    SELECT 2 n UNION ALL
    SELECT 4   UNION ALL
    SELECT 5   UNION ALL
    SELECT 5
)
INSERT INTO temp_a (n)
SELECT n
FROM table_a;

WITH table_b (n) AS (
    SELECT 'valuex' UNION ALL
    SELECT 'valuey' UNION ALL
    SELECT 'valuez' UNION ALL
    SELECT 'valuew'
)
INSERT INTO temp_b (n)
SELECT n
FROM table_b;

SELECT *
FROM temp_a a
INNER JOIN temp_b b
    ON a.rowid = b.rowid;
sqlfiddle:http://sqlfiddle.com/#!5/9eecb7/7252
It is possible to use the rowid inside a with statement but you need to select it and make it available to the query using it.
Something like this:
-- Expose each table's rowid under the name rid so the outer query can join on it.
WITH
tablea AS (
    SELECT id, rowid AS rid
    FROM someids
),
tableb AS (
    SELECT details, rowid AS rid
    FROM somedetails
)
SELECT
    tablea.id,
    tableb.details
FROM tablea
LEFT JOIN tableb
    ON tablea.rid = tableb.rid;
It is however as they have already warned you a really bad idea. What if the app breaks after inserting in one table but before the other one? What if you delete an old row? If you want to join two tables you need to specify the field to do so. There are so many things that could go wrong with this design. The most similar thing to this would be an incremental id field that you would save in the table and use in your application. Even simpler, make those into one table.
Read this link for more information about the rowid: https://www.sqlite.org/lang_createtable.html#rowid
sqlfiddle: http://sqlfiddle.com/#!7/29fd8/1
It is possible to use the rowid inside a with statement but you need to select it and make it available to the query using it. Something like this:
-- Select rowid inside each CTE (aliased rid) and join the CTEs on that alias.
WITH
tablea AS (
    SELECT id, rowid AS rid
    FROM someids
),
tableb AS (
    SELECT details, rowid AS rid
    FROM somedetails
)
SELECT
    tablea.id,
    tableb.details
FROM tablea
LEFT JOIN tableb
    ON tablea.rid = tableb.rid;
The problem statement indicates:
The tables are ordered
If this means that the ordering is defined by the ordering of the values in the UNION ALL statements, and if SQLite respects that ordering, then the following solution may be of interest because, apart from small tweaks to the last three lines of the sample program, it adds just two lines:
-- Two extra CTE definitions: each adds a ROW_NUMBER() column named rid to the
-- rows of table_a / table_b.
-- NOTE(review): "ORDER BY 1" inside OVER presumably sorts by the constant 1, not
-- by a column position, leaving the numbering order to the engine — confirm.
A(rid,n) AS (SELECT ROW_NUMBER() OVER ( ORDER BY 1 ) rid, n FROM table_a),
B(rid,s) AS (SELECT ROW_NUMBER() OVER ( ORDER BY 1 ) rid, s FROM table_b)
That is, table A is table_a augmented with a rowid, and similarly for table B.
Unfortunately, there is a caveat, though it might just be the result of my not having found the relevant specifications. Before delving into that, however, here is the full proposed solution:
-- Number the rows of both CTEs with ROW_NUMBER() and pair them on that number.
WITH
table_a (n) AS (
    SELECT 2 UNION ALL
    SELECT 4 UNION ALL
    SELECT 5
),
table_b (s) AS (
    SELECT 'valuex' UNION ALL
    SELECT 'valuey' UNION ALL
    SELECT 'valuez'
),
A (rid, n) AS (
    SELECT ROW_NUMBER() OVER (ORDER BY 1) rid, n
    FROM table_a
),
B (rid, s) AS (
    SELECT ROW_NUMBER() OVER (ORDER BY 1) rid, s
    FROM table_b
)
SELECT
    A.n,
    B.s
FROM A
LEFT JOIN B
    ON (A.rid = B.rid);
Caveat
The proposed solution has been tested against a variety of data sets using sqlite version 3.29.0, but whether or not it is, and will continue to be, "guaranteed" to work is unclear to me.
Of course, if SQLite offers no guarantees with respect to the ordering of the UNION ALL statements (that is, if the question is based on an incorrect assumption), then it would be interesting to see a well-founded reformulation.

How do I get records before and after given one?

I have the following table structure:
Id, Message
1, John Doe
2, Jane Smith
3, Error
4, Jane Smith
Is there a way to get the error record and the surrounding records? i.e. find all Errors and the record before and after them.
-- Number the log rows by ID, then return every row whose number is one below,
-- equal to, or one above the number of an 'Error' row.
;WITH numberedlogtable AS
(
    SELECT Id,
           Message,
           ROW_NUMBER() OVER (ORDER BY ID) AS RN
    FROM logtable
)
SELECT Id,
       Message
FROM numberedlogtable
WHERE RN IN (
    SELECT RN + i
    FROM numberedlogtable
    CROSS JOIN (
        SELECT -1 AS i
        UNION ALL
        SELECT 0
        UNION ALL
        SELECT 1
    ) n
    WHERE Message = 'Error'
)
-- Returns the first 'Error' row together with the row before and the row after it.
WITH err AS (
    -- First 'Error' row by id.
    SELECT TOP 1 *
    FROM log
    WHERE message = 'Error'
    ORDER BY id
),
p AS (
    -- Row immediately before that error. The table needs the alias l,
    -- otherwise l.* cannot be resolved.
    SELECT TOP 1 l.*
    FROM log AS l
    WHERE l.id < (SELECT id FROM err)
    ORDER BY l.id DESC
)
-- Start AT the preceding row (>=, not >) so the three rows are
-- previous, error, next.
-- NOTE: when no row precedes the error, p is empty, the comparison is
-- against NULL and nothing is returned.
SELECT TOP 3 *
FROM log
WHERE id >= (SELECT id FROM p)
ORDER BY id
Adapt this routine to pick out your target.
-- Returns the target row plus its nearest neighbours by Id.
-- Local variables must be prefixed with @ (# denotes temporary objects).
DECLARE @TargetId int
SET @TargetId = 3
select *
from LogTable
where Id in (-- "before": largest Id below the target
             select max(Id)
             from LogTable
             where Id < @TargetId
             -- target
             union all select @TargetId
             -- "after": smallest Id above the target
             union all select min(Id)
             from LogTable
             where Id > @TargetId)
-- Number the #Temp rows by ID; for each 'Error' row compute the numbers just
-- before, at and just after it, and return the rows carrying those numbers.
select id, messag
from (
    select row_number() over (order by ID) as RNO, *
    from #Temp
) as numbered
inner join (
    select src.SubRNO - 1 as prev_rno,
           src.SubRNO     as this_rno,
           src.SubRNO + 1 as next_rno
    from (
        select row_number() over (order by ID) as SubRNO, *
        from #Temp
    ) as src
    where messag = 'Error'
) as hits
    on numbered.RNO = hits.prev_rno
    or numbered.RNO = hits.this_rno
    or numbered.RNO = hits.next_rno
-- For every 'Error' row, return its id plus the ids of the previous and next rows.
;WITH Logs AS
(
    -- The alias belongs to the ROW_NUMBER() expression, not to message.
    SELECT ROW_NUMBER() OVER (ORDER BY id) AS rownum, id, message
    FROM LogTable lt
)
SELECT curr_row.id AS id,
       prev_row.id AS prev_id,
       next_row.id AS next_id
FROM Logs curr_row
-- LEFT joins keep an error that is the first or last row (neighbour id is NULL).
LEFT OUTER JOIN Logs prev_row ON prev_row.rownum = curr_row.rownum - 1
LEFT OUTER JOIN Logs next_row ON next_row.rownum = curr_row.rownum + 1
WHERE curr_row.message = 'Error'
-- Rows flagged 'Error' plus the rows whose id is exactly one below or above.
-- String literals use single quotes; double quotes denote identifiers in
-- standard SQL. The literal matches the 'Error' spelling of the sample data.
-- Assumes consecutive ids: a gap next to an error yields no neighbour there.
select id, message
from tbl
where id in (
    select id from tbl where message = 'Error'
    union
    select id - 1 from tbl where message = 'Error'
    union
    select id + 1 from tbl where message = 'Error'
)
Get fixed number of rows before & after target
Using UNION for a simple, high performance query (I found selected answer WITH query above to be extremely slow)
Here is a high performance alternative to the WITH top selected answer, when you know an ID or specific identifier for a given record, and you want to select a fixed number of records BEFORE and AFTER that record. Requires a number field for ID, or something like date that can be sorted ascending / descending.
Example: You want to select the 10 records before and after a specific error was recorded, you know the error ID, and can sort by date or ID.
The following query gets (inclusive) the 1 result above, the identified record itself, and the 1 record below. After the UNION, the results are sorted again in descending order.
-- Returns the target row, the row after it and the row before it, newest id first.
-- @TargetId: id of the known record; bind it as a query parameter. (A bare
-- [ID] would be parsed as the id column itself, making the filters meaningless.)
SELECT q.*
FROM (
    -- Target row and the one row after it. Each TOP ... ORDER BY lives in its
    -- own derived table because ORDER BY is not allowed inside a UNION branch.
    SELECT following.id, following.content
    FROM (
        SELECT TOP 2
            id, content
        FROM the_table
        WHERE id >= @TargetId
        ORDER BY id ASC
    ) AS following
    UNION
    -- The one row before the target.
    SELECT preceding.id, preceding.content
    FROM (
        SELECT TOP 1
            id, content
        FROM the_table
        WHERE id < @TargetId
        ORDER BY id DESC
    ) AS preceding
) q
ORDER BY q.id DESC