Oracle SQL Query - Element containing every element in subquery - sql

I have 3 tables like so :
Document(ID:integer, Title:string)
Keywords(ID:integer, Name:string)
Document_Keywords(DocumentID:integer, KeywordID:integer)
Document_Keywords.DocumentID referencing Document.ID
Document_Keywords.KeywordID referencing Keywords.ID
A document contains [0, n] keywords.
I want to get every Document whose Keywords contain at least all of another Document's Keywords. For example:
Foo, Bar and Fred-> Documents
Foo's keywords: {1, 2, 3}
Bar's keywords: {1, 2, 3, 4}
Fred's keywords: {1, 3, 5}
If we search for all the documents keywords containing Foo's keywords, we get Bar but not Fred.
Here is the query I have so far:
-- Foo's keywords that document 1 does not have.
-- An empty result means document 1 covers every keyword of Foo.
SELECT foo_kw.KeywordID
FROM Document foo
INNER JOIN Document_Keywords foo_kw
    ON foo_kw.DocumentID = foo.ID
WHERE foo.title = 'Foo'
MINUS
SELECT other_kw.KeywordID
FROM Document_Keywords other_kw
WHERE other_kw.DocumentID = 1;
It returns an empty result if the keywords of the Document with ID = 1 contain at least every one of Foo's keywords.
I can't find any other way to solve this problem, as I can only use Oracle SQL to answer it.

If you want to get keywords with documents:
-- Every (keyword, document) pair where the keyword also belongs to 'Foo'
-- and the document is not 'Foo' itself.
SELECT cand_kw.KeywordID, cand.ID DOC_ID, cand.Title
FROM Document cand
INNER JOIN Document_Keywords cand_kw
    ON cand_kw.DocumentID = cand.ID
WHERE EXISTS (
    SELECT 1
    FROM Document_Keywords foo_kw
    INNER JOIN Document foo
        ON foo.ID = foo_kw.DocumentID
    WHERE foo.title = 'Foo'
      AND foo_kw.KeywordID = cand_kw.KeywordID
      AND foo.ID <> cand.ID
);
Full test case with test data and results:
-- Self-contained demo: inline sample data, followed by every
-- (keyword, document) pair that a document other than 'Foo' shares with 'Foo'.
with
Document(ID, Title) as (
    select 1, 'Foo' from dual union all
    select 2, 'Bar' from dual union all
    select 3, 'Fred' from dual
),
Keywords(ID, Name) as (
    select level, 'Key'||level from dual connect by level<=5
),
Document_Keywords(DocumentID, KeywordID) as (
    -- Foo's keywords: {1, 2, 3}
    select 1, column_value from table(sys.odcinumberlist(1,2,3)) union all
    -- Bar's keywords: {1, 2, 3, 4}
    select 2, column_value from table(sys.odcinumberlist(1,2,3,4)) union all
    -- Fred's keywords: {1, 3, 5}
    select 3, column_value from table(sys.odcinumberlist(1,3,5))
)
select cand_kw.KeywordID, cand.ID DOC_ID, cand.Title
from Document cand
inner join Document_Keywords cand_kw
    on cand_kw.DocumentID = cand.ID
where exists (
    select 1
    from Document_Keywords foo_kw
    inner join Document foo
        on foo.ID = foo_kw.DocumentID
    where foo.title = 'Foo'
      and foo_kw.KeywordID = cand_kw.KeywordID
      and foo.ID <> cand.ID
);
KEYWORDID DOC_ID TITLE
---------- ---------- -----
1 2 Bar
1 3 Fred
2 2 Bar
3 2 Bar
3 3 Fred
If you want without documents, just list of keywords:
-- Distinct keywords that 'Foo' has in common with at least one other document.
SELECT DISTINCT other_kw.KeywordID
FROM Document_Keywords other_kw
WHERE EXISTS (
    SELECT 1
    FROM Document_Keywords foo_kw
    INNER JOIN Document foo
        ON foo.ID = foo_kw.DocumentID
    WHERE foo.title = 'Foo'
      AND foo_kw.KeywordID = other_kw.KeywordID
      AND foo.ID <> other_kw.DocumentID
);
Full tests case with the results:
-- Self-contained demo: inline sample data, followed by the distinct keywords
-- that 'Foo' has in common with at least one other document.
with
Document(ID, Title) as (
    select 1, 'Foo' from dual union all
    select 2, 'Bar' from dual union all
    select 3, 'Fred' from dual
),
Keywords(ID, Name) as (
    select level, 'Key'||level from dual connect by level<=5
),
Document_Keywords(DocumentID, KeywordID) as (
    -- Foo's keywords: {1, 2, 3}
    select 1, column_value from table(sys.odcinumberlist(1,2,3)) union all
    -- Bar's keywords: {1, 2, 3, 4}
    select 2, column_value from table(sys.odcinumberlist(1,2,3,4)) union all
    -- Fred's keywords: {1, 3, 5}
    select 3, column_value from table(sys.odcinumberlist(1,3,5))
)
select distinct other_kw.KeywordID
from Document_Keywords other_kw
where exists (
    select 1
    from Document_Keywords foo_kw
    inner join Document foo
        on foo.ID = foo_kw.DocumentID
    where foo.title = 'Foo'
      and foo_kw.KeywordID = other_kw.KeywordID
      and foo.ID <> other_kw.DocumentID
);
KEYWORDID
----------
1
2
3

If I have this right, you want documents whose keywords contain all of another document's keywords (Foo's, in the question's example) as a submultiset.
Setup (building on Sayan's example):
-- Nested-table type used to hold a document's keyword IDs as a collection.
create or replace type number_tt as table of number;
/
-- SQL*Plus-style clients need the "/" above to execute a CREATE TYPE statement.

-- Sample documents.
create table documents(id, title) as
select 1, 'Foo' from dual union all
select 2, 'Bar' from dual union all
select 3, 'Fred' from dual;

-- Sample document/keyword links: Foo {1,2,3}, Bar {1,2,3,4}, Fred {1,3,5}.
create table document_keywords(documentid, keywordid) as
select 1, column_value from table(number_tt(1,2,3)) union all
select 2, column_value from table(number_tt(1,2,3,4)) union all
select 3, column_value from table(number_tt(1,3,5));
Query:
-- One row per document with its keywords both as display text (keywordlist)
-- and as a number_tt collection (keywordids) for the SUBMULTISET test.
with document_keywords_agg(documentid, title, keywordlist, keywordids) as (
    select
        d.id,
        d.title,
        listagg(dk.keywordid, ', ') within group (order by dk.keywordid),
        cast(collect(dk.keywordid) as number_tt)
    from documents d
    join document_keywords dk
        on dk.documentid = d.id
    group by d.id, d.title
)
-- Pair each document (sup) with every other document (sub) whose keywords
-- are all contained in sup's keywords.
select
    sup.documentid,
    sup.title,
    sup.keywordlist,
    sub.title       as subset_title,
    sub.keywordlist as subset_keywords
from document_keywords_agg sup
join document_keywords_agg sub
    on  sub.documentid <> sup.documentid
    and sub.keywordids submultiset of sup.keywordids;
Results:
DOCUMENTID
TITLE
KEYWORDLIST
SUBSET_TITLE
SUBSET_KEYWORDS
2
Bar
1, 2, 3, 4
Foo
1, 2, 3
To extend the example a little, let's add another document 'Dino' containing keywords {1,3,5,9}:
-- Multi-table insert: the subquery yields one row per keyword (1, 3, 5, 9),
-- each tagged with document id 4.
insert all
-- Guarded by rownum = 1 so the 'Dino' document row is meant to be added once.
-- NOTE(review): confirm how rownum is evaluated in a conditional INSERT ALL.
when rownum = 1 then into documents values (docid, 'Dino')
-- Always true: adds a document_keywords row for every source row.
when 1=1 then into document_keywords values (docid, kw)
select 4 as docid, column_value as kw from table(number_tt(1,3,5,9));
Now the results are:
DOCUMENTID
TITLE
KEYWORDLIST
SUBSET_TITLE
SUBSET_KEYWORDS
2
Bar
1, 2, 3, 4
Foo
1, 2, 3
4
Dino
1, 3, 5, 9
Fred
1, 3, 5
(Add a filter to the where clause if you just want to check one document.)
SQL Fiddle

So, inner joining Document_Keywords to itself on KeywordID gives you the raw materials for what you are looking for, no?
. . .
From Document_Keywords A Inner Join Document_Keywords B On A.KeywordID=B.KeywordID
And A.DocumentID<>B.DocumentID
. . .
Granted, if the same Keyword is in multiple other documents you will get multiple occurrences of A.*, but you can summarize those out with a Group By, or possibly a Distinct clause.
If you need text-y results, you can add Document and Keywords table joins to this on the table A keys.
A query that delivers results in the format you specified above would be:
-- For each document, list the keyword IDs it shares with at least one other document.
Select Title, ListAgg(KeywordID,',') Within Group (Order By KeywordID) as KeyWord_IDs
From (
    -- One row per (document, shared keyword). The Group By collapses the
    -- duplicates produced when a keyword appears in several other documents.
    Select D.Title, D.ID, A.KeywordID
    From Document_Keywords A
    Inner Join Document_Keywords B
        On A.KeywordID = B.KeywordID
        And A.DocumentID <> B.DocumentID
    Inner Join Document D
        On D.ID = A.DocumentID
    -- Every selected non-aggregated column must be grouped, otherwise Oracle
    -- raises ORA-00979. D.ID equals A.DocumentID, so the grouping is unchanged.
    Group By D.Title, D.ID, A.KeywordID
)
-- ID is kept in the grouping so two documents with the same title stay separate.
Group By Title, ID

Related

How to get a recursive tree for a single table element

I have a table of this type
| id | parent_id | | title |
parent_id refers to the id of the same table
I need to get a recursive tree for an element knowing only its parent.
it will be clearer what I mean in the picture
In the picture, I need to get the recursive parent tree for element E (C's id is known): I need the A - C - E tree, without B, D and the other elements, only for my element E.
The nesting can be even greater, I just need to get all the parents in order without unnecessary elements.
This is needed for bread crumbs on my website
How i can do this in PostgreSQL?
Use RECURSIVE query
-- Walk from element 'E' up to the root, adding one parent per iteration.
with recursive rec(id, parent_id, title) as (
    -- Anchor: the element whose ancestors are wanted.
    select id, parent_id, title from t
    where title = 'E'
    union all
    -- Recursive step: the parent of the row found in the previous iteration.
    -- Columns are listed explicitly instead of t.* so the branch always matches
    -- the three declared CTE columns, even if t has further columns.
    select t.id, t.parent_id, t.title
    from rec
    join t on t.id = rec.parent_id
)
select * from rec
id|parent_id|title|
--+---------+-----+
5| 3|E |
3| 1|C |
1| |A |
Join the table to itself
-- Template, not runnable as-is: the "..." lines stand for one more output column
-- and one more self-join per additional ancestor level.
SELECT t1.title, t2.title as parent, t3.title as great_parent, ...
FROM my_table t1
-- each join steps one level up through parent_id
JOIN my_table t2 on t1.parent_id = t2.id
JOIN my_table t3 on t2.parent_id = t3.id
...
WHERE t1.title = 'curent'
If you don't know how many parents you have, use LEFT JOIN and add as many columns as needed.
thanks to Marmite Bomber
and with a small improvement to know the kinship level :
--drop table if exists recusive_test ;
-- Demo table: each row points at its parent through id_parent.
create table recusive_test (id_parent integer, id integer, title varchar);

insert into recusive_test (id, id_parent, title) values
    (2, 1, 'A'),
    (3, 2, 'B'),
    (4, 2, 'C'),
    (5, 4, 'D'),
    (6, 3, 'E'),
    (7, 3, 'F'),
    (8, 6, 'G'),
    (9, 6, 'H'),
    (10, 4, 'I'),
    (11, 4, 'J');

-- Climb from 'I' towards the root; step is the kinship level (1 = the element itself).
WITH RECURSIVE search_tree(id, id_parent, title, step) AS (
    SELECT leaf.id, leaf.id_parent, leaf.title, 1
    FROM recusive_test leaf
    WHERE leaf.title = 'I'
    UNION ALL
    SELECT up.id, up.id_parent, up.title, walked.step + 1
    FROM search_tree walked
    INNER JOIN recusive_test up
        ON up.id = walked.id_parent
)
-- Highest step first, i.e. the farthest ancestor at the top.
SELECT * FROM search_tree ORDER BY step DESC;

How to display null values in IN operator for SQL with two conditions in where

I have this query
-- 'Received' events of types 1-5 for a single entity.
SELECT *
FROM dbo.EventLogs
WHERE NewValue = 'Received'
  AND EventTypeID IN (1, 2, 3, 4, 5)
  AND EntityID = 60181615
If event types 2 and 4 do not exist with NewValue 'Received', it shows this:
current results
What I want
Ideally you should maintain somewhere a table containing all possible EventTypeID values. Sans that, we can use a CTE in place along with a left join:
-- Inline list of every event type that must appear in the output,
-- standing in for a real lookup table.
WITH EventTypes AS (
    SELECT 1 AS ID UNION ALL
    SELECT 2 UNION ALL
    SELECT 3 UNION ALL
    SELECT 4 UNION ALL
    SELECT 5
)
-- One row per event type. The log columns are NULL when that type has no
-- 'Received' entry for the entity.
SELECT et.ID AS EventTypeId, el.*
FROM EventTypes et
LEFT JOIN dbo.EventLogs el
    -- Match each log row to its own event type. Without this condition every
    -- event type would be paired with every matching log row.
    ON el.EventTypeID = et.ID AND
       -- These filters stay in ON (not WHERE) so unmatched types are preserved.
       el.EntityID = 60181615 AND
       el.NewValue = 'Received'
WHERE
    et.ID IN (1,2,3,4,5);

How to find equal subsets?

I have a table with subsets. How to find reader id's with the same subsets as given id? For example:
Input reader = 4
The expected output: reader 1 and 5.
Subsets size is not always = 3 as in the example it can be dynamic. What is correct SQL query?
-- Sample data: one row per (reader, book, pages) entry.
-- "#t" names a temporary table, which is created with CREATE TABLE;
-- DECLARE ... TABLE only works for "@" table variables.
create table #t (
    reader int not null,
    book int,
    pages int
)

-- Readers 1, 4 and 5 share the same set; readers 2 and 3 differ.
-- All rows are distinct, so UNION ALL loads the same data without a dedup pass.
insert into #t (reader, book, pages)
select 1, 1, 100 union all
select 1, 2, 201 union all
select 1, 3, 301 union all
select 2, 1, 100 union all
select 2, 3, 101 union all
select 2, 3, 301 union all
select 3, 1, 100 union all
select 3, 2, 101 union all
select 3, 3, 301 union all
select 4, 1, 100 union all
select 4, 2, 201 union all
select 4, 3, 301 union all
select 5, 1, 100 union all
select 5, 2, 201 union all
select 5, 3, 301

select * from #t
This is a bit of a pain, but you can use a self-join:
-- Attach to every row the total number of rows its reader has.
with sized as (
    select src.*, count(*) over (partition by src.reader) as cnt
    from #t src
)
-- Readers whose set has the same size as reader 4's and whose every
-- (book, pages) row also occurs for reader 4.
select cand.reader
from sized cand
left join sized ref
    on  ref.reader = 4
    and ref.cnt    = cand.cnt
    and ref.book   = cand.book
    and ref.pages  = cand.pages
group by cand.reader, cand.cnt
having count(*) = cand.cnt
   and count(ref.reader) = count(*);
The left join is needed to avoid a subsetting relationship. That is, having all the books for "4" plus additional books.
This is a generic approach to handle relational division. It checks if set x contains all elements from set y (and perhaps more):
-- The required set: every (book, pages) row of reader 1.
with wanted as (
    select book, pages
    from #t
    where reader = 1
)
-- Readers that match every row of the required set (they may have more rows).
select cand.reader
from #t as cand
inner join wanted
    on  wanted.book  = cand.book
    and wanted.pages = cand.pages
group by cand.reader
having (select count(*) from wanted) = count(wanted.book)

Select Parent having null and not null child

Given 3 tables like:
[Table_Main] ----> [Table_Sub] ----> [Table_Prop]
1-N 0-N
I want to select item in [Table_Main] that :
- Have multiple [Table_Sub].
- Have some [Table_Sub] lines with a [Table_Prop] and some without.
To select those value I use :
-- Main rows with their sub rows; prop columns are NULL for sub rows without a prop.
SELECT m.Field_ID
FROM Table_Main m
INNER JOIN Table_Sub s
    ON s.Table_Main_Field_ID = m.Field_ID
LEFT JOIN Table_Prop p
    ON p.Table_Sub_Field_ID = s.Field_ID
If we rename the tables Family, Child and Pet: I need the families where some children have pet(s) but some children don't.
Family: Id, Name
1, Foo -- Family with 2 childs, one of them has a pet
2, Bar -- Family with 2 childs, 0 pet
3, Abc -- Family with 2 childs, both have pet
Child: Id, Family_Id, Name
1, 1, John -- Child of Foo
2, 1, Joe -- Child of Foo
3, 2, Jane
4, 2, Jessica
5, 3, XXX
6, 3, YYY
Pet: Id, Child_Id, Name
1, 2, FooBar -- Joe's pet
2, 5, Huey
3, 6, Dewey
Expected Result:
1, Foo
Families with fewer than 2 children are excluded from the example, as they cannot satisfy both constraints:
- Has a child with a pet
- Has a child with no pet.
Table Creation :
-- Families. Columns follow the described layout "Family: Id, Name".
CREATE TABLE Family(
    Id INTEGER NOT NULL PRIMARY KEY
   ,Name VARCHAR(20) NOT NULL
);
INSERT INTO Family(Id,Name) VALUES (1,'Foo');
INSERT INTO Family(Id,Name) VALUES (2,'Bar');
INSERT INTO Family(Id,Name) VALUES (3,'Abc');

-- Children; Family_Id points at Family.Id.
CREATE TABLE Child(
    Id INTEGER NOT NULL PRIMARY KEY
   ,Family_Id INTEGER NOT NULL
   ,Name VARCHAR(20) NOT NULL
);
INSERT INTO Child(Id,Family_Id,Name) VALUES (1,1,'John');
INSERT INTO Child(Id,Family_Id,Name) VALUES (2,1,'Joe');
INSERT INTO Child(Id,Family_Id,Name) VALUES (3,2,'Jane');
INSERT INTO Child(Id,Family_Id,Name) VALUES (4,2,'Jessica');
INSERT INTO Child(Id,Family_Id,Name) VALUES (5,3,'XXX');
INSERT INTO Child(Id,Family_Id,Name) VALUES (6,3,'YYY');

-- Pets; Child_Id points at Child.Id (layout "Pet: Id, Child_Id, Name").
CREATE TABLE Pet(
    Id INTEGER NOT NULL PRIMARY KEY
   ,Child_Id INTEGER NOT NULL
   ,Name VARCHAR(20) NOT NULL
);
INSERT INTO Pet(Id,Child_Id,Name) VALUES (1,2,'FooBar');
INSERT INTO Pet(Id,Child_Id,Name) VALUES (2,5,'Huey');
INSERT INTO Pet(Id,Child_Id,Name) VALUES (3,6,'Dewey');
This will give you desired result.
-- Inline sample data, followed by the families that have at least one child
-- without a pet and at least one child with a pet.
;with family as
(
    select 1 FamilyID, 'Foo' Family union
    select 2, 'Bar' union
    select 3, 'ABC'
), child as
(
    select 1 ChildID, 1 FamilyID, 'John' ChildName union
    select 2, 1, 'Joe' union
    select 3, 2, 'Jane' union
    select 4, 2, 'Jessica' union
    select 5, 3, 'XXX' union
    select 6, 3, 'YYY'
), pets as
(
    select 1 petid, 2 childid, 'FooBar' pet union
    select 2, 5, 'Huey' union
    select 3, 6, 'Dewey'
)
select
    per_child.FamilyID,
    max(fam.Family)    Family,
    min(per_child.CNT) [Min],
    max(per_child.CNT) [Max]
from
(
    -- Number of pets per child (0 when the child has none).
    select f.FamilyID, c.ChildID,
           sum(case when p.petid is not null then 1 else 0 end) CNT
    from family f
    join child c on c.FamilyID = f.FamilyID
    left join pets p on p.childid = c.ChildID
    group by f.FamilyID, c.ChildID
) per_child
join family fam on fam.FamilyID = per_child.FamilyID
group by per_child.FamilyID
-- min = 0: some child has no pet; max > 0: some child has one.
having min(per_child.CNT) = 0 and max(per_child.CNT) > 0
Query
-- Families with pets where the joined rows outnumber the pets,
-- i.e. at least one child row carries no pet.
select f.ID, f.name
from family f
left join child c on c.family_id = f.ID
left join pet p on p.child_ID = c.Id
group by f.ID, f.name
having count(c.id) > 1
   and count(p.id) <> 0
   and count(p.id) < count(c.id)
Output
looks like you are close but if I understand right:
-- Inline sample data, followed by the number of pets per (parent, child) pair.
with parent as (
    select 'Charlie' name from dual union all
    select 'Ben' from dual union all
    select 'Bob' from dual union all
    select 'Harry' from dual
),
child as (
    select 'Ben' parentname, 'Bebbie' name from dual union all
    select 'Ben', 'Tilda' from dual union all
    select 'Bob', 'Shara' from dual union all
    select 'Bob', 'Sandra' from dual
),
pet as (
    select 'Tilda' childname, 'Dog' pet from dual union all
    select 'Tilda', 'Cat' from dual union all
    select 'Shara', 'Bird' from dual union all
    select 'Shara', 'Snake' from dual
)
-- Parents without children drop out (inner join); children without pets count as 0.
select pa.name, ch.name, count(pe.pet)
from parent pa
inner join child ch
    on ch.parentname = pa.name
left join pet pe
    on pe.childname = ch.name
group by pa.name, ch.name

SQL search and destroy duplicates

I have a table with fields (simplified):
id, fld1, fld2, fld3.
id is a numeric primary key field.
There are duplicates: id differs but fld1, fld2 and fld3 are identical over 2 or more rows. There are also entries where the values occur only once, i.e. non-duplicates, of course.
Of each set of duplicate entries, I want to retain only the entry with the highest ID. I was planning to first list the doomed rows and then to delete them.
My first stab at it was this:
-- Failing first attempt, kept as posted.
-- NOTE(review): MAX(t2.id) is an aggregate used inside the WHERE clause, which
-- SQL does not allow; the HAVING/GROUP BY order is also unconventional.
SELECT * FROM tab1 t1 WHERE EXISTS (
SELECT COUNT(*) FROM tab1 t2
WHERE t1.fld1 = t2.fld1 AND t1.fld2 = t2.fld2 AND t1.fld3 = t2.fld3
AND t1.id < MAX(t2.id)
HAVING COUNT(*) > 1
GROUP BY t2.fld1, t2.fld2, t2.fld3)
But (in Oracle) I'm getting a Missing right parenthesis error message. I think this needs a new approach altogether, but my SQL-fu is not up to the task. Help appreciated!
Edit:
With 'real' data fields:
-- Failing attempt on the real table, kept as posted.
-- NOTE(review): 'max_id' in single quotes is a string literal, not a column
-- alias; this is the likely cause of the ORA-00923 reported for this query.
select x.leg_id, x.airline_des, x.flight_nr, x.suffix, x.flight_id_date, x.lt_flight_id_date
from fdb_leg x
join ( select max(t.leg_id) 'max_id',
t.airline_des, t.flight_nr, t.suffix, t.flight_id_date, t.lt_flight_id_date
from fdb_leg t
group by t.airline_des, t.flight_nr, t.suffix, t.flight_id_date, t.lt_flight_id_date
having count(*) > 1) y on y.max_id > x.leg_id
and y.airline_des = x.airline_des and y.flight_nr = x.flight_nr and y.suffix = x.suffix
and y.flight_id_date = x.flight_id_date and x.lt_flight_id_date = y.lt_flight_id_date
Response is:
ORA-00923: FROM keyword not found where expected
Oracle 9i+, Using WITH:
To get the list of doomed entries, use:
-- keepers: for every (fld1, fld2, fld3) combination that occurs more than once,
-- the highest id, which is the row to retain.
WITH keepers AS (
    -- The alias must be a plain identifier: a single-quoted 'max_id' is a string
    -- literal in Oracle and raises ORA-00923.
    SELECT MAX(t.id) AS max_id,
           t.fld1, t.fld2, t.fld3
      FROM TABLE_1 t
  GROUP BY t.fld1, t.fld2, t.fld3
    HAVING COUNT(*) > 1)
-- Doomed rows: same field values as a keeper but a lower id.
-- Rows with NULL in fld1/fld2/fld3 never satisfy the "=" comparisons below.
SELECT x.id,
       x.fld1, x.fld2, x.fld3
  FROM TABLE_1 x
  JOIN keepers y ON y.max_id > x.id
                AND y.fld1 = x.fld1
                AND y.fld2 = x.fld2
                AND y.fld3 = x.fld3
Non-WITH Equivalent:
To get the list of doomed entries, use:
-- Doomed rows: every row that shares fld1/fld2/fld3 with a duplicated group
-- but does not carry that group's highest id.
SELECT x.id,
       x.fld1, x.fld2, x.fld3
  FROM TABLE_1 x
  -- y: the highest id of each (fld1, fld2, fld3) combination occurring more than once.
  -- The alias must be a plain identifier: a single-quoted 'max_id' is a string
  -- literal in Oracle and raises ORA-00923.
  JOIN (SELECT MAX(t.id) AS max_id,
               t.fld1, t.fld2, t.fld3
          FROM TABLE_1 t
      GROUP BY t.fld1, t.fld2, t.fld3
        HAVING COUNT(*) > 1) y ON y.max_id > x.id
                              AND y.fld1 = x.fld1
                              AND y.fld2 = x.fld2
                              AND y.fld3 = x.fld3
You can delete them in one shot, like this:
SQL> create table mytable (id, fld1, fld2, fld3)
2 as
3 select 1, 1, 1, 1 from dual union all
4 select 2, 1, 1, 1 from dual union all
5 select 3, 2, 2, 2 from dual union all
6 select 4, 2, 3, 2 from dual union all
7 select 5, 2, 3, 2 from dual union all
8 select 6, 2, 3, 2 from dual
9 /
Table created.
SQL> delete mytable
2 where id not in
3 ( select max(id)
4 from mytable
5 group by fld1
6 , fld2
7 , fld3
8 )
9 /
3 rows deleted.
SQL> select * from mytable
2 /
ID FLD1 FLD2 FLD3
---------- ---------- ---------- ----------
2 1 1 1
3 2 2 2
6 2 3 2
3 rows selected.
Regards,
Rob.
Ugh, I get it. Scratch that.
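This will identify the IDs to keep (the highest ID in each group of identical fld1, fld2, fld3 values); every other ID in the table is a duplicate to delete.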
-- One row per distinct (fld1, fld2, fld3) combination, with the highest ID
-- found in that combination. Groups with a single row are included too.
Select
fld1
, fld2
, fld3
, Max(ID)
From table_name
Group By
fld1
, fld2
, fld3