Sort JSONB array of strings - sql

I have a column of type jsonb that contains an array of strings, for example:
["Stomach Mixed Adenocarcinoma", "Gastric Mixed Adenocarcinoma", "Mixed Breast Carcinoma"]
And I have the following query which gets 'search' as a parameter and returns entities based on similarity with search pattern.
-- Return diseases whose name or any synonym resembles :search, best match first.
WITH names AS (
    -- One row per (disease, label); the labels are the synonyms plus the name itself.
    SELECT
        dr.id,
        dr.name,
        dr.synonyms,
        CASE
            WHEN dr.id = :search THEN 0
            ELSE 1 - similarity(label.disease_name, :search)
        END AS distance
    FROM disease_registry AS dr
    CROSS JOIN LATERAL jsonb_array_elements_text(
        dr.synonyms || jsonb_build_array(dr.name)
    ) AS label(disease_name)
    WHERE similarity(label.disease_name, :search) > 0.2
       OR dr.id = :search
       OR :search = ''
)
SELECT
    nm.id,
    nm.name,
    nm.synonyms
FROM names AS nm
GROUP BY
    nm.id,
    nm.name,
    nm.synonyms
ORDER BY
    -- A substring hit on the name sorts first; otherwise use the closest label.
    CASE
        WHEN nm.name ILIKE '%' || :search || '%' THEN 0
        ELSE min(nm.distance)
    END,
    nm.name
Now I want to sort the synonyms array by similarity to the search pattern, and only then return the results.

-- Same search as the original query, but each returned synonyms array is
-- re-ordered so that the entries most similar to :search come first.
with names as (
    -- One row per (disease, label); the labels are the synonyms plus the name itself.
    select b.id,
           b.name,
           b.synonyms,
           case
               when b.id = :search then 0
               else 1 - similarity(s.disease_name, :search)
           end as distance
    from disease_registry b
    cross join lateral jsonb_array_elements_text(
        b.synonyms || jsonb_build_array(b.name)
    ) as s(disease_name)
    where (similarity(s.disease_name, :search) > 0.2
        or b.id = :search
        or :search = '')
)
select n.id,
       n.name,
       -- jsonb_agg over zero rows yields NULL, so an empty synonyms array would
       -- be returned as NULL; coalesce keeps it an empty JSON array instead.
       coalesce(
           (
               select jsonb_agg(elem order by similarity(elem::text, :search) desc)
               from jsonb_array_elements_text(n.synonyms) as elem
           ),
           '[]'::jsonb
       ) as synonyms
from names n
group by n.id, n.name, n.synonyms
order by (case
              -- A substring hit on the name sorts first; otherwise use the closest label.
              when n.name ilike '%' || :search || '%'
                  then 0
              else min(n.distance)
          end),
         n.name;
This is how I achieved it.

Related

SQL Query with Left Join slows down

I am cleaning up a query that was written a couple of years ago, and I noticed that the query below is what slows down the result. I searched for alternatives but was not very successful.
-- Report query: one row per sector report / class / question-answer, together
-- with its category path, flight-leg details, concatenated comments and photo count.
--
-- CTE "cat": expands each selected root category (parent_id IS NULL) into paths of
-- up to three sub-levels. "lowest" is the deepest non-NULL category id on the path.
WITH cat
AS (SELECT main.category_id main_id,
main.description main_desc,
sub1.category_id sub1_id,
sub1.description sub1_desc,
sub2.category_id sub2_id,
sub2.description sub2_desc,
sub3.category_id sub3_id,
sub3.description sub3_desc,
-- First non-NULL of sub3, sub2, sub1, main (innermost NVL is evaluated first).
NVL (
NVL (NVL (sub3.category_id, sub2.category_id),
sub1.category_id),
main.category_id)
lowest
FROM iod.sr_category main
-- We need to insert a temporary null row and include it in the join so the categories which are not leaves are also included
LEFT JOIN
(SELECT category_id, description, parent_id
FROM iod.sr_category
UNION
SELECT NULL, NULL, NULL
FROM DUAL) sub1
-- The "IS NULL" branch matches the all-NULL helper row for every parent row.
ON ( main.category_id = sub1.parent_id
OR sub1.category_id IS NULL)
LEFT JOIN
(SELECT category_id, description, parent_id
FROM iod.sr_category
UNION
SELECT NULL, NULL, NULL
FROM DUAL) sub2
ON ( sub1.category_id = sub2.parent_id
OR sub2.category_id IS NULL)
LEFT JOIN
(SELECT category_id, description, parent_id
FROM iod.sr_category
UNION
SELECT NULL, NULL, NULL
FROM DUAL) sub3
ON ( sub2.category_id = sub3.parent_id
OR sub3.category_id IS NULL)
WHERE main.parent_id IS NULL
AND (main.category_id IN ( :main_category))
-- A NULL or 0 bind value switches the corresponding sub-category filter off.
AND ( NVL ( :sub_category1, 0) = 0
OR sub1.category_id = :sub_category1)
AND ( NVL ( :sub_category2, 0) = 0
OR sub2.category_id = :sub_category2)
AND ( NVL ( :sub_category3, 0) = 0
OR sub3.category_id = :sub_category3))
SELECT r.sector_report_id,
r.created_date report_created_date --, nvl(f.sch_dep_date_local_port, f.sch_dep_date_gmt) report_search_date
,
c.main_desc,
c.sub1_desc,
c.sub2_desc,
c.sub3_desc,
p.problem_type_code,
p.description,
e.first_name,
e.last_name,
e.employee_id,
cl.description class,
-- Numbers the rows within each report/class in question_id order.
ROW_NUMBER ()
OVER (PARTITION BY r.sector_report_id, cl.class_id
ORDER BY q.question_id)
question_num,
q.label question,
q.data_type,
-- Answer text: first non-NULL of predefined answer, value, free text, "hours:minutes".
-- NOTE(review): in Oracle, a.hours || ':' || a.minutes is at least ':' even when
-- both parts are NULL, so the 'NO ANSWER' fallback looks unreachable -- confirm.
NVL (
pa.answer,
NVL (
a.VALUE,
NVL (a.free_text,
NVL (a.hours || ':' || a.minutes, 'NO ANSWER'))))
answer,
f.arln_code,
f.flight_number,
f.sch_dep_date_gmt,
f.sch_dep_date_local_port,
f.sch_dep_apt,
f.sch_arv_apt,
f.tail_number,
f.fleet_code,
-- All comments of the group as "first last created_date:text", oldest first,
-- separated by a space followed by CHR(10).
LISTAGG (
e_cmt.first_name
|| ' '
|| e_cmt.last_name
|| ' '
|| cmt.created_date
|| ':'
|| cmt.comment_text,
' ' || CHR (10))
WITHIN GROUP (ORDER BY cmt.created_date)
comments,
-- Number of photographs attached to the report. Inside this subquery the alias
-- "p" refers to sr_photograph, not to the outer sr_problem_type alias "p".
(NVL ( (SELECT COUNT (*)
FROM iod.sr_photograph p
WHERE p.sector_report_id = r.sector_report_id),
0))
AS photo
FROM iod.sr_sector_report r
-- Reports are attached to the deepest category of a path.
JOIN cat c ON c.lowest = r.category_id
LEFT JOIN iod.sr_problem_type p
ON p.problem_type_id = r.problem_type_id
LEFT JOIN iod.ops_flight_leg f
ON f.ops_flight_leg_id = r.ops_flight_leg_id
LEFT JOIN iod.employee e ON e.employee_id = r.employee_id
LEFT JOIN iod.sr_class_sector_report csr
ON csr.sector_report_id = r.sector_report_id
LEFT JOIN iod.sr_class cl ON cl.class_id = csr.class_id
-- NOTE(review): classes, answers and comments are each joined on sector_report_id
-- only, so their rows are combined with one another before the GROUP BY -- confirm
-- that this multiplication is intended.
LEFT JOIN iod.sr_answer a ON a.sector_report_id = r.sector_report_id
LEFT JOIN iod.sr_predefined_answer pa
ON pa.predefined_answer_id = a.predefined_answer_id
LEFT JOIN iod.sr_question q ON q.question_id = a.question_id
LEFT JOIN iod.sr_comment cmt
ON cmt.sector_report_id = r.sector_report_id
LEFT JOIN iod.employee e_cmt ON e_cmt.employee_id = cmt.employee_id
-- A NULL or 0 :problem_type disables the problem-type filter.
WHERE (NVL ( :problem_type, 0) = 0 OR p.problem_type_id = :problem_type)
-- NOTE(review): the predicates on f below reject rows without a matching flight
-- leg, so the LEFT JOIN to ops_flight_leg effectively behaves like an inner join.
AND TRUNC (f.sch_dep_date_local_port) BETWEEN :from_date AND :TO_DATE
--and cast(nvl(f.sch_dep_date_local_port, f.sch_dep_date_gmt) as date) between :from_date and :to_date
AND (--nvl(:station_from, ' ') = ' ' or
f.sch_dep_apt IN ( :station_from))
AND (--nvl(:station_to, ' ') = ' ' or
f.sch_arv_apt IN ( :station_to))
-- Every non-aggregated select expression is repeated here; only LISTAGG aggregates.
GROUP BY r.sector_report_id,
r.created_date,
c.main_desc,
c.sub1_desc,
c.sub2_desc,
c.sub3_desc,
p.problem_type_code,
p.description,
e.first_name,
e.last_name,
e.employee_id,
cl.description,
cl.class_id,
q.label,
q.data_type,
q.question_id,
NVL (
pa.answer,
NVL (
a.VALUE,
NVL (a.free_text,
NVL (a.hours || ':' || a.minutes, 'NO ANSWER')))),
f.arln_code,
f.flight_number,
f.sch_dep_date_gmt,
f.sch_dep_date_local_port,
f.sch_dep_apt,
f.sch_arv_apt,
f.tail_number,
f.fleet_code
ORDER BY TRUNC (f.sch_dep_date_local_port) ASC
so the part
subx.category_id = suby.parent_id OR suby.category_id is null
slows down the query. Any ideas?
Without an explain plan or an understanding of the business logic being implemented all we can do is guess. But this does seem like a scenario where sub-query factoring could help:
-- Sketch: define the category list plus the all-NULL helper row once ("sub")
-- and reuse it for every level instead of repeating the inline view three times.
-- The "...." lines stand for the parts of the original query that are unchanged.
with sub as (
    SELECT category_id, description, parent_id FROM iod.sr_category
    UNION
    SELECT null, null, null FROM DUAL)
select
....
from
....
LEFT JOIN sub sub1
    ON (main.category_id = sub1.parent_id OR sub1.category_id is null)
LEFT JOIN sub sub2
    ON (sub1.category_id = sub2.parent_id OR sub2.category_id is null)
LEFT JOIN sub sub3
    ON (sub2.category_id = sub3.parent_id OR sub3.category_id is null)
WHERE main.parent_id is null
-- A NULL or 0 bind value switches the corresponding sub-category filter off.
AND (main.category_id IN (:main_category))
AND (nvl(:sub_category1, 0) = 0 OR sub1.category_id = :sub_category1)
AND (nvl(:sub_category2, 0) = 0 OR sub2.category_id = :sub_category2)
AND (nvl(:sub_category3, 0) = 0 OR sub3.category_id = :sub_category3)
The WITH clause is covered in the Oracle SQL documentation. Find out more

SQL Inner Join AND

I'm having some trouble with an SQL query...
-- Per search word: count the companies in the same region whose text contains the word.
SELECT
    s.searchword AS searchword,
    s.id AS id,
    COUNT(c.id) AS searchresult,
    s.region AS region
FROM search_words AS s
INNER JOIN company_data AS c
    ON c.text LIKE CONCAT('%', s.searchword, '%')
    AND c.region = s.region
GROUP BY s.searchword
ORDER BY s.date DESC
RESULT
[{"searchword":"wholesale","searchid":"416","searchresult":"31","region":"stockholm"},{"searchword":"Business","searchid":"329","searchresult":"1","region":"stockholm"},{"searchword":"Hospital","searchid":"330","searchresult":"1","region":"stockholm"},{"searchword":"Transportation","searchid":"337","searchresult":"4","region":"stockholm"},{"searchword":"Electronic","searchid":"334","searchresult":"4","region":"stockholm"},{"searchword":"Cars","searchid":"338","searchresult":"3","region":"stockholm"},{"searchword":"Food","searchid":"340","searchresult":"11","region":"stockholm"},{"searchword":"Retail","searchid":"342","searchresult":"8","region":"stockholm"},{"searchword":"Leasing","searchid":"343","searchresult":"1","region":"stockholm"}]
The problem here is that the region column holds an empty value for search words that apply to all regions.
I need to collect those too. As you can see, it only collects stockholm.
So I was thinking something like...
-- Attempt to also count search words whose region is empty.
SELECT
    s.searchword AS searchword,
    s.id AS id,
    COUNT(c.id) AS searchresult,
    s.region AS region
FROM search_words AS s
INNER JOIN company_data AS c
    -- AND binds tighter than OR, so this is how the condition is actually
    -- grouped: an empty s.region matches every company row, regardless of text.
    ON (c.text LIKE CONCAT('%', s.searchword, '%') AND c.region = s.region)
    OR s.region = ''
GROUP BY s.searchword
ORDER BY s.date DESC
But it doesnt work :(
NULLs in SQL aren't real values - they are the absence of a value, and must be tested explicitly with the IS operator (IS NULL) rather than with =. Additionally, AND has a higher precedence than OR, so all the conditions handling the region should be grouped in parentheses:
-- Per search word: count the companies whose text contains the word and whose
-- region matches. A search word with no region (NULL or '') applies to every region.
SELECT
    s.searchword AS searchword,
    s.id AS id,
    COUNT( c.id ) AS searchresult,
    s.region AS region
FROM search_words AS s
INNER JOIN company_data AS c ON
    c.text LIKE CONCAT( '%', s.searchword, '%' ) AND
    -- The parentheses are required because AND binds tighter than OR.
    -- COALESCE covers both NULL and the empty string; IS NULL alone would miss ''.
    (c.region = s.region OR COALESCE(s.region, '') = '')
GROUP BY 1 ORDER BY s.date DESC
If your region can be NULL or '' (an empty string), this is the bulletproof way to check:
-- True when s.region is NULL or the empty string.
COALESCE(s.region, '') = ''
This way you replace NULL with '' before comparing to the empty string.
Based on the information provided so far, you might want to try something along these lines:
-- A NULL region on either side is treated as the pseudo-region 'all' before comparing.
SELECT
    s.searchword AS searchword,
    s.id AS id,
    COUNT(c.id) AS searchresult,
    IFNULL(s.region, 'all') AS region
FROM search_words AS s
INNER JOIN company_data AS c
    ON IFNULL(s.region, 'all') = IFNULL(c.region, 'all')
    AND c.text LIKE CONCAT('%', s.searchword, '%')
GROUP BY
    s.searchword,
    s.id,
    IFNULL(s.region, 'all')
ORDER BY s.date DESC
;
See it in action: SQL Fiddle.
Please comment, if and as this requires adjustment / further detail. In particular, feel free to adjust the SQL Fiddle (and provide the link to the updated version).

Returning multiple columns from SELECT nested within CASE MySQL

Is there a better way to do this?
-- For the subs rows of item 19 (type 'blog'), fetch a display name and an e-mail
-- from members or from members_anon, depending on subs.member_type.
SELECT
    subs.*,
    CASE
        WHEN subs.member_type = 'member' THEN (
            SELECT CONCAT_WS(' ', members.first_name, members.last_name)
            FROM members
            WHERE members.id = subs.member_id
        )
        ELSE (
            SELECT members_anon.username
            FROM members_anon
            WHERE members_anon.id = subs.member_id
        )
    END AS fullname,
    CASE
        WHEN subs.member_type = 'member' THEN (
            SELECT members.email
            FROM members
            WHERE members.id = subs.member_id
        )
        ELSE (
            SELECT members_anon.email
            FROM members_anon
            WHERE members_anon.id = subs.member_id
        )
    END AS email
FROM subs
WHERE subs.item_id = 19
  AND subs.item_type = 'blog'
LIMIT 30 OFFSET 0
Ideally I would like to have only one CASE section that returned name and email from the relevant table.
I would use left outer joins on both tables:
-- Join both candidate tables once and let CASE pick the matching side per row.
SELECT
    subs.*,
    CASE
        WHEN subs.member_type = 'member' THEN CONCAT_WS(' ', m.first_name, m.last_name)
        ELSE ma.username
    END AS fullname,
    CASE
        WHEN subs.member_type = 'member' THEN m.email
        ELSE ma.email
    END AS email
FROM subs
LEFT JOIN members AS m
    ON m.id = subs.member_id
LEFT JOIN members_anon AS ma
    ON ma.id = subs.member_id
WHERE subs.item_id = 19
  AND subs.item_type = 'blog'
LIMIT 30 OFFSET 0
Regarding the wish for only one CASE: if you need two different columns in your result set, you will need two CASE expressions.
You can't use a single case expression to handle two separate columns...
Use:
-- Each derived table exposes the same three columns (id, fullname, email), so the
-- two CASE expressions only have to choose between side x (members) and side y
-- (members_anon).
SELECT s.*,
       CASE s.member_type
         WHEN 'member' THEN x.fullname
         ELSE y.fullname
       END AS fullname,
       -- The table is aliased "s"; the name "subs" is not visible in this query.
       CASE s.member_type
         WHEN 'member' THEN x.email
         ELSE y.email
       END AS email
  FROM SUBS s
  LEFT JOIN (SELECT m.id,
                    CONCAT_WS( ' ', m.first_name, m.last_name ) AS fullname,
                    m.email
               FROM MEMBERS m) x ON x.id = s.member_id
  LEFT JOIN (SELECT ma.id,
                    -- Aliased so that y.fullname exists for the CASE above.
                    ma.username AS fullname,
                    ma.email
               FROM MEMBERS_ANON ma) y ON y.id = s.member_id
 WHERE s.item_id = 19
   AND s.item_type = 'blog'
 LIMIT 0 , 30

Convert this SQL to LINQ

How do I do this query in linq? All the tables already are list of objects.
This query gives points to entities named "Empresas" (Companies) that match the "Palavras" (Words) criteria.
-- Total points per company, highest score first. The inner query produces one
-- partial score per company from each of three sources.
SELECT
    x.empresaid,
    SUM(x.pontos)
FROM (
    -- 1) one point per word contained in the company name
    SELECT
        a.empresaid,
        SUM(1) AS Pontos
    FROM empresa a
    INNER JOIN Palavras b
        ON a.nome LIKE '%' + b.Palavra + '%'
    GROUP BY a.empresaid

    UNION ALL

    -- 2) one point per joined row whose establishment name is not NULL
    -- NOTE(review): Palavras is LEFT JOINed and the CASE tests c, not d, so rows
    -- without a matching word are counted as well -- confirm this is intended.
    SELECT
        a.empresaid,
        SUM(CASE WHEN c.estabelecimento IS NULL THEN 0 ELSE 1 END) AS Pontos
    FROM empresa a
    LEFT JOIN estabelecimentoempresa b
        ON b.empresaid = a.empresaid
    LEFT JOIN estabelecimento c
        ON c.estabelecimentoid = b.estabelecimentoid
    LEFT JOIN Palavras d
        ON c.estabelecimento LIKE '%' + d.Palavra + '%'
    GROUP BY a.empresaid

    UNION ALL

    -- 3) same scheme as 2), applied to the company's kitchens (Cozinha)
    SELECT
        a.empresaid,
        SUM(CASE WHEN c.Cozinha IS NULL THEN 0 ELSE 1 END) AS Pontos
    FROM empresa a
    LEFT JOIN Cozinhaempresa b
        ON b.empresaid = a.empresaid
    LEFT JOIN Cozinha c
        ON c.Cozinhaid = b.Cozinhaid
    LEFT JOIN Palavras d
        ON c.Cozinha LIKE '%' + d.Palavra + '%'
    GROUP BY a.empresaid
) x
GROUP BY x.empresaid
ORDER BY
    SUM(x.pontos) DESC,
    x.empresaid
I don't think you will be able to convert it as it is from SQL to LINQ. You could still try this tool, which converts SQL to LINQ syntax:
http://www.sqltolinq.com/
The preferable approach is to understand and write the LINQ syntax on your own.

Is it possible to use conditional statements such as if/then/when like this in SQL?

I have an SQL query that has an id field - think of it as a foreign key. I need to make a decision based on the value of this id field such that:
If the value is less than 3100, run a nested fetch from table B.
If the value is greater than 3100, run a nested fetch from table C.
The statement looks like this:
-- Look up the user's first and last name in b with one scalar subquery each.
-- (The comma that preceded "from" was a syntax error and has been dropped.)
Select a.ID, a.SN, a.User_Ident,
(select b.first_name from b where b.ident = a.User_Ident) as 'First Name',
(select b.last_name from b where b.ident = a.User_Ident) as 'Last Name'
from a
where ...
What I would like to accomplish is something like this:
-- Pseudo-code only (not valid SQL): the wish is to take the name columns from b
-- or from c depending on a.User_Ident. As written, the first branch would return
-- two columns and the second branch only one.
Select a.ID, a.SN, a.User_Ident,
when a.User_Ident > 3100 then
(select b.first_name from b where b.ident = a.User_Ident) as 'First Name',
(select b.last_name from b where b.ident = a.User_Ident) as 'Last Name'
else
(select c.name from c where c.ident = a.User_Ident) as 'Name'
from a
where ....
Is this possible?
UPDATE: Your answers suggested I use left joins. My query already contains several left outer joins, so I don't know how this would work. Here is the complete query:
-- Asset listing with lookup descriptions and the details of the assigned user.
select
A.Ident,
A.Serial_Number,
A.Category_Ident,
C.Description as Category,
A.Purchase_Order,
A.Manufacturer_Ident,
M.Description as Manufacturer,
A.Hardware_Model,
A.Processor_Quantity,
A.Processor_Speed_Hertz,
A.Memory_Installed_Bytes,
A.Memory_Maximum_Bytes,
A.Memory_Slots_Used,
A.Memory_Slots_Total,
A.Storage_Capacity_Bytes,
A.Video_Memory_Bytes,
A.Screen_Size_Diagonal_Inches,
A.Software_Ident,
S.Software_Title,
A.Account_Ident,
T.Description as Account,
A.User_Ident,
-- User details: each column is a separate scalar subquery against Q.dbo.P,
-- correlated on A.User_Ident. Rank, Organization and Location first fetch the
-- key from Q.dbo.P and then resolve its description in Q.dbo.R / O / L.
(select Q.dbo.P.user_name from Q.dbo.P where Q.dbo.P.ident = A.User_Ident) as 'User Name',
(select Q.dbo.P.first_name from Q.dbo.P where Q.dbo.P.ident = A.User_Ident) as 'First Name',
(select Q.dbo.P.last_name from Q.dbo.P where Q.dbo.P.ident = A.User_Ident) as 'Last Name',
(select Q.dbo.R.description from Q.dbo.R where Q.dbo.R.ident = (select Q.dbo.P.rank from Q.dbo.P where Q.dbo.P.ident = A.User_Ident)) as 'Rank',
(select Q.dbo.P.phone from Q.dbo.P where Q.dbo.P.ident = A.User_Ident) as 'Phone',
(select Q.dbo.P.smtp_address from Q.dbo.P where Q.dbo.P.ident = A.User_Ident) as 'Email',
(select Q.dbo.O.description from Q.dbo.O where Q.dbo.O.ident = (select Q.dbo.P.organization_ident from Q.dbo.P where Q.dbo.P.ident = A.User_Ident)) as 'Organization',
(select Q.dbo.L.description from Q.dbo.L where Q.dbo.L.ident = (select Q.dbo.P.location_ident from Q.dbo.P where Q.dbo.P.ident = A.User_Ident)) as 'Location',
A.Disposition_Ident,
D.Description as Disposition,
A.Notes,
A.Updated,
A.UpdatedBy,
A.Label,
A.Scanned,
S.Licensed
-- Every column above is qualified with "A", so Assets must carry that alias.
FROM Assets A
left outer join C on A.Category_Ident = C.Ident
left outer join M on A.Manufacturer_Ident = M.Ident
left outer join S on A.Software_Ident = S.Ident
left outer join T on A.Account_Ident = T.Ident
left outer join D on A.Disposition_Ident = D.Ident
-- NOTE(review): #Account is presumably substituted by the calling application
-- before the statement is executed -- confirm.
WHERE ((T.Description like '%' + #Account + '%') or (A.Account_Ident like '%' + #Account + '%'))
order by Serial_Number
Many ways to skin a cat, but I think this approach is worth a try, using a UNION to combine the results of the 2 different conditions (1 query joined to b for ids > 3100, and another query joined to c for ids <= 3100).
You have to return the same fields from both queries: you can't, as you indicated you wanted, return one "name" field when looking at c while returning two fields for the b condition. Hence, in this example, when you join to c, it returns "name" as First Name and a blank Last Name value.
-- Branch 1: User_Ident above 3100 -- the names come from b.
SELECT
    a.ID,
    a.SN,
    a.User_Ident,
    b.first_name AS 'First Name',
    b.last_name AS 'Last Name'
FROM a
INNER JOIN b
    ON a.User_Ident = b.ident
WHERE (a.User_Ident > 3100)
  AND (......)

UNION ALL

-- Branch 2: User_Ident up to 3100 -- c.name fills First Name, Last Name stays blank.
SELECT
    a.ID,
    a.SN,
    a.User_Ident,
    c.name AS 'First Name',
    '' AS 'Last Name'
FROM a
INNER JOIN c
    ON a.User_Ident = c.ident
WHERE (a.User_Ident <= 3100)
  AND (......)
I'd accomplish this with two left joins and a case statement:
-- Take both name parts from b when b has a last name, otherwise from c, so that
-- first and last name always come from the same table.
select a.field1,
       case when b.lastname is not null then b.firstname else c.firstname end,
       case when b.lastname is not null then b.lastname else c.lastname end
from table1 a
left join table2 b
    on a.id = b.id
left join table3 c
    on a.id = c.id
Note that I tested lastname in both case statements, because you probably don't want the first name from one table and the last name from the other, and lastname is less likely to be null than firstname in the actual table.
A union would work (you beat me to it), or you could use an inline function as well. As the others have said, you must return the same set of fields.
A slight variation on HLGEM's answer
-- Same idea, with the 3100 threshold moved into the join conditions so that each
-- row can match at most one of the two lookup tables.
select a.field1,
       case when b.lastname is not null then b.firstname else c.firstname end,
       case when b.lastname is not null then b.lastname else c.lastname end
from table1 a
left join table2 b
    on a.User_Ident > 3100 AND a.id = b.id
left join table3 c
    on a.User_Ident <= 3100 AND a.id = c.id
CASE can only return a column, not a row, and you can't correlate queries into it afaik. And no matter what, a result set must have a static amount of columns.
I think what you would want would be a result set where if the First Name or Last Name was filled out the Name was null.
a.ID | a.SN | Name | First Name | Last Name
1 | # | Name | null | null
2 | # | null | John | Doe
My take on the outer join solution is:
-- Outer-join both lookup tables and choose per row by the 3100 threshold.
-- Column names follow the question's schema (a.SN, b.first_name, b.last_name, c.name).
SELECT
a.id
,a.sn
,a.user_ident
-- For user_ident <= 3100 only c.name exists; it is returned as LastName and
-- FirstName is left as the empty string.
,case when a.user_ident <= 3100 then c.name else b.last_name end LastName
,case when a.user_ident <= 3100 then '' else b.first_name end FirstName
from a
left outer join b
on b.ident = a.user_ident
left outer join c
on c.ident = a.user_ident
A query cannot vary the number of columns returned (at least not in any system I'm familiar with), so if you have a "case C" situation and only have Name, I would return it as LastName and set FirstName to the empty string. This could be made a NULL, if that fits your application better.