Distinct select on Oracle - sql

What I am trying to build is a simple recommender: it must take the NODE2 values of the 40 highest-weighted elements. The weight is calculated as (E.WEIGHT * K.GRADE). This code successfully returns the top 40 elements. However, I don't want E.NODE2 to return duplicates. PostgreSQL allowed me to do SELECT DISTINCT ON (NODE2) E.NODE2, (E.WEIGHT * K.GRADE). How can I do the same in Oracle?
The complete sql query;
-- First 40 (NODE2, weighted score) pairs, highest score first, for the
-- KUAISFAST rows with ID = 1. NODE2 values already present as a COURSE_ID of
-- ID = 1 are excluded. DISTINCT only collapses identical (NODE2, score) pairs,
-- so the same NODE2 can still appear with different scores.
SELECT TEMP.*
FROM (
    SELECT DISTINCT
        E.NODE2,
        (E.WEIGHT * K.GRADE)
    FROM KUAISFAST K
    INNER JOIN EDGES E
        ON E.NODE1 = K.COURSE_ID
    WHERE K.ID = 1
      AND E.NODE2 NOT IN (
            SELECT K2.COURSE_ID
            FROM KUAISFAST K2
            WHERE K2.ID = 1
          )
    ORDER BY (E.WEIGHT * K.GRADE) DESC
) TEMP
WHERE ROWNUM <= 40

This should solve your problem, although it is quite slow:
-- One row per NODE2 carrying its highest weighted score (MAXDE); the 40
-- highest-scoring rows are returned.
WITH best_per_node AS (
    -- collapse every candidate NODE2 to its maximum E.WEIGHT * K.GRADE
    SELECT
        E.NODE2,
        MAX(E.WEIGHT * K.GRADE) AS MAXDE
    FROM KUAISFAST K
    INNER JOIN EDGES E
        ON E.NODE1 = K.COURSE_ID
    WHERE K.ID = 1
      AND E.NODE2 NOT IN (
            SELECT K2.COURSE_ID
            FROM KUAISFAST K2
            WHERE K2.ID = 1
          )
    GROUP BY E.NODE2
),
ordered_nodes AS (
    -- sort before ROWNUM is applied in the outer query
    SELECT
        NODE2,
        MAXDE
    FROM best_per_node
    ORDER BY MAXDE DESC
)
SELECT
    NODE2,
    MAXDE
FROM ordered_nodes
WHERE ROWNUM <= 40

I believe you want something like
-- Rank the rows of each NODE2 by weighted score (R = 1 is the best row of that
-- NODE2), then keep the first 40 best-per-NODE2 rows in descending score order.
SELECT TEMP.*
FROM (
    SELECT
        E.NODE2,
        (E.WEIGHT * K.GRADE),
        ROW_NUMBER() OVER (
            PARTITION BY E.NODE2
            ORDER BY E.WEIGHT * K.GRADE DESC
        ) R
    FROM KUAISFAST K
    INNER JOIN EDGES E
        ON E.NODE1 = K.COURSE_ID
    WHERE K.ID = 1
      AND E.NODE2 NOT IN (
            SELECT K2.COURSE_ID
            FROM KUAISFAST K2
            WHERE K2.ID = 1
          )
    ORDER BY (E.WEIGHT * K.GRADE) DESC
) TEMP
WHERE TEMP.R = 1
  AND ROWNUM <= 40

In your subselect, I think you want MAX(E.WEIGHT * K.GRADE), so that only one value comes back for each E.NODE2.
This means you'll need to GROUP BY E.NODE2 as well.

Related

SQL Unique ID for union all - Sybase

Is it possible to generate a unique ID for the auxiliary table? I am retrieving data from several tables, but I do not know how to create a new ID for the results:
I would like to have an additional column with ID.
I tried to look for several methods, but nothing helped me.
I will be very grateful.
Greetings,
-- Combines four joins of users to customer sales towns, one per UserType
-- ('TT', 'MT', 'HRC', 'DEV'), each matching on the territory column for that
-- type and skipping territory id 0, then numbers the combined rows.
with ct as (
select *
-- NOTE(review): INTO inside the first branch of the CTE looks misplaced;
-- confirm whether the engine accepts it here or whether it belongs on the
-- final SELECT.
INTO temp_table
from dba.view_NEW_Users_AreaCodes ur
join dba.view_NEW_Customers_SalesTowns ct on ct.CustSalesTerritoryTTID = ur.UserAreaCodeID
where ur.UserType = 'TT'
and ct.CustSalesTerritoryTTID <> 0
union all
select *
from dba.view_NEW_Users_AreaCodes ur
join dba.view_NEW_Customers_SalesTowns ct on ct.CustSalesTerritoryMTID = ur.UserAreaCodeID
where ur.UserType = 'MT'
and ct.CustSalesTerritoryMTID <> 0
union all
select *
from dba.view_NEW_Users_AreaCodes ur
join dba.view_NEW_Customers_SalesTowns ct on ct.CustSalesTerritoryHRCID = ur.UserAreaCodeID
where ur.UserType = 'HRC'
and ct.CustSalesTerritoryHRCID <> 0
union all
select *
from dba.view_NEW_Users_AreaCodes ur
join dba.view_NEW_Customers_SalesTowns ct on ct.CustSalesTerritoryDevID = ur.UserAreaCodeID
where ur.UserType = 'DEV'
and ct.CustSalesTerritoryDevID <> 0
)
-- DATA_ID numbers the rows 1..n; ordering by newid() makes the assignment
-- arbitrary, so the same row may get a different DATA_ID on each run.
select row_number() over (order by newid()) as DATA_ID,
ct.*
from ct;
You could use row_number():
-- Template: replace the placeholder inside the CTE with your own query.
-- seqnum numbers the result rows 1..n in the arbitrary order given by newid().
with t as (
< your query here >
)
select row_number() over (order by newid()) as seqnum,
t.*
from t;
newid() is just an arbitrary value that randomizes the numbering. You can use a column there if you prefer a more canonical ordering.

How to get the middle most record(s) from a group of data in sql

-- Sample data: ten (A, B, C) rows used to exercise the "middle row" query.
CREATE TABLE #middle
(
    A INT,
    B INT,
    C INT
);

INSERT INTO #middle (A, B, C)
VALUES
    (7, 6, 2),
    (1, 0, 8),
    (9, 12, 16),
    (7, 16, 2),
    (1, 12, 8),
    (9, 12, 16),
    (9, 12, 16),
    (7, 16, 2),
    (1, 12, 8),
    (9, 12, 16);

-- Number the rows by (a, b, c DESC), keep those up to an upper cut-off
-- (count/2 + 1 for an even count, count/2 for an odd count), then remove the
-- rows numbered strictly below count/2.
WITH MIDS AS (
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY a, b, c DESC) AS rn
    FROM #middle
)
SELECT *
FROM MIDS
WHERE rn <= (
        SELECT CASE
                   WHEN COUNT(*) % 2 = 0 THEN (COUNT(*) / 2) + 1
                   ELSE (COUNT(*) / 2)
               END
        FROM MIDS
      )
EXCEPT
(
    SELECT *
    FROM MIDS
    WHERE rn < (
            SELECT (COUNT(*) / 2)
            FROM MIDS
          )
)
The query I have tried works for more than 4 records but not for 3. How should I modify my query so that for 3 records I get the 2nd record, which is the middle one? To reproduce, insert only 3 of the records above. Thanks in advance.
You can use OFFSET and FETCH
-- Skip the rows before the middle of #middle (ordered by a, b, c DESC), then
-- fetch one row when the count is odd or two rows when it is even.
SELECT *
FROM #middle
ORDER BY a, b, c DESC
OFFSET (
    SELECT COUNT(*) / 2 - (CASE COUNT(*) % 2 WHEN 0 THEN 1 ELSE 0 END)
    FROM #middle
) ROWS
FETCH NEXT (
    SELECT 2 - (COUNT(*) % 2)
    FROM #middle
) ROWS ONLY
There are many ways to get the median in SQL. Here is a simple way:
-- Return the middle row (odd count) or the two middle rows (even count) of
-- #middle when ordered by (a, b, c DESC).
-- Fixes: the filter now uses the seqnum alias that the subquery actually
-- defines, reads from #middle (the table in the question), and tests the set
-- that selects only the middle position(s).
SELECT h.*
FROM (
    SELECT
        m.*,
        ROW_NUMBER() OVER (ORDER BY a, b, c DESC) AS seqnum,
        COUNT(*) OVER () AS cnt
    FROM #middle m
) h
-- 2 * seqnum falls in [cnt, cnt + 2] only for the middle position(s):
--   cnt = 3  -> seqnum = 2
--   cnt = 10 -> seqnum IN (5, 6)
WHERE 2 * h.seqnum IN (h.cnt, h.cnt + 1, h.cnt + 2);
For an even number of records, you will get two rows. You need to decide what you actually want in this case.
How about this:
**EDITED
-- Middle row(s) of #middle ordered by (a, b, c DESC): rows mid and mid + 1
-- when the count is even, row mid + 1 when it is odd.
;WITH MIDS AS (
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY a, b, c DESC) AS rn
    FROM #middle
),
Cnt AS (
    -- single row: total count, its parity, and its integer half
    SELECT
        COUNT(*)     AS c,
        COUNT(*) % 2 AS rem,
        COUNT(*) / 2 AS mid
    FROM MIDS
)
SELECT *
FROM MIDS
CROSS JOIN Cnt
WHERE (Cnt.rem = 0 AND rn BETWEEN Cnt.mid AND Cnt.mid + 1)
   OR (Cnt.rem <> 0 AND rn = Cnt.mid + 1)

Does sequence contain 5 numbers that are each one apart solved recursively

This is the data:
-- Sample data: one integer column. The value 0 is left out (commented out),
-- so -2..-1 and 1..5 are separate runs; 21..25 is a run of five.
CREATE TABLE #t
(
    ID INT
);

INSERT INTO #t (ID)
VALUES
    (-2), (-1),
    -- (0) left out
    (1), (2), (3), (4),
    (7), (5),
    (21), (22), (23), (24), (25),
    (8);
We want to know if there are 5 numbers within the above sequence that are each 1 apart e.g. 21-22-23-24-25 gives a positive result. So is there an island of 5 anywhere in the list?
Non-recursively I've got a few possibilities, but is there a simple recursive solution?
Or is there a simpler non-recursive solution?
-- 1. LONG-WINDED
-- For every id, count the ids in the 5-wide window ending at it (up) and in
-- the 5-wide window starting at it (down); report ids whose window holds at
-- least five rows.
WITH t AS (
    SELECT
        id,
        (id + 5) AS U,
        (id - 5) AS L
    FROM #t
),
up AS (
    -- ids y with x.id - 5 < y <= x.id
    SELECT
        x.id,
        COUNT(*) AS cnt
    FROM t x
    INNER JOIN t y
        ON y.id > x.L
       AND y.id <= x.id
    GROUP BY x.id
),
down AS (
    -- ids y with x.id <= y < x.id + 5 (possibly not needed alongside "up")
    SELECT
        x.id,
        COUNT(*) AS cnt
    FROM t x
    INNER JOIN t y
        ON y.id < x.U
       AND y.id >= x.id
    GROUP BY x.id
)
SELECT id
FROM up
WHERE cnt >= 5
UNION ALL
SELECT id
FROM down
WHERE cnt >= 5
Following two are better:
-- 2. EXISTS form
-- Keep each row A for which at least five rows of #t have an id in
-- [A.id, A.id + 5).
SELECT A.*
FROM #t A
WHERE EXISTS (
    SELECT 1
    FROM #t B
    WHERE B.id >= A.id
      AND B.id < (A.id + 5)
    HAVING COUNT(*) >= 5
)
-- 3. CROSS APPLY form
-- For each row A, count the rows whose id lies in [A.id, A.id + 5) and keep A
-- when that count reaches five.
SELECT A.ID
FROM #t A
CROSS APPLY (
    SELECT COUNT(*) AS cnt
    FROM #t B
    WHERE B.id >= A.id
      AND B.id < (A.id + 5)
) C
WHERE C.cnt >= 5
The following is based on a referenced article by Itzik Ben-Gan:
-- 4. Window-function form
-- y is the id four rows earlier in id order; a row closes a run of five
-- consecutive ids when it is exactly 4 greater than y.
WITH x AS (
    SELECT
        ID,
        LAG(ID, 4) OVER (ORDER BY ID)      AS y,
        ID - LAG(ID, 4) OVER (ORDER BY ID) AS dif
    FROM #t A
)
SELECT
    ID,
    y
FROM x
WHERE dif = 4
Yes, there is a much simpler solution. Take the difference between the numbers and an increasing sequence of numbers. If the numbers are consecutive, the difference is constant. So, you can do:
-- id minus its position in id order is constant within a run of consecutive
-- ids, so grouping on that difference yields one group per run; keep runs of
-- five or more.
WITH numbered AS (
    SELECT
        t.*,
        (id - ROW_NUMBER() OVER (ORDER BY id)) AS grp
    FROM #t t
)
SELECT
    grp,
    COUNT(*) AS num_in_sequence,
    MIN(id)  AS first_id,
    MAX(id)  AS last_id
FROM numbered
GROUP BY grp
HAVING COUNT(*) >= 5;
EDIT:
I think this is the simplest of all. One window function and a comparison:
-- A row starts a run of five consecutive ids when the id four rows later (in
-- id order) is exactly 4 greater. Assumes ids are unique.
-- Fix: the inner query selects t.*, so #t must carry the alias t.
SELECT s.*
FROM (
    SELECT
        t.*,
        LEAD(id, 4) OVER (ORDER BY id) AS id4
    FROM #t t
) s
WHERE s.id4 - s.id = 4;
This does make the assumption that there are no duplicates in the ids, which is true of the OP data.
As I look further, this is the last solution in the OP. Kudos!

Oracle: Select at least one record foreach cluster

Look at the following query:
-- Up to nine rows whose process code is VA or NUOVA_ATT_ENI; nothing here
-- forces both codes to be represented.
SELECT hub.*
FROM ENI_FLUSSI_HUB hub
WHERE hub.flh_tipo_processo_cod IN ('VA', 'NUOVA_ATT_ENI')
  AND ROWNUM < 10
It simply extracts just some VA. I need the extract some VA and some NUOVA_ATT_ENI.
What is the most elegant way to do it?
You can do it like this:
-- Up to four VA rows combined with up to four NUOVA_ATT_ENI rows; UNION also
-- removes duplicate rows from the combined result.
SELECT va_rows.*
FROM ENI_FLUSSI_HUB va_rows
WHERE va_rows.flh_tipo_processo_cod = 'VA'
  AND ROWNUM < 5
UNION
SELECT nuova_rows.*
FROM ENI_FLUSSI_HUB nuova_rows
WHERE nuova_rows.flh_tipo_processo_cod = 'NUOVA_ATT_ENI'
  AND ROWNUM < 5
If there aren't any duplicate values, you can use UNION ALL, which performs faster:
-- Up to four VA rows followed by up to four NUOVA_ATT_ENI rows; UNION ALL
-- keeps every row without a duplicate-removal step.
SELECT va_rows.*
FROM ENI_FLUSSI_HUB va_rows
WHERE va_rows.flh_tipo_processo_cod = 'VA'
  AND ROWNUM < 5
UNION ALL
SELECT nuova_rows.*
FROM ENI_FLUSSI_HUB nuova_rows
WHERE nuova_rows.flh_tipo_processo_cod = 'NUOVA_ATT_ENI'
  AND ROWNUM < 5
As @DavidAldridge stated, you can always use a view to make this selection.
Here's a nicely overengineered solution:
-- Return up to 10 rows with code VA or NUOVA_ATT_ENI, starting with one row
-- of each code (when such a row exists) and topping up with other rows.
-- Fixes: nuova_att_eni selected the bare alias "t" instead of "t.*", and
-- the_rest lacked the ri column, so the three UNION ALL branches did not have
-- the same number of columns. The result therefore includes ri; list the
-- columns explicitly in the final SELECT if it should be hidden.
with
va as (
  -- one VA row, tagged with its rowid
  select rowid ri,
         t.*
  from   eni_flussi_hub t
  where  flh_tipo_processo_cod = 'VA'
  and    rownum <= 1),
nuova_att_eni as (
  -- one NUOVA_ATT_ENI row, tagged with its rowid
  select rowid ri,
         t.*
  from   eni_flussi_hub t
  where  flh_tipo_processo_cod = 'NUOVA_ATT_ENI'
  and    rownum <= 1),
the_rest as (
  -- filler rows of either code, excluding the two rows picked above
  select rowid ri,
         c1.*
  from   eni_flussi_hub c1
  where  flh_tipo_processo_cod in ('VA','NUOVA_ATT_ENI')
  and    rowid not in (
           select ri
           from   va
           union all
           select ri
           from   nuova_att_eni)
  and    rownum <= 9)
select *
from (
  select * from va
  union all
  select * from nuova_att_eni
  union all
  select * from the_rest
)
where rownum <= 10
/
I think that what it does is return at least one row for each of the two values of flh_tipo_processo_cod, and "lets nature take its course" with the rest.
You'd have to edit the *'s in the main query to avoid trying to include the rowid from the first two subquery factoring clauses.
Here's another, which I think attempts to bring back five of each but will "top up" the required total if less than five are available for either of the two subquery factoring clauses:
-- Number up to 10 rows of each code, interleave the two sets by that number,
-- and keep the first 10: about five of each code, topped up from the other
-- code when one has fewer than five rows.
-- Fix: nuova_att_eni selected the bare alias "t" instead of "t.*", so its
-- column list did not match va in the UNION ALL.
with
va as (
  select rownum rn,
         t.*
  from   eni_flussi_hub t
  where  flh_tipo_processo_cod = 'VA'
  and    rownum <= 10),
nuova_att_eni as (
  select rownum rn,
         t.*
  from   eni_flussi_hub t
  where  flh_tipo_processo_cod = 'NUOVA_ATT_ENI'
  and    rownum <= 10)
select *
from (
  -- ordering by rn alternates between the two codes: 1, 1, 2, 2, ...
  select *
  from  (select * from va
         union all
         select * from nuova_att_eni)
  order by rn asc
)
where rownum <= 10
/
Enjoy!

SQL WITH Statement, Unknown Column in where clause

I've got the following query, which throws the following error:
Unknown column 'RowNum'
-- Full-text search for 'something' over Employee and Address, combined into
-- one ranked list: Employee matches carry RANK * 3, Address matches RANK * 2.
WITH Employees AS
(
-- Branch 1: employees matched by full-text search on Employee.
SELECT
(keyTblSp.RANK * 3) AS [Rank],
sp.*,
addr.Street,
addr.PostOfficeBox,
addr.StreetNumber
FROM Employee sp
INNER JOIN
FREETEXTTABLE(Employee, *, 'something', 1000) AS keyTblSp
ON sp.EmployeeId = keyTblSp.[KEY]
LEFT OUTER JOIN [Address] addr ON addr.EmployeeId = sp.EmployeeId
UNION ALL
-- Branch 2: employees whose address matched the full-text search on Address.
SELECT
(keyTblAddr.RANK * 2) AS [Rank],
sp.*,
addr.Street,
addr.PostOfficeBox,
addr.StreetNumber
FROM Employee sp
LEFT OUTER JOIN [Address] addr ON addr.EmployeeId = sp.EmployeeId
INNER JOIN
FREETEXTTABLE([Address], *, 'something', 1000) AS keyTblAddr
ON addr.AddressId = keyTblAddr.[KEY]
)
SELECT ROW_NUMBER() OVER (ORDER BY [Rank] DESC) AS RowNum, *
FROM Employees
-- RowNum is an alias defined in this same SELECT list, so this WHERE clause
-- cannot see it; this is the line the reported error refers to.
-- The BETWEEN bounds evaluate to 1 and 10.
WHERE RowNum BETWEEN (1 - 1) * 10 + 1 AND 1 * 10
ORDER BY Rank DESC
This is because aliases are not recognized in WHERE clauses. Instead, use the full query like this:
-- NOTE(review): a WHERE clause cannot contain a window function such as
-- ROW_NUMBER(); filter on the row number in an outer query over a derived
-- table or CTE instead. Confirm before using this form.
WHERE ROW_NUMBER() OVER (ORDER BY [Rank] DESC) BETWEEN (1 - 1) * 10 + 1 AND 1 * 10
Try wrapping your query in a derived table so that the alias is usable in the WHERE clause:
-- First page (rows 1-10) of the Employees CTE, numbered by [Rank] descending.
-- This statement must follow the WITH Employees AS (...) definition.
-- The row number is computed in a derived table so the outer WHERE can
-- filter on it.
-- Fix: the page is ordered by RowNum so it comes back in the same
-- highest-rank-first order used for numbering; the previous ORDER BY Rank was
-- ascending and reversed the page.
SELECT
    Results.*
FROM (
    SELECT
        ROW_NUMBER() OVER (ORDER BY [Rank] DESC) AS RowNum,
        *
    FROM Employees
) AS Results
WHERE Results.RowNum BETWEEN (1 - 1) * 10 + 1 AND 1 * 10
ORDER BY Results.RowNum
Your WHERE clause cannot refer to a window or aggregate function like ROW_NUMBER(). If you want to filter on the result of ROW_NUMBER(), you need to do so in the HAVING clause:
...
-- NOTE(review): presumably HAVING cannot see the RowNum alias either, and
-- window functions are normally limited to the SELECT list and ORDER BY;
-- verify on the target engine. Filtering in an outer query over a derived
-- table or CTE is the safe form.
SELECT ROW_NUMBER() OVER (ORDER BY [Rank] DESC) AS RowNum, *
FROM Employees
HAVING RowNum BETWEEN (1 - 1) * 10 + 1 AND 1 * 10
ORDER BY Rank DESC
How about:
-- The ten highest-ranked rows of Employees.
SELECT TOP (10) *
FROM Employees
ORDER BY [Rank] DESC
Alternatively, does it work without the WHERE RowNum clause? (Why is your BETWEEN so tricky?)