How to reference a window function's result inside the same query? - sql

I have this part in a larger query which consumes a lot of RAM:
-- CTE fragment from the question: numbers each post's close reasons by TotalByCloseReason,
-- highest first. This is the failing form: seq is produced by this SELECT list, yet the
-- WHERE clause below filters on it, which raises the "could not be bound" error quoted
-- after the snippet.
TopPerPost as
(
select Id,
CloseReasonTypeId,
Name,
ReasonsPerPost.TotalByCloseReason,
row_number() over(partition by Id order by TotalByCloseReason desc) seq -- Get the most common Id (The most common close Reason)
from ReasonsPerPost
where Name is NOT NULL and TopPerPost.seq=1 -- Remove useless results here, instead of doing it later
)
but I got the error: The multi-part identifier "TopPerPost.seq" could not be bound.
One last detail: I only use the Name column in a later INNER JOIN of that table.

You can't reference a window function in the where of the same query. Just create a second cte.
-- Step 1: number each post's named close reasons, highest TotalByCloseReason first.
WITH TopPerPost AS
(
    SELECT
        Id,
        CloseReasonTypeId,
        Name,
        ReasonsPerPost.TotalByCloseReason,
        ROW_NUMBER() OVER (PARTITION BY Id ORDER BY TotalByCloseReason DESC) AS seq
    FROM ReasonsPerPost
    WHERE Name IS NOT NULL
),
-- Step 2: seq is an ordinary column of TopPerPost here, so it can be filtered on.
OnlyTheTop AS
(
    SELECT *
    FROM TopPerPost
    WHERE seq = 1
)
Or you can do it like this.
-- Same idea with a derived table: compute seq inside, filter on it outside.
SELECT ranked.*
FROM
(
    SELECT
        Id,
        CloseReasonTypeId,
        Name,
        ReasonsPerPost.TotalByCloseReason,
        ROW_NUMBER() OVER (PARTITION BY Id ORDER BY TotalByCloseReason DESC) AS seq
    FROM ReasonsPerPost
    WHERE Name IS NOT NULL
) AS ranked
WHERE ranked.seq = 1
Here is another option that should eliminate the need for so many rows being returned.
-- For every named row, look up the highest TotalByCloseReason recorded for the same Id.
SELECT
    rpp.Id,
    rpp.CloseReasonTypeId,
    rpp.Name,
    top_total.TotalByCloseReason
FROM ReasonsPerPost AS rpp
CROSS APPLY
(
    -- Correlated per outer row: only the single largest total for this Id is kept.
    SELECT TOP (1) same_post.TotalByCloseReason
    FROM ReasonsPerPost AS same_post
    WHERE same_post.Id = rpp.Id
    ORDER BY same_post.TotalByCloseReason DESC
) AS top_total
WHERE rpp.Name IS NOT NULL
Attempt #4...this would be a LOT easier with a sql fiddle to work with.
-- For every named row, attach the highest TotalByCloseReason among the named rows of the
-- same Id. A derived table used in an INNER JOIN cannot reference the outer alias, so the
-- per-Id maximum is computed with GROUP BY and the derived table exposes Id for the join.
select rpp.Id,
       rpp.CloseReasonTypeId,
       rpp.Name,
       s.TotalByCloseReason
from ReasonsPerPost rpp
inner join
(
    select Id,
           max(TotalByCloseReason) as TotalByCloseReason
    from ReasonsPerPost
    where Name is NOT NULL
    group by Id
) s on s.Id = rpp.Id
where rpp.Name is NOT NULL

The below might work for your need.
But without looking at the data is hard to tell it will or not.
-- Highest close-reason total per post, considering only rows that have a Name.
;with t as
(
    select Id, max(TotalByCloseReason) as TC
    from ReasonsPerPost
    where Name is not null
    group by Id
)
select t.Id, t.TC, c.CloseReasonTypeId, c.Name
from t
inner join ReasonsPerPost c
    on t.Id = c.Id
   and t.TC = c.TotalByCloseReason
-- The filter must be repeated here: a row with a NULL Name could share the same total
-- as the maximum found above and would otherwise be joined back in.
where c.Name is not null

Related

If Rownumber = 1 and Condition Then Condition

I've got a question about a sql result and how to achieve the following.
In the screenshot, there is a row number for every ID, and every ID has another column which has a status of 'old' or 'processed'. What I want is: if RN = 1 and the status is 'processed', then all other rows of this ID should also get the status 'processed'.
Is there a possibility to achieve this in sql?
-- Question's query: numbers the rows within each NODE_NAME, ordered by
-- REPORTING_RELEVANT_STATUS_ID and then BILLING_PERIOD (both descending), and exposes
-- CI_EQUIPMENT_ID / REPORTING_RELEVANT_STATUS_ID under the aliases ID_PART / REPORTING_RELEVANT.
SELECT RN = ROW_NUMBER() OVER (PARTITION BY [NODE_NAME]
ORDER by REPORTING_RELEVANT_STATUS_ID DESC, BILLING_PERIOD DESC)
,[CI_EQUIPMENT_ID] AS ID_PART
,[REPORTING_RELEVANT_STATUS_ID] AS REPORTING_RELEVANT
,[BILLING_PERIOD]
, [NODE_NAME]
FROM Table
Put your query in a CTE, then JOIN it with the actual table:
-- Number the rows per NODE_NAME (status, then billing period, both descending).
;WITH ranked AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY [NODE_NAME]
            ORDER BY REPORTING_RELEVANT_STATUS_ID DESC, BILLING_PERIOD DESC
        ) AS RN,
        [CI_EQUIPMENT_ID] AS ID_PART,
        [REPORTING_RELEVANT_STATUS_ID] AS REPORTING_RELEVANT,
        [BILLING_PERIOD],
        [NODE_NAME]
    FROM Table
),
-- Keep only first-ranked rows whose status is 'PROCESSED'.
processed_first AS (
    SELECT *
    FROM ranked
    WHERE RN = 1
      AND REPORTING_RELEVANT = 'PROCESSED'
)
-- Every base row is returned; rows whose ID matches a processed first-ranked row take
-- that row's status, all others keep their own.
-- NOTE(review): ranking is per NODE_NAME while the join is on CI_EQUIPMENT_ID; confirm
-- that this matches the intended grouping.
SELECT
    t.[CI_EQUIPMENT_ID] AS ID_PART,
    CASE
        WHEN c.RN IS NULL THEN t.[REPORTING_RELEVANT_STATUS_ID]
        ELSE c.REPORTING_RELEVANT
    END AS REPORTING_RELEVANT,
    t.[BILLING_PERIOD],
    t.[NODE_NAME]
FROM Table AS t
LEFT JOIN processed_first AS c
    ON c.ID_PART = t.[CI_EQUIPMENT_ID]
Create a common table expression, then update it through a self-join:
every row of an ID gets the status 'Processed' when that ID's RN = 1 row is 'Processed'.
Here:
-- Number the rows per NODE_NAME and expose the status column as REPORTING_RELEVANT.
;with CTE (RN,ID_PART,REPORTING_RELEVANT,BILLING_PERIOD,NODE_NAME) AS
(
SELECT RN = ROW_NUMBER() OVER (PARTITION BY [NODE_NAME]
ORDER by REPORTING_RELEVANT_STATUS_ID DESC, BILLING_PERIOD DESC)
,[CI_EQUIPMENT_ID] AS ID_PART
,[REPORTING_RELEVANT_STATUS_ID] AS REPORTING_RELEVANT
,[BILLING_PERIOD]
, [NODE_NAME]
FROM Table
)
-- Self-join on ID_PART: CTE2 is the first-ranked row, CTE1 is every row sharing its ID.
-- The CTE exposes the status as REPORTING_RELEVANT (there is no Status column), so the
-- update and the filter must both use that name.
Update CTE1
set CTE1.REPORTING_RELEVANT='Processed'
from CTE CTE1 inner join CTE CTE2
on CTE1.ID_PART=CTE2.ID_PART
where CTE2.RN=1 and CTE2.REPORTING_RELEVANT='Processed'

Get the rows with first phone number

I need to display 100 members per page. Because a member can have multiple phone numbers, I have to pick the first phone number for each member.
Here is the query that returns every phone number of each member:
-- Question's query: one output row per joined (member, phone) pair for names starting
-- with 'U', limited to rows whose rn is between 0 and 100.
-- rn is assigned over a constant ordering (order by (1)), independent of the
-- ORDER BY NAME at the end of the inner query.
SELECT * FROM
(
SELECT
row_number() over(order by(1)) rn,
NAME, PHONE
FROM MEMBERS t0
LEFT OUTER JOIN MEMBER_IDENTITY ON MEMBER_IDENTITY.ID=t0.ID
LEFT JOIN MEMBER_PHONE ON MEMBER_PHONE.MEMBER_ID=t0.ID
WHERE
NAME LIKE 'U%'
ORDER BY NAME ASC
)
WHERE rn >= 0
AND rn <= 100
How can I pick first -or MAX, etc- phone number?
You could make a sub query for retrieving the phone numbers together with their row number per member, and then filter out the first of them:
-- One page of members (names starting with 'U', numbered by name), each joined to at
-- most one phone number.
SELECT *
FROM (
    SELECT row_number() OVER (ORDER BY name ASC) rn,
           name,
           phone
    FROM members t0
    LEFT JOIN member_identity
           ON member_identity.id = t0.id
    LEFT JOIN (
            -- Number each member's phones so that exactly one (ph_rn = 1) is kept.
            -- The select list needs a comma between phone and the row_number() column.
            SELECT member_id,
                   phone,
                   row_number() OVER (PARTITION BY member_id ORDER BY (1)) ph_rn
            FROM member_phone
         ) member_phone
           ON member_phone.member_id = t0.id
          AND ph_rn = 1
    WHERE name LIKE 'U%'
    ORDER BY name ASC
)
WHERE rn BETWEEN 0 AND 100
I would:
use the same order for determining the rn value as the result set (ORDER BY name ASC), otherwise the order will not be consistent across pages;
use BETWEEN in the outer WHERE condition, although the lower bound condition (0) is not necessary for the first page.
When you switch to MAX you can apply ROW_NUMBER directly on the name (windowed aggregate functions are calculated after GROUP BY/HAVING):
-- One row per member name with its largest phone value; the row number is computed
-- over the grouped rows, ordered by name.
SELECT *
FROM (
    SELECT
        ROW_NUMBER() OVER (ORDER BY NAME ASC) rn,
        NAME,
        MAX(PHONE)
    FROM MEMBERS t0
    LEFT JOIN MEMBER_IDENTITY
        ON MEMBER_IDENTITY.ID = t0.ID
    LEFT JOIN MEMBER_PHONE
        ON MEMBER_PHONE.MEMBER_ID = t0.ID
    WHERE NAME LIKE 'U%'
    GROUP BY NAME
)
WHERE rn BETWEEN 0 AND 100
or move the aggregation into a Derived Table:
-- One page of members (names starting with 'U', numbered by name), each joined to its
-- largest phone value, which is pre-aggregated in a derived table.
SELECT * FROM
(
SELECT
row_number() over (ORDER BY NAME ASC) rn,
NAME, PHONE
FROM MEMBERS t0
LEFT OUTER JOIN MEMBER_IDENTITY ON MEMBER_IDENTITY.ID=t0.ID
LEFT JOIN
( -- One row per member: the grouping key must be MEMBER_ID, the column that is
  -- selected and joined on.
SELECT MEMBER_ID, MAX(PHONE) AS PHONE
FROM MEMBER_PHONE
GROUP BY MEMBER_ID
) MEMBER_PHONE
ON MEMBER_PHONE.MEMBER_ID=t0.ID
WHERE NAME LIKE 'U%'
)
WHERE rn >= 0
AND rn <= 100

How to select both row_number and count over partition?

I need to find duplicate records (with the master record ID and the duplicate record IDs):
-- Question's query: rn = 1 keeps the most recently updated row of every
-- (related_id, name) group, whether or not the group has duplicates.
-- NOTE(review): the inner SELECT shows no FROM clause; the source table was omitted in the post.
select ciid, name from (
select ciid, name, row_number() over (
partition by related_id, name order by updatedate desc) rn
) where rn = 1;
This gives me the master record IDs, but it also includes records without duplicates.
If I use
-- Question's query: rn > 1 keeps every row except the most recently updated one of each
-- (related_id, name) group.
-- NOTE(review): the inner SELECT shows no FROM clause; the source table was omitted in the post.
select ciid, name from (
select ciid, name, row_number() over (
partition by related_id, name order by updatedate desc) rn
) where rn > 1;
This gets me all the duplicate records, but not the master record.
I was wishing if I do something like:
-- Question's attempt: combine the row number with a per-group count so that only the
-- first row of groups with more than one row is returned.
-- NOTE(review): cnt uses the same ORDER BY as rn; a windowed count with ORDER BY is
-- normally a running count, so on the rn = 1 row it would presumably be 1. Verify.
-- NOTE(review): the inner SELECT shows no FROM clause; the source table was omitted in the post.
select ciid, name from (
select ciid, name, row_number() over (
partition by related_id, name order by updatedate desc
) rn, count(*) over (
partition by related_id, name order by updatedate desc
) cnt
) where rn = 1 and cnt > 1;
But I was worried about the performance, and about whether it actually does what I want.
How do I get the master record only for the ones with duplicates? Please note that name is not unique column. Only ciid is unique.
I ended up using similar query in my question:
-- Master record (most recently updated row) of every (related_id, name) group that has
-- more than one row.
select ciid, name from (
select ciid, name, row_number() over (
partition by related_id, name order by updatedate desc
) rn, count(*) over (
-- No ORDER BY (and no DESC) here: the count must cover the whole partition.
partition by related_id, name
) cnt
from yourtable -- placeholder: the original post omitted the source table
) where rn = 1 and cnt > 1;
Works surprisingly well. The master record is where rn = 1 and duplicates are where rn > 1. Note that count(*) over (partition by ...) must not have an ORDER BY clause; with one, it becomes a running count instead of the partition total.
I haven't tested this (because I don't have real data and am too lazy to create some), but it seems something along these lines might work:
-- (related_id, name) combinations that occur more than once.
with has_duplicates as (
  select related_id, name
  from yourtable
  group by related_id, name
  having count (*) > 1
),
-- Rows belonging to those combinations, numbered newest-first within each combination.
with_dupes as (
  select
    y.ciid, y.name,
    row_number() over (partition by y.related_id, y.name order by y.updatedate desc) rn
  from
    yourtable y
    inner join has_duplicates d
      on y.related_id = d.related_id
     and y.name = d.name
)
-- rn = 1 is the master (most recently updated) record of each duplicated combination.
select
  ciid, name
from with_dupes
where rn = 1
-- Ranks rows within each (related_id, name) group by updatedate (newest first), then
-- groups the ranked rows by (ciid, name) and keeps groups with more than one distinct rank.
-- NOTE(review): the question states ciid is unique; if so, every (ciid, name) group holds
-- a single row and count(distinct rn) can never exceed 1. Confirm this returns any rows.
select ciid, name
from (
select ciid, name,
dense_rank() over (partition by related_id, name order by updatedate desc) rn
from tablename) t
group by ciid,name
having count(distinct rn) > 1;
Edit: To find duplicates, why not just do this.
-- Latest row of every (name, related_id) group that has more than one row.
select x.ciid, x.name, x.updatedate
from tablename x join
(
    -- One row per duplicated group, carrying its newest updatedate.
    select name, related_id, max(updatedate) as mxdt, count(*) as cnt
    from tablename
    group by name, related_id
    having count(*) > 1
) t
on x.updatedate = t.mxdt
and x.name = t.name
-- The groups are keyed by (name, related_id), so both keys must take part in the join;
-- matching on name alone would pair rows with groups of a different related_id.
and x.related_id = t.related_id
You can do a group by with having to select only those id's having more than one row with the same row number.

Select random values from each group, SQL

I have a project through which I'm creating a game powered by a database.
The database has data entered like this:
(ID, Name) || (1, PhotoID),(1,PhotoID),(1,PhotoID),(2,PhotoID),(2,PhotoID) and so on. There are thousands of entries.
This is my current SQL statement:
$sql = "SELECT TOP 8 * FROM Image WHERE Hidden = '0' ORDER BY NEWID()";
But this can also produce results with matching IDs, where I need to have each result have a unique ID (that is I need one result from each group).
How can I change my query to grab one result from each group?
Thanks!
Since ORDER BY NEWID() will result in tablescan anyway, you might use row_number() to isolate first in group:
-- Shuffle the visible images inside each id group.
; WITH randomizer AS (
    SELECT
        id,
        name,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY NEWID()) AS rn
    FROM Image
    WHERE hidden = 0
)
-- Take the first image of each shuffled group, then pick 8 groups at random.
SELECT TOP (8)
    id,
    name
FROM randomizer
WHERE rn = 1
ORDER BY NEWID()
Sql Fiddle playground thanks to mellamokb.
Looks like this may work, but I can't vouch for performance:
-- Eight random IDs among the visible images, each with one randomly chosen visible name.
SELECT TOP 8 i1.ID,
       (SELECT TOP 1 i2.name
        FROM Image i2
        WHERE i2.id = i1.id
          -- Apply the visibility filter here too, so a hidden image of the same ID
          -- can never be the one picked.
          AND i2.hidden = '0'
        ORDER BY NEWID()) AS name
FROM Image i1
WHERE i1.hidden = '0'
GROUP BY i1.ID
ORDER BY NEWID();
Demo: http://www.sqlfiddle.com/#!3/657ad/6
If you have an index on the ID column and want to take advantage of the index and avoid a full table scan, do your randomization on the key values first:
-- Distinct IDs that have at least one visible image.
WITH IDs AS
(
    SELECT DISTINCT ID
    FROM Image
    WHERE Hidden = '0'
),
-- Put those IDs in random order.
SequencedIDs AS
(
    SELECT ID, ROW_NUMBER() OVER (ORDER BY NEWID()) AS Seq
    FROM IDs
),
-- For the first 8 random IDs, shuffle the visible images of each ID.
ImageGroups AS
(
    SELECT i.*, ROW_NUMBER() OVER (PARTITION BY i.ID ORDER BY NEWID()) Seq
    FROM SequencedIDs s
    INNER JOIN Image i
        ON i.ID = s.ID
    WHERE s.Seq <= 8   -- Seq starts at 1, so <= 8 keeps eight groups (< 8 kept only seven)
      AND i.Hidden = '0'
)
-- One random image per selected ID.
SELECT *
FROM ImageGroups
WHERE Seq = 1
This should drastically reduce the cost over the table scan approach, although I don't have a schema big enough that I can test with - so try running some statistics in SSMS and make sure ID is actually indexed for this to be effective.
-- Shuffles photos in a derived table, then collapses the result to one row per id.
-- NOTE(review): selects non-aggregated columns under GROUP BY id (MySQL-style); which
-- row survives per id is presumably engine-dependent. Verify before relying on it.
select * from (select * from photos order by rand()) as _SUB group by _SUB.id;
-- One random visible image per ID, limited to 8 randomly chosen IDs.
select top 8 ID, Name
from (
    select ID, Name,
           -- Partition by ID only: partitioning by (ID, Name) would keep one row per
           -- distinct name, so the same ID could appear several times. Ordering by
           -- newid() makes the pick inside each group random.
           row_number() over (partition by ID order by newid()) as ranker
    from Image
    where Hidden = 0
) Z
where ranker = 1
order by newid()

Oracle SQL Query Assistance

Given a table:
ID NUMBER
OBJECTID NUMBER
CATEGORYID NUMBER
SCORE NUMBER
SCOREDATE DATE
Is it possible to efficiently retrieve the last score (based on SCOREDATE) in each distinct category for a given object in one query?
Try:
-- Latest score (by SCOREDATE) in each category for one object.
-- Column names follow the table definition given in the question (CATEGORYID, OBJECTID).
select v.* from (
select categoryid,
score,
scoredate,
row_number() over (partition by categoryid order by scoredate desc) rn
from MyTable
-- Restrict to the requested object before ranking; :object_id is a bind variable.
where objectid = :object_id) v
where rn=1
What you want falls into the [greatest-n-per-group] tag. One way to achieve the result:
-- Greatest-n-per-group via a join: find each category's latest ScoreDate for the object,
-- then join back to fetch the score recorded on that date.
-- Oracle syntax: table aliases take no AS keyword and :ObjectId is a bind variable.
SELECT
    t.CategoryId
  , t.Score
FROM
    ( SELECT
          CategoryId
        , MAX(ScoreDate) AS LastScoreDate
      FROM
          TableX
      WHERE
          ObjectId = :ObjectId
      GROUP BY
          CategoryId
    ) grp
  JOIN
    TableX t
      -- The derived table exposes CategoryId (there is no Category column).
      ON  grp.CategoryId = t.CategoryId
      AND grp.LastScoreDate = t.ScoreDate
WHERE
    t.ObjectId = :ObjectId