SQL Server 2008 - Finding duplicates using ROW_NUMBER - sql

i have the following SQL which works to find duplicates
SELECT *
FROM (SELECT
id,
ShipAddress,
ShipZIPPostal,
ROW_NUMBER() OVER (PARTITION BY shipaddress, shipzippostal ORDER BY shipaddress) ROWNUM
FROM orders
WHERE CONVERT(date, orderdate) = CONVERT(date, GETDATE())) x
WHERE rownum > 1
I would like to only see rows where, if the value of Rownum > 1 then i would like to see its corresponding row where rownum =1.
So basically, if a row has duplicates, i want to see the original row and all its duplicates.
If a row does not have duplicates, then i don't want to see it (it will have rownum = 1 )
How would i do this please?
cheers

Use count(*) rather than row_number():
SELECT *
FROM (SELECT id, ShipAddress, ShipZIPPostal,
COUNT(*) OVER (PARTITION BY shipaddress, shipzippostal) as cnt
FROM orders
WHERE CONVERT(date, orderdate) = CONVERT(date, GETDATE())
) x
WHERE cnt > 1;

In addition to Gordon's answer, if you want to keep the row_number() approach for some academic reason, you can do this:
SELECT *
FROM (SELECT
id,
ShipAddress,
ShipZIPPostal,
ROW_NUMBER() OVER (PARTITION BY shipaddress, shipzippostal ORDER BY shipaddress) ROWNUM
FROM orders
WHERE CONVERT(date, orderdate) = CONVERT(date, GETDATE())) x
WHERE EXISTS(
SELECT * FROM x x2
WHERE x.shipaddress=x2.shipaddress
AND x.shipzippostal=x2.shipzippostal
AND x2.ROWNUM>1
)
I'd actually prefer a cte structure like this personally:
WITH cte AS (
SELECT
id,
ShipAddress,
ShipZIPPostal,
ROW_NUMBER() OVER (PARTITION BY shipaddress, shipzippostal ORDER BY shipaddress) ROWNUM
FROM orders
WHERE CONVERT(date, orderdate) = CONVERT(date, GETDATE())
)
SELECT *
FROM cte
WHERE EXISTS(
SELECT * FROM cte x2
WHERE cte.shipaddress=x2.shipaddress
AND cte.shipzippostal=x2.shipzippostal
AND x2.ROWNUM>1
)

You could add a second row_number, but change the order by to ID so it will be different, and compare the 2 row_numbers
SELECT
*
FROM
(SELECT
id,
ShipAddress,
ShipZIPPostal,
ROW_NUMBER() OVER (PARTITION BY shipaddress,shipzippostal ORDER BY id) ROWNUM1,
ROW_NUMBER() OVER (PARTITION BY shipaddress,shipzippostal ORDER BY id DESC) ROWNUM2
FROM
orders
WHERE
CONVERT(DATE,orderdate) = CONVERT(DATE,GETDATE())
) x
WHERE
ROWNUM1 <> ROWNUM2

Related

How do I include a days calculation?

We got this to work well, but I want to show a column that will have the days since the last actual_date
I don't know how to code 'day' to be an output column.
WITH
cte_ul_ev AS (
SELECT
ev.full_name,
ev.event_name,
ev.actual_date,
ev.service_provider_name,
datediff(day, actual_date, getdate())
row_num = ROW_NUMBER() OVER (PARTITION BY ev.full_name ORDER BY ev.actual_date DESC) --<<--<<--
FROM
dbo.event_expanded_view ev
WHERE
ev.full_name IS NOT NULL
AND ev.category_code IN ('OTHER_ACT', 'CONTACTS', 'PEOPLEPLANS', 'PEOPLETESTS', 'PERSONREQ')
)
SELECT
ue.full_name,
ue.event_name,
ue.actual_date,
ue.service_provider_name
FROM
cte_ul_ev ue
WHERE
ue.row_num = 1;
you just missing a comma and , and wrong way of aliasing the column and seesm like distinct is ans extra thing you are doing
;WITH
cte_ul_ev AS (
SELECT
ev.full_name,
ev.event_name,
ev.actual_date,
ev.service_provider_name,
datediff(day, actual_date, getdate()) as DaysDiff,
ROW_NUMBER() OVER (PARTITION BY ev.full_name ORDER BY ev.actual_date DESC) as row_num --<<--<<--
FROM
dbo.event_expanded_view ev
WHERE
ev.full_name IS NOT NULL
AND ev.category_code IN ('OTHER_ACT', 'CONTACTS', 'PEOPLEPLANS', 'PEOPLETESTS', 'PERSONREQ')
)
SELECT
ue.full_name,
ue.event_name,
ue.actual_date,
ue.service_provider_name.
ue.DaysDiff
FROM
cte_ul_ev ue
WHERE
ue.row_num = 1;

PostgreSQL Percent Change using Row Number

I'm trying to find the percent change using row number with PostgreSQL but I'm running into an error where my "percent_change" column shows 0.
Here is what I have as my code.
WITH CTE AS (
SELECT date, sales, ROW_NUMBER() OVER (ORDER by date) AS rn
FROM sales_2019)
SELECT c1.date, c1.sales,
CAST(COALESCE (((c1.sales - c2.sales) * 1.0 / c2.sales) * 100, 0) AS INT) AS percent_change
FROM CTE AS c1
LEFT JOIN CTE AS c2
ON c1.date = c2.date AND c1.rn = c2.rn + 1
Here is my SQL table in case it's needed. Thank you in advance, I greatly appreciate it.
You can use LAG() for your requirement:
select
date,
sales,
round(coalesce((((sales-(lag(sales) over (order by date)))*1.0)/(lag(sales) over (order by date)))*100,0),2)
from sales_2019
or you can try with WITH clause
with cte as ( select
date,
sales,
coalesce(lag(sales) over (order by date),0) as previous_month
from sales_2019
)
select
date,
sales,
round( coalesce( (sales-previous_month)*1.0/nullif(previous_month,0),0 )*100,2)
from cte
DEMO
EDIT as per requirement in comment
with cte as ( select
date_,
sales,
ROW_NUMBER() OVER (ORDER by date_) AS rn1,
ROW_NUMBER() OVER (ORDER by date_)-1 AS rn2
from sales_2019
)
select t1.date_,
t1.sales,
round( coalesce( (t1.sales-t2.sales)*1.0/nullif(t2.sales,0),0 )*100,2)
from cte t1 left join cte t2 on t1.rn2=t2.rn1
DEMO

What should be done to do multiple order by?

I want to sort by chart_num and DATE. However, the following results are printed out when aligned:
in this my code:
SELECT *
FROM (
SELECT id, chart_num, chart_name, MIN(DATE) AS DATE, amount, (COUNT(*) = 2) AS result, card_check
FROM (
(
SELECT id, hpd.chart_num AS chart_num, hpd.chart_name AS chart_name, hpd.visit AS DATE, card_amount_received AS amount, card_check_modify AS card_check
,row_number() over (PARTITION BY card_amount_received ORDER BY id) AS seqnum
FROM hospital_payment_data hpd
WHERE store_mbrno = '135790' AND card_amount_received > 0
)
UNION ALL (
SELECT id, ncd. chart_num AS chart_num, ncd. chart_name AS chart_name, DATE_FORMAT(ncd.tranDate,'%Y-%m-%d') AS DATA, amount, card_check_result AS card_check
,row_number() over (PARTITION BY amount ORDER BY id) AS seqnum
FROM noti_card_data ncd
WHERE (mbrNo = '135790' OR mbrNo = '135791') AND cmd ='승인'
)
) X
GROUP BY amount, seqnum
ORDER BY result DESC
) a
ORDER BY a.DATE DESC
The result I want is that the NULL value goes back to the latest DATE, and if there is a chart_num, I want to sort it in order of chart_num and DATE.
It feels like I'm missing something else with this question, but you can separate columns in the ORDER BY with a comma. It's not clear from your text whether you want dates grouped within the same chart_num or charts grouped within the same date, but if I guessed wrong you can just swap it.
Also, the ORDER BY result DESC is completely extra. It adds nothing to the results, and by removing it we can get rid of a whole level of nesting.
SELECT id, chart_num, chart_name, MIN(DATE) AS DATE, amount, (COUNT(*) = 2) AS result, card_check
FROM (
(
SELECT id, hpd.chart_num AS chart_num, hpd.chart_name AS chart_name, hpd.visit AS DATE, card_amount_received AS amount, card_check_modify AS card_check
,row_number() over (PARTITION BY card_amount_received ORDER BY id) AS seqnum
FROM hospital_payment_data hpd
WHERE store_mbrno = '135790' AND card_amount_received > 0
)
UNION ALL (
SELECT id, ncd.chart_num, ncd.chart_name, DATE_FORMAT(ncd.tranDate,'%Y-%m-%d'), amount, card_check_result
,row_number() over (PARTITION BY amount ORDER BY id) AS seqnum
FROM noti_card_data ncd
WHERE mbrNo IN ('135790', '135791') AND cmd ='승인'
)
) X
GROUP BY amount, seqnum
ORDER BY MIN(DATE), coalesce(chart_num,-1), result DESC
Dont order by result in the inner union all query.
Sort by chart_num and date in place of result.
So in place of
Order by result desc
use this:
Order by chart_num desc, DATE desc
Or,
in outer main query:
in place of
Order by a.DATE DESC
use
Order by a.chart_num desc, a.DATE desc
Hope it helps.!

SQL Time Attendance Query

Recently I made a switch from MS Access to SQL Server. Due to this switch I am having issues with making one SQL query to work.
This is how the current table looks like in SQL.
This is what I am trying to get as result from the query:
Previously I was able to make it work in MS Access with the following query:
SELECT m.UserEnrollNumber, m.Checktime AS TimeIn, (SELECT Min(s.Checktime)
FROM CheckInOut1 s
WHERE s.UserEnrollNumber = m.UserEnrollNumber
AND s.Checktime > m.Checktime
AND s.Checktime <= Int(m.Checktime) + 1) AS TimeOut
FROM CheckInOut1 AS m
WHERE ((((SELECT COUNT(*)
FROM CheckInOut1 s
WHERE s.UserEnrollNumber = m.UserEnrollNumber
AND s.Checktime <= m.Checktime
AND s.Checktime >= INT(m.Checktime)) Mod 2)=1));
The following query as answer from #GMB:
select
employee_id,
min(time_in_out) check_in,
max(time_in_out) check_out
from (
select t.*, row_number() over(partition by employee_id order by time_in_out) - 1 rn
from mytable t
) t
group by employee_id, floor(rn / 2)
order by employee_id, floor(rn / 2)
from SQL table:
gives me the following result:
Seems like the minimum and maximum rows are shown, but the rows in between are not.
The following query from #Gordon Linoff:
SELECT cio.EmployeeID, cio.TimeInOut AS CheckIn,
cio.TimeInOut as CheckOut
FROM (SELECT cio.*,
ROW_NUMBER() OVER (PARTITION BY cio.EmployeeID, CONVERT(date, cio.TimeInOut) ORDER BY cio.TimeInOut) as seqnum,
LEAD(cio.TimeInOut) OVER (PARTITION BY cio.EmployeeID, CONVERT(date, cio.TimeInOut) ORDER BY cio.TimeInOut) as next_TimeInOut
FROM CheckInOut22 cio
) cio
WHERE seqnum % 2 = 1;
Gives me the following result:
Checkin is the same as CheckOut.
All help would be appreciated.
This is much simpler in SQL Server. Use window functions:
SELECT cio.EmployeeID, cio.TimeInOut AS CheckIn,
cio.next_TimeInOut as CheckOut
FROM (SELECT cio.*,
ROW_NUMBER() OVER (PARTITION BY cio.EmployeeID, CONVERT(date, cio.TimeInOut) ORDER BY cio.TimeInOut) as seqnum,
LEAD(cio.TimeInOut) OVER (PARTITION BY cio.EmployeeID, CONVERT(date, cio.TimeInOut) ORDER BY cio.TimeInOut) as next_TimeInOut
FROM CheckInOut cio
) cio
WHERE seqnum % 2 = 1;

Max dates for each sequence within partitions

I would like to see if somebody has an idea how to get the max and min dates within each 'id' using the 'row_num' column as an indicator when the sequence starts/ends in SQL Server 2016.
The screenshot below shows the desired output in columns 'min_date' and 'max_date'.
Any help would be appreciated.
You could use windowed MIN/MAX:
WITH cte AS (
SELECT *,SUM(CASE WHEN row_num > 1 THEN 0 ELSE 1 END)
OVER(PARTITION BY id, cat ORDER BY date_col) AS grp
FROM tab
)
SELECT *, MIN(date_col) OVER(PARTITION BY id, cat, grp) AS min_date,
MAX(date_col) OVER(PARTITION BY id, cat, grp) AS max_date
FROM cte
ORDER BY id, date_col, cat;
Rextester Demo
Try something like
SELECT
Q1.id, Q1.cat,
MIN(Q1.date) AS min_dat,
MAX(Q1.date) AS max_dat
FROM
(SELECT
*,
ROW_NUMBER() OVER (PARTITION BY id, cat ORDER BY [date]) AS r1,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY [date]) AS r2
) AS Q1
GROUP BY
Q1.id, Q1.r2 - Q1.r1