SQL query to return duplicate rows for certain column, but with unique values for another column - sql

I have written the query shown here, which combines three tables and returns rows where the at_ticket_num from appeal_tickets is duplicated but against a different at_system_ref value:
-- Tickets that appear in appeal_tickets under more than one at_system_ref,
-- together with the case reference of each appeal.
SELECT TOP 100
    t.t_reference,
    at.at_system_ref,
    at_ticket_num,
    a.a_case_ref
FROM tickets t
INNER JOIN appeal_tickets at
    ON at.at_ticket_num = t.t_number
INNER JOIN appeals_2 a
    ON a.a_system_ref = at.at_system_ref
WHERE t.t_reference IN ('AB123', 'AB234') -- temporary filter, only there to check the result by eye
  AND at.at_ticket_num IN (
        -- ticket numbers that carry at least two different system references
        SELECT at_ticket_num
        FROM appeal_tickets
        GROUP BY at_ticket_num
        HAVING COUNT(DISTINCT at_system_ref) > 1
      )
ORDER BY t.t_reference DESC
This is the output:
t_reference at_system_ref at_ticket_num a_case_ref
-------------------------------------------------------
AB123 30838974 23641583 1111979010
AB123 30838976 23641583 1111979010
AB234 30839149 23641520 1111977352
AB234 30839209 23641520 1111988003
I want to modify this so that it only returns records where t_reference is duplicated but against a different a_case_ref. So in above case only records for AB234 would be returned.
Any help would be much appreciated.

You want all ticket appeals that have more than one system reference and more than one case reference it seems. You can join the tables, count the occurrences per ticket and then only keep the tickets that match these criteria.
-- Join the three tables, count the distinct system and case references per
-- ticket, and keep only the tickets that have more than one of each.
select *
from
(
select
t.t_reference, at.at_system_ref, at.at_ticket_num, a.a_case_ref,
count(distinct a.a_system_ref) over (partition by at.at_ticket_num) as sysrefs,
count(distinct a.a_case_ref) over (partition by at.at_ticket_num) as caserefs
from tickets t
join appeal_tickets at on at.at_ticket_num = t.t_number
join appeals_2 a on a.a_system_ref = at.at_system_ref
) counted
where sysrefs > 1 and caserefs > 1
-- The aliases t, at and a only exist inside the derived table; out here the
-- columns belong to "counted", so they are referenced without those prefixes.
order by t_reference, at_system_ref, at_ticket_num, a_case_ref;
Correction
It seems that SQL Server still doesn't support COUNT(DISTINCT ...) OVER (...). You can count distinct values in a subquery though. Replace
count(distinct a.a_system_ref) over (partition by at.at_ticket_num) as sysrefs,
by
(
-- Correlated scalar subquery: for the ticket of the current outer row
-- (t.t_number), count the distinct system references among its appeals.
select count(distinct a2.a_system_ref)
from appeal_tickets at2
join appeals_2 a2 on a2.a_system_ref = at2.at_system_ref
where at2.at_ticket_num = t.t_number
) as sysrefs,
An alternative workaround is to use DENSE_RANK in two directions (found here: https://stackoverflow.com/a/53518204/2270762):
-- A value's ascending dense rank plus its descending dense rank, minus 1, is the
-- same for every row of the partition: the number of distinct a_system_ref values.
-- NOTE(review): DENSE_RANK presumably ranks NULL as a value of its own, whereas
-- COUNT(DISTINCT ...) ignores NULLs - confirm a_system_ref is never NULL here.
dense_rank() over (partition by at.at_ticket_num order by a.a_system_ref) +
dense_rank() over (partition by at.at_ticket_num order by a.a_system_ref desc) -
1 as sysrefs,

with data as (
<your query plus one column>,
-- dup = 1 when the rows sharing a t_reference hold at least two different
-- a_case_ref values: if the smallest and the largest value differ, there is
-- more than one. This avoids COUNT(DISTINCT ...) OVER (...).
case when
min(a.a_case_ref) over (partition by t.t_reference)
<>
max(a.a_case_ref) over (partition by t.t_reference)
then 1 end as dup
)
select * from data where dup = 1

Related

Multiple results - Need only the latest price

I need to find the latest price for some items
This is my query:
-- ICSHEH/ICSHED rows with SEQUENCENO 55873, joined to POPORL on ITEMNO and to
-- POPORH1 on PORHSEQ. MAX("DATE") is taken per group, and UNITCOST is part of
-- the grouping key, so every distinct unit cost produces its own output row.
SELECT
    MAX(poh."DATE") AS "PO DATE",
    sh."DOCNUM",
    sh."TRANSDATE",
    sh."FISCYEAR",
    sh."FISCPERIOD",
    sh."REFERENCE",
    sd."ITEMNO",
    sd."ITEMDESC",
    sd."LOCATION",
    sd."QUANTITY",
    sd."UNIT",
    pol."UNITCOST"
FROM "CABDAT"."dbo"."ICSHEH" AS sh
INNER JOIN "CABDAT"."dbo"."ICSHED" AS sd
    ON sh."SEQUENCENO" = sd."SEQUENCENO"
INNER JOIN "CABDAT"."dbo"."POPORL" AS pol
    ON sd."ITEMNO" = pol."ITEMNO"
INNER JOIN "CABDAT"."dbo"."POPORH1" AS poh
    ON pol."PORHSEQ" = poh."PORHSEQ"
WHERE sd."SEQUENCENO" = 55873
GROUP BY
    sh."DOCNUM",
    sh."TRANSDATE",
    sh."FISCYEAR",
    sh."FISCPERIOD",
    sh."REFERENCE",
    sd."ITEMNO",
    sd."ITEMDESC",
    sd."LOCATION",
    sd."QUANTITY",
    sd."UNIT",
    pol."UNITCOST"
This query returns multiple results
These are the results:
"PODATE"='20180405' "ITEMNO"='2944' "UNITCOST"='0.266750'
"PODATE"='20180405' "ITEMNO"='2946' "UNITCOST"='0.266750'
"PODATE"='20170208' "ITEMNO"='2944' "UNITCOST"='0.250780'
"PODATE"='20170208' "ITEMNO"='2944' "UNITCOST"='0.250780'
"PODATE"='20170208' "ITEMNO"='2946' "UNITCOST"='0.250780'
"PODATE"='20170208' "ITEMNO"='2946' "UNITCOST"='0.250780'
I need to have only
"PODATE"='20180405' "ITEMNO"='2944' "UNITCOST"='0.266750'
"PODATE"='20180405' "ITEMNO"='2946' "UNITCOST"='0.266750'
I am learning SQL, so please be patient with my ignorance...
Thanks a lot!
You just need row_number().
-- Number each item's purchase-order lines from the newest PO date to the
-- oldest and keep only the newest, so each ITEMNO comes back once with the
-- UNITCOST of its latest purchase order.
WITH cte AS (
    SELECT
        "POPORH1"."DATE" AS "PO DATE",
        "ICSHEH"."DOCNUM",
        "ICSHEH"."TRANSDATE",
        "ICSHEH"."FISCYEAR",
        "ICSHEH"."FISCPERIOD",
        "ICSHEH"."REFERENCE",
        "ICSHED"."ITEMNO",
        "ICSHED"."ITEMDESC",
        "ICSHED"."LOCATION",
        "ICSHED"."QUANTITY",
        "ICSHED"."UNIT",
        "POPORL"."UNITCOST",
        -- NOTE(review): partitioning by ITEMNO alone assumes an item appears on
        -- only one line of this shipment - add the line key if that is not so.
        ROW_NUMBER() OVER (
            PARTITION BY "ICSHED"."ITEMNO"
            ORDER BY "POPORH1"."DATE" DESC
        ) AS rn
    FROM "CABDAT"."dbo"."ICSHEH" "ICSHEH"
    INNER JOIN "CABDAT"."dbo"."ICSHED" "ICSHED"
        ON "ICSHEH"."SEQUENCENO" = "ICSHED"."SEQUENCENO"
    INNER JOIN "CABDAT"."dbo"."POPORL" "POPORL"
        ON "ICSHED"."ITEMNO" = "POPORL"."ITEMNO"
    INNER JOIN "CABDAT"."dbo"."POPORH1" "POPORH1"
        ON "POPORL"."PORHSEQ" = "POPORH1"."PORHSEQ"
    WHERE "ICSHED"."SEQUENCENO" = 55873
)
SELECT
    "PO DATE", "DOCNUM", "TRANSDATE", "FISCYEAR", "FISCPERIOD", "REFERENCE",
    "ITEMNO", "ITEMDESC", "LOCATION", "QUANTITY", "UNIT", "UNITCOST"
FROM cte
WHERE rn = 1
Or if you only need the highest value without any grouping can use TOP 1
-- Single newest row overall (no per-item grouping).
SELECT TOP 1 *
FROM "ICSHED" -- or join tables
WHERE "ICSHED"."SEQUENCENO"=55873
ORDER BY "PODATE" DESC
As I understand it, you want the two rows with the most recent date, so try this:
-- The two rows with the greatest dateCol.
SELECT TOP 2 *
FROM yourtable
ORDER BY dateCol DESC

Group by not working to get count of a column with other max record in sql

I have a table named PublishedData, see image below
I'm trying to get the output like, below image
I think you can use a query like this:
-- One row per district: the most recently updated row's content and date,
-- plus the district-wide latest publish date and unpublished-row count.
WITH ranked AS (
    -- seq = 1 marks the row with the latest UpdatedDate in each district;
    -- among equal dates a NULL Content sorts as the string 'zzzzz'.
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY DistrictName
            ORDER BY UpdatedDate DESC, ISNULL(Content, 'zzzzz')
        ) AS seq
    FROM PublishedData
),
district_totals AS (
    -- COUNT skips the NULLs the CASE yields for published rows.
    SELECT
        DistrictName,
        MAX(LastPublished) AS LastPublished,
        COUNT(CASE WHEN IsPublished = 0 THEN 1 END) AS Unpublished
    FROM PublishedData
    GROUP BY DistrictName
)
SELECT
    latest.DistrictName,
    ISNULL(latest.Content, 'N/A') AS Content,
    latest.UpdatedDate,
    totals.LastPublished,
    totals.Unpublished
FROM ranked AS latest
INNER JOIN district_totals AS totals
    ON latest.DistrictName = totals.DistrictName
WHERE latest.seq = 1;
I wrote it this way because I think you order by UpdatedDate and Content to obtain your first two columns.
Check out something like this (I don't have your tables, but you will get the idea where to follow with your query):
-- Per district: latest update date, latest publish date and the number of
-- unpublished rows, all computed in a single pass over PublishedData.
SELECT DistrictName,
       MAX(UpdatedDate)   AS UpdatedDate,
       MAX(LastPublished) AS LastPublished,
       -- the CASE yields NULL for published rows, which COUNT ignores
       COUNT(CASE WHEN IsPublished = 0 THEN 1 END) AS Unpublished
FROM PublishedData
GROUP BY DistrictName
We would need a unique identity column in the PublishedData table to produce that output, because the latest Content cannot be determined from the given schema.
If you only need the data other than Content (DistrictName, UpdatedDate, LastPublished and the count of unpublished records), please use the query given below:
-- T1: latest update/publish dates per district. T2: unpublished rows per district.
-- LEFT JOIN + COALESCE keeps districts that have no unpublished rows (count 0)
-- instead of dropping them from the result.
select T1.DistrictName, T1.UpdatedDate, T1.LastPublished, coalesce(T2.Unpublished, 0) as Unpublished
from
(select DistrictName, max(UpdatedDate) as UpdatedDate, max(LastPublished) as LastPublished from PublishedData group by DistrictName) T1
left join
(select DistrictName, count(*) as Unpublished from PublishedData where IsPublished = 0 group by DistrictName) T2 on T1.DistrictName = T2.DistrictName
order by Unpublished desc

Find duplicates in MS SQL table

I know that this question has been asked several times but I still cannot figure out why my query is returning values which are not duplicates. I want my query to return only the records which have identical value in the column Credit. The query executes without any errors but values which are not duplicated are also being returned. This is my query:
-- Transactions joined to their financial account, restricted to Credit values
-- that occur more than once anywhere in _bvGLTransactionsFull.
Select
_bvGLTransactionsFull.AccountDesc,
_bvGLAccountsFinancial.Description,
_bvGLTransactionsFull.TxDate,
_bvGLTransactionsFull.Description,
_bvGLTransactionsFull.Credit,
_bvGLTransactionsFull.Reference,
_bvGLTransactionsFull.UserName
From
_bvGLAccountsFinancial Inner Join
_bvGLTransactionsFull On _bvGLAccountsFinancial.AccountLink =
_bvGLTransactionsFull.AccountLink
Where
_bvGLTransactionsFull.Credit
IN
(SELECT Credit AS NumOccurrences
FROM _bvGLTransactionsFull
GROUP BY Credit
HAVING (COUNT(Credit) > 1 ) )
Group By
_bvGLTransactionsFull.AccountDesc, _bvGLAccountsFinancial.Description,
_bvGLTransactionsFull.TxDate, _bvGLTransactionsFull.Description,
_bvGLTransactionsFull.Credit, _bvGLTransactionsFull.Reference,
_bvGLTransactionsFull.UserName, _bvGLAccountsFinancial.Master_Sub_Account,
IsNumeric(_bvGLTransactionsFull.Reference), _bvGLTransactionsFull.TrCode
Having
-- Was the unquoted 01 / 11 / 2014, which is integer division (result 0), not a date.
-- NOTE(review): '20141101' assumes day/month/year (1 Nov 2014); use '20140111' if 11 Jan was meant.
_bvGLTransactionsFull.TxDate > '20141101' And
_bvGLTransactionsFull.Reference Like '5_____' And
_bvGLTransactionsFull.Credit > 0.01 And
_bvGLAccountsFinancial.Master_Sub_Account = '90210'
That's because you're matching on the credit field back to your table, which contains duplicates. You need to isolate the rows that are duplicated with ROW_NUMBER:
-- RN numbers the rows that share a Credit value, in arbitrary order
-- (ORDER BY (SELECT NULL)), across the whole transactions table.
;WITH CTE AS (
SELECT *, ROW_NUMBER() OVER(PARTITION BY CREDIT ORDER BY (SELECT NULL)) AS RN
FROM _bvGLTransactionsFull)
Select
CTE.AccountDesc,
_bvGLAccountsFinancial.Description,
CTE.TxDate,
CTE.Description,
CTE.Credit,
CTE.Reference,
CTE.UserName
From
_bvGLAccountsFinancial Inner Join
CTE On _bvGLAccountsFinancial.AccountLink = CTE.AccountLink
-- RN > 1 keeps the second and later rows of each Credit value; the row
-- numbered 1 of every Credit value is not returned.
WHERE CTE.RN > 1
Group By
CTE.AccountDesc, _bvGLAccountsFinancial.Description,
CTE.TxDate, CTE.Description,
CTE.Credit, CTE.Reference,
CTE.UserName, _bvGLAccountsFinancial.Master_Sub_Account,
IsNumeric(CTE.Reference), CTE.TrCode
Having
-- Was the unquoted 01 / 11 / 2014, which is integer division (result 0), not a date.
-- NOTE(review): '20141101' assumes day/month/year (1 Nov 2014); use '20140111' if 11 Jan was meant.
CTE.TxDate > '20141101' And
CTE.Reference Like '5_____' And
CTE.Credit > 0.01 And
_bvGLAccountsFinancial.Master_Sub_Account = '90210'
Just as a side note, I would consider using aliases to shorten your queries and make them more readable. Prefixing the table name before each column in a join is very difficult to read.
I trust that your code extracts all the data matching your criteria, so let me take a different approach and use your script as-is. First, let's store all the records in a temp table.
-- Materialise the filtered, grouped result into #tmpTable so that duplicates
-- can afterwards be looked for within that result only.
Select
_bvGLTransactionsFull.AccountDesc,
_bvGLAccountsFinancial.Description,
_bvGLTransactionsFull.TxDate,
_bvGLTransactionsFull.Description,
_bvGLTransactionsFull.Credit,
_bvGLTransactionsFull.Reference,
_bvGLTransactionsFull.UserName
-- temp table
INTO #tmpTable
From
_bvGLAccountsFinancial Inner Join
_bvGLTransactionsFull On _bvGLAccountsFinancial.AccountLink =
_bvGLTransactionsFull.AccountLink
Where
_bvGLTransactionsFull.Credit
IN
(SELECT Credit AS NumOccurrences
FROM _bvGLTransactionsFull
GROUP BY Credit
HAVING (COUNT(Credit) > 1 ) )
Group By
_bvGLTransactionsFull.AccountDesc, _bvGLAccountsFinancial.Description,
_bvGLTransactionsFull.TxDate, _bvGLTransactionsFull.Description,
_bvGLTransactionsFull.Credit, _bvGLTransactionsFull.Reference,
_bvGLTransactionsFull.UserName, _bvGLAccountsFinancial.Master_Sub_Account,
IsNumeric(_bvGLTransactionsFull.Reference), _bvGLTransactionsFull.TrCode
Having
-- Was the unquoted 01 / 11 / 2014, which is integer division (result 0), not a date.
-- NOTE(review): '20141101' assumes day/month/year (1 Nov 2014); use '20140111' if 11 Jan was meant.
_bvGLTransactionsFull.TxDate > '20141101' And
_bvGLTransactionsFull.Reference Like '5_____' And
_bvGLTransactionsFull.Credit > 0.01 And
_bvGLAccountsFinancial.Master_Sub_Account = '90210'
Then remove the "single occurrence" data by creating a row index per Credit value and filtering out every row whose index is 1.
-- Number the rows of each Credit value in #tmpTable and return every row
-- except the one numbered 1, i.e. only the repeated occurrences.
;WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY Credit ORDER BY Credit) AS rowIdx,
        *
    FROM #tmpTable
)
SELECT *
FROM numbered
WHERE rowIdx <> 1
You can change your preference through PARTITION BY <column name>.
If you have any concerns, please raise them, as this is how I have understood your case so far.
EDIT: To include all rows of the credits that have duplicates:
-- Every row of #tmpTable whose Credit value occurs more than once there.
-- Filtering with IN returns each row exactly once; joining to the list of
-- repeated rows would repeat a row once per extra occurrence of its Credit
-- (a Credit that occurs three times would return each of its rows twice).
SELECT
    tmp1.*
FROM #tmpTable tmp1
WHERE tmp1.Credit IN (
    SELECT Credit
    FROM #tmpTable
    GROUP BY Credit
    HAVING COUNT(*) > 1
)

Postgres: Making column in first row contain sum of same column in other rows

I'm a newbie in postgres and i have a troubling issue.
Suppose the output of my SQL query is
123456789;"2014-11-20 12:30:35.454875";500;200;"2014-11-16 16:16:26.976258";300
123456789;"2014-11-20 12:30:35.454875";500;200;"2014-11-16 16:16:27.173523";100
What I want is to sum the 4th column over all rows, so that the first row contains that sum:
123456789;"2014-11-20 12:30:35.454875";500;400;"2014-11-16 16:16:26.976258";300
My query is
-- Rows of tbl_table1 for one phone number with their matching tbl_table2 rows
-- (if any), latest event_time first; the cents columns are divided by 100.
SELECT
    l.phone_no,
    l.loan_time,
    l.cents_loaned / 100,
    r.cents_deducted / 100,
    r.event_time,
    r.cents_balance / 100
FROM tbl_table1 AS l
LEFT JOIN tbl_table2 AS r
    ON l.tb1_id = r.tbl2_id
WHERE l.phone_no = 123456789
ORDER BY r.event_time DESC
Any help will be appreciated.
Maybe this helps. It returns the rows with the 4th column of the latest row (rn = 1) replaced by the sum of the 4th column over all rows.
-- rn numbers the rows from the latest event_time down; sum_cents_deducted is
-- the total of the per-row deducted amounts over the whole result.
WITH query AS (
    SELECT l.phone_no, l.loan_time, l.cents_loaned/100 AS cents_loaned,
           r.cents_deducted/100 AS cents_deducted, r.event_time,
           r.cents_balance/100 AS cents_balance,
           ROW_NUMBER() OVER (ORDER BY r.event_time DESC) AS rn,
           SUM(r.cents_deducted/100) OVER () AS sum_cents_deducted
    FROM tbl_table1 l
    LEFT JOIN tbl_table2 r
        ON l.tb1_id = r.tbl2_id
    WHERE l.phone_no=123456789
)
SELECT phone_no, loan_time, cents_loaned,
       -- only the first row shows the total; the other rows keep their own value
       CASE WHEN rn = 1 THEN sum_cents_deducted ELSE cents_deducted END AS cents_deducted,
       event_time, cents_balance
FROM query
-- explicit ordering guarantees that the row carrying the total comes first
ORDER BY rn
Use a window function over the whole set (OVER ()) as frame:
-- Same rows as the question's query, but the 4th column is the total of
-- cents_deducted over every returned row (empty OVER ()), divided by 100.
SELECT
    l.phone_no,
    l.loan_time,
    l.cents_loaned / 100,
    SUM(r.cents_deducted) OVER () / 100 AS total_cents_deducted,
    r.event_time,
    r.cents_balance / 100
FROM tbl_table1 AS l
LEFT JOIN tbl_table2 AS r
    ON l.tb1_id = r.tbl2_id
WHERE l.phone_no = 123456789
ORDER BY r.event_time DESC
This will return all rows, not just the first. Your question is unclear as to that.

SQL if breaking number pattern, mark record?

I have the following query:
-- Every account's reporting periods, account by account in period order.
SELECT AccountNumber, RptPeriod
FROM dbo.Report
ORDER BY AccountNumber, RptPeriod
I get the following results:
123 200801
123 200802
123 200803
234 200801
344 200801
344 200803
I need to mark the record where the RptPeriod doesn't run consecutively for the account. For example, 344 200803 would have an X next to it, since the account goes from 200801 straight to 200803.
This is for about 19321 rows, and I want it on a per-company basis: I don't care how the numbers compare between different companies, I just want to see where there are breaks in the number pattern within the same company.
Any Ideas??
Thanks!
OK, this is kind of ugly (double join + anti-join) but it gets the work done, AND is pure portable SQL:
-- Pairs of rows (R1, R2) of the same account whose periods are more than 1
-- apart and that have no other row of that account between them.
SELECT *
FROM dbo.Report R1
INNER JOIN dbo.Report R2
    ON R1.AccountNumber = R2.AccountNumber
WHERE R2.RptPeriod - R1.RptPeriod > 1
  -- the anti-join makes sure R1 and R2 are neighbours in period order:
  -- no row of the account may lie strictly between the two periods
  AND NOT EXISTS (
        SELECT 1
        FROM dbo.Report Mid
        WHERE Mid.AccountNumber = R1.AccountNumber
          AND Mid.AccountNumber = R2.AccountNumber
          AND Mid.RptPeriod > R1.RptPeriod
          AND Mid.RptPeriod < R2.RptPeriod
      )
Something like this should do it:
-- "numbered" gives every row a 1-based position within its AccountNumber,
-- counted in ascending RptPeriod order.
;WITH numbered (AccountNumber, RptPeriod, Ranking) AS (
    SELECT
        AccountNumber,
        RptPeriod,
        ROW_NUMBER() OVER (PARTITION BY AccountNumber ORDER BY RptPeriod)
    FROM dbo.Report
)
-- Each row is paired with the row just before it in the same account.
SELECT
    cur.AccountNumber,
    cur.RptPeriod,
    CASE
        -- no preceding row (first of the account) or preceding period is exactly one less
        WHEN prev.RptPeriod IS NULL OR prev.RptPeriod = cur.RptPeriod - 1 THEN ''
        -- preceding period is anything other than one less: a break in the sequence
        ELSE 'x'
    END AS UhOh
FROM numbered AS cur
LEFT JOIN numbered AS prev
    ON prev.AccountNumber = cur.AccountNumber
   AND prev.Ranking = cur.Ranking - 1
(Edited to add comments)
WITH Islands AS (
    SELECT
        *,
        /* Within a run of consecutive periods, RptPeriod and the row number
           rise together, so their difference is constant per run: every
           island has its own (AccountNumber, IslandKey) pair. */
        RptPeriod - ROW_NUMBER() OVER (PARTITION BY AccountNumber ORDER BY RptPeriod) AS IslandKey,
        /* Position of the row within its account; 1 is the account's first
           row, which must never be flagged. */
        ROW_NUMBER() OVER (PARTITION BY AccountNumber ORDER BY RptPeriod) AS SeqInAccount
    FROM Report
)
SELECT
    AccountNumber,
    RptPeriod,
    /* 'X' for a row that opens an island but is not the account's first row;
       NULL for every other row. */
    CASE
        WHEN SeqInAccount > 1
         AND ROW_NUMBER() OVER (PARTITION BY AccountNumber, IslandKey ORDER BY RptPeriod) = 1
        THEN 'X'
    END AS Flag
FROM Islands
-- Rows of an account that have no directly following period (r2 finds
-- nothing) although a later period does exist (r3): the rows just before a gap.
SELECT r.*
FROM report r
LEFT JOIN report r2
    ON r2.accountnumber = r.accountnumber
   -- "the next period": written for a numeric rptperiod; replace with the
   -- matching date arithmetic if rptperiod is stored as a date
   AND r2.rptperiod = r.rptperiod + 1
WHERE r2.rptperiod IS NULL
  -- EXISTS instead of a join, so r is returned once however many later periods exist
  AND EXISTS (
        SELECT 1
        FROM report r3
        WHERE r3.accountnumber = r.accountnumber
          AND r3.rptperiod > r.rptperiod
      )
I'm not sure of SQL Server's date logic syntax, but hopefully you get the idea. r will be all the records for which no next rptPeriod exists (r2 is NULL) and at least one greater rptPeriod exists (r3). The query isn't super straightforward, I guess, but if you have an index on the two columns, it'll probably be the most efficient way to get your data.
Basically, you number rows within every account, then, using the row numbers, compare the RptPeriod values for the neighbouring rows.
It is assumed here that RptPeriod is the year and month encoded, for which case the year transition check has been added.
-- Number the rows of every account in RptPeriod order, then compare each row
-- (r1) with the row just before it (r2) and flag it when the two periods are
-- not exactly one month apart.
;WITH Report_sorted AS (
  SELECT
    AccountNumber,
    RptPeriod,
    -- RptPeriod is taken to be an integer YYYYMM; as a running month count,
    -- December -> January of the next year is a step of exactly 1.
    MonthIndex = (RptPeriod / 100) * 12 + RptPeriod % 100,
    rownum = ROW_NUMBER() OVER (PARTITION BY AccountNumber ORDER BY RptPeriod)
  FROM dbo.Report
)
SELECT
  r1.AccountNumber,
  r1.RptPeriod,
  -- r2 is missing for the first row of an account: ISNULL(..., 1) treats it as "no gap"
  Chk = CASE ISNULL(r1.MonthIndex - r2.MonthIndex, 1)
    WHEN 1 THEN ''
    ELSE 'X'
  END
FROM Report_sorted r1
  LEFT JOIN Report_sorted r2
    ON r1.AccountNumber = r2.AccountNumber AND r1.rownum = r2.rownum + 1
It could be complicated further with an additional check for gaps spanning a year and more, if you need that.