SQL if breaking number pattern, mark record? - sql

I have the following query:
-- List every reporting period of every account, ordered so that the periods
-- of one account appear together in ascending order.
SELECT AccountNumber, RptPeriod
FROM dbo.Report
ORDER BY AccountNumber, RptPeriod;
I get the following results:
123 200801
123 200802
123 200803
234 200801
344 200801
344 200803
I need to mark each record where the RptPeriod does not follow consecutively from the previous one for the same account. For example, 344 200803 would have an X next to it, since the account goes from 200801 straight to 200803.
This is for about 19321 rows and I want it on a company basis so between different companies I dont care what the numbers are, I just want the same company to show where there is breaks in the number pattern.
Any Ideas??
Thanks!

OK, this is kind of ugly (double join + anti-join) but it gets the work done, AND is pure portable SQL:
-- Pair every row (R1) with a later row (R2) of the same account whose period
-- is more than 1 ahead, then keep only pairs that are direct neighbours.
SELECT *
FROM dbo.Report R1
INNER JOIN dbo.Report R2
    ON R2.AccountNumber = R1.AccountNumber
WHERE R2.RptPeriod - R1.RptPeriod > 1
  -- neighbours only: no row of this account may lie strictly between R1 and R2
  AND NOT EXISTS (
        SELECT 1
        FROM dbo.Report R3
        WHERE R3.AccountNumber = R1.AccountNumber
          AND R3.AccountNumber = R2.AccountNumber
          AND R3.RptPeriod > R1.RptPeriod
          AND R3.RptPeriod < R2.RptPeriod
      )

Something like this should do it:
-- Numbered: every (AccountNumber, RptPeriod) row with a sequence number that
-- starts at 1 for each AccountNumber and ascends with RptPeriod
;WITH Numbered (AccountNumber, RptPeriod, Ranking)
AS (
    SELECT
        AccountNumber,
        RptPeriod,
        ROW_NUMBER() OVER (PARTITION BY AccountNumber ORDER BY RptPeriod)
    FROM dbo.Report
)
-- Pair each row with the row numbered one lower within the same account
SELECT
    Cur.AccountNumber,
    Cur.RptPeriod,
    CASE
        -- no previous row (first of the account), or previous period is exactly one less
        WHEN Prev.RptPeriod IS NULL OR Prev.RptPeriod = Cur.RptPeriod - 1 THEN ''
        -- previous period is not exactly one less than this row's period
        ELSE 'x'
    END AS UhOh
FROM Numbered Cur
LEFT JOIN Numbered Prev
    ON Prev.AccountNumber = Cur.AccountNumber
   AND Prev.Ranking = Cur.Ranking - 1
(Edited to add comments)

WITH Islands AS (
    SELECT
        *,
        -- Within one account, rows whose periods increase by exactly 1 share the
        -- same (RptPeriod - row number) value, so (AccountNumber, Grp) names one
        -- contiguous run.
        RptPeriod - ROW_NUMBER() OVER (PARTITION BY AccountNumber ORDER BY RptPeriod) AS Grp,
        -- Position of the row inside its account; 1 marks the account's first row.
        ROW_NUMBER() OVER (PARTITION BY AccountNumber ORDER BY RptPeriod) AS RN
    FROM Report
)
SELECT
    AccountNumber,
    RptPeriod,
    -- 'X' when the row opens a new run but is not the account's first row; NULL otherwise
    CASE
        WHEN RN > 1
         AND ROW_NUMBER() OVER (PARTITION BY AccountNumber, Grp ORDER BY RptPeriod) = 1
        THEN 'X'
    END AS Flag
FROM Islands

-- Rows that are immediately followed by a gap: the next period is absent (r2)
-- although some later period exists for the same account.
SELECT r.accountnumber, r.rptperiod
FROM report r
LEFT JOIN report r2
    ON r2.accountnumber = r.accountnumber
    -- assumes rptperiod is an integer period number, as in the sample data;
    -- replace "+ 1" with the appropriate date arithmetic if it is a date
   AND r2.rptperiod = r.rptperiod + 1
WHERE r2.rptperiod IS NULL
  -- EXISTS rather than a join so each qualifying row is returned once
  AND EXISTS (
        SELECT 1
        FROM report r3
        WHERE r3.accountnumber = r.accountnumber
          AND r3.rptperiod > r.rptperiod
      )
I'm not sure of SQL Server's date logic syntax, but hopefully you get the idea. r will be all the records where the next rptPeriod is NULL (r2) and there exists at least one greater rptPeriod (r3). The query isn't super straightforward, I guess, but if you have an index on the two columns, it'll probably be the most efficient way to get your data.

Basically, you number rows within every account, then, using the row numbers, compare the RptPeriod values for the neighbouring rows.
It is assumed here that RptPeriod is the year and month encoded, for which case the year transition check has been added.
-- Report_sorted: each report row numbered within its account by RptPeriod,
-- plus a linear month index so consecutive months always differ by exactly 1.
;WITH Report_sorted AS (
    SELECT
        AccountNumber,
        RptPeriod,
        -- RptPeriod is treated as YYYYMM: year * 12 + month gives a month count
        -- in which December -> January is a step of 1
        MonthIndex = (RptPeriod / 100) * 12 + RptPeriod % 100,
        rownum = ROW_NUMBER() OVER (PARTITION BY AccountNumber ORDER BY RptPeriod)
    FROM dbo.Report
)
SELECT
    r1.AccountNumber,
    r1.RptPeriod,
    -- r2 is the preceding row of the same account; when there is none the
    -- difference is NULL and ISNULL turns it into 1, i.e. "no gap"
    CASE ISNULL(r1.MonthIndex - r2.MonthIndex, 1)
        WHEN 1 THEN ''
        ELSE 'X'
    END AS Chk
FROM Report_sorted r1
LEFT JOIN Report_sorted r2
    ON r1.AccountNumber = r2.AccountNumber AND r1.rownum = r2.rownum + 1
It could be complicated further with an additional check for gaps spanning a year and more, if you need that.

Related

SQL query to return duplicate rows for certain column, but with unique values for another column

I have written the query shown here that combines three tables and returns rows where the at_ticket_num from appeal_tickets is duplicated but against a different at_sys_ref value
-- Tickets joined to their appeals, restricted to ticket numbers that appear
-- under more than one distinct at_system_ref.
select top 100
    t.t_reference,
    at.at_system_ref,
    at_ticket_num,
    a.a_case_ref
from tickets t
inner join appeal_tickets at
    on at.at_ticket_num = t.t_number
inner join appeals_2 a
    on a.a_system_ref = at.at_system_ref
where t.t_reference in ('AB123','AB234') -- debugging filter: two known references
  and at.at_ticket_num in (
        select at_ticket_num
        from appeal_tickets
        group by at_ticket_num
        having count(distinct at_system_ref) > 1
      )
order by t.t_reference desc
This is the output:
t_reference at_system_ref at_ticket_num a_case_ref
-------------------------------------------------------
AB123 30838974 23641583 1111979010
AB123 30838976 23641583 1111979010
AB234 30839149 23641520 1111977352
AB234 30839209 23641520 1111988003
I want to modify this so that it only returns records where t_reference is duplicated but against a different a_case_ref. So in above case only records for AB234 would be returned.
Any help would be much appreciated.
You want all ticket appeals that have more than one system reference and more than one case reference it seems. You can join the tables, count the occurrences per ticket and then only keep the tickets that match these criteria.
-- counted: every ticket/appeal row together with the number of distinct system
-- references and distinct case references found for its ticket number.
select *
from
(
    select
        t.t_reference, at.at_system_ref, at.at_ticket_num, a.a_case_ref,
        count(distinct a.a_system_ref) over (partition by at.at_ticket_num) as sysrefs,
        count(distinct a.a_case_ref) over (partition by at.at_ticket_num) as caserefs
    from tickets t
    join appeal_tickets at on at.at_ticket_num = t.t_number
    join appeals_2 a on a.a_system_ref = at.at_system_ref
) counted
where sysrefs > 1 and caserefs > 1
-- the inner aliases t / at / a are not visible here; order by the derived
-- table's own column names
order by t_reference, at_system_ref, at_ticket_num, a_case_ref;
Correction
It seems that SQL Server still doesn't support COUNT(DISTINCT ...) OVER (...). You can count distinct values in a subquery though. Replace
count(distinct a.a_system_ref) over (partition by at.at_ticket_num) as sysrefs,
by
(
select count(distinct a2.a_system_ref)
from appeal_tickets at2
join appeals_2 a2 on a2.a_system_ref = at2.at_system_ref
where at2.at_ticket_num = t.t_number
) as sysrefs,
An alternative workaround is to use DENSE_RANK in two directions (found here: https://stackoverflow.com/a/53518204/2270762):
dense_rank() over (partition by at.at_ticket_num order by a.a_system_ref) +
dense_rank() over (partition by at.at_ticket_num order by a.a_system_ref desc) -
1 as sysrefs,
-- Template: wrap the original query and add a "dup" marker that is 1 when a
-- t_reference is associated with more than one a_case_ref value.
with data as (
<your query plus one column>,
-- lowest and highest case reference differ => at least two distinct values
case when
min(a.a_case_ref) over (partition by t.t_reference)
<>
max(a.a_case_ref) over (partition by t.t_reference)
then 1 end as dup
)
select * from data where dup = 1

SQL Query Help - Negative reporting

Perhaps somebody can help with Ideas or a Solution. A User asked me for a negative report. We have a table with tickets each ticket has a ticket number which would be easy to select but the user wants a list of missing tickets between the first and last ticket in the system.
E.g. Select TicketNr from Ticket order by TicketNr
Result
1,
2,
4,
7,
11
But we actually want the result 3,5,6,8,9,10
-- Ticket table from the question: identity key plus a nullable ticket number.
CREATE TABLE dbo.Ticket (
    pknTicketId INT IDENTITY(1, 1) NOT NULL,
    TicketNr    INT NULL
) ON [PRIMARY]
GO
SQL Server 2016 - TSQL
Any ideas ?
A bit more information is needed: all solutions so far work on a small table, but our production database has over 4 million tickets, which is why we need to find the missing ones.
First get the minimum and maximum, then generate all possible ticket numbers, and finally select the ones that are missing.
-- FirstAndLast: lowest and highest ticket number present in the table
;WITH FirstAndLast AS
(
    SELECT
        MinTicketNr = MIN(T.TicketNr),
        MaxTicketNr = MAX(T.TicketNr)
    FROM Ticket AS T
),
-- AllTickets: every integer from MinTicketNr to MaxTicketNr, one per recursion step
AllTickets AS
(
    SELECT
        TicketNr = T.MinTicketNr,
        MaxTicketNr = T.MaxTicketNr
    FROM FirstAndLast AS T
    -- an empty Ticket table gives NULL bounds; emit nothing in that case
    WHERE T.MinTicketNr IS NOT NULL

    UNION ALL

    SELECT
        TicketNr = A.TicketNr + 1,
        MaxTicketNr = A.MaxTicketNr
    FROM AllTickets AS A
    WHERE A.TicketNr + 1 <= A.MaxTicketNr
)
-- keep only the generated numbers that have no matching ticket
SELECT A.TicketNr
FROM AllTickets AS A
WHERE NOT EXISTS (
        SELECT 'missing ticket'
        FROM Ticket AS T
        WHERE A.TicketNr = T.TicketNr
      )
ORDER BY A.TicketNr
-- one recursion level is used per generated number; 0 removes the limit so
-- ranges wider than a fixed cap (e.g. 32000) do not abort the query
OPTION (MAXRECURSION 0)
If you can accept the results in a different format, the following will do what you want:
-- One row per gap: first and last missing number and how many are missing.
select
    g.TicketNr + 1                   as first_missing,
    g.next_TicketNr - 1              as last_missing,
    (g.next_TicketNr - g.TicketNr - 1) as num_missing
from (
    -- pair every ticket with the next higher ticket number
    select t.*, lead(t.TicketNr) over (order by t.TicketNr) as next_TicketNr
    from Ticket t
) g
where g.next_TicketNr <> g.TicketNr + 1;
This shows each sequence of missing ticket numbers on a single row, rather than a separate row for each of them.
If you do use a recursive CTE, I would recommend doing it only for the missing tickets:
-- cte: starts at the first missing number of every gap and walks forward
-- until it reaches a number that exists in Ticket.
with cte as (
    -- anchor: number right after each ticket whose successor is not TicketNr + 1
    select (TicketNr + 1) as missing_TicketNr
    from (
        select t.*, lead(TicketNr) over (order by TicketNr) as next_ticketNr
        from Ticket t
    ) t
    where next_TicketNr <> TicketNr + 1

    union all

    -- step: keep adding the next number while it is still absent from Ticket
    select missing_TicketNr + 1
    from cte
    where not exists (select 1 from Ticket t2 where t2.TicketNr = cte.missing_TicketNr + 1)
)
select *
from cte
-- a gap longer than the default recursion limit would otherwise abort the query
option (maxrecursion 0);
This version starts with the list of missing ticket numbers. It then adds a new one, as the numbers are not found.
One method is to use recursive cte to find the missing ticket numbers :
-- missing: every integer from the lowest to the highest ticket number (mnt),
-- carrying the upper bound (mxt) along to stop the recursion.
with missing as (
    select min(TicketNr) as mnt, max(TicketNr) as mxt
    from Ticket t

    union all

    select mnt + 1, mxt
    from missing m
    where mnt < mxt
)
-- keep the generated numbers for which no ticket exists
select m.*
from missing m
where not exists (select 1 from Ticket t where t.TicketNr = m.mnt)
-- one recursion level per generated number; lift the default limit
option (maxrecursion 0);
This should do the trick: SQL Fiddle
-- Demo data: a table variable (table variables are named with @, not #)
declare @ticketsTable table (ticketNo int not null)
insert @ticketsTable (ticketNo) values (1),(2),(4),(7),(11)

-- cte1: existing tickets, flagged as not missing, numbered in ticket order
;with cte1(ticketNo, isMissing, sequenceNo) AS
(
    select ticketNo
    , 0
    , row_number() over (order by ticketNo)
    from @ticketsTable
)
-- cte2: the existing tickets plus, recursively, every number that follows a
-- row while the next existing ticket (sequenceNo + 1) is not that row's number + 1
, cte2(ticketNo, isMissing, sequenceNo) AS
(
    select ticketNo, isMissing, sequenceNo
    from cte1

    union all

    select a.ticketNo + 1
    , 1
    , a.sequenceNo
    from cte2 a
    inner join cte1 b
        on b.sequenceNo = a.sequenceNo + 1
        and b.ticketNo != a.ticketNo + 1
)
select *
from cte2
where isMissing = 1
order by ticketNo
-- each missing number in a gap costs one recursion level; lift the default limit
option (maxrecursion 0)
It works by collecting all of the existing tickets, marking them as existing, and assigning each a consecutive number giving their order in the original list.
We can then see the gaps in the list by finding any spots where the consecutive order number shows the next record, but the ticket numbers are not consecutive.
Finally, we recursively fill in the gaps; working from the start of a gap and adding new records until that gap's consecutive numbers no longer has a gap between the related ticket numbers.
I think this one gives you the easiest solution:
-- cte: lowest and highest ticket number present in Ticket
with cte as (
    select max(TicketNr) as maxnum, min(TicketNr) as minnum
    from Ticket
)
-- candidate numbers strictly between the bounds, minus the numbers that exist
-- NOTE(review): master..spt_values with Type = 'P' is believed to hold only a
-- small fixed range of numbers; confirm it covers the ticket range in use
select a.number
from master..spt_values a
cross join cte
where a.Type = 'P'
  and a.number < cte.maxnum
  and a.number > cte.minnum
except
select TicketNr
from Ticket
So After looking at all the solutions
I went with creating a temp table with a full range of number from Starting to Ending ticket and then select from the Temp table where the ticket number not in the ticket table.
The reason being I kept running in MAXRECURSION problems.

SQL Server - How to fill in missing column values

I have set of records at day level with 2 columns:
Invoice_date
Invoice_amount
For few records, value of invoice_amount is missing.
I need to fill invoice_amount values where it is NULL using this logic:
Look for next available invoice_amount (in dates later than the blank value record date)
For records with invoice_amount still blank (invoice_amount not present for future dates), look for most previous invoice_amount (in dates before the blank value date)
Note: We have consecutive multiple days where invoice_amount is blank in the dataset:
Use OUTER APPLY to find the next and previous non-NULL Invoice_Amount:
-- Fill each NULL Invoice_Amount with the nearest later non-NULL amount,
-- falling back to the nearest earlier one when no later amount exists.
update p
set Invoice_Amount = coalesce(nx.Invoice_Amount, pr.Invoice_Amount)
from Problem p
outer apply (
    -- closest following row that has an amount
    select top 1 later.Invoice_Amount
    from Problem later
    where later.Invoice_Date > p.Invoice_Date
      and later.Invoice_Amount is not null
    order by later.Invoice_Date
) nx
outer apply (
    -- closest preceding row that has an amount
    select top 1 earlier.Invoice_Amount
    from Problem earlier
    where earlier.Invoice_Date < p.Invoice_Date
      and earlier.Invoice_Amount is not null
    order by earlier.Invoice_Date desc
) pr
where p.Invoice_Amount is null
This updates the table in place. If you need a SELECT query instead, it can easily be modified into one.
Not efficient but seems to work. Try:
-- For every row without an amount: take the amount of the nearest later dated
-- row that has one, otherwise that of the nearest earlier dated row.
update test
set invoice_amount = coalesce(
        (
            select top 1 nxt.invoice_amount
            from test nxt
            where nxt.invoice_amount is not null
              and nxt.invoiceDate > test.invoiceDate
            order by nxt.invoiceDate
        ),
        (
            select top 1 prv.invoice_amount
            from test prv
            where prv.invoice_amount is not null
              and prv.invoiceDate < test.invoiceDate
            order by prv.invoiceDate desc
        )
    )
where invoice_amount is null;
As per given example you could use window function with self join
-- Fill NULL amounts from an adjacent row: first the neighbouring row with the
-- later date, then the neighbouring row with the earlier date.
-- NOTE(review): "table" stands in for the real table name and must be replaced.
-- NOTE(review): each window frame covers only the current row and ONE adjacent
-- row (ROWS BETWEEN 1 PRECEDING AND CURRENT ROW), so a NULL whose immediate
-- neighbours on both sides are also NULL gets NewAmount = NULL.
update t set t.amount = tt.NewAmount
from table t
inner join (
-- ordered by dates desc, "1 PRECEDING" is the row with the next later date
select Dates, coalesce(min(amount) over (order by dates desc ROWS BETWEEN 1 PRECEDING AND CURRENT ROW),
-- ordered by dates asc, "1 PRECEDING" is the row with the previous date
min(amount) over (order by dates asc ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)) NewAmount
from table t
) tt on tt.dates = t.dates
where t.amount is null

Find duplicates in MS SQL table

I know that this question has been asked several times but I still cannot figure out why my query is returning values which are not duplicates. I want my query to return only the records which have identical value in the column Credit. The query executes without any errors but values which are not duplicated are also being returned. This is my query:
-- Transactions joined to their financial account, restricted to Credit values
-- that occur more than once in _bvGLTransactionsFull.
Select
_bvGLTransactionsFull.AccountDesc,
_bvGLAccountsFinancial.Description,
_bvGLTransactionsFull.TxDate,
_bvGLTransactionsFull.Description,
_bvGLTransactionsFull.Credit,
_bvGLTransactionsFull.Reference,
_bvGLTransactionsFull.UserName
From
_bvGLAccountsFinancial Inner Join
_bvGLTransactionsFull On _bvGLAccountsFinancial.AccountLink =
_bvGLTransactionsFull.AccountLink
Where
_bvGLTransactionsFull.Credit
IN
-- The duplicate test runs over the WHOLE of _bvGLTransactionsFull, i.e. without
-- the TxDate / Reference / Master_Sub_Account filters in the HAVING clause, so
-- a Credit can qualify here yet appear only once in the filtered result.
(SELECT Credit AS NumOccurrences
FROM _bvGLTransactionsFull
GROUP BY Credit
HAVING (COUNT(Credit) > 1 ) )
-- Grouping by every selected column (plus a few more) collapses identical rows.
Group By
_bvGLTransactionsFull.AccountDesc, _bvGLAccountsFinancial.Description,
_bvGLTransactionsFull.TxDate, _bvGLTransactionsFull.Description,
_bvGLTransactionsFull.Credit, _bvGLTransactionsFull.Reference,
_bvGLTransactionsFull.UserName, _bvGLAccountsFinancial.Master_Sub_Account,
IsNumeric(_bvGLTransactionsFull.Reference), _bvGLTransactionsFull.TrCode
Having
-- NOTE(review): 01 / 11 / 2014 is unquoted, so it is an arithmetic expression
-- (integer division), not a date literal; a quoted date string is presumably intended.
_bvGLTransactionsFull.TxDate > 01 / 11 / 2014 And
_bvGLTransactionsFull.Reference Like '5_____' And
_bvGLTransactionsFull.Credit > 0.01 And
_bvGLAccountsFinancial.Master_Sub_Account = '90210'
That's because you're matching on the credit field back to your table, which contains duplicates. You need to isolate the rows that are duplicated with ROW_NUMBER:
-- CTE: every transaction with RN, its position among the rows that share the
-- same Credit value; RN > 1 therefore marks a repeat occurrence of that Credit.
;WITH CTE AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY Credit ORDER BY (SELECT NULL)) AS RN
    FROM _bvGLTransactionsFull
)
SELECT
    CTE.AccountDesc,
    fin.Description,
    CTE.TxDate,
    CTE.Description,
    CTE.Credit,
    CTE.Reference,
    CTE.UserName
FROM _bvGLAccountsFinancial AS fin
INNER JOIN CTE
    ON fin.AccountLink = CTE.AccountLink
-- row-level filters belong in WHERE (applied before grouping), not HAVING
WHERE CTE.RN > 1
  -- Quoted, language-neutral date literal; the unquoted 01 / 11 / 2014 was
  -- integer arithmetic. Assumes day/month/year (1 Nov 2014) - use '20140111'
  -- if 11 Jan 2014 was meant.
  AND CTE.TxDate > '20141101'
  AND CTE.Reference LIKE '5_____'
  AND CTE.Credit > 0.01
  AND fin.Master_Sub_Account = '90210'
-- grouping on all selected columns collapses fully identical rows
GROUP BY
    CTE.AccountDesc, fin.Description,
    CTE.TxDate, CTE.Description,
    CTE.Credit, CTE.Reference,
    CTE.UserName, fin.Master_Sub_Account,
    IsNumeric(CTE.Reference), CTE.TrCode
Just as a side note, I would consider using aliases to shorten your queries and make them more readable. Prefixing the table name before each column in a join is very difficult to read.
I trust your code in terms of extracting all data per your criteria. With this, let me have a different approach and see your script "as-is". So then, lets keep first all the records in a temp.
-- Materialise the filtered transactions into #tmpTable for the follow-up
-- duplicate checks.
Select
    _bvGLTransactionsFull.AccountDesc,
    -- SELECT ... INTO needs unique column names, so the two Description
    -- columns get distinct aliases
    _bvGLAccountsFinancial.Description As AccountDescription,
    _bvGLTransactionsFull.TxDate,
    _bvGLTransactionsFull.Description As TxDescription,
    _bvGLTransactionsFull.Credit,
    _bvGLTransactionsFull.Reference,
    _bvGLTransactionsFull.UserName
-- temp table
INTO #tmpTable
From
    _bvGLAccountsFinancial Inner Join
    _bvGLTransactionsFull On _bvGLAccountsFinancial.AccountLink =
        _bvGLTransactionsFull.AccountLink
Where
    _bvGLTransactionsFull.Credit
    IN
    (SELECT Credit AS NumOccurrences
     FROM _bvGLTransactionsFull
     GROUP BY Credit
     HAVING (COUNT(Credit) > 1 ) )
    -- Quoted, language-neutral date literal; the unquoted 01 / 11 / 2014 was
    -- integer arithmetic. Assumes day/month/year (1 Nov 2014) - use '20140111'
    -- if 11 Jan 2014 was meant.
    And _bvGLTransactionsFull.TxDate > '20141101'
    And _bvGLTransactionsFull.Reference Like '5_____'
    And _bvGLTransactionsFull.Credit > 0.01
    And _bvGLAccountsFinancial.Master_Sub_Account = '90210'
-- grouping on all selected columns collapses fully identical rows
Group By
    _bvGLTransactionsFull.AccountDesc, _bvGLAccountsFinancial.Description,
    _bvGLTransactionsFull.TxDate, _bvGLTransactionsFull.Description,
    _bvGLTransactionsFull.Credit, _bvGLTransactionsFull.Reference,
    _bvGLTransactionsFull.UserName, _bvGLAccountsFinancial.Master_Sub_Account,
    IsNumeric(_bvGLTransactionsFull.Reference), _bvGLTransactionsFull.TrCode
Then remove the "single occurrence" data by creating a row index and remove all those 1 time indexes.
-- Number the rows within each Credit value and drop the first row of every
-- group, leaving only the repeat occurrences.
SELECT numbered.*
FROM (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY src.Credit ORDER BY src.Credit) AS rowIdx,
        src.*
    FROM #tmpTable AS src
) AS numbered
WHERE numbered.rowIdx <> 1
You can change your preference through PARTITION BY <column name>.
Should you have any concerns, please raise it first as these are so far how I understood your case.
EDIT : To include those credits that has duplicates.
-- Every row of #tmpTable whose Credit value occurs more than once there.
-- A membership test (rather than a join against the repeat rows) returns each
-- row exactly once even when a Credit occurs three or more times.
SELECT
    tmp1.*
FROM #tmpTable tmp1
WHERE tmp1.Credit IN (
        SELECT Credit
        FROM #tmpTable
        GROUP BY Credit
        HAVING COUNT(*) > 1
      )

How do I refer to a record in sql that immediately precedes another record in a group?

I have a weird update query to write.
Here's the table
PK-ID (int) --- FK-ID (int) --- Value (int)
In my data set, if I group by FK-ID and order by PK-ID, suppose this is an example of one group:
5 --- 10 --- 23
7 --- 10 --- 49
8 --- 10 --- 81
Due to a bug in some old software, records 7 and 8 have incorrect values. The correct value for 7 is (49-23) = 26 and the correct value for 8 is (81-49) = 32. Record 5 is correct.
I need to update each record to subtract the value of the record immediately preceding it when it is grouped by FK-ID and ordered by PK-ID. If there is no preceding record I do not need to change the value.
Is there a way to write a general sql update query to accomplish this? How would I (conditionally) retrieve the value of the preceding record in the group? I'm using SQL server 2008.
Thanks!
-- ordered: tbl rows numbered by pk_id within each fk_id group
with ordered as (
    select *, row_number() over (partition by fk_id order by pk_id) as rn
    from tbl
)
-- subtract from every row the value of the row numbered one lower in its group;
-- the first row of a group has no partner in the join and is left untouched
update cur
set value = cur.value - prev.value
from ordered as cur
inner join ordered as prev
    on prev.fk_id = cur.fk_id
   and prev.rn = cur.rn - 1;
This is what I believe to be the correct answer, using a similar idea to the previous one. The toupdate subquery calculates the values, based on the rules in the question (update records with the same foreign key and consecutive primary keys). It does assume that the ids are numeric values of some sort.
-- toupdate: for every row whose pkid is exactly one more than another row of
-- the same fkid, the corrected value (own value minus that row's value)
with toupdate as (
    select t.pkid, t.value - tprev.value as newval
    from t
    join t tprev
        on t.pkid = tprev.pkid + 1 and t.fkid = tprev.fkid
)
update t
-- must use the column name defined in the CTE (newval)
set value = toupdate.newval
from toupdate
where t.pkid = toupdate.pkid
-- Subtract from each row the value of the row with the highest smaller PKID
-- in the same FKID group; rows without such a row subtract 0.
update t
set value = value - isnull(
        (
            select top 1 prev.value
            from t prev
            where prev.PKID < t.PKID
              and prev.FKID = t.FKID
            order by prev.PKID desc
        ),
        0
    );
Here is a SQLFiddle demo
I hope it should return what you want(sorry, I cannot try it the moment); you just need to incorporate it with UPDATE
-- cte1: rows numbered by ascending pk_id within each fk_id, so that num - 1
-- identifies the row immediately preceding the current one
WITH cte1 AS (
    SELECT
        pk_id,
        fk_id,
        value,
        ROW_NUMBER() OVER (PARTITION BY fk_id ORDER BY pk_id) AS num
    FROM your_table
)
-- each row (a) together with its value minus the preceding row's value (b);
-- the inner join leaves out the first row of every group, which needs no change
SELECT
    a.*,
    a.value - b.value AS valid_number
FROM cte1 a
INNER JOIN cte1 b ON (b.fk_id = a.fk_id AND b.num = a.num - 1)