Removing pairs of transactions - sql

I am attempting to remove transactions that have been reversed from a table. the table has Account, Date, Amount and Row. If a transaction has been reversed Account will match and Amount will be inverse of each other.
Example Table
Account Date Amount Row
12 1/1/18 45 72 -- Case 1
12 1/2/18 50 73
12 1/2/18 -50 74
12 1/3/18 52 75
15 1/1/18 51 76 -- Case 2
15 1/2/18 51 77
15 1/2/18 -51 78
15 1/2/18 51 79
18 1/2/18 50 80 -- Case 3
18 1/2/18 50 81
18 1/2/18 -50 82
18 1/2/18 -50 83
18 1/3/18 50 84
18 1/3/18 50 85
20 1/1/18 57 88 -- Case 4
20 1/2/18 57 89
20 1/4/18 -57 90
20 1/5/18 57 91
Desired Results Table
Account Date Amount Row
12 1/1/18 45 72 -- Case 1
12 1/3/18 52 75
15 1/1/18 51 76 -- Case 2
15 1/2/18 51 79
18 1/3/18 50 84 -- Case 3
18 1/3/18 50 85
20 1/1/18 57 88 -- Case 4
20 1/5/18 57 91
Removing all instances of inverse transactions does not work when there are multiple transactions when all other columns are the same. My attempt was to count all duplicate transactions, count of all inverse duplicate transactions, subtracting those to get the number of rows I needed from each transactions group. I was going to pull the first X rows but found in most cases I want the last X rows of each group, or even a mix (the first and last in Case 2).
I either need a method of removing pairs from the original table, or working from what I have so far, a method of distinguishing which transactions to pull.
Code so far:
--adding row Numbers
with a as (
select
account a,
date d,
amount f,
row_number() over(order by account, date) r
from table),
--counting Duplicates
b as (
select a.a, a.f, Dups
from a join (
select a, f, count(*) Dups
from a
group by a.a, a.f
having count(*)>1
) b
on a.a=b.a and
b.f=a.f
where a.f>0
),
--counting inverse duplicates
c as (
select a.a, a.f, InvDups
from a join (
select a, f, count(*) InvDups
from a
group by a.a, a.f
having count(*)>1
) b
on a.a=b.a and
-b.f=a.f
where a.f>0
),
--combining c and d to get desired number of rows of each transaction group
d as (
select
b.a, b.f, dups, InvDups, Dups-InvDups TotalDups
from b join c
on b.a=c.a and
b.f=c.f
),
--getting the number of rows from the beginning of each transaction group
select d.a, d.d, d.f
from
(select
a, d, f, row_number() over (group by a, d, f) r2
from a) e
join d
on e.a=d.a and
TotalDups<=r2

You can try this.
SELECT T_P.* FROM
( SELECT *, ROW_NUMBER() OVER(PARTITION BY Account, Amount ORDER BY [Row] ) RN from #MyTable WHere Amount > 0 ) T_P
LEFT JOIN
( SELECT *, ROW_NUMBER() OVER(PARTITION BY Account, Amount ORDER BY [Row] ) RN from #MyTable WHere Amount < 0 ) T_N
ON T_P.Account = T_N.Account
AND T_P.Amount = ABS(T_N.Amount)
AND T_P.RN = T_N.RN
WHERE
T_N.Account IS NULL

The following handles your three cases:
with t as (
select t.*,
row_number() over (partition by account, date, amount order by row) as seqnum
from table t
)
select t.*
from t
where not exists (select 1
from t t2
where t2.account = t.account and t2.date = t.date and
t2.amount = -t.amount and t2.seqnum = t.seqnum
);

Use This
;WITH CTE
AS
(
SELECT
[Row]
FROM YourTable YT
WHERE Amount > 0
AND EXISTS
(
SELECT 1 FROM YourTable WHERE Account = YT.Account
AND [Date] = YT.[Date]
AND (Amount+YT.Amount)=0
)
UNION ALL
SELECT
[Row]
FROM YourTable YT
WHERE Amount < 0
AND EXISTS
(
SELECT 1 FROM YourTable WHERE Account = YT.Account
AND [Date] = YT.[Date]
AND (Amount+YT.Amount)>0
)
)
SELECT * FROM YourTable
WHERE EXISTS
(
SELECT 1 FROM CTE WHERE [Row] = YourTable.[Row]
)

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects
DECLARE #maxGroupSize INT
SET #maxGroupSize = 100
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/#maxGroupSize AS GroupID
FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.
A recusive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
create table data
(
id int,
description nvarchar(20)
);
insert into data (id, description) values
( 1, 'qmlsdkjfqmsldk'),
( 2, 'mldskjf'),
( 3, 'qmsdlfkqjsdm'),
( 4, 'fmqlsdkfq'),
( 5, 'qdsfqsdfqq'),
( 6, 'mds'),
( 7, 'qmsldfkqsjdmfqlkj'),
( 8, 'qdmsl'),
( 9, 'mqlskfjqmlkd'),
(10, 'qsdqfdddffd');
Solution
For every recursion step evaluate (r.group_running_length + len(d.description) <= #group_max_length) if the previous group must be extended or a new group must be started in a case expression.
Set group target size to 40 to better fit the sample data.
declare #group_max_length int = 40;
with rcte as
(
select d.id,
d.description,
len(d.description) as description_length,
len(d.description) as running_length,
1 as group_id,
len(d.description) as group_running_length
from data d
where d.id = 1
union all
select d.id,
d.description,
len(d.description),
r.running_length + len(d.description),
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_id
else r.group_id + 1
end,
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_running_length + len(d.description)
else len(d.description)
end
from rcte r
join data d
on d.id = r.id + 1
)
select r.id,
r.description,
r.description_length,
r.running_length,
r.group_id,
r.group_running_length,
gs.group_sum
from rcte r
cross apply ( select max(r2.group_running_length) as group_sum
from rcte r2
where r2.group_id = r.group_id ) gs -- group sum
order by r.id;
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

SQL to allocate rows from one table to another

I would like to allocate rows from one table to another using a sort on TopLvlOrd field. The inputs are the [Orders] table and the [Defects] table. I would like to create an SQL that produces [Output]. Even after a bunch of online research I'm not sure how to do this. I'd prefer not to do a cursor, but will go there if necessary. Any ideas? Using SQL Server 2012.
Rules:
(1) Allocate by TopLvlOrd asc,
(2) Allocate one TopLvlOrd row per PegQty
[Orders]
TopLvlOrd IntOrd PegQty
========= ====== ======
67 25 3
120 25 1
111 25 1
16 25 1
127 25 1
127 65 1
127 85 1
[Defects]
DefectID IntOrd TotQty
======== ====== ======
1 25 10
2 25 10
3 25 10
4 25 10
5 25 10
6 25 10
7 25 10
8 25 10
9 25 10
10 25 10
11 65 1
12 85 2
13 85 2
[Output]
DefectID IntOrd TotQty TopLvlOrd
======== ====== ====== =========
1 25 10 16
2 25 10 67
3 25 10 67
4 25 10 67
5 25 10 111
6 25 10 120
7 25 10 127
8 25 10 NULL
9 25 10 NULL
10 25 10 NULL
11 65 1 127
12 85 2 127
13 85 2 NULL
This answers the original version of the question.
I think you want to join on an implicit sequence number, which you can add using row_number():
select d.*, o.*
from (select d.*,
row_number() over (partition by intord order by defectid) as seqnum
from defects d
) d left join
(select o.*,
row_number() over (partition by IntOrd order by TopLvlOrd) as seqnum
from orders o
) o
on d.intord = o.intord and d.seqnum = o.seqnum
Please another time make another question. Please check this query:
SELECT DefectID, IntOrd,
TotQty, TopLvlOrd, PegQty
FROM
(
SELECT B. DefectID, COALESCE (A.IntOrd, B. IntOrd) IntOrd,
B.TotQty, A. TopLvlOrd, A.PegQty FROM
(
SELECT TopLvlOrd ,IntOrd, ROW_NUMBER () OVER (PARTITION By IntOrd ORDER by
TopLvlOrd) Num, PegQty FROM Orders
) A
FULL JOIN
(
SELECT DefectID , IntOrd ,TotQty, ROW_NUMBER () OVER (PARTITION By IntOrd ORDER by
TotQty) Num FROM Orders
) B
ON A. IntOrd=B.IntOrd AND A.Num=B.Num
)C
JOIN
master.dbo.spt_values Tab
ON Tab.type='P' AND Tab.number<C.PegQty
SELECT B. DefectID, COALESCE (A.IntOrd, B. IntOrd) IntOrd,
B.TotQty, A. TopLvlOrd FROM
(
SELECT TopLvlOrd ,IntOrd , ROW_NUMBER () OVER (PARTITION By IntOrd ORDER by TopLvlOrd) Num FROM Orders
) A
FULL JOIN
(
SELECT DefectID , IntOrd ,TotQty, ROW_NUMBER () OVER (PARTITION By IntOrd ORDER by TotQty) Num FROM Orders
) B
ON A. IntOrd=B.IntOrd AND A. Num=B.Num

Pivot SQL with Rank

Basically i have the following query and i am trying to distinguish only the unique ranks from this:
WITH numbered_rows
as (
SELECT Claim,
reserve,
time,
RANK() OVER (PARTITION BY ClaimNumber ORDER BY time asc) as 'Rank'
FROM (
SELECT cc.Claim,
MAX(csd.time) as time,
csd.reserve
FROM ClaimData csd WITH (NOLOCK)
JOIN Core cc WITH (NOLOCK)
on cc.ClaimID = csd.ClaimID
GROUP BY cc.Claim, csd.Reserve
) as t
)
select *
from numbered_rows cur, numbered_rows prev
where cur.Claim= prev.Claim
and cur.Rank = prev.Rank -1
The results set I get is the following:
Claim reserve Time Rank Claim reserve Time Rank
--------------------------------------------------------------------
11 0 12/10/2012 1 11 15000 5/30/2013 2
34 2000 1/21/2013 1 34 750 1/31/2013 2
34 750 1/31/2013 2 34 0 3/31/2013 3
07 800000 5/9/2013 1 07 0 5/10/2013 2
But what I only want to see the following: (have the Claim 34 Rank 2 removed because its not the highest
Claim reserve Time Rank Claim reserve Time Rank
--------------------------------------------------------------------
11 0 12/10/2012 1 11 15000 5/30/2013 2
34 750 1/31/2013 2 34 0 3/31/2013 3
07 800000 5/9/2013 1 07 0 5/10/2013 2
I think you can do this by just reversing your logic, i.e. order by time DESC, switching cur and prev in your final select and changing -1 to +1 in your final select, then just limiting prev.rank to 1, therefore ensuring that the you only include the latest 2 results for each claim:
WITH numbered_rows AS
( SELECT Claim,
reserve,
time,
[Rank] = RANK() OVER (PARTITION BY ClaimNumber ORDER BY time DESC)
FROM ( SELECT cc.Claim,
[Time] = MAX(csd.time),
csd.reserve
FROM ClaimData AS csd WITH (NOLOCK)
INNER JOIN JOIN Core AS cc WITH (NOLOCK)
ON cc.ClaimID = csd.ClaimID
GROUP BY cc.Claim, csd.Reserve
) t
)
SELECT *
FROM numbered_rows AS prev
INNER JOIN numbered_rows AS cur
ON cur.Claim= prev.Claim
AND cur.Rank = prev.Rank + 1
WHERE prev.Rank = 1;

SQL Server query for finding the sum of 4 consecutive values

Can somebody help me in finding the sum of 4 consecutive values i.e rolling sum of last 4 values.
Like:
VALUE SUM
1 NULL
2 NULL
3 NULL
4 10
5 14
6 18
7 22
8 26
9 30
10 34
11 38
12 42
13 46
14 50
15 54
16 58
17 62
18 66
19 70
20 74
21 78
22 82
23 86
24 90
25 94
26 98
27 102
28 106
29 110
30 114
31 118
32 122
33 126
34 130
35 134
36 138
37 142
38 146
Thanks,
select sum(select top 4 Value from [table] order by Value Desc)
or, perhaps
select sum(value)
from [Table]
where Value >= (Max(Value) - 4)
I haven't actually tried either of those- and can't at the moment, but they should get you pretty close.
Quick attempt, which gets the results you've posted in your question (except the 1st 3 rows are not NULL). Assumes that VALUE field is unique and in ascending order:
-- Create test TABLE with 38 values in
DECLARE #T TABLE (Value INTEGER)
DECLARE #Counter INTEGER
SET #Counter = 1
WHILE (#Counter <= 38)
BEGIN
INSERT #T VALUES(#Counter)
SET #Counter = #Counter + 1
END
-- This gives the results
SELECT t1.VALUE, x.Val
FROM #T t1
OUTER APPLY(SELECT SUM(VALUE) FROM (SELECT TOP 4 VALUE FROM #T t2 WHERE t2.VALUE <= t1.VALUE ORDER BY t2.VALUE DESC) x) AS x(Val)
ORDER BY VALUE
At the very least, you should see the kind of direction I was heading in.
Assuming ID can give you the last 4 rows.
SELECT SUM([SUM])
FROM
(
SELECT TOP 4 [SUM] FROM myTable ORDER BY ID DESC
) foo
Each time you query it, it will read the last 4 rows.
If this is wrong (e.g. you want the sum of each consecutive 4 rows), then please give sample output
Following would work if your Value column is sequential
;WITH q (Value) AS (
SELECT 1
UNION ALL
SELECT q.Value + 1
FROM q
WHERE q.Value < 38
)
SELECT q.Value
, CASE WHEN q.Value >= 4 THEN q.Value * 4 - 6 ELSE NULL END
FROM q
otherwise you might use something like this
;WITH q (Value) AS (
SELECT 1
UNION ALL
SELECT q.Value + 1
FROM q
WHERE q.Value < 38
)
, Sequential (ID, Value) AS (
SELECT ID = ROW_NUMBER() OVER (ORDER BY Value)
, Value
FROM q
)
SELECT s1.Value
, [SUM] = s1.Value + s2.Value + s3.Value + s4.Value
FROM Sequential s1
LEFT OUTER JOIN Sequential s2 ON s2.ID = s1.ID - 1
LEFT OUTER JOIN Sequential s3 ON s3.ID = s2.ID - 1
LEFT OUTER JOIN Sequential s4 ON s4.ID = s3.ID - 1
Note that the table qin the examples is a stub for your actual table. The actual statement then becomes
;WITH Sequential (ID, Value) AS (
SELECT ID = ROW_NUMBER() OVER (ORDER BY Value)
, Value
FROM YourTable
)
SELECT s1.Value
, [SUM] = s1.Value + s2.Value + s3.Value + s4.Value
FROM Sequential s1
LEFT OUTER JOIN Sequential s2 ON s2.ID = s1.ID - 1
LEFT OUTER JOIN Sequential s3 ON s3.ID = s2.ID - 1
LEFT OUTER JOIN Sequential s4 ON s4.ID = s3.ID - 1

Finding top n for each unique row

I'm trying to get the top N records for each unique row of data in a table (I'm grouping on columns b,c and d, column a is the unique identifier and column e is the score of which i want the top 1 in this case).
a b c d e
2 38 NULL NULL 141
1 38 NULL NULL 10
1 38 1 NULL 10
2 38 1 NULL 1
1 38 1 8 10
2 38 1 8 1
2 38 16 NULL 140
2 38 16 12 140
e.g. from this data i would like to find the following rows:
a b c d e
2 38 NULL NULL 141
1 38 1 NULL 10
1 38 1 8 10
2 38 16 NULL 140
2 38 16 12 140
can someone please point me in the right direction to solve this?
Your example doesn't show, and you don't explain how you determine which row is the "top" one, so I've put ?????? in the query where you need to provide a ranking column, such as
a desc
for example. In any case, this is exactly what the analytic functions in SQL Server 2005 and later are for.
declare #howmany int = 3;
with TRanked (a,b,c,d,e,rk) as (
select
a,b,c,d,e,
rank() over (
partition by b,c,d
order by ???????
)
from T
)
select a,b,c,d,e
from TRanked
where rk <= #howmany;
The nulls are a pain, but something like this:
select * from table1 t1
where a in (
select top 1 a from table1 t2
where (t1.b = t2.b or (t1.b is null and t2.b is null))
and (t1.c = t2.c or (t1.c is null and t2.c is null))
and (t1.d = t2.d or (t1.d is null and t2.d is null))
order by e desc
)
or better yet:
select * from (
select *, seqno = row_number() over (partition by b, c, d order by e desc)
from table1
) a
where seqno = 1
I believe this will do what you said (extending the idea from here):
select b,c,d,e,
rank() over
(partition by b,c,d order by e desc) "rank"
from t1 where rank < 5
Cheers.