query to flag a column based on multiple conditions

query to flag a column based on multiple conditions - sql

I want to flag a column based on interdependent conditions. My input data is as below
id
status
rnk
A
Open
1
A
Delay
2
A
In
3
B
In
1
B
Out
2
B
Delay
3
B
count
4
C
In
1
C
Close
2
C
out
3
D
Close
1
D
Open
2
D
Delay
3
D
In
4
My output should look like
id
status
rnk
flag
A
Open
1
N
A
Delay
2
Y
A
In
3
N
B
In
1
N
B
Out
2
N
B
Delay
3
N
B
count
4
N
C
In
1
N
C
Close
2
N
C
out
3
N
D
Close
1
N
D
Open
2
N
D
Delay
3
Y
D
In
4
N
Logic - if status column is anything other than Delay then the flag will be N.
If the status column is Delay and any record with a lower rnk than the Delay record, within the same ID, has status 'Open' or 'Close', then the flag will be Y; otherwise N.
Example - for ID 'A' we have a status 'Delay' and its rank is 2, now we need to check if the status of A with rank < 2 is either 'Open' or 'Close' then flag 'Delay' to 'Y'
please note: rnk column is already populated in the table based on different logic
Below is the query I have tried, but I am getting flag 'N' for all the records,
-- Asker's attempt, kept as posted: it yields 'N' for every row.
SELECT
*,
-- Branch 1: every row whose status is not 'Delay' is flagged 'N' here.
CASE WHEN status != 'Delay' THEN 'N'
-- Branch 2 is only evaluated when branch 1 did not match, yet it also demands
-- status IN ('Open','Close') on the same row, so it cannot be true for a
-- 'Delay' row and 'Y' is never produced.
-- The windowed MAX is the highest rnk of a 'Delay' row within the id
-- (-1 when the id has none); the single-argument COALESCE supplies no fallback.
WHEN rnk < (COALESCE(MAX(CASE WHEN status = 'Delay' THEN rnk ELSE -1 END) OVER(PARTITION BY id)))
AND status IN ('Open','Close') THEN 'Y'
ELSE 'N'
END AS flag
FROM TABLE

A correlated subquery is more helpful here:
-- Flag a 'Delay' row 'Y' only when an earlier row (lower rnk) of the same id
-- has status 'Open' or 'Close'; every other row is flagged 'N'.
SELECT
    t1.*,
    CASE
        -- Testing status = 'Delay' positively keeps rows with a NULL status on
        -- the 'N' branch (status != 'Delay' is UNKNOWN for NULL and would fall
        -- through to the EXISTS test).
        WHEN t1.status = 'Delay'
         AND EXISTS (
                SELECT 1
                FROM Table1 ta1
                WHERE ta1.id = t1.id
                  AND ta1.status IN ('Open','Close')  -- status of the earlier row
                  AND ta1.rnk < t1.rnk
             ) THEN 'Y'
        ELSE 'N'
    END AS flag
FROM Table1 t1
id
status
rnk
flag
A
Open
1
N
A
Delay
2
Y
A
In
3
N
B
In
1
N
B
Out
2
N
B
Delay
3
N
B
count
4
N
C
In
1
N
C
Close
2
N
C
out
3
N
D
Close
1
N
D
Open
2
N
D
Delay
3
Y
D
In
4
N

Consider below simple approach
-- BigQuery: a 'Delay' row is flagged 'Y' when the running count of
-- 'Open'/'Close' rows of its id, ordered by rnk, is above zero.
select
  *,
  case
    when status = 'Delay'
     and countif(status in ('Open', 'Close')) over (partition by id order by rnk) > 0
    then 'Y'
    else 'N'
  end as flag
from your_table
if applied to sample data in your question - output is

Use a grouped Common Table Expression that summarises, per id, the rows with status Open or Close (a rank and a row count), and left-join the base table with it:
-- your input, don't use in real query...
WITH
indata(id,status,rnk) AS (
            SELECT 'A','Open',1
  UNION ALL SELECT 'A','Delay',2
  UNION ALL SELECT 'A','In',3
  UNION ALL SELECT 'B','In',1
  UNION ALL SELECT 'B','Out',2
  UNION ALL SELECT 'B','Delay',3
  UNION ALL SELECT 'B','count',4
  UNION ALL SELECT 'C','In',1
  UNION ALL SELECT 'C','Close',2
  UNION ALL SELECT 'C','out',3
  UNION ALL SELECT 'D','Close',1
  UNION ALL SELECT 'D','Open',2
  UNION ALL SELECT 'D','Delay',3
  UNION ALL SELECT 'D','In',4
)
-- input ends here, real query starts below
-- replace following comma with "WITH" ...
,
-- One row per id that has at least one 'Open'/'Close' row.
-- first_rnk is the LOWEST rank of such a row: a 'Delay' row has an earlier
-- 'Open'/'Close' row exactly when its rnk is above that minimum. (Using the
-- maximum would miss ids that also have an 'Open'/'Close' row after the Delay.)
prev_stats AS (
  SELECT
    id
  , MIN(rnk) AS first_rnk
  , COUNT(*) AS num
  FROM indata
  WHERE status IN ('Open','Close')
  GROUP BY id
)
SELECT
  indata.*
  -- The left join leaves prev_stats.num NULL when no earlier 'Open'/'Close'
  -- row exists, so the comparison is not true and the row is flagged 'N'.
  -- Rows whose status is not 'Delay' (or is NULL) are always 'N'.
, CASE
    WHEN indata.status = 'Delay' AND prev_stats.num > 0 THEN 'Y'
    ELSE 'N'
  END AS flag
FROM indata
LEFT JOIN prev_stats
       ON indata.id  = prev_stats.id
      AND indata.rnk > prev_stats.first_rnk
ORDER BY indata.id, indata.rnk
;
Result:
id
status
rnk
flag
A
Open
1
N
A
Delay
2
Y
A
In
3
N
B
In
1
N
B
Out
2
N
B
Delay
3
N
B
count
4
N
C
In
1
N
C
Close
2
N
C
out
3
N
D
Close
1
N
D
Open
2
N
D
Delay
3
Y
D
In
4
N

Related

How to get specific records in Postgres

In Postgres I have two tables:
Table A { int keyA, Text name}
Table B { int keyB, int keyA, char mark, date start, date end}
Mark from Table B could be 'X', 'Y', 'Z'.
I want to get every record 'X' with dates but only one from 'Y', 'Z'. Also if there are 'X', 'Y', 'Z' i want only 'X'.
From:
keyB
keyA
mark
start
end
1
1
X
15-01-2023
16-01-2023
2
1
X
17-01-2023
18-01-2023
3
1
Y
null
null
4
1
Z
null
null
5
2
Y
null
null
6
2
Z
null
null
7
2
Y
null
null
8
3
Z
null
null
9
3
Y
null
null
10
4
X
19-01-2023
20-01-2023
I want to get
keyB
keyA
mark
start
end
1
1
X
15-01-2023
16-01-2023
2
1
X
17-01-2023
18-01-2023
5
2
Y
null
null
8
3
Z
null
null
10
4
X
19-01-2023
20-01-2023
I tried:
1.
-- Attempt 1: one scalar subquery per date column. A scalar subquery may
-- return at most one row, so this fails for a keyA with several 'X' rows.
Select A.name,
(select b2.start from B b2 where b2.keyA = A.keyA and b2.mark = 'X') as Start,
(select b2.end from B b2 where b2.keyA = A.keyA and b2.mark = 'X') as End
from A order by name;
Order is important. I need to have name first.
There is a problem. The subqueries return more than one record, so I would have to add LIMIT 1. But I want to get every X, not only one.
If I do this
-- Attempt 2: plain join of A to B on the shared key keyA (B.keyB is B's own
-- key, not the reference to A). Returns every mark: X, Y and Z.
Select A.name, B.start, B.end
from A inner join B on A.keyA = B.keyA
I'll have X, Y, Z and as I mentioned I want only X or one from Y or Z.
Any idea how should I solve this?

Use the row_number function with your join query as the following:
-- Number each keyA's rows with 'X' marks first (then by keyB). Keep every
-- 'X' row, plus the first-numbered row of each keyA.
with ranked as (
    select
        A.name,
        B.*,
        row_number() over (
            partition by B.keyA
            order by case when B.mark = 'X' then 1 else 2 end, B.keyb
        ) as rn
    from tableA A
    join tableB B
      on B.keyA = A.keyA
)
select name, keyB, keyA, mark, start_dt, end_dt
from ranked
where rn = 1 or mark = 'X'
order by keyb
See demo

Find missing number from a sequence for each value in hive

I have a table like below-
User-id | sequence
1 0
1 1
1 2
2 1
3 2
here 0 1 and 2 are fixed sequence which a user can at max have, now i want a flag as N where any sequence is missing for a user else flag should be Y.I can say output should like-
1 0 Y
1 1 Y
1 2 Y
2 0 N
2 1 Y
2 2 N
3 0 N
3 1 N
3 2 Y

Select the distinct user_id values, cross join them with the sequence (0, 1, 2) to get all user + sequence combinations, then left join with your table to calculate the flag:
-- Pair every distinct user with each sequence value 0, 1, 2; the flag is 'Y'
-- when that pair exists in mytable and 'N' when it does not.
with users as (
    select distinct user_id from mytable
),
sequences as (
    select stack(3, 0, 1, 2) as sequence
),
user_sequences as ( --all user+sequence combinations
    select users.user_id, sequences.sequence
    from users
    cross join sequences
)
select us.user_id,
       us.sequence,
       case when t.user_id is not null then 'Y' else 'N' end flag
from user_sequences us
left join mytable t
       on us.user_id = t.user_id
      and us.sequence = t.sequence
order by us.user_id, us.sequence;
Demo with your data example:
-- Demo: the same user x sequence flag query, run against inline sample rows.
with
mytable as ( --use your table instead of this
    select stack(5,
                 1, 0,
                 1, 1,
                 1, 2,
                 2, 1,
                 3, 2) as (user_id,sequence)
),
users as (
    select distinct user_id from mytable
),
sequences as (
    select stack(3, 0, 1, 2) as sequence
),
user_sequences as ( --all user+sequence combinations
    select users.user_id, sequences.sequence
    from users
    cross join sequences
)
select us.user_id,
       us.sequence,
       case when t.user_id is not null then 'Y' else 'N' end flag
from user_sequences us
left join mytable t
       on us.user_id = t.user_id
      and us.sequence = t.sequence
order by us.user_id, us.sequence;
Result:
user_id sequence flag
1 0 Y
1 1 Y
1 2 Y
2 0 N
2 1 Y
2 2 N
3 0 N
3 1 N
3 2 Y

SQL Server - group and number matching contiguous values

I have a list of stock transactions and I am using Over(Partition By) to calculate the running totals (positions) by security. Over time a holding in a particular security can be long, short or flat. I am trying to find an efficient way to extract only the transactions relating to the current position for each security.
I have created a simplified sqlfiddle to show what I have so far. The cte query generates the running total for each security (code_id) and identifies when the holdings are long (L), short (s) or flat (f). What I need is to group and number matching contiguous values of L, S or F for each code_id.
What I have so far is this:
-- Running position per security (code_id, in id order), then a label per row:
-- 'L' when the position is positive, 'S' when negative, 'F' otherwise.
; WITH RunningTotals AS
(
    SELECT
        *,
        SUM(qty) OVER (PARTITION BY code_id ORDER BY id) AS RunningTotal
    FROM TradeData
), LongShortFlat AS
(
    SELECT
        *,
        CASE
            WHEN RunningTotal < 0 THEN 'S'
            WHEN RunningTotal > 0 THEN 'L'
            ELSE 'F'
        END AS LSF
    FROM RunningTotals
)
SELECT r.*
FROM LongShortFlat AS r
I think what I need to do is create a GroupNum column by applying a row_number for each group of L, S and F within each code_id so the results look like this:
id code_id qty RunningTotal LSF GroupNum
1 1 5 5 L 1
2 1 2 7 L 1
3 1 7 14 L 1
4 1 -3 11 L 1
5 1 -5 6 L 1
6 1 -6 0 F 2
7 1 5 5 L 3
8 1 5 10 L 3
9 1 -2 8 L 3
10 1 -4 4 L 3
11 2 5 5 L 1
12 2 3 8 L 1
13 2 -4 4 L 1
14 2 -2 2 L 1
15 2 -2 0 F 2
16 2 6 6 L 3
17 2 -5 1 L 3
18 2 -5 -4 S 4
19 2 2 -2 S 4
20 2 4 2 L 5
21 2 -5 -3 S 6
22 2 -2 -5 S 6
23 3 5 5 L 1
24 3 2 7 L 1
25 3 1 8 L 1
I am struggling to generate the GroupNum column.
Thanks in advance for your help.

[Revised]
Sorry about that, I read your question too quickly. I came up with a solution using a recursive common table expression (below), then saw that you've worked out a solution using LAG. I'll post my revised query anyway, for posterity. Either way, the resulting query is (imho) pretty ugly.
;WITH cteBaseAgg
 as (
    -- Build the "sum increases over time" data: one row per trade with the
    -- running total of its code_id and the resulting L / S / F label.
    -- RecurseKey numbers the trades of each code_id 1, 2, 3, ... in id order.
    SELECT
       row_number() over (partition by td.code_id order by td.Id)  RecurseKey
      ,td.code_id
      ,td.id
      ,td.qty
      ,sum(tdPrior.qty)  RunningTotal
      ,case
         when sum(tdPrior.qty) > 0 then 'L'
         when sum(tdPrior.qty) < 0 then 'S'
         else 'F'
       end  LSF
     from dbo.TradeData td
      inner join dbo.TradeData tdPrior
       on tdPrior.code_id = td.code_id  -- All for this code_id
        and tdPrior.id <= td.Id         -- For this and any prior Ids
     group by
       td.code_id
      ,td.id
      ,td.qty
    )
  ,cteRecurse
   as (
    -- "Set" the first row for each code_id
    SELECT
       RecurseKey
      ,code_id
      ,id
      ,qty
      ,RunningTotal
      ,LSF
      ,1  GroupNum
     from cteBaseAgg
     where RecurseKey = 1
    -- For each successive row in each set, check if need to increment GroupNum
    UNION ALL SELECT
       agg.RecurseKey
      ,agg.code_id
      ,agg.id
      ,agg.qty
      ,agg.RunningTotal
      ,agg.LSF
      ,rec.GroupNum + case when rec.LSF = agg.LSF then 0 else 1 end
     from cteBaseAgg agg
      inner join cteRecurse rec
       on rec.code_id = agg.code_id
        and agg.RecurseKey - 1 = rec.RecurseKey
    )
-- Show results
SELECT
   id
  ,code_id
  ,qty
  ,RunningTotal
  ,LSF
  ,GroupNum
 from cteRecurse
 order by
   code_id
  ,id
 -- The recursion goes one level deeper per trade of a code_id; lift the
 -- default limit of 100 levels so longer histories do not abort the query.
 option (maxrecursion 0)

Sorry for making this question a bit more complicated than it needed to be but for the sake of closure I have found a solution using the lag function.
In order to achieve what I wanted I continued my cte above with the following:
-- Continuation of the CTE chain above; expects LongShortFlat to be defined.
, a as
(
SELECT
*,
-- LSF of the previous row of the same code_id (in id order); the first row
-- of a code_id defaults to its own LSF, so it never counts as a change.
Lag(LSF, 1, LSF) OVER(Partition By code_id ORDER BY id) AS prev_LSF,
-- NOTE(review): the window is partitioned by code_id, so prev_code looks like
-- it always equals code_id - confirm whether this column is needed.
Lag(code_id, 1, code_id) OVER(Partition By code_id ORDER BY id) AS prev_code
FROM
LongShortFlat
), b as
(
SELECT
id,
LSF,
code_id,
-- Running count of LSF changes within the code_id: rows of one contiguous
-- L / S / F run share a value. The first run of each code_id gets 0.
Sum(CASE
WHEN LSF <> prev_LSF AND code_id = prev_code
THEN 1
ELSE 0
END) OVER(Partition By code_id ORDER BY id) AS grp
FROM
a
)
select * from b order by id
Here is the updated sqlfiddle.

SQL get the closest two rows within duplicate rows

I have following table
ID Name Stage
1 A 1
1 B 2
1 C 3
1 A 4
1 N 5
1 B 6
1 J 7
1 C 8
1 D 9
1 E 10
I need output as below with parameters A and N need to select closest rows where difference between stage is smallest
ID Name Stage
1 A 4
1 N 5
I need to select rows where difference between stage is smallest

This query can make use of an index on (name, stage) efficiently:
-- For every 'A' row, look up the nearest 'N' row at or above its stage and
-- the nearest 'N' row below it, then keep the single A/N pair with the
-- smallest stage difference. The final SELECT unpivots that pair to two rows.
WITH cte AS (
   SELECT TOP 1
          a.id AS a_id, a.name AS a_name, a.stage AS a_stage
        , n.id AS n_id, n.name AS n_name, n.stage AS n_stage
   FROM   tbl a
   CROSS  APPLY (
      -- T-SQL accepts ORDER BY only at the end of a UNION, so each TOP 1
      -- lookup is wrapped in its own derived table.
      SELECT nxt.*
      FROM  (
         SELECT TOP 1 t.*, t.stage - a.stage AS diff
         FROM   tbl t
         WHERE  t.name = 'N'
         AND    t.stage >= a.stage
         ORDER  BY t.stage
         ) nxt
      UNION ALL
      SELECT prv.*
      FROM  (
         SELECT TOP 1 t.*, a.stage - t.stage AS diff
         FROM   tbl t
         WHERE  t.name = 'N'
         AND    t.stage < a.stage
         ORDER  BY t.stage DESC
         ) prv
      ) n
   WHERE  a.name = 'A'
   ORDER  BY diff
   )
SELECT a_id AS id, a_name AS name, a_stage AS stage FROM cte
UNION ALL
SELECT n_id, n_name, n_stage FROM cte;
SQL Server uses CROSS APPLY in place of standard-SQL LATERAL.
In case of ties (equal difference) the winner is arbitrary, unless you add more ORDER BY expressions as tiebreaker.
dbfiddle here

This solution works if you know the minimum difference is always 1:
-- Pair each 'A' row (a) with each 'N' row (b) and keep the pairs whose
-- stages are exactly one apart, in either direction.
SELECT *
FROM myTable as a
CROSS JOIN myTable as b
where a.name = 'A'
  and b.name = 'N'
  and abs(a.stage - b.stage) = 1;
a.ID a.Name a.Stage b.ID b.Name b.Stage
1 A 4 1 N 5
Or, more generally, if you don't know the minimum difference:
-- Pair each 'A' row (a) with each 'N' row (b) and keep the pairs whose
-- absolute stage difference equals the smallest such difference overall.
SELECT *
FROM myTable as a
CROSS JOIN myTable as b
where a.name = 'A'
  and b.name = 'N'
  and abs(a.stage - b.stage) = (SELECT min(abs(a2.stage - b2.stage))
                                FROM myTable as a2
                                CROSS JOIN myTable as b2
                                where a2.name = 'A'
                                  and b2.name = 'N')

"Cluster" Code Help in SQL

I am relative newcomer to SQL, but have gained many useful ideas through the site. Now I'm stuck on a piece of code that seems simple enough, but for some reason I can't wrap my head around it.
I am trying to create a third column (Column Z) based off of the first two columns below:
Column X Column Y
-------------------
1 a
1 b
1 c
2 a
2 d
2 e
2 f
4 b
5 i
5 c
3 g
3 h
6 j
6 k
6 l
What i need to have happen in Column Z:
For each individual value found in Column Y, note the value of Column X
Likewise, for each individual value in Column X, note the value of Column Y
Then, cluster (RANK/ROW_NUMBER?) these into groups seen below:
Column X Column Y Column Z
-----------------------------
1 a 1
1 b 1
1 c 1
2 a 1
2 d 1
2 e 1
2 f 1
4 b 1
5 i 1
5 c 1
3 g 2
3 h 2
6 j 3
6 k 3
6 l 3
I hope I've been clear enough without over-complicating things. My head has been spinning all morning. Let me know if anyone needs any more info.
Greatly appreciated in advance!

I have faced exactly this problem for some analyses in the past. The only way I could get it to work is by doing a loop, that incrementally adds in the information.
The loop assigns the minimum "x" value within each group as the group id. By your rules, this is guaranteed to be unique. It starts by assigning the current x value to z. It then finds the minimum z along the x and y dimensions. It repeats this process until no records change.
Given your data, the following is an outline of how to do it:
-- Start with every row's group id equal to its own x value.
update t set z = x;

-- Repeatedly pull each row's z down to the smallest z seen among rows that
-- share its x or its y, until a pass changes nothing.
while 1=1
begin
    with toupdate as (
          select t.*,
                 min(z) over (partition by x) as idx,
                 min(z) over (partition by y) as idy
          from t
         )
    update toupdate
        set z = (case when idx < idy then idx else idy end)
        where z > idx or z > idy;

    -- @@ROWCOUNT must be read immediately after the UPDATE it reports on.
    if (@@ROWCOUNT = 0) break;
end;

-- Renumber the surviving group ids 1, 2, 3, ... in ascending order.
;with a as
(
  select z, dense_rank() over (order by z) newZ from t
)
update a set z = newZ

Maybe not the best way, but it works
SQLFiddle http://sqlfiddle.com/#!3/99532/1
-- Assigns a cluster number Z to each (X, Y) row of #t.
-- cte: numbers the rows; ORDER BY (SELECT NULL) specifies no particular order.
-- NOTE(review): the result depends on that row numbering - presumably it is
-- expected to follow insertion order; confirm.
;WITH cte AS (
SELECT *, ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS row_nb
FROM #t
)
-- c2: ex = 1 when the row's Y value already appears on a lower-numbered row.
, c2 AS (
SELECT e1.*
,CASE WHEN EXISTS(SELECT * FROM cte e2 WHERE e1.Y = e2.Y and e2.row_nb < e1.row_nb) THEN 1 ELSE 0 END as ex
FROM cte e1
)
-- c3: one row per X. ex becomes 1 when none of the X's rows repeats an
-- earlier Y, and 0 otherwise; max_row_nb is the X's highest row number.
, c3 AS (
SELECT X,1 - SIGN(SUM(ex)) as ex,MAX(row_nb) as max_row_nb
FROM c2
GROUP BY X
)
-- Z = running total of c3.ex over the X groups, taken in max_row_nb order.
SELECT
cte.X,cte.Y
,(SELECT SUM(cc3.ex) FROM c3 cc3 where cc3.max_row_nb<= c3.max_row_nb) AS Z
FROM cte
INNER JOIN c3
ON c3.X = cte.X
ORDER BY cte.row_nb

-- Sample data in a table variable (x, y given; z is to be calculated).
declare @t table (x tinyint, y char(1), z tinyint)
insert @t (x,y) values(1,'a'),(1,'b'),(1,'c'),(2,'a'),(2,'d'),(2,'e'),(2,'c'),
(2,'f'),(4,'b'),(5,'i'),(5,'c'),(3,'g'),(3,'h'),(6,'j'),(6,'k'),(6,'l'),(7,'v')

-- a: links each x to the smallest x that shares one of its y values,
--    keeping only links that point to a smaller x.
;with a as
(
  select x,parent from
  (
    select x, min(x) over (partition by y) parent from @t
  ) a
  where x > parent
), b as
(
  -- b: follows the links recursively, so every x also reaches its parent's
  --    parents.
  select x, parent from a
  union all
  select a.x, b.parent
  from a join b on a.parent = b.x
), c as
(
  -- c: the smallest x reachable from each x.
  select x, min(parent) parent
  from b
  group by x
), d as
(
  -- d: an x without any link is its own group; groups are numbered 1, 2, ...
  --    in ascending order of their smallest x.
  select t.x,t.y, t.z,
         dense_rank() over (order by coalesce(c.parent, t.x)) calculatedZ
  from @t t
  left join c on t.x = c.x
)
select x,y,calculatedZ as z from d
-- if you want to update instead of selecting, replace last line with:
-- update d set z = calculatedZ
-- select x,y,z from @t
option (maxrecursion 0)
Result:
x y z
1 a 1
1 b 1
1 c 1
2 a 1
2 d 1
2 e 1
2 c 1
2 f 1
4 b 1
5 i 1
5 c 1
3 g 2
3 h 2
6 j 3
6 k 3
6 l 3
8 j 3
7 v 4

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

query to flag a column based on multiple conditions - sql

Consider below simple approach select *, if( status = 'Delay' and countif(status in ('Open', 'Close')) over(partition by id order by rnk) > 0, 'Y', 'N') as flag from your_table if applied to sample data in your question - output is

Related

How to get specific records in posgtres

Find missing number from a sequence for each value in hive

SQL Server - group and number matching contiguous values

SQL get the closest two rows within duplicate rows

"Cluster" Code Help in SQL

Categories

Resources