Compare previous row to set a "StatusFlag" - sql

I want to compare the previous row in my table on "ExtractTypeNum". So if it changed from the previous the flag should be set to "isChanged".
I have tried to develop this with a case statement but without any success.
-- Asker's attempt at flagging rows whose extractTypeNum differs from the previous row.
-- NOTE(review): there is no comma between `*` and the CASE expression.
select *
(case
-- compares rownum with itself minus one on the same row, so this branch never matches
when rownum = rownum-1
then
(case when
-- a column compared with itself on the same row; this is never true
extractTypeNum <> extractTypeNum
then Null
else 'IsChanged' end)
-- same self-comparison as above, with the 1 written as a string literal
when rownum = rownum -'1' then '3'
else '4' end) as StatusFlag
from myTable
This is the structure of the table and some sample data:
-- Sample table for the "compare with previous row" question.
-- rownum1 is the unique row sequence, so it carries the primary key;
-- ExtractTypeNum repeats across rows and therefore cannot be the key.
-- PrevFileOrderNum, NextFileOrderNum and Statusflag1 receive NULLs in the
-- sample rows below, so they are declared nullable.
CREATE TABLE mytable(
ExtractTypeNum INTEGER NOT NULL
,FileOrderNum VARCHAR(11) NOT NULL
,PrevFileOrderNum VARCHAR(11) NULL
,NextFileOrderNum VARCHAR(11) NULL
,rownum1 INTEGER NOT NULL PRIMARY KEY
,Statusflag1 VARCHAR(9) NULL
);
INSERT INTO mytable(ExtractTypeNum,FileOrderNum,PrevFileOrderNum,NextFileOrderNum,rownum1,Statusflag1)
VALUES (1,'2016-09-191',NULL,'2016-09-192',1,'IsInitial');
INSERT INTO mytable(ExtractTypeNum,FileOrderNum,PrevFileOrderNum,NextFileOrderNum,rownum1,Statusflag1)
VALUES (2,'2016-09-192','2016-09-191','2016-09-201',2,NULL);
INSERT INTO mytable(ExtractTypeNum,FileOrderNum,PrevFileOrderNum,NextFileOrderNum,rownum1,Statusflag1)
VALUES (1,'2016-09-201','2016-09-192','2016-09-211',3,NULL);
INSERT INTO mytable(ExtractTypeNum,FileOrderNum,PrevFileOrderNum,NextFileOrderNum,rownum1,Statusflag1)
VALUES (1,'2016-09-211','2016-09-201','2016-09-222',4,NULL);
INSERT INTO mytable(ExtractTypeNum,FileOrderNum,PrevFileOrderNum,NextFileOrderNum,rownum1,Statusflag1)
VALUES (2,'2016-09-222','2016-09-211',NULL,5,'IsLatest');
Expected output
+----------------+--------------+------------------+------------------+--------+-------------+
| ExtractTypeNum | FileOrderNum | PrevFileOrderNum | NextFileOrderNum | rownum | Statusflag1 |
+----------------+--------------+------------------+------------------+--------+-------------+
| 1 | 2016-09-191 | NULL | 2016-09-192 | | IsInitial |
| 2 | 2016-09-192 | 2016-09-191 | 2016-09-201 | | IsChanged |
| 1 | 2016-09-201 | 2016-09-192 | 2016-09-211 | | IsChanged |
| 1 | 2016-09-211 | 2016-09-201 | 2016-09-222 | | NULL |
| 2 | 2016-09-222 | 2016-09-211 | NULL | | IsLatest |
+----------------+--------------+------------------+------------------+--------+-------------+

If you are using SQL Server 2012 or later, then you can try the following query:
-- Flag every row by its position (first / last, sequenced by rownum1) or by a
-- change of ExtractTypeNum relative to the preceding row.
;WITH sequenced AS (
    SELECT
        ExtractTypeNum,
        FileOrderNum,
        PrevFileOrderNum,
        NextFileOrderNum,
        rownum1,
        Statusflag1,
        ROW_NUMBER() OVER (ORDER BY rownum1) AS rn,
        COUNT(*) OVER () AS totalCnt,
        -- ExtractTypeNum of the preceding row; NULL on the first row
        LAG(ExtractTypeNum) OVER (ORDER BY rownum1) AS prevExtractTypeNum
    FROM mytable
)
SELECT
    s.ExtractTypeNum,
    s.FileOrderNum,
    s.PrevFileOrderNum,
    s.NextFileOrderNum,
    s.rownum1,
    s.Statusflag1,
    CASE
        WHEN s.rn = 1 THEN 'IsInitial'
        WHEN s.rn = s.totalCnt THEN 'IsLatest'
        WHEN s.prevExtractTypeNum <> s.ExtractTypeNum THEN 'IsChanged'
        ELSE NULL  -- unchanged middle rows stay unflagged
    END AS StatusFlag
FROM sequenced AS s

Related

Condense or merge rows with null values not using group by

Let's say I have a select which returns the following Data:
-- "table" is a reserved word, so the placeholder table name is bracket-quoted
select nr, name, val_1, val_2, val_3
from [table]
Nr. | Name | Value 1 | Value 2 | Value 3
-----+------------+---------+---------+---------
1 | Max | 123 | NULL | NULL
1 | Max | NULL | 456 | NULL
1 | Max | NULL | NULL | 789
9 | Lisa | 1 | NULL | NULL
9 | Lisa | 3 | NULL | NULL
9 | Lisa | NULL | NULL | Hello
9 | Lisa | 9 | NULL | NULL
I'd like to condense the rows down to the bare minimum.
I want the following result:
Nr. | Name | Value 1 | Value 2 | Value 3
-----+------------+---------+---------+---------
1 | Max | 123 | 456 | 789
9 | Lisa | 1 | NULL | Hello
9 | Lisa | 3 | NULL | NULL
9 | Lisa | 9 | NULL | NULL
For condensing the rows with Max (Nr. 1) a group by of the max values would help.
-- Collapse each (nr, name) group into one row, taking the MAX of every value column.
-- "table" is a reserved word, so the placeholder table name is bracket-quoted.
select nr, name, max(val_1) as val_1, max(val_2) as val_2, max(val_3) as val_3
from [table]
group by nr, name
But I am unsure how to get the desired results for Lisa (Nr. 9). The row for Lisa contains a value in the Value 3 column, in this example it's condensed with the first row that matches Nr and Name and has a Null value in Value 3.
I'm thankful for every input!
Basic principle is same as Vladimir's solution. This uses UNPIVOT and PIVOT
-- Step 1: turn the three value columns into (col, val) rows and number the
--         values inside every nr / name / col combination.
-- Step 2: pivot back, with rn kept as a row key alongside nr and name.
with unpivoted as
(
    select
        nr,
        name,
        col,
        val,
        row_number() over (partition by nr, name, col order by val) as rn
    from [table]
    unpivot (val for col in (val_1, val_2, val_3)) as u
)
select *
from (
    select nr, name, rn, col, val
    from unpivoted
) as d
pivot (max(val) for col in ([val_1], [val_2], [val_3])) as p
Here is one way to do it. Assign a unique row number for each column by sorting them in such a way that NULLs come last and then join them back together using these row numbers and remove rows with all NULLs.
Run just the CTE first and examine the intermediate result to understand how it works.
Sample data
-- Sample data. #T is a local temporary table, so it is created with CREATE TABLE
-- (DECLARE ... TABLE is only valid for @-prefixed table variables).
CREATE TABLE #T (Nr varchar(10), Name varchar(10), V1 varchar(10), V2 varchar(10), V3 varchar(10));
INSERT INTO #T (Nr, Name, V1, V2, V3) VALUES
('1', 'Max ', '123' , NULL , NULL ),
('1', 'Max ', NULL , '456', NULL ),
('1', 'Max ', NULL , NULL , '789'),
('9', 'Lisa', '1' , NULL , NULL ),
('9', 'Lisa', '3' , NULL , NULL ),
('9', 'Lisa', NULL , NULL , 'Hello'),
('9', 'Lisa', '9' , NULL , NULL );
Query
-- Number the values of each column independently within a Nr (non-NULL values
-- first), then line the three columns up again by those numbers.
WITH numbered AS
(
    SELECT
        Nr,
        Name,
        V1,
        V2,
        V3,
        -- the leading CASE key sorts NULLs after real values
        ROW_NUMBER() OVER (PARTITION BY Nr ORDER BY CASE WHEN V1 IS NULL THEN 1 ELSE 0 END, V1) AS rn1,
        ROW_NUMBER() OVER (PARTITION BY Nr ORDER BY CASE WHEN V2 IS NULL THEN 1 ELSE 0 END, V2) AS rn2,
        ROW_NUMBER() OVER (PARTITION BY Nr ORDER BY CASE WHEN V3 IS NULL THEN 1 ELSE 0 END, V3) AS rn3
    FROM #T AS T
)
SELECT
    a.Nr,
    a.Name,
    a.V1,
    b.V2,
    c.V3
FROM numbered AS a
INNER JOIN numbered AS b
    ON b.Nr = a.Nr
   AND b.rn2 = a.rn1
INNER JOIN numbered AS c
    ON c.Nr = a.Nr
   AND c.rn3 = a.rn1
-- drop the combined rows in which all three values are NULL
WHERE NOT (a.V1 IS NULL AND b.V2 IS NULL AND c.V3 IS NULL)
ORDER BY
    a.Nr,
    a.rn1
;
Result
+----+------+-----+------+-------+
| Nr | Name | V1 | V2 | V3 |
+----+------+-----+------+-------+
| 1 | Max | 123 | 456 | 789 |
| 9 | Lisa | 1 | NULL | Hello |
| 9 | Lisa | 3 | NULL | NULL |
| 9 | Lisa | 9 | NULL | NULL |
+----+------+-----+------+-------+

Select distinct one field other first non empty or null

I have table
| Id | val |
| --- | ---- |
| 1 | null |
| 1 | qwe1 |
| 1 | qwe2 |
| 2 | null |
| 2 | qwe4 |
| 3 | qwe5 |
| 4 | qew6 |
| 4 | qwe7 |
| 5 | null |
| 5 | null |
Is there an easy way to select the distinct 'id' values, each with its first non-null 'val' value, or null if no such value exists? For example,
result should be
| Id | val |
| --- | ---- |
| 1 | qwe1 |
| 2 | qwe4 |
| 3 | qwe5 |
| 4 | qew6 |
| 5 | null |
In your case a simple GROUP BY should be the solution:
-- One row per Id. MIN skips NULLs, so an Id with at least one non-NULL val
-- returns a non-NULL value and an Id with only NULLs returns NULL.
-- The aggregate is aliased so the result column is named val.
SELECT Id
,MIN(val) AS val
FROM dbo.mytable
GROUP BY Id
Whenever using a GROUP BY, you have to use an aggregate function on all columns, which are not listed in the GROUP BY.
If an Id has a value (val) other than NULL, this value will be returned.
If there are just NULLs for the Id, NULL will be returned.
As far as I understood (regarding your comment), this is exactly what you're trying to achieve.
If you always want to have "the first" value <> NULL, you'll need another sort criteria (like a timestamp column) and might be able to solve it with a WINDOW-function.
If you want the first non-NULL value (where "first" is based on id), then MIN() doesn't quite do it. Window functions do:
-- Keep one row per id, preferring a row whose val is not NULL.
select t.*
from (select t.*,
-- rank the rows of each id: non-NULL val sorts first (1), NULL val last (2)
row_number() over (partition by id
order by (case when val is not null then 1 else 2 end),
-- NOTE(review): id is constant inside its own partition, so this key does not
-- break ties between several non-NULL rows of the same id
id
) as seqnum
from t
) t
where seqnum = 1;
SQL Fiddle:
Create Table from SQL Fiddle:
-- Demo table used by the fiddle
CREATE TABLE tab1 (
    pid integer,
    id  integer,
    val varchar(25)
)
Insert dummy records :
-- Dummy rows: ids 1 and 2 mix NULL and real values, id 5 has only NULLs
insert into tab1 (pid, id, val)
values
    (1,  1, null),
    (2,  1, 'qwe1'),
    (3,  1, 'qwe2'),
    (4,  2, null),
    (5,  2, 'qwe4'),
    (6,  3, 'qwe5'),
    (7,  4, 'qew6'),
    (8,  4, 'qwe7'),
    (9,  5, null),
    (10, 5, null);
fire below query:
-- Smallest non-NULL val per Id (NULL when the Id has only NULL vals)
SELECT
    Id,
    MIN(val) AS val
FROM tab1
GROUP BY Id;

clear string in a column for specific rows

i got a table which looks like this:
| col1 | ... | colx |
---------------------
| 1 | ... | dfd |
| 1 | ... | ajd |
| 1 | ... | aad |
| 2 | ... | azd |
| 2 | ... | iod |
| 3 | ... | asd |
| 3 | ... | aod |
| 3 | ... | wsd |
| 3 | ... | asi |
i want to update the table (or create a new table), so it looks like this:
| col1 | ... | colx |
---------------------
| 1 | ... | dfd |
| | ... | ajd |
| | ... | aad |
| 2 | ... | azd |
| | ... | iod |
| 3 | ... | asd |
| | ... | aod |
| | ... | wsd |
| | ... | asi |
any suggestions?
1. First add a new identity column to your table so we can order the rows and update duplicate values to null:
-- Add an identity column that gives every row a distinct number to order by
ALTER TABLE yourtable
    ADD newcol INT IDENTITY
2. If you want to update duplicate values in col1 to null then:
-- Blank out col1 on every row except the one with the lowest newcol of each col1 value
UPDATE yourtable
SET col1 = NULL
WHERE newcol NOT IN (
    SELECT MIN(newcol)
    FROM yourtable
    GROUP BY col1
)
3.See your result:
-- Show the table ordered by the identity column
SELECT *
FROM yourtable
ORDER BY newcol
You can now drop the identity column that was created, but then you will lose your order.
If you don't want to update anything and just need a query
1.All you need is the following query AFTER you have created a new identity column on your table
-- Returns only the row with the lowest newcol for each col1 value.
-- NOTE(review): the other rows of each group are filtered out rather than
-- shown with a blank col1 — confirm this is the intended result.
select
*
from
yourtable
where
newcol in (select min(newcol) from yourtable group by col1)
Example
-- Show col1 only on the first row (by colx) of each col1 group and an empty
-- string on the others. "other" presumably stands in for the remaining columns.
SELECT
    CASE WHEN A.RN = 1 THEN CONCAT('', A.col1) ELSE '' END AS col1,
    A.other,
    A.colx
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY col1 ORDER BY colx) AS RN
    FROM YourTable
) AS A
ORDER BY
    A.Col1,
    A.RN
Returns
Something like this?
-- Sample data held in a table variable (table variables are @-prefixed;
-- a #-prefixed name cannot be used with DECLARE ... TABLE).
DECLARE @MyTable TABLE
(
    COL1 int,
    COLX nvarchar(50)
)
INSERT INTO @MyTable (COL1, COLX)
VALUES
    (1, 'dfd'),
    (1, 'ajd'),
    (1, 'aad'),
    (2, 'azd'),
    (2, 'iod'),
    (3, 'asd'),
    (3, 'aod'),
    (3, 'wsd'),
    (3, 'asi')
-- Show COL1 (as text) on the row numbered 1 in each COL1 group and an empty
-- string on the remaining rows of that group.
-- NOTE(review): the ROW_NUMBER is ordered by its own partition column, so which
-- row of a group is numbered 1 is not deterministic.
SELECT
    ISNULL(y.NewCOL, '') AS col1,
    y.COLX
FROM (
    SELECT
        x.COLX,
        CAST(CASE WHEN x.rn = 1 THEN x.COL1 ELSE NULL END AS nvarchar(50)) AS NewCOL
    FROM (
        SELECT
            *,
            ROW_NUMBER() OVER (PARTITION BY COL1 ORDER BY COL1) AS rn
        FROM @MyTable
    ) AS x
) AS y
Result:

Select multiple rows for distinct column if column not null, otherwise select first row where column is null

I have an interesting query I need to execute. For Table A below, I want to select ALL non null phoneNumber for distinct userId, but if a non null phoneNumber value doesn't exist for distinct userId select only one null phoneNumber for distinct userId.
Table A
| id | userId | phoneNumber | emailAddress |
-------------------------------------------
| 1 | 1 | 0123456789 | null |
| 2 | 1 | 1234567890 | null |
| 3 | 1 | null | test@gmail |
| 4 | 2 | null | andy@yahoo |
| 5 | 2 | null | andy@gmail |
Expected Results
| id | userId | phoneNumber | emailAddress |
-------------------------------------------
| 1 | 1 | 0123456789 | null |
| 2 | 1 | 1234567890 | null |
| 5 | 2 | null | andy@gmail |
I wrote the query below and it returns the desired results, but I'm interested to see if there is a better, more optimal way to achieve this. Rather than writing multiple subqueries.
-- Branch 1: every row that has a phone number.
SELECT *
FROM A
WHERE phoneNumber IS NOT NULL
UNION
-- Branch 2: for users that have no phone number on any row, the single row
-- with the highest id.
SELECT *
FROM A
WHERE id IN (SELECT MAX(id)
FROM A WHERE phoneNumber IS NULL
-- exclude users that have a phone number on some other row
AND userId NOT IN (SELECT userId
FROM A
WHERE phoneNumber IS NOT NULL)
GROUP BY userId)
You can use the COUNT() and ROW_NUMBER() analytic functions:
-- Keep every row that has a phone number; for a user with none (ct = 0) keep
-- only the row with the highest id (rn = 1).
-- Only the table's own columns are returned; ct and rn are helper columns.
SELECT id,
       userId,
       phoneNumber,
       emailAddress
FROM (
  SELECT A.*,
         -- COUNT(column) counts only the non-NULL phone numbers of the user
         COUNT( phoneNumber ) OVER ( PARTITION BY userId ) AS ct,
         ROW_NUMBER() OVER ( PARTITION BY userId ORDER BY id DESC ) AS rn
  FROM A
) t  -- derived-table alias (written without AS so it stays valid on Oracle)
WHERE phoneNumber IS NOT NULL
OR ( ct = 0 AND rn = 1 );

(SQL) How to select the correct row for each group?

I have a piece of data:
+------------+-----------+-----------+------------+--------------+
| first_name | last_name | family_id | is_primary | is_secondary |
+------------+-----------+-----------+------------+--------------+
| a | b | 1 | 1 | 0 |
| aa | bb | 1 | 0 | 0 |
| c | d | 1 | 0 | 0 |
| cc | dd | 1 | 0 | 0 |
| e | f | 10 | 0 | 0 |
| e | f | 10 | 0 | 1 |
| gg | hh | 10 | 0 | 1 |
| gg | hh | 10 | 0 | 0 |
| gg | hh | 10 | 0 | 0 |
| gg | hh | 10 | 0 | 0 |
+------------+-----------+-----------+------------+--------------+
What I want to do are:
Group by family_id (So we will have two groups)
For each group, if there are some rows that have is_primary equals 1, then choose a random row of them and get its first_name and last_name as the output of the group's two columns
For each group, if there is no row that has is_primary equals 1, find a row (any row is ok) that has is_secondary equals to 1 and get its first_name and last_name as the output of the group's two columns
So based on the logic described above and the data, the correct result should be:
+-----------+------------+-----------+
| family_id | first_name | last_name |
+-----------+------------+-----------+
| 1 | a | b |
| 10 | e | f |
+-----------+------------+-----------+
Or
+-----------+------------+-----------+
| family_id | first_name | last_name |
+-----------+------------+-----------+
| 1 | a | b |
| 10 | gg | hh |
+-----------+------------+-----------+
How can I write the query to get the correct result?
Below is the script to create the test table.
USE tempdb
GO
-- Recreate the test table from scratch
IF OBJECT_ID('dbo.mytable') IS NOT NULL
    DROP TABLE dbo.mytable;
CREATE TABLE mytable (
    first_name   VARCHAR(2) NOT NULL,
    last_name    VARCHAR(2) NOT NULL,
    family_id    INTEGER    NOT NULL,
    is_primary   INTEGER    NOT NULL,
    is_secondary INTEGER    NOT NULL
);
-- Family 1 has one primary row; family 10 has no primary row and two secondary rows
INSERT INTO mytable (first_name, last_name, family_id, is_primary, is_secondary)
VALUES
    ('a',  'b',  1,  1, 0),
    ('aa', 'bb', 1,  0, 0),
    ('c',  'd',  1,  0, 0),
    ('cc', 'dd', 1,  0, 0),
    ('e',  'f',  10, 0, 0),
    ('e',  'f',  10, 0, 1),
    ('gg', 'hh', 10, 0, 1),
    ('gg', 'hh', 10, 0, 0),
    ('gg', 'hh', 10, 0, 0),
    ('gg', 'hh', 10, 0, 0);
GO
SELECT * FROM dbo.mytable;
Try this approach:
-- Rank the flagged rows of each family so that a primary row sorts ahead of a
-- secondary row, then keep the top-ranked row per family.
;WITH ranked AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY family_id
            ORDER BY is_primary DESC, is_secondary DESC
        ) AS rn
    FROM mytable
    -- only rows where exactly one of the two flags is set
    WHERE is_primary + is_secondary = 1
)
SELECT *
FROM ranked
WHERE rn = 1
(thanks for the create & insert script)
EDIT:
As per OP comment (that both flags could be 1), change the WHERE clause to this:
where is_primary = 1 or (is_primary = 0 and is_secondary = 1)
If the rows selected must be random, then use the following:
-- One randomly chosen row per family: a primary row when the family has one,
-- otherwise a secondary row. Reads the question's test table, mytable.
WITH primary_families AS (
    SELECT family_id
          ,first_name
          ,last_name
          -- number the primary rows of EACH family in random order; without the
          -- PARTITION BY only a single row across all families would get r = 1
          ,ROW_NUMBER() OVER (PARTITION BY family_id ORDER BY NEWID()) AS r
    FROM mytable
    WHERE is_primary = 1
),
secondary_families AS (
    SELECT f.family_id
          ,f.first_name
          ,f.last_name
          ,ROW_NUMBER() OVER (PARTITION BY f.family_id ORDER BY NEWID()) AS r
    FROM mytable AS f
    WHERE f.is_secondary = 1
      -- only families that have no primary row at all
      AND NOT EXISTS (
            SELECT 1
            FROM mytable AS p
            WHERE p.family_id = f.family_id
              AND p.is_primary = 1
          )
)
SELECT f.family_id
      ,f.first_name
      ,f.last_name
FROM primary_families AS f
WHERE f.r = 1
-- the two branches cover disjoint families, so no de-duplication is needed
UNION ALL
SELECT f.family_id
      ,f.first_name
      ,f.last_name
FROM secondary_families AS f
WHERE f.r = 1
It's not an answer to your specific question, just an observation. If I had to develop software or a web application with such logic, I would move it out of SQL and into the application's programming language: retrieve the data set of interest, scan it, split it into groups, and sort.