SQL Server 2012, update record based on next record - sql

I'm struggling to find a solution to update (not just select) SQL table records based on previous values. I can get the script to populate the immediate subsequent record with the LAG() function, but when I have the same values in a row (like 'CC' below) I can't get it to populate with the next value that's not the same as the current value.
It would also be helpful to be able to add CASE/WHEN condition so that only values with the same BaseID are evaluated. Any help would be greatly appreciated.
Here is my desired result:
BaseID Value Date NextValue
1 AA 2017-10-01 BB
1 BB 2017-10-02 CC
1 CC 2017-10-03 DD
1 CC 2017-10-03 DD
1 CC 2017-10-03 DD
1 DD 2017-10-04 NULL
2 EE 2017-10-01 FF
2 FF 2017-10-02 GG
2 GG 2017-10-03 NULL

Get the distinct baseid,value,date combinations and use lead to get the next value in a cte and use itto update.
with cte as (select t1.baseid,t1.value,t1.nextvalue,t2.nxt_value
from tbl t1
left join (select t.*,lead(value) over(partition by baseid order by datecol) as nxt_value
from (select distinct baseid,datecol,value from tbl) t
) t2
on t1.baseid=t2.baseid and t1.datecol=t2.datecol and t1.value=t2.value
)
update cte set nextvalue=nxt_value
This assumes there can't be multiple values for a given baseid,date combination.

Here is a working example using DENSE_RANK as another option.
declare #Something table
(
BaseID int
, MyValue char(2)
, MyDate date
, NextValue char(2)
)
insert #Something
(
BaseID
, MyValue
, MyDate
) VALUES
(1, 'AA', '2017-10-01')
, (1, 'BB', '2017-10-02')
, (1, 'CC', '2017-10-03')
, (1, 'CC', '2017-10-03')
, (1, 'CC', '2017-10-03')
, (1, 'DD', '2017-10-04')
, (2, 'EE', '2017-10-01')
, (2, 'FF', '2017-10-02')
, (2, 'GG', '2017-10-03')
;
with SortedResults as
(
select *
, DENSE_RANK() over(partition by BaseID order by BaseID, MyDate ) as MyRank
from #Something
)
update sr set NextValue = sr2.MyValue
from SortedResults sr
join SortedResults sr2 on sr2.MyRank - 1 = sr.MyRank and sr.BaseID = sr2.BaseID
select *
from #Something

Related

SQL to select the 'first' date a project was made inactive for all projects

I am trying to work out the SQL I would need to select certain records, here is an example of what I'm trying to do:
Project number
Active/Inactive
Date
1
A
1/1/20
1
I
3/1/20
1
A
5/1/20
1
I
7/1/20
1
I
9/1/20
2
I
1/1/19
2
A
5/1/19
3
A
1/3/20
3
I
3/3/20
3
I
5/3/20
Note: A=Active project, I=Inactive.
What I would like to do is for each project where the project is currently inactive (i.e. the latest date for the project in the above table is set to I), return the row of the longest time ago it was made inactive, but NOT before it was last active (hope this is understandable!). So for the above table the following would be returned:
Project number
Active/Inactive
Date
1
I
7/1/20
3
I
3/3/20
So proj number 1 is inactive and the earliest time it was made inactive (after the last time it was active) is 7/1/20. Project 2 is not selected as it is currently active. Project 3 is inactive and the earliest time it was made inactive (after the last time it was active) is 3/3/20.
Thanks.
You could use the 'row_number' function to help you.
create TABLE #PROJECT(ProjectNumber int, [Status] varcha(1), [Date] date)
INSERT INTO #PROJECT VALUES
(1 ,'A' ,'1/1/20'),
(1 ,'I' ,'3/1/20'),
(1 ,'A' ,'5/1/20'),
(1 ,'I' ,'7/1/20'),
(1 ,'I' ,'9/1/20'),
(2 ,'I' ,'1/1/19'),
(2 ,'A' ,'5/1/19'),
(3 ,'A' ,'1/3/20'),
(3 ,'I' ,'3/3/20'),
(3 ,'I' ,'5/3/20')
select * from
(SELECT
row_number() over (partition by projectNumber order by [date]) as [index]
,*
FROM
#PROJECT
WHERE
[STATUS] = 'I'
) as a where [index] = 1
Using some effective date joins, this should work. I am using SQL Server. Create your tables and set up the same data set you provided:
CREATE TABLE dbo.PROJECTS
(
PROJ_NUM int NULL,
STTS char(1) NULL,
STTS_DT date NULL
) ON [PRIMARY]
GO
INSERT INTO dbo.PROJECTS values (1, 'A', '1/1/20');
INSERT INTO dbo.PROJECTS values (1, 'I', '3/1/20');
INSERT INTO dbo.PROJECTS values (1, 'A', '5/1/20');
INSERT INTO dbo.PROJECTS values (1, 'I', '7/1/20');
INSERT INTO dbo.PROJECTS values (1, 'I', '9/1/20');
INSERT INTO dbo.PROJECTS values (2, 'I', '1/1/19');
INSERT INTO dbo.PROJECTS values (2, 'A', '5/1/19');
INSERT INTO dbo.PROJECTS values (3, 'A', '1/3/20');
INSERT INTO dbo.PROJECTS values (3, 'I', '3/3/20');
INSERT INTO dbo.PROJECTS values (3, 'I', '5/3/20');
Write a sub-query that filters out just to the projects that are INACTIVE:
-- sub-query that gives you projects that are inactive
SELECT PROJ_NUM, STTS, STTS_DT FROM dbo.PROJECTS CURRSTTS
WHERE STTS_DT = (SELECT MAX(STTS_DT) FROM dbo.PROJECTS ALLP WHERE ALLP.PROJ_NUM = CURRSTTS.PROJ_NUM)
AND CURRSTTS.STTS = 'I'
;
Write another sub-query that provides you the last active status date for each project:
-- sub-query that gives you last active status date for each project
SELECT PROJ_NUM, STTS, STTS_DT FROM dbo.PROJECTS LASTACTV
WHERE STTS_DT = (SELECT MAX(STTS_DT) FROM dbo.PROJECTS ALLP WHERE ALLP.PROJ_NUM = LASTACTV.PROJ_NUM AND ALLP.STTS = 'A')
;
Combine those two sub-queries into a query that gives you the list of inactive projects with their last active status date:
-- sub-query using the 2 above to show only inactive projects with last active stts date
SELECT CURRSTTS.PROJ_NUM, CURRSTTS.STTS, CURRSTTS.STTS_DT, LASTACTV.STTS_DT AS LASTACTV_STTS_DT FROM dbo.PROJECTS CURRSTTS
INNER JOIN
(SELECT PROJ_NUM, STTS, STTS_DT FROM dbo.PROJECTS LASTACTV
WHERE STTS_DT = (SELECT MAX(STTS_DT) FROM dbo.PROJECTS ALLP WHERE ALLP.PROJ_NUM = LASTACTV.PROJ_NUM AND ALLP.STTS = 'A'))
LASTACTV ON CURRSTTS.PROJ_NUM = LASTACTV.PROJ_NUM
WHERE CURRSTTS.STTS_DT = (SELECT MAX(STTS_DT) FROM dbo.PROJECTS ALLP WHERE ALLP.PROJ_NUM = CURRSTTS.PROJ_NUM)
AND CURRSTTS.STTS = 'I'
Add one more layer to the query that selects the MIN(STTS_DT) that is greater than the LASTACTV_STTS_DT:
-- final query that uses above sub-query
SELECT P.PROJ_NUM, P.STTS, P.STTS_DT
FROM dbo.PROJECTS P
INNER JOIN (
SELECT CURRSTTS.PROJ_NUM, CURRSTTS.STTS, CURRSTTS.STTS_DT, LASTACTV.STTS_DT AS LASTACTV_STTS_DT FROM dbo.PROJECTS CURRSTTS
INNER JOIN
(SELECT PROJ_NUM, STTS, STTS_DT FROM dbo.PROJECTS LASTACTV
WHERE STTS_DT = (SELECT MAX(STTS_DT) FROM dbo.PROJECTS ALLP WHERE ALLP.PROJ_NUM = LASTACTV.PROJ_NUM AND ALLP.STTS = 'A'))
LASTACTV ON CURRSTTS.PROJ_NUM = LASTACTV.PROJ_NUM
WHERE CURRSTTS.STTS_DT = (SELECT MAX(STTS_DT) FROM dbo.PROJECTS ALLP WHERE ALLP.PROJ_NUM = CURRSTTS.PROJ_NUM)
AND CURRSTTS.STTS = 'I'
) SUB ON SUB.PROJ_NUM = P.PROJ_NUM
WHERE P.STTS_DT = (SELECT MIN(STTS_DT) FROM dbo.PROJECTS ALLP WHERE ALLP.PROJ_NUM = P.PROJ_NUM AND ALLP.STTS_DT > SUB.LASTACTV_STTS_DT)
The result I get back matches your desired result:
"Greatest n-per group" is the thing to look up when you run accross a problem like this again. Here is a query that will get what you need in postgresSQL.
I realized I changed your column to a boolean, but you will get the gist.
with most_recent_projects as (
select project_number, max(date) date from testtable group by project_number
),
currently_inactive_projects as (
select t.project_number, t.date from testtable t join most_recent_projects mrp on t.project_number = mrp.project_number and t.date = mrp.date where not t.active
),
last_active_date as (
select project_number, date from (
select t.project_number, rank() OVER (
PARTITION BY t.project_number
ORDER BY t.date DESC), t.date
from currently_inactive_projects cip join testtable t on t.project_number = cip.project_number where t.active) t1 where rank = 1
)
-- oldest inactive -- ie, result
select t.project_number, t.active, min(t.date) from last_active_date lad join testtable t on lad.project_number = t.project_number and t.date > lad.date group by t.project_number, t.active;
This is a variation of "gaps and islands" problem.
The query may be like this
SELECT
num,
status,
MIN(date) AS date
FROM (
SELECT
*,
MAX(group_id) OVER (PARTITION BY num) AS max_group_id
FROM (
SELECT
*,
SUM(CASE WHEN status = prev_status THEN 0 ELSE 1 END) OVER (PARTITION BY num ORDER BY date) AS group_id
FROM (
SELECT
*,
LAG(status) OVER (PARTITION BY num ORDER BY date) AS prev_status
FROM projects
) groups
) islands
) q
WHERE status = 'I' AND group_id = max_group_id
GROUP BY num, status
ORDER BY num
Another approach using CTEs
WITH last_status AS (
SELECT
*
FROM (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY num ORDER BY date DESC) AS rn
FROM projects
) rns
WHERE rn = 1
),
last_active AS (
SELECT
num,
MAX(date) AS date
FROM projects
WHERE status = 'A'
GROUP BY num
),
last_inactive AS (
SELECT
p.num,
MIN(p.date) AS date
FROM projects p
WHERE p.status = 'I'
AND (
EXISTS (
SELECT 1 FROM last_active la
WHERE la.num = p.num AND la.date < p.date
)
OR NOT EXISTS (
SELECT 1 FROM last_active la
WHERE la.num = p.num
)
)
GROUP BY num
)
SELECT
ls.num,
ls.status,
li.date
FROM last_status ls
JOIN last_inactive li ON li.num = ls.num
WHERE ls.status = 'I'
You can check a working demo with both queries here

How to select just the rows in a table that fit a criteria , avoiding duplicates in SQL

So I have a df like follows:
USER Value object
0001 V V
0002 A NULL
0002 C C
0003 A NULL
0004 A NULL
0004 A NULL
0003 V V
So I basically want USER to be the unique id for each row of this DF. If there is an A in the Value column, I only want it if that's the only option for the ID. So there are two 002's, I only want to see the instance where it is not A , so C.
Because 0004 doesn't have a non-A Value, I'll take the A.
Final result:
USER Value
0001 V
0002 C
0003 V
0004 A
I think you are looking for the following:
select user,
'A' as value
from tbl
group by user
having sum(case when value = 'A' then 1 else 0 end) > 0
and sum(case when value <> 'A' then 1 else 0 end) = 0
union all
select user,
value
from tbl
where value <> 'A'
order by user;
See Fiddle:
http://www.sqlfiddle.com/#!9/b28f4c/2/0
The desired result is achieved with your example data. However, your example data does not contain any users having more than one non-A value row. The above query will keep all of them. If you only want to keep one or some, explain how to pick which you want.
This will return the one Value per tuple, returning A at last resort (if A is the smallest of the potential values):
select USER, max(Value) as value from Table
group by User
or, this might return multiple users if they have several tuples with different object (when not null)
select distinct user, coalesce(object, value)
from table ;
Here's a solution if you don't like typing :-)
select
distinct USR
,VAL
from
TBL
qualify
max(ascii(VAL)) over (partition by USR ) = ascii(VAL)
Copy|Paste|Run in snowflake:
CREATE or replace TABLE tbl( USR varchar(4), VAL varchar(1), OBJ varchar(4));
INSERT INTO tbl (USR,VAL,OBJ)
VALUES
('0001', 'V', 'V'),
('0002', 'A', NULL),
('0002', 'C', 'C'),
('0003', 'A', NULL),
('0004', 'A', NULL),
('0004', 'A', NULL),
('0003', 'V', 'V');
select
distinct USR
,VAL
from
TBL
qualify
max(ascii(VAL)) over (partition by USR ) = ascii(VAL);
You can try the following if you are using SQL-Server
select distinct USER
,Value
from
(
select *,rank() over (partition by USER order by Value desc) as ranking
from your_table_name
) as t
where ranking =1

SQL query to find all rows with same timestamp + or - one second

Row 3 in the following table is a duplicate. I know this because there is another row (row 5) that was created by the same user less than one second earlier.
row record created_by created_dt
1 5734 '00E759CF' '2020-06-05 19:59:36.610'
2 9856 '1E095CBA' '2020-06-05 19:57:31.207'
3 4592 '1E095CBA' '2020-06-05 19:54:41.930'
4 7454 '00E759CF' '2020-06-05 19:54:41.840'
5 4126 '1E095CBA' '2020-06-05 19:54:41.757'
I want a query that returns all rows created by the same user less than one second apart.
Like so:
row record created_by created_dt
1 4592 '1E095CBA' '2020-06-05 19:54:41.930'
2 4126 '1E095CBA' '2020-06-05 19:54:41.757'
This is what I have so far:
SELECT DISTINCT a1.*
FROM table AS a1
LEFT JOIN table AS a2
ON a1.created_by = a2.created_by
AND a1.created_dt > a2.created_dt
AND a1.created_dt <= DATEADD(second, 1, a2.created_dt)
WHERE a1.created_dt IS NOT NULL
AND a.created_dt IS NOT NULL
This is what finally did the trick:
SELECT
a.*
FROM table a
WHERE EXISTS (SELECT TOP 1
*
FROM table a1
WHERE a1.created_by = a.created_by
AND ABS(DATEDIFF(SECOND, a.created_dt, a1.created_dt)) < 1
AND a.created_dt <> a1.created_dt)
ORDER BY created_dt DESC
You could use exists:
select t.*
from mytable t
where exists(
select 1
from mytable t1
where
t1.created_by = t.created_by
and abs(datediff(second, t.created_dt, t1.created_dt)) < 1
)
How about something like this
SELECT DISTINCT a1.*
FROM #a1 AS a1
LEFT JOIN #a1 AS a2 ON a1.[Created_By] = a2.[Created_By]
AND a1.[Record] <> a2.[Record]
WHERE ABS(DATEDIFF(SECOND, a1.[Created_Dt], a2.[Created_Dt])) < 1
Here is the sample query I used to verify the results.
DECLARE #a1 TABLE (
[Record] INT,
[Created_By] NVARCHAR(10),
[Created_Dt] DATETIME
)
INSERT INTO #a1 VALUES
(5734, '00E759CF', '2020-06-05 19:59:36.610'),
(9856, '1E095CBA', '2020-06-05 19:57:31.207'),
(4592, '1E095CBA', '2020-06-05 19:54:41.930'),
(7454, '00E759CF', '2020-06-05 19:54:41.840'),
(4126, '1E095CBA', '2020-06-05 19:54:41.757')
SELECT DISTINCT a1.*
FROM #a1 AS a1
LEFT JOIN #a1 AS a2 ON a1.[Created_By] = a2.[Created_By]
AND a1.[Record] <> a2.[Record]
WHERE ABS(DATEDIFF(SECOND, a1.[Created_Dt], a2.[Created_Dt])) < 1
I would suggest lead() and lag() instead of self-joins:
select t.*
from (select t.*,
lag(created_dt) over (partition by created_dt) as prev_cd,
lead(created_dt) over (partition by created_dt) as next_cd
from t
) t
where created_dt < dateadd(second, 1, prev_created_dt) or
created_dt > dateadd(second, -1, next_created_dt)

sql generate code based on three column values

I have three columns
suppose
row no column1 column2 column3
1 A B C
2 A B C
3 D E F
4 G H I
5 G H C
I want to generate code by combining these three column values
For Eg.
1)ABC001
2)ABC002
3)DEF001
4)GHI001
5)GHC001
by checking combination of three columns
logic is that
if values of three columns are same then like first time it shows 'ABC001'
and 2nd time it shows 'ABC002'
You can try this:
I dont know what you want for logic with 00, but you can add them manuel or let the rn decide for you
declare #mytable table (rowno int,col1 nvarchar(50),col2 nvarchar(50),col3 nvarchar(50)
)
insert into #mytable
values
(1,'A', 'B', 'C'),
(2,'A', 'B', 'C'),
(3,'D', 'E', 'F'),
(4,'G', 'H', 'I'),
(5,'G', 'H', 'C')
Select rowno,col1,col2,col3,
case when rn >= 10 and rn < 100 then concatcol+'0'+cast(rn as nvarchar(50))
when rn >= 100 then concatcol+cast(rn as nvarchar(50))
else concatcol+'00'+cast(rn as nvarchar(50)) end as ConcatCol from (
select rowno,col1,col2,col3
,Col1+col2+col3 as ConcatCol,ROW_NUMBER() over(partition by col1,col2,col3 order by rowno) as rn from #mytable
) x
order by rowno
My case when makes sure when you hit number 10 it writes ABC010 and when it hits above 100 it writes ABC100 else if its under 10 it writes ABC001 and so on.
Result
TSQL: CONCAT(column1,column2,column3,RIGHT(REPLICATE("0", 3) + LEFT(row_no, 3), 3))
You should combine your columns like below :
SELECT CONVERT(VARCHAR(MAX), ROW_NUMBER() OVER(ORDER BY
(
SELECT NULL
)))+') '+DATA AS Data
FROM
(
SELECT column1+column2+column3+'00'+CONVERT(VARCHAR(MAX), ROW_NUMBER() OVER(PARTITION BY column1,
column2,
column3 ORDER BY
(
SELECT NULL
))) DATA
FROM <table_name>
) T;
Result :
1)ABC001
2)ABC002
3)DEF001
4)GHI001
5)GHC001
MySQL:
CONCAT(column1,column2,column3,LPAD(row_no, 3, '0'))
[you will need to enclose the 'row no' in ticks if there is a space in the name of the field instead of underscore.]

i want to display data according to type in same query

i have following table "vehicle_data" :
ID ALERT_TYPE VALUE
58 2 1
58 1 1
104 1 1
104 2 1
Here alert_type = 2 is for GPS value and alert_type=1 is for engine_value .
so if alert_type=2 and its value is =1 then it means its value is correct.
when alert_type=2 and its value is =0 then it means its value is wrong.
same for alert_type=1
so now here i want the following output:
ID gps engine_value
58 1 1
104 1 1
how can i perform this query??
You can do it like this.
SELECT ID
,CASE WHEN [ALERT_TYPE]=2 and [value ]=1 THEN 1 ELSE 0 END as gps
,CASE WHEN [ALERT_TYPE]=1 and [value ]=1 THEN 1 ELSE 0 END as engine
FROM vehicle_data
SELECT ID, alert_type=2 AS gps, alert_type=1 AS [engine] FROM vehicle_data WHERE value=1;
EDITED to account for your explanation of VALUE.
Schema
CREATE TABLE table3 (id int, ALERT_TYPE int)
INSERT table3 VALUES (58, 1), (58, 2), (104, 1), (104, 2)
Query
SELECT *
FROM (
SELECT ID
,ROW_NUMBER() OVER (
PARTITION BY id ORDER BY id
) AS row_num
,gps = CASE
WHEN ALERT_TYPE = 2
THEN 1
ELSE 0
END
,engine = CASE
WHEN ALERT_TYPE = 1
THEN 1
ELSE 0
END
FROM table3
) a
WHERE a.row_num = 1
Output
ID gps engine
58 1 0
104 0 1
One possible way using subqueries :
select
Ids.ID
, gps.VALUE 'gps'
, engine_value.VALUE 'engine_value'
from (select distinct ID from vehicle_data) Ids
left join
(select ID, VALUE from vehicle_data where ALERT_TYPE = 2) gps
on gps.ID = Ids.ID
left join
(select ID, VALUE from vehicle_data where ALERT_TYPE = 1) engine_value
on engine_value.ID = Ids.ID
[SQL Fiddle demo]
I hope this should work for you,
Select ID,sum(gps) as gps ,sum(engine) as engine from
(SELECT ID
,CASE WHEN [ALERT_TYPE]=2 and [value ]=1 THEN 1 ELSE 0 END as gps
,CASE WHEN [ALERT_TYPE]=1 and [value ]=1 THEN 1 ELSE 0 END as engine
FROM vehicle_data
)a
group by id
select x.id,x.alert_type as GPS,x.value as engine_value from (
select ID,alert_type,value,ROW_NUMBER() over (partition by id order by alert_type ) as Rnk from mytable
)x
where Rnk=1
Please check this query in SQL :
create table mytable (id int, alert_type int, value int);
insert into mytable (id, alert_type, value)
values (58, 2, 1),
(58, 1, 1),
(104, 1, 1),
(104, 2, 1);
SELECT distinct ID
,(select count (id) from mytable mt where mt.id=mytable.id and mt.[ALERT_TYPE]=2 and mt.[value ]=1) as gps
,(select count (id) from mytable mt where mt.id=mytable.id and mt.[ALERT_TYPE]=1 and mt.[value ]=1) as engine
FROM mytable
BASED ON YOUR QUESTION I BELIEVE YOU WANT THE DATA IN COLUMN AND TO SUIT YOUR REQUIREMENT I HAVE MADE A SQL FIDDLE WORKING - CODE IS ALSO MENTIONED BELOW -
HERE YOU GO WITH THE WORKING FIDDLE -
WORKING DEMO
SQL CODE FOR REFERNECE -
CREATE TABLE ALERTS (ID INT, ALERT_TYPE INT, VALUE INT)
INSERT INTO ALERTS VALUES (58,2,1)
INSERT INTO ALERTS VALUES (58,1,0)
INSERT INTO ALERTS VALUES (104,1,1)
INSERT INTO ALERTS VALUES (104,2,0)
CREATE TABLE ALERTSVALUE (ID INT, gps INT,engine INT)
INSERT INTO ALERTSVALUE VALUES (58,1,0)
INSERT INTO ALERTSVALUE VALUES (104,0,1)
SELECT A.ID,
CASE A.ALERT_TYPE WHEN 2 THEN 1 ELSE 0 END AS GPS,
CASE A.ALERT_TYPE WHEN 1 THEN 1 ELSE 0 END AS ENGINE_VALUE,
A.VALUE FROM ALERTS A WHERE A.VALUE = 1
EDIT BASED ON COMMENT - TO MERGE THE ROWS FOR BOTH GPS AND ENGINE_VALUE:
SELECT X.ID,X.ALERT_TYPE as GPS,X.VALUE as ENGINE_VALUE
FROM (
SELECT ID,ALERT_TYPE ,VALUE ,ROW_NUMBER() OVER (PARTITION BY ID ORDER BY alert_type ) AS [Rank] FROM ALERTS
)X
WHERE [Rank]=1
SQL FIDDLE DEMO