Excluding blank rows in CTE query - sql

I am new to using CTE queries in SQL Server. I've built this query with help from the web in an effort to start building my "change log" to highlight changes made in my database. Please see example linked below. I'd like to exclude rows where there are no changes. Can you assist in how to accomplish this?
Row #3 with Nov 7 changedate has blank values. I would like for this row to not display. I also don't want to have to do something like WHERE row1 <> '' AND row2 <> '' AND row3 <> '', etc, because my final query will contain much much more rows. Is this possible?
http://sqlfiddle.com/#!6/134bd/4/0

Here is an option that you can use.
Below uses COALESCE function.
Using the same data and just modified your code from sqlfiddle.
Each CASE expression returns NULL when the old and new values match; the non-character values are cast to varchar so that a single COALESCE in the WHERE clause can test whether every change column is NULL.
Below modified script to include nTEXT column. You can use DATALENGTH with COALESCE in the WHERE clause.
Modified:
-- Sample change-history table: one row per saved version of an employee record.
CREATE TABLE tblEmp
(
    [memid]      int,
    [empid]      int,
    [name]       varchar(50),
    [salary]     int,
    [room]       varchar(50),
    [changedate] datetime,
    [ntxt]       ntext
);

-- Dates are written as unseparated YYYYMMDD, which SQL Server reads the same way
-- under every SET LANGUAGE / SET DATEFORMAT setting; m/d/yyyy literals do not.
INSERT INTO tblEmp
    ([memid], [empid], [name], [salary], [room], [changedate], [ntxt])
VALUES
    (41, 1, 'peter',  1000, 'Regency',    '20121104', ''),
    (43, 1, 'peterz', 2000, 'Regency',    '20131105', 'nn'),
    (44, 1, 'peterz', 2000, 'Regency',    '20131107', ''),
    (45, 4, 'sally',  2001, 'Sheratio',   '20131102', ''),
    (46, 4, 'sally',  2000, 'Sheraton',   '20131106', ''),
    (47, 1, 'peter',  3000, 'Regency',    '20131204', ''),
    (48, 4, 'sallye', 2000, 'Sheraton 1', '20131109', ''),
    (49, 4, 'sally',  3000, 'Sheraton',   '20131106', 'kljslkdjflkajslkjasdlkjalskjdlakjsdlkjasldjfk')
;
-- Change log: pair every saved version of an employee with the next version and
-- show only the values that differ. Pairs with no differences and an empty ntxt
-- are left out.
WITH cte AS
(
    SELECT
        empid,
        name,
        salary,
        room,
        changedate,
        ntxt,
        -- memid breaks ties between versions that share a changedate (empid 4 has
        -- two rows dated 6 Nov 2013); without it the pairing could vary between runs.
        -- Assumes memid grows with each saved version -- TODO confirm.
        rn = ROW_NUMBER() OVER (PARTITION BY empid ORDER BY changedate, memid)
    FROM tblEmp
)
SELECT
    x.empid,
    x.oldname,
    x.newname,
    x.oldsalary,
    x.newsalary,
    x.oldroom,
    x.newroom,
    x.changedate,
    x.ntxt
FROM
(
    -- c1 is the earlier version, c2 the version that immediately follows it.
    -- Each old/new pair is NULL when the value did not change.
    SELECT
        c1.empid,
        oldname   = CASE WHEN c1.name   = c2.name   THEN NULL ELSE c1.name   END,
        newname   = CASE WHEN c1.name   = c2.name   THEN NULL ELSE c2.name   END,
        oldsalary = CASE WHEN c1.salary = c2.salary THEN NULL ELSE c1.salary END,
        newsalary = CASE WHEN c1.salary = c2.salary THEN NULL ELSE c2.salary END,
        oldroom   = CASE WHEN c1.room   = c2.room   THEN NULL ELSE c1.room   END,
        newroom   = CASE WHEN c1.room   = c2.room   THEN NULL ELSE c2.room   END,
        c2.changedate,
        c2.ntxt
    FROM cte AS c1
    INNER JOIN cte AS c2
        ON  c1.empid = c2.empid
        AND c2.rn = c1.rn + 1
) AS x
-- COALESCE is NULL only when every change column is NULL; salaries are cast so
-- all arguments share a character type (11 characters cover any int).
WHERE NOT (
        COALESCE(
            x.oldname,
            x.newname,
            CAST(x.oldsalary AS VARCHAR(11)),
            CAST(x.newsalary AS VARCHAR(11)),
            x.oldroom,
            x.newroom
        ) IS NULL
    AND DATALENGTH(x.ntxt) = 0
)
ORDER BY x.changedate DESC;

A slightly different approach with SQL Server 2012 would be to use LAG in the common table expression to be able to detect row by row changes. The CTE basically pulls out each row along with the relevant data from the previous row, and does a straight forward compare in the outer query to generate the result.
-- Change log using LAG (SQL Server 2012+): each row carries the previous version's
-- values for the same employee, and the outer query blanks out whatever is unchanged.
WITH cte AS (
    SELECT
        empid,
        changeDate,
        -- memid is a tie-breaker for versions saved on the same changeDate, so
        -- "previous row" is well defined. Assumes memid grows over time -- TODO confirm.
        LAG(name)   OVER (PARTITION BY empid ORDER BY changeDate, memid) AS oldname,
        name,
        LAG(salary) OVER (PARTITION BY empid ORDER BY changeDate, memid) AS oldsalary,
        salary,
        LAG(room)   OVER (PARTITION BY empid ORDER BY changeDate, memid) AS oldroom,
        room
    FROM tblEmp
)
SELECT
    empid,
    CASE WHEN name <> oldname THEN oldname ELSE '' END AS oldname,
    CASE WHEN name <> oldname THEN name    ELSE '' END AS newname,
    -- Salaries are cast to text: in an int-typed CASE the '' branch would be
    -- converted to 0 and shown as a value instead of a blank.
    CASE WHEN salary <> oldsalary THEN CAST(oldsalary AS varchar(11)) ELSE '' END AS oldsalary,
    CASE WHEN salary <> oldsalary THEN CAST(salary    AS varchar(11)) ELSE '' END AS newsalary,
    CASE WHEN room <> oldroom THEN oldroom ELSE '' END AS oldroom,
    CASE WHEN room <> oldroom THEN room    ELSE '' END AS newroom,
    changeDate
FROM cte
-- The first version of each employee has NULL "old" values, so every comparison
-- is UNKNOWN and the row is dropped, as are rows where nothing changed.
WHERE oldname <> name
   OR oldsalary <> salary
   OR oldroom <> room
ORDER BY empid, changeDate;
An SQLfiddle to test with.

Related

Trying to group by a value in SQL

I have a table called TESTTABLE
The table script and some sample data:
-- Event log sample: one row per outage event, several events per NODE.
-- NODE is deliberately not a primary key because the sample data repeats node
-- names (e.g. three 'New York' rows); a key on NODE would reject those inserts.
CREATE TABLE Test_Table(
    NODE            VARCHAR(10) NOT NULL
   ,EVENTID         CHAR(255)   NOT NULL
   ,TYPE            INTEGER     NOT NULL
   ,FIRSTOCCURRENCE VARCHAR(16) NOT NULL
   ,LASTOCCURRENCE  VARCHAR(16) NOT NULL
   ,TALLY           INTEGER     NOT NULL
   ,TICKETNUMBER    VARCHAR(20)
   ,TIME_DELTA      VARCHAR(5)
);

INSERT INTO Test_Table
    (NODE, EVENTID, TYPE, FIRSTOCCURRENCE, LASTOCCURRENCE, TALLY, TICKETNUMBER, TIME_DELTA)
VALUES
    ('Washington', 'ReachabilityProblem', 2, '12/13/2017 23:24', '12/13/2017 23:24', 1, NULL, '1 sec'),
    ('San Diego',  'ReachabilityProblem', 1, '12/13/2017 23:23', '12/13/2017 23:23', 1, NULL, NULL),
    ('Richmond',   'ReachabilityProblem', 1, '12/13/2017 14:23', '12/13/2017 14:23', 1, NULL, NULL),
    ('Richmond',   'ReachabilityProblem', 1, '12/13/2017 23:23', '12/13/2017 23:23', 1, NULL, NULL),
    ('New York',   'ReachabilityProblem', 2, '12/13/2017 23:24', '12/13/2017 23:24', 1, NULL, '1 sec'),
    ('New York',   'ReachabilityProblem', 2, '12/13/2017 11:32', '12/13/2017 11:33', 2, NULL, '1 sec'),
    ('New York',   'ReachabilityProblem', 1, '12/13/2017 16:35', '12/13/2017 16:35', 1, NULL, NULL),
    ('Landsdown',  'ReachabilityProblem', 2, '12/13/2017 23:24', '12/13/2017 23:24', 1, NULL, '1 sec'),
    ('Houston',    'ReachabilityProblem', 2, '12/13/2017 14:24', '12/13/2017 14:24', 1, NULL, '1 sec'),
    ('Houston',    'ReachabilityProblem', 1, '12/13/2017 11:31', '12/13/2017 11:32', 2, NULL, NULL),
    ('Dallas',     'ReachabilityProblem', 1, '12/13/2017 23:23', '12/13/2017 23:23', 1, NULL, NULL),
    ('Dallas',     'ReachabilityProblem', 2, '12/13/2017 23:24', '12/13/2017 23:24', 1, NULL, '1 sec'),
    ('Coco Beach', 'ReachabilityProblem', 1, '12/13/2017 23:23', '12/13/2017 23:23', 1, NULL, NULL);
I'm trying to obtain this
I have tried this
-- Attempt quoted from the question. It does not parse as written; the notes
-- below point at the visible syntax problems.
Select DATEDIFF(Day, GETDATE(), DATEADD(HOUR, 15, GETDATE()))
-- Second statement: lists events whose first occurrence is within the last 24 hours.
Select
[NODE]
,[EVENTID]
,[TYPE]
,[FIRSTOCCURRENCE]
-- NOTE: the opening bracket is missing on the next line.
,LASTOCCURRENCE]
-- NOTE: the two column names below contain spaces, are not bracketed, and are
-- spelled OCCURENCE (one R), unlike the columns listed above.
,DATEDIFF(Minute, FIrst OCCURENCE, LAST OCCURENCE) as [Outage in MIN]
,[TicketNumber]
,[Severity]
-- NOTE: the opening bracket is missing on the next line.
,Tally]
From
[XYZ].[XYZ].[XYZ_STATUS]
Where
-- NOTE: written with a space here but as [FIRSTOCCURRENCE] in the select list.
[FIRST OCCURRENCE] >= DATEADD(hh, -24, GETDATE())
-- NOTE: only node is grouped, yet the select list has other non-aggregated columns.
Group by node;
Please help a rookie
GROUP BY returns a table with one row per group. If you use a GROUP BY clause, the SELECT list may contain only the columns you are grouping by plus aggregate functions over the other columns, because an ungrouped column has no single value for the whole group.
Maybe this is what you want...
-- One row per distinct (Node, EventID, Type, Severity, Tally) combination.
-- The first column counts day boundaries between now and 15 hours from now
-- (0 or 1); it is a constant expression, so it needs no GROUP BY entry.
SELECT
    DATEDIFF(DAY, GETDATE(), DATEADD(HOUR, 15, GETDATE())) AS DayDiff,
    Node,
    EventID,
    Type,
    Severity,
    Tally
FROM xyz.xyz.xyz_status
GROUP BY
    Node,
    EventID,
    Type,
    Severity,
    Tally;
When we group by two or more columns, it is saying "Group them so that all of those with the same col1 and col2 are in the same group, and then calculate all the aggregate functions (Count, Sum, Average, etc.) for each of those groups"
Maybe you want this...
-- Outage length in minutes for every Houston event.
-- The times are read from the row itself: an uncorrelated TOP (1) subquery
-- without ORDER BY would return an arbitrary row's times for every event.
SELECT
    DATEDIFF(MINUTE, FIRSTOCCURRENCE, LASTOCCURRENCE) AS OutageInMin
FROM xyz.xyz.xyz_status
WHERE node = 'Houston';
Here you can take a look at more examples of DATEDIFF function.
This should put you on track, although writing reports in SQL is probably a bad idea. I believe the query below produces the output you are after. You can also look at the ROLLUP options, some of which are deprecated.
-- Per-node outage report: every event row, followed by one 'SITE TOTAL' row per
-- node carrying the summed outage minutes.
WITH data AS (
    SELECT
        NODE,
        EVENTID,
        TYPE,
        FIRSTOCCURRENCE,
        LASTOCCURRENCE,
        DATEDIFF(MINUTE, FIRSTOCCURRENCE, LASTOCCURRENCE) AS OutageInMin,
        TicketNumber,
        Tally,
        -- rn makes each event its own group in the (NODE, rn) grouping set
        ROW_NUMBER() OVER (PARTITION BY NODE ORDER BY FIRSTOCCURRENCE) AS rn
    FROM Test_Table
    -- Optional filter for the last 24 hours:
    --WHERE FIRSTOCCURRENCE >= DATEADD(hh, -24, GETDATE())
)
SELECT
    -- GROUPING(rn) = 1 marks the per-node subtotal row produced by the (NODE) set
    CASE WHEN GROUPING(rn) = 1 THEN 'SITE TOTAL' ELSE NODE END                 AS NODE,
    CASE WHEN GROUPING(rn) = 1 THEN NULL ELSE MIN(EVENTID) END                 AS EVENTID,
    CASE WHEN GROUPING(rn) = 1 THEN NULL ELSE MIN(TYPE) END                    AS TYPE,
    CASE WHEN GROUPING(rn) = 1 THEN NULL ELSE MIN(FIRSTOCCURRENCE) END         AS FIRSTOCCURRENCE,
    CASE WHEN GROUPING(rn) = 1 THEN NULL ELSE MIN(LASTOCCURRENCE) END          AS LASTOCCURRENCE,
    CASE WHEN GROUPING(rn) = 1 THEN NULL ELSE MIN(Tally) END                   AS Tally,
    CASE WHEN GROUPING(rn) = 1 THEN NULL ELSE MIN(TicketNumber) END            AS TicketNumber,
    -- NODE is part of both grouping sets, so GROUPING(NODE) is always 0 and a
    -- CASE on it can never take its first branch; SUM alone covers both row kinds
    -- (a detail group holds exactly one event).
    SUM(OutageInMin)                                                           AS "Outage In MIN"
FROM data
GROUP BY GROUPING SETS ( (NODE, rn), (NODE) )
-- detail rows first (GROUPING(rn) = 0), then the node's total row
ORDER BY data.NODE, GROUPING(rn), rn;
http://rextester.com/DZIHJ81264
GROUP BY is only allowed in SQL when you are aggregating something. The simplest example is a count.
Example: you want to know how many EventIDs are linked to a given node:
-- Number of non-NULL EventId values recorded for each node.
SELECT
    COUNT(EventId),
    node
FROM xyz.xyz.xyz_status
GROUP BY node;
Here is a site that explains the GROUP BY clause. If you clarify what you are looking for, we can give you a more concrete example.

How can I only pull back the latest result in SQL?

I'm struggling to find out how to only return the latest iteration of the claimtid in the result set. I'm using this query:
-- Query quoted from the question: maps each activity code to a readable status
-- for a set of claim-id prefixes. It does not parse as written (see notes).
SELECT
-- NOTE: a comma is missing between paiddate and the CASE expression, and the
-- select list says claimid while the WHERE / ORDER BY clauses say claimtid.
claimid, paiddate
CASE
WHEN actid = '119' THEN 'Channel Exception'
WHEN actid = '127' THEN 'Rejected'
WHEN actid = '128' THEN 'Accepted'
WHEN actid = '130' THEN 'Adjustment Complete'
WHEN actid = '133' THEN 'Channel Ready'
END AS [Status]
FROM
Encounter
WHERE
claimtid LIKE '173225AR0%' OR claimtid LIKE '197565GL0%' OR
-- NOTE: the last LIKE has no pattern; the filter list appears to have been cut short.
claimtid LIKE '293215QW0%' OR claimtid LIKE
ORDER BY
claimtid
This query returns the following result:
|claimtid |paiddt |Status |
-+----------+----------+-------------------+
1|173225AR00|2017-03-01|Adjustment Complete|
2|173225AR01|2017-04-11|Accepted |
3|197565GL00|2017-03-17|Accepted |
4|197565GL01|2017-03-19|Adjustment Complete|
5|197565GL02|2017-04-01|Rejected |
6|293215QW00|2017-04-19|Adjustment Complete|
7|293215QW01|2017-04-23|Accepted |
I'm not sure what I can add to my query so that the results will only bring back lines 2, 5, and 7. My actual query produces more rows in the result.
This is only an example, but is accurate to the situation. I'll need to pull back more than 3 rows, but it needs to be the latest iteration.
Each additional iteration makes the last number in the claimtid go up by one. I won't know how many iterations there are of each claimtid.
Try this, but I am not sure of the performance penalty for using a string for claimtid as in your case:
-- Latest iteration of each claim, taking "latest" to mean the greatest paiddt
-- among the rows that share a claim prefix.
-- Column names follow the result header shown in the question (claimtid, paiddt);
-- NOTE(review): the question's own query also writes claimid / paiddate -- confirm
-- against the real Encounter table.
-- The prefix is taken as the first 8 characters (e.g. '173225AR'), leaving the
-- trailing 2-digit iteration counter out -- TODO confirm the id layout.
SELECT
    e.claimtid,
    e.paiddt
FROM Encounter AS e
INNER JOIN
(
    SELECT
        LEFT(claimtid, 8) AS claim_prefix,
        MAX(paiddt)       AS latest_paiddt
    FROM Encounter
    GROUP BY LEFT(claimtid, 8)
) AS latest
    ON  LEFT(e.claimtid, 8) = latest.claim_prefix
    AND e.paiddt = latest.latest_paiddt;
Assuming you can count on the paiddate field to know the most recent record.
If you can't separate the two fields of the claim ID into separate columns of the data table itself, you can pull them apart in a query, to allow you to use the max() aggregate to find the largest value of the second field.
-- Latest iteration of each claim: split the id into a 6-character claim part and
-- a 4-character sequence part, find the greatest sequence per claim, then join
-- back to Encounter to pick up that row's other columns.
-- MAX() compares the sequence as text, which orders correctly only while the
-- counter is zero-padded to a fixed width (as in 'AR00', 'AR01').
SELECT
    ms.claim,
    e.paiddate,
    CASE
        WHEN e.actid = '119' THEN 'Channel Exception'
        WHEN e.actid = '127' THEN 'Rejected'
        WHEN e.actid = '128' THEN 'Accepted'
        WHEN e.actid = '130' THEN 'Adjustment Complete'
        WHEN e.actid = '133' THEN 'Channel Ready'
    END AS [Status]
FROM
(
    SELECT
        LEFT(claimid, 6)       AS claim,
        MAX(RIGHT(claimid, 4)) AS seq
    FROM Encounter
    GROUP BY LEFT(claimid, 6)
) AS ms
INNER JOIN Encounter AS e
    -- re-assemble the full id of the winning row
    ON e.claimid = ms.claim + ms.seq;
This should do the trick...
-- Rebuild the scratch table holding the sample rows from the question.
IF OBJECT_ID('tempdb..#ClaimData', 'U') IS NOT NULL
    DROP TABLE #ClaimData;

CREATE TABLE #ClaimData
(
    RN       INT         NOT NULL IDENTITY(1,1),
    claimtid CHAR(10)    NOT NULL,
    paiddt   DATE        NOT NULL,
    [Status] VARCHAR(20) NOT NULL
);

INSERT #ClaimData (claimtid, paiddt, Status)
VALUES
    ('173225AR00', '2017-03-01', 'Adjustment Complete'),
    ('173225AR01', '2017-04-11', 'Accepted'),
    ('197565GL00', '2017-03-17', 'Accepted'),
    ('197565GL01', '2017-03-19', 'Adjustment Complete'),
    ('197565GL02', '2017-04-01', 'Rejected'),
    ('293215QW00', '2017-04-19', 'Adjustment Complete'),
    ('293215QW01', '2017-04-23', 'Accepted');

-- Latest iteration per claim. Every claim's highest-suffix row gets row number 1,
-- and TOP 1 WITH TIES returns all rows tied on that first sort value.
SELECT TOP 1 WITH TIES
    c.RN,
    c.claimtid,
    c.paiddt,
    c.Status
FROM #ClaimData AS c
-- claimtid is CHAR(10): characters 1-6 are the claim, 7-10 the iteration suffix
CROSS APPLY ( VALUES (LEFT(c.claimtid, 6), RIGHT(c.claimtid, 4)) ) AS parts (claim_part, iteration_part)
ORDER BY
    ROW_NUMBER() OVER (PARTITION BY parts.claim_part ORDER BY parts.iteration_part DESC);
Results...
RN claimtid paiddt Status
----------- ---------- ---------- --------------------
2 173225AR01 2017-04-11 Accepted
5 197565GL02 2017-04-01 Rejected
7 293215QW01 2017-04-23 Accepted
Edit...
A slightly better performing solution that produces the same results...
-- Picks one row per 6-character claim prefix by packing all four columns into a
-- single fixed-width binary value and taking MAX() of it. claimtid is the first
-- field, so within a group the packed value with the highest claimtid sorts last.
-- Packed layout (1-based byte offsets, matching the SUBSTRING calls below):
--   1-10 claimtid | 11-18 paiddt | 19-38 Status | 39-42 RN
-- NOTE(review): relies on the DATE/VARCHAR <-> BINARY casts round-tripping
-- cleanly; verify on the target server version.
SELECT
RN = CAST(SUBSTRING(MAX(bv.BinaryValue), 39, 4) AS INT),
claimtid = CAST(SUBSTRING(MAX(bv.BinaryValue), 1, 10) AS CHAR(10)),
paiddt = CAST(SUBSTRING(MAX(bv.BinaryValue), 11, 8) AS DATE),
Status = CAST(SUBSTRING(MAX(bv.BinaryValue), 19, 20) AS VARCHAR(20))
FROM
#ClaimData cd
-- build the 42-byte packed value once per row
CROSS APPLY ( VALUES (CAST(cd.claimtid AS BINARY(10)) + CAST(cd.paiddt AS BINARY(8)) + CAST(cd.Status AS BINARY(20)) + CAST(cd.RN AS BINARY(4))) ) bv (BinaryValue)
GROUP BY
-- one group per claim: the first 6 characters of claimtid
SUBSTRING(cd.claimtid, 1, 6);
Try this, assuming you can't have a early claimtid with a later paiddt:
-- Rebuild the scratch table with the sample rows from the question.
IF OBJECT_ID('tempdb..#ClaimData') IS NOT NULL
    DROP TABLE #ClaimData;

CREATE TABLE #ClaimData
(
    ID       INT         NOT NULL IDENTITY(1,1),
    claimtid CHAR(10)    NOT NULL,
    paiddt   DATE        NOT NULL,
    [Status] VARCHAR(20) NOT NULL
);

INSERT #ClaimData (claimtid, paiddt, Status)
VALUES
    ('173225AR00', '2017-03-01', 'Adjustment Complete'),
    ('173225AR01', '2017-04-11', 'Accepted'),
    ('197565GL00', '2017-03-17', 'Accepted'),
    ('197565GL01', '2017-03-19', 'Adjustment Complete'),
    ('197565GL02', '2017-04-01', 'Rejected'),
    ('293215QW00', '2017-04-19', 'Adjustment Complete'),
    ('293215QW01', '2017-04-23', 'Accepted');

-- Number the iterations of each claim from newest to oldest paid date, where a
-- claim is the id without its last two characters, and keep only the newest.
WITH ranked AS
(
    SELECT
        ID,
        claimtid,
        paiddt,
        [Status],
        ROW_NUMBER() OVER (
            PARTITION BY LEFT(claimtid, LEN(claimtid) - 2)
            ORDER BY paiddt DESC
        ) AS RN
    FROM #ClaimData
)
SELECT
    ID,
    claimtid,
    paiddt,
    [Status]
FROM ranked
WHERE RN = 1;
Otherwise change RN to ROW_NUMBER() OVER (PARTITION BY LEFT(Claimtid, LEN(Claimtid) - 2) ORDER BY RIGHT(Claimtid, 2) DESC) RN

How to replace the 'Strings' with numerical values based on a group by clause

I have the below table (#temp1) where I need to replace the string in the column'Formula' with the matching input 'VALUE' column based on the group 'Yearmonth'.
The 'Formula' column may be of any mathematical expression for better understanding I have mentioned a simple example below.
IDNUM formula INPUTNAME VALUE YEARMONTH
---------------------------------------------------------------------
1 imports(398)+imports(399) imports(398) 17.000 2003:1
2 imports(398)+imports(399) imports(398) 56.000 2003:2
3 imports(398)+imports(399) imports(399) 15.000 2003:1
4 imports(398)+imports(399) imports(399) 126.000 2003:2
For eg :From the above table i need the output as
Idnum Formula Yearmonth
1. 17.00 +15.00 2003:1
2. 56.00 +126.00 2003:2
I tried the different queries below, based on various suggestions, but couldn't achieve it. Could someone help me out?
Type1 :
-- Attempt 1 from the question: substitute the input name in the formula directly.
-- NOTE: AttributeValue is not among the columns shown for #temp1 (the sample
-- lists VALUE), and FORMULA / INPUTName are neither grouped nor aggregated while
-- the query groups by yearmonth only.
SELECT
REPLACE(FORMULA, INPUTName, AttributeValue) AS realvalues,
yearmonth
FROM #temp1
GROUP BY yearmonth
TYPE2 :
Using FOR XML PATH. This worked, but I need to replace only the input names with their values rather than stitch the values together with a fixed operator, because the formula might be any mathematical expression.
-- Attempt 2 from the question: for each YEARMONTH, join all of its values into
-- one ' +'-separated string. FOR XML PATH('') does the concatenation and STUFF
-- removes the leading two-character separator.
SELECT
    MIN(t1.IDNUM) AS IDNUM,
    STUFF(
        (
            SELECT ' +' + CONVERT(VARCHAR(10), v.Value)
            FROM #temp1 AS v
            WHERE v.YEARMONTH = t1.YEARMONTH
            FOR XML PATH('')
        ),
        1, 2, ''
    ) AS FORMULA,
    t1.YEARMONTH
FROM #TEMP1 AS t1
GROUP BY t1.YEARMONTH
TYPE3:Using Recursions...This is returning only the null values...
-- Attempt 3 from the question: a recursive CTE meant to apply one replacement
-- per step within each yearmonth.
;with t as (
-- number the rows of each yearmonth and record how many rows the group has
select t.*,
row_number() over (partition by yearmonth order by idnum) as seqnum,
count(*) over (partition by yearmonth) as cnt
from #temp1 t
)
,cte as (
-- anchor: the first row of each yearmonth with its own input name replaced
select t.seqnum, t.yearmonth, t.cnt,
replace(formula, inputname, AttributeValue) as formula1
from t
where seqnum = 1
union all
-- recursive step
-- NOTE: this selects cte.seqnum rather than t.seqnum, so seqnum never advances,
-- and the join below matches t.seqnum = cte.seqnum - 1, i.e. the previous row
-- rather than the next one.
select cte.seqnum, cte.yearmonth, cte.cnt,
replace(CTE.formula1, T.inputname, T.AttributeValue) as formula2
from cte join
t
on cte.yearmonth = t.yearmonth
AND cte.seqnum = t.seqnum + 1
)
-- keeps only the rows whose seqnum equals the row count of their yearmonth
select row_number() over (order by (select null)) as id,formula1
from cte
where seqnum = cnt
This is full working example using recursive CTE:
-- Worked example: substitute every INPUTNAME that appears in a formula with its
-- VALUE, one substitution per recursion step, separately for each YEARMONTH.
-- The sample rows live in a table variable, which must be named with '@'.
DECLARE @DataSource TABLE
(
    [IDNUM]     TINYINT
   ,[formula]   VARCHAR(MAX)
   ,[INPUTNAME] VARCHAR(128)
   ,[VALUE]     DECIMAL(9,3)
   ,[YEARMONTH] VARCHAR(8)
);

INSERT INTO @DataSource ([IDNUM], [formula], [INPUTNAME], [VALUE], [YEARMONTH])
VALUES (1, 'imports(398)+imports(399)', 'imports(398)', 17.000, '2003:1')
      ,(2, 'imports(398)+imports(399)', 'imports(398)', 56.000, '2003:2')
      ,(3, 'imports(398)+imports(399)', 'imports(399)', 15.000, '2003:1')
      ,(4, 'imports(398)+imports(399)', 'imports(399)', 126.000, '2003:2')
      ,(5, '(imports(391)+imports(392)-imports(393))/imports(394)', 'imports(391)', 5.000, '2003:3')
      ,(6, '(imports(391)+imports(392)-imports(393))/imports(394)', 'imports(392)', 10.000, '2003:3')
      ,(7, '(imports(391)+imports(392)-imports(393))/imports(394)', 'imports(393)', 3.000, '2003:3')
      ,(8, '(imports(391)+imports(392)-imports(393))/imports(394)', 'imports(394)', -5.000, '2003:3');

WITH DataSource AS
(
    -- Number the replacements of each YEARMONTH so they can be applied in order.
    SELECT ROW_NUMBER() OVER (PARTITION BY [YEARMONTH] ORDER BY [IDNUM]) AS [ReplacementOrderID]
          ,[YEARMONTH]
          ,[formula]
          ,[INPUTNAME] AS [ReplacementString]
          -- converted to text explicitly, since REPLACE works on strings
          ,CAST([VALUE] AS VARCHAR(20)) AS [ReplacementValue]
    FROM @DataSource
),
RecursiveDataSource AS
(
    -- Anchor: apply replacement #1 to the untouched formula.
    SELECT [ReplacementOrderID]
          ,[YEARMONTH]
          ,REPLACE([formula], [ReplacementString], [ReplacementValue]) AS [formula]
    FROM DataSource
    WHERE [ReplacementOrderID] = 1
    UNION ALL
    -- Step: apply replacement N+1 to the formula produced by step N.
    SELECT DS.[ReplacementOrderID]
          ,DS.[YEARMONTH]
          ,REPLACE(RDS.[formula], DS.[ReplacementString], DS.[ReplacementValue]) AS [formula]
    FROM RecursiveDataSource RDS
    INNER JOIN DataSource DS
        ON  RDS.[ReplacementOrderID] + 1 = DS.[ReplacementOrderID]
        AND RDS.[YEARMONTH] = DS.[YEARMONTH]
)
-- Keep only the final step of each YEARMONTH, where every replacement is applied.
SELECT RDS.[YEARMONTH]
      ,RDS.[formula]
FROM RecursiveDataSource RDS
INNER JOIN
(
    SELECT [YEARMONTH]
          ,MAX([ReplacementOrderID]) AS [ReplacementOrderID]
    FROM DataSource
    GROUP BY [YEARMONTH]
) DS
    ON  RDS.[YEARMONTH] = DS.[YEARMONTH]
    AND RDS.[ReplacementOrderID] = DS.[ReplacementOrderID]
ORDER BY RDS.[YEARMONTH];
Generally, you simply want to perform multiple replacements over a string in one statement. You can have many replacement values, just play with the MAXRECURSION option.
--Create sample data
--Create sample data (the drop is guarded so the script also runs the first time)
IF OBJECT_ID('tempdb..#temp1') IS NOT NULL
    DROP TABLE #temp1;

CREATE TABLE #temp1
(
    IDNUM     int,
    formula   varchar(max),
    INPUTNAME varchar(max),
    -- explicit scale: a bare "decimal" is decimal(18,0) and would round 17.5 to 18
    VALUE     decimal(9,3),
    YEARMONTH varchar(max)
);

INSERT INTO #temp1 (IDNUM, formula, INPUTNAME, VALUE, YEARMONTH) VALUES
    (1, 'imports(398)+imports(399)', 'imports(398)', 17.000, '2003:1'),
    (2, 'imports(398)+imports(399)', 'imports(398)', 56.000, '2003:2'),
    (3, 'imports(398)+imports(399)', 'imports(399)', 15.000, '2003:1'),
    (4, 'imports(398)+imports(399)', 'imports(399)', 126.000, '2003:2');

--Query
-- Anchor: every row with its untouched formula.
-- Step: replace any input name still present in the formula with that input's
-- value for the same YEARMONTH; recursion ends when no names are left to replace.
WITH t AS (
    SELECT formula, YEARMONTH, IDNUM
    FROM #temp1
    UNION ALL
    SELECT
        REPLACE(a.formula, b.INPUTNAME, CAST(b.VALUE AS varchar(100))) AS formula,
        a.YEARMONTH,
        a.IDNUM
    FROM t AS a
    INNER JOIN #temp1 AS b
        ON  a.YEARMONTH = b.YEARMONTH
        AND a.formula LIKE '%' + b.INPUTNAME + '%'
)
-- Keep only fully substituted formulas; different replacement orders reach the
-- same final text, so GROUP BY collapses them to one row per YEARMONTH.
SELECT MIN(IDNUM) AS IDNUM, formula, YEARMONTH
FROM t
WHERE formula NOT LIKE '%imports(%'
GROUP BY formula, YEARMONTH;

dynamically select column name that changed

I have a table as shown below.
ID NAME ADDRESS CITY ROLE Date_Modified
1 Tom something austin manager X
2 Tom nothing austin principal Y
3 Tom anything dallas VP Z
How do I write a query to select the names of the columns that have changed between entries 1, 2 and 3? Currently I am building a report that needs to identify changes. This is what I have so far and need to work with it.
I need to be able to detect via stored proc and see output below.
Id ColumnName DateChanged
2 Address Y
2 Role Y
3 Address Z
3 Role Z
If I understood your question correctly, what you need is to detect changes from one row to the next and unpivot the data. LAG requires SQL Server 2012 or later.
-- Reports, for every row after the first, which monitored columns (address,
-- role) differ from the previous row, one output row per changed column.
;WITH cte AS (
    -- LAG for id is used to skip the first row, which has no predecessor
    SELECT
        id,      LAG(id, 1)      OVER (ORDER BY id) AS OldId,
        address, LAG(address, 1) OVER (ORDER BY id) AS OldAddress,
        role,    LAG(role, 1)    OVER (ORDER BY id) AS OldRole,
        Date_Modified
    FROM audit_data
)
SELECT id, ColName, data_col, Date_Modified
FROM
(
    -- A column keeps its value only if it changed; unchanged columns become NULL.
    -- UNPIVOT skips NULLs, so only the changed columns are reported. Filtering
    -- whole rows instead would also list the columns that stayed the same.
    SELECT
        id,
        CASE WHEN OldAddress IS NULL OR address <> OldAddress THEN address END AS address,
        CASE WHEN OldRole    IS NULL OR role    <> OldRole    THEN role    END AS role,
        Date_Modified
    FROM cte
    WHERE OldId IS NOT NULL
) AS cp
-- unpivot address and role into data_col column
UNPIVOT
(
    data_col FOR ColName IN (address, role)
) AS up;
Data used for setup:
-- drop table audit_data
-- Sample audit history used by the change-detection queries.
create table audit_data (
    id            int,
    name          VARCHAR(100),
    address       VARCHAR(100),
    city          varchar(100),
    role          VARCHAR(100),
    Date_Modified DATETIME2
)
-- Columns are named explicitly so the insert does not depend on column order.
insert into audit_data (id, name, address, city, role, Date_Modified)
values
    (1, 'Tom', 'something', 'austin', 'manager',   '20150103'),
    (2, 'Tom', 'nothing',   'austin', 'principal', '20150205'),
    (3, 'Tom', 'anything',  'dallas', 'VP',        '20150314')
go
[Edit] SQL 2008R2 version:
-- SQL Server 2008 R2 variant (no LAG): number the rows, join each row to its
-- predecessor, and report the monitored columns that changed.
;WITH ad_cte AS (
    SELECT
        id, address, role, Date_Modified,
        ROW_NUMBER() OVER (ORDER BY id) AS RowNo
    FROM audit_data
),
cte AS (
    -- The inner join pairs each row with the previous one; the first row has no
    -- predecessor and drops out here, so no id-based filter is needed later.
    SELECT
        ad.id,
        ad.address, ad_old.address AS OldAddress,
        ad.role,    ad_old.role    AS OldRole,
        ad.Date_Modified
    FROM ad_cte AS ad
    INNER JOIN ad_cte AS ad_old
        ON ad_old.RowNo + 1 = ad.RowNo
)
SELECT id, ColName, data_col, Date_Modified
FROM
(
    -- Unchanged columns become NULL and are skipped by UNPIVOT, so only the
    -- columns that really changed are reported.
    SELECT
        id,
        CASE WHEN OldAddress IS NULL OR address <> OldAddress THEN address END AS address,
        CASE WHEN OldRole    IS NULL OR role    <> OldRole    THEN role    END AS role,
        Date_Modified
    FROM cte
) AS cp
-- unpivot address and role into data_col column
UNPIVOT
(
    data_col FOR ColName IN (address, role)
) AS up;
This is very similar to Alexei's answer:
-- Sample history for two people, three versions each.
CREATE TABLE #temp
(
    ID            INT IDENTITY(1, 1),
    NAME          VARCHAR(30),
    ADDRESS       VARCHAR(30),
    CITY          VARCHAR(30),
    ROLE          VARCHAR(30),
    Date_Modified DATETIME
);

-- All six rows go in with one statement, so the Jon rows are inserted too.
INSERT INTO #temp (NAME, ADDRESS, CITY, ROLE, Date_Modified)
VALUES
    ('Tom', 'something', 'austin',      'manager',           DATEADD(day, -3, GETDATE())),
    ('Tom', 'nothing',   'austin',      'principal',         DATEADD(day, -2, GETDATE())),
    ('Tom', 'anything',  'dallas',      'VP',                DATEADD(day, -1, GETDATE())),
    ('Jon', 'something', 'san antonio', 'assistant manager', DATEADD(day, -3, GETDATE())),
    ('Jon', 'something', 'austin',      'assistant manager', DATEADD(day, -2, GETDATE())),
    ('Jon', 'anything',  'dallas',      'manager',           DATEADD(day, -1, GETDATE()));

-- For every version after a person's first, list the monitored columns
-- (address, role) whose value differs from the previous version. Rows are
-- numbered oldest-first so a change is reported on the version that introduced it.
WITH versions AS
(
    SELECT
        ID, NAME, ADDRESS, ROLE, Date_Modified,
        ROW_NUMBER() OVER (PARTITION BY NAME ORDER BY Date_Modified) AS ROWNUM
    FROM #temp
)
SELECT
    id,
    ColName,
    Date_Modified
FROM
(
    -- Unchanged columns become NULL; UNPIVOT skips NULLs, so a version with no
    -- monitored change produces no output rows at all.
    SELECT
        cur.ID,
        CASE WHEN prev.ADDRESS <> cur.ADDRESS THEN cur.ADDRESS END AS ADDRESS,
        CASE WHEN prev.ROLE    <> cur.ROLE    THEN cur.ROLE    END AS ROLE,
        cur.Date_Modified
    FROM versions AS prev
    INNER JOIN versions AS cur
        ON  cur.NAME = prev.NAME
        AND cur.ROWNUM = prev.ROWNUM + 1
) AS cp
UNPIVOT( data FOR ColName IN( address,
                              role )) AS up;

Remove duplicates with less null values

I have a table of employees which contains about 25 columns. Right now there are a lot of duplicates and I would like to try and get rid of some of these duplicates.
First, I want to find the duplicates by looking for multiple records that have the same values in first name, last name, employee number, company number and status.
-- Lists every (firstname, lastname, employeenumber, companynumber, statusflag)
-- combination that appears on more than one row.
SELECT
    em.firstname,
    em.lastname,
    em.employeenumber,
    em.companynumber,
    em.statusflag
FROM employeemaster AS em
GROUP BY
    em.firstname,
    em.lastname,
    em.employeenumber,
    em.companynumber,
    em.statusflag
HAVING COUNT(*) > 1
This gives me duplicates but my goal is to find and keep the best single record and delete the other records. The "best single record" is defined by the record with the least amount of NULL values in all of the other columns. How can I do this?
I am using Microsoft SQL Server 2012 MGMT Studio.
EXAMPLE:
Red: DELETE
Green: KEEP
NOTE: There are a lot more columns in the table than what this table shows.
You can use the sys.columns table to get a list of columns and build a dynamic query. This query will return a 'KeepThese' value for every record you want to keep based on your given criteria.
-- insert test data
-- insert test data
create table EmployeeMaster
(
    Record         int identity(1,1),
    FirstName      varchar(50),
    LastName       varchar(50),
    EmployeeNumber int,
    CompanyNumber  int,
    StatusFlag     int,
    UserName       varchar(50),
    Branch         varchar(50)
);

insert into EmployeeMaster
(
    FirstName,
    LastName,
    EmployeeNumber,
    CompanyNumber,
    StatusFlag,
    UserName,
    Branch
)
values
    ('Jake','Jones',1234,1,1,'JJONES','PHX'),
    ('Jake','Jones',1234,1,1,NULL,'PHX'),
    ('Jake','Jones',1234,1,1,NULL,NULL),
    ('Jane','Jones',5678,1,1,'JJONES2',NULL);

-- get records with most non-null values with dynamic sys.column query
-- The generated query turns every column of EmployeeMaster into one row of a
-- derived table ("cast(col as varchar(50)) as value union all select ..."),
-- counts the non-NULL values per employee row, and numbers the rows of each
-- duplicate group from most to fewest non-NULLs; KeepThese = 1 marks the keeper.
-- The trailing '#' is a sentinel that lets REPLACE strip the final, dangling
-- ' union all select '. Column names come from sys.columns and are bracketed
-- with QUOTENAME so names needing quoting still work. The variable is a local
-- variable and must be named with '@'.
declare @sql varchar(max);

select @sql = '
select e.*,
row_number() over(partition by
e.FirstName,
e.LastName,
e.EmployeeNumber,
e.CompanyNumber,
e.StatusFlag
order by n.NonNullCnt desc) as KeepThese
from EmployeeMaster e
cross apply (select count(n.value) as NonNullCnt from (select ' +
    replace((
        select 'cast(' + quotename(c.name) + ' as varchar(50)) as value union all select '
        from sys.columns c
        where c.object_id = t.object_id
        for xml path('')
    ) + '#',' union all select #','') + ')n)n'
from sys.tables t
where t.name = 'EmployeeMaster';

exec(@sql);
Try this.
-- Picks the best row of each duplicate group, where "best" is the row with the
-- fewest NULLs among the non-key columns. The table is read once: the window
-- function partitions by the key columns, so joining back to the table is not
-- needed (and PARTITION BY also groups rows whose key columns are NULL).
;WITH ranked AS
(
    SELECT
        firstname,
        lastname,
        employeenumber,
        companynumber,
        statusflag,
        username,
        branch,
        ROW_NUMBER() OVER (
            PARTITION BY firstname, lastname, employeenumber, companynumber, statusflag
            ORDER BY
                  CASE WHEN username IS NULL THEN 1 ELSE 0 END
                + CASE WHEN branch   IS NULL THEN 1 ELSE 0 END
                -- add one CASE term for each remaining nullable column
        ) AS rn
    FROM employeemaster
)
SELECT
    firstname,
    lastname,
    employeenumber,
    companynumber,
    statusflag,
    rn,
    -- shown so the kept record can be told apart from its duplicates
    username,
    branch
FROM ranked
WHERE rn = 1;
I test with MySQL and use NULL String concat to found the best record. Because LENGTH ( NULL || 'data') is 0. Only if all column not NULL some length exists. Maybe this is not perfekt.
-- MySQL version of the sample table; Record is the auto-generated primary key.
CREATE TABLE EmployeeMaster
(
    Record         INT AUTO_INCREMENT PRIMARY KEY,
    FirstName      VARCHAR(50),
    LastName       VARCHAR(50),
    EmployeeNumber INT,
    CompanyNumber  INT,
    StatusFlag     INT,
    UserName       VARCHAR(50),
    Branch         VARCHAR(50)
);

-- Three duplicates of Jake Jones with decreasing completeness, plus one Jane Jones.
INSERT INTO EmployeeMaster
    (FirstName, LastName, EmployeeNumber, CompanyNumber, StatusFlag, UserName, Branch)
VALUES
    ('Jake', 'Jones', 1234, 1, 1, 'JJONES',  'PHX'),
    ('Jake', 'Jones', 1234, 1, 1, NULL,      'PHX'),
    ('Jake', 'Jones', 1234, 1, 1, NULL,      NULL),
    ('Jane', 'Jones', 5678, 1, 1, 'JJONES2', NULL);
My query idea looks like this
-- Best record of every duplicate group (MySQL).
-- Data quality is the number of non-NULL optional columns: in MySQL a comparison
-- such as (username IS NOT NULL) evaluates to 1 or 0, so the terms can be added.
-- In default MySQL `||` means logical OR rather than concatenation, so it cannot
-- be used to build a score here.
-- Rows are matched back to their own group on the key columns; `<=>` is the
-- NULL-safe equality operator, so groups with a NULL key still match.
-- If several rows of a group tie for best quality, all of them are returned.
SELECT e.*
FROM EmployeeMaster AS e
INNER JOIN
(
    SELECT
        firstname,
        lastname,
        employeenumber,
        companynumber,
        statusflag,
        MAX((username IS NOT NULL) + (branch IS NOT NULL)) AS data_quality
    FROM EmployeeMaster
    GROUP BY firstname, lastname, employeenumber, companynumber, statusflag
    HAVING COUNT(*) > 1
) AS g
    ON  e.firstname      <=> g.firstname
    AND e.lastname       <=> g.lastname
    AND e.employeenumber <=> g.employeenumber
    AND e.companynumber  <=> g.companynumber
    AND e.statusflag     <=> g.statusflag
    AND (e.username IS NOT NULL) + (e.branch IS NOT NULL) = g.data_quality;