How to get the name and values of columns which have different values in the same table's consecutive rows - sql

I have a table History with columns as follows:
HistoryId---User---Tag---Updateddate----DeptId
1 B12 abc 10-08-2017 D34
2 B24 abc 11-08-2017 D34
3 B24 def 12-08-2017 D34
I have a query
SELECT
*
FROM History
WHERE DeptId = 'D34'
ORDER BY Updateddate
The result of this query gives me the above 3 rows.
Now out of these rows I want the name and value of columns which have different values in consecutive rows.
Something like:
HistoryId-----Column-----Value
2 User B24
3 Tag def
Is there a way to do this?

If there are no null values for tag or user, then one way you could achieve this is by using a combination of LAG() and a CROSS APPLY to check whether any values are different.
SELECT H.HistoryID, C.Col, C.Val
FROM (
SELECT *, PrevUser = LAG([User]) OVER (ORDER BY HistoryID), PrevTag = LAG([Tag]) OVER (ORDER BY HistoryID)
FROM [History] AS H
) AS H
CROSS APPLY (
VALUES
('User', CASE WHEN PrevUser != [User] THEN [User] END),
('Tag', CASE WHEN PrevTag != Tag THEN Tag END)
) AS C(Col, Val)
WHERE C.Val IS NOT NULL;
If there are null values, it gets a bit more complicated, but the basic idea would be the same, you'd just have to add in rules to check for null values (and ignore the first row).
EDIT: If you needed to check for null values too, one way to do it would be like the following...
DECLARE #History TABLE (HistoryID INT, [User] CHAR(3), [Tag] CHAR(3));
INSERT #History VALUES
(1,'B12','abc'),
(2,'B24','abc'),
(3,'B24','def'),
(4,NULL,'def'),
(5,'A24',NULL),
(6,NULL,NULL),
(7,'123','456');
SELECT H.HistoryID, C.Col, C.Val
FROM (
SELECT *, RN = ROW_NUMBER() OVER (ORDER BY HistoryID), PrevUser = LAG([User]) OVER (ORDER BY HistoryID), PrevTag = LAG([Tag]) OVER (ORDER BY HistoryID)
FROM #History AS H
) AS H
CROSS APPLY (
VALUES
('User', CASE WHEN RN != 1 AND (PrevUser != [User] OR (PrevUser IS NULL AND [User] IS NOT NULL) OR (PrevUser IS NOT NULL AND [User] IS NULL)) THEN [User] END, CASE WHEN RN != 1 AND (PrevUser != [User] OR (PrevUser IS NULL AND [User] IS NOT NULL) OR (PrevUser IS NOT NULL AND [User] IS NULL)) THEN 1 END),
('Tag', CASE WHEN RN != 1 AND (PrevTag != Tag OR (PrevTag IS NULL AND Tag IS NOT NULL) OR (PrevTag IS NOT NULL AND Tag IS NULL)) THEN Tag END, CASE WHEN RN != 1 AND (PrevTag != Tag OR (PrevTag IS NULL AND Tag IS NOT NULL) OR (PrevTag IS NOT NULL AND Tag IS NULL)) THEN 1 END)
) AS C(Col, Val, Chk)
WHERE C.Chk = 1;

You can unpivot your results and then check them for change over consecutive rows like below
select
HistoryId,
column,
values
into #temp_up_values
from
(
select *
from History
where DeptId='D34'
)src
unpivot
(
values
for column
in ([User],[Tag],[Updateddate],[DeptId])
)up
select t2.*
from
#temp_up_values t1
join
#temp_up_values t2
on t1.HistoryId=t2.HistoryId +1
and t1.column=t2.column and t1.values<>t2.values

Related

Get single row depending of conditional

I have a simple select query with some joins like:
SELECT
[c].[column1]
, [c].[column2]
FROM [Customer] AS [c]
INNER JOIN ...
So I do a left join with my principal table as:
LEFT JOIN [Communication] AS [com] ON [c].[CustomerGuid] = [com].[ComGuid]
this relatioship its 1 to *, one customer can have multiple communications
So in my select I want to get value 1 or 2 depending of condition:
Condition:
if ComTypeKey (from communication) table have a row with value 3 and have another row with vale 4 return 1 then 0
So I try something like:
SELECT
[c].[column1]
, [c].[column2]
, IIF([com].[ComTypeKey] = 3 AND [com].[ComTypeKey] = 4,1,0)
FROM [Customer] AS [c]
INNER JOIN ...
LEFT JOIN [Communication] AS [com] ON [c].[CustomerGuid] = [com].[ComGuid]
But it throws me two rows, beacause there are 2 rows on communication. My desire value is to get only one row with value 1 if my condition is true
If you have multiple rows you need GROUP BY, then count the relevant keys and subtract 1 to get (1, 0)
SELECT
[c].[column1]
, [c].[column2]
, COUNT(CASE WHEN [ComTypeKey] IN (3,4) THEN 1 END) - 1 as FLAG_CONDITION
FROM [Customer] AS [c]
INNER JOIN ...
LEFT JOIN [Communication] AS [com]
ON [c].[CustomerGuid] = [com].[ComGuid]
GROUP BY
[c].[column1]
, [c].[column2]
I'm not really sure I understand.
This will literally find if both values 3 and 4 exist for that CustomerGuid, and only select one of them in that case - not filtering out any record otherwise.
If this is not what you want, providing sample data with the expected result would remove the ambiguity.
SELECT Field1,
Field2,
...
FieldN
FROM (SELECT TMP.*,
CASE WHEN hasBothValues = 1 THEN
ROW_NUMBER() OVER ( PARTITION BY CustomerGuid ORDER BY 1 )
ELSE 1
END AS iterim_rn
FROM (SELECT TD.*,
MAX(CASE WHEN Value1 = '3' THEN 1 ELSE 0 END) OVER
( PARTITION BY CustomerGuid ) *
MAX(CASE WHEN Value1 = '4' THEN 1 ELSE 0 END) OVER
( PARTITION BY CustomerGuid ) AS hasBothValues
FROM TEST_DATA TD
) TMP
) TMP2
WHERE interim_rn = 1

SQL Server case when or enum

I have a table something like:
stuff type price
first_stuff 1 43
second_stuff 2 46
third_stuff 3 24
fourth_stuff 2 12
fifth_stuff NULL 90
And for every type of stuff is assigned a description which is not stored in DB
1 = Bad
2 = Good
3 = Excellent
NULL = Not_Assigned
All I want is to return a table which count each type separately, something like:
Description Count
Bad 1
Good 2
Excellent 1
Not_Assigned 1
DECLARE #t TABLE ([type] INT)
INSERT INTO #t ([type])
VALUES (1),(2),(3),(2),(NULL)
SELECT
[Description] =
CASE t.[type]
WHEN 1 THEN 'Bad'
WHEN 2 THEN 'Good'
WHEN 3 THEN 'Excellent'
ELSE 'Not_Assigned'
END, t.[Count]
FROM (
SELECT [type], [Count] = COUNT(*)
FROM #t
GROUP BY [type]
) t
ORDER BY ISNULL(t.[type], 999)
output -
Description Count
------------ -----------
Bad 1
Good 2
Excellent 1
Not_Assigned 1
;WITH CTE_TYPE
AS (SELECT DESCRIPTION,
VALUE
FROM (VALUES ('BAD',
1),
('GOOD',
2),
('EXCELLENT',
3))V( DESCRIPTION, VALUE )),
CTE_COUNT
AS (SELECT C.DESCRIPTION,
Count(T.TYPE) TYPE_COUNT
FROM YOUR_TABLE T
JOIN CTE_TYPE C
ON T.TYPE = C.VALUE
GROUP BY TYPE,
DESCRIPTION
UNION ALL
SELECT 'NOT_ASSIGNED' AS DESCRIPTION,
Count(*) TYPE_COUNT
FROM YOUR_TABLE
WHERE TYPE IS NULL)
SELECT *
FROM CTE_COUNT
Hope, this helps.
SELECT ISNULL(D.descr, 'Not_Assigned'),
T2.qty
FROM
(SELECT T.type,
COUNT(*) as qty
FROM Table AS T
GROUP BY type) AS T2
LEFT JOIN (SELECT 1 as type, 'Bad' AS descr
UNION ALL
SELECT 2, 'Good'
UNION ALL
SELECT 3, 'Excellent') AS D ON D.type = T2.type
If you are using Sql server 2012+ use this
SELECT
[Description] = coalesce(choose (t.[type],'Bad','Good' ,'Excellent'), 'Not_Assigned'),
t.[Count]
FROM (
SELECT [type], [Count] = COUNT(*)
FROM yourtable
GROUP BY [type]
) t

How do I flatten a table for an entire population?

I'm currently working to replace an update process which currently iterates over a very large table using a PL/SQL cursor, updating several columns with flattened data.
The query is structured such that the flattened results can only return a single row, by limiting to term and id. The term_eff column indicates when an activity should start appearing in results, but there is no current limitation for an end date. How can I return the flattened results of the test_activity table for all rows in the test_person table?
Test case tables:
create table test_person (id number,term varchar2(6));
create table test_activity(id number,term_eff varchar2(6),activity varchar2(10));
insert into test_person values(1,'201001');
insert into test_person values(1,'201101');
insert into test_person values(1,'201102');
insert into test_person values(2,'201001');
insert into test_person values(2,'201101');
insert into test_person values(2,'201102');
insert into test_activity values (1,'201001','Jump');
insert into test_activity values (1,'201001','Play');
insert into test_activity values (1,'201102','Run');
insert into test_activity values (2,'201001','Jump');
insert into test_activity values (2,'201101','Play');
insert into test_activity values (2,'201101','Run');
commit;
Here is the current query to return a single row. Would like a version of this that can return values for all rows in the test_person table.
select Max(CASE WHEN A.activity_rank = 1 THEN A.activity ELSE NULL END) AS activity1,
Max(CASE WHEN A.activity_rank = 2 THEN A.activity ELSE NULL END) AS activity2,
Max(CASE WHEN A.activity_rank = 3 THEN A.activity ELSE NULL END) AS activity3
from (SELECT id,
term_eff,
activity,
row_number() OVER (PARTITION BY ID ORDER BY term_eff desc) AS activity_rank
FROM test_activity
WHERE id = 1
AND term_eff <= '201001') A;
Edit: Expected results from the final query:
ID Term Activity1 Activity2 Activity3
1 201001 Jump Play
1 201101 Jump Play
1 201102 Jump Play Run
...
Basically, just remove the where condition and add a group by. You've already done the hard part:
select id,
Max(CASE WHEN A.activity_rank = 1 THEN A.activity ELSE NULL END) AS activity1,
Max(CASE WHEN A.activity_rank = 2 THEN A.activity ELSE NULL END) AS activity2,
Max(CASE WHEN A.activity_rank = 3 THEN A.activity ELSE NULL END) AS activity3
from (SELECT id, term_eff, activity,
row_number() OVER (PARTITION BY ID ORDER BY term_eff desc) AS activity_rank
FROM test_activity
WHERE term_eff <= '201001'
) A
group by id;

Joining two select queries from the same table

The table contains an ID column, valueHeading column and a value column. I want to separate the value column into two new columns called valueHeading1 and valueHeading2 depending on which type of valueHeading the value has.
So I want to join this select:
Edit: Full join
SELECT ID
,valueHeading
,value as 'valueHeading1'
FROM table1
WHERE valueHeading = 'valueHeading1'
With This select:
SELECT ID
,value as 'valueHeading2'
FROM table1
WHERE valueHeading = 'valueHeading2'
on their respective ID's. How do I do this?
Edit to illustrate what I want to do:
Original table:
ID valueHeading value
0 valueHeading1 a
0 valueHeading2 a
1 valueHeading1 ab
1 valueHeading2 NULL
2 valueHeading1 abcd
2 valueHeading2 abc
New Table:
ID valueHeading1 valueHeading2
0 a a
1 ab NULL
2 abcd abc
If you need only join use this. Using case when is elegant way if you don't need join.
SELECT * FROM
(SELECT ID
,valueHeading
,value as 'valueHeading1'
FROM table1
WHERE valueHeading = 'valueHeading1') AS TAB_1,
(SELECT ID
,value as 'valueHeading2'
FROM table1
WHERE valueHeading = 'valueHeading2') AS TAB_2
WHERE TAB_1.ID = TAB_2.ID
Try something like :
SELECT ID
, CASE WHEN valueHeading = 'valueHeading1' THEN value ELSE NULL END AS valueHeading1
, CASE WHEN valueHeading = 'valueHeading2' THEN value ELSE NULL END AS valueHeading2
FROM table1
WHERE valueHeading IN ('valueHeading1', 'valueHeading2')
If you want to regroup all values on one row for each ID, you can try :
SELECT ID
, MAX(CASE WHEN valueHeading = 'valueHeading1' THEN value ELSE NULL END) AS valueHeading1
, MAX(CASE WHEN valueHeading = 'valueHeading2' THEN value ELSE NULL END) AS valueHeading2
FROM table1
WHERE valueHeading IN ('valueHeading1', 'valueHeading2')
GROUP BY ID
HAVING MAX(CASE WHEN valueHeading = 'valueHeading1' THEN value ELSE NULL END) IS NOT NULL
OR MAX(CASE WHEN valueHeading = 'valueHeading2' THEN value ELSE NULL END) IS NOT NULL
See SQLFiddle. I also tried on Oracle 11g and MSSQL 2012, and it works each time.
In SQLServer2005+ possible use PIVOT
SELECT ID, valueHeading1, valueHeading2
FROM
(
SELECT *
FROM dbo.test28
WHERE valueHeading IN ('valueHeading1', 'valueHeading2')
) x
PIVOT
(
MAX(value)
FOR valueHeading IN ([valueHeading1], [valueHeading2])
) p
Demo on SQLFiddle
self join could be a simple solution
SELECT DISTINCT t1.ID, t1.value as valueHeading1, t2.value as valueHeading2,
FROM table1 t1
INNER JOIN table1 t2 ON t1.ID = t2.ID
WHERE t1.valueHeading <> t2.valueHeading

SQL Optimize - From History table get value from two different dates

Not sure where to start... But basically I have a report table, an account table, and an account history table. The account history table will have zero or more records, where each record is the state of the account cancelled flag after it changed.
There is other stuff going on, but basically i am looking to return the account detail data, with the state of account cancelled bit on the start date and enddate as different columns.
What is the best way to do this?
I have the following working query below
(Idea) Should I do seperate joins on history table, 1 for each date?
I guess I could do it in three separate queries ( Get Begin Snapshot, End Snapshot, Normal Report query with a join to each snapshot)
something else?
Expected output:
AccountID, OtherData, StartDateCancelled, EndDateCancelled
Test Tables:
DECLARE #Report TABLE (ReportID INT, StartDate DATETIME, EndDate DATETIME)
DECLARE #ReportAccountDetail TABLE( ReportID INT, Accountid INT, Cancelled BIT )
DECLARE #AccountHistory TABLE( AccountID INT, ModifiedDate DATETIME, Cancelled BIT )
INSERT INTO #Report
SELECT 1,'1/1/2011', '2/1/2011'
--
INSERT INTO #ReportAccountDetail
SELECT 1 AS ReportID, 1 AS AccountID, 0 AS Cancelled
UNION
SELECT 1,2,0
UNION
SELECT 1,3,1
UNION
SELECT 1,4,1
--
INSERT INTO #AccountHistory
SELECT 2 AS CustomerID, '1/2/2010' AS ModifiedDate, 1 AS Cancelled
UNION--
SELECT 3, '2/1/2011', 1
UNION--
SELECT 4, '1/1/2010', 1
UNION
SELECT 4, '2/1/2010', 0
UNION
SELECT 4, '2/1/2011', 1
Current Query:
SELECT Accountid, OtherData,
MAX(CASE WHEN BeginRank = 1 THEN CASE WHEN BeginHistoryExists = 1 THEN HistoryCancelled ELSE DefaultCancel END ELSE NULL END ) AS StartDateCancelled,
MAX(CASE WHEN EndRank = 1 THEN CASE WHEN EndHistoryExists = 1 THEN HistoryCancelled ELSE DefaultCancel END ELSE NULL END ) AS EndDateCancelled
FROM
(
SELECT c.Accountid,
'OtherData' AS OtherData,
--lots of other data
ROW_NUMBER() OVER (PARTITION BY c.AccountID ORDER BY
CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate desc) AS BeginRank,
CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END AS BeginHistoryExists,
ROW_NUMBER() OVER ( PARTITION BY c.AccountID ORDER BY
CASE WHEN ch.ModifiedDate <= Report.EndDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate desc) AS EndRank,
CASE WHEN ch.ModifiedDate <= Report.EndDate THEN 1 ELSE 0 END AS EndHistoryExists,
CAST( ch.Cancelled AS INT) AS HistoryCancelled,
0 AS DefaultCancel
FROM
#Report AS Report
INNER JOIN #ReportAccountDetail AS C ON Report.ReportID = C.ReportID
--Others joins related for data to return
LEFT JOIN #AccountHistory AS CH ON CH.AccountID = C.AccountID
WHERE Report.ReportID = 1
) AS x
GROUP BY AccountID, OtherData
Welcome input on writing stack overflow questions. Thanks!
ROW_NUMBER() often suprises me and out-performs my expectations. In this case, however, I'd be tempted to just use correlated sub-queries. At least, I'd test them against the alternatives.
Note: I would also use real tables, with real indexes, and a realistic volume of fake data. (If it's worth posting this question, I'm assuming that it's worth testing this realistically.)
SELECT
[Report].ReportID,
[Account].AccountID,
[Account].OtherData,
ISNULL((SELECT TOP 1 Cancelled FROM AccountHistory WHERE AccountID = [Account].AccountID AND ModifiedDate <= [Report].StartDate ORDER BY ModifiedDate DESC), 0) AS StartDateCancelled,
ISNULL((SELECT TOP 1 Cancelled FROM AccountHistory WHERE AccountID = [Account].AccountID AND ModifiedDate <= [Report].EndDate ORDER BY ModifiedDate DESC), 0) AS EndDateCancelled
FROM
Report AS [Report]
LEFT JOIN
ReportAccountDetail AS [Account]
ON [Account].ReportID = [Report].ReportID
ORDER BY
[Report].ReportID,
[Account].AccountID
Note: For whatever reason, I've found that TOP 1 and ORDER BY is faster than MAX().
In terms of your suggested answer, I'd modify it slightly to just use ISNULL instead of trying to make the Exists columns work.
I'd also join on the "other data" after all of the working out, rather than inside the inner-most query, so as to avoid having to group by all the "other data".
WITH
HistoricData AS
(
SELECT
Report.ReportID,
c.Accountid,
c.OtherData,
ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate DESC) AS BeginRank,
ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY ch.ModifiedDate DESC) AS EndRank,
CH.Cancelled
FROM
#Report AS Report
INNER JOIN
#ReportAccountDetail AS C
ON Report.ReportID = C.ReportID
LEFT JOIN
#AccountHistory AS CH
ON CH.AccountID = C.AccountID
AND CH.ModifiedDate <= Report.EndDate
)
,
FlattenedData AS
(
SELECT
ReportID,
Accountid,
OtherData,
ISNULL(MAX(CASE WHEN BeginRank = 1 THEN Cancelled END), 0) AS StartDateCancelled,
ISNULL(MAX(CASE WHEN EndRank = 1 THEN Cancelled END), 0) AS EndDateCancelled
FROM
[HistoricData]
GROUP BY
ReportID,
AccountID,
OtherData
)
SELECT
*
FROM
[FlattenedData]
LEFT JOIN
[OtherData]
ON Whatever = YouLike
WHERE
[FlattenedData].ReportID = 1
And a final possible version...
WITH
ReportStartHistory AS
(
SELECT
*
FROM
(
SELECT
[Report].ReportID,
ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate) AS SequenceID,
[History].*
FROM
Report AS [Report]
INNER JOIN
AccountHistory AS [History]
ON [History].ModifiedDate <= [Report].StartDate
)
AS [data]
WHERE
SequenceID = 1
)
,
ReportEndHistory AS
(
SELECT
*
FROM
(
SELECT
[Report].ReportID,
ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate) AS SequenceID,
[History].*
FROM
Report AS [Report]
INNER JOIN
AccountHistory AS [History]
ON [History].ModifiedDate <= [Report].EndDate
)
AS [data]
WHERE
SequenceID = 1
)
SELECT
[Report].ReportID,
[Account].*,
ISNULL([ReportStartHistory].Cancelled, 0) AS StartDateCancelled,
ISNULL([ReportEndHistory].Cancelled, 0) AS EndDateCancelled
FROM
Report AS [Report]
INNER JOIN
Account AS [Account]
LEFT JOIN
[ReportStartHistory]
ON [ReportStartHistory].ReportID = [Report].ReportID
AND [ReportStartHistory].AccountID = [Account].AccountID
LEFT JOIN
[ReportEndHistory]
ON [ReportEndHistory].ReportID = [Report].ReportID
AND [ReportEndHistory].AccountID = [Account].AccountID