Get the latest value for each column - sql

Suppose I have the following "values" table in my SQL Server (2012) DB:
Table1:
Id Col1 Col2 Col3 Col4
And I want to create a second "override" table that will store values to override the original values in case a user needs to do so. So, given the table above, the override table would look as follows:
Overrides:
FK_Id Col1 Col2 Col3 Col4 When_Inserted
Where Overrides.FK_Id references Table1.Id as a foreign key.
So, for example, suppose my Overrides table had the following rows within it with overrides for a row in Table1 with Id=1:
FK_Id  Col1           Col2           Col3           Col4  When_Inserted
1      Val1_1         Val2_1         Expected_Val3  NULL  1-Jan
1      NULL           Val2_2         NULL           NULL  2-Jan
1      NULL           Expected_Val2  NULL           NULL  3-Jan
1      Expected_Val1  NULL           NULL           NULL  4-Jan
Then, based upon the When_Inserted column, and wanting the latest inserts to take precedence, I'd want the effective overrides to be as follows:
FK_Id  Col1           Col2           Col3           Col4
1      Expected_Val1  Expected_Val2  Expected_Val3  NULL
I'm trying to think of a smart way to create this SQL and am coming up with a fairly ugly solution along the lines of:
SELECT
    FK_Id
    ,(
        SELECT TOP 1 Col1
        FROM Overrides O1
        WHERE Col1 IS NOT NULL
          AND O1.FK_Id = O.FK_Id
        ORDER BY O1.When_Inserted DESC
     ) Col1
    .... <same for each of the other columns> ....
FROM Overrides O
GROUP BY FK_Id
I'm sure there has to be a better way that is cleaner and substantially more efficient.

Use a common table expression with row_number() (latest first), cross apply (values ...) to unpivot your columns, filter for the latest non-null value of each column (rn = 1), and finally pivot() back to the original shape:
;with cte as (
select o.fk_id, v.Col, v.Value, o.When_Inserted
, rn = row_number() over (partition by o.fk_id, v.col order by o.when_inserted desc)
from overrides o
cross apply (values('Col1',Col1),('Col2',Col2),('Col3',Col3),('Col4',Col4)
) v (Col,Value)
where v.value is not null
)
select fk_id, col1, col2, col3, col4
from (
select fk_id, col, value
from cte
where rn = 1
) s
pivot (max(Value) for Col in (col1,col2,col3,col4)) p
rextester demo: http://rextester.com/KGM96394
returns:
+-------+---------------+---------------+---------------+------+
| fk_id | col1 | col2 | col3 | col4 |
+-------+---------------+---------------+---------------+------+
| 1 | Expected_Val1 | Expected_Val2 | Expected_Val3 | NULL |
+-------+---------------+---------------+---------------+------+
dbfiddle.uk demo comparison of 3 methods
Looking at the io stats for the sample:
unpivot/pivot version:
Table 'Worktable'. Scan count 0, logical reads 0
Table 'overrides'. Scan count 1, logical reads 1
first_value over() version:
Table 'Worktable'. Scan count 20, logical reads 100
Table 'overrides'. Scan count 1, logical reads 1
select top 1 subquery version:
Table 'overrides'. Scan count 5, logical reads 5
Table 'Worktable'. Scan count 0, logical reads 0
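As an aside, the final pivot() step can be swapped for conditional aggregation over the same cte, which some find easier to read. A sketch (replace the select ... pivot block above with this; behaviour should be identical for this data, but I haven't benchmarked it):
select fk_id
    , max(case when Col = 'Col1' then Value end) as Col1
    , max(case when Col = 'Col2' then Value end) as Col2
    , max(case when Col = 'Col3' then Value end) as Col3
    , max(case when Col = 'Col4' then Value end) as Col4
from cte
where rn = 1
group by fk_id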

You can use first_value():
select distinct fkid,
       first_value(col1) over (partition by fkid
                               order by (case when col1 is not null then 1 else 2 end),
                                        when_inserted desc
                              ) as col1,
       first_value(col2) over (partition by fkid
                               order by (case when col2 is not null then 1 else 2 end),
                                        when_inserted desc
                              ) as col2,
       . . .
from t;
The select distinct is needed because SQL Server offers first_value() only as a window function, not as an aggregate function.
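Written out in full for the four columns, using the table and column names from the original question, it would look something like this (a sketch along the same lines, not tested against your data):
select distinct FK_Id,
       first_value(Col1) over (partition by FK_Id
                               order by (case when Col1 is not null then 1 else 2 end),
                                        When_Inserted desc) as Col1,
       first_value(Col2) over (partition by FK_Id
                               order by (case when Col2 is not null then 1 else 2 end),
                                        When_Inserted desc) as Col2,
       first_value(Col3) over (partition by FK_Id
                               order by (case when Col3 is not null then 1 else 2 end),
                                        When_Inserted desc) as Col3,
       first_value(Col4) over (partition by FK_Id
                               order by (case when Col4 is not null then 1 else 2 end),
                                        When_Inserted desc) as Col4
from Overrides;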

My solution is quite different.
IMHO its performance should be better, provided it gives the correct output across all the sample data.
I use an auto-generated identity id in my script, but if you don't have an identity column you can use ROW_NUMBER() instead. The script is easy to follow.
declare @t table(id int identity(1,1),FK_Id int,Col1 varchar(50),Col2 varchar(50)
,Col3 varchar(50),Col4 varchar(50),When_Inserted date)
insert into @t VALUES
(1 ,'Val1_1' ,'Val2_1' ,'Expected_Val3', NULL , '2017-01-1')
,(1 ,NULL ,'Val2_2' , NULL , NULL, '2017-01-2')
,(1 ,NULL ,'Expected_Val2', NULL , NULL, '2017-01-3')
,(1 ,'Expected_Val1' , NULL , NULL , NULL, '2017-01-4')
;
WITH CTE
AS (
SELECT *
,CASE
WHEN col1 IS NULL
THEN NULL
ELSE CONCAT (
cast(id AS VARCHAR(10))
,'_'
,col1
)
END col1Code
,CASE
WHEN col2 IS NULL
THEN NULL
ELSE CONCAT (
cast(id AS VARCHAR(10))
,'_'
,col2
)
END col2Code
,CASE
WHEN col3 IS NULL
THEN NULL
ELSE CONCAT (
cast(id AS VARCHAR(10))
,'_'
,col3
)
END col3Code
,CASE
WHEN col4 IS NULL
THEN NULL
ELSE CONCAT (
cast(id AS VARCHAR(10))
,'_'
,col4
)
END col4Code
FROM @t
)
,CTE1
AS (
SELECT FK_Id
,max(col1Code) col1Code
,max(col2Code) col2Code
,max(col3Code) col3Code
,max(col4Code) col4Code
FROM cte
GROUP BY FK_Id
)
SELECT FK_Id
,SUBSTRING(col1Code, charindex('_', col1Code) + 1, len(col1Code)) col1Code
,SUBSTRING(col2Code, charindex('_', col2Code) + 1, len(col2Code)) col2Code
,SUBSTRING(col3Code, charindex('_', col3Code) + 1, len(col3Code)) col3Code
,SUBSTRING(col4Code, charindex('_', col4Code) + 1, len(col4Code)) col4Code
FROM cte1 c1

Related

How to convert a single row table into columns?

I have a single row query returning data in this format:
Col1 Col2 Col3 Col4
-----------------------------
1425 3454 2345 3243
I want it to display it in this format:
Col1 | 1425
Col2 | 3454
Col3 | 2345
Col4 | 3243
How can I do it?
I am avoiding the UNION method since the above table is produced by a query, and for each <table_name> I would have to paste that whole query again, which would make the process slow.
If the number of fields per table is always constant, then it might work like this.
DECLARE @Table TABLE(
[Col1] int,
[Col2] int,
[Col3] int,
[Col4] int
)
INSERT INTO @Table VALUES(1425, 3454, 2345, 3243); -- some Test data
SELECT * FROM @Table; -- row
SELECT
p.[Columns],
p.[Value]
FROM (
SELECT
[Col1],
[Col2],
[Col3],
[Col4]
FROM @Table
) x
UNPIVOT(
[Value] FOR [Columns] IN ([Col1],[Col2],[Col3],[Col4])
) AS P;
You can cross join your query with the column names in order to show the column values in separate rows:
select
columns.col,
case columns.col
when 'Col1' then q.col1
when 'Col2' then q.col2
when 'Col3' then q.col3
when 'Col4' then q.col4
end as value
from ( <your query here> ) q
cross join ( values ('Col1'), ('Col2'), ('Col3'), ('Col4') ) as columns(col);
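As a usage sketch, plugging in the @Table variable from the answer above in place of "your query" (assumed names, adjust to your own query):
select
    columns.col,
    case columns.col
        when 'Col1' then q.Col1
        when 'Col2' then q.Col2
        when 'Col3' then q.Col3
        when 'Col4' then q.Col4
    end as value
from ( select Col1, Col2, Col3, Col4 from @Table ) q
cross join ( values ('Col1'), ('Col2'), ('Col3'), ('Col4') ) as columns(col);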

Showing the result of COALESCE into separate columns based from where they where retrieved

I have a table with many NULL values, so I use the COALESCE function to retrieve the non-NULL values. This works fine when the result of the COALESCE is to be placed in a single column. However, I need to place the values of the COALESCE into separate columns depending on where they were picked from.
E.g., running the following query against my table:
SELECT COALESCE(Col1, Col2, Col3, Col4) FROM Table1
Will produce:-
Column1
1
1
3
4
However I do not want that result but I want this result:-
Col1 Col2 Col3 Col4
1 - - -
- 1 - -
- - 3 -
- 4 - -
As you can see, I want only one field populated per row (that's why I'm using COALESCE), but the result of COALESCE should be placed as illustrated; notice, one value per row.
Any ideas on how I can achieve this result, please?
COALESCE can be unrolled into CASE expressions. You need something like the below:
select col1
, case when col1 is not null then null else col2 end 'Col2'
, case when col1 is not null or col2 is not null then null else col3 end 'Col3'
, case when col1 is not null or col2 is not null or col3 is not null then null else col4 end 'Col4'
from table
You can achieve this with a combination of PIVOT, UNPIVOT and ROW_NUMBER.
declare @t table(rn int identity(1,1) primary key, col1 int, col2 int, col3 int, col4 int);
insert @t values (1,null,null,null), (null,1,0,null), (null,null,3,null), (null,4,null,2);
with a as (
select *, ranking = row_number() over (partition by rn order by col)
from @t a
unpivot ([val] for [col] in ([col1],[col2],[col3],[col4])) p
)
select *
from a
pivot (min(val) for [col] in ([col1],[col2],[col3],[col4])) p
where ranking = 1

How to create separate rows for each unique value in source data

I have following table:
Cus_ID Work_Phone Home_Phone Mobile_Phone
1 x Blank x
2 x x Blank
3 x x x
.
.
. and so on (1000s of rows)
Work_Phone, Home_Phone, Mobile_Phone - varchar
x = some value present
I need to select from the source data and move it to a target system as shown below; I need to create a separate row for each populated value for each Cus_ID. How do I do it?
Cus_ID Type ContactNo
1 Work x
1 Mobile x
2 Work x
2 Home x
3 Work x
3 Home x
3 Mobile x
.. and so on
Type, ContactNo - varchar
x = Should be the corresponding value from Source table
The above result can be achieved using either UNPIVOT or CROSS APPLY; here it is based on some assumed sample data:
declare @t table (PK varchar(1),col1 varchar(1),col2 varchar(1),col3 varchar(1))
insert into @t(PK,col1,col2,col3)values
('X','a','','c'),
('y','a','b',''),
('z','a','b','c')
Cross Apply :
select PK,value
from @t
cross apply
(
values
('I1', col1),
('I2', col2),
('I3', col3)
) c(col, value)
where value is not null AND value <> ''
order by PK, col
UNPIVOT
select PK,value
from @t
unpivot
(
value
for col in (col1, col2, col3)
) un
WHERE value <> ''
order by PK, col;
Assuming col1, col2 and col3 are of the same type, then:
SELECT pk, col1 AS target_value FROM your_table WHERE col1 IS NOT NULL
UNION
SELECT pk, col2 AS target_value FROM your_table WHERE col2 IS NOT NULL
UNION
SELECT pk, col3 AS target_value FROM your_table WHERE col3 IS NOT NULL
ORDER BY pk
Edit: here's the version with ISNULL tests, column headings and the rest, in response to your revised question:
SELECT Cus_ID, 'Work' AS Type, Work_Phone AS ContactNo FROM your_table
WHERE ISNULL(Work_Phone, '') <> ''
UNION
SELECT Cus_ID, 'Home' AS Type, Home_Phone AS ContactNo FROM your_table
WHERE ISNULL(Home_Phone, '') <> ''
UNION
SELECT Cus_ID, 'Mobile' AS Type, Mobile_Phone AS ContactNo FROM your_table
WHERE ISNULL(Mobile_Phone, '') <> ''
ORDER BY 1
If there's a chance the "blank" column may contain whitespace characters, then refine it yet further to:
... ISNULL(LTRIM(Work_Phone), '') <> ''
etc.
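Spelled out in full, the whitespace-tolerant version would be along these lines (a sketch; your_table stands in for the real source table):
SELECT Cus_ID, 'Work' AS Type, Work_Phone AS ContactNo FROM your_table
WHERE ISNULL(LTRIM(Work_Phone), '') <> ''
UNION
SELECT Cus_ID, 'Home' AS Type, Home_Phone AS ContactNo FROM your_table
WHERE ISNULL(LTRIM(Home_Phone), '') <> ''
UNION
SELECT Cus_ID, 'Mobile' AS Type, Mobile_Phone AS ContactNo FROM your_table
WHERE ISNULL(LTRIM(Mobile_Phone), '') <> ''
ORDER BY 1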

How to make dynamic column

I have some data as under
Declare @t table (Id int identity,CommaSeperatedValue varchar(100))
Insert Into @t
Select 'Somalia,Vietnam' Union All
Select 'apple,banana,guava,India,Australia'
There is no limit on the number of comma-separated values. The desired output for the sample provided would be:
Id Col1 Col2 Col3 Col4 Col5
1 Somalia Vietnam Null Null Null
2 apple banana guava India Australia
That means the columns will be generated dynamically. Let us take another example:
Declare @t table (Id int identity,CommaSeperatedValue varchar(100))
Insert Into @t
Select 'Somalia,Vietnam,Honolulu,Spain' Union All
Select 'apple,banana,guava,India,Australia,Smart,Bus' Union All
Select 'Mango'
The desired output
Id Col1 Col2 Col3 Col4 Col5 Col6 Col7
1 Somalia Vietnam Honolulu Spain Null Null Null
2 apple banana guava India Australia Smart Bus
3 Mango Null Null Null Null Null Null
How can I write this query?
My attempt so far (after this I am lost):
SELECT
X.id,
X.CommaSeperatedValue,
Y.splitdata
FROM
(
SELECT *,
CAST('<X>'+REPLACE(F.CommaSeperatedValue,',','</X><X>')+'</X>' AS XML) AS xmlfilter
FROM @t F
)X
CROSS APPLY
(
SELECT fdata.D.value('.','varchar(50)') as splitdata
FROM X.xmlfilter.nodes('X') as fdata(D)
)Y
Thanks in advance
Well, here is the dynamic solution you are looking for. I used a temp table; you can replace it with a permanent table or a table variable.
Declare @t table (Id int identity,CommaSeperatedValue varchar(100))
Insert Into @t
Select 'Somalia,Vietnam' Union All
Select 'apple,banana,guava,India,Australia'
IF object_ID('TempDB..#Temp') IS NOT NULL DROP TABLE #Temp;
SELECT
X.id,
--X.CommaSeperatedValue,
Y.splitdata
,ROW_NUMBER() OVER( PARTITION BY X.id ORDER BY X.id ) AS DataID
INTO #Temp
FROM
(
SELECT *,
CAST('<X>'+REPLACE(F.CommaSeperatedValue,',','</X><X>')+'</X>' AS XML) AS xmlfilter
FROM @t F
)X
CROSS APPLY
(
SELECT fdata.D.value('.','varchar(50)') as splitdata
FROM X.xmlfilter.nodes('X') as fdata(D)
)Y
DECLARE @MAXCol INT = (SELECT MAX(DataID) FROM #Temp)
    ,@index INT = 1
    ,@ColNames varchar(4000) = ''
WHILE (@index <= @MAXCol)
BEGIN
    SET @ColNames = @ColNames + 'MAX(CASE WHEN DataID = ' + LTRIM(STR(@index)) + ' THEN splitdata END) as Col' + LTRIM(STR(@index)) + ','
    SET @index = @index + 1
END
SET @ColNames = LEFT(@ColNames, LEN(@ColNames) - 1) -- Remove Last Comma
EXECUTE ( 'SELECT
[id],' + @ColNames + ' FROM #Temp GROUP BY [id]'
)
Going with what you've already got, add a row number and transpose it using a GROUP BY:
SELECT
[id],
MAX(CASE WHEN RowNumber=1 THEN splitdata END) as Col1,
MAX(CASE WHEN RowNumber=2 THEN splitdata END) as Col2,
MAX(CASE WHEN RowNumber=3 THEN splitdata END) as Col3,
MAX(CASE WHEN RowNumber=4 THEN splitdata END) as Col4,
MAX(CASE WHEN RowNumber=5 THEN splitdata END) as Col5,
MAX(CASE WHEN RowNumber=6 THEN splitdata END) as Col6,
MAX(CASE WHEN RowNumber=7 THEN splitdata END) as Col7,
MAX(CASE WHEN RowNumber=8 THEN splitdata END) as Col8,
MAX(CASE WHEN RowNumber=9 THEN splitdata END) as Col9,
MAX(CASE WHEN RowNumber=10 THEN splitdata END) as Col10
FROM (
SELECT
X.id,
Y.splitdata,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS RowNumber
FROM
(
SELECT *,
CAST('<X>'+REPLACE(F.CommaSeperatedValue,',','</X><X>')+'</X>' AS XML) AS xmlfilter
FROM @t F
)X
CROSS APPLY
(
SELECT fdata.D.value('.','varchar(50)') as splitdata
FROM X.xmlfilter.nodes('X') as fdata(D)
)Y
) X
GROUP BY [id]
This will yield:
id Col1 Col2 Col3 Col4 Col5 Col6 Col7 Col8 Col9 Col10
1 Somalia Vietnam Honolulu Spain NULL NULL NULL NULL NULL NULL
2 apple banana guava India Australia Smart Bus NULL NULL NULL
3 Mango NULL NULL NULL NULL NULL NULL NULL NULL NULL
I believe you would need to use dynamic SQL to do this, because the SELECT statement needs to specify the number of columns, or you need to iterate over the number of items in the CSV column.
Typically this is a bad idea, will cause more problems than it solves, and is too confusing for the next person to maintain. What you might want to consider instead is simply flattening your data to a two-column format:
-- ie your initial data
1, 'Somalia,Vietnam,Honolulu,Spain'
2, 'apple,banana,guava,India,Australia,Smart,Bus'
3, 'Mango'
-- would become
1, 'Somalia'
1, 'Vietnam'
1, 'Honolulu'
1, 'Spain'
2, 'apple'
2, 'banana'
-- etc
Now group and pivot in your presentation layer.
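A sketch of that flattening step, reusing the XML-split technique from your own attempt (assuming the @t table variable declared in the question):
SELECT X.Id, Y.splitdata AS Value
FROM (
    SELECT *,
        CAST('<X>' + REPLACE(F.CommaSeperatedValue, ',', '</X><X>') + '</X>' AS XML) AS xmlfilter
    FROM @t F
) X
CROSS APPLY (
    SELECT fdata.D.value('.', 'varchar(50)') AS splitdata
    FROM X.xmlfilter.nodes('X') AS fdata(D)
) Y
ORDER BY X.Id;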
Trying to format data in SQL (probably for a report or data export?) is a common mistake, really just one up from trying to store dates as string literals. SQL is a data storage and manipulation language / platform; it is not for data presentation. Trying to use it in this manner will simply cause you no end of headaches.

How to combine multiple rows into one with nulled values where row values differ

How can I, in SQL Server, get a single row where the only non-null values are the ones that are consistent and non-null across all the selected rows?
A B C D
10 NULL text NULL
4 abc text NULL
4 def text NULL
Should give the following row:
A B C D
NULL NULL text NULL
create table #t (col1 int, col2 char(3), col3 char(4), col4 int)
go
insert into #t select 10, null, 'text', null
insert into #t select 4, 'abc', 'text', null
insert into #t select 4, 'def', 'text', null
go
select
case when count(distinct isnull(col1, 0)) > 1 then null else max(col1) end as 'col1',
case when count(distinct isnull(col2, '')) > 1 then null else max(col2) end as 'col2',
case when count(distinct isnull(col3, '')) > 1 then null else max(col3) end as 'col3',
case when count(distinct isnull(col4, 0)) > 1 then null else max(col4) end as 'col4'
from
#t
go
drop table #t
go
EDIT: I added ISNULL to handle the issue identified by t-clausen.dk but this will only work if the 'default' values (i.e. zero and empty string) do not appear in the real data.
Daniel's comment about data types is also correct, but since we don't know the data types involved it's not easy to suggest an alternative. Providing a self-contained test script that uses the real data types is the best way to ask questions like this.
declare @t table(A int, b varchar(10), c varchar(max), d int)
insert @t values(10, null, 'text', null)
insert @t values(4, 'abc', 'text', null)
insert @t values(10, 'def', 'text', null)
select case when max(rna) > 1 then null else min(a) end,
case when max(rnb) > 1 then null else min(b) end,
case when max(rnc) > 1 then null else min(c) end,
case when max(rnd) > 1 then null else min(d) end
from
(
select rna = rank() over(order by a),
rnb = rank() over(order by b),
rnc = rank() over(order by c),
rnd = rank() over(order by d),
a, b,c,d
from @t
) e
If you have text columns replace the column type with varchar(max). Text columns are outdated.
Using count(distinct col1) was my first thought, but it doesn't count null values:
select count(distinct a) from (select cast(null as int) a) b
returns 0, because the NULL is not counted.
SELECT
CASE WHEN COUNT(DISTINCT col1) = 1
AND COUNT(col1) = COUNT(*)
THEN MIN(col1)
END AS col1
, CASE WHEN COUNT(DISTINCT col2) = 1
AND COUNT(col2) = COUNT(*)
THEN MIN(col2)
END AS col2
, CASE WHEN COUNT(DISTINCT col3) = 1
AND COUNT(col3) = COUNT(*)
THEN MIN(col3)
END AS col3
, CASE WHEN COUNT(DISTINCT col4) = 1
AND COUNT(col4) = COUNT(*)
THEN MIN(col4)
END AS col4
FROM
tableX