SQL Table data compare

SQL Table data compare - sql

I need to compare the rows of two tables and show only the columns that contain different (mismatched) data between the two tables. Suppose Table1 and Table2 each have 50 columns and only 5 of them contain mismatched data; then only those 5 columns need to appear in the SELECT statement.
The comparison part is done with a UNION query; my hurdle is how to come up with the names of the mismatched columns.

One way to do this is to have a list of all such column names concatenated in one string as:
-- For every id present in both tables, builds one string naming the columns
-- whose values differ ('Col1;Col2;' style; empty string when nothing differs).
-- The comparison is NULL-aware: a plain `<>` evaluates to UNKNOWN when either
-- side is NULL, which would silently hide NULL-versus-value mismatches.
SELECT
    T1.id,
    CASE
        WHEN T1.col1 <> T2.col1
          OR (T1.col1 IS NULL AND T2.col1 IS NOT NULL)
          OR (T1.col1 IS NOT NULL AND T2.col1 IS NULL)
        THEN 'Col1;' ELSE ''
    END
    + CASE
        WHEN T1.col2 <> T2.col2
          OR (T1.col2 IS NULL AND T2.col2 IS NOT NULL)
          OR (T1.col2 IS NOT NULL AND T2.col2 IS NULL)
        THEN 'Col2;' ELSE ''
    END
    -- add one CASE expression of the same shape for every column that
    -- should be included in the list
    AS Mismatchedcolumns
FROM Table1 AS T1
INNER JOIN Table2 AS T2
    ON T1.id = T2.id
Check Demo here..

If you are looking at a list of all mismatched columns, then see below example
-- Two sample tables with identical structure; products B, C and D appear in
-- both, A only in TableA and E only in TableB.
CREATE TABLE TableA (
    [Product]  varchar(1),
    [Qty]      int,
    [Price]    int,
    [Comments] varchar(3)
);

INSERT INTO TableA ([Product], [Qty], [Price], [Comments])
VALUES ('A', 20, 500, 'xyz'),
       ('B', 50, 200, 'xyz'),
       ('C', 90, 100, 'abc'),
       ('D', 50, 500, 'xyz');

CREATE TABLE TableB (
    [Product]  varchar(1),
    [Qty]      int,
    [Price]    int,
    [Comments] varchar(3)
);

INSERT INTO TableB ([Product], [Qty], [Price], [Comments])
VALUES ('B', 70, 200, 'cv'),
       ('C', 90, 200, 'wsd'),
       ('D', 40, 400, 'xyz'),
       ('E', 50, 500, 'xyz');
-- Rows of TableB that are new or changed relative to TableA, plus rows that
-- exist only in TableA. Result is:
--   'New'              product exists only in TableB
--   'Updated: <cols>'  Qty and/or Price differ between the two tables
--   'NewA'             product exists only in TableA
-- Identifier casing matches the CREATE TABLE statements (TableA, TableB,
-- Product) so the query also resolves on a case-sensitive collation.
SELECT
    b.Product,
    b.Qty,
    b.Price,
    CASE
        WHEN a.Product IS NULL THEN 'New'
        ELSE 'Updated: '
             -- each changed column contributes ',<name>'; STUFF removes the
             -- leading comma of the concatenated list
             + STUFF(
                   CASE WHEN a.Qty <> b.Qty THEN ',Qty' ELSE '' END
                   + CASE WHEN a.Price <> b.Price THEN ',Price' ELSE '' END,
                   1, 1, '')
    END AS Result
FROM TableB AS b
LEFT JOIN TableA AS a
    ON a.Product = b.Product
WHERE a.Product IS NULL
   OR a.Qty <> b.Qty
   OR a.Price <> b.Price
UNION
SELECT
    a.Product,
    a.Qty,
    a.Price,
    'NewA' AS Result
FROM TableA AS a
LEFT JOIN TableB AS b
    ON a.Product = b.Product
WHERE b.Product IS NULL
Modified version of solution at SQL Server 2008 compare two tables in same database and get column is changed
http://sqlfiddle.com/#!3/d1b3f/3

Related

Combining rows in SQL Server against all other rows

Say I have an table like this:
And I want to create a select which combines every non-null row against every other value such that I end up with:
etc, all the way up to 3 - 3 - 3
Can this be done in one select statement?

Since you want to combine values that are currently on different rows, you first need to separate them into subqueries (the WITH clauses); then you can cross join these (a Cartesian product) to find all the different combinations.
-- One CTE per column, each keeping only that column's non-NULL values.
-- Cross-joining the three CTEs returns every possible combination.
-- (`table` stands in for the real table name.)
WITH
    c1 AS (SELECT column1 FROM table WHERE column1 IS NOT NULL),
    c2 AS (SELECT column2 FROM table WHERE column2 IS NOT NULL),
    c3 AS (SELECT column3 FROM table WHERE column3 IS NOT NULL)
SELECT *
FROM c1
CROSS JOIN c2
CROSS JOIN c3

You can do it with two CROSS JOIN's:
-- Table variable holding the sample data: each row carries a value in exactly
-- one of the three columns. (Table variables must be named with a leading @;
-- a leading # is only valid for temp tables created with CREATE TABLE.)
DECLARE @tb AS TABLE
(
    column1 INT
   ,column2 INT
   ,column3 INT
);

INSERT INTO @tb (column1, column2, column3)
VALUES (1, NULL, NULL),
       (2, NULL, NULL),
       (3, NULL, NULL),
       (NULL, 1, NULL),
       (NULL, 2, NULL),
       (NULL, 3, NULL),
       (NULL, NULL, 1),
       (NULL, NULL, 2),
       (NULL, NULL, 3);

-- Join the table to itself twice; each alias supplies one column and the
-- WHERE clause keeps only the rows where that column is populated.
SELECT tb1.column1, tb2.column2, tb3.column3
FROM @tb AS tb1
CROSS JOIN @tb AS tb2
CROSS JOIN @tb AS tb3
WHERE tb1.column1 IS NOT NULL
  AND tb2.column2 IS NOT NULL
  AND tb3.column3 IS NOT NULL
ORDER BY tb1.column1, tb2.column2, tb3.column3;

Use cross join:
-- Sample data in a table variable (must be named @t, not #t): every row has
-- a value in exactly one column.
DECLARE @t table(col1 int, col2 int, col3 int)
INSERT @t (col1, col2, col3) values
(1, null, null),(2, null, null),(3, null, null),
(null, 1, null),(null, 2, null),(null, 3, null),
(null, null, 1),(null, null, 2),(null, null, 3)

-- Every combination of a non-NULL col1, col2 and col3 value.
SELECT t1.col1, t2.col2, t3.col3
FROM @t AS t1
CROSS JOIN @t AS t2
CROSS JOIN @t AS t3
WHERE t1.col1 is not null
  and t2.col2 is not null
  and t3.col3 is not null

You need to do a Cross Join here. Try this.
-- Inline sample data: nine distinct rows, each with a value in exactly one of
-- the three columns.
;WITH cte (cola, colb, colc) AS (
    SELECT v.cola, v.colb, v.colc
    FROM (VALUES
        (1,    NULL, NULL),
        (2,    NULL, NULL),
        (3,    NULL, NULL),
        (NULL, 1,    NULL),
        (NULL, 2,    NULL),
        (NULL, 3,    NULL),
        (NULL, NULL, 1),
        (NULL, NULL, 2),
        (NULL, NULL, 3)
    ) AS v (cola, colb, colc)
)
-- One derived table per column (non-NULL values only), cross-joined to
-- produce every combination.
SELECT *
FROM (
    SELECT a.cola
    FROM cte AS a
    WHERE a.cola IS NOT NULL
) AS f
CROSS JOIN (
    SELECT b.colb
    FROM cte AS b
    WHERE b.colb IS NOT NULL
) AS s
CROSS JOIN (
    SELECT c.colc
    FROM cte AS c
    WHERE c.colc IS NOT NULL
) AS T

Updating Using Aggregate Function

I am trying to update a column of the table using the below query.. But I get an error
An aggregate may not appear in the set list of an UPDATE statement
Code:
-- Statement from the question. It is rejected with
-- "An aggregate may not appear in the set list of an UPDATE statement"
-- because MIN() is used directly in the SET list.
UPDATE Test.dbo.Table1
SET InDate = MIN(b.Date)
FROM
Test.dbo.Table1 a
LEFT OUTER JOIN
Test.dbo.Table2 b
ON
a.ID1 = b.ID2
-- Filtering on b.Code here discards the NULL-extended rows produced by the
-- LEFT OUTER JOIN, so only matched rows remain.
WHERE b.Code = 'IN';
I want to update the InDate column in my table with the oldest date from Table2 (b.Date) column where (b.code) is 'IN'
What is wrong in here?

You need to put the aggregate in a temp table or subquery and you need an explicit GROUP BY statement.
-- Sets Table1.InDate to the earliest Table2.Date among the Table2 rows with
-- Code = 'IN' for the same id. The aggregate is computed in a derived table
-- (one row per ID2) and joined back to the rows being updated.
-- Code is a Table2 column (the question filters on b.Code), so the filter is
-- applied inside the derived table rather than on Table1.
UPDATE c
SET InDate = d.min_date
FROM Test.dbo.Table1 AS c
INNER JOIN (
    SELECT
        b.ID2,
        MIN(b.Date) AS min_date
    FROM Test.dbo.Table2 AS b
    WHERE b.Code = 'IN'
    GROUP BY b.ID2
) AS d
    ON c.ID1 = d.ID2;

I think this will do what you want. I've removed the aliases to make it as clear as possible:
-- For every row of Table1, set InDate to the earliest Table2.Date among the
-- Table2 rows with a matching id and Code = 'IN'.
-- The UPDATE has no WHERE clause, so rows without such a match are set to
-- NULL (MIN over zero rows).
UPDATE t1
SET InDate = (
    SELECT MIN(t2.Date)
    FROM Table2 AS t2
    WHERE t2.ID2 = t1.ID1
      AND t2.Code = 'IN'
)
FROM Table1 AS t1

You could use apply to get the min date and then use that in the update statement:
-- Sets Table1.InDate to the earliest Table2.Date with Code = 'IN' for the
-- same id, computed per Table1 row through OUTER APPLY.
-- Column names follow the question's schema (Table1.ID1, Table2.ID2,
-- Table2.Date). The applied result has its own alias (m) so it is not
-- confused with the inner Table2 alias (b).
-- OUTER APPLY keeps Table1 rows that have no match; those get NULL.
UPDATE a
SET a.InDate = m.MinBDate
FROM Table1 AS a
OUTER APPLY
(
    SELECT MIN(b.Date) AS MinBDate
    FROM Table2 AS b
    WHERE b.ID2 = a.ID1
      AND b.Code = 'IN'
) AS m

Maybe this?
-- Sets Table1.InDate to the earliest 'IN' date per id. The derived table
-- yields one row per ID2 with its minimum Date, and the INNER JOIN restricts
-- the update to Table1 rows that have at least one 'IN' row in Table2.
-- (The duplicated WHERE keyword of the original, a syntax error, is removed.)
UPDATE a
SET InDate = m.Date
FROM Test.dbo.Table1 AS a
INNER JOIN (
    SELECT
        b.ID2,
        MIN(b.Date) AS Date
    FROM Test.dbo.Table2 AS b
    WHERE b.Code = 'IN'
    GROUP BY b.ID2
) AS m
    ON a.ID1 = m.ID2

Assuming your data model is something like the following, joining to a derived table should do the trick:
--Data Setup:
-- Table variables must be named with a leading @ (a leading # is only valid
-- for temp tables created with CREATE TABLE).
DECLARE @Table1 TABLE (ID1 INT, InDate DATETIME)
DECLARE @Table2 TABLE (ID2 INT, ID1 INT, Date DATETIME, Code VARCHAR(12))
INSERT INTO @Table1 (ID1)
VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
-- NOTE(review): the date literals are regional-format strings; how they are
-- parsed depends on the session's language/DATEFORMAT settings.
INSERT INTO @Table2 (ID2, ID1, Date, Code)
VALUES
(1, 1, '1/1/2014', 'OUT'),
(2, 1, '5/1/2014', 'IN'),
(3, 1, '3/1/2013', 'IN'),
(4, 2, '1/1/2014', 'OUT'),
(5, 2, '1/1/2014', 'IN'),
(6, 3, '1/1/2014', 'IN'),
(7, 4, '1/1/2014', 'IN'),
(8, 5, '1/1/2014', 'IN'),
(9, 6, '2/1/2014', 'OUT'),
(10, 7, '3/1/2014', 'IN'),
(11, 8, '4/1/2014', 'IN'),
(12, 9, '2/1/2014', 'IN'),
(12, 9, '2/1/2014', 'IN'),
(12, 10, '1/2/2014', 'IN'),
(12, 10, '1/3/2014', 'IN'),
(12, 10, '1/4/2014', 'IN'),
(12, 10, '1/1/2014', 'OUT')
--Actual Update:
-- The derived table returns the earliest 'IN' date per ID1. The join limits
-- the update to @Table1 rows that have at least one 'IN' row.
UPDATE T1
SET InDate = T2.MinDate
FROM @Table1 T1
JOIN (SELECT T2.ID1, MIN(Date) AS MinDate
FROM @Table2 T2
WHERE T2.Code = 'IN'
GROUP BY T2.ID1) T2 ON T2.ID1 = T1.ID1
--Results
SELECT *
FROM @Table1

SQL Query to Filter a Table using another Table

I currently have 2 SQL tables that look like this:
and...
I need to write a SELECT statement that retrieves all products from the DataTable that contain rows that match the FilterTable.
So based on my example tables above, if I were to run the query, it would return the following result:
I recently found a question that kind of attempts this:
SQL query where ALL records in a join match a condition?
but have been unsuccessful in implementing something similar
Note - I am using Microsoft SQL Server 2008

This is a little complicated, but here is one solution. Basically you need to check to see how many records from the datatable match all the records from the filtertable. This uses a subquery to do that:
-- Returns all DataTable rows of every ID whose number of (Name, Value) pairs
-- matching FilterTable equals the total number of FilterTable rows.
WITH matching_ids AS (
    -- one row per ID that matches as many filter rows as FilterTable contains
    SELECT DT.ID
    FROM DataTable AS DT
    INNER JOIN FilterTable AS FT
        ON  FT.Name  = DT.Name
        AND FT.Value = DT.VALUE
    GROUP BY DT.ID
    HAVING COUNT(*) = (SELECT COUNT(*) FROM FilterTable)
)
SELECT *
FROM DataTable
WHERE ID IN (SELECT ID FROM matching_ids)
SQL Fiddle Demo

This will work:
-- Returns the Data rows of every ID that has no attribute contradicting the
-- filter, i.e. no row whose Name appears in Filter with a different Value.
-- NOT EXISTS replaces NOT IN: with NOT IN, a single NULL ID produced by the
-- subquery makes the predicate UNKNOWN for every row and the query returns
-- nothing.
SELECT d.*
FROM Data AS d
WHERE NOT EXISTS (
    SELECT 1
    FROM Data AS bad
    INNER JOIN Filter AS f
        ON  bad.Name  = f.Name
        AND bad.Value <> f.Value
    WHERE bad.ID = d.ID
)
I set up a SQL Fiddle if you want to try other things:
http://sqlfiddle.com/#!3/38b87/6
EDIT:
Better answer:
-- Returns the Data rows of every ID that
--   (1) has no attribute contradicting the filter (same Name, different
--       Value), and
--   (2) has at least one attribute whose Name appears in Filter.
-- EXISTS / NOT EXISTS replace IN / NOT IN so that a NULL ID produced by the
-- first subquery cannot turn the NOT IN predicate UNKNOWN for every row.
SELECT d.*
FROM Data AS d
WHERE NOT EXISTS (
          SELECT 1
          FROM Data AS bad
          INNER JOIN Filter AS f
              ON  bad.Name  = f.Name
              AND bad.Value <> f.Value
          WHERE bad.ID = d.ID
      )
  AND EXISTS (
          SELECT 1
          FROM Data AS hit
          INNER JOIN Filter AS f
              ON hit.Name = f.Name
          WHERE hit.ID = d.ID
      )
This now fits where there is at least one filter that matches, and none that don't.

In case you can use sp_executesql (you are using procedure).
SET NOCOUNT ON
GO
-- Attribute/value storage: one row per (ID, Name, Value).
CREATE TABLE Data
(
[ID] INT
,[Name] VARCHAR(12)
,[Value] VARCHAR(2)
)
-- The (Name, Value) pairs an ID must satisfy to be returned.
CREATE TABLE Filter
(
[Name] VARCHAR(12)
,[Value] VARCHAR(2)
)
INSERT INTO Data ([ID], [Name], [Value])
VALUES (1, 'productname', 'A')
,(1, 'cost', '20')
,(1, 'active', 'Y')
,(2, 'productname', 'A')
,(2, 'cost', '20')
,(2, 'active', 'N')
,(3, 'productname', 'B')
,(3, 'cost', '20')
,(3, 'active', 'Y')
,(4, 'productname', 'A')
,(4, 'cost', '20')
,(4, 'active', 'Y')
INSERT INTO Filter ([Name], [Value])
VALUES ('productname', 'A')
,('active', 'Y')
-- Local variables must be named with a leading @.
-- Comma-separated list of the distinct attribute names, used as the PIVOT
-- column list. QUOTENAME brackets each name and escapes any ']' inside it;
-- STUFF strips the leading comma without the 4000-character cap that
-- SUBSTRING(..., 2, 4000) imposed.
DECLARE @SQLColumns NVARCHAR(MAX) = STUFF((SELECT DISTINCT ',' + QUOTENAME([Name]) FROM Data FOR XML PATH('')), 1, 1, '')
-- "[name] = 'value' AND ..." predicate built from Filter. Single quotes in
-- values are doubled so a value cannot terminate the string literal, and
-- STUFF removes the leading 'AND '.
DECLARE @SQLFilterColumns NVARCHAR(MAX) = STUFF((SELECT 'AND ' + QUOTENAME([Name]) + ' = ''' + REPLACE([Value], '''', '''''') + ''' ' FROM Filter FOR XML PATH('')), 1, 4, '')
-- Pivot Data to one row per ID with one column per attribute, keep the IDs
-- that satisfy every filter predicate, then return their original rows.
DECLARE @SQLStatement NVARCHAR(MAX) = N'
;WITH DataSource ([ID]) AS
(
SELECT [ID]
FROM
(
SELECT [ID]
,[Name]
,[Value]
FROM Data
) DataSource
PIVOT
(
MAX([Value]) FOR [Name] IN (' + @SQLColumns+ ')
) PVT
WHERE ' + @SQLFilterColumns + '
)
SELECT DT.[ID]
,DT.[Name]
,DT.[Value]
FROM Data DT
INNER JOIN DataSource DS
ON DT.[ID] = DS.[ID]
'
EXECUTE sp_executesql @SQLStatement
DROP TABLE Data
DROP TABLE Filter
SET NOCOUNT OFF
GO

Here is an option using a couple of PIVOTs
-- Table variables must be named with a leading @ (not #).
DECLARE @Data table ([ID] INT, [Name] VARCHAR(12), [Value] VARCHAR(2) )
DECLARE @Filter TABLE ( [Name] VARCHAR(12), [Value] VARCHAR(2) )
INSERT INTO @Data ([ID], [Name], [Value])
VALUES (1, 'productname', 'A')
,(1, 'cost', '20')
,(1, 'active', 'Y')
,(2, 'productname', 'A')
,(2, 'cost', '20')
,(2, 'active', 'N')
,(3, 'productname', 'B')
,(3, 'cost', '20')
,(3, 'active', 'Y')
,(4, 'productname', 'A')
,(4, 'cost', '20')
,(4, 'active', 'Y')
INSERT INTO @Filter ([Name], [Value])
VALUES ('productname', 'A')
,('active', 'Y');
-- B: @Data pivoted to one row per ID with productname/active columns.
-- F: @Filter pivoted to a single row with the same two columns.
-- The inner join keeps the IDs whose productname and active both equal the
-- filter values.
SELECT *
FROM ( SELECT *
FROM (select [ID], [Name], [value] from @Data) as s
PIVOT
( MAX([value]) FOR [name] in ( [productname], [active])
) as pvt) B
INNER JOIN
( SELECT *
FROM (select [name], [value] from @Filter) as f
PIVOT
( MAX([value]) for [Name] IN ([productname], [active])
) AS fpvt
) F
ON F.active = b.active and f.productname = b.productname
Doing a PIVOT on the DATA table and then on the FILTER table allows them to be lined up for an inner join. This returns only the records that match in both.

Select rows with duplicate values in 2 columns

This is my table:
-- Sample table: Id identifies the row; FId and SId form the pair that is
-- checked for duplicates.
CREATE TABLE [Test].[dbo].[MyTest] (
    [Id]  BIGINT NOT NULL,
    [FId] BIGINT NOT NULL,
    [SId] BIGINT NOT NULL
);
And some data:
-- Sample rows; Ids 2 and 6 share the same (FId, SId) pair (200, 12).
INSERT INTO [Test].[dbo].[MyTest] ([Id], [FId], [SId])
VALUES (1, 100, 11),
       (2, 200, 12),
       (3, 100, 21),
       (4, 200, 22),
       (5, 300, 13),
       (6, 200, 12);
So I need 2 select query,
The first should select FId and SId as distinct pairs across both columns, so the result is:
100, 11
200, 12
100, 21
200, 22
300, 13
As you see the values of 200, 12 returned once.
The second query should return the Ids of the rows whose FId, SId pair is duplicated, so the result is:
2
6
Does any one have any idea about it?

Standard SQL
-- Ids of all rows whose (FId, SId) pair occurs more than once.
-- Reads MyTest, the table defined in the question (the original referenced
-- a non-existent MyTable).
SELECT
    M.Id
FROM
    ( -- all (FId, SId) pairs that occur more than once
        SELECT FId, SId
        FROM MyTest
        GROUP BY FId, SId
        HAVING COUNT(*) > 1
    ) AS T
INNER JOIN -- back onto the main table using these duplicate pairs
    MyTest AS M
    ON  T.FId = M.FId
    AND T.SId = M.SId
Using windowing:
-- Ids of all rows whose (FId, SId) pair occurs more than once, using a
-- windowed count instead of a self-join.
-- Reads MyTest, the table defined in the question (the original referenced
-- a non-existent MyTable).
SELECT
    T.Id
FROM
    (
        SELECT
            Id,
            -- number of rows sharing this row's (FId, SId) pair
            COUNT(*) OVER (PARTITION BY FId, SId) AS CountPerPair
        FROM
            MyTest
    ) AS T
WHERE
    T.CountPerPair > 1

First query:
-- Each (Fid, SId) pair once, however many rows carry it.
SELECT
    Fid,
    SId
FROM MyTest
GROUP BY
    Fid,
    SId
Second query:
-- Ids of rows for which another row (different Id) has the same
-- (Fid, SId) pair.
SELECT DISTINCT
    a1.Id
FROM MyTest AS a1
WHERE EXISTS (
    SELECT 1
    FROM MyTest AS a2
    WHERE a2.Fid = a1.Fid
      AND a2.SId = a1.SId
      AND a2.Id <> a1.Id
)
I cannot test them, but I think they should work...

first:
-- Each (FId, SId) pair once.
SELECT DISTINCT
    t.FId,
    t.SId
FROM [Test].[dbo].[MyTest] AS t
second query
-- Ids of rows that share their (FId, SId) pair with at least one other row.
SELECT DISTINCT
    t.Id
FROM [Test].[dbo].[MyTest] AS t
INNER JOIN [Test].[dbo].[MyTest] AS t2
    ON  t2.FId = t.FId
    AND t2.SId = t.SId
    AND t2.Id <> t.Id

Part 1 is as mentioned above distinct.
This will resolve second part.
-- Ids of rows for which another row with the same SId and FId exists.
SELECT a.id
FROM [Test].[dbo].[MyTest] AS a
WHERE EXISTS (
    SELECT 1
    FROM [Test].[dbo].[MyTest] AS other
    WHERE other.[SId] = a.[SId]
      AND other.[FId] = a.[FId]
      AND other.id <> a.id
)

Merging records based on a time difference?

I have the following table:
-- Temp table with the question's sample intervals (one row per id).
CREATE TABLE #TEMP (
    id        int,
    name      varchar(255),
    startdate datetime,
    enddate   datetime
)

INSERT INTO #TEMP (id, name, startdate, enddate)
VALUES (1, 'John', '2011-01-11 00:00:00.000', '2011-01-11 00:01:10.000'),
       (2, 'John', '2011-01-11 00:00:20.000', '2011-01-11 00:01:50.000'),
       (3, 'John', '2011-01-11 00:01:40.000', '2011-01-11 00:01:50.000'),
       (4, 'Adam', '2011-01-11 00:00:40.000', '2011-01-11 00:01:20.000'),
       (5, 'Adam', '2011-01-11 00:00:10.000', '2011-01-11 00:01:30.000')

SELECT * FROM #TEMP

DROP TABLE #TEMP
I am trying to merge all records with the same name within a range of 60 seconds to each other to get the following:
John 2011-01-11 00:00:00.000 2011-01-11 00:01:10.000
John 2011-01-11 00:01:40.000 2011-01-11 00:01:50.000
Adam 2011-01-11 00:00:10.000 2011-01-11 00:01:20.000
Any suggestions on how to do this on a table with about 50K records? Currently, I managed to get to this:
-- Work-in-progress code from the question: finds pairs of rows to merge and
-- copies the rows that belong to no pair into #TEMP2.
SELECT * FROM #TEMP
-- #Merge holds pairs (id1, id2) of same-name rows where id2 starts 0 to 60
-- seconds after id1.
CREATE TABLE #Merge(id1 int, id2 int)
INSERT INTO #Merge
SELECT id, uuid
FROM
(
SELECT t.id, u.uuid, t.name, t.startdate, t.enddate, u.ustartdate, u.uenddate,
-- Flag = 1 when u starts between 0 and 60 seconds (inclusive) after t
(CASE WHEN (DATEDIFF(second, t.startdate, u.ustartdate) <= 60 AND DATEDIFF(second, t.startdate, u.ustartdate) >= 0) then 1 else 0 END) Flag
FROM #Temp t
INNER JOIN
(SELECT id AS uuid, name, startdate AS ustartdate, enddate AS uenddate
FROM #Temp) u
-- self-join on name, excluding the same row and rows with an identical startdate
ON t.name = u.name AND t.startdate != u.ustartdate AND t.id != u.uuid
) w
WHERE Flag = 1
SELECT * FROM #Merge
-- Insert non-mergable records
-- NOTE(review): #TEMP2 has five columns (including membergroup), so
-- SELECT * FROM #TEMP only fits if #TEMP also has a membergroup column, as in
-- the question's updated sample rows - confirm the #TEMP definition in use.
CREATE TABLE #TEMP2 (id int, name varchar(255), membergroup varchar(255), startdate datetime, enddate datetime)
INSERT INTO #TEMP2
SELECT * FROM #TEMP
WHERE id NOT IN (SELECT id1 FROM #Merge UNION SELECT id2 FROM #Merge)
SELECT * FROM #TEMP2
Of course, I am not sure how to proceed from here. The #Merge table gives me rows that are to be merged. What I did was to insert non-mergable rows first into #Temp2 first.
EDIT:
Updated set of rows, just in case:
-- Updated sample rows with five values each.
-- NOTE(review): the third value presumably maps to a membergroup column (as
-- declared on #TEMP2 in the question); the matching five-column #TEMP
-- definition is not shown - confirm before running.
INSERT INTO #TEMP VALUES(1, 'John', 'A', '2011-01-11 00:00:00.000','2011-01-11 00:01:10.000')
INSERT INTO #TEMP VALUES(2, 'John', 'A', '2011-01-11 00:00:01.000','2011-01-11 00:01:10.000')
INSERT INTO #TEMP VALUES(3, 'John', 'B', '2011-01-11 00:00:20.000','2011-01-11 00:01:50.000')
INSERT INTO #TEMP VALUES(4, 'John', 'C', '2011-01-11 00:01:40.000','2011-01-11 00:01:50.000')
INSERT INTO #TEMP VALUES(5, 'John', 'C', '2011-01-11 00:01:50.000','2011-01-11 00:02:20.000')
INSERT INTO #TEMP VALUES(6, 'Adam', 'A', '2011-01-11 00:00:40.000','2011-01-11 00:01:20.000')
INSERT INTO #TEMP VALUES(7, 'Adam', 'B', '2011-01-11 00:00:10.000','2011-01-11 00:01:30.000')
INSERT INTO #TEMP VALUES(8, 'Adam', 'B', '2011-01-11 00:03:10.000','2011-01-11 00:04:30.000')

The code below manages to show both merged rows (rows 1-2, 4-5) and unique rows (row 3).
-- Selects rows based on their same-name neighbours: b is a later-id row and
-- c an earlier-id row, each with a startdate difference of at most 60 seconds.
-- NOTE(review): reads from a table named temp, whereas the question's table
-- is #TEMP - confirm the intended table name.
SELECT DISTINCT a.id,a.name,a.startdate,a.enddate
FROM temp a
-- b: same name, higher id, DATEDIFF(a.startdate -> b.startdate) <= 60 seconds
LEFT JOIN temp b ON a.name = b.name AND a.id < b.id AND DATEDIFF(s,a.startdate,b.startdate)<=60
-- c: same name, lower id, DATEDIFF(c.startdate -> a.startdate) <= 60 seconds
LEFT JOIN temp c ON c.name = a.name AND c.id < a.id AND DATEDIFF(s,c.startdate,a.startdate)<=60
-- keep a row when it has a later neighbour or no earlier neighbour; c.id < a.id
-- by the join, so the last predicate only holds when c is NULL
WHERE (b.id IS NOT NULL OR c.id IS NULL) AND a.id <= COALESCE(c.id,a.id)

Given you haven't said how to use the 60 second interval and your sample code showed only a startdate comparison, here you go
-- Pairs each row with the highest-id later row of the same name whose
-- startdate lies less than 60 seconds after its own. Rows without such a
-- partner are dropped by CROSS APPLY.
SELECT *
FROM #Temp AS t1
CROSS APPLY (
    SELECT TOP (1) *
    FROM #Temp AS t2
    WHERE t2.name = t1.name
      AND t2.id > t1.id
      AND DATEDIFF(second, t1.startdate, t2.startdate) < 60
    ORDER BY t2.id DESC
) AS t2x
Based on startdate only, row pairs 1/2 and 4/5 make it into the output. Row 3 doesn't so you'll have to explain why you added it.
That is, row id = 3 is not within 60 seconds of row 1 or 2 based on startdate. So it shouldn't be in the output.
This assumes that id and startdate are both increasing.
Edit, after chat:
-- Part 1: each row paired with the highest-id later row of the same name
-- whose startdate is less than 60 seconds after its own.
SELECT
*
FROM
#Temp t1
CROSS APPLY
(SELECT TOP 1 *
FROM #Temp t2
WHERE t1.name = t2.name AND DATEDIFF(second, t1.startdate, t2.startdate) < 60 AND t1.id < t2.id
ORDER BY t2.id DESC
) t2x
UNION ALL
-- Part 2: rows that take part in no pair from part 1, on either side. The
-- row is selected twice (t1.*, t1.*) so the column count matches part 1.
SELECT
t1.*, t1.*
FROM
#Temp t1
WHERE NOT EXISTS
(
-- same pairing query as part 1, re-aliased (ZZ) so the outer t1 can be
-- correlated against both sides of each pair
SELECT
t1ZZ.id, t2xZZ.id
FROM
#Temp t1ZZ
CROSS APPLY
(SELECT TOP 1 *
FROM #Temp t2ZZ
WHERE t1ZZ.name = t2ZZ.name AND DATEDIFF(second, t1ZZ.startdate, t2ZZ.startdate) < 60 AND t1ZZ.id < t2ZZ.id
ORDER BY t2ZZ.id DESC
) t2xZZ
WHERE
t1.id IN (t1ZZ.id, t2xZZ.id)
)

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL Table data compare - sql

Related

Combining rows in SQL Server against all other rows

Updating Using Aggregate Function

SQL Query to Filter a Table using another Table

Select rows with duplicate values in 2 columns

Merging records based on a time difference?

Categories

Resources