Translating idempotent merge example not working - sql

How can I get this merge statement to handle changes (just inserts for now) in an idempotent way? I would like to follow this rough example / template proposed by Databricks.
The example data contains a base set with 2 rows for 2 dates. The change set contains 2 rows for 2 dates, with 1 row being a duplicate, and 1 row being "new" data that should be "merged".
Some data
-- Base set: the rows already present in the target (two dates for ID 'A').
create table #baseset
(
    Date  varchar(30),
    ID    varchar(30),
    State varchar(30),
    Count varchar(30)
)
insert into #baseset (Date, ID, State, Count)
values ('2/7/2023', 'A', 'A', null),
       ('2/6/2023', 'A', 'A', null)
-- Change set: one date that already exists in the base set (2/7) and one new date (2/8).
create table #changeset
(
    Date  varchar(30),
    ID    varchar(30),
    State varchar(30),
    Count varchar(30)
)
insert into #changeset (Date, ID, State, Count)
values ('2/8/2023', 'A', 'A', null),
       ('2/7/2023', 'A', 'A', null)
Mockup MERGE
-- Staging query for the MERGE source: the union of "keyed" change rows
-- (mergeKey = ID) and "un-keyed" copies (mergeKey = NULL).
(
-- These rows will either UPDATE the existing data or INSERT new data that does not yet exist
SELECT
#changeset.ID as mergeKey, #changeset.*
FROM
#changeset
UNION ALL
-- These rows will INSERT data of existing rows
-- Setting the mergeKey to NULL forces these rows to NOT MATCH and be INSERTed.
SELECT
NULL as mergeKey,
#changeset.*
FROM
#changeset
-- NOTE: the join is on ID only, so each changeset row is paired with every
-- baseset row that shares its ID; the Date filter below then keeps one output
-- row per pair whose dates differ.
JOIN #baseset ON #changeset.ID = #baseset.ID
WHERE
#changeset.Date <> #baseset.Date
)
Yielding
+--------+--------+---+-----+-----+
|mergeKey| Date| ID|State|Count|
+--------+--------+---+-----+-----+
| A|2/8/2023| A| A| null|
| A|2/7/2023| A| A| null|
| null|2/8/2023| A| A| null|
| null|2/7/2023| A| A| null|
| null|2/8/2023| A| A| null|
+--------+--------+---+-----+-----+
Expected
+--------+--------+---+-----+-----+
|mergeKey| Date| ID|State|Count|
+--------+--------+---+-----+-----+
| A|2/8/2023| A| A| null|
| A|2/7/2023| A| A| null|
| null|2/8/2023| A| A| null|
+--------+--------+---+-----+-----+
Full Merge Statement
It's the same query as above, wrapped in a MERGE.
-- Asker's MERGE: uses the staging query as the source and only defines an
-- INSERT branch (no WHEN MATCHED action).
MERGE INTO #baseset
USING (
-- These rows will either UPDATE the existing data or INSERT new data that does not yet exist
SELECT
#changeset.ID as mergeKey, #changeset.*
FROM
#changeset
UNION ALL
-- These rows will INSERT data of existing rows
-- Setting the mergeKey to NULL forces these rows to NOT MATCH and be INSERTed.
SELECT
NULL as mergeKey,
#changeset.*
FROM
#changeset
-- NOTE: joined on ID only; every (changeset, baseset) pair with differing
-- dates yields one NULL-keyed row.
JOIN #baseset ON #changeset.ID = #baseset.ID
WHERE
#changeset.Date <> #baseset.Date
) staged_updates
-- The match is on ID alone (Date is not part of the condition); a NULL
-- mergeKey never satisfies the equality, so those rows are NOT MATCHED.
ON #baseset.id = mergeKey
WHEN NOT MATCHED BY TARGET THEN
INSERT (Date, ID, State, Count)
VALUES (staged_updates.Date, staged_updates.ID, staged_updates.State, staged_updates.Count)
;

It sounds like you're kinda mixing up your terminology, but anyways.
The canonical merge example with your data:
-- Canonical upsert of #changeset into #baseset, keyed on (Date, ID).
-- The drops are guarded so the script runs on a fresh session as well as on a re-run.
if object_id('tempdb..#baseset') is not null
    drop table #baseset
if object_id('tempdb..#changeset') is not null
    drop table #changeset
go
create table #baseset
(
    Date  varchar(30),
    ID    varchar(30),
    State varchar(30),
    Count varchar(30)
)
insert into #baseset (Date, ID, State, Count)
values ('2/7/2023', 'A', 'A', null),
       ('2/6/2023', 'A', 'A', null)
create table #changeset
(
    Date  varchar(30),
    ID    varchar(30),
    State varchar(30),
    Count varchar(30)
)
-- Count is a varchar column, so the values are written as strings rather than
-- relying on an implicit int -> varchar conversion.
insert into #changeset (Date, ID, State, Count)
values ('2/8/2023', 'A', 'A', '3'),
       ('2/7/2023', 'A', 'A', '4')
-- Rows matching on (Date, ID) are updated in place; source rows with no
-- matching target row are inserted.
;merge #baseset as t
using #changeset as s
    on s.Date = t.Date
   and s.ID = t.ID
when matched then
    update set State = s.State,
               Count = s.Count
when not matched by target then
    insert (Date, ID, State, Count)
    values (s.Date, s.ID, s.State, s.Count)
;
select Date, ID, State, Count
from #baseset
This does a so-called upsert: it updates #baseset from #changeset when both ID and Date match, and when a #changeset row has no match in #baseset (i.e. it is missing), it inserts the missing row.

Related

I need to do retrofit query using update or merge

I have two tables A and B. In A, I have a column called fetch_year. I need to consider table B from these two columns
primary_date
secondary_date
These columns have JSON values like {"lock":"true","date":"01/01/1990"}
From this JSON I need to take the date, extract the year, and save it in table A's fetch_year column. primary_date should always be considered first, and secondary_date only if primary_date is null.
The final result should be 1990 in the fetch_year column
Table A is empty as of now( only one column with cal_id)
cal_id fetch_year
1 null
n null
Table B
|B_id|Cal_id | primary_date | secondary_date |
|----|-------|-----------------------------------|------------------------|
|11 | 1 |{"lock":"true","date":"01/01/1990"}|Null|
|12 | 2 | Null | {"lock":"true","date":"01/01/1980"} |
|13 | 3 | Null | Null |
|14 | 4 | {"lock":"true","date":"01/01/1995"} |{"lock":"true","date":"01/01/1997"} |
In table B
So I have n number of records in both the tables
I need results like this in A table
Cal_id fetch_year.
1 1990
2 1980
3 Null
4 1995
n n-values
In cal_id =4 in this case we have value in both columns so we are considering primary_date not secondary_date
Please help me with this problem
You could make use of either JSON_VALUE or OPENJSON here to extract the date from your JSON blobs.
I tend to prefer OPENJSON because it allows you to extract multiple values simultaneously, and they don't have to be at the same level in a nested JSON structure. With the "squirrelly" dates in your example data, though, you may prefer the JSON_VALUE version with TRY_CONVERT so that you have more control over date deserialization.
-- Data setup: dbo.A holds the target fetch_year per Cal_id; dbo.B holds the
-- JSON date blobs (B_id is generated, starting at 11).
CREATE TABLE dbo.A (
    Cal_id     int,
    fetch_year int
);

CREATE TABLE dbo.B (
    B_id           int NOT NULL IDENTITY(11,1),
    Cal_id         int,
    primary_date   nvarchar(max),
    secondary_date nvarchar(max)
);

INSERT INTO dbo.A (Cal_id, fetch_year)
VALUES (1, NULL), (2, NULL), (3, NULL), (4, NULL);

INSERT INTO dbo.B (Cal_id, primary_date, secondary_date)
VALUES
    (1, N'{"lock":"true","date":"01/01/1990"}', NULL),
    (2, NULL, N'{"lock":"true","date":"01/01/1980"}'),
    (3, NULL, NULL),
    (4, N'{"lock":"true","date":"01/01/1995"}', N'{"lock":"true","date":"01/01/1997"}');
-- JSON_VALUE variant: take the year of primary_date's "date", falling back to
-- secondary_date's when the first conversion yields NULL.
-- REF: CAST and CONVERT / Date and time styles
-- https://learn.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql#date-and-time-styles
UPDATE Table_A
SET fetch_year = YEAR(COALESCE(
        TRY_CONVERT(date, JSON_VALUE(primary_date, '$.date'), 101),   -- style 101 = mm/dd/yyyy
        TRY_CONVERT(date, JSON_VALUE(secondary_date, '$.date'), 101)  -- style 101 = mm/dd/yyyy
    ))
FROM dbo.A AS Table_A
INNER JOIN dbo.B AS Table_B
    ON Table_B.Cal_id = Table_A.Cal_id
-- OPENJSON variant: shred each JSON blob into a one-column rowset, then take
-- the year of the primary date, falling back to the secondary date.
-- The "date" property is read as text and converted with an explicit style
-- (101 = mm/dd/yyyy) so the result does not depend on the session's
-- language / DATEFORMAT settings; unparseable values become NULL.
update Table_A
set fetch_year = year(coalesce(
    try_convert(date, Primary_JSON.[date], 101),
    try_convert(date, Secondary_JSON.[date], 101)
))
from dbo.A Table_A
join dbo.B Table_B on Table_B.Cal_id = Table_A.Cal_id
-- OUTER APPLY keeps the dbo.B row even when a JSON column is NULL.
outer apply openjson(Table_B.primary_date) with ([date] varchar(30)) Primary_JSON
outer apply openjson(Table_B.secondary_date) with ([date] varchar(30)) Secondary_JSON;

create table as select with primary key in snowflake --need syntax

I know it works (when there is no select as)
-- Plain CREATE TABLE with an out-of-line primary key (no SELECT involved).
CREATE TABLE t1 (
    dt1 date,
    PRIMARY KEY (dt1)
);
I also know alter table works like below:
-- Two-step workaround: create the table from a SELECT, then add the key.
CREATE TABLE t2 AS (
    SELECT current_date AS dt1
);
ALTER TABLE t2
    ADD PRIMARY KEY (dt1);
But I need a syntax for create table with PK when there is SELECT in Create Table. I am trying to convert existing code and other DB support Create Table as select with PK.
You can do it like you'd normally add a primary key:
-- CREATE TABLE ... AS SELECT with explicit column definitions; the primary
-- key is declared inline on the first column.
CREATE OR REPLACE TRANSIENT TABLE test_table (
    number_1 number PRIMARY KEY,
    number_2 number,
    number_3 number
) AS (
    SELECT column1, column2, column3
    FROM VALUES
        (1, 2, 3),
        (3, 4, 5)
);
Results
describe table TEST_TABLE;
-- +--------+------------+------+-----+-------+-----------+----------+
-- |name |type |kind |null?|default|primary key|unique key|
-- +--------+------------+------+-----+-------+-----------+----------+
-- |NUMBER_1|NUMBER(38,0)|COLUMN|N |NULL |Y |N | --> Primary Key
-- |NUMBER_2|NUMBER(38,0)|COLUMN|Y |NULL |N |N |
-- |NUMBER_3|NUMBER(38,0)|COLUMN|Y |NULL |N |N |
-- +--------+------------+------+-----+-------+-----------+----------+
In the documentation for Variant Create Table syntax it looks like the following should work:
-- CTAS form with an out-of-line primary key constraint.
CREATE TABLE t1 (
    dt1 date,
    PRIMARY KEY (dt1)
)
AS
SELECT CURRENT_DATE;

Value from 2nd Row of column copied to 1st row of next column

The table looks like below.
-- Sample data: FREQHZ readings per UVRID.
-- The drop is guarded so the script does not raise an error on the first run,
-- when #TEMP does not exist yet.
IF OBJECT_ID('tempdb..#TEMP') IS NOT NULL
    DROP TABLE #TEMP
CREATE TABLE #TEMP
(
    UVRID    VARCHAR(20),
    DynamoNo INT,
    FREQHZ   INT
)
INSERT INTO #TEMP (UVRID, DynamoNo, FREQHZ)
VALUES
    ('15AL78', 100, 10),
    ('15AL78', 110, 20),
    ('257T13', 100, 10),
    ('257T13', 110, 20),
    ('257T13', 931, 30)
I am trying to make 1 new column say SuprerFrez whose value is depends on column FREQHZ.
For every UVRID group 2nd value FREQHZ will be 1st value of SuprerFrez
and for last FREQHZ, SuprerFrez value will be zero.
Expected output with 1 new column whose value depends upon FREQHZ column. Order by FREQHZ ASC
UVRID |DynamoNo|FREQHZ|SuprerFrez
'15AL78'|100 |10 |20
'15AL78'|110 |20 |0
'257T13'|100 |10 |20
'257T13'|110 |20 |30
'257T13'|931 |30 |0
You are looking for lead():
-- For each row, fetch the next FREQHZ within the same UVRID (ordered by
-- DynamoNo); the last row of each group gets the default 0.
-- Identifiers are spelled exactly as in the DDL (#TEMP, FREQHZ) so the query
-- also resolves under a case-sensitive collation.
select
    t.*,
    lead(t.FREQHZ, 1, 0) over (
        partition by t.UVRID
        order by t.DynamoNo
    ) as SuprerFrez
from #TEMP as t;
Note this assumes that the ordering is by DynamoNo. If that is not the ordering you have in mind, then you need another column that specifies the ordering. For instance, if you wanted "insert" order, you could use an identity column:
-- Variant of #TEMP with a generated key column that later queries can order by.
CREATE TABLE #TEMP (
-- IDENTITY(1, 1): values are generated starting at 1 in steps of 1.
TempID INT IDENTITY(1, 1) PRIMARY KEY,
UVRID VARCHAR(20),
DynamoNo INT,
FREQHZ INT
);
Then the code would look like:
-- Same look-ahead as before, but ordered by the generated TempID.
-- Identifiers are spelled exactly as in the DDL (#TEMP, FREQHZ) so the query
-- also resolves under a case-sensitive collation.
select
    t.*,
    lead(t.FREQHZ, 1, 0) over (
        partition by t.UVRID
        order by t.TempID
    ) as SuprerFrez
from #TEMP as t;

Insert different values per column at once

After adding a new column is it possible to insert values for that column using only one query?
TABLE:
id | category | level | new_column_name
---+-------------+-------+----------------
1 | Character | 3 |
2 | Character | 2 |
3 | Character | 2 |
4 | Character | 5 |
I'd need to do something like
INSERT INTO table_name
(new_column_name)
VALUES
('foo'), -- value for new_column_name : row1
('bar'), -- value for new_column_name : row2
...
;
I already used a similar query from the PostgreSQL CLI to insert values, but it fails because it uses NULL for the unset column values, one of which (id) is the PRIMARY KEY and therefore can't be NULL.
This is the error it logs:
ERROR: null value in column "id" violates not-null constraint
DETAIL: Failing row contains (null, null, null, 1359).
EDIT:
What I'm doing is more an update than an insert, but what I would be able to do is to insert all the different values at once.
For instance, if possible, I would avoid doing this:
-- One UPDATE statement per row (the approach the asker wants to avoid).
UPDATE table_name
SET new_column_name = 'foo'
WHERE id = 1;
-- No trailing comma after the last SET assignment: it is a syntax error.
UPDATE table_name
SET new_column_name = 'bar'
WHERE id = 2;
--...
You can use VALUES so as to construct an in-line table containing the values to be updated.:
-- Join the target to an inline (id, val) table and copy val across.
UPDATE table_name AS t
SET new_column_name = new_vals.val
FROM (
    VALUES
        (1, 'foo'),
        (2, 'bar')
) AS new_vals (id, val)
WHERE t.id = new_vals.id
Demo here
You can use a huge CASE:
-- Single UPDATE mapping each id to its new value.
-- The WHERE clause limits the statement to the ids listed in the CASE, and the
-- ELSE keeps the current value, so rows without a mapping are never
-- overwritten with NULL.
UPDATE table_name
SET new_column_name
    = CASE
          WHEN id = 1 THEN 'foo'
          WHEN id = 2 THEN 'bar'
          ELSE new_column_name
      END
WHERE id IN (1, 2);

SQL Compare two tables update and set flag

I have two tables, the first one is called table1, the structure and contents are shown below
+--+-----+------+------+
|ID|fname|Lname |Status|
+--+-----+------+------+
|1 |Pat |Test | |
+--+-----+------+------+
|2 |Su |Test2 | |
+--+-----+------+------+
|3 |Bri |Test4 | |
+--+-----+------+------+
|4 |Mel |Gi | |
+--+-----+------+------+
|5 |Good |Record| |
+--+-----+------+------+
|6 |Tr |BL | |
+--+-----+------+------+
The second table has updates that need to be committed to table1.
+--+-------+-----+
|ID|Fname |Lname|
+--+-------+-----+
|1 |Patrick|Cool |
+--+-------+-----+
|2 |Susun |Smart|
+--+-------+-----+
|6 |True |Blood|
+--+-------+-----+
I would like to compare both tables (using ID) and update table1 with the contents of table2. If any update is done on a row, I would like its status column to be marked as UPDATED. If a row exists in table1 but doesn't exist in table2, I would like the status of that row in table1 marked as DELETE. Rows with no changes should have no status.
After the update, the final output should resemble
+--+-------+------+------+
|ID|fname |Lname |Status|
+--+-------+------+------+
|1 |Patrick|Cool |UPDATE|
+--+-------+------+------+
|2 |Susun |Smart |UPDATE|
+--+-------+------+------+
|3 |Brian |Brown |DELETE|
+--+-------+------+------+
|4 |Mel |Gibson|DELETE|
+--+-------+------+------+
|5 |Good |Record||
+--+-------+------+------+
|6 |True |Blood |UPDATE|
+--+-------+------+------+
Any help will be appreciated
Maybe try a nested query?
For the deleted rows:
-- Flag every table_1 row that has no counterpart in table_2.
-- NOT EXISTS is used instead of NOT IN: if table_2.id ever contained a NULL,
-- NOT IN would match no rows at all.
-- The literal uses single quotes; double quotes denote identifiers in
-- standard SQL.
UPDATE table_1
SET status = 'DELETED'
WHERE NOT EXISTS (
    SELECT 1
    FROM table_2
    WHERE table_2.id = table_1.id
)
for update
-- Multi-table UPDATE (MySQL syntax): copy the names from table_2 and flag the
-- row, but only where the ids match and at least one name differs.
-- The id equality must sit on the outer statement; without it every table_1
-- row is paired with every table_2 row and the names are copied from an
-- unrelated row.
-- Note: <> is not true when either side is NULL, so a NULL name is not
-- detected as a change.
UPDATE table_1, table_2
SET table_1.status = 'UPDATED',
    table_1.fName = table_2.FName,
    table_1.lName = table_2.LName
WHERE table_1.id = table_2.id
  AND (table_1.fName <> table_2.FName OR table_1.lName <> table_2.LName)
(edited)update with join
-- Same update written with an explicit join (MySQL syntax: the join is part
-- of the UPDATE target, and there is no FROM clause).
-- Only rows whose first or last name differs are touched.
UPDATE table_1 AS t1
INNER JOIN table_2 AS t2
    ON t2.id = t1.id
SET t1.fName = t2.FName,
    t1.lName = t2.LName,
    t1.status = 'UPDATED'
WHERE t1.fName <> t2.FName
   OR t1.lName <> t2.LName
hashbrown ->
the code above is just not showing the "DELETED" , "UPDATED" , sorry about that (:
So I'll add this code which can help
Read more in Simple-talk-> the merge statement in sql server 2008
-- Fixture for the MERGE examples: BookInventory is the target, BookOrder the
-- source.
-- The existence checks are schema-qualified so they look up the same object
-- that the DROP / CREATE statements name (dbo.*).
IF OBJECT_ID (N'dbo.BookInventory', N'U') IS NOT NULL
    DROP TABLE dbo.BookInventory;
CREATE TABLE dbo.BookInventory -- target
(
    TitleID INT NOT NULL PRIMARY KEY,
    Title NVARCHAR(100) NOT NULL,
    Quantity INT NOT NULL
        CONSTRAINT Quantity_Default_1 DEFAULT 0
);
IF OBJECT_ID (N'dbo.BookOrder', N'U') IS NOT NULL
    DROP TABLE dbo.BookOrder;
CREATE TABLE dbo.BookOrder -- source
(
    TitleID INT NOT NULL PRIMARY KEY,
    Title NVARCHAR(100) NOT NULL,
    Quantity INT NOT NULL
        CONSTRAINT Quantity_Default_2 DEFAULT 0
);
-- Explicit column lists keep the inserts valid if columns are later added or
-- reordered.
INSERT INTO dbo.BookInventory (TitleID, Title, Quantity) VALUES
    (1, 'The Catcher in the Rye', 6),
    (2, 'Pride and Prejudice', 3),
    (3, 'The Great Gatsby', 0),
    (5, 'Jane Eyre', 0),
    (6, 'Catch 22', 0),
    (8, 'Slaughterhouse Five', 4);
INSERT INTO dbo.BookOrder (TitleID, Title, Quantity) VALUES
    (1, 'The Catcher in the Rye', 3),
    (3, 'The Great Gatsby', 0),
    (4, 'Gone with the Wind', 4),
    (5, 'Jane Eyre', 5),
    (7, 'Age of Innocence', 8);
--1 WHEN MATCHED: add the ordered quantity to every inventory row whose
--  TitleID also appears in BookOrder.
MERGE BookInventory AS bi
USING BookOrder AS bo
    ON bi.TitleID = bo.TitleID
WHEN MATCHED THEN
    UPDATE SET bi.Quantity = bi.Quantity + bo.Quantity;

SELECT *
FROM BookInventory;
--2 Two WHEN MATCHED branches: delete the inventory row when the combined
--  quantity is zero, otherwise add the ordered quantity.
MERGE BookInventory AS bi
USING BookOrder AS bo
    ON bi.TitleID = bo.TitleID
WHEN MATCHED AND bi.Quantity + bo.Quantity = 0 THEN
    DELETE
WHEN MATCHED THEN
    UPDATE SET bi.Quantity = bi.Quantity + bo.Quantity;

SELECT *
FROM BookInventory;
--3 WHEN NOT MATCHED [BY TARGET]: in addition to the delete/update branches,
--  insert ordered titles that are missing from the inventory.
MERGE BookInventory AS bi
USING BookOrder AS bo
    ON bi.TitleID = bo.TitleID
WHEN MATCHED AND bi.Quantity + bo.Quantity = 0 THEN
    DELETE
WHEN MATCHED THEN
    UPDATE SET bi.Quantity = bi.Quantity + bo.Quantity
WHEN NOT MATCHED BY TARGET THEN
    INSERT (TitleID, Title, Quantity)
    VALUES (bo.TitleID, bo.Title, bo.Quantity);

SELECT *
FROM BookInventory;
--4 WHEN NOT MATCHED BY SOURCE: additionally delete inventory rows that have
--  no order row and a quantity of zero.
MERGE BookInventory AS bi
USING BookOrder AS bo
    ON bi.TitleID = bo.TitleID
WHEN MATCHED AND bi.Quantity + bo.Quantity = 0 THEN
    DELETE
WHEN MATCHED THEN
    UPDATE SET bi.Quantity = bi.Quantity + bo.Quantity
WHEN NOT MATCHED BY TARGET THEN
    INSERT (TitleID, Title, Quantity)
    VALUES (bo.TitleID, bo.Title, bo.Quantity)
WHEN NOT MATCHED BY SOURCE AND bi.Quantity = 0 THEN
    DELETE;

SELECT *
FROM BookInventory;
--5 Implementing the OUTPUT Clause
-- Capture one row per affected target row (action plus before/after values)
-- in a table variable. Table variables are declared with an @ prefix;
-- "DECLARE #name TABLE" is not valid T-SQL. Because a table variable is scoped
-- to the batch, everything down to the final SELECT must run in one batch.
DECLARE @MergeOutput TABLE
(
    ActionType NVARCHAR(10),
    DelTitleID INT,
    InsTitleID INT,
    -- Same width as the Title columns (NVARCHAR(100)) so long titles fit.
    DelTitle NVARCHAR(100),
    InsTitle NVARCHAR(100),
    DelQuantity INT,
    InsQuantity INT
);
MERGE BookInventory bi
USING BookOrder bo
    ON bi.TitleID = bo.TitleID
WHEN MATCHED AND
    bi.Quantity + bo.Quantity = 0 THEN
    DELETE
WHEN MATCHED THEN
    UPDATE
    SET bi.Quantity = bi.Quantity + bo.Quantity
WHEN NOT MATCHED BY TARGET THEN
    INSERT (TitleID, Title, Quantity)
    VALUES (bo.TitleID, bo.Title, bo.Quantity)
WHEN NOT MATCHED BY SOURCE
    AND bi.Quantity = 0 THEN
    DELETE
-- The OUTPUT column order must match the column order of @MergeOutput.
OUTPUT
    $action,
    DELETED.TitleID,
    INSERTED.TitleID,
    DELETED.Title,
    INSERTED.Title,
    DELETED.Quantity,
    INSERTED.Quantity
INTO @MergeOutput;
SELECT * FROM BookInventory;
SELECT * FROM @MergeOutput
--where ActionType = 'UPDATE'
You can use the MERGE statement together with an OUTPUT clause:
-- Fixture: dbo.Purchases is the MERGE source, dbo.FactBuyingHabits the target.
-- Both are keyed on (ProductID, CustomerID).
IF OBJECT_ID (N'dbo.Purchases', N'U') IS NOT NULL
    DROP TABLE dbo.Purchases;
GO
CREATE TABLE dbo.Purchases (
    ProductID    int,
    CustomerID   int,
    PurchaseDate datetime,
    CONSTRAINT PK_PurchProdID PRIMARY KEY (ProductID, CustomerID)
);
GO
INSERT INTO dbo.Purchases (ProductID, CustomerID, PurchaseDate)
VALUES
    (707, 11794, '20060821'),
    (707, 15160, '20060825'),
    (708, 18529, '20060821'),
    (711, 11794, '20060821'),
    (711, 19585, '20060822'),
    (712, 14680, '20060825'),
    (712, 21524, '20060825'),
    (712, 19072, '20060821'),
    (870, 15160, '20060823'),
    (870, 11927, '20060824'),
    (870, 18749, '20060825');
GO
IF OBJECT_ID (N'dbo.FactBuyingHabits', N'U') IS NOT NULL
    DROP TABLE dbo.FactBuyingHabits;
GO
CREATE TABLE dbo.FactBuyingHabits (
    ProductID        int,
    CustomerID       int,
    LastPurchaseDate datetime,
    CONSTRAINT PK_FactProdID PRIMARY KEY (ProductID, CustomerID)
);
GO
INSERT INTO dbo.FactBuyingHabits (ProductID, CustomerID, LastPurchaseDate)
VALUES
    (707, 11794, '20060814'),
    (707, 18178, '20060818'),
    (864, 14114, '20060818'),
    (866, 13350, '20060818'),
    (866, 20201, '20060815'),
    (867, 20201, '20060814'),
    (869, 19893, '20060815'),
    (870, 17151, '20060818'),
    (870, 15160, '20060817'),
    (871, 21717, '20060817'),
    (871, 21163, '20060815'),
    (871, 13350, '20060815'),
    (873, 23381, '20060815');
GO
-- Show both tables before the merge.
SELECT * FROM Purchases;
SELECT * FROM FactBuyingHabits;
-- Upsert purchases into the fact table: refresh LastPurchaseDate for existing
-- (ProductID, CustomerID) pairs, insert the pairs that are missing, and return
-- the action with the inserted/deleted column values for each affected row.
MERGE dbo.FactBuyingHabits AS Target
USING (
    SELECT CustomerID, ProductID, PurchaseDate
    FROM dbo.Purchases
) AS Source
    ON (Target.ProductID = Source.ProductID
        AND Target.CustomerID = Source.CustomerID)
WHEN MATCHED THEN
    UPDATE SET Target.LastPurchaseDate = Source.PurchaseDate
WHEN NOT MATCHED BY TARGET THEN
    INSERT (CustomerID, ProductID, LastPurchaseDate)
    VALUES (Source.CustomerID, Source.ProductID, Source.PurchaseDate)
OUTPUT
    $action,
    Inserted.ProductId        InsertedProductId,
    Inserted.CustomerId       InsertedCustomerId,
    Inserted.LastPurchaseDate InsertedLastPurchaseDate,
    Deleted.ProductId         DeletedProductId,
    Deleted.CustomerId        DeletedCustomerId,
    Deleted.LastPurchaseDate  DeletedLastPurchaseDate;

-- Show the merged result, then remove the fixture tables.
SELECT * FROM FactBuyingHabits;
DROP TABLE FactBuyingHabits;
DROP TABLE Purchases;