I am working with SAP Timesheet data, so there are millions of rows. What I am trying to do is select the data from the SAP table and insert it into a table on MS SQL Server.
So I want to insert the original record, then if an update to the original record happens, which is in the form of a new SAP record with a refcounter, I want to find the original record in my table and update it, keeping the original counter value.
I have done this successfully with a cursor (I know it is not the best approach), but with millions of records I am wondering if there is a faster way, because I am on day 4 of my cursor running. Is there a better way than what I have below:
-- Demo fixtures for the CATSDB load:
--   CATSDB      : raw SAP timesheet rows (source).
--   TBL_COUNTER : COUNTER -> REFCOUNTER links, kept so the original record can
--                 be found even when a later load only contains part of a chain.
--   TEMP        : target table, one row per original COUNTER.
CREATE TABLE CATSDB
(
    [COUNTER]   nvarchar(12),
    REFCOUNTER  nvarchar(12),
    PERNR       nvarchar(8),
    WORKDATE    nvarchar(8),
    CATSHOURS   decimal(7, 3),
    APDAT       nvarchar(8),
    LAETM       nvarchar(6),
    CATS_STATUS nvarchar(2),
    APPR_STATUS nvarchar(2)
);

-- CATSHOURS is decimal(7, 3): pass numeric literals rather than strings.
INSERT INTO CATSDB
    ([COUNTER], REFCOUNTER, PERNR, WORKDATE, CATSHOURS, APDAT, LAETM, CATS_STATUS, APPR_STATUS)
VALUES
    ('000421692670', NULL,           '00000071', '20190114', 6.00, '20190204', '174541', '30', '30'),
    ('000421692671', NULL,           '00000071', '20190114', 3.00, '20190204', '174541', '30', '30'),
    ('000421692672', NULL,           '00000071', '20190115', 6.00, '00000000', '000000', '60', '20'),
    ('000421692673', NULL,           '00000071', '20190115', 3.00, '00000000', '000000', '60', '20'),
    ('000421692712', '000421692672', '00000071', '20190115', 0.00, '20190115', '111007', '30', '30'),
    ('000421692713', '000421692673', '00000071', '20190115', 0.00, '20190115', '111007', '30', '30'),
    ('000429718015', NULL,           '00000072', '20190313', 7.00, '00000000', '000000', '60', '20'),
    ('000429718016', NULL,           '00000072', '20190313', 1.50, '20190315', '164659', '30', '30'),
    ('000429718017', NULL,           '00000072', '20190313', 1.00, '20190315', '164659', '30', '30'),
    ('000430154143', NULL,           '00000072', '20190313', 2.00, '00000000', '000000', '60', '20'),
    ('000430154142', '000429718015', '00000072', '20190313', 5.00, '00000000', '000000', '60', '20'),
    ('000430154928', '000430154142', '00000072', '20190313', 4.50, '20190315', '164659', '30', '30'),
    ('000430154929', '000430154143', '00000072', '20190313', 2.50, '20190315', '164659', '30', '30'),
    ('000429774620', NULL,           '00000152', '20190314', 1.00, '00000000', '000000', '60', '20'),
    ('000429774619', NULL,           '00000152', '20190314', 1.00, '00000000', '000000', '60', '20'),
    ('000429802106', '000429774620', '00000152', '20190314', 2.00, '00000000', '000000', '60', '20'),
    ('000429802105', '000429774619', '00000152', '20190314', 3.00, '00000000', '000000', '60', '20'),
    ('000429840242', '000429802106', '00000152', '20190314', 4.00, '20190315', '143857', '30', '30'),
    ('000429840241', '000429802105', '00000152', '20190314', 5.00, '20190315', '143857', '30', '30');

-- Same nvarchar(12) type as CATSDB/TEMP so lookups by COUNTER need no implicit
-- conversion; the primary key makes each lookup an index seek.
CREATE TABLE [TBL_COUNTER]
(
    [COUNTER]    [nvarchar](12) NOT NULL CONSTRAINT PK_TBL_COUNTER PRIMARY KEY CLUSTERED,
    [REFCOUNTER] [nvarchar](12) NULL
);

-- One row per original COUNTER; the key backs the existence checks and
-- updates that address rows by COUNTER.
CREATE TABLE TEMP
(
    [COUNTER]     [nvarchar](12) NOT NULL CONSTRAINT PK_TEMP PRIMARY KEY CLUSTERED,
    [REFCOUNTER]  [nvarchar](12) NULL,
    [PERNR]       [nvarchar](8) NULL,
    [WORKDATE]    [nvarchar](8) NULL,
    [CATSHOURS]   [decimal](7, 3) NULL,
    [APDAT]       [nvarchar](8) NULL,
    [LAETM]       [nvarchar](6) NULL,
    [CATS_STATUS] [nvarchar](2) NULL,
    [APPR_STATUS] [nvarchar](2) NULL
);
-- Row-by-row load of CATSDB into TEMP.
--   * Every (COUNTER, REFCOUNTER) pair is remembered in TBL_COUNTER.
--   * A row without REFCOUNTER is an original record and is inserted into TEMP.
--   * A row with REFCOUNTER is a change: the chain of REFCOUNTER links is
--     followed back to the original record, whose TEMP row is overwritten with
--     the new values while keeping the original COUNTER.
-- NOTE(review): the cursor has no ORDER BY, so this relies on a parent row
-- being read before the rows that reference it - confirm for the real data.
DECLARE @COUNTER      nvarchar(12),
        @REFCOUNTER   nvarchar(12),
        @PERNR        nvarchar(8),
        @WORKDATE     nvarchar(8),
        @CATSHOURS    decimal(7, 3),
        @APDAT        nvarchar(8),
        @LAETM        nvarchar(6),
        @CATS_STATUS  nvarchar(2),
        @APPR_STATUS  nvarchar(2),
        @orig_counter nvarchar(12);

-- The loop never writes to CATSDB, so a read-only forward-only cursor suffices.
DECLARE curs CURSOR LOCAL FAST_FORWARD FOR
    SELECT [COUNTER], REFCOUNTER, PERNR, WORKDATE, CATSHOURS,
           APDAT, LAETM, CATS_STATUS, APPR_STATUS
    FROM CATSDB;

OPEN curs;

FETCH NEXT FROM curs INTO
    @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
    @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Remember the link so later loads can resolve chains without the full data set.
    IF NOT EXISTS (SELECT * FROM TBL_COUNTER WHERE [COUNTER] = @COUNTER)
    BEGIN
        INSERT INTO TBL_COUNTER ([COUNTER], REFCOUNTER)
        VALUES (@COUNTER, @REFCOUNTER);
    END;

    IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = @COUNTER)
    BEGIN
        -- If REFCOUNTER is populated, get the original COUNTER value, then update
        -- that row with the new values. Otherwise insert a new record.
        IF @REFCOUNTER <> '' AND @REFCOUNTER IS NOT NULL
        BEGIN
            -- Reset first: a variable assigned from a SELECT keeps its previous
            -- value when no row qualifies, which would update the wrong record.
            SET @orig_counter = NULL;

            WITH n ([COUNTER], REFCOUNTER) AS
            (
                -- Start at the record this change refers to ...
                SELECT cnt.[COUNTER], cnt.REFCOUNTER
                FROM TBL_COUNTER AS cnt
                WHERE cnt.[COUNTER] = @REFCOUNTER

                UNION ALL

                -- ... and step to its parent until a record without REFCOUNTER is reached.
                SELECT parent.[COUNTER], parent.REFCOUNTER
                FROM TBL_COUNTER AS parent
                INNER JOIN n
                    ON parent.[COUNTER] = n.REFCOUNTER
            )
            SELECT @orig_counter = [COUNTER]
            FROM n
            WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL;

            -- When no original was found @orig_counter is NULL and nothing is updated.
            UPDATE TEMP
            SET [REFCOUNTER]  = @REFCOUNTER,
                [PERNR]       = @PERNR,
                [WORKDATE]    = @WORKDATE,
                [CATSHOURS]   = @CATSHOURS,
                [APDAT]       = @APDAT,
                [LAETM]       = @LAETM,
                [CATS_STATUS] = @CATS_STATUS,
                [APPR_STATUS] = @APPR_STATUS
            WHERE [COUNTER] = @orig_counter;
        END
        ELSE
        BEGIN
            INSERT INTO TEMP
                ([COUNTER], [REFCOUNTER], [PERNR], [WORKDATE], [CATSHOURS],
                 [APDAT], [LAETM], [CATS_STATUS], [APPR_STATUS])
            VALUES
                (@COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
                 @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS);
        END;
    END;

    FETCH NEXT FROM curs INTO
        @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
        @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;
END;

CLOSE curs;
DEALLOCATE curs;
I shortened it and created the tables for you all to be able to see what is going on. The expected result is
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
| COUNTER | REFCOUNTER | PERNR | WORKDATE | CATSHOURS | APDAT | LAETM | CATS_STATUS | APPR_STATUS |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
| 000421692670 | NULL | 00000071 | 20190114 | 6.00 | 20190204 | 174541 | 30 | 30 |
| 000421692671 | NULL | 00000071 | 20190114 | 3.00 | 20190204 | 174541 | 30 | 30 |
| 000421692672 | 000421692672 | 00000071 | 20190115 | 0.00 | 20190115 | 111007 | 30 | 30 |
| 000421692673 | 000421692673 | 00000071 | 20190115 | 0.00 | 20190115 | 111007 | 30 | 30 |
| 000429718015 | 000430154142 | 00000072 | 20190313 | 4.50 | 20190315 | 164659 | 30 | 30 |
| 000429718016 | NULL | 00000072 | 20190313 | 1.50 | 20190315 | 164659 | 30 | 30 |
| 000429718017 | NULL | 00000072 | 20190313 | 1.0 | 20190315 | 164659 | 30 | 30 |
| 000430154143 | 000430154143 | 00000072 | 20190313 | 2.50 | 20190315 | 164659 | 30 | 30 |
| 000429774620 | 000429774620 | 00000152 | 20190314 | 2.00 | 00000000 | 000000 | 60 | 20 |
| 000429774619 | 000429802105 | 00000152 | 20190314 | 5.00 | 20190315 | 143857 | 30 | 30 |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
I need to add to this. There are two phases. In the first phase I will pull all the data from 2019 for an initial load of my table. Then, on a weekly basis, I will pull the new and changed records from the origin source since the last time I ran it, so I will not have the full chain every week. There needs to be a way to get back to the original counter value without the full dataset, which is why I had the counter table. I apologize for not being clearer. I am swamped with work and haven't been able to focus on this as much as I planned. I am trying all these different techniques.
I believe the following query will help you get started; it is a much more efficient way to approach your goal.
It was created to maintain historical information about SQL Servers in a central location and performs the following activities; you have to include/replace your table structures in the respective blocks of the script:
Creates temp table
Collects the information from multiple servers using OPENQUERY via Linked Servers (source) and loads it into the temp table.
Creates Indexes on Temp tables
Loads the data into Central Table (destination) with 3 scenarios (as commented in script)
Note: Replaced the script as per your scenario
-- Stage the source rows in a temp table, normalise REFCOUNTER, then index.
CREATE TABLE #SrcTemp
(
    AENAM        nvarchar(12),
    AUTYP        nvarchar(2),
    AWART        nvarchar(4),
    BELNR        nvarchar(10),
    CATSHOURS    decimal(7, 3),
    CATSQUANTITY decimal(18, 3),
    CHARGE_HOLD  nvarchar(24),
    [COUNTER]    nvarchar(12),
    ERNAM        nvarchar(12),
    ERSDA        nvarchar(8),
    ERSTM        nvarchar(6),
    HRCOSTASG    nvarchar(1),
    LAEDA        nvarchar(8),
    LSTAR        nvarchar(6),
    LTXA1        nvarchar(40),
    MANDT        nvarchar(3),
    PERNR        nvarchar(8),
    RAPLZL       nvarchar(8),
    RAUFPL       nvarchar(10),
    REFCOUNTER   nvarchar(12),
    RNPLNR       nvarchar(12),
    SKOSTL       nvarchar(10),
    CATS_STATUS  nvarchar(2),
    SUPP3        nvarchar(10),
    WORKDATE     nvarchar(8),
    ZZOH_ORDER   nvarchar(24),
    APDAT        nvarchar(8),
    APNAM        nvarchar(12),
    LAETM        nvarchar(6),
    APPR_STATUS  nvarchar(2)
);

-- 1) Load. The explicit column list keeps the insert correct even if the
--    column order of #SrcTemp is changed later.
INSERT INTO #SrcTemp
    (AENAM, AUTYP, AWART, BELNR, CATSHOURS, CATSQUANTITY, CHARGE_HOLD, [COUNTER], ERNAM, ERSDA,
     ERSTM, HRCOSTASG, LAEDA, LSTAR, LTXA1, MANDT, PERNR, RAPLZL, RAUFPL, REFCOUNTER,
     RNPLNR, SKOSTL, CATS_STATUS, SUPP3, WORKDATE, ZZOH_ORDER, APDAT, APNAM, LAETM, APPR_STATUS)
SELECT
    AENAM, AUTYP, AWART, BELNR, CATSHOURS, CATSQUANTITY, CHARGE_HOLD, [COUNTER], ERNAM, ERSDA,
    ERSTM, HRCOSTASG, LAEDA, LSTAR, LTXA1, MANDT, PERNR, RAPLZL, RAUFPL, REFCOUNTER,
    RNPLNR, SKOSTL, CATS_STATUS, SUPP3, WORKDATE, ZZOH_ORDER, APDAT, APNAM, LAETM, APPR_STATUS
FROM
    CATSDB;

-- 2) Normalise "no reference" to '0'. This must run AFTER the load: on the
--    still-empty table it touches no rows, and the REFCOUNTER = '0' filters
--    used when merging into the destination would then never match.
UPDATE #SrcTemp
SET REFCOUNTER = '0'
WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL;

-- 3) Index last, once the key columns have their final values.
CREATE CLUSTERED INDEX CLU_SrvTemp ON #SrcTemp ([COUNTER], REFCOUNTER);
-- Scope: UNCHANGED records (REFCOUNTER = '0') ================================
-- Refuse to merge when the staged data holds the same COUNTER more than once.
IF EXISTS
    (SELECT 1
     FROM #SrcTemp
     WHERE REFCOUNTER = '0'
     GROUP BY [COUNTER]
     HAVING COUNT(*) > 1
    )
BEGIN
    RAISERROR ('Primary key violation occurred in "UNCHANGED" records processing block', 16, 1) with NOWAIT;
END
ELSE
BEGIN
    -- Records that already exist in the destination: refresh them from the
    -- staged source. The destination (TEMP) is the UPDATE target; the staging
    -- table is only read. Running this before the insert avoids re-updating
    -- the rows the insert is about to add.
    UPDATE D
    SET [AENAM] = S.[AENAM], [AUTYP] = S.[AUTYP], [AWART] = S.[AWART], [BELNR] = S.[BELNR]
       ,[CATSHOURS] = S.[CATSHOURS], [CATSQUANTITY] = S.[CATSQUANTITY], [CHARGE_HOLD] = S.[CHARGE_HOLD]
       ,[ERNAM] = S.[ERNAM], [ERSDA] = S.[ERSDA], [ERSTM] = S.[ERSTM], [HRCOSTASG] = S.[HRCOSTASG]
       ,[LAEDA] = S.[LAEDA], [LSTAR] = S.[LSTAR], [LTXA1] = S.[LTXA1], [MANDT] = S.[MANDT]
       ,[PERNR] = S.[PERNR], [RAPLZL] = S.[RAPLZL], [RAUFPL] = S.[RAUFPL], [RNPLNR] = S.[RNPLNR]
       ,[SKOSTL] = S.[SKOSTL], [CATS_STATUS] = S.[CATS_STATUS], [SUPP3] = S.[SUPP3]
       ,[WORKDATE] = S.[WORKDATE], [ZZOH_ORDER] = S.[ZZOH_ORDER], [APDAT] = S.[APDAT]
       ,[APNAM] = S.[APNAM], [LAETM] = S.[LAETM], [APPR_STATUS] = S.[APPR_STATUS]
    FROM TEMP AS D
    INNER JOIN #SrcTemp AS S
        ON S.[COUNTER] = D.[COUNTER]
       AND S.REFCOUNTER = D.REFCOUNTER   -- join key, so REFCOUNTER itself needs no update
    WHERE S.REFCOUNTER = '0';

    -- Records that do not exist in the destination yet: insert them.
    INSERT INTO TEMP ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
        ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
        ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
        ,[LAETM],[APPR_STATUS]
        )
    SELECT S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
        , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
        , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
        , S.[LAETM], S.[APPR_STATUS]
    FROM #SrcTemp AS S
    WHERE S.REFCOUNTER = '0'
      AND NOT EXISTS (SELECT 1 FROM TEMP AS D WHERE D.[COUNTER] = S.[COUNTER]);
END
-- Scope: CHANGED records (REFCOUNTER <> '0') =================================
-- Refuse to merge when the staged data holds the same (COUNTER, REFCOUNTER)
-- pair more than once.
IF EXISTS
    (SELECT 1
     FROM #SrcTemp
     WHERE NOT REFCOUNTER = '0'
     GROUP BY [COUNTER], REFCOUNTER
     HAVING COUNT(*) > 1
    )
BEGIN
    -- Severity 16, matching the UNCHANGED block: severity 10 is only an
    -- informational message and would not be reported to the caller as an error.
    RAISERROR ('Primary key violation occurred in "CHANGED" records processing block', 16, 1) with NOWAIT;
END
ELSE
BEGIN
    -- CHANGED records that already exist in the destination: refresh them from
    -- the staged source. The destination (TEMP) is the UPDATE target; the
    -- staging table is only read.
    UPDATE D
    SET [AENAM] = S.[AENAM], [AUTYP] = S.[AUTYP], [AWART] = S.[AWART], [BELNR] = S.[BELNR]
       ,[CATSHOURS] = S.[CATSHOURS], [CATSQUANTITY] = S.[CATSQUANTITY], [CHARGE_HOLD] = S.[CHARGE_HOLD]
       ,[ERNAM] = S.[ERNAM], [ERSDA] = S.[ERSDA], [ERSTM] = S.[ERSTM], [HRCOSTASG] = S.[HRCOSTASG]
       ,[LAEDA] = S.[LAEDA], [LSTAR] = S.[LSTAR], [LTXA1] = S.[LTXA1], [MANDT] = S.[MANDT]
       ,[PERNR] = S.[PERNR], [RAPLZL] = S.[RAPLZL], [RAUFPL] = S.[RAUFPL], [RNPLNR] = S.[RNPLNR]
       ,[SKOSTL] = S.[SKOSTL], [CATS_STATUS] = S.[CATS_STATUS], [SUPP3] = S.[SUPP3]
       ,[WORKDATE] = S.[WORKDATE], [ZZOH_ORDER] = S.[ZZOH_ORDER], [APDAT] = S.[APDAT]
       ,[APNAM] = S.[APNAM], [LAETM] = S.[LAETM], [APPR_STATUS] = S.[APPR_STATUS]
    FROM TEMP AS D
    INNER JOIN #SrcTemp AS S
        ON S.[COUNTER] = D.[COUNTER]
       AND S.REFCOUNTER = D.REFCOUNTER   -- join key, so REFCOUNTER itself needs no update
    WHERE NOT S.REFCOUNTER = '0';

    -- CHANGED records that do not exist in the destination yet: insert them.
    INSERT INTO TEMP ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
        ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
        ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
        ,[LAETM],[APPR_STATUS]
        )
    SELECT S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
        , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
        , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
        , S.[LAETM], S.[APPR_STATUS]
    FROM #SrcTemp AS S
    WHERE NOT S.REFCOUNTER = '0'
      AND NOT EXISTS (SELECT 1
                      FROM TEMP AS D
                      WHERE D.[COUNTER] = S.[COUNTER]
                        AND D.REFCOUNTER = S.REFCOUNTER);
END

-- The staging table is no longer needed.
DROP TABLE #SrcTemp;
It looks like it can be done with a simple recursive query. Having suitable index is also important.
Sample data
This is how your sample data should look like in the question. Only few relevant columns.
It would be better to include several sets/chains of changes, not just one. Having only this sample data would make it harder for you to verify if presented solutions are correct.
+-----------+---------------------+-----------+------------+
| BELNR | CHARGE_HOLD | COUNTER | REFCOUNTER |
+-----------+---------------------+-----------+------------+
| 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL |
| 417549506 | T4-GS023-ABC2 | 420203329 | 420202428 |
| 417553156 | JGS023001 0010#* | 420206979 | 420203329 |
| 417557221 | T4-GS023-ABC2 | 420211044 | 420206979 |
| 417581675 | JGS023001 0010#* | 420235498 | 420211044 |
| 417677969 | JGS023001 0010#* | 420331792 | 420235498 |
+-----------+---------------------+-----------+------------+
The main recursive part of the query
-- Carry the first row of each change chain (REFCOUNTER IS NULL) along the
-- chain: every output row pairs the chain's original values with the values
-- of one change, and Lvl is that change's position in the chain.
WITH CTE AS
(
    -- Anchor: chain starts; the "New" columns initially repeat the original values.
    SELECT
        1                  AS Lvl,
        start.BELNR        AS OriginalBELNR,
        start.CHARGE_HOLD  AS OriginalCHARGE_HOLD,
        start.[COUNTER]    AS OriginalCOUNTER,
        start.REFCOUNTER   AS OrginalREFCOUNTER,
        start.BELNR        AS NewBELNR,
        start.CHARGE_HOLD  AS NewCHARGE_HOLD,
        start.[COUNTER]    AS NewCOUNTER,
        start.REFCOUNTER   AS NewREFCOUNTER
    FROM CATSDB AS start
    WHERE start.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the row whose REFCOUNTER points at the newest row found so far.
    SELECT
        prev.Lvl + 1,
        prev.OriginalBELNR,
        prev.OriginalCHARGE_HOLD,
        prev.OriginalCOUNTER,
        prev.OrginalREFCOUNTER,
        nxt.BELNR,
        nxt.CHARGE_HOLD,
        nxt.[COUNTER],
        nxt.REFCOUNTER
    FROM CTE AS prev
    INNER JOIN CATSDB AS nxt
        ON nxt.REFCOUNTER = prev.NewCOUNTER
)
SELECT
    Lvl,
    OriginalBELNR, OriginalCHARGE_HOLD, OriginalCOUNTER, OrginalREFCOUNTER,
    NewBELNR, NewCHARGE_HOLD, NewCOUNTER, NewREFCOUNTER
FROM CTE;
Intermediate result
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR | NewCHARGE_HOLD | NewCOUNTER | NewREFCOUNTER |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
| 1 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL |
| 2 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417549506 | T4-GS023-ABC2 | 420203329 | 420202428 |
| 3 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417553156 | JGS023001 0010#* | 420206979 | 420203329 |
| 4 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417557221 | T4-GS023-ABC2 | 420211044 | 420206979 |
| 5 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417581675 | JGS023001 0010#* | 420235498 | 420211044 |
| 6 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417677969 | JGS023001 0010#* | 420331792 | 420235498 |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
You can see that we've taken the starting row of the chain (where RefCounter is NULL) and carried it over the whole chain of changes.
Now we just need to pick the rows with the last change, i.e. with the largest Lvl for each starting row. One way to do it is to use ROW_NUMBER function with suitable partitioning.
Final query
-- For each chain start (REFCOUNTER IS NULL) return the original values
-- together with the values of the last change in its chain.
WITH CTE AS
(
    -- Anchor: chain starts; the "New" columns initially repeat the original values.
    SELECT
        1                  AS Lvl,
        start.BELNR        AS OriginalBELNR,
        start.CHARGE_HOLD  AS OriginalCHARGE_HOLD,
        start.[COUNTER]    AS OriginalCOUNTER,
        start.REFCOUNTER   AS OrginalREFCOUNTER,
        start.BELNR        AS NewBELNR,
        start.CHARGE_HOLD  AS NewCHARGE_HOLD,
        start.[COUNTER]    AS NewCOUNTER,
        start.REFCOUNTER   AS NewREFCOUNTER
    FROM CATSDB AS start
    WHERE start.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the row whose REFCOUNTER points at the newest row found so far.
    SELECT
        prev.Lvl + 1,
        prev.OriginalBELNR,
        prev.OriginalCHARGE_HOLD,
        prev.OriginalCOUNTER,
        prev.OrginalREFCOUNTER,
        nxt.BELNR,
        nxt.CHARGE_HOLD,
        nxt.[COUNTER],
        nxt.REFCOUNTER
    FROM CTE AS prev
    INNER JOIN CATSDB AS nxt
        ON nxt.REFCOUNTER = prev.NewCOUNTER
),
CTE_rn AS
(
    -- rn = 1 marks the deepest level, i.e. the latest change, of each chain.
    SELECT
        Lvl,
        OriginalBELNR, OriginalCHARGE_HOLD, OriginalCOUNTER, OrginalREFCOUNTER,
        NewBELNR, NewCHARGE_HOLD, NewCOUNTER, NewREFCOUNTER,
        ROW_NUMBER() OVER (PARTITION BY OriginalCOUNTER ORDER BY Lvl DESC) AS rn
    FROM CTE
)
SELECT
    Lvl,
    OriginalBELNR, OriginalCHARGE_HOLD, OriginalCOUNTER, OrginalREFCOUNTER,
    NewBELNR, NewCHARGE_HOLD, NewCOUNTER, NewREFCOUNTER,
    rn
FROM CTE_rn
WHERE rn = 1
-- Uncomment when a chain can be longer than the default recursion limit of 100:
--OPTION (MAXRECURSION 0)
;
If you can have a chain longer than 100 you should add OPTION (MAXRECURSION 0) to the query, because by default SQL Server limits recursion depth to 100.
Result
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR | NewCHARGE_HOLD | NewCOUNTER | NewREFCOUNTER | rn |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
| 6 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417677969 | JGS023001 0010#* | 420331792 | 420235498 | 1 |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
Efficiency
To make it work efficiently we need to have an index on REFCOUNTER column. Also, the query assumes that REFCOUNTER is NULL, not ''. If you have a mix of NULLs and empty strings, unify your data, otherwise an index would not be useful. This index is the minimum what you need to have.
Ideally, you should have a CLUSTERED index on REFCOUNTER column, because the query always selects all columns from the table.
-- Cluster CATSDB on REFCOUNTER, the column the recursive step joins on.
CREATE CLUSTERED INDEX [IX_RefCounter]
    ON [dbo].[CATSDB] ([REFCOUNTER] ASC);
If you can't change the indexes of your original table, I would recommend to copy all millions of rows into a temp table and create this clustered index for that temp table.
I got a pretty good plan with this clustered index.
Few things you can do to improve performance:
Convert COUNTER and REFCOUNTER to datatype int from nvarchar, operations on int are much faster than characters.
Do not use a cursor; you can still process one record at a time using a WHILE loop.
-- Cursor-free iteration over CATSDB in ascending COUNTER order.
-- Assumes COUNTER has been converted to a numeric type as suggested above;
-- bigint is used because a 12-digit counter does not fit into int.
DECLARE @COUNTER      bigint = 0,
        @next_counter bigint;

WHILE (1 = 1)
BEGIN
    -- Next COUNTER after the one processed last. MIN() always returns exactly
    -- one row, so "no more rows" shows up as NULL rather than via @@ROWCOUNT.
    SELECT @next_counter = MIN([COUNTER])
    FROM CATSDB
    WHERE [COUNTER] > @COUNTER;

    IF @next_counter IS NULL
        BREAK;  -- we are done

    SET @COUNTER = @next_counter;

    /* SELECT RECORD FOR THIS @COUNTER FROM CATSDB */
    /* DO THE PROCESSING FOR THIS RECORD */
END
There is a method called SQL bulk copy; I don't know whether it will help with your problem, but give it a try.
The most performant way to do this is through BCP. https://learn.microsoft.com/en-us/sql/tools/bcp-utility?view=sql-server-2017.
You can BCP all of the data into a staging table in SQL Server and then run your inserts and updates. Also when checking for non-existence of a record to determine whether this is an insert or an update "IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = #COUNTER)" is very expensive.
Example of a more performant way to do this:
(Table names TBL_SOURCE, TBL_DESTINATION, #TBL_UPDATES, and #TBL_INSERTS)
-- Split the source rows by whether their COUNTER already exists in the
-- destination. Only the source columns are selected: SELECT * across the join
-- would return the shared column names twice, and SELECT ... INTO cannot
-- create a table with duplicate column names.

-- Source rows with no matching destination row: to be inserted.
SELECT S.*
INTO #TBL_INSERTS
FROM TBL_SOURCE AS S
WHERE NOT EXISTS (SELECT 1 FROM TBL_DESTINATION AS D WHERE D.[COUNTER] = S.[COUNTER]);

-- Source rows with a matching destination row: to be applied as updates.
SELECT S.*
INTO #TBL_UPDATES
FROM TBL_SOURCE AS S
WHERE EXISTS (SELECT 1 FROM TBL_DESTINATION AS D WHERE D.[COUNTER] = S.[COUNTER]);
Updates will be captured in #tbl_updates and inserts in #tbl_inserts
Based on only a few sample rows and the given output, our scripts cannot be guaranteed to be 100% correct and optimized where updating millions of rows is concerned.
I am confident that my script can be improved in that direction after fully understanding the requirement.
First of all I wonder why data type are nvarchar,if possible make it to varchar,int,datetime .
If you can change data type then it will do wonder to the performance.
Also there is no identity column which should be Clustered Index.
This two point matter from performance point of view.
So in my example,
-- CATSDB with a surrogate identity key as the clustered primary key, plus a
-- nonclustered index on (REFCOUNTER, COUNTER) for following the change chain.
CREATE TABLE CATSDB
(
    id          INT IDENTITY CONSTRAINT PK_CATSDB_ID PRIMARY KEY CLUSTERED,
    [COUNTER]   NVARCHAR(12),
    REFCOUNTER  NVARCHAR(12),
    PERNR       NVARCHAR(8),
    WORKDATE    NVARCHAR(8),
    CATSHOURS   DECIMAL(7, 3),
    APDAT       NVARCHAR(8),
    LAETM       NVARCHAR(6),
    CATS_STATUS NVARCHAR(2),
    APPR_STATUS NVARCHAR(2)
);

CREATE NONCLUSTERED INDEX FICATSDB_REFCOUNTER
    ON CATSDB (REFCOUNTER, [COUNTER]);
-- For every CATSDB row (UpdateID) find the last row of its change chain
-- (FINDID), stage that row's values in #TEMP, and overwrite the CATSDB row
-- with them. The update runs inside a transaction that is rolled back, so the
-- script can be tried safely against real data.
IF OBJECT_ID('tempdb..#TEMP', 'U') IS NOT NULL
    DROP TABLE #TEMP;

CREATE TABLE #TEMP
(
    -- One staged row per CATSDB row to update, so UpdateID is the key.
    -- FINDID cannot be the key: every row of one chain maps to the same
    -- latest row and would therefore repeat the same FINDID.
    UpdateID      INT NOT NULL PRIMARY KEY,
    FINDID        INT NOT NULL,
    [COUNTER]     [NVARCHAR](12) NOT NULL,
    [REFCOUNTER]  [NVARCHAR](12) NULL,
    [PERNR]       [NVARCHAR](8) NULL,
    [WORKDATE]    [NVARCHAR](8) NULL,
    [CATSHOURS]   [DECIMAL](7, 3) NULL,
    [APDAT]       [NVARCHAR](8) NULL,
    [LAETM]       [NVARCHAR](6) NULL,
    [CATS_STATUS] [NVARCHAR](2) NULL,
    [APPR_STATUS] [NVARCHAR](2) NULL
);

WITH CTE
     AS (
     -- Anchor: every row starts as its own "found" row.
     SELECT a.id,
            a.[COUNTER],
            a.REFCOUNTER,
            a.id AS Findid
     FROM dbo.CATSDB A
     UNION ALL
     -- Step: follow REFCOUNTER links to later rows, keeping the starting id.
     SELECT b.id,
            a.[COUNTER],
            a.[refCOUNTER],
            a.id
     FROM dbo.CATSDB A
          INNER JOIN CTE b ON(a.REFCOUNTER = b.[COUNTER])
     WHERE a.id >= b.Findid),
     CTE1
     AS (
     -- Highest id reached from each starting row.
     SELECT id,
            MAX(Findid) Findid
     FROM CTE
     GROUP BY id)
     INSERT INTO #TEMP
     (UpdateID,
      FINDID,
      [COUNTER],
      [REFCOUNTER],
      [PERNR],
      [WORKDATE],
      [CATSHOURS],
      [APDAT],
      [LAETM],
      [CATS_STATUS],
      [APPR_STATUS]
     )
            SELECT c1.ID,
                   c1.FINDID,
                   a.COUNTER,
                   a.REFCOUNTER,
                   a.PERNR,
                   a.WORKDATE,
                   a.CATSHOURS,
                   a.APDAT,
                   a.LAETM,
                   a.CATS_STATUS,
                   a.APPR_STATUS
            FROM dbo.CATSDB A
                 INNER JOIN CTE1 c1 ON a.id = c1.Findid;

BEGIN TRY
    BEGIN TRAN;
    UPDATE A
      SET
          [REFCOUNTER] = b.REFCOUNTER,
          [PERNR] = b.PERNR,
          [WORKDATE] = b.WORKDATE,
          [CATSHOURS] = b.CATSHOURS,
          [APDAT] = b.APDAT,
          [LAETM] = b.LAETM,
          [CATS_STATUS] = b.CATS_STATUS,
          [APPR_STATUS] = b.APPR_STATUS
    FROM CATSDB A
         INNER JOIN #TEMP B ON a.id = b.UpdateID;

    -- this is only test query
    SELECT c1.UpdateID AS UpdateID,
           a.*
    FROM dbo.CATSDB A
         INNER JOIN #TEMP c1 ON a.id = c1.Findid;

    -- Test mode: undo the update. Replace ROLLBACK with COMMIT to keep it.
    IF(@@TRANCOUNT > 0)
        ROLLBACK; -- commit
END TRY
BEGIN CATCH
    IF(@@TRANCOUNT > 0)
        ROLLBACK;
    -- Re-raise so a failure is reported instead of being silently swallowed.
    THROW;
END CATCH;
#Temp should be permanent table.
IMO, your table badly needs an identity column, which should be the clustered index.
You can try, you can Alter it .
REFCOUNTER,COUNTER should be Non Clustered Index.
After and only after optimizing the query and with proper PLAN above index is going to boost performance.
Proper PLAN : Should you use Recursive or RBAR and update millions of records in one time or Should I Batch update ?
You can first Test the script with millions of row with Rollback.
I am trying to convert dynamically a table like this:
+----+---------+-------+
| ID | Subject | Users |
+----+---------+-------+
| 1 | Hi! | Anna |
| 2 | Hi! | Peter |
| 3 | Try | Jan |
| 4 | Try | Peter |
| 5 | Try | Jan |
| 6 | Problem | Anna |
| 7 | Problem | José |
| 8 | Test | John |
| 9 | Test | John |
| 10 | Hi! | Anna |
| 11 | Hi! | José |
| 12 | Hi! | Anna |
| 13 | Hi! | Joe |
+----+---------+-------+
Into something like that:
+----+---------+-------+-------+-------+-------+
| ID | Subject | User1 | User2 | User3 | User4 |
+----+---------+-------+-------+-------+-------+
| 1 | Hi! | Anna | Peter | José | NULL |
| 2 | Try | Jan | Peter | NULL | NULL |
| 3 | Problem | Anna | José | NULL | NULL |
| 4 | Test | John | NULL | NULL | NULL |
+----+---------+-------+-------+-------+-------+
I have been reading the following links, but they are thought for splitting a column into a predefined number of columns:
Splitting SQL Columns into Multiple Columns Based on Specific Column Value
Split column into two columns based on type code in third column
I would need to split it dynamically depending on the content of the table.
SQL:
--【Build Test Data】
-- Build test data. ID is an int column, so integer literals are used; the
-- second 'Test' row has ID 8, as in the question's table (it was a duplicate 7).
create table #Tem_Table ([ID] int,[Subject] nvarchar(20),[Users] nvarchar(20));
insert into #Tem_Table ([ID],[Subject] ,[Users]) values
 (1,'Hi!','Anna')
,(2,'Hi!','Peter')
,(3,'Try','Jan')
,(4,'Try','Peter')
,(5,'Try','Jan')
,(6,'Problem','Anna')
,(7,'Problem','José')
,(8,'Test','John')
,(9,'Test','John')
,(10,'Hi! ','Anna')
,(11,'Hi! ','José')
,(12,'Hi! ','Anna')
,(13,'Hi! ','Joe')
;
--STEP 1 distinct and ROW_NUMBER
-- Static version: up to four distinct users per subject, one column each.
with distinct_table as (
    -- Distinct (Subject, Users) pairs, numbered alphabetically by user within each subject.
    select
        pairs.[Subject],
        pairs.[Users],
        row_number() over (partition by pairs.[Subject] order by pairs.[Users]) as [rank]
    from (select distinct [Subject], [Users] from #Tem_Table) as pairs
),
group_table as (
    -- One row per subject; drives the left joins below.
    select [Subject]
    from distinct_table
    group by [Subject]
)
-- Attach the user ranked 1..4 of each subject as User1..User4.
select
    g.[Subject],
    u1.[Users] as User1,
    u2.[Users] as User2,
    u3.[Users] as User3,
    u4.[Users] as User4
from group_table as g
left join distinct_table as u1 on u1.[Subject] = g.[Subject] and u1.[rank] = 1
left join distinct_table as u2 on u2.[Subject] = g.[Subject] and u2.[rank] = 2
left join distinct_table as u3 on u3.[Subject] = g.[Subject] and u3.[rank] = 3
left join distinct_table as u4 on u4.[Subject] = g.[Subject] and u4.[rank] = 4
order by g.[Subject];
result:
-------------------- -------------------- -------------------- -------------------- --------------------
Hi! Anna Joe José Peter
Problem Anna José NULL NULL
Test John NULL NULL NULL
Try Jan Peter NULL NULL
Update the Dynamic version :
--STEP 1 distinct and ROW_NUMBER
-- Dynamic version: builds one "UserN" column per distinct user of the subject
-- that has the most distinct users.

-- STEP 1: distinct (Subject, Users) pairs, numbered alphabetically by user within each subject.
SELECT * into #distinct_table from (
    select [Subject],[Users]
    ,ROW_NUMBER() OVER (PARTITION BY [Subject] order by [Users]) [rank]
    from (
        select distinct [Subject],[Users] from #Tem_Table
    ) T00
)T;

-- STEP 2: number of distinct users per subject.
SELECT * into #group_table from (
    select [Subject] ,count(1) [count]
    from #distinct_table T
    group by [Subject]
)T;

-- STEP 3: generate one selected column and one left join per rank, then execute.
-- Local variables use the @ prefix; # denotes the temp tables above.
DECLARE @select_sql AS NVARCHAR(MAX) = ' select T.[Subject] ',
        @join_sql   AS NVARCHAR(MAX) = ' from #group_table T ',
        @max_count  INT = (SELECT max([count]) FROM #group_table),
        @col_no     NVARCHAR(10),   -- 1-based rank / column number as text
        @index      INT = 0
;

WHILE @index < @max_count
BEGIN
    SET @col_no = Convert(nvarchar(10), @index + 1);
    SET @select_sql = @select_sql + ' , T'+@col_no+'.[Users] as User'+@col_no+' ';
    SET @join_sql = @join_sql + 'left join #distinct_table T'+@col_no+' on T.[Subject] = T'+@col_no+'.[Subject] and T'+@col_no+'.[rank] = '+@col_no+' ';
    SET @index = @index + 1;
END;

EXEC (@select_sql
    + @join_sql
    +' order by [Subject]; ')
;
-- Test data.
CREATE TABLE mytable
(
    [ID]      int,
    [Subject] varchar(7),
    [Users]   varchar(5)
);

INSERT INTO mytable ([ID], [Subject], [Users])
VALUES
    (1,  'Hi!',     'Anna'),
    (2,  'Hi!',     'Peter'),
    (3,  'Try',     'Jan'),
    (4,  'Try',     'Peter'),
    (5,  'Try',     'Jan'),
    (6,  'Problem', 'Anna'),
    (7,  'Problem', 'José'),
    (8,  'Test',    'John'),
    (9,  'Test',    'John'),
    (10, 'Hi!',     'Anna'),
    (11, 'Hi!',     'José'),
    (12, 'Hi!',     'Anna'),
    (13, 'Hi!',     'Joe');

-- One row per subject; user1..user4 are the subject's first four distinct
-- users in alphabetical order, each picked with OFFSET n ROWS FETCH NEXT 1 ROW ONLY.
select
    m.subject,
    (select distinct u.users from mytable as u where u.subject = m.subject
     order by u.users offset 0 rows fetch next 1 row only) as user1,
    (select distinct u.users from mytable as u where u.subject = m.subject
     order by u.users offset 1 rows fetch next 1 row only) as user2,
    (select distinct u.users from mytable as u where u.subject = m.subject
     order by u.users offset 2 rows fetch next 1 row only) as user3,
    (select distinct u.users from mytable as u where u.subject = m.subject
     order by u.users offset 3 rows fetch next 1 row only) as user4
from (select distinct subject from mytable) as m
you can use below dynamic query to get the result-
-- Test data.
create table test_Raw(ID int ,Subject varchar(100), Users varchar(100))
insert into test_Raw (ID, Subject, Users)
values (1,' Hi!','Anna'),
(2,' Hi!','Peter'),
(3,'Try','Jan'),
(4,'Try','Peter'),
(5,'Try','Jan'),
(6,'Problem','Anna'),
(7,'Problem','José'),
(8,'Test','John'),
(9,'Test','John'),
(10,' Hi!','Anna'),
(11,' Hi!','José'),
(12,' Hi!','Anna'),
(13,' Hi!','Joe')

-- Distinct (Subject, Users) pairs with two rankings:
--   Ranking1 numbers the subjects, Ranking2 numbers the users within a subject.
-- (No ORDER BY: row order has no meaning for SELECT ... INTO.)
select dense_RANK() over( order by Subject) Ranking1, dense_RANK() over(partition by Subject order by users) Ranking2 , Subject , Users
into test
from test_Raw
group by Subject , Users

-- Build "select ... from test t1 left join test t2 ... left join test tN" with
-- one self-join per additional user column, N being the largest Ranking2.
-- Local variables use the @ prefix.
declare @min int , @mx int , @Select nvarchar(max) , @from nvarchar(max) , @vmin varchar(3)
select @min= 1 , @mx = MAX(Ranking2) , @Select= 'select ' , @from = ' from test t1 ' , @vmin = '' from test

while (@min<=@mx)
begin
    select @vmin = CAST(@min as varchar(3))
    -- t1 supplies ID, Subject and User1; every further alias tN supplies UserN.
    select @Select = @Select + CASE WHEN @min = 1 THEN 't1.Ranking1 as ID , t1.Subject , t1.Users AS User1 ' ELSE ',t' +@vmin+'.Users as User'+@vmin END
    select @from = @from + CASE WHEN @min = 1 THEN '' ELSE ' left join test t'+@vmin + ' on t1.Ranking1 = t' + @vmin + '.Ranking1 and t1.Ranking2 + ' + cast (@min-1 as varchar(10)) + ' = t'+@vmin+'.Ranking2' END
    set @min = @min + 1
end

-- Only the rows with Ranking2 = 1 start an output row.
select @Select = @Select + @from + ' where t1.Ranking2 = 1'
exec sp_executesql @Select
i have the below table. (no primary key in this table)
ID | IC | Name | UGCOS | MCOS
---------------------------------------------------------
1AA | A123456B | Edmund | Australia | Denmark
1AA | A123456B | Edmund | Australia | France
2CS | C435664C | Grace | Norway | NULL
3TG | G885595H | Rae | NULL | Japan
I need to get the result like this.
ID | IC | Name | UGCOS | MCOS | MCOS1
--------------------------------------------------------------------
1AA | A123456B | Edmund | Australia | Denmark | France
2CS | C435664C | Grace | Norway | NULL | NULL
3TG | G885595H | Rae | NULL | Japan | NULL
I googled around and it seems like PIVOT is what I need for that. However, I am not sure how it can be implemented for my tables. It would be a great help if somebody could help me with it. Thanks!
I'll create a second answer, as this approach is something completely different from my first:
This dynamic query will first find the max count of a distinct ID and then build a dynamic pivot
-- Test data.
CREATE TABLE #tmpTbl (ID VARCHAR(100),IC VARCHAR(100),Name VARCHAR(100),UGCOS VARCHAR(100),MCOS VARCHAR(100))
INSERT INTO #tmpTbl (ID, IC, Name, UGCOS, MCOS) VALUES
 ('1AA','A123456B','Edmund','Australia','Denmark')
,('1AA','A123456B','Edmund','Australia','France')
,('1AA','A123456B','Edmund','Australia','OneMore')
,('2CS','C435664C','Grace','Norway',NULL)
,('3TG','G885595H','Rae',NULL,'Japan');
GO

-- Local variables use the @ prefix; #tmpTbl is the temp table created above.

-- Largest number of rows sharing one ID = number of MCOS columns needed.
DECLARE @maxCount INT=(SELECT TOP 1 COUNT(*) FROM #tmpTbl GROUP BY ID ORDER BY COUNT(ID) DESC);

-- Column list "MCOS1,MCOS2,...,MCOS<n>" for the PIVOT's IN clause.
DECLARE @colNames VARCHAR(MAX)=
(
    STUFF
    (
        (
            SELECT TOP(@maxCount)
                   ',MCOS' + CAST(ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS VARCHAR(10))
            FROM sys.objects --take any large table or - better! - a numbers table or a tally CTE
            FOR XML PATH('')
        ),1,1,''
    )
);

-- Number the rows of each ID (in no particular order), turn the number into a
-- column name and pivot MCOS over those names.
DECLARE @cmd VARCHAR(MAX)=
'SELECT p.*
FROM
(
    SELECT *
          ,''MCOS'' + CAST(ROW_NUMBER() OVER(PARTITION BY ID ORDER BY (SELECT NULL)) AS VARCHAR(10)) AS colName
    FROM #tmpTbl
) AS tbl
PIVOT
(
    MIN(MCOS) FOR colName IN(' + @colNames + ')
) AS p';

EXEC(@cmd);
GO
DROP TABLE #tmpTbl;
The result
1AA A123456B Edmund Australia Denmark France OneMore
2CS C435664C Grace Norway NULL NULL NULL
3TG G885595H Rae NULL Japan NULL NULL
This is a suggestion with a concatenated result:
-- Test data.
CREATE TABLE #tmpTbl (ID VARCHAR(100),IC VARCHAR(100),Name VARCHAR(100),UGCOS VARCHAR(100),MCOS VARCHAR(100))
INSERT INTO #tmpTbl (ID, IC, Name, UGCOS, MCOS) VALUES
 ('1AA','A123456B','Edmund','Australia','Denmark')
,('1AA','A123456B','Edmund','Australia','France')
,('2CS','C435664C','Grace','Norway',NULL)
,('3TG','G885595H','Rae',NULL,'Japan');

-- One row per (ID, IC, Name, UGCOS) with all MCOS values of that ID joined by ' ,'.
SELECT outerTbl.ID, outerTbl.IC, outerTbl.Name, outerTbl.UGCOS,
(
    STUFF(
    (
        SELECT ' ,' + x.MCOS
        FROM #tmpTbl AS x
        WHERE x.ID=outerTbl.ID
        -- TYPE + .value() returns the text un-escaped; plain FOR XML PATH('')
        -- would turn &, < and > inside MCOS into &amp;, &lt; and &gt;.
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)'),1,2,''   -- drop the leading ' ,'
    )
) AS MCOS
FROM #tmpTbl AS outerTbl
GROUP BY outerTbl.ID, outerTbl.IC, outerTbl.Name, outerTbl.UGCOS;

GO
DROP TABLE #tmpTbl;
The result
1AA A123456B Edmund Australia Denmark ,France
2CS C435664C Grace Norway NULL
3TG G885595H Rae NULL Japan
Using Cross Apply and Pivot we can achieve this
-- Test data in a table variable (table variables are declared with @, not #).
DECLARE @Table1 TABLE
    ( ID varchar(3), IC varchar(8), Name varchar(6), UGCOS varchar(9), MCOS varchar(7))
;

INSERT INTO @Table1
    ( ID , IC , Name , UGCOS , MCOS )
VALUES
    ('1AA', 'A123456B', 'Edmund', 'Australia', 'Denmark'),
    ('1AA', 'A123456B', 'Edmund', 'Australia', 'France'),
    ('2CS', 'C435664C', 'Grace', 'Norway', NULL),
    ('3TG', 'G885595H', 'Rae', NULL, 'Japan')
;

-- Label each row of an ID as MCOS1, MCOS2, ... (RN), pivot the MCOS value over
-- those labels, then collapse the pivoted rows to one row per person.
SELECT ID, IC, Name, UGCOS,
       MAX([MCOS1]) AS [MCOS1],
       MAX([MCOS2]) AS [MCOS2]
FROM (
    SELECT ID, IC, Name, UGCOS, MCOS, col, val,
           -- col is the constant 'MCOS', so the numbering order within an ID is arbitrary.
           col + '' + CAST(ROW_NUMBER() OVER (PARTITION BY ID ORDER BY col) AS VARCHAR(10)) AS RN
    FROM @Table1
    CROSS APPLY (VALUES ('MCOS', MCOS)) AS CS (col, val)
) AS T
PIVOT (MAX(val) FOR RN IN ([MCOS1], [MCOS2])) AS PVT
GROUP BY ID, IC, Name, UGCOS
Do you always have a maximum of 2 rows of data that you'll want to turn into columns? If so, this would do you;
-- Test data.
CREATE TABLE #TableName (ID varchar(3), IC varchar(8), Name varchar(6), UGCOS varchar(9), MCOS varchar(7))

INSERT INTO #TableName (ID, IC, Name, UGCOS, MCOS)
VALUES
('1AA','A123456B','Edmund','Australia','Denmark')
,('1AA','A123456B','Edmund','Australia','France')
,('2CS','C435664C','Grace','Norway',NULL)
,('3TG','G885595H','Rae',NULL,'Japan')

-- At most two MCOS values per ID: MCOS takes the alphabetically first value,
-- MCOS1 the alphabetically last one, left NULL when it equals the first.
-- For the sample data this gives Denmark | France for 1AA.
SELECT DISTINCT a.ID
    ,a.IC
    ,a.Name
    ,a.UGCOS
    ,b.Mcos1 MCOS
    ,c.Mcos2 MCOS1
FROM #TableName a
LEFT JOIN (
    SELECT ID
        ,MIN(MCOS) Mcos1
    FROM #TableName
    GROUP BY ID
    ) b ON a.ID = b.ID
LEFT JOIN (
    SELECT ID
        ,MAX(MCOS) Mcos2
    FROM #TableName
    GROUP BY ID
    ) c ON a.ID = c.ID
    -- Only attach the second value when it really differs from the first.
    AND (
        b.ID = c.ID
        AND b.Mcos1 <> c.Mcos2
        )

DROP TABLE #TableName
Gives you the result you're after.