I am working with SAP Timesheet data, so there are millions of rows. What I am trying to do is select the data from the SAP table and insert it into a table on MS SQL Server.
So I want to insert the original record, then if an update to the original record happens, which is in the form of a new SAP record with a refcounter, I want to find the original record in my table and update it, keeping the original counter value.
So I have done this successfully with a cursor (I know, not the best approach), but with millions of records I am wondering if there is a faster way, because I am on day 4 of my cursor running. Is there a better way than what I have below:
-- Sample schema and data for the question:
--   CATSDB      : raw SAP timesheet extract (source).
--   TBL_COUNTER : COUNTER -> REFCOUNTER links, kept so a revision can be traced
--                 back to its original even when the full chain is not reloaded.
--   TEMP        : consolidated target, one row per original COUNTER.
BEGIN
    -- A non-NULL REFCOUNTER marks a row that supersedes the row whose COUNTER
    -- equals that REFCOUNTER.
    CREATE TABLE CATSDB
    (
        [COUNTER]   nvarchar(12),
        REFCOUNTER  nvarchar(12),
        PERNR       nvarchar(8),
        WORKDATE    nvarchar(8),
        CATSHOURS   decimal(7, 3),
        APDAT       nvarchar(8),
        LAETM       nvarchar(6),
        CATS_STATUS nvarchar(2),
        APPR_STATUS nvarchar(2)
    );

    INSERT INTO CATSDB
    (
        [COUNTER], REFCOUNTER, PERNR, WORKDATE, CATSHOURS, APDAT, LAETM, CATS_STATUS, APPR_STATUS
    )
    VALUES
        ('000421692670',NULL,'00000071','20190114','6.00','20190204','174541','30','30'),
        ('000421692671',NULL,'00000071','20190114','3.00','20190204','174541','30','30'),
        ('000421692672',NULL,'00000071','20190115','6.00','00000000','000000','60','20'),
        ('000421692673',NULL,'00000071','20190115','3.00','00000000','000000','60','20'),
        ('000421692712','000421692672','00000071','20190115','0.00','20190115','111007','30','30'),
        ('000421692713','000421692673','00000071','20190115','0.00','20190115','111007','30','30'),
        ('000429718015',NULL,'00000072','20190313','7.00','00000000','000000','60','20'),
        ('000429718016',NULL,'00000072','20190313','1.50','20190315','164659','30','30'),
        ('000429718017',NULL,'00000072','20190313','1.00','20190315','164659','30','30'),
        ('000430154143',NULL,'00000072','20190313','2.00','00000000','000000','60','20'),
        ('000430154142','000429718015','00000072','20190313','5.00','00000000','000000','60','20'),
        ('000430154928','000430154142','00000072','20190313','4.50','20190315','164659','30','30'),
        ('000430154929','000430154143','00000072','20190313','2.50','20190315','164659','30','30'),
        ('000429774620',NULL,'00000152','20190314','1.00','00000000','000000','60','20'),
        ('000429774619',NULL,'00000152','20190314','1.00','00000000','000000','60','20'),
        ('000429802106','000429774620','00000152','20190314','2.00','00000000','000000','60','20'),
        ('000429802105','000429774619','00000152','20190314','3.00','00000000','000000','60','20'),
        ('000429840242','000429802106','00000152','20190314','4.00','20190315','143857','30','30'),
        ('000429840241','000429802105','00000152','20190314','5.00','20190315','143857','30','30');

    -- nvarchar (not varchar) so comparisons against the nvarchar columns and
    -- variables used by the load do not force an implicit conversion of this
    -- column. The primary key turns the per-row existence probe into a seek.
    CREATE TABLE [TBL_COUNTER]
    (
        [COUNTER]    nvarchar(12) NOT NULL
            CONSTRAINT PK_TBL_COUNTER PRIMARY KEY CLUSTERED,
        [REFCOUNTER] nvarchar(12) NULL
    );

    -- Keyed on COUNTER: the load probes and updates this table by COUNTER for
    -- every source row.
    CREATE TABLE TEMP
    (
        [COUNTER]     nvarchar(12) NOT NULL
            CONSTRAINT PK_TEMP PRIMARY KEY CLUSTERED,
        [REFCOUNTER]  nvarchar(12) NULL,
        [PERNR]       nvarchar(8) NULL,
        [WORKDATE]    nvarchar(8) NULL,
        [CATSHOURS]   decimal(7, 3) NULL,
        [APDAT]       nvarchar(8) NULL,
        [LAETM]       nvarchar(6) NULL,
        [CATS_STATUS] nvarchar(2) NULL,
        [APPR_STATUS] nvarchar(2) NULL
    );
END
-- Row-by-row load of CATSDB into TEMP.
--   * Every source row's COUNTER -> REFCOUNTER link is recorded in TBL_COUNTER.
--   * A row without REFCOUNTER is inserted into TEMP as a new original.
--   * A row with REFCOUNTER is a revision: its chain is followed back to the
--     original COUNTER and that TEMP row is overwritten with the new values,
--     keeping the original COUNTER.
-- NOTE(review): the cursor has no ORDER BY, so this relies on originals being
-- fetched before their revisions; confirm, or order by a column that guarantees it.
DECLARE @COUNTER      nvarchar(12),
        @REFCOUNTER   nvarchar(12),
        @PERNR        nvarchar(8),
        @WORKDATE     nvarchar(8),
        @CATSHOURS    decimal(7, 3),
        @APDAT        nvarchar(8),
        @LAETM        nvarchar(6),
        @CATS_STATUS  nvarchar(2),
        @APPR_STATUS  nvarchar(2),
        @orig_counter nvarchar(12);

-- The cursor only reads CATSDB, so a forward-only read-only cursor is sufficient.
DECLARE curs CURSOR LOCAL FAST_FORWARD FOR
    SELECT
        [COUNTER],
        REFCOUNTER,
        PERNR,
        WORKDATE,
        CATSHOURS,
        APDAT,
        LAETM,
        CATS_STATUS,
        APPR_STATUS
    FROM CATSDB;

OPEN curs;

FETCH NEXT FROM curs INTO
    @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
    @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Remember the link so later loads can resolve chains without the full data set.
    IF NOT EXISTS (SELECT * FROM TBL_COUNTER WHERE [COUNTER] = @COUNTER)
    BEGIN
        INSERT INTO TBL_COUNTER ([COUNTER], REFCOUNTER)
        VALUES (@COUNTER, @REFCOUNTER);
    END;

    IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = @COUNTER)
    BEGIN
        IF @REFCOUNTER <> '' AND @REFCOUNTER IS NOT NULL
        BEGIN
            -- Reset first: SELECT @var = ... leaves the variable untouched when no
            -- row qualifies, which would silently reuse the previous row's original.
            SET @orig_counter = NULL;

            -- Walk UP the chain: start at the referenced row and keep following
            -- REFCOUNTER to the parent until a row without REFCOUNTER is reached.
            WITH n ([COUNTER], REFCOUNTER) AS
            (
                SELECT
                    cnt.[COUNTER],
                    cnt.REFCOUNTER
                FROM TBL_COUNTER AS cnt
                WHERE cnt.[COUNTER] = @REFCOUNTER
                UNION ALL
                SELECT
                    parent.[COUNTER],
                    parent.REFCOUNTER
                FROM TBL_COUNTER AS parent
                INNER JOIN n
                    ON parent.[COUNTER] = n.REFCOUNTER
            )
            SELECT @orig_counter = [COUNTER]
            FROM n
            WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL;

            -- If the original is unknown, @orig_counter is NULL and no row is updated.
            UPDATE TEMP
            SET
                [REFCOUNTER]  = @REFCOUNTER,
                [PERNR]       = @PERNR,
                [WORKDATE]    = @WORKDATE,
                [CATSHOURS]   = @CATSHOURS,
                [APDAT]       = @APDAT,
                [LAETM]       = @LAETM,
                [CATS_STATUS] = @CATS_STATUS,
                [APPR_STATUS] = @APPR_STATUS
            WHERE [COUNTER] = @orig_counter;
        END
        ELSE
        BEGIN
            INSERT INTO TEMP
                ([COUNTER], [REFCOUNTER], [PERNR], [WORKDATE], [CATSHOURS],
                 [APDAT], [LAETM], [CATS_STATUS], [APPR_STATUS])
            VALUES
                (@COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
                 @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS);
        END;
    END;

    FETCH NEXT FROM curs INTO
        @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
        @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;
END;

CLOSE curs;
DEALLOCATE curs;
I shortened it and created the tables for you all to be able to see what is going on. The expected result is
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
| COUNTER | REFCOUNTER | PERNR | WORKDATE | CATSHOURS | APDAT | LAETM | CATS_STATUS | APPR_STATUS |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
| 000421692670 | NULL | 00000071 | 20190114 | 6.00 | 20190204 | 174541 | 30 | 30 |
| 000421692671 | NULL | 00000071 | 20190114 | 3.00 | 20190204 | 174541 | 30 | 30 |
| 000421692672 | 000421692672 | 00000071 | 20190115 | 0.00 | 20190115 | 111007 | 30 | 30 |
| 000421692673 | 000421692673 | 00000071 | 20190115 | 0.00 | 20190115 | 111007 | 30 | 30 |
| 000429718015 | 000430154142 | 00000072 | 20190313 | 4.50 | 20190315 | 164659 | 30 | 30 |
| 000429718016 | NULL | 00000072 | 20190313 | 1.50 | 20190315 | 164659 | 30 | 30 |
| 000429718017 | NULL | 00000072 | 20190313 | 1.0 | 20190315 | 164659 | 30 | 30 |
| 000430154143 | 000430154143 | 00000072 | 20190313 | 2.50 | 20190315 | 164659 | 30 | 30 |
| 000429774620 | 000429774620 | 00000152 | 20190314 | 2.00 | 00000000 | 000000 | 60 | 20 |
| 000429774619 | 000429802105 | 00000152 | 20190314 | 5.00 | 20190315 | 143857 | 30 | 30 |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
I need to add to this. There are two phases. In the first phase I will pull all the data from 2019 for an initial load of my table. Then, on a weekly basis, I will pull from the origin source only the records that are new or changed since the last time I ran it, so I will not have the full chain every week. There needs to be a way to get back to the original counter value without the full dataset, which is why I had the counter table. I apologize for not being more clear. I am swamped with work and haven't been able to focus on this as much as I planned. I am trying all these different techniques.
I believe the following query will help you get started, and it is a much more efficient way to approach your goal.
It was created to maintain historical information about SQL Servers in a central location and performs the following activities; you have to include/replace your table structures in the respective blocks of the script:
Creates temp table
Collects the information from multiple servers using OPENQUERY via Linked Servers (source) and loads it into the temp table.
Creates Indexes on Temp tables
Loads the data into Central Table (destination) with 3 scenarios (as commented in script)
Note: Replaced the script as per your scenario
-- Session-local staging table that receives the source extract before it is
-- applied to the destination table.
CREATE TABLE #SrcTemp
(
    AENAM        nvarchar(12),
    AUTYP        nvarchar(2),
    AWART        nvarchar(4),
    BELNR        nvarchar(10),
    CATSHOURS    decimal(7, 3),
    CATSQUANTITY decimal(18, 3),
    CHARGE_HOLD  nvarchar(24),
    [COUNTER]    nvarchar(12),
    ERNAM        nvarchar(12),
    ERSDA        nvarchar(8),
    ERSTM        nvarchar(6),
    HRCOSTASG    nvarchar(1),
    LAEDA        nvarchar(8),
    LSTAR        nvarchar(6),
    LTXA1        nvarchar(40),
    MANDT        nvarchar(3),
    PERNR        nvarchar(8),
    RAPLZL       nvarchar(8),
    RAUFPL       nvarchar(10),
    REFCOUNTER   nvarchar(12),
    RNPLNR       nvarchar(12),
    SKOSTL       nvarchar(10),
    CATS_STATUS  nvarchar(2),
    SUPP3        nvarchar(10),
    WORKDATE     nvarchar(8),
    ZZOH_ORDER   nvarchar(24),
    APDAT        nvarchar(8),
    APNAM        nvarchar(12),
    LAETM        nvarchar(6),
    APPR_STATUS  nvarchar(2)
);
-- Stage the source rows first; the normalisation and the index below must
-- operate on loaded data.
INSERT INTO #SrcTemp
(
    AENAM, AUTYP, AWART, BELNR, CATSHOURS, CATSQUANTITY, CHARGE_HOLD, [COUNTER], ERNAM, ERSDA,
    ERSTM, HRCOSTASG, LAEDA, LSTAR, LTXA1, MANDT, PERNR, RAPLZL, RAUFPL, REFCOUNTER,
    RNPLNR, SKOSTL, CATS_STATUS, SUPP3, WORKDATE, ZZOH_ORDER, APDAT, APNAM, LAETM, APPR_STATUS
)
SELECT
    AENAM, AUTYP, AWART, BELNR, CATSHOURS, CATSQUANTITY, CHARGE_HOLD, [COUNTER], ERNAM, ERSDA,
    ERSTM, HRCOSTASG, LAEDA, LSTAR, LTXA1, MANDT, PERNR, RAPLZL, RAUFPL, REFCOUNTER,
    RNPLNR, SKOSTL, CATS_STATUS, SUPP3, WORKDATE, ZZOH_ORDER, APDAT, APNAM, LAETM, APPR_STATUS
FROM
    CATSDB;

-- Represent "no reference" uniformly as '0' so the later sections can filter
-- and join on REFCOUNTER without NULL handling.
UPDATE #SrcTemp SET REFCOUNTER = '0' WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL;

-- Build the index once, after the load and the update, instead of maintaining
-- it row by row during them.
CREATE CLUSTERED INDEX CLU_SrvTemp ON #SrcTemp ([COUNTER], REFCOUNTER);
--BEGIN
-- OPEN curs
--END
-- Scope: UNCHANGED records (REFCOUNTER = '0') ================================
-- Refuse to apply this section when a COUNTER appears more than once among the
-- staged originals; otherwise the destination would receive duplicates.
IF EXISTS
(
    SELECT S.[COUNTER]
    FROM #SrcTemp AS S
    WHERE S.REFCOUNTER = '0'
    GROUP BY S.[COUNTER]
    HAVING COUNT(*) > 1
)
BEGIN
    RAISERROR ('Primary key violation occurred in "UNCHANGED" records processing block', 16, 1) WITH NOWAIT;
END
ELSE
BEGIN
    -- Rows already present in the destination: refresh the DESTINATION from the
    -- staged source. Runs before the insert so freshly inserted rows are not
    -- immediately rewritten. COUNTER and REFCOUNTER are the join keys and are
    -- therefore not assigned.
    UPDATE D
    SET D.[AENAM]        = S.[AENAM]
       ,D.[AUTYP]        = S.[AUTYP]
       ,D.[AWART]        = S.[AWART]
       ,D.[BELNR]        = S.[BELNR]
       ,D.[CATSHOURS]    = S.[CATSHOURS]
       ,D.[CATSQUANTITY] = S.[CATSQUANTITY]
       ,D.[CHARGE_HOLD]  = S.[CHARGE_HOLD]
       ,D.[ERNAM]        = S.[ERNAM]
       ,D.[ERSDA]        = S.[ERSDA]
       ,D.[ERSTM]        = S.[ERSTM]
       ,D.[HRCOSTASG]    = S.[HRCOSTASG]
       ,D.[LAEDA]        = S.[LAEDA]
       ,D.[LSTAR]        = S.[LSTAR]
       ,D.[LTXA1]        = S.[LTXA1]
       ,D.[MANDT]        = S.[MANDT]
       ,D.[PERNR]        = S.[PERNR]
       ,D.[RAPLZL]       = S.[RAPLZL]
       ,D.[RAUFPL]       = S.[RAUFPL]
       ,D.[RNPLNR]       = S.[RNPLNR]
       ,D.[SKOSTL]       = S.[SKOSTL]
       ,D.[CATS_STATUS]  = S.[CATS_STATUS]
       ,D.[SUPP3]        = S.[SUPP3]
       ,D.[WORKDATE]     = S.[WORKDATE]
       ,D.[ZZOH_ORDER]   = S.[ZZOH_ORDER]
       ,D.[APDAT]        = S.[APDAT]
       ,D.[APNAM]        = S.[APNAM]
       ,D.[LAETM]        = S.[LAETM]
       ,D.[APPR_STATUS]  = S.[APPR_STATUS]
    FROM TEMP AS D
    INNER JOIN #SrcTemp AS S
        ON S.[COUNTER] = D.[COUNTER]
       AND S.REFCOUNTER = D.REFCOUNTER
    WHERE S.REFCOUNTER = '0';

    -- Rows whose COUNTER is not in the destination yet: insert them.
    INSERT INTO TEMP ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
                     ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
                     ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
                     ,[LAETM],[APPR_STATUS]
                     )
    SELECT S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
         , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
         , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
         , S.[LAETM], S.[APPR_STATUS]
    FROM #SrcTemp AS S
    WHERE S.REFCOUNTER = '0'
      AND NOT EXISTS (SELECT * FROM TEMP AS D WHERE D.[COUNTER] = S.[COUNTER]);
END
-- Scope: CHANGED records (REFCOUNTER <> '0') =================================
-- Refuse to apply this section when a (COUNTER, REFCOUNTER) pair appears more
-- than once among the staged revisions.
-- NOTE(review): revisions are stored under their own COUNTER here; this section
-- does not map a revision back to the original COUNTER of its chain.
IF EXISTS
(
    SELECT S.[COUNTER], S.REFCOUNTER
    FROM #SrcTemp AS S
    WHERE S.REFCOUNTER <> '0'
    GROUP BY S.[COUNTER], S.REFCOUNTER
    HAVING COUNT(*) > 1
)
BEGIN
    -- Severity 16 so the condition is reported as an error rather than as an
    -- informational message (severity 10 and below are informational only).
    RAISERROR ('Primary key violation occurred in "CHANGED" records processing block', 16, 1) WITH NOWAIT;
END
ELSE
BEGIN
    -- CHANGED rows already present in the destination: refresh the DESTINATION
    -- from the staged source. Runs before the insert so freshly inserted rows
    -- are not immediately rewritten. COUNTER and REFCOUNTER are the join keys
    -- and are therefore not assigned.
    UPDATE D
    SET D.[AENAM]        = S.[AENAM]
       ,D.[AUTYP]        = S.[AUTYP]
       ,D.[AWART]        = S.[AWART]
       ,D.[BELNR]        = S.[BELNR]
       ,D.[CATSHOURS]    = S.[CATSHOURS]
       ,D.[CATSQUANTITY] = S.[CATSQUANTITY]
       ,D.[CHARGE_HOLD]  = S.[CHARGE_HOLD]
       ,D.[ERNAM]        = S.[ERNAM]
       ,D.[ERSDA]        = S.[ERSDA]
       ,D.[ERSTM]        = S.[ERSTM]
       ,D.[HRCOSTASG]    = S.[HRCOSTASG]
       ,D.[LAEDA]        = S.[LAEDA]
       ,D.[LSTAR]        = S.[LSTAR]
       ,D.[LTXA1]        = S.[LTXA1]
       ,D.[MANDT]        = S.[MANDT]
       ,D.[PERNR]        = S.[PERNR]
       ,D.[RAPLZL]       = S.[RAPLZL]
       ,D.[RAUFPL]       = S.[RAUFPL]
       ,D.[RNPLNR]       = S.[RNPLNR]
       ,D.[SKOSTL]       = S.[SKOSTL]
       ,D.[CATS_STATUS]  = S.[CATS_STATUS]
       ,D.[SUPP3]        = S.[SUPP3]
       ,D.[WORKDATE]     = S.[WORKDATE]
       ,D.[ZZOH_ORDER]   = S.[ZZOH_ORDER]
       ,D.[APDAT]        = S.[APDAT]
       ,D.[APNAM]        = S.[APNAM]
       ,D.[LAETM]        = S.[LAETM]
       ,D.[APPR_STATUS]  = S.[APPR_STATUS]
    FROM TEMP AS D
    INNER JOIN #SrcTemp AS S
        ON S.[COUNTER] = D.[COUNTER]
       AND S.REFCOUNTER = D.REFCOUNTER
    WHERE S.REFCOUNTER <> '0';

    -- CHANGED rows with no matching (COUNTER, REFCOUNTER) in the destination: insert them.
    INSERT INTO TEMP ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
                     ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
                     ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
                     ,[LAETM],[APPR_STATUS]
                     )
    SELECT S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
         , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
         , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
         , S.[LAETM], S.[APPR_STATUS]
    FROM #SrcTemp AS S
    WHERE S.REFCOUNTER <> '0'
      AND NOT EXISTS
          (
              SELECT *
              FROM TEMP AS D
              WHERE D.[COUNTER] = S.[COUNTER]
                AND D.REFCOUNTER = S.REFCOUNTER
          );
END

-- The staging table is no longer needed.
DROP TABLE #SrcTemp;
It looks like it can be done with a simple recursive query. Having suitable index is also important.
Sample data
This is how your sample data should look like in the question. Only few relevant columns.
It would be better to include several sets/chains of changes, not just one. Having only this sample data would make it harder for you to verify if presented solutions are correct.
+-----------+---------------------+-----------+------------+
| BELNR | CHARGE_HOLD | COUNTER | REFCOUNTER |
+-----------+---------------------+-----------+------------+
| 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL |
| 417549506 | T4-GS023-ABC2 | 420203329 | 420202428 |
| 417553156 | JGS023001 0010#* | 420206979 | 420203329 |
| 417557221 | T4-GS023-ABC2 | 420211044 | 420206979 |
| 417581675 | JGS023001 0010#* | 420235498 | 420211044 |
| 417677969 | JGS023001 0010#* | 420331792 | 420235498 |
+-----------+---------------------+-----------+------------+
The main recursive part of the query
-- Expand every chain of revisions: each output row pairs the first row of a
-- chain (REFCOUNTER IS NULL) with one row of that chain; Lvl is the position
-- of the paired row within the chain, starting at 1 for the first row itself.
WITH chain AS
(
    -- Anchor: rows that start a chain, paired with themselves.
    SELECT
        1               AS Lvl,
        src.BELNR       AS OriginalBELNR,
        src.CHARGE_HOLD AS OriginalCHARGE_HOLD,
        src.[COUNTER]   AS OriginalCOUNTER,
        src.REFCOUNTER  AS OrginalREFCOUNTER,
        src.BELNR       AS NewBELNR,
        src.CHARGE_HOLD AS NewCHARGE_HOLD,
        src.[COUNTER]   AS NewCOUNTER,
        src.REFCOUNTER  AS NewREFCOUNTER
    FROM CATSDB AS src
    WHERE src.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the row whose REFCOUNTER points at the row reached so far.
    SELECT
        prev.Lvl + 1,
        prev.OriginalBELNR,
        prev.OriginalCHARGE_HOLD,
        prev.OriginalCOUNTER,
        prev.OrginalREFCOUNTER,
        nxt.BELNR,
        nxt.CHARGE_HOLD,
        nxt.[COUNTER],
        nxt.REFCOUNTER
    FROM chain AS prev
    INNER JOIN CATSDB AS nxt
        ON nxt.REFCOUNTER = prev.NewCOUNTER
)
SELECT
    Lvl,
    OriginalBELNR,
    OriginalCHARGE_HOLD,
    OriginalCOUNTER,
    OrginalREFCOUNTER,
    NewBELNR,
    NewCHARGE_HOLD,
    NewCOUNTER,
    NewREFCOUNTER
FROM chain;
Intermediate result
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR | NewCHARGE_HOLD | NewCOUNTER | NewREFCOUNTER |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
| 1 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL |
| 2 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417549506 | T4-GS023-ABC2 | 420203329 | 420202428 |
| 3 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417553156 | JGS023001 0010#* | 420206979 | 420203329 |
| 4 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417557221 | T4-GS023-ABC2 | 420211044 | 420206979 |
| 5 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417581675 | JGS023001 0010#* | 420235498 | 420211044 |
| 6 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417677969 | JGS023001 0010#* | 420331792 | 420235498 |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
You can see that we've taken the starting row of the chain (where RefCounter is NULL) and carried it over the whole chain of changes.
Now we just need to pick the rows with the last change, i.e. with the largest Lvl for each starting row. One way to do it is to use ROW_NUMBER function with suitable partitioning.
Final query
-- For every chain of revisions, return the first row of the chain together
-- with its last revision (the row with the highest Lvl).
WITH chain AS
(
    -- Anchor: rows that start a chain, paired with themselves.
    SELECT
        1               AS Lvl,
        src.BELNR       AS OriginalBELNR,
        src.CHARGE_HOLD AS OriginalCHARGE_HOLD,
        src.[COUNTER]   AS OriginalCOUNTER,
        src.REFCOUNTER  AS OrginalREFCOUNTER,
        src.BELNR       AS NewBELNR,
        src.CHARGE_HOLD AS NewCHARGE_HOLD,
        src.[COUNTER]   AS NewCOUNTER,
        src.REFCOUNTER  AS NewREFCOUNTER
    FROM CATSDB AS src
    WHERE src.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the row whose REFCOUNTER points at the row reached so far.
    SELECT
        prev.Lvl + 1,
        prev.OriginalBELNR,
        prev.OriginalCHARGE_HOLD,
        prev.OriginalCOUNTER,
        prev.OrginalREFCOUNTER,
        nxt.BELNR,
        nxt.CHARGE_HOLD,
        nxt.[COUNTER],
        nxt.REFCOUNTER
    FROM chain AS prev
    INNER JOIN CATSDB AS nxt
        ON nxt.REFCOUNTER = prev.NewCOUNTER
),
ranked AS
(
    -- rn = 1 marks the deepest (latest) row of each chain.
    SELECT
        Lvl,
        OriginalBELNR,
        OriginalCHARGE_HOLD,
        OriginalCOUNTER,
        OrginalREFCOUNTER,
        NewBELNR,
        NewCHARGE_HOLD,
        NewCOUNTER,
        NewREFCOUNTER,
        ROW_NUMBER() OVER (PARTITION BY OriginalCOUNTER ORDER BY Lvl DESC) AS rn
    FROM chain
)
SELECT
    Lvl,
    OriginalBELNR,
    OriginalCHARGE_HOLD,
    OriginalCOUNTER,
    OrginalREFCOUNTER,
    NewBELNR,
    NewCHARGE_HOLD,
    NewCOUNTER,
    NewREFCOUNTER,
    rn
FROM ranked
WHERE rn = 1
-- Uncomment when a chain can be longer than the default recursion limit of 100:
--OPTION (MAXRECURSION 0)
;
If you can have a chain longer than 100 you should add OPTION (MAXRECURSION 0) to the query, because by default SQL Server limits recursion depth to 100.
Result
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR | NewCHARGE_HOLD | NewCOUNTER | NewREFCOUNTER | rn |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
| 6 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417677969 | JGS023001 0010#* | 420331792 | 420235498 | 1 |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
Efficiency
To make it work efficiently we need to have an index on the REFCOUNTER column. Also, the query assumes that a missing REFCOUNTER is NULL, not ''. If you have a mix of NULLs and empty strings, unify your data first; otherwise the index would not be useful. This index is the minimum you need to have.
Ideally, you should have a CLUSTERED index on REFCOUNTER column, because the query always selects all columns from the table.
-- Cluster CATSDB on REFCOUNTER, the column the recursive step joins on.
CREATE CLUSTERED INDEX [IX_RefCounter]
    ON [dbo].[CATSDB] ([REFCOUNTER] ASC);
If you can't change the indexes of your original table, I would recommend to copy all millions of rows into a temp table and create this clustered index for that temp table.
I got a pretty good plan with this clustered index.
Few things you can do to improve performance:
Convert COUNTER and REFCOUNTER to datatype int from nvarchar, operations on int are much faster than characters.
Do not use a cursor; you can still process one record at a time using a WHILE loop.
-- Cursor-free skeleton: visit CATSDB one row at a time in ascending COUNTER order.
-- Assumes COUNTER has been converted to a numeric type, as recommended above;
-- bigint is used because a 12-digit counter does not fit in int.
DECLARE @COUNTER bigint = 0;
WHILE (1 = 1)
BEGIN
    -- Advance to the smallest COUNTER greater than the one just processed.
    -- MIN() over no rows yields NULL, which is the end-of-data signal
    -- (@@ROWCOUNT is always 1 after an aggregate, so it cannot be used here).
    SELECT @COUNTER = MIN([COUNTER]) FROM CATSDB WHERE [COUNTER] > @COUNTER;

    IF @COUNTER IS NULL
        BREAK;

    /* SELECT RECORD FOR THIS @COUNTER FROM CATSDB */
    /* DO THE PROCESSING FOR THIS RECORD */
END
There is a method called SQL bulk copy. I don't know whether it will help with your problem, but give it a try.
The most performant way to do this is through BCP. https://learn.microsoft.com/en-us/sql/tools/bcp-utility?view=sql-server-2017.
You can BCP all of the data into a staging table in SQL Server and then run your inserts and updates. Also when checking for non-existence of a record to determine whether this is an insert or an update "IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = #COUNTER)" is very expensive.
Example of a more performant way to do this:
(Table names TBL_SOURCE, TBL_DESTINATION, #TBL_UPDATES, and #TBL_INSERTS)
-- Source rows with no match in the destination: candidates for INSERT.
-- Only the source columns are selected; SELECT * over the join would produce
-- duplicate column names (COUNTER exists on both sides), which SELECT ... INTO rejects.
SELECT S.*
INTO #TBL_INSERTS
FROM TBL_SOURCE AS S
LEFT JOIN TBL_DESTINATION AS D
    ON S.COUNTER = D.COUNTER
WHERE D.COUNTER IS NULL;

-- Source rows that already exist in the destination: candidates for UPDATE.
SELECT S.*
INTO #TBL_UPDATES
FROM TBL_SOURCE AS S
INNER JOIN TBL_DESTINATION AS D
    ON S.COUNTER = D.COUNTER;
Updates will be captured in #tbl_updates and inserts in #tbl_inserts
With only a few sample rows and the given expected output, our scripts cannot be guaranteed to be 100% correct and optimized for a case where millions of rows have to be updated.
I am confident that my script can be improved in that direction once the requirement is fully understood.
First of all, I wonder why the data types are nvarchar; if possible, change them to varchar, int, or datetime.
If you can change the data types, it will do wonders for performance.
Also, there is no identity column, which should carry the clustered index.
These two points matter from a performance point of view.
So in my example,
-- CATSDB with a surrogate identity key; the identity value reflects insert order.
CREATE TABLE CATSDB
(
    id          int IDENTITY(1, 1),
    [COUNTER]   nvarchar(12),
    REFCOUNTER  nvarchar(12),
    PERNR       nvarchar(8),
    WORKDATE    nvarchar(8),
    CATSHOURS   decimal(7, 3),
    APDAT       nvarchar(8),
    LAETM       nvarchar(6),
    CATS_STATUS nvarchar(2),
    APPR_STATUS nvarchar(2)
);

-- Cluster the table on the surrogate key.
ALTER TABLE CATSDB
    ADD CONSTRAINT PK_CATSDB_ID PRIMARY KEY CLUSTERED (id);

-- Supports looking up rows by the COUNTER they reference.
CREATE NONCLUSTERED INDEX FICATSDB_REFCOUNTER
    ON CATSDB (REFCOUNTER, [COUNTER]);
-- Resolve, for every CATSDB row, the last revision of its chain and copy that
-- revision's values onto the row. The update runs inside a transaction that is
-- rolled back, so the script can be tried out safely.
IF OBJECT_ID('tempdb..#TEMP', 'U') IS NOT NULL
    DROP TABLE #TEMP;

-- One row per CATSDB row to update (UpdateID) with the id of the last revision
-- found for it (FINDID) and that revision's values.
-- The key is UpdateID: every row of a chain resolves to the same FINDID, so
-- FINDID is not unique.
CREATE TABLE #TEMP
(
    UpdateID      INT NOT NULL PRIMARY KEY,
    FINDID        INT NOT NULL,
    [COUNTER]     [NVARCHAR](12) NOT NULL,
    [REFCOUNTER]  [NVARCHAR](12) NULL,
    [PERNR]       [NVARCHAR](8) NULL,
    [WORKDATE]    [NVARCHAR](8) NULL,
    [CATSHOURS]   [DECIMAL](7, 3) NULL,
    [APDAT]       [NVARCHAR](8) NULL,
    [LAETM]       [NVARCHAR](6) NULL,
    [CATS_STATUS] [NVARCHAR](2) NULL,
    [APPR_STATUS] [NVARCHAR](2) NULL
);

-- CTE : starting from every row (id), follow the rows that reference it,
--       carrying the starting id along; Findid is the id of the row reached.
-- CTE1: per starting id, the highest id reached.
-- NOTE(review): MAX(Findid) identifies the latest revision only if later
-- revisions always receive higher identity values; confirm against the load order.
WITH CTE
     AS (SELECT a.id,
                a.[COUNTER],
                a.REFCOUNTER,
                a.id AS Findid
         FROM dbo.CATSDB A
         UNION ALL
         SELECT b.id,
                a.[COUNTER],
                a.[refCOUNTER],
                a.id
         FROM dbo.CATSDB A
              INNER JOIN CTE b ON(a.REFCOUNTER = b.[COUNTER])
         WHERE a.id >= b.Findid),
     CTE1
     AS (SELECT id,
                MAX(Findid) Findid
         FROM CTE
         GROUP BY id)
     INSERT INTO #TEMP
     (UpdateID,
      FINDID,
      [COUNTER],
      [REFCOUNTER],
      [PERNR],
      [WORKDATE],
      [CATSHOURS],
      [APDAT],
      [LAETM],
      [CATS_STATUS],
      [APPR_STATUS]
     )
            SELECT c1.ID,
                   c1.FINDID,
                   a.COUNTER,
                   a.REFCOUNTER,
                   a.PERNR,
                   a.WORKDATE,
                   a.CATSHOURS,
                   a.APDAT,
                   a.LAETM,
                   a.CATS_STATUS,
                   a.APPR_STATUS
            FROM dbo.CATSDB A
                 INNER JOIN CTE1 c1 ON a.id = c1.Findid;

BEGIN TRY
    BEGIN TRAN;

    -- Overwrite each row with the values of the last revision of its chain;
    -- COUNTER itself is left untouched.
    UPDATE A
      SET
          [REFCOUNTER] = b.REFCOUNTER,
          [PERNR] = b.PERNR,
          [WORKDATE] = b.WORKDATE,
          [CATSHOURS] = b.CATSHOURS,
          [APDAT] = b.APDAT,
          [LAETM] = b.LAETM,
          [CATS_STATUS] = b.CATS_STATUS,
          [APPR_STATUS] = b.APPR_STATUS
    FROM CATSDB A
         INNER JOIN #TEMP B ON a.id = b.UpdateID;

    -- this is only test query
    SELECT c1.UpdateID AS UpdateID,
           a.*
    FROM dbo.CATSDB A
         INNER JOIN #TEMP c1 ON a.id = c1.Findid;

    -- Test mode: discard the changes. Replace with COMMIT to keep them.
    IF(@@TRANCOUNT > 0)
        ROLLBACK; -- commit
END TRY
BEGIN CATCH
    IF(@@TRANCOUNT > 0)
        ROLLBACK;
    -- Re-raise so the failure is visible to the caller instead of being swallowed.
    THROW;
END CATCH;
#Temp should be a permanent table.
IMO, your table badly needs an identity column, which should also be the clustered index.
You can try this; you can alter the table to add it.
(REFCOUNTER, COUNTER) should be a nonclustered index.
After and only after optimizing the query and with proper PLAN above index is going to boost performance.
Proper PLAN : Should you use Recursive or RBAR and update millions of records in one time or Should I Batch update ?
You can first Test the script with millions of row with Rollback.
Related
I am using SQL Server 2014 Standard.
I have the following query...
-- Description lines (voline '100') for each ACH transaction, one row per line.
-- Written as an inner join: filtering on ades.voline in WHERE already discards
-- transactions that have no matching apvodes row.
SELECT
    ach.amt,
    ades.dsline,
    ades.des
FROM ##ACHTrans AS ach
INNER JOIN apvodes AS ades
    ON ades.vo_id = ach.vo_id
WHERE ades.voline = '100'
ORDER BY
    ach.apnum,
    ach.cknum,
    ach.vo_id,
    ach.amt DESC
Which gives me the results...
+------------+---------------+------------------------------+
| ach.amt | ades.dsline | ades.des |
+------------+---------------+------------------------------+
| 1232.50 | 1 | This is the description for |
| 1232.50 | 2 | The $1,232.50 ACH Amount |
| 245.18 | 1 | This one is for the $245.18 |
| 245.18 | 2 | transactions details |
| 245.18 | 3 | that has four lines of info |
| 245.18 | 4 | in the description. |
| 79.25 | 1 | This $79.25 item has 1 line. |
| 15.00 | 1 | So does this $15.00 one. |
+------------+---------------+------------------------------+
I need a way to snag this info by the ach.amt line, and concatenate the ades.des info for results similar to:
+------------+--------------------------------------------------------------------------------------------------+
| Amount | Description |
+------------+--------------------------------------------------------------------------------------------------+
| 1232.50 | This is the description for The $1,232.50 ACH Amount |
| 245.18 | This one is for the $245.18 transactions details that has four lines of info in the description. |
| 79.25 | This $79.25 item has 1 line. |
| 15.00 | So does this $15.00 one. |
+------------+--------------------------------------------------------------------------------------------------+
This is what string_agg() does:
-- Concatenate the description lines of each amount in dsline order.
-- "t" stands for the row set produced by the query in the question
-- (columns amt, dsline, des). A space separator yields the requested sentence
-- form. STRING_AGG is available from SQL Server 2017 onwards.
select t.amt,
       string_agg(t.des, ' ') within group (order by t.dsline) as Description
from t
group by t.amt;
Without STRING_AGG you would use for XML PATH like so:
-- Sample rows from the question, held in a table variable.
DECLARE @table TABLE (amt MONEY, dsline INT, [des] VARCHAR(1000));
INSERT @table VALUES
(1232.50,1,'This is the description for'),
(1232.50,2,'The $1,232.50 ACH Amount'),
( 245.18,1,'This one is for the $245.18'),
( 245.18,2,'transactions details'),
( 245.18,3,'that has four lines of info'),
( 245.18,4,'in the description.'),
(  79.25,1,'This $79.25 item has 1 line.'),
(  15.00,1,'So does this $15.00 one.');

-- One row per amount; the correlated subquery glues that amount's lines
-- together in dsline order. Appending '' removes the column name, so
-- FOR XML PATH('') emits bare text with no element tags and no separator.
-- NOTE(review): FOR XML entitizes characters such as & and <; add
-- ", TYPE).value('.', 'varchar(max)')" if the descriptions can contain them.
SELECT
  amt,
  [Description] =
  (
    SELECT   t2.[des]+''
    FROM     @table AS t2
    WHERE    t.amt = t2.amt
    ORDER BY t2.dsline
    FOR XML PATH('')
  )
  -- string_agg(des, ',') within group (order by dsline)
FROM     @table AS t
GROUP BY amt;
Results:
amt Description
--------------------- ---------------------------------------------------------------------------------------------
15.00 So does this $15.00 one.
79.25 This $79.25 item has 1 line.
245.18 This one is for the $245.18transactions detailsthat has four lines of infoin the description.
1232.50 This is the description forThe $1,232.50 ACH Amount
This may not be the prettiest solution but I have had to deal with something similar and used a cursor to concatenate my strings in a temporary table and then used that in my final join statement back to the original table. I used table variables so you can play with it yourself.
Following is a code example you can play with:
-- Cursor-based string concatenation: builds one description per IDNum from the
-- detail lines in @tableDesc, then joins the result back to @tableAmt.
declare @tableAmt table (
    IDNum int,
    Amt Money
)
declare @tableDesc table (
    IDNum int,
    LineNum int,
    Info varchar(10)
)
set nocount on
insert @tableAmt (IDNum, Amt)
values (1,100.00),
       (2,125.00)
insert @tableDesc (IDNum, LineNum, Info)
values (1,1,'some text'),
       (1,2,'more text'),
       (2,1,'different'),
       (2,2,'text'),
       (2,3,'final')

-- Receives one concatenated description per IDNum.
declare @description table
    (IDNum int,
     ConcatDesc varchar(30)
    )

declare @id int,            -- IDNum of the row just fetched
        @oldid int,         -- IDNum of the group being accumulated (NULL before the first row)
        @string char(10),   -- Info of the row just fetched
        @finalstring varchar(30)  -- text accumulated for the current group

declare getdata_cursor cursor for
    select IDNum, Info
    from @tableDesc
    order by IDNum, LineNum

open getdata_cursor
fetch next from getdata_cursor into
    @id, @string

while @@FETCH_STATUS=0
begin
    -- A new IDNum starts: store the finished group and begin a fresh one.
    -- On the first row @oldid is NULL, so the comparison is not true and
    -- nothing is stored.
    if @oldid <> @id
    begin
        insert @description(IDNum, ConcatDesc)
        values(@oldid, @finalstring)
        select @finalstring = ''
    end

    -- @string is char(10) and therefore space padded; trim before appending.
    select @finalstring = isnull(@finalstring,'') + rtrim(@string) + ' '
    select @string = '', @oldid = @id

    fetch next from getdata_cursor into
        @id, @string
end

-- Store the last group; skipped when the cursor returned no rows at all.
if @oldid is not null
begin
    insert @description(IDNum, ConcatDesc)
    values(@oldid, @finalstring)
end

close getdata_cursor
deallocate getdata_cursor

select ta.IDNum, Amt, ConcatDesc from @tableAmt ta join @description d
    on ta.IDNum = d.IDNum
I amazed myself with this MERGE statement. The company isn't truly doing a Type 2 Slowly Changing Dimension, but it is close. Oddly, it's not even analytical data, but let's ignore that horrendous decision. I have this working by referencing HashBytes to indicate changed rows. Unfortunately, to get all scenarios addressed I ended up with that additional INSERT at the end from the temp table, which actually holds the updated rows.
Alas it's functional but if you have a more effective design, please do share. I would appreciate it.
However, I am attempting to get a row count not only for the INSERT from the temp table, but also for the updates AND the new INSERTs. These are all distinct, separate actions with their own row counts, which I need to document and account for.
How can I do this, please ?
-- Type-2 style load of tblDim from dbo.tblStaging.
--   Process one     : MERGE marks changed current rows as historical.
--   Process two     : the same MERGE inserts brand-new keys.
--   Process three a : the MERGE's UPDATE output is captured in a temp table.
--   Process three b : the captured rows are inserted as the new current rows.
-- ABunchOfColumns is a placeholder for the real column list.
DECLARE @dtNow AS DATETIME = GetDate()
DECLARE @dtPast AS DATETIME = DATEADD(day,-1,GetDate())
DECLARE @dtFuture AS DATETIME = '22991231'
SET NOCOUNT ON;
-- Temp Table is JUST Updating Rows reflecting
--Historical Marker on existing row No content change to row's columnar content data
IF OBJECT_ID('tempdb..#TheTempTableName') IS NOT NULL DROP TABLE #TheTempTableName
CREATE TABLE #TheTempTableName
(
ABunchOfColumns
RowCreatedDate datetime NULL,
RowEffectiveDate datetime NULL,
RowTerminationDate datetime NULL,
RowIsCurrent bit NULL,
RowHash varchar(max) NULL,
)
INSERT INTO #TheTempTableName
(
ABunchOfColumns
,RowCreatedDate
,RowEffectiveDate
,RowTerminationDate
,RowIsCurrent
,RowHash
)
SELECT
ABunchOfColumns
,RowCreatedDate
,RowEffectiveDate
,RowTerminationDate
,RowIsCurrent
,RowHash
FROM
(
-- HOLDLOCK keeps the matched key range locked for the duration of the MERGE.
MERGE tblDim WITH (HOLDLOCK) AS target
USING
(
SELECT
ABunchOfColumns
,RowCreatedDate
,RowEffectiveDate
,RowTerminationDate
,RowIsCurrent
,RowHash
FROM dbo.tblStaging
)
AS source
ON target.PKID = source.PKID
-- Only the current version of a row is compared; a differing hash means the content changed.
WHEN MATCHED
AND target.RowIsCurrent = 1
AND target.RowHash != source.RowHash
------- PROCESS ONE -- UPDATE --- HISTORICALLY MARK EXISTING ROWS
THEN UPDATE SET
RowEffectiveDate = @dtPast
,RowTerminationDate = @dtPast
,RowIsCurrent = 0
----- PROCESS TWO -- INSERT ---INSERT NEW ROWS
WHEN NOT MATCHED
THEN INSERT --- THIS INSERT Goes directly into Target ( DIM ) Table (New Rows not matched with PK = PK )
(
ABunchOfColumns
,RowCreatedDate
,RowEffectiveDate
,RowTerminationDate
,RowIsCurrent
,RowHash
)
VALUES
(
source.ABunchOfColumns
,@dtNow --source.RowCreatedDate,
,@dtFuture ---source.RowEffectiveDate,
,@dtFuture ---source.RowTerminationDate,
,1 ---source.RowIsCurrent,
,source.RowHash
)
-------PROCESS THREE a -- INSERT ---OUTPUT MATCHED ROWS FROM PROCESS ONE THAT CAUSED HISTORICAL MARK (CHANGES) "INSERT"
-- NOTE(review): these OUTPUT columns are unqualified and exist on both target and
-- source; they presumably should be source.<col> so the new content is captured - confirm.
OUTPUT
$action Action_Out,
ABunchOfColumns
,RowCreatedDate
,RowEffectiveDate
,RowTerminationDate
,RowIsCurrent
,RowHash
)
AS MERGE_OUT
-- Keep only rows the MERGE updated; rows it inserted are already in tblDim.
WHERE MERGE_OUT.Action_Out = 'UPDATE';
----------PROCESS THREE b -- INSERT FROM Temp Tbl to final
--Now we flush the data in the temp table into dim table
INSERT INTO tblDim
(
ABunchOfColumns
,RowCreatedDate
,RowEffectiveDate
,RowTerminationDate
,RowIsCurrent
,RowHash
)
SELECT
ABunchOfColumns
,@dtNow AS RowCreatedDate
,@dtFuture AS RowEffectiveDate
,@dtFuture AS RowTerminationDate
,1 AS RowIsCurrent
,RowHash
FROM #TheTempTableName
-- Presumably closes a BEGIN that is not part of this excerpt.
END
There are two types of deletes (1) real deletes (2) primary key updates.
So you can also say there are two types of inserts (1) real inserts (2) primary key updates
The updates are always updates.
The dilemma, then, is deciding when an insert/delete combination is really an update.
Usually, if you don't really care about the distinction above, a simple merge like this is sufficient:
-- Synchronise esqlProductTarget with esqlProductSource and log one row per
-- affected product, then count the rows per action.
MERGE esqlProductTarget AS T
USING esqlProductSource AS S
    ON (S.ProductID = T.ProductID)
WHEN MATCHED
    THEN UPDATE
    SET T.Name = S.Name,
        T.ProductNumber = S.ProductNumber,
        T.Color = S.Color
WHEN NOT MATCHED BY TARGET
    THEN INSERT (ProductID, Name, ProductNumber, Color)
    VALUES (S.ProductID, S.Name, S.ProductNumber, S.Color)
WHEN NOT MATCHED BY SOURCE
    THEN DELETE
-- Source columns are NULL for rows removed by NOT MATCHED BY SOURCE, so take
-- the key from inserted/deleted to log a ProductID for DELETE actions too.
OUTPUT COALESCE(inserted.ProductID, deleted.ProductID), $action INTO #MergeLog;

SELECT MergeAction, Cnt = COUNT(*)
FROM #MergeLog
GROUP BY MergeAction
The output will be like:
+-------------+-----+--+
| MergeAction | Cnt | |
+-------------+-----+--+
| DELETE | 100 | |
| UPDATE | 60 | |
| INSERT | 70 | |
+-------------+-----+--+
Refer to https://www.essentialsql.com/introduction-merge-statement/
I am not sure why you have "WHERE MERGE_OUT.Action_Out = 'UPDATE'". If you remove that, you can get your row count, unless I have misunderstood your query.
Based on your further comments, I think the main issue is how you handle the type 2 updates. The quick answer is that an UPDATE needs two operations (an update of the old row plus an insert of the new one), and DELETEs are not really DELETEs but UPDATEs on the timestamp.
I have formulated a sample query below how to handle type2 updates and the results should be self explanatory. I have tried doing a double operation on the UPDATE merge and it is interesting it cannot do it and gives an error: "An action of type 'INSERT' is not allowed in the 'WHEN MATCHED' clause of a MERGE statement." So i think there is no choice but to split the update and insert of the UPDATE statement.
The last consideration is also the DELETE that manifest as an update. I have handled it as well in the code below how to determine when an action of UPDATE is really a DELETE.
-- Type-2 demo. _a plays the warehouse table, #data is the incoming snapshot
-- and #outputdata logs what the MERGE did. The script runs inside a
-- transaction that is rolled back, so _a keeps its three seed rows afterwards.
DROP TABLE IF EXISTS _a
CREATE TABLE _a (
    id int
    ,val int
    ,fromdate datetime
    ,todate datetime
    ,isactive bit
)
INSERT INTO _a
select 1,100,'2015-Jan-1',NULL,1
UNION ALL select 2,200,'2015-Feb-1',NULL,1
UNION ALL select 3,300,'2015-Mar-1',NULL,1

DROP TABLE IF EXISTS #data
DROP TABLE IF EXISTS #outputdata
select * INTO #data from _a
-- TOP 0 creates an empty log table shaped like _a plus a leading action column.
select TOP 0 action=CAST('' as varchar(10)),* INTO #outputdata from _a

-- Simulate one delete, one update and one insert in the incoming data.
DELETE #data where id = 3
UPDATE #data set val = 2000 where id = 2
INSERT INTO #data
select 4,400,GETDATE(),NULL,1
--select * from #data

-- _a is your data warehouse table using type2
BEGIN TRAN
select Note='OLD STATE OF _a',* from _a
select Note='NEW SET OF DATA',* from #data

MERGE dbo._a T
USING (
    select id,val from #data
) S
    ON (S.id = T.id)
-- Changed value (NULL-safe comparison): close the current version.
-- T.isactive = 1 keeps history rows that are already closed from being re-stamped.
WHEN MATCHED
    AND T.isactive = 1
    AND ((S.val <> T.val OR (S.val IS NOT NULL AND T.val IS NULL) OR (S.val IS NULL AND T.val IS NOT NULL)))
    THEN UPDATE SET
        todate = GETDATE()
        ,isactive = 0
WHEN NOT MATCHED BY TARGET
    THEN INSERT (id,val,fromdate,todate,isactive)
    VALUES (S.id,S.val,GETDATE(),NULL,1)
-- A type-2 "delete" is a soft delete: the current row is closed, not removed.
WHEN NOT MATCHED BY SOURCE
    AND T.isactive = 1
    THEN UPDATE SET /*NO-PK*/
        todate = GETDATE()
        ,isactive = 0
OUTPUT $action as Action
    ,ISNULL(inserted.id,deleted.id) as id
    ,ISNULL(inserted.val,deleted.val) as val
    ,ISNULL(inserted.fromdate,deleted.fromdate) as fromdate
    ,ISNULL(inserted.todate,deleted.todate) as todate
    ,ISNULL(inserted.isactive,deleted.isactive) as isactive
INTO #outputdata;

select Note='Logs Output',* from #outputdata

-- FIND THE NEW RECORD
-- A logged UPDATE whose id is still present in #data is a real update: add the
-- new version. The small offset puts its fromdate just after the statement time.
INSERT INTO _a (id,val,fromdate,todate,isactive)
SELECT a.id,a.val,GETDATE()+.000001,a.todate,a.isactive
FROM #data a
INNER JOIN #outputdata b
    on a.id = b.id
WHERE b.action ='UPDATE'

select Note='NEW STATE OF _a',* from _a

-- A logged UPDATE with no active row left in _a was really a delete.
SELECT Note='Real Action',d1.id,action=CASE WHEN action='UPDATE' AND d2.id is null then 'DELETE' ELSE action END
FROM #outputdata d1
LEFT JOIN _a d2
    on d1.action ='UPDATE' and d1.id = d2.id and d2.isactive =1
ROLLBACK TRAN
The results will be:
+-----------------+----+-----+-------------------------+--------+----------+
| Note | id | val | fromdate | todate | isactive |
+-----------------+----+-----+-------------------------+--------+----------+
| OLD STATE OF _a | 1 | 100 | 2015-01-01 00:00:00.000 | NULL | 1 |
| OLD STATE OF _a | 2 | 200 | 2015-02-01 00:00:00.000 | NULL | 1 |
| OLD STATE OF _a | 3 | 300 | 2015-03-01 00:00:00.000 | NULL | 1 |
+-----------------+----+-----+-------------------------+--------+----------+
+-----------------+----+------+-------------------------+--------+----------+
| Note | id | val | fromdate | todate | isactive |
+-----------------+----+------+-------------------------+--------+----------+
| NEW SET OF DATA | 1 | 100 | 2015-01-01 00:00:00.000 | NULL | 1 |
| NEW SET OF DATA | 2 | 2000 | 2015-02-01 00:00:00.000 | NULL | 1 |
| NEW SET OF DATA | 4 | 400 | 2019-01-31 09:49:45.943 | NULL | 1 |
+-----------------+----+------+-------------------------+--------+----------+
+-------------+--------+----+-----+-------------------------+-------------------------+----------+
| Note | action | id | val | fromdate | todate | isactive |
+-------------+--------+----+-----+-------------------------+-------------------------+----------+
| Logs Output | INSERT | 4 | 400 | 2019-01-31 09:51:13.647 | NULL | 1 |
| Logs Output | UPDATE | 2 | 200 | 2015-02-01 00:00:00.000 | 2019-01-31 09:51:13.647 | 0 |
| Logs Output | UPDATE | 3 | 300 | 2015-03-01 00:00:00.000 | 2019-01-31 09:51:13.647 | 0 |
+-------------+--------+----+-----+-------------------------+-------------------------+----------+
-- OPERATIONS 1 INSERT 1 UPDATE 1 DELETE
DELETE #data where id = 3
UPDATE #data set val = 2000 where id = 2
INSERT INTO #data
select 4,400,GETDATE(),NULL,1
+-----------------+----+------+-------------------------+-------------------------+----------+
| Note | id | val | fromdate | todate | isactive |
+-----------------+----+------+-------------------------+-------------------------+----------+
| NEW STATE OF _a | 1 | 100 | 2015-01-01 00:00:00.000 | NULL | 1 |
| NEW STATE OF _a | 2 | 200 | 2015-02-01 00:00:00.000 | 2019-01-31 09:51:13.647 | 0 |
| NEW STATE OF _a | 3 | 300 | 2015-03-01 00:00:00.000 | 2019-01-31 09:51:13.647 | 0 |
| NEW STATE OF _a | 4 | 400 | 2019-01-31 09:51:13.647 | NULL | 1 |
| NEW STATE OF _a | 2 | 2000 | 2019-01-31 09:51:13.733 | NULL | 1 |
+-----------------+----+------+-------------------------+-------------------------+----------+
+-------------+----+--------+
| Note | id | action |
+-------------+----+--------+
| Real Action | 4 | INSERT |
| Real Action | 2 | UPDATE |
| Real Action | 3 | DELETE |
+-------------+----+--------+
I have a table that looks like this with repeating rows of 3 and 3...
Column1 | Column2
CustomerID | 22
CustomerName | ”ABC”
Responsible | ”Allan”
CustomerID | 23
CustomerName | ”DEF”
Responsible | ”Jessica”
CustomerID | 24
CustomerName | ”GHI”
Responsible | ”Paul”
The following script can be used to create the table and populate it with sample data...
-- Sample data: each customer is stored as three consecutive rows
-- (CustomerID, CustomerName, Responsible) in a key/value layout.
-- The bracketed names carry no embedded spaces, so the CREATE and the INSERT
-- refer to the same columns.
CREATE TABLE Responsible
    ( [RowType] VARCHAR(12),
      [Value] VARCHAR(9) )
;
INSERT INTO Responsible
    ( [RowType],
      [Value] )
VALUES
    ( 'CustomerID',
      '22' ),
    ( 'CustomerName',
      'ABC' ),
    ( 'Responsible',
      'Allan' ),
    ( 'CustomerID',
      '23' ),
    ( 'CustomerName',
      'DEF' ),
    ( 'Responsible',
      'Jessica' ),
    ( 'CustomerID',
      '24' ),
    ( 'CustomerName',
      'GHI' ),
    ( 'Responsible',
      'Paul' );
And I would like to get it like a table that looks like this:
CustomerID | CustomerName | Responsible
22 | ABC | Allan
23 | DEF | Jessica
24 | GHI | Paul
What is the best way forward?
I got it to work like this in SQL Server. I don't see any other option, but to use a cursor to go down one row at a time. The script below works only in your unique situation.
Create the new table
-- Destination table for the pivoted rows: one row per customer.
USE [YOURDATABASE NAME GOES HERE]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE dbo.Table_2
(
    CustomerID   INT         NULL,
    CustomerName VARCHAR(50) NULL,
    Responsible  VARCHAR(50) NULL
) ON [PRIMARY]
GO
Insert Data Into Table
-- Pivot Table_1 into Table_2 by reading Column2 three rows at a time:
-- the 1st value is the customer id, the 2nd the name, the 3rd the responsible.
-- The cursor has no ORDER BY, so this relies on the rows coming back in the
-- order they were stored.
DECLARE @CustomerId INT
DECLARE @CustomerName VARCHAR(50)
DECLARE @Responsible VARCHAR(50)

DECLARE myCursor CURSOR
    FOR SELECT Column2 FROM Table_1;

OPEN myCursor;

-- Prime the first group of three values.
FETCH NEXT FROM myCursor
    INTO @CustomerId;
FETCH NEXT FROM myCursor
    INTO @CustomerName;
FETCH NEXT FROM myCursor
    INTO @Responsible;

-- @@FETCH_STATUS reflects the third fetch of each group, so a trailing
-- incomplete group is not inserted.
WHILE @@FETCH_STATUS = 0
BEGIN
    INSERT INTO [dbo].[Table_2](CustomerID,CustomerName,Responsible)
    VALUES (@CustomerId,@CustomerName,@Responsible)

    FETCH NEXT FROM myCursor
        INTO @CustomerId;
    FETCH NEXT FROM myCursor
        INTO @CustomerName;
    FETCH NEXT FROM myCursor
        INTO @Responsible;
END

CLOSE myCursor;
DEALLOCATE myCursor;
GO
edit, SQL server version :
Query 10:
-- Number the rows of each attribute separately, then line the three numbered
-- lists up side by side on that number.
-- Each list is ordered by k, which is constant within its filter, so the
-- numbering follows whatever order the engine returns the rows in.
with ids as (
    select row_number() over (order by k) as id,
           v as CustomerID
    from t1
    where k = 'CustomerID'
),
names as (
    select row_number() over (order by k) as id,
           v as CustomerName
    from t1
    where k = 'CustomerName'
),
owners as (
    select row_number() over (order by k) as id,
           v as Responsible
    from t1
    where k = 'Responsible'
)
select ids.CustomerID,
       names.CustomerName,
       owners.Responsible
from ids
inner join names
    on ids.id = names.id
inner join owners
    on ids.id = owners.id
Results:
| CustomerID | CustomerName | Responsible |
|------------|--------------|-------------|
| 22 | ABC | Allan |
| 23 | DEF | Jessica |
| 24 | GHI | Paul |
This is what you want I think ?
SQL Fiddle
MySQL 5.6 Schema Setup:
-- Key/value sample table: k names the attribute, v holds its value.
-- Every customer occupies three consecutive rows.
CREATE TABLE t1 (
    `k` varchar(12),
    `v` varchar(9)
);

INSERT INTO t1 (`k`, `v`)
VALUES
    ('CustomerID',   '22'),
    ('CustomerName', 'ABC'),
    ('Responsible',  'Allan'),
    ('CustomerID',   '23'),
    ('CustomerName', 'DEF'),
    ('Responsible',  'Jessica'),
    ('CustomerID',   '24'),
    ('CustomerName', 'GHI'),
    ('Responsible',  'Paul')
;
Query 1:
-- Reset the three row counters. They are MySQL user variables, written with @.
set @v1 = 0, @v2 = 0, @v3 = 0;
-- Query 2:
-- Number the rows of each attribute with its own counter, then join the three
-- numbered lists on that number to get one row per customer.
select CustomerID, CustomerName, Responsible
from (
    select @v1 := @v1 + 1 as id, v as CustomerID
    from t1
    where k = 'CustomerID'
) tt1
inner join (
    select @v2 := @v2 + 1 as id, v as CustomerName
    from t1
    where k = 'CustomerName'
) tt2
    on tt1.id = tt2.id
inner join (
    select @v3 := @v3 + 1 as id, v as Responsible
    from t1
    where k = 'Responsible'
) tt3
    on tt1.id = tt3.id;
Results:
| CustomerID | CustomerName | Responsible |
|------------|--------------|-------------|
| 22 | ABC | Allan |
| 23 | DEF | Jessica |
| 24 | GHI | Paul |
So, I have an SQL table called "bom".
ParentPartId
ChildPartId
Imagine I have table full of data such as (unknown amount of sub-levels):
| PARENTPARTID | CHILDPARTID |
+---------------+--------------+
| NK-FS-DR-62DR | 2001020060 |
| 2001020060 | 2002014018 |
| 2002014018 | 120011 |
| NK-WH-DR-3MH | 2001916023 |
Basically I want to select "NK-FS-DR-62DR" which would select all the child parts below it so it would look like
| PARENTPARTID | CHILDPARTID |
+---------------+--------------+
| NK-FS-DR-62DR | 2001020060 |
| 2001020060 | 2002014018 |
| 2002014018 | 120011 |
boms can go unknown amount of levels deep.
So, I'm looking for some form of cursor to iterate through all the sublevels.
I currently have a cursor but I can't think of a way to iterate the data.
Here is my current code (Only goes level 1 deep):
-- Collect the bill of material under 'NK-FS-DR-62DR' into a temp table.
-- This version stops early: it stores the direct children, then the children
-- of each direct child, and goes no deeper.
CREATE TABLE #tmpBillOfMaterial
(
    ParentPartId VARCHAR(18),
    ChildPartId VARCHAR(18)
)

DECLARE @ParentPartId VARCHAR(18)

-- Level 1: direct children of the top-level part.
INSERT INTO #tmpBillOfMaterial ( ParentPartId, ChildPartId )
SELECT b.ParentPartId, b.ChildPartId
FROM jbds.BoM b
WHERE b.ParentPartId = 'NK-FS-DR-62DR'

-- Level 2: visit each direct child and store its own children.
DECLARE cursorStage1 CURSOR FOR
    SELECT b.ChildPartId
    FROM jbds.BoM b
    WHERE b.ParentPartId = 'NK-FS-DR-62DR'

OPEN cursorStage1
FETCH NEXT FROM cursorStage1 INTO @ParentPartId
WHILE @@FETCH_STATUS = 0
BEGIN
    INSERT INTO #tmpBillOfMaterial ( ParentPartId, ChildPartId )
    SELECT b.ParentPartId, b.ChildPartId
    FROM jbds.BoM b
    WHERE b.ParentPartId = @ParentPartId

    FETCH NEXT FROM cursorStage1 INTO @ParentPartId
END
CLOSE cursorStage1
DEALLOCATE cursorStage1

SELECT *
FROM #tmpBillOfMaterial

DROP TABLE #tmpBillOfMaterial
I've tried adding cursor within cursor but can only get 2 levels deep instead of it iterating all the data.
The easiest is to use a recursive CTE:
-- Walk the bill of material downwards from 'NK-FS-DR-62DR'.
;with bom_tree (parentpartid, childpartid) as (
    -- anchor: direct children of the starting part
    select b.parentpartid, b.childpartid
    from bom as b
    where b.parentpartid = 'NK-FS-DR-62DR'

    union all

    -- recursion: children of every part found so far
    select b.parentpartid, b.childpartid
    from bom as b
    inner join bom_tree as found
        on found.childpartid = b.parentpartid
)
select parentpartid, childpartid
from bom_tree
How can I get a List all the JobPositionNames having the lowest jobPositionId when ContactId = 1
Tablel :
| JobPositionId | JobPositionName | JobDescriptionId | JobCategoryId | ContactId
---------------------------------------------------------------------------------
1 | Audio Cables | 1 | 1 | 1
2 |Audio Connections| 2 | 1 | 1
3 |Audio Connections| 2 | 1 | 0
4 |Audio Connections| 2 | 1 | 0
5 | Sound Board | 3 | 1 | 0
6 | Tent Pen | 4 | 3 | 0
eg the result of this table should be lines 1,3,5,6
I can't figure out the solution.
Something may still be missing, but here is some code for you to look at.
Maybe it can help you.
--create table
create table t
(
    JobPositionId int identity(1,1) primary key,
    JobPositionName nvarchar(100) not null,
    JobDescriptionId int,
    JobCategoryId int,
    ContactId int
)
go
--insert values (JobPositionId is generated by the identity column)
BEGIN TRAN
INSERT INTO t (JobPositionName, JobDescriptionId, JobCategoryId, ContactId) VALUES ('AudioCables', 1,1,1)
INSERT INTO t (JobPositionName, JobDescriptionId, JobCategoryId, ContactId) VALUES ('AudioConnections',2,1,1)
INSERT INTO t (JobPositionName, JobDescriptionId, JobCategoryId, ContactId) VALUES ('AudioConnections',2,1,0)
INSERT INTO t (JobPositionName, JobDescriptionId, JobCategoryId, ContactId) VALUES ('AudioConnections',2,1,0)
INSERT INTO t (JobPositionName, JobDescriptionId, JobCategoryId, ContactId) VALUES ('SoundBoard',3,1,0)
INSERT INTO t (JobPositionName, JobDescriptionId, JobCategoryId, ContactId) VALUES ('TentPen',4,3,0)
COMMIT TRAN
GO
-- Lowest JobPositionId for every (name, contact) combination.
SELECT
    Min(JobPositionId) AS JobPositionId, JobPositionName, ContactId
INTO
    #tempTable
FROM
    t
GROUP BY JobPositionName, ContactId

-- Keep one row per JobPositionName: a row with ContactId = 0 wins when there
-- is one, otherwise the lowest JobPositionId. The sample data yields ids 1, 3, 5, 6.
SELECT ranked.JobPositionId, ranked.JobPositionName, ranked.ContactId
FROM (
    SELECT
        JobPositionId,
        JobPositionName,
        ContactId,
        ROW_NUMBER() OVER (
            PARTITION BY JobPositionName
            ORDER BY CASE WHEN ContactId = 0 THEN 0 ELSE 1 END, JobPositionId
        ) AS rn
    FROM #tempTable
) AS ranked
WHERE ranked.rn = 1
ORDER BY ranked.JobPositionId

drop table #tempTable
drop table t
GO
For per-group maximum/minimum queries you can use a null-self-join as well as strategies like subselects. This is generally faster in MySQL.
-- Null-self-join: keep each row j0 for which no row j1 with the same name
-- sorts before it. Rows sort by (ContactId <> 0, JobPositionId), so a row with
-- ContactId = 0 beats any row with a contact, and ties go to the lower id.
SELECT j0.JobPositionId, j0.JobPositionName, j0.ContactId
FROM Jobs AS j0
LEFT JOIN Jobs AS j1
    ON j1.JobPositionName = j0.JobPositionName
    AND (
        (j1.ContactId <> 0) < (j0.ContactId <> 0)
        OR ((j1.ContactId <> 0) = (j0.ContactId <> 0) AND j1.JobPositionId < j0.JobPositionId)
    )
WHERE j1.JobPositionName IS NULL
This says, for each JobPositionName, find a row for which there exists no other row with a lower ordering value. The ordering value here is a composite [ContactId-non-zeroness, JobPositionId].
(Aside: shouldn't JobPositionName and JobCategoryId be normalised out into a table keyed on JobDescriptionId? And shouldn't unassigned ContactIds be NULL?)
-- One row per JobPositionName:
--   * a name that occurs once returns its only row;
--   * otherwise the lowest JobPositionId among its rows with ContactID = 0.
-- A name with several rows and none with ContactID = 0 returns nothing,
-- because the subquery yields NULL and the join finds no match.
SELECT jp.*
FROM (
    SELECT JobPositionName,
           MIN(JobPositionId) AS JobPositionId,
           COUNT(*) AS cnt
    FROM JobPositions
    GROUP BY JobPositionName
) jpd
JOIN JobPositions jp
    ON jp.JobPositionId =
        CASE
            WHEN jpd.cnt = 1 THEN jpd.JobPositionId
            ELSE (
                SELECT MIN(jpi.JobPositionId)
                FROM JobPositions jpi
                WHERE jpi.JobPositionName = jpd.JobPositionName
                    AND jpi.ContactID = 0
            )
        END
Create an index on (JobPositionName, ContactId, JobPositionId) for this to work fast.
Note that it will not return jobs that have more than one position when none of those positions has ContactID = 0.