SQL: Conditionally join on a date table if value found - sql

I want to return the columns found in a date dimension, if the values in my columns in a separate dimension are valid and match keys in the date dimension. But, I want to do this conditionally, checking first for attributeOne and whether or not a date key exists there that can map the date dimension, then checking attributeTwo, and so on.
How can I specify a join to the date dimension such that every column I want to check (e.g., attributeOne, attributeTwo, attributeThree, etc.) is joined when a match is found, so that I can conditionally report/return the date dimension columns for that match?
JOIN #date D on D.dateKey = d1.attributeOne;
Consider this probably far too contrived example:
-- Reset the demo tables. A bare DROP TABLE raises an error when the table does
-- not exist yet (e.g. the first run in a new session), so each drop is guarded.
IF OBJECT_ID('tempdb..#date') IS NOT NULL DROP TABLE #date;
IF OBJECT_ID('tempdb..#dim') IS NOT NULL DROP TABLE #dim;
IF OBJECT_ID('tempdb..#fact') IS NOT NULL DROP TABLE #fact;

-- Date dimension: integer keys (e.g. 20000101) and the label to report for each.
CREATE TABLE #date (dateKey int, dateVal varchar(25));
INSERT INTO #date (dateKey, dateVal)
VALUES
 (20000101, 'FY1')
,(20000102, 'FY2')
,(20000103, 'FY3')
,(20000104, 'FY4')
,(20000105, 'FY5');

-- Dimension with three candidate date keys per row. A value may be NULL or may
-- be a key that does not exist in #date (e.g. 20000301).
CREATE TABLE #dim (dKey int, attributeOne int, attributeTwo int, attributeThree int);
INSERT INTO #dim (dKey, attributeOne, attributeTwo, attributeThree)
VALUES
 (1, 20000101, 20000102, NULL)
,(2, NULL, 20000104, 20000105)
,(3, 20000301, 20000501, 20000104)
,(4, NULL, 20000102, NULL);

-- Fact rows; fKey is joined to #dim.dKey by the queries that follow.
CREATE TABLE #fact (fKey int, Naming varchar(25));
INSERT INTO #fact (fKey, Naming)
VALUES
 (1, 'ex1')
,(2, 'ex2')
,(3, 'ex3')
,(4, 'ex4');
-- how to
-- For each fact row, return the dateVal of the first attribute (attributeOne,
-- then attributeTwo, then attributeThree) whose value is a key in #date, or
-- '<missing>' when none of them matches.
-- Each attribute gets its own LEFT JOIN: a single inner join on attributeOne
-- would drop every row whose attributeOne is NULL or unmatched, so the later
-- attributes could never be reported.
SELECT
    f.fKey AS [F Key],
    COALESCE(dt1.dateVal, dt2.dateVal, dt3.dateVal, '<missing>') AS [attributeOne]
FROM #fact AS f
INNER JOIN #dim AS d1
    ON d1.dKey = f.fKey
LEFT JOIN #date AS dt1
    ON dt1.dateKey = d1.attributeOne
LEFT JOIN #date AS dt2
    ON dt2.dateKey = d1.attributeTwo
LEFT JOIN #date AS dt3
    ON dt3.dateKey = d1.attributeThree;

Maybe something like joining on an IN with some conditional aggregation?
-- One output row per fact row. For each attribute column:
--   * the matching dateVal when the attribute's value is a key in #date,
--   * '<missing>' when the attribute is populated but has no match,
--   * ''          when the attribute is NULL (or the fact has no #dim row).
-- The sentinel is applied AFTER aggregating, so the result does not depend on
-- how '<missing>' happens to sort against real dateVal strings under MAX().
SELECT
    fact.Naming,
    COALESCE(
        MAX(CASE WHEN dt.dateKey = dim.attributeOne THEN dt.dateVal END),
        CASE WHEN MAX(dim.attributeOne) IS NOT NULL THEN '<missing>' ELSE '' END
    ) AS attributeOne,
    COALESCE(
        MAX(CASE WHEN dt.dateKey = dim.attributeTwo THEN dt.dateVal END),
        CASE WHEN MAX(dim.attributeTwo) IS NOT NULL THEN '<missing>' ELSE '' END
    ) AS attributeTwo,
    COALESCE(
        MAX(CASE WHEN dt.dateKey = dim.attributeThree THEN dt.dateVal END),
        CASE WHEN MAX(dim.attributeThree) IS NOT NULL THEN '<missing>' ELSE '' END
    ) AS attributeThree
FROM #fact AS fact
LEFT JOIN #dim AS dim
    ON dim.dKey = fact.fKey
-- A date row joins when ANY of the three attributes references it; the CASE
-- expressions above then sort each joined date into the right output column.
LEFT JOIN #date AS dt
    ON dt.dateKey IN (dim.attributeOne, dim.attributeTwo, dim.attributeThree)
GROUP BY fact.fKey, fact.Naming
ORDER BY fact.fKey;
Naming | attributeOne | attributeTwo | attributeThree
:----- | :----------- | :----------- | :-------------
ex1 | FY1 | FY2 |
ex2 | | FY4 | FY5
ex3 | <missing> | <missing> | FY4
ex4 | | FY2 |
db<>fiddle here

Related

SQL Query - Run query multiple times but with a different variable date

I have a lengthy query written in SQL that uses CTEs and multiple variables to produce a report of about 1500 customer records with many columns based on a particular date, @ToDate. Some of the tables are ordered CTEs so I only get the latest value based on @ToDate.
I've omitted specifics but the structure is as follows:
-- Skeleton of the report query; "....." marks parts the author elided.
-- T-SQL local variables take the @ sigil (# denotes a temporary table).
DECLARE @ToDate date .....
DECLARE @Category varchar ....;
-- Each CTE keeps the rows that were in effect on @ToDate:
-- started on or before it, and not yet ended (or open-ended).
WITH cte1 AS (
    SELECT *
    FROM table1
    WHERE table1.start_date <= @ToDate
      AND (table1.end_date > @ToDate OR table1.end_date IS NULL)
)
, cte2 AS (
    SELECT *
    FROM table2
    WHERE table2.start_date <= @ToDate
      AND (table2.end_date > @ToDate OR table2.end_date IS NULL)
)
SELECT *
FROM cte1
LEFT JOIN cte2 ON cte2.id = cte1.id
where .....
which gives me the following results
|RunDate |CustomerID|DOB |Category|Col5 |Col6 |
|----------|----------|----------|--------|------|------|
|2021-08-30|11111 |2000-01-01|Cat1 | | |
|2021-08-30|22222 |2000-02-02|Cat2 | | |
I'd like to run the same script multiple times but with a different date. So run with @ToDate = '2021-08-30', which gives me one set of results, and then again for each previous Monday, n times, which would give me results like this...
|RunDate |CustomerID|DOB |Category|Col5 |Col6
|----------|----------|----------|--------|------|------|
|2021-08-30|11111 |2000-01-01|Cat1 | | |
|2021-08-30|22222 |2000-02-02|Cat2 | | |
|2021-08-23|11111 |2000-01-01|Cat1 | | |
|2021-08-23|22222 |2000-02-02|Cat2 | | |
|2021-08-23|33333 |2000-03-03|Cat9 | | |
I do have a calendar table available so I can easily identify the past n Mondays (or other day I like).
The only variable to change is @ToDate, as this is the Run Date, or As At Date if you will. Essentially I want to run it multiple times for the past few Mondays so I can get what the results were like at 30-08, 23-08, 16-08 etc...
I've never used loops and research suggests I should maybe avoid them or use them as a last resort. I'm not sure on the best approach and if I do use loops, how I wrap it around my query.
Thanks in advance
The question really needs a bit more elaboration, but I have taken a guess at what you are trying to do with this example.
I have created a Customers and an Orders table and then display the results for the date range.
I don't think you need to loop with cursors and such, as you can get the loop effect by just joining on #DateRanges, whether it is a temp table or a CTE.
Please let me know if this is not what you meant and I will remove the answer
-- Set up a temp table to hold the "as at" dates to report on.
-- OBJECT_ID replaces the lookup against the deprecated tempdb.dbo.sysobjects view.
IF OBJECT_ID(N'tempdb..#DateRanges', N'U') IS NOT NULL
BEGIN
    PRINT 'Removing temp table #DateRanges'
    DROP TABLE #DateRanges;
END

CREATE TABLE #DateRanges (
    [Date] DATE
);

-- Add some dates
INSERT INTO #DateRanges ([Date])
VALUES ('2021-08-30'),
       ('2021-08-23'),
       ('2021-08-16');

-- Set up some customers
IF OBJECT_ID(N'tempdb..#Customers', N'U') IS NOT NULL
BEGIN
    PRINT 'Removing temp table #Customers'
    DROP TABLE #Customers;
END

-- The primary key is deliberately left unnamed: constraint names on temp tables
-- are not made unique per session, so a fixed name such as PK_CustomerId fails
-- with "object already exists" when two sessions run this script concurrently.
CREATE TABLE #Customers (
    CustomerId BIGINT IDENTITY(1,1) NOT NULL PRIMARY KEY,
    [Name] NVARCHAR(50),
    DOB DATE NOT NULL
);

INSERT INTO #Customers ([Name], DOB)
VALUES ('Bob', '1989-01-01'),
       ('Robert', '1994-01-01'),
       ('Andrew', '1992-01-01');

-- Set up some orders
IF OBJECT_ID(N'tempdb..#Order', N'U') IS NOT NULL
BEGIN
    PRINT 'Removing temp table #Order'
    DROP TABLE #Order;
END

-- Unnamed primary key for the same reason as on #Customers.
CREATE TABLE #Order (
    OrderId BIGINT IDENTITY(1,1) NOT NULL PRIMARY KEY,
    CustomerId BIGINT NOT NULL,
    CreatedDate DATE NOT NULL,
    Category NVARCHAR(50) NOT NULL
);

INSERT INTO #Order (CustomerId, CreatedDate, Category)
VALUES
    (1, '2021-08-30', 'Cat1'),
    (1, '2021-08-23', 'Cat2'),
    (2, '2021-08-30', 'Cat1'),
    (2, '2021-08-23', 'Cat2'),
    (2, '2021-08-16', 'Cat3'),
    (3, '2021-08-30', 'Cat1'),
    (3, '2021-08-16', 'Cat2');

-- Joining on #DateRanges gives the "run once per date" effect without a loop:
-- each date row picks up the orders created in the 7-day window ending on it.
SELECT
    DR.[Date],
    O.OrderId,
    O.CustomerId,
    O.CreatedDate,
    O.Category
FROM #DateRanges AS DR
LEFT JOIN #Order AS O
    ON O.CreatedDate <= DR.[Date]
   AND O.CreatedDate >= DATEADD(DAY, -6, DR.[Date])
ORDER BY DR.[Date] DESC, O.OrderId;

I need to do retrofit query using update or merge

I have two tables A and B. In A, I have a column called fetch_year. I need to consider table B from these two columns
primary_date
secondary_date
These columns have JSON values like {"lock":"true","date":"01/01/1990"}
So from this, I need to get the date and I need to extract the year and should save it in table A column called fetch_year. Will always consider primary_date first then secondary_date(if primary_date is null)
The final result should be 1990 in the fetch_year column
Table A is empty as of now( only one column with cal_id)
cal_id fetch_year
1 null
n null
Table B
|B_id|Cal_id | primary_date | secondary_date |
|----|-------|-----------------------------------|------------------------|
|11 | 1 |{"lock":"true","date":"01/01/1990"}|Null|
|12 | 2 | Null | {"lock":"true","date":"01/01/1980"} |
|13 | 3 | Null | Null |
|14 | 4 | {"lock":"true","date":"01/01/1995"} |{"lock":"true","date":"01/01/1997"} |
In table B
So I have n number of records in both the tables
I need results like this in A table
Cal_id fetch_year.
1 1990
2 1980
3 Null
4 1995
n n-values
In cal_id =4 in this case we have value in both columns so we are considering primary_date not secondary_date
Please help me with this problem
You could make use of either JSON_VALUE or OPENJSON here to extract the date from your JSON blobs.
I tend to prefer OPENJSON because it allows you to extract multiple values simultaneously and they don't have to be at the same level in a nested JSON structure. With the "squirrelly" dates in your example data, though, you may prefer the JSON_VALUE version with TRY_CONVERT so that you have more control over date deserialization.
-- Data setup: dbo.A is the target (fetch_year starts out NULL); dbo.B carries
-- the JSON text that the year is extracted from.
CREATE TABLE dbo.A (
    Cal_id     int,
    fetch_year int
);

CREATE TABLE dbo.B (
    B_id           int NOT NULL IDENTITY(11, 1),
    Cal_id         int,
    primary_date   nvarchar(max),
    secondary_date nvarchar(max)
);

INSERT INTO dbo.A (Cal_id, fetch_year)
VALUES (1, NULL),
       (2, NULL),
       (3, NULL),
       (4, NULL);

-- Covers all four cases: primary only, secondary only, neither, and both.
INSERT INTO dbo.B (Cal_id, primary_date, secondary_date)
VALUES (1, N'{"lock":"true","date":"01/01/1990"}', NULL),
       (2, NULL, N'{"lock":"true","date":"01/01/1980"}'),
       (3, NULL, NULL),
       (4, N'{"lock":"true","date":"01/01/1995"}', N'{"lock":"true","date":"01/01/1997"}');
-- JSON_VALUE example: take the year from primary_date's "date" property,
-- falling back to secondary_date's when the primary one yields no date.
-- Style 101 = mm/dd/yyyy; TRY_CONVERT returns NULL instead of raising an error.
-- REF: CAST and CONVERT / Date and time styles
-- https://learn.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql#date-and-time-styles
UPDATE tgt
SET fetch_year = YEAR(COALESCE(
        TRY_CONVERT(date, JSON_VALUE(src.primary_date, '$.date'), 101),
        TRY_CONVERT(date, JSON_VALUE(src.secondary_date, '$.date'), 101)
    ))
FROM dbo.A AS tgt
INNER JOIN dbo.B AS src
    ON src.Cal_id = tgt.Cal_id;
-- OPENJSON example: same result as the JSON_VALUE version (primary date first,
-- secondary as fallback), with the JSON shredded by OPENJSON.
-- The "date" property is read as text and converted with TRY_CONVERT style 101
-- (mm/dd/yyyy). A date-typed WITH column cannot state the expected style, so
-- the explicit conversion keeps the interpretation fixed and maps unparseable
-- values to NULL.
UPDATE Table_A
SET fetch_year = YEAR(COALESCE(
        TRY_CONVERT(date, Primary_JSON.[date], 101),
        TRY_CONVERT(date, Secondary_JSON.[date], 101)
    ))
FROM dbo.A AS Table_A
INNER JOIN dbo.B AS Table_B
    ON Table_B.Cal_id = Table_A.Cal_id
-- OUTER APPLY keeps the row when the JSON column is NULL (no rows to shred).
OUTER APPLY OPENJSON(Table_B.primary_date) WITH ([date] nvarchar(30)) AS Primary_JSON
OUTER APPLY OPENJSON(Table_B.secondary_date) WITH ([date] nvarchar(30)) AS Secondary_JSON;

Nested while loop in SQL Server is not showing the expected result

I am trying to connect records from two different tables so I can display the data in a tabular format in an SSRS tablix.
The code below does not return the expected results.
As is, for each item in Temp_A the loop updates everything with the last item in Temp_C. Here is the code:
-- Repro script for the nested-WHILE problem described in the question.
-- Only the variable sigils are corrected here (T-SQL variables are @name;
-- #name is a temp table). The loop logic is intentionally left as posted so the
-- script still produces the "Incorrect result" shown below it.
CREATE TABLE #Temp_A
(
    [ID] INT,
    [Name] VARCHAR(255)
)
INSERT INTO #Temp_A ([ID], [Name])
VALUES (1, 'A'), (2, 'B')

CREATE TABLE #Temp_C
(
    [ID] INT,
    [Name] VARCHAR(255)
)
INSERT INTO #Temp_C ([ID], [Name])
VALUES (1, 'C'), (2, 'D')

CREATE TABLE #Temp_Main
(
    [Temp_A_ID] INT,
    [Temp_A_Name] VARCHAR(255),
    [Temp_C_ID] INT,
    [Temp_C_Name] VARCHAR(255),
)

DECLARE @MIN_AID int = (SELECT MIN(ID) FROM #Temp_A)
DECLARE @MAX_AID int = (SELECT MAX(ID) FROM #Temp_A)
DECLARE @MIN_DID int = (SELECT MIN(ID) FROM #Temp_C)
DECLARE @MAX_DID int = (SELECT MAX(ID) FROM #Temp_C)

WHILE @MIN_AID <= @MAX_AID
BEGIN
    WHILE @MIN_DID <= @MAX_DID
    BEGIN
        -- Adds one row for the current #Temp_A item; the C columns start as NULL.
        INSERT INTO #Temp_Main ([Temp_A_ID], [Temp_A_Name])
        SELECT ID, [Name]
        FROM #Temp_A
        WHERE ID = @MIN_AID

        -- Cause of the wrong output: the WHERE clause filters #Temp_C only.
        -- Nothing restricts which #Temp_Main rows are updated, so EVERY row
        -- inserted so far is overwritten with the current #Temp_C item, and the
        -- last item processed wins.
        UPDATE #Temp_Main
        SET [Temp_C_ID] = ID, [Temp_C_Name] = [Name]
        FROM #Temp_C
        WHERE ID = @MIN_DID

        SET @MIN_DID = @MIN_DID + 1
    END

    SET @MIN_AID = @MIN_AID + 1
    -- Restarts the inner counter at a hard-coded 1 rather than MIN(ID) of #Temp_C.
    SET @MIN_DID = 1
END

SELECT * FROM #Temp_Main

DROP TABLE #Temp_A
DROP TABLE #Temp_C
DROP TABLE #Temp_Main
Incorrect result:
Temp_A_ID | Temp_A_Name | Temp_C_ID | Temp_C_Name
----------+-------------+-----------+---------------
1 A 2 D
1 A 2 D
2 B 2 D
2 B 2 D
Expected results:
Temp_A_ID | Temp_A_Name | Temp_C_ID | Temp_C_Name
----------+-------------+-----------+---------------
1 A 1 C
1 A 2 D
2 B 1 C
2 B 2 D
What am I missing?
You seem to want a cross join:
-- Pair every #Temp_A row with every #Temp_C row; no loop is required.
SELECT a.*, c.*
FROM #Temp_A AS a
CROSS JOIN #Temp_C AS c
ORDER BY a.ID, c.ID;
Here is a db<>fiddle.
There is no need to write a WHILE loop to do this.
You can use INSERT to store this in #Temp_Main, but I don't see a need to have a temporary table for storing the results of this query.

SQL Server, Merge two records in one record

We have these tables
-- Parent table: one row per id.
CREATE TABLE tbl01
(
    [id]   int          NOT NULL,
    [name] nvarchar(50) NOT NULL,
    PRIMARY KEY ([id])
)
-- Child table: several (val, code) rows per tbl01.id.
CREATE TABLE tbl02
(
    [subId] int          NOT NULL,
    [id]    int          NOT NULL,
    [val]   nvarchar(50) NULL,
    [code]  int          NULL,
    PRIMARY KEY ([subId]),
    FOREIGN KEY ([id]) REFERENCES tbl01 ([id])
)
If we run this query:
-- One output row per tbl02 row, carrying the parent's id and name.
SELECT
    parent.id,
    parent.name,
    child.val,
    child.code
FROM tbl01 AS parent
INNER JOIN tbl02 AS child
    ON child.id = parent.id
we get these results:
-------------------------------
id | name | val | code
-------------------------------
1 | one | FirstVal | 1
1 | one | SecondVal | 2
2 | two | YourVal | 1
2 | two | OurVal | 2
3 | three | NotVal | 1
3 | three | ThisVal | 2
-------------------------------
You can see that each two rows are related to same "id"
The question is: we need for each id to retrieve one record with all val, each val will return in column according to the value of column code
if(code = 1) then val as val-1
else if (code = 2) then val as val-2
Like this:
-------------------------------
id | name | val-1 | val-2
-------------------------------
1 | one | FirstVal | SecondVal
2 | two | YourVal | OurVal
3 | three | NotVal | ThisVal
-------------------------------
Any advice?
You can use MAX and GROUP BY to achieve this:
-- Collapse the rows of each id into one row: the val carried by code 1 goes to
-- [val-1] and the val carried by code 2 goes to [val-2]. Rows with any other
-- code contribute '' to a column, which MAX then discards in favour of a real
-- value when one exists.
SELECT
    t1.id,
    t1.name,
    MAX(CASE WHEN t2.code = 1 THEN t2.val ELSE '' END) AS [val-1],
    MAX(CASE WHEN t2.code = 2 THEN t2.val ELSE '' END) AS [val-2]
FROM tbl01 AS t1
INNER JOIN tbl02 AS t2
    ON t2.id = t1.id
GROUP BY t1.id, t1.name
Is it the PIVOT operator (http://technet.microsoft.com/en-us/library/ms177410(v=sql.105).aspx) that you are looking for?
You've already got a few answers, but here's one using PIVOT as an alternative. The good thing is that this approach is easy to scale if additional columns are required later.
-- SETUP TABLES
-- Table variables are declared with the @ sigil (DECLARE #t1 TABLE is not valid
-- T-SQL; # names are temp tables created with CREATE TABLE).
DECLARE @t1 TABLE (
    [id] int NOT NULL PRIMARY KEY,
    [name] nvarchar(50) NOT NULL
)
DECLARE @t2 TABLE (
    [subId] int NOT NULL PRIMARY KEY,
    [id] int NOT NULL,
    [val] nvarchar(50) NULL,
    [code] int NULL
)

-- SAMPLE DATA
INSERT INTO @t1 (id, name)
VALUES (1, 'one'), (2, 'two'), (3, 'three')

INSERT INTO @t2 (subId, id, val, code)
VALUES (1, 1, 'FirstVal', 1), (2, 1, 'SecondVal', 2)
     , (3, 2, 'YourVal', 1),  (4, 2, 'OurVal', 2)
     , (5, 3, 'NotVal', 1),   (6, 3, 'ThisVal', 2)

-- SELECT (using PIVOT)
-- src exposes only id, name, val and code: PIVOT groups by every column that is
-- neither aggregated (val) nor pivoted (code), i.e. by (id, name).
SELECT id, name, [1] AS [val-1], [2] AS [val-2]
FROM
(
    SELECT t2.id, t1.name, t2.val, t2.code
    FROM @t1 AS t1
    INNER JOIN @t2 AS t2 ON t2.id = t1.id
) AS src
PIVOT
(
    MIN(val)
    FOR code IN ([1], [2])   -- one output column per listed code value
) AS pvt
results:
id name val-1 val-2
---------------------------------
1 one FirstVal SecondVal
2 two YourVal OurVal
3 three NotVal ThisVal
If there are always only two values, you could join them or even easier, group them:
-- Two values per id: the smallest val goes to [val-1] and the largest to [val-2].
-- The aliases are bracketed because an unquoted val-1 is parsed as "val minus 1",
-- and the grouping column matches the selected tbl01.id (grouping by tbl02.id
-- while selecting tbl01.id is rejected by SQL Server).
SELECT
    tbl01.id         AS id,
    MIN(tbl01.name)  AS name,
    MIN(tbl02.val)   AS [val-1],
    MAX(tbl02.val)   AS [val-2]
FROM tbl01
INNER JOIN tbl02 ON tbl01.id = tbl02.id
GROUP BY tbl01.id
note: this query will always put the lowest value in the first column and highest in the second, if this is not wanted: use the join query:
Join query
If you always want code 1 in the first column and code 2 in the second:
-- Code 1 always lands in [val-1] and code 2 in [val-2].
-- tbl02 is joined twice under different aliases (there is no tbl03), and each
-- join carries ONE ON clause with the code filter combined via AND.
SELECT
    tbl01.id   AS id,
    tbl01.name AS name,
    code1.val  AS [val-1],
    code2.val  AS [val-2]
FROM tbl01
INNER JOIN tbl02 AS code1
    ON code1.id = tbl01.id
   AND code1.code = 1
INNER JOIN tbl02 AS code2
    ON code2.id = tbl01.id
   AND code2.code = 2
Variable amount of columns
You cannot get a variable number of columns unless you build the query dynamically, either in application code or in a T-SQL stored procedure.
My advice:
If it's always two values, join them in the query; if not, let your server-side code transform the data (or, even better, find a way that makes it unnecessary to transform the data).
Try this - it uses a pivot function but it also creates creates the dynamic columns dependent on code
-- Dynamic PIVOT over table Test: builds the column list [value1],[value2],...
-- from the per-id row numbers, then pivots val into those columns.
-- Variables use the @ sigil, and both buffers are varchar(max) so that a long
-- column list cannot be silently truncated into broken SQL.
-- NOTE(review): ROW_NUMBER() is ordered by the partition column itself, so which
-- val lands in which valueN column is not deterministic - confirm whether Test
-- has a column (e.g. code) that should drive the ordering.
DECLARE @ColumnString varchar(max)
DECLARE @sql varchar(max)

-- Staging table for the distinct generated column names.
CREATE TABLE #ColumnValue
(
    Value varchar(500)
)

INSERT INTO #ColumnValue (Value)
SELECT DISTINCT '[' + 'value' + CONVERT(varchar(20), ROW_NUMBER() OVER (PARTITION BY id ORDER BY id)) + ']'
FROM Test

-- Concatenate the names into one comma-separated list.
SELECT @ColumnString = COALESCE(@ColumnString + ',', '') + Value
FROM #ColumnValue

DROP TABLE #ColumnValue

-- The inner query must label rows exactly the way the column list was generated.
SET @sql =
'
SELECT *
FROM
(
SELECT
id,name,val,''value'' + Convert(Varchar(20),ROW_NUMBER() Over(Partition by id Order by id ))as [values]
FROM Test
) AS P
PIVOT
(
MAX(val) FOR [values] IN (' + @ColumnString + ')
) AS pv
'
--print @sql
EXEC (@sql)

SQL query to show repeating data from child records in columns

I have the following tables in a SQL Server 2000 database:
Master
MasterID | Details | [other fields]
=====================================
PK (int) | Free text | ...
LogTable
LogID | MasterID | UserID | LogDate | LogText
==========================================================
PK (int) | FK (int) | VarChar(2)| Date stamp | Free text
There may be many Log entries for each master record.
I have a query which extracts the most recent three associated Log entries for each Master row as shown below. Note that appropriate conversion and formatting is performed to achieve the LogData concatenation (omitted for clarity):
-- One row per (master, log entry), limited to each master's three most recent
-- log entries by LogDate.
SELECT
    M.MasterID,
    M.Details,
    L.LogDate + L.UserID + L.LogText AS LogData
FROM MasterTable AS M
INNER JOIN LogTable AS L
    ON L.MasterID = M.MasterID
WHERE L.LogID IN (
        -- correlated on the outer master row
        SELECT TOP 3 recent.LogID
        FROM LogTable AS recent
        WHERE recent.MasterID = M.MasterID
        ORDER BY recent.LogDate DESC
    )
This produces output like this:
MasterID | Details | LogData
========================================================
1 | First | 05/11/2012 AB Called Client
2 | Second | 08/11/2012 CD Client Visit
2 | Second | 07/11/2012 CD Called Client
2 | Second | 05/11/2012 AB Called Client
What I need to achieve is showing the data from the second table as columns in the output, all reported against each single master record, thus avoiding repeated data. Like so:
MasterID | Details | LogData1 | LogData2 | LogData3
===========================================================================================================
1 | First | 05/11/2012 AB Called Client | (null) | (null)
2 | Second | 08/11/2012 CD Client Visit | 07/11/2012 CD Called Client | 05/11/2012 AB Called Client
Note that in the real world requirement, this solution will be part of flattening 5 tables with the output consisting of approx 20,000 rows and 90 columns of data.
Thanks in advance.
I'm going to post this, just to show it can be done, but HIGHLY SUGGEST, not do it through SQL. Should be done through the UI that's displaying to be more dynamic on your columns. Even then, I would design this differently.
-- Flattens up to three log entries per master row into LogData1..LogData3 by
-- walking @LogTable one row at a time in LogID order.
-- Table variables and scalars use the @ sigil (# would denote temp tables).
-- NOTE(review): entries are filled in ascending LogID order and the FIRST three
-- per master are kept; the question asks for the three most recent - confirm
-- whether the ordering needs to be reversed.

-- create master table
DECLARE @MasterTable TABLE (
    [MasterID] [int] IDENTITY (1, 1) NOT NULL ,
    [Details] [varchar] (50) ,
    [AdditionalField_1] [varchar] (50) ,
    [AdditionalField_n] [varchar] (50)
)
-- create log table
DECLARE @LogTable TABLE (
    [LogID] [int] IDENTITY (1, 1) NOT NULL ,
    [MasterID] [int] NULL ,
    [UserID] [varchar] (2) ,
    [LogDate] [datetime] NULL ,
    [LogText] [varchar] (50)
)
-- insert into master table
INSERT INTO @MasterTable (Details)
VALUES ('First')
INSERT INTO @MasterTable (Details)
VALUES ('Second')
-- insert into log table
INSERT INTO @LogTable (MasterID, UserID, LogDate, LogText)
VALUES (1, 'AB', '05/11/2012', 'Called Client')
INSERT INTO @LogTable (MasterID, UserID, LogDate, LogText)
VALUES (2, 'AB', '05/11/2012', 'Called Client')
INSERT INTO @LogTable (MasterID, UserID, LogDate, LogText)
VALUES (2, 'CD', '07/11/2012', 'Called Client')
INSERT INTO @LogTable (MasterID, UserID, LogDate, LogText)
VALUES (2, 'CD', '08/11/2012', 'Client Visit')
-- create table to display data
-- LogData columns are VARCHAR(100) to match @NewValue: date (10) + UserID (2) +
-- separators (3) + LogText (50) can exceed 50 characters.
DECLARE @MyTemp TABLE (MasterID INT, Details VARCHAR(50), LogData1 VARCHAR(100), LogData2 VARCHAR(100), LogData3 VARCHAR(100))
INSERT INTO @MyTemp (MasterID, Details, LogData1, LogData2, LogData3)
SELECT MasterID, Details, NULL, NULL, NULL FROM @MasterTable
-- create vars
DECLARE @ID INT, @NewID INT, @MasterID INT, @NewValue VARCHAR(100)
SET @ID = 0
-- loop through data (left via BREAK once no further log row exists)
WHILE @ID > -1
BEGIN
    -- clear vars so a SELECT that finds no row leaves them NULL
    SELECT @NewID = NULL, @MasterID = NULL, @NewValue = NULL
    -- get the next log row after the last one processed; ORDER BY makes
    -- "next" well defined instead of depending on the scan order
    SELECT TOP 1
          @NewValue = CONVERT(VARCHAR(10), LogDate, 103) + ' ' + UserID + ': ' + LogText
        , @MasterID = MasterID
        , @NewID = LogID
    FROM @LogTable
    WHERE LogID > @ID
    ORDER BY LogID
    -- if no data, exit loop
    IF @NewID IS NULL
        BREAK
    -- advance the cursor unconditionally: doing this inside the UPDATE would
    -- leave @ID unchanged (and loop forever) when the log row's MasterID has
    -- no row in @MyTemp
    SET @ID = @NewID
    -- put the value into the first LogData column that is still empty; all
    -- column references on the right-hand side see the pre-update values
    UPDATE m
    SET   LogData1 = COALESCE(m.LogData1, @NewValue)
        , LogData2 = (CASE WHEN m.LogData1 IS NOT NULL
                           THEN COALESCE(m.LogData2, @NewValue)
                           ELSE m.LogData2 END)
        , LogData3 = (CASE WHEN m.LogData1 IS NOT NULL AND m.LogData2 IS NOT NULL
                           THEN COALESCE(m.LogData3, @NewValue)
                           ELSE m.LogData3 END)
    FROM @MyTemp m
    WHERE m.MasterID = @MasterID
END
--display all data
SELECT * FROM @MyTemp