creating max date function using sql in databricks - sql

I am writing queries in Databricks using SQL on views and would like to calculate the max of the update-timestamp column across multiple views. For instance, I am joining table a with table b and would like to know max(a.updt_ts, b.updt_ts). Since the max function cannot take more than one column, I want to create a function. Any help is greatly appreciated.
below is what i have:
-- Asker's attempt (T-SQL-style syntax): return the later of two timestamps,
-- substituting 1900-01-01 for a NULL input.
-- NOTE(review): the `#` prefixes look like they stand in for T-SQL `@` variable
-- sigils — confirm against the original post.
CREATE temporary FUNCTION ufnGetMaxDt (#Date1 DATETIME2,#Date2 DATETIME2)
BEGIN
DECLARE #ret DATETIME2
, #MinDt datetime2;
-- Sentinel value used in place of NULL inputs.
SET #MinDt = cast('1900-01-01' as datetime2);
IF (#Date1) is null SET #Date1 = #MinDt;
IF (#Date2) is null SET #Date2 = #MinDt;
-- Pick the later of the two values (a tie returns #Date1).
SET #ret = CASE When #Date1 >= #Date2
Then #Date1
else #Date2
END;
IF (#ret IS NULL)
SET #ret = #MinDt; -- Dummy date
RETURN #ret;
END
GO

You could just use greatest? eg
-- Row-wise maximum of the two date columns, returned alongside every column of tmp.
SELECT
    *,
    GREATEST(date1, date2) AS xmax
FROM tmp
Or put them in an array, explode it and then max that? eg something like this:
%sql
-- Unpivot both date columns into a single column (one output row per array
-- element), then take the overall maximum across all rows.
WITH exploded AS (
    SELECT
        *,
        EXPLODE(ARRAY(date1, date2)) AS xmax
    FROM tmp
)
SELECT MAX(xmax)
FROM exploded
Seems a bit excessive when you can just use greatest though? It's also worth having a read through the list of Spark SQL built-in functions. You don't have to remember them all but at least if you know something is possible it's useful:
https://spark.apache.org/docs/2.3.0/api/sql/index.html

Related

Return smalldatetime value from scalar function SELECT query

I'm looking to create a scalar function in SQL Server (2017) that leverages a calendar table I built awhile back in order to calculate and return a date a given number of business days forward in time from a given date. I have been struggling with how to pass the SMALLDATETIME return value back appropriately. To give some idea what I'm attempting:
-- Asker's first attempt: take the first #numBusDays qualifying calendar rows on or
-- after #startDate (inner query), then keep the latest of them (outer TOP(1) ... DESC).
-- NOTE(review): the `#` prefixes look like they stand in for T-SQL `@` variable
-- sigils — confirm against the original post.
CREATE FUNCTION dbo.AddBusDaysToDate
(
#startDate SMALLDATETIME,
#numBusDays INT,
)
RETURNS SMALLDATETIME
AS
BEGIN
DECLARE #rs SMALLDATETIME;
-- This is the statement the question is about; the resolved version further down
-- writes the assignment as SELECT TOP(1) #rs = dt instead.
SELECT #rs = TOP(1) dt
FROM (
-- Qualifying rows: weekdays that are not holidays, on or after the start date.
SELECT TOP(#numBusDays) dt
FROM dbo.OurCalendar
WHERE isWeekday = 1
AND isHoliday = 0
AND dt >= #startDate
ORDER BY dt ASC
) as ID
ORDER BY dt DESC
RETURN #rs
END
dt is a SMALLDATETIME data type on our calendar table.
The query itself runs as intended when values plugged in for the variables, but I was trying to repurpose a similar function that calculated the difference in business days between two points on the calendar, with a different data type. So I'm unsure if I'm pulling in a row to the #rs instead of the individual value, or how to separate/isolate that specific 'cell' from the SELECT query result. I expect I'm probably missing something very simple.
Any help or a point in the right direction would be very well appreciated.
I was able to resolve with the following:
-- Returns the date that falls @numBusDays business days after @startDate.
-- A business day is a dbo.OurCalendar row with isWeekday = 1 and isHoliday = 0.
-- Returns NULL when the calendar has no qualifying row on or after @startDate.
CREATE FUNCTION dbo.AddBusDaysToDate
(
    @startDate SMALLDATETIME,
    @numBusDays INT
)
RETURNS SMALLDATETIME
AS
BEGIN
    DECLARE @rs SMALLDATETIME;
    DECLARE @workdayModifier INT;

    -- When the start date is itself a business day, the dt >= @startDate filter
    -- below includes it, so one extra row is fetched to step past it.
    SET @workdayModifier = CASE
        WHEN EXISTS (
            SELECT 1
            FROM dbo.OurCalendar
            WHERE dt = @startDate
              AND isWeekday = 1
              AND isHoliday = 0
        ) THEN 1
        ELSE 0
    END;

    -- Inner query: the first N business days in ascending order.
    -- Outer query: the last of those, i.e. the target date.
    SELECT TOP (1) @rs = ID.dt
    FROM (
        SELECT TOP (@numBusDays + @workdayModifier) dt
        FROM dbo.OurCalendar
        WHERE isWeekday = 1
          AND isHoliday = 0
          AND dt >= @startDate
        ORDER BY dt ASC
    ) AS ID
    ORDER BY ID.dt DESC;

    RETURN @rs;
END

How to pass the Datetime value as parameter to a stored function?

I created a function in SQL Server that takes a datetime value as a parameter. When I tried to execute it, I didn't know how to pass a datetime value as the parameter, and I got this error: Failed to convert date and / or time from a string.
this is the code of my function :
-- Asker's attempt: count distinct Num_Serie values for the hour containing #dateInsertion.
-- NOTE(review): the `#` prefixes look like they stand in for T-SQL `@` variable
-- sigils — confirm against the original post.
CREATE FUNCTION [dbo].[nb_pieces_produites] (#dateInsertion datetime)
returns decimal(4,0)
AS
BEGIN
DECLARE #year decimal(4,0), #month decimal(2,0), #day decimal(2,0) ,#hour
decimal(2,0), #nbPiecesProduites decimal(4,0)
-- Split the input into its year / month / day / hour parts.
set #year = (select DATEPART(yyyy, #dateInsertion))
set #month = (select DATEPART(mm, #dateInsertion))
set #day = (select DATEPART(dd, #dateInsertion))
set #hour = (select DATEPART(hh, #dateInsertion))
-- The BETWEEN bounds below are plain string literals: variable names inside quotes
-- are not substituted, so the strings cannot be converted to datetime.
-- Note also that the predicate tests the parameter itself, not a column of dbo_Test.
set #nbPiecesProduites = (SELECT COUNT(DISTINCT Num_Serie) FROM [dbo].
[dbo_Test] WHERE #dateInsertion BETWEEN '#year-#month-#day #hour:00:00' AND
'#year-#month-#day #hour:59:59')
return #nbPiecesProduites
END
and this is my query :
select [dbo].[nb_pieces_produites]('2017-06-19 11:38:52')
Can anyone help me please ?
Don't spend lots of time fiddling around with strings - try to keep your data as datetime data throughout.
To round a datetime down to the previous hour boundary, use a DATEADD/DATEDIFF pair:
-- Counts the distinct Num_Serie values in dbo.dbo_Test whose timestamp falls in the
-- same clock hour as @dateInsertion.
-- date_column_from_table is a placeholder for the table's datetime column.
CREATE FUNCTION [dbo].[nb_pieces_produites] (@dateInsertion datetime)
RETURNS decimal(4,0)
AS
BEGIN
    -- DATEADD/DATEDIFF pair: whole hours since day 0, added back to day 0, which
    -- rounds @dateInsertion down to the start of its hour without string handling.
    DECLARE @hourStart datetime = DATEADD(hour, DATEDIFF(hour, 0, @dateInsertion), 0);
    DECLARE @nbPiecesProduites decimal(4,0);

    -- Half-open range [hour start, next hour start) keeps the predicate index-friendly.
    SET @nbPiecesProduites = (
        SELECT COUNT(DISTINCT Num_Serie)
        FROM [dbo].[dbo_Test]
        WHERE date_column_from_table >= @hourStart
          AND date_column_from_table < DATEADD(hour, 1, @hourStart)
    );

    RETURN @nbPiecesProduites;
END
And, just to be safe, call it like this:
select [dbo].[nb_pieces_produites]('2017-06-19T11:38:52')
(Occasionally, under some settings, SQL Server will interpret nnnn-nn-nn as yyyy-dd-mm rather than yyyy-mm-dd, if it's followed by a space and then a time, rather than using T as the separator)
It's an error because your function expects a datetime value, but when you called it you passed a string, not a datetime.
Change your query to:
select [dbo].[nb_pieces_produites](getDate())
-- Counts the distinct Num_Serie values in dbo.dbo_Test recorded on the same calendar
-- date and in the same hour as @dateInsertion.
-- [datetimecolumn] is a placeholder for the table's datetime column.
CREATE FUNCTION [dbo].[nb_pieces_produites] (@dateInsertion datetime)
RETURNS decimal(4,0)
AS
BEGIN
    RETURN
    (
        SELECT COUNT(DISTINCT Num_Serie)
        FROM dbo.dbo_Test
        WHERE
            -- Use the date part to compare
            CONVERT(date, [datetimecolumn]) = CONVERT(date, @dateInsertion)
            -- Then compare with hour
            -- (DATEPART on the column is applied per row, so it cannot seek an index on it)
            AND DATEPART(HOUR, [datetimecolumn]) = DATEPART(HOUR, @dateInsertion)
    );
END

Generate random records for datetime columns by stored procedure in SQL

I want to generate 5 random records from a field which is a datetime column and contains several records of (OrderDate) for a given date range using stored procedure for the table named Orders
-- Asker's skeleton: a procedure taking an optional date range. The DATEADD arguments
-- are left unspecified ("......") in the question.
-- NOTE(review): the `#` prefixes look like they stand in for T-SQL `@` variable
-- sigils — confirm against the original post.
CREATE PROCEDURE test
#StartDate DATETIME = NULL,
#EndDate DATETIME = NULL,
AS
BEGIN
SELECT OrderDate = DATEADD(......)
FROM Orders
END
May I get some help!
A while loop works ok for this purpose, especially if you're concerned with limiting your randomness to a bounded date range.
The downside is that potentially many insert queries get executed vs. a single insert for a recursive CTE as in the other answer.
-- Inserts @RecordCount random dates into _test, each a whole number of days after
-- @MinDate and no later than @MaxDate (both ends inclusive).
-- One INSERT is issued per generated row.
CREATE PROCEDURE dbo.spGenDates2
    @MinDate datetime,
    @MaxDate datetime,
    @RecordCount int = 5
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @Range int, @DayOffset int, @Cnt int;

    -- Number of day boundaries between the two bounds.
    SET @Range = DATEDIFF(dd, @MinDate, @MaxDate);
    SET @Cnt = 1;

    WHILE @Cnt <= @RecordCount
    BEGIN
        -- RAND() is re-evaluated on every iteration; the float-to-int conversion
        -- truncates, giving an offset of 0..@Range days.
        SET @DayOffset = RAND() * (@Range + 1);

        INSERT INTO _test (Dt)
        VALUES (DATEADD(dd, @DayOffset, @MinDate));

        SET @Cnt = @Cnt + 1;
    END
END
Based on your syntax I'm assuming you're using SQL Server...
Note that you cannot reliably use the sql random number generator function RAND() within the context of a single query because it does not get reseeded per row so you end up receiving the same, single random number for each row result. Instead, an approach using NEWID() converted into a numeric does the trick when generating random values within the execution of a single query.
Here's a procedure that will give you n number of sample dates in the near past.
-- Inserts random sample datetimes into _test in a single statement. Each value is
-- @MaxDate minus a random number of milliseconds (under 2^31 ms, about 24.8 days).
-- A recursive CTE generates one row per iteration; the anchor member always yields
-- one row, so at least one row is inserted even when @RecordCount < 1.
CREATE PROCEDURE dbo.spGenDates
    @MaxDate datetime,
    @RecordCount int = 5
AS
BEGIN
    SET NOCOUNT ON;

    WITH dates AS (
        -- NEWID() is evaluated per row, unlike RAND(), so every row gets its own value.
        -- The modulo keeps CHECKSUM's result off INT's minimum, where ABS would overflow.
        SELECT
            DATEADD(MILLISECOND, -ABS(CHECKSUM(NEWID()) % 2147483647), @MaxDate) AS D,
            1 AS Cnt
        UNION ALL
        SELECT
            DATEADD(MILLISECOND, -ABS(CHECKSUM(NEWID()) % 2147483647), @MaxDate) AS D,
            x.Cnt + 1 AS Cnt
        FROM dates AS x
        WHERE x.Cnt < @RecordCount
    )
    INSERT INTO _test (Dt)
    SELECT D
    FROM dates
    -- The default recursion limit is 100 levels; lift it so larger @RecordCount
    -- values work. The WHERE clause above still bounds the recursion.
    OPTION (MAXRECURSION 0);
END
The wording of the question has been clarified (see comments on another answer) to be a desire to SELECT 5 random sample dates within a bounded range from a table.
A query like this will yield the desired result.
-- Five randomly chosen order dates within the half-open range [@StartDate, @EndDate).
-- ORDER BY NEWID() assigns each row a random sort key.
SELECT TOP (5) OrderDate
FROM Orders
WHERE OrderDate >= @StartDate
  AND OrderDate < @EndDate
ORDER BY NEWID()

How to compare data with a multiple selection in SQL Server?

I have these dates as example:
date1: 2013-01-01
date2: 2013-01-25
I need to create a procedure that will insert the product special offer in the database if date1 is between these 2 dates.
-- Asker's attempt: insert an offer row for #codP unless an existing offer conflicts
-- with the new start date.
-- NOTE(review): the `#` prefixes look like they stand in for T-SQL `@` variable
-- sigils — confirm against the original post.
create procedure CreateOfferProduct(
#codP varchar(5),
#date1 date,
#date2 date,
)
as
if(not exists(
select * from OfferProducts
where Products.codP = #codP
-- Scalar subquery: per the asker it returns several rows for one product, which is
-- why the comparison fails.
and #date1 <= (select date2 from OfferProducts where codP = #codP)
)
)
begin
insert into OfferProducts(codP, date1, date2)
values(#codP, #date1, #date2);
end
But since
select date2 from Products where codP = #codP
returns multiple values it doesn't work. Any help is appreciated.
This is one way to insert multiple rows that don't already exist in the destination, instead of doing this row-by-row technique where you assign values to a variable (which is slower and inflexible).
-- Set-based insert: adds an offer row for the product only when no existing offer
-- for that product ends on or after the new start date.
INSERT INTO dbo.OfferProducts (codP, date1, date2)
SELECT p.codP, @date1, @date2
FROM dbo.Products AS p
WHERE p.codP = @codP
  AND NOT EXISTS
  (
      SELECT 1
      FROM dbo.OfferProducts AS op
      WHERE op.codP = p.codP
        AND op.date2 >= @date1 -- this logic seems funny
  );
If you show sample data and desired results, including existing data that you want excluded from the insert, we might be able to formulate better logic for you.
You are looking for the intersection of two time periods. The following may be what you want:
. . .
-- True when no existing offer for the product overlaps [@date1, @date2].
-- Two closed ranges overlap when each one starts on or before the other one ends.
if not exists (
    select *
    from OfferProducts op
    where op.codP = @codP
      and op.date1 <= @date2
      and op.date2 >= @date1
)
. . .
The exact definition of overlap depends on whether the end points are included in the date range.
On re-reading the question, you explicitly state "if its date1 is between these 2 dates". If so, then try:
-- True when no existing offer for the product starts within [@date1, @date2] (inclusive).
if not exists (
    select *
    from OfferProducts op
    where op.codP = @codP
      and op.date1 between @date1 and @date2
)
. . .

Stored procedure which count records between two dates

I am trying to count records with status 0 between two dates in SQL Server 2008.
I have my procedure, but it is not right and throws an error.
Msg 156, Level 15, State 1, Procedure sp_SerchPickupHispanicBetweenDates, Line 6
Incorrect syntax near the keyword 'between'.
Procedure looks like
-- Asker's attempt: count pickup rows flagged Pickuphispanic = 1 within a date range.
-- NOTE(review): the `#` prefixes look like they stand in for T-SQL `@` variable
-- sigils — confirm against the original post.
CREATE procedure sp_SerchPickupHispanicBetweenDates
#date1 date,
#date2 date
as
select COUNT ( Pickuphispanic ) from pickup
-- "like between" is the construct the quoted syntax error (Msg 156) points at.
-- Both bounds also use #date1; #date2 is never referenced.
where Pickuphispanic = 1 and pickupdate like between #date1+ '%' and #date1 +'%'
I was trying to search for a solution on the internet, but with no success.
Any ideas how to write it correctly?
Also, I have a select statement which works perfectly:
-- Count of flagged pickups whose pickupdate lies in the literal range (both ends inclusive).
SELECT COUNT(Pickuphispanic)
FROM pickup
WHERE Pickuphispanic = 1
  AND pickup.pickupdate BETWEEN '2006-07-01' AND '2010-12-31'
The #date1 and #date2 parameters are of datatype DATE and as such you cannot append a % to those values (the % can only be used for string-based datatypes when using the LIKE operator; also: you cannot apply LIKE to a search using DATE values).
Use this instead:
-- Returns the number of dbo.pickup rows with Pickuphispanic = 1 whose pickupdate
-- lies between @date1 and @date2 (BETWEEN includes both ends).
CREATE PROCEDURE procSerchPickupHispanicBetweenDates
    @date1 date,
    @date2 date
AS
SELECT COUNT(Pickuphispanic)
FROM dbo.pickup
WHERE Pickuphispanic = 1
  AND pickupdate BETWEEN @date1 AND @date2
To call this, use this syntax:
EXEC procSerchPickupHispanicBetweenDates '2006-07-01', '2010-12-31'
(no braces, no parenthesis - just specify the two dates), or:
EXEC procSerchPickupHispanicBetweenDates @date1 = '2006-07-01', @date2 = '2010-12-31'
If you are trying to search between 2 dates, your stored procedure must look like this:
pickupdate between @date1 and @date2
YOU DON'T HAVE TO USE THE LIKE OPERATOR.
LIKE cannot be nested inside BETWEEN.
Try this:
-- Returns the number of pickup rows with Pickuphispanic = 1 whose pickupdate falls on
-- any day from @date1 through @date2.
-- A DATE value cannot be concatenated with '%', and LIKE takes a single pattern, so
-- the filter is written as a range instead. The upper bound is exclusive and set to
-- the day after @date2, so the whole of the last day is included even if pickupdate
-- carries a time of day.
CREATE PROCEDURE sp_SerchPickupHispanicBetweenDates
    @date1 date,
    @date2 date
AS
SELECT COUNT(Pickuphispanic)
FROM pickup
WHERE Pickuphispanic = 1
  AND pickupdate >= @date1
  AND pickupdate < DATEADD(day, 1, @date2)
Regards
Modify the query like this:
-- Inclusive range: rows dated exactly @date1 or @date2 are counted.
SELECT COUNT(Pickuphispanic_ID)
FROM pickup
WHERE Pickuphispanic = 1
  AND pickupdate BETWEEN @date1 AND @date2
or
-- Exclusive range: rows dated exactly @date1 or @date2 are NOT counted.
SELECT COUNT(Pickuphispanic_ID)
FROM pickup
WHERE Pickuphispanic = 1
  AND pickupdate > @date1
  AND pickupdate < @date2
LIKE cannot be nested inside BETWEEN.
If you want to use LIKE, then it should look like this:
-- LIKE-based variant: counts pickup rows with Pickuphispanic = 1 whose pickupdate,
-- formatted with style 106 (dd mon yyyy), starts with the formatted @date1 or @date2.
-- This matches rows on those two days only, not the days in between.
CREATE PROCEDURE sp_SerchPickupHispanicBetweenDates
    @date1 date,
    @date2 date
AS
SELECT COUNT(Pickuphispanic)
FROM pickup
WHERE Pickuphispanic = 1
  AND (
         -- VARCHAR(30) spells out the length CONVERT uses when none is given.
         CONVERT(VARCHAR(30), pickupdate, 106) LIKE CONVERT(VARCHAR(30), @date1, 106) + '%'
      OR CONVERT(VARCHAR(30), pickupdate, 106) LIKE CONVERT(VARCHAR(30), @date2, 106) + '%'
  )
And if you want to use BETWEEN, then it should look like this:
-- Returns the number of dbo.pickup rows with Pickuphispanic = 1 whose pickupdate falls
-- on any calendar day from @date1 through @date2.
-- Equivalent to comparing CONVERT(date, pickupdate) with BETWEEN, but written as a
-- half-open range on the bare column so an index on pickupdate can still be used.
CREATE PROCEDURE procSerchPickupHispanicBetweenDates
    @date1 date,
    @date2 date
AS
SELECT COUNT(Pickuphispanic)
FROM dbo.pickup
WHERE Pickuphispanic = 1
  AND pickupdate >= @date1
  AND pickupdate < DATEADD(day, 1, @date2)
FilterDate between ISNULL(CONVERT(DATE,#FromDate),'2000-01-01') and
ISNULL(CONVERT(DATE,#ToDate),'3000-01-01'))