Query to calculate week-wise totals of fields - SQL

I need to build a query that calculates weekly sums/totals of the product_id and Product_Name fields based on Released_Date, revision_date, and IS_UPDATED, and displays the output like the following. I would also appreciate it if the output showed each week's starting and ending dates instead of Week-1, Week-2.
The table structure and insert script are below.
create table products (
Released_Date varchar(40)
, product_id varchar(40)
, Product_Name varchar(40)
, revision_date varchar(40)
, IS_UPDATED varchar(2)
)
insert into products values('2018-04-25 00:00:00','Pega','Pega5.0','2018-04-27 00:00:00','N');
insert into products values('2018-05-11 00:00:00','Oracle','orace11g','2018-05-13 00:00:00','Y');
insert into products values('2018-04-04 00:00:00','Oracle',' OracleBPM','2018-04-06 00:00:00','Y');
insert into products values('2018-06-05 00:00:00','Ibm','Cognos','2018-06-08 00:00:00','Y');
insert into products values('2018-05-03 00:00:00','Microsoft','C++','2018-05-05 00:00:00','Y');
insert into products values('2018-05-21 00:00:00','Microsoft',' C#','2018-05-25 00:00:00','Y');
insert into products values('2018-04-10 00:00:00','Salesforce','CPQ','2018-04-13 00:00:00','Y');
insert into products values('2018-03-12 00:00:00','Java',' Struts','2018-03-15 00:00:00','Y');
insert into products values('2018-04-12 00:00:00','Salesforce','Analytics','2018-04-13 00:00:00','Y');
insert into products values('2018-05-09 00:00:00','Microsoft','Asp','2018-05-11 00:00:00','Y');
insert into products values('2018-05-28 00:00:00','Salesforce','Marketing','2018-05-31 00:00:00','N');
insert into products values('2018-04-11 00:00:00','ETL',' Informatica','2018-04-12 00:00:00',' Y');
insert into products values('2018-03-26 00:00:00','Oracle',' orace11g','2018-03-30 00:00:00','Y');
insert into products values('2018-04-19 00:00:00','Oracle',' obiee','2018-04-20 00:00:00','Y');
insert into products values('2018-04-16 00:00:00','Ibm','Datastage','2018-04-17 00:00:00','N');
insert into products values('2018-06-18 00:00:00','Microsoft','C#','2018-06-21 00:00:00','Y');
insert into products values('2018-06-19 00:00:00','ETL',' Informatica','2018-06-24 00:00:00','Y');
insert into products values('2018-06-22 00:00:00','Microsoft','WCF','2018-06-23 00:00:00','Y');
insert into products values('2018-04-19 00:00:00','Hadoop',' Hive','2018-04-20 00:00:00',' Y');
insert into products values('2018-04-16 00:00:00','Testing','Database','2018-04-20 00:00:00','N');
insert into products values('2018-04-24 00:00:00','Ibm','Cognos',' 2018-04-27 00:00:00','Y');
insert into products values('2018-06-07 00:00:00','Microsoft','C#','2018-06-08 00:00:00','Y');
insert into products values('2018-04-02 00:00:00','Java','Struts','2018-04-05 00:00:00','Y');
insert into products values('2018-05-01 00:00:00','Microsoft','C++','2018-05-04 00:00:00','Y');
insert into products values('2018-04-10 00:00:00','ETL',' Datastage','2018-04-14 00:00:00','N');
insert into products values('2018-04-23 00:00:00','Ibm','AI','2018-04-25 00:00:00','Y');
insert into products values('2018-04-03 00:00:00','JAVA','Struts','2018-04-04 00:00:00','N');
insert into products values('2018-04-23 00:00:00','Pega','Pega5.4','2018-04-25 00:00:00','N');
insert into products values('2018-05-28 00:00:00','Java',' Jasperreports','2018-05-30 00:00:00','Y');
insert into products values('2018-05-28 00:00:00','IBM','Watson','2018-05-29 00:00:00','Y');
insert into products values('2018-05-30 00:00:00','Salesforce','Paradot','2018-05-31 00:00:00','Y');
insert into products values('2018-05-10 00:00:00','Oracle',' orace12c','2018-05-11 00:00:00','Y');
insert into products values('2018-06-11 00:00:00','Ibm','Cognos',' 2018-06-13 00:00:00','Y');
insert into products values('2018-06-13 00:00:00','Ibm','Datastage','2018-06-17 00:00:00','Y');
+-----------------+------------+-------------+--------------------------+---------------
Created_Date product_id Product_Name Released_Date IS_UPDATED
+-----------------+------------+---------------+--------------------------+---------------
2018-04-25 00:00:00 Pega Pega5.0 2018-04-27 00:00:00 N
2018-05-11 00:00:00 Oracle orace11g 2018-05-13 00:00:00 Y
2018-04-04 00:00:00 Oracle OracleBPM 2018-04-06 00:00:00 Y
2018-06-05 00:00:00 Ibm Cognos 2018-06-08 00:00:00 Y
2018-05-03 00:00:00 Microsoft C++ 2018-05-05 00:00:00 Y
2018-05-21 00:00:00 Microsoft C# 2018-05-25 00:00:00 Y
2018-04-10 00:00:00 Salesforce CPQ 2018-04-13 00:00:00 Y
2018-03-12 00:00:00 Java Struts 2018-03-15 00:00:00 Y
2018-04-12 00:00:00 Salesforce Analytics 2018-04-13 00:00:00 Y
2018-05-09 00:00:00 Microsoft Asp 2018-05-11 00:00:00 Y
2018-05-28 00:00:00 Salesforce Marketing 2018-05-31 00:00:00 N
2018-04-11 00:00:00 ETL Informatica 2018-04-12 00:00:00 Y
2018-03-26 00:00:00 Oracle orace11g 2018-03-30 00:00:00 Y
2018-04-19 00:00:00 Oracle obiee 2018-04-20 00:00:00 Y
2018-04-16 00:00:00 Ibm Datastage 2018-04-17 00:00:00 N
2018-06-18 00:00:00 Microsoft C# 2018-06-21 00:00:00 Y
2018-06-19 00:00:00 ETL Informatica 2018-06-24 00:00:00 Y
2018-06-22 00:00:00 Microsoft WCF 2018-06-23 00:00:00 Y
2018-04-19 00:00:00 Hadoop Hive 2018-04-20 00:00:00 Y
2018-04-16 00:00:00 Testing Database 2018-04-20 00:00:00 N
2018-04-24 00:00:00 Ibm Cognos 2018-04-27 00:00:00 Y
2018-06-07 00:00:00 Microsoft C# 2018-06-08 00:00:00 Y
2018-04-02 00:00:00 Java Struts 2018-04-05 00:00:00 Y
2018-05-01 00:00:00 Microsoft C++ 2018-05-04 00:00:00 Y
2018-04-10 00:00:00 ETL Datastage 2018-04-14 00:00:00 N
2018-04-23 00:00:00 Ibm AI 2018-04-25 00:00:00 Y
2018-04-03 00:00:00 JAVA Struts 2018-04-04 00:00:00 N
2018-04-23 00:00:00 Pega Pega5.4 2018-04-25 00:00:00 N
2018-05-28 00:00:00 Java Jasperreports 2018-05-30 00:00:00 Y
2018-05-28 00:00:00 IBM Watson 2018-05-29 00:00:00 Y
2018-05-30 00:00:00 Salesforce Paradot 2018-05-31 00:00:00 Y
2018-05-10 00:00:00 Oracle orace12c 2018-05-11 00:00:00 Y
2018-06-11 00:00:00 Ibm Cognos 2018-06-13 00:00:00 Y
2018-06-13 00:00:00 Ibm Datastage 2018-06-17 00:00:00 Y
Required output, based on the conditions below.
For Total_productIds, Created_Date should be greater than 2018-04-01 00:00:00 and less than 2018-06-30 00:00:00,
i.e. Created_Date > '2018-04-01 00:00:00' and Created_Date < '2018-06-30 00:00:00'.
For Total_ProductNames, Created_Date should be greater than 2018-04-01 00:00:00 and Released_Date should be less than 2018-06-30 00:00:00,
i.e. Created_Date > '2018-04-01 00:00:00' and Released_Date < '2018-06-30 00:00:00'.
For Total_IS_Updated, Created_Date should be greater than 2018-04-01 00:00:00 and less than 2018-06-30 00:00:00, and IS_UPDATED = 'Y',
i.e. Created_Date > '2018-04-01 00:00:00' and Created_Date < '2018-06-30 00:00:00' and IS_UPDATED = 'Y'.
WEEK NO. Total_productIds Total_ProductNames Total_IS_Updated(if 'Y')
Firstweek(2018-04-01) 0 0 0
Secondweek(2018-04-02 to 2018-04-08) 3 2 2
Thirdweek(2018-04-09 to 2018-04-15) 3 5 4
Fourthweek(2018-04-16 to 2018-04-22) 4 4 2
Fifthweek(2018-04-23 to 2018-04-29) 3 4 2
Firstweek(2018-05-01 to 2018-05-06) 1 2 2
Secondweek(2018-05-07 to 2018-05-13) 2 3 3
Thirdweek(2018-05-14 to 2018-05-20) 0 0 0
Fourthweek(2018-05-21 to 2018-05-27) 1 1 0
Fifthweek(2018-05-28 to 2018-05-31) 3 4 3
Firstweek(2018-06-01 to 2018-06-03) 0 0 0
Secondweek(2018-06-04 to 2018-06-10) 2 2 2
Thirdweek(2018-06-11 to 2018-06-17) 1 2 2
Fourthweek(2018-06-18 to 2018-06-24) 2 3 3
Fifthweek(2018-06-25 to 2018-06-30) 0 0 0

Since you mentioned fixed interval conditions, I have hardcoded them. This query fetches the data and groups it weekly.
I have changed the WEEK_NO column format from Firstweek(2018-04-01) to week 1 of 04/2018 to keep it fast.
-- Note: in the posted DDL the "Created_Date" shown in the required output is the
-- Released_Date column, and the "Released_Date" in the conditions is revision_date.
-- Both are stored as varchar(40), so they are converted to datetime before DATEPART/EOMONTH.
SELECT 'week ' + CAST(DATEPART(wk, CONVERT(datetime, Released_Date))
                      - DATEPART(wk, DATEADD(day, 1, EOMONTH(CONVERT(datetime, Released_Date), -1)))
                      + 1 AS varchar(2))
       + ' of ' + FORMAT(CONVERT(datetime, Released_Date), 'MM/yyyy') AS WEEK_NO, -- e.g. week 1 of 04/2018
       SUM(CASE
               WHEN Released_Date > '2018-04-01 00:00:00'
                AND Released_Date < '2018-06-30 00:00:00'
               THEN 1 ELSE 0
           END) AS Total_productIds,
       SUM(CASE
               WHEN Released_Date > '2018-04-01 00:00:00'
                AND revision_date < '2018-06-30 00:00:00'
               THEN 1 ELSE 0
           END) AS Total_ProductNames,
       SUM(CASE
               WHEN Released_Date > '2018-04-01 00:00:00'
                AND Released_Date < '2018-06-30 00:00:00'
                AND IS_UPDATED = 'Y'
               THEN 1 ELSE 0
           END) AS Total_IS_Updated
FROM products
GROUP BY DATEPART(wk, CONVERT(datetime, Released_Date)),
         DATEPART(wk, DATEADD(day, 1, EOMONTH(CONVERT(datetime, Released_Date), -1))),
         FORMAT(CONVERT(datetime, Released_Date), 'MM/yyyy')
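The question also asks for each week's starting and ending dates instead of a week label. A minimal sketch of that variant, assuming SQL Server and the same column mapping as above (the DATEADD/DATEDIFF idiom below anchors weeks on Monday; adjust if your weeks run Sunday to Saturday):
-- Group on the first day of each week and derive the last day from it.
SELECT DATEADD(week, DATEDIFF(week, 0, CONVERT(datetime, Released_Date)), 0)                  AS week_start,
       DATEADD(day, 6, DATEADD(week, DATEDIFF(week, 0, CONVERT(datetime, Released_Date)), 0)) AS week_end,
       SUM(CASE WHEN Released_Date > '2018-04-01 00:00:00'
                 AND Released_Date < '2018-06-30 00:00:00' THEN 1 ELSE 0 END) AS Total_productIds,
       SUM(CASE WHEN Released_Date > '2018-04-01 00:00:00'
                 AND revision_date < '2018-06-30 00:00:00' THEN 1 ELSE 0 END) AS Total_ProductNames,
       SUM(CASE WHEN Released_Date > '2018-04-01 00:00:00'
                 AND Released_Date < '2018-06-30 00:00:00'
                 AND IS_UPDATED = 'Y' THEN 1 ELSE 0 END) AS Total_IS_Updated
FROM products
GROUP BY DATEADD(week, DATEDIFF(week, 0, CONVERT(datetime, Released_Date)), 0)
ORDER BY week_start;
Note that the required output clips each week to its month (e.g. 2018-05-28 to 2018-05-31), which would need extra logic on top of this sketch.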

Related

How to allocate a list of payments to a list of invoices/charges in SQL?

Let's say I have the following two tables. The first is invoice data.
customer_id   scheduled_payment_date   scheduled_total_payment
1004          2021-04-08 00:00:00      1300
1004          2021-04-29 00:00:00      1300
1004          2021-05-13 00:00:00      1300
1004          2021-06-11 00:00:00      1300
1004          2021-06-26 00:00:00      1300
1004          2021-07-12 00:00:00      1300
1004          2021-07-26 00:00:00      1300
1003          2021-04-05 00:00:00      2012
1003          2021-04-21 00:00:00      2012
1003          2021-05-05 00:00:00      2012
1003          2021-05-17 00:00:00      2012
1003          2021-06-02 00:00:00      2012
1003          2021-06-17 00:00:00      2012
The second is payment data.
customer_id   payment_date          total_payment
1003          2021-04-06 00:00:00   2012
1003          2021-04-16 00:00:00   2012
1003          2021-05-03 00:00:00   2012
1003          2021-05-18 00:00:00   2012
1003          2021-06-01 00:00:00   2012
1003          2021-06-17 00:00:00   2012
1004          2021-04-06 00:00:00   1300
1004          2021-04-22 00:00:00   200
1004          2021-04-27 00:00:00   2600
1004          2021-06-11 00:00:00   1300
I want to allocate the payments to the invoices in the correct order, i.e. payments are allocated to the earliest charge first and then, once that is fully paid, allocation moves on to the next earliest charge. The results should look like:
customer_id   payment_date          scheduled_payment_date   total_payment   payment_allocation   scheduled_total_payment
1004          2021-04-06 00:00:00   2021-04-08 00:00:00      1300            1300                 1300
1004          2021-04-22 00:00:00   2021-04-29 00:00:00      200             200                  1300
1004          2021-04-27 00:00:00   2021-04-29 00:00:00      2600            1100                 1300
1004          2021-04-27 00:00:00   2021-05-13 00:00:00      2600            1300                 1300
1004          2021-04-27 00:00:00   2021-06-11 00:00:00      2600            200                  1300
1004          2021-06-11 00:00:00   2021-06-11 00:00:00      1300            1100                 1300
1004          2021-06-11 00:00:00   2021-06-26 00:00:00      1300            200                  1300
1003          2021-04-06 00:00:00   2021-04-05 00:00:00      2012            2012                 2012
1003          2021-04-16 00:00:00   2021-04-21 00:00:00      2012            2012                 2012
1003          2021-05-03 00:00:00   2021-05-05 00:00:00      2012            2012                 2012
1003          2021-05-18 00:00:00   2021-05-17 00:00:00      2012            2012                 2012
1003          2021-06-01 00:00:00   2021-06-02 00:00:00      2012            2012                 2012
1003          2021-06-17 00:00:00   2021-06-17 00:00:00      2012            2012                 2012
How can I do this in SQL?
When I was searching for the answer to this question I couldn't find a good solution anywhere, so I figured out my own, which I think can be understood and adapted for similar situations.
WITH payments_data AS (
SELECT
*,
SUM(total_payment) OVER (
PARTITION BY customer_id ORDER BY payment_ind ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS total_payment_cum,
COALESCE(SUM(total_payment) OVER (
PARTITION BY customer_id ORDER BY payment_ind ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
), 0) AS prev_total_payment_cum
FROM (
SELECT
customer_id,
payment_date,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY payment_date ASC) AS payment_ind,
total_payment
FROM
payments
) AS payments_ind
), charges_data AS (
SELECT
customer_id,
scheduled_payment_date,
scheduled_total_payment,
SUM(scheduled_total_payment) OVER (
PARTITION BY customer_id ORDER BY scheduled_payment_date ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS scheduled_total_payment_cum,
COALESCE(SUM(scheduled_total_payment) OVER (
PARTITION BY customer_id ORDER BY scheduled_payment_date ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
), 0) AS prev_scheduled_total_payment_cum
FROM
charges
)
SELECT
*,
CASE
WHEN current_balance >= 0 THEN IIF(
updated_charges >= total_payment,
total_payment,
updated_charges
)
WHEN current_balance < 0 THEN IIF(
scheduled_total_payment >= updated_payments,
updated_payments,
scheduled_total_payment
)
ELSE 0
END AS payment_allocation
FROM (
SELECT
pd.customer_id,
pd.payment_ind,
payment_date,
scheduled_payment_date,
total_payment,
scheduled_total_payment,
total_payment_cum,
scheduled_total_payment_cum,
prev_total_payment_cum,
prev_scheduled_total_payment_cum,
prev_total_payment_cum - prev_scheduled_total_payment_cum AS current_balance,
IIF(
prev_total_payment_cum - prev_scheduled_total_payment_cum >= 0,
scheduled_total_payment - (prev_total_payment_cum - prev_scheduled_total_payment_cum),
NULL
) AS updated_charges,
IIF(
prev_total_payment_cum - prev_scheduled_total_payment_cum < 0,
total_payment + (prev_total_payment_cum - prev_scheduled_total_payment_cum),
NULL
) AS updated_payments
FROM
payments_data AS pd
JOIN charges_data AS cd
ON pd.customer_id = cd.customer_id
WHERE
prev_total_payment_cum < scheduled_total_payment_cum
AND total_payment_cum > prev_scheduled_total_payment_cum
) data
There is a lot going on here so I wrote up an article explaining it in detail. You can find it on Medium here.
The basic idea is to track the cumulative amount of payment and charge through each record (the payments_data and charges_data CTEs) and then use this information to identify whether the charge and payment match each other (the WHERE statement that generates the "data" subquery). If they match then identify how much of the payment should be allocated to the charge (all the calculations related to the "current_balance").
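As a minimal, self-contained illustration of the cumulative tracking that payments_data performs, here is the same window-function pattern run against a few of customer 1004's payment rows supplied inline (a sketch; any SQL Server version with window functions will do):
WITH p (customer_id, payment_date, total_payment) AS (
    SELECT * FROM (VALUES
        (1004, CAST('2021-04-06' AS date), 1300),
        (1004, CAST('2021-04-22' AS date),  200),
        (1004, CAST('2021-04-27' AS date), 2600)
    ) AS v (customer_id, payment_date, total_payment)
)
SELECT customer_id,
       payment_date,
       total_payment,
       -- running total including the current payment
       SUM(total_payment) OVER (PARTITION BY customer_id
                                ORDER BY payment_date
                                ROWS UNBOUNDED PRECEDING) AS total_payment_cum,
       -- running total up to, but excluding, the current payment
       COALESCE(SUM(total_payment) OVER (PARTITION BY customer_id
                                         ORDER BY payment_date
                                         ROWS BETWEEN UNBOUNDED PRECEDING
                                                  AND 1 PRECEDING), 0) AS prev_total_payment_cum
FROM p;
Each payment then covers the money interval (prev_total_payment_cum, total_payment_cum], charges_data does the same for charges, and the WHERE clause keeps only the payment/charge pairs whose intervals overlap.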

SQL time-series resampling

I have a ClickHouse table with some rows like this:
id                    created_at
6962098097124188161   2022-07-01 00:00:00
6968111372399976448   2022-07-02 00:00:00
6968111483775524864   2022-07-03 00:00:00
6968465518567268352   2022-07-04 00:00:00
6968952917160271872   2022-07-07 00:00:00
6968952924479332352   2022-07-09 00:00:00
I need to resample the time series and get a running count by date, like this:
created_at            count
2022-07-01 00:00:00   1
2022-07-02 00:00:00   2
2022-07-03 00:00:00   3
2022-07-04 00:00:00   4
2022-07-05 00:00:00   4
2022-07-06 00:00:00   4
2022-07-07 00:00:00   5
2022-07-08 00:00:00   5
2022-07-09 00:00:00   6
I've tried this
SELECT
arrayJoin(
timeSlots(
MIN(created_at),
toUInt32(24 * 3600 * 10),
24 * 3600
)
) as ts,
SUM(
COUNT(*)
) OVER (
ORDER BY
ts
)
FROM
table
but it counts all rows.
How can I get the expected result?
Why not use GROUP BY on created_at, like:
select count(*) from table_name group by toDate(created_at)
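That gives per-day counts, though, while the expected output is a running total over a gap-filled daily date range. A sketch of that combination, assuming a ClickHouse version with window functions and WITH FILL support, and the table/column names from the question:
SELECT
    dt,
    sum(cnt) OVER (ORDER BY dt
                   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_count
FROM
(
    -- one row per day, with missing days filled in (cnt defaults to 0 on filled rows)
    SELECT toStartOfDay(created_at) AS dt, count() AS cnt
    FROM table
    GROUP BY dt
    ORDER BY dt WITH FILL STEP 86400   -- one day, expressed in seconds
)
ORDER BY dt
The inner query produces one row per calendar day (including the empty 2022-07-05, 2022-07-06 and 2022-07-08), and the window sum turns those daily counts into the running total shown in the expected output.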

SQL Select up to a certain sum

I have been trying to figure out a way to write a SQL script that selects rows up to a given sum, and would appreciate any ideas on how to do so.
I am trying to do a stock valuation based on the dates of goods received. At month-end closing, the value of my stocks remaining in the warehouse would be a specified sum of the last received goods.
The below query is done by a couple of unions but reduces to:
SELECT DATE, W1 FROM Table
ORDER BY DATE DESC
Query result:
Row DATE W1
1 2019-02-28 00:00:00 13250
2 2019-02-28 00:00:00 42610
3 2019-02-28 00:00:00 41170
4 2019-02-28 00:00:00 13180
5 2019-02-28 00:00:00 20860
6 2019-02-28 00:00:00 19870
7 2019-02-28 00:00:00 37780
8 2019-02-28 00:00:00 47210
9 2019-02-28 00:00:00 32000
10 2019-02-28 00:00:00 41930
I have thought about solving this issue by calculating a cumulative sum as follows:
Row DATE W1 Cumulative Sum
1 2019-02-28 00:00:00 13250 13250
2 2019-02-28 00:00:00 42610 55860
3 2019-02-28 00:00:00 41170 97030
4 2019-02-28 00:00:00 13180 110210
5 2019-02-28 00:00:00 20860 131070
6 2019-02-28 00:00:00 19870 150940
7 2019-02-28 00:00:00 37780 188720
8 2019-02-28 00:00:00 47210 235930
9 2019-02-28 00:00:00 32000 267930
10 2019-02-28 00:00:00 41930 309860
However, I am stuck when figuring out a way to use a parameter to return only the rows of interest.
For example, if the parameter was specified as '120000', it would return the rows needed to reach a cumulative sum of exactly 120000, splitting the last row as required:
Row DATE W1 Cumulative Sum W1_Select
1 2019-02-28 00:00:00 13250 13250 13250
2 2019-02-28 00:00:00 42610 55860 42610
3 2019-02-28 00:00:00 41170 97030 41170
4 2019-02-28 00:00:00 13180 110210 13180
5 2019-02-28 00:00:00 20860 131070 9790
----------
Total 120000
This just requires some arithmetic:
select t.*,
(case when running_sum < #threshold then w1
else #threshold - (running_sum - w1)
end)
from (select date, w1, sum(w1) over (order by date) as running_sum
from t
) t
where running_sum - w1 < #threshold;
Actually, in your case, the dates are all the same. That is a bit counter-intuitive, but you need to order by the row number for this to work:
select t.*,
(case when running_sum < #threshold then w1
else #threshold - (running_sum - w1)
end)
from (select date, w1, sum(w1) over (order by row) as running_sum
from t
) t
where running_sum - w1 < #threshold;
Here is a db<>fiddle.
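For completeness, here is a sketch of the same idea in SQL Server syntax, with the #threshold placeholder replaced by a declared variable and the source assumed to be a table named stock (a hypothetical name) holding the Row, DATE and W1 columns shown above:
DECLARE @threshold int = 120000;

SELECT t.[Row], t.[DATE], t.W1,
       CASE WHEN running_sum <= @threshold
            THEN t.W1                                -- row fits entirely under the threshold
            ELSE @threshold - (running_sum - t.W1)   -- partial amount taken from the last row
       END AS W1_Select
FROM (SELECT [Row], [DATE], W1,
             SUM(W1) OVER (ORDER BY [Row]) AS running_sum
      FROM stock
     ) t
WHERE running_sum - t.W1 < @threshold;
With the sample data and a threshold of 120000, the last kept row contributes 120000 - 110210 = 9790, matching the W1_Select column in the question.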

PostgreSQL - rank over rows listed in blocks of 0 and 1

I have a table that looks like:
id code date1 date2 block
--------------------------------------------------
20 1234 2017-07-01 2017-07-31 1
15 1234 2017-06-01 2017-06-30 1
13 1234 2017-05-01 2017-05-31 0
11 1234 2017-03-01 2017-03-31 0
9 1234 2017-02-01 2017-02-28 1
8 1234 2017-01-01 2017-01-31 0
7 1234 2016-11-01 2016-11-30 0
6 1234 2016-10-01 2016-10-31 1
2 1234 2016-09-01 2016-09-30 1
I need to rank the rows according to the blocks of 0's and 1's, like:
id code date1 date2 block desired_rank
-------------------------------------------------------------------
20 1234 2017-07-01 2017-07-31 1 1
15 1234 2017-06-01 2017-06-30 1 1
13 1234 2017-05-01 2017-05-31 0 2
11 1234 2017-03-01 2017-03-31 0 2
9 1234 2017-02-01 2017-02-28 1 3
8 1234 2017-01-01 2017-01-31 0 4
7 1234 2016-11-01 2016-11-30 0 4
6 1234 2016-10-01 2016-10-31 1 5
2 1234 2016-09-01 2016-09-30 1 5
I've tried to use rank() and dense_rank(), but the result I end up with is:
id code date1 date2 block dense_rank()
-------------------------------------------------------------------
20 1234 2017-07-01 2017-07-31 1 1
15 1234 2017-06-01 2017-06-30 1 2
13 1234 2017-05-01 2017-05-31 0 1
11 1234 2017-03-01 2017-03-31 0 2
9 1234 2017-02-01 2017-02-28 1 3
8 1234 2017-01-01 2017-01-31 0 3
7 1234 2016-11-01 2016-11-30 0 4
6 1234 2016-10-01 2016-10-31 1 4
2 1234 2016-09-01 2016-09-30 1 5
In the last table the rank ignores the contiguous runs: it treats all the 1's and all the 0's as two separate partitions and just numbers each partition's rows in order.
My query goes like this:
CREATE TEMP TABLE data (id integer,code text, date1 date, date2 date, block integer);
INSERT INTO data VALUES
(20,'1234', '2017-07-01','2017-07-31',1),
(15,'1234', '2017-06-01','2017-06-30',1),
(13,'1234', '2017-05-01','2017-05-31',0),
(11,'1234', '2017-03-01','2017-03-31',0),
(9, '1234', '2017-02-01','2017-02-28',1),
(8, '1234', '2017-01-01','2017-01-31',0),
(7, '1234', '2016-11-01','2016-11-30',0),
(6, '1234', '2016-10-01','2016-10-31',1),
(2, '1234', '2016-09-01','2016-09-30',1);
SELECT *,dense_rank() OVER (PARTITION BY code,block ORDER BY date2 DESC)
FROM data
ORDER BY date2 DESC;
By the way, the database is PostgreSQL.
I hope there's a workaround... Thanks :)
Edit: Note that the blocks of 0's and 1's aren't of equal length.
There's no way to get this result using a single window function; you need to flag the start of each block first and then sum the flags:
SELECT *,
Sum(flag) -- now sum the 0/1 to create the "rank"
Over (PARTITION BY code
ORDER BY date2 DESC)
FROM
(
SELECT *,
CASE
WHEN Lag(block) -- check if this is the 1st row of a new block
Over (PARTITION BY code
ORDER BY date2 DESC) = block
THEN 0
ELSE 1
END AS flag
FROM DATA
) AS dt
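The same two-step logic can be written with a CTE in PostgreSQL, which some find easier to read (a sketch against the temp table from the question):
WITH flagged AS (
    SELECT *,
           CASE WHEN lag(block) OVER (PARTITION BY code ORDER BY date2 DESC) = block
                THEN 0
                ELSE 1                      -- 1 marks the first row of each new run of 0's or 1's
           END AS flag
    FROM data
)
SELECT id, code, date1, date2, block,
       sum(flag) OVER (PARTITION BY code ORDER BY date2 DESC) AS desired_rank
FROM flagged
ORDER BY date2 DESC;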

SQL Date Range Query - Table Comparison

I have two SQL Server tables containing the following information:
Table t_venues:
venue_id is unique
venue_id | start_date | end_date
1 | 01/01/2014 | 02/01/2014
2 | 05/01/2014 | 05/01/2014
3 | 09/01/2014 | 15/01/2014
4 | 20/01/2014 | 30/01/2014
Table t_venueuser:
venue_id is not unique
venue_id | start_date | end_date
1 | 02/01/2014 | 02/01/2014
2 | 05/01/2014 | 05/01/2014
3 | 09/01/2014 | 10/01/2014
4 | 23/01/2014 | 25/01/2014
From these two tables I need to find the dates that haven't been selected for each range, so the output would look like this:
venue_id | start_date | end_date
1 | 01/01/2014 | 01/01/2014
3 | 11/01/2014 | 15/01/2014
4 | 20/01/2014 | 22/01/2014
4 | 26/01/2014 | 30/01/2014
I can compare the two tables and get the date ranges from t_venues to appear in my query using 'except' but I can't get the query to produce the non-selected dates. Any help would be appreciated.
Calendar Table!
Another perfect candidate for a calendar table. If you can't be bothered to search for one, here's one I made earlier.
Setup Data
DECLARE @t_venues table (
venue_id int
, start_date date
, end_date date
);
INSERT INTO @t_venues (venue_id, start_date, end_date)
VALUES (1, '2014-01-01', '2014-01-02')
, (2, '2014-01-05', '2014-01-05')
, (3, '2014-01-09', '2014-01-15')
, (4, '2014-01-20', '2014-01-30')
;
DECLARE @t_venueuser table (
venue_id int
, start_date date
, end_date date
);
INSERT INTO @t_venueuser (venue_id, start_date, end_date)
VALUES (1, '2014-01-02', '2014-01-02')
, (2, '2014-01-05', '2014-01-05')
, (3, '2014-01-09', '2014-01-10')
, (4, '2014-01-23', '2014-01-25')
;
The Query
SELECT t_venues.venue_id
, calendar.the_date
, CASE WHEN t_venueuser.venue_id IS NULL THEN 1 ELSE 0 END As is_available
FROM dbo.calendar /* see: http://gvee.co.uk/files/sql/dbo.numbers%20&%20dbo.calendar.sql for an example */
INNER
JOIN @t_venues As t_venues
ON t_venues.start_date <= calendar.the_date
AND t_venues.end_date >= calendar.the_date
LEFT
JOIN @t_venueuser As t_venueuser
ON t_venueuser.venue_id = t_venues.venue_id
AND t_venueuser.start_date <= calendar.the_date
AND t_venueuser.end_date >= calendar.the_date
ORDER
BY t_venues.venue_id
, calendar.the_date
;
The Result
venue_id the_date is_available
----------- ----------------------- ------------
1 2014-01-01 00:00:00.000 1
1 2014-01-02 00:00:00.000 0
2 2014-01-05 00:00:00.000 0
3 2014-01-09 00:00:00.000 0
3 2014-01-10 00:00:00.000 0
3 2014-01-11 00:00:00.000 1
3 2014-01-12 00:00:00.000 1
3 2014-01-13 00:00:00.000 1
3 2014-01-14 00:00:00.000 1
3 2014-01-15 00:00:00.000 1
4 2014-01-20 00:00:00.000 1
4 2014-01-21 00:00:00.000 1
4 2014-01-22 00:00:00.000 1
4 2014-01-23 00:00:00.000 0
4 2014-01-24 00:00:00.000 0
4 2014-01-25 00:00:00.000 0
4 2014-01-26 00:00:00.000 1
4 2014-01-27 00:00:00.000 1
4 2014-01-28 00:00:00.000 1
4 2014-01-29 00:00:00.000 1
4 2014-01-30 00:00:00.000 1
(21 row(s) affected)
The Explanation
Our calendar table contains an entry for every date.
We join our t_venues (as an aside, if you have the choice, lose the t_ prefix!) to return every day between our start_date and end_date. Example output for venue_id=4 for just this join:
venue_id the_date
----------- -----------------------
4 2014-01-20 00:00:00.000
4 2014-01-21 00:00:00.000
4 2014-01-22 00:00:00.000
4 2014-01-23 00:00:00.000
4 2014-01-24 00:00:00.000
4 2014-01-25 00:00:00.000
4 2014-01-26 00:00:00.000
4 2014-01-27 00:00:00.000
4 2014-01-28 00:00:00.000
4 2014-01-29 00:00:00.000
4 2014-01-30 00:00:00.000
(11 row(s) affected)
Now we have one row per day, we [outer] join our t_venueuser table. We join this in much the same manner as before, but with one added twist: we need to join based on the venue_id too!
Running this for venue_id=4 gives this result:
venue_id the_date t_venueuser_venue_id
----------- ----------------------- --------------------
4 2014-01-20 00:00:00.000 NULL
4 2014-01-21 00:00:00.000 NULL
4 2014-01-22 00:00:00.000 NULL
4 2014-01-23 00:00:00.000 4
4 2014-01-24 00:00:00.000 4
4 2014-01-25 00:00:00.000 4
4 2014-01-26 00:00:00.000 NULL
4 2014-01-27 00:00:00.000 NULL
4 2014-01-28 00:00:00.000 NULL
4 2014-01-29 00:00:00.000 NULL
4 2014-01-30 00:00:00.000 NULL
(11 row(s) affected)
See how we have a NULL value for rows where there is no t_venueuser record. Genius, no? ;-)
So in my first query I gave you a quick CASE statement that shows availability (1=available, 0=not available). This is for illustration only, but could be useful to you.
You can then either wrap the query up and apply an extra filter on this calculated column, or simply add a WHERE clause: WHERE t_venueuser.venue_id IS NULL will do the same trick.
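To get from the per-day availability back to the date ranges asked for in the question, one option is the classic trick of subtracting a row number from each available date so that consecutive days share a group key. A sketch, reusing the same dbo.calendar table and the table variables above:
WITH available_days AS (
    SELECT t_venues.venue_id, calendar.the_date
    FROM dbo.calendar
    INNER JOIN @t_venues AS t_venues
            ON t_venues.start_date <= calendar.the_date
           AND t_venues.end_date   >= calendar.the_date
    LEFT JOIN @t_venueuser AS t_venueuser
           ON t_venueuser.venue_id    = t_venues.venue_id
          AND t_venueuser.start_date <= calendar.the_date
          AND t_venueuser.end_date   >= calendar.the_date
    WHERE t_venueuser.venue_id IS NULL                    -- keep only the unbooked days
)
SELECT venue_id,
       MIN(the_date) AS start_date,
       MAX(the_date) AS end_date
FROM (SELECT venue_id, the_date,
             -- consecutive dates minus an increasing row number collapse to the same anchor
             DATEADD(day, -CAST(ROW_NUMBER() OVER (PARTITION BY venue_id ORDER BY the_date) AS int),
                     the_date) AS grp
      FROM available_days
     ) AS d
GROUP BY venue_id, grp
ORDER BY venue_id, start_date;
With the sample data this collapses the available days into the four ranges shown in the question (1: 01/01, 3: 11/01-15/01, 4: 20/01-22/01 and 26/01-30/01).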
This is a complete hack, but it gives the results you require. I've only tested it on the data you provided, so there may well be gotchas with larger sets.
In general, what you are solving here is a variation of the gaps and islands problem: this is (briefly) a sequence where some items are missing. The missing items are referred to as gaps and the existing items are referred to as islands. If you would like to understand this issue in general, check a few of these articles:
Simple talk article
blogs.MSDN article
SO answers tagged gaps-and-islands
Code:
;with dates as
(
SELECT vdates.venue_id,
vdates.vdate
FROM ( SELECT DATEADD(d,sv.number,v.start_date) vdate
, v.venue_id
FROM t_venues v
INNER JOIN master..spt_values sv
ON sv.type='P'
AND sv.number BETWEEN 0 AND datediff(d, v.start_date, v.end_date)) vdates
LEFT JOIN t_venueuser vu
ON vdates.vdate >= vu.start_date
AND vdates.vdate <= vu.end_date
AND vdates.venue_id = vu.venue_id
WHERE ISNULL(vu.venue_id,-1) = -1
)
SELECT venue_id, ISNULL([1],[2]) StartDate, [2] EndDate
FROM (SELECT venue_id, rDate, ROW_NUMBER() OVER (PARTITION BY venue_id, DateType ORDER BY rDate) AS rType, DateType as dType
FROM( SELECT d1.venue_id
,d1.vdate AS rDate
,'1' AS DateType
FROM dates AS d1
LEFT JOIN dates AS d0
ON DATEADD(d,-1,d1.vdate) = d0.vdate
LEFT JOIN dates AS d2
ON DATEADD(d,1,d1.vdate) = d2.vdate
WHERE CASE ISNULL(d2.vdate, '01 Jan 1753') WHEN '01 Jan 1753' THEN '2' ELSE '1' END = 1
AND ISNULL(d0.vdate, '01 Jan 1753') = '01 Jan 1753'
UNION
SELECT d1.venue_id
,ISNULL(d2.vdate,d1.vdate)
,'2'
FROM dates AS d1
LEFT JOIN dates AS d2
ON DATEADD(d,1,d1.vdate) = d2.vdate
WHERE CASE ISNULL(d2.vdate, '01 Jan 1753') WHEN '01 Jan 1753' THEN '2' ELSE '1' END = 2
) res
) src
PIVOT (MIN (rDate)
FOR dType IN
( [1], [2] )
) AS pvt
Results:
venue_id StartDate EndDate
1 2014-01-01 2014-01-01
3 2014-01-11 2014-01-15
4 2014-01-20 2014-01-22
4 2014-01-26 2014-01-30