group by on date - sql

I have two tables, Sales and Calls as follows:
**Sales**
CUST_ID INT primary key,
CUST_NM Varchar(40),
Sale_date Datetime2,
SALES Money);
CUST_ID CUST_NM Sale_date SALES
1 Dom 2015-01-01 15:00:02.3000000 10.00
2 Brian 2015-01-02 15:00:02.3000000 12.00
3 Stu 2015-01-03 15:00:02.3000000 21.00
4 John 2015-01-04 15:00:02.3000000 41.00
5 Jack 2015-01-05 15:00:02.3000000 51.00
6 Jill 2015-01-05 15:00:02.3000000 61.00
7 Steve 2015-01-04 15:00:02.3000000 16.00
8 Stacey 2015-01-03 15:00:02.3000000 19.00
9 Lacey 2015-01-03 15:00:02.3000000 30.00
Calls
NAME Varchar(40),
CALL_DATE Date,
TOTAL_CALLS INT
NAME CALL_DATE TOTAL_CALLS
Dom 2015-01-01 2
Brian 2015-01-02 4
Stu 2015-01-03 3
John 2015-01-04 5
Jack 2015-01-05 6
Jill 2015-01-05 10
Steve 2015-01-04 8
Stacey 2015-01-03 7
Lacey 2015-01-03 9
I want to write a select statement that brings back the date, gross sales, and the total calls from both Sales and Calls, joined on date.
Here is what I wrote, and I think it should be right, but somehow I am not getting the right output.
select Calls.CALL_DATE, sum(Sales.SALES) as gross_sale, sum(Calls.TOTAL_CALLS) as gross_total_calls
from Sales
join
Calls
on convert (date,sales.Sale_date)=calls.CALL_DATE
group by Calls.CALL_DATE
order by Calls.CALL_DATE
The output I am getting is
CALL_DATE gross_sale gross_total_calls
2015-01-01 10.00 2
2015-01-02 12.00 4
2015-01-03 210.00 57
2015-01-04 114.00 26
2015-01-05 224.00 32
Where am I going wrong??

You are generating a Cartesian product for each day. You need to aggregate before the join. Or, you can do this with a union all and aggregation:
-- Stack sales rows and call rows into one (dte, sales, calls) shape, padding the
-- measure that does not apply with 0, then total both measures per date.
-- Nothing is joined, so a date with several rows on each side cannot multiply.
with sc as (
    select cast(s.sale_date as date) as dte,
           s.sales                   as sales,
           0                         as calls
    from sales s
    union all
    select c.call_date,
           0,
           c.total_calls
    from calls c
)
select sc.dte,
       sum(sc.sales) as sales,
       sum(sc.calls) as total_calls
from sc
group by sc.dte
order by sc.dte;

Here is the alternative Gordon suggested (aggregate each table before joining), written with table variables so that it is a runnable test script.
Note the two extra rows of data and the FULL OUTER JOIN which allows all data to be returned.
-- Runnable test script: load the sample data into table variables, aggregate
-- each table per day, then FULL OUTER JOIN the two daily totals so that dates
-- present in only one table are still returned.
-- Table variables must be named with a leading @ (# is for temp tables created
-- with CREATE TABLE).
declare @Sales table (CUST_ID INT primary key, CUST_NM Varchar(40), Sale_date Datetime2, SALES Money);
insert into @Sales (CUST_ID, CUST_NM, Sale_date, SALES)
select 1, 'Dom', '2015-01-01 15:00:02.3000000', 10.00 union all
select 2, 'Brian', '2015-01-02 15:00:02.3000000', 12.00 union all
select 3, 'Stu', '2015-01-03 15:00:02.3000000', 21.00 union all
select 4, 'John', '2015-01-04 15:00:02.3000000', 41.00 union all
select 5, 'Jack', '2015-01-05 15:00:02.3000000', 51.00 union all
select 6, 'Jill', '2015-01-05 15:00:02.3000000', 61.00 union all
select 7, 'Steve', '2015-01-04 15:00:02.3000000', 16.00 union all
select 8, 'Stacey', '2015-01-03 15:00:02.3000000', 19.00 union all
select 9, 'Lacey', '2015-01-03 15:00:02.3000000', 30.00 union all
select 10, 'Tom', '2015-01-07 15:00:02.3000000', 1.00; -- extra row: sale on a date with no calls
declare @Calls table (NAME Varchar(40), CALL_DATE Date, TOTAL_CALLS INT);
insert into @Calls (NAME, CALL_DATE, TOTAL_CALLS)
select 'Dom', '2015-01-01', 2 union all
select 'Brian', '2015-01-02', 4 union all
select 'Stu', '2015-01-03', 3 union all
select 'John', '2015-01-04', 5 union all
select 'Jack', '2015-01-05', 6 union all
select 'Jill', '2015-01-05', 10 union all
select 'Steve', '2015-01-04', 8 union all
select 'Stacey', '2015-01-03', 7 union all
select 'Lacey', '2015-01-03', 9 union all
select 'Tom', '2015-01-06', 1; -- extra row: calls on a date with no sales
-- Show the raw inputs.
select * from @Sales;
select * from @Calls;
-- Each side is reduced to one row per date BEFORE the join, so the join is
-- one-to-one and the sums are not inflated by row multiplication.
select COALESCE(a.CALL_DATE, b.CALL_DATE) as CALL_DATE, gross_sale, TOTAL_CALLS
from
( select convert(date, Sale_date) as CALL_DATE, sum(SALES) as gross_sale
from @Sales
group by convert(date, Sale_date)
) a
full outer join
( select CALL_DATE, SUM(TOTAL_CALLS) as TOTAL_CALLS
from @Calls
group by CALL_DATE
) b on a.CALL_DATE = b.CALL_DATE
-- Sort on the coalesced date: a.CALL_DATE is NULL for dates that only have calls.
order by COALESCE(a.CALL_DATE, b.CALL_DATE);

Related

SQL Query to fetch the latest payment method and invoice date

There are four tables
First - Customer
Second - Invoice
Third - Supplier
Fourth - Supplier_Remit
Tables details are mentioned below
Customer_id
Customer_Account_number
Customer_Status
Supplier_id
Supplier_Remit_id
1
1501
Active
11
111
2
1502
Inactive
12
112
3
1503
Active
13
113
4
1504
Active
14
114
5
1505
Inactive
15
115
Invoice_Date
Invoice_Amount
Invoice_Number
Payment Method
Customer_id
01/01/2023
100
1000001
Cash
1
12/01/2022
150
1000002
Credit Card
1
11/09/2022
200
1000003
Credit Card
1
12/09/2022
300
1000004
Cash
2
04/15/2022
1000
1000005
Cash
2
04/15/2022
1000
1000006
Credit Card
3
10/31/2022
250
1000007
Cash
4
10/25/2022
250
1000008
Cash
4
09/20/2022
130
1000009
Credit Card
5
05/20/2022
120
10000010
Credit Card
5
Supplier_Name
Supplier_id
ABC
11
ACCC
12
ADEF
13
AJKL
14
AFLR
15
City
Country
Supplier_Remit_id
Supplier_id
Boston
US
111
11
Oak
US
112
12
Albany
US
113
13
Madison
US
114
14
Los Ang
US
115
15
I need help finding the most recent payment method, the most recent invoice amount, the number of invoices missing for the current year (2023), and the number of invoices missing for the previous year (2022).
I have written a query that returns the first few columns, but I am unable to extend it to get the details mentioned above.
select c.customer_id,c.customer_account_number,c.customer_status,sr.country,max(i.invoice_date) as Latest receieved_Invoice_date
from
customer c,
invoice i,
supplier s,
supplier_Remit sr
where
c.customer_status='Active' and
sr.supplier_id=s.supplier_id and
c.supplier_remit_id=sr.supplier_remit_id and
c.customer_id=i.customer_id
group by
c.customer_id,c.customer_account_number,c.customer_status,sr.country;
My expected output would be as below
Customer_id
Cust_Acct_Num
Cust_Status
Country
Last_Inv_Rec_Date
1
1501
Active
US
01/01/2023
3
1503
Active
US
04/15/2022
4
1504
Active
US
10/31/2022
Latest_Paym_Method
Lastest_Inv_Amt
Count of Missing Inv for Curr Yr
Cash
100
0
Credit card
1000
1
Cash
250
1
Count of Missing Invoices for Prev Year
10
11
11
You can use MAX(...) KEEP (DENSE_RANK LAST ORDER BY invoice_date) to get values for the latest invoice and conditional aggregation to count the number of months where there are invoices and then subtract from the total number of months to find the missing invoices:
-- One row per active customer: remit country, details of the latest invoice,
-- and the number of months without any invoice in the current and previous year.
SELECT c.Customer_id,
c.Customer_Account_number,
c.Customer_Status,
r.country,
i.last_invoice_date,
i.latest_payment_method,
i.latest_invoice_amount,
-- Months of this year up to and including the current one, minus the months that have an invoice.
EXTRACT(MONTH FROM SYSDATE) - COALESCE(i.invoiced_months_this_year, 0)
AS missing_invoices_this_year,
-- Customers with no invoice row at all get NULL from the outer join, hence COALESCE.
12 - COALESCE(i.invoiced_months_last_year, 0)
AS missing_invoices_last_year
FROM customer c
-- Join on the remit id stored on the customer row: joining on supplier_id alone
-- would duplicate the customer once per remit site of the same supplier.
INNER JOIN supplier_remit r
ON (c.supplier_remit_id = r.supplier_remit_id)
LEFT OUTER JOIN (
SELECT customer_id,
MAX(invoice_date) AS last_invoice_date,
-- KEEP (DENSE_RANK LAST ...) restricts the aggregate to the rows with the latest invoice_date.
MAX(payment_method) KEEP (DENSE_RANK LAST ORDER BY invoice_date)
AS latest_payment_method,
MAX(invoice_amount) KEEP (DENSE_RANK LAST ORDER BY invoice_date)
AS latest_invoice_amount,
-- Distinct months of the current year (up to now) that contain an invoice.
COUNT(
DISTINCT
CASE
WHEN invoice_date < SYSDATE
AND invoice_date >= TRUNC(SYSDATE, 'YY')
THEN TRUNC(invoice_date, 'MM')
END
) AS invoiced_months_this_year,
-- Distinct months of the previous calendar year that contain an invoice.
COUNT(
DISTINCT
CASE
WHEN invoice_date < TRUNC(SYSDATE, 'YY')
AND invoice_date >= ADD_MONTHS(TRUNC(SYSDATE, 'YY'), -12)
THEN TRUNC(invoice_date, 'MM')
END
) AS invoiced_months_last_year
FROM invoice
GROUP BY customer_id
) i
ON (c.customer_id = i.customer_id)
WHERE c.customer_status = 'Active';
Which, for the sample data:
CREATE TABLE customer (Customer_id, Customer_Account_number, Customer_Status, Supplier_id, Supplier_Remit_id) AS
SELECT 1, 1501, 'Active', 11, 111 FROM DUAL UNION ALL
SELECT 2, 1502, 'Inactive', 12, 112 FROM DUAL UNION ALL
SELECT 3, 1503, 'Active', 13, 113 FROM DUAL UNION ALL
SELECT 4, 1504, 'Active', 14, 114 FROM DUAL UNION ALL
SELECT 5, 1505, 'Inactive', 15, 115 FROM DUAL;
CREATE TABLE invoice (Invoice_Date, Invoice_Amount, Invoice_Number, Payment_Method, Customer_id) AS
SELECT DATE '2023-01-01', 100, 1000001, 'Cash', 1 FROM DUAL UNION ALL
SELECT DATE '2022-12-01', 150, 1000002, 'Credit Card', 1 FROM DUAL UNION ALL
SELECT DATE '2022-11-09', 200, 1000003, 'Credit Card', 1 FROM DUAL UNION ALL
SELECT DATE '2022-12-09', 300, 1000004, 'Cash', 2 FROM DUAL UNION ALL
SELECT DATE '2022-04-15', 1000, 1000005, 'Cash', 2 FROM DUAL UNION ALL
SELECT DATE '2022-04-15', 1000, 1000006, 'Credit Card', 3 FROM DUAL UNION ALL
SELECT DATE '2022-10-31', 250, 1000007, 'Cash', 4 FROM DUAL UNION ALL
SELECT DATE '2022-10-25', 250, 1000008, 'Cash', 4 FROM DUAL UNION ALL
SELECT DATE '2022-09-20', 130, 1000009, 'Credit Card', 5 FROM DUAL UNION ALL
SELECT DATE '2022-05-20', 120, 10000010, 'Credit Card', 5 FROM DUAL;
CREATE TABLE supplier (Supplier_Name, Supplier_id) AS
SELECT 'ABC', 11 FROM DUAL UNION ALL
SELECT 'ACCC', 12 FROM DUAL UNION ALL
SELECT 'ADEF', 13 FROM DUAL UNION ALL
SELECT 'AJKL', 14 FROM DUAL UNION ALL
SELECT 'AFLR', 15 FROM DUAL;
CREATE TABLE supplier_remit (City, Country, Supplier_Remit_id, Supplier_id) AS
SELECT 'Boston', 'US', 111, 11 FROM DUAL UNION ALL
SELECT 'Oak', 'US', 112, 12 FROM DUAL UNION ALL
SELECT 'Albany', 'US', 113, 13 FROM DUAL UNION ALL
SELECT 'Madison', 'US', 114, 14 FROM DUAL UNION ALL
SELECT 'Los Ang', 'US', 115, 15 FROM DUAL;
Outputs:
CUSTOMER_ID
CUSTOMER_ACCOUNT_NUMBER
CUSTOMER_STATUS
COUNTRY
LAST_INVOICE_DATE
LATEST_PAYMENT_METHOD
LATEST_INVOICE_AMOUNT
MISSING_INVOICES_THIS_YEAR
MISSING_INVOICES_LAST_YEAR
1
1501
Active
US
2023-01-01 00:00:00
Cash
100
0
10
3
1503
Active
US
2022-04-15 00:00:00
Credit Card
1000
1
11
4
1504
Active
US
2022-10-31 00:00:00
Cash
250
1
11
fiddle
In order to find what's missing, you have to first define what should be there, so you need to create a calendar of every month. Then you can use outer joins to the invoice table to find where there aren't any records for that month for that customer. There are lots of ways to write SQL to do this. Here's one:
-- For every customer: the most recent invoice's amount and payment method, plus
-- the number of months with no invoice in the current and previous year.
WITH month_offsets AS (
    -- Offsets 0..11, one per calendar month; generated from DUAL so no helper table is needed.
    SELECT LEVEL - 1 AS month_offset
    FROM dual
    CONNECT BY LEVEL <= 12
),
months AS (
    -- Months of the current year that have already started ...
    SELECT 'Current' year,
           ADD_MONTHS(TRUNC(SYSDATE,'YYYY'), month_offset) month_start
    FROM month_offsets
    WHERE ADD_MONTHS(TRUNC(SYSDATE,'YYYY'), month_offset) < SYSDATE
    UNION ALL
    -- ... and all twelve months of the previous year.
    SELECT 'Previous' year,
           ADD_MONTHS(TRUNC(ADD_MONTHS(SYSDATE,-12),'YYYY'), month_offset)
    FROM month_offsets
)
SELECT customer.*,
       inv.invoice_amount most_recent_invoice_amount,
       inv.payment_method most_recent_payment_method,
       -- A month is "missed" when the customer has no invoice dated within it.
       (SELECT COUNT(*)
        FROM months
        WHERE months.year = 'Current'
          AND NOT EXISTS (SELECT 1
                          FROM invoice
                          WHERE invoice.customer_id = customer.customer_id
                            AND TRUNC(invoice.invoice_date,'MM') = months.month_start)) missed_current_year_months,
       (SELECT COUNT(*)
        FROM months
        WHERE months.year = 'Previous'
          AND NOT EXISTS (SELECT 1
                          FROM invoice
                          WHERE invoice.customer_id = customer.customer_id
                            AND TRUNC(invoice.invoice_date,'MM') = months.month_start)) missed_previous_year_months
FROM customer
-- OUTER APPLY keeps customers that have no invoice at all (inv.* is then NULL).
OUTER APPLY (SELECT invoice_amount,
                    payment_method
             FROM (SELECT invoice_amount,
                          payment_method,
                          ROW_NUMBER() OVER (ORDER BY invoice_date DESC) seq
                   FROM invoice
                   WHERE invoice.customer_id = customer.customer_id)
             WHERE seq = 1) inv

SQL select lapsed customers with 30 day frequency by day

The goal is to select the count of distinct customer_id's who have not made a purchase in the rolling 30 day period prior to every day in the calendar year 2016. I have created a calendar table in my database to join to.
Here is an example table for reference, let's say you have customers orders normalized as follows:
+-------------+------------+----------+
| customer_id | date | order_id |
+-------------+------------+----------+
| 123 | 01/25/2016 | 1000 |
+-------------+------------+----------+
| 123 | 04/27/2016 | 1025 |
+-------------+------------+----------+
| 444 | 02/02/2016 | 1010 |
+-------------+------------+----------+
| 521 | 01/23/2016 | 998 |
+-------------+------------+----------+
| 521 | 01/24/2016 | 999 |
+-------------+------------+----------+
The goal output is effectively a calendar with 1 row for every single day of 2016 with a count on each day of how many customers "lapsed" on that day, meaning their last purchase was 30 days or more prior from that day of the year. The final output will look like this:
+------------+--------------+
| date | lapsed_count |
+------------+--------------+
| 01/01/2016 | 0 |
+------------+--------------+
| 01/02/2016 | 0 |
+------------+--------------+
| ... | ... |
+------------+--------------+
| 03/01/2016 | 12 |
+------------+--------------+
| 03/02/2016 | 9 |
+------------+--------------+
| 03/03/2016 | 7 |
+------------+--------------+
This data does not exist in 2015, therefore it's not possible for Jan-01-2016 to have a count of lapsed customers because that is the first possible day to ever make a purchase.
So for customer_id #123, they purchased on 01/25/2016 and 04/27/2016. They should have 2 lapse counts because their purchases are more than 30 days apart. One lapse occurring on 2/24/2016 and another lapse on 05/27/2016.
Customer_id#444 only purchased once, so they should have one lapse count, 30 days after 02/02/2016, on 03/03/2016 (2016 is a leap year).
Customer_id#521 is tricky: since they purchased with a frequency of 1 day, we will not count the first purchase on 01/23/2016, so there is only one lapse, starting from their last purchase on 01/24/2016. The count for the lapse will occur on 02/23/2016 (+30 days).
If you have a table of dates, here is one expensive method:
-- For every calendar date, count the customers whose most recent order on or
-- before that date is more than 30 days old.
select date,
sum(case when prev_date < date - 30 then 1 else 0 end) as lapsed
-- Inner query: one row per (calendar date, customer) with that customer's latest
-- order date on or before the calendar date (NULL if they have not ordered yet).
from (select c.date, cu.customer_id, max(o.date) as prev_date
from calendar c cross join
(select distinct customer_id from orders) cu left join -- alias must differ from calendar's alias c
orders o
on o.date <= c.date and o.customer_id = cu.customer_id
-- Group by the customer from the distinct list, not o.customer_id: the latter is
-- NULL when no earlier order exists, which would merge those customers into one group.
group by c.date, cu.customer_id
) oc
group by date;
For each date/customer pair, it determines the latest purchase the customer made before the date. It then uses this information to count the lapsed.
To be honest, this will probably work well on a handful of dates, but not for a full year's worth.
Apologies, I didn't read your question properly the first time around. This query will give you all the lapses you have. It takes each order and uses an analytic function to work out the next order date - if the gap is greater than 30 days then a lapse is recorded
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
;
Now join that to a set of dates and run a COUNT function (enter the year parameter as YYYY) :
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
-- One row per day of the year given by :P_year (YYYY).
-- Rows are generated from DUAL with CONNECT BY LEVEL instead of reading
-- all_tables, which only lists tables visible to the current user and would
-- silently cut the calendar short when it has fewer rows than the year has days.
,calendar (date_value)
AS
(SELECT TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + (LEVEL - 1)
FROM dual
CONNECT BY LEVEL <= (TO_DATE('31/12/'||:P_year,'DD/MM/YYYY') - TO_DATE('01/01/'||:P_year,'DD/MM/YYYY')) + 1
)
SELECT
calendar.date_value
,COUNT(*)
FROM
(
SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
) lapses
,calendar
WHERE 1=1
AND calendar.date_value = TRUNC(lapses.lapse_date)
GROUP BY
calendar.date_value
;
Or if you really want every date printed out then use this :
WITH
cust_orders (customer_id , order_date , order_id )
AS
(SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
)
,lapses
AS
(SELECT
customer_id
,order_date
,order_id
,next_order_date
,order_date + 30 lapse_date
FROM
(SELECT
customer_id
,order_date
,order_id
,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
FROM
cust_orders
)
WHERE NVL(next_order_date,sysdate) - order_date > 30
)
-- One row per day of the year given by :P_year (YYYY).
-- Rows are generated from DUAL with CONNECT BY LEVEL instead of reading
-- all_tables, which only lists tables visible to the current user and would
-- silently cut the calendar short when it has fewer rows than the year has days.
,calendar (date_value)
AS
(SELECT TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + (LEVEL - 1)
FROM dual
CONNECT BY LEVEL <= (TO_DATE('31/12/'||:P_year,'DD/MM/YYYY') - TO_DATE('01/01/'||:P_year,'DD/MM/YYYY')) + 1
)
SELECT
calendar.date_value
,(SELECT COUNT(*)
FROM lapses
WHERE calendar.date_value = lapses.lapse_date
)
FROM
calendar
WHERE 1=1
ORDER BY
calendar.date_value
;
Here's how I'd do it:
WITH your_table AS (SELECT 123 customer_id, to_date('24/01/2016', 'dd/mm/yyyy') order_date, 12345 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('24/01/2016', 'dd/mm/yyyy') order_date, 12346 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('25/01/2016', 'dd/mm/yyyy') order_date, 12347 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('24/02/2016', 'dd/mm/yyyy') order_date, 12347 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('16/03/2016', 'dd/mm/yyyy') order_date, 12348 order_id FROM dual UNION ALL
SELECT 123 customer_id, to_date('18/04/2016', 'dd/mm/yyyy') order_date, 12349 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('20/02/2016', 'dd/mm/yyyy') order_date, 12350 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('01/03/2016', 'dd/mm/yyyy') order_date, 12351 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('03/03/2016', 'dd/mm/yyyy') order_date, 12352 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('18/04/2016', 'dd/mm/yyyy') order_date, 12353 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('20/05/2016', 'dd/mm/yyyy') order_date, 12354 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('23/06/2016', 'dd/mm/yyyy') order_date, 12355 order_id FROM dual UNION ALL
SELECT 456 customer_id, to_date('19/01/2017', 'dd/mm/yyyy') order_date, 12356 order_id FROM dual),
-- end of mimicking your_table with data in it
lapsed_info AS (SELECT customer_id,
order_date,
CASE WHEN TRUNC(SYSDATE) - order_date <= 30 THEN NULL
WHEN COUNT(*) OVER (PARTITION BY customer_id ORDER BY order_date RANGE BETWEEN 1 FOLLOWING AND 30 FOLLOWING) = 0 THEN order_date+30
ELSE NULL
END lapsed_date
FROM your_table),
dates AS (SELECT to_date('01/01/2016', 'dd/mm/yyyy') + LEVEL -1 dt
FROM dual
CONNECT BY to_date('01/01/2016', 'dd/mm/yyyy') + LEVEL -1 <= TRUNC(SYSDATE))
-- One row per calendar day with the number of customers that lapsed on it.
SELECT dates.dt,
-- Count customers, not rows: a customer with several orders on their last order
-- day produces several lapsed_info rows with the same lapsed_date.
-- Days with no match have a NULL customer_id from the outer join and count as 0.
COUNT(DISTINCT li.customer_id) lapsed_count
FROM dates
LEFT OUTER JOIN lapsed_info li ON dates.dt = li.lapsed_date
GROUP BY dates.dt
ORDER BY dates.dt;
Results:
DT LAPSED_COUNT
---------- ------------
01/01/2016 0
<snip>
23/01/2016 0
24/01/2016 0
25/01/2016 0
26/01/2016 0
<snip>
19/02/2016 0
20/02/2016 0
21/02/2016 0
22/02/2016 0
23/02/2016 0
24/02/2016 0
25/02/2016 0
<snip>
29/02/2016 0
01/03/2016 0
02/03/2016 0
03/03/2016 0
04/03/2016 0
<snip>
15/03/2016 0
16/03/2016 0
17/03/2016 0
<snip>
20/03/2016 0
21/03/2016 0
22/03/2016 0
<snip>
30/03/2016 0
31/03/2016 0
01/04/2016 0
02/04/2016 1
03/04/2016 0
<snip>
14/04/2016 0
15/04/2016 1
16/04/2016 0
17/04/2016 0
18/04/2016 0
19/04/2016 0
<snip>
17/05/2016 0
18/05/2016 2
19/05/2016 0
20/05/2016 0
21/05/2016 0
<snip>
18/06/2016 0
19/06/2016 1
20/06/2016 0
21/06/2016 0
22/06/2016 0
23/06/2016 0
24/06/2016 0
<snip>
22/07/2016 0
23/07/2016 1
24/07/2016 0
<snip>
18/01/2017 0
19/01/2017 0
20/01/2017 0
<snip>
08/02/2017 0
This takes your data and uses the analytic COUNT function to work out the number of rows that have a value within 30 days of (but excluding) the current row's date.
Then we apply a case expression to determine that if the row has a date within 30 days of today's date, we'll count those as not lapsed. If a count of 0 was returned, then the row is considered lapsed and we'll output the lapsed date as the order_date plus 30 days. Any other count result means the row has not lapsed.
The above is all worked out in the lapsed_info subquery.
Then all we need to do is list the dates (see the dates subquery) and outer join the lapsed_info subquery to it based on the lapsed_date and then do a count of the lapsed dates for each day.

Determine contiguous date intervals

I have the following table structure:
id int -- more like a group id, not unique in the table
AddedOn datetime -- when the record was added
For a specific id there is at most one record each day. I have to write a query that returns contiguous (at day level) date intervals for each id.
The expected result structure is:
id int
StartDate datetime
EndDate datetime
Note that the time part of AddedOn is available but it is not important here.
To make it clearer, here is some input data:
with data as
(
select * from
(
values
(0, getdate()), --dummy record used to infer column types
(1, '20150101'),
(1, '20150102'),
(1, '20150104'),
(1, '20150105'),
(1, '20150106'),
(2, '20150101'),
(2, '20150102'),
(2, '20150103'),
(2, '20150104'),
(2, '20150106'),
(2, '20150107'),
(3, '20150101'),
(3, '20150103'),
(3, '20150105'),
(3, '20150106'),
(3, '20150108'),
(3, '20150109'),
(3, '20150110')
) as d(id, AddedOn)
where id > 0 -- exclude dummy record
)
select * from data
And the expected result:
id StartDate EndDate
1 2015-01-01 2015-01-02
1 2015-01-04 2015-01-06
2 2015-01-01 2015-01-04
2 2015-01-06 2015-01-07
3 2015-01-01 2015-01-01
3 2015-01-03 2015-01-03
3 2015-01-05 2015-01-06
3 2015-01-08 2015-01-10
Although it looks like a common problem I couldn't find a similar enough question. Also I'm getting closer to a solution and I will post it when (and if) it works but I feel that there should be a more elegant one.
Here's an answer without any fancy joining, simply using GROUP BY and ROW_NUMBER, which is not only simple but also more efficient.
-- Gaps-and-islands: collapse each id's rows into runs of consecutive days.
WITH CTE_dayOfYear
AS
(
-- dyID is the day number of AddedOn counted from a fixed anchor date, so it
-- jumps wherever a day is missing; row_num increases by exactly 1 per row.
SELECT id,
AddedOn,
DATEDIFF(DAY,'20000101',AddedOn) dyID,
ROW_NUMBER() OVER (ORDER BY ID,AddedOn) row_num
FROM data
)
-- Within one id, dyID - row_num stays constant across consecutive days and
-- changes at every gap, so it identifies each island.
SELECT ID,
MIN(AddedOn) StartDate,
MAX(AddedOn) EndDate,
dyID-row_num AS groupID
FROM CTE_dayOfYear
GROUP BY ID,dyID - row_num
-- Sort by column name rather than ordinal position so that reordering the
-- select list cannot silently change the sort.
ORDER BY ID,StartDate,EndDate
The logic is that the dyID is based on the date so there are gaps while row_num has no gaps. So every time there is a gap in dyID, then it changes the difference between row_num and dyID. Then I simply use that difference as my groupID.
In SQL Server 2008 it is a bit of a pain without the LEAD and LAG functions:
WITH data
AS ( SELECT * ,
ROW_NUMBER() OVER ( ORDER BY id, AddedOn ) AS rn
FROM ( VALUES ( 0, GETDATE()), --dummy record used to infer column types
( 1, '20150101'), ( 1, '20150102'), ( 1, '20150104'),
( 1, '20150105'), ( 1, '20150106'), ( 2, '20150101'),
( 2, '20150102'), ( 2, '20150103'), ( 2, '20150104'),
( 2, '20150106'), ( 2, '20150107'), ( 3, '20150101'),
( 3, '20150103'), ( 3, '20150105'), ( 3, '20150106'),
( 3, '20150108'), ( 3, '20150109'), ( 3, '20150110') )
AS d ( id, AddedOn )
WHERE id > 0 -- exclude dummy record
),
diff
AS ( SELECT d1.* ,
CASE WHEN ISNULL(DATEDIFF(dd, d2.AddedOn, d1.AddedOn),
1) = 1 THEN 0
ELSE 1
END AS diff
FROM data d1
LEFT JOIN data d2 ON d1.id = d2.id
AND d1.rn = d2.rn + 1
),
parts
AS ( SELECT * ,
( SELECT SUM(diff)
FROM diff d2
WHERE d2.rn <= d1.rn
) AS p
FROM diff d1
)
SELECT id ,
MIN(AddedOn) AS StartDate ,
MAX(AddedOn) AS EndDate
FROM parts
GROUP BY id ,
p
Output:
id StartDate EndDate
1 2015-01-01 00:00:00.000 2015-01-02 00:00:00.000
1 2015-01-04 00:00:00.000 2015-01-06 00:00:00.000
2 2015-01-01 00:00:00.000 2015-01-04 00:00:00.000
2 2015-01-06 00:00:00.000 2015-01-07 00:00:00.000
3 2015-01-01 00:00:00.000 2015-01-01 00:00:00.000
3 2015-01-03 00:00:00.000 2015-01-03 00:00:00.000
3 2015-01-05 00:00:00.000 2015-01-06 00:00:00.000
3 2015-01-08 00:00:00.000 2015-01-10 00:00:00.000
Walkthrough:
diff
This CTE returns data:
1 2015-01-01 00:00:00.000 1 0
1 2015-01-02 00:00:00.000 2 0
1 2015-01-04 00:00:00.000 3 1
1 2015-01-05 00:00:00.000 4 0
1 2015-01-06 00:00:00.000 5 0
You are joining same table on itself to get the previous row. Then you calculate difference in days between current row and previous row and if the result is 1 day then pick 0 else pick 1.
parts
This CTE selects result from previous step and sums up the new column(it is a cumulative sum. sum of all values of new column from starting till current row), so you are getting partitions to group by:
1 2015-01-01 00:00:00.000 1 0 0
1 2015-01-02 00:00:00.000 2 0 0
1 2015-01-04 00:00:00.000 3 1 1
1 2015-01-05 00:00:00.000 4 0 1
1 2015-01-06 00:00:00.000 5 0 1
2 2015-01-01 00:00:00.000 6 0 1
2 2015-01-02 00:00:00.000 7 0 1
2 2015-01-03 00:00:00.000 8 0 1
2 2015-01-04 00:00:00.000 9 0 1
2 2015-01-06 00:00:00.000 10 1 2
2 2015-01-07 00:00:00.000 11 0 2
3 2015-01-01 00:00:00.000 12 0 2
3 2015-01-03 00:00:00.000 13 1 3
The last step is just a grouping by ID and new column and picking min and max values for dates.
I took the "Islands Solution #3 from SQL MVP Deep Dives" solution from https://www.simple-talk.com/sql/t-sql-programming/the-sql-of-gaps-and-islands-in-sequences/ and applied to your test data:
with
data as
(
select * from
(
values
(0, getdate()), --dummy record used to infer column types
(1, '20150101'),
(1, '20150102'),
(1, '20150104'),
(1, '20150105'),
(1, '20150106'),
(2, '20150101'),
(2, '20150102'),
(2, '20150103'),
(2, '20150104'),
(2, '20150106'),
(2, '20150107'),
(3, '20150101'),
(3, '20150103'),
(3, '20150105'),
(3, '20150106'),
(3, '20150108'),
(3, '20150109'),
(3, '20150110')
) as d(id, AddedOn)
where id > 0 -- exclude dummy record
)
,CTE_Seq
AS
(
SELECT
ID
,SeqNo
,SeqNo - ROW_NUMBER() OVER (PARTITION BY ID ORDER BY SeqNo) AS rn
FROM
data
CROSS APPLY
(
SELECT DATEDIFF(day, '20150101', AddedOn) AS SeqNo
) AS CA
)
SELECT
ID
,DATEADD(day, MIN(SeqNo), '20150101') AS StartDate
,DATEADD(day, MAX(SeqNo), '20150101') AS EndDate
FROM CTE_Seq
GROUP BY ID, rn
ORDER BY ID, StartDate;
Result set
ID StartDate EndDate
1 2015-01-01 00:00:00.000 2015-01-02 00:00:00.000
1 2015-01-04 00:00:00.000 2015-01-06 00:00:00.000
2 2015-01-01 00:00:00.000 2015-01-04 00:00:00.000
2 2015-01-06 00:00:00.000 2015-01-07 00:00:00.000
3 2015-01-01 00:00:00.000 2015-01-01 00:00:00.000
3 2015-01-03 00:00:00.000 2015-01-03 00:00:00.000
3 2015-01-05 00:00:00.000 2015-01-06 00:00:00.000
3 2015-01-08 00:00:00.000 2015-01-10 00:00:00.000
I'd recommend you to examine the intermediate results of CTE_Seq to understand how it actually works. Just put
select * from CTE_Seq
instead of the final SELECT ... GROUP BY .... You'll get this result set:
ID SeqNo rn
1 0 -1
1 1 -1
1 3 0
1 4 0
1 5 0
2 0 -1
2 1 -1
2 2 -1
2 3 -1
2 5 0
2 6 0
3 0 -1
3 2 0
3 4 1
3 5 1
3 7 2
3 8 2
3 9 2
Each date is converted into a sequence number by DATEDIFF(day, '20150101', AddedOn). ROW_NUMBER() generates a set of sequential numbers without gaps, so when these numbers are subtracted from a sequence with gaps the difference jumps/changes. The difference stays the same until the next gap, so in the final SELECT GROUP BY ID, rn brings all rows from the same island together.
Here is a simple solution that does not use analytics. I tend not to use analytics because I work with many different DBMSs, and many don't (yet) have them implemented, and even those that do have different syntaxes. I just have the habit of writing generic code whenever possible.
with
Data( ID, AddedOn )as(
select 1, convert( date, '20150101' ) union all
select 1, '20150102' union all
select 1, '20150104' union all
select 1, '20150105' union all
select 1, '20150106' union all
select 2, '20150101' union all
select 2, '20150102' union all
select 2, '20150103' union all
select 2, '20150104' union all
select 2, '20150106' union all
select 2, '20150107' union all
select 3, '20150101' union all
select 3, '20150103' union all
select 3, '20150105' union all
select 3, '20150106' union all
select 3, '20150108' union all
select 3, '20150109' union all
select 3, '20150110'
)
select d.ID, d.AddedOn StartDate, IsNull( d1.AddedOn, '99991231' ) EndDate
from Data d
left join Data d1
on d1.ID = d.ID
and d1.AddedOn =(
select Min( AddedOn )
from data
where ID = d.ID
and AddedOn > d.AddedOn );
In your situation I assume that ID and AddedOn form a composite PK and so are indexed. Thus, the query will run impressively fast even on very large tables.
Also, I used the outer join because it seemed like the last AddedOn date of each ID should be seen in the StartDate column. Instead of NULL I used a common MaxDate value. The NULL could work just as well as a "this is the latest StartDate row" flag.
Here is the output for ID=1:
ID StartDate EndDate
----------- ---------- ----------
1 2015-01-01 2015-01-02
1 2015-01-02 2015-01-04
1 2015-01-04 2015-01-05
1 2015-01-05 2015-01-06
1 2015-01-06 9999-12-31
I'd like to post my own solution too because it's yet another approach:
with data as
(
...
),
temp as
(
-- Pair every row with the same id's row for the previous and for the next
-- calendar day (NULL when there is none). Both sides are compared as DATE
-- because AddedOn carries a time part that must not affect the match.
select d.id
,d.AddedOn
,dprev.AddedOn as PrevAddedOn
,dnext.AddedOn as NextAddedOn
FROM data d
left JOIN
data dprev on dprev.id = d.id
and cast(dprev.AddedOn as date) = dateadd(day, -1, cast(d.AddedOn as date))
left JOIN
data dnext on dnext.id = d.id
and cast(dnext.AddedOn as date) = dateadd(day, 1, cast(d.AddedOn as date))
),
starts AS
(
select id
,AddedOn
from temp
where PrevAddedOn is NULL
),
ends as
(
select id
,AddedOn
from temp
where NextAddedon is NULL
)
SELECT s.id as id
,s.AddedOn as StartDate
,(select min(e.AddedOn) from ends e where e.id = s.id and e.AddedOn >= s.AddedOn) as EndDate
from starts s

Generate a result set for every day using values for current day or values from most recent day

Using start and end dates, we need to select the column values for each day or use values from the most recent date for which data exists.
for example, using this sample data
-- Sample postings used by the queries below: one row per location, item,
-- posting date and posting time, with the qoh value recorded at that moment
-- (qoh is presumably "quantity on hand" -- confirm with the data owner).
CREATE TABLE #t1
(
    location char(2),
    item     varchar(6),
    postdate date,
    posttime time,
    qoh      int
);

-- The column list is explicit so the load does not depend on column order.
-- Dates use the YYYYMMDD form, which SQL Server parses the same way under
-- every DATEFORMAT/language setting (m/d/yyyy strings do not).
-- Times are written hh:mm so they do not rely on how a bare four-digit
-- string such as '0900' happens to be parsed.
INSERT INTO #t1 (location, item, postdate, posttime, qoh)
VALUES
    ('FL', 'itemA', '20140101', '09:00', 10),
    ('FL', 'itemA', '20140101', '01:00', 11),
    ('NY', 'itemA', '20140101', '11:00', 50),
    ('NY', 'itemA', '20140101', '09:00', 51),
    ('FL', 'itemB', '20140101', '09:00', 100),
    ('FL', 'itemB', '20140101', '01:00', 101),
    ('NY', 'itemB', '20140101', '11:00', 150),
    ('NY', 'itemB', '20140101', '09:00', 151),
    ('FL', 'itemA', '20140105', '09:00', 510),
    ('FL', 'itemA', '20140105', '01:00', 511),
    ('NY', 'itemA', '20140105', '11:00', 550),
    ('NY', 'itemA', '20140105', '09:00', 551),
    ('FL', 'itemB', '20140105', '09:00', 5100),
    ('FL', 'itemB', '20140105', '01:00', 5101),
    ('NY', 'itemB', '20140105', '11:00', 5150),
    ('NY', 'itemB', '20140105', '09:00', 5151);
I want to select each location, item and the last balance for all dates between 1/1 and 1/6. For dates without entries, such as 1/2, I want to use the values from the last known day (1/1), and apply the same logic to all other dates.
my desired results are
date Location Item OHB2
2014-01-01 FL itemA 11
2014-01-01 FL itemB 101
2014-01-01 NY itemA 51
2014-01-01 NY itemB 151
2014-01-02 FL itemA 11
2014-01-02 FL itemB 101
2014-01-02 NY itemA 51
2014-01-02 NY itemB 151
2014-01-03 FL itemA 11
2014-01-03 FL itemB 101
2014-01-03 NY itemA 51
2014-01-03 NY itemB 151
2014-01-04 FL itemA 11
2014-01-04 FL itemB 101
2014-01-04 NY itemA 51
2014-01-04 NY itemB 151
2014-01-05 FL itemA 510
2014-01-05 FL itemB 5100
2014-01-05 NY itemA 550
2014-01-05 NY itemB 5150
2014-01-06 FL itemA 510
2014-01-06 FL itemB 5100
2014-01-06 NY itemA 550
2014-01-06 NY itemB 5150
This is what I've tried so far. It works until I start using larger date ranges; it then runs very slowly in my actual database, where 1 million+ rows are involved. When the SELECT TOP portion is commented out, it runs very quickly.
-- One output row per calendar day and per (location, item) pair; OHB2 is the
-- qoh of the latest #t1 posting on or before that day.
WITH dates AS
(
    -- Anchor row: first day of the range. RN is a constant join key.
    SELECT CAST('1/1/2014' AS DATE) AS 'date',
           1 AS RN
    UNION ALL
    -- Recursive step: add one day until the end of the range is reached.
    SELECT DATEADD(day, 1, prev_day.date),
           1 AS RN
    FROM dates prev_day
    WHERE DATEADD(day, 1, prev_day.date) <= '1/6/2014'
)
SELECT cal.[date],
       pairs.Location,
       pairs.Item,
       (
           -- Latest posting for this pair on or before the calendar day.
           SELECT TOP 1 posting.qoh
           FROM #t1 posting
           WHERE posting.location = pairs.Location
             AND posting.item = pairs.Item
             AND posting.postdate <= cal.[date]
           ORDER BY posting.postdate DESC,
                    posting.posttime DESC
       ) AS OHB2
FROM dates cal
INNER JOIN
(
    -- Distinct (location, item) pairs, tagged with the same constant key as
    -- the dates rows so that every pair joins to every day.
    SELECT DISTINCT
           1 AS RN2,
           src.location AS Location,
           src.item AS Item
    FROM #t1 src
) pairs
    ON cal.RN = pairs.RN2
ORDER BY cal.[date] ASC,
         pairs.Location ASC,
         pairs.Item ASC
OPTION (MAXRECURSION 32767)
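Using your dates CTE, cross join the dates to the distinct location/item pairs and use CROSS APPLY to pick the most recent row for each combination: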
-- For every calendar day in the range and every distinct (location, item)
-- pair in #t1, return the qoh of the latest posting on or before that day.
;WITH dates AS
(
    -- Anchor: first day of the range. YYYYMMDD literals are parsed the same
    -- way under every DATEFORMAT/language setting.
    SELECT CAST('20140101' AS DATE) AS [date]
    UNION ALL
    -- Recursive step: one more day until the last day of the range.
    SELECT DATEADD(day, 1, d.[date])
    FROM dates AS d
    WHERE DATEADD(day, 1, d.[date]) <= '20140106'
)
SELECT d.[date], i.location, i.item, i.qoh
FROM dates AS d
CROSS JOIN (SELECT DISTINCT location, item
            FROM #t1) AS a
CROSS APPLY (SELECT TOP 1 t.location, t.item, t.qoh
             FROM #t1 AS t
             WHERE t.postdate <= d.[date]
               AND a.location = t.location
               AND a.item = t.item
             -- posttime breaks ties between postings on the same day;
             -- without it TOP 1 may return any of that day's rows.
             ORDER BY t.postdate DESC, t.posttime DESC) AS i
-- Sort by the calendar day, not the posting date: days without postings
-- share the posting date of the row they carry forward.
ORDER BY d.[date], i.location, i.item
-- The default limit of 100 recursion levels would abort the dates CTE for
-- ranges longer than about 100 days.
OPTION (MAXRECURSION 32767);

Get most recent Price for each Item

I have a table:
ItemID PurchaseDate Price
001 03/17/2013 19.00
002 03/17/2013 14.00
001 03/18/2013 13.00
002 03/18/2013 15.00
001 03/19/2013 17.00
003 03/19/2013 19.00
I need to write a SQL query to get the Price corresponding to the latest PurchaseDate for each ItemID.
Entries in the table are not necessarily entered in date order.
Like this:
ItemID PurchaseDate Price
001 03/19/2013 17.00
002 03/18/2013 15.00
003 03/19/2013 19.00
The idea behind the subquery is that it separately gets the latest PurchaseDate for each ItemID. The result of the subquery is then joined back to the table on two conditions: ItemID and PurchaseDate.
-- Returns every TableName row whose PurchaseDate equals the latest
-- PurchaseDate recorded for its ItemID.
SELECT cur.*
FROM (
        -- Latest purchase date per item.
        SELECT ItemID,
               MAX(PurchaseDate) AS max_date
        FROM TableName
        GROUP BY ItemID
     ) latest
INNER JOIN TableName cur
    ON  cur.ItemID = latest.ItemID
    AND cur.PurchaseDate = latest.max_date
-- Sample data as an inline WITH clause (Oracle syntax), added so the SELECT
-- below can be run without any DDL.
-- Leave this section out when running on MS Access.
WITH v AS (
    SELECT 001 ItemID, TO_DATE('03/17/2013', 'MM/DD/YYYY') PurchaseDate, 19.00 Price FROM dual
    UNION ALL
    SELECT 002, TO_DATE('03/17/2013', 'MM/DD/YYYY'), 14.00 FROM dual
    UNION ALL
    SELECT 001, TO_DATE('03/18/2013', 'MM/DD/YYYY'), 13.00 FROM dual
    UNION ALL
    SELECT 002, TO_DATE('03/18/2013', 'MM/DD/YYYY'), 15.00 FROM dual
    UNION ALL
    SELECT 001, TO_DATE('03/19/2013', 'MM/DD/YYYY'), 17.00 FROM dual
    UNION ALL
    SELECT 003, TO_DATE('03/19/2013', 'MM/DD/YYYY'), 19.00 FROM dual
)
-- End of the sample-data WITH clause; the main query starts here.
-- Author's note: the main query is meant to work on most platforms,
-- including MS Access.
-- "v" is read twice: once for the row being returned (item_row) and once for
-- all rows of the same item (same_item). Replace "v" with your table name.
SELECT item_row.itemid,
       item_row.purchasedate,
       item_row.price
FROM v item_row
WHERE EXISTS (
          -- Keep the row only when its date is the latest for its item.
          SELECT 1
          FROM v same_item
          WHERE same_item.itemid = item_row.itemid
          GROUP BY same_item.itemid
          HAVING MAX(same_item.purchasedate) = item_row.purchasedate
      )
ORDER BY item_row.itemid
;