I would like to calculate the new visitors based on all the visitors that visit each day. Right now the only data I have available is the first two columns, so I need to derive the last two columns from the first two.

| Date  | Visitors_Today   | New_Visitors | All_Visitors_To_Date          |
|:------|:-----------------|:-------------|:------------------------------|
| Dec 6 | Allie, Jon       | Allie, Jon   | Allie, Jon                    |
| Dec 7 | Allie, Jon, Zach | Zach         | Allie, Jon, Zach              |
| Dec 8 | Barb, Jon        | Barb         | Allie, Barb, Jon, Zach        |
| Dec 9 | Janet, Zach      | Janet        | Allie, Barb, Janet, Jon, Zach |

This is what I have so far to create the first and second columns:
WITH visitor_log_response AS (
  SELECT
    CAST(JSON_PARSE(visitor_log) AS MAP<VARCHAR, VARCHAR>) AS visitor_map,
    date
  FROM visitor_log_response_table
),
names_and_dates AS (
  SELECT DISTINCT
    visitor_name,
    date
  FROM visitor_log_response
  CROSS JOIN UNNEST(visitor_map) AS u(visitor_name, visitor_age)
),
visitor_names AS (
  SELECT
    date,
    ARRAY_JOIN(
      ARRAY_AGG(visitor_name ORDER BY visitor_name),
      ', '
    ) AS visitors_today
  FROM names_and_dates
  GROUP BY date
  ORDER BY date DESC
)
SELECT
  date,
  visitors_today
FROM visitor_names
Which results in this:

| Date  | Visitors_Today   |
|:------|:-----------------|
| Dec 6 | Allie, Jon       |
| Dec 7 | Allie, Jon, Zach |
| Dec 8 | Barb, Jon        |
| Dec 9 | Janet, Zach      |

If the table is normalized using this query:
SELECT date, visitors_today_split
FROM previous_table
CROSS JOIN UNNEST(SPLIT(visitors_today, ', ')) AS t(visitors_today_split)
I would have this output:

| Date  | Visitors_Today |
|:------|:---------------|
| Dec 6 | Allie          |
| Dec 6 | Jon            |
| Dec 7 | Allie          |
| Dec 7 | Jon            |
| Dec 7 | Zach           |
| Dec 8 | Barb           |
| Dec 8 | Jon            |
| Dec 9 | Janet          |
| Dec 9 | Zach           |

You can use window functions with array aggregation (remove ARRAY_JOIN from visitor_names CTE):
-- sample data
with dataset(date, visitors_today) as (
values ('Dec 6', array['Allie', 'Jon']),
('Dec 7', array['Allie', 'Jon', 'Zach']),
('Dec 8', array['Barb', 'Jon']),
('Dec 9', array['Janet', 'Zach'])
)
-- query
select date,
       visitors_today,
       array_distinct(visitors_today || prev_visitors) all_visitors_to_date,
       array_except(visitors_today, prev_visitors) new_visitors
from (
  select *,
         coalesce(
           flatten(array_distinct(array_agg(visitors_today)
             over (order by date rows between unbounded preceding and 1 preceding))),
           array[]
         ) as prev_visitors -- combine all visitors before today into a non-null array
  from dataset
);
Output:

| date  | visitors_today     | all_visitors_to_date            | new_visitors |
|:------|:-------------------|:--------------------------------|:-------------|
| Dec 6 | [Allie, Jon]       | [Allie, Jon]                    | [Allie, Jon] |
| Dec 7 | [Allie, Jon, Zach] | [Allie, Jon, Zach]              | [Zach]       |
| Dec 8 | [Barb, Jon]        | [Barb, Jon, Allie, Zach]        | [Barb]       |
| Dec 9 | [Janet, Zach]      | [Janet, Zach, Allie, Jon, Barb] | [Janet]      |

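If you want the comma-separated strings from the question rather than arrays, you can wrap the final columns in array_join. A minimal sketch against the same sample data (Trino syntax):
-- same sample data as above
with dataset(date, visitors_today) as (
 values ('Dec 6', array['Allie', 'Jon']),
        ('Dec 7', array['Allie', 'Jon', 'Zach']),
        ('Dec 8', array['Barb', 'Jon']),
        ('Dec 9', array['Janet', 'Zach'])
)
select date,
       array_join(visitors_today, ', ') as visitors_today,
       array_join(array_distinct(visitors_today || prev_visitors), ', ') as all_visitors_to_date,
       array_join(array_except(visitors_today, prev_visitors), ', ') as new_visitors
from (
  select *,
         coalesce(
           flatten(array_distinct(array_agg(visitors_today)
             over (order by date rows between unbounded preceding and 1 preceding))),
           array[]
         ) as prev_visitors -- all visitors before today, as a non-null array
  from dataset
);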
Note that arrays may not be the most optimal type in terms of performance, and an array is limited to 10,000 elements in Presto/Trino.
You can flatten your arrays and perform new aggregations in subqueries:
select t.*,
       (select array_join(array_agg(t1.v), ', ')
        from (select v from unnest(t.visitors_today) v
              except
              select v from tbl t2 cross join unnest(t2.visitors_today) v
              where t2.date < t.date) t1) as new_visitors,
       (select array_join(array_distinct(array_agg(v)), ', ')
        from tbl t1 cross join unnest(t1.visitors_today) v
        where t1.date <= t.date) as all_visitors_to_date
from tbl t
See fiddle (Demo of query above in Postgres).
Beginning from the normalized table, you can compute a ranking of visitors over dates (a first-time visit corresponds to ranking = 1), then use a window function on the new-visitors field to accumulate the visitors to date.
If you want strings (fiddle):
WITH cte AS (
SELECT *, ROW_NUMBER() OVER(PARTITION BY Visitors_Today ORDER BY Date_) AS rn
FROM tab
), cte2 AS (
SELECT Date_,
STRING_AGG(Visitors_Today, ', ') AS Visitors_Today,
STRING_AGG(Visitors_Today, ', ') FILTER (WHERE rn = 1) AS New_Visitors
FROM cte
GROUP BY Date_
)
SELECT *,
STRING_AGG(New_Visitors, ', ') OVER(ORDER BY Date_) AS All_Visitors_To_Date
FROM cte2
If you want arrays (fiddle):
WITH cte AS (
SELECT *, ROW_NUMBER() OVER(PARTITION BY Visitors_Today ORDER BY Date_) AS rn
FROM tab
), cte2 AS (
SELECT Date_,
ARRAY_AGG(Visitors_Today) AS Visitors_Today,
ARRAY_AGG(Visitors_Today) FILTER (WHERE rn = 1) AS New_Visitors
FROM cte
GROUP BY Date_
)
SELECT DISTINCT Date_, Visitors_Today, New_Visitors, ARRAY_AGG(elements) OVER(ORDER BY Date_) AS All_Visitors_To_Date
FROM cte2, UNNEST(New_Visitors) AS elements
ORDER BY Date_
Related
How can I get the sum of two rows clubbed together? For instance, if I have 5 rows in total, I should get 3 rows as a result.
Below is my table:
2020-08-01 1
2020-08-02 3
2020-08-03 4
2020-08-04 2
2020-08-05 4
I want to achieve this:
4
6
4
August 1 and 2 = 4
August 3 and 4 = 6
August 5 = 4
You could use ROW_NUMBER here:
WITH cte AS (
SELECT dt, val, ROW_NUMBER() OVER (ORDER BY dt) rn
FROM yourTable
)
SELECT SUM(val)
FROM cte
GROUP BY FLOOR((rn - 1) / 2)
ORDER BY MIN(dt);
Here is a demo, shown in SQL Server, but the logic should also work for BigQuery:
Demo
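To make this concrete, here is a self-contained sketch with the sample data from the question (the dt and val column names are assumptions):
WITH yourTable AS (
    SELECT * FROM (VALUES
        ('2020-08-01', 1), ('2020-08-02', 3), ('2020-08-03', 4),
        ('2020-08-04', 2), ('2020-08-05', 4)
    ) v(dt, val)
), cte AS (
    SELECT dt, val, ROW_NUMBER() OVER (ORDER BY dt) rn
    FROM yourTable
)
SELECT SUM(val) AS total    -- returns 4, 6, 4
FROM cte
GROUP BY FLOOR((rn - 1) / 2)
ORDER BY MIN(dt);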
Below is for BigQuery Standard SQL
#standardSQL
SELECT SUM(value) AS value,
STRING_AGG(FORMAT_DATE('%B %d', day), ' and ') || ' = ' || CAST(SUM(value) AS STRING) AS calc
FROM (
SELECT day, value, DIV(ROW_NUMBER() OVER(ORDER BY day) - 1, 2) grp
FROM `project.dataset.table` t
)
GROUP BY grp
ORDER BY grp
You can test and play with the above using the sample data from your question, as in the example below:
#standardSQL
WITH `project.dataset.table` AS (
SELECT DATE '2020-08-01' day, 1 value UNION ALL
SELECT '2020-08-02', 3 UNION ALL
SELECT '2020-08-03', 4 UNION ALL
SELECT '2020-08-04', 2 UNION ALL
SELECT '2020-08-05', 4
)
SELECT SUM(value) AS value,
STRING_AGG(FORMAT_DATE('%B %d', day), ' and ') || ' = ' || CAST(SUM(value) AS STRING) AS calc
FROM (
SELECT day, value, DIV(ROW_NUMBER() OVER(ORDER BY day) - 1, 2) grp
FROM `project.dataset.table` t
)
GROUP BY grp
ORDER BY grp
with output:
Row value calc
1 4 August 01 and August 02 = 4
2 6 August 03 and August 04 = 6
3 4 August 05 = 4
I'm trying to create a week-over-week active user count summary report/table aggregated by month. I have one table for June 2017 and one table for May 2017 which I need to join together in order to compare the two months. The date timestamp is created_utc, a UNIX timestamp, which I can transform into a human-readable format and from there extract the week-of-year value, 1 through 52. The questions I have are:
Number the weeks just by values of 1 through 4: week 1 for June, week 1 for May, week 2 for June, week 2 for May, and so on.
Joining the tables based on those week 1 through 4 values.
Pivoting the table and adding a WOW_Change variable.
I'd like the final table to look like this:
| Week | June_count | May_count |WOW_Change |
|:-----------|:-----------:|:------------:|:----------:|
| Week_1 | 5 | 8 | 0.6 |
| Week_2 | 2 | 1 | -0.5 |
| Week_3 | 10 | 5 | -0.5 |
| Week_4 | 30 | 6 | 1 |
Below is some sample data as well as the code I've started.
CREATE TABLE June
(created_utc int, userid varchar(6))
;
INSERT INTO June
(created_utc, userid)
VALUES
(1496354167, '6eq4xf'),
(1496362973, '6eqzz3'),
(1496431934, '6ewlm8'),
(1496870877, '6fwied'),
(1496778080, '6fo79k'),
(1496933893, '6g1gcg'),
(1497154559, '6gjkid'),
(1497618561, '6hmeud'),
(1497377349, '6h1osm'),
(1497221017, '6god73'),
(1497731470, '6hvmic'),
(1497273130, '6gs4ay'),
(1498080798, '6ioz8q'),
(1497769316, '6hyer4'),
(1497415729, '6h5cgu'),
(1497978764, '6iffwq')
;
CREATE TABLE May
(created_utc int, userid varchar(6))
;
INSERT INTO May
(created_utc, userid)
VALUES
(1493729491, '68sx7k'),
(1493646801, '68m2s2'),
(1493747285, '68uohf'),
(1493664087, '68ntss'),
(1493690759, '68qe5k'),
(1493829196, '691fy9'),
(1493646344, '68m1dv'),
(1494166859, '69rhkl'),
(1493883023, '6963qb'),
(1494362328, '6a83wv'),
(1494525998, '6alv6c'),
(1493945230, '69bkhb'),
(1494050355, '69jqtz'),
(1494418011, '6accd0'),
(1494425781, '6ad0xm'),
(1494024697, '69hx2z'),
(1494586576, '6aql9y')
;
#standardSQL
SELECT created_utc,
DATE(TIMESTAMP_SECONDS(created_utc)) as event_date,
CAST(EXTRACT(WEEK FROM TIMESTAMP_SECONDS(created_utc)) AS STRING) AS week_number,
COUNT(distinct userid) as user_count
FROM June
SELECT created_utc,
DATE(TIMESTAMP_SECONDS(created_utc)) as event_date,
CAST(EXTRACT(WEEK FROM TIMESTAMP_SECONDS(created_utc)) AS STRING) AS week_number,
COUNT(distinct userid) as user_count
FROM May
Below is for BigQuery Standard SQL
#standardSQL
SELECT
CONCAT('Week_', CAST(week AS STRING)) Week,
June.user_count AS June_count,
May.user_count AS May_count,
ROUND((May.user_count - June.user_count) / June.user_count, 2) AS WOW_Change
FROM (
SELECT COUNT(DISTINCT userid) user_count,
DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) + 1 week
FROM `project.dataset.June`
GROUP BY week
) June
JOIN (
SELECT COUNT(DISTINCT userid) user_count,
DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) + 1 week
FROM `project.dataset.May`
GROUP BY week
) May
USING(week)
You can test and play with the above using the sample data from your question, as in the example below:
#standardSQL
WITH `project.dataset.June` AS (
SELECT 1496354167 created_utc, '6eq4xf' userid UNION ALL
SELECT 1496362973, '6eqzz3' UNION ALL
SELECT 1496431934, '6ewlm8' UNION ALL
SELECT 1496870877, '6fwied' UNION ALL
SELECT 1496778080, '6fo79k' UNION ALL
SELECT 1496933893, '6g1gcg' UNION ALL
SELECT 1497154559, '6gjkid' UNION ALL
SELECT 1497618561, '6hmeud' UNION ALL
SELECT 1497377349, '6h1osm' UNION ALL
SELECT 1497221017, '6god73' UNION ALL
SELECT 1497731470, '6hvmic' UNION ALL
SELECT 1497273130, '6gs4ay' UNION ALL
SELECT 1498080798, '6ioz8q' UNION ALL
SELECT 1497769316, '6hyer4' UNION ALL
SELECT 1497415729, '6h5cgu' UNION ALL
SELECT 1497978764, '6iffwq'
), `project.dataset.May` AS (
SELECT 1493729491 created_utc, '68sx7k' userid UNION ALL
SELECT 1493646801, '68m2s2' UNION ALL
SELECT 1493747285, '68uohf' UNION ALL
SELECT 1493664087, '68ntss' UNION ALL
SELECT 1493690759, '68qe5k' UNION ALL
SELECT 1493829196, '691fy9' UNION ALL
SELECT 1493646344, '68m1dv' UNION ALL
SELECT 1494166859, '69rhkl' UNION ALL
SELECT 1493883023, '6963qb' UNION ALL
SELECT 1494362328, '6a83wv' UNION ALL
SELECT 1494525998, '6alv6c' UNION ALL
SELECT 1493945230, '69bkhb' UNION ALL
SELECT 1494050355, '69jqtz' UNION ALL
SELECT 1494418011, '6accd0' UNION ALL
SELECT 1494425781, '6ad0xm' UNION ALL
SELECT 1494024697, '69hx2z' UNION ALL
SELECT 1494586576, '6aql9y'
)
SELECT
CONCAT('Week_', CAST(week AS STRING)) Week,
June.user_count AS June_count,
May.user_count AS May_count,
ROUND((May.user_count - June.user_count) / June.user_count, 2) AS WOW_Change
FROM (
SELECT COUNT(DISTINCT userid) user_count,
DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) + 1 week
FROM `project.dataset.June`
GROUP BY week
) June
JOIN (
SELECT COUNT(DISTINCT userid) user_count,
DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) + 1 week
FROM `project.dataset.May`
GROUP BY week
) May
USING(week)
-- ORDER BY week
with result (as the sample data is limited to just the first two weeks, the result also shows only two weeks, which should not be an issue when you apply it to real data):
Row Week June_count May_count WOW_Change
1 Week_1 5 12 1.4
2 Week_2 6 5 -0.17
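If it helps to see how the DIV expression buckets days of the month into weeks, here is a tiny illustrative query (BigQuery syntax):
#standardSQL
SELECT day_of_month, DIV(day_of_month - 1, 7) + 1 AS week
FROM UNNEST([1, 7, 8, 14, 15, 28, 29]) AS day_of_month
-- days 1-7 -> week 1, 8-14 -> week 2, 15-21 -> week 3, 22-28 -> week 4, 29-31 -> week 5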
Use arithmetic on the day of the month to get the week:
SELECT j.week_number, j.user_count as june_user_count,
       m.user_count as may_user_count
FROM (SELECT DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) as week_number,
             COUNT(distinct userid) as user_count
      FROM June
      GROUP BY week_number
     ) j JOIN
     (SELECT DIV(EXTRACT(DAY FROM DATE(TIMESTAMP_SECONDS(created_utc))) - 1, 7) as week_number,
             COUNT(distinct userid) as user_count
      FROM May
      GROUP BY week_number
     ) m
     ON m.week_number = j.week_number;
Note that splitting data into different tables just based on the date is a bad idea. The data should all go into one table, perhaps partitioned if data volume is an issue.
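For example, in BigQuery the combined table could be partitioned by the event date. A sketch, where the target name `project.dataset.events` is a hypothetical:
#standardSQL
CREATE TABLE `project.dataset.events`  -- hypothetical combined table
PARTITION BY event_date AS
SELECT created_utc, userid,
       DATE(TIMESTAMP_SECONDS(created_utc)) AS event_date
FROM `project.dataset.June`
UNION ALL
SELECT created_utc, userid,
       DATE(TIMESTAMP_SECONDS(created_utc))
FROM `project.dataset.May`;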
I have a table of dates, channels, and sessions, and I am trying to use a join to add columns to each row containing the relevant value from last year. However, I want to include the dates from last year that have no value this year, and vice versa. The problem is that for dates that don't exist, I am getting doubling of the rows. Any thoughts on how to fix this?
SELECT
ty.*,
ly.Date as Date_LY,
ly.Sessions as Sessions_LY
FROM
`testjoin` AS ty
FULL JOIN
`testjoin` as ly
ON
ly.Date = DATE_SUB(ty.Date, INTERVAL 1 YEAR)
AND ly.Channel = ty.Channel
Data:
Date Channel Sessions
01/01/2017 Email 5
02/02/2017 Email 10
01/01/2018 Email 11
02/02/2018 Email 17
01/01/2017 Organic 10
02/02/2017 Organic 15
01/01/2018 Organic 20
Desired Output:
Date Channel Sessions Sessions_LY
01/01/2017 Email 5 null
02/02/2017 Email 10 null
01/01/2018 Email 11 5
02/02/2018 Email 17 10
01/01/2017 Organic 10 null
02/02/2017 Organic 15 null
01/01/2018 Organic 20 10
02/02/2018 Organic null 15
Actual Output:
Date        Channel  Sessions  Sessions_LY
01/01/2017  Organic  10        null
02/02/2017  Email    10        null
02/02/2017  Organic  15        null
01/01/2017  Email    5         null
01/01/2018  Email    11        5
01/01/2018  Organic  20        10
02/02/2018  Email    17        10
null        null     null      15
null        null     null      11
null        null     null      20
null        null     null      17
I think you want a cross join to generate the rows and a left join to bring in the values:
SELECT d.Date, c.Channel, ty.Sessions, ty_prev.Sessions AS Sessions_LY
FROM (SELECT DISTINCT ty.Date
      FROM testjoin ty
     ) d CROSS JOIN
     (SELECT DISTINCT ty.Channel FROM testjoin ty) c LEFT JOIN
     testjoin ty
     ON ty.Date = d.Date AND ty.Channel = c.Channel LEFT JOIN
     testjoin ty_prev
     ON ty_prev.Date = DATE_SUB(d.Date, INTERVAL 1 YEAR) AND ty_prev.Channel = c.Channel;
Play with DATEPART as per your needs:
with t (date, channel, sessions) as
(
select '01/01/2017', 'Email', 5 union all
select '02/02/2017', 'Email', 10 union all
select '01/01/2018', 'Email', 11 union all
select '02/02/2018', 'Email', 17 union all
select '01/01/2017', 'Organic', 10 union all
select '02/02/2017', 'Organic', 15 union all
select '01/01/2018', 'Organic', 20
)
select *,
       lag(sessions) over (partition by d.channel, datepart(mm, d.date)
                           order by datepart(yyyy, d.date)) sessions_ly
from (select *
      from (select distinct t.date from t) dates
           cross join (select distinct t.channel from t) channels
     ) d
left join t on d.date = t.date and d.channel = t.channel
order by d.channel, datepart(yyyy, d.date), datepart(mm, d.date)
Everything in your question indicates that you have only the current (2018) and previous (2017) years, so the below is based on this assumption and is for BigQuery Standard SQL.
#standardSQL
WITH temp AS (
SELECT PARSE_DATE('%m/%d/%Y', Date) Date, Channel, Sessions
FROM `project.dataset.your_table`
), all_days AS (
SELECT Date, Channel FROM temp UNION DISTINCT
SELECT DATE_ADD(Date, INTERVAL 1 YEAR), Channel
FROM temp WHERE EXTRACT(YEAR FROM Date) = 2017
), all_data AS (
SELECT Date, Channel, Sessions, FORMAT_DATE('%m%d', Date) day
FROM all_days
LEFT JOIN temp USING(Date, Channel)
)
SELECT Date, Channel, Sessions,
LAG(Sessions) OVER(PARTITION BY day, Channel ORDER BY Date) Sessions_LY
FROM all_data
You can test / play with the above using the dummy data from your question, as below:
#standardSQL
WITH `project.dataset.your_table` AS (
SELECT '01/01/2017' Date, 'Email' Channel, 5 Sessions UNION ALL
SELECT '02/02/2017', 'Email', 10 UNION ALL
SELECT '01/01/2018', 'Email', 11 UNION ALL
SELECT '02/02/2018', 'Email', 17 UNION ALL
SELECT '01/01/2017', 'Organic', 10 UNION ALL
SELECT '02/02/2017', 'Organic', 15 UNION ALL
SELECT '01/01/2018', 'Organic', 20
), temp AS (
SELECT PARSE_DATE('%m/%d/%Y', Date) Date, Channel, Sessions
FROM `project.dataset.your_table`
), all_days AS (
SELECT Date, Channel FROM temp UNION DISTINCT
SELECT DATE_ADD(Date, INTERVAL 1 YEAR), Channel
FROM temp WHERE EXTRACT(YEAR FROM Date) = 2017
), all_data AS (
SELECT Date, Channel, Sessions, FORMAT_DATE('%m%d', Date) day
FROM all_days
LEFT JOIN temp USING(Date, Channel)
)
SELECT Date, Channel, Sessions,
LAG(Sessions) OVER(PARTITION BY day, Channel ORDER BY Date) Sessions_LY
FROM all_data
ORDER BY 2, 1
The FORMAT_DATE('%m%d', Date) partition key pairs rows that share the same month and day across years, so LAG(Sessions) returns last year's value. The result is:
Row Date Channel Sessions Sessions_LY
1 2017-01-01 Email 5 null
2 2017-02-02 Email 10 null
3 2018-01-01 Email 11 5
4 2018-02-02 Email 17 10
5 2017-01-01 Organic 10 null
6 2017-02-02 Organic 15 null
7 2018-01-01 Organic 20 10
8 2018-02-02 Organic null 15
This is my SQL query:
SELECT
ROW_NUMBER() OVER (ORDER BY (SELECT 1)) [Sno],
_Date,
SUM(Payment) Payment
FROM
DailyPaymentSummary
GROUP BY
_Date
ORDER BY
_Date
This returns output like this:
Sno _Date Payment
---------------------------
1 2017-02-02 46745.80
2 2017-02-03 100101.03
3 2017-02-06 140436.17
4 2017-02-07 159251.87
5 2017-02-08 258807.51
6 2017-02-09 510986.79
7 2017-02-10 557399.09
8 2017-02-13 751405.89
9 2017-02-14 900914.45
How can I get an additional column like below?
Sno _Date Payment Diff
--------------------------------------
1 02/02/2017 46745.80 46745.80
2 02/03/2017 100101.03 53355.23
3 02/06/2017 140436.17 40335.14
4 02/07/2017 159251.87 18815.70
5 02/08/2017 258807.51 99555.64
6 02/09/2017 510986.79 252179.28
7 02/10/2017 557399.09 46412.30
8 02/13/2017 751405.89 194006.80
9 02/14/2017 900914.45 149508.56
I have tried the following query but am not able to resolve the error:
WITH cte AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY (SELECT 1)) [Sno],
_Date,
SUM(Payment) Payment
FROM
DailyPaymentSummary
GROUP BY
_Date
ORDER BY
_Date
)
SELECT
t.Payment,
t.Payment - COALESCE(tprev.col, 0) AS diff
FROM
DailyPaymentSummary t
LEFT OUTER JOIN
t tprev ON t.seqnum = tprev.seqnum + 1;
Can anyone help me?
Use an ORDER BY with deterministic column(s) to get consistent results.
Use the LAG function to get data from the previous row and do the subtraction, like this:
with t
as (
select ROW_NUMBER() over (order by _date) [Sno],
_Date,
sum(Payment) Payment
from DailyPaymentSummary
group by _date
)
select *,
Payment - lag(Payment, 1, 0) over (order by [Sno]) diff
from t;
You can use lag() to get previous row values:
coalesce(lag(sum_payment_col) OVER (ORDER BY (SELECT 1)),0)
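Putting that together, a minimal self-contained sketch (assuming the DailyPaymentSummary schema from the question; ordering by _Date rather than a constant keeps the result deterministic):
SELECT _Date,
       SUM(Payment) AS Payment,
       -- subtract the previous day's total; COALESCE handles the first row
       SUM(Payment) - COALESCE(LAG(SUM(Payment)) OVER (ORDER BY _Date), 0) AS Diff
FROM DailyPaymentSummary
GROUP BY _Date
ORDER BY _Date;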
I have a table, table_A. table_A includes these columns:
- CountryName
- Min_Date
- Max_Date
- Number
I want to expand the data, separating it by month. For example:
Argentina | 2015-01-04 | 2015-04-07 | 100
England | 2015-02-08 | 2015-03-11 | 90
I want to see a table like this (separated monthly):
Argentina | 01-2015 | 27 //(days to end of the min_date's month)
Argentina | 02-2015 | 29 //(days full month)
Argentina | 03-2015 | 31 //(days full month)
Argentina | 04-2015 | 7 //(days from start of the max_date's month)
England | 02-2015 | 21 //(days)
England | 03-2015 | 11 //(days)
I tried many things to make this work for each record, but now I'm confused and my project is getting delayed.
Does anybody know how I can solve this? I tried to duplicate each row with a DATEDIFF count but it is not working:
WITH cte AS (
SELECT CountryName, ISNULL(DATEDIFF(M,Min_Date ,Max_Date )+1,1) as count FROM table_A
UNION ALL
SELECT CountryName, count-1 FROM cte WHERE count>1
)
SELECT CountryName,count FROM cte
- Generate all the dates between the min and max dates for each country.
- Then get the month start and month end dates for each country, year, and month.
- Finally, get the date difference between the month start and month end.
WITH cte AS (
SELECT Country, min_date dt,min_date,max_date FROM t
UNION ALL
SELECT Country, dateadd(dd,1,dt),min_date,max_date FROM cte WHERE dt < max_date
)
,monthends as (
SELECT country,year(dt) yr,month(dt) mth,max(dt) monthend,min(dt) monthstart
FROM cte
GROUP BY country,year(dt),month(dt))
select country
,cast(mth as varchar(2))+'-'+cast(yr as varchar(4)) yr_month
,datediff(dd,monthstart,monthend)+1 days_diff
from monthends
Sample Demo
EDIT: Another option would be to generate all the dates once (the example shown here generates 51 years of dates, from 2000 to 2050) and then join it to the table to get the days by month.
WITH cte AS (
SELECT cast('2000-01-01' as date) dt,cast('2050-12-31' as date) maxdt
UNION ALL
SELECT dateadd(dd,1,dt),maxdt FROM cte WHERE dt < maxdt
)
SELECT country,year(dt) yr,month(dt) mth, datediff(dd,min(dt),max(dt))+1 days_diff
FROM cte c
JOIN t on c.dt BETWEEN t.min_date and t.max_date
GROUP BY country,year(dt),month(dt)
OPTION (MAXRECURSION 0)
I think you have the right idea. But you need to construct the months:
WITH cte AS (
SELECT CountryName, Min_Date as dte, Min_Date, Max_Date
FROM table_A
UNION ALL
SELECT CountryName, DATEADD(month, 1, dte), Min_Date, Max_Date
FROM cte
WHERE DATEADD(month, 1, dte) <= Max_Date
)
SELECT CountryName, dte
FROM cte;
Getting the number of days in the month is a bit more complicated. That requires some thought.
Oh, I forgot about EOMONTH():
select countryName, dte,
       (case when dte = min_date
             then datediff(day, min_date, eomonth(dte)) + 1
             when year(dte) = year(max_date) and month(dte) = month(max_date)
             then day(max_date)
             else day(eomonth(dte))
        end) as days
from cte;
Using a Calendar Table makes this stuff pretty easy. RexTester: http://rextester.com/EBTIMG23993
begin
create table #enderaric (
CountryName varchar(16)
, Min_Date date
, Max_Date date
, Number int
)
insert into #enderaric values
('Argentina', '2015-01-04', '2015-04-07', 100)
, ('England', '2015-02-08', '2015-03-11', 90)
end;
-- select * from #enderaric
declare @FromDate date;
declare @ThruDate date;
set @FromDate = '2015-01-01';
set @ThruDate = '2015-12-31';
with x as (
select top (cast(sqrt(datediff(day, @FromDate, @ThruDate)) as int) + 1)
[number]
from [master]..spt_values v
)
/* Date Range CTE */
,cal as (
select top (1+datediff(day, @FromDate, @ThruDate))
DateValue = convert(date,dateadd(day,
row_number() over (order by x.number)-1,#FromDate)
)
from x cross join x as y
order by DateValue
)
select
e.CountryName
, YearMonth = convert(char(7),left(convert(varchar(10),DateValue),7))
, [Days]=count(c.DateValue)
from #enderaric as e
inner join cal c on c.DateValue >= e.min_date
and c.DateValue <= e.max_date
group by
e.CountryName
, e.Min_Date
, e.Max_Date
, e.Number
, convert(char(7),left(convert(varchar(10),DateValue),7))
results in:
CountryName YearMonth Days
---------------- --------- -----------
Argentina 2015-01 28
Argentina 2015-02 28
Argentina 2015-03 31
Argentina 2015-04 7
England 2015-02 21
England 2015-03 11
More about calendar tables:
Aaron Bertrand - Generate a set or sequence without loops
generate-a-set-1
generate-a-set-2
generate-a-set-3
David Stein - Creating a Date Table/Dimension on SQL 2008
Michael Valentine Jones - F_TABLE_DATE