Conditional running sum - SQL

I'm trying to return the number of unique users that converted over time.
So I have the following query:
WITH CTE
As
(
SELECT '2020-04-01' as date,'userA' as user,1 as goals Union all
SELECT '2020-04-01','userB',0 Union all
SELECT '2020-04-01','userC',0 Union all
SELECT '2020-04-03','userA',1 Union all
SELECT '2020-04-05','userC',1 Union all
SELECT '2020-04-06','userC',0 Union all
SELECT '2020-04-06','userB',0
)
select
  date,
  COUNT(DISTINCT IF(goals >= 1, user, NULL)) AS cad_converters
from CTE
group by date
I'm trying to count distinct users, but I need to find a way to apply the distinct count cumulatively across the dates. I probably need to do something like a cumulative sum...
The expected result would be something like this:
date, goals, total_unique_converted_users
'2020-04-01',1,1
'2020-04-01',0,1
'2020-04-01',0,1
'2020-04-03',1,2
'2020-04-05',1,2
'2020-04-06',0,2
'2020-04-06',0,2

Below is for BigQuery Standard SQL
#standardSQL
SELECT t.date, t.goals, total_unique_converted_users
FROM `project.dataset.table` t
LEFT JOIN (
SELECT a.date,
COUNT(DISTINCT IF(b.goals >= 1, b.user, NULL)) AS total_unique_converted_users
FROM `project.dataset.table` a
CROSS JOIN `project.dataset.table` b
WHERE a.date >= b.date
GROUP BY a.date
)
USING(date)

I would approach this by tagging when the first goal is scored for each user. Then simply do a cumulative sum:
select cte.* except (seqnum), countif(seqnum = 1) over (order by date)
from (select cte.*,
(case when goals = 1 then row_number() over (partition by user, goals order by date) end) as seqnum
from cte
) cte;
I realize this can be expressed without the case in the subquery:
select cte.* except (seqnum), countif(seqnum = 1 and goals = 1) over (order by date)
from (select cte.*,
row_number() over (partition by user, goals order by date) as seqnum
from cte
) cte;

Related

Get range of dates from dates record in MS SQL

I have a record of dates:
with DateTable (dateItem) as
(
select '2022-07-03' union all
select '2022-07-05' union all
select '2022-07-04' union all
select '2022-07-09' union all
select '2022-07-12' union all
select '2022-07-13' union all
select '2022-07-18'
)
select dateItem
from DateTable
order by 1 asc
I want to get the ranges of dates from this record, like this:
with DateTableRange (dateItemStart, dateItemend) as
(
select '2022-07-03','2022-07-05' union all
select '2022-07-09','2022-07-09' union all
select '2022-07-12','2022-07-13' union all
select '2022-07-18','2022-07-18'
)
select dateItemStart, dateItemend
from DateTableRange
I am able to do it in SQL by looping with WHILE: take the first date, check the following dates, and as long as each one is one day later, extend the end date, then repeat for the next range.
But I don't know what the best or most optimized way is, as there was a lot of looping and temp tables involved.
Edited:
In the data we have 3, 4, 5, while 6, 7, 8 are missing, so the range is 3-5.
9 exists and 10 is missing, so the range is 9-9.
So the ranges depend purely on the consecutive data in DateTable.
Any suggestions will be appreciated.
With that additional clarity, this requires a gaps-and-islands approach: first identify adjacent rows as groups, then use a window to pick out the first and last value of each group.
I'm sure this could be refined further, but it should give your desired results (the intermediate values are traced after the query):
with DateTable (dateItem) as
(
select '2022-07-03' union all
select '2022-07-05' union all
select '2022-07-04' union all
select '2022-07-09' union all
select '2022-07-12' union all
select '2022-07-13' union all
select '2022-07-18'
), valid as (
select *,
case when exists (
select * from DateTable d2 where Abs(DateDiff(day, d.dateitem, d2.dateitem)) = 1
) then 1 else 0 end v
from DateTable d
), grp as (
select *,
Row_Number() over(order by dateitem) - Row_Number()
over (partition by v order by dateitem) g
from Valid v
)
select distinct
Iif(v = 0, dateitem, First_Value(dateitem) over(partition by g order by dateitem)) DateItemStart,
Iif(v = 0, dateitem, First_Value(dateitem) over(partition by g order by dateitem desc)) DateItemEnd
from grp
order by dateItemStart;
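For the sample dates, the intermediate columns work out as follows (dateItem, v, g):
2022-07-03,1,0
2022-07-04,1,0
2022-07-05,1,0
2022-07-09,0,3
2022-07-12,1,1
2022-07-13,1,1
2022-07-18,0,5
Rows with v = 1 and the same g form an island (03-05 and 12-13), while the v = 0 rows (09 and 18) become single-date ranges via the Iif.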
See Demo Fiddle
After clarification, this is definitely a 'gaps and islands' problem.
The solution can look like this (the derived columns are walked through after the query):
WITH DateTable(dateItem) AS
(
SELECT * FROM (
VALUES
('2022-07-03'),
('2022-07-05'),
('2022-07-04'),
('2022-07-09'),
('2022-07-12'),
('2022-07-13'),
('2022-07-18')
) t(v)
)
SELECT
MIN(dateItem) AS range_from,
MAX(dateItem) AS range_to
FROM (
SELECT
*,
SUM(CASE WHEN DATEADD(day, 1, prev_dateItem) >= dateItem THEN 0 ELSE 1 END) OVER (ORDER BY rn) AS range_id
FROM (
SELECT
ROW_NUMBER() OVER (ORDER BY dateItem) AS rn,
CAST(dateItem AS date) AS dateItem,
CAST(LAG(dateItem) OVER (ORDER BY dateItem) AS date) AS prev_dateItem
FROM DateTable
) groups
) islands
GROUP BY range_id
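To see how the islands form, the derived columns for the sample dates work out as follows (dateItem, prev_dateItem, range_id):
2022-07-03,NULL,1
2022-07-04,2022-07-03,1
2022-07-05,2022-07-04,1
2022-07-09,2022-07-05,2
2022-07-12,2022-07-09,3
2022-07-13,2022-07-12,3
2022-07-18,2022-07-13,4
Grouping by range_id and taking MIN and MAX then gives the ranges 03-05, 09-09, 12-13 and 18-18.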
You can check a working demo

PostgreSQL Percent Change using Row Number

I'm trying to find the percent change using row number with PostgreSQL but I'm running into an error where my "percent_change" column shows 0.
Here is what I have as my code.
WITH CTE AS (
SELECT date, sales, ROW_NUMBER() OVER (ORDER by date) AS rn
FROM sales_2019)
SELECT c1.date, c1.sales,
CAST(COALESCE (((c1.sales - c2.sales) * 1.0 / c2.sales) * 100, 0) AS INT) AS percent_change
FROM CTE AS c1
LEFT JOIN CTE AS c2
ON c1.date = c2.date AND c1.rn = c2.rn + 1
Here is my SQL table in case it's needed. Thank you in advance, I greatly appreciate it.
You can use LAG() for your requirement:
select
date,
sales,
round(coalesce((((sales-(lag(sales) over (order by date)))*1.0)/(lag(sales) over (order by date)))*100,0),2)
from sales_2019
or you can try it with a WITH clause:
with cte as ( select
date,
sales,
coalesce(lag(sales) over (order by date),0) as previous_month
from sales_2019
)
select
date,
sales,
round( coalesce( (sales-previous_month)*1.0/nullif(previous_month,0),0 )*100,2)
from cte
DEMO
EDIT as per the requirement in the comment:
with cte as ( select
date_,
sales,
ROW_NUMBER() OVER (ORDER by date_) AS rn1,
ROW_NUMBER() OVER (ORDER by date_)-1 AS rn2
from sales_2019
)
select t1.date_,
t1.sales,
round( coalesce( (t1.sales-t2.sales)*1.0/nullif(t2.sales,0),0 )*100,2)
from cte t1 left join cte t2 on t1.rn2=t2.rn1
DEMO

SQLite Getting multiple results with LIMIT 1

I have the following problem.
Part of a task is to determine the visitor(s) with the most money spent between 2000 and 2020.
So far it just looks like this:
SELECT UserEMail FROM Visitor
JOIN Ticket ON Visitor.UserEMail = Ticket.VisitorUserEMail
where Ticket.Date > date('2000-01-01') AND Ticket.Date < date('2020-12-31')
Group by Ticket.VisitorUserEMail
order by SUM(Price) DESC;
Is it possible to output more than one person if both have spent the same amount?
Use rank():
SELECT VisitorUserEMail
FROM (SELECT VisitorUserEMail, SUM(PRICE) as sum_price,
RANK() OVER (ORDER BY SUM(Price) DESC) as seqnum
FROM Ticket t
WHERE t.Date >= date('2000-01-01') AND t.Date < date('2021-01-01')
GROUP BY t.VisitorUserEMail
) t
WHERE seqnum = 1;
Note: You don't need the JOIN, assuming that ticket buyers are actually visitors. If that assumption is not true, then use the JOIN.
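If that assumption doesn't hold and you do need the JOIN, the same ranking approach still works; a sketch reusing the table and column names from the question:
SELECT VisitorUserEMail
FROM (SELECT t.VisitorUserEMail, SUM(t.Price) AS sum_price,
             RANK() OVER (ORDER BY SUM(t.Price) DESC) AS seqnum
      FROM Ticket t
      JOIN Visitor v ON v.UserEMail = t.VisitorUserEMail
      WHERE t.Date >= '2000-01-01' AND t.Date <= '2020-12-31'
      GROUP BY t.VisitorUserEMail
     ) t
WHERE seqnum = 1;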
Use a CTE that returns the total price for each email and, with NOT EXISTS, select the rows with the top total price:
WITH cte AS (
SELECT VisitorUserEMail, SUM(Price) SumPrice
FROM Ticket
WHERE Date >= '2000-01-01' AND Date <= '2020-12-31'
GROUP BY VisitorUserEMail
)
SELECT c.VisitorUserEMail
FROM cte c
WHERE NOT EXISTS (
SELECT 1 FROM cte
WHERE SumPrice > c.SumPrice
)
or:
WITH cte AS (
SELECT VisitorUserEMail, SUM(Price) SumPrice
FROM Ticket
WHERE Date >= '2000-01-01' AND Date <= '2020-12-31'
GROUP BY VisitorUserEMail
)
SELECT VisitorUserEMail
FROM cte
WHERE SumPrice = (SELECT MAX(SumPrice) FROM cte)
Note that you don't need the function date() because the result of date('2000-01-01') is '2000-01-01'.
Also I think that the conditions in the WHERE clause should include the =, right?

How to get the validity date range of a price from individual daily prices in SQL

I have some prices for the month of January.
Date,Price
1,100
2,100
3,115
4,120
5,120
6,100
7,100
8,120
9,120
10,120
Now, the output I need is a non-overlapping date range for each price.
price,from,to
100,1,2
115,3,3
120,4,5
100,6,7
120,8,10
I need to do this using SQL only.
For now, if I simply group by and take min and max dates, I get the below, which is an overlapping range:
price,from,to
100,1,7
115,3,3
120,4,10
This is a gaps-and-islands problem. The simplest solution is the difference of row numbers:
select price, min(date), max(date)
from (select t.*,
row_number() over (order by date) as seqnum,
row_number() over (partition by price order by date) as seqnum2
from t
) t
group by price, (seqnum - seqnum2)
order by min(date);
Why this works is a little hard to explain. But if you look at the results of the subquery, you will see how the adjacent rows are identified by the difference in the two values.
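For the sample data above, the subquery works out to the following (date, price, seqnum, seqnum2, seqnum - seqnum2):
1,100,1,1,0
2,100,2,2,0
3,115,3,1,2
4,120,4,1,3
5,120,5,2,3
6,100,6,3,3
7,100,7,4,3
8,120,8,3,5
9,120,9,4,5
10,120,10,5,5
Rows with the same price and the same difference are exactly the consecutive runs: (100, 0) covers dates 1-2, (115, 2) covers date 3, (120, 3) covers dates 4-5, (100, 3) covers dates 6-7, and (120, 5) covers dates 8-10.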
SELECT Lag.price,Lag.[date] AS [From], MIN(Lead.[date]-Lag.[date])+Lag.[date] AS [to]
FROM
(
SELECT [date],[Price]
FROM
(
SELECT [date],[Price],LAG(Price) OVER (ORDER BY DATE,Price) AS LagID FROM #table1 A
)B
WHERE CASE WHEN Price <> ISNULL(LagID,1) THEN 1 ELSE 0 END = 1
)Lag
JOIN
(
SELECT [date],[Price]
FROM
(
SELECT [date],Price,LEAD(Price) OVER (ORDER BY DATE,Price) AS LeadID FROM [#table1] A
)B
WHERE CASE WHEN Price <> ISNULL(LeadID,1) THEN 1 ELSE 0 END = 1
)Lead
ON Lag.[Price] = Lead.[Price]
WHERE Lead.[date]-Lag.[date] >= 0
GROUP BY Lag.[date],Lag.[price]
ORDER BY Lag.[date]
Another method, using ROWS UNBOUNDED PRECEDING:
SELECT price, MIN([date]) AS [from], [end_date] AS [To]
FROM
(
SELECT *, MIN([abc]) OVER (ORDER BY DATE DESC ROWS UNBOUNDED PRECEDING ) end_date
FROM
(
SELECT *, CASE WHEN price = next_price THEN NULL ELSE DATE END AS abc
FROM
(
SELECT a.* , b.[date] AS next_date, b.price AS next_price
FROM #table1 a
LEFT JOIN #table1 b
ON a.[date] = b.[date]-1
)AA
)BB
)CC
GROUP BY price, end_date

Grouping duplicate rows and calculating effective and end dates

As per the attached sample, I have data with repeated rows that differ only in their date values. I would like to combine the duplicate records to reduce the number of rows and, at the same time, calculate the end date of each record.
The “CountryCode” column should be used to combine the records, and value changes in the “CountryRiskLevel” or “RegionRiskLevel” columns should define the start and end date ranges.
Database - SQL Server.
Try this query. I used slightly different sample data, but the query will work for you as well:
;with SampleData as(
select 1 CountryCode,
1 RegionCode,
5 CountryRiskLevel,
5 RegionRiskLevel,
CONVERT(date, '2018-01-01') EffectiveDate
union all
select 1,1,5,5,CONVERT(date, '2018-01-02')
union all
select 1,1,5,5,CONVERT(date, '2018-01-03')
union all
select 1,1,5,5,CONVERT(date, '2018-01-04')
union all
select 1,1,2,2,CONVERT(date, '2018-01-05')
union all
select 1,1,5,5,CONVERT(date, '2018-01-06')
union all
select 1,1,5,5,CONVERT(date, '2018-01-07')
union all
select 1,1,5,3,CONVERT(date, '2018-01-08')
union all
select 1,1,5,5,CONVERT(date, '2018-01-09')
union all
select 1,1,5,5,CONVERT(date, '2018-01-10')
union all
select 1,1,5,5,CONVERT(date, '2018-01-11')
)
select CountryCode,
RegionCode,
CountryRiskLevel,
RegionRiskLevel,
MIN(effectiveDate) EffecticeStartDate,
case when MAX(effectiveDate) = MIN(effectiveDate) then MAX(dt) else MAX(effectiveDate) end EffectiveEndDate
from (
select *,
ROW_NUMBER() over (partition by CountryCode, RegionCode, CountryRiskLevel, RegionRiskLevel order by EffectiveDate) rn1,
ROW_NUMBER() over (order by EffectiveDate) rn2,
case when COUNT(*) over (partition by countrycode, RegionCode, CountryRiskLevel, RegionRiskLevel) = 1
then LEAD(effectivedate) over (order by effectivedate) end dt
from SampleData
) a group by CountryCode, RegionCode, CountryRiskLevel, RegionRiskLevel, rn2 - rn1
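To see how rn2 - rn1 keeps the separate runs of identical values apart, the derived difference for the sample rows works out as follows (EffectiveDate, CountryRiskLevel, RegionRiskLevel, rn2 - rn1):
2018-01-01,5,5,0
2018-01-02,5,5,0
2018-01-03,5,5,0
2018-01-04,5,5,0
2018-01-05,2,2,4
2018-01-06,5,5,1
2018-01-07,5,5,1
2018-01-08,5,3,7
2018-01-09,5,5,2
2018-01-10,5,5,2
2018-01-11,5,5,2
Grouping on the risk-level columns plus rn2 - rn1 therefore keeps the three separate runs of (5, 5) apart instead of collapsing them into a single row.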