Top 25 Pageviews in each Country SQL - sql

I am looking for the top 25 blog pages by pageviews in each country.
Please help me out with this. Thanks in advance.
-- Asker's attempt: rank blog pages by pageviews within each country and keep
-- the rows ranked 25 or better.
-- NOTE(review): rows are grouped per (page_path, country, date) and the
-- May 2022 filter runs only after ranking, so pages are ranked on single-day
-- totals taken from every date rather than on their total within the period.
with Result as (
    select
        sum(Pageviews) Total_Page, page_path, date
        ,case
            when "PROFILE" = 44399579 then 'India'
            when "PROFILE" = 36472271 then 'China'
            when "PROFILE" = 41751607 then 'Russia'
            else null
        end COUNTRY,
        Dense_rank() over(PARTITION BY Country order by sum(Pageviews) desc) as Test
    -- Fixed: the table reference started with a stray extra double quote
    -- (""GOOGLE_ANALYTICS_PHASE1"), which is not a valid identifier.
    From "GOOGLE_ANALYTICS_PHASE1"."PAGES"
    where PAGE_PATH like '%blog%' //and PAGE_PATH = '/blog?category_id=8&page=3'
    group by page_path, country, date
)
select top 100 Total_Page,
    page_path, country, test, date
from result
where test <= 25 and Date between '2022-05-01' and '2022-05-31'

Snowflake SQL:
If you want the top 25 pages by views per country, counting only the views that fall inside the defined date period:
using this fake data:
-- Sample rows standing in for the PAGES table (a single profile);
-- the last row is dated outside May 2022.
WITH pages (pageviews, profile, page_path, date) AS (
    SELECT *
    FROM VALUES
        (100,  44399579, 'blog1', '2022-05-31'::DATE),
        (1000, 44399579, 'blog1', '2022-05-30'::DATE),
        (200,  44399579, 'blog2', '2022-05-31'::DATE),
        (2000, 44399579, 'blog2', '2022-04-01'::DATE)
)
and with 25 changed to 1 to "show it working"
-- Top blog pages per country, ranked on each page's total views inside the
-- reporting period (the date filter is applied before anything is summed).
SELECT
    ranked.total_page,
    ranked.page_path,
    ranked.date,
    ranked.country
FROM (
    -- Attach the per-country page total (tt) to every daily row.
    SELECT
        daily.*,
        SUM(daily.total_page) OVER (PARTITION BY daily.country, daily.page_path) AS tt
    FROM (
        -- One row per page, day and country within the period.
        SELECT
            SUM(pageviews) AS total_page,
            page_path,
            date,
            CASE
                WHEN profile = 44399579 THEN 'United States'
                WHEN profile = 36472271 THEN 'New Zealand'
                WHEN profile = 41751607 THEN 'Australia'
            END AS country
        FROM pages -- "FIVETRAN_DATABASE_COMVITA"."GOOGLE_ANALYTICS_PHASE1"."PAGES"
        WHERE page_path LIKE '%blog%'
          AND date BETWEEN '2022-05-01' AND '2022-05-31'
          -- AND page_path = '/blog?category_id=8&page=3'
        GROUP BY 2, 3, 4 -- page_path, date, country
    ) AS daily
) AS ranked
-- Every daily row of a qualifying page is returned; the limit is 1 here for the demo.
QUALIFY DENSE_RANK() OVER (PARTITION BY ranked.country ORDER BY ranked.tt DESC) <= 1
gives:
TOTAL_PAGE
PAGE_PATH
DATE
COUNTRY
100
blog1
2022-05-31
United States
1,000
blog1
2022-05-30
United States
Whereas if you want the all-time top pages, but only want to show their rows from the current period:
-- Top blog pages per country ranked on their all-time total views, showing
-- only the daily rows that fall inside the reporting period.
SELECT
    ranked.total_page,
    ranked.page_path,
    ranked.date,
    ranked.country
FROM (
    -- tt is computed here, before any date filter, so it is an all-time total.
    SELECT
        daily.*,
        SUM(daily.total_page) OVER (PARTITION BY daily.country, daily.page_path) AS tt
    FROM (
        -- One row per page, day and country across all dates.
        SELECT
            SUM(pageviews) AS total_page,
            page_path,
            date,
            CASE
                WHEN profile = 44399579 THEN 'United States'
                WHEN profile = 36472271 THEN 'New Zealand'
                WHEN profile = 41751607 THEN 'Australia'
            END AS country
        FROM pages -- "FIVETRAN_DATABASE_COMVITA"."GOOGLE_ANALYTICS_PHASE1"."PAGES"
        WHERE page_path LIKE '%blog%'
          -- AND page_path = '/blog?category_id=8&page=3'
        GROUP BY 2, 3, 4 -- page_path, date, country
    ) AS daily
) AS ranked
-- The period filter is applied to the output rows only, before QUALIFY ranks them.
WHERE ranked.date BETWEEN '2022-05-01' AND '2022-05-31'
QUALIFY DENSE_RANK() OVER (PARTITION BY ranked.country ORDER BY ranked.tt DESC) <= 1
now returns:
TOTAL_PAGE
PAGE_PATH
DATE
COUNTRY
200
blog2
2022-05-31
United States
Because blog2 has the all time record, but the 200 views is the only one in the window of interest.

Related

Update value based on value from another record of same table

Here I have a sample table of website visitors. As we can see, visitors sometimes don't provide their email. They may also switch to a different email address over time.
**
Original table:
**
I want to update this table with following requirements:
First time when a visitor provides an email, all his past visits will be tagged to that email
Also, all his future visits will be tagged with that email until he switches to another email.
**
Expected table after update:
**
I was wondering if there is a way of doing it in Redshift or T-Sql?
Thanks everyone!
In SQL Server or Redshift, you can use a subquery to calculate the email:
-- Computes an imputed email for every visit:
--   1. the row's own email when it has one;
--   2. otherwise the email shared by rows with the same running count of
--      non-null emails (grp), i.e. the most recent email seen so far;
--   3. otherwise the email of the row whose activity_date equals its
--      first_email_date, i.e. the visitor's first email.
select t.*,
       coalesce(email,
                max(email) over (partition by visitor_id, grp),
                max(case when activity_date = first_email_date then email end) over (partition by visitor_id)
               ) as imputed_email  -- alias added so the column can be referenced
from (select t.*,
             min(case when email is not null then activity_date end) over
                 (partition by visitor_id order by activity_date
                  rows between unbounded preceding and current row) as first_email_date,
             -- Fixed: the frame clause was missing its ROWS keyword.
             count(email) over
                 (partition by visitor_id order by activity_date
                  rows between unbounded preceding and current row) as grp
      from t
     ) t;
You can then use this in an update:
-- Writes the imputed email back to every visit whose email is missing.
-- Rows are matched on (visitor_id, activity_date).
-- Fixes: "emai" -> email, "t.," -> "t.*", and an explicit ROWS frame on the
-- ordered window aggregates (Redshift requires one).
update t
    set email = tt.imputed_email
    from (select t.*,
                 coalesce(email,
                          -- most recent email seen so far (same running count of emails)
                          max(email) over (partition by visitor_id, grp),
                          -- fallback for visits before the first email
                          max(case when activity_date = first_email_date then email end) over (partition by visitor_id)
                         ) as imputed_email
          from (select t.*,
                       min(case when email is not null then activity_date end) over
                           (partition by visitor_id order by activity_date
                            rows between unbounded preceding and current row) as first_email_date,
                       count(email) over
                           (partition by visitor_id order by activity_date
                            rows between unbounded preceding and current row) as grp
                from t
               ) t
         ) tt
    where tt.visitor_id = t.visitor_id and
          tt.activity_date = t.activity_date and
          t.email is null;
If we suppose that the name of the table is Visits and the primary key of that table is made of the columns Visitor_id and Activity_Date then you can do in T-SQL following:
using correlated subquery:
-- Fills every visit that has no email with the email of the same visitor's
-- closest earlier visit, or, when there is none, the closest later visit.
UPDATE a
SET a.Email = COALESCE(
        -- latest earlier visit that carries an email
        (
            SELECT TOP 1 prev.Email
            FROM Visits AS prev
            WHERE prev.Visitor_id = a.Visitor_id
              AND prev.Activity_Date < a.Activity_Date
              AND prev.Email IS NOT NULL
            ORDER BY prev.Activity_Date DESC
        ),
        -- otherwise the earliest later visit that carries an email
        (
            SELECT TOP 1 nxt.Email
            FROM Visits AS nxt
            WHERE nxt.Visitor_id = a.Visitor_id
              AND nxt.Activity_Date > a.Activity_Date
              AND nxt.Email IS NOT NULL
            ORDER BY nxt.Activity_Date
        )
    )
FROM Visits AS a
WHERE a.Email IS NULL;
using window function to provide the ordering:
-- Same fill rule implemented with joins: candidate emails are numbered per
-- email-less visit and the first candidate is written back.
UPDATE v
SET Email = picked.Email
FROM Visits AS v
INNER JOIN (
    SELECT
        cur.Visitor_id,
        cur.Activity_Date,
        COALESCE(prev.Email, nxt.Email) AS Email,
        -- latest previous email first; earliest next email when no previous exists
        ROW_NUMBER() OVER (
            PARTITION BY cur.Visitor_id, cur.Activity_Date
            ORDER BY prev.Activity_Date DESC, nxt.Activity_Date
        ) AS Row_num
    FROM Visits AS cur
    -- earlier visits that carry an email
    LEFT JOIN Visits AS prev
        ON prev.Visitor_id = cur.Visitor_id
       AND prev.Email IS NOT NULL
       AND prev.Activity_Date < cur.Activity_Date
    -- later visits that carry an email, joined only when no earlier one matched
    LEFT JOIN Visits AS nxt
        ON nxt.Visitor_id = cur.Visitor_id
       AND nxt.Email IS NOT NULL
       AND nxt.Activity_Date > cur.Activity_Date
       AND prev.Visitor_id IS NULL
    WHERE cur.Email IS NULL
) AS picked
    ON picked.Visitor_id = v.Visitor_id
   AND picked.Activity_Date = v.Activity_Date
WHERE picked.Row_num = 1;
For each visitor_id you can update a null email with the previous non-null value. If there is none, use the next non-null value. You can get those values as follows:
-- For every visit without an email, list the email of the nearest earlier
-- and nearest later visit (of the same visitor) that has one.
SELECT
    v.*,
    v_prev.email AS prev_email,
    v_next.email AS next_email
FROM visits v
LEFT JOIN visits v_prev
    ON v_prev.visitor_id = v.visitor_id
   AND v_prev.activity_date = (
        SELECT MAX(e.activity_date)
        FROM visits e
        WHERE e.visitor_id = v.visitor_id
          AND e.email IS NOT NULL
          AND e.activity_date < v.activity_date
    )
LEFT JOIN visits v_next
    ON v_next.visitor_id = v.visitor_id
   AND v_next.activity_date = (
        SELECT MIN(e.activity_date)
        FROM visits e
        WHERE e.visitor_id = v.visitor_id
          AND e.email IS NOT NULL
          AND e.activity_date > v.activity_date
    )
WHERE v.email IS NULL

SQL : How to select the most recent value by country

I have a table with 3 columns : day, country, value. There are many values by country with different dates. For example :
DAY COUNTRY VALUE
04-SEP-19 BELGIUM 2124
15-MAR-19 BELGIUM 2135
21-MAY-19 SPAIN 1825
18-JUL-19 SPAIN 1724
26-MAR-19 ITALY 4141
I want to select the most recent value by country. For example :
DAY COUNTRY VALUE
04-SEP-19 BELGIUM 2124
18-JUL-19 SPAIN 1724
26-MAR-19 ITALY 4141
What is the sql query I can use?
Thank you for your help.
You can use the row_number() window function (if your DBMS supports it).
-- Number each country's rows from newest to oldest day, then keep row 1.
WITH ranked AS (
    SELECT
        t.day,
        t.country,
        t.value,
        ROW_NUMBER() OVER (PARTITION BY t.country ORDER BY t.day DESC) AS rn
    FROM elbat t
)
SELECT
    ranked.day,
    ranked.country,
    ranked.value
FROM ranked
WHERE ranked.rn = 1;
Another way of doing this is using a window function (SQL Server, MySQL8 etc)
e.g.
-- Numbers the rows of each country from the newest [Day] (converted to DATE) to the oldest.
ROW_NUMBER() OVER ( PARTITION BY COUNTRY ORDER BY CONVERT(DATE, [Day]) DESC )
Then just filter to where this function returns 1
full example:
-- Inline sample data; [Day] is held as text and converted to DATE for ordering.
WITH TestData AS (
    SELECT src.[Day], src.[COUNTRY], src.[VALUE]
    FROM (VALUES
        ('04-SEP-19', 'BELGIUM', 2124),
        ('15-MAR-19', 'BELGIUM', 2135),
        ('21-MAY-19', 'SPAIN',   1825),
        ('18-JUL-19', 'SPAIN',   1724),
        ('26-MAR-19', 'ITALY',   4141)
    ) AS src ([Day], [COUNTRY], [VALUE])
),
-- SelectionRank 1 marks the most recent row of each country.
TestDataRanked AS (
    SELECT
        [Day],
        [COUNTRY],
        [VALUE],
        ROW_NUMBER() OVER (PARTITION BY [COUNTRY] ORDER BY CONVERT(DATE, [Day]) DESC) AS SelectionRank
    FROM TestData
)
SELECT
    [Day],
    [COUNTRY],
    [VALUE]
FROM TestDataRanked
WHERE SelectionRank = 1;
I understand the problem as you want the most recent value for all countries, as the country can repeat in the table(?):
-- Most recent row per country: keep the rows whose day equals the latest day
-- recorded for their country.
-- The original self-join to a second copy of the table (t2) only multiplied
-- rows that DISTINCT then collapsed again, so it is removed. DISTINCT is kept
-- so exact duplicate rows are still returned once, as before.
select distinct t1.DAY, t1.COUNTRY, t1.VALUE
FROM day_test t1
where t1.day = (select max(t3.day)
                from day_test t3
                where t3.country = t1.country)
I tested it and it works.
Let's suppose that the type of day column is date.
In the subquery, you can find the tuple of (country, max date) and to add the value, you can join as mentioned in the comments or use IN
-- Most recent row per country: the subquery yields one (country, latest day)
-- pair per country and the outer query keeps the matching rows.
SELECT DISTINCT day, country, value
FROM yourTable
WHERE (country, day) IN (
    SELECT country, MAX(day) AS day
    FROM yourTable
    -- Fixed: grouping by value as well produced a maximum per (country, value),
    -- which matches every row when values differ.
    GROUP BY country
)
You can use the following query:
Just replace the TABLE_NAME with the name of your table.
-- Most recent value per country.
-- Fixes: VALUE was selected without being grouped or aggregated, and the
-- date column is named DAY (not DATE) in the question's table. The latest day
-- per country is computed first and joined back to fetch its value.
SELECT
    t.COUNTRY,
    t.VALUE,
    t.DAY AS "MostRecent"
FROM TABLE_NAME t
INNER JOIN (
    SELECT COUNTRY, MAX(DAY) AS MAX_DAY
    FROM TABLE_NAME
    GROUP BY COUNTRY
) latest
    ON latest.COUNTRY = t.COUNTRY
   AND latest.MAX_DAY = t.DAY;

Percentage increase in price based on latest transaction date and previous transaction date

,
I want to get the percentage increase in price by Country and City based on latest transaction date and date of Previous Transaction.
How can I Query this? I am not getting it. This is What I have tried:
-- Asker's attempt, kept as posted.
-- NOTE(review): Price is neither grouped nor aggregated here, so engines that
-- enforce standard GROUP BY rules will reject this statement.
SELECT Country,City, Price
From tbl
Group by Country,City
Percentage increase = [( Latest Price - Previous Price ) / Previous Price] * 100
Expected Output:
Unique Country and City Name + Percentage increase in Price.
Country | City | Percentage
This might be overly complicated.
Set up some random data:
-- Recreate the lookup table of country/city pairs.
IF OBJECT_ID('tempdb..#Cities') IS NOT NULL
    DROP TABLE #Cities;

CREATE TABLE #Cities
(
    Country VARCHAR(20),
    City    VARCHAR(20)
);

-- Recreate the table of dated prices.
IF OBJECT_ID('tempdb..#Data') IS NOT NULL
    DROP TABLE #Data;

CREATE TABLE #Data
(
    Country VARCHAR(20),
    City    VARCHAR(20),
    Price   DECIMAL(13, 4),
    Date    DATETIME
);

INSERT INTO #Cities (Country, City)
VALUES
    ('Country 1', 'City 1'),
    ('Country 1', 'City 2'),
    ('Country 1', 'City 3'),
    ('Country 2', 'City 4'),
    ('Country 2', 'City 5');

-- Two random price rows per city: price in [0, 100], dated 0 to 10 days from now.
INSERT INTO #Data (Country, City, Price, Date)
SELECT
    Country,
    City,
    ROUND(RAND(CHECKSUM(NEWID())) * 100, 4) AS Price,
    DATEADD(DAY, ROUND(RAND(CHECKSUM(NEWID())) * 10, 0), GETDATE()) AS Date
FROM #Cities
UNION
SELECT
    Country,
    City,
    ROUND(RAND(CHECKSUM(NEWID())) * 100, 4),
    DATEADD(DAY, ROUND(RAND(CHECKSUM(NEWID())) * 10, 0), GETDATE())
FROM #Cities;

-- Delete duplicate dates: keep a single row per country, city and date.
WITH data3 AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY Country, City, Date ORDER BY Country, City, Date) AS RN
    FROM #Data
)
DELETE FROM data3
WHERE RN <> 1;
Query the data to get the most recent price, date and percentage increase:
-- Latest and previous price per city plus the percentage change between them.
-- The final inner joins match on Latestdate/Previousdate, so a city without a
-- second-most-recent date produces no output row.
SELECT Dates.*
     , Latest.Price AS Latestprice
     , Previous.Price AS Previousprice
       -- NULLIF guards against a previous price of 0: the result is NULL
       -- instead of a divide-by-zero error that aborts the whole query.
     , ((Latest.Price - Previous.Price) / NULLIF(Previous.Price, 0)) * 100 AS Percentageincrease
FROM
(
    SELECT C.*
         , Latestdate.Latestdate
         , Previousdate.Previousdate
    FROM #Cities AS C
         LEFT JOIN
    (
        --Latest date for each country, city
        SELECT Country
             , City
             , MAX(Date) AS Latestdate
        FROM #Data
        GROUP BY Country
               , City
    ) AS Latestdate ON Latestdate.Country = C.Country
                       AND Latestdate.City = C.City
         LEFT JOIN
    (
        --Previous date for each country, city (second newest by RANK)
        SELECT Country
             , City
             , Date AS Previousdate
        FROM
        (
            SELECT Country
                 , City
                 , Date
                 , RANK() OVER(PARTITION BY Country
                                          , City ORDER BY Date DESC) AS Rank
            FROM #Data
        ) AS A
        WHERE Rank = 2
    ) AS Previousdate ON Previousdate.Country = C.Country
                         AND Previousdate.City = C.City
) AS Dates
JOIN #Data AS Latest ON Latest.Country = Dates.Country
                        AND Latest.City = Dates.City
                        AND Latest.Date = Dates.Latestdate
JOIN #Data AS Previous ON Previous.Country = Dates.Country
                          AND Previous.City = Dates.City
                          AND Previous.Date = Dates.Previousdate;
And for comparison, using lag() to get the percentage increase for each date. Similar to Gordon's answer:
-- For every price row: the previous date and price of the same city and the
-- percentage change from that previous price. The first row of each city has
-- no predecessor, so its LAG values and percentage are NULL.
SELECT D.Country
     , D.City
     , D.Date
     , LAG(D.Date) OVER(PARTITION BY D.Country, D.City ORDER BY D.Date) AS Previousdate
     , D.Price
     , LAG(D.Price) OVER(PARTITION BY D.Country, D.City ORDER BY D.Date) AS Previousprice
       -- NULLIF turns a previous price of 0 into NULL so the division yields
       -- NULL instead of raising a divide-by-zero error.
     , 100 * (D.Price / NULLIF(LAG(D.Price) OVER(PARTITION BY D.Country, D.City ORDER BY D.Date), 0) - 1) AS PercentageIncrease
FROM #Data AS D;
Using lag to get the same results (latest info per city) as my first query:
-- Latest price row per city with its previous date, previous price and the
-- percentage change; Rn = 1 marks the newest row of each city.
SELECT *
FROM
(
    SELECT D.Country
         , D.City
         , D.Date
         , LAG(D.Date) OVER(PARTITION BY D.Country, D.City ORDER BY D.Date) AS Previousdate
         , D.Price
         , LAG(D.Price) OVER(PARTITION BY D.Country, D.City ORDER BY D.Date) AS Previousprice
           -- NULLIF turns a previous price of 0 into NULL so the division
           -- yields NULL instead of raising a divide-by-zero error.
         , 100 * (D.Price / NULLIF(LAG(D.Price) OVER(PARTITION BY D.Country, D.City ORDER BY D.Date), 0) - 1) AS Percentageincrease
         , ROW_NUMBER() OVER(PARTITION BY D.Country, D.City ORDER BY D.Date DESC) AS Rn
    FROM #Data AS D
) AS A
WHERE Rn = 1
ORDER BY Country
       , City;
Use lag():
-- Percentage change of each price against the previous price of the same
-- country and city (ordered by t_date).
-- Fixes: the original expression had an unbalanced opening parenthesis;
-- NULLIF additionally avoids a divide-by-zero when the previous price is 0.
select t.*,
       100 * (price / nullif(lag(price) over (partition by country, city order by t_date), 0) - 1) as increase
from t;

How to find latest status of the day in SQL Server

I have a SQL Server question that I'm trying to figure out at work:
There is a table with a status field which can contain a status called "Participate." I am only trying to find records if the latest status of the day is "Participate" and only if the status changed on the same day from another status to "Participate."
I don't want any records where the status was already "Participate." It must have changed to that status on the same day. You can tell when the status was changed by the datetime field ChangedOn.
In the sample below I would only want to bring back ID 1880 since the status of "Participated" has the latest timestamp. I would not bring back ID 1700 since the last record is "Other," and I would not bring back ID 1600 since "Participated" is the only status of that day.
ChangedOn Status ID
02/01/17 15:23 Terminated 1880
02/01/17 17:24 Participated 1880
02/01/17 09:00 Other 1880
01/31/17 01:00 Terminated 1700
01/31/17 02:00 Participated 1700
01/31/17 03:00 Other 1700
01/31/17 02:00 Participated 1600
I was thinking of using a Window function, but I'm not sure how to get started on this. It's been a few months since I've written a query like this so I'm a bit out of practice.
Thanks!
You can use window functions for this:
-- Returns the latest status row of each ID and day when that status is
-- 'Participated' and the same ID had at least one other status that day.
-- Fixes:
--   * the filter compared ChangedOn (a datetime) with the status text;
--   * windows are partitioned by ID as well as day, so one ID's rows no
--     longer decide the result for another ID on the same date;
--   * the literal follows the sample data ('Participated'); adjust it if the
--     stored value is spelled differently.
select t.*
from (select t.*,
             row_number() over (partition by ID, cast(ChangedOn as date)
                                order by ChangedOn desc
                               ) as seqnum,
             sum(case when status <> 'Participated' then 1 else 0 end)
                 over (partition by ID, cast(ChangedOn as date)) as num_nonparticipate
      from t
     ) t
where seqnum = 1 and
      status = 'Participated' and
      num_nonparticipate > 0;
Can you check this?
-- Inline sample data; the first SELECT fixes the ChangedOn column as DATETIME.
WITH sample_table (ChangedOn, Status, ID) AS (
    SELECT CONVERT(DATETIME, '02/01/2017 15:23'), 'Terminated', 1880
    UNION ALL SELECT '02/01/2017 17:24', 'Participated', 1880
    UNION ALL SELECT '02/01/2017 09:00', 'Other', 1880
    UNION ALL SELECT '01/31/2017 01:00', 'Terminated', 1700
    UNION ALL SELECT '01/31/2017 02:00', 'Participated', 1700
    UNION ALL SELECT '01/31/2017 03:00', 'Other', 1700
    UNION ALL SELECT '01/31/2017 02:00', 'Participated', 1600
)
SELECT t.ID
FROM (
    SELECT
        ID,
        Status,
        -- position of the row within its ID/day, oldest first
        ROW_NUMBER() OVER (PARTITION BY ID, CAST(ChangedOn AS DATE) ORDER BY ChangedOn) AS rn,
        -- number of rows for that ID/day; rn = cnt marks the last one
        COUNT(*) OVER (PARTITION BY ID, CAST(ChangedOn AS DATE)) AS cnt,
        -- how many rows of that ID/day carry a different status
        SUM(CASE WHEN Status <> 'Participated' THEN 1 ELSE 0 END)
            OVER (PARTITION BY ID, CAST(ChangedOn AS DATE)) AS OtherStatusCnt
    FROM sample_table
) AS t
WHERE t.Status = 'Participated'
  AND t.OtherStatusCnt > 0
  AND t.rn = t.cnt
--Return:
1880
try this with other sample data,
-- Sample data in a table variable. Table variables take the @ prefix;
-- "declare #t table" is not valid T-SQL.
DECLARE @t TABLE (ChangedOn datetime, Status varchar(50), ID int);
INSERT INTO @t VALUES
 ('02/01/17 15:23', 'Terminated'   ,1880)
,('02/01/17 17:24', 'Participated' ,1880)
,('02/01/17 09:00', 'Other'        ,1880)
,('01/31/17 01:00', 'Terminated'   ,1700)
,('01/31/17 02:00', 'Participated' ,1700)
,('01/31/17 03:00', 'Other'        ,1700)
,('01/31/17 02:00', 'Participated' ,1600)
;
-- seqnum numbers the rows of each ID and day from newest to oldest.
WITH cte
AS (
    SELECT *
        ,row_number() OVER (
            PARTITION BY id
            ,cast(ChangedOn AS DATE) ORDER BY ChangedOn DESC
            ) AS seqnum
    FROM @t
    )
SELECT *
FROM cte c
WHERE c.seqnum = 1
    AND c.STATUS = 'Participated'
    AND EXISTS (
        SELECT c1.id
        FROM cte c1
        WHERE c1.seqnum > 1
            AND c.id = c1.id
            -- Fixed: restrict to the same day; without this an older row from
            -- any other day of the same ID satisfied the check.
            AND cast(c1.ChangedOn AS DATE) = cast(c.ChangedOn AS DATE)
        )
Second query (this one is better);
the CTE is the same as above:
-- Latest row per ID and day (seqnum = 1) with status 'Participated', kept
-- only when the same ID has a different status on that same day.
SELECT *
FROM cte c
WHERE c.seqnum = 1
    AND c.STATUS = 'Participated'
    AND EXISTS (
        SELECT c1.id
        FROM cte c1
        WHERE c1.STATUS != 'Participated'
            AND c.id = c1.id
            -- Fixed: restrict to the same day; without this a different status
            -- on any other day of the same ID satisfied the check.
            AND cast(c1.ChangedOn AS DATE) = cast(c.ChangedOn AS DATE)
        )

Selecting only last part of the data

I've the following data
I want the outcome to be like this:
For Albania, I want to select minimum and maximum values of date for the last value of City_Code (here, it's 20008) of Albania (The min value of date for 20008 is 18.01.2013, and max value for 20008 is 20.01.2013). For Croatia the last value of City_Code is 'zero', so we shouldn't select anything (if last value of 'City_Code' is zero, don't select it at all). For Slovenia, the last value of City_Code is 70005, so we select minimum and maximum values of corresponding dates (here maximum and minimum values are 22.01.2013). How should code look like? I don't have any idea. Thanks in advance
Try this...
;
-- Latest row per country, skipped when that latest row has City_code 0.
-- Fixed: the zero filter used to run before ranking, so a country whose most
-- recent City_code is 0 still returned its latest non-zero row.
-- NOTE(review): this returns the latest row(s) only; it does not compute the
-- min/max date of that city code, which the question also asks for.
WITH cte_countries ( Country, City_code, CurrentDate, LatestRank )
          AS ( SELECT   Country ,
                        City_code ,
                        CurrentDate ,
                        RANK() OVER ( PARTITION BY Country ORDER BY CurrentDate DESC ) AS LatestRank
               FROM     #countries
             )
    SELECT  *
    FROM    cte_countries
    WHERE   LatestRank = 1
            AND City_code != 0;
-- Per country: the City_code of its most recent row together with the
-- earliest and latest date recorded for that code; groups whose code is '0'
-- are dropped.
SELECT
    T1.Country,
    MAX(T1.City_code),
    MIN(T1.[Date]),
    MAX(T1.[Date])
FROM T AS T1
WHERE T1.City_code = (
        -- City_code of the newest row of this country
        SELECT TOP 1 latest.City_Code
        FROM T AS latest
        WHERE latest.Country = T1.Country
        ORDER BY latest.[Date] DESC
    )
GROUP BY T1.Country
HAVING MAX(T1.City_Code) <> '0'
Try this:
-- One row per country for its most recent City_Code, with the first and last
-- date of that code; a City_Code of 0 is reported as NULLs.
With Cities
AS (
    select t.Country, t.City_Code, Min(t.[Date]) Date1, Max(t.[Date]) Date2,
           -- Fixed: Seq was ordered by City_Code DESC, which selects the
           -- largest code rather than the last one; it now orders each
           -- country's codes by their most recent date.
           ROW_NUMBER() OVER(PARTITION BY t.Country ORDER BY Max(t.[Date]) DESC) Seq
    from MyCountryCityTable t
    group by t.Country, t.City_Code
)
Select
    Country,
    NULLIF(City_Code,0) City_Code,
    CASE WHEN City_Code = 0 THEN NULL ELSE Date1 END Date1,
    CASE WHEN City_Code = 0 THEN NULL ELSE Date2 END Date2
From Cities Where Seq = 1
Order by Country
EDIT:
Version without the common table expression (WITH):
-- One row per country for its most recent City_Code, with the first and last
-- date of that code; a City_Code of 0 is reported as NULLs.
Select
    Country,
    NULLIF(City_Code,0) City_Code,
    CASE WHEN City_Code = 0 THEN NULL ELSE Date1 END Date1,
    CASE WHEN City_Code = 0 THEN NULL ELSE Date2 END Date2
From (select t.Country, t.City_Code, Min(t.[Date]) Date1, Max(t.[Date]) Date2,
             -- Fixed: Seq was ordered by City_Code DESC, which selects the
             -- largest code rather than the last one; it now orders each
             -- country's codes by their most recent date.
             ROW_NUMBER() OVER(PARTITION BY t.Country ORDER BY Max(t.[Date]) DESC) Seq
      from MyCountryCityTable t
      group by t.Country, t.City_Code) Cities
Where Seq = 1
Order by Country