SQL: Count distinct of one column based on another column

I need to count the sessions which visited a particular page once and sessions which visited the same page once or more than once. For example: consider these sessions between 1st to 4th April:
Session_id| Date
----+---------
1| 01/04/2016
1| 02/04/2016
2| 01/04/2016
3| 01/04/2016
4| 01/04/2016
4| 03/04/2016
4| 04/04/2016
I cannot do it using a subquery, as there are millions of sessions. Hence a query like this won't work for me:
-- Step 1: number of distinct visit dates per session.
with visits_per_session as (
    select
        session_id,
        count(distinct date) as no_of_visits
    from my_table
    group by session_id
),
-- Step 2: label each session; there is deliberately no ELSE branch, so a
-- session with zero non-null dates gets a NULL label.
labelled_session as (
    select
        session_id,
        case
            when no_of_visits = 1 then 'single_visit'
            when no_of_visits > 1 then 'multiple_visits'
        end as visit
    from visits_per_session
)
-- Step 3: count the sessions under each label.
select
    visit,
    count(distinct session_id) as sessions
from labelled_session
group by visit
The answer should be like this:
Visit|Sessions
single_visit|2
multiple_visit|2
Is there any way I can do something like this:
count(distinct session_id) where no_of_visits=1 and count(distinct session_id) where no_of_visits>=1 without subquery or self join?
Any help would be deeply appreciated.

You can't avoid subqueries to get this result, but you can get rid of the count(distinct) (which is probably the most expensive part):
-- Label each session without COUNT(DISTINCT): a session whose earliest and
-- latest dates differ has at least two different dates.
with session_kind as (
    select
        session_id,
        case
            when min(date) <> max(date) then 'multiple_visits'
            else 'single_visit'
        end as no_of_visits
    from table_name
    group by session_id
)
-- One output row per label with the number of sessions carrying it.
select
    no_of_visits,
    count(*) as sessions
from session_kind
group by no_of_visits

It seems you want something like this:
-- Per session: the number of distinct visit dates and a single/multiple label.
-- The labels are written with single quotes because Oracle treats
-- double-quoted text as an identifier, not as a string literal.
select
    session_id,
    count(distinct dt) as no_of_visits,
    case
        when count(distinct dt) = 1 then 'Single visit'
        else 'Multiple visits'
    end as visit
from my_table
group by session_id;
Note: I used dt rather than "date" as a column name; date is an Oracle key word, and such words should not be used as schema, table or column names.

Oracle Setup:
-- Sample data: seven (session, date) rows for sessions 1 to 4.
-- Column names come from the aliases of the first SELECT branch.
CREATE TABLE table_name AS
SELECT 1 AS Session_id, DATE '2016-04-01' AS "Date" FROM DUAL
UNION ALL SELECT 1, DATE '2016-04-02' FROM DUAL
UNION ALL SELECT 2, DATE '2016-04-01' FROM DUAL
UNION ALL SELECT 3, DATE '2016-04-01' FROM DUAL
UNION ALL SELECT 4, DATE '2016-04-01' FROM DUAL
UNION ALL SELECT 4, DATE '2016-04-03' FROM DUAL
UNION ALL SELECT 4, DATE '2016-04-04' FROM DUAL;
Query 1:
-- Label each session by its row count (more than one row = multiple visits),
-- then count the sessions per label.
WITH session_visit AS (
    SELECT
        session_id,
        CASE
            WHEN COUNT(*) > 1 THEN 'Multiple Visits'
            ELSE 'Single Visit'
        END AS visit
    FROM table_name
    GROUP BY session_id
)
SELECT
    visit,
    COUNT(*) AS Sessions
FROM session_visit
GROUP BY visit;
Output:
VISIT SESSIONS
--------------- ----------
Multiple Visits 2
Single Visit 2
Query 2:
-- Number the rows inside each session (arbitrary order via ROWNUM).
WITH numbered AS (
    SELECT ROW_NUMBER() OVER ( PARTITION BY session_id ORDER BY ROWNUM ) AS rn
    FROM table_name
),
-- Optional trimming step: only the 1st and 2nd row of a session are counted below.
first_two AS (
    SELECT rn
    FROM numbered
    WHERE rn <= 2
),
-- rn = 1 occurs once per session; rn = 2 occurs once per session that has
-- at least two rows.
totals AS (
    SELECT
        COUNT( CASE WHEN rn = 1 THEN 1 END ) AS first_visit,
        COUNT( CASE WHEN rn = 2 THEN 1 END ) AS multiple_visits
    FROM first_two
)
-- Sessions with exactly one row = all sessions minus those with a second row.
SELECT
    first_visit - multiple_visits AS single_visit,
    multiple_visits
FROM totals;
Output:
SINGLE_VISIT MULTIPLE_VISITS
------------ ---------------
2 2

Related

How can i find rows before a specific value?

I have the rows below, and I want to select all the rows that come before the row with type "shop". I tried using CASE in the WHERE clause, but I didn't get any result. How can I do it?
|id|visitnumber|type |
|01| 1|register|
|01| 2|visit |
|01| 3|visit |
|01| 4|shop |
|01| 5|visit |
For example, what I want to get is the visitnumber before type = "shop".
It would be very helpful, because what I'm trying to do is get all the actions that happened before a specific event in BigQuery.
|id|numberofvisits|
|01| 3|
One method uses correlated subqueries:
-- Per id, count the rows whose visitnumber is below that id's first 'shop'
-- visitnumber. Ids without a 'shop' row compare against NULL and drop out.
select
    t.id,
    count(*)
from t
where t.visitnumber < (
    select min(t2.visitnumber)
    from t t2
    where t2.id = t.id
      and t2.type = 'shop'
)
group by t.id;
However, in BigQuery, I prefer an approach using window functions:
-- Attach to every row the first 'shop' visitnumber of its id (NULL if none).
with with_first_shop as (
    select
        t.*,
        min(if(type = 'shop', visitnumber, null)) over (partition by id) as visitnumber_shop
    from t
)
-- COUNTIF treats the NULL comparison as not-true, so ids without a 'shop'
-- row are kept with a count of 0.
select
    id,
    countif(visitnumber < visitnumber_shop)
from with_first_shop
group by id;
This has the advantage of keeping all ids even those that don't have a "shop" type.
One option uses a subquery for filtering:
-- Per id, count the rows that precede that id's first 'shop' row.
-- The inner scan is aliased t1 (the original aliased it t while referring to
-- t1, which left t1 undefined and shadowed the outer alias).
select
    t.id,
    count(*) as number_of_visits
from mytable t
where t.visit_number < (
    select min(t1.visit_number)
    from mytable t1
    where t1.id = t.id
      and t1.type = 'shop'
)
group by t.id
You can also use window functions:
-- has_shop is a running count of 'shop' rows up to and including the current
-- row, in visit_number order, within each id.
with flagged as (
    select
        t.*,
        countif(type = 'shop') over (partition by id order by visit_number) as has_shop
    from mytable t
)
-- Rows with has_shop = 0 are the ones seen before any 'shop' row.
select
    id,
    count(*) as number_of_visits
from flagged
where has_shop = 0
group by id
Below option is for BigQuery Standard SQL
#standardSQL
-- Builds ',type1,type2,...,' per id (ordered by visitnumber), captures
-- everything before the first ',shop,' and counts the captured items.
-- The leading ',' adds one empty item to the split, hence the "- 1".
-- The trailing ',' in both the string and the pattern makes 'shop' match as a
-- whole item, so a type such as 'shopping' is not mistaken for 'shop'.
-- Ids with no 'shop' item yield NULL.
SELECT
  id,
  ARRAY_LENGTH(
    SPLIT(
      REGEXP_EXTRACT(
        ',' || STRING_AGG(type ORDER BY visitnumber) || ',',
        r'^(.*?),shop,'
      )
    )
  ) - 1 AS number_of_visits_before_first_shop
FROM `project.dataset.table`
GROUP BY id
You can test, play with above using dummy data as in below example
#standardSQL
-- Dummy data: id '01' has a shop at visit 4, '02' has no shop, '03' starts with a shop.
WITH `project.dataset.table` AS (
  SELECT '01' id, 1 visitnumber, 'register' type UNION ALL
  SELECT '01', 2, 'visit' UNION ALL
  SELECT '01', 3, 'visit' UNION ALL
  SELECT '01', 4, 'shop' UNION ALL
  SELECT '01', 5, 'visit' UNION ALL
  SELECT '02', 1, 'register' UNION ALL
  SELECT '02', 2, 'visit' UNION ALL
  SELECT '02', 3, 'visit' UNION ALL
  SELECT '03', 1, 'shop' UNION ALL
  SELECT '03', 2, 'shop' UNION ALL
  SELECT '03', 3, 'visit'
)
-- Builds ',type1,type2,...,' per id, captures everything before the first
-- ',shop,' and counts the captured items ("- 1" removes the empty leading item).
-- Delimiting 'shop' on both sides keeps types that merely start with 'shop'
-- (e.g. 'shopping') from matching.
SELECT
  id,
  ARRAY_LENGTH(
    SPLIT(
      REGEXP_EXTRACT(
        ',' || STRING_AGG(type ORDER BY visitnumber) || ',',
        r'^(.*?),shop,'
      )
    )
  ) - 1 AS number_of_visits_before_first_shop
FROM `project.dataset.table`
GROUP BY id
with result
Row id number_of_visits_before_first_shop
1 01 3
2 02 null
3 03 0
This is the query i run on Big Query with an Analytics 360 test dataset:
-- For each visitor session, count the hits that occurred before the first
-- 'Quickview Click' event of that same session.
select
    id,
    visitnumber,
    countif(hit_number < hitnumber_quickviewclick) as hitsprev_quickviewclick
from (
    select
        a.fullVisitorID as id,
        a.visitnumber as visitnumber,
        h.hitNumber as hit_number,
        -- The window is per (visitor, session): the result is grouped per
        -- session, and per the GA360 export schema hitNumber restarts at 1 in
        -- each session, so a visitor-wide minimum would mix sessions.
        min(case when h.eventInfo.eventAction = 'Quickview Click' then h.hitNumber end)
            over (partition by a.fullVisitorID, a.visitnumber) as hitnumber_quickviewclick
    from `bigquery-public-data.google_analytics_sample.ga_sessions_20170725` as a
    cross join unnest(a.hits) as h
) as T
-- Group by column names rather than positions so a reordered select list
-- cannot silently change the grouping.
group by id, visitnumber;
I wanted to write a query to find the total number of hits before the event action 'Quickview Click' occurred. If this is wrong or can be improved, let me know!
Thanks a lot, guys!
This is how I would approach in SQL in general:
-- Per id, count the rows that come before that id's first 'shop' row.
-- A row qualifies when no 'shop' row of the same id has a visitnumber at or
-- below its own. Sequence is taken from visitnumber (id identifies the
-- visitor, not the row order), and all row types are counted so that the
-- 'register' row is included, matching the expected result of 3.
-- Ids that never have a 'shop' row get all of their rows counted.
select
    yt.id,
    count(*) as numberofvisits
from yourtable yt
where not exists (
    select 1
    from yourtable yt2
    where yt2.id = yt.id
      and yt2.type = 'shop'
      and yt2.visitnumber <= yt.visitnumber
)
group by yt.id
However, I would very much think about situations when we want to find visits before the next shop... And the next shop... And the next shop. For that purpose you could find out the ids of shop and group by intervals.

Count number of events before and after a particular event in SQL?

I have a table containing date and events. There is event named 'A'. I want to find out how many events occurred before and after event 'A' in Sql Bigquery.
for Example,
User Date Events
123 2018-02-13 D
123 2018-02-12 B
123 2018-02-10 C
123 2018-02-11 A
123 2018-02-01 X
The answer would be something like this.
User Event Before After
123 A 2 2
I have tried many queries, but none of them work. Any idea how to solve this problem?
below is for BigQuery Standard SQL
#standardSQL
-- Dummy data standing in for the real events table.
WITH `project.dataset.events` AS (
  SELECT 123 AS user, '2018-02-13' AS dt, 'D' AS event UNION ALL
  SELECT 123, '2018-02-12', 'B' UNION ALL
  SELECT 123, '2018-02-11', 'A' UNION ALL
  SELECT 123, '2018-02-10', 'C' UNION ALL
  SELECT 123, '2018-02-01', 'X'
),
-- For every row: how many rows of the same user sort strictly before / after it by dt.
counted AS (
  SELECT
    user,
    event,
    COUNT(*) OVER (
      PARTITION BY user ORDER BY dt
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS before,
    COUNT(*) OVER (
      PARTITION BY user ORDER BY dt
      ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
    ) AS after
  FROM `project.dataset.events`
)
-- Keep only the rows for event 'A'.
SELECT
  user,
  event,
  before,
  after
FROM counted
WHERE event = 'A'
For each "A", you can get the number of events to the next "A" using row_number() and lead():
-- Give every row a sequence number in date order.
with sequenced as (
    select
        t.*,
        row_number() over (order by date) as seqnum
    from t
)
-- After filtering to 'A' rows, LEAD sees only the next 'A'; the gap between
-- the two sequence numbers, minus one, is the number of rows in between.
select
    s.*,
    (lead(seqnum) over (order by date) - seqnum - 1) as num_other_events
from sequenced s
where event = 'A';
This produces the results for each "A". Given that you have three "A"s in your sample data and only want "2", I'm not sure what logic is used for that.
If you want to count number of events as they appear in the table before the row with event A, there is no way to do this because BigQuery doesn't preserve physical order of rows in a table.
If you want to count Before and After using the date column, you can do
WITH
  -- Dummy data: one row per event with its calendar date.
  events AS (
    SELECT DATE '2018-02-13' AS event_date, 'D' AS event UNION ALL
    SELECT DATE '2018-02-12', 'B' UNION ALL
    SELECT DATE '2018-02-10', 'C' UNION ALL
    SELECT DATE '2018-02-11', 'A' UNION ALL
    SELECT DATE '2018-02-01', 'X'
  ),
  -- The reference row(s): event 'A'.
  event_a AS (
    SELECT
      event_date,
      event
    FROM events
    WHERE event = 'A'
  )
-- Pair every event with the 'A' row and count those dated before / after it.
SELECT
  ANY_VALUE(event_a.event) AS Event,
  COUNTIF(events.event_date < event_a.event_date) AS Before,
  COUNTIF(events.event_date > event_a.event_date) AS After
FROM events
CROSS JOIN event_a
Hope this answers your question
-- Scratch table with the sample events (dates kept as ISO 'yyyy-mm-dd' text,
-- which sorts chronologically).
create table #temp (T_date varchar(100), Events varchar(100));

insert into #temp (T_date, Events) values
    ('2018-02-13', 'A'),
    ('2018-02-12', 'B'),
    ('2018-02-10', 'C'),
    ('2018-02-11', 'A'),
    ('2018-02-01', 'X'),
    ('2018-02-06', 'A');

-- Difference between the row numbers of the last and the first 'A' row.
-- Rows are numbered by T_date: numbering by a constant expression leaves the
-- order up to the engine, so the result would not be repeatable.
select max(rn) - min(rn)
from (
    select
        Events,
        row_number() over (order by T_date) as rn
    from #temp
) a
where Events = 'A';

Updating columns in Oracle based on values in other tables

I am fairly new to creating and altering tables in SQL (Oracle) and have a question involving updating one table based on values in others.
Say I have table A:
ID Date Status
--- --- ---
1 1/1/2000 Active
2 5/10/2007 Inactive
2 2/15/2016 Active
3 10/1/2013 Inactive
4 1/11/2004 Inactive
5 4/5/2012 Inactive
5 6/12/2014 Active
and table B:
ID Date Status Number of Records in A
--- --- --- ---
1
2
3
4
5
What is the best way to update table B to get the most recent Date and Status of each item and count of records in A? I know I could join tables but I would like B to exist as its own table.
Oracle lets you assign multiple columns at once in an update statement. So, you can do:
-- Fill b from a in one pass: for each b.id take the latest dt, the status
-- found on the latest-dt row(s), and the number of matching rows in a.
update b
set (dt, status, ct) = (
    select
        max(a.dt),
        -- "last" over ascending dt selects the same rows as "first" over descending dt
        max(a.status) keep (dense_rank last order by a.dt),
        count(*)
    from a
    where a.id = b.id
);
You can basically use the subquery -- with a group by -- if you want the results for all ids as a query:
-- One row per id: latest dt, the status on the latest-dt row(s), and the
-- number of rows for that id. id is selected so each result row can be tied
-- back to the id it describes.
select
    id,
    max(dt) as dt,
    max(status) keep (dense_rank first order by dt desc) as status,
    count(*) as ct
from a
group by id;
You can also use create table as or insert into to put the records directly into b, without having to match them up using update.
Something like this. If you already have a table B and you need to populate it with the values from this query, or if you need to create a new table B with these values, adapt as needed. NOTE: I used dt as a column name, since "date" is a reserved word in Oracle. (For the same reason I used "ct" for "count".)
with
    -- Inline sample data standing in for table A.
    table_A as (
        select 1 as id, to_date( '1/1/2000', 'mm/dd/yyyy') as dt, 'Active' as status from dual
        union all select 2, to_date('5/10/2007', 'mm/dd/yyyy'), 'Inactive' from dual
        union all select 2, to_date('2/15/2016', 'mm/dd/yyyy'), 'Active' from dual
        union all select 3, to_date('10/1/2013', 'mm/dd/yyyy'), 'Inactive' from dual
        union all select 4, to_date('1/11/2004', 'mm/dd/yyyy'), 'Inactive' from dual
        union all select 5, to_date(' 4/5/2012', 'mm/dd/yyyy'), 'Inactive' from dual
        union all select 5, to_date('6/12/2014', 'mm/dd/yyyy'), 'Active' from dual
    ),
    -- Rank each id's rows newest-first and attach the per-id row count.
    prep as (
        select
            id,
            dt,
            status,
            row_number() over (partition by id order by dt desc) as rn,
            count(*) over (partition by id) as ct
        from table_A
    )
-- Keep only the newest row of each id.
select
    id,
    to_char(dt, 'mm/dd/yyyy') as dt,
    status,
    ct
from prep
where rn = 1
;
ID DT STATUS CT
---------- ---------- -------- ----------
1 01/01/2000 Active 1
2 02/15/2016 Active 2
3 10/01/2013 Inactive 1
4 01/11/2004 Inactive 1
5 06/12/2014 Active 2
Added: You mentioned you are pretty new at this... so: for example, if you need to create table_B with these results, and table_A already exists and is populated: FIRST, you will not need the "table_A" factored subquery in my solution; and SECOND, you will create table_B with something like
create table table_B as
with
prep ( .....) -- rest of the solution here, up to and including the ;

How to do sorting and then numbering on an Oracle database

As an example I have a database with the following information
Name Number
Boris
Trevor
Arthur
bessie
big Dave
BOB
I want to be able to sort that data in the below order and then add a number to the number column in that specific order
Name Number
Arthur 1
BOB 2
Boris 3
big Dave 4
bessie 5
Trevor 6
I can select using the order I have specified using
-- Flags with 1 the first row (by NAME) within each group of names that are
-- equal when lower-cased; every other row in the group gets 0.
select
    t.NAME,
    case
        when row_number() over (partition by lower(t.NAME) order by t.NAME) = 1 then 1
        else 0
    end as result
from DB.TABLE t;
but I then have no idea how to apply the numbers to the numbers column.
If I try a different method of sorting, I can use a sequence to apply the numbers but the order is not what I want. It seems to be the row_number() function that is causing me problems.
Any help would be appreciated.
I think what you're after is something like:
-- Inline sample rows standing in for a real table.
with sample_data as (
    select 'Boris' as name from dual
    union all select 'Trevor' from dual
    union all select 'BO Derek' from dual
    union all select 'Arthur' from dual
    union all select 'big dave' from dual
    union all select 'big Dave' from dual
    union all select 'BOB' from dual
    union all select 'BORAT' from dual
    union all select 'Brian' from dual
    union all select 'Big Bad Dom' from dual
),
-- Number the names by upper-cased first letter, then by the name itself.
numbered as (
    select
        name,
        row_number() over (order by upper(substr(name, 1, 1)), name) as row_num
    from sample_data
)
select
    name,
    row_num
from numbered
order by row_num;
NAME ROW_NUM
----------- ----------
Arthur 1
BO Derek 2
BOB 3
BORAT 4
Big Bad Dom 5
Boris 6
Brian 7
big Dave 8
big dave 9
Trevor 10
To update a table, you'd do something like (assuming name is a unique column):
-- Writes each row's position (by upper-cased first letter, then name) into
-- the number column. Rows are matched on name, so name must be unique.
-- NUMBER is an Oracle reserved word and cannot be used unquoted as a column
-- name; the quoted form must match the case the column was created with
-- (adjust "NUMBER" if the column was created as e.g. "number").
merge into some_table tgt
using (
    select
        name,
        row_number() over (order by upper(substr(name, 1, 1)), name) as row_num
    from some_table
) src
on (tgt.name = src.name)
when matched then
    update set tgt."NUMBER" = src.row_num;
Use a MERGE statement:
-- Numbers every row by lower-cased name and writes the number back,
-- matching source and target rows on rowid.
merge into the_table tgt
using (
    select
        rowid as rid,
        row_number() over (order by lower(name)) as result
    from the_table
) numbered
on (numbered.rid = tgt.rowid)
when matched then
    update set "number" = numbered.result;
I am not sure what the CASE should do. It only returns 1 or 0 but the expected result shows you want numbers from 1 to 6, so I removed the CASE
If you have a proper primary key on the table, it's better to use that instead of rowid
Try this.
-- Lists each name with its 1-based position in name order.
-- The position column is aliased row_num because NUMBER is an Oracle
-- reserved word and cannot be used as an unquoted alias.
select
    t.NAME,
    row_number() over (order by t.NAME) as row_num
from DB.TABLE t
order by t.NAME;
Maybe you are looking to update db.table in that case:
-- Stores each row's 1-based position in name order.
-- The ranking is computed over the whole table first and only then filtered
-- to the current row; filtering to one name before ROW_NUMBER() would assign
-- 1 to every row. Assumes name is unique (otherwise the lookup returns more
-- than one row).
-- NUMBER is an Oracle reserved word, so the column is quoted; adjust the case
-- to match how the column was created.
update DB.TABLE t
set t."NUMBER" = (
    select ranked.row_num
    from (
        select
            t1.name,
            row_number() over (order by t1.name) as row_num
        from DB.TABLE t1
    ) ranked
    where ranked.name = t.name
);
Thanks all for your suggestions.
I went with this hacky adaptation of the answer by @a_horse_with_no_name
-- Throwaway sequence that supplies the numbers written by the MERGE below.
CREATE SEQUENCE NEWSEQ
START WITH 1
MAXVALUE 999999999999999999999999999
MINVALUE 1;
-- Assign the next sequence value to every row, matching source and target on rowid.
-- NOTE(review): nr.NAME and nr.result are selected but never referenced by
-- the UPDATE branch.
-- NOTE(review): the USING query has no ORDER BY, so which row receives which
-- NEXTVAL presumably depends on the order rows are processed - confirm the
-- resulting numbering is the intended one.
merge into DB.TABLE t
using (
select rowid as rid, DB.TABLE.NAME, case
when row_number() over(partition by lower(DB.TABLE.NAME )
order by DB.TABLE.NAME ) = 1
then 1
else 0
end as result
from DB.TABLE
) nr on (nr.rid = t.rowid)
when matched then update
-- NOTE(review): NUMBER is an Oracle reserved word; confirm this unquoted
-- column reference is accepted in the target environment.
set NUMBER = NEWSEQ.NEXTVAL;
-- The sequence is only needed for this one-off renumbering.
drop sequence NEWSEQ;
It may not be the most efficient way to do it, but it works

ORDER BY CASE is not working?

Hi I have SQL statement in DB2 which is working.
select distinct 'IN' as STATUS,
(select count(*) from table.......)
from table
UNION ALL
select distinct 'OUT',
(select count(*) from table.......)
from table
UNION ALL
select distinct 'FINISHED',
(select count(*) from table.......)
from table
order by status
But if I change the last line to
order by
case STATUS
when 'IN' then 1
when 'OUT' then 2
when 'FINISHED' then 3
end
My query does not work.
Can someone tell me how to solve this?
Thanks
Try wrapping the UNION into a derived table and order on that:
select *
from (
.... here goes your statement ...
) t
order by
case STATUS
when 'IN' then 1
when 'OUT' then 2
when 'FINISHED' then 3
end
you could always add the sort # to the status:
select distinct '1-IN' as STATUS,
(select count(*) from table.......)
from table
UNION ALL
select distinct '2-OUT',
(select count(*) from table.......)
from table
UNION ALL
select distinct '3-FINISHED',
(select count(*) from table.......)
from table
order by status
Hello, try this; it should work if I remember correctly:
order by
case
when STATUS='IN' then 1
when STATUS='OUT' then 2
when STATUS='FINISHED' then 3
end
you could also name this when finishing
end as field_name