Snowflake SQL aggregate based on multiple columns

I've got two tables of user IDs and emails.
A user can change their email but keep the same user ID (rows 2 and 5 of the USER_PLAYS table).
A user can also create a new user ID with an existing email (row 3 of the USER_PLAYS table).
I want to be able to sum up the total plays for this user into a single row.
There is also another table with sales values, from which I would like to get the total sales.
My idea is to create a unique ID that is the same across all these rows, but I'm not sure how to implement it.
Note that I've only shown one actual person, but there are several more unique people in these tables.
I am using Snowflake, as that is where the data is.
USER_PLAYS table:
|ROW|USER_ID | EMAIL |VIDEO_PLAYS|
|---|-----------|--------------------|-----------|
|1 | 1 | ab#gmail.com | 2 |
|2 | 1 | cd#gmail.com | 3 |
|3 | 3 | cd#gmail.com | 4 |
|4 | 4 | cd#gmail.com | 2 |
|5 | 4 | ef#gmail.com | 3 |
Sales Table:
|NET_SALE | EMAIL |
|-----------|-------------|
|5 | cd#gmail.com|
|10 | ef#gmail.com|
Desired Output:
|UNIQUE_ID | PLAYS |NET_SALE|
|-----------|-------|--------|
| 1 | 14 | 15 |

This may have opportunities for additional efficiencies, but I think this process works to get you the unique identifier across your user_id / email combinations.
For this process I added another column called COMMON_ID to the USER_PLAYS table. Joined with the SALES table by email, this can then be used to aggregate the sales against the COMMON_ID (see results below):
-- Create the test case
create or replace table user_plays (
    user_id varchar not null,
    email varchar not null,
    video_plays integer not null,
    common_id integer default NULL
);

insert into user_plays values
    (1, 'ab#gmail.com', 2, null),
    (1, 'cd#gmail.com', 3, null),
    (3, 'cd#gmail.com', 4, null),
    (4, 'cd#gmail.com', 2, null),
    (4, 'ef#gmail.com', 3, null),
    (5, 'jd#gmail.com', 10, null),
    (6, 'lk#gmail.com', 1, null),
    (6, 'zz#gmail.com', 2, null),
    (7, 'zz#gmail.com', 3, null);

create or replace table sales (net_sale integer, email varchar);

insert into sales values
    (5, 'cd#gmail.com'),
    (10, 'ef#gmail.com');
-- Test run
-- Create view for user IDs with multiple emails
create or replace view grp1 as (
    select user_id, count(*) as mult
    from user_plays
    group by user_id
    having count(*) > 1
);

-- Create view for emails with multiple user IDs
create or replace view grp2 as (
    select email, count(*) as mult
    from user_plays
    group by email
    having count(*) > 1
);
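As a quick sanity check before running the block below, you can query the two helper views; with the test data above, grp1 should flag user_ids 1, 4 and 6, and grp2 should flag cd#gmail.com and zz#gmail.com:
-- Users that appear with more than one email
select * from grp1;
-- Emails that appear under more than one user_id
select * from grp2;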
execute immediate $$
declare
    new_common_id integer;
    counter integer;
begin
    counter := 0;
    new_common_id := 0;

    -- Baseline: reset common_id to NULL
    update user_plays
    set common_id = NULL;

    -- Mark all unique entries with common_id = user_id
    update user_plays
    set common_id = user_id
    where email not in (select distinct email from grp2)
      and user_id not in (select distinct user_id from grp1);

    -- Set common_id to the lowest user_id value for each user_id with multiple emails
    loop
        select count(*) into :counter
        from user_plays
        where common_id is null;

        if (counter = 0) then
            break;
        end if;

        select min(user_id) into :new_common_id
        from user_plays
        where common_id is null;

        -- First pass: tag the chosen user_id and every email it uses
        update user_plays
        set common_id = :new_common_id
        where common_id is null
          and (user_id = :new_common_id
               or email in (select email from user_plays where user_id = :new_common_id));
    end loop;

    -- Update the chain where an account using a changed email created a new
    -- user_id, so it matches up with the prior group.
    update user_plays vp
    set common_id = vp2.common_id
    from (select user_id, min(common_id) as common_id
          from user_plays
          group by user_id) vp2
    where vp.user_id = vp2.user_id;
end;
$$;
-- See results
select * from user_plays;

select
    x.common_id,
    vps.video_plays,
    sum(x.net_sale) as net_sale
from (
    select common_id, sum(video_plays) as video_plays
    from user_plays
    group by common_id
) vps
join (
    select s.email, s.net_sale, max(up.common_id) as common_id
    from sales s
    join user_plays up on up.email = s.email
    group by s.email, s.net_sale
) x on vps.common_id = x.common_id
group by x.common_id, vps.video_plays;
Common ID assignment Results:
USER_ID  EMAIL         VIDEO_PLAYS  COMMON_ID
1        ab#gmail.com  2            1
1        cd#gmail.com  3            1
3        cd#gmail.com  4            1
4        cd#gmail.com  2            1
4        ef#gmail.com  3            1
5        jd#gmail.com  10           5
6        lk#gmail.com  1            6
6        zz#gmail.com  2            6
7        zz#gmail.com  3            6
Final Results:
COMMON_ID  VIDEO_PLAYS  NET_SALE
1          14           15
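Once COMMON_ID has been populated, the final aggregation can also be written a bit more directly. This is just a sketch of an equivalent query (it assumes the COMMON_ID assignment above has already run): aggregate plays per COMMON_ID, map each sale to a COMMON_ID through its email, and left join the two:
with plays as (
    select common_id, sum(video_plays) as plays
    from user_plays
    group by common_id
),
sales_by_id as (
    -- each email maps to exactly one common_id after the assignment
    select up.common_id, sum(s.net_sale) as net_sale
    from sales s
    join (select distinct email, common_id from user_plays) up
      on up.email = s.email
    group by up.common_id
)
select p.common_id as unique_id, p.plays, coalesce(sb.net_sale, 0) as net_sale
from plays p
left join sales_by_id sb on sb.common_id = p.common_id;
For the test data this returns 1 | 14 | 15 for the first group (matching the desired output), plus a zero-sale row for each of the other groups.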

Related

Earliest timestamp with join before group

I have 3 tables (users, levels, attempts) in PostgreSQL. For each user and each level I need to select the earliest attempt by attempts.created_at, and then get the sum of attempts.rate for each user.
CREATE TABLE IF NOT EXISTS users
(
id BIGSERIAL PRIMARY KEY,
nickname VARCHAR(255) UNIQUE
);
CREATE TABLE IF NOT EXISTS levels
(
id BIGSERIAL PRIMARY KEY,
title VARCHAR(255) NOT NULL
);
CREATE TABLE IF NOT EXISTS attempts
(
id BIGSERIAL PRIMARY KEY,
rate INTEGER NOT NULL,
created_at TIMESTAMP NOT NULL,
level_id BIGINT REFERENCES levels (id),
user_id BIGINT REFERENCES users (id)
);
For example, the attempts content:
id | rate | created_at | level_id | user_id
------------------------------------------------------------
1 | 10 | 2022-10-21 16:53:13.818000 | 1 | 1
2 | 20 | 2022-10-21 11:53:13.818000 | 1 | 1
3 | 30 | 2022-10-21 14:53:13.818000 | 1 | 1
4 | 40 | 2022-10-21 10:53:13.818000 | 2 | 1 -- (nickname = 'Joe')
5 | 100 | 2022-11-21 10:53:13.818000 | 1 | 2 -- (nickname = 'Max')
For level 1 and user 1 the earliest row is the one with id = 2; for level 2 it is id = 4. I need to select:
nickname | sum
-----------------
Max | 100
Joe | 60
As a result, for user Max (user with id = 2) the sum of the earliest attempts across all levels = 100. And order by sum descending.
Something like this, but how do I select only the one earliest attempt for each level before summing:
select u.nickname, sum(a.rate) as sum
from attempts a
inner join users u on a.user_id = u.id
inner join levels l on l.id = a.level_id
-- on a.created_at is the earliest for level and user
group by u.id
order by sum desc
select user_id
,sum(rate)
from
(
select distinct on (level_id, user_id) *
from attempts
order by level_id, user_id, created_at
) t
group by user_id
user_id | sum
--------+-----
      2 | 100
      1 |  60
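To also get the nickname and the descending order the question asks for, the same DISTINCT ON subquery can be joined back to users; a sketch against the question's schema:
select u.nickname, sum(a.rate) as sum
from (
    select distinct on (level_id, user_id) *
    from attempts
    order by level_id, user_id, created_at
) a
join users u on u.id = a.user_id
group by u.nickname
order by sum desc;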

How to insert first table's run-time values into a 2nd table in Oracle?

I have table A, whose columns look like below:
> id, name, salary
>
> max+1 (10), 'aa', 100
> max+1 (11), 'bb', 200
> ...
> max+1 (21), 'xx', 1000
Like that I have 10 records. Now the 10 rows are inserted without any problem; that's fine.
The 2nd table looks like below:
p_id, age, id
p_id max+1 (41), 30, (here I need to insert the same id from table A's 1st record (max+1 (10)))
p_id max+1 (42), 31, (here I need to insert the same id from table A's 2nd record (max+1 (11)))
p_id max+1 (43), 32, (here I need to insert the same id from table A's 3rd record (max+1 (12)))
...
Likewise I need to insert 10 rows in a loop. Thanks.
Use a stored procedure, and don't get the MAX()+1 value; use a sequence:
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE table1 (
id NUMBER(20,0) PRIMARY KEY,
name VARCHAR2(200),
salary NUMBER(12,2)
)
/
CREATE TABLE table2 (
p_id NUMBER(20,0) PRIMARY KEY,
age NUMBER(3,0),
id NUMBER(20,0) REFERENCES table1 (id)
)
/
CREATE SEQUENCE table1__id__seq
/
CREATE SEQUENCE table2__p_id__seq
/
CREATE PROCEDURE add_Details(
in_name TABLE1.NAME%TYPE,
in_salary TABLE1.SALARY%TYPE,
in_age TABLE2.AGE%TYPE
)
IS
p_id TABLE1.ID%TYPE;
BEGIN
INSERT INTO table1 ( id, name, salary )
VALUES ( table1__id__seq.NEXTVAL, in_name, in_salary )
RETURNING id INTO p_id;
INSERT INTO table2 ( p_id, age, id )
VALUES ( table2__p_id__seq.NEXTVAL, in_age, p_id );
END;
/
Query 1:
BEGIN
add_Details( 'aa', 100, 30 );
add_Details( 'bb', 200, 31 );
add_Details( 'cc', 300, 32 );
END;
Query 2:
SELECT * FROM table1
Results:
| ID | NAME | SALARY |
|----|------|--------|
| 1 | aa | 100 |
| 2 | bb | 200 |
| 3 | cc | 300 |
Query 3:
SELECT * FROM table2
Results:
| P_ID | AGE | ID |
|------|-----|----|
| 1 | 30 | 1 |
| 2 | 31 | 2 |
| 3 | 32 | 3 |
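Since the question mentions inserting the 10 rows in a loop, the procedure can simply be called from one. A minimal sketch (the names, salaries and ages here are made-up example values):
BEGIN
  FOR i IN 1 .. 10 LOOP
    -- example values only; substitute the real data
    add_Details( 'name_' || i, i * 100, 30 + i );
  END LOOP;
END;
/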

Update table in Postgresql by grouping rows

I want to update a table by grouping (or combining) some rows together based on a certain criteria. I basically have this table currently (I want to group by 'id_number' and 'date' and sum 'count'):
Table: foo
---------------------------------------
| id_number | date | count |
---------------------------------------
| 1 | 2001 | 1 |
| 1 | 2001 | 2 |
| 1 | 2002 | 1 |
| 2 | 2001 | 6 |
| 2 | 2003 | 12 |
| 2 | 2003 | 2 |
---------------------------------------
And I want to get this:
Table: foo
---------------------------------------
| id_number | date | count |
---------------------------------------
| 1 | 2001 | 3 |
| 1 | 2002 | 1 |
| 2 | 2001 | 6 |
| 2 | 2003 | 14 |
---------------------------------------
I know that I can easily create a new table with the pertinent info. But how can I modify an existing table like this without making a "temp" table? (Note: I have nothing against using a temporary table, I'm just interested in seeing if I can do it this way)
If you want to delete rows, you can add a primary key (to distinguish rows) and use two statements: an UPDATE for the sum and a DELETE to remove the extra rows.
You can do something like this:
create table foo (
id integer primary key,
id_number integer,
date integer,
count integer
);
insert into foo values
(1, 1 , 2001 , 1 ),
(2, 1 , 2001 , 2 ),
(3, 1 , 2002 , 1 ),
(4, 2 , 2001 , 6 ),
(5, 2 , 2003 , 12 ),
(6, 2 , 2003 , 2 );
select * from foo;
update foo
set count = count_sum
from (
select id, id_number, date,
sum(count) over (partition by id_number, date) as count_sum
from foo
) foo_added
where foo.id_number = foo_added.id_number
and foo.date = foo_added.date;
delete from foo
using (
select id, id_number, date,
row_number() over (partition by id_number, date order by id) as inner_order
from foo
) foo_ranked
where foo.id = foo_ranked.id
and foo_ranked.inner_order <> 1;
select * from foo;
You can try it here: http://rextester.com/PIL12447
With only one UPDATE (but with a trigger), you can set count to NULL and have the trigger DELETE the row in that case.
create table foo (
id integer primary key,
id_number integer,
date integer,
count integer
);
create function delete_if_count_is_null() returns trigger
language plpgsql as
$BODY$
begin
if new.count is null then
delete from foo
where id = new.id;
end if;
return new;
end;
$BODY$;
create trigger delete_if_count_is_null
after update on foo
for each row
execute procedure delete_if_count_is_null();
insert into foo values
(1, 1 , 2001 , 1 ),
(2, 1 , 2001 , 2 ),
(3, 1 , 2002 , 1 ),
(4, 2 , 2001 , 6 ),
(5, 2 , 2003 , 12 ),
(6, 2 , 2003 , 2 );
select * from foo;
update foo
set count = case when inner_order = 1 then count_sum else null end
from (
select id, id_number, date,
sum(count) over (partition by id_number, date) as count_sum,
row_number() over (partition by id_number, date order by id) as inner_order
from foo
) foo_added
where foo.id_number = foo_added.id_number
and foo.date = foo_added.date
and foo.id = foo_added.id;
select * from foo;
You can try it in: http://rextester.com/MWPRG10961
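If you would rather avoid the trigger, the UPDATE and the DELETE can also be combined into a single statement with data-modifying CTEs. Both sub-statements see the same snapshot of foo and touch disjoint sets of rows, so they don't interfere. A sketch against the same schema:
with summed as (
    -- one row per (id_number, date): the id to keep and the total count
    select min(id) as keep_id, id_number, date, sum(count) as total
    from foo
    group by id_number, date
),
removed as (
    -- delete every duplicate row that is not the keeper
    delete from foo
    using summed s
    where foo.id_number = s.id_number
      and foo.date = s.date
      and foo.id <> s.keep_id
)
update foo
set count = s.total
from summed s
where foo.id = s.keep_id;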

Insert many rows returning ids and update those ids in another table

I am working with PostgreSQL. My SQL structure:
CREATE TEMP TABLE users (
id_user serial,
user_name varchar,
id_user_description int
);
CREATE TEMP TABLE user_description (
id_user_description serial,
age int
);
users table has some users:
INSERT INTO users (user_name)
SELECT column1
FROM (
VALUES
('John'),
('Amanda')
) t;
I am trying to insert data into table user_description, and I also need to write the inserted row ids back to table users. My query is this:
WITH inserted_user_description AS (
INSERT INTO user_description (age)
SELECT age
FROM (
SELECT users.id_user,
t.column1 AS age,
t.column2 AS user_name
FROM (
VALUES
(21, 'John'),
(28, 'Amanda')
) t
INNER JOIN users ON users.user_name = t.column2
) tt
RETURNING id_user_description, tt.id_user
)
UPDATE users SET id_user_description = t.id_user_description
FROM (
SELECT id_user_description, id_user
FROM inserted_user_description
) t
WHERE users.id_user = t.id_user;
But I get error:
ERROR: missing FROM-clause entry for table "tt"
LINE 15: RETURNING id_user_description, tt.id_user
How could I fix this?
Here is a valid SQL snippet that illustrates how it works. You have 2 tables, a and b. You want to update b when you insert rows into a.
a and b schema:
CREATE TABLE a (
id serial unique,
some_int int
);
CREATE TABLE b (
id serial,
a_id int,
some_date timestamp
);
Let's insert some rows into b to match the ones we will insert in a (they are the rows we will update):
INSERT INTO b (a_id, some_date)
SELECT generate_series, null
FROM generate_series(1, 100);
Now, here is how to insert rows in a and update equivalent rows in b:
WITH inserted as (
INSERT INTO a (some_int)
SELECT *
FROM generate_series(1, 10)
RETURNING id
)
UPDATE b
SET some_date = NOW()
FROM inserted i
WHERE i.id = b.a_id
;
As you can see, 10 rows were inserted in a and the 10 equivalent rows were updated in b:
test=# SELECT * FROM a;
id | some_int
----+----------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
test=# SELECT * FROM b WHERE some_date IS NOT NULL;
id | a_id | some_date
----+------+----------------------------
1 | 1 | 2017-03-16 17:48:32.257217
2 | 2 | 2017-03-16 17:48:32.257217
3 | 3 | 2017-03-16 17:48:32.257217
4 | 4 | 2017-03-16 17:48:32.257217
5 | 5 | 2017-03-16 17:48:32.257217
6 | 6 | 2017-03-16 17:48:32.257217
7 | 7 | 2017-03-16 17:48:32.257217
8 | 8 | 2017-03-16 17:48:32.257217
9 | 9 | 2017-03-16 17:48:32.257217
10 | 10 | 2017-03-16 17:48:32.257217
(10 rows)
Update:
In your specific case, this is what I believe your query should look like (always hard to write queries without the schema!):
WITH inserted_user_description AS (
INSERT INTO user_description (age, <...>)
SELECT u.id_user,
t.column1 AS age,
<...>
t.column8 AS user_name
FROM (
VALUES
(21, <...> ,'John'),
(28, <...> ,'Amanda'),
<...>
) t
JOIN users u ON u.user_name = t.column8
RETURNING id_user_description, u.id_user
)
UPDATE users
SET id_user_description = t.id_user_description
FROM inserted_user_description t
WHERE users.id_user = t.id_user;
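One caveat with the template above: RETURNING can only reference columns of the table being inserted into, so RETURNING ... u.id_user would raise the same missing FROM-clause error as the original query. When the new rows can't be matched back to users by any column of user_description, a plpgsql loop is a simple workaround; a sketch, assuming the two-table schema from the question:
DO $$
DECLARE
    r record;
    new_id int;
BEGIN
    FOR r IN
        SELECT u.id_user, t.column1 AS age
        FROM (VALUES (21, 'John'), (28, 'Amanda')) t
        JOIN users u ON u.user_name = t.column2
    LOOP
        -- insert one description and capture its generated id
        INSERT INTO user_description (age)
        VALUES (r.age)
        RETURNING id_user_description INTO new_id;
        -- write that id back to the matching user
        UPDATE users
        SET id_user_description = new_id
        WHERE id_user = r.id_user;
    END LOOP;
END $$;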

How to copy rows into a new one-to-many relationship

I'm trying to copy a set of data in a one-to-many relationship to create a new set of the same data in a new, but unrelated, one-to-many relationship. Let's call them groups and items. Groups have a 1-* relation with items: one group has many items.
I've tried to create a CTE to do this; however, I can't get the items inserted (in y), as the newly inserted groups don't have any items associated with them yet. I think I need to be able to access old. and new. like you would in a trigger, but I can't work out how to do this.
I think I could solve this by introducing a previous-parent id into the templateitem table, or maybe a temp table with the data required to enable me to join on that, but I was wondering if it is possible to solve it this way?
SQL Fiddle keeps breaking on me, so I've put the code here as well:
DROP TABLE IF EXISTS meta.templateitem;
DROP TABLE IF EXISTS meta.templategroup;
CREATE TABLE meta.templategroup (
templategroup_id serial PRIMARY KEY,
groupname text,
roworder int
);
CREATE TABLE meta.templateitem (
templateitem_id serial PRIMARY KEY,
itemname text,
templategroup_id INTEGER NOT NULL REFERENCES meta.templategroup(templategroup_id)
);
INSERT INTO meta.templategroup (groupname, roworder) values ('Group1', 1), ('Group2', 2);
INSERT INTO meta.templateitem (itemname, templategroup_id) values ('Item1A',1), ('Item1B',1), ('Item2A',2);
WITH
x AS (
INSERT INTO meta.templategroup (groupname, roworder)
SELECT distinct groupname || '_v1', roworder FROM meta.templategroup where templategroup_id in (1,2)
RETURNING groupname, templategroup_id, roworder
),
y AS (
Insert INTO meta.templateitem (itemname, templategroup_id)
Select itemname, x.templategroup_id
From meta.templateitem i
INNER JOIN x on x.templategroup_id = i.templategroup_id
RETURNING *
)
SELECT * FROM y;
Use an auxiliary column templategroup.old_id:
ALTER TABLE meta.templategroup ADD old_id int;
WITH x AS (
INSERT INTO meta.templategroup (groupname, roworder, old_id)
SELECT DISTINCT groupname || '_v1', roworder, templategroup_id
FROM meta.templategroup
WHERE templategroup_id IN (1,2)
RETURNING templategroup_id, old_id
),
y AS (
INSERT INTO meta.templateitem (itemname, templategroup_id)
SELECT itemname, x.templategroup_id
FROM meta.templateitem i
INNER JOIN x ON x.old_id = i.templategroup_id
RETURNING *
)
SELECT * FROM y;
templateitem_id | itemname | templategroup_id
-----------------+----------+------------------
4 | Item1A | 3
5 | Item1B | 3
6 | Item2A | 4
(3 rows)
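If the auxiliary column is only needed for the copy, it can be dropped again afterwards:
ALTER TABLE meta.templategroup DROP COLUMN old_id;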
It's impossible to do that in a single plain SQL query without an additional column. You have to store the old ids somewhere. As an alternative, you can use plpgsql and an anonymous code block:
Before:
select *
from meta.templategroup
join meta.templateitem using (templategroup_id);
templategroup_id | groupname | roworder | templateitem_id | itemname
------------------+-----------+----------+-----------------+----------
1 | Group1 | 1 | 1 | Item1A
1 | Group1 | 1 | 2 | Item1B
2 | Group2 | 2 | 3 | Item2A
(3 rows)
Insert:
do $$
declare
grp record;
begin
for grp in
select distinct groupname || '_v1' groupname, roworder, templategroup_id
from meta.templategroup
where templategroup_id in (1,2)
loop
with insert_group as (
insert into meta.templategroup (groupname, roworder)
values (grp.groupname, grp.roworder)
returning templategroup_id
)
insert into meta.templateitem (itemname, templategroup_id)
select itemname || '_v1', g.templategroup_id
from meta.templateitem i
join insert_group g on grp.templategroup_id = i.templategroup_id;
end loop;
end $$;
After:
select *
from meta.templategroup
join meta.templateitem using (templategroup_id);
templategroup_id | groupname | roworder | templateitem_id | itemname
------------------+-----------+----------+-----------------+-----------
1 | Group1 | 1 | 1 | Item1A
1 | Group1 | 1 | 2 | Item1B
2 | Group2 | 2 | 3 | Item2A
3 | Group1_v1 | 1 | 4 | Item1A_v1
3 | Group1_v1 | 1 | 5 | Item1B_v1
4 | Group2_v1 | 2 | 6 | Item2A_v1
(6 rows)