DB-Fiddle
-- Fixture: sales per event_date, country and channel.
CREATE TABLE sales (
    id         SERIAL PRIMARY KEY,
    event_date DATE,
    country    VARCHAR,
    channel    VARCHAR,
    sales      DECIMAL
);

-- Amounts are supplied as quoted literals and coerced to DECIMAL on insert.
INSERT INTO sales (event_date, country, channel, sales)
VALUES
    -- 2020-02-08 (day total 2900)
    ('2020-02-08', 'DE', 'channel_01', '500'),
    ('2020-02-08', 'DE', 'channel_02', '400'),
    ('2020-02-08', 'DE', 'channel_03', '200'),
    ('2020-02-08', 'FR', 'channel_01', '900'),
    ('2020-02-08', 'FR', 'channel_02', '800'),
    ('2020-02-08', 'NL', 'channel_01', '100'),
    -- 2020-04-15 (day total 2600)
    ('2020-04-15', 'DE', 'channel_01', '700'),
    ('2020-04-15', 'FR', 'channel_01', '500'),
    ('2020-04-15', 'NL', 'channel_01', '850'),
    ('2020-04-15', 'NL', 'channel_02', '250'),
    ('2020-04-15', 'NL', 'channel_03', '300');
Expected Result:
event_date | country | share_per_day_per_country
------------|-------------|----------------------------------------------
2020-02-08 | DE | 0.379 (=1100/2900)
2020-02-08 | FR | 0.586 (=1700/2900)
2020-02-08 | NL | 0.034 (=100/2900)
------------|-------------|----------------------------------------------
2020-04-15 | DE | 0.269 (=700/2600)
2020-04-15 | FR | 0.192 (=500/2600)
2020-04-15 | NL | 0.538 (=1400/2600)
I want to calculate the sales share per country per day as it is done in the question here.
However, since I added the column channel in the database I am not getting the right shares anymore using this query:
-- Groups on (event_date, country, sales), so every distinct sales value keeps
-- its own row; each row's sales is divided by the sum of the grouped sales
-- values that share its event_date.
SELECT
    s.event_date,
    s.country,
    s.sales,
    s.sales / SUM(s.sales) OVER (PARTITION BY s.event_date) AS share_per_day_per_country
FROM sales AS s
GROUP BY
    s.event_date,
    s.country,
    s.sales
ORDER BY
    s.event_date,
    s.country;
How do I need to modify this query to get the expected results?
here is one way:
-- Country total divided by day total, both computed as window sums over the
-- raw rows. DISTINCT then collapses the identical rows that the per-channel
-- input produces for one (event_date, country).
SELECT DISTINCT
    s.event_date,
    s.country,
    SUM(s.sales) OVER per_day_country / SUM(s.sales) OVER per_day AS share_per_day
FROM sales AS s
WINDOW
    per_day         AS (PARTITION BY s.event_date),
    per_day_country AS (PARTITION BY s.event_date, s.country)
ORDER BY
    s.event_date,
    s.country;
or
-- Share of each country's sales in the day's total.
-- The derived table tags every sales row with the total of its event_date;
-- the outer query sums per (event_date, country) and divides by that total.
-- The day total is constant within a group, so MAX() merely lifts it into
-- the aggregated row.
SELECT
    d.event_date,
    d.country,
    SUM(d.sales) / MAX(d.total_sales_per_day) AS share_per_day_per_country
FROM (
    SELECT
        s.event_date,
        s.country,
        s.sales,
        SUM(s.sales) OVER (PARTITION BY s.event_date) AS total_sales_per_day
    FROM sales AS s
) AS d
GROUP BY
    d.event_date,
    d.country
ORDER BY
    d.event_date,
    d.country;
Related
DB-Fiddle
/* Table Campaigns */
CREATE TABLE campaigns (
    id          SERIAL PRIMARY KEY,
    insert_time DATE,
    campaign    VARCHAR,
    tranches    VARCHAR,
    quantity    DECIMAL
);

INSERT INTO campaigns (insert_time, campaign, tranches, quantity)
VALUES
    ('2021-01-01', 'C001', 't', '500'),
    ('2021-01-01', 'C002', 't', '600'),
    ('2021-01-02', 'C001', 't', '500'),
    ('2021-01-02', 'C002', 't', '600');

/* Table Tranches */
CREATE TABLE tranches (
    id          SERIAL PRIMARY KEY,
    insert_time DATE,
    campaign    VARCHAR,
    tranches    VARCHAR,
    quantity    DECIMAL
);

INSERT INTO tranches (insert_time, campaign, tranches, quantity)
VALUES
    -- 2021-01-01
    ('2021-01-01', 'C001', 't1', '200'),
    ('2021-01-01', 'C001', 't2', '120'),
    ('2021-01-01', 'C001', 't3', '180'),
    ('2021-01-01', 'C002', 't1', '350'),
    ('2021-01-01', 'C002', 't2', '250'),
    -- 2021-01-02
    ('2021-01-02', 'C001', 't1', '400'),
    ('2021-01-02', 'C001', 't2', '120'),
    ('2021-01-02', 'C001', 't3', '180'),
    ('2021-01-02', 'C002', 't1', '350'),
    ('2021-01-02', 'C002', 't2', '250');
Expected Result:
insert_time | campaign | tranches | quantity_campaigns | quantity_tranches | check
--------------|------------|------------|---------------------|---------------------|-----------
2021-01-01 | C001 | t | 500 | 500 | ok
2021-01-01 | C002 | t | 600 | 600 | ok
--------------|------------|------------|---------------------|---------------------|------------
2021-01-02 | C001 | t | 500 | 700 | error
2021-01-02 | C002 | t | 600 | 600 | ok
I want to compare the total quantity per campaign in table campaigns with the total quantity per campaign in table tranches.
So far I have been able to develop this query:
-- Joins campaigns to tranches on campaign only, then labels each joined row
-- by comparing a windowed MAX of the campaign quantity with a windowed SUM of
-- the tranche quantity.
SELECT
    c.insert_time AS insert_time,
    c.campaign    AS campaign,
    c.tranches    AS tranches,
    c.quantity    AS quantity_campaigns,
    t.quantity    AS quantity_tranches,
    CASE
        WHEN MAX(c.quantity) OVER (PARTITION BY c.insert_time, c.campaign)
           = SUM(t.quantity) OVER (PARTITION BY t.insert_time, t.campaign)
            THEN 'ok'
        ELSE 'error'
    END AS check
FROM campaigns AS c
LEFT JOIN tranches AS t
    ON c.campaign = t.campaign
ORDER BY
    c.insert_time,
    c.campaign,
    c.tranches,
    c.quantity,
    t.quantity;
However, it does not give me the expected result.
What do I need to change to make it work?
I think the result you're looking for should be something like this. The problem is that you're trying to aggregate over two groupings after a join, which will yield either too many rows or incorrect calculations. By aggregating each table in its own CTE, and joining the CTEs only after the aggregation has occurred, you can achieve the results you are looking for. See my example below:
-- Reconciles campaign quantities with the summed tranche quantities.
-- Both sides are aggregated before the join, so the join cannot multiply rows.
WITH campaign_agg AS (
    -- One row per (insert_time, campaign, tranches) with its largest quantity.
    SELECT
        c.insert_time,
        c.campaign,
        c.tranches,
        MAX(c.quantity) AS c_quantity
    FROM campaigns AS c
    GROUP BY
        c.insert_time,
        c.campaign,
        c.tranches
),
tranch_agg AS (
    -- One row per (insert_time, campaign) with the tranche quantities summed.
    SELECT
        t.insert_time,
        t.campaign,
        SUM(t.quantity) AS t_sum
    FROM tranches AS t
    GROUP BY
        t.insert_time,
        t.campaign
)
SELECT
    c.insert_time,
    c.campaign,
    c.tranches,
    c.c_quantity,
    t.t_sum,
    -- With no matching tranche rows t_sum is NULL, the comparison is not true,
    -- and the campaign is reported as 'error' instead of being dropped.
    CASE WHEN c.c_quantity = t.t_sum THEN 'ok' ELSE 'error' END AS check
FROM campaign_agg AS c
LEFT JOIN tranch_agg AS t
    ON t.insert_time = c.insert_time
    AND t.campaign = c.campaign
ORDER BY
    c.insert_time,
    c.campaign,
    c.tranches;
I have a db-fiddle for this as well: https://www.db-fiddle.com/f/33x4upVEcgTMNehiHCKzfN/1
DB-Fiddle
-- Compares the campaign quantity with the tranche total for the same
-- (insert_time, campaign). Tranches are summed in a derived table first so
-- that the join matches at most one tranche row per campaign row.
SELECT
    c.insert_time    AS insert_time,
    c.campaign       AS campaign,
    c.tranches       AS tranches,
    SUM(c.quantity)  AS quantity_campaigns,
    SUM(t1.quantity) AS quantity_tranches,
    -- IS DISTINCT FROM treats NULL as a comparable value, so a campaign with
    -- no tranche rows is flagged 'error'; a plain <> would yield NULL there
    -- and fall through to 'ok'.
    CASE
        WHEN SUM(c.quantity) IS DISTINCT FROM SUM(t1.quantity) THEN 'error'
        ELSE 'ok'
    END AS check
FROM campaigns AS c
LEFT JOIN (
    SELECT
        t.insert_time   AS insert_time,
        t.campaign      AS campaign,
        SUM(t.quantity) AS quantity
    FROM tranches AS t
    GROUP BY
        t.insert_time,
        t.campaign
) AS t1
    ON t1.insert_time = c.insert_time
    AND t1.campaign = c.campaign
GROUP BY
    c.insert_time,
    c.campaign,
    c.tranches
ORDER BY
    c.insert_time,
    c.campaign,
    c.tranches;
DB-Fiddle
-- Fixture: costs per event_date, country and channel.
CREATE TABLE costs (
    id         SERIAL PRIMARY KEY,
    event_date DATE,
    country    VARCHAR,
    channel    VARCHAR,
    costs      DECIMAL
);

-- Every row of one event_date carries the same costs value.
INSERT INTO costs (event_date, country, channel, costs)
VALUES
    -- 2020-02-08
    ('2020-02-08', 'DE', 'channel_01', '400'),
    ('2020-02-08', 'DE', 'channel_02', '400'),
    ('2020-02-08', 'DE', 'channel_03', '400'),
    ('2020-02-08', 'FR', 'channel_01', '400'),
    ('2020-02-08', 'FR', 'channel_02', '400'),
    ('2020-02-08', 'NL', 'channel_01', '400'),
    -- 2020-04-15
    ('2020-04-15', 'DE', 'channel_01', '300'),
    ('2020-04-15', 'FR', 'channel_01', '300'),
    ('2020-04-15', 'NL', 'channel_01', '300'),
    ('2020-04-15', 'NL', 'channel_02', '300'),
    ('2020-04-15', 'NL', 'channel_03', '300');
Expected Result:
event_date | country | costs |
--------------|--------------|-----------------------------|---------
2020-02-08 | DE | 240 (=400 x 0.6) |
2020-02-08 | FR | 120 (=400 x 0.3) |
2020-02-08 | NL | 40 (=400 x 0.1) |
--------------|--------------|-----------------------------|---------
2020-04-15 | DE | 180 (=300 x 0.6) |
2020-04-15 | FR | 90 (=300 x 0.3) |
2020-04-15 | NL | 30 (=300 x 0.1) |
I want to split the costs based on pre-defined shares to each country per day.
The shares are: DE = 0.6, FR = 0.3, NL = 0.1
-- Distinct (event_date, country, costs) combinations, ordered by date and
-- country; no share is applied yet.
SELECT
    c.event_date,
    c.country,
    c.costs
FROM costs AS c
GROUP BY
    c.event_date,
    c.country,
    c.costs
ORDER BY
    c.event_date,
    c.country;
Do you have an idea what query I need to achieve this?
Just use a CASE expression:
-- Applies the hard-coded country share to the costs value. Grouping on
-- (event_date, country, costs) leaves one row per country and day when all
-- channel rows carry the same costs. Countries outside the list yield NULL.
SELECT
    c.event_date,
    c.country,
    CASE c.country
        WHEN 'DE' THEN 0.6 * c.costs
        WHEN 'FR' THEN 0.3 * c.costs
        WHEN 'NL' THEN 0.1 * c.costs
    END AS allocated_costs
FROM costs AS c
GROUP BY
    c.event_date,
    c.country,
    c.costs
ORDER BY
    c.event_date,
    c.country;
You can more conveniently store the values in a derived table, if you prefer:
-- Same allocation, with the country shares held in an inline lookup that is
-- joined on country; countries missing from the lookup drop out of the result.
WITH country_share (country, alloc) AS (
    VALUES
        ('DE', 0.6),
        ('FR', 0.3),
        ('NL', 0.1)
)
SELECT
    c.event_date,
    c.country,
    (v.alloc * c.costs) AS allocated_costs
FROM costs AS c
INNER JOIN country_share AS v
    ON v.country = c.country
GROUP BY
    c.event_date,
    c.country,
    c.costs,
    v.alloc
ORDER BY
    c.event_date,
    c.country;
Here is a db<>fiddle.
The best way would be to add those rates to a table and use that table in your query.
To illustrate how you can do it with hard-coded values in your query:
-- Allocates costs to each country using hard-coded shares.
-- Grouping on (event_date, country, costs) collapses the per-channel rows,
-- which all carry the same costs value within a day in the sample data.
-- Countries not listed in the CASE get a NULL allocation.
SELECT
    c.event_date,
    c.country,
    c.costs * CASE c.country
        WHEN 'DE' THEN 0.6
        WHEN 'FR' THEN 0.3
        WHEN 'NL' THEN 0.1
    END AS allocated_costs
FROM costs AS c
GROUP BY
    c.event_date,
    c.country,
    c.costs
ORDER BY
    c.event_date,
    c.country;
DB-Fiddle
-- Fixture: sales per event_date, country and channel (three event dates).
CREATE TABLE sales (
    id         SERIAL PRIMARY KEY,
    event_date DATE,
    country    VARCHAR,
    channel    VARCHAR,
    sales      DECIMAL
);

INSERT INTO sales (event_date, country, channel, sales)
VALUES
    ('2020-01-04', 'DE', 'channel_01', '500'),
    ('2020-01-04', 'FR', 'channel_01', '900'),
    ('2020-01-04', 'NL', 'channel_01', '100'),
    ('2020-02-20', 'DE', 'channel_01', '0'),
    ('2020-02-20', 'FR', 'channel_01', '0'),
    ('2020-02-20', 'NL', 'channel_01', '0'),
    ('2020-03-15', 'DE', 'channel_01', '700'),
    ('2020-03-15', 'FR', 'channel_01', '500'),
    ('2020-03-15', 'NL', 'channel_03', '300');

/* Table Dates */
CREATE TABLE dates (
    id   SERIAL PRIMARY KEY,
    date DATE
);

-- One row per calendar day from 2020-01-01 through 2020-12-31.
INSERT INTO dates (date)
SELECT generate_series ('2020-01-01'::date, '2020-12-31'::date, interval '1 day');
Expected Result:
date_list | country
--------------|--------------------------
2020-01-01 | DE
2020-01-01 | FR
2020-01-01 | NL
--------------|---------------------------
2020-01-02 | DE
2020-01-02 | FR
2020-01-02 | NL
--------------|---------------------------
: | :
: | :
: | :
--------------|--------------------------
2020-12-31 | DE
2020-12-31 | FR
2020-12-31 | NL
I want to list all dates from table dates and pair each of them with every country that is available in table sales, no matter whether the date exists in both tables. So far I have developed this query:
-- Left-joins every calendar date to the de-duplicated sales rows of the same
-- date; a country is attached only where event_date equals the calendar date.
SELECT
    d.date AS date_list,
    t2.country
FROM dates AS d
LEFT JOIN (
    SELECT
        s.event_date,
        s.country,
        s.sales
    FROM sales AS s
    GROUP BY
        s.event_date,
        s.country,
        s.sales
    ORDER BY
        s.event_date,
        s.country
) AS t2
    ON t2.event_date = d.date
GROUP BY
    d.date,
    t2.country
ORDER BY
    d.date,
    t2.country;
However, it only groups the results by country if the s.event_date matches the d.date.
How do I have to modify the query to get the expected result?
I am not sure if I understand your requirements correctly, but it seems you need a CROSS JOIN:
-- Every calendar date paired with every country that occurs in sales,
-- regardless of whether that country has a sale on that date.
SELECT
    d.date AS date_list,
    x.country
FROM dates AS d
CROSS JOIN (
    -- The set of countries, one row each.
    SELECT DISTINCT s.country
    FROM sales AS s
) AS x
-- Explicit ordering: without it the row order is unspecified.
ORDER BY
    d.date,
    x.country;
DB-Fiddle
-- Fixture: sales per event_date, country and channel, including one date
-- (2020-03-20) on which every row has zero sales.
CREATE TABLE sales (
    id         SERIAL PRIMARY KEY,
    event_date DATE,
    country    VARCHAR,
    channel    VARCHAR,
    sales      DECIMAL
);

INSERT INTO sales (event_date, country, channel, sales)
VALUES
    -- 2020-02-08 (day total 2900)
    ('2020-02-08', 'DE', 'channel_01', '500'),
    ('2020-02-08', 'DE', 'channel_02', '400'),
    ('2020-02-08', 'DE', 'channel_03', '200'),
    ('2020-02-08', 'FR', 'channel_01', '900'),
    ('2020-02-08', 'FR', 'channel_02', '800'),
    ('2020-02-08', 'NL', 'channel_01', '100'),
    -- 2020-03-20 (day total 0)
    ('2020-03-20', 'DE', 'channel_01', '0'),
    ('2020-03-20', 'FR', 'channel_01', '0'),
    ('2020-03-20', 'FR', 'channel_02', '0'),
    ('2020-03-20', 'FR', 'channel_03', '0'),
    ('2020-03-20', 'NL', 'channel_01', '0'),
    -- 2020-04-15 (day total 2600)
    ('2020-04-15', 'DE', 'channel_01', '700'),
    ('2020-04-15', 'FR', 'channel_01', '500'),
    ('2020-04-15', 'NL', 'channel_01', '850'),
    ('2020-04-15', 'NL', 'channel_02', '250'),
    ('2020-04-15', 'NL', 'channel_03', '300');
Expected Result:
event_date | country | share_per_day_per_country | details of share calculation
------------|-----------|----------------------------|--------------------------------------------
2020-02-08 | DE | 0.379 | = (500+400+200) / (500+400+200+900+800+100)
2020-02-08 | FR | 0.586 | = (900+800) / (500+400+200+900+800+100)
2020-02-08 | NL | 0.034 | = (100) / (500+400+200+900+800+100)
------------|-----------|----------------------------|--------------------------------------------
2020-03-20 | DE | 0.333 | = equal split in case of 0 sales
2020-03-20 | FR | 0.333 | = equal split in case of 0 sales
2020-03-20 | NL | 0.333 | = equal split in case of 0 sales
------------|-----------|----------------------------|--------------------------------------------
2020-04-15 | DE | 0.269 | = (700) / (700+500+850+250+300)
2020-04-15 | FR | 0.192 | = (500) / (700+500+850+250+300)
2020-04-15 | NL | 0.538 | = (850+250+300) / (700+500+850+250+300)
In the expected result I want to:
- calculate the share of the sales per country per day;
- in case there is a day with no sales, divide the share equally among the countries.
In order to achieve this so far I have developed this query:
-- Inner query (t1): groups on (event_date, country, sales) and attaches the
-- windowed totals per day and per day/country.
-- Outer query: keeps one row per distinct combination of the five selected
-- columns and derives the share; when the day's grouped sales sum to zero it
-- divides 100 by the number of rows of that day instead.
SELECT
    t1.event_date,
    t1.country,
    t1.sales,
    t1.total_sales_per_country,
    t1.total_sales_per_day,
    CASE
        WHEN SUM(t1.sales) OVER (PARTITION BY t1.event_date) = 0
            THEN 100/(COUNT(t1.country) OVER (PARTITION BY t1.event_date))/100::decimal
        ELSE t1.total_sales_per_country / t1.total_sales_per_day
    END AS share_per_day_per_country
FROM (
    SELECT
        s.event_date,
        s.country,
        s.sales,
        SUM(s.sales) OVER (PARTITION BY s.event_date) AS total_sales_per_day,
        SUM(s.sales) OVER (PARTITION BY s.event_date, s.country) AS total_sales_per_country
    FROM sales AS s
    GROUP BY
        s.event_date,
        s.country,
        s.sales
    ORDER BY
        s.event_date,
        s.country
) AS t1
GROUP BY
    t1.event_date,
    t1.country,
    t1.sales,
    t1.total_sales_per_country,
    t1.total_sales_per_day
ORDER BY
    t1.event_date,
    t1.country;
This query almost gives me the correct result.
However, instead of listing each event_date only one time it lists them multiple times.
I have tried a few ways (e.g. DISTINCT t1.event_date) to fix this issue but none of them worked.
How do I have to modify the query to get the entire expected result?
Remove t1.sales and t1.total_sales_per_country from the select list. Now use distinct instead of group by.
-- Fixture: sales per event_date, country and channel, including one date
-- (2020-03-20) on which every row has zero sales.
CREATE TABLE sales (
    id         SERIAL PRIMARY KEY,
    event_date DATE,
    country    VARCHAR,
    channel    VARCHAR,
    sales      DECIMAL
);

INSERT INTO sales (event_date, country, channel, sales)
VALUES
    -- 2020-02-08 (day total 2900)
    ('2020-02-08', 'DE', 'channel_01', '500'),
    ('2020-02-08', 'DE', 'channel_02', '400'),
    ('2020-02-08', 'DE', 'channel_03', '200'),
    ('2020-02-08', 'FR', 'channel_01', '900'),
    ('2020-02-08', 'FR', 'channel_02', '800'),
    ('2020-02-08', 'NL', 'channel_01', '100'),
    -- 2020-03-20 (day total 0)
    ('2020-03-20', 'DE', 'channel_01', '0'),
    ('2020-03-20', 'FR', 'channel_01', '0'),
    ('2020-03-20', 'FR', 'channel_02', '0'),
    ('2020-03-20', 'FR', 'channel_03', '0'),
    ('2020-03-20', 'NL', 'channel_01', '0'),
    -- 2020-04-15 (day total 2600)
    ('2020-04-15', 'DE', 'channel_01', '700'),
    ('2020-04-15', 'FR', 'channel_01', '500'),
    ('2020-04-15', 'NL', 'channel_01', '850'),
    ('2020-04-15', 'NL', 'channel_02', '250'),
    ('2020-04-15', 'NL', 'channel_03', '300');
Query:
-- Share of each country's sales in the day's total, one row per
-- (event_date, country). On a day whose sales sum to zero the share is split
-- equally across the countries that have rows on that day.
WITH country_day AS (
    -- Sum the raw channel rows per country and day. Aggregating the raw rows
    -- (rather than de-duplicating on the sales value first) keeps two channels
    -- with the same sales amount from being counted only once.
    SELECT
        s.event_date,
        s.country,
        SUM(s.sales) AS total_sales_per_country
    FROM sales AS s
    GROUP BY
        s.event_date,
        s.country
),
country_day_totals AS (
    -- Attach the day total and the number of countries seen on that day.
    SELECT
        cd.event_date,
        cd.country,
        cd.total_sales_per_country,
        SUM(cd.total_sales_per_country) OVER (PARTITION BY cd.event_date) AS total_sales_per_day,
        COUNT(*) OVER (PARTITION BY cd.event_date) AS countries_per_day
    FROM country_day AS cd
)
SELECT
    cdt.event_date,
    cdt.country,
    cdt.total_sales_per_day,
    CASE
        -- 1.0 keeps the division in NUMERIC (1.0 / 3 = 0.333...); an integer
        -- 100 / COUNT(...) would truncate to 33 and end up as 0.33.
        WHEN cdt.total_sales_per_day = 0 THEN 1.0 / cdt.countries_per_day
        ELSE cdt.total_sales_per_country / cdt.total_sales_per_day
    END AS share_per_day_per_country
FROM country_day_totals AS cdt
ORDER BY
    cdt.event_date,
    cdt.country;
Output:
event_date
country
total_sales_per_day
share_per_day_per_country
2020-02-08
DE
2900
0.37931034482758620690
2020-02-08
FR
2900
0.58620689655172413793
2020-02-08
NL
2900
0.03448275862068965517
2020-03-20
DE
0
0.33000000000000000000
2020-03-20
FR
0
0.33000000000000000000
2020-03-20
NL
0
0.33000000000000000000
2020-04-15
DE
2600
0.26923076923076923077
2020-04-15
FR
2600
0.19230769230769230769
2020-04-15
NL
2600
0.53846153846153846154
db<>fiddle here
I am trying to count the number of unique bookings for each customer by looking at the invoice_id; however, my query isn't producing the correct results.
Schema
-- One row per invoice line; the sample data repeats an invoice_id when an
-- invoice has several lines.
CREATE TABLE invoice_line_items (
    invoice_id  int,
    customer_id int,
    tstamp      datetime
);
Data
-- Sample line items. Invoices 1 and 5 each appear on two rows, and invoice 2
-- appears once for customer 123 and once for customer 124.
INSERT INTO invoice_line_items (invoice_id, customer_id, tstamp)
VALUES
    ('1', '123', '2018-12-21 10:00:00'),
    ('1', '123', '2018-12-21 10:00:00'),
    ('2', '123', '2018-12-22 10:00:00'),
    ('2', '124', '2018-12-22 10:00:00'),
    ('3', '124', '2018-12-22 10:00:00'),
    ('4', '124', '2018-12-22 10:00:00'),
    ('5', '124', '2018-12-22 10:00:00'),
    ('5', '124', '2018-12-22 10:00:00');
Query
-- Counts line-item rows per (customer_id, invoice_id) pair for rows dated on
-- or after 2018-01-01; yields one output row per pair, not per customer.
SELECT
    customer_id,
    COUNT(*) AS number_of_orders
FROM invoice_line_items
WHERE tstamp >= '2018-01-01'
GROUP BY
    customer_id,
    invoice_id;
Desired Output
customer_id | number_of_orders
123 | 2
124 | 4
You don't need to include invoice_id in the GROUP BY:
-- Number of different invoice_id values per customer among rows dated on or
-- after 2018-01-01.
SELECT
    customer_id,
    COUNT(DISTINCT invoice_id) AS number_of_orders
FROM invoice_line_items
WHERE tstamp >= '2018-01-01'
GROUP BY
    customer_id;
Use DISTINCT inside COUNT() to count unique bookings.