Highest risk_score result for each month - SQL

I want to generate the highest risk_score result for each month (Jan, Feb & Mar),
displaying the following columns: firm_id_1, risk_score_Jan, risk_score_Feb, risk_score_Mar.
CREATE table firm_risk (
firm_id_1 INT,
assessment_date DATE,
risk_score FLOAT
);
INSERT INTO firm_risk (firm_id_1, assessment_date, risk_score)
VALUES (123, '1/01/2018', 0.43),
(123, '1/28/2018', 0.80),
(123, '2/11/2018', 0.28),
(123, '2/23/2018', 0.91),
(123, '3/11/2018', 0.08),
(123, '3/31/2018', 0.60),
(456, '1/4/2018', 0.87),
(456, '1/6/2018', 0.02),
(456, '1/20/2018', 0.39),
(456, '2/3/2018', 0.10),
(456, '3/1/2018', 0.12),
(789, '1/1/2018', 0.20),
(789, '3/1/2018', 0.17);
SELECT * FROM firm_risk;
SELECT firm_id_1, date_part('month', assessment_date) AS AD
FROM firm_risk
WHERE assessment_date = (SELECT MAX (assessment_date) FROM firm_risk)
GROUP BY firm_id_1, risk_score, assessment_date;
CREATE table latest_risk_score (
firm_id_2 integer,
latest_risk_score_Jan float,
latest_risk_score_Feb float,
latest_risk_score_Mar float
);
SELECT * FROM latest_risk_score;
INSERT INTO latest_risk_score (firm_id_2)
VALUES (123),
(456),
(789);
SELECT firm_risk.firm_id_1, date_part('month', assessment_date), firm_risk.risk_score
FROM firm_risk
INNER JOIN latest_risk_score
ON firm_risk.firm_id_1 = latest_risk_score.firm_id_2
GROUP BY firm_risk.firm_id_1, firm_risk.risk_score, assessment_date;
SELECT firm_risk.firm_id_1, date_part('month', assessment_date), firm_risk.risk_score
FROM firm_risk
WHERE assessment_date = (SELECT MAX (assessment_date) FROM firm_risk)
AND assessment_date LIKE '_%-01-2018%';
SELECT firm_risk.firm_id_1, date_part('month', assessment_date)
FROM firm_risk
WHERE assessment_date >= date_part('month', assessment_date - '3 months')
GROUP BY firm_risk.firm_id_1, ('month', assessment_date);
UPDATE latest_risk_score SET latest_risk_score_Jan = (SELECT Risk_Score FROM firm_risk.firm_id_1 WHERE Assessment_Date = (SELECT MAX(Assessment_Date)
FROM firm_risk.firm_id_1 WHERE firm_id_1 = 123 AND Assessment_Date LIKE "2018-01-%" ORDER BY Assessment_Date))
WHERE firm_id_1 = 123;
update latest_risk_score
set latest_risk_score_Feb = (select Risk_Score from firm_risk.firm_id_1 where Assessment_Date = (select max(Assessment_Date)
from firm_risk.firm_id_1 where firm_id_1 = 123 and Assessment_Date like "2018-02-%" order by Assessment_Date))
where firm_id_1 = 123;
update latest_risk_score
set latest_risk_score_Mar = (select Risk_Score from firm_risk.firm_id_1 where Assessment_Date = (select max(Assessment_Date)
from firm_risk.firm_id_1 where firm_id_1 = 123 and Assessment_Date like "2018-03-%" order by Assessment_Date))
where firm_id_1 = 123;
select * from latest_risk_score;
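For reference, a minimal sketch of a corrected January UPDATE, assuming Postgres: the attempts above fail because firm_risk.firm_id_1 is not a valid table reference and double quotes mark identifiers rather than string literals, so the subquery has to select from the table itself and correlate on the firm:
UPDATE latest_risk_score
SET latest_risk_score_jan = (
    SELECT fr.risk_score
    FROM firm_risk fr                                  -- the table, not table.column
    WHERE fr.firm_id_1 = latest_risk_score.firm_id_2   -- correlate on the firm being updated
      AND date_part('year', fr.assessment_date) = 2018
      AND date_part('month', fr.assessment_date) = 1
    ORDER BY fr.assessment_date DESC                   -- latest January assessment, as in the original attempt
    LIMIT 1
);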

Assuming Postgres is relevant (due to the use of date_part in the question):
CREATE table firm_risk (
firm_id_1 INT,
assessment_date DATE,
risk_score FLOAT
);
INSERT INTO firm_risk (firm_id_1, assessment_date, risk_score)
VALUES (123, '2018-01-01', 0.43),
(123, '2018-01-28', 0.80),
(123, '2018-02-11', 0.28),
(123, '2018-02-23', 0.91),
(123, '2018-03-11', 0.08),
(123, '2018-03-31', 0.60),
(456, '2018-01-04', 0.87),
(456, '2018-01-06', 0.02),
(456, '2018-01-20', 0.39),
(456, '2018-02-03', 0.10),
(456, '2018-03-01', 0.12),
(789, '2018-01-01', 0.20),
(789, '2018-03-01', 0.17);
SELECT
firm_risk.firm_id_1
, max(case when date_part('month',assessment_date) = 1 then firm_risk.risk_score end) jan_risk
, max(case when date_part('month',assessment_date) = 2 then firm_risk.risk_score end) feb_risk
, max(case when date_part('month',assessment_date) = 3 then firm_risk.risk_score end) mar_risk
FROM firm_risk
WHERE date_part('month',assessment_date) in (1,2,3)
GROUP BY
firm_risk.firm_id_1;
-- result
firm_id_1 | jan_risk | feb_risk | mar_risk
--------: | :------- | :------- | :-------
789 | 0.2 | null | 0.17
456 | 0.87 | 0.1 | 0.12
123 | 0.8 | 0.91 | 0.6
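On Postgres 9.4 or later the same pivot can also be written with the FILTER clause instead of CASE expressions; a minimal sketch against the firm_risk table above:
SELECT
firm_id_1
, max(risk_score) FILTER (WHERE date_part('month', assessment_date) = 1) AS jan_risk
, max(risk_score) FILTER (WHERE date_part('month', assessment_date) = 2) AS feb_risk
, max(risk_score) FILTER (WHERE date_part('month', assessment_date) = 3) AS mar_risk
FROM firm_risk
WHERE date_part('month', assessment_date) IN (1, 2, 3)
GROUP BY firm_id_1;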

Related

Compare values between two tables with over partition criteria

DB-Fiddle
/* Table Campaigns */
CREATE TABLE campaigns (
id SERIAL PRIMARY KEY,
insert_time DATE,
campaign VARCHAR,
tranches VARCHAR,
quantity DECIMAL);
INSERT INTO campaigns
(insert_time, campaign, tranches, quantity)
VALUES
('2021-01-01', 'C001', 't', '500'),
('2021-01-01', 'C002', 't', '600'),
('2021-01-02', 'C001', 't', '500'),
('2021-01-02', 'C002', 't', '600');
/* Table Tranches */
CREATE TABLE tranches (
id SERIAL PRIMARY KEY,
insert_time DATE,
campaign VARCHAR,
tranches VARCHAR,
quantity DECIMAL);
INSERT INTO tranches
(insert_time, campaign, tranches, quantity)
VALUES
('2021-01-01', 'C001', 't1', '200'),
('2021-01-01', 'C001', 't2', '120'),
('2021-01-01', 'C001', 't3', '180'),
('2021-01-01','C002', 't1', '350'),
('2021-01-01','C002', 't2', '250'),
('2021-01-02', 'C001', 't1', '400'),
('2021-01-02', 'C001', 't2', '120'),
('2021-01-02', 'C001', 't3', '180'),
('2021-01-02','C002', 't1', '350'),
('2021-01-02','C002', 't2', '250');
Expected Result:
insert_time | campaign | tranches | quantity_campaigns | quantity_tranches | check
------------|----------|----------|--------------------|-------------------|------
2021-01-01  | C001     | t        | 500                | 500               | ok
2021-01-01  | C002     | t        | 600                | 600               | ok
2021-01-02  | C001     | t        | 500                | 700               | error
2021-01-02  | C002     | t        | 600                | 600               | ok
I want to compare the total quantity per campaign in table campaigns with the total quantity per campaign in table tranches.
So far I have been able to develop this query:
SELECT
c.insert_time AS insert_time,
c.campaign AS campaign,
c.tranches AS tranches,
c.quantity AS quantity_campaigns,
t.quantity AS quantity_tranches,
(CASE WHEN
MAX(c.quantity) OVER(PARTITION BY c.insert_time, c.campaign) = SUM(t.quantity) OVER(PARTITION BY t.insert_time, t.campaign)
THEN 'ok' ELSE 'error' END) AS check
FROM campaigns c
LEFT JOIN tranches t ON c.campaign = t.campaign
ORDER BY 1,2,3,4,5;
However, it does not give me the expected result.
What do I need to change to make it work?
I think the result you're looking for should be something like this. The problem is that you're trying to aggregate over two groupings after a join, which will either yield too many rows or incorrect calculations. By aggregating in CTEs first and then joining the CTEs after the aggregation has occurred, you can achieve the result you are looking for. See my example below:
WITH campaign_agg AS(
SELECT c.insert_time, c.campaign, c.tranches, MAX(c.quantity) c_quantity
FROM campaigns c
GROUP BY c.insert_time, c.campaign, c.tranches
), tranch_agg AS(
SELECT t.insert_time, t.campaign, SUM(t.quantity) as t_sum
FROM tranches t
GROUP BY t.insert_time, t.campaign
)
SELECT c.insert_time, c.campaign, c.tranches, c.c_quantity, t.t_sum,
CASE WHEN c.c_quantity = t.t_sum THEN 'ok' ELSE 'error' END as check
FROM campaign_agg c
JOIN
tranch_agg t ON
t.insert_time = c.insert_time
AND t.campaign = c.campaign
ORDER BY c.insert_time, c.campaign
I have a db-fiddle for this as well: https://www.db-fiddle.com/f/33x4upVEcgTMNehiHCKzfN/1
DB-Fiddle
SELECT
c.insert_time AS insert_time,
c.campaign AS campaign,
c.tranches AS tranches,
SUM(c.quantity) AS quantity_campaigns,
SUM(t1.quantity) AS quantity_tranches,
(CASE WHEN SUM(c.quantity) <> SUM(t1.quantity) THEN 'error' ELSE 'ok' END) AS check
FROM campaigns c
LEFT JOIN
(SELECT
t.insert_time AS insert_time,
t.campaign AS campaign,
SUM(t.quantity) AS quantity
FROM tranches t
GROUP BY 1,2
ORDER BY 1,2) t1 on t1.insert_time = c.insert_time AND t1.campaign = c.campaign
GROUP BY 1,2,3
ORDER BY 1,2,3;

PostgreSQL: how to show the most frequent value per day

I've got a problem with a query that is supposed to return the value which occurs most often per date:
+------------+------------------+
| Date | value |
+------------+------------------+
| 2020-01-01 | Programmer |
| 2020-01-02 | Technician |
| 2020-01-03 | Business Analyst |
+------------+------------------+
So far I have done
select count(headline) as asd, publication_date, employer -> 'name' as dsa from jobhunter
group by publication_date,dsa
ORDER BY publication_date DESC
But it shows 2020-12-31 19:06:00 instead of just YYYY-MM-DD
Any idea on how to fix this?
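For the date formatting itself, casting the timestamp down to a date is usually enough; a minimal sketch, assuming the jobhunter table and the employer JSON column from the query above:
SELECT
publication_date::date AS publication_day,   -- drops the time-of-day part
employer ->> 'name' AS employer_name,        -- ->> returns text instead of json
count(headline) AS cnt
FROM jobhunter
GROUP BY publication_day, employer_name
ORDER BY publication_day DESC;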
Test data:
create table tbl (
id serial primary key,
row_datetime TIMESTAMP,
row_val VARCHAR(60)
);
insert into tbl (row_datetime, row_val) values ('2021-01-01 00:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-01 01:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-01 02:00:00', 'b');
insert into tbl (row_datetime, row_val) values ('2021-01-02 00:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-02 01:00:00', 'b');
insert into tbl (row_datetime, row_val) values ('2021-01-02 02:00:00', 'b');
Example query:
SELECT dt, val, cnt
FROM (
SELECT dt, val, cnt, ROW_NUMBER() OVER (PARTITION BY dt ORDER BY cnt DESC) AS row_num
FROM (
SELECT dt, val, COUNT(val) AS cnt
FROM (
SELECT DATE(row_datetime) AS dt, row_val AS val FROM tbl
) AS T1 GROUP BY dt, val
) AS T2
) AS T3
WHERE row_num=1
ORDER BY dt ASC
You can additionally customize your query to optimize the performance, get more fields, etc.
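Postgres also has DISTINCT ON as a shorter way to keep the top row per day; a minimal sketch equivalent to the query above (ties on the count are broken arbitrarily unless more ORDER BY columns are added):
SELECT DISTINCT ON (dt) dt, val, COUNT(*) AS cnt
FROM (SELECT DATE(row_datetime) AS dt, row_val AS val FROM tbl) AS t1
GROUP BY dt, val
ORDER BY dt, cnt DESC;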

Query Optimization to reduce multiple joins statements

Here is the table:
CREATE TABLE ABC
(
key NUMBER(5),
orders NUMBER(5),
cost NUMBER(5),
dat DATE
);
insert into ABC (key, orders, cost, dat) values (1, 3, 5, to_date('10-11-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 5, 2, to_date('02-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 6, 1, to_date('03-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 7, 2, to_date('05-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 8, 3, to_date('07-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 3, 4, to_date('08-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 3, 6, to_date('02-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 3, 9, to_date('01-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 2, 5, to_date('03-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 3, 2, to_date('05-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 1, 1, to_date('06-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 4, 12, to_date('10-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 3, 9, to_date('01-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 2, 5, to_date('05-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 3, 2, to_date('06-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 1, 1, to_date('07-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 4, 12, to_date('11-10-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 3, 5, to_date('10-01-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 5, 2, to_date('02-17-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 6, 1, to_date('03-18-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 7, 2, to_date('05-14-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 8, 3, to_date('07-13-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (1, 3, 4, to_date('08-12-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 3, 6, to_date('02-11-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 3, 9, to_date('01-15-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 2, 5, to_date('03-14-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 3, 2, to_date('05-18-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (2, 1, 1, to_date('06-19-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 4, 12, to_date('10-11-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 3, 9, to_date('01-12-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 2, 5, to_date('05-16-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 3, 2, to_date('06-17-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 1, 1, to_date('07-12-2017', 'mm-dd-yyyy'));
insert into ABC (key, orders, cost, dat) values (3, 4, 12, to_date('12-21-2017', 'mm-dd-yyyy'));
Not sure why my results are repeating.
Here is my query:
with qone as
(select a.key, a.max_price, max(t.dat) as qo_dat from ABC t
JOIN
(select key, max(cost) as max_price from ABC
where dat >= to_date('01-01-2017', 'mm-dd-yyyy') and dat < to_date('04-01-2017', 'mm-dd-yyyy')
group by key) a on a.key = t.key and a.max_price = t.cost
group by a.key, a.max_price),
qtwo as
(select a.key, a.max_price, max(t.dat) as qt_dat from ABC t
JOIN
(select key, max(cost) as max_price from ABC
where dat >= to_date('04-01-2017', 'mm-dd-yyyy') and dat < to_date('07-01-2017', 'mm-dd-yyyy')
group by key) a on a.key = t.key and a.max_price = t.cost
group by a.key, a.max_price),
qthree as
(select a.key, a.max_price, max(t.dat) as qth_dat from ABC t
JOIN
(select key, max(cost) as max_price from ABC
where dat >= to_date('07-01-2017', 'mm-dd-yyyy') and dat < to_date('10-01-2017', 'mm-dd-yyyy')
group by key) a on a.key = t.key and a.max_price = t.cost
group by a.key, a.max_price),
qfour as
(select a.key, a.max_price, max(t.dat) as qf_dat from ABC t
JOIN
(select key, max(cost) as max_price from ABC
where dat >= to_date('10-01-2017', 'mm-dd-yyyy') and dat < to_date('01-01-2018', 'mm-dd-yyyy')
group by key) a on a.key = t.key and a.max_price = t.cost
group by a.key, a.max_price)
select qo.key, qo.max_price as max_q1, qo.qo_dat, qt.max_price as max_q2,
qt.qt_dat, qth.max_price as max_q3, qth.qth_dat, qf.max_price as max_q4,
qf.qf_dat from qone qo
join qtwo qt on qt.key = qo.key
join qthree qth on qth.key = qth.key
join qfour qf on qf.key = qf.key
order by key
I want to know if there is a way to reduce the lines.
How did I do it? I find the max price and max date for each quarter, defining the quarters with WHERE clauses.
I use a divide-and-conquer technique: I find the max price and the respective date for all four quarters and join them on the key. A sample of one self-defined quarter is below.
select a.key, a.max_price, max(t.dat) as qo_dat from ABC t
JOIN
(select key, max(cost) as max_price from ABC
where dat >= to_date('01-01-2017', 'mm-dd-yyyy') and dat < to_date('04-01-2017', 'mm-dd-yyyy')
group by key) a on a.key = t.key and a.max_price = t.cost
group by a.key, a.max_price
Possible optimized solution (though I'm still figuring out a way to add a corresponding date next to each value):
select
t.key,
max( case when t.dat >= Tmp.Q1From and t.dat < Tmp.Q1End then t.cost else 0 end ) as Q1Tot,
max( case when t.dat >= Tmp.Q1End and t.dat < Tmp.Q2End then t.cost else 0 end ) as Q2Tot,
max( case when t.dat >= Tmp.Q2End and t.dat < Tmp.Q3End then t.cost else 0 end ) as Q3Tot,
max( case when t.dat >= Tmp.Q3End and t.dat < Tmp.Q4End then t.cost else 0 end ) as Q4Tot
from
ABC t,
( select
to_date('01-01-2017', 'mm-dd-yyyy') Q1From,
to_date('04-01-2017', 'mm-dd-yyyy') Q1End,
to_date('07-01-2017', 'mm-dd-yyyy') Q2End,
to_date('10-01-2017', 'mm-dd-yyyy') Q3End,
to_date('01-01-2018', 'mm-dd-yyyy') Q4End
from
dual ) Tmp
where
t.dat >= to_date('01-01-2017', 'mm-dd-yyyy')
and t.dat < to_date('01-01-2018', 'mm-dd-yyyy')
group by
t.key
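One way to carry the corresponding date along is Oracle's KEEP (DENSE_RANK LAST) aggregate combined with the same conditional pivot; a sketch only, assuming ties on the maximum cost should resolve to the latest date (matching the max(t.dat) used above):
select key
, max( case when qtr = 1 then max_cost end ) as q1_max
, max( case when qtr = 1 then max_cost_dat end ) as q1_dat
, max( case when qtr = 2 then max_cost end ) as q2_max
, max( case when qtr = 2 then max_cost_dat end ) as q2_dat
, max( case when qtr = 3 then max_cost end ) as q3_max
, max( case when qtr = 3 then max_cost_dat end ) as q3_dat
, max( case when qtr = 4 then max_cost end ) as q4_max
, max( case when qtr = 4 then max_cost_dat end ) as q4_dat
from (
select key
, to_number( to_char( dat, 'Q' ) ) as qtr
, max( cost ) as max_cost
, max( dat ) keep ( dense_rank last order by cost ) as max_cost_dat -- date of the highest cost within the quarter
from ABC
where dat >= to_date('01-01-2017', 'mm-dd-yyyy')
and dat < to_date('01-01-2018', 'mm-dd-yyyy')
group by key, to_number( to_char( dat, 'Q' ) )
)
group by key
order by key;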
Instead of using JOINs or cross joins, consider using the analytical function NTH_VALUE (see documentation) for displaying the required values for the 4 quarters side by side.
NTH_VALUE returns the measure_expr value of the nth row in the window
defined by the analytic_clause.
First step: find the "max costs" and their corresponding dates for all keys (and quarters).
select *
from (
select key, dat, to_char( dat, 'Q' ) quarter
, max( cost ) over ( partition by key, to_char( dat, 'Q' ) order by cost desc ) maxcost_
, max( dat ) over ( partition by key, to_char( dat, 'Q' ) order by cost desc ) maxdat_
, row_number() over ( partition by key, to_char( dat, 'Q' ) order by cost desc ) rownum_
from abc
)
where rownum_ = 1
-- result
KEY DAT QUARTER MAXCOST_ MAXDAT_ ROWNUM_
1 17-FEB-17 1 2 17-FEB-17 1
1 14-MAY-17 2 2 14-MAY-17 1
1 12-AUG-17 3 4 12-AUG-17 1
1 01-OCT-17 4 5 11-OCT-17 1
2 10-JAN-17 1 9 15-JAN-17 1
2 10-MAY-17 2 2 18-MAY-17 1
3 10-JAN-17 1 9 12-JAN-17 1
3 10-MAY-17 2 5 16-MAY-17 1
3 10-JUL-17 3 1 12-JUL-17 1
3 10-NOV-17 4 12 21-DEC-17 1
10 rows selected.
Final query: use the first query as an INLINE VIEW, and call NTH_VALUE for retrieving the values for each quarter.
select unique key
, nth_value( maxcost_, 1 ) from first over ( partition by key ) q1max
, nth_value( maxdat_, 1 ) from first over ( partition by key ) q1date
, nth_value( maxcost_, 2 ) from first over ( partition by key ) q2max
, nth_value( maxdat_, 2 ) from first over ( partition by key ) q2date
, nth_value( maxcost_, 3 ) from first over ( partition by key ) q3max
, nth_value( maxdat_, 3 ) from first over ( partition by key ) q3date
, nth_value( maxcost_, 4 ) from first over ( partition by key ) q4max
, nth_value( maxdat_, 4 ) from first over ( partition by key ) q4date
from (
select *
from (
select key, dat, to_char( dat, 'Q' ) quarter
, max( cost ) over ( partition by key, to_char( dat, 'Q' ) order by cost desc ) maxcost_
, max( dat ) over ( partition by key, to_char( dat, 'Q' ) order by cost desc ) maxdat_
, row_number() over ( partition by key, to_char( dat, 'Q' ) order by cost desc ) rownum_
from abc
)
where rownum_ = 1
) -- inline view (no name required)
order by key
;
-- result
KEY Q1MAX Q1DATE Q2MAX Q2DATE Q3MAX Q3DATE Q4MAX Q4DATE
1 2 17-FEB-17 2 14-MAY-17 4 12-AUG-17 5 11-OCT-17
2 9 15-JAN-17 2 18-MAY-17 NULL NULL NULL NULL
3 9 12-JAN-17 5 16-MAY-17 1 12-JUL-17 12 21-DEC-17
Maybe you could rewrite it in a shorter way, as in SQL Fiddle:
select a.key, qtr, a.max_price, max(t.dat) as qo_dat
from ABC t
join (
select key, to_char(dat, 'Q') as qtr, max(cost) as max_price
from ABC
where dat >= to_date('01-01-2017', 'mm-dd-yyyy')
and dat < to_date('01-01-2018', 'mm-dd-yyyy')
group by key, to_char(dat, 'Q')
) a on a.key = t.key and a.max_price = t.cost and a.qtr = to_char(t.dat, 'Q')
group by a.key, a.qtr, a.max_price
order by a.key, a.qtr, a.max_price
The output is a little bit different, but it shows what you want. Doesn't it?
select a.key, a.q1tot, b.dat, a.q2tot, c.dat, a.q3tot, d.dat, a.q4tot, e.dat from (
select
t.key,
max( case when t.dat >= Tmp.Q1From and t.dat < Tmp.Q1End then t.cost else 0 end ) as Q1Tot,
max( case when t.dat >= Tmp.Q1End and t.dat < Tmp.Q2End then t.cost else 0 end ) as Q2Tot,
max( case when t.dat >= Tmp.Q2End and t.dat < Tmp.Q3End then t.cost else 0 end ) as Q3Tot,
max( case when t.dat >= Tmp.Q3End and t.dat < Tmp.Q4End then t.cost else 0 end ) as Q4Tot
from
ABC t,
( select
to_date('01-01-2017', 'mm-dd-yyyy') Q1From,
to_date('04-01-2017', 'mm-dd-yyyy') Q1End,
to_date('07-01-2017', 'mm-dd-yyyy') Q2End,
to_date('10-01-2017', 'mm-dd-yyyy') Q3End,
to_date('01-01-2018', 'mm-dd-yyyy') Q4End
from
dual ) Tmp
where
t.dat >= to_date('01-01-2017', 'mm-dd-yyyy')
and t.dat < to_date('01-01-2018', 'mm-dd-yyyy')
group by
t.key) a
join
( select key, cost, dat from ABC
where dat < to_date('04-01-2017', 'mm-dd-yyyy')) b
on a.key = b.key and a.Q1tot = b.cost
join
( select key, cost, dat from ABC
where dat >= to_date('04-01-2017', 'mm-dd-yyyy') and dat < to_date('07-01-2017',
'mm-dd-yyyy')) c
on a.key = c.key and a.Q2tot = c.cost
join
( select key, cost, dat from ABC
where dat >= to_date('07-01-2017', 'mm-dd-yyyy') and dat < to_date('10-01-2017',
'mm-dd-yyyy')) d
on a.key = d.key and a.Q3tot = d.cost
join
( select key, cost, dat from ABC
where dat >= to_date('10-01-2017', 'mm-dd-yyyy') and dat < to_date('01-01-2018', 'mm-dd-yyyy')) e
on a.key = e.key and a.Q4tot = e.cost
This is my code, but the above two queries perform faster.

Finding duplicate records in a specific date range

I have a table with 4 columns:
Serial (nvarchar), SID (nvarchar), DateCreated (Date), CID (unique int)
I want to find the records where there are duplicate Serial and SID values and where the two duplicates fall within a date range of 180 days.
Please help.
Sample Data
Serial SID DateCreated CID
02302-25-0036 HONMD01 2017-05-01 00:00:00.000 1
02302-25-0036 HONMD01 2017-05-01 00:00:00.000 3
0264607 HONMD01 2017-05-01 00:00:00.000 65
0264607 HONMD01 2016-05-01 00:00:00.000 45
03118-09-0366 PRIVA00 2016-05-20 00:00:00.000 34
03118-09-0366 PRIVA00 2016-05-20 00:00:00.000 87
0969130 140439 2017-05-09 00:00:00.000 32
0969130 140439 2017-05-09 00:00:00.000 23
1049567 INIIL00 2017-04-12 00:00:00.000 76
create table #Test (Serial nvarchar(20), [SID] nvarchar(10), DateCreated datetime, CID int)
Insert into #Test values ('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 1)
, ('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 3)
, ('0264607', 'HONMD01', '2017-05-01 00:00:00.000', 65)
, ('0264607', 'HONMD01', '2016-05-01 00:00:00.000', 45)
, ('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 34)
, ('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 87)
, ('0969130', '140439', '2017-05-09 00:00:00.000', 32)
, ('0969130', '140439', '2017-05-09 00:00:00.000', 23)
, ('1049567', 'INIIL00', '2017-04-12 00:00:00.000', 76)
select distinct a.*
from
(
select t.*
from #Test t
inner join (
Select Serial, [SID]
from #Test
group by Serial, [SID]
Having count(*)>=2
) d on d.Serial = t.Serial and t.SID= t.SID
) a
full outer join
(
select t.*
from #Test t
inner join (
Select Serial, [SID]
from #Test
group by Serial, [SID]
Having count(*)>=2
) d on d.Serial = t.Serial and t.SID= t.SID
) b on a.Serial = b.Serial and a.SID= b.SID
where datediff(d,a.DateCreated, b.DateCreated)<180
Try to do this:
with cte as (
select
serial,
sid,
dateCreated,
cid,
coalesce(max(dateCreated) over(partition by serial, sid order by cid, dateCreated asc rows between unbounded preceding and 1 preceding), '1900-01-01') as last,
coalesce(min(dateCreated) over(partition by serial, sid order by cid, dateCreated asc rows between 1 following and unbounded following), '5999-01-01') as next
from table_name
)
select *
from cte
where
datediff(day, last, dateCreated) >= 180
and datediff(day, dateCreated, next) >= 180
This was a challenging question! I have left the extra columns (PreviousDate, rno) in the final output for easier understanding. Here is my way to solve it:
Create table #t(Serial nvarchar(100),SID nvarchar(100),DateCreated date,CID int)
Insert into #t values
('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 1),
('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 3),
('0264607', 'HONMD01', '2017-05-01 00:00:00.000', 65),
('0264607', 'HONMD01', '2016-05-01 00:00:00.000', 45),
('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 34),
('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 87),
('0969130', '140439', '2017-05-09 00:00:00.000', 32),
('0969130', '140439', '2017-05-09 00:00:00.000', 23),
('1049567', 'INIIL00', '2017-04-12 00:00:00.000', 76)
Select iq2.*
FROM
(Select iq.Serial, iq.SID, iq.DateCreated, iq.CID, iq.PreviousDate,
ROW_NUMBER() OVER (PARTITION BY iq.Serial,iq.SID, CASE WHEN DATEDIFF(day, iq.DateCreated, iq.PreviousDate) <= 180 THEN 1 ELSE 0 END
ORDER BY Serial,SID) rno
FROM
(select Serial,SID,DateCreated,CID,
MAX(DateCreated) OVER (PARTITION BY Serial,SID ORDER BY Serial,SID) maxDate,
DATEADD(day,-180,MAX(DateCreated) OVER (PARTITION BY Serial,SID ORDER BY Serial,SID)) PreviousDate
from #t
)iq
)iq2
where iq2.rno <> 1
output :
Serial SID DateCreated CID PreviousDate rno
---------- ------- ---------- ---- ----------- ----
02302-25-0036 HONMD01 2017-05-01 3 2016-11-02 2
03118-09-0366 PRIVA00 2016-05-20 87 2015-11-22 2
0969130 140439 2017-05-09 23 2016-11-10 2
PS: PreviousDate is the MAX(DateCreated) per Serial/SID minus 180 days.
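For completeness, the offending pairs can also be listed directly with a self-join on Serial and SID plus a 180-day window; a minimal sketch, assuming SQL Server and that CID uniquely identifies rows:
select a.Serial, a.[SID], a.DateCreated as date_a, a.CID as cid_a, b.DateCreated as date_b, b.CID as cid_b
from #Test a
join #Test b
on b.Serial = a.Serial
and b.[SID] = a.[SID]
and b.CID > a.CID -- CID is unique, so this lists each pair only once
where abs(datediff(day, a.DateCreated, b.DateCreated)) <= 180;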

Using a (Recursive?) CTE + Window Functions to zero out sales orders?

I am trying to use a recursive CTE + window functions to find the last outcome of a series of buy/sell orders.
First, here's some nomenclature:
field_id is the store's ID.
Field_number is an order number, but can be reused by the same person
Field_date is the date of the initial order.
Field_inserted is when this specific transaction occurred.
Field_sale is whether we bought or returned it.
Unfortunately, because of the way the systems work, I do NOT get the cost when an item is returned, so figuring out the last outcome for an order is complicated (did we wind up selling any?). I need to match the purchase with the sale, which normally works pretty well. However, there are cases such as the one below where it fails, and I'm trying to find a way to do this in one pass, possibly using a recursive CTE.
Here's some code.
DECLARE @tablea TABLE (field_id int, field_number CHAR(3), field_date datetime, field_inserted DATETIME, field_sale varchar(4))
INSERT INTO @tablea
VALUES
(1, 100, '20170311','20170311 01:00:00', 'Buy'),
(1, 100, '20170311','20170311 01:01:00', 'Retu'),
(1, 100, '20170311','20170311 01:02:00', 'Buy'),
(1, 100, '20170311','20170311 01:03:00', 'Retu'),
(1, 100, '20170311','20170311 01:02:01', 'buy'),
(2, 100, '20170311','20170311 01:03:00', 'REtu'),
(1, 110, '20170311','20170311 01:03:00', 'Buy');
Now to remove the buys that were then returned. The ISNULL is there because otherwise the NOT conditions would skip all the rows that have NULL for the _lead/_lag values.
WITH cte AS
(SELECT
ROW_NUMBER() OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS row_num,
field_id,
field_number,
field_date,
field_sale,
lead(field_sale) OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS field_sale_lead,
lag(field_sale) OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS field_sale_lag
FROM @tablea
)
SELECT * FROM cte
WHERE NOT (cte.field_sale = 'Buy' AND ISNULL(field_sale_lead,'') = 'Retu')--AND field_sale_lead IS NOT null)
AND NOT (cte.field_sale = 'Retu' AND ISNULL(field_sale_lag,'') = 'buy' )--AND field_sale_lag IS NOT NULL)
And I felt pretty smug and thought I had it. However, that's the simple case: Buy, Return, Buy, Return. Let's try another case, Buy Buy Return Return, which is still valid but obviously would result in a net of 0.
DECLARE @tablea TABLE (field_id int, field_number CHAR(3), field_date datetime, field_inserted DATETIME, field_sale varchar(4))
INSERT INTO @tablea
VALUES
(1, 100, '20170311','20170311 01:00:00', 'Buy'),
(1, 100, '20170311','20170311 01:01:00', 'Buy'),
(1, 100, '20170311','20170311 01:02:00', 'Retu'),
(1, 100, '20170311','20170311 01:03:00', 'Retu'),
(2, 100, '20170311','20170311 01:03:00', 'Buy'),
(1, 110, '20170311','20170311 01:03:00', 'Buy');
WITH cte AS
(SELECT
ROW_NUMBER() OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS row_num,
field_id,
field_number,
field_date,
field_sale,
lead(field_sale) OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS field_sale_lead,
lag(field_sale) OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS field_sale_lag
FROM @tablea
)
SELECT * FROM cte
WHERE NOT (cte.field_sale = 'Buy' AND ISNULL(field_sale_lead,'') = 'Retu')--AND field_sale_lead IS NOT null)
AND NOT (cte.field_sale = 'Retu' AND ISNULL(field_sale_lag,'') = 'buy' )--AND field_sale_lag IS NOT NULL)
When you do this, though, you realize that it found direct matches, but now there's still a Buy/Return pair, and I'd like to cancel that out.
It's at this point I'm stuck. I've done Recursive CTEs before, but for whatever reason I can't figure out how to recurse and make it cancel out 1/1/100 and 4/1/100. All I've managed to do is have it choke on the recursion.
DECLARE @tablea TABLE (field_id int, field_number CHAR(3), field_date datetime, field_inserted DATETIME, field_sale varchar(4))
INSERT INTO @tablea
VALUES
(1, 100, '20170311','20170311 01:00:00', 'Buy'),
(1, 100, '20170311','20170311 01:01:00', 'Buy'),
(1, 100, '20170311','20170311 01:02:00', 'Retu'),
(1, 100, '20170311','20170311 01:03:00', 'Retu'),
(2, 100, '20170311','20170311 01:03:00', 'Buy'),
(1, 110, '20170311','20170311 01:03:00', 'Buy');
WITH cte AS
(SELECT
ROW_NUMBER() OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS row_num,
field_id,
field_number,
field_date,
field_sale,
field_inserted,
lead(field_sale) OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS field_sale_lead,
lag(field_sale) OVER (PARTITION BY field_id, field_number, field_date ORDER BY field_inserted) AS field_sale_lag
FROM @tablea
--)
--SELECT * FROM cte
--WHERE NOT (cte.field_sale = 'Buy' AND ISNULL(field_sale_lead,'') = 'Retu')--AND field_sale_lead IS NOT null)
--AND NOT (cte.field_sale = 'Retu' AND ISNULL(field_sale_lag,'') = 'buy' )--AND field_sale_lag IS NOT NULL)
UNION ALL
SELECT
ROW_NUMBER() OVER (PARTITION BY cte.field_id, cte.field_number, cte.field_date ORDER BY cte.field_inserted) AS row_num,
cte.field_id,
cte.field_number,
cte.field_date,
cte.field_sale,
cte.field_inserted,
lead(cte.field_sale) OVER (PARTITION BY cte.field_id, cte.field_number, cte.field_date ORDER BY cte.field_inserted) AS field_sale_lead,
lag(cte.field_sale) OVER (PARTITION BY cte.field_id, cte.field_number, cte.field_date ORDER BY cte.field_inserted) AS field_sale_lag
FROM @tablea INNER JOIN cte ON cte.field_date = [@tablea].field_date AND cte.field_id = [@tablea].field_id AND cte.field_number = [@tablea].field_number
)
SELECT * FROM cte
WHERE NOT (cte.field_sale = 'Buy' AND ISNULL(field_sale_lead,'') = 'Retu')--AND field_sale_lead IS NOT null)
AND NOT (cte.field_sale = 'Retu' AND ISNULL(field_sale_lag,'') = 'buy' )--AND field_sale_lag IS NOT NULL)
We can tackle this without loops or recursion by using a common table expression and row_number().
If I am understanding your question correctly, you want to remove sales that have been returned, and for each 'Retu' it should remove the most recent 'Buy'.
First, we add an id using row_number() to our rowset so we can uniquely identify our rows.
Next, we add br_rn (short for Buy/Return RowNumber), partitioned by field_id, field_number, field_date, but we also add field_sale to the partition, and we order it by field_inserted desc.
This lets us match each 'Retu' with the most recent 'Buy', and once we can do that, we can eliminate all of the pairs with not exists():
;with cte as (
select
id = row_number() over (
order by field_id, field_number, field_date, field_inserted asc
)
, field_id
, field_number
, field_date
, field_inserted
, field_sale
, br_rn = row_number() over (
partition by field_id, field_number, field_date, field_sale
order by field_inserted desc
)
from @tablea
)
select
id
, field_id
, field_number
, field_date
, field_inserted
, field_sale
from cte
where not exists (
select 1
from cte as i
where i.field_id = cte.field_id
and i.field_number = cte.field_number
and i.field_date = cte.field_date
and i.br_rn = cte.br_rn
and i.id <> cte.id
)
order by id
rextester demo: http://rextester.com/TKXOC61533
For this input:
(1, 100, '20170311','20170311 01:00:00', 'Buy')
, (1, 100, '20170311','20170311 01:01:00', 'Buy')
, (1, 100, '20170311','20170311 01:02:00', 'Retu')
, (1, 100, '20170311','20170311 01:03:00', 'Retu')
, (2, 100, '20170311','20170311 01:03:00', 'Buy')
, (1, 110, '20170311','20170311 01:03:00', 'Buy');
returns:
+----+----------+--------------+------------+---------------------+------------+
| id | field_id | field_number | field_date | field_inserted | field_sale |
+----+----------+--------------+------------+---------------------+------------+
| 5 | 1 | 110 | 2017-03-11 | 2017-03-11 01:03:00 | Buy |
| 6 | 2 | 100 | 2017-03-11 | 2017-03-11 01:03:00 | Buy |
+----+----------+--------------+------------+---------------------+------------+
and for this input:
(1, 100, '20170311','20170311 01:01:00', 'Buy')
, (1, 100, '20170311','20170311 01:02:00', 'Buy')
, (1, 100, '20170311','20170311 01:03:00', 'Buy')
, (1, 100, '20170311','20170311 01:04:00', 'Retu')
, (1, 100, '20170311','20170311 01:05:00', 'Buy')
, (1, 100, '20170311','20170311 01:06:00', 'Retu')
, (1, 100, '20170311','20170311 01:07:00', 'Retu')
, (2, 100, '20170311','20170311 01:03:00', 'Buy')
, (1, 110, '20170311','20170311 01:03:00', 'Buy');
returns:
+----+----------+--------------+------------+---------------------+------------+
| id | field_id | field_number | field_date | field_inserted | field_sale |
+----+----------+--------------+------------+---------------------+------------+
| 1 | 1 | 100 | 2017-03-11 | 2017-03-11 01:01:00 | Buy |
| 8 | 1 | 110 | 2017-03-11 | 2017-03-11 01:03:00 | Buy |
| 9 | 2 | 100 | 2017-03-11 | 2017-03-11 01:03:00 | Buy |
+----+----------+--------------+------------+---------------------+------------+
for this input:
(1, 100, '20170311','20170311 01:01:00', 'Buy')
, (1, 100, '20170311','20170311 01:02:00', 'Buy')
, (1, 100, '20170311','20170311 01:04:00', 'Retu')
, (1, 100, '20170311','20170311 01:05:00', 'Retu')
, (1, 100, '20170312','20170311 01:06:00', 'Buy')
, (1, 100, '20170312','20170311 01:07:00', 'Buy')
, (2, 100, '20170311','20170311 01:03:00', 'Buy')
, (1, 110, '20170311','20170311 01:03:00', 'Buy')
returns:
+----+----------+--------------+------------+---------------------+------------+
| id | field_id | field_number | field_date | field_inserted | field_sale |
+----+----------+--------------+------------+---------------------+------------+
| 5 | 1 | 100 | 2017-03-12 | 2017-03-11 01:06:00 | Buy |
| 6 | 1 | 100 | 2017-03-12 | 2017-03-11 01:07:00 | Buy |
| 7 | 1 | 110 | 2017-03-11 | 2017-03-11 01:03:00 | Buy |
| 8 | 2 | 100 | 2017-03-11 | 2017-03-11 01:03:00 | Buy |
+----+----------+--------------+------------+---------------------+------------+
It may help to illustrate what we are doing by looking at what the cte returns before we eliminate any pairs.
Looking at just the set that needs filtering, before we filter it:
+----+----------+--------------+------------+---------------------+------------+-------+
| id | field_id | field_number | field_date | field_inserted | field_sale | br_rn |
+----+----------+--------------+------------+---------------------+------------+-------+
| 1 | 1 | 100 | 2017-03-11 | 2017-03-11 01:01:00 | Buy | 4 |
| 2 | 1 | 100 | 2017-03-11 | 2017-03-11 01:02:00 | Buy | 3 |
| 3 | 1 | 100 | 2017-03-11 | 2017-03-11 01:03:00 | Buy | 2 |
| 4 | 1 | 100 | 2017-03-11 | 2017-03-11 01:04:00 | Retu | 3 |
| 5 | 1 | 100 | 2017-03-11 | 2017-03-11 01:05:00 | Buy | 1 |
| 6 | 1 | 100 | 2017-03-11 | 2017-03-11 01:06:00 | Retu | 2 |
| 7 | 1 | 100 | 2017-03-11 | 2017-03-11 01:07:00 | Retu | 1 |
+----+----------+--------------+------------+---------------------+------------+-------+
Looking at it like this, we can easily see that the 'buy' order id 1 has a br_rn of 4 and there is no associated 'retu'.
One thing I can suggest is to delete pairs of sequential Buy/Retu rows while it is possible. Try:
DECLARE @tablea TABLE (field_id int, field_number CHAR(3), field_date datetime, field_inserted DATETIME, field_sale varchar(4))
INSERT INTO @tablea
VALUES
(1, 100, '20170311','20170311 01:01:00', 'Buy'),
(1, 100, '20170311','20170311 01:02:00', 'Buy'),
(1, 100, '20170311','20170311 01:03:00', 'Buy'),
(1, 100, '20170311','20170311 01:04:00', 'Retu'),
(1, 100, '20170311','20170311 01:05:00', 'Buy'),
(1, 100, '20170311','20170311 01:06:00', 'Retu'),
(1, 100, '20170311','20170311 01:07:00', 'Retu'),
(2, 100, '20170311','20170311 01:03:00', 'Buy'),
(1, 110, '20170311','20170311 01:03:00', 'Buy');
select * from @tablea
order by field_id,
field_number,
field_inserted
declare @eoj int = 1;
while @eoj > 0
begin
WITH cte AS
(
SELECT
case field_sale when 'Buy' then
lead (field_sale) OVER (PARTITION BY field_id, field_number ORDER BY field_inserted)
when 'Retu' then
lag (field_sale) OVER (PARTITION BY field_id, field_number ORDER BY field_inserted)
end nbr_type,
field_id,
field_number,
field_date,
field_sale,
field_inserted
FROM @tablea
)
delete
from cte
where nbr_type is not null and nbr_type <> field_sale;
set @eoj = @@rowcount;
-- check it
select * from @tablea
order by field_id,
field_number,
field_inserted;
end;
It will be repeated N+1 times where N is the length of the longest sequence of returns. N=2 in the above example.