How do I merge two SELECT queries with different WHERE clauses - sql

I'm trying to write query on two different selects with different WHERE clause and using GROUP BY.
I browsed for examples but mine is different since I have multiple fields for SELECT.
Here is example data:
-- drop table #temp_counts;
create table #temp_counts(
report_date smalldatetime not null,
Emp_id varchar(10) not null,
source_system varchar(10) not null
)
Insert into #temp_counts values ('2021-05-02 00:00:00', '12411', 'ABC');
Insert into #temp_counts values ('2021-05-02 00:00:00', '56421', 'ABC');
Insert into #temp_counts values ('2021-05-02 00:00:00', '45411', 'ABC');
Insert into #temp_counts values ('2021-05-02 00:00:00', '75411', 'ABC');
Insert into #temp_counts values ('2021-05-02 00:00:00', '13245', 'XYZ');
Insert into #temp_counts values ('2021-05-02 00:00:00', '66245', 'XYZ');
Insert into #temp_counts values ('2021-05-02 00:00:00', '77245', 'XYZ');
Insert into #temp_counts values ('2021-05-02 00:00:00', '98245', 'XYZ');
Insert into #temp_counts values ('2021-05-02 00:00:00', '34245', 'XYZ');
Insert into #temp_counts values ('2021-05-02 00:00:00', '29245', 'XYZ');
Insert into #temp_counts values ('2021-05-03 00:00:00', '14524', 'ABC');
Insert into #temp_counts values ('2021-05-03 00:00:00', '17824', 'ABC');
Insert into #temp_counts values ('2021-05-03 00:00:00', '32524', 'ABC');
Insert into #temp_counts values ('2021-05-03 00:00:00', '16724', 'XYZ');
Insert into #temp_counts values ('2021-05-03 00:00:00', '19924', 'XYZ');
Insert into #temp_counts values ('2021-05-03 00:00:00', '89424', 'XYZ');
Insert into #temp_counts values ('2021-05-03 00:00:00', '48324', 'XYZ');
Insert into #temp_counts values ('2021-05-03 00:00:00', '16000', 'XYZ');
Insert into #temp_counts values ('2021-05-04 00:00:00', '18724', 'ABC');
Insert into #temp_counts values ('2021-05-04 00:00:00', '12904', 'XYZ');
Insert into #temp_counts values ('2021-05-05 00:00:00', '12074', 'ABC');
Insert into #temp_counts values ('2021-05-05 00:00:00', '12784', 'XYZ');
Insert into #temp_counts values ('2021-05-05 00:00:00', '12324', 'XYZ');
Insert into #temp_counts values ('2021-05-05 00:00:00', '75124', 'XYZ');
These are the queries I would like to merge:
select count(*) emp_count, report_date , 'ABC' source_system from #temp_counts
where source_system = 'ABC'
group by report_date
order by report_date
select count(*) emp_count, report_date , 'XYZ' source_system from #temp_counts
where source_system = 'XYZ'
group by report_date
order by report_date
I tried 2 methods as under:
--Method 1
select fir.emp_count, fir.report_date, fir.source_system from
(select count(*) emp_count, report_date , 'ABC' source_system from #temp_counts
where source_system = 'ABC') as fir
inner join
(select count(*) emp_count, report_date , 'XYZ' source_system from #temp_counts
where source_system = 'XYZ') as sec
on fir.report_date = sec.report_date
group by fir.report_date
order by fir.report_date
--Method 2
select count(*) emp_count, report_date , 'ABC' source_system from #temp_counts
where source_system = 'ABC'
UNION ALL
select count(*) emp_count, report_date , 'XYZ' source_system from #temp_counts
where source_system = 'XYZ'
group by report_date
order by report_date
Both give Error:
Msg 8120, Level 16, State 1, Line 61
Column '#temp_counts.report_date' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
Please guide

seems like you just want to group by report_date and source_system:
select count(*) emp_count, report_date , source_system
FROM #temp_counts
group by report_date, source_system
order by source_system,report_date
if you want to have result for specific source systems then you can combine conditions:
select count(*) emp_count, report_date , source_system
FROM #temp_counts
where source_system in ('ABC', 'XYZ')
group by report_date, source_system
order by source_system,report_date
you can change order by to show rows in the order you want to show
just to illustrate how to use union :
select count(*) emp_count, report_date , source_system from #temp_counts
where source_system = 'ABC'
group by report_date
union all
select count(*) emp_count, report_date , source_system from #temp_counts
where source_system = 'XYZ'
group by report_date
order by source_system, report_date

Related

Spark SQL Query to assign join date by next closest once

`CREATE TABLE TABLE_1(
CALL_ID INT,
CALL_DATE DATE);
INSERT INTO TABLE_1(CALL_ID, CALL_DATE)
VALUES (1, '2022-10-22'),
(2, '2022-10-31'),
(3, '2022-11-04');
CREATE TABLE TABLE_2(
PROD_ID INT,
PROD_DATE DATE);
INSERT INTO TABLE_2(PROD_ID, PROD_DATE)
VALUES (1, '2022-10-25'),
(2, '2022-11-17');
CREATE TABLE TABLE_RESULT(
CALL_ID INT,
CALL_DATE DATE,
PROD_ID INT,
PROD_DATE DATE);
INSERT INTO TABLE_RESULT(CALL_ID, CALL_DATE, PROD_ID, PROD_DATE)
VALUES (1, '2022-10-22', 1, '2022-10-25'),
(2, '2022-10-31', NULL, NULL),
(3, '2022-11-04', 2, '2022-11-17');`
Can you help me to create the TABLE_RESULT with a join in a elegant way? This is a very small example.
Thanks
I solved it. Thanks anyway.
SELECT * FROM (SELECT *, COALESCE(LEAD(CALL_DATE) OVER (PARTITION BY 1 ORDER BY CALL_DATE), CURRENT_DATE) AS CALL_DATE_NEXT FROM TABLE_1) AS A LEFT JOIN TABLE_2 AS B ON (A.CALL_DATE<=B.PROD_DATE AND A.CALL_DATE_NEXT>B.PROD_DATE)

Oracle SQL Join columns on 2 conditions

I tried to search forums for my scenario but could not find anything remotely similar. So here goes my long winded explanation : I have 3 tables - order_fact , session_fact and orderline.
create table order_fact (order_no varchar2(20), order_timestamp date, cookie_id number, session_id number);
insert into order_fact values ('69857-20210329', to_date('29-MAR-2021 10:11:58', 'DD-MON-YYYY HH24:MI:SS'), 827678, 79853421);
insert into order_fact values ('78345-20210411', to_date('11-APR-2021 18:37:07', 'DD-MON-YYYY HH24:MI:SS'), 569834, 84886798);
insert into order_fact values ('79678-20210519', to_date('19-MAY-2021 20:51:34', 'DD-MON-YYYY HH24:MI:SS'), 589623, 89556782);
insert into order_fact values ('78759-20210411', to_date('11-APR-2021 09:46:52', 'DD-MON-YYYY HH24:MI:SS'), 685213, 77549823);
create table session_fact (cookie_id number, session_id number, session_timestamp date, marketing_vendor varchar2(30) , referral_type VARCHAR2(2) );
insert into session_fact values (827678, 79853421, to_date('29-MAR-2021 09:47:36', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (827678, 79853378, to_date('28-MAR-2021 12:47:36', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (827678, 79853313, to_date('24-MAR-2021 13:23:36', 'DD-MON-YYYY HH24:MI:SS'), 'Naaptol', 'S');
insert into session_fact values (827678, 79853254, to_date('23-MAR-2021 14:39:56', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (569834, 84886798, to_date('11-APR-2021 14:41:44', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (569834, 84886735, to_date('10-APR-2021 11:03:44', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (569834, 84886687, to_date('08-APR-2021 17:26:49', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (569834, 84886659, to_date('03-APR-2021 11:03:44', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (569834, 84886497, to_date('01-APR-2021 07:59:08', 'DD-MON-YYYY HH24:MI:SS'), 'Google', 'R');
insert into session_fact values (685213, 77549823, to_date('11-APR-2021 09:07:34', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (685213, 77549786, to_date('09-APR-2021 20:51:34', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (685213, 77549589, to_date('07-APR-2021 14:11:57', 'DD-MON-YYYY HH24:MI:SS'), 'FabShopping', 'D');
insert into session_fact values (685213, 77548356, to_date('03-APR-2021 15:38:42', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (589623, 89556782, to_date('19-MAY-2021 16:46:52', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (589623, 89556512, to_date('18-MAY-2021 09:46:52', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (589623, 89556477, to_date('13-MAY-2021 18:34:29', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
insert into session_fact values (589623, 89556348, to_date('10-MAY-2021 16:13:49', 'DD-MON-YYYY HH24:MI:SS'), '-1', 'D');
create table orderline (order_no varchar2(20), ol_nbr number, ol_ref varchar2(5));
insert into orderline values ('78345-20210411', 0, '-2');
insert into orderline values ('78345-20210411', 1, 'HV3');
insert into orderline values ('78345-20210411', 2, 'HV3');
insert into orderline values ('78759-20210411', 0, '-2');
insert into orderline values ('78759-20210411', 1, 'PS5');
insert into orderline values ('78759-20210411', 2, 'PS5');
insert into orderline values ('78759-20210411', 3, 'PS5');
insert into orderline values ('79678-20210519', 0, '-2');
insert into orderline values ('79678-20210519', 1, 'NPT');
insert into orderline values ('79678-20210519', 2, 'NPT');
insert into orderline values ('69857-20210329', 0, '-2');
insert into orderline values ('69857-20210329', 1, 'HV3');
insert into orderline values ('69857-20210329', 2, 'HV3');
insert into orderline values ('69857-20210329', 3, 'HV3');
As can be seen from above order_fact and session_fact tables are connected by cookie and session id. The request is to get these columns : ORDER_NO, MARKETING_VENDOR, REFERRAL_TYPE, OL_REF from the above 3 tables.
I have written the JOIN query :
select a.ORDER_NO, b.MARKETING_VENDOR,
b.REFERRAL_TYPE, c.OL_REF
FROM order_fact a
INNER JOIN session_fact b
ON (a.cookie_id = b.COOKIE_ID AND
b.session_timestamp < a.order_timestamp AND
b.session_timestamp > a.order_timestamp-7)
INNER JOIN orderline c ON
(a.ORDER_NO = c.ORDER_NO AND c.OL_NBR = 1);
Here is the sticky situation for me :
Get the data in session_fact table for a cookie_id in order_fact for timestamp of not more than 7 days before the order_timestamp. For example - order_no 78345-20210411 was placed on 11-APR-2021 18:37:07. Using the cookie id of that order I get all rows in session_fact till 11-APR - 7 days = 4-APR. So 3rd and 1st Apr data cannot be considered. This has been taken care in my query. But I wanted to mention why I had the additional AND clauses in the 1st JOIN ON condition.
From the data got in point 1 above do not consider those records where REFERRAL_TYPE = 'D' and MARKETING_VENDOR = '-1'. 'S' and '-1' can be considered and so is 'R' and '-1'. Basically any values can be considered as long as its NOT 'D' and '-1'. And select the record whose timestamp is closest to the order_timestamp in table order_fact. Now this is where it gets tricky - if there are no records of past 7 days where combo of REFERRAL_TYPE and MARKETING_VENDOR is NOT 'D' and '-1' then join the tables order_fact and session_fact on both cookie_id and session_id and fetch the values.
Join tables order_fact and orderline ON ORDER_NO and OL_NBR = 1. This also has been taken care in my join query.
So my only problem is getting the JOIN between session_fact and order_fact on the 2 different conditions mentioned in point 2. Can this be done by SQL? The Tech Lead of my team asked me to write a PL/SQL block. I did that because the original request was to add MARKETING_VENDOR, REFERRAL_TYPE, OL_REF columns in order_fact table and get the values from their respective tables. I cannot help but feel this can be done by SQL using CASE. Or am I wrong? If anyone could please help me with this query I will be grateful.
Edit : Adding the result data set
Edit : Any kind soul to help me out? 🙂 I take it it's not possible in a SQL statement.
And select the record whose timestamp is closest to the order_timestamp in table order_fact
From your description looks like you just need Top 1 record by session_timestamp:
with
step1 as (
SELECT
a.ORDER_NO
,a.order_timestamp
,c.MARKETING_VENDOR
,c.REFERRAL_TYPE
,c.session_timestamp
FROM order_fact a
cross apply (
select *
from session_fact b
where a.cookie_id = b.COOKIE_ID
and (REFERRAL_TYPE,MARKETING_VENDOR) not in (('D','-1'))
AND b.session_timestamp < a.order_timestamp
--AND b.session_timestamp > a.order_timestamp-7
order by b.session_timestamp desc
fetch first 1 rows only
) c
)
select
s.*
,o.OL_REF
FROM
step1 s
JOIN orderline o
ON (s.ORDER_NO = o.ORDER_NO AND o.OL_NBR = 1)
;
Result:
ORDER_NO ORDER_TIMESTAMP MARKETING_VENDOR REFERRAL_TYPE SESSION_TIMESTAMP OL_REF
-------------- ------------------- ---------------- ------------- ------------------- ------
78345-20210411 2021-04-11 18:37:07 Google R 2021-04-01 07:59:08 HV3
78759-20210411 2021-04-11 09:46:52 FabShopping D 2021-04-07 14:11:57 PS5
69857-20210329 2021-03-29 10:11:58 Naaptol S 2021-03-24 13:23:36 HV3

SQL: Getting previous record from other table

I want to get the previous record of each record in Table A from Table B.
for easy, below is the table sample data:
drop table if exists #A
drop table if exists #B
CREATE TABLE #A(Name varchar(10), time datetime, value int)
insert into #A values
('A', '2020-03-31 18:00:00', 56),
('A', '2020-03-31 19:00:00', 3),
('B', '2020-03-31 14:00:00', 14),
('C', '2020-03-31 15:00:00', 26)
CREATE TABLE #B(Name varchar(10), time datetime, value int)
insert into #A values
('A', '2020-03-31 21:00:00', 79),
('A', '2020-03-31 17:00:00', 44),
('A', '2020-03-31 14:00:00', 76),
('B', '2020-03-31 18:00:00', 89),
('C', '2020-03-31 11:00:00', 29),
('C', '2020-03-31 08:00:00', 6)
EDIT:
It should include only last previous record from TableB.
Sorry for the confusion. Changed image and sample data also.
I think you want:
select a.name, a.time, a.value
from #a a
union all
select b.name, b.time, b.value
from (select b.*, row_number() over (order by time desc) as seqnum
from #b b
where b.time < (select min(a.time)
from #a a
where a.name = b.name
)
) b
where seqnum = 1
order by name, time;
Here is a db<>fiddle.
EDIT:
If b could have multiple "previous" records, then:
select a.name, a.time, a.value
from #a a
union all
select b.name, b.time, b.value
from (select b.*,
row_number() over (partition by b.name order by b.time desc) as seqnum
from #b b
where b.time < (select min(a.time)
from #a a
where a.name = b.name
)
) b
where seqnum = 1
order by name, time;
Here is a db<>fiddle for this version.

SQL statement to get all customers with no orders TODAY(current date)

The question is, how do I write a statement that would return all customers with NO Orders TODAY using sql join?
Tables : tbl_member ,tbl_order
tbl_member consist of id,name,
tbl_order consist of id, date, foodOrdered
If you left join, the select where the table on the right is nulkl, it limits to the rows that DO NOT meet the join condition:
select t1.*
from tbl_member t1
left join tbl_member t2
on t1.id = t2.id -- assuming that t2.id relates to t1.id
and t2.date = current_date() -- today's date in mysql
where t2.id is null
Assuming tbl_order date is a datetime (it probably should be) for sql server you could use something like:
declare #tbl_member table
(
id int,
fullname varchar(50)
)
declare #tbl_order table
(
id int,
orderdate datetime,
foodOrdered varchar(50)
)
INSERT INTO #tbl_member VALUES (1, 'George Washington')
INSERT INTO #tbl_member VALUES (2, 'Abraham Lincoln')
INSERT INTO #tbl_member VALUES (3, 'Mickey Mouse')
INSERT INTO #tbl_member VALUES (3, 'Donald Duck')
INSERT INTO #tbl_order VALUES (1, '2017-07-01 13:00:00', 'Fish and Chips')
INSERT INTO #tbl_order VALUES (2, '2017-07-03 08:00:00', 'Full English')
INSERT INTO #tbl_order VALUES (3, '2017-07-25 08:00:00', 'Veggie Burger')
INSERT INTO #tbl_order VALUES (3, '2017-07-25 12:00:00', 'Bangers and Mash')
SELECT id, fullname FROM #tbl_member WHERE id NOT IN
(SELECT id FROM #tbl_order
WHERE CAST(orderDate as date) = CAST(GETDATE() as Date))
It helps if you specify what flavour database you are using as the syntax is often subtly different.

Summing up the records as per given conditions

I have a table like below, What I need that for any particular fund and up to any particular date logic will sum the amount value. Let say I need the sum for 3 dates as 01/28/2015,03/30/2015 and 04/01/2015. Then logic will check for up to first date how many records are there in table . If it found more than one record then it'll sum the amount value. Then for next date it'll sum up to the next date but from the previous date it had summed up.
Id Fund Date Amount
1 A 01/20/2015 250
2 A 02/28/2015 300
3 A 03/20/2015 400
4 A 03/30/2015 200
5 B 04/01/2015 500
6 B 04/01/2015 600
I want result to be like below
Id Fund Date SumOfAmount
1 A 02/28/2015 550
2 A 03/30/2015 600
3 B 04/01/2015 1100
Based on your question, it seems that you want to select a set of dates, and then for each fund and selected date, get the sum of the fund amounts from the selected date to the previous selected date. Here is the result set I think you should be expecting:
Fund Date SumOfAmount
A 2015-02-28 550.00
A 2015-03-30 600.00
B 2015-04-01 1100.00
Here is the code to produce this output:
DECLARE #Dates TABLE
(
SelectedDate DATE PRIMARY KEY
)
INSERT INTO #Dates
VALUES
('02/28/2015')
,('03/30/2015')
,('04/01/2015')
DECLARE #FundAmounts TABLE
(
Id INT PRIMARY KEY
,Fund VARCHAR(5)
,Date DATE
,Amount MONEY
);
INSERT INTO #FundAmounts
VALUES
(1, 'A', '01/20/2015', 250)
,(2, 'A', '02/28/2015', 300)
,(3, 'A', '03/20/2015', 400)
,(4, 'A', '03/30/2015', 200)
,(5, 'B', '04/01/2015', 500)
,(6, 'B', '04/01/2015', 600);
SELECT
F.Fund
,D.SelectedDate AS Date
,SUM(F.Amount) AS SumOfAmount
FROM
(
SELECT
SelectedDate
,LAG(SelectedDate,1,'1/1/1900') OVER (ORDER BY SelectedDate ASC) AS PreviousDate
FROM #Dates
) D
JOIN
#FundAmounts F
ON
F.Date BETWEEN DATEADD(DAY,1,D.PreviousDate) AND D.SelectedDate
GROUP BY
D.SelectedDate
,F.Fund
EDIT: Here is alternative to the LAG function for this example:
FROM
(
SELECT
SelectedDate
,ISNULL((SELECT TOP 1 SelectedDate FROM #Dates WHERE SelectedDate < Dates.SelectedDate ORDER BY SelectedDate DESC),'1/1/1900') AS PreviousDate
FROM #Dates Dates
) D
If i change your incorrect sample data to ...
CREATE TABLE TableName
([Id] int, [Fund] varchar(1), [Date] datetime, [Amount] int)
;
INSERT INTO TableName
([Id], [Fund], [Date], [Amount])
VALUES
(1, 'A', '2015-01-28 00:00:00', 250),
(2, 'A', '2015-01-28 00:00:00', 300),
(3, 'A', '2015-03-30 00:00:00', 400),
(4, 'A', '2015-03-30 00:00:00', 200),
(5, 'B', '2015-04-01 00:00:00', 500),
(6, 'B', '2015-04-01 00:00:00', 600)
;
this query using GROUP BY works:
SELECT MIN(Id) AS Id,
MIN(Fund) AS Fund,
[Date],
SUM(Amount) AS SumOfAmount
FROM dbo.TableName t
WHERE [Date] IN ('01/28/2015','03/30/2015','04/01/2015')
GROUP BY [Date]
Demo
Initially i have used Row_number and month function to pick max date of every month and in 2nd cte i did sum of amounts and joined them..may be this result set matches your out put
declare #t table (Id int,Fund Varchar(1),Dated date,amount int)
insert into #t (id,Fund,dated,amount) values (1,'A','01/20/2015',250),
(2,'A','01/28/2015',300),
(3,'A','03/20/2015',400),
(4,'A','03/30/2015',200),
(5,'B','04/01/2015',600),
(6,'B','04/01/2015',500)
;with cte as (
select ID,Fund,Amount,Dated,ROW_NUMBER() OVER
(PARTITION BY DATEDIFF(MONTH, '20000101', dated)ORDER BY dated desc)AS RN from #t
group by ID,Fund,DATED,Amount
),
CTE2 AS
(select SUM(amount)Amt from #t
GROUP BY MONTH(dated))
,CTE3 AS
(Select Amt,ROW_NUMBER()OVER (ORDER BY amt)R from cte2)
,CTE4 AS
(
Select DISTINCT C.ID As ID,
C.Fund As Fund,
C.Dated As Dated
,ROW_NUMBER()OVER (PARTITION BY RN ORDER BY (SELECT NULL))R
from cte C INNER JOIN CTE3 CC ON c.RN = CC.R
Where C.RN = 1
GROUP BY C.ID,C.Fund,C.RN,C.Dated )
select C.R,C.Fund,C.Dated,cc.Amt from CTE4 C INNER JOIN CTE3 CC
ON c.R = cc.R
declare #TableName table([Id] int, [Fund] varchar(1), [Date] datetime, [Amount] int)
declare #Sample table([SampleDate] datetime)
INSERT INTO #TableName
([Id], [Fund], [Date], [Amount])
VALUES
(1, 'A', '20150120 00:00:00', 250),
(2, 'A', '20150128 00:00:00', 300),
(3, 'A', '20150320 00:00:00', 400),
(4, 'A', '20150330 00:00:00', 200),
(5, 'B', '20150401 00:00:00', 500),
(6, 'B', '20150401 00:00:00', 600)
INSERT INTO #Sample ([SampleDate])
values ('20150128 00:00:00'), ('20150330 00:00:00'), ('20150401 00:00:00')
-- select * from #TableName
-- select * from #Sample
;WITH groups AS (
SELECT [Fund], [Date], [AMOUNT], MIN([SampleDate]) [SampleDate] FROM #TableName
JOIN #Sample ON [Date] <= [SampleDate]
GROUP BY [Fund], [Date], [AMOUNT])
SELECT [Fund], [SampleDate], SUM([AMOUNT]) FROM groups
GROUP BY [Fund], [SampleDate]
Explanation:
The CTE groups finds the earliest SampleDate which is later than (or equals to) your
data's date and enriches your data accordingly, thus giving them the group to be summed up in.
After that, you can group on the derived date.