Using EXISTS within a GROUP BY clause - sql

Is it possible to do the following:
I have a table that looks like this:
declare #tran_TABLE TABLE(
EOMONTH DATE,
AccountNumber INT,
CLASSIFICATION_NAME VARCHAR(50),
Value Float
)
INSERT INTO #tran_TABLE VALUES('2018-11-30','123','cat1',10)
INSERT INTO #tran_TABLE VALUES('2018-11-30','123','cat1',15)
INSERT INTO #tran_TABLE VALUES('2018-11-30','123','cat1',5 )
INSERT INTO #tran_TABLE VALUES('2018-11-30','123','cat2',10)
INSERT INTO #tran_TABLE VALUES('2018-11-30','123','cat3',12)
INSERT INTO #tran_TABLE VALUES('2019-01-31','123','cat1',5 )
INSERT INTO #tran_TABLE VALUES('2019-01-31','123','cat2',10)
INSERT INTO #tran_TABLE VALUES('2019-01-31','123','cat2',15)
INSERT INTO #tran_TABLE VALUES('2019-01-31','123','cat3',5 )
INSERT INTO #tran_TABLE VALUES('2019-01-31','123','cat3',2 )
INSERT INTO #tran_TABLE VALUES('2019-03-31','123','cat1',15)
EOMONTH AccountNumber CLASSIFICATION_NAME Value
2018-11-30 123 cat1 10
2018-11-30 123 cat1 15
2018-11-30 123 cat1 5
2018-11-30 123 cat2 10
2018-11-30 123 cat3 12
2019-01-31 123 cat1 5
2019-01-31 123 cat2 10
2019-01-31 123 cat2 15
2019-01-31 123 cat3 5
2019-01-31 123 cat3 2
2019-03-31 123 cat1 15
I want to produce a result where it will check whether in each month, for each AccountNumber (just one in this case) there exists a CLASSIFICATION_NAME cat1, cat2, cat3.
If all 3 exist for the month, then return 1 but if any are missing return 0.
The result should look like:
EOMONTH AccountNumber CLASSIFICATION_NAME
2018-11-30 123 1
2019-01-31 123 1
2019-03-31 123 0
But I want to do it as compactly as possible, without first creating a table that groups everything by CLASSIFICATION_NAME, EOMONTH and AccountNumber and then selects from that table.
For example, in the pseudo code below, is it possible to use maybe an EXISTS statement to do the group by?
SELECT
EOMONTH
,AccountNumber
,CASE WHEN EXISTS (CLASSIFICATION_NAME = 'cat1' AND 'cat2' AND 'cat3') THEN 1 ELSE 0 end
,SUM(Value) AS totalSpend
FROM #tran_TABLE
GROUP BY
EOMONTH
,AccountNumber

You could emulate this behavior by counting the distinct classifications that answer this condition (per group):
SELECT
EOMONTH
,AccountNumber
,CASE COUNT(DISTINCT CASE WHEN classification_name IN ('cat1', 'cat2', 'cat3') THEN classification_name END)
WHEN 3 THEN 1
ELSE 0
END
,SUM(Value) AS totalSpend
FROM #tran_TABLE
GROUP BY
EOMONTH
,AccountNumber

Try this-
SELECT EOMONTH,
AccountNumber,
CASE
WHEN COUNT(DISTINCT CLASSIFICATION_NAME) = 3 THEN 1
ELSE 0
END CLASSIFICATION_NAME
FROM #tran_TABLE
GROUP BY EOMONTH,AccountNumber
Output is-
2018-11-30 123 1
2019-01-31 123 1
2019-03-31 123 0

Query like this. You can count distinct values.
When you count unique values then column 'Three_Unique_Cat'. When you count exactly 'cat1','cat2','cat3' then column 'Three_Cat1_Cat2_Cat3'
SELECT
EOMONTH, AccountNumber
,CASE WHEN
COUNT(DISTINCT CLASSIFICATION_NAME)=3 THEN 1
ELSE 0
END AS 'Three_Unique_Cat'
,CASE WHEN
COUNT(DISTINCT CASE WHEN CLASSIFICATION_NAME IN ('cat1','cat2','cat3')
THEN CLASSIFICATION_NAME ELSE NULL END)=3 THEN 1
ELSE 0
END AS 'Three_Cat1_Cat2_Cat3'
,SUM(Value) AS totalSpend
FROM #tran_TABLE
GROUP BY EOMONTH, AccountNumber
Output:
EOMONTH AccountNumber Three_Unique_Cat Three_Cat1_Cat2_Cat3 totalSpend
2018-11-30 123 1 1 52
2019-01-31 123 1 1 37
2019-03-31 123 0 0 15

It's easy, just as below:
select
EOMONTH,
AccountNumber,
case when count(distinct CLASSIFICATION_NAME) = 3 then 1 else 0 end as CLASSIFICATION_NAME
from
tran_TABLE
group by
EOMONTH,
AccountNumber

Related

ROW_NUMBER() Based on Dates

I have the following data:
test_date
2018-07-01
2018-07-02
...
2019-06-30
2019-07-01
2019-07-02
...
2020-06-30
2020-07-01
I want to increment a row_number value every time right(test_date,5) = '07-01' so that my final result looks like this:
test_date row_num
2018-07-01 1
2018-07-02 1
... 1
2019-06-30 1
2019-07-01 2
2019-07-02 2
... 2
2020-06-30 2
2020-07-01 3
I tried doing something like this:
, ROW_NUMBER() OVER (
PARTITION BY CASE WHEN RIGHT(a.[test_date],5) = '07-01' THEN 1 ELSE 0 END
ORDER BY a.[test_date]
) AS [test2]
But that did not work out for me.
Any suggestions?
Use datepart to identify the correct date, and then add 1 to a sum every time it changes (assuming there will never be more than 1 row per date).
declare #Test table (test_date date);
insert into #Test (test_date)
values
('2018-07-01'),
('2018-07-02'),
('2019-06-30'),
('2019-07-01'),
('2019-07-02'),
('2020-06-30'),
('2020-07-01');
select *
, sum(case when datepart(month,test_date) = 7 and datepart(day,test_date) = 1 then 1 else 0 end) over (order by test_date asc) row_num
from #Test
order by test_date asc;
Returns:
test_date
row_num
2018-07-01
1
2018-07-02
1
2019-06-30
1
2019-07-01
2
2019-07-02
2
2020-06-30
2
2020-07-01
3
You can do it with DENSE_RANK() window function if you subtract 6 months from your dates:
SELECT test_date,
DENSE_RANK() OVER (ORDER BY YEAR(DATEADD(month, -6, test_date))) row_num
FROM tablename
See the demo.
Results:
test_date | row_num
---------- | -------
2018-07-01 | 1
2018-07-02 | 1
2019-06-30 | 1
2019-07-01 | 2
2019-07-02 | 2
2020-06-30 | 2
2020-07-01 | 3
build a running total based on month=7 and day=2
declare #Test table (mykey int,test_date date);
insert into #Test (mykey,test_date)
values
(1,'2018-07-01'),
(2,'2018-07-02'),
(3,'2019-06-30'),
(4,'2019-07-01'),
(5,'2019-07-02'),
(6,'2020-06-30'),
(7,'2020-07-01');
select mykey,test_date,
sum(case when DatePart(Month,test_date)=7 and DatePart(Day,test_date)=2 then 1 else 0 end) over (order by mykey) RunningTotal from #Test
order by mykey

Sql Pivot on time

Table 1: Daily attendance data:
att_date emp_code emp_name in_time out_time
2018-10-21 9999 Test 2018-10-21 08:00:00.000 2018-10-22 06:00:00.000
Table 2: Trnevents
emp_readr_id DT EVENTID
9999 2018-10-24 07:00:00.000 0
9999 2018-10-24 05:00:00.000 0
9999 2018-10-24 03:00:00.000 0
9999 2018-10-23 21:00:00.000 0
9999 2018-10-23 19:00:00.000 0
9999 2018-10-23 06:00:00.000 0
9999 2018-10-22 06:00:00.000 0
9999 2018-10-21 08:00:00.000 0
I used this query to get all times in between in time and out time ,below query works fine but i try to make in row by using pivot. While using pivot out time shows in next row.
declare #tempProcesstable as table(
[id] [nvarchar](200) NULL,
[time_stamp] datetime NULL,
[AccessType] varchar(3) NULL)
insert into #tempProcesstable
select distinct t1.emp_Reader_id, t1.DT,t1.eventid from daily_attendance_data t2 join trnevents t1
on t1.emp_reader_id=t2.emp_reader_id where (CONVERT(VARCHAR(26), t2.att_Date, 23) >=CONVERT(VARCHAR(26), '2018-10-20', 23)
and CONVERT(VARCHAR(26), t2.att_date, 23) <=CONVERT(VARCHAR(26), '2018-10-21', 23))
and
(t1.DT >=t2.in_time
and t1.DT <=t2.out_time)
-- and t1.emp_reader_id=1000
group by t1.emp_Reader_id,t1.dt,t1.eventid order by t1.emp_reader_id,DT asc
; With CheckIns
As (Select Rowemp_reader_id = Row_Number() Over (Partition by id, Cast(time_stamp As Date) Order By time_stamp),
id, time_stamp,
[Date] = Cast(time_stamp As Date),
[Time] = Cast(time_stamp As Time(0))
From #tempProcesstable)
Select Pvt.id,B.emp_name , [Date], CHECK1, CHECK2,Cast(dateadd(ss,datediff(ss,CHECK1,CHECK2),0) As Time(0)) Total1,
CHECK3, CHECK4,Cast(dateadd(ss,datediff(ss,CHECK3,CHECK4),0) As Time(0)) Total2
From (Select id, [Date], [Time],
CHECKNum = 'CHECK' + Cast(Rowemp_reader_id As varchar(11))
From CheckIns) As P
Pivot (Min([Time])
For CheckNum In (Check1, [Check2], Check3, Check4)
) As Pvt
LEFT OUTER JOIN
dbo.employee AS B ON Pvt.id= B.emp_reader_id
My output:
id emp_name Date CHECK1 CHECK2 Total1 CHECK3 CHECK4 Total2
1048 Singh 2018-10-21 07:06:07 17:34:05 10:27:58 NULL NULL NULL
9999 Test 2018-10-21 08:00:00 NULL NULL NULL NULL NULL NULL
9999 Test 2018-10-22 06:00:00 NULL NULL NULL NULL NULL NULL
Expected output:
I want all times between in time and out time in night to morning also.
can any one help me to rectify this.
id emp_name Date CHECK1 CHECK2 Total1 CHECK3 CHECK4 Total2
1048 Singh 2018-10-21 07:06:07 17:34:05 10:27:58 NULL NULL NULL
9999 Test 2018-10-21 08:00:00 06:00:00 NULL NULL NULL NULL NULL
You can try to use ROW_NUMBER() window function make row number by each date.
then use condition aggregate function to do pivot
SELECT emp_readr_id,
emp_name,
[Date],
MAX(CASE WHEN RN = 1 THEN time END) CHECK1,
MAX(CASE WHEN RN = 2 THEN time END) CHECK2,
MAX(CASE WHEN RN = 3 THEN time END) CHECK3,
MAX(CASE WHEN RN = 4 THEN time END) CHECK4
FROM (
SELECT emp_readr_id,
emp_name,
CONVERT(VARCHAR(10),DT,120) 'Date',
ROW_NUMBER() OVER(PARTITION BY CONVERT(VARCHAR(10),DT,120) ORDER BY DT) rn,
CONVERT(VARCHAR(10),DT,108) time
FROM Daily d
JOIN Trnevents t on t.DT between d.in_time and d.out_time
) t1
group by emp_readr_id,
emp_name,
[Date]
sqlifddle

Tolerance with Min Max

I am trying to adjust the below code by adding a 2 week tolerance piece.
What it does it looks when the first time a customer (identifier) created a request and the first time it was completed and counts the days which happened in between.
However I am trying to add a tolerance piece. Which says count the number of NCO which occurred between those dates and if there were further requests past the completion date which happened within 2 weeks of the completion date then count those as well (part of the same request). Anything past 2 weeks of the completions date consider as a new request.
CREATE TABLE #temp
(
Identifier varchar(40)NOT NULL
,Created_Date DATETIME NOT NULL
,Completed_Date DATETIME NULL
,SN_Type varchar(20) NOT NULL
,SN_Status varchar(20) NOT NULL
)
;
INSERT INTO #temp
VALUES ('3333333','2017-02-14 15:00:40.000','2017-02-15 00:00:00.000','Re-Activattion', 'COMP');
INSERT INTO #temp
VALUES ('3333333','2017-05-24 16:41:04.000','2017-06-05 00:00:00.000','Re-Activattion', 'N-CO');
INSERT INTO #temp
VALUES ('3333333','2017-05-25 11:49:54.000','2017-05-26 00:00:00.000','Re-Activattion', 'COMP');
INSERT INTO #temp
VALUES ('3333333','2017-06-27 10:24:29.000',NULL,'Re-Activattion', 'ACC');
#Alex you code is accurate just I would like to be selecting the min date the record is created a 2nd time, so line 2 of the result should return min date to be 2017-05-24 16:41:04.000.
select identifier
,case
when sum(case when SN_STATUS='COMP' and SN_TYPE = 'Re-Activattion' then 1 else 0 end)>0
then str(datediff(day
,MIN(case
when SN_TYPE = 'Re-Activattion'
then Created_Date
else null
end
)
,min(case
when (SN_TYPE = 'Re-Activattion'
and SN_STATUS='COMP'
)
then Completed_Date
else null
end
)
)
)
when sum(case when SN_TYPE='Re-Activattion' then 1 else 0 end)>0
then 'NOT COMP'
else 'NO RE-ACT'
end
as RE_ACT_COMPLETION_TIME
,Sum(CASE WHEN SN_STATUS = 'N-CO' THEN 1 ELSE 0 END) as [RE-AN NCO #]
from #temp
group by identifier
;
RESULTS I AM AFTER:
Your table design is not optimal for these kinds of queries as there is no definitive record that specified order start and order end. Additionally multiple orders are stored with the same identifier.
To work around this you need to calculate/identify Order start and Order End records yourself.
One way to do it is using Common Table Expressions.
Note: I have added comments to code to explain what each section does.
-- calculate/identify Order start and Order End records
WITH cte AS
(
-- 1st Order start record i.e. earliest record in the table for a given "Identifier"
SELECT Identifier, MIN( Created_Date ) AS Created_Date, CONVERT( VARCHAR( 30 ), 'Created' ) AS RecordType, 1 AS OrderNumber
FROM #temp
GROUP BY Identifier
UNION ALL
-- All records with "COMP" status are treated as order completed events. Add 2 weeks to the completed date to create a "dummy" Order End Date
SELECT Identifier, DATEADD( WEEK, 2, Created_Date ) AS Created_Date, 'Completed' AS RecordType, ROW_NUMBER() OVER( PARTITION BY Identifier ORDER BY Created_Date ) AS OrderNumber
FROM #temp
WHERE SN_STATUS = 'COMP'
UNION ALL
-- Set the start period of the next order to be right after (3 ms) the previous Order End Date
SELECT Identifier, DATEADD( ms, 3, DATEADD( WEEK, 2, Created_Date )) AS Created_Date, 'Created' AS RecordType, ROW_NUMBER() OVER( PARTITION BY Identifier ORDER BY Created_Date ) + 1 AS OrderNumber
FROM #temp
WHERE SN_STATUS = 'COMP'
),
-- Combine Start / End records into one record
OrderGroups AS(
SELECT Identifier, OrderNumber, MIN( Created_Date ) AS OrderRangeStartDate, MAX( Created_Date ) AS OrderRangeEndDate
FROM cte
GROUP BY Identifier, OrderNumber
)
SELECT a.Identifier, a.OrderNumber, OrderRangeStartDate, OrderRangeEndDate,
case
when sum(case when SN_STATUS='COMP' and SN_TYPE = 'Re-Activattion' then 1 else 0 end)>0
then str(datediff(day
,MIN(case
when SN_TYPE = 'Re-Activattion'
then Created_Date
else null
end
)
,min(case
when (SN_TYPE = 'Re-Activattion'
and SN_STATUS='COMP'
)
then Completed_Date
else null
end
)
)
)
when sum(case when SN_TYPE='Re-Activattion' then 1 else 0 end)>0
then 'NOT COMP'
else 'NO RE-ACT'
end as RE_ACT_COMPLETION_TIME,
Sum(CASE WHEN SN_STATUS = 'N-CO' THEN 1 ELSE 0 END) as [RE-AN NCO #]
FROM OrderGroups AS a
INNER JOIN #Temp AS b ON a.Identifier = b.Identifier AND a.OrderRangeStartDate <= b.Created_Date AND b.Created_Date <= a.OrderRangeEndDate
GROUP BY a.Identifier, a.OrderNumber, OrderRangeStartDate, OrderRangeEndDate
Output:
Identifier OrderNumber OrderRangeStartDate OrderRangeEndDate RE_ACT_COMPLETION_TIME RE-AN NCO #
-------------- ------------- ----------------------- ----------------------- ---------------------- -----------
200895691 1 2016-01-27 14:25:00.000 2016-02-10 15:15:00.000 0 2
200895691 2 2016-02-10 15:15:00.003 2017-01-16 12:15:00.000 1 1
Output for the updated data set:
Identifier OrderNumber OrderRangeStartDate OrderRangeEndDate RE_ACT_COMPLETION_TIME RE-AN NCO #
------------ ------------ ----------------------- ----------------------- ---------------------- -----------
200895691 1 2017-01-11 00:00:00.000 2017-03-27 00:00:00.000 61 4
200895691 2 2017-03-27 00:00:00.003 2017-04-20 00:00:00.000 1 1
3333333 1 2017-01-27 00:00:00.000 2017-02-10 00:00:00.000 0 2
44454544 1 2017-01-27 00:00:00.000 2017-01-27 00:00:00.000 NOT COMP 1
7777691 1 2017-02-08 09:36:44.000 2017-02-22 09:36:44.000 63 1
Update 2017-10-05 in response to the comment
Input:
INSERT INTO #temp VALUES
('11111','20170203','20170203','Re-Activattion', 'COMP'),
('11111','20170206','20170202','Re-Activattion', 'N-CO');
Output:
Identifier OrderNumber OrderRangeStartDate OrderRangeEndDate RE_ACT_COMPLETION_TIME RE-AN NCO #
---------- ------------ ----------------------- ----------------------- ---------------------- -----------
11111 1 2017-02-03 00:00:00.000 2017-02-17 00:00:00.000 0 1

How to add a new column to the result table?

This is the table mytable:
identifier thedate direction
111 2017-06-03 11:20 2
111 2017-06-03 12:22 1
222 2017-06-04 12:15 1
333 2017-06-05 12:21 1
444 2017-06-05 12:39 2
444 2017-06-08 14:23 2
555 2017-06-08 15:33 1
555 2017-06-08 16:12 2
I am calculating the average hourly count of unique identifiers in Apache Hive as follows:
SELECT HOUR(thedate) as hour,
COUNT(DISTINCT identifier, CAST(thedate as date),
HOUR(thedate)) / COUNT(DISTINCT CAST(thedate as date),
HOUR(thedate)) as hourly_avg_count
FROM mytable
GROUP BY HOUR(thedate)
Now I need to add a new calculated column to the result table (not the original one). This column called newcolumn should have value A for the results of thedate from the list ["2017-06-03","2017-06-04"]. It must have value B when thedate belongs to ["2017-06-05","2017-06-06"]. The rest of values of thedate that are not included in both lists should have the value C assigned.
The resulted table should have the following columns:
newcolumn hour hourly_avg_count
A 11 0.5
A 12 1
B ... ...
C ... ...
You would just add this to the GROUP BY:
SELECT (CASE WHEN DATE(thedate) IN ('2017-06-03', '2017-06-04') THEN 'A'
WHEN DATE(thedate) IN ('2017-06-05', '2017-06-06') THEN 'B'
ELSE 'C'
END) as grp,
HOUR(thedate) as hour,
COUNT(DISTINCT identifier, CAST(thedate as date), HOUR(thedate)
) / COUNT(DISTINCT CAST(thedate as date), HOUR(thedate)) as hourly_avg_count
FROM mytable
GROUP BY HOUR(thedate),
(CASE WHEN DATE(thedate) IN ('2017-06-03', '2017-06-04') THEN 'A'
WHEN DATE(thedate) IN ('2017-06-05', '2017-06-06') THEN 'B'
ELSE 'C'
END);
USE CASE STATEMENT
SELECT CASE WHEN thedate BETWEEN '2017-06-03' AND '2017-06-04'
THEN 'A'
WHEN thedate BETWEEN '2017-06-05' AND '2017-06-06'
THEN 'B'
ELSE 'C'
END newcolumn
...

Counting Distinct Months with Multiple records per month

My data looks like this:
Code Date
123 1/2/2016
123 1/4/2016
123 1/4/2016
123 2/5/2016
456 1/2/2016
456 1/3/2016
456 2/7/2016
789 1/7/2016
789 1/8/2016
789 3/7/2016
789 3/15/2016
I am looking for a distinct count of months grouped by the code.
So the results would look something like this
Code Jan2016 Feb2016 Mar2016
123 1 1 0
456 1 1 0
789 1 0 1
I feel like I may be overcomplicating my code.
So far I have
SELECT
p.code
,SUM(CASE WHEN p.date BETWEEN '11/1/2010' AND '11/30/2010'
THEN 1 ELSE 0 END) AS 'Nov2010'
FROM table
Group By p.code
But that is pulling in all records from Nov2010, when I just need to know if this exists
You can just change your aggregate function. Use MAX instead of SUM.
SELECT
p.code
,MAX(CASE WHEN p.date BETWEEN '11/1/2010' AND '11/30/2010'
THEN 1 ELSE 0 END) AS 'Nov2010'
FROM table
Group By p.code
in SQL Server you can use a pivot table, bit messy but something like this will work, sample data:
declare #table table (code int, date date)
insert into #table values
(123, '1/2/2016'),
(123, '1/4/2016'),
(123, '1/4/2016'),
(123, '2/5/2016'),
(456, '1/2/2016'),
(456, '1/3/2016'),
(456, '2/7/2016'),
(789, '1/7/2016'),
(789, '1/8/2016'),
(789, '3/7/2016'),
(789, '3/15/2016')
then using a pivot table:
with cte (code) as (select distinct code from #table)
select
c1.code,
ISNULL(months.[1],0) as 'Jan 2016',
ISNULL(months.[2],0) as 'Feb 2016',
ISNULL(months.[3],0) as 'Mar 2016'
from
(
select
c.code,
count( distinct t.code) as [ID],
month(date) as [month]
from #table t
join cte c on t.code = c.code
group by c.code, month(date)
) P
pivot
(
sum([ID])
for [month] IN ("1","2","3")--,"4","5","6","7","8","9","10","11","12")
) as months
join cte c1 on months.code = c1.code
will give you the following results:
code Jan 2016 Feb 2016 Mar 2016
123 1 1 0
456 1 1 0
789 1 0 1
If you take the comment out after the 3rd month you can do it for the rest of the year