How to count distinct in different time window in AWS redshift - sql

-- Distinct visits per product for rows with evaluation > 0 whose timestamp
-- lies in the 2021-01-01 .. 2021-04-02 window; 100 largest counts only.
SELECT
    COUNT(DISTINCT c.visitid) AS count1,
    t.prod
FROM x.t1 AS c
INNER JOIN y.t1 AS t
    ON t.prod_heading_id = c.headingid
WHERE c.evaluation > 0
    AND c.eventtimestamp BETWEEN '2021-01-01' AND '2021-04-02'
GROUP BY t.prod
ORDER BY count1 DESC
LIMIT 100
I have another time window, from '2020-01-01' to '2020-04-02', and I want to compute the same grouped count for it as count2.

You can use conditional aggregation:
-- One scan for both windows: each FILTER clause limits its own COUNT to a
-- single window, while WHERE keeps only rows inside the overall span.
SELECT
    COUNT(DISTINCT c.visitid) FILTER (
        WHERE c.eventtimestamp BETWEEN '2021-01-01' AND '2021-04-02'
    ) AS cnt1,
    COUNT(DISTINCT c.visitid) FILTER (
        WHERE c.eventtimestamp BETWEEN '2020-01-01' AND '2020-04-02'
    ) AS cnt2,
    t.prod
FROM x.t1 AS c
INNER JOIN y.t1 AS t
    ON t.prod_heading_id = c.headingid
WHERE c.eventtimestamp BETWEEN '2020-01-01' AND '2021-04-02'
    AND c.evaluation > 0
GROUP BY t.prod
ORDER BY cnt1 DESC;
In Redshift (or many other databases), the syntax would be:
-- Same result without FILTER: the CASE yields visitid only inside the
-- window and NULL otherwise, and COUNT(DISTINCT ...) ignores NULLs.
SELECT
    COUNT(DISTINCT
        CASE
            WHEN c.eventtimestamp BETWEEN '2021-01-01' AND '2021-04-02' THEN c.visitid
        END
    ) AS cnt1,
    COUNT(DISTINCT
        CASE
            WHEN c.eventtimestamp BETWEEN '2020-01-01' AND '2020-04-02' THEN c.visitid
        END
    ) AS cnt2,
    t.prod
FROM x.t1 AS c
INNER JOIN y.t1 AS t
    ON t.prod_heading_id = c.headingid
WHERE c.eventtimestamp BETWEEN '2020-01-01' AND '2021-04-02'
    AND c.evaluation > 0
GROUP BY t.prod
ORDER BY cnt1 DESC;

Related

PLSQL same data with different dates from same table

I have a table in Oracle and I want to get difference of sums of debit and credit columns for sysdate and the same difference of sums of debit and credit columns from 6 months ago
My query is
-- Balance (debit minus credit) for id 1092 up to today and up to six months
-- ago. UNION returns the two results as separate rows (identical rows
-- would be collapsed into one).
SELECT
    a.name,
    a.id,
    NVL(SUM(a.debit), 0) - NVL(SUM(a.credit), 0) AS current_bal
FROM mytable a
WHERE a.docdate <= SYSDATE
    AND a.id = '1092'
GROUP BY a.name, a.id
UNION
SELECT
    b.name,
    b.id,
    NVL(SUM(b.debit), 0) - NVL(SUM(b.credit), 0) AS current_bal1
FROM mytable b
WHERE b.docdate <= ADD_MONTHS(SYSDATE, -6)
    AND b.id = '1092'
GROUP BY b.name, b.id;
I am getting the correct result, but the query returns two rows, whereas I need to show the result as a single row.
Any suggestions / correction in my query please.
You can use the conditional aggregation as follows:
-- Both balances in one row via conditional aggregation.
-- Every CASE must read from alias "a": it is the only table in the FROM
-- clause, so a reference to any other alias cannot be resolved.
SELECT
    a.name,
    a.id,
    -- balance over all rows dated up to now
    NVL(SUM(CASE WHEN a.docdate <= SYSDATE THEN a.debit END), 0)
        - NVL(SUM(CASE WHEN a.docdate <= SYSDATE THEN a.credit END), 0) AS current_bal,
    -- balance over rows dated up to six months ago
    NVL(SUM(CASE WHEN a.docdate <= ADD_MONTHS(SYSDATE, -6) THEN a.debit END), 0)
        - NVL(SUM(CASE WHEN a.docdate <= ADD_MONTHS(SYSDATE, -6) THEN a.credit END), 0) AS current_bal1
FROM mytable a
WHERE a.id = '1092'
GROUP BY a.name, a.id;
-- Update
If you are facing any issue then easiest way for you is to use the self join between sub-queries as follows:
-- Compute each balance in its own grouped sub-query, then join the two
-- results on (id, name) so both balances land in a single row.
SELECT
    cur.name,
    cur.id,
    cur.current_bal,
    prev.current_bal1
FROM (
    SELECT
        a.name,
        a.id,
        NVL(SUM(a.debit), 0) - NVL(SUM(a.credit), 0) AS current_bal
    FROM mytable a
    WHERE a.docdate <= SYSDATE
        AND a.id = '1092'
    GROUP BY a.name, a.id
) cur
INNER JOIN (
    SELECT
        b.name,
        b.id,
        NVL(SUM(b.debit), 0) - NVL(SUM(b.credit), 0) AS current_bal1
    FROM mytable b
    WHERE b.docdate <= ADD_MONTHS(SYSDATE, -6)
        AND b.id = '1092'
    GROUP BY b.name, b.id
) prev
    ON prev.id = cur.id
    AND prev.name = cur.name;
You can use conditional aggregation:
-- Current balance and balance as of six months ago, side by side.
-- The six-month cut-off is inclusive (<=) so that it selects the same rows
-- as the "docdate <= add_months(sysdate, -6)" filter in the question's query.
SELECT
    a.name,
    a.id,
    COALESCE(SUM(a.debit), 0) - COALESCE(SUM(a.credit), 0) AS current_bal,
    (
        SUM(CASE WHEN a.docdate <= ADD_MONTHS(SYSDATE, -6) THEN a.debit ELSE 0 END)
        - SUM(CASE WHEN a.docdate <= ADD_MONTHS(SYSDATE, -6) THEN a.credit ELSE 0 END)
    ) AS bal_6_months
FROM mytable a
WHERE a.id = '1092'
    AND a.docdate <= SYSDATE
GROUP BY a.name, a.id;
This puts the two values in the same row. That seems more useful to me than having them in different rows.
Can you try:
-- Collapse the two balances into one row by concatenating them with LISTAGG.
-- The balances are computed first (one grouped query per cut-off date) and
-- LISTAGG is applied afterwards, because an aggregate nested inside LISTAGG
-- cannot be selected next to plain grouped columns.
-- "period" fixes the order inside the list: 1 = up to today, 2 = up to six
-- months ago.
SELECT
    bal.name,
    bal.id,
    LISTAGG(bal.current_bal, ' ') WITHIN GROUP (ORDER BY bal.period) AS current_bal
FROM (
    SELECT
        a.name,
        a.id,
        1 AS period,
        NVL(SUM(a.debit), 0) - NVL(SUM(a.credit), 0) AS current_bal
    FROM mytable a
    WHERE a.id = '1092'
        AND a.docdate <= SYSDATE
    GROUP BY a.name, a.id
    UNION ALL
    SELECT
        b.name,
        b.id,
        2 AS period,
        NVL(SUM(b.debit), 0) - NVL(SUM(b.credit), 0) AS current_bal
    FROM mytable b
    WHERE b.id = '1092'
        AND b.docdate <= ADD_MONTHS(SYSDATE, -6)
    GROUP BY b.name, b.id
) bal
GROUP BY bal.name, bal.id;

Missing row when data value is 0 in teradata

I am trying to do a group-by in a query but every time the data has a 0, the group by does not show the entire row. How do I fix this?
This is the regular query
-- Distinct 'Open' cases of BU1 whose OPENED_DATE lies strictly between
-- trunc(current_date - 2) and trunc(current_date).
SELECT
    COUNT(DISTINCT sr.sr_number) AS NEW_CASES
FROM table sr
WHERE sr.status = 'Open'
    AND sr.business_unit IN ('BU1')
    AND OPENED_DATE > TRUNC(current_date - 2)
    AND OPENED_DATE < TRUNC(current_date)
The output is
NEW_CASES
0
But when I do a group by. The entire row is gone.
-- Same count split by business unit. A unit with no matching rows produces
-- no group, hence no output row.
SELECT
    COUNT(DISTINCT sr.sr_number) AS NEW_CASES,
    sr.business_unit
FROM table sr
WHERE sr.status = 'Open'
    AND sr.business_unit IN ('BU1', 'BU2', 'BU3')
    AND OPENED_DATE > TRUNC(current_date - 2)
    AND OPENED_DATE < TRUNC(current_date)
GROUP BY sr.business_unit
The group by output is
New_CASES BUSINESS_UNIT
200 BU2
300 BU3
Desired output:
New_CASES BUSINESS_UNIT
0 BU1
200 BU2
300 BU3
One option is to start from a fixed list of values, and then bring the table with a left join, like so:
-- Start from the fixed list of business units and LEFT JOIN the cases, so a
-- unit without matching cases still yields a row with a count of 0.
-- The literals use the same spelling as the data in the question ('BU1',
-- 'Open'); under a case-sensitive comparison lowercase values match nothing.
select
    b.business_unit,
    count(distinct t.sr_number) as new_cases
from (
    select 'BU1' as business_unit from dual
    union all select 'BU2' from dual
    union all select 'BU3' from dual
) b
left join mytable t
    -- filters stay in ON: moving them to WHERE would drop the unmatched units
    on t.business_unit = b.business_unit
    and t.opened_date > trunc(current_date - 2)
    and t.opened_date < trunc(current_date)
    and t.status = 'Open'
group by b.business_unit
In Teradata, the syntax is somewhat cumbersome:
-- Teradata variant: each constant row is wrapped in its own derived table
-- before the rows are combined with UNION ALL.
-- The literals use the same spelling as the data in the question ('BU1',
-- 'Open'); under a case-sensitive comparison lowercase values match nothing.
select
    b.business_unit,
    count(distinct t.sr_number) as new_cases
from (
    select * from (select 'BU1' as business_unit) x
    union all select * from (select 'BU2' as business_unit) x
    union all select * from (select 'BU3' as business_unit) x
) b
left join mytable t
    -- filters stay in ON: moving them to WHERE would drop the unmatched units
    on t.business_unit = b.business_unit
    and t.opened_date > trunc(current_date - 2)
    and t.opened_date < trunc(current_date)
    and t.status = 'Open'
group by b.business_unit
Use a left join. In Oracle, this would look like
-- Fixed list of units (built from dual) LEFT JOINed to the cases; the case
-- filters sit in the ON clause so that units without matches are kept.
SELECT
    b.business_unit,
    COUNT(DISTINCT sr.sr_number) AS NEW_CASES
FROM (
    SELECT 'BU1' AS business_unit FROM dual
    UNION ALL
    SELECT 'BU2' AS business_unit FROM dual
    UNION ALL
    SELECT 'BU3' AS business_unit FROM dual
) b
LEFT JOIN sr
    ON b.business_unit = sr.business_unit
    AND sr.status = 'Open'
    AND sr.OPENED_DATE > TRUNC(current_date - 2)
    AND sr.OPENED_DATE < TRUNC(current_date)
GROUP BY b.business_unit
EDIT:
Teradata doesn't have a really convenient way to create a derived table of constant values, but you can do it:
-- Same left join; each constant row is selected from a one-row dummy
-- derived table instead of from dual.
SELECT
    b.business_unit,
    COUNT(DISTINCT sr.sr_number) AS NEW_CASES
FROM (
    SELECT 'BU1' AS business_unit FROM (SELECT 1 AS dummy) t
    UNION ALL
    SELECT 'BU2' AS business_unit FROM (SELECT 1 AS dummy) t
    UNION ALL
    SELECT 'BU3' AS business_unit FROM (SELECT 1 AS dummy) t
) b
LEFT JOIN sr
    ON b.business_unit = sr.business_unit
    AND sr.status = 'Open'
    AND sr.OPENED_DATE > TRUNC(current_date - 2)
    AND sr.OPENED_DATE < TRUNC(current_date)
GROUP BY b.business_unit

Reset ROW_NUMBER() after break

I have the following SQL Server query, and I'm having some trouble using ROW_NUMBER() in it.
-- One row per event for a single driver. RowID numbers the events by date
-- within each (driver, points flag) partition; the flag is 1 when the
-- event's summed points are positive, 0 otherwise.
SELECT
    ROW_NUMBER() OVER (
        PARTITION BY R.DriverName, CASE WHEN SUM(R.Points) > 0 THEN 1 ELSE 0 END
        ORDER BY E.EventDate ASC
    ) AS 'RowID',
    CASE WHEN SUM(R.Points) > 0 THEN 1 ELSE 0 END AS 'PointsId',
    R.DriverName,
    R.EventID,
    FORMAT(E.EventDate, 'dd/MM/yyyy') AS 'Event Date'
FROM Races AS R
INNER JOIN Events AS E
    ON R.EventID = E.EventID
WHERE R.DriverName LIKE 'Lucas di Grassi'
    AND R.SeriesID LIKE 'FOE'
    AND E.EventType LIKE 'R'
GROUP BY R.DriverName, R.EventID, E.EventDate
ORDER BY E.EventDate
And get the following result:
I want that after each 0 on PointsId Column, the RowID resets to 1 and adds up again until the next 0.
Can anyone help?
Thank you,
Vítor
You need nested Analytic Functions:
-- Two-step numbering: the derived table dt tags every row with a running
-- group number, then the outer query numbers the rows inside each
-- (driver, group) pair.
-- The outer query can only see dt's columns, so everything it uses is
-- qualified with dt; aliases R and E exist only inside the derived table.
SELECT
    ROW_NUMBER() OVER (
        PARTITION BY dt.DriverName, dt.grp
        ORDER BY dt.EventDate ASC
    ) AS 'RowID',
    ...
FROM
(
    SELECT
        -- running total of the points flag per driver in date order;
        -- consecutive rows that share a total belong to the same group
        SUM(CASE WHEN SUM(R.Points) > 0 THEN 1 ELSE 0 END)
            OVER (PARTITION BY R.DriverName
                  ORDER BY E.EventDate ASC) AS grp,
        -- remaining columns go here; the list must expose DriverName and
        -- EventDate because the outer query reads them through dt
        ...
    FROM Races AS R
    INNER JOIN Events AS E
        ON E.EventID = R.EventID
    WHERE R.SeriesID LIKE 'FOE'
        AND E.EventType LIKE 'R'
        AND R.DriverName LIKE 'Lucas di Grassi'
    GROUP BY R.DriverName, R.EventID, E.EventDate
) AS dt
ORDER BY dt.EventDate

SSRS: how to get top 3 in order Z to A

I am trying to show the three worst values in my SSRS diagram:
my Code:
-- Per interval date, team and name: units summed per counter type for rows
-- from yesterday (midnight to midnight), limited to TOP 3 with no ORDER BY.
SELECT *
FROM (
    SELECT TOP 3
        intervaldate AS Datum,
        Name,
        teamname AS Team,
        SUM(CASE WHEN CounterName = 'Blown away' THEN calculationUnits ELSE 0 END) AS Blown,
        SUM(CASE WHEN CounterName = 'Thrown away' THEN calculationUnits ELSE 0 END) AS Thrown,
        SUM(CASE WHEN CounterName = 'total' THEN calculationUnits ELSE 0 END) AS Total
    FROM Counting
    WHERE IntervalDate >= DATEADD(day, DATEDIFF(day, 1, GETDATE()), 0)
        AND IntervalDate < DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0)
        AND Name IN (
            SELECT SystemID
            FROM tSystemView
            WHERE SystemViewID = 2
        )
    GROUP BY intervaldate, teamName, Name
) c
Expression of the diagram:
=Sum(Fields!Blown.Value + Fields!Thrown.Value) / Sum(Fields!Total.Value) * 100
And I sorted it from highest to lowest
But it does not show me the right order.
If I select every "Name", it shows me different values than the top 3:
all Names with value:
top 3:
This happens because the TOP 3 is applied in the SQL while the sort is applied in the report. Without an ORDER BY, TOP 3 returns three arbitrary records rather than the three worst. Also, unless there is more SQL you are not showing, the outer SELECT is unnecessary. Add an ORDER BY <column> DESC below your GROUP BY.
-- Calcs: per interval date, team and name, the units summed per counter type
-- for rows from yesterday (midnight to midnight).
WITH Calcs AS
(
    SELECT
        intervaldate AS Datum,
        Name,
        TeamName,
        SUM(CASE WHEN CounterName = 'Blown away' THEN calculationUnits ELSE 0 END) AS Blown,
        SUM(CASE WHEN CounterName = 'Thrown away' THEN calculationUnits ELSE 0 END) AS Thrown,
        SUM(CASE WHEN CounterName = 'total' THEN calculationUnits ELSE 0 END) AS Total
    FROM Counting
    WHERE IntervalDate >= DATEADD(day, DATEDIFF(day, 1, GETDATE()), 0)
        AND IntervalDate < DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0)
        AND Name IN (SELECT SystemID FROM tSystemView WHERE SystemViewID = 2)
    GROUP BY intervaldate, teamName, Name
)
-- Rank the groups by their (Blown + Thrown) / Total ratio and keep the top 3.
SELECT b.*
FROM
(
    SELECT
        a.*,
        ROW_NUMBER() OVER (
            -- NULLIF avoids a divide-by-zero error when a group has no
            -- 'total' rows (Total = 0); such groups get a NULL ratio.
            -- "* 1.0" prevents integer division in case calculationUnits is
            -- an integer column (its type is not visible here).
            ORDER BY (a.Blown + a.Thrown) * 1.0 / NULLIF(a.Total, 0) DESC -- Change between ASC/DESC depending on needs
        ) AS R_Ord
    FROM Calcs AS a
) AS b
WHERE b.R_Ord <= 3

SQL Query to get the Max Date of a certain Status and subract that from the Max Date of another Status

An example would be.. Say a ticket is in New status. I want to get the MAX Date of New Status and the Max date of Completed Status and calculate the difference between the MAX Completed Status from the MAX New Status
ex.
-- Pseudo-SQL sketching the desired result per ticket ID; it is not runnable
-- as written: a WHERE clause cannot be attached to an aggregate inside the
-- select list.
SELECT t.ID,
-- intended: latest update_date among rows with status 'New'
MAX(update_date) WHERE t.status = 'New' start_time,
-- intended: latest update_date among rows with status 'Completed'
MAX(update_date) WHERE t.status = 'Completed' stop_time,
-- both DATEDIFF arguments are the same expression here; the intent is the
-- difference between start_time and stop_time, in seconds
DATEDIFF(second, MAX(update_date), MAX(update_date)) elapsed_sec
FROM xxx.dbo t
GROUP BY t.ID;
Thank you so much,
P
-- Seconds between the latest 'New' update and the latest 'Completed' update,
-- one row per ticket ID.
SELECT
    t.id,
    DATEDIFF(second, t.start_time, t.stop_time) AS elapsed_sec
FROM (
    SELECT
        ids.ID,
        (SELECT MAX(n.update_date)
         FROM xxx.dbo AS n
         WHERE n.status = 'New' AND n.ID = ids.ID) AS start_time,
        (SELECT MAX(c.update_date)
         FROM xxx.dbo AS c
         WHERE c.status = 'Completed' AND c.ID = ids.ID) AS stop_time
    -- Drive the lookups from the distinct IDs: selecting straight from the
    -- table would repeat the same result once per status row of each ticket.
    FROM (SELECT DISTINCT ID FROM xxx.dbo) AS ids
) AS t
I would suggest doing this using conditional aggregation rather than correlated subqueries:
-- Per ticket ID: latest 'New' timestamp, latest 'Completed' timestamp and
-- the seconds between them, computed in a single pass.
-- Each CASE yields update_date only for the wanted status and NULL
-- otherwise; MAX ignores the NULLs.
SELECT
    t.ID,
    MAX(CASE WHEN t.status = 'New' THEN update_date END) AS start_time,
    MAX(CASE WHEN t.status = 'Completed' THEN update_date END) AS stop_time,
    DATEDIFF(
        second,
        MAX(CASE WHEN t.status = 'New' THEN update_date END),
        MAX(CASE WHEN t.status = 'Completed' THEN update_date END)
    ) AS elapsed_sec
FROM xxx.dbo t
GROUP BY t.ID;