Use of count in where statement sql - sql

I have N transactions with camera_id = 6 and i want to sample every N // 100 transaction.
I have the following query:
SELECT t.id from (
SELECT id, camera_id, start_ts, ROW_NUMBER() OVER (ORDER BY start_ts) AS rownum
FROM transactions
WHERE camera_id = 6
) as t
where t.rownum % (N / 100) = 1
order by t.start_ts
How can i change it so i don't need additional query for determining N?

Untested
Does the following work for you - add a windowed count in addition to your Rownumber and use that:
SELECT t.id from (
SELECT id, camera_id, start_ts,
Row_Number() OVER (ORDER BY start_ts) AS rownum,
Count(*) over() Qty
FROM transactions
WHERE camera_id = 6
) as t
where t.rownum % (Qty / 100) = 1
order by t.start_ts

Related

Oracle SQL Hierarchy Summation

I have a table TRANS that contains the following records:
TRANS_ID TRANS_DT QTY
1 01-Aug-2020 5
1 01-Aug-2020 1
1 03-Aug-2020 2
2 02-Aug-2020 1
The expected output:
TRANS_ID TRANS_DT BEGBAL TOTAL END_BAL
1 01-Aug-2020 0 6 6
1 02-Aug-2020 6 0 6
1 03-Aug-2020 6 2 8
2 01-Aug-2020 0 0 0
2 02-Aug-2020 0 1 1
2 03-Aug-2020 1 0 1
Each trans_id starts with a beginning balance of 0 (01-Aug-2020). For succeeding days, the beginning balance is the ending balance of the previous day and so on.
I can create PL/SQL block to create the output. Is it possible to get the output in 1 SQL statement?
Thanks.
Try this following script using CTE-
Demo Here
WITH CTE
AS
(
SELECT DISTINCT A.TRANS_ID,B.TRANS_DT
FROM your_table A
CROSS JOIN (SELECT DISTINCT TRANS_DT FROM your_table) B
),
CTE2
AS
(
SELECT C.TRANS_ID,C.TRANS_DT,SUM(D.QTY) QTY
FROM CTE C
LEFT JOIN your_table D
ON C.TRANS_ID = D.TRANS_ID
AND C.TRANS_DT = D.TRANS_DT
GROUP BY C.TRANS_ID,C.TRANS_DT
ORDER BY C.TRANS_ID,C.TRANS_DT
)
SELECT F.TRANS_ID,F.TRANS_DT,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT < F.TRANS_DT
) BEGBAL,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT = F.TRANS_DT
) TOTAL ,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT <= F.TRANS_DT
) END_BAL
FROM CTE2 F
You can as well do like this (I would assume it's a bit faster): Demo
with
dt_between as (
select mindt + level - 1 as trans_dt
from (select min(trans_dt) as mindt, max(trans_dt) as maxdt from t)
connect by level <= maxdt - mindt + 1
),
dt_for_trans_id as (
select *
from dt_between, (select distinct trans_id from t)
),
qty_change as (
select distinct trans_id, trans_dt,
sum(qty) over (partition by trans_id, trans_dt) as total,
sum(qty) over (partition by trans_id order by trans_dt) as end_bal
from t
right outer join dt_for_trans_id using (trans_id, trans_dt)
)
select
trans_id,
to_char(trans_dt, 'DD-Mon-YYYY') as trans_dt,
nvl(lag(end_bal) over (partition by trans_id order by trans_dt), 0) as beg_bal,
nvl(total, 0) as total,
nvl(end_bal, 0) as end_bal
from qty_change q
order by trans_id, trans_dt
dt_between returns all the days between min(trans_dt) and max(trans_dt) in your data.
dt_for_trans_id returns all these days for each trans_id in your data.
qty_change finds difference for each day (which is TOTAL in your example) and cumulative sum over all the days (which is END_BAL in your example).
The main select takes END_BAL from previous day and calls it BEG_BAL, it also does some formatting of final output.
First of all, you need to generate dates, then you need to aggregate your values by TRANS_DT, and then left join your aggregated data to dates. The easiest way to get required sums is to use analitic window functions:
with dates(dt) as ( -- generating dates between min(TRANS_DT) and max(TRANS_DT) from TRANS
select min(trans_dt) from trans
union all
select dt+1 from dates
where dt+1<=(select max(trans_dt) from trans)
)
,trans_agg as ( -- aggregating QTY in TRANS
select TRANS_ID,TRANS_DT,sum(QTY) as QTY
from trans
group by TRANS_ID,TRANS_DT
)
select -- using left join partition by to get data on daily basis for each trans_id:
dt,
trans_id,
nvl(sum(qty) over(partition by trans_id order by dates.dt range between unbounded preceding and 1 preceding),0) as BEGBAL,
nvl(qty,0) as TOTAL,
nvl(sum(qty) over(partition by trans_id order by dates.dt),0) as END_BAL
from dates
left join trans_agg tr
partition by (trans_id)
on tr.trans_dt=dates.dt;
Full example with sample data:
alter session set nls_date_format='dd-mon-yyyy';
with trans(TRANS_ID,TRANS_DT,QTY) as (
select 1,to_date('01-Aug-2020'), 5 from dual union all
select 1,to_date('01-Aug-2020'), 1 from dual union all
select 1,to_date('03-Aug-2020'), 2 from dual union all
select 2,to_date('02-Aug-2020'), 1 from dual
)
,dates(dt) as ( -- generating dates between min(TRANS_DT) and max(TRANS_DT) from TRANS
select min(trans_dt) from trans
union all
select dt+1 from dates
where dt+1<=(select max(trans_dt) from trans)
)
,trans_agg as ( -- aggregating QTY in TRANS
select TRANS_ID,TRANS_DT,sum(QTY) as QTY
from trans
group by TRANS_ID,TRANS_DT
)
select
dt,
trans_id,
nvl(sum(qty) over(partition by trans_id order by dates.dt range between unbounded preceding and 1 preceding),0) as BEGBAL,
nvl(qty,0) as TOTAL,
nvl(sum(qty) over(partition by trans_id order by dates.dt),0) as END_BAL
from dates
left join trans_agg tr
partition by (trans_id)
on tr.trans_dt=dates.dt;
You can use a recursive query to generate the overall date range, cross join it with the list of distinct tran_id, then bring the table with a left join. The last step is aggregation and window functions:
with all_dates (trans_dt, max_dt) as (
select min(trans_dt), max(trans_dt) from trans group by trans_id
union all
select trans_dt + interval '1' day, max_dt from all_dates where trans_dt < max_dt
)
select
i.trans_id,
d.trans_dt,
coalesce(sum(sum(t.qty)) over(partition by i.trans_id order by d.trans_dt), 0) - coalesce(sum(t.qty), 0) begbal,
coalesce(sum(t.qty), 0) total,
coalesce(sum(sum(t.qty)) over(partition by i.trans_id order by d.trans_dt), 0) endbal
from all_dates d
cross join (select distinct trans_id from trans) i
left join trans t on t.trans_id = i.trans_id and t.trans_dt = d.trans_dt
group by i.trans_id, d.trans_dt
order by i.trans_id, d.trans_dt

Query to get the sum of values for the maximum number of months

This query in Oracle 11 gets the sum of value for the last 1 years, and it works when there are 1 years of data.
When there is less than 1 years of data, this query returns 0, instead of the sum of values until whatever the oldest years are.
For example, if there are only 6 months of data, the query should return the sum of values until the 6th month.
SELECT SUM (DECODE (rnk, 11, rt, 0)) 1Y
FROM (SELECT entity_id,rnk,
SUM (ABS(NVL (value, 0))) OVER (PARTITION BY TRIM (entity_id) ORDER BY rnk) rt
FROM (SELECT psm.*,RANK () OVER (PARTITION BY entity_id ORDER BY period_end_date DESC) AS rnk
FROM myTable psm
WHERE psm.entity_id = '1'
ORDER BY period_end_date DESC
) rank_tab
WHERE rnk < 12
);
If the biggest rank is 6, the result from the above query is 0
I attempted this, but got the error "ORA-00978: nested group function without GROUP BY"
SELECT case when rnk < 11
then SUM (DECODE (rnk, Max(rnk), rt, 0))
else SUM (DECODE (rnk, 11, rt, 0))
end as Y
FROM (SELECT entity_id,rnk,
SUM (ABS(NVL (value, 0))) OVER (PARTITION BY TRIM (entity_id) ORDER BY rnk) rt
FROM (SELECT psm.*,RANK () OVER (PARTITION BY entity_id ORDER BY period_end_date DESC) AS rnk
FROM myTable psm
WHERE psm.entity_id = '1'
ORDER BY period_end_date DESC
) rank_tab
WHERE rnk < 12
);
Sample data:
entity_id value period_end_date
1 1 9/30/19
1 2 8/31/19
1 3 7/31/19
1 4 6/30/19
1 5 5/31/19
1 6 4/30/19
In the above example, 1Y should return 1+2+3+4+5+6 = 21.
Instead my query returns 0 because it is looking for rnk = 11, which doesn't exist.
SUM (DECODE (rnk, 11, rt, 0)) 1Y
Thank you.
EDIT:
This works. But, if you know of a better way to do it, please let me know. Thank you.
SELECT
CASE WHEN MRank < 11 then maxY else OneY end as lc_incearned_1Y
FROM (
WITH R as
(SELECT MAX(RNK) MaxRank FROM (
SELECT RANK () OVER (PARTITION BY TRIM (entity_id) ORDER BY period_end_date
DESC) AS rnk FROM myTbl psm
WHERE TRIM (psm.entity_id) = '1' AND period_end_date <
to_date('9/30/2019','MM/DD/YYYY')
ORDER BY period_end_date DESC))
select MAX(MaxRank) MRank,
SUM (DECODE (rnk, MaxRank, rt, 0)) maxY,
SUM (DECODE (rnk, 11, rt, 0)) OneY, --13051.97
FROM (SELECT entity_id,rnk,
SUM (ABS (NVL (value, 0))) OVER (PARTITION BY TRIM (entity_id) ORDER BY rnk) rt
FROM (SELECT psm.*,RANK () OVER (PARTITION BY TRIM (entity_id) ORDER BY period_end_date DESC) AS rnk FROM CREF.PORTFOLIO_SUMM_MTHEND psm
WHERE TRIM (psm.entity_id) = '1' AND period_end_date < to_date('9/30/2019','MM/DD/YYYY')
ORDER BY period_end_date DESC) rank_tab WHERE rnk < 12) T,R)
It seems you need to sum your values up starting from the latest period_end_date to the earliest date within the eleven months range. It would be suitable to use max(period_end_date) over (partition by entity_id order by period_end_date desc) analytic function along with your current rank() function. And then apply months_between(<max_period_end_date>,period_end_date). If your need to look up from the current date, then get rid of max() analytic function and replace <max_period_end_date> with trunc(sysdate) in months_between() function. So, use :
with t as
(
select max(period_end_date) over (partition by entity_id order by period_end_date desc) as mx,
rank() over (partition by entity_id order by period_end_date desc) as rnk,
t.*
from myTable t
)
select sum(nvl(value,0)) as sum_value
from t
where months_between(mx,period_end_date)<=11
Demo

Group by in columns and rows, counts and percentages per day

I have a table that has data like following.
attr |time
----------------|--------------------------
abc |2018-08-06 10:17:25.282546
def |2018-08-06 10:17:25.325676
pqr |2018-08-05 10:17:25.366823
abc |2018-08-06 10:17:25.407941
def |2018-08-05 10:17:25.449249
I want to group them and count by attr column row wise and also create additional columns in to show their counts per day and percentages as shown below.
attr |day1_count| day1_%| day2_count| day2_%
----------------|----------|-------|-----------|-------
abc |2 |66.6% | 0 | 0.0%
def |1 |33.3% | 1 | 50.0%
pqr |0 |0.0% | 1 | 50.0%
I'm able to display one count by using group by but unable to find out how to even seperate them to multiple columns. I tried to generate day1 percentage with
SELECT attr, count(attr), count(attr) / sum(sub.day1_count) * 100 as percentage from (
SELECT attr, count(*) as day1_count FROM my_table WHERE DATEPART(week, time) = DATEPART(day, GETDate()) GROUP BY attr) as sub
GROUP BY attr;
But this also is not giving me correct answer, I'm getting all zeroes for percentage and count as 1. Any help is appreciated. I'm trying to do this in Redshift which follows postgresql syntax.
Let's nail the logic before presenting:
with CTE1 as
(
select attr, DATEPART(day, time) as theday, count(*) as thecount
from MyTable
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
select t1.attr, t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
From here you can pivot to create a day by day if you feel the need
I am trying to enhance the query #johnHC btw if you needs for 7days then you have to those days in case when
with CTE1 as
(
select attr, time::date as theday, count(*) as thecount
from t group by attr,time::date
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
,
CTE3 as
(
select t1.attr, EXTRACT(DOW FROM t1.theday) as day_nmbr,t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
)
select CTE3.attr,
max(case when day_nmbr=0 then CTE3.thecount end) as day1Cnt,
max(case when day_nmbr=0 then percentofday end) as day1,
max(case when day_nmbr=1 then CTE3.thecount end) as day2Cnt,
max( case when day_nmbr=1 then percentofday end) day2
from CTE3 group by CTE3.attr
http://sqlfiddle.com/#!17/54ace/20
In case that you have only 2 days:
http://sqlfiddle.com/#!17/3bdad/3 (days descending as in your example from left to right)
http://sqlfiddle.com/#!17/3bdad/5 (days ascending)
The main idea is already mentioned in the other answers. Instead of joining the CTEs for calculating the values I am using window functions which is a bit shorter and more readable I think. The pivot is done the same way.
SELECT
attr,
COALESCE(max(count) FILTER (WHERE day_number = 0), 0) as day1_count, -- D
COALESCE(max(percent) FILTER (WHERE day_number = 0), 0) as day1_percent,
COALESCE(max(count) FILTER (WHERE day_number = 1), 0) as day2_count,
COALESCE(max(percent) FILTER (WHERE day_number = 1), 0) as day2_percent
/*
Add more days here
*/
FROM(
SELECT *, (count::float/count_per_day)::decimal(5, 2) as percent -- C
FROM (
SELECT DISTINCT
attr,
MAX(time::date) OVER () - time::date as day_number, -- B
count(*) OVER (partition by time::date, attr) as count, -- A
count(*) OVER (partition by time::date) as count_per_day
FROM test_table
)s
)s
GROUP BY attr
ORDER BY attr
A counting the rows per day and counting the rows per day AND attr
B for more readability I convert the date into numbers. Here I take the difference between current date of the row and the maximum date available in the table. So I get a counter from 0 (first day) up to n - 1 (last day)
C calculating the percentage and rounding
D pivot by filter the day numbers. The COALESCE avoids the NULL values and switched them into 0. To add more days you can multiply these columns.
Edit: Made the day counter more flexible for more days; new SQL Fiddle
Basically, I see this as conditional aggregation. But you need to get an enumerator for the date for the pivoting. So:
SELECT attr,
COUNT(*) FILTER (WHERE day_number = 1) as day1_count,
COUNT(*) FILTER (WHERE day_number = 1) / cnt as day1_percent,
COUNT(*) FILTER (WHERE day_number = 2) as day2_count,
COUNT(*) FILTER (WHERE day_number = 2) / cnt as day2_percent
FROM (SELECT attr,
DENSE_RANK() OVER (ORDER BY time::date DESC) as day_number,
1.0 * COUNT(*) OVER (PARTITION BY attr) as cnt
FROM test_table
) s
GROUP BY attr, cnt
ORDER BY attr;
Here is a SQL Fiddle.

Calculating the weighted average cost in firebird sql

I have the same problem in this question but in Firebird 2.5
Calculating the Weighted Average Cost of products stock
And this answer didn't work (as Firebird 2.5 doesn't have row_number)
with recursive
stock_temp as (
select
*,
row_number() over(partition by product_id order by row_num) as rn
from
stock_table
)
,cte as (
select
document_type, document_date,
product_id, qty_out, qty_in, price,
row_num, stock_balance, rn,
price as wac
from
stock_temp where document_type = 'SI'
union all
select
sub.document_type, sub.document_date,
sub.product_id, sub.qty_out, sub.qty_in, sub.price,
sub.row_num, sub.stock_balance, sub.rn,
case when sub.qty_in = 0 then main.wac else
((sub.stock_balance - sub.qty_in) * main.wac + sub.qty_in * sub.price)
/ ((sub.stock_balance - sub.qty_in) + sub.qty_in) end as wac
from
cte as main
join stock_temp as sub
on (main.product_id = sub.product_id and main.rn + 1 = sub.rn)
)
select * from cte

Doing a comparison using the previous row?

I'm trying to work out an efficient way of comparing two rows in SQL Server 2008. I need to write a query which finds all rows in the Movement table which have Speed < 10 N consecutive times.
The structure of the table is:
EventTime
Speed
If the data were:
2012-02-05 13:56:36.980, 2
2012-02-05 13:57:36.980, 11
2012-02-05 13:57:46.980, 2
2012-02-05 13:59:36.980, 2
2012-02-05 14:06:36.980, 22
2012-02-05 15:56:36.980, 2
Then it would return rows 3/4 (13:57:46.980 / 13:59:36.980) if I looked for 2 consecutive rows, and would return nothing if I looked for three consecutive rows. The order of the data is EventTime/DateTime only.
Any help you could give me would be great. I'm considering using cursors but they're usually pretty inefficient. Also, this table is approximately 10m rows in size, so the more efficient the better! :)
Thanks!
DECLARE
#n INT,
#speed_limit INT
SELECT
#n = 5,
#speed_limit = 10
;WITH
partitioned AS
(
SELECT
*,
CASE WHEN speed < #speed_limit THEN 1 ELSE 0 END AS PartitionID
FROM
Movement
)
,
sequenced AS
(
SELECT
ROW_NUMBER() OVER ( ORDER BY EventTime) AS MasterSeqID,
ROW_NUMBER() OVER (PARTITION BY PartitionID ORDER BY EventTime) AS PartIDSeqID,
*
FROM
partitioned
)
,
filter AS
(
SELECT
MasterSeqID - PartIDSeqID AS GroupID,
MIN(MasterSeqID) AS GroupFirstMastSeqID,
MAX(MasterSeqID) AS GroupFinalMastSeqID
FROM
sequenced
WHERE
PartitionID = 1
GROUP BY
MasterSeqID - PartIDSeqID
HAVING
COUNT(*) >= #n
)
SELECT
sequenced.*
FROM
filter
INNER JOIN
sequenced
ON sequenced.MasterSeqID >= filter.GroupFirstMastSeqID
AND sequenced.MasterSeqID <= filter.GroupFinalMastSeqID
Alternative final steps (inspired by #t-clausen-dk), to avoid an additional JOIN. I would test both to see which is more performant.
,
filter AS
(
SELECT
MasterSeqID - PartIDSeqID AS GroupID,
COUNT(*) OVER (PARTITION BY MasterSeqID - PartIDSeqID) AS GroupSize,
*
FROM
sequenced
WHERE
PartitionID = 1
)
SELECT
*
FROM
filter
WHERE
GroupSize >= #n
declare #t table(EventTime datetime, Speed int)
insert #t values('2012-02-05 13:56:36.980', 2)
insert #t values('2012-02-05 13:57:36.980', 11)
insert #t values('2012-02-05 13:57:46.980', 2)
insert #t values('2012-02-05 13:59:36.980', 2)
insert #t values('2012-02-05 14:06:36.980', 22)
insert #t values('2012-02-05 15:56:36.980', 2)
declare #N int = 1
;with a as
(
select EventTime, Speed, row_number() over (order by EventTime) rn from #t
), b as
(
select EventTime, Speed, 1 grp, rn from a where rn = 1
union all
select a.EventTime, a.Speed, case when a.speed < 10 and b.speed < 10 then grp else grp + 1 end, a.rn
from a join b on a.rn = b.rn+1
), c as
(
select EventTime, Speed, count(*) over (partition by grp) cnt from b
)
select * from c
where cnt > #N
OPTION (MAXRECURSION 0) -- Thx Dems
Almost the same ideea as Dems, a little bit different:
select * from (
select eventtime, speed, rnk, new_rnk,
rnk - new_rnk,
max(rnk) over (partition by speed, new_rnk-rnk) -
min(rnk) over (partition by speed, new_rnk-rnk) + 1 as no_consec
from (
select eventtime, rnk, speed,
row_number() over (partition by speed order by eventtime) as new_rnk
from (
select eventtime, speed,
row_number() over (order by eventtime) as rnk
from a
) a
where a.speed < 5
)
order by eventtime
)
where no_consec >= 2;
5 is speed limit and 2 is min number of consecutive events.
I put date as number for simplicity of writing the create database.
SQLFIDDLE
EDIT:
To answer to comments, I've added three columns in the first inner query. To get only the first row you need to add an pos_in_group = 1 to WHERE clause and the distance is at your fingers.
SQLFIDDLE
select eventtime, speed, min_date, max_date, pos_in_group
from (
select eventtime, speed, rnk, new_rnk,
rnk - new_rnk,
row_number() over (partition by speed, new_rnk-rnk order by eventtime) pos_in_group,
min(eventtime) over (partition by speed, new_rnk-rnk) min_date,
max(eventtime) over (partition by speed, new_rnk-rnk) max_date,
max(rnk) over (partition by speed, new_rnk-rnk) -
min(rnk) over (partition by speed, new_rnk-rnk) + 1 as no_consec
from (
select eventtime, rnk, speed,
row_number() over (partition by speed order by eventtime) as new_rnk
from (
select eventtime, speed,
row_number() over (order by eventtime) as rnk
from a
) a
where a.speed < 5
)
order by eventtime
)
where no_consec > 1;