Counting the number of positive values in a query - SQL

I'm working on the following query and table
-- Daily average over-target quantity for branch 1 across weeks 86-88.
SELECT
    dd.actual_date,
    dd.week_number_overall,
    sf.branch_id,
    AVG(sf.overtarget_qnt) AS targetreach
FROM sales_fact sf
INNER JOIN date_dim dd
    ON dd.date_id = sf.date_id
WHERE dd.week_number_overall BETWEEN 88 - 2 AND 88
  AND sf.branch_id = 1
GROUP BY
    dd.actual_date,
    sf.branch_id,
    dd.week_number_overall
ORDER BY dd.actual_date ASC;
ACTUAL_DATE WEEK_NUMBER_OVERALL BRANCH_ID TARGETREACH
----------- ------------------- ---------- -----------
13/08/14 86 1 -11
14/08/14 86 1 12
15/08/14 86 1 11.8
16/08/14 86 1 1.4
17/08/14 86 1 -0.2
19/08/14 86 1 7.2
20/08/14 87 1 16.6
21/08/14 87 1 -1.4
22/08/14 87 1 14.4
23/08/14 87 1 2.8
24/08/14 87 1 18
26/08/14 87 1 13.4
27/08/14 88 1 -1.8
28/08/14 88 1 10.6
29/08/14 88 1 7.2
30/08/14 88 1 14
31/08/14 88 1 9.6
02/09/14 88 1 -3.2
The "TargetReach" column shows whether the target has been reached or not.
A negative value means target wasn't reached on that day.
How can I calculate the number of rows with a positive value for this query?
that will show something like:
TOTAL_POSITIVE_TARGET_REACH WEEK_NUMBER_OVERALL
--------------------------- ------------------
13 88
I have tried to use CASE but still not working right.
Thanks a lot.

You want to use conditional aggregation:
-- Conditional aggregation: wrap the original query in a CTE, then count
-- the rows per week whose targetreach is positive.
with t as (
<your query here>
)
-- CASE yields 1 for a positive targetreach and 0 otherwise; the SUM of
-- those 1s and 0s is the number of positive rows in each week.
select week_number_overall, sum(case when targetreach > 0 then 1 else 0 end)
from t
group by week_number_overall;
However, I would rewrite your original query to use proper join syntax. Then the query would look like:
-- Same conditional aggregation, with the original query inlined as a
-- derived table using explicit JOIN syntax.
SELECT week_number_overall,
       -- name the aggregate so the result column matches the desired
       -- TOTAL_POSITIVE_TARGET_REACH header instead of an engine default
       SUM(CASE WHEN targetreach > 0 THEN 1 ELSE 0 END) AS total_positive_target_reach
FROM (SELECT dd.actual_date, dd.week_number_overall, sf.branch_id,
             AVG(sf.overtarget_qnt) AS targetreach
      FROM sales_fact sf JOIN
           date_dim dd
           ON dd.date_id = sf.date_id
      WHERE dd.week_number_overall BETWEEN 88-2 AND 88 AND sf.branch_id = 1
      GROUP BY dd.actual_date, branch_id, dd.week_number_overall
     ) t
GROUP BY week_number_overall
ORDER BY week_number_overall;
The difference between a CTE (the first solution) and a subquery is (in this case) just a matter of preference.

-- Count only strictly positive days: the question asks for rows with a
-- positive value ("a negative value means target wasn't reached"), so
-- >= 0 would also count zero days.  The derived table also needs an
-- alias to be valid in MySQL / SQL Server.
SELECT WEEK_NUMBER_OVERALL, COUNT(*) TOTAL_POSITIVE_TARGET_REACH
FROM (your original query) t
WHERE TARGETREACH > 0
GROUP BY WEEK_NUMBER_OVERALL

-- Oracle DECODE(SIGN(x), -1,0, 0,0, 1,1) maps a positive TARGETREACH to 1
-- and a zero or negative value to 0, so the SUM is the count of positive
-- rows.  NOTE(review): this returns a single overall count; add a
-- GROUP BY week_number_overall to get the per-week breakdown requested.
select sum( decode( sign( TARGETREACH ) , -1 , 0 , 0 , 0 , 1 , 1 ) )
from ( "your query here" );

Use HAVING Clause
-- HAVING filters the grouped rows themselves: only days whose average
-- over-target quantity is positive (target reached) are returned.
SELECT
    dd.actual_date,
    dd.week_number_overall,
    sf.branch_id,
    AVG(sf.overtarget_qnt) AS targetreach
FROM sales_fact sf
INNER JOIN date_dim dd
    ON dd.date_id = sf.date_id
WHERE dd.week_number_overall BETWEEN 88 - 2 AND 88
  AND sf.branch_id = 1
GROUP BY dd.actual_date, sf.branch_id, dd.week_number_overall
HAVING AVG(sf.overtarget_qnt) > 0
ORDER BY dd.actual_date ASC;

Using decode(), sign() get both positive count & negative count.
drop table test;

create table test (
    key   number(5),
    value number(5)
);

-- seven sample rows: four positive and three negative values
insert into test values ( 1, -9 );
insert into test values ( 2, -8 );
insert into test values ( 3, 10 );
insert into test values ( 4, 12 );
insert into test values ( 5, -9 );
insert into test values ( 6, 8 );
insert into test values ( 7, 51 );
commit;

-- classify each value as '+ve' / 'zero' / '-ve' via SIGN(), then count
-- how many rows fall into each class
select sig, count( sig )
from (
    select key,
           decode( sign( value ), -1, '-ve', 0, 'zero', 1, '+ve' ) as sig
    from test
)
group by sig;
SIG COUNT(SIG)
---- ----------------------
+ve 4
-ve 3

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Rebuild #t with a random number (< 1000) of rows of random-length text.
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects
-- NOTE(review): "DECLARE #maxGroupSize" is not valid T-SQL; presumably the
-- variable was @maxGroupSize and the @ was mangled in transit -- confirm.
DECLARE #maxGroupSize INT
SET #maxGroupSize = 100
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
-- Integer division of the running total assigns the group.  A group can
-- exceed the maximum because the running total never resets at a group
-- boundary -- this is the defect the question is about.
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/#maxGroupSize AS GroupID
FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed sample rows with contiguous ids starting at 1; the recursive CTE
-- below walks them in id order via "d.id = r.id + 1".
create table data
(
id int,
description nvarchar(20)
);
insert into data (id, description) values
( 1, 'qmlsdkjfqmsldk'),
( 2, 'mldskjf'),
( 3, 'qmsdlfkqjsdm'),
( 4, 'fmqlsdkfq'),
( 5, 'qdsfqsdfqq'),
( 6, 'mds'),
( 7, 'qmsldfkqsjdmfqlkj'),
( 8, 'qdmsl'),
( 9, 'mqlskfjqmlkd'),
(10, 'qsdqfdddffd');
Solution
For every recursion step evaluate (r.group_running_length + len(d.description) <= #group_max_length) if the previous group must be extended or a new group must be started in a case expression.
Set group target size to 40 to better fit the sample data.
-- Walk the rows one id at a time; each step either extends the current
-- group (if its running length stays within the cap) or starts a new one.
-- NOTE(review): "declare #group_max_length" is presumably @group_max_length
-- with the @ mangled in transit -- confirm.
declare #group_max_length int = 40;
with rcte as
(
-- anchor: the first row starts group 1 with its own length
select d.id,
d.description,
len(d.description) as description_length,
len(d.description) as running_length,
1 as group_id,
len(d.description) as group_running_length
from data d
where d.id = 1
union all
-- recursive step: consume the next id and decide whether it still fits
-- in the previous row's group
select d.id,
d.description,
len(d.description),
r.running_length + len(d.description),
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_id
else r.group_id + 1
end,
-- group-local running length: extended, or restarted for a new group
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_running_length + len(d.description)
else len(d.description)
end
from rcte r
join data d
on d.id = r.id + 1
)
select r.id,
r.description,
r.description_length,
r.running_length,
r.group_id,
r.group_running_length,
gs.group_sum
from rcte r
-- per-group total = the max group-local running length within the group
cross apply ( select max(r2.group_running_length) as group_sum
from rcte r2
where r2.group_id = r.group_id ) gs -- group sum
order by r.id;
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

Removing pairs of transactions

I am attempting to remove transactions that have been reversed from a table. the table has Account, Date, Amount and Row. If a transaction has been reversed Account will match and Amount will be inverse of each other.
Example Table
Account Date Amount Row
12 1/1/18 45 72 -- Case 1
12 1/2/18 50 73
12 1/2/18 -50 74
12 1/3/18 52 75
15 1/1/18 51 76 -- Case 2
15 1/2/18 51 77
15 1/2/18 -51 78
15 1/2/18 51 79
18 1/2/18 50 80 -- Case 3
18 1/2/18 50 81
18 1/2/18 -50 82
18 1/2/18 -50 83
18 1/3/18 50 84
18 1/3/18 50 85
20 1/1/18 57 88 -- Case 4
20 1/2/18 57 89
20 1/4/18 -57 90
20 1/5/18 57 91
Desired Results Table
Account Date Amount Row
12 1/1/18 45 72 -- Case 1
12 1/3/18 52 75
15 1/1/18 51 76 -- Case 2
15 1/2/18 51 79
18 1/3/18 50 84 -- Case 3
18 1/3/18 50 85
20 1/1/18 57 88 -- Case 4
20 1/5/18 57 91
Removing all instances of inverse transactions does not work when there are multiple transactions when all other columns are the same. My attempt was to count all duplicate transactions, count of all inverse duplicate transactions, subtracting those to get the number of rows I needed from each transactions group. I was going to pull the first X rows but found in most cases I want the last X rows of each group, or even a mix (the first and last in Case 2).
I either need a method of removing pairs from the original table, or working from what I have so far, a method of distinguishing which transactions to pull.
Code so far:
--adding row Numbers
-- NOTE(review): "table", "date" and the alias letters below shadow reserved
-- words in most engines; quote or rename them in a real schema.
with a as (
select
account a,
date d,
amount f,
row_number() over(order by account, date) r
from table),
--counting Duplicates
b as (
select a.a, a.f, Dups
from a join (
select a, f, count(*) Dups
from a
group by a.a, a.f
having count(*)>1
) b
on a.a=b.a and
b.f=a.f
where a.f>0
),
--counting inverse duplicates
c as (
select a.a, a.f, InvDups
from a join (
select a, f, count(*) InvDups
from a
group by a.a, a.f
having count(*)>1
) b
on a.a=b.a and
-b.f=a.f
where a.f>0
),
--combining c and d to get desired number of rows of each transaction group
d as (
select
b.a, b.f, dups, InvDups, Dups-InvDups TotalDups
from b join c
on b.a=c.a and
b.f=c.f
),
-- NOTE(review): the trailing comma after the "d" CTE above leaves the WITH
-- list unterminated before this SELECT -- syntax error as written.
--getting the number of rows from the beginning of each transaction group
-- NOTE(review): "d.d" is not a column of CTE d (it exposes a, f, dups,
-- InvDups, TotalDups), and "over (group by ...)" is invalid -- an OVER
-- clause takes PARTITION BY / ORDER BY, not GROUP BY.
select d.a, d.d, d.f
from
(select
a, d, f, row_number() over (group by a, d, f) r2
from a) e
join d
on e.a=d.a and
TotalDups<=r2
You can try this.
-- Pair the n-th positive amount with the n-th negative amount of the same
-- absolute value per account; positives left without a partner survive.
SELECT pos.*
FROM (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY Account, Amount ORDER BY [Row]) RN
    FROM #MyTable
    WHERE Amount > 0
) pos
LEFT JOIN (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY Account, Amount ORDER BY [Row]) RN
    FROM #MyTable
    WHERE Amount < 0
) neg
    ON  pos.Account = neg.Account
    AND pos.Amount  = ABS(neg.Amount)
    AND pos.RN      = neg.RN
WHERE neg.Account IS NULL
The following handles your three cases:
-- Number duplicates within (account, date, amount): the n-th amount X row
-- cancels the n-th amount -X row on the same account and date.
with t as (
select t.*,
-- n-th occurrence of this exact (account, date, amount) combination
row_number() over (partition by account, date, amount order by row) as seqnum
from table t
)
-- keep a row only when no same-position inverse row exists
select t.*
from t
where not exists (select 1
from t t2
where t2.account = t.account and t2.date = t.date and
t2.amount = -t.amount and t2.seqnum = t.seqnum
);
-- NOTE(review): "table", "date" and "row" are reserved words used as
-- placeholders here.  Case 4 in the question (a reversal dated on a
-- different day) is intentionally not matched by the t2.date = t.date test.
Use This
;WITH CTE
AS
(
-- positive rows that have an exact inverse on the same account and date
SELECT
[Row]
FROM YourTable YT
WHERE Amount > 0
AND EXISTS
(
SELECT 1 FROM YourTable WHERE Account = YT.Account
AND [Date] = YT.[Date]
AND (Amount+YT.Amount)=0
)
UNION ALL
-- negative rows where some same-account/date row exceeds the inverse
SELECT
[Row]
FROM YourTable YT
WHERE Amount < 0
AND EXISTS
(
SELECT 1 FROM YourTable WHERE Account = YT.Account
AND [Date] = YT.[Date]
AND (Amount+YT.Amount)>0
)
)
-- NOTE(review): this final SELECT returns rows that ARE in the CTE, i.e. it
-- keeps the flagged (reversed) transactions rather than removing them; to
-- drop them it would presumably need NOT EXISTS -- confirm the intent.
SELECT * FROM YourTable
WHERE EXISTS
(
SELECT 1 FROM CTE WHERE [Row] = YourTable.[Row]
)

SQL: order by calculated rate between two tables

I might be confusing syntax from other languages with SQL and that's why this isn't working, my subquery is definitely incorrect.
Having the following 2 tables:
TEST_RESULTS
Student_ID Test_ID Test_Result
A1 234 90
B2 234 80
C3 345 85
D4 234 95
A1 345 95
C3 456 95
TEST_DESCRIPTION
Test_ID Test_Description Passing_Score
234 Test A 85
345 Test B 90
456 Test C 95
I want to calculate the rate of passing for each test and sort by it.
The output I am looking for:
Test_ID Test_Description students_taking students_passing rate
456 Test C 1 1 1
234 Test A 3 2 0.666666667
345 Test B 2 1 0.5
This is my query
-- NOTE(review): several problems as written:
--  * "td.Test_Result > td." is truncated -- presumably td.Passing_Score.
--  * the derived table has no alias, which most engines require.
--  * the outer query references td/tr, but neither alias is in scope of
--    the outer FROM (only the unnamed derived table is).
--  * students_taking is counted inside a query already filtered to passing
--    rows, so taking and passing would be the same count.
SELECT td.Test_ID, td.Test_Description, COUNT(tr.Student_ID) as
students_taking, students_passing, students_passing/students_taking as rate
FROM
(SELECT td.Test_ID, td.Test_Description, COUNT(tr.Student_ID) as
students_passing
FROM TEST_RESULTS tr
JOIN TEST_DESCRIPTION td
on tr.Test_ID = td.Test_ID
WHERE tr.Test_Result > td.)
GROUP BY td.Test_ID, td.Test_Description
ORDER BY rate DESC, td.Test_ID, td.Test_Description
My select from select is wrong, because I am getting no results for this query.
I'm using CTE, LEFT JOIN for getting the desired result.
Try this query --
-- One row per test: takers, passers, and pass rate; LEFT JOIN keeps tests
-- nobody has taken yet.
;WITH CTE
AS
(
SELECT
    TD.TEST_ID,
    TEST_DESCRIPTION,
    COUNT(TR.STUDENT_ID) AS STUDENTS_TAKING,
    -- the TEST_DESCRIPTION column is PASSING_SCORE (singular), not
    -- PASSING_SCORES; a result equal to the passing score counts as a pass
    COUNT(CASE WHEN TR.TEST_RESULT >= TD.PASSING_SCORE THEN
        TR.STUDENT_ID END) AS STUDENTS_PASSING
FROM TEST_DESCRIPTION TD
LEFT JOIN TEST_RESULTS TR
    ON TD.TEST_ID = TR.TEST_ID
GROUP BY TD.TEST_ID,
    TEST_DESCRIPTION
)
SELECT
    TEST_ID,
    TEST_DESCRIPTION,
    STUDENTS_TAKING,
    STUDENTS_PASSING,
    -- cast forces decimal (not integer) division; NULLIF avoids a
    -- divide-by-zero for tests with no takers (rate becomes NULL);
    -- DECIMAL(9,2) allows more than 99 takers
    STUDENTS_PASSING / CONVERT (DECIMAL(9,2), NULLIF(STUDENTS_TAKING, 0)) AS RATE
FROM CTE
-- sort by pass rate, highest first, to match the requested output
ORDER BY RATE DESC, TEST_ID
Please check below query-
-- Aggregate per test in a derived table, then join back to pick up the
-- description; rate is rounded to two decimals via TO_CHAR/TO_NUMBER.
SELECT
    td.test_id,
    td.test_description,
    student_taking,
    student_passing,
    rate
FROM test_description td
INNER JOIN (
    SELECT
        tr.test_id,
        COUNT(tr.student_id) AS student_taking,
        COUNT(CASE WHEN test_result >= passing_score THEN student_id END) AS student_passing,
        TO_NUMBER(
            TO_CHAR(
                COUNT(CASE WHEN test_result >= passing_score THEN student_id END)
                    / COUNT(tr.student_id),
                '9999.99')) AS rate
    FROM test_results tr
    INNER JOIN test_description td
        ON tr.test_id = td.test_id
    GROUP BY tr.test_id
) sub
    ON sub.test_id = td.test_id
ORDER BY rate DESC;
-- Per-test counts via OUTER APPLY so tests with no takers still appear
-- (rate 0).
SELECT td.Test_ID, td.Test_Description,
students_taking = counts.students_taking,
students_passing = counts.students_passing,
rate = counts.rate
FROM TEST_DESCRIPTION td
OUTER APPLY (
SELECT
students_taking = COUNT(1),
-- >= rather than >: a result exactly equal to the passing score is a pass
-- (expected output counts the 95 scored on Test C, passing score 95)
students_passing = COUNT(CASE WHEN tr.Test_Result >= td.Passing_score THEN 1 ELSE NULL END),
-- CAST to FLOAT avoids integer division; IIF guards the no-taker case
rate = IIF(COUNT(1) <> 0, COUNT(CASE WHEN tr.Test_Result >= td.Passing_score THEN 1 ELSE NULL END) / CAST(COUNT(1) AS FLOAT), 0)
FROM TEST_RESULTS tr
WHERE tr.Test_ID = td.Test_ID
) counts
ORDER BY counts.rate DESC, td.Test_ID

Combining multiple rows into one row

I have the following table
Index BookNumber
2 51
2 52
2 53
1 41
1 42
1 43
I am trying to come up with the following output
Index BookNumber1 Booknumber2 Booknumber3
----------------------------------------------
1 41 42 43
2 51 52 53
I was able to come up with the following query , however the output is unexpected
-- NOTE(review): without aggregation, each source row produces its own
-- output row (NULL in the other columns), and DISTINCT cannot merge rows
-- that differ -- hence the sparse output shown below.  Also, the CASE
-- branches test the "index" column itself, not a 1..3 position within the
-- group, and "index" is a reserved word in most engines.
SELECT DISTINCT
index,
CASE WHEN index = 1 THEN Booknumber END AS BookNumber1,
CASE WHEN index = 2 THEN Booknumber END AS BookNumber2,
CASE WHEN index = 3 THEN Booknumber END AS BookNumber3
FROM Mytable;
I get following output
Index BN1 BN2 BN3
------------------------------
1 41 null null
1 null 42 null
1 null null 43
2 51 null null
2 null 52 null
2 null null 53
Is there a way to compress this to only 2 rows?
I am not quite sure how the index in your query matches the index column in your data. But the query that you want is:
-- Collapse to one row per index: MAX ignores the NULLs produced by the
-- non-matching CASE branches, keeping the single matching Booknumber.
SELECT index,
       MAX(CASE WHEN index = 1 THEN Booknumber END) AS BookNumber1,
       MAX(CASE WHEN index = 2 THEN Booknumber END) AS BookNumber2,
       MAX(CASE WHEN index = 3 THEN Booknumber END) AS BookNumber3
FROM Mytable
GROUP BY index;
Given your data, the query seems more like:
-- Same conditional-aggregation pivot, but keyed on a per-group position
-- computed with ROW_NUMBER rather than on the index column itself.
SELECT index,
       MAX(CASE WHEN ind = 1 THEN Booknumber END) AS BookNumber1,
       MAX(CASE WHEN ind = 2 THEN Booknumber END) AS BookNumber2,
       MAX(CASE WHEN ind = 3 THEN Booknumber END) AS BookNumber3
FROM (
    -- number the books within each index so they can spread into columns
    SELECT mt.*,
           ROW_NUMBER() OVER (PARTITION BY index ORDER BY BookNumber) AS ind
    FROM Mytable mt
) mt
GROUP BY index;
By the way, "index" is a reserved word, so I assume that it is just a placeholder for another column name. Otherwise, you need to escape it with double quotes or square braces.
Assuming there are always 3 or fewer book numbers for each index, you could use:
-- Keep only the row carrying the largest booknumber per idx; its two LAG
-- values are the preceding booknumbers of the same idx.
with data as
(select idx,
booknumber as bn1,
lag(booknumber, 1) over(partition by idx order by idx, booknumber) as bn2,
lag(booknumber, 2) over(partition by idx order by idx, booknumber) as bn3
from books)
select *
from data
where data.bn1 = (select max(x.bn1) from data x where x.idx = data.idx)
-- NOTE(review): with fewer than 3 books per idx, bn2/bn3 are NULL; and the
-- surviving row puts the LARGEST booknumber in bn1 (e.g. 43, 42, 41), not
-- ascending order as the sample output suggests -- confirm against the demo.
sqlfiddle demo is here: http://sqlfiddle.com/#!6/8dc82/5/0
Don't forget that index is a reserved word. Personally I prefer not to use reserved words as column names, but you can compensate by using square brackets like in my example.
This will work from sqlserver 2008+
-- Table variable with sample data.  DECLARE requires an @ prefix for a
-- table variable ("declare #t table" is invalid T-SQL -- # denotes a temp
-- table, which is created with CREATE TABLE / SELECT INTO instead).
declare @t table([Index] int, BookNumber int)
insert @t values
(2,51),(2,52),(2,53),(1,41),(1,42),(1,43)

-- Number the books within each [Index], then PIVOT positions 1..3 into
-- BookNumber1..3 columns.
;with cte as
(
select [Index], BookNumber,
       row_number() over (partition by [Index] order by BookNumber) rn
from @t
)
select [Index], [1] as Booknumber1, [2] as Booknumber2, [3] as Booknumber3
from cte
pivot (max([booknumber]) FOR [rn] IN ([1],[2],[3])) AS pvt
Result:
Index Booknumber1 Booknumber2 Booknumber3
1 41 42 43
2 51 52 53

How to compute the diff between records?

My table records is like below
ym cnt
200901 57
200902 62
200903 67
...
201001 84
201002 75
201003 75
...
201101 79
201102 77
201103 80
...
I want to compute the diff between the current month and the previous month.
the result would like below ...
ym cnt diff
200901 57 57
200902 62 5 (62 - 57)
200903 67 5 (67 - 62)
...
201001 84 ...
201002 75
201003 75
...
201101 79
201102 77
201103 80
...
Can anyone tell me how to write a SQL query that gets this result with good performance?
UPDATE:
sorry for simple words
my solution is
step1: input the currentmonth data into temp table1
step2: input the permonth data into temp table2
step3: left join 2 tables to compute the result
Temp_Table1
-- Label each month's count with the PREVIOUS month number, so joining on
-- ym later pairs month m with month m+1's count.
-- NOTE(review): subtracting 1 from a YYYYMM integer only works within a
-- year (200901 - 1 = 200900, not 200812), so January rows never match.
SELECT (ym - 1) as ym , COUNT( item_cnt ) as cnt
FROM _table
GROUP BY (ym - 1 )
order by ym
Temp_Table2
-- Plain per-month counts, keyed by the actual month number.
SELECT ym , COUNT( item_cnt ) as cnt
FROM _table
GROUP BY ym
order by ym
-- diff = following month's count minus this month's count.
-- NOTE(review): the unqualified "ym" is ambiguous (both a and b expose a
-- column named ym) and will error in most engines -- should be a.ym.
select ym , (b.cnt - a.cnt) as diff from Temp_Table2 a
left join Temp_Table1 b
on a.ym = b.ym
If I want to compare the diff between the same month in this year and last year,
I can simply change the ym - 1 to ym - 100.
but , actually , the group by key is not only ym
there is max 15 keys and max 100 millions records
so , I wonder a good solution can easy to manager the source
and good performance.
For MSSQL, this has one reference to the table, so potentially it can be faster (maybe not) than left join which has two references to the table:
-- ================
-- sample data
-- ================
-- NOTE(review): "declare #t table" is presumably "@t" with the @ mangled
-- in transit; # would denote a temp table, not a table variable -- confirm.
declare #t table
(
ym varchar(6),
cnt int
)
insert into #t values ('200901', 57)
insert into #t values ('200902', 62)
insert into #t values ('200903', 67)
insert into #t values ('201001', 84)
insert into #t values ('201002', 75)
insert into #t values ('201003', 75)
-- ===========================
-- solution
-- ===========================
-- Single-scan month-over-month diff: each row is duplicated via the k=1/2
-- cross join, and GROUP BY rn + k pairs row rn (as "current", k=1) with
-- row rn-1 (as "previous", k=2) in the same group.
select
ym2,
-- boundary rows (first/last month) have only one side of the pair
diff = case when cnt1 is null then cnt2
when cnt2 is null then cnt1
else cnt2 - cnt1
end
from
(
select
-- k=2 copy contributes the previous month of the pair
ym1 = max(case when k = 2 then ym end),
cnt1 = max(case when k = 2 then cnt end),
-- k=1 copy contributes the current month of the pair
ym2 = max(case when k = 1 then ym end),
cnt2 = max(case when k = 1 then cnt end)
from
(
select
*,
rn = row_number() over(order by ym)
from #t
) t1
cross join
(
select k = 1 union all select k = 2
) t2
group by rn + k
) t
where ym2 is not null
Can anyone tell me how to write a SQL query to get the result
Absolutely. Simply get the row with the next highest date, and subtract.
and with a good performance ?
No. Relational databases are not really meant to be traversed linearly, and even using indexes appropriately would require a virtual linear traversal.