Why do the summed totals differ between the two queries? - sql

-- Builds a per-segment breakdown of active subscribers, revenue and data traffic
-- for a single time_key value, then re-aggregates the revenue per region.
with totals as (select two_sim_ind,
case when account_type_key in(86,88,13,113,37,39,131) then 'B2C' else 'B2B' end as Segment,
case when subs_activation_date_key >= time_key then 'New' else 'Old' end as Sales_to_active,
time_key as TIME_KEY, name_region as REGION, area_name as DISTRICT, sim_type as USIM_FLAG,
-- Device generation: any 4G signal wins, then 3G-only, everything else is reported as 2G.
case when lte_device = 1 or lte_ind in('Yes') or lte_user > 0 then '4G Device'
when ind_3g in('Yes') and lte_ind in('No') then '3G Device' else '2G Device' end as device_tech_support,
device_type as DEVICE_TYPE,
case when device_type in('Smartphone') then 'Yes' else 'No' end as is_Smartphone_AAB,
case when data_user in(0) then 'No' else 'Yes' end as is_Data_User,
case when lte_user in(0) then 'No' else 'Yes' end as is_4G_Data_User,
-- Distinct count per group; adding these counts up across groups can count one subs_key more than once.
count(distinct subs_key) as A1M,
sum(total_revenue) as REVENUE_TOTAL,
sum(data_traffic_local_2g + data_traffic_local_3g+data_traffic_local_4g) as TRAFFIC_DATA_full_Sum,
sum(data_traffic_local_2g + data_traffic_local_3g) as g2_g3_traffic,
sum(data_traffic_local_4g) as Data_traffic_4g
from dwh.m as m
-- NOTE(review): a left join keeps every row of m but repeats it once per matching
-- row of dwh.w. If model_name is not unique in dwh.w, total_revenue is summed
-- several times for the same m row, which would make the regional totals larger
-- than those of a query on dwh.m alone. Confirm that dwh.w has one row per model_name.
left join (select model_name, ind_3g, lte_ind
from dwh.w) as devices
on m.model_name = devices.model_name
where rtc_active_ind > 0 and subs_status_key in ('A', 'S')
-- Lower and upper bound are the same literal, so only time_key = '2021-12-01' qualifies.
and (time_key >= '2021-12-01' and time_key <= '2021-12-01')
group by two_sim_ind,
case when account_type_key in(86,88,13,113,37,39,131) then 'B2C' else 'B2B' end,
case when subs_activation_date_key >= time_key then 'New' else 'Old' end,
case when data_user in(0) then 'No' else 'Yes' end,
case when lte_device = 1 or lte_ind in('Yes') or lte_user > 0 then '4G Device'
when ind_3g in('Yes') and lte_ind in('No') then '3G Device' else '2G Device' end,
-- lte_user is grouped by its raw value although the select list only exposes the Yes/No flag derived from it.
time_key, name_region, area_name, sim_type, device_type, lte_user)
-- Outer step: collapse the detailed groups back to one revenue figure per region.
select REGION, sum(REVENUE_TOTAL)
from totals
group by REGION
When I aggregate the total of all records by region, the value is significantly higher than it should be.
Result I get:
-- Reference figure: revenue per region read straight from dwh.m, without any join.
select
    name_region as REGION,
    sum(total_revenue)
from dwh.m
where rtc_active_ind > 0
  and subs_status_key in ('A', 'S')
  and time_key >= '2021-12-01'
  and time_key <= '2021-12-01'
group by
    name_region;
Result of query and what I need:
Does something happen to individual records during the GROUP BY? What could be the reason?

Related

How to split data in SQL

I have the following code:
-- Totals WIP transaction amounts per fee earner, split into fees, disbursements
-- and creditors according to the transaction type id.
select
FeeEarnerID,
(
-- Name lookup: feeearner row for this id -> its userid -> [User].name.
select
(select [name] from [User] AS u where u.userid=f.userid)
from
feeearner AS f
where
f.FeeEarnerID=aa.FeeEarnerID
) FeeEarner,
sum(aa.FEES) Fees,
sum(aa.DISB) Disbursements,
sum(aa.CREDITORS) Creditors
from
(
-- One row per joined transaction; the amount lands in exactly one of the three
-- columns when its type id is in one of the lists, and in none of them otherwise.
SELECT
FeeEarner.FeeEarnerID,
case when WIPTransaction.WIPTransactionTypeID IN (1,17,18,20,21,25) then WIPTransaction.Amount else 0 end 'FEES',
case when WIPTransaction.WIPTransactionTypeID IN (2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,26,27,28,29) then WIPTransaction.Amount else 0 end 'DISB',
case when WIPTransaction.WIPTransactionTypeID IN (24) then WIPTransaction.Amount else 0 end 'CREDITORS'
FROM
(
(
FeeEarner
JOIN
WIPTransaction ON FeeEarner.FeeEarnerID = WIPTransaction.FeeEarnerID
)
JOIN
-- No column of WIPTransactionType is selected; as an inner join it only removes
-- transactions whose type id has no row in that table.
WIPTransactionType ON WIPTransactionType.WIPTransactionTypeID = WIPTransaction.WIPTransactionTypeID
)
WHERE
-- NOTE(review): BETWEEN includes both end points; if TransactionDate carries a time
-- of day, rows later than midnight on 2020-12-31 are excluded. Confirm the column type.
(WIPTransaction.TransactionDate BETWEEN '2020-10-01' AND '2020-12-31')
)
AS aa
group by
FeeEarnerID
Used Table names: WIPtransaction, WIPtransactiontype, Feeearner
I want to display two more columns at the end of the output, namely: Invoiced and Uninvoiced.
The "Invoicenumber" field in the "WIPtransaction" table will be tested for this. If "Invoicenumber" is NULL, the transaction amount is added to the sum in the Uninvoiced column; if "Invoicenumber" contains a number, the transaction amount is added to the sum in the Invoiced column.
What is the code that I would need to write and where would it be placed?
-- Per fee earner: fees, disbursements and creditors by transaction type, plus the
-- invoiced and uninvoiced totals, for transactions dated 2020-10-01 to 2020-12-31.
select
    aa.FeeEarnerID,
    (
        -- Name lookup: feeearner row for this id -> its userid -> [User].name.
        select
            (select [name] from [User] AS u where u.userid = f.userid)
        from
            feeearner AS f
        where
            f.FeeEarnerID = aa.FeeEarnerID
    ) FeeEarner,
    sum(aa.FEES) Fees,
    sum(aa.DISB) Disbursements,
    sum(aa.CREDITORS) Creditors,
    sum(aa.InvoicedAmount) AS InvoicedAmount,
    sum(aa.UnInvoicedAmount) AS UnInvoicedAmount
from
(
    -- One row per joined transaction with its amount routed into the matching columns.
    SELECT
        FeeEarner.FeeEarnerID,
        case when WIPTransaction.WIPTransactionTypeID IN (1,17,18,20,21,25) then WIPTransaction.Amount else 0 end 'FEES',
        case when WIPTransaction.WIPTransactionTypeID IN (2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,26,27,28,29) then WIPTransaction.Amount else 0 end 'DISB',
        case when WIPTransaction.WIPTransactionTypeID IN (24) then WIPTransaction.Amount else 0 end 'CREDITORS',
        -- Invoiced vs. uninvoiced is decided solely by whether Invoicenumber is NULL.
        -- ELSE 0 matches the three columns above, so a fee earner with nothing in a
        -- bucket shows 0 rather than NULL.
        CASE WHEN WIPTransaction.Invoicenumber IS NOT NULL THEN WIPTransaction.Amount ELSE 0 END AS InvoicedAmount,
        CASE WHEN WIPTransaction.Invoicenumber IS NULL THEN WIPTransaction.Amount ELSE 0 END AS UnInvoicedAmount
    FROM
        FeeEarner
    JOIN
        WIPTransaction ON FeeEarner.FeeEarnerID = WIPTransaction.FeeEarnerID
    JOIN
        -- No column of WIPTransactionType is selected; the inner join only removes
        -- transactions whose type id has no row in that table.
        WIPTransactionType ON WIPTransactionType.WIPTransactionTypeID = WIPTransaction.WIPTransactionTypeID
    WHERE
        -- NOTE(review): BETWEEN includes both end points; if TransactionDate carries a
        -- time of day, rows later than midnight on 2020-12-31 are excluded. Confirm the column type.
        WIPTransaction.TransactionDate BETWEEN '2020-10-01' AND '2020-12-31'
)
AS aa
group by
    aa.FeeEarnerID
You can remove your derived query and combine it all into one. The FeeEarner double sub-query can also be optimized:
-- Single-level version of the fee-earner report: no derived table, one name lookup.
select
    -- Qualified: both FeeEarner and WIPTransaction expose a FeeEarnerID column, so
    -- the bare name is ambiguous once the derived table is removed.
    FeeEarner.FeeEarnerID,
    (
        select [name] from [User] AS u where u.userid = FeeEarner.userid
    ) FeeEarner,
    sum(case when WIPTransaction.WIPTransactionTypeID IN (1,17,18,20,21,25) then WIPTransaction.Amount else 0 end) Fees,
    sum(case when WIPTransaction.WIPTransactionTypeID IN (2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,26,27,28,29) then WIPTransaction.Amount else 0 end) Disbursements,
    sum(case when WIPTransaction.WIPTransactionTypeID IN (24) then WIPTransaction.Amount else 0 end) Creditors,
    -- ELSE 0 matches the three columns above, so an empty bucket shows 0 rather than NULL.
    SUM(CASE WHEN WIPTransaction.Invoicenumber IS NOT NULL THEN WIPTransaction.Amount ELSE 0 END) AS InvoicedAmount,
    SUM(CASE WHEN WIPTransaction.Invoicenumber IS NULL THEN WIPTransaction.Amount ELSE 0 END) AS UnInvoicedAmount
FROM
    FeeEarner
JOIN
    WIPTransaction ON FeeEarner.FeeEarnerID = WIPTransaction.FeeEarnerID
JOIN
    -- No column of WIPTransactionType is selected; the inner join only removes
    -- transactions whose type id has no row in that table.
    WIPTransactionType ON WIPTransactionType.WIPTransactionTypeID = WIPTransaction.WIPTransactionTypeID
WHERE
    WIPTransaction.TransactionDate BETWEEN '2020-10-01' AND '2020-12-31'
group by
    FeeEarner.FeeEarnerID,
    -- userid is referenced by the name lookup in the select list, so it has to be
    -- grouped as well. NOTE(review): this assumes one userid per FeeEarnerID, in which
    -- case the groups are unchanged -- confirm FeeEarnerID is unique in FeeEarner.
    FeeEarner.userid;

How do I GROUP the results of a query that is already grouped?

I have a query:
-- Average number of days between SUBMITDATE and DateClosed for closed rows,
-- reported per month (first 7 characters of the style-120 date, e.g. '2015-06').
SELECT
CONVERT(varchar(7),SUBMITDATE, 120) as 'Month'
-- Each column is filled only on the row whose ReportType matches; on every other row it is NULL.
,CASE WHEN ReportType = '1' THEN (SELECT AVG(DATEDIFF(DAY,SUBMITDATE,DateClosed))) END as 'Report1Avg'
,CASE WHEN ReportType = '2' THEN (SELECT AVG(DATEDIFF(DAY,SUBMITDATE,DateClosed))) END as 'Report2Avg'
,CASE WHEN ReportType = '3' THEN (SELECT AVG(DATEDIFF(DAY,SUBMITDATE,DateClosed))) END as 'Report3Avg'
,CASE WHEN ReportType = '4' THEN (SELECT AVG(DATEDIFF(DAY,SUBMITDATE,DateClosed))) END as 'Report4Avg'
,CASE WHEN ReportType = '5' THEN (SELECT AVG(DATEDIFF(DAY,SUBMITDATE,DateClosed))) END as 'Report5Avg'
FROM Table1
WHERE STATUS = 'Closed'
-- ReportType is part of the grouping key, so a month yields one row per report type present in it.
GROUP BY CONVERT(varchar(7),SUBMITDATE, 120), ReportType
ORDER BY CONVERT(varchar(7),SUBMITDATE, 120)
Which produces the following result:
My question is: How do I consolidate the results of each month in one row?
ex. for '2015-06', I have 3 rows of results.
Is this possible?
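Wrap the CASE expression in AVG and group by month only, so that each month collapses into a single row. You can add ELSE 0 to avoid NULL values, but be aware that those zeros are then averaged into every column and pull each result towards 0; leave the ELSE out if you want the true per-type average, because AVG ignores NULLs.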
-- One row per month (yyyy-mm); each column is the average number of days between
-- SUBMITDATE and DateClosed for closed rows of one report type.
SELECT
CONVERT(varchar(7),SUBMITDATE, 120) as 'Month'
-- No ELSE branch on purpose: rows of other report types evaluate to NULL, which AVG
-- skips. An ELSE 0 would add a zero for every non-matching row and drag each average
-- down. A month without any row of a given type returns NULL in that column; wrap the
-- AVG in COALESCE(..., 0) if a zero is preferred for display.
-- NOTE(review): DATEDIFF returns an integer, so AVG yields an integer as well; cast the
-- DATEDIFF result to a decimal type if fractional days are wanted.
,AVG(CASE WHEN ReportType = '1' THEN DATEDIFF(DAY,SUBMITDATE,DateClosed) END) as 'Report1Avg'
,AVG(CASE WHEN ReportType = '2' THEN DATEDIFF(DAY,SUBMITDATE,DateClosed) END) as 'Report2Avg'
,AVG(CASE WHEN ReportType = '3' THEN DATEDIFF(DAY,SUBMITDATE,DateClosed) END) as 'Report3Avg'
,AVG(CASE WHEN ReportType = '4' THEN DATEDIFF(DAY,SUBMITDATE,DateClosed) END) as 'Report4Avg'
,AVG(CASE WHEN ReportType = '5' THEN DATEDIFF(DAY,SUBMITDATE,DateClosed) END) as 'Report5Avg'
FROM Table1
WHERE STATUS = 'Closed'
-- Grouping by month alone is what merges the per-type rows into one row per month.
GROUP BY CONVERT(varchar(7),SUBMITDATE, 120)
ORDER BY CONVERT(varchar(7),SUBMITDATE, 120)

Count, Having and Case statement

I'm looking to produce a count of projects based on 3 different conditions (DB2 database). I need to count all projects <= .10, >= .5, and >= 1.00 (percentage_used) but can only group by dim_building_id and building_name. Of course this query will not run because it requires percentage_used to be added to the group by. How do I handle those 3 conditions with percentage_used?
-- Intended result: group totals and three project counts per building.
-- As the question states, this statement does not run in its current form.
SELECT
-- Group measures are cast to BIGINT before being summed.
SUM(CAST(FTS.GROUP_A AS BIGINT)) AS GROUP_A,
SUM(CAST(FTS.GROUP_B AS BIGINT)) AS GROUP_B,
SUM(CAST(FTS.GROUP_C AS BIGINT)) AS GROUP_C,
-- The three CASE expressions test PERCENTAGE_USED outside of any aggregate and put
-- COUNT(*) in the THEN branch; that is why the column would have to be added to GROUP BY.
-- NOTE(review): the thresholds are written as string literals -- confirm the data type
-- of PERCENTAGE_USED.
CASE WHEN FAT.PERCENTAGE_USED <= '0.10'
THEN COUNT(*)
END AS PROJECTS_L10,
CASE WHEN FAT.PERCENTAGE_USED >= '0.50'
THEN COUNT(*)
END AS PROJECTS_G50,
CASE WHEN FAT.PERCENTAGE_USED >= '1.00'
THEN COUNT(*)
END AS PROJECTS_G100,
-- Day and week differences between fixed dates and the current date; identical on every row.
DAYS(DATE('2014-07-01')) - DAYS(CURRENT DATE) AS DAYS_LEFT,
(DAYS(DATE('2014-07-01')) - DAYS(CURRENT DATE))/7 AS WEEKS_LEFT,
DAYS(DATE('2013-12-31')) - DAYS(CURRENT DATE) AS DAYS_LEFT_YEAR
FROM FACT_TABLE AS FAT
INNER JOIN GROUPS AS FTS ON FAT.DIM_PROJECT_ID = FTS.DIM_PROJECT_ID
GROUP BY FAT.DIM_BUILDING_ID, FAT.BUILDING_NAME;
I'd do something like this:
-- Number of joined project rows per building, split into mutually exclusive
-- percentage_used buckets:
--   group_a: <= 0.10      group_b: > 0.10 and < 0.50     group_c: >= 0.50 and < 1.00
--   group_d: >= 1.00      group_e: percentage_used is NULL
-- The building name is grouped together with the building id instead of joining the
-- aggregate back to fact_table, which would repeat each building once per fact row.
-- The id column is dim_building_id, the name used by the question's own query.
select fat.building_name ,
       fat.dim_building_id ,
       sum( case when fat.percentage_used <= 0.10 then 1 else 0 end ) as group_a ,
       sum( case when fat.percentage_used > 0.10 and fat.percentage_used < 0.50 then 1 else 0 end ) as group_b ,
       sum( case when fat.percentage_used >= 0.50 and fat.percentage_used < 1.00 then 1 else 0 end ) as group_c ,
       sum( case when fat.percentage_used >= 1.00 then 1 else 0 end ) as group_d ,
       sum( case when fat.percentage_used is null then 1 else 0 end ) as group_e
from fact_table fat
-- The join condition must use the declared alias fts.
join groups fts on fts.dim_project_id = fat.dim_project_id
group by fat.dim_building_id ,
         fat.building_name
Instead of putting the aggregate as the THEN, wrap the CASE statement in an aggregate:
-- Group totals and threshold-based project counts per building, plus countdown values.
select
    -- Group measures, cast to BIGINT before being summed.
    sum(cast(FTS.GROUP_A as bigint)) as GROUP_A,
    sum(cast(FTS.GROUP_B as bigint)) as GROUP_B,
    sum(cast(FTS.GROUP_C as bigint)) as GROUP_C,
    -- Conditional counts: each row contributes 1 when it meets the threshold, else 0.
    -- The >= '0.50' count also includes the rows counted under >= '1.00'.
    sum(
        case
            when FAT.PERCENTAGE_USED <= '0.10' then 1
            else 0
        end
    ) as PROJECTS_L10,
    sum(
        case
            when FAT.PERCENTAGE_USED >= '0.50' then 1
            else 0
        end
    ) as PROJECTS_G50,
    sum(
        case
            when FAT.PERCENTAGE_USED >= '1.00' then 1
            else 0
        end
    ) as PROJECTS_G100,
    -- Day and week differences between fixed dates and the current date; identical on every row.
    days(date('2014-07-01')) - days(current date) as DAYS_LEFT,
    (days(date('2014-07-01')) - days(current date))/7 as WEEKS_LEFT,
    days(date('2013-12-31')) - days(current date) as DAYS_LEFT_YEAR
from FACT_TABLE as FAT
inner join GROUPS as FTS
    on FAT.DIM_PROJECT_ID = FTS.DIM_PROJECT_ID
group by
    FAT.DIM_BUILDING_ID,
    FAT.BUILDING_NAME;
The ELSE 0 isn't strictly needed, but without it a group with no matching rows returns NULL instead of 0, so some prefer to keep it in the query.
Alternatively this should work as well:
COUNT(CASE WHEN FAT.PERCENTAGE_USED >= '0.50' THEN 1 END) AS PROJECTS_G50,

One date check for entire query

I have the following query:
-- Counts fruit_properties rows per id inside the :ad_startdate .. :ad_enddate window:
-- all results except 'E', and separately the 'G' and 'R' results.
select
fp.id,
fr.id,
-- total: the binds are converted with TO_DATE(..., 'YYYY-MM-DD') here ...
sum(case
when to_date(fp.offered_date) BETWEEN TO_DATE( :ad_startdate, 'YYYY-MM-DD')
AND TO_DATE(:ad_enddate, 'YYYY-MM-DD') and fp.result <> 'E'
then 1
else 0
end) total,
-- ... but compared without conversion in the two sums below.
-- NOTE(review): the binds are either strings or dates; one of the two usages relies
-- on implicit conversion -- confirm the bind type.
sum(case when fp.result = 'G'
and to_date(fp.offered_date) >= :ad_startdate
and to_date(fp.offered_date) <= :ad_enddate then 1 else 0 end) colorgreen,
-- NOTE(review): this sum reads fp.resultat while the others read fp.result -- possibly a typo; confirm.
sum(case when fp.resultat = 'R'
and to_date(fp.offered_date) >= :ad_startdate
and to_date(fp.offered_date) <= :ad_enddate then 1 else 0 end) colorred
FROM
fruit_properties fp, fruit fr
WHERE
-- Comma join: this predicate is the join condition between the two tables.
fp.id = fr.id
GROUP BY
fp.id, fr.id
I'm checking the dates once for each sum column and have a feeling this could be done just once somehow. Right now, if I check the dates only in the total column, colorgreen + colorred might be larger than the total, since they count rows regardless of their date.
Can my query be enhanced somehow?
You can simplify it like this, but PLEASE check your SQL: you're mixing TO_DATE results and CHAR datatypes, and this will only end in disaster.
eg you have:
when to_date(fp.offered_date) BETWEEN TO_DATE( :ad_startdate, 'YYYY-MM-DD')
AND TO_DATE(:ad_enddate, 'YYYY-MM-DD')
vs
sum(case when fp.result = 'G'
and to_date(fp.offered_date) >= :ad_startdate
In one case you are TO_DATE'ing ad_startdate but not in the other (so is it already a date or not?). You are also TO_DATE'ing the column, but crucially WITHOUT a format mask. Is the column really a VARCHAR datatype? If so, you really should not store dates as anything but DATEs.
Anyway, assuming the column is a DATE datatype and the binds are of type DATE:
-- Evaluates the date window once per row in the derived table (within_offer = 1 or 0),
-- then sums that flag per result code. Assumes offered_date and both binds are DATEs.
select fruit_prop_id, fruit_id,
       sum(case when result != 'E' then within_offer else 0 end) total,
       sum(case when result = 'R' then within_offer else 0 end) colorred,
       sum(case when result = 'G' then within_offer else 0 end) colorgreen
from (select fp.id fruit_prop_id,  -- id taken from fruit_properties
             fr.id fruit_id,       -- id taken from fruit (equal to fp.id through the join)
             fp.result,
             case
               when fp.offered_date >= :ad_startdate
                and fp.offered_date <= :ad_enddate then 1 else 0 end within_offer
      from fruit_properties fp, fruit fr
      where fp.id = fr.id)
group by fruit_prop_id, fruit_id
You can put the date check in the where clause:
-- Applies the date window once in the WHERE clause, so every sum sees only rows
-- inside :ad_startdate .. :ad_enddate. Ids without any row in the window are not
-- returned at all.
select
    fp.id,
    fr.id,
    sum(case when fp.result <> 'E' then 1 else 0 end) total,
    sum(case when fp.result = 'G' then 1 else 0 end) colorgreen,
    -- NOTE(review): the question spells this column both "result" and "resultat";
    -- "result" is used throughout here -- confirm the real column name.
    sum(case when fp.result = 'R' then 1 else 0 end) colorred
FROM
    fruit_properties fp, fruit fr
WHERE
    fp.id = fr.id
    AND to_date(fp.offered_date) >= :ad_startdate
    AND to_date(fp.offered_date) <= :ad_enddate
GROUP BY
    fp.id, fr.id
Edit: as pointed out in the comments, this query will filter out ids that don't have any offered dates in the given interval.

Odd GROUP BY output DB2 - Results not as expected

If I run the following query:
-- Statement totals for load cycle 200911, broken down by delinquency age code and
-- two Y/N flags derived from ec_tot_bal and ac_stat_cde.
select load_cyc_num
, crnt_dnlq_age_cde
, sum(cc_min_pymt_amt) as min_pymt
-- NOTE(review): the next two columns share the alias "budget" (one is a sum, the
-- other the Y/N flag). Some clients resolve result columns by name, which may be
-- related to the grouping looking wrong -- confirm how the results are read.
, sum(ec_tot_bal) as budget
, case when ec_tot_bal > 0 then 'Y' else 'N' end as budget
, case when ac_stat_cde in ('A0P','A1P','ARP','A3P') then 'Y' else 'N' end as arngmnt
, sum(sn_close_bal) as st_bal
from statements
where (sn_close_bal > 0 or ec_tot_bal > 0)
and load_cyc_num in (200911)
-- The two CASE expressions are repeated verbatim so the flags are part of the grouping key.
group by load_cyc_num
, crnt_dnlq_age_cde
, case when ec_tot_bal > 0 then 'Y' else 'N' end
, case when ac_stat_cde in ('A0P','A1P','ARP','A3P') then 'Y' else 'N' end
then I get the correct "BUDGET" grouping, but not the correct "ARRANGEMENT" grouping, only two rows have a "Y".
If I change the order of the case statements in the GROUP BY, then I get the correct grouping (full Y-N breakdown for both columns).
Am I missing something obvious?
Try moving
, sum(cc_min_pymt_amt) as min_pymt, sum(ec_tot_bal) as budget
to the end of the select statement, i.e.
-- Statement totals for load cycle 200911, broken down by delinquency age code and
-- two Y/N flags; grouping columns first, aggregates last.
select load_cyc_num,
       crnt_dnlq_age_cde,
       case when ec_tot_bal > 0 then 'Y' else 'N' end as budget,
       case when ac_stat_cde in ('A0P','A1P','ARP','A3P') then 'Y' else 'N' end as arngmnt,
       sum(sn_close_bal) as st_bal,
       sum(cc_min_pymt_amt) as min_pymt,
       -- Named budget_amt rather than "budget": the Y/N flag above already uses that
       -- alias, and two result columns with the same name are easy to mix up for any
       -- client that reads columns by name.
       sum(ec_tot_bal) as budget_amt
from statements
where (sn_close_bal > 0 or ec_tot_bal > 0)
  and load_cyc_num in (200911)
-- The two CASE expressions are repeated verbatim so the flags are part of the grouping key.
group by load_cyc_num,
         crnt_dnlq_age_cde,
         case when ec_tot_bal > 0 then 'Y' else 'N' end,
         case when ac_stat_cde in ('A0P','A1P','ARP','A3P') then 'Y' else 'N' end