SQL sum only distinct value - sql

I have a table which contain the following columns:
plantId (pk)
unitId
maxCapacity
numberOfPlants
geographicalArea
agreementStart
agreementEnd
plantType
The table contain the following data:
1, A1, 10, 1, 3, 2013-01-01, 2013-12-31
2, A2, 10, 1, 3, 2013-01-01, 2013-12-31
3, A1, 10, 1, 3, 2013-03-01, 2013-12-31
I would like to query my table like this:
SELECT DISTINCT
plantType,
geographicalArea,
sum(maxCapacity) as maxCapacity,
sum(numberOfPlants) as numberOfPlants,
count(unitId) as idCount
FROM
tbl_plant
WHERE
(agreementStart <= '2013-11-28' AND
agreementEnd >= '2013-11-28') AND
plantType <> '0' AND
plantType = '2' AND
geographicalArea = '3'
GROUP BY
plantType, geographicalArea
This query works well as long as there is only one unique unitId returned per row in the resultset. However, if the same unitId is returned a second time (see A1 above), I do not want the sum(maxCapacity) and sum(numberOfPlants) include the sum a second time since it has already included it.
Any ideas of how I'd need to change this query?

Use inner query to get rid of duplicates
SELECT plantType, geographicalArea, sum(maxCapacity) as maxCapacity, sum(numberOfPlants) as numberOfPlants, count(unitId) as idCount
FROM tbl_plant
WHERE plantId in (
SELECT MAX(plantId)
FROM tbl_plant
WHERE (agreementStart <= '2013-11-28' AND agreementEnd >= '2013-11-28')
AND plantType <> '0'
AND plantType = '2'
AND geographicalArea = '3'
GROUP BY unitId
)
GROUP BY plantType, geographicalArea

Related

How to display the ID value in a SUM SQL query?

I have this select query, that summarize columns:
SELECT
SUM (LineTotal),
SUM (VatSum),
SUM (GTotal)
FROM CSI1
WHERE DocEntry = '100'
It shows the expected values correctly, but I need to display the id (DocEntry) value too.
Thanks for any advice!
You need a GROUP BY clause:
SELECT
DocEntry,
SUM(LineTotal) AS LineTotal,
SUM(VatSum) AS VatSum,
SUM(GTotal) AS GTotal
FROM (VALUES
('100', 1, 1, 1),
('100', 1, 1, 1),
('200', 1, 1, 1)
) CSI1 (DocEntry, LineTotal, VatSum, GTotal)
WHERE DocEntry = '100'
GROUP BY DocEntry
Result:
DocEntry LineTotal VatSum GTotal
--------------------------------
100 2 2 2

SQL - '1' IF hour in month EXISTS, '0' IF NOT EXISTS

I have a table that has aggregations down to the hour level YYYYMMDDHH. The data is aggregated and loaded by an external process (I don't have control over). I want to test the data on a monthly basis.
The question I am looking to answer is: Does every hour in the month exist?
I'm looking to produce output that will return a 1 if the hour exists or 0 if the hour does not exist.
The aggregation table looks something like this...
YYYYMM YYYYMMDD YYYYMMDDHH DATA_AGG
201911 20191101 2019110100 100
201911 20191101 2019110101 125
201911 20191101 2019110103 135
201911 20191101 2019110105 95
… … … …
201911 20191130 2019113020 100
201911 20191130 2019113021 110
201911 20191130 2019113022 125
201911 20191130 2019113023 135
And defined as...
CREATE TABLE YYYYMMDDHH_DATA_AGG AS (
YYYYMM VARCHAR,
YYYYMMDD VARCHAR,
YYYYMMDDHH VARCHAR,
DATA_AGG INT
);
I'm looking to produce the following below...
YYYYMMDDHH HOUR_EXISTS
2019110100 1
2019110101 1
2019110102 0
2019110103 1
2019110104 0
2019110105 1
... ...
In the example above, two hours do not exist, 2019110102 and 2019110104.
I assume I'd have to join the aggregation table against a computed table that contains all the YYYYMMDDHH combos???
The database is Snowflake, but assume most generic ANSI SQL queries will work.
You can get what you want with a recursive CTE
The recursive CTE generates the list of possible Hours. And then a simple left outer join gets you the flag for if you have any records that match that hour.
WITH RECURSIVE CTE (YYYYMMDDHH) as
(
SELECT YYYYMMDDHH
FROM YYYYMMDDHH_DATA_AGG
WHERE YYYYMMDDHH = (SELECT MIN(YYYYMMDDHH) FROM YYYYMMDDHH_DATA_AGG)
UNION ALL
SELECT TO_VARCHAR(DATEADD(HOUR, 1, TO_TIMESTAMP(C.YYYYMMDDHH, 'YYYYMMDDHH')), 'YYYYMMDDHH') YYYYMMDDHH
FROM CTE C
WHERE TO_VARCHAR(DATEADD(HOUR, 1, TO_TIMESTAMP(C.YYYYMMDDHH, 'YYYYMMDDHH')), 'YYYYMMDDHH') <= (SELECT MAX(YYYYMMDDHH) FROM YYYYMMDDHH_DATA_AGG)
)
SELECT
C.YYYYMMDDHH,
IFF(A.YYYYMMDDHH IS NOT NULL, 1, 0) HOUR_EXISTS
FROM CTE C
LEFT OUTER JOIN YYYYMMDDHH_DATA_AGG A
ON C.YYYYMMDDHH = A.YYYYMMDDHH;
If your timerange is too long you'll have issues with the cte recursing too much. You can create a table or temp table with all of the possible hours instead. For example:
CREATE OR REPLACE TEMPORARY TABLE HOURS (YYYYMMDDHH VARCHAR) AS
SELECT TO_VARCHAR(DATEADD(HOUR, SEQ4(), TO_TIMESTAMP((SELECT MIN(YYYYMMDDHH) FROM YYYYMMDDHH_DATA_AGG), 'YYYYMMDDHH')), 'YYYYMMDDHH')
FROM TABLE(GENERATOR(ROWCOUNT => 10000)) V
ORDER BY 1;
SELECT
H.YYYYMMDDHH,
IFF(A.YYYYMMDDHH IS NOT NULL, 1, 0) HOUR_EXISTS
FROM HOURS H
LEFT OUTER JOIN YYYYMMDDHH_DATA_AGG A
ON H.YYYYMMDDHH = A.YYYYMMDDHH
WHERE H.YYYYMMDDHH <= (SELECT MAX(YYYYMMDDHH) FROM YYYYMMDDHH_DATA_AGG);
You can then fiddle with the generator count to make sure you have enough hours.
You can generate a table with every hour of the month and LEFT OUTER JOIN your aggregation to it:
WITH EVERY_HOUR AS (
SELECT TO_CHAR(DATEADD(HOUR, HH, TO_DATE(YYYYMM::TEXT, 'YYYYMM')),
'YYYYMMDDHH')::NUMBER YYYYMMDDHH
FROM (SELECT DISTINCT YYYYMM FROM YYYYMMDDHH_DATA_AGG) t
CROSS JOIN (
SELECT ROW_NUMBER() OVER (ORDER BY NULL) - 1 HH
FROM TABLE(GENERATOR(ROWCOUNT => 745))
) h
QUALIFY YYYYMMDDHH < (YYYYMM + 1) * 10000
)
SELECT h.YYYYMMDDHH, NVL2(a.YYYYMM, 1, 0) HOUR_EXISTS
FROM EVERY_HOUR h
LEFT OUTER JOIN YYYYMMDDHH_DATA_AGG a ON a.YYYYMMDDHH = h.YYYYMMDDHH
Here's something that might help get you started. I'm guessing you want to have 'synthetic' [YYYYMMDD] values? Otherwise, if the value aren't there, then they shouldn't appear in the list
DROP TABLE IF EXISTS #_hours
DROP TABLE IF EXISTS #_temp
--Populate a table with hours ranging from 00 to 23
CREATE TABLE #_hours ([hour_value] VARCHAR(2))
DECLARE #_i INT = 0
WHILE (#_i < 24)
BEGIN
INSERT INTO #_hours
SELECT FORMAT(#_i, '0#')
SET #_i += 1
END
-- Replicate OP's sample data set
CREATE TABLE #_temp (
[YYYYMM] INTEGER
, [YYYYMMDD] INTEGER
, [YYYYMMDDHH] INTEGER
, [DATA_AGG] INTEGER
)
INSERT INTO #_temp
VALUES
(201911, 20191101, 2019110100, 100),
(201911, 20191101, 2019110101, 125),
(201911, 20191101, 2019110103, 135),
(201911, 20191101, 2019110105, 95),
(201911, 20191130, 2019113020, 100),
(201911, 20191130, 2019113021, 110),
(201911, 20191130, 2019113022, 125),
(201911, 20191130, 2019113023, 135)
SELECT X.YYYYMM, X.YYYYMMDD, X.YYYYMMDDHH
-- Case: If 'target_hours' doesn't exist, then 0, else 1
, CASE WHEN X.target_hours IS NULL THEN '0' ELSE '1' END AS [HOUR_EXISTS]
FROM (
-- Select right 2 characters from converted [YYYYMMDDHH] to act as 'target values'
SELECT T.*
, RIGHT(CAST(T.[YYYYMMDDHH] AS VARCHAR(10)), 2) AS [target_hours]
FROM #_temp AS T
) AS X
-- Right join to keep all of our hours and only the target hours that match.
RIGHT JOIN #_hours AS H ON H.hour_value = X.target_hours
Sample output:
YYYYMM YYYYMMDD YYYYMMDDHH HOUR_EXISTS
201911 20191101 2019110100 1
201911 20191101 2019110101 1
NULL NULL NULL 0
201911 20191101 2019110103 1
NULL NULL NULL 0
201911 20191101 2019110105 1
NULL NULL NULL 0
With (almost) standard sql, you can do a cross join of the distinct values of YYYYMMDD to a list of all possible hours and then left join to the table:
select concat(d.YYYYMMDD, h.hour) as YYYYMMDDHH,
case when t.YYYYMMDDHH is null then 0 else 1 end as hour_exists
from (select distinct YYYYMMDD from tablename) as d
cross join (
select '00' as hour union all select '01' union all
select '02' union all select '03' union all
select '04' union all select '05' union all
select '06' union all select '07' union all
select '08' union all select '09' union all
select '10' union all select '11' union all
select '12' union all select '13' union all
select '14' union all select '15' union all
select '16' union all select '17' union all
select '18' union all select '19' union all
select '20' union all select '21' union all
select '22' union all select '23'
) as h
left join tablename as t
on concat(d.YYYYMMDD, h.hour) = t.YYYYMMDDHH
order by concat(d.YYYYMMDD, h.hour)
Maybe in Snowflake you can construct the list of hours with a sequence much easier instead of all those UNION ALLs.
This version accounts for the full range of days, across months and years. It's a simple cross join of the set of possible days with the set of possible hours of the day -- left joined to actual dates.
set first = (select min(yyyymmdd::number) from YYYYMMDDHH_DATA_AGG);
set last = (select max(yyyymmdd::number) from YYYYMMDDHH_DATA_AGG);
with
hours as (select row_number() over (order by null) - 1 h from table(generator(rowcount=>24))),
days as (
select
row_number() over (order by null) - 1 as n,
to_date($first::text, 'YYYYMMDD')::date + n as d,
to_char(d, 'YYYYMMDD') as yyyymmdd
from table(generator(rowcount=>($last-$first+1)))
)
select days.yyyymmdd || lpad(hours.h,2,0) as YYYYMMDDHH, nvl2(t.yyyymmddhh,1,0) as HOUR_EXISTS
from days cross join hours
left join YYYYMMDDHH_DATA_AGG t on t.yyyymmddhh = days.yyyymmdd || lpad(hours.h,2,0)
order by 1
;
$first and $last can be packed in as sub-queries if you prefer.

Get the aggregated result of a GROUP BY for each value on WHERE clause in TSQL

I have a table in SQL Server with the following format
MType (Integer), MDate (Datetime), Status (SmallInt)
1, 10-05-2018, 1
1, 15-05-2018, 1
2, 25-3-2018, 0
3, 12-01-2018, 1
....
I want to get the MIN MDate for specific MTypes for future dates. In case there isn't one, then the MType should be returned but with NULL value.
Here is what I have done until now:
SELECT m.MType,
MIN(m.MDate)
FROM MyTypes m
WHERE m.MType IN ( 1, 2, 3, 4)
AND m.MDate > GETDATE()
AND m.Status = 1
GROUP BY m.MType
Obviously, the above will return only the following:
1, 10-05-2018
Since there are any other rows with future date and status equals to 1.
However, the results I want are:
1, 10-05-2018
2, NULL
3, NULL
4, NULL //this is missing in general from the table. No MType with value 4
The table is big, so performance is something to take into account. Any ideas how to proceed?
One way is to join the table to itself and filter the date in the ON clause.
SELECT a.Mtype, MIN(b.MDate)
FROM MyTypes a
LEFT JOIN MyTypes b
ON a.MType = b.MType
AND b.MDate > GETDATE()
AND b.Status = 1
WHERE a.MType IN ( 1, 2, 3)
GROUP BY a.MType
Here's a Demo.
I don't know what is logic behind but it seems to use of look-up tables
SELECT a.MType, l.MDate
FROM
(
values (1),(2),(3),(4)
)a (MType)
LEFT JOIN (
SELECT m.MType,
MIN(m.MDate) MDate
FROM MyTypes m
WHERE m.MDate > GETDATE()
AND m.Status = 1
GROUP BY m.MType
)l on l.MType = a.MType
Use a windows function and a union to a numbers table:
declare #t table (MType int, MDate datetime, [Status] smallint)
Insert into #t values (1, convert(date, '10-05-2018', 103), 1)
,(1, convert(date, '15-05-2018', 103), 1)
,(2, convert(date, '25-03-2018', 103), 0)
,(3, convert(date, '12-01-2018', 103), 1)
Select DISTINCT Mtype
, min(iiF(MDate>getdate() and status = 1, MDate, NUll)) over (Partition By Mtype) as MDate
from ( SELECT TOP 10000 row_number() over(order by t1.number) as MType
, '1900-01-01' as MDate, 0 as [Status]
FROM master..spt_values t1
CROSS JOIN master..spt_values t2
union
Select Mtype, MDate, [Status] from #t
) x
where MType in (1,2,3,4)
order by x.MType

Getting rid of grouping field

Is there a safe way to not have to group by a field when using an aggregate in another field? Here is my example
SELECT
C.CustomerName
,D.INDUSTRY_CODE
,CASE WHEN D.INDUSTRY_CODE IN ('003','004','005','006','007','008','009','010','017','029')
THEN 'PM'
WHEN UPPER(CustomerName) = 'ULINE INC'
THEN 'ULINE'
ELSE 'DR'
END AS BU
,ISNULL((SELECT SUM(GrossAmount)
where CONVERT(date,convert(char(8),InvoiceDateID )) between DATEADD(yy, DATEDIFF(yy, 0, GETDATE()) - 1, 0) and DATEADD(year, -1, GETDATE())),0) [PREVIOUS YEAR GROSS]
FROM factMargins A
LEFT OUTER JOIN dimDate B ON A.InvoiceDateID = B.DateId
LEFT OUTER JOIN dimCustomer C ON A.CustomerID = C.CustomerId
LEFT OUTER JOIN CRCDATA.DBO.CU10 D ON D.CUST_NUMB = C.CustomerNumber
GROUP BY
C.CustomerName,D.INDUSTRY_CODE
,A.InvoiceDateID
order by CustomerName
before grouping I was only getting 984 rows but after grouping by the A.InvoiceDateId field I am getting over 11k rows. The rows blow up since there are multiple invoices per customer. Min and Max wont work since then it will pull data incorrectly. Would it be best to let my application (crystal) get rid of the extra lines? Usually I like to have my base data be as close as possible to how the report will layout if possible.
Try moving the reference to InvoiceDateID to within an aggregate function, rather than within a selected subquery's WHERE clause.
In Oracle, here's an example:
with TheData as (
select 'A' customerID, 25 AMOUNT , trunc(sysdate) THEDATE from dual union
select 'B' customerID, 35 AMOUNT , trunc(sysdate-1) THEDATE from dual union
select 'A' customerID, 45 AMOUNT , trunc(sysdate-2) THEDATE from dual union
select 'A' customerID, 11000 AMOUNT , trunc(sysdate-3) THEDATE from dual union
select 'B' customerID, 12000 AMOUNT , trunc(sysdate-4) THEDATE from dual union
select 'A' customerID, 15000 AMOUNT , trunc(sysdate-5) THEDATE from dual)
select
CustomerID,
sum(amount) as "AllRevenue"
sum(case when thedate<sysdate-3 then amount else 0 end) as "OlderRevenue",
from thedata
group by customerID;
Output:
CustomerID | AllRevenue | OlderRevenue
A | 26070 | 26000
B | 12035 | 12000
This says:
For each customerID
I want the sum of all amounts
and I want the sum of amounts earlier than 3 days ago

Results of multiple queries with aggregates combined

I have 2 seperate select statements, using aggregate functions in each. I would like to be able to take the results and combine them.
table_a
id int
entered_date datetime (holds utc stamp)
balance money
group_id int
table_b
id int
entered_date date
balance money
transaction_type int
query 1:
select convert(date,entered_date), sum(balance) as earned
from table_a
where group_id in (1, 2, 3, 4)
group by convert(date,entered_Date)
query 2:
select convert(date,entered_date), sum(balance) as spent
where transaction_type = 2
group by convert(date,entered_Date)
results:
query 1:
2012-05-13, 5000
2012-05-14, 12000
...
query 2:
2012-05-13, 9000
2012-05-14, 55856
...
I would like to return one row for each record without using temp tables. The result set should have a date, then earned vs. spent. I have a report running using union to get the totals and that is fine, but i need to generate a result set with 1 record and a earned vs against line. Any help with this is appreciated.
Try:
;With AllDates AS
(
select convert(date,entered_date) As EnteredDate
from table_a
where group_id in (1, 2, 3, 4)
group by convert(date,entered_Date)
UNION
select convert(date,entered_date)
from table_b
where transaction_type = 2
group by convert(date,entered_Date)
)
, AllEarned AS (
select convert(date,entered_date) AS EnteredDate, sum(balance) as Earned
from table_a
where group_id in (1, 2, 3, 4)
group by convert(date,entered_Date)
)
, AllSpent AS (
select convert(date,entered_date) AS EnteredDate, sum(balance) as Spent
from table_b
where transaction_type = 2
group by convert(date,entered_Date)
)
SELECT
d.EnteredDate, e.Earned, s.Spent
FROM AllDates d
LEFT OUTER JOIN AllEarned e ON d.EnteredDate=e.EnteredDate
LEFT OUTER JOIN AllSpent s ON d.EnteredDate=s.EnteredDate
ORDER BY 1,2,3
You can combine these using logic, assuming that both are from the same table
(the second query is missing the from statement):
select convert(date,entered_date),
sum(case when group_id in (1, 2, 3, 4) then balance end) as earned,
sum(case when transaction_type = 2 then balance end) as spend
from table_a
group by convert(date,entered_Date)
SELECT
CASE WHEN a.a_date IS NULL THEN b.a_date ELSE a.a_date END as a_data,
a.earned,
b.spent
FROM
(select
convert(date,entered_date) as a_date,
sum(balance) as earned
from table_a
where group_id in (1, 2, 3, 4)
group by entered_Date) A
FULL OUTER JOIN
(select
convert(date,entered_date) as a_date,
sum(balance) as spent
from table_a
where transaction_type = 2
group by entered_Date) B
ON A.a_date=b.a_date
Or using FULL OUTER JOIN if there are data that don't meet both conditions. And using CASE WHEN a.a_date IS NULL THEN b.a_date ELSE a.a_date END as a_data
Assuming earned amounts are from table_a and spent amounts are from table_b,
; WITH a AS (
select entered_date=convert(date,entered_date), balance as earned, 0 AS spent
from table_a
where group_id in (1, 2, 3, 4)
UNION ALL
select entered_date=convert(date,entered_date), 0 AS earned, balance as spent
from table_b
where transaction_type = 2
)
SELECT entered_date
, earned=SUM(earned)
, spent=SUM(spent)
FROM a
GROUP BY entered_date