SQL group by in Subquery - sql

I'm trying to get monthly production using group by after converting the unix column into regular timestamp. Can you please tell how to use group by here in the code.
'''
With production(SystemId, dayof, monthof, yearof, powerwatts, productionwattshours) as
(
Select SystemId,
[dayof] = DAY(hrdtc),
[monthof] = MONTH(hrdtc),
[yearof] = YEAR(hrdtc),
powerwatts, productionwatthours
from (
Select * , dateadd(s, UnixTime, '19700101') as hrdtc from meterreading ) ds
)
Select * from production
where systemId = 2368252
'''

I think you're looking for this (technically you don't need a subquery but it allows you to avoid repeating the DATEADD() expression):
SELECT SystemId = 2368252,
[Month] = DATEFROMPARTS(YEAR(hrdtc), MONTH(hrdtc), 1),
powerwatts = SUM(powerwatts),
productionwatthours = SUM(productionwatthours)
FROM
(
SELECT powerwatts, productionwatthours,
DATEADD(SECOND, UnixTime, '19700101') as hrdtc
FROM dbo.enphasemeterreading
WHERE systemId = 2368252
) AS ds
GROUP BY DATEFROMPARTS(YEAR(hrdtc), MONTH(hrdtc), 1);
If you want to also avoid repeating the GROUP BY expression:
SELECT SystemId = 2368252,
[Month],
powerwatts = SUM(powerwatts),
productionwatthours = SUM(productionwatthours)
FROM
(
SELECT [Month] = DATEFROMPARTS(YEAR(hrdtc), MONTH(hrdtc), 1),
powerwatts, productionwatthours
FROM
(
SELECT powerwatts, productionwatthours,
DATEADD(SECOND, UnixTime, '19700101') as hrdtc
FROM dbo.enphasemeterreading
WHERE systemId = 2368252
) AS ds1
) AS ds2
GROUP BY [Month];
Personally I don't think that's any prettier or clearer. A couple of other tips:
Spell it out; shorthand is lazy and problematic
Always qualify tables and other objects with schema
Updated requirement (please state these up front): How would I join this query to another table?
SELECT * FROM dbo.SomeOtherTable AS sot
INNER JOIN
(
SELECT SystemId = 2368252,
[Month],
powerwatts = SUM(powerwatts),
productionwatthours = SUM(productionwatthours)
FROM
...
GROUP BY [Month]
) AS agg
ON sot.SystemId = agg.SystemId;

Related

Parametrizing dates in SQL IN clause - using cell magic in jupyter notebook

Using MSSQL db as the backend, I have a cell in my notebook with this sql which works fine.
%%sql
select * from (
select count(*) as CNT, COL1,CONVERT(VARCHAR,CAST(CREATED_DATE AS date)) as dt from TABLE1
group by COL1,CONVERT(VARCHAR,CAST(CREATED_DATE AS date))
)t
PIVOT
(
sum(CNT) for [dt] in ([2022-07-25],[2022-07-26])
) AS PivotTable
I am trying to parameterize the [IN] clause in the pivot.
Tried a few things, but without much success
import pandas as pd
from datetime import datetime
rng = pd.date_range(end = datetime.today(), periods = 5).strftime('%Y-%m-%d').tolist()
#rng = format(','.join('[{}]'.format(i) for i in rng))
#rng = pd.date_range(end = datetime.today().date(), periods = 5)
print (rng)
['2022-07-26', '2022-07-27', '2022-07-28', '2022-07-29', '2022-07-30']
select * from (
select count(*) as CNT, COL1,CONVERT(VARCHAR,CAST(CREATED_DATE AS date)) as dt from TABLE1
group by COL1,CONVERT(VARCHAR,CAST(CREATED_DATE AS date))
)t
PIVOT
(
sum(CNT) for [dt] in (:rng)
) AS PivotTable
* mssql+pymssql://---
(pymssql._pymssql.ProgrammingError) (102, b"Incorrect syntax near '('.DB-Lib error message 20018, severity 15:\nGeneral SQL Server error: Check messages from the SQL Server\n")
[SQL: select * from (
select count(*) as CNT, COL1,CONVERT(VARCHAR,CAST(CREATED_DATE AS date)) as dt from TABLE1
group by COL1,CONVERT(VARCHAR,CAST(CREATED_DATE AS date))
)t
PIVOT
(
sum(CNT) for [dt] in (%(rng)s)
) AS PivotTable]
[parameters: {'rng': ['2022-07-26', '2022-07-27', '2022-07-28', '2022-07-29', '2022-07-30']}]
(Background on this error at: https://sqlalche.me/e/14/f405)
any ideas on how I can achieve this. I will try creating the entire query dynamically but it will be much better if I can pass the dates alone into the query.
thanks for your time.

SQL select distinct from a concatenated column

This query almost does what I want
SELECT staging.dbo.ITEM_CODES.ITEM_CODE, MAX(dbo.OC_VDAT_AUX.UDL40) AS SAMPLEDATE,
CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)) as LinkID
FROM dbo.OC_VDATA
INNER JOIN dbo.OC_VDAT_AUX ON dbo.OC_VDATA.PARTNO = dbo.OC_VDAT_AUX.PARTNOAUX AND dbo.OC_VDATA.DATETIME = dbo.OC_VDAT_AUX.DATETIMEAUX
INNER JOIN stagingPLM.dbo.ITEM_CODES ON LEFT(dbo.OC_VDATA.PARTNO, 12) = staging.dbo.ITEM_CODES.SPEC_NO
AND LEFT(dbo.OC_VDAT_AUX.PARTNOAUX, 12) = stagingPLM.dbo.ITEM_CODES.SPEC_NO
INNER JOIN stagingPLM.dbo.PLANTS ON dbo.OC_VDATA.UDL1 = staging.dbo.PLANTS.PLANT_CODE
WHERE (CONVERT(DATETIME, dbo.OC_VDAT_AUX.UDL40) > DATEADD(day, - 30, GETDATE()))
GROUP BY CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)),staging.dbo.ITEM_CODES.ITEM_CODE
Sample Table generated by query:
The end result that I am trying to achieve is the latest ITEM_CODE per unique LinkID Note the first and last rows in the table. The last row should not be pulled by the query.
How do I modify this query to make that happen?
I have tried various placements for DISTINCT and sub queries in the select and where statements.
I would do in your case with ROW_NUMBER window function and CTE.
Solution can be like this:
WITH FilterCTE AS
(
SELECT staging.dbo.ITEM_CODES.ITEM_CODE, MAX(dbo.OC_VDAT_AUX.UDL40) AS SAMPLEDATE,
CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)) AS LinkID,
ROW_NUMBER() OVER (PARTITION BY CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)) ORDER BY MAX(dbo.OC_VDAT_AUX.UDL40)) AS RowNumber
FROM dbo.OC_VDATA
INNER JOIN dbo.OC_VDAT_AUX ON dbo.OC_VDATA.PARTNO = dbo.OC_VDAT_AUX.PARTNOAUX AND dbo.OC_VDATA.DATETIME = dbo.OC_VDAT_AUX.DATETIMEAUX
INNER JOIN stagingPLM.dbo.ITEM_CODES ON LEFT(dbo.OC_VDATA.PARTNO, 12) = staging.dbo.ITEM_CODES.SPEC_NO
AND LEFT(dbo.OC_VDAT_AUX.PARTNOAUX, 12) = stagingPLM.dbo.ITEM_CODES.SPEC_NO
INNER JOIN stagingPLM.dbo.PLANTS ON dbo.OC_VDATA.UDL1 = staging.dbo.PLANTS.PLANT_CODE
WHERE (CONVERT(DATETIME, dbo.OC_VDAT_AUX.UDL40) > DATEADD(day, - 30, GETDATE()))
GROUP BY CONCAT(RTRIM(dbo.OC_VDATA.UDL1), RTRIM(dbo.OC_VDATA.UDL6)),staging.dbo.ITEM_CODES.ITEM_CODE
)
SELECT *
FROM FilterCTE
WHERE RowNumber = 1

The query I ran returns 2 of the same column which isn't allowed in tableau and I can't fix the query

I need to be able to get rid of one of the workdate and one of the sr_name columns but I can't figure out how to.
I'm getting the query returned like this:
Query
I'm also getting this error message when entered into tableau:
The column 'sr_name' was specified multiple times for 'Custom SQL Query'.
Below is the code I have. If I remove a sr_name from either subquery there will be an error in the join clause.
select *
from
(
select s.sr_name, cast(punchdatetime as date) as workdate,
((datediff(second, min(case when p.InOut = 1 then punchdatetime end),
max(case when p.InOut = 0 then punchdatetime end))/3600) - .5) as
hoursworked
from PunchClock p join ServiceReps s on p.ServRepID = s.ServRepID
where punchyear >= 2019
group by s.sr_name, cast(punchdatetime as date)
) v
join
(
select sr_name, t.*,
calls = (select count(*) from CRM_Correspondence cr where
cast(cr.DateCreated as date) = workdate and StatusType like '%call%' and
cr.ServRepID = t.servrepid),
reaches = (select count(*) from CRM_Correspondence cr where
cast(cr.DateCreated as date) = workdate and (StatusType = 'call reached'
or StatusType like '%SCHEDULE%') and cr.ServRepID = t.servrepid),
books = (select count(*) from os_appointments o where cast(o.DateCreated
as date) = workdate and isnull(o.confirmedby, o.booked_by) =
t.servrepid),
attends = (select count(*) from os_appointments o where
cast(o.DateCreated as date) = workdate and isnull(o.confirmedby,
o.booked_by) = t.servrepid and o.appointmentStatus = 'attended')
from
(
select cast(cor.datecreated as date) workdate, cor.ServRepID
from CRM_Correspondence cor
where cor.datecreated > '2019-01-01'
group by cast(cor.datecreated as date), cor.servrepid
) t
join ServiceReps sr on t.ServRepID = sr.ServRepID
) u on v.sr_name = u.sr_name and v.workdate = u.workdate
I need the same results just without the duplicate column so I can enter the query into tableau.
This is challenging because you have so many subqueries here. This could be refactored to use a single query which is what I would do. But using the existing query you could do something along these lines.
I had to format this very differently so I could isolate each piece.
select v.sr_name
, v.workdate
, v.hoursworked
, u.ServRepID
, u.calls
, u.reaches
, u.books
, u.attends
from
(
select s.sr_name
, cast(punchdatetime as date) as workdate
, ((datediff(second, min(case when p.InOut = 1 then punchdatetime end), max(case when p.InOut = 0 then punchdatetime end))/3600) - .5) as hoursworked
from PunchClock p
join ServiceReps s on p.ServRepID = s.ServRepID
where punchyear >= 2019
group by s.sr_name
, cast(punchdatetime as date)
) v
join
(
select sr_name
, t.*
, calls =
(
select count(*)
from CRM_Correspondence cr
where cast(cr.DateCreated as date) = workdate
and StatusType like '%call%'
and cr.ServRepID = t.servrepid
)
, reaches =
(
select count(*)
from CRM_Correspondence cr
where cast(cr.DateCreated as date) = workdate
and (StatusType = 'call reached' or StatusType like '%SCHEDULE%')
and cr.ServRepID = t.servrepid
)
, books =
(
select count(*)
from os_appointments o
where cast(o.DateCreated as date) = workdate and isnull(o.confirmedby, o.booked_by) = t.servrepid
)
, attends =
(
select count(*)
from os_appointments o
where cast(o.DateCreated as date) = workdate
and isnull(o.confirmedby, o.booked_by) = t.servrepid
and o.appointmentStatus = 'attended'
)
from
(
select cast(cor.datecreated as date) workdate
, cor.ServRepID
from CRM_Correspondence cor
where cor.datecreated > '2019-01-01'
group by cast(cor.datecreated as date)
, cor.servrepid
) t
join ServiceReps sr on t.ServRepID = sr.ServRepID
) u on v.sr_name = u.sr_name
and v.workdate = u.workdate

Running Total as SUM () OVER () in SQL when values are missing

I'm trying to calculate Running Total and it works correct but only when values I'm conditioning on are available. When some are unavailable, calculation is going wrong, some NULLs happen and at the end Running Total is incorrect, here's the example of such situation:
and I would like to be set like on the screen shot below (with missing months added), which should give correct Running Total (named backlog here) at the end:
Is there any way to define full_year and month_number columns to be visible with '0' value set when there was no value?
My current query is as below:
IF OBJECT_ID('tempdb..#Temp4') IS NOT NULL BEGIN
drop table #Temp4
end
SELECT * into #Temp4
from (
SELECT
datepart(yy, t3.[datestamp]) AS full_year
,datepart(mm, t3.[datestamp]) AS month_number
,count(*) as number_of_activities
,t2.affected_item
FROM [sm70prod].[dbo].[ACTSVCMGTM1] AS t3
JOIN [sm70prod].[dbo].[INCIDENTSM1] AS t2 ON t3.number = t2.incident_id
WHERE
t2.affected_item like 'service'
AND (t3.[type] LIKE 'Open')
GROUP BY t2.affected_item, datepart(yy, t3.[datestamp]), datepart(mm, t3.[datestamp])
)
as databases (full_year, month_number, number_of_activities, affected_item)
;
IF OBJECT_ID('tempdb..#Temp5') IS NOT NULL BEGIN
drop table #Temp5
end
SELECT * into #Temp5
from (
SELECT
datepart(yy, t3.[datestamp]) AS full_year
,datepart(mm, t3.[datestamp]) AS month_number
,count(*) as number_of_activities
,t2.affected_item
FROM [sm70prod].[dbo].[ACTSVCMGTM1] AS t3
JOIN [sm70prod].[dbo].[INCIDENTSM1] AS t2 ON t3.number = t2.incident_id
WHERE
t2.affected_item like 'service'
AND (t3.[type] LIKE 'Closed')
GROUP BY t2.affected_item, datepart(yy, t3.[datestamp]), datepart(mm, t3.[datestamp])
)
as databases (full_year, month_number, number_of_activities, affected_item)
select * from (select o.full_year
,o.month_number
,o.number_of_activities as [open]
,c.number_of_activities as [close]
,sum(o.number_of_activities - c.number_of_activities) over (ORDER BY c.full_year, c.month_number) as [backlog]
from #Temp4 o full join #Temp5 c on o.full_year = c.full_year and o.month_number = c.month_number) as sub
order by full_year, month_number
https://msdn.microsoft.com/en-gb/library/ms190349%28v=sql.110%29.aspx
Try using the coalesce function:
COALESCE(someattribute, 0);
if the attribute is NULL the value zero will be used instead.
Also note:
When comparing varchars without a regex you should use the = operator and not the LIKE operator.
I found the answer for that. For missing months I can use the code as below:
If(OBJECT_ID('tempdb..#Temp6') Is Not Null)
Begin
Drop Table #Temp6
End
create table #Temp6
(
full_year int
,month_number int
)
; WITH cteStartDate AS
(SELECT StartDate = '2007-01-01'),
cteSequence(SeqNo)
AS (SELECT 0
UNION ALL
SELECT SeqNo + 1
FROM cteSequence
WHERE SeqNo < DATEDIFF(MM,(SELECT StartDate FROM cteStartDate),getdate()))
INSERT INTO #Temp6
SELECT datepart(yy, DATEADD(MM,SeqNo,(SELECT StartDate FROM cteStartDate))) AS full_year
,datepart(mm, DATEADD(MM,SeqNo,(SELECT StartDate FROM cteStartDate))) AS month_number
FROM cteSequence
OPTION (MAXRECURSION 0)
Then I can use full join with #Temp4, add ISNULL(o.number_of_activities, 0), the same for #Temp5 and it will work.

How to join the next date value of the same table

I have a table in SQL with the following fields:
The timestamp field will have all the punches that an employee has in a day.
So having the following data:
I need to create 2 diferent queries.
need to select all the IN timestamps with their corresponding next OUT timestamp
need to select all the OUT timestamps with their corresponding previous IN timestamp
So, in the first query, I should get the following:
In the second query, I should get the following:
Any clue on how to build such queries?
HERE IS THE Fiddle: http://sqlfiddle.com/#!6/a137d/1
I believe this is what you're looking for. These queries should work on most DBMSs.
First
SELECT ea1.employeeid, ea1.timestamp AS instamp, ea2.timestamp AS outstamp
FROM employee_attendance ea1
LEFT JOIN employee_attendance ea2
ON ea2.employeeid=ea1.employeeid
AND ea2.accesscode = 'OUT'
AND ea2.timestamp = (
SELECT MIN(ea3.timestamp)
FROM employee_attendance ea3
WHERE ea3.timestamp > ea1.timestamp
AND ea3.employeeid = ea1.employeeid
)
WHERE ea1.accessCode = 'IN'
AND ea1.employeeid = 4;
Second
SELECT ea1.employeeid, ea1.timestamp AS outstamp, ea2.timestamp AS instamp
FROM employee_attendance ea1
LEFT JOIN employee_attendance ea2
ON ea2.employeeid=ea1.employeeid
AND ea2.accesscode = 'IN'
AND ea2.timestamp = (
SELECT MIN(ea3.timestamp)
FROM employee_attendance ea3
WHERE ea3.timestamp < ea1.timestamp
AND ea3.employeeid = ea1.employeeid
AND ea3.timestamp > ISNULL((
SELECT MAX(ea4.timestamp)
FROM employee_attendance ea4
WHERE ea4.accesscode = 'OUT'
AND ea4.timestamp < ea1.timestamp
AND ea4.employeeid = ea1.employeeid
), '2000-1-1')
)
WHERE ea1.accessCode = 'OUT'
AND ea1.employeeid = 4;
This looks like nice example for usage of LEAD, LAG ANALYTIC functions in SQL 2012.
SELECT * FROM
(
SELECT EMPLOYEEID, TIMESTAMP,
LEAD(timestamp) OVER (ORDER BY TIMESTAMP
) OUTTIMESTAMP, ACCESSCODE
FROM [dbo].[employee_attendance]
WHERE EMPLOYEEID =4
) T
where T.ACCESSCODE ='IN'
second query
SELECT * FROM
(
SELECT EMPLOYEEID, TIMESTAMP,
LAG(timestamp) OVER (ORDER BY TIMESTAMP
) INTIMESTAMP, ACCESSCODE
FROM [dbo].[employee_attendance]
WHERE EMPLOYEEID =4
) T
where T.ACCESSCODE ='OUT'