Partition by when NULL - sql

I have a table that looks like
Year Month ID Date Status
--------------------------------------
2013 8 99999 8/1/2013 Status A
2013 9 99999 NULL NULL
2013 10 99999 NULL NULL
2013 11 99999 NULL NULL
2013 12 99999 NULL NULL
2014 1 99999 NULL NULL
2014 2 99999 2/5/2014 Status B
2014 3 99999 NULL NULL
2014 4 99999 NULL NULL
2014 5 99999 NULL NULL
2014 6 99999 NULL NULL
2014 7 99999 NULL NULL
I want to add a column that will give me the number of the status, repeated until the next occurrence of a status, where it will add 1.
Result:
Year Month ID Date Status Value
--------------------------------------------
2013 8 99999 8/1/2013 Status A 1
2013 9 99999 NULL NULL 1
2013 10 99999 NULL NULL 1
2013 11 99999 NULL NULL 1
2013 12 99999 NULL NULL 1
2014 1 99999 NULL NULL 1
2014 2 99999 2/5/2014 Status B 2
2014 3 99999 NULL NULL 2
2014 4 99999 NULL NULL 2
2014 5 99999 NULL NULL 2
2014 6 99999 NULL NULL 2
2014 7 99999 NULL NULL 2
The NULLs are what's throwing me off... Thanks for the help!
Edit:
Here's my current query:
-- For every calendar month in [@DateStart, @DateEnd] and every ID, return the
-- latest status change recorded in that month (NULL columns when there is none).
-- T-SQL local variables must be prefixed with '@'; '#' is the temp-table prefix
-- and is not accepted by DECLARE/SET.
DECLARE @DateStart DATETIME
DECLARE @DateEnd DATETIME
-- Unseparated yyyymmdd literals do not depend on the session's DATEFORMAT/language.
SET @DateStart = '20130801'
SET @DateEnd = '20140701'
SELECT
    P.Year,
    P.Month,
    P.ID,
    PP.MaxStatusDate,
    PP.Status
FROM
    -- P: every (year, month) in the window paired with every known ID.
    (SELECT
        A.Year, A.Month, B.ID
    FROM
        (SELECT DISTINCT
            Year, Month
        FROM
            lu_Calendar
        WHERE
            Date BETWEEN @DateStart AND @DateEnd) AS A
    CROSS JOIN
        (SELECT DISTINCT
            ID
        FROM
            dbo.StatusChangeData) AS B
    ) AS P
LEFT JOIN
    -- PP: the status row carrying the most recent ModifiedDate per ID per month.
    (SELECT
        A.yr, A.mnth, A.MaxStatusDate, sce.Status, A.ID
    FROM
        (SELECT
            ID,
            YEAR([ModifiedDate]) AS yr,
            MONTH([ModifiedDate]) AS mnth,
            MAX([ModifiedDate]) AS MaxStatusDate
        FROM
            dbo.StatusChangeData
        GROUP BY
            ID, YEAR([ModifiedDate]), MONTH([ModifiedDate])) AS A
    INNER JOIN
        dbo.StatusChangeData sce
            ON sce.ID = A.ID AND A.MaxStatusDate = sce.[ModifiedDate]
    ) AS PP
        ON P.Month = PP.mnth AND P.Year = PP.yr AND P.ID = PP.ID
WHERE
    P.ID = 99999

You can do this with a correlated subquery. Essentially, this counts the number of not-NULL values before any given value:
-- For each row, count the rows of the same id that carry a status and fall in
-- the same or an earlier year/month; the count steps up by one at every new status.
select this_row.*,
       (
           select count(*)
           from StatusChangeData as seen
           where seen.id = this_row.id
             and seen.status is not null
             -- year*100+month yields a sortable yyyymm key
             and seen.year * 100 + seen.month <= this_row.year * 100 + this_row.month
       ) as value
from StatusChangeData as this_row;

Related

SQL Server - complete results with non existent data

I have a table where I have all the customers and a table where I have all their restrictions.
CUSTOMER
customer_id customer_name
1 name 1
2 name 2
CUSTOMER_RESTRICTIONS
rest_type day_of_week hour_start hour_stop customer_id
TYPE1 0 08:00 12:00 1
TYPE1 0 13:00 17:00 1
TYPE2 0 17:00 23:59 1
Problem: I only have a record for a restriction type and a customer when the customer has a restriction and this is a problem in the visualization I want to build.
I need every customer, every day and every restriction type, even when there is no restriction. In that case hour_start and hour_stop would be NULL.
For the tables shown, the output would be
rest_type day_of_week hour_start hour_stop customer_id
TYPE1 0 08:00 12:00 1
TYPE1 0 08:00 12:00 1
TYPE1 1 NULL NULL 1
TYPE1 2 NULL NULL 1
TYPE1 3 NULL NULL 1
TYPE1 4 NULL NULL 1
TYPE1 5 NULL NULL 1
TYPE1 6 NULL NULL 1
TYPE1 1 NULL NULL 1
TYPE1 2 NULL NULL 1
TYPE1 3 NULL NULL 1
TYPE1 4 NULL NULL 1
TYPE1 5 NULL NULL 1
TYPE2 0 NULL NULL 1
TYPE2 1 NULL NULL 1
TYPE2 2 NULL NULL 1
TYPE2 3 NULL NULL 1
TYPE2 4 NULL NULL 1
TYPE2 5 NULL NULL 1
TYPE2 6 NULL NULL 1
TYPE1 0 NULL NULL 2
TYPE1 1 NULL NULL 2
TYPE1 2 NULL NULL 2
TYPE1 3 NULL NULL 2
TYPE1 4 NULL NULL 2
TYPE1 5 NULL NULL 2
TYPE1 6 NULL NULL 2
TYPE2 0 NULL NULL 2
TYPE2 1 NULL NULL 2
TYPE2 2 NULL NULL 2
TYPE2 3 NULL NULL 2
TYPE2 4 NULL NULL 2
TYPE2 5 NULL NULL 2
TYPE2 6 NULL NULL 2
How can I achieve that? I couldn't even start to build this query.
Essentially you need to start with the data you must have and left join the optional data. E.g., something like this:
-- One row per customer per day of week (0-6); restriction columns are NULL
-- when the customer has no restriction on that day.
select
    c.customer_id,
    r.[rest_type],
    d.[day_of_week],
    r.[hour_start],
    r.[hour_stop]
from CUSTOMER as c
cross apply (
    values (0), (1), (2), (3), (4), (5), (6)
) as d (day_of_week)
left join CUSTOMER_RESTRICTIONS as r
    on r.customer_id = c.customer_id
   and r.day_of_week = d.day_of_week
Output:
customer_id rest_type day_of_week hour_start hour_stop
----------- --------- ----------- ---------- ---------
1 TYPE1 0 08:00 12:00
1 TYPE1 0 13:00 17:00
1 TYPE2 0 17:00 23:59
1 NULL 1 NULL NULL
1 NULL 2 NULL NULL
1 NULL 3 NULL NULL
1 NULL 4 NULL NULL
1 NULL 5 NULL NULL
1 NULL 6 NULL NULL
If there are only two rest_types, you don't have a lookup table for them, and you want to show a row for each, you would do:
-- One row per customer, per day of week (0-6), per restriction type; the hour
-- columns are NULL when no matching restriction exists.
select
    c.customer_id,
    t.[rest_type],
    d.[day_of_week],
    r.[hour_start],
    r.[hour_stop]
from CUSTOMER as c
cross apply (
    values (0), (1), (2), (3), (4), (5), (6)
) as d (day_of_week)
cross apply (
    values ('TYPE1'), ('TYPE2')
) as t (rest_type)
left join CUSTOMER_RESTRICTIONS as r
    on r.customer_id = c.customer_id
   and r.day_of_week = d.day_of_week
   and r.rest_type = t.rest_type
-- Alternative suggestion: stack the rows that have a rest_type on top of the
-- rows that do not, blanking the hour columns for the latter.
-- NOTE(review): "table A" is a placeholder, not a real object name; substitute
-- the actual source table before running.
(select rest_type, day_of_week,
hour_start ,
hour_stop
from table A
where rest_type IS NOT NULL)
-- UNION (not UNION ALL) also removes duplicate rows from the combined result.
Union
(select rest_type,
day_of_week,
NULL ,NULL
from table A
where rest_type IS NULL)
Is this what you want ?
First off, I wouldn't store rest type as you are, that is a bad habit, it should be a reference table!
You need to cross apply to get all your possible combinations, and then add in the values you DO have...
-- Self-contained demo: every customer x restriction type x day of week (0-6),
-- with the restriction hours filled in where a matching row exists.
-- Table variables must be '@'-prefixed; DECLARE does not accept a '#' name.
DECLARE @Customer TABLE (Id INT IDENTITY(1,1), Name NVARCHAR(100))
DECLARE @Rest TABLE (Id INT IDENTITY(1,1), Name NVARCHAR(100))
DECLARE @Restrictions TABLE (Id INT IDENTITY(1,1), RestID INT, CustomerID INT, Day_of_Week TINYINT, hour_start TIME, hour_end TIME)

INSERT INTO @Customer (Name)
VALUES ('JOHN'), ('SUSAN')

INSERT INTO @Rest (Name)
VALUES ('TYPE A'), ('TYPE B')

INSERT INTO @Restrictions (RestID, CustomerID, Day_of_Week, hour_start, hour_end)
VALUES (1, 1, 0, '08:00', '12:00'),
       (1, 1, 0, '13:00', '17:00'),
       (1, 2, 0, '17:00', '23:59')

-- Recursive CTE producing the seven day numbers 0..6.
;WITH DaysofWeek AS
(
    SELECT 0 AS dow
    UNION ALL
    SELECT dow + 1
    FROM DaysofWeek
    WHERE dow < 6   -- the last recursive step emits 6; "dow < 5" stopped at 5 and lost a day
)
SELECT *
FROM @Customer C
CROSS JOIN @Rest R          -- no correlation is needed, so a plain cross join suffices
CROSS JOIN DaysofWeek D
LEFT JOIN @Restrictions X
    ON X.Day_of_Week = D.dow
   AND X.CustomerID = C.Id
   AND X.RestID = R.Id
ORDER BY C.Id, D.dow, R.Id

Grouping problems using two CTEs

The code is producing correct numbers, but the grouping is giving me a problem, and this may be a fundamental code choice issue. The query is as follows:
-- Sketch of two CTEs over the same source: P sums qty as "proposed" and
-- A sums qty as "awarded", each tagged with a quarter/year derived from status;
-- the final query right-joins P to A on id.
-- NOTE(review): this is pseudo-code as posted and will not parse: no comma
-- separates the P and A CTEs, "quarter (case ...)" / "year (case ...)" are not
-- valid expressions, the r/rm aliases are never defined, and P's select list
-- ends with a trailing comma.
With
P as ( Select sum(r.qty) as proposed, rm.entity, id,
quarter (case When status = open then quarter = 1 ELSE 3 END) AS QUARTER,
year (case When status = open then year = 2017 ELSE 2016 END) AS YEAR,
FROM Db1
-- NOTE(review): groups by the aggregate alias "proposed" and lists quarter twice.
Group By proposed, quarter, id, entity, quarter, year)
A as ( Select sum(r.qty) as awarded, rm.entity, id,
quarter (case When status = open then quarter = 2 ELSE 4 END),
year(case When status = open then year = 2018 ELSE 2016 END)
From DB1
Group By proposed, quarter, id, entity, quarter, year
)
-- Joining on id alone pairs every P row with every A row that shares the id.
SELECT * FROM P right join a on p.id = a.id
Group By proposed, quarter, id, entity, quarter, year
My returns are something like:
ID p.Quarter p.Year a.Quarter a.Year Proposed Awarded
1 null null 1 2017 null 1
2 2 2018 3 2017 1 1
2 1 2018 4 2016 1 1
3 null null 2 2018 null 2
I want:
ID p.Quarter p.Year a.Quarter a.Year Proposed Awarded
1 null null 1 2017 null 1
2 2 2018 null null 1 null
2 1 2018 null null 1 null
2 null null 3 2017 null 1
2 null null 4 2017 null null
3 null null 2 2018 null 2
The problem is: if an ID has a proposed date, quantity, awarded date and quantity, I want all of the years and quarters to be shown outside of the ID grouping, so each awarded or proposed count will have its own row. Otherwise the counts are coming in wrong.
I am pulling from two different databases and my CASE statements are much more complex, but adding that large amount of code seemed irrelevant for this.

Retrieving data from specific columns of multiple rows in sql

I have a table which stores information as follows and has ID as the primary key:
Link for the table view: https://drive.google.com/file/d/0B4UzXmbWLTJZaU84WnVZMUJDT3M/view?usp=sharing
ID ScheduleDate WorkArea Employee1 Hours1 Employee2 Hours2 Employee3 Hours3 Employee4 Hours4 Employee5 Hours5 Employee6 Hours6 Employee7 Hours7 Employee8 Hours8
1 7/1/2014 W1 A 8 B 7 C 4 D 3 NULL 0 NULL 0 NULL 0 NULL 0
2 7/1/2014 W2 B 8 C 8 0 0 0 0 NULL
3 7/1/2014 W3 C 8 A 8 E 8 F 8 NULL NULL NULL NULL NULL NULL NULL NULL
4 7/1/2014 W4 D 8 B 8 F 8 NULL NULL NULL NULL NULL NULL
5 7/1/2014 W5 E 8 C 8 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
6 7/1/2014 W6 F 8 D 8 8 NULL NULL NULL NULL NULL NULL NULL NULL NULL
7 8/1/2014 W1 G 4 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
8 8/1/2014 W2 A 4 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
9 8/1/2014 W3 B 8 F 8 8 8 NULL NULL NULL NULL NULL NULL NULL NULL
For any particular ScheduleDate, there can be only one WorkArea (combination of ScheduleDate and WorkArea is unique). Now, I want to check if the sum of hours of the particular employee is greater than 8 hours or not in any single date. For example, the sum of hours of employee A for the particular date (can be hour1, hour2 or anything based on employee's placement in the table) should not be more than 8 hours. How do I check that?
Please help.
Thanks
You don't really strictly need the aliases on each of the union-ed queries:
-- Unpivot the eight Employee/Hours column pairs into one row per
-- (ScheduleDate, WorkArea, Employee), then report every employee whose total
-- hours on a single date exceed 8.
-- NOTE: the question never names the table; ScheduleTable is a placeholder --
-- replace it with the real table name.
with t as (
    select s.ScheduleDate, s.WorkArea, e.Employee, e.Hrs
    from ScheduleTable as s
    cross apply (
        values (s.Employee1, s.Hours1),
               (s.Employee2, s.Hours2),
               (s.Employee3, s.Hours3),
               (s.Employee4, s.Hours4),
               (s.Employee5, s.Hours5),
               (s.Employee6, s.Hours6),
               (s.Employee7, s.Hours7),
               (s.Employee8, s.Hours8)
    ) as e (Employee, Hrs)
    -- empty slots would otherwise be lumped together into one NULL "employee"
    where e.Employee is not null
)
select ScheduleDate, Employee, sum(Hrs) as [Hours]
from t
group by ScheduleDate, Employee
having sum(Hrs) > 8

SQL Server Query to find CHI-SQUARE Values (Not Working)

I am trying to find the Chi-Square test from my following SQL Server Query on the sample data:
-- Chi-square contribution of each (sessionnumber, sessioncount, timespent) cell:
-- expected = product of the three marginal totals / (grand total squared),
-- dev = observed cnt - expected, chi_square = dev^2 / expected.
WITH dim1 AS (
    SELECT sessionnumber, SUM(cnt) AS cnt
    FROM clusters
    GROUP BY sessionnumber
),
dim2 AS (
    SELECT sessioncount, SUM(cnt) AS cnt
    FROM clusters
    GROUP BY sessioncount
),
dim3 AS (
    SELECT timespent, SUM(cnt) AS cnt
    FROM clusters
    GROUP BY timespent
),
dimall AS (
    SELECT SUM(cnt) AS cnt
    FROM clusters
),
cell AS (
    SELECT c.sessionnumber,
           c.sessioncount,
           c.timespent,
           c.cnt AS observed,
           (d1.cnt * d2.cnt * d3.cnt) / (da.cnt * da.cnt) AS expected
    FROM clusters AS c
    JOIN dim1 AS d1 ON d1.sessionnumber = c.sessionnumber
    JOIN dim2 AS d2 ON d2.sessioncount = c.sessioncount
    JOIN dim3 AS d3 ON d3.timespent = c.timespent
    CROSS JOIN dimall AS da
)
SELECT sessionnumber,
       sessioncount,
       timespent,
       expected,
       observed - expected AS dev,
       (observed - expected) * (observed - expected) / expected AS chi_square
FROM cell
My table has this sort of sample data:
sessionnumber sessioncount timespent cnt
1 17 28 NULL
2 22 8 NULL
3 1 1 NULL
4 1 1 NULL
5 8 111 NULL
6 8 65 NULL
7 11 5 NULL
8 1 1 NULL
9 62 64 NULL
10 6 42 NULL
The problem is that this query runs without errors but gives the wrong output, or you could say no output at all. The output it gives me looks like this:
sessionnumber sessioncount timespent expected dev chi_square
1 17 28 NULL NULL NULL
2 22 8 NULL NULL NULL
3 1 1 NULL NULL NULL
4 1 1 NULL NULL NULL
5 8 111 NULL NULL NULL
6 8 65 NULL NULL NULL
7 11 5 NULL NULL NULL
8 1 1 NULL NULL NULL
9 62 64 NULL NULL NULL
10 6 42 NULL NULL NULL
How can I get rid of this problem? I have tried my best! Thanks in advance for telling me what I'm doing wrong!
In your sample data, cnt is NULL, so the results are also NULL. You can replace these NULL values with a default value (1 for example, I don't know what is the context) using ISNULL, like
-- Marginal total per sessionnumber, counting a missing cnt as 1.
SELECT
    c.sessionnumber,
    SUM(ISNULL(c.cnt, 1)) AS cnt
FROM clusters AS c
GROUP BY c.sessionnumber

Showing one row for each calendar week in SQL

I have a SQL query which pulls unit sales by item, by week:
-- Unit sales per vendor and item, labelled with the week number of sls_week.
select
    s.sls_vendor,
    s.sls_item,
    s.sls_units,
    datepart(week, s.sls_week) as sls_date
from mytable as s
Assume I'm looking at a 8 week period, but not every item/vendor combination has a full 8 weeks of sales. However I need my query to show a null value in that instance. So the query would return 8 rows for each item/vendor combination regardless of existence.
I tried creating a temp table which has the numbers 28 to 35 and performing a left join on the query above, but that doesn't show null values. The results are no different than running the original query alone.
I can think of how this would be done using a crosstab/pivot query, but isn't this something the join should be doing?
Edit: Updated to show my join query. Datetable just has 8 rows with 1 incremental number for each calendar week.
-- Keep every week listed in the #datetable temp table and attach the sales
-- rows that fall in that week, if any.
select *
from #datetable
left join (
    select
        sls_vendor,
        sls_item,
        sls_units,
        datepart(week, sls_week) as sls_date
    from mytable
) as qry
    on qry.sls_date = temp_week
Your method should work just fine:
-- Demo with inline sample data: three weeks of sales for one vendor, joined
-- onto the full list of weeks 28-35 so weeks without sales still appear.
;with mytable as (
    select sls_vendor, sls_item, sls_units, sls_week
    from (
        values (1, 'Test', 30, '8/7/2011'),
               (1, 'Test', 30, '8/14/2011'),
               (1, 'Test', 30, '8/21/2011')
    ) as sample_rows (sls_vendor, sls_item, sls_units, sls_week)
)
,datetable as (
    select temp_week
    from (
        values (28), (29), (30), (31), (32), (33), (34), (35)
    ) as weeks (temp_week)
)
select *
from datetable
left join (
    select
        sls_vendor,
        sls_item,
        sls_units,
        datepart(week, sls_week) as sls_date
    from mytable
) as qry
    on qry.sls_date = temp_week
Output:
temp_week sls_vendor sls_item sls_units sls_date
28 NULL NULL NULL NULL
29 NULL NULL NULL NULL
30 NULL NULL NULL NULL
31 NULL NULL NULL NULL
32 NULL NULL NULL NULL
33 1 Test 30 33
34 1 Test 30 34
35 1 Test 30 35
Edit: If you want to include all week values for every sales vendor, include a cross join with the distinct selection of vendors:
-- Demo with inline sample data: every week 28-35 is paired with every distinct
-- vendor, and that vendor's sales for the week are attached when present.
;with mytable as (
    select sls_vendor, sls_item, sls_units, sls_week
    from (
        values (1, 'Test', 30, '8/7/2011'),
               (2, 'Test', 30, '8/14/2011'),
               (3, 'Test', 30, '8/21/2011')
    ) as sample_rows (sls_vendor, sls_item, sls_units, sls_week)
)
,datetable as (
    select temp_week
    from (
        values (28), (29), (30), (31), (32), (33), (34), (35)
    ) as weeks (temp_week)
)
select *
from datetable
cross join (
    select distinct sls_vendor
    from mytable
) as v
left join (
    select
        sls_vendor,
        sls_item,
        sls_units,
        datepart(week, sls_week) as sls_date
    from mytable
) as qry
    on qry.sls_date = temp_week
   and qry.sls_vendor = v.sls_vendor
Output:
temp_week sls_vendor sls_vendor sls_item sls_units sls_date
28 1 NULL NULL NULL NULL
29 1 NULL NULL NULL NULL
30 1 NULL NULL NULL NULL
31 1 NULL NULL NULL NULL
32 1 NULL NULL NULL NULL
33 1 1 Test 30 33
34 1 NULL NULL NULL NULL
35 1 NULL NULL NULL NULL
28 2 NULL NULL NULL NULL
29 2 NULL NULL NULL NULL
30 2 NULL NULL NULL NULL
31 2 NULL NULL NULL NULL
32 2 NULL NULL NULL NULL
33 2 NULL NULL NULL NULL
34 2 2 Test 30 34
35 2 NULL NULL NULL NULL
28 3 NULL NULL NULL NULL
29 3 NULL NULL NULL NULL
30 3 NULL NULL NULL NULL
31 3 NULL NULL NULL NULL
32 3 NULL NULL NULL NULL
33 3 NULL NULL NULL NULL
34 3 NULL NULL NULL NULL
35 3 3 Test 30 35
Does it work for you?
-- One row per week 28-35 (more when a week has several sales rows); the sales
-- columns are NULL for weeks without sales.
-- sls_date is taken from the generated week list rather than from mytable, so
-- the gap-filling rows still show which week they stand for instead of being
-- entirely NULL.
SELECT m.sls_vendor,
       m.sls_item,
       m.sls_units,
       dates.week_no AS sls_date
FROM (
    VALUES (28), (29), (30), (31), (32), (33), (34), (35)
) AS dates (week_no)
LEFT JOIN mytable AS m
    ON dates.week_no = DATEPART(WEEK, m.sls_week)
ORDER BY dates.week_no
The following query works in Data.StackExchange. See here. It gets the top Post per week by score.
-- Top-scoring post for every week of 2008-2012; weeks with no posts still
-- appear, with NULLs in the post columns.
WITH weeksyears
     -- every (week 1-52, year 2008-2012) pair, built from the numbers in spt_values
     AS (SELECT w.NUMBER AS week,
                y.NUMBER AS year
         FROM   (SELECT v.NUMBER
                 FROM   MASTER..spt_values v
                 WHERE  TYPE = 'P'
                        AND v.NUMBER BETWEEN 1 AND 52) w
                CROSS JOIN
                (SELECT v.NUMBER
                 FROM   MASTER..spt_values v
                 WHERE  TYPE = 'P'
                        AND v.NUMBER BETWEEN 2008 AND 2012) y),
     topPostPerWeek
     -- posts ranked by score within their week/year; row = 1 is the best one
     AS (SELECT score,
                Datepart(week, creationdate) week,
                Datepart(YEAR, creationdate) YEAR,
                Row_number() OVER (PARTITION BY Datepart(week, creationdate),
                                                Datepart(YEAR, creationdate)
                                   ORDER BY score DESC) row
         FROM   posts)
SELECT *
FROM   weeksyears wy
       -- the CTE name and its alias t must be separate tokens, otherwise the
       -- t.* references below cannot be resolved
       LEFT JOIN topPostPerWeek t
         ON wy.week = t.week
            AND wy.YEAR = t.YEAR
-- "OR row IS NULL" keeps the weeks that found no post; without it the filter
-- discards them and the LEFT JOIN behaves like an INNER JOIN
WHERE  row = 1
        OR row IS NULL
ORDER  BY wy.YEAR, wy.WEEK
​
Every row prior to the 38th week of 2008 is empty except for week and year, as are the rows after the 35th week of 2011.
However if you edit the query and remove OR row IS NULL the query will act just as if it were an INNER JOIN
My guess is that there's something in your WHERE that's referring to the "RIGHT" table. Just add OR [rightTable.field] IS NULL and you'll be fine.