Which product sales is Increasing year wise? - sql

Table name is SALES
**PROD_ID** **YEAR** **QUANTITY**
P1 2012 50
P1 2013 40
P1 2014 30
P2 2012 20
P2 2013 30
P2 2014 40
Output should be P2 but how..?

How about this?
-- Products whose quantity rises in every consecutive pair of years.
-- Comparing only the 2012 and 2014 totals would accept a product that dips in
-- between (e.g. 20, 10, 40) and would stop working once other years are loaded.
select prod_id
from (
    select prod_id,
           quantity,
           -- previous year's quantity for the same product; NULL for its first year
           lag(quantity) over (partition by prod_id order by year) as prev_quantity
    from sales
) yearly
group by prod_id
-- A NULL prev_quantity makes the comparison unknown, so a product's first year
-- is never counted as a drop.
having sum(case when quantity <= prev_quantity then 1 else 0 end) = 0;

A slightly complex way to accomplish this with ctes.
Fiddle with sample data
-- Products whose quantity is strictly higher than in the preceding year, for every year.
with diff as (
    -- df = 1 when the row is the product's first year or improves on the previous year.
    -- The first year is detected with IS NULL instead of defaulting the missing
    -- previous value to 0, which would reject a product whose first quantity is 0.
    select prod_id,
           case when prev_quantity is null or quantity > prev_quantity
                then 1 else 0 end as df
    from (
        select prod_id,
               quantity,
               lag(quantity) over (partition by prod_id order by yr) as prev_quantity
        from sales
    ) yearly
)
-- number of qualifying years per product
, totdiff as (select prod_id, sum(df) totdf from diff group by prod_id)
-- number of recorded years per product
, totals as (select prod_id, count(*) cnt from sales group by prod_id)
select d.prod_id
from totdiff d
inner join totals t on t.prod_id = d.prod_id and d.totdf = t.cnt
Edit: as suggested by @shawnt00 in the comments, the query can be simplified to:
-- Products whose quantity is strictly higher than in the preceding year, for every year.
with diff as (
    -- df = 1 when the row is the product's first year or improves on the previous year.
    -- The first year is detected with IS NULL instead of defaulting the missing
    -- previous value to 0, which would reject a product whose first quantity is 0.
    select prod_id,
           case when prev_quantity is null or quantity > prev_quantity
                then 1 else 0 end as df
    from (
        select prod_id,
               quantity,
               lag(quantity) over (partition by prod_id order by yr) as prev_quantity
        from sales
    ) yearly
)
select prod_id
from diff
group by prod_id
-- every recorded year must qualify
having count(*) = sum(df)

This question can be approached with 2 steps
First, create a column to calculate difference of current year sales from previous year using lag function from windows, and then another column to calculate the distinct number of years for each PROD_ID
Second, Group the data using a group by clause on PROD_ID and filter the correct products only if all the distinct years had a positive sales compared to last year.
Data Table -
+---------+------+-------+
| PROD_ID | Year | Sales |
+---------+------+-------+
| P1 | 2012 | 50 |
| P1 | 2013 | 40 |
| P1 | 2014 | 30 |
| P2 | 2012 | 20 |
| P2 | 2013 | 30 |
| P2 | 2014 | 40 |
+---------+------+-------+
Query -
-- Step 1 (inner_tbl): per row, the change versus the previous year (a missing
-- previous year counts as 0) and the number of year rows recorded for the product.
-- Step 2: keep products whose count of positive changes equals that number of years.
WITH inner_tbl AS (
    SELECT
        PROD_ID,
        sales,
        sales - LAG(sales, 1, 0) OVER (PARTITION BY PROD_ID ORDER BY year ASC) AS diff,
        COUNT(year) OVER (PARTITION BY PROD_ID) AS num_of_years
    FROM sales
)
SELECT PROD_ID
FROM inner_tbl
GROUP BY PROD_ID, num_of_years
HAVING SUM(CASE WHEN diff > 0 THEN 1 ELSE 0 END) = num_of_years
Inner query output -
+---------+--------+------+--------------+
| PROD_ID | sales | diff | num_of_years |
+---------+--------+------+--------------+
| P1 | 50 | 50 | 3 |
| P1 | 40 | -10 | 3 |
| P1 | 30 | -10 | 3 |
| P2 | 20 | 20 | 3 |
| P2 | 30 | 10 | 3 |
| P2 | 40 | 10 | 3 |
+---------+--------+------+--------------+
Final output -
+---------+
| PROD_ID |
+---------+
| P2 |
+---------+

I know it's a very old question, but I am posting an answer because I was able to solve it in a different way.
-- Sample data: quantity sold per product and year.
create table sales (
    prod_id  varchar(10),
    yr       int,
    quantity int
);
-- P1: quantity falls every year
insert into sales (prod_id, yr, quantity) values ('P1', 2012, 50);
insert into sales (prod_id, yr, quantity) values ('P1', 2013, 40);
insert into sales (prod_id, yr, quantity) values ('P1', 2014, 30);
-- P2: quantity rises every year
insert into sales (prod_id, yr, quantity) values ('P2', 2012, 20);
insert into sales (prod_id, yr, quantity) values ('P2', 2013, 30);
insert into sales (prod_id, yr, quantity) values ('P2', 2014, 40);
-- Pair every row with the same product's row from the year before, if there is one;
-- a missing previous row is represented by prev_yr = 0 and prev_qty = 0.
with with_prev_year as
(
    select cur.prod_id,
           cur.yr,
           nvl(prv.yr, 0)       as prev_yr,
           cur.quantity,
           nvl(prv.quantity, 0) as prev_qty
    from sales cur
    left outer join sales prv
        on prv.prod_id = cur.prod_id
       and prv.yr + 1 = cur.yr
),
-- flag = 1 when there is no previous-year row or the quantity grew, otherwise 0
year_flags as
(
    select prod_id,
           yr,
           case
               when prev_yr = 0 or quantity > prev_qty then 1
               else 0
           end as flag
    from with_prev_year
)
-- a single 0 flag pulls min(flag) down to 0 and disqualifies the product
select prod_id, min(flag)
from year_flags
group by prod_id
having min(flag) = 1;

I can think of 3 ways of doing it :
-- Way 1: flag each year that beats the previous one and require every year to be flagged.
select a.prod_id from
(
    select
        prod_id,
        -- A product's first year has nothing to compare against and always qualifies.
        -- Defaulting the missing previous quantity to 0 instead would reject a product
        -- whose first quantity is 0.
        case
            when prev_quantity is null or quantity > prev_quantity then 1
            else 0
        end as val
    from
    (
        select
            prod_id,
            quantity,
            lag(quantity) over (partition by prod_id order by year asc) as prev_quantity
        from
            sales
    ) s
) a
group by a.prod_id
having sum(a.val) = count(a.prod_id)
;
-- Way 2: the smallest year-over-year change must be strictly positive.
select a.prod_id from
(
    select
        prod_id,
        -- NULL for each product's first year: there is no earlier year to subtract
        quantity - lag(quantity) over (partition by prod_id order by year asc) as val
    from
        sales
) a
group by a.prod_id
-- Strict > 0 so that a flat year (change of 0) is not treated as an increase.
-- min() skips the NULL first-year row; coalesce keeps single-year products,
-- whose min() is NULL.
having coalesce(min(a.val), 1) > 0
;
-- Way 3: a product is strictly increasing exactly when its position by year equals
-- its dense rank by quantity on every row.
select a.prod_id from
(
    select
        prod_id,
        -- row_number() by year is used instead of the raw year value so that gaps
        -- between years (e.g. 2012, 2014, 2015) do not break the comparison.
        -- Assumes at most one row per product and year.
        row_number() over(partition by prod_id order by year asc)
          - dense_rank() over(partition by prod_id order by quantity asc) as cal
    from
        sales
) a
group by a.prod_id
-- a tie or an out-of-order quantity makes the offset differ between rows
having count(distinct cal)=1
;

The following solution uses CTE and OVER CLAUSE -
-- For each product: how many year rows it has, and how many of those rows are followed
-- by a next-year row with a larger quantity. The product qualifies when every row
-- except one has such a successor.
-- NOTE(review): #Q2 is presumably a temp table holding the sales rows; confirm.
WITH Sales_CTE
AS (
    SELECT
         cur.Prod_ID AS n1Prod_ID
        ,COUNT(cur.Year) OVER (PARTITION BY cur.Prod_ID) AS year_count
        ,COUNT(nxt.Year) OVER (PARTITION BY cur.Prod_ID) AS growth_count
    FROM #Q2 cur
    LEFT JOIN #Q2 nxt
        ON  nxt.Prod_ID = cur.Prod_ID
        AND nxt.Year = (cur.Year + 1)
        AND nxt.Quantity > cur.Quantity
)
SELECT DISTINCT n1Prod_ID AS [Product ID]
FROM Sales_CTE
WHERE year_count = (growth_count + 1);

Related

SQL join condition either A or B but not both A and B

I have sales data by years and quarters and for the last year I want to fill missing quarters with last available value.
Say we have source table:
+------+---------+-------+--------+
| year | quarter | sales | row_no |
+------+---------+-------+--------+
| 2018 | 1 | 4000 | 5 |
| 2018 | 2 | 6000 | 4 |
| 2018 | 3 | 5000 | 3 |
| 2018 | 4 | 3000 | 2 |
| 2019 | 1 | 8000 | 1 |
+------+---------+-------+--------+
Desired results:
+------+---------+-------+------------------------+
| year | quarter | sales | |
+------+---------+-------+------------------------+
| 2018 | 1 | 4000 | |
| 2018 | 2 | 6000 | |
| 2018 | 3 | 5000 | |
| 2018 | 4 | 3000 | |
| 2019 | 1 | 8000 | |
| 2019 | 2 | 8000 | <repeat the last value |
| 2019 | 3 | 8000 | <repeat the last value |
| 2019 | 4 | 8000 | <repeat the last value |
+------+---------+-------+------------------------+
So the task is to make Cartesian of year and quarter and left join to it the sales either corresponding or the last.
This code gets me almost there:
select r.year, k.quarter, t.sales
from (select distinct year from [MyTable]) r cross join
(select distinct quarter from [MyTable]) k left join
[MyTable] t
on (r.year = t.year and k.quarter=t.quarter) or row_no=1
How can I correct the last line (the join condition) so that the 2018 rows are not doubled?
One method uses outer apply:
-- For every (year, quarter) combination, take the most recent stored row that is
-- not later than that period.
select yrs.year, qtrs.quarter, latest.sales
from (select distinct year from [MyTable]) yrs
cross join (select distinct quarter from [MyTable]) qtrs
outer apply (
    select top (1) src.*
    from [MyTable] src
    where src.year < yrs.year
       or (src.year = yrs.year and src.quarter <= qtrs.quarter)
    -- newest period first, so TOP (1) is the latest row at or before the period
    order by src.year desc, src.quarter desc
) latest;
For your volume of data, this should be fine.
A more efficient method -- assuming you are only assigning values to the end -- would be:
-- Fill every missing (year, quarter) with the sales of the single latest stored row.
-- Any missing period receives that value, including gaps in earlier years.
select y.year, q.quarter,
       -- named explicitly: the expression would otherwise produce an unnamed column
       coalesce(t.sales, tdefault.sales) as sales
from (select distinct year from [MyTable]) y
cross join (select distinct quarter from [MyTable]) q
left join [MyTable] t
    on t.year = y.year
   and t.quarter = q.quarter
-- one-row derived table: the latest period present in the data
cross join (
    select top (1) t.*
    from [MyTable] t
    order by t.year desc, t.quarter desc
) tdefault
A very different approach using a CTE and some window functions. This doesn't require 2 scans of the table, nor a triangular join.
-- Fills missing (year, quarter) periods with the last known sales value using
-- window functions only.
-- VTE: the sample rows, inlined as a table value constructor.
WITH VTE AS(
SELECT *
FROM (VALUES (2018,1,4000,5),
(2018,2,6000,4),
(2018,3,5000,3),
(2018,4,3000,2),
(2019,1,8000,1)) V([Year],[Quarter],sales, row_no)),
-- CTE: the full year x quarter grid (years 2018-2019 and quarters 1-4 are hard-coded
-- here), left-joined to the data so that missing periods carry a NULL sales value.
CTE AS(
SELECT Y.Year,
Q.Quarter,
V.sales,
V.row_no,
-- Running count of periods that have a sales value, in chronological order.
-- A missing period does not advance the count, so it shares its Grp number with
-- the most recent preceding period that has data.
COUNT(CASE WHEN V.sales IS NOT NULL THEN 1 END) OVER (ORDER BY Y.[Year], Q.[Quarter]
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Grp
FROM (VALUES(2018),(2019)) Y([Year])
CROSS JOIN (VALUES(1),(2),(3),(4)) Q([Quarter])
LEFT JOIN VTE V ON Y.[Year] = V.[Year] AND Q.[Quarter] = V.[Quarter])
SELECT C.[Year],
C.[Quarter],
-- Each Grp holds at most one non-NULL sales value; MAX spreads it over the group.
MAX(C.sales) OVER (PARTITION BY C.Grp) AS Sales
FROM CTE C;
This will only work on SQL Server 2012+ (as ROWS BETWEEN was introduced with SQL Server 2012), however, hopefully you're not using 2008- which are all (almost) entirely out of support.
I would simply do JOIN :
-- Builds every (year, quarter) combination and left-joins the stored rows onto it.
-- A missing quarter is filled with the highest sales value of the same year (MAX over
-- the year partition), which is not necessarily the value of the latest stored quarter.
-- Note the alias reuse: inside the derived table T is [MyTable] and TT the distinct
-- quarters; outside, TT is the derived table itself and T the left-joined [MyTable].
SELECT TT.YEAR, TT.Quarter, COALESCE(T.SALES, MAX(T.SALES) OVER (PARTITION BY TT.YEAR)) AS sales
FROM (SELECT DISTINCT T.YEAR, TT.Quarter
FROM [MyTable] T CROSS JOIN
( SELECT DISTINCT TT.Quarter FROM [MyTable] TT ) TT
) TT LEFT JOIN
[MyTable] T
ON TT.YEAR = T.YEAR AND TT.Quarter = T.Quarter;
EDIT : I just misread the question for additional quarters so, you need APPLY in OUTER JOIN :
-- Every (year, quarter) combination, with the stored sales where present and
-- otherwise the sales of the last stored quarter of the same year.
SELECT grid.YEAR, grid.Quarter, COALESCE(actual.SALES, fallback.SALES) AS Sales
FROM (
    SELECT DISTINCT yrs.YEAR, qtrs.Quarter
    FROM [MyTable] yrs
    CROSS JOIN (SELECT DISTINCT src.Quarter FROM [MyTable] src) qtrs
) grid
LEFT JOIN [MyTable] actual
    ON actual.YEAR = grid.YEAR
   AND actual.Quarter = grid.Quarter
-- highest-numbered quarter stored for the same year
OUTER APPLY (
    SELECT TOP (1) last_q.*
    FROM [MyTable] last_q
    WHERE last_q.YEAR = grid.YEAR
    ORDER BY last_q.Quarter DESC
) fallback;

display 3 or more consecutive rows(Sql)

I have a table with below data
+------+------------+-----------+
| id | date1 | people |
+------+------------+-----------+
| 1 | 2017-01-01 | 10 |
| 2 | 2017-01-02 | 109 |
| 3 | 2017-01-03 | 150 |
| 4 | 2017-01-04 | 99 |
| 5 | 2017-01-05 | 145 |
| 6 | 2017-01-06 | 1455 |
| 7 | 2017-01-07 | 199 |
| 8 | 2017-01-08 | 188 |
+------+------------+-----------+
What I am trying to do is display every run of 3 or more consecutive rows where people >= 100, like this:
+------+------------+-----------+
| id | date1 | people |
+------+------------+-----------+
| 5 | 2017-01-05 | 145 |
| 6 | 2017-01-06 | 1455 |
| 7 | 2017-01-07 | 199 |
| 8 | 2017-01-08 | 188 |
+------+------------+-----------+
can anyone help me how to do this query using oracle database. I am able to display rows which are above 100 but not in a consecutive way
Table creation(reducing typing time for people who will be helping)
CREATE TABLE stadium
( id int
, date1 date, people int
);
Insert into stadium values (
1,TO_DATE('2017-01-01','YYYY-MM-DD'),10);
Insert into stadium values
(2,TO_DATE('2017-01-02','YYYY-MM-DD'),109);
Insert into stadium values(
3,TO_DATE('2017-01-03','YYYY-MM-DD'),150);
Insert into stadium values(
4,TO_DATE('2017-01-04','YYYY-MM-DD'),99);
Insert into stadium values(
5,TO_DATE('2017-01-05','YYYY-MM-DD'),145);
Insert into stadium values(
6,TO_DATE('2017-01-06','YYYY-MM-DD'),1455);
Insert into stadium values
(7,TO_DATE('2017-01-07','YYYY-MM-DD'),199);
Insert into stadium values(
8,TO_DATE('2017-01-08','YYYY-MM-DD'),188);
Thanks in advance for the help
Assuming you mean >= 100, there are a couple of ways. One method just uses lead() and lag(). But a simple method defines each group >= 100 by the number of values < 100 before it. Then it uses count(*) to find the size of the consecutive values:
-- Rows that belong to a run of at least three consecutive days with people >= 100.
select s.*
from (select s.*, count(*) over (partition by grp) as num100pl
      from (select s.*,
                   -- grp = number of "quiet" days (people below 100 or unknown) seen so
                   -- far in date order; all busy days between two quiet days share a grp.
                   -- The column is date1 (see the CREATE TABLE), not date.
                   sum(case when people >= 100 then 0 else 1 end) over (order by date1) as grp
            from stadium s
           ) s
      -- Drop the quiet days before counting. Each quiet day carries the grp of the
      -- run that follows it, so counting it would let a run of only two busy days
      -- reach 3 and would return the quiet row itself (ids 1-3 in the sample data).
      where people >= 100
     ) s
where num100pl >= 3;
Here is a SQL Fiddle showing that the syntax works.
You can use the following sql script to get the desired output.
-- Rows that belong to a run of at least three consecutive ids with people >= 100.
WITH partitioned AS (
    -- Among the busy rows only, id minus its row number is constant within a run of
    -- consecutive ids, so it serves as a run identifier.
    -- The star is qualified because Oracle rejects a bare "*" followed by more columns.
    SELECT s.*, s.id - ROW_NUMBER() OVER (ORDER BY s.id) AS grp
    FROM stadium s
    WHERE s.people >= 100
),
counted AS (
    SELECT p.*, COUNT(*) OVER (PARTITION BY p.grp) AS cnt
    FROM partitioned p
)
-- the date column of stadium is date1; visit_date does not exist in this table
select id, date1, people
from counted
where cnt >= 3
I'm assuming that both the id and date columns are sequential and correspond to each other (there will need to be additional ROW_NUMBER() if the ids are not sequential with the dates, and more complex logic included if the dates are not necessarily sequential).
-- Rows that belong to a run of at least three consecutive days with people >= 100.
-- Uses the table's actual date column (date1). Stars are qualified and table aliases
-- are written without AS so the statement is accepted by Oracle as well as SQL Server.
SELECT
    subquery2.*
FROM
(
    SELECT
        subquery1.*
        ,COUNT(subquery1.date1) OVER (PARTITION BY subquery1.sequential_group_num) AS num_days_in_sequence
    FROM
    (
        SELECT
            s.*
            -- Among the busy rows, id minus the row number by date stays constant
            -- while ids are consecutive, which identifies each run.
            ,(s.id - ROW_NUMBER() OVER (ORDER BY s.date1)) AS sequential_group_num
        FROM
            stadium s
        WHERE
            s.people >= 100
    ) subquery1
) subquery2
WHERE
    subquery2.num_days_in_sequence >= 3
That produces the following output:
id date people sequential_group_num num_days_in_sequence
----------- ---------- ----------- -------------------- --------------------
5 2017-01-05 145 2 4
6 2017-01-06 1455 2 4
7 2017-01-07 199 2 4
8 2017-01-08 188 2 4
By using joins we can display the consecutive rows like this
-- A busy row (people >= 100) is kept when it is the first, middle or last member of
-- three consecutive ids that are all busy. Neighbours are looked up by id +/- 1 and 2.
SELECT cur.id, cur.date1, cur.people
FROM stadium cur
WHERE cur.people >= 100
  AND (
        -- first of three: the next two rows are busy
        (    (SELECT n1.people FROM stadium n1 WHERE n1.id = cur.id + 1) >= 100
         AND (SELECT n2.people FROM stadium n2 WHERE n2.id = cur.id + 2) >= 100)
        -- middle of three: previous and next rows are busy
     OR (    (SELECT p1.people FROM stadium p1 WHERE p1.id = cur.id - 1) >= 100
         AND (SELECT n1.people FROM stadium n1 WHERE n1.id = cur.id + 1) >= 100)
        -- last of three: the previous two rows are busy
     OR (    (SELECT p1.people FROM stadium p1 WHERE p1.id = cur.id - 1) >= 100
         AND (SELECT p2.people FROM stadium p2 WHERE p2.id = cur.id - 2) >= 100)
      )
ORDER BY cur.id;
-- Rows t1 that form three consecutive busy ids together with two other rows t2 and t3.
-- CROSS JOIN is spelled out: a JOIN without an ON clause is only accepted by MySQL.
select distinct
    t1.*
from
    stadium t1
cross join
    stadium t2
cross join
    stadium t3
where
    t1.people >= 100
    and t2.people >= 100
    and t3.people >= 100
    and
    (
        -- t1 is the first of the three
        (t1.id + 1 = t2.id
         and t2.id + 1 = t3.id)
        or
        -- t1 is the middle one
        (
            t2.id + 1 = t1.id
            and t1.id + 1 = t3.id
        )
        or
        -- t1 is the last one
        (
            t2.id + 1 = t3.id
            and t3.id + 1 = t1.id
        )
    )
order by
    -- qualified so the sort column is unambiguous across the three copies of stadium
    t1.id;
SQL script:
-- run_starts: ids that begin three consecutive ids (id, id+1, id+2) which all have
-- people >= 100. Assumes id is unique, so the count reaches 3 only when all three exist.
-- Written as a CTE because Oracle rejects AS before a derived-table alias.
WITH run_starts AS (
    SELECT S1.ID
    FROM STADIUM S1
    WHERE 3 = (
        SELECT COUNT(1)
        FROM STADIUM S2
        WHERE (S2.ID=S1.ID OR S2.ID=S1.ID+1 OR S2.ID=S1.ID+2)
        AND S2.PEOPLE >= 100
    )
)
-- Expand each start id back into its three rows. DISTINCT removes rows that are
-- reached from more than one overlapping start.
SELECT DISTINCT SS.*
FROM STADIUM SS
INNER JOIN run_starts SS2
    ON SS.ID>=SS2.ID AND SS.ID<SS2.ID+3
-- Rows that belong to a run of at least three consecutive days with people >= 100.
select s.*
from (
    select t.*, count(*) over (partition by t.grp) as total
    from (
        -- inner query 1: grp = number of days below 100 seen so far, in date order.
        -- The date column of stadium is date1, and the star is qualified because
        -- Oracle rejects a bare "*" followed by further columns.
        select st.*,
               sum(case when st.people < 100 then 1 else 0 end) over (order by st.date1) as grp
        from stadium st
    ) t
    -- inner query 2: count only the busy days of each group
    where t.people >= 100
) s
-- outer query: keep runs of three or more
where s.total >= 3
I wrote the following solution for this similar leetcode problem:
-- break_counts: running number of days below 100 in date order; the busy days
--               between two such days share the same visitGroups value.
-- busy_days:    only the days with people >= 100.
-- sized_runs:   size of each group of busy days.
with break_counts as (
    select s.*,
           sum(case when s.people < 100 then 1 else 0 end)
               over (order by s.date1) as visitGroups
    from stadium s
),
busy_days as (
    select b.*
    from break_counts b
    where b.people >= 100
),
sized_runs as (
    select d.*,
           count(*) over (partition by d.visitGroups) as groupsSize
    from busy_days d
)
select id, date1, people
from sized_runs
where groupsSize >= 3
order by date1

How to calculate average in SQL?

lets say I have the following table:
**FOOD** | **AMOUNT**
Bread | 2
Banana | 5
Pizza | 4
Apple | 57
Mandarin| 9
Orange | 8
Final result:
Bread | Percentage Of Total
Banana | percentage of total
etc
etc
I tried it in every single way, but couldn't find a solution. I hope someone can help me.
Using ANSI SQL (and SQL Server supports this syntax), you can do:
-- Amount per food and its share of the grand total (a fraction between 0 and 1).
select food,
       sum(amount) as total_amount,
       -- * 1.0 forces decimal arithmetic on engines that would otherwise do integer
       -- division; nullif yields NULL instead of a divide-by-zero error when the
       -- grand total is 0.
       sum(amount) * 1.0 / nullif(sum(sum(amount)) over (), 0) as proportion_of_total
from t
group by food;
Note: Some databases do integer division, so you may need to convert to a floating point or fixed point type.
We can also try like below-
-- Sample data in a table variable. T-SQL variables must start with @; the # prefix
-- denotes temp tables and is not valid in DECLARE.
DECLARE @tbl AS TABLE
(
    food VARCHAR(15)
   ,amount INT
)
INSERT INTO @tbl (food, amount) VALUES
 ('bread', 2)
,('banana', 5)
,('pizza', 4)
,('apple', 57)
,('mandarin', 9)
,('orange', 8)
-- The window functions repeat the totals on every row; DISTINCT collapses the
-- output to one row per food.
SELECT
    DISTINCT
     food
    ,SUM(amount) OVER() TotalAmount
    ,SUM(amount) OVER (PARTITION BY food) PerFoodTotal
    -- "100." makes the division decimal rather than integer
    ,CAST(SUM(amount) OVER (PARTITION BY food) * 100. / (SUM(amount) OVER()) AS DECIMAL(10,2)) [Percentage Of Total]
FROM @tbl
OUTPUT
food TotalAmount PerFoodTotal Percentage Of Total
--------------- ----------- ------------ ---------------------------------------
apple 85 57 67.06
banana 85 5 5.88
bread 85 2 2.35
mandarin 85 9 10.59
orange 85 8 9.41
pizza 85 4 4.71
(6 row(s) affected)
You can try something like this:
-- Sample data in a table variable. T-SQL variables must start with @; the # prefix
-- denotes temp tables and is not valid in DECLARE.
declare @tbl as table (
    food varchar(15)
   ,amount int
)
insert into @tbl (food, amount) values
 ('bread', 2)
,('banana', 5)
,('pizza', 4)
,('apple', 57)
,('mandarin', 9)
,('orange', 8)
-- grand total, returned as its own result set
select SUM(amount) from @tbl
-- per-food amount, its percentage of the grand total, and the grand total itself
select
     food
    ,SUM(amount) as [food amount]
    -- both sides are cast to numeric so the division is not done on integers
    ,(SUM(cast(amount as numeric(18,2))) / (select sum(cast(amount as numeric(18,2))) from @tbl)) * 100 as [Percentage Of Total]
    ,(select sum(amount) from @tbl) as total
from @tbl
group by food
Here is a way of getting the PercentageOfTotal, assuming that the sum of all amounts is not 0:
-- Grand total held in a scalar variable. T-SQL variables must start with @, not #.
DECLARE @total INT = (SELECT SUM(AMOUNT) FROM Table1)
-- Percentage of the grand total per row, rounded to two decimals.
-- NULLIF returns NULL instead of raising a divide-by-zero error when the total is 0.
SELECT FOOD, CAST((CAST((100 * AMOUNT) AS DECIMAL (18,2)) / NULLIF(@total, 0)) AS DECIMAL(18,2)) AS PercentageOfTotal from Table1
SQL Fiddle
MS SQL Server 2014 Schema Setup:
CREATE TABLE MusicGenres (name varchar(10)) ;
INSERT INTO MusicGenres (name)
VALUES ('Pop'),('Techno'),('Trance'),('trap'),('Hardcore'),('Electro') ;
CREATE TABLE Table2 (SongID int, MusicGenres varchar(10)) ;
INSERT INTO Table2 (SongID, MusicGenres)
VALUES (1,'Hardcore')
,(2,'Hardcore')
,(3,'Pop')
,(4,'Trap')
,(5,'Hardcore')
,(6,'Pop')
,(7,'Electro')
,(8,'Electro')
,(9,'Pop')
,(10,'Pop')
,(11,'Pop')
;
Query 1:
-- Song count per genre and its percentage of all songs.
SELECT s1.name
     , s1.recCount
     -- 100.0 makes the arithmetic decimal. The previous CAST of the total to
     -- decimal(5,2) overflows once the total reaches 1000. NULLIF returns NULL rather
     -- than a divide-by-zero error when there are no songs. Rounded to four decimals.
     , CAST( 100.0 * s1.recCount / NULLIF( SUM(s1.recCount) OVER(), 0 ) AS decimal(7,4) ) AS pct
FROM (
    -- LEFT JOIN keeps genres that have no songs, with a count of 0
    SELECT m.name
         , count(t.SongID) AS recCount
    FROM MusicGenres m
    LEFT OUTER JOIN Table2 t ON m.name = t.MusicGenres
    GROUP BY m.name
) s1
Could be shortened to
-- Song count per genre and its percentage of all songs, in a single query level.
SELECT m.name
     , count(t.SongID) AS recCount
     -- SUM(COUNT(...)) OVER() is the grand total across all genre groups.
     -- 100.0 makes the arithmetic decimal. The previous CAST of the total to
     -- decimal(5,2) overflows once the total reaches 1000. NULLIF returns NULL rather
     -- than a divide-by-zero error when there are no songs. Rounded to four decimals.
     , CAST( 100.0 * count(t.SongID) / NULLIF( SUM(count(t.SongID)) OVER(), 0 ) AS decimal(7,4) ) AS pct
FROM MusicGenres m
-- LEFT JOIN keeps genres that have no songs, with a count of 0
LEFT OUTER JOIN Table2 t ON m.name = t.MusicGenres
GROUP BY m.name
Results:
| name | recCount | pct |
|----------|----------|---------|
| Electro | 2 | 18.1818 |
| Hardcore | 3 | 27.2727 |
| Pop | 5 | 45.4545 |
| Techno | 0 | 0 |
| Trance | 0 | 0 |
| trap | 1 | 9.0909 |

How can I generate a previous value table by combining fields from SQL table?

Let's say I have some data in a SQL Server database.
Location PayID Year
------------------------
Loc1 100 2010
Loc1 100 2011
Loc1 101 2012
Loc2 200 2010
Loc2 201 2011
Loc2 202 2012
And I am trying to write a query in SQL Server that will give me a table with two columns that I can search on to find out what the previous PayID for a particular Location. So the output would be.
PayID PrevID
-----------------
101 100
202 201
201 200
It only needs an entry when the previous year ID is different from the current year and I will query it recursively if I don't have the right match when a user goes back more than one year so It will pull each previous ID based on the one that was just pulled until it finds a PayID and Year matching the first table.
Any help on this would be much appreciated. I'll be attentively searching and will post a solution if I can find it.
This can be done pretty easily with a recursive CTE:
-- Pairs each PayID with the PayID the same location had in the previous year.
with cte as (
    -- anchor: every row, paired with itself
    select h.Location, h.PayID, h.PayID as PrevID, h.Year
    from payhistory h
    union all
    -- step: a row dated one year after a row already in the set takes that row's
    -- PayID as its PrevID
    select nxt.Location, nxt.PayID, prv.PayID as PrevID, nxt.Year
    from payhistory nxt
    inner join cte prv
        on prv.Location = nxt.Location
       and prv.Year + 1 = nxt.Year
)
-- keep only the pairs where the id actually changed
select distinct Location, PayID, PrevID
from cte
where PayID <> PrevID;
Here's the results I get:
| LOCATION | PAYID | PREVID |
|----------|-------|--------|
| Loc1 | 101 | 100 |
| Loc2 | 201 | 200 |
| Loc2 | 202 | 201 |
Demo: http://www.sqlfiddle.com/#!3/e0ac0/4
I didn't see a version specified, so I'd use LAG in 2012. You can filter the results if you want less info. You could replace LAG(PayID,1,NULL) with LAG(PayID,1,PayID) to alter the behavior of the first payid.
-- Sample data in a table variable. T-SQL variables must start with @; the # prefix
-- denotes temp tables and is not valid in DECLARE.
DECLARE @tbl TABLE (Location VARCHAR(4), PayID INT, Year INT)
INSERT INTO @tbl (Location, PayID, Year) VALUES
 ('Loc1',100,2010)
,('Loc1',100,2011)
,('Loc1',101,2012)
,('Loc2',200,2010)
,('Loc2',201,2011)
,('Loc2',202,2012)
-- PrevID is the PayID of the same location's preceding year; NULL for the first year
SELECT Location
      ,PayID
      ,LAG(PayID,1,NULL) OVER (PARTITION BY Location ORDER BY Year ASC) PrevID
FROM @tbl
http://www.sqlfiddle.com/#!6/e0ac0/2
Key assumption - This query will work only if the PayIds for each location are consecutive numbers.
-- Pairs each PayId with the PayId that is exactly one lower at the same location.
select distinct
       cur.payId  as PayId,
       prev.payId as PrevId
from locs as cur
inner join locs as prev
    on prev.location = cur.location
   and (prev.payid + 1) = cur.payid
Following solution is non-recursive and it might offer a better performance:
-- Non-recursive pairing of each row with its previous-year row.
-- The table variable is named @Payment: T-SQL variables must start with @, and the
-- # prefix (temp tables) is not valid in DECLARE.
DECLARE @Payment TABLE (
    ID INT IDENTITY(1,1) PRIMARY KEY,
    Location VARCHAR(50) NOT NULL,
    PayID INT NOT NULL,
    [Year] SMALLINT NOT NULL
);
INSERT @Payment
SELECT 'Loc1', 100, 2010
UNION ALL SELECT 'Loc1', 100, 2011
UNION ALL SELECT 'Loc1', 101, 2012
UNION ALL SELECT 'Loc2', 200, 2010
UNION ALL SELECT 'Loc2', 201, 2011
UNION ALL SELECT 'Loc2', 202, 2012
SELECT z.Location, z.GroupID,
    MAX(CASE WHEN z.RowType = 1 THEN z.[Year] END) AS CurrentYear,
    MAX(CASE WHEN z.RowType = 0 THEN z.[Year] END) AS PreviousYear,
    MAX(CASE WHEN z.RowType = 1 THEN z.[PayID] END) AS CurrentPayID,
    MAX(CASE WHEN z.RowType = 0 THEN z.[PayID] END) AS PreviousPayID
FROM
(
    SELECT y.PayID, y.[Location], y.[Year],
    -- It "groups" rows two by two: current row and previous row will have the same GroupID
    (ROW_NUMBER() OVER(PARTITION BY y.Location ORDER BY y.RowNum + n.Num ASC) + 1) / 2 AS GroupID,
    -- RowType: 1=Current row, 0=Previous row
    ROW_NUMBER() OVER(PARTITION BY y.Location ORDER BY y.RowNum + n.Num ASC) % 2 AS RowType
    FROM
    (
        -- RowNum 1 is the most recent year of each location
        SELECT x.Location, x.[Year], x.PayID, ROW_NUMBER() OVER(PARTITION BY x.Location ORDER BY x.[Year] DESC) RowNum
        FROM @Payment x
    ) y
    -- For every location, it duplicates every row except the most recent one (RowNum = 1)
    INNER JOIN (VALUES (1), (2)) n(Num) ON y.RowNum = 1 AND n.Num = 1 OR y.RowNum > 1
) z
GROUP BY z.Location, z.GroupID
-- keep only pairs that are exactly one year apart and whose PayID changed
HAVING MAX(CASE WHEN z.RowType = 1 THEN z.[Year] END) = MAX(CASE WHEN z.RowType = 0 THEN z.[Year] END) + 1
AND MAX(CASE WHEN z.RowType = 1 THEN z.[PayID] END) <> MAX(CASE WHEN z.RowType = 0 THEN z.[PayID] END)
ORDER BY z.Location;
Output:
Location GroupID CurrentYear PreviousYear CurrentPayID PreviousPayID
--------- ------- ----------- ------------ ------------ -------------
Loc1 1 2012 2011 101 100
Loc2 1 2012 2011 202 201
Loc2 2 2011 2010 201 200

Aggregating Several Columns in SQL

Suppose I have a table that looks like the following
id | location | dateHired | dateRehired | dateTerminated
1 | 1 | 10/1/2011 | NULL | 12/1/2011
2 | 1 | 10/3/2011 | 11/1/2011 | 12/31/2011
3 | 5 | 10/5/2011 | NULL | NULL
4 | 5 | 10/5/2011 | NULL | NULL
5 | 7 | 11/5/2011 | NULL | 12/1/2011
6 | 10 | 11/2/2011 | NULL | NULL
and I wanted to condense that into a summary table such that:
location | date | hires | rehires | terms
1 | 10/1/2011 | 1 | 0 | 0
1 | 10/3/2011 | 1 | 0 | 0
1 | 11/1/2011 | 0 | 1 | 0
1 | 12/1/2011 | 0 | 0 | 1
1 | 12/31/2011 | 0 | 0 | 1
5 | 10/5/2011 | 2 | 0 | 0
etc.
-- what would that SQL look like? I was thinking it would be something to the effect of:
SELECT
e.location
, -- ?
,SUM(CASE WHEN e.dateHired IS NOT NULL THEN 1 ELSE 0 END) AS Hires
,SUM(CASE WHEN e.dateRehired IS NOT NULL THEN 1 ELSE 0 END) As Rehires
,SUM(CASE WHEN e.dateTerminated IS NOT NULL THEN 1 ELSE 0 END) As Terms
FROM
Employment e
GROUP BY
e.Location
,--?
But I'm not real keen if that's entirely correct or not?
EDIT - This is for SQL 2008 R2.
Also,
INNER JOIN on the date columns assumes that there are values for all three categories, which is false; which is the original problem I was trying to solve. I was thinking something like COALESCE, but that doesn't really make sense either.
I am sure there is probably an easier, more elegant way to solve this. However, this is the simplest, quickest that I can think of this late that works.
-- Summary of hire / rehire / termination events per location and date.
-- Counts default to 0 so that dates without a given event type show 0, not NULL.
CREATE TABLE #Temp
(
    Location INT,
    Date DATETIME,
    HireCount INT NOT NULL DEFAULT 0,
    RehireCount INT NOT NULL DEFAULT 0,
    DateTerminatedCount INT NOT NULL DEFAULT 0
);
-- Seed one row per (location, date) on which any event happened, so the updates
-- below never need to insert. UNION already removes duplicates; NULL dates are
-- skipped because they do not represent an event.
INSERT INTO #Temp (Location, Date)
SELECT Location, DateHired FROM Employment WHERE DateHired IS NOT NULL
UNION
SELECT Location, DateRehired FROM Employment WHERE DateRehired IS NOT NULL
UNION
SELECT Location, DateTerminated FROM Employment WHERE DateTerminated IS NOT NULL;
-- Hires per location and date (COUNT(*), since SUM(*) is not valid SQL)
UPDATE #Temp
SET HireCount = Hired.HireCount
FROM #Temp
JOIN
(
    SELECT Location, DateHired AS Date, COUNT(*) AS HireCount
    FROM Employment
    WHERE DateHired IS NOT NULL
    GROUP BY Location, DateHired
) AS Hired
ON Hired.Location = #Temp.Location AND Hired.Date = #Temp.Date;
-- Rehires per location and date
UPDATE #Temp
SET RehireCount = Rehire.RehireCount
FROM #Temp
JOIN
(
    SELECT Location, DateRehired AS Date, COUNT(*) AS RehireCount
    FROM Employment
    WHERE DateRehired IS NOT NULL
    GROUP BY Location, DateRehired
) AS Rehire
ON Rehire.Location = #Temp.Location AND Rehire.Date = #Temp.Date;
-- Terminations per location and date
UPDATE #Temp
SET DateTerminatedCount = Terminated.DateTerminatedCount
FROM #Temp
JOIN
(
    SELECT Location, DateTerminated AS Date, COUNT(*) AS DateTerminatedCount
    FROM Employment
    WHERE DateTerminated IS NOT NULL
    GROUP BY Location, DateTerminated
) AS Terminated
ON Terminated.Location = #Temp.Location AND Terminated.Date = #Temp.Date;
SELECT * FROM #Temp
How about something like:
-- One row per (location, date) on which at least one hire, rehire or termination
-- happened, with the number of each.
-- events turns every non-NULL date into a row carrying a 1 in its own counter
-- column; summing the counters per location and date gives the three totals in one
-- pass, with no correlated subqueries and no unaliased derived table.
with events as (
    select location, dateHired as [d], 1 as hires, 0 as rehires, 0 as terms
    from tbl
    where dateHired is not null
    union all
    select location, dateRehired, 0, 1, 0
    from tbl
    where dateRehired is not null
    union all
    select location, dateTerminated, 0, 0, 1
    from tbl
    where dateTerminated is not null
)
select location, [d],
    sum(hires) as hires,
    sum(rehires) as rehires,
    sum(terms) as terms
from events
group by location, [d]
I don't have a SQL server handy, or I'd test it out.
-- Hires, rehires and terminations per location and date.
-- Each derived table counts one event type. FULL OUTER JOINs keep a date that has
-- only some of the event types (INNER JOINs would drop it), and COALESCE turns the
-- missing counts into 0. The joins use the [date] alias that the derived tables
-- expose, and each GROUP BY names the underlying column rather than the alias.
SELECT
    COALESCE(H.location, R.location, T.location) AS location,
    COALESCE(H.[date], R.[date], T.[date]) AS [date],
    COALESCE(H.hires, 0) AS hires,
    COALESCE(R.rehires, 0) AS rehires,
    COALESCE(T.terminated, 0) AS terminated
FROM
    (SELECT location, dateHired AS [date], COUNT(1) AS hires
     FROM mytable
     WHERE dateHired IS NOT NULL
     GROUP BY location, dateHired) H
FULL OUTER JOIN
    (SELECT location, dateReHired AS [date], COUNT(1) AS rehires
     FROM mytable
     WHERE dateReHired IS NOT NULL
     GROUP BY location, dateReHired) R
    ON R.location = H.location AND R.[date] = H.[date]
FULL OUTER JOIN
    (SELECT location, dateTerminated AS [date], COUNT(1) AS terminated
     FROM mytable
     WHERE dateTerminated IS NOT NULL
     GROUP BY location, dateTerminated) T
    -- H may be NULL-extended after the first join, so match on whichever side exists
    ON T.location = COALESCE(H.location, R.location)
   AND T.[date] = COALESCE(H.[date], R.[date])