Aggregating Several Columns in SQL - sql

Suppose I have a table that looks like the following
id | location | dateHired | dateRehired | dateTerminated
1 | 1 | 10/1/2011 | NULL | 12/1/2011
2 | 1 | 10/3/2011 | 11/1/2011 | 12/31/2011
3 | 5 | 10/5/2011 | NULL | NULL
4 | 5 | 10/5/2011 | NULL | NULL
5 | 7 | 11/5/2011 | NULL | 12/1/2011
6 | 10 | 11/2/2011 | NULL | NULL
and I wanted to condense that into a summary table such that:
location | date | hires | rehires | terms
1 | 10/1/2011 | 1 | 0 | 0
1 | 10/3/2011 | 1 | 0 | 0
1 | 11/1/2011 | 0 | 1 | 0
1 | 12/1/2011 | 0 | 0 | 1
1 | 12/31/2011 | 0 | 0 | 1
5 | 10/5/2011 | 2 | 0 | 0
etc.
-- what would that SQL look like? I was thinking it would be something to the effect of:
-- Asker's draft: per-location conditional counts of rows whose hire, rehire and
-- termination dates are non-NULL. The "?" placeholders mark the date column
-- that is still missing from both the select list and the GROUP BY.
SELECT
e.location
, -- ?
,SUM(CASE WHEN e.dateHired IS NOT NULL THEN 1 ELSE 0 END) AS Hires
,SUM(CASE WHEN e.dateRehired IS NOT NULL THEN 1 ELSE 0 END) As Rehires
,SUM(CASE WHEN e.dateTerminated IS NOT NULL THEN 1 ELSE 0 END) As Terms
FROM
Employment e
GROUP BY
e.Location
,--?
But I'm not sure whether that's correct.
EDIT - This is for SQL 2008 R2.
Also,
INNER JOIN on the date columns assumes that there are values for all three categories, which is false; which is the original problem I was trying to solve. I was thinking something like COALESCE, but that doesn't really make sense either.

I am sure there is an easier, more elegant way to solve this. However, this is the simplest and quickest working solution I can think of this late in the day.
-- Summary of hires / rehires / terminations per (Location, Date).
-- The counters default to 0 so that a date with no event of a given kind
-- reports 0 instead of NULL.
CREATE TABLE #Temp
(
    Location INT,
    Date DATETIME,
    HireCount INT NOT NULL DEFAULT 0,
    RehireCount INT NOT NULL DEFAULT 0,
    DateTerminatedCount INT NOT NULL DEFAULT 0
);

-- Seed one row per (Location, Date) on which at least one event happened.
-- UNION already de-duplicates; NULL dates are skipped because a missing
-- rehire/termination date is not an event.
INSERT INTO #Temp (Location, Date)
SELECT Location, DateHired FROM Employment WHERE DateHired IS NOT NULL
UNION
SELECT Location, DateRehired FROM Employment WHERE DateRehired IS NOT NULL
UNION
SELECT Location, DateTerminated FROM Employment WHERE DateTerminated IS NOT NULL;

-- Fill in the hire counts.
UPDATE t
SET HireCount = Hired.HireCount
FROM #Temp AS t
INNER JOIN
(
    SELECT Location, DateHired AS Date, COUNT(*) AS HireCount
    FROM Employment
    WHERE DateHired IS NOT NULL
    GROUP BY Location, DateHired
) AS Hired
    ON Hired.Location = t.Location AND Hired.Date = t.Date;

-- Fill in the rehire counts.
UPDATE t
SET RehireCount = Rehire.RehireCount
FROM #Temp AS t
INNER JOIN
(
    SELECT Location, DateRehired AS Date, COUNT(*) AS RehireCount
    FROM Employment
    WHERE DateRehired IS NOT NULL
    GROUP BY Location, DateRehired
) AS Rehire
    ON Rehire.Location = t.Location AND Rehire.Date = t.Date;

-- Fill in the termination counts.
UPDATE t
SET DateTerminatedCount = Terminated.DateTerminatedCount
FROM #Temp AS t
INNER JOIN
(
    SELECT Location, DateTerminated AS Date, COUNT(*) AS DateTerminatedCount
    FROM Employment
    WHERE DateTerminated IS NOT NULL
    GROUP BY Location, DateTerminated
) AS Terminated
    ON Terminated.Location = t.Location AND Terminated.Date = t.Date;

SELECT Location, Date, HireCount, RehireCount, DateTerminatedCount
FROM #Temp
ORDER BY Location, Date;

How about something like:
-- One row per (location, date) with the number of hires, rehires and
-- terminations on that date.
-- Each source row is turned into up to three "event" rows carrying a 1 in the
-- matching counter column; summing the counters per (location, date) then
-- yields the totals in a single pass over the events.
with events as (
    select location, dateHired as [d], 1 as hires, 0 as rehires, 0 as terms
    from tbl
    where dateHired is not null
    union all
    select location, dateRehired, 0, 1, 0
    from tbl
    where dateRehired is not null
    union all
    select location, dateTerminated, 0, 0, 1
    from tbl
    where dateTerminated is not null
)
select
    location,
    [d],
    sum(hires) as hires,
    sum(rehires) as rehires,
    sum(terms) as terms
from events
group by location, [d]
order by location, [d]
I don't have a SQL server handy, or I'd test it out.

-- One row per (location, date) with the number of hires, rehires and
-- terminations. FULL OUTER JOIN keeps dates that have only some of the three
-- event types; COALESCE turns the missing counts into 0.
SELECT
    COALESCE(H.location, R.location, T.location) AS location,
    COALESCE(H.[date], R.[date], T.[date]) AS [date],
    COALESCE(H.hires, 0) AS hires,
    COALESCE(R.rehires, 0) AS rehires,
    COALESCE(T.terminated, 0) AS terminated
FROM
    (SELECT location, dateHired AS [date], COUNT(*) AS hires
     FROM mytable
     WHERE dateHired IS NOT NULL
     GROUP BY location, dateHired) H
FULL OUTER JOIN
    (SELECT location, dateRehired AS [date], COUNT(*) AS rehires
     FROM mytable
     WHERE dateRehired IS NOT NULL
     GROUP BY location, dateRehired) R
    ON R.location = H.location
   AND R.[date] = H.[date]
FULL OUTER JOIN
    (SELECT location, dateTerminated AS [date], COUNT(*) AS terminated
     FROM mytable
     WHERE dateTerminated IS NOT NULL
     GROUP BY location, dateTerminated) T
    -- H may be NULL-extended by the previous join, so match on whichever side exists
    ON T.location = COALESCE(H.location, R.location)
   AND T.[date] = COALESCE(H.[date], R.[date])

Related

Split Columns into two equal number of Rows

I have the table structure below,
I need to split the CouponNumber values evenly into two columns, CouponNumber1 and CouponNumber2, as shown in the figure.
-- Asker's current query: one row per coupon.
SELECT
    Name,
    MobileNumber,
    CouponNumber,
    IsDispatched,
    Status
FROM
    CouponInvoicePrescription
This is my query.
Try this:
-- Folds every two consecutive sample rows (by ord) into one output row.
WITH
-- Inline sample data; ord is a running position used to form the pairs.
input(ord, name, mobno, couponno, isdispatched, status) AS (
              SELECT 0, 'amar', 8888888888, 'CPever901', FALSE, 1
    UNION ALL SELECT 1, 'amar', 8888888888, 'CP00005',   FALSE, 1
    UNION ALL SELECT 2, 'pt3',  7777777777, 'cp9090',    FALSE, 1
    UNION ALL SELECT 3, 'pt3',  7777777777, 'ev2',       FALSE, 1
    UNION ALL SELECT 4, 'pt3',  7777777777, 'cp9909',    FALSE, 1
    UNION ALL SELECT 5, 'pt3',  7777777777, 'cp10',      FALSE, 1
)
SELECT
    name,
    -- the odd-positioned coupon of the pair goes to couponno1, the even one to couponno2
    MAX(CASE WHEN ord % 2 = 1 THEN couponno END) AS couponno1,
    MAX(CASE WHEN ord % 2 = 0 THEN couponno END) AS couponno2,
    isdispatched,
    status
FROM input
GROUP BY
    ord / 2,        -- pair key; assumes integer division, as the sample output implies
    name,
    isdispatched,
    status
ORDER BY name
-- out name | couponno1 | couponno2 | isdispatched | status
-- out ------+-----------+-----------+--------------+--------
-- out amar | CP00005 | CPever901 | f | 1
-- out pt3 | cp10 | cp9909 | f | 1
-- out pt3 | ev2 | cp9090 | f | 1
Try this:
-- Pairs each odd-numbered coupon of a mobile number with the coupon that
-- follows it. `input` stands for the source rows -- presumably the asker's
-- CouponInvoicePrescription table; confirm before use.
SELECT
    paired.rn,
    paired.Name,
    paired.MobileNumber,
    paired.CouponNumber1,
    paired.CouponNumber2,
    paired.IsDispatched,
    paired.Status
FROM
(
    SELECT
        sub.rn,
        sub.Name,
        sub.MobileNumber,
        sub.CouponNumber AS CouponNumber1,
        LEAD(sub.CouponNumber, 1) OVER (PARTITION BY sub.MobileNumber ORDER BY sub.rn) AS CouponNumber2,
        sub.IsDispatched,
        sub.Status
    FROM
    (
        SELECT
            -- CouponNumber is added as a tie-breaker: ordering by Name alone
            -- leaves the numbering arbitrary when all names in a partition match
            ROW_NUMBER() OVER (PARTITION BY MobileNumber ORDER BY Name, CouponNumber) AS rn,
            Name,
            MobileNumber,
            CouponNumber,
            IsDispatched,
            Status
        FROM
            input
    ) sub
) paired   -- alias added: most engines require one on a derived table
WHERE paired.rn % 2 <> 0

display 3 or more consecutive rows(Sql)

I have a table with below data
+------+------------+-----------+
| id | date1 | people |
+------+------------+-----------+
| 1 | 2017-01-01 | 10 |
| 2 | 2017-01-02 | 109 |
| 3 | 2017-01-03 | 150 |
| 4 | 2017-01-04 | 99 |
| 5 | 2017-01-05 | 145 |
| 6 | 2017-01-06 | 1455 |
| 7 | 2017-01-07 | 199 |
| 8 | 2017-01-08 | 188 |
+------+------------+-----------+
Now what I am trying to do is display the rows that belong to a run of 3 or more consecutive rows where people >= 100, like this:
+------+------------+-----------+
| id | date1 | people |
+------+------------+-----------+
| 5 | 2017-01-05 | 145 |
| 6 | 2017-01-06 | 1455 |
| 7 | 2017-01-07 | 199 |
| 8 | 2017-01-08 | 188 |
+------+------------+-----------+
Can anyone help me write this query for an Oracle database? I am able to display the rows that are above 100, but not only the consecutive ones.
Table creation(reducing typing time for people who will be helping)
-- Sample table for the question: one row per day with the head count.
CREATE TABLE stadium (
    id     int,
    date1  date,
    people int
);

-- Sample rows, one per consecutive day.
INSERT INTO stadium (id, date1, people) VALUES (1, TO_DATE('2017-01-01','YYYY-MM-DD'), 10);
INSERT INTO stadium (id, date1, people) VALUES (2, TO_DATE('2017-01-02','YYYY-MM-DD'), 109);
INSERT INTO stadium (id, date1, people) VALUES (3, TO_DATE('2017-01-03','YYYY-MM-DD'), 150);
INSERT INTO stadium (id, date1, people) VALUES (4, TO_DATE('2017-01-04','YYYY-MM-DD'), 99);
INSERT INTO stadium (id, date1, people) VALUES (5, TO_DATE('2017-01-05','YYYY-MM-DD'), 145);
INSERT INTO stadium (id, date1, people) VALUES (6, TO_DATE('2017-01-06','YYYY-MM-DD'), 1455);
INSERT INTO stadium (id, date1, people) VALUES (7, TO_DATE('2017-01-07','YYYY-MM-DD'), 199);
INSERT INTO stadium (id, date1, people) VALUES (8, TO_DATE('2017-01-08','YYYY-MM-DD'), 188);
Thanks in advance for the help
Assuming you mean >= 100, there are a couple of ways. One method just uses lead() and lag(). But a simple method defines each group >= 100 by the number of values < 100 before it. Then it uses count(*) to find the size of the consecutive values:
-- Rows belonging to a run of at least 3 consecutive days with people >= 100.
-- grp = number of "< 100" rows seen so far (ordered by date1), so every run of
-- ">= 100" rows shares one grp value with the "< 100" row that precedes it.
select s.*
from (select s.*, count(*) over (partition by grp) as num100pl
      from (select s.*,
                   sum(case when people < 100 then 1 else 0 end) over (order by date1) as grp
            from stadium s
           ) s
      -- drop the "< 100" rows before counting; otherwise they are counted as
      -- part of the run and returned in the result
      where people >= 100
     ) s
where num100pl >= 3
order by date1;
Here is a SQL Fiddle showing that the syntax works.
You can use the following sql script to get the desired output.
-- Rows belonging to a run of at least 3 consecutive ids with people >= 100.
-- Among the surviving rows, id - ROW_NUMBER() is constant while ids are
-- consecutive and changes at every gap, so it identifies each run.
WITH partitioned AS (
    SELECT s.id,
           s.date1,
           s.people,
           s.id - ROW_NUMBER() OVER (ORDER BY s.id) AS grp
    FROM stadium s
    WHERE s.people >= 100
),
counted AS (
    SELECT p.id,
           p.date1,
           p.people,
           COUNT(*) OVER (PARTITION BY p.grp) AS cnt   -- length of the run
    FROM partitioned p
)
SELECT id, date1, people
FROM counted
WHERE cnt >= 3
ORDER BY id
I'm assuming that both the id and date columns are sequential and correspond to each other (there will need to be additional ROW_NUMBER() if the ids are not sequential with the dates, and more complex logic included if the dates are not necessarily sequential).
-- Rows belonging to a run of at least 3 consecutive days with people >= 100.
-- Uses the question's column name (date1) and Oracle-compatible syntax:
-- "*" is alias-qualified and table aliases are written without AS.
SELECT
    subquery2.*
FROM
(
    SELECT
        subquery1.*
        -- number of rows in the run this row belongs to
        ,COUNT(subquery1.date1) OVER (PARTITION BY subquery1.sequential_group_num) AS num_days_in_sequence
    FROM
    (
        SELECT
            s.*
            -- constant within a run of consecutive ids, different across gaps
            ,(s.id - ROW_NUMBER() OVER (ORDER BY s.date1)) AS sequential_group_num
        FROM
            stadium s
        WHERE
            s.people >= 100
    ) subquery1
) subquery2
WHERE
    subquery2.num_days_in_sequence >= 3
That produces the following output:
id date people sequential_group_num num_days_in_sequence
----------- ---------- ----------- -------------------- --------------------
5 2017-01-05 145 2 4
6 2017-01-06 1455 2 4
7 2017-01-07 199 2 4
8 2017-01-08 188 2 4
Using correlated scalar subqueries (rather than joins), we can display the consecutive rows like this:
-- A row qualifies when it has people >= 100 and is the first, middle or last
-- member of three rows with consecutive ids that all have people >= 100.
SELECT a.id, a.date1, a.people
FROM stadium a
WHERE a.people >= 100
  AND (
        -- first of three: the next two ids qualify
        (    (SELECT b.people FROM stadium b WHERE b.id = a.id + 1) >= 100
         AND (SELECT c.people FROM stadium c WHERE c.id = a.id + 2) >= 100)
        -- middle of three: the previous and the next id qualify
     OR (    (SELECT e.people FROM stadium e WHERE e.id = a.id - 1) >= 100
         AND (SELECT f.people FROM stadium f WHERE f.id = a.id + 1) >= 100)
        -- last of three: the previous two ids qualify
     OR (    (SELECT g.people FROM stadium g WHERE g.id = a.id - 1) >= 100
         AND (SELECT h.people FROM stadium h WHERE h.id = a.id - 2) >= 100)
      )
ORDER BY a.id;
-- Rows t1 that, together with two other rows t2 and t3, form three
-- consecutive ids that all have people >= 100. t1 may be the first, the middle
-- or the last of the three; DISTINCT collapses the multiple matches per row.
select distinct
    t1.id,
    t1.date1,
    t1.people
from
    stadium t1
cross join          -- explicit: a bare JOIN without ON is rejected by Oracle
    stadium t2
cross join
    stadium t3
where
    t1.people >= 100
    and t2.people >= 100
    and t3.people >= 100
    and
    (
        -- t1, t2, t3
        (t1.id + 1 = t2.id and t2.id + 1 = t3.id)
        or
        -- t2, t1, t3
        (t2.id + 1 = t1.id and t1.id + 1 = t3.id)
        or
        -- t2, t3, t1
        (t2.id + 1 = t3.id and t3.id + 1 = t1.id)
    )
order by
    t1.id;
SQL script:
-- SS2 holds every id that starts a window of three consecutive ids
-- (ID, ID+1, ID+2) in which all three rows have PEOPLE >= 100. The outer join
-- then returns every row covered by at least one such window.
SELECT DISTINCT SS.*
FROM STADIUM SS
INNER JOIN
    (SELECT S1.ID
     FROM STADIUM S1
     WHERE 3 = (
               SELECT COUNT(1)
               FROM STADIUM S2
               WHERE (S2.ID = S1.ID OR S2.ID = S1.ID + 1 OR S2.ID = S1.ID + 2)
                 AND S2.PEOPLE >= 100
               )) SS2   -- no AS: Oracle does not accept AS before a table alias
    ON SS.ID >= SS2.ID AND SS.ID < SS2.ID + 3
-- Rows belonging to a run of at least 3 consecutive days with people >= 100.
select s.*
from (
    select t.*, count(*) over (partition by t.grp) as total   -- size of the run
    from (
        -- grp = number of "< 100" rows seen so far, ordered by date1
        select st.*,
               sum(case when st.people < 100 then 1 else 0 end) over (order by st.date1) as grp
        from stadium st
    ) t -- inner query 1
    where t.people >= 100
) s -- inner query 2
where s.total >= 3 -- outer query
I wrote the following solution for this similar leetcode problem:
-- Rows belonging to a run of at least 3 consecutive days with people >= 100.
-- "*" is alias-qualified throughout so the query is also accepted by engines
-- (e.g. Oracle) that reject a bare "*" followed by further columns.
with groupVisitsOver100 as (
    -- visitGroups = number of "< 100" rows seen so far, ordered by date1
    select s.*,
           sum(
               case
                   when s.people < 100 then 1
                   else 0
               end
           ) over (order by s.date1) as visitGroups
    from stadium s
),
filterUnder100 as (
    -- keep only the qualifying rows so that group sizes count them alone
    select g.*
    from groupVisitsOver100 g
    where g.people >= 100
),
countGroupsSize as (
    select f.*,
           count(*) over (partition by f.visitGroups) as groupsSize
    from filterUnder100 f
)
select id, date1, people
from countGroupsSize
where groupsSize >= 3
order by date1

Selecting records with maximum value in group

I have a transaction table with the following structure:
-- Every transaction together with the description of its status.
select
    t.[GUID],
    t.[ID],
    ts.Description "Status",
    t.Payee,
    t.Amount,
    t.SequenceNumber
from [Transaction] t
inner join TransactionStatus ts
    on t.StatusID = ts.ID
GUID | ID | Status | Payee | Amount | SequenceNumber
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 1 | Posted | Amy | 500.00 | 1
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 2 | Voided | Amy | 500.00 | 2
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 3 | Posted | Bob | 70.00 | 1
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 4 | Posted | Amy | 512.50 | 3
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 5 | Posted | Bob | 66.00 | 2
F2CC0B03-76C7-E411-A48D-004056AB787C | 6 | Pending | Carol | 240.00 | NULL
I'm trying to construct a query to group the records by GUID and select the single record with the largest SequenceNumber (if it isn't NULL):
GUID | ID | Status | Payee | Amount | SequenceNumber
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 4 | Posted | Amy | 512.50 | 3
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 5 | Posted | Bob | 66.00 | 2
F2CC0B03-76C7-E411-A48D-004056AB787C | 6 | Pending | Carol | 240.00 | NULL
I've tried adding this line:
where SequenceNumber = (select MAX(SequenceNumber) from [Transaction] t2 where t.[GUID] = t2.[GUID])
but that doesn't get me any transactions where the status is Pending (they don't have sequence numbers). How can I fix this query?
If it's SQL-Server you can use a CTE + ROW_NUMBER:
-- Numbers the rows of each GUID by descending SequenceNumber and keeps the
-- first row of every GUID.
WITH ranked AS
(
    select
        t.[GUID],
        t.[ID],
        ts.Description "Status",
        t.Payee,
        t.Amount,
        t.SequenceNumber,
        row_number() over (partition by t.[GUID] order by t.SequenceNumber desc) as rn
    from [Transaction] t
    inner join TransactionStatus ts
        on t.StatusID = ts.ID
)
SELECT
    GUID,
    ID,
    Status,
    Payee,
    Amount,
    SequenceNumber
FROM ranked
WHERE rn = 1
This will include the row where SequenceNumber is null. If you want all rows with the maximum SequenceNumber(in case of ties) use DENSE_RANK instead of ROW_NUMBER.
You can calculate the MAX(ID) and its related [GUID] in a subquery and JOIN to it in order to get the desired results:
Sample subquery:
-- Highest ID per GUID. Transaction is a reserved word in T-SQL, so the table
-- name has to be bracketed.
SELECT [GUID] ,
       MAX(ID) AS MaxId
FROM [Transaction]
GROUP BY [GUID]
Would produce:
GUID MaxId
1F7D880C-E7C0-E411-B8F6-004056AB77C2 5
AF732CF5-E6C0-E411-B8F6-004056AB77C2 4
F2CC0B03-76C7-E411-A48D-004056AB787C 6
Full Demo:
-- Demo fixture mirroring the question's sample data.
CREATE TABLE #Transaction
    (
      [GUID] VARCHAR(36) ,
      [ID] INT ,
      [Status] VARCHAR(7) ,
      [Payee] VARCHAR(5) ,
      [Amount] DECIMAL(10, 2) ,   -- exact decimal: INT would truncate 512.50 to 512
      [SequenceNumber] INT        -- numeric, so it compares as a number rather than as text
    );

INSERT INTO #Transaction
        ( [GUID], [ID], [Status], [Payee], [Amount], [SequenceNumber] )
VALUES  ( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 1, 'Posted', 'Amy', 500.00, 1 ),
        ( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 2, 'Voided', 'Amy', 500.00, 2 ),
        ( '1F7D880C-E7C0-E411-B8F6-004056AB77C2', 3, 'Posted', 'Bob', 70.00, 1 ),
        ( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 4, 'Posted', 'Amy', 512.50, 3 ),
        ( '1F7D880C-E7C0-E411-B8F6-004056AB77C2', 5, 'Posted', 'Bob', 66.00, 2 ),
        ( 'F2CC0B03-76C7-E411-A48D-004056AB787C', 6, 'Pending', 'Carol', 240.00, NULL );

-- Keep, for every GUID, the row carrying that GUID's highest ID.
SELECT  tr.[GUID] ,
        tr.[ID] ,
        tr.[Status] ,
        tr.[Payee] ,
        tr.[Amount] ,
        tr.[SequenceNumber]
FROM    #Transaction AS tr
        INNER JOIN ( SELECT [GUID] ,
                            MAX(ID) AS MaxId
                     FROM   #Transaction
                     GROUP BY [GUID]
                   ) AS t ON t.[GUID] = tr.[GUID]
                         AND t.MaxId = tr.ID
ORDER BY tr.ID
Try this way to get maximum SequenceNumber
-- Select-list expression for a grouped query: NULL when any row of the group
-- has a NULL SequenceNumber, otherwise the group's highest SequenceNumber.
-- T-SQL has no boolean-valued "x IS NULL" expression, so the test is spelled
-- out with an inner CASE.
CASE
    WHEN MAX(CASE WHEN SequenceNumber IS NULL THEN 1 ELSE 0 END) = 0
        THEN MAX(SequenceNumber)
    ELSE NULL
END AS SequenceNumber
I don't know if SQL Server has windowing functions, so you may be able to do this more cleanly, but here's a vanilla SQL solution:
-- Anti-join: keep a transaction only when no other transaction with the same
-- GUID has a greater SequenceNumber. A row whose SequenceNumber is NULL never
-- finds a "higher" match (the comparison is UNKNOWN), so it is kept as well.
select highest.[GUID],
       highest.[ID],
       ts.Description "Status",
       highest.Payee,
       highest.Amount,
       highest.SequenceNumber
from [Transaction] highest
join TransactionStatus ts
  on ts.ID = highest.StatusID   -- status lookup goes through StatusID, not the transaction's own ID
left join [Transaction] higher
  on higher.[GUID] = highest.[GUID]
 and higher.SequenceNumber > highest.SequenceNumber
where higher.[GUID] is null;
Something like this:
-- Numbers the rows of each GUID by descending SequenceNumber and keeps the
-- first row of every GUID.
SELECT * FROM
(
    select
        t.[GUID], t.[ID], ts.Description "Status", t.Payee, t.Amount,
        -- the whole window specification belongs inside the parentheses after OVER
        ROW_NUMBER() OVER (PARTITION BY t.[GUID]
                           ORDER BY t.SequenceNumber DESC) AS rownum
    from [Transaction] t
    inner join TransactionStatus ts on t.StatusID = ts.ID
) vals
where vals.rownum = 1

make a select query with group by

This my table with sample data.
id | path | category (1-6) | secter_id | date
----------------------------------------------
1 | ddd | 5 | a | 10-01
2 | ddgg | 6 | a | 10-03
3 | fff | 5 | a | 10-02
I want to filter the latest category 5 and 6 rows for each sector id.
Expected result
id path | category| secter_id | date
--------------------------------------
2 | ddgg | 6 | a | 10-03
3 | fff | 5 | a | 10-02
Is this possible using only SQL?
This query should do it for you
-- Latest row of categories 5 and 6 for each sector: B holds the newest date
-- per (sector, category); joining back on all three columns fetches the rows.
SELECT A.ID,
       A.PATH,
       A.CATEGORY,
       A.SECTOR_ID,
       A.dDATE
FROM yourTable A
INNER JOIN
     (SELECT SECTOR_ID,
             CATEGORY,
             MAX(dDate) AS dDate
      FROM yourTable
      WHERE CATEGORY IN (5, 6)          -- only the two requested categories
      GROUP BY SECTOR_ID, CATEGORY) B   -- per sector, not across all sectors
  ON A.SECTOR_ID = B.SECTOR_ID
 AND A.CATEGORY = B.CATEGORY
 AND A.dDate = B.dDate
Here is a SQLFiddle with the query
You can try this code; it is not elegant, but it should work.
-- Latest row of categories 5 and 6 for each sector. Every column is qualified
-- because category and date exist in both a and b and would otherwise be
-- ambiguous.
SELECT a.id, a.path, a.category, a.secter_id, a.date
FROM myTable a
INNER JOIN (
    SELECT secter_id, category, MAX(date) AS date
    FROM myTable
    WHERE category IN (5, 6)
    GROUP BY secter_id, category
) b
    ON a.secter_id = b.secter_id
   AND a.category = b.category
   AND a.date = b.date
You can try this -
-- Latest row(s) of categories 5 and 6 for each sector; DENSE_RANK keeps all
-- rows that tie on the newest date.
SELECT id, path, category, secter_id, date
FROM
(
    SELECT id, path, category, secter_id, date,
           -- rank within (sector, category) so each sector gets its own latest rows
           DENSE_RANK() OVER (PARTITION BY secter_id, category ORDER BY date DESC) date_rank
    FROM sample_table t
    WHERE category IN (5, 6)
) ranked   -- alias added: most engines require one on a derived table
WHERE date_rank = 1;
try this
-- Latest row(s) of categories 5 and 6 for each sector; DENSE_RANK keeps all
-- rows that tie on the newest date.
select path, category, secter_id, date
from
(
    select path, category, secter_id, date,
           -- rank within (sector, category) so each sector gets its own latest rows
           dense_rank() over (partition by secter_id, category order by date desc) as rk
    from tbl
    where category in (5, 6)
) data
where rk = 1
-- Latest row of categories 5 and 6 for each sector (exactly one row per
-- sector and category, even when dates tie).
select ranked.*
from (
    select
        id, path, category, secter_id, date,
        row_number() over (partition by secter_id, category order by date desc) as rnk
    from your_table
    where category in (5, 6)   -- without this filter every category is returned
) ranked                       -- alias added: most engines require one on a derived table
where ranked.rnk = 1;
Try this
-- Latest row(s) of categories 5 and 6 for each sector.
-- The subquery is correlated on sector and category: comparing against the
-- set of all per-category maximum dates would also return rows whose date
-- merely equals the maximum of a different category.
SELECT t.[id]
      ,t.[path]
      ,t.[category]
      ,t.[secter_id]
      ,t.[date]
FROM [MyTable] AS t
WHERE t.[category] IN (5, 6)
  AND t.[date] = (SELECT MAX(m.[date])
                  FROM [MyTable] AS m
                  WHERE m.[secter_id] = t.[secter_id]
                    AND m.[category] = t.[category])

T-sql rank for max and min value

I need help with a t-sql query.
I have a table with this structure:
id | OverallRank | FirstRank | SecondRank | Nrank..
1 | 10 | 20 | 30 | 5
2 | 15 | 24 | 12 | 80
3 | 10 | 40 | 37 | 12
I need a query that produces this kind of result:
When id: 1
id | OverallRank | BestRankLabel | BestRankValue | WorstRankLabel | WorstRankValue
1 | 10 | SecondRank | 30 | Nrank | 5
When id: 2
id | OverallRank | BestRankLabel | BestRankValue | WorstRankLabel | WorstRankValue
2 | 15 | FirstRank | 24 | SecondRank | 12
How can I do it?
Thanks in advance
-- Turns the rank columns into (id, RankValue, RankName) rows, then reports per
-- id the OverallRank plus the name and value of the highest and lowest rank.
with unpivoted (id, RankValue, RankName) as (
    select id, RankValue, RankName
    from
        (select id, OverallRank, FirstRank, SecondRank, Nrank
         from ##input) src
    unpivot
        (RankValue for RankName in
            (OverallRank, FirstRank, SecondRank, Nrank)
        ) as u
),
-- every unpivoted row annotated with the extreme values of its id
extremes as (
    select
        id,
        RankValue,
        RankName,
        max(RankValue) over (partition by id) as MaxRankValue,
        min(RankValue) over (partition by id) as MinRankValue
    from unpivoted
)
select
    id,
    max(case when RankName = 'OverallRank' then RankValue else null end) as OverallRank,
    -- when several columns tie on the extreme value, max() picks the last name alphabetically
    max(case when RankValue = MaxRankValue then RankName else null end) as BestRankName,
    max(MaxRankValue) as BestRankValue,
    max(case when RankValue = MinRankValue then RankName else null end) as WorstRankName,
    max(MinRankValue) as WorstRankValue
from extremes
group by id
This works with your test data. You only need to edit the column list in RankName IN (OverallRank, FirstRank, SecondRank, Nrank) so that it contains your actual rank column names.
-- Sketch only: label the rank column that holds the largest value. Add one
-- WHEN branch per rank column; the "..." stands for the remaining comparisons.
CASE
WHEN OverallRank > FirstRank and OverallRank > SecondRank and OverallRank > nRank THEN 'OverallRank'
WHEN FirstRank > OverallRank ... THEN 'FirstRank'
END
This kind of query is why you should normalise your data.
-- Best and worst rank for a single id.
-- @id       : the id to report on
-- @numranks : number of rank columns listed in the UNPIVOT; with one unpivoted
--             row per column, the row numbered @numranks is the lowest value
-- T-SQL variables are prefixed with @ (a # prefix denotes a temporary table).
declare @id int, @numranks int
select @id = 1, @numranks = 3 -- number of Rank columns
;with cte as
(
    select *
    from
    (
        select *,
            -- rn = 1 is the highest rank value of the id, rn = @numranks the lowest
            ROW_NUMBER() over (partition by id order by rank desc) rn
        from
        (
            select * from YourBadlyDesignedTable
            unpivot (Rank for RankNo in (FirstRank, SecondRank, ThirdRank))u -- etc
        ) v2
    ) v1
    where id=@id and rn in (1, @numranks)
)
select
    tMin.id,
    tMin.OverallRank,
    tMin.RankNo as BestRankLabel,
    tMin.Rank as BestRankValue,
    tMax.RankNo as WorstRankLabel,
    tMax.Rank as WorstRankValue
from (select * from cte where rn=1) tMin
inner join (select * from cte where rn>1) tMax
    on tMin.id = tMax.id
You can remove the id filter from the WHERE clause inside the CTE if you want the result for every id.