I have a table of 2 columns like below. One ID can have multiple Code.
╔═════════╗
║ ID Code ║
╠═════════╣
║ 1 0 ║
║ 1 16 ║
║ 1 41 ║
║ 2 1 ║
║ 2 40 ║
║ 3 41 ║
║ 3 67 ║
║ 4 70 ║
║ 5 16 ║
║ 5 67 ║
║ 6 41 ║
║ 6 68 ║
╚═════════╝
My desired outcome is to identify which Group each ID belongs to, by checking all Codes of that ID. The checking rules are:
╔══════════════════════════════════╦═══════╗
║ Code ║ Group ║
╠══════════════════════════════════╬═══════╣
║ Contains 0 ║ M1 ║
║ Contains 1 or 16 or 40 ║ M2 ║
║ Contains 41 or 67 ║ M3 ║
║ Contains 68 or 70 ║ M4 ║
║ Contains codes of both M2 and M3 ║ M5 ║
║ Contains codes of both M3 and M4 ║ M6 ║
╚══════════════════════════════════╩═══════╝
Note: If ID has Code 0, Group is M1, then stop checking other rules.
The required output should look like this:
╔══════════╗
║ ID Group ║
╠══════════╣
║ 1 M1 ║
║ 2 M2 ║
║ 3 M3 ║
║ 4 M4 ║
║ 5 M5 ║
║ 6 M6 ║
╚══════════╝
What I have tried so far is using STUFF and FOR XML PATH studied from this thread:
-- One row per ID with a comma-separated list of that ID's distinct codes.
-- Code is cast explicitly so the concatenation also works when the column is numeric.
SELECT t.ID,
       STUFF((SELECT DISTINCT ', ' + CAST(f.Code AS VARCHAR(20))
              FROM tblFee AS f
              WHERE f.ID = t.ID
              FOR XML PATH ('')),
             1, 2, '')   -- strip the whole leading ', ' separator (two characters)
       AS [Group]        -- GROUP is a reserved word, so the alias must be delimited
FROM (
SELECT DISTINCT ID FROM tblFee
) AS t
to get a result like:
╔══════╦═══════════╗
║ ID ║ Group ║
╠══════╬═══════════╣
║ 1 ║ 0, 16, 41 ║
║ 2 ║ 1, 40 ║
║ 3 ║ 41, 67 ║
║ 4 ║ 70 ║
║ 5 ║ 16, 67 ║
║ 6 ║ 41, 68 ║
╚══════╩═══════════╝
and then use CASE...WHEN... with LIKE to check the rules. However, my production data contains nearly 1 million records, so performance is a big problem.
You can separate records to those having one code and others before applying the CASE operator:
-- Lookup table mapping each code to its base group (M1..M4).
create table #rules (code numeric, grp varchar(2));
insert into #rules (code, grp) values (0,'M1'),(1, 'M2'),(16, 'M2'),(40, 'M2'),
(41, 'M3'),(67, 'M3'),(68, 'M4'),(70, 'M4');
with raw_groups as (
    -- distinct base groups per ID (codes without a rule are dropped by the inner join)
    select distinct t.ID, r.grp
    from table1 as t
    inner join #rules as r
        on r.code = t.code
),
enumerated as (
    -- num = how many distinct base groups the ID belongs to
    select ID, grp, count(*) over (partition by ID) as num
    from raw_groups
),
mult as (
    -- IDs with more than one base group: collapse the groups into one string
    select ID, string_agg(grp, ',') as groups
    from enumerated
    where num > 1
    group by ID
)
-- IDs with exactly one base group keep that group as-is
select ID, grp as [GROUP] from enumerated
where num = 1
union all
-- IDs with several base groups are resolved by the combination rules;
-- branches are evaluated top-down, so M1 wins over everything else
select ID,
case
when groups like '%M1%' then 'M1'
when groups like '%M2%' and groups not like '%M1%' and groups not like '%M3%' and groups not like '%M4%' then 'M2'
when groups like '%M3%' and groups not like '%M1%' and groups not like '%M2%' and groups not like '%M4%' then 'M3'
when groups like '%M4%' and groups not like '%M1%' and groups not like '%M2%' and groups not like '%M3%' then 'M4'
when groups not like '%M1%' and groups like '%M2%' and groups like '%M3%' then 'M5'
when groups not like '%M1%' and groups like '%M3%' and groups like '%M4%' then 'M6'
else 'Rule not defined' end as [GROUP]
from mult
One method is to use conditional aggregation and some case expressions:
-- For each id, list every group (M1..M6) whose rule is satisfied by that id's codes.
-- The inner query collapses the rows of an id into one 0/1 flag per code;
-- the outer query turns the flags into a comma-separated list of group labels
-- (concat_ws skips the NULLs produced by CASE branches that do not match).
select id,
       concat_ws(', ',
                 (case when has_code_0 > 0 then 'M1' end),
                 (case when has_code_1 > 0 or has_code_16 > 0 or has_code_40 > 0 then 'M2' end),
                 (case when has_code_41 > 0 or has_code_67 > 0 then 'M3' end),
                 (case when has_code_68 > 0 or has_code_70 > 0 then 'M4' end),
                 (case when (has_code_1 > 0 or has_code_16 > 0 or has_code_40 > 0) and (has_code_41 > 0 or has_code_67 > 0) then 'M5' end),
                 (case when (has_code_41 > 0 or has_code_67 > 0) and (has_code_68 > 0 or has_code_70 > 0) then 'M6' end)
                )
from (select t.id,
             max(case when code = 0 then 1 else 0 end) as has_code_0,
             max(case when code = 1 then 1 else 0 end) as has_code_1,
             max(case when code = 16 then 1 else 0 end) as has_code_16,
             max(case when code = 40 then 1 else 0 end) as has_code_40,
             max(case when code = 41 then 1 else 0 end) as has_code_41,
             max(case when code = 67 then 1 else 0 end) as has_code_67,
             max(case when code = 68 then 1 else 0 end) as has_code_68,
             max(case when code = 70 then 1 else 0 end) as has_code_70
      from t
      group by id
     ) t;
Note: concat_ws() is a recent addition to SQL Server. Slightly different code is needed in older versions.
There are various ways to structure this logic. For instance, you could assign the grouping flags in the subquery directly -- this is particularly appropriate if the groups are strictly hierarchical such as in your example:
-- Same result as listing one flag per code, but the inner query computes one
-- 0/1 flag per base group directly. Each flag has a unique alias, and all codes
-- of a group are tested together with IN.
select id,
       concat_ws(', ',
                 (case when in_group_1 > 0 then 'M1' end),
                 (case when in_group_2 > 0 then 'M2' end),
                 (case when in_group_3 > 0 then 'M3' end),
                 (case when in_group_4 > 0 then 'M4' end),
                 (case when in_group_2 > 0 and in_group_3 > 0 then 'M5' end),
                 (case when in_group_3 > 0 and in_group_4 > 0 then 'M6' end)
                )
from (select t.id,
             max(case when code = 0 then 1 else 0 end) as in_group_1,
             max(case when code in (1, 16, 40) then 1 else 0 end) as in_group_2,
             max(case when code in (41, 67) then 1 else 0 end) as in_group_3,
             max(case when code in (68, 70) then 1 else 0 end) as in_group_4
      from t
      group by id
     ) t;
Related
For example, Below is input table which has Month & User
Output Required:
NewUsers are new in that month. ExistingUsers are users in that month which have some data in previous month as well. Inactive users are users active in previous month but not in current month
Is it possible?
You can use windowed function to achieve that:
New User is very easy COUNT rows that have rn = 1
Existing Users: easy too, COUNT rows that have rn > 1
Inactive Users: a bit more complicated. Take the sum of new + existing users for the month and subtract the same sum from the previous row; a negative difference is the number of inactive users.
Code:
WITH ranked AS
(
    -- rn = 1 marks the first month in which a user appears
    SELECT t1.MonthId,
           ROW_NUMBER() OVER (PARTITION BY t1.UserKey ORDER BY t1.MonthId) AS rn
    FROM #tab AS t1
), monthly AS
(
    -- per month: first-time users, returning users, and total active rows
    SELECT MonthId,
           COUNT(CASE WHEN rn = 1 THEN 1 END) AS New_User,
           COUNT(CASE WHEN rn > 1 THEN 1 END) AS Existing_User,
           COUNT(*)                           AS s
    FROM ranked
    GROUP BY MonthId
), with_prev AS
(
    -- attach the previous month's total (NULL for the first month)
    SELECT MonthId,
           New_User,
           Existing_User,
           s,
           LAG(s, 1) OVER (ORDER BY MonthId) AS prev_s
    FROM monthly
)
SELECT MonthId,
       New_User,
       Existing_User,
       -- a drop in the total versus the previous month is reported as inactive users
       CASE WHEN prev_s > s THEN prev_s - s ELSE 0 END AS Inactive_User
FROM with_prev
ORDER BY MonthId;
LiveDemo
Output:
╔═════════╦═══════════╦════════════════╦════════════════╗
║ MonthID ║ New_Users ║ Existing_Users ║ Inactive_Users ║
╠═════════╬═══════════╬════════════════╬════════════════╣
║ 201411 ║ 1 ║ 0 ║ 0 ║
║ 201412 ║ 1 ║ 1 ║ 0 ║
║ 201501 ║ 1 ║ 2 ║ 0 ║
║ 201502 ║ 0 ║ 2 ║ 1 ║
╚═════════╩═══════════╩════════════════╩════════════════╝
Warning:
I've assumed that each user appears at most once per MonthId. If that is not the case, add one more CTE step to remove duplicates first.
I want to calculate a running total based on 5 types of transactions (let's say transactions A, B, C, D, E). I have over one thousand different products in this table, and each product could have millions of transaction records on different days.
So the table looks like this:
ProductID A B C D E Running Total
1 10 0 5 0 5 20
2 15 0 0 0 0 15
3 20 5 0 10 0 35
1 10 0 0 0 0 30 (20 for product 1, plus 10 for product 1 again)
3 12 0 33 0 0 80 (35 for product 3, plus 45 for product 3 again)
The ANSI standard method is to use sum() as a window function:
-- Per-product running total of the five transaction amounts.
-- <datetimecol> is a placeholder: substitute the column that defines transaction order.
-- The frame is spelled out as ROWS because the default RANGE frame would give rows
-- that tie on the ordering column the same (combined) running total.
select t.*,
       sum(a + b + c + d + e) over (partition by productid
                                    order by <datetimecol>
                                    rows between unbounded preceding and current row) as RunningTotal
from table t;
SQL tables represent unordered sets, so you need a column that specifies the ordering. I am guessing there is a date/time column somewhere for this purpose.
Most databases support this standard syntax: Oracle, SQL Server 2012+, Postgres, Teradata, and DB2.
Test Data
-- Table variables are prefixed with @ (a # prefix denotes a temp table and is not valid in DECLARE).
DECLARE @TABLE TABLE (ProductID INT, A INT, B INT, C INT, D INT, E INT)
INSERT INTO @TABLE VALUES
(1 ,10, 0, 5 , 0 , 5), -- 20
(2 ,15, 0, 0 , 0 , 0), -- 15
(3 ,20, 5, 0 , 10, 0), -- 35
(1 ,10, 0, 0 , 0 , 0), -- 30 (20 for product 1, plus 10 for product 1 again)
(3 ,12, 0, 33, 0 , 0) -- 80
Query
;WITH CTE AS
(
-- Number the rows of each product. The sample data has no date/sequence column,
-- so the order inside a product is arbitrary; order by a real sequencing column if one exists.
select *
,ROW_NUMBER() OVER (PARTITION BY ProductID ORDER BY ProductID ASC) rn
from @TABLE
)
SELECT ProductID
,A
,B
,C
,D
,E
,runningTotal
FROM CTE c
-- For every row, add up all rows of the same product up to and including this one.
cross apply (select sum(A+B+C+D+E) as runningTotal
from CTE
where rn <= c.rn
and ProductID = c.ProductID
) as rt
ORDER BY c.ProductID, c.rn
Result
╔═══════════╦════╦═══╦════╦════╦═══╦══════════════╗
║ ProductID ║ A ║ B ║ C ║ D ║ E ║ runningTotal ║
╠═══════════╬════╬═══╬════╬════╬═══╬══════════════╣
║ 1 ║ 10 ║ 0 ║ 5 ║ 0 ║ 5 ║ 20 ║
║ 1 ║ 10 ║ 0 ║ 0 ║ 0 ║ 0 ║ 30 ║
║ 2 ║ 15 ║ 0 ║ 0 ║ 0 ║ 0 ║ 15 ║
║ 3 ║ 20 ║ 5 ║ 0 ║ 10 ║ 0 ║ 35 ║
║ 3 ║ 12 ║ 0 ║ 33 ║ 0 ║ 0 ║ 80 ║
╚═══════════╩════╩═══╩════╩════╩═══╩══════════════╝
As we all know, general sorting is done using ORDER BY. The sort I want to perform is different: I want the smallest-length values in the middle of the table and the largest ones at the top and bottom of it. One half should be descending and the other half ascending. Can you guys help? It was an interview question.
This is one way:
;WITH ranked AS
(
    -- RN ranks the rows from shortest to longest value
    SELECT src.*,
           ROW_NUMBER() OVER (ORDER BY LEN(src.YourColumn)) AS RN
    FROM dbo.YourTable AS src
)
SELECT *
FROM ranked
-- Even ranks come first, from highest to lowest; odd ranks follow, from lowest to highest,
-- so the shortest values meet in the middle of the result.
ORDER BY RN % 2,
         CASE WHEN RN % 2 = 0 THEN -RN ELSE RN END
Test Data
-- Table variables are prefixed with @ (a # prefix denotes a temp table and is not valid in DECLARE).
DECLARE @Table TABLE
(ID INT, Value VARCHAR(10))
INSERT INTO @Table VALUES
(1 , 'A'),
(2 , 'AB'),
(3 , 'ABC'),
(4 , 'ABCD'),
(5 , 'ABCDE'),
(6 , 'ABCDEF'),
(7 , 'ABCDEFG'),
(8 , 'ABCDEFGI'),
(9 , 'ABCDEFGIJ'),
(10 ,'ABCDEFGIJK')
Query
;WITH CTE AS (
-- NTILE(2) splits the rows into a longer half (rn = 1) and a shorter half (rn = 2)
SELECT *
,NTILE(2) OVER (ORDER BY LEN(Value) DESC) rn
FROM @Table )
SELECT *
FROM CTE
-- First key: longer half by length descending (the shorter half yields NULL, which sorts last under DESC).
-- Second key: shorter half by length ascending.
ORDER BY CASE WHEN rn = 1 THEN LEN(Value) END DESC
,CASE WHEN rn = 2 THEN LEN(Value) END ASC
Result
╔════╦════════════╦════╗
║ ID ║ Value ║ rn ║
╠════╬════════════╬════╣
║ 10 ║ ABCDEFGIJK ║ 1 ║
║ 9 ║ ABCDEFGIJ ║ 1 ║
║ 8 ║ ABCDEFGI ║ 1 ║
║ 7 ║ ABCDEFG ║ 1 ║
║ 6 ║ ABCDEF ║ 1 ║
║ 1 ║ A ║ 2 ║
║ 2 ║ AB ║ 2 ║
║ 3 ║ ABC ║ 2 ║
║ 4 ║ ABCD ║ 2 ║
║ 5 ║ ABCDE ║ 2 ║
╚════╩════════════╩════╝
Here's a short approach that would get you started:
WITH cte AS
(
-- Take the numbers 1..1000; ORDER BY makes the TOP selection deterministic.
SELECT TOP 1000 number
FROM master..spt_values
WHERE type = 'P' and number >0
ORDER BY number
)
-- Even numbers are negated, so they sort first from largest to smallest;
-- odd numbers follow from smallest to largest.
SELECT number, row_number() OVER(ORDER BY CASE WHEN number %2 = 1 THEN number ELSE -(number) END) pos
FROM cte
I am trying to add a Group column to a data set based on some criteria. For a simple example:
╔════╦══════╗
║ ID ║ DATA ║
╠════╬══════╣
║ 1 ║ 12 ║
║ 2 ║ 20 ║
║ 3 ║ 3 ║
║ 4 ║ 55 ║
║ 5 ║ 11 ║
╚════╩══════╝
Let's say our criteria is that the Data should be greater than 10. Then the result should be similar to:
╔════╦══════╦═══════╗
║ ID ║ DATA ║ GROUP ║
╠════╬══════╬═══════╣
║ 1 ║ 12 ║ 1 ║
║ 2 ║ 20 ║ 1 ║
║ 3 ║ 3 ║ 2 ║
║ 4 ║ 55 ║ 3 ║
║ 5 ║ 11 ║ 3 ║
╚════╩══════╩═══════╝
So, all the rows that satisfied the criteria until an exception to the criteria occurred became part of a group. The numbering of the group doesn't necessarily need to follow this pattern, I just felt like this was a logical/simple numbering to explain the solution I am looking for.
You can calculate the group identifier by finding each row where data <= 10. Then, the group identifier is simply the number of rows where that condition is true, before the given row.
-- groupId = number of rows up to and including the current one that fail the
-- "data greater than 10" criterion; each failing row therefore starts a new group.
select cur.*,
       (select count(*)
        from t as prior
        where prior.data <= 10
          and prior.id <= cur.id) as groupId
from t as cur;
SQL Server 2012 has cumulative sum syntax. The statement would be simpler in that database:
-- Running count of rows that fail the "data greater than 10" criterion, ordered by id.
-- Each failing row bumps the counter and therefore starts a new group.
select t.*,
       sum(case when t.data <= 10 then 1 else 0 end) over (order by t.id) as groupId
from t;
EDIT:
The above does not take into account that the values less than 10 are in their own group. The logic above is that they start a new group.
The following assigns a group id with this constraint:
-- Group id that puts every failing row (data <= 10) in a group of its own:
--   * each earlier failing row advances the id by 2 (one for the failing row's own
--     group, one for the group of passing rows that follows it);
--   * a failing row adds 1 so it is separated from the passing rows before it.
select t.*,
       ((select 2*count(*)
         from t t2
         where t2.id < t.id and
               t2.data <= 10
        ) + (case when t.data <= 10 then 1 else 0 end)
       ) as groupId
from t;
This can be done easily with a recursive query:
-- Walks the rows in ID order and carries the group number forward.
-- Assumes IDs are consecutive integers starting at 1 (the recursion joins on ID + 1).
;WITH CTE
     AS (
         -- Anchor: the first row opens group 1.
         SELECT ID,
                DATA,
                1 AS [GROUP]
         FROM   TABLEB
         WHERE  ID = 1
         UNION ALL
         -- A new group starts when the current row fails the "DATA greater than 10"
         -- criterion, or when the previous row failed it (the failing row stays alone).
         SELECT T1.ID,
                T1.DATA,
                CASE
                  WHEN T1.DATA <= 10 OR T2.DATA <= 10 THEN T2.[GROUP] + 1
                  ELSE T2.[GROUP]
                END [GROUP]
         FROM   TABLEB T1
                INNER JOIN CTE T2
                        ON T1.ID = T2.ID + 1)
SELECT *
FROM   CTE
-- One recursion level per row: lift the default limit of 100 levels.
OPTION (MAXRECURSION 0)
A working example can be found on SQL Fiddle.
Good Luck!
This question already has answers here:
SQL Server: Examples of PIVOTing String data
(7 answers)
Closed 8 years ago.
I have a table with 2 columns plateno and alerts.
table Data
plateno alerts
A 1
B 2
C 2
A 3
B 2
A 4
A 1
Now I want to get the result like this:
alerts-> 1 2 3 4
---------------------------
A 2 0 1 1
B 0 2 0 0
C 0 1 0 0
I mean 'A' has two alerts of type '1',1 alert of type '3' and '4'...and so on..
Here is my query I am trying with
-- Attempted query: a single alert count per plate (not yet split by alert type).
SELECT COUNT(alert)
FROM mytable
GROUP BY plateno
Try this :-
-- Count alerts per plate, producing one output column per alert type (1-4).
SELECT pvt.plateno,
       pvt.[1],
       pvt.[2],
       pvt.[3],
       pvt.[4]
FROM (
    SELECT plateno, alerts
    FROM Sample
) AS src
PIVOT (
    COUNT(alerts)
    FOR alerts IN ([1], [2], [3], [4])
) AS pvt
Demo Here
If the number of distinct values in the alerts column is not known in advance, dynamic SQL is preferable:
-- Local variables are prefixed with @ (a # prefix denotes a temp table and is not valid in DECLARE).
DECLARE @colList AS NVARCHAR(MAX), @sqlStatement AS NVARCHAR(MAX)
-- Build the pivot column list, e.g. [1],[2],[3],[4], from the distinct alert values.
-- QUOTENAME delimits each value so it is a safe column identifier in the dynamic statement.
SELECT @colList = STUFF((SELECT DISTINCT ',' + QUOTENAME(alerts)
FROM data
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)') ,1,1,'')
-- Assemble the PIVOT statement around the generated column list.
SET @sqlStatement = 'SELECT plateno,' + @colList + '
FROM
(
SELECT plateno, alerts
FROM data
) dta
pivot
(
COUNT(alerts)
FOR alerts IN (' + @colList + ')
) pvt '
EXECUTE(@sqlStatement)
SQLFiddle Demo
OUTPUT
╔═════════╦═══╦═══╦═══╦═══╗
║ PLATENO ║ 1 ║ 2 ║ 3 ║ 4 ║
╠═════════╬═══╬═══╬═══╬═══╣
║ A ║ 2 ║ 0 ║ 1 ║ 1 ║
║ B ║ 0 ║ 2 ║ 0 ║ 0 ║
║ C ║ 0 ║ 1 ║ 0 ║ 0 ║
╚═════════╩═══╩═══╩═══╩═══╝
Try this:
-- Conditional aggregation: one counter column per alert type, one row per plate.
SELECT
    plateno,
    SUM(CASE WHEN alerts = 1 THEN 1 ELSE 0 END) AS [1],
    SUM(CASE WHEN alerts = 2 THEN 1 ELSE 0 END) AS [2],
    SUM(CASE WHEN alerts = 3 THEN 1 ELSE 0 END) AS [3],
    SUM(CASE WHEN alerts = 4 THEN 1 ELSE 0 END) AS [4]
FROM Table1
GROUP BY plateno
Output:
╔═════════╦═══╦═══╦═══╦═══╗
║ PLATENO ║ 1 ║ 2 ║ 3 ║ 4 ║
╠═════════╬═══╬═══╬═══╬═══╣
║ A ║ 2 ║ 0 ║ 1 ║ 1 ║
║ B ║ 0 ║ 2 ║ 0 ║ 0 ║
║ C ║ 0 ║ 1 ║ 0 ║ 0 ║
╚═════════╩═══╩═══╩═══╩═══╝
See this SQLFiddle
-- Inner query: flag each row with a 0/1 marker per alert type.
-- Outer query: add the markers up per plate.
SELECT flags.plateno,
       SUM(flags.[1]) AS [1],
       SUM(flags.[2]) AS [2],
       SUM(flags.[3]) AS [3],
       SUM(flags.[4]) AS [4]
FROM (
    SELECT plateno,
           CASE alert WHEN 1 THEN 1 ELSE 0 END AS [1],
           CASE alert WHEN 2 THEN 1 ELSE 0 END AS [2],
           CASE alert WHEN 3 THEN 1 ELSE 0 END AS [3],
           CASE alert WHEN 4 THEN 1 ELSE 0 END AS [4]
    FROM planet
) AS flags
GROUP BY flags.plateno
SQL fiddle here