Get Count for Each Column values - sql

Input
-- Sample data for the question: one row per recorded value, keyed by case.
-- NewValue and Attribute are fixed-width char(2), so one-character values are
-- stored space-padded (this is why labels such as 'X -WB' appear later).
create table #t1
(
    CaseId    int,
    NewValue  char(2),
    Attribute char(2),
    TimeStamp datetime
);

insert into #t1 (CaseId, NewValue, Attribute, TimeStamp)
values
    (1, 'A',  'X',  '2020-01-01 13:01'),
    (1, 'Au', 'WB', '2020-01-01 13:02'),
    (1, 'C',  'P',  '2020-01-01 13:03'),
    (1, 'Ma', 'WB', '2020-01-01 13:04'),
    (1, 'C',  'D',  '2020-01-01 13:05'),
    (1, 'D',  'E',  '2020-01-01 13:04'),  -- same timestamp as the 'Ma'/'WB' row
    (2, 'M',  'P',  '2020-05-01 15:20'),
    (2, 'X',  'WB', '2020-05-01 15:26'),
    (2, 'Y',  'WB', '2020-05-01 15:29'),
    (2, 'X',  'P',  '2020-05-01 15:31');
I need output like the following.
CaseId NewValue Attribute TimeStamp NewColumn NewColumn Count
1 A X 01:00.0 NULL NULL 0
1 Au WB 02:00.0 Au-WB Au-WB 2
1 C P 03:00.0 Au-WB Au-WB 2
1 Ma WB 04:00.0 Ma-WB Ma-WB 3
1 C D 05:00.0 Ma-WB Ma-WB 3
1 D E 04:00.0 Ma-WB Ma-WB 3
2 M P 20:00.0 NULL NULL 0
2 X WB 26:00.0 X -WB X -WB 1
2 Y WB 29:00.0 Y -WB Y -WB 2
2 X P 31:00.0 Y -WB Y -WB 2
Squirrel helped to get everything minus count. The query is as follows. Does anyone know how to get that count?
-- For every row, look up the most recent 'WB' row of the same case at or
-- before the row's timestamp and expose it as "NewValue-Attribute".
-- OUTER APPLY keeps rows that have no such 'WB' row (NewColumn is NULL).
select *, wb.NewColumn
from #t1 t
outer apply
(
    -- The column is named Attribute (it was misspelled Attibute here).
    select top 1 x.NewValue + '-' + x.Attribute as NewColumn
    from #t1 x
    where x.CaseId = t.CaseId
      and x.TimeStamp <= t.TimeStamp
      and x.Attribute = 'WB'
    order by x.TimeStamp desc
) wb

This looks like a gaps-and-islands problem, where a new island starts every time a record with Attribute 'WB' is encountered.
If so, here is one way to solve it using window functions:
-- Label each row with the 'WB' row that opens its group and count the rows in
-- that group. Rows before the first 'WB' row of a case (grp = 0) get a NULL
-- label and a count of 0.
select
    caseId,
    newValue,
    attribute,
    timeStamp,
    case when grp > 0
        -- Tie-break on attribute so the 'WB' row sorts first among rows that
        -- share a timestamp; ordering by timeStamp alone leaves the choice
        -- among tied rows undefined.
        then first_value(newValue) over(
                 partition by caseId, grp
                 order by timeStamp, case when attribute = 'WB' then 0 else 1 end)
            + '-'
            + first_value(attribute) over(
                 partition by caseId, grp
                 order by timeStamp, case when attribute = 'WB' then 0 else 1 end)
    end newValue,
    case when grp > 0
        then count(*) over(partition by caseId, grp)
        else 0
    end cnt
from (
    -- Running count of 'WB' rows per case: it increases at every 'WB' row,
    -- so it doubles as the group number.
    select
        t.*,
        sum(case when attribute = 'WB' then 1 else 0 end)
            over(partition by caseId order by timeStamp) grp
    from #t1 t
) t
order by caseId, timeStamp
The inner query does a window sum() to define the groups: every time attribute 'WB' is met for a given caseId, a new group starts. Then, the outer query uses first_value() to recover the first value in the group, and performs a window count() to compute the number of records per group. This is wrapped in conditional logic so the additional columns are not filled before the first 'WB' attribute is met.
Demo on DB Fiddle:
caseId | newValue | attribute | timeStamp | newValue | cnt
-----: | :------- | :-------- | :---------------------- | :------- | --:
1 | A | X | 2020-01-01 13:01:00.000 | null | 0
1 | Au | WB | 2020-01-01 13:02:00.000 | Au-WB | 2
1 | C | P | 2020-01-01 13:03:00.000 | Au-WB | 2
1 | Ma | WB | 2020-01-01 13:04:00.000 | Ma-WB | 3
1 | D | E | 2020-01-01 13:04:00.000 | Ma-WB | 3
1 | C | D | 2020-01-01 13:05:00.000 | Ma-WB | 3
2 | M | P | 2020-05-01 15:20:00.000 | null | 0
2 | X | WB | 2020-05-01 15:26:00.000 | X -WB | 1
2 | Y | WB | 2020-05-01 15:29:00.000 | Y -WB | 2
2 | X | P | 2020-05-01 15:31:00.000 | Y -WB | 2

Using your query output, create a cte and perform the count using the windowing function by partition on caseid,newcolumn as follows
-- data: every source row plus the label of the latest 'WB' row of the same
-- case at or before the row's timestamp (NULL when there is none).
with data
as (
    -- t.* rather than *: a plain * would also expand wb.NewColumn, and a CTE
    -- does not accept two columns with the same name.
    select t.*, wb.NewColumn
    from #t1 t
    outer apply
    (
        select top 1 x.NewValue + '-' + x.Attribute as NewColumn
        from #t1 x
        where x.CaseId = t.CaseId
          and x.TimeStamp <= t.TimeStamp
          and x.Attribute = 'WB'
        order by x.TimeStamp desc
    ) wb
)
-- Count the rows sharing a label within a case; rows without a label report 0
-- (as in the requested output) instead of the size of the NULL partition.
select *,
       case when NewColumn is null
            then 0
            else count(*) over(partition by CaseId, NewColumn)
       end as cnt
from data

Related

SQL return second max date for each id, date and channel

I have the following table:
id channel_id date
1 | 1 | 2017-01-10
1 | 2 | 2018-02-05
1 | 1 | 2019-03-07
1 | 2 | 2020-03-15
2 | 1 | 2018-01-17
2 | 1 | 2019-07-20
2 | 1 | 2020-01-10
For each row, I want to return the most recent earlier date for the same id, in two separate columns: one for the latest earlier date where channel_id is 1, and another for the latest earlier date where channel_id is 2. The result I want is shown below:
id channel_id date prev_date_channel_id1 prev_date_channel_id2
1 | 1 | 2017-01-10 | NULL | NULL |
1 | 2 | 2018-02-05 | 2017-01-10 | NULL |
1 | 1 | 2019-03-07 | 2017-01-10 | 2018-02-05 |
1 | 2 | 2020-03-15 | 2019-03-07 | 2018-02-05 |
2 | 1 | 2018-01-17 | NULL | NULL |
2 | 1 | 2019-07-20 | 2018-01-17 | NULL |
2 | 1 | 2020-01-10 | 2019-07-20 | NULL |
I made a query as below and returns what I want but takes too much time. I'd appreciate any optimization suggestions!
-- Asker's original approach: join every row to all earlier rows of the same id
-- on channel 1 and, separately, on channel 2, then keep the latest date of each.
select
    cur.id,
    cur.date,
    max(ch1.date) as prev_date_channel_id1,
    max(ch2.date) as prev_date_channel_id2
from table cur
left join table ch1
    on  ch1.id = cur.id
    and ch1.date < cur.date
    and ch1.channel_id = 1
left join table ch2
    on  ch2.id = cur.id
    and ch2.date < cur.date
    and ch2.channel_id = 2
group by
    cur.id,
    cur.date
Use lag() for the previous date and a cumulative conditional max for the channel 2 date:
-- lag() returns the date of the previous row for the id, whatever its channel.
-- The per-channel columns use a running conditional max over the strictly
-- earlier rows: the frame ends at "1 preceding", so the current row is excluded.
select t.*,
       lag(date) over (partition by id order by date) as prev_date,
       max(case when channel_id = 1 then date end) over
           (partition by id
            order by date
            rows between unbounded preceding and 1 preceding
           ) as prev_date_channel1,
       max(case when channel_id = 2 then date end) over
           (partition by id
            order by date
            rows between unbounded preceding and 1 preceding
           ) as prev_date_channel2
from t;
I think there's an error in your "expected output" for the value of prev_date_channel_id1 on the last row (it should be 2019-07-20).
In any case, with appropriate indexing an outer apply top 1 construct might serve you better:
-- Demo table for the question's sample data, clustered on (id, channel_id, [date]).
create table t
(
    id         int  not null,
    channel_id int  not null,
    [date]     date not null,  -- comma added: the key below is a table-level constraint
    constraint pk_t primary key clustered (id, channel_id, [date])
);
-- Explicit column list so the insert does not depend on column order.
insert t (id, channel_id, [date]) values
(1, 1, '2017-01-10'),
(1, 2, '2018-02-05'),
(1, 1, '2019-03-07'),
(1, 2, '2020-03-15'),
(2, 1, '2018-01-17'),
(2, 1, '2019-07-20'),
(2, 1, '2020-01-10');
-- For each row, fetch the latest strictly earlier date of the same id on
-- channel 1 and on channel 2; OUTER APPLY yields NULL when none exists.
select cur.id,
       cur.channel_id,
       cur.[date],
       ch1.dt as prev_date_channel_id1,
       ch2.dt as prev_date_channel_id2
from t as cur
outer apply (
    select top (1) p.[date]
    from t as p
    where p.id = cur.id
      and p.channel_id = 1
      and p.[date] < cur.[date]
    order by p.[date] desc
) as ch1 (dt)
outer apply (
    select top (1) p.[date]
    from t as p
    where p.id = cur.id
      and p.channel_id = 2
      and p.[date] < cur.[date]
    order by p.[date] desc
) as ch2 (dt)
order by cur.id, cur.[date];
Or possibly faster still, especially with the key changed to constraint pk_t primary key clustered (id, [date], [channel_id]):
-- One pass over the earlier rows of the same id computes both per-channel
-- maxima; a channel with no earlier row yields NULL.
select cur.id,
       cur.channel_id,
       cur.[date],
       prev.c1 as prev_date_channel_id1,
       prev.c2 as prev_date_channel_id2
from t as cur
outer apply (
    select max(case when p.channel_id = 1 then p.[date] end) as c1,
           max(case when p.channel_id = 2 then p.[date] end) as c2
    from t as p
    where p.id = cur.id
      and p.[date] < cur.[date]
) as prev
Assuming you have an index on those three columns, you can use subqueries:
-- Two correlated scalar subqueries: each returns the latest strictly earlier
-- date of the same id for one channel, or NULL when there is none.
select cur.[id],
       cur.[channel_id],
       cur.[date],
       (
           select max(p.[date])
           from [t] as p
           where p.[id] = cur.[id]
             and p.[date] < cur.[date]
             and p.[channel_id] = 1
       ) as [prev_date_channel_id1],
       (
           select max(p.[date])
           from [t] as p
           where p.[id] = cur.[id]
             and p.[date] < cur.[date]
             and p.[channel_id] = 2
       ) as [prev_date_channel_id2]
from [t] as cur;

single column value in multiple columns

ID|Class | Number
--+------+---------
1 | 1 | 58.2
2 | 1 | 85.4
3 | 2 | 28.2
4 | 2 | 55.4
The desired result would be:
Column1 |Number | Column2 | Number
--------+-------+---------+---------
1 | 58.2 | 2 |28.2
1 | 85.4 | 2 |55.4
What would be the required SQL?
You can use row_number() and aggregate:
-- Number the rows of each class by id, then put the n-th row of class 1 and
-- the n-th row of class 2 on the same output row. (Grouping on
-- ceiling(seqnum / 2.0) with odd/even tests collapsed the sample data into a
-- single row that mixed both classes.)
select 1 as column1,
       max(case when class = 1 then number end) as number_1,
       2 as column2,
       max(case when class = 2 then number end) as number_2
from (select t.*,
             row_number() over (partition by class order by id) as seqnum
      from t
     ) t
group by seqnum
order by seqnum;
The aggregation groups on the per-class row number, so the n-th row of each class ends up in the same output row.
try this
-- Pair the n-th row of class 1 (by ID) with the n-th row of class 2.
-- The earlier self-join matched rows of the SAME class, so it never put a
-- class-1 number next to a class-2 number.
-- FULL JOIN keeps the extra rows when the classes have different row counts.
SELECT 1 AS Column1,c1.Number,2 AS Column2,c2.Number
FROM
(
SELECT Number, ROW_NUMBER() OVER (ORDER BY ID) AS rn
FROM test11
WHERE Class = 1
) c1
FULL JOIN
(
SELECT Number, ROW_NUMBER() OVER (ORDER BY ID) AS rn
FROM test11
WHERE Class = 2
) c2
ON c2.rn = c1.rn
ORDER BY COALESCE(c1.rn, c2.rn)
Demo in db<>fiddle

Turn one column into multiple based on index ranges

I have the following table in SQL Server:
| idx | value |
| --- | ----- |
| 1 | N |
| 2 | C |
| 3 | C |
| 4 | P |
| 5 | N |
| 6 | N |
| 7 | C |
| 8 | N |
| 9 | P |
I would like to turn it to this:
| idx 1-3 | idx 4-6 | idx 7-9 |
| ------- | ------- | ------- |
| N | P | C |
| C | N | N |
| C | N | P |
How can I do this?
If you want to split the data into three columns, with the data in order by id -- and assuming that the ids start at 1 and have no gaps -- then on your particular data, you can use:
-- (idx - 1) / 3 is integer division and picks the output column;
-- idx % 3 picks the output row. Rows are returned in order of their smallest idx.
select max(case (idx - 1) / 3 when 0 then value end) as grp_1,
       max(case (idx - 1) / 3 when 1 then value end) as grp_2,
       max(case (idx - 1) / 3 when 2 then value end) as grp_3
from t
group by idx % 3
order by min(idx);
The above doesn't hard-code the ranges, but the "3" means different things in different contexts -- sometimes the number of columns, sometimes the number of rows in the result set.
However, the following generalizes so it adds additional rows as needed:
-- num_rows = number of rows each of the three columns needs (total rows / 3,
-- rounded up). (idx - 1) / num_rows picks the column, idx % num_rows the row.
-- The columns return value (not idx), matching the requested output.
select max(case when (idx - 1) / num_rows = 0 then value end) as grp_1,
       max(case when (idx - 1) / num_rows = 1 then value end) as grp_2,
       max(case when (idx - 1) / num_rows = 2 then value end) as grp_3
from (select t.*, convert(int, ceiling(count(*) over () / 3.0)) as num_rows
      from t
     ) t
group by idx % num_rows
order by min(idx);
Here is a db<>fiddle.
You can compute the category of each row with a lateral join, then enumerate the rows within each category, and finally pivot with conditional aggregation:
-- Pivot with conditional aggregation: one output row per position (rn) inside
-- a category, one output column per category.
select
    max(case when cat = 'idx_1_3' then value end) as idx_1_3,
    max(case when cat = 'idx_4_6' then value end) as idx_4_6,
    max(case when cat = 'idx_7_9' then value end) as idx_7_9
from (
    -- v.cat is selected explicitly because t.* only expands mytable's columns,
    -- and the outer query filters on cat.
    -- row_number() requires an ORDER BY in SQL Server; ordering by idx keeps
    -- the source order within each category.
    select t.*, v.cat, row_number() over(partition by v.cat order by t.idx) as rn
    from mytable t
    cross apply (values (
        case
            when idx between 1 and 3 then 'idx_1_3'
            when idx between 4 and 6 then 'idx_4_6'
            when idx between 7 and 9 then 'idx_7_9'
        end
    )) v(cat)
) t
group by rn
order by rn
Another solution with union all operator and row_number function
-- Each branch numbers the rows of one idx range (rnb) and fills only its own
-- column; grouping by rnb then merges the three branches row by row.
-- The third output column aggregates idx_7_9 (it previously repeated idx_1_3).
select max(idx_1_3) as idx_1_3, max(idx_4_6) as idx_4_6, max(idx_7_9) as idx_7_9
from (
    select
        case when idx in (1, 2, 3) then value end as idx_1_3
        , null as idx_4_6
        , null as idx_7_9
        , row_number()over(order by idx) as rnb
    from Your_table where idx in (1, 2, 3)
    union all
    select null as idx_1_3
        , case when idx in (4, 5, 6) then value end as idx_4_6
        , null as idx_7_9
        , row_number()over(order by idx) as rnb
    from Your_table where idx in (4, 5, 6)
    union all
    select null as idx_1_3
        , null as idx_4_6
        , case when idx in (7, 8, 9) then value end as idx_7_9
        , row_number()over(order by idx) as rnb
    from Your_table where idx in (7, 8, 9)
) t
group by rnb
order by rnb  -- return the rows in position order
;
-- Build a 2002-row demo table: val is "<n> - <random capital letter>".
-- @@spid is a constant for the session, so it serves only as the ORDER BY
-- expression that row_number() requires (it was garbled as ##spid).
drop table if exists #t;
create table #t (id int identity(1,1) primary key clustered, val varchar(20));
insert into #t(val)
select top (2002) concat(row_number() over(order by @@spid), ' - ', char(65 + abs(checksum(newid()))%26))
from sys.all_objects
order by row_number() over(order by @@spid);
-- c3 cycles 1,2,3 over consecutive blocks of three ids and selects the output
-- column; r numbers the rows inside each column and is what PIVOT groups on.
select p.r, 1+(p.r-1)/3 grp3id, p.[1] as [idx 1-3], p.[2] as [idx 4-6], p.[3] as [idx 7-9]
from
(
    select
        val,
        1+((1+(id-1)/3)-1)%3 as c3,
        row_number() over(partition by 1+((1+(id-1)/3)-1)%3 order by id) as r
    from #t
) as src
pivot
(
    max(val) for c3 in ([1], [2], [3])
) as p
order by p.r;
You can use the mod as follows:
-- (idx-1) % 3 is the position of a row inside its block of three, so each
-- group holds one row from every idx range.
select max(case when idx between 1 and 3 then value end) as idx_1_3,
       max(case when idx between 4 and 6 then value end) as idx_4_6,
       max(case when idx between 7 and 9 then value end) as idx_7_9
from t
group by (idx-1) % 3
order by min(idx);  -- without ORDER BY the row order is not guaranteed
If your idx values are not consecutive numbers, then instead of "from t" use the following:
from (select value, row_number() over(order by idx) as idx
from your_table t) t

Join column name with value from other table

-- Asker's attempt: returns ids only when ALL six RN columns of the matching
-- Table1 row are greater than zero.
select id
from Table2 as t2
join Table1 as t1
    on  t1.ordno = t2.ordno
    and t1.testcode = t2.testcode
where RN1 > 0
  and RN2 > 0
  and RN3 > 0
  and RN4 > 0
  and RN5 > 0
  and RN6 > 0
I only want to return the id from Table2 if the corresponding value in Table1 is > 0 (the Table1 column to check is named in Table2.RN_VALUE). So in this case, I only want the first two rows of Table2 to be returned, because they have a value in Table1 which is greater than 0. Can anyone help me with a query to do this?
Table1:
+--------------------------------------------------------+
| ORDNO | TESTCODE | RN1 | RN2 | RN3 | RN4 | RN5 | RN6 |
+--------------------------------------------------------+
| 123 | 456 | 55 | 56 | 0 | 0 | null | null |
+--------------------------------------------------------+
Table2:
+----------------------------------+
| ORDNO | TESTCODE | RN_VALUE | ID |
+----------------------------------+
| 123 456 RN1 1 |
| 123 456 RN2 2 |
| 123 456 RN3 3 |
| 123 456 RN4 4 |
+----------------------------------+
I believe you want something like this:
-- Keep a Table2 row when the Table1 column named by its RN_VALUE is positive.
select t2.*
from Table2 as t2
join Table1 as t1
    on  t1.ordno = t2.ordno
    and t1.testcode = t2.testcode
where (t2.RN_VALUE = 'RN1' and t1.RN1 > 0)
   or (t2.RN_VALUE = 'RN2' and t1.RN2 > 0)
   or (t2.RN_VALUE = 'RN3' and t1.RN3 > 0)
   or (t2.RN_VALUE = 'RN4' and t1.RN4 > 0)
   or (t2.RN_VALUE = 'RN5' and t1.RN5 > 0)
   or (t2.RN_VALUE = 'RN6' and t1.RN6 > 0);
Having multiple columns with names like that suggests a poor data model. Perhaps these should be in separate rows, with one value per row.
-- Unpivot Table1's RN1..RN6 columns into (label, value) rows, then join each
-- Table2 row to the value its RN_VALUE names and keep the positive ones.
-- Every branch reads from table1 (the FROM clause was missing), and the label
-- is the text 'RNn' so it can be compared with t2.rn_value.
select t2.*
from table2 t2
inner join (select ordno, testcode, 'RN1' as rn, rn1 as val from table1
union
select ordno, testcode, 'RN2' as rn, rn2 as val from table1
union
select ordno, testcode, 'RN3' as rn, rn3 as val from table1
union
select ordno, testcode, 'RN4' as rn, rn4 as val from table1
union
select ordno, testcode, 'RN5' as rn, rn5 as val from table1
union
select ordno, testcode, 'RN6' as rn, rn6 as val from table1
) t1
on t2.rn_value=t1.rn
and t2.ordno=t1.ordno
and t2.testcode=t1.testcode
where t1.val>0

Pivot table to turn rows into columns

I currently run the query
-- Asker's base query: all six columns of PriceAttribute, unfiltered.
select pa.PriceAttributeID,
       pa.PriceID,
       pa.AttributeID,
       pa.PriceAttributeComparator,
       pa.PriceAttributeMin,
       pa.PriceAttributeMax
from PriceAttribute as pa
Which gives the output
1 2 1 1 S NULL
2 3 1 1 M NULL
3 4 1 1 L NULL
4 5 1 1 L NULL
5 5 2 1 Black NULL
I would like to get the output (where _Comp, _Min and _Max relate to PriceAttributeComparator, PriceAttributeMin and PriceAttributeMax)
PriceID 1_Comp 1_Min 1_Max 2_Comp 2_Min 2_Max
2 1 S NULL NULL NULL NULL
3 1 M NULL NULL NULL NULL
4 1 L NULL NULL NULL NULL
5 1 L NULL 1 Black NULL
The same query would also be expected to have 1_ and 2_ prefixes as 4_, 5_, 19_ and 32_ or any other indeterminate number of ID's based on what is in the table at the time.
I have attempted a PIVOT table, but I am new to them and haven't the first clue how to create what I am looking for.
Part of the problem you are probably having with the PIVOT function is due to the fact you have multiple columns that you want to apply the function to. If you want to use the PIVOT function, then I would suggest first unpivoting the columns PriceAttributeComparator, PriceAttributeMin and PriceAttributeMax. When you unpivot the data you will no longer have multiple columns, you will have multiple rows, then you can apply the pivot to all of the appropriate values.
You did not specify what version of SQL Server you are using but you can use CROSS APPLY with a UNION ALL to unpivot the columns:
-- Unpivot: turn the three attribute columns of each row into three
-- (col, value) rows, with col prefixed by the attribute id (e.g. 1_comp).
select d.PriceID as priceid,
       cast(d.AttributeID as varchar(10)) + '_' + c.col as col,
       c.value
from
(
    -- Cast all three columns to varchar(10) so they can share one value column.
    select PriceID,
           AttributeID,
           cast(PriceAttributeComparator as varchar(10)) as comp,
           cast(PriceAttributeMin as varchar(10)) as [min],
           cast(PriceAttributeMax as varchar(10)) as [max]
    from PriceAttribute
) as d
cross apply
(
    select 'comp', d.comp
    union all
    select 'min', d.[min]
    union all
    select 'max', d.[max]
) as c (col, value)
See Demo. This process will convert your data into the following format:
| PRICEID | COL | VALUE |
-----------------------------
| 2 | 1_comp | 1 |
| 2 | 1_min | S |
| 2 | 1_max | (null) |
| 3 | 1_comp | 1 |
| 3 | 1_min | M |
| 3 | 1_max | (null) |
Once the data is in multiple rows, then you can apply the PIVOT function to the values in col:
-- Static pivot: unpivot the attribute columns into (col, value) rows, then
-- pivot col back into one column per known attribute id / field pair.
select piv.priceid,
       piv.[1_comp], piv.[1_min], piv.[1_max],
       piv.[2_comp], piv.[2_min], piv.[2_max]
from
(
    select d.PriceID as priceid,
           cast(d.AttributeID as varchar(10)) + '_' + c.col as col,
           c.value
    from
    (
        -- Cast all three columns to varchar(10) so they can share one value column.
        select PriceID,
               AttributeID,
               cast(PriceAttributeComparator as varchar(10)) as comp,
               cast(PriceAttributeMin as varchar(10)) as [min],
               cast(PriceAttributeMax as varchar(10)) as [max]
        from PriceAttribute
    ) as d
    cross apply
    (
        select 'comp', d.comp
        union all
        select 'min', d.[min]
        union all
        select 'max', d.[max]
    ) as c (col, value)
) as src
pivot
(
    max(value)
    for col in ([1_comp], [1_min], [1_max], [2_comp], [2_min], [2_max])
) as piv;
See SQL Fiddle with Demo.
The above versions work great if you have a known number of values but if the values are unknown, then you will need to use dynamic SQL to get the result:
-- Dynamic pivot: build the column list from the attribute ids present in the
-- table, then run the same unpivot/pivot query with that list spliced in.
-- Local variables use the @ prefix (they were garbled as #cols / #query).
DECLARE @cols AS NVARCHAR(MAX),
    @query AS NVARCHAR(MAX)
-- @cols becomes e.g. [1_comp],[1_min],[1_max],[2_comp],...; QUOTENAME brackets
-- each name, "so" fixes the comp/min/max order, and STUFF drops the leading comma.
select @cols = STUFF((SELECT ',' + QUOTENAME(cast(attributeid as varchar(10))+'_'+ col)
from
(
select distinct attributeid
from priceattribute
) d
cross apply
(
select 'comp', 1 union all
select 'min', 2 union all
select 'max', 3
) c (col, so)
group by attributeid, col, so
order by attributeid, so
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set @query = 'SELECT priceid, ' + @cols + '
from
(
select priceid,
col = cast(attributeid as varchar(10))+''_''+ col,
value
from
(
select PriceID,
AttributeID,
comp = cast(PriceAttributeComparator as varchar(10)),
[min] = cast(PriceAttributeMin as varchar(10)),
[max] = cast(PriceAttributeMax as varchar(10))
from PriceAttribute
) d
cross apply
(
select ''comp'', comp union all
select ''min'', [min] union all
select ''max'', [max]
) c (col, value)
) x
pivot
(
max(value)
for col in (' + @cols + ')
) p '
execute sp_executesql @query;
See SQL Fiddle with Demo. These solutions will give a result:
| PRICEID | 1_COMP | 1_MIN | 1_MAX | 2_COMP | 2_MIN | 2_MAX |
----------------------------------------------------------------
| 2 | 1 | S | (null) | (null) | (null) | (null) |
| 3 | 1 | M | (null) | (null) | (null) | (null) |
| 4 | 1 | L | (null) | (null) | (null) | (null) |
| 5 | 1 | L | (null) | 1 | Black | (null) |
It might be simplest to do this using conditional aggregation rather than pivot:
-- One row per PriceID; each attribute id contributes three columns, filled
-- only from the rows carrying that attribute id (NULL otherwise).
select pa.PriceID,
       max(case pa.AttributeID when 1 then pa.PriceAttributeComparator end) as comp_1,
       max(case pa.AttributeID when 1 then pa.PriceAttributeMin end) as min_1,
       max(case pa.AttributeID when 1 then pa.PriceAttributeMax end) as max_1,
       max(case pa.AttributeID when 2 then pa.PriceAttributeComparator end) as comp_2,
       max(case pa.AttributeID when 2 then pa.PriceAttributeMin end) as min_2,
       max(case pa.AttributeID when 2 then pa.PriceAttributeMax end) as max_2
from PriceAttribute as pa
group by pa.PriceID;