split data row wise based on row values - sql

I have a table that stores information in the below format.
id, value , property are the columns. I have a requirement now to sum up data based on property.
i.e for property column F2 and Value,
I need values summed up and displayed as below:
Type | Sum
Cars | 1892+702+515
Bikes | 1393 +0 + 474.6
Note: I know this is not the way to store data in a table, but table alterations are currently not possible.
Appreciate if you could give your inputs on this.

Here's another solution which uses LEAD in case if you are running SQL Server 2012+ (note my comments).
-- Sample data
DECLARE #yourtable
TABLE
(
id int identity primary key, -- emulate an index on ID
value varchar(100),
property varchar(5)
);
INSERT #yourtable (value, property) VALUES
('0', 'F2'),
('0', 'V1'),
('0', 'V2'),
('0', 'V3'),
('Cars', 'F2'),
('1892', 'V1'),
('702', 'V2'),
('515', 'V3'),
('Bikes', 'F2'),
('1393', 'V1'),
('0', 'V2'),
('474.6', 'V2');
-- Solution
WITH startPoints AS
(
SELECT *, rn = ROW_NUMBER() OVER (ORDER BY id)
FROM #yourtable
),
groups AS
(
SELECT value, rn, ttl =
ISNULL(LEAD(id,1) OVER (ORDER BY id), (SELECT COUNT(*) FROM #yourtable)+1) - (rn+1)
FROM startPoints
WHERE property = 'F2' AND value LIKE ('%[^0-9.]%')
)
SELECT
value,
SUM =
(
SELECT SUM(CAST(value AS decimal(10,2)))
FROM startPoints s
WHERE s.rn BETWEEN g.rn+1 AND g.rn+ttl
)
FROM groups g;

This looks like a really bad design. It looks like you are using the positions in the table to assign "groupings". Fortunately, you have an id column, so this is possible to do in SQL.
Here is the idea: First assign the appropriate F2 property to each row. Then do an aggregation. This following uses outer apply for the first part and group by for the second:
select t2.value,
sum(case when isnumeric(t.value) = 1 then cast(t.value as decimal(10, 2))
end) as thesum
from t outer apply
(select top 1 t2.*
from t t2
where t2.id <= t.id and t2.property = 'F2'
order by t2.id desc
) t2
group by t2.value;
This doesn't filter out the first group (all 0's). You can do that with an additional WHERE clause if you like.

Related

Selecting entries with biggest value less than list of values

Suppose my table structure as follows:
id | Word
---|-----
1 | a
2 | aa
. | ..
I have a list of id's like this:
(...,900, 1000, 2000, 3000, 4000,....)
I want to find the biggest id less than each id in the above list.
My table id's is not necessarily consecutive and there are some gaps between two successive id's, for example:
(...,889,900,950,952,997,1000,1001,1010,1920,2000,2990,3000,3500,4000,...)
The expected result according to the above list would be:
(889, 997, 1920, 2990, 3500,...)
How do i achieve desired results?
Use a common table expression and ROW_NUMBER()
;WITH cte AS(
SELECT *, ROW_NUMBER() OVER (ORDER BY ID) rowNum
FROM example)
SELECT ID, word
FROM cte
WHERE rowNum IN (
SELECT (rowNum - 1)
FROM cte
WHERE ID IN ('900','1000','2000','3000','4000'))
--WHERE ID IN (SELECT ID FROM <tableWithIDs>))
If you already have all of the ID you are looking for in another table, you would instead use the commented portion of my answer instead of the hardcoded IN list.
This will work only if the ID you are looking for exists in the table. So, as noted in a comment below if you were searching for 1001 you would not get 997, unless 1001 existed in the table (meaning, if it existed it would get a rowNum value and could be used to decrement in the subquery)
[DEMO HERE]
The following is another way to just see what the previous ID is for each row:
SELECT *, LEAD(ID,1) OVER(ORDER BY ID DESC) PreviousID
FROM example
ORDER BY ID
I would simply do:
select v.val, t.*
from (values (900), (1000), (2000), (3000), (4000) ) v(val) outer apply
(select top 1 t.*
from t
where t.id < v.val
order by t.id desc
) t;
This allows you to see the value on each of the rows. That is probably important because SQL result sets are unordered and it will not be obvious which value goes with which row.
EDIT:
If you know the row numbers are in the table, the most performance solution is probably:
select t.*
from (select t.*, lead(id) over (order by id) as next_id
from t
) t
where next_id in ( . . . );
This should work and I think it will be fairly efficient.
declare #V table (num int primary key);
insert into #V values (800), (889), (900), (997), (1000), (1910), (1920), (2000), (2990), (3000), (3500), (4000);
declare #T table (num int primary key);
insert into #T values (800), (900), (1000), (1200), (2000), (3000), (4000);
select tt.vP
from ( select t.num as t, v.num as v
, LAG(v.num) over (order by v.num) as vP
from #V v
left join #T t
on v.num = t.num
) tt
where tt.t is not null
and tt.vP is not null
order by tt.vP
Not clear how you want it to behave
select t.num
, (select max(v.num) from #V v where v.num < t.num) as prior
from #T t

Conditional Selection of Rows Using TSQL SQL Server (2008 R2)

I've been staring at this for hours and hours and can't come up with an "elegant" set-based way of getting the result set I need...
Here's my sample data (my real data could be 1,000,000+ rows)...
DECLARE #t AS TABLE (ID int,ID1 nvarchar(15),[DATE] date,PERIOD int,[TYPE] nchar(1));
INSERT INTO #t (ID,ID1,[DATE],PERIOD,[TYPE])
VALUES
(1,N'NUM1','2016-01-01',1,N'B'),
(2,N'NUM1','2016-01-01',2,N'A'),
(3,N'NUM1','2016-01-01',3,N'A'),
(4,N'NUM1','2016-01-01',4,N'B'),
(5,N'NUM1','2016-01-01',4,N'A'),
(6,N'NUM1','2016-01-01',5,N'A'),
(7,N'NUM1','2016-01-02',1,N'A'),
(8,N'NUM1','2016-01-02',2,N'A'),
(9,N'NUM1','2016-01-02',3,N'A'),
(10,N'NUM1','2016-01-02',4,N'A'),
(11,N'NUM1','2016-01-02',5,N'A'),
(12,N'NUM2','2016-01-01',1,N'A'),
(13,N'NUM2','2016-01-01',1,N'B'),
(14,N'NUM2','2016-01-01',2,N'A'),
(15,N'NUM2','2016-01-01',3,N'A'),
(16,N'NUM2','2016-01-01',4,N'B'),
(17,N'NUM2','2016-01-01',4,N'A'),
(18,N'NUM2','2016-01-01',5,N'A'),
(19,N'NUM2','2016-01-02',1,N'A'),
(20,N'NUM2','2016-01-02',2,N'B'),
(21,N'NUM2','2016-01-02',3,N'A'),
(22,N'NUM2','2016-01-02',4,N'A'),
(23,N'NUM2','2016-01-02',4,N'B'),
(24,N'NUM2','2016-01-02',5,N'A');
Here is the result set I'm trying to get...
1,'NUM1','2016-01-01',1,'B'
2,'NUM1','2016-01-01',2,'A'
3,'NUM1','2016-01-01',3,'A'
5,'NUM1','2016-01-01',4,'A'
6,'NUM1','2016-01-01',5,'A'
7,'NUM1','2016-01-02',1,'A'
8,'NUM1','2016-01-02',2,'A'
9,'NUM1','2016-01-02',3,'A'
10,'NUM1','2016-01-02',4,'A'
11,'NUM1','2016-01-02',5,'A'
12,'NUM2','2016-01-01',1,'A'
14,'NUM2','2016-01-01',2,'A'
15,'NUM2','2016-01-01',3,'A'
17,'NUM2','2016-01-01',4,'A'
18,'NUM2','2016-01-01',5,'A'
19,'NUM2','2016-01-02',1,'A'
20,'NUM2','2016-01-02',2,'B'
21,'NUM2','2016-01-02',3,'A'
22,'NUM2','2016-01-02',4,'A'
24,'NUM2','2016-01-02',5,'A'
Simply put, each day has 5 periods. They can be of type A or B. I need to get the A types. but if there are no A types, I need to get the B types... (Sounds so simple when I write it out.., but my brain will not come up with something suitable)
Pleeeeeease put me out of my misery..
You can use ROW_NUMBER for this:
SELECT ID, ID1, [DATE], PERIOD, [TYPE]
FROM (
SELECT ID, ID1, [DATE], PERIOD, [TYPE],
ROW_NUMBER() OVER (PARTITION BY ID1, [DATE], PERIOD
ORDER BY [TYPE]) AS rn
FROM #t) AS t
WHERE t.rn = 1
Using ORDER BY [TYPE] in the OVER clause of ROW_NUMBER places 'A' records on top of 'B' records. If there are no 'A' records for a given ID1, [DATE], PERIOD then B records are assigned rn = 1.
Your desired outpout contradicts the statement that "I need to get the A types. but if there are no A types, I need to get the B types... ". Every date in the data has one or more 'A' types. By the statement, the output should include only the 'A' types. But if the statement is correct, then this should work:
Select d.[DATE], t.Id, t.ID1, t.PERIOD, t.[TYPE]
from (select distinct [date] from #t) d
left join #t t
on t.[date] = d.[date]
and t.type = case when exists
(select * from #t
where [date] = d.[Date]
and type = 'A') then 'A'
else 'B' End
I've just come up with
SELECT * FROM #t WHERE [TYPE]='A'
UNION ALL
SELECT * FROM #t t1 WHERE [TYPE]='B' AND NOT EXISTS (SELECT ID FROM #t WHERE ID1=t1.ID1 AND [TYPE]='A' AND [DATE]=t1.[DATE] AND Period=t1.Period)
ORDER BY ID;
which give's me what I need...

Make value from every second row appear in new 3rd column

Lets assume my data looks like this :
Every second row represents old (previous value) in a table that holds historical data.
table 1 :
id value
------------
1 a
1 b
2 c
2 d
3 a
3 b
and i want to get value of every second row to appear in new 3rd column like this :
table 2:
id new_value old_value
------------------------
1 a b
2 c d
3 a b
EDIT:
For clarity ill post the skeleton of query thats producing data i want to transform (so its clear i am already using WITH so cant use additional one due to oracle not yet allowing nesting of WITH elements) :
skeleton code that produces data in table 1 :
with candidates as
(
--select list of candidates
)
SELECT * FROM
(
(
--select new values
MINUS
--select old values
)
UNION
(
--select old values
MINUS
--select new values
)
)
ORDER BY id;
The goal is to finally get only a list of ids that changed with their old and new values.
Thanks in advance.
Use CTE
;WITH CTE AS(
SELECT *, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY ID) RN
FROM TableName
)
SELECT ID,
MIN(CASE WHEN RN=1 THEN [value] END) NewValue,
MIN(CASE WHEN RN=2 THEN [value] END) OldValue
FROM CTE
GROUP BY ID
It is quite possible that overall query can be written in a much simpler way. Just join intermediary results with old and new values together on id to put them in two different columns instead of unioning them into the same column.
WITH
candidates
AS
(
--select list of candidates
)
,CTE_NewValues
AS
(
--select new values
select id, value AS new_value
FROM candidates
WHERE ...
-- assumes id is unique, one row per id
)
,CTE_OldValues
AS
(
--select old values
select id, value AS old_value
FROM candidates
WHERE ...
-- assumes id is unique, one row per id
)
SELECT
CTE_NewValues.id
,CTE_NewValues.new_value
,CTE_OldValues.old_value
FROM
CTE_NewValues
INNER JOIN CTE_OldValues ON CTE_NewValues.id = CTE_OldValues.id
WHERE
CTE_NewValues.new_value <> CTE_OldValues.old_value
ORDER BY
CTE_NewValues.id;
If we stick to the skeleton of the query in the question, there are also many ways to do it. Self-join is likely to be less efficient than using analytic functions, like ROW_NUMBER and LEAD.
Sorting just by id is not enough to unambiguously define which value is new or old. You need to have some extra column to resolve it.
You don't "nest" WITH (common-table expressions), you "chain" them. Something like the following. As you do that, make sure to add the sort_order column to be able to distinguish old and new values, if you don't have a similar column already.
WITH
candidates
AS
(
--select list of candidates
)
,CTE_YourQuery
AS
(
SELECT * FROM
(
(
--select new values
select 1 AS sort_order, id, value
MINUS
--select old values
select 1 AS sort_order, id, value
)
UNION ALL
(
--select old values
select 2 AS sort_order, id, value
MINUS
--select new values
select 2 AS sort_order, id, value
)
)
)
,CTE_RowNumber
AS
(
SELECT
id
,value AS new_value
,ROW_NUMBER() OVER (PARTITION BY id ORDER BY sort_order) AS rn
,LEAD(value) OVER (PARTITION BY id ORDER BY sort_order) AS old_value
FROM CTE_YourQuery
)
SELECT
id
,new_value
,old_value
FROM CTE_RowNumber
WHERE rn = 1
ORDER BY id;
Assuming there is some other column which defines the "order" in which the new and old value appears, you can do this:
select t1.id, t1.value as old_value, t2.value as new_value
from the_table t1
join the_table t2 on t1.id = t2.id and t1.sort_order < t2.sort_order
But you have to have some column that distinguishes the row that is considered "old" from the one that is considered "new".

How to get the first not null value from a column of values in Big Query?

I am trying to extract the first not null value from a column of values based on timestamp. Can somebody share your thoughts on this. Thank you.
What have i tried so far?
FIRST_VALUE( column ) OVER ( PARTITION BY id ORDER BY timestamp)
Input :-
id,column,timestamp
1,NULL,10:30 am
1,NULL,10:31 am
1,'xyz',10:32 am
1,'def',10:33 am
2,NULL,11:30 am
2,'abc',11:31 am
Output(expected) :-
1,'xyz',10:30 am
1,'xyz',10:31 am
1,'xyz',10:32 am
1,'xyz',10:33 am
2,'abc',11:30 am
2,'abc',11:31 am
You can modify your sql like this to get the data you want.
FIRST_VALUE( column )
OVER (
PARTITION BY id
ORDER BY
CASE WHEN column IS NULL then 0 ELSE 1 END DESC,
timestamp
)
Try this old trick of string manipulation:
Select
ID,
Column,
ttimestamp,
LTRIM(Right(CColumn,20)) as CColumn,
FROM
(SELECT
ID,
Column,
ttimestamp,
MIN(Concat(RPAD(IF(Column is null, '9999999999999999',STRING(ttimestamp)),20,'0'),LPAD(Column,20,' '))) OVER (Partition by ID) CColumn
FROM (
SELECT
*
FROM (Select 1 as ID, STRING(NULL) as Column, 0.4375 as ttimestamp),
(Select 1 as ID, STRING(NULL) as Column, 0.438194444444444 as ttimestamp),
(Select 1 as ID, 'xyz' as Column, 0.438888888888889 as ttimestamp),
(Select 1 as ID, 'def' as Column, 0.439583333333333 as ttimestamp),
(Select 2 as ID, STRING(NULL) as Column, 0.479166666666667 as ttimestamp),
(Select 2 as ID, 'abc' as Column, 0.479861111111111 as ttimestamp)
))
As far as I know, Big Query has no options like 'IGNORE NULLS' or 'NULLS LAST'. Given that, this is the simplest solution I could come up with. I would like to see even simpler solutions.
Assuming the input data is in table "original_data",
select w2.id, w1.column, w2.timestamp
from
(select id,column,timestamp
from
(select id,column,timestamp, row_number()
over (partition BY id ORDER BY timestamp) position
FROM original_data
where column is not null
)
where position=1
) w1
right outer join
original_data as w2
on w1.id = w2.id
SELECT id,
(SELECT top(1) column FROM test1 where id=1 and column is not null order by autoID desc) as name
,timestamp
FROM yourTable
Output :-
1,'xyz',10:30 am
1,'xyz',10:31 am
1,'xyz',10:32 am
1,'xyz',10:33 am
2,'abc',11:30 am
2,'abc',11:31 am

SQL group by if values are close

Class| Value
-------------
A | 1
A | 2
A | 3
A | 10
B | 1
I am not sure whether it is practical to achieve this using SQL.
If the difference of values are less than 5 (or x), then group the rows (of course with the same Class)
Expected result
Class| ValueMin | ValueMax
---------------------------
A | 1 | 3
A | 10 | 10
B | 1 | 1
For fixed intervals, we can easily use "GROUP BY". But now the grouping is based on nearby row's value. So if the values are consecutive or very close, they will be "chained together".
Thank you very much
Assuming MSSQL
You are trying to group things by gaps between values. The easiest way to do this is to use the lag() function to find the gaps:
select class, min(value) as minvalue, max(value) as maxvalue
from (select class, value,
sum(IsNewGroup) over (partition by class order by value) as GroupId
from (select class, value,
(case when lag(value) over (partition by class order by value) > value - 5
then 0 else 1
end) as IsNewGroup
from t
) t
) t
group by class, groupid;
Note that this assumes SQL Server 2012 for the use of lag() and cumulative sum.
Update:
*This answer is incorrect*
Assuming the table you gave is called sd_test, the following query will give you the output you are expecting
In short, we need a way to find what was the value on the previous row. This is determined using a join on row ids. Then create a group to see if the difference is less than 5. and then it is just regular 'Group By'.
If your version of SQL Server supports windowing functions with partitioning the code would be much more readable.
SELECT
A.CLASS
,MIN(A.VALUE) AS MIN_VALUE
,MAX(A.VALUE) AS MAX_VALUE
FROM
(SELECT
ROW_NUMBER()OVER(PARTITION BY CLASS ORDER BY VALUE) AS ROW_ID
,CLASS
,VALUE
FROM SD_TEST) AS A
LEFT JOIN
(SELECT
ROW_NUMBER()OVER(PARTITION BY CLASS ORDER BY VALUE) AS ROW_ID
,CLASS
,VALUE
FROM SD_TEST) AS B
ON A.CLASS = B.CLASS AND A.ROW_ID=B.ROW_ID+1
GROUP BY A.CLASS,CASE WHEN ABS(COALESCE(B.VALUE,0)-A.VALUE)<5 THEN 1 ELSE 0 END
ORDER BY A.CLASS,cASE WHEN ABS(COALESCE(B.VALUE,0)-A.VALUE)<5 THEN 1 ELSE 0 END DESC
ps: I think the above is ANSI compliant. So should run in most SQL variants. Someone can correct me if it is not.
These give the correct result, using the fact that you must have the same number of group starts as ends and that they will both be in ascending order.
if object_id('tempdb..#temp') is not null drop table #temp
create table #temp (class char(1),Value int);
insert into #temp values ('A',1);
insert into #temp values ('A',2);
insert into #temp values ('A',3);
insert into #temp values ('A',10);
insert into #temp values ('A',13);
insert into #temp values ('A',14);
insert into #temp values ('b',7);
insert into #temp values ('b',8);
insert into #temp values ('b',9);
insert into #temp values ('b',12);
insert into #temp values ('b',22);
insert into #temp values ('b',26);
insert into #temp values ('b',67);
Method 1 Using CTE and row offsets
with cte as
(select distinct class,value,ROW_NUMBER() over ( partition by class order by value ) as R from #temp),
cte2 as
(
select
c1.class
,c1.value
,c2.R as PreviousRec
,c3.r as NextRec
from
cte c1
left join cte c2 on (c1.class = c2.class and c1.R= c2.R+1 and c1.Value < c2.value + 5)
left join cte c3 on (c1.class = c3.class and c1.R= c3.R-1 and c1.Value > c3.value - 5)
)
select
Starts.Class
,Starts.Value as StartValue
,Ends.Value as EndValue
from
(
select
class
,value
,row_number() over ( partition by class order by value ) as GroupNumber
from cte2
where PreviousRec is null) as Starts join
(
select
class
,value
,row_number() over ( partition by class order by value ) as GroupNumber
from cte2
where NextRec is null) as Ends on starts.class=ends.class and starts.GroupNumber = ends.GroupNumber
** Method 2 Inline views using not exists **
select
Starts.Class
,Starts.Value as StartValue
,Ends.Value as EndValue
from
(
select class,Value ,row_number() over ( partition by class order by value ) as GroupNumber
from
(select distinct class,value from #temp) as T
where not exists (select 1 from #temp where class=t.class and Value < t.Value and Value > t.Value -5 )
) Starts join
(
select class,Value ,row_number() over ( partition by class order by value ) as GroupNumber
from
(select distinct class,value from #temp) as T
where not exists (select 1 from #temp where class=t.class and Value > t.Value and Value < t.Value +5 )
) ends on starts.class=ends.class and starts.GroupNumber = ends.GroupNumber
In both methods I use a select distinct to begin because if you have a dulpicate entry at a group start or end things go awry without it.
Here is one way of getting the information you are after:
SELECT Under5.Class,
(
SELECT MIN(m2.Value)
FROM MyTable AS m2
WHERE m2.Value < 5
AND m2.Class = Under5.Class
) AS ValueMin,
(
SELECT MAX(m3.Value)
FROM MyTable AS m3
WHERE m3.Value < 5
AND m3.Class = Under5.Class
) AS ValueMax
FROM
(
SELECT DISTINCT m1.Class
FROM MyTable AS m1
WHERE m1.Value < 5
) AS Under5
UNION
SELECT Over4.Class,
(
SELECT MIN(m4.Value)
FROM MyTable AS m4
WHERE m4.Value >= 5
AND m4.Class = Over4.Class
) AS ValueMin,
(
SELECT Max(m5.Value)
FROM MyTable AS m5
WHERE m5.Value >= 5
AND m5.Class = Over4.Class
) AS ValueMax
FROM
(
SELECT DISTINCT m6.Class
FROM MyTable AS m6
WHERE m6.Value >= 5
) AS Over4