Elegant and efficient way to write minimum in bigquery - sql

I have a table structure like as shown below
What I am trying to do is find the minimum for each group of items. I have to keep finding minimums for different group of itemids. Though my code works, am sure this isn't the elegant and efficient way to do it.
finding minimum WHERE itemid IN (1,2)
select subject_id,icu_id,value as min_val_1 FROM
(SELECT c.subject_id,c.time_1,d.min_time,d.max_time,c.value,c.icu_id,
row_number() OVER (PARTITION BY c.subject_id ORDER BY c.value,c.time_1) AS rank
from table_1 d
left join table_2 c
on c.subject_id = d.subject_id and (c.icu_id = d.icu_id_1 or c.icu_id = d.icu_id_2)
where c. itemid in
(1,2)) SBP
where rank = 1
order by subject_id,charttime
finding minimum where itemid in (3,4)
select subject_id,icu_id,value as min_val_2 FROM
(SELECT c.subject_id,c.time_1,d.min_time,d.max_time,c.value,c.icu_id,
row_number() OVER (PARTITION BY c.subject_id ORDER BY c.value,c.time_1) AS rank
from table_1 d
left join table_2 c
on c.subject_id = d.subject_id and (c.icu_id = d.icu_id_1 or c.icu_id = d.icu_id_2)
where c. itemid in
(3,4)) SBP
where rank = 1
order by subject_id
As you can see everything is same. Only difference is the itemid. Is there any elegant way to join/merge these two? can you help me?
I expect my output to be like this?

Below is for BigQuery Standard SQL and leaves your original query fully intact while just adding missing peaces (added comments so you can see those few changes/additions)
#standardSQL
SELECT
subject_id,
icu_id,
MAX(IF(grp = 1, value, NULL)) AS min_val_1, -- changed
MAX(IF(grp = 2, value, NULL)) AS min_val_2 -- changed
FROM (
SELECT
c.subject_id,
c.time_1,
d.min_time,
d.max_time,
c.value,c.icu_id,
-- in below row - added element to PARTITION BY
ROW_NUMBER() OVER (PARTITION BY c.subject_id, CASE WHEN c.itemid IN (1, 2) THEN 1 WHEN c.itemid IN (3, 4) THEN 2 END ORDER BY c.value, c.time_1) AS RANK,
CASE WHEN c.itemid IN (1, 2) THEN 1 WHEN c.itemid IN (3, 4) THEN 2 END grp -- added
FROM table_1 d
LEFT JOIN table_2 c
ON c.subject_id = d.subject_id AND (c.icu_id = d.icu_id_1 OR c.icu_id = d.icu_id_2)
WHERE c.itemid IN (1, 2, 3, 4) -- changed
) SBP
WHERE RANK = 1
GROUP BY subject_id, icu_id -- added
ORDER BY subject_id
If to apply to sample data from your question - result is
Row subject_id icu_id min_val_1 min_val_2
1 124 A1 10 19
2 199 B2 21 21

I think you just want conditional aggregation:
select t2.subject_id, t2.icu_id,
min(case when t2.item_id in (1, 2) then t2.value end) as value_1,
min(case when t2.item_id in (3, 4) then t2.value end) as value_2
from table_2 t2
group by t2.subject_id, t2.icu_id;

Related

Swap two adjacent rows of a column in sql

I'm trying to solve this following problem:
Write a sql query to swap two adjacent rows in a column of a table.
Input table
Name Id
A 1
B 2
C 3
D 4
E 5
Output table
Name Id
A 2
B 1
C 4
D 3
E 5
Description:- 1 is associated with A and 2 with B, swap them, thus now 1 is associated with B and 2 with A, Similarly do for C and D, Since E doesn't has any pair, leave it as it is.
Note:- This may be solved using CASE Statements, but I am trying for a generalized solution, Say currently it is only 5 rows, it may be 10,20 etc..
Eg:
SELECT
*,CASE WHEN Name = A then 2 ELSEIF Name = B then 1 etc...
FROM YourTable
You can use window functions to solve this.
on MySQL (>= 8.0):
SELECT ID, IFNULL(CASE WHEN t.rn % 2 = 0 THEN LAG(Name) OVER (ORDER BY ID) ELSE LEAD(Name) OVER (ORDER BY ID) END, Name) AS Name
FROM (
SELECT ID, Name, ROW_NUMBER() OVER (ORDER BY ID) AS rn
FROM table_name
) t
demo on dbfiddle.uk
on SQL-Server:
SELECT ID, ISNULL(CASE WHEN t.rn % 2 = 0 THEN LAG(Name) OVER (ORDER BY ID) ELSE LEAD(Name) OVER (ORDER BY ID) END, Name) AS Name
FROM (
SELECT ID, Name, ROW_NUMBER() OVER (ORDER BY ID) AS rn
FROM table_name
) t
demo on dbfiddle.uk
If you have sql-server, you can try this.
DECLARE #YourTable TABLE (Name VARCHAR(10), Id INT)
INSERT INTO #YourTable VALUES
('A', 1),
('B', 2),
('C', 3),
('D', 4),
('E', 5)
;WITH CTE AS (
SELECT *, ROW_NUMBER()OVER(ORDER BY Name) AS RN FROM #YourTable
)
SELECT T1.Name, ISNULL(T2.Id, T1.Id) Id FROM CTE T1
LEFT JOIN CTE T2 ON T1.RN + CASE WHEN T1.RN%2 = 0 THEN - 1 ELSE 1 END = T2.RN
Result:
Name Id
---------- -----------
A 2
B 1
C 4
D 3
E 5
You didn't specify your DBMS, but the following is standard ANSI SQL.
You can use a values() clause to provide the mapping of the IDs and then join against that:
with id_map (source_id, target_id) as (
values
(1, 2),
(2, 1)
)
select t.name, coalesce(m.target_id, t.id) as mapped_id
from the_table t
left join id_map m on m.source_id = t.id
order by name;
Alternatively if you only want to specify the mapping once for one direction, you can use this:
with id_map (source_id, target_id) as (
values
(1, 2)
)
select t.name,
case id
when m.source_id then m.target_id
when m.target_id then m.source_id
else id
end as mapped_id
from the_table t
left join id_map m on t.id in (m.source_id, m.target_id)
order by name;
Online example: https://rextester.com/FBFH52231

In SQL how to increment a varibale in case statement

So I have a table A as follows
Message code trig timestamp
a x 1 T1
a x 1 T2
a x 0 T3
b y 1 T4
b y 1 T5
a x 1 T6
I want the following result
Message code trig timestamp groupbycolumn
a x 1 T1 1
a x 1 T2 1
a x 0 T3 2
b y 1 T4 3
b y 1 T5 3
a x 1 T6 4
I need to group the rows according to message, code and trigg but ordered by the timestamp. So if a new message, code and trigg value comes then it should have a new number in the groupby column. Note that a,x 1 in the first line has a groupby value 1 and the one in the last has 4.
declare #chngeVal int;
set #chngeVal=0;
select n.Message,n.code,n.trig,
case when n.Message<>n.nextMessage or n.code<>n.nextCode or n.trig<>n.nextTrigg
then #chngeVal+1
else #chngeVal
end as groupbycolumn,
n.timeStamp
from ( select Message,code,trig,timestamp,
lead(Message) over (order by timestamp asc) as nextMessage,
lead(code) over (order by timestamp asc) as nextCode,
lead(trig) over (order by timestamp asc) as nextTrig
from A ) n
If I could get the case to do a #chngeVal= #chngeVal+1 it would work, but I cannot do that in case. Would anybody know how to change the value of a variable in a query.
Any idea would be much appreciated.
I broke the solution into a three part query using two CTEs:
CreateIds produces ids I use to identify the rows in the next two parts.
Firstrows gets only the rows that start each group, and determines the unique id for each group as well as the row id that starts the next group (NexdtGroupRowId).
Finally, I produce the result by joining Firstrows to a range of rows from CreateIds that have a rowId between the rowId of the first row and the rowId of NextGroupRowId - 1.
My feeling is that this is inefficient as heck, and there's a way to do this with a recursive CTE. But since you started using window functions I just went in that direction.
WITH createIds AS (
SELECT *
, ROW_NUMBER() OVER(ORDER BY [timestamp]) AS RowId
, DENSE_RANK() OVER(ORDER BY Message, code, trig DESC) AS GroupId
FROM src
)
, firstrows AS (
SELECT a.RowId
, ROW_NUMBER() OVER (ORDER BY a.RowId) AS OrderedGroupId
, LEAD(a.RowId, 1, NULL) OVER (ORDER BY a.RowId) NextGroupRowId
FROM createIds a
LEFT JOIN createIds b ON b.RowId = a.RowId - 1
WHERE a.GroupId != b.GroupId OR b.GroupId IS NULL
)
SELECT a.[Message], a.code, a.trig, a.[timestamp], r1.OrderedGroupId
FROM firstrows r1
INNER JOIN createIds a ON a.RowId >= r1.RowId AND (r1.NextGroupRowId IS NULL OR a.RowId < r1.NextGroupRowId)
ORDER BY a.[timestamp]
You can use the difference of row_numbers() or lag() and cmulative sums:
select t.*,
sum(case when message = prev_message and code = prev_code and trig = prev_trig
then 0 else 1
end) over (order by timestamp) as groupbycolumn
from (select t.*,
lag(message) over (order by timestamp) as prev_message,
lag(code) over (order by timestamp) as prev_code,
lag(trig) over (order by timestamp) as prev_trig
from a
) a

SQL Joining table with Min and Sec Min row

I want to join table 1 with table2 twice becuase I need to get the first minimum record and the second minimum. However, I can only think of using a cte to get the second minimum record. Is there a better way to do it?
Here is the table table:
I want to join Member with output table FirstRunID whose Output value is 1 and second RunID whose Output value is 0
current code I am using:
select memid, a.runid as aRunid,b.runid as bRunid
into #temp
from FirstTable m inner join
(select min(RunID), MemID [SecondTable] where ouput=1 group by memid)a on m.memid=a.memid
inner join (select RunID, MemID [SecondTable] where ouput=0 )b on m.memid=a.memid and b.runid>a.runid
with cte as
(
select row_number() over(partition by memid, arunid order by brunid ),* from #temp
)
select * from cte where n=1
You can use outer apply operator for this:
select * from t1
outer apply(select top 1 t2.runid from t2
where t1.memid = t2.memid and t2.output = 1 order by t2.runid) as oa1
outer apply(select top 1 t2.runid from t2
where t1.memid = t2.memid and t2.output = 0 order by t2.runid) as oa2
You can do this with conditional aggregation. Based on your results, you don't need the first table:
select t2.memid,
max(case when output = 1 and seqnum = 1 then runid end) as OutputValue1,
max(case when output = 0 and seqnum = 2 then runid end) as OutputValue2
from (select t2.*,
row_number() over (partition by memid, output order by runid) a seqnum
from t2
) t2
group by t2.memid;
declare #FirstTable table
(memid int, name varchar(20))
insert into #firsttable
values
(1,'John'),
(2,'Victor')
declare #secondtable table
(runid int,memid int,output int)
insert into #secondtable
values
(1,1,0),(1,2,1),(2,1,1),(2,2,1),(3,1,1),(3,2,0),(4,1,0),(4,2,0)
;with cte as
(
SELECT *, row_number() over (partition by memid order by runid) seq --sequence
FROM #SECONDTABLE T
where t.output = 1
union all
SELECT *, row_number() over (partition by memid order by runid) seq --sequence
FROM #SECONDTABLE T
where t.output = 0 and
t.runid > (select min(x.runid) from #secondtable x where x.memid = t.memid and x.output = 1 group by x.memid) --lose any O output record where there is no prior 1 output record
)
select cte1.memid,cte1.runid,cte2.runid from cte cte1
join cte cte2 on cte2.memid = cte1.memid and cte2.seq = cte1.seq
where cte1.seq = 1 --remove this test if you want matched pairs
and cte1.output = 1 and cte2.output = 0

concatenate recursive cross join

I need to concatenate the name in a recursive cross join way. I don't know how to do this, I have tried a CTE using WITH RECURSIVE but no success.
I have a table like this:
group_id | name
---------------
13 | A
13 | B
19 | C
19 | D
31 | E
31 | F
31 | G
Desired output:
combinations
------------
ACE
ACF
ACG
ADE
ADF
ADG
BCE
BCF
BCG
BDE
BDF
BDG
Of course, the results should multiply if I add a 4th (or more) group.
Native Postgresql Syntax:
SqlFiddleDemo
WITH RECURSIVE cte1 AS
(
SELECT *, DENSE_RANK() OVER (ORDER BY group_id) AS rn
FROM mytable
),cte2 AS
(
SELECT
CAST(name AS VARCHAR(4000)) AS name,
rn
FROM cte1
WHERE rn = 1
UNION ALL
SELECT
CAST(CONCAT(c2.name,c1.name) AS VARCHAR(4000)) AS name
,c1.rn
FROM cte1 c1
JOIN cte2 c2
ON c1.rn = c2.rn + 1
)
SELECT name as combinations
FROM cte2
WHERE LENGTH(name) = (SELECT MAX(rn) FROM cte1)
ORDER BY name;
Before:
I hope if you don't mind that I use SQL Server Syntax:
Sample:
CREATE TABLE #mytable(
ID INTEGER NOT NULL
,TYPE VARCHAR(MAX) NOT NULL
);
INSERT INTO #mytable(ID,TYPE) VALUES (13,'A');
INSERT INTO #mytable(ID,TYPE) VALUES (13,'B');
INSERT INTO #mytable(ID,TYPE) VALUES (19,'C');
INSERT INTO #mytable(ID,TYPE) VALUES (19,'D');
INSERT INTO #mytable(ID,TYPE) VALUES (31,'E');
INSERT INTO #mytable(ID,TYPE) VALUES (31,'F');
INSERT INTO #mytable(ID,TYPE) VALUES (31,'G');
Main query:
WITH cte1 AS
(
SELECT *, rn = DENSE_RANK() OVER (ORDER BY ID)
FROM #mytable
),cte2 AS
(
SELECT
TYPE = CAST(TYPE AS VARCHAR(MAX)),
rn
FROM cte1
WHERE rn = 1
UNION ALL
SELECT
[Type] = CAST(CONCAT(c2.TYPE,c1.TYPE) AS VARCHAR(MAX))
,c1.rn
FROM cte1 c1
JOIN cte2 c2
ON c1.rn = c2.rn + 1
)
SELECT *
FROM cte2
WHERE LEN(Type) = (SELECT MAX(rn) FROM cte1)
ORDER BY Type;
LiveDemo
I've assumed that the order of "cross join" is dependent on ascending ID.
cte1 generate DENSE_RANK() because your IDs contain gaps
cte2 recursive part with CONCAT
main query just filter out required length and sort string
The recursive query is a bit simpler in Postgres:
WITH RECURSIVE t AS ( -- to produce gapless group numbers
SELECT dense_rank() OVER (ORDER BY group_id) AS grp, name
FROM tbl
)
, cte AS (
SELECT grp, name
FROM t
WHERE grp = 1
UNION ALL
SELECT t.grp, c.name || t.name
FROM cte c
JOIN t ON t.grp = c.grp + 1
)
SELECT name AS combi
FROM cte
WHERE grp = (SELECT max(grp) FROM t)
ORDER BY 1;
The basic logic is the same as in the SQL Server version provided by #lad2025, I added a couple of minor improvements.
Or you can use a simple version if your maximum number of groups is not too big (can't be very big, really, since the result set grows exponentially). For a maximum of 5 groups:
WITH t AS ( -- to produce gapless group numbers
SELECT dense_rank() OVER (ORDER BY group_id) AS grp, name AS n
FROM tbl
)
SELECT concat(t1.n, t2.n, t3.n, t4.n, t5.n) AS combi
FROM (SELECT n FROM t WHERE grp = 1) t1
LEFT JOIN (SELECT n FROM t WHERE grp = 2) t2 ON true
LEFT JOIN (SELECT n FROM t WHERE grp = 3) t3 ON true
LEFT JOIN (SELECT n FROM t WHERE grp = 4) t4 ON true
LEFT JOIN (SELECT n FROM t WHERE grp = 5) t5 ON true
ORDER BY 1;
Probably faster for few groups. LEFT JOIN .. ON true makes this work even if higher levels are missing. concat() ignores NULL values. Test with EXPLAIN ANALYZE to be sure.
SQL Fiddle showing both.

Using SQL to get the previous rows data

I have a requirement where I need to get data from the previous row to use in a calculation to give a status to the current row. It's a history table. The previous row will let me know if a data has changed in a date field.
I've looked up using cursors and it seems a little complicated. Is this the best way to go?
I've also tried to assgin a value to a new field...
newField =(Select field1 from Table1 where "previous row") previous row is where I seem to get stuck. I can't figure out how to select the row beneath the current row.
I'm using SQL Server 2005
Thanks in advance.
-- Test data
declare #T table (ProjectNumber int, DateChanged datetime, Value int)
insert into #T
select 1, '2001-01-01', 1 union all
select 1, '2001-01-02', 1 union all
select 1, '2001-01-03', 3 union all
select 1, '2001-01-04', 3 union all
select 1, '2001-01-05', 4 union all
select 2, '2001-01-01', 1 union all
select 2, '2001-01-02', 2
-- Get CurrentValue and PreviousValue with a Changed column
;with cte as
(
select *,
row_number() over(partition by ProjectNumber order by DateChanged) as rn
from #T
)
select
C.ProjectNumber,
C.Value as CurrentValue,
P.Value as PreviousValue,
case C.Value when P.Value then 0 else 1 end as Changed
from cte as C
inner join cte as P
on C.ProjectNumber = P.ProjectNumber and
C.rn = P.rn + 1
-- Count the number of changes per project
;with cte as
(
select *,
row_number() over(partition by ProjectNumber order by DateChanged) as rn
from #T
)
select
C.ProjectNumber,
sum(case C.Value when P.Value then 0 else 1 end) as ChangeCount
from cte as C
inner join cte as P
on C.ProjectNumber = P.ProjectNumber and
C.rn = P.rn + 1
group by C.ProjectNumber
This really depends on what tells you a row is a "Previous Row". however, a self join should do what you want:
select *
from Table1 this
join Table2 prev on this.incrementalID = prev.incrementalID+1
If you have the following table
CREATE TABLE MyTable (
Id INT NOT NULL,
ChangeDate DATETIME NOT NULL,
.
.
.
)
The following query will return the previous record for any record from MyTable.
SELECT tbl.Id,
tbl.ChangeDate,
hist.Id,
hist.ChangeDate
FROM MyTable tbl
INNER JOIN MyTable hist
ON hist.Id = tbl.Id
AND hiost.ChangeDate = (SELECT MAX(ChangeDate)
FROM MyTable sub
WHERE sub.Id = tbl.Id AND sub.ChangeDate < tbl.ChangeDate)