Assign unique id based on combination in sql - sql

The data looks like this:
I need to assign an id based on the combination of the two columns, and then get the id for each value that appears in either column.
final output should look like:
I tried with
-- Asker's attempt: number the rows, link each row to earlier rows that share a value, then rank the links.
-- RNS: tag every row of test with a row number; OVER () has no ORDER BY, so the numbering order is not specified.
WITH RNS AS (
SELECT *, ROW_NUMBER() OVER () AS rn
FROM test
),
-- IDS: per row, the smallest rn among earlier rows sharing a colA/colB value (the row's own rn when there is none).
-- Only direct matches are considered; the self-join is applied once, not transitively.
IDS AS (
SELECT t1.coLA, t1.colB, t1.rn, MIN(COALESCE(t2.rn, t1.rn)) AS id
FROM RNS t1
LEFT JOIN RNS t2 ON t1.rn > t2.rn
AND (t1.colA = t2.colA OR t1.colA = t2.colB OR
t1.colB = t2.colA OR t1.colB = t2.colB)
GROUP BY t1.coLA, t1.colB, t1.rn
ORDER BY t1.rn
)
-- Compress the collected minimum row numbers into consecutive ids.
SELECT colA, colB, DENSE_RANK() OVER (ORDER BY id) AS id
FROM IDS
ORDER BY rn
but it is not working as expected.

Using a RECURSIVE CTE in BigQuery, you may try the query below:
-- Give every value the id of the chain it belongs to by walking (colA -> colB) links from the chain roots.
WITH RECURSIVE
  -- Sample input: pair the characters of the two strings by position into (colA, colB) rows.
  test AS (
    SELECT * EXCEPT (offset)
    FROM UNNEST(SPLIT('abaaeghjc', '')) AS colA WITH OFFSET
    INNER JOIN UNNEST(SPLIT('bccdfhikl', '')) AS colB WITH OFFSET
      USING (offset)
  ),
  IDS AS (
    -- Anchor: rows whose colA never shows up as a colB; each distinct colA gets its own id.
    SELECT
      root.*,
      DENSE_RANK() OVER (ORDER BY root.colA) AS id
    FROM test AS root
    WHERE NOT EXISTS (
      SELECT 1
      FROM test AS parent
      WHERE parent.colB = root.colA
    )
    UNION ALL
    -- Step: a row whose colA equals the colB of an already labelled row inherits that row's id.
    SELECT
      child.*,
      seen.id
    FROM IDS AS seen
    INNER JOIN test AS child
      ON child.colA = seen.colB
  )
-- Flatten both columns into a single list of (value, id) pairs.
SELECT DISTINCT
  col,
  id
FROM IDS
CROSS JOIN UNNEST([colA, colB]) AS col
ORDER BY col;
Query results:
+-----+----+
| col | id |
+-----+----+
| a | 1 |
| b | 1 |
| c | 1 |
| d | 1 |
| e | 2 |
| f | 2 |
| g | 3 |
| h | 3 |
| i | 3 |
| j | 4 |
| k | 4 |
| l | 1 |
+-----+----+

Related

Join tables displaying one column based on max of another

I'm joining some tables and need to return values based upon a max value without actually returning the max value column itself.
Table1
+------------+----------+---------+
| EmployeeID | TaskTime | JobType |
+------------+----------+---------+
| 1000 | 5:14:00 | Read |
| 1000 | 5:42:00 | Write |
| 1000 | 6:14:00 | Write |
+------------+----------+---------+
Table2
+------------+-----------+-----------+---------+
| EmployeeID | ClockType | JobDetail | JobType |
+------------+-----------+-----------+---------+
| 1000 | 5:03:00 | This | Read |
| 1000 | 5:21:00 | That | Write |
+------------+-----------+-----------+---------+
What I want to now return is the JobDetail corresponding to the last ClockType before the TaskTime. So something like this:
+------------+----------+---------+-----------+
| EmployeeID | TaskTime | JobType | JobDetail |
+------------+----------+---------+-----------+
| 1000 | 5:14:00 | Read | This |
| 1000 | 5:42:00 | Write | That |
| 1000 | 6:14:00 | Write | That |
+------------+----------+---------+-----------+
My query I'm using is as follows:
-- Asker's query: attach every Table2 row of the same employee and job type whose ClockTime is earlier than the task time.
SELECT DISTINCT X.*, t2.JobDetail
FROM
(
SELECT t1.EmployeeID, t1.TaskTime, t1.JobType
FROM Table1 t1
-- NOTE(review): the sample data shows EmployeeID 1000, not 10000 - confirm the literal.
WHERE t1.EmployeeID=10000
) X
-- NOTE(review): the Table2 sample labels this column ClockType while the query uses ClockTime - confirm the name.
LEFT JOIN Table2 t2 on (X.EmployeeID = t2.EmployeeID) and (X.JobType = t2.JobType) and (t2.ClockTime < X.TaskTime)
This returns all rows where ClockTime < TaskTime, as expected. I just can't seem to figure out how to show only the last ClockTime before TaskTime. I tried changing my JOIN to the following, but it returns all NULL values for JobDetail:
-- Asker's replacement join: number the Table2 rows and join the numbered set back to X.
LEFT JOIN
(
-- The derived table exposes only rn, JobDetail and time2; rn restarts for every distinct ClockTime.
SELECT row_number() over(partition by ClockTime order by ClockTime desc) rn, JobDetail, ClockTime as time2
FROM Table2) t2
-- NOTE(review): the ON clause uses t2.EmployeeID, t2.JobType and t2.ClockTime, none of which the derived table selects.
on (X.EmployeeID = t2.EmployeeID) and (X.JobType = t2.JobType) and (t2.ClockTime < X.TaskTime)
You can try using TOP and Apply to get the max values.
-- For each Table1 row, fetch the JobDetail of the latest matching Table2 row at or before TaskTime.
SELECT
    t1.*,
    latest.JobDetail
FROM Table1 AS t1
CROSS APPLY (
    -- Same employee and job type, newest ClockType first; only the top row is kept.
    SELECT TOP (1)
        t2.JobDetail
    FROM Table2 AS t2
    WHERE t2.EmployeeID = t1.EmployeeID
      AND t2.JobType = t1.JobType
      AND t2.ClockType <= t1.TaskTime
    ORDER BY t2.ClockType DESC
) AS latest
SQL Fiddle
Using Row_Number()
-- Rank the matching Table2 rows for each Table1 row (newest ClockType first) and keep the first one.
WITH ranked AS (
    SELECT
        t1.EmployeeID,
        t1.TaskTime,
        t1.JobType,
        t2.JobDetail,
        ROW_NUMBER() OVER (
            PARTITION BY t1.EmployeeID, t1.JobType, t1.TaskTime
            ORDER BY t2.ClockType DESC
        ) AS Rn
    FROM Table1 AS t1
    INNER JOIN Table2 AS t2
        ON t2.EmployeeID = t1.EmployeeID
       AND t2.JobType = t1.JobType
       AND t2.ClockType <= t1.TaskTime
)
SELECT *
FROM ranked
WHERE Rn = 1
SQL Fiddle

If 2 rows have the same ID select one with the greater other column value

I'm having difficulty getting my head round this one, which should be simple.
When selecting from the table, if multiple rows have the same ID then select the row which has a greater value in Col2.
Here is my sample table:
ID | Col2 |
----------------
123 | 1 |
123 | 2 |
1234 | 2 |
12345 | 3 |
Expected output:
ID | Col2 |
----------------
123 | 2 |
1234 | 2 |
12345 | 3 |
For this example, GROUP BY is sufficient:
-- One row per id carrying the largest col2 recorded for that id.
SELECT
    id,
    MAX(col2) AS col2
FROM t
GROUP BY id;
If you want the row with the maximum column, then I would often recommend row_number():
-- Number the rows of each id from the highest col2 down and keep the first row of every group.
WITH ranked AS (
    SELECT
        t.*,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY col2 DESC) AS seqnum
    FROM t
)
SELECT ranked.*
FROM ranked
WHERE seqnum = 1;
However, the "old-fashioned" method might have better performance:
-- Keep the rows whose col2 equals the maximum col2 recorded for the same id.
SELECT t.*
FROM t
WHERE t.col2 = (
    SELECT MAX(peer.col2)
    FROM t AS peer
    WHERE peer.id = t.id
);
NOT EXISTS operator can also be used:
-- Keep a row only when no other row with the same id has a larger Col2.
SELECT *
FROM Table1 AS t1
WHERE NOT EXISTS (
    SELECT 1
    FROM Table1 AS bigger
    WHERE bigger.id = t1.id
      AND bigger.col2 > t1.Col2
)
Demo: http://sqlfiddle.com/#!18/5e1d6/3
| ID | Col2 |
|-------|------|
| 123 | 2 |
| 1234 | 2 |
| 12345 | 3 |

Add randomly selected column to result

I'd like to know if there is a more optimal query to get what I want from the database.
My database schema is as follows :
Table1:
(NUM_T1C1;ID_T1C2)
Table2:
(ID_T2C1;RES_T2C2)
First, I want to get a random row defined as:
-- QUERY1: pick one random (NUM_T1C1, ID_T2C1) pair among the joined rows with non-null NUM_T1C1 and RES_T2C2.
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT T1.NUM_T1C1, T2.ID_T2C1, DBMS_RANDOM.VALUE
FROM TABLE1 T1, TABLE2 T2
WHERE T1.NUM_T1C1 IS NOT NULL
AND T2.RES_T2C2 IS NOT NULL
AND T1.ID_T1C2 = T2.ID_T2C1
-- Position 3 is the DBMS_RANDOM.VALUE column, so the rows are shuffled before ROWNUM keeps the first one.
ORDER BY 3
) WHERE ROWNUM = 1
I will call this query: QUERY1.
My question is as follows, I want to get a row as (random NUM_T1C1 != QUERY1.NUM_T1C1; ID_T2C1 == QUERY1.ID_T2C1), so I have tried:
-- Asker's second query: take the random pair from QUERY1 (R1), then pick a random TABLE1 row (R2)
-- whose NUM_T1C1 differs from it, keeping R1's ID_T2C1.
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT R2.NUM_T1C1, R1.ID_T2C1, DBMS_RANDOM.VALUE
FROM (
-- R1: QUERY1 inlined - one random matching (NUM_T1C1, ID_T2C1) pair.
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT T1.NUM_T1C1, T2.ID_T2C1, DBMS_RANDOM.VALUE
FROM TABLE1 T1, TABLE2 T2
WHERE T1.NUM_T1C1 IS NOT NULL
AND T2.RES_T2C2 IS NOT NULL
AND T1.ID_T1C2 = T2.ID_T2C1
ORDER BY 3
) WHERE ROWNUM = 1
) R1, TABLE1 R2
WHERE R2.NUM_T1C1 <> R1.NUM_T1C1
-- Shuffle the candidate rows by the random column (position 3) before taking the first.
ORDER BY 3
) WHERE ROWNUM = 1
It's working, but I think this is not the optimal way to do so.
Is there a better way to get the expected result?
EDIT:
I found another way to get those random rows, but I still don't know whether it's optimal:
-- Asker's alternative: combine a shuffled set of NUM_T1C1 values (R1) with a shuffled set of ID_T2C1 values (R2)
-- and take the first row of the combination. R1 and R2 are listed without any join condition between them.
SELECT NUM_T1C1, ID_T2C1
FROM (
SELECT R1.NUM_T1C1, R2.ID_T2C1
FROM
(
-- R1: joined rows where T2.T2C2 IS NULL, ordered by the random column (position 3).
-- NOTE(review): the schema above names this column RES_T2C2, not T2C2 - confirm the name.
SELECT T1.NUM_T1C1, T2.ID_T2C1, DBMS_RANDOM.VALUE
FROM TABLE1 T1, TABLE2 T2
WHERE T1.NUM_T1C1 IS NOT NULL
AND T2.T2C2 IS NULL
AND T1.ID_T1C2 = T2.ID_T2C1
ORDER BY 3
) R1,
(
-- R2: joined rows where T2.T2C2 IS NOT NULL, ordered by the random column (position 2).
SELECT T2.ID_T2C1, DBMS_RANDOM.VALUE
FROM TABLE1 T1, TABLE2 T2
WHERE T1.NUM_T1C1 IS NOT NULL
AND T2.T2C2 IS NOT NULL
AND T1.ID_T1C2 = T2.ID_T2C1
ORDER BY 2
) R2
) WHERE ROWNUM = 1
Here is an example :
Table1 Table2
+----------+---------+ +---------+----------+
| NUM_T1C1 | ID_T1C2 | | ID_T2C1 | RES_T2C2 |
+----------+---------+ +---------+----------+
| 23 | 5 | | 9 | NULL |
| 521 | 4 | | 4 | DG_513 |
| 71 | 7 | | 7 | FN_731 |
| 97 | 9 | | 5 | NULL |
+----------+---------+ +---------+----------+
Result would be one of those (select one randomly) :
+----------+---------+
| NUM_T1C1 | ID_T2C1 |
+----------+---------+
| 23 | 4 |
| 23 | 7 |
| 97 | 4 |
| 97 | 7 |
+----------+---------+
Here is one way with only a single (cross) join:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data for the answer: Table1 holds four (NUM_T1C1, ID_T1C2) rows built from literals selected from DUAL.
CREATE TABLE Table1 ( NUM_T1C1, ID_T1C2 ) AS
SELECT 23, 5 FROM DUAL UNION ALL
SELECT 521, 4 FROM DUAL UNION ALL
SELECT 71, 7 FROM DUAL UNION ALL
SELECT 97, 9 FROM DUAL;
-- Table2 holds four (ID_T2C1, RES_T2C2) rows; two of them have a NULL RES_T2C2.
CREATE TABLE Table2 ( ID_T2C1, RES_T2C2 ) As
SELECT 9, NULL FROM DUAL UNION ALL
SELECT 4, 'DG_513' FROM DUAL UNION ALL
SELECT 7, 'FN_731' FROM DUAL UNION ALL
SELECT 5, NULL FROM DUAL;
Query 1:
-- Pair every Table1 row with every Table2 row that has a result, then keep only the Table1 rows
-- whose ID_T1C2 matches none of those Table2 rows. The inner query orders the pairs randomly.
SELECT NUM_T1C1, ID_T2C1
FROM (
  SELECT
    NUM_T1C1,
    ID_T2C1,
    -- Per NUM_T1C1: how many of the paired Table2 rows share its ID_T1C2.
    COUNT(CASE WHEN T1.ID_T1C2 = T2.ID_T2C1 THEN 1 END)
      OVER (PARTITION BY T1.NUM_T1C1) AS num_matches
  FROM Table1 T1
  CROSS JOIN (
    SELECT *
    FROM Table2
    WHERE RES_T2C2 IS NOT NULL
  ) T2
  ORDER BY DBMS_RANDOM.VALUE
)
WHERE num_matches = 0
-- Left disabled so every candidate pair is listed; enable it to return a single pair.
--AND ROWNUM = 1
Results:
| NUM_T1C1 | ID_T2C1 |
|----------|---------|
| 23 | 7 |
| 97 | 4 |
| 97 | 7 |
| 23 | 4 |

PostgreSQL previous and next group value

The problem is the following:
Suppose, I have a table of such view (it is a sub-sample of the table I'm working with):
| col1 | col2 |
|------|------|
| 1 | a2 |
| 1 | b2 |
| 2 | c2 |
| 2 | d2 |
| 2 | e2 |
| 1 | f2 |
| 1 | g2 |
| 3 | h2 |
| 1 | j2 |
I need to add two new columns
prev containing the previous value in col1 not equal to the current
next containing the next value in col1 not equal to the current
If there is no previous value, prev should contain the current col1's value as well as next should contain the current value if no next values exist.
Result should have the following form:
| col1 | col2 | prev | next |
|------|------|------|------|
| 1 | a2 | 1 | 2 |
| 1 | b2 | 1 | 2 |
| 2 | c2 | 1 | 1 |
| 2 | d2 | 1 | 1 |
| 2 | e2 | 1 | 1 |
| 1 | f2 | 2 | 3 |
| 1 | g2 | 2 | 3 |
| 3 | h2 | 1 | 1 |
| 1 | j2 | 3 | 1 |
I would be grateful for any help.
You can try this using combination of window functions lead, lag, first_value, last_value and sum.
-- Split the rows (in col2 order) into runs of equal col1 and give each run its neighbouring runs' col1 values.
with marked as (
    -- n is 1 on a row whose col1 differs from the row before it, otherwise 0.
    select
        t.*,
        case when col1 <> lag(col1) over w then 1 else 0 end as n
    from t
    window w as (order by col2)
),
runs as (
    select
        marked.*,
        -- y: the previous row's col1, present only where the value changes.
        case when col1 <> lag(col1) over w then lag(col1) over w end as y,
        -- y2: the next row's col1, present only where the value is about to change.
        case when col1 <> lead(col1) over w then lead(col1) over w end as y2,
        -- x: running total of n, used as the run number.
        sum(n) over w as x
    from marked
    window w as (order by col2)
)
select
    runs.col1,
    runs.col2,
    n,
    -- Fall back to the row's own col1 when the run has no predecessor / successor value.
    coalesce(first_value(y) over (partition by x order by col2), col1) as prev_val,
    coalesce(
        last_value(y2) over (
            partition by x
            order by col2
            rows between current row and unbounded following
        ),
        col1
    ) as next_val
from runs;
It finds the lead/lag per group of rows.
If I assume that you have an id column that specifies the ordering, then this is possible. I'm just not sure this is easily expressed using window functions.
You can use correlated subqueries:
-- For every row, look up the nearest earlier and the nearest later row (by id) whose col1 differs.
-- Relies on an id column that defines the row order, as the answer assumes.
select t.*,
       -- Closest preceding row with a different col1.
       (select t2.col1
        from t t2
        where t2.id < t.id and t2.col1 <> t.col1
        order by t2.id desc
        fetch first 1 row only
       ) as prev_col1,
       -- Closest following row with a different col1 (previously mislabelled prev_col2).
       (select t2.col1
        from t t2
        where t2.id > t.id and t2.col1 <> t.col1
        order by t2.id asc
        fetch first 1 row only
       ) as next_col1
from t;
You can add coalesce() to handle missing previous and next values. That is not the interesting part of the problem.
-- Self-contained demo: number the sample rows, then use LATERAL lookups to find the nearest
-- earlier and later rows whose col1 differs, falling back to the row's own col1.
WITH cte AS (
    SELECT
        row_number() OVER () AS rowid,
        *
    FROM unnest(
        ARRAY[1,1,2,2,2,1,1,3,1],
        ARRAY['a2','b2','c2','d2','e2','f2','g2','h2','j2']
    ) AS t(col1, col2)
)
SELECT
    t.col1,
    t.col2,
    COALESCE(prev.col1, t.col1) AS prev,
    COALESCE("next".col1, t.col1) AS "next"
FROM cte AS t
LEFT JOIN LATERAL (
    -- Nearest earlier row with a different col1.
    SELECT p.col1
    FROM cte AS p
    WHERE p.rowid < t.rowid
      AND p.col1 <> t.col1
    ORDER BY p.rowid DESC
    LIMIT 1
) AS prev ON TRUE
LEFT JOIN LATERAL (
    -- Nearest later row with a different col1.
    SELECT nx.col1
    FROM cte AS nx
    WHERE nx.rowid > t.rowid
      AND nx.col1 <> t.col1
    ORDER BY nx.rowid ASC
    LIMIT 1
) AS "next" ON TRUE

How to get Previous Value for Null Values

I have the data below in my table.
| Id | FeeModeId |Name | Amount|
---------------------------------------------
| 1 | NULL | NULL | 20 |
| 2 | 1 | Quarter-1 | 5000 |
| 3 | NULL | NULL | 2000 |
| 4 | 2 | Quarter-2 | 8000 |
| 5 | NULL | NULL | 5000 |
| 6 | NULL | NULL | 2000 |
| 7 | 3 | Quarter-3 | 6000 |
| 8 | NULL | NULL | 4000 |
How can I write a query to get the output below?
| Id | FeeModeId |Name | Amount|
---------------------------------------------
| 1 | NULL | NULL | 20 |
| 2 | 1 | Quarter-1 | 5000 |
| 3 | 1 | Quarter-1 | 2000 |
| 4 | 2 | Quarter-2 | 8000 |
| 5 | 2 | Quarter-2 | 5000 |
| 6 | 2 | Quarter-2 | 2000 |
| 7 | 3 | Quarter-3 | 6000 |
| 8 | 3 | Quarter-3 | 4000 |
Since you are on SQL Server 2012... here is a version that uses that. It might be faster than other solutions but you have to test that on your data.
sum() over() does a running sum ordered by Id, adding 1 when there is a value in the column and keeping the current total for null values. The calculated running sum is then used to partition the result in first_value() over(). The first value ordered by Id for each "group" of rows generated by the running sum is the value you want.
-- Carry FeeModeId and Name forward: rows are grouped by a running count of non-null values,
-- and each group takes the value of its first row (ordered by Id).
WITH Grouped AS (
    SELECT
        Id,
        FeeModeId,
        Name,
        Amount,
        -- NF / NS: running counts of non-null FeeModeId / Name values up to this Id.
        SUM(CASE WHEN FeeModeId IS NULL THEN 0 ELSE 1 END) OVER (ORDER BY Id) AS NF,
        SUM(CASE WHEN Name IS NULL THEN 0 ELSE 1 END) OVER (ORDER BY Id) AS NS
    FROM YourTable
)
SELECT
    T.Id,
    FIRST_VALUE(T.FeeModeId) OVER (
        PARTITION BY T.NF
        ORDER BY T.Id
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS FeeModeId,
    FIRST_VALUE(T.Name) OVER (
        PARTITION BY T.NS
        ORDER BY T.Id
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS Name,
    T.Amount
FROM Grouped AS T
SQL Fiddle
Something that will work pre SQL Server 2012:
-- Forward fill without window frames: for each row, look up the latest non-null Name and
-- the latest non-null FeeModeId at or before its Id.
SELECT
    T1.Id,
    LastFee.FeeModeId,
    LastName.Name,
    T1.Amount
FROM YourTable AS T1
OUTER APPLY (
    SELECT TOP (1) N.Name
    FROM YourTable AS N
    WHERE N.Id <= T1.Id
      AND N.Name IS NOT NULL
    ORDER BY N.Id DESC
) AS LastName
OUTER APPLY (
    SELECT TOP (1) F.FeeModeId
    FROM YourTable AS F
    WHERE F.Id <= T1.Id
      AND F.FeeModeId IS NOT NULL
    ORDER BY F.Id DESC
) AS LastFee
SQL Fiddle
Please try:
-- Fill a missing FeeModeId/Name from the closest earlier row that has both an Amount and a FeeModeId.
SELECT
    cur.ID,
    ISNULL(cur.FeeModeId, prev.FeeModeId) AS FeeModeId,
    ISNULL(cur.Name, prev.Name) AS Name,
    cur.Amount
FROM tbl AS cur
OUTER APPLY (
    SELECT TOP (1)
        b.FeeModeId,
        b.Name
    FROM tbl AS b
    WHERE cur.FeeModeId IS NULL      -- look back only for rows that need filling
      AND b.ID < cur.ID
      AND b.Amount IS NOT NULL
      AND b.FeeModeId IS NOT NULL
    ORDER BY b.ID DESC
) AS prev
OR
-- Fill a missing FeeModeId/Name from the closest earlier row that has a FeeModeId.
-- Assumes ID is unique per row - TODO confirm.
SELECT
    ID,
    ISNULL(FeeModeId, bFeeModeId) AS FeeModeId,
    ISNULL(Name, bName) AS Name,
    Amount
FROM (
    SELECT
        a.ID,
        a.FeeModeId,
        a.Name,
        a.Amount,
        b.ID AS bID,
        b.FeeModeId AS bFeeModeId,
        b.Name AS bName,
        -- Nearest earlier filled row = the largest earlier ID. Taking MAX(b.FeeModeId) instead
        -- would pick the row with the biggest FeeModeId, which is only the nearest one when
        -- FeeModeId happens to grow with ID, and would duplicate rows when values repeat.
        MAX(b.ID) OVER (PARTITION BY a.ID) AS mx
    FROM tbl AS a
    LEFT JOIN tbl AS b
        ON b.ID < a.ID
       AND b.FeeModeId IS NOT NULL
) AS x
-- mx is NULL when no earlier filled row exists; the unmatched LEFT JOIN row is kept as is.
WHERE bID = mx OR mx IS NULL
-- Fill a missing FeeModeId / Name with the value from the closest earlier row where it is present.
SELECT
    T.ID,
    ISNULL(
        T.FeeModeId,
        (
            SELECT TOP (1) PrevFee.FeeModeId
            FROM TableName AS PrevFee
            WHERE PrevFee.ID < T.ID
              AND PrevFee.FeeModeId IS NOT NULL
            ORDER BY PrevFee.ID DESC
        )
    ) AS FeeModeId,
    ISNULL(
        T.Name,
        (
            SELECT TOP (1) PrevName.Name
            FROM TableName AS PrevName
            WHERE PrevName.ID < T.ID
              AND PrevName.Name IS NOT NULL
            ORDER BY PrevName.ID DESC
        )
    ) AS Name,
    T.Amount
FROM TableName AS T
try this -
-- Forward-fill Feemodeid and NAME: when the value is missing, read it from the row with the
-- greatest earlier Id where that value is present.
SELECT
    t_1.Id,
    COALESCE(
        t_1.Feemodeid,
        (SELECT src.Feemodeid
         FROM Table_Name src
         WHERE src.Id = (SELECT MAX(prior.Id)
                         FROM Table_Name prior
                         WHERE prior.Id < t_1.Id
                           AND prior.Feemodeid IS NOT NULL))
    ) AS Feemodeid,
    COALESCE(
        t_1.NAME,
        (SELECT src.NAME
         FROM Table_Name src
         WHERE src.Id = (SELECT MAX(prior.Id)
                         FROM Table_Name prior
                         WHERE prior.Id < t_1.Id
                           AND prior.NAME IS NOT NULL))
    ) AS NAME,
    t_1.Amount
FROM Table_Name t_1
id name
1 toto
2 NULL
3 NULL
4 titi
5 NULL
6 NULL
7 tutu
8 NULL
9 NULL
-- For every id, find the greatest id at or before it that has a name, then fetch that name.
SELECT
    last_named.id_table,
    names.name
FROM (
    SELECT
        cur.id AS id_table,
        MAX(named.id) AS id_name
    FROM names AS cur
    INNER JOIN (
        -- Only the rows that actually carry a name can act as a source.
        SELECT id
        FROM names
        WHERE name IS NOT NULL
    ) AS named
        ON named.id <= cur.id
    GROUP BY cur.id
) AS last_named
LEFT JOIN names
    ON names.id = last_named.id_name
id_table name
1 toto
2 toto
3 toto
4 titi
5 titi
6 titi
7 tutu
8 tutu
9 tutu