MS SQL Server: advanced substring, splitting one string column into multiple columns - sql

I have a column in a table whose contents I would like to separate into different columns; the positions of the dashes are not always the same. Please advise on simpler code.
Thanks.
COL1
AGH-WH6X-23-4534-OPDQE-QADF
xxx-xxxx-xxxx-xxxx-xxx-xxxx
xxx-xxxx-xxxxxx-xxxx-xxxxx-xx
x-xx-xxxx-xxxxxx-xxxx-xxx-xx
xxx-xx-xxxx-xxxx-xxx-xxx-x
xxx-xxxx-xxxxxx-xxxxxx-xxx-xx
x-xxx-xxxx-xxxx-xxxxxx-xxx-xx
xxx-xxxxx-xxxx-xxxxxx-xxx-xx
Expectation:
COL2 COL3 COL4 COL5 COL6 COL7
AGH WH6X 23 4534 OPDQE QADF
xxx xxxx xxxx xxxx xxx NULL

One method is a recursive CTE with aggregation:
-- Split col1 on '-' into up to seven columns (SQL Server).
-- The recursive CTE peels one token per level: val is the token, rest is what remains.
-- A '-' sentinel is appended before every charindex() so that the last token
-- (or a value containing no dash at all) is handled the same way as the others.
-- Note: rows are grouped by col1, so duplicate col1 values collapse into one output row.
with cte as (
      select col1,
             left(col1, charindex('-', col1 + '-') - 1) as val,
             1 as level,
             -- sentinel here too: without it a dash-less value would be re-emitted at level 2
             substring(col1, charindex('-', col1 + '-') + 1, len(col1)) as rest
      from t
      union all
      select col1,
             left(rest, charindex('-', rest + '-') - 1),
             level + 1,
             substring(rest, charindex('-', rest + '-') + 1, len(col1))
      from cte
      where rest > ''
     )
select max(case when level = 1 then val end) as val1,
       max(case when level = 2 then val end) as val2,
       max(case when level = 3 then val end) as val3,
       max(case when level = 4 then val end) as val4,
       max(case when level = 5 then val end) as val5,
       max(case when level = 6 then val end) as val6,
       max(case when level = 7 then val end) as val7
from cte
group by col1;

Related

Oracle self join starting with minimum value (yearmonths) for each partition

I have this table:
COL1 COL2 COL3
--------------------
A 202011 VAL1
A 202012 VAL2
A 202205 VAL3
B 202111 VAL4
B 202201 VAL5
B 202202 VAL6
COL1 COL2 COL3
--------------------
A 202011 VAL1
A 202012 VAL2
A 202101 NULL
B 202111 VAL4
B 202112 NULL
B 202201 VAL5
EDIT: I have the dates too if that's easier to play with than the ISO weeks.
Logic:
with the smallest COL2 value for each partition of COL1, take the following 3 yearmonths (YYYYMM) and, if the combination COL1 and COL2 present in the first table, show COL3 and NULL otherwise.
I have attempted to create this query to replicate what I would need in terms of adding up months, but not sure if it will be useful though.
-- Asker's attempt at producing consecutive YYYYMM numbers (Oracle).
-- It only builds candidate values; nothing here reads the data table.
WITH level_aux ( lev ) AS (
-- CONNECT BY level <= 12 yields level = 1..12. Each row is '2020' followed by the
-- level left-padded to two characters with 0, converted to a number, plus 7 - 1.
SELECT
to_number('2020'
|| lpad(level, 2, 0)) + 7 - 1
FROM
dual
CONNECT BY
level <= 12
), level_final AS (
-- h is presumably meant to be lev with month overflow rolled into the next year;
-- it is built from characters 1-4 of lev (year) and characters 5-6 of lev (month).
-- NOTE(review): in Oracle, binary + and || share one precedence level and are applied
-- left to right, so the year addition is evaluated before the concatenation - confirm
-- that this grouping is intended.
-- NOTE(review): the year is incremented only when the month part is an exact multiple
-- of 13; larger month parts keep the original year - verify against expected output.
SELECT
lev,
substr(lev, 1, 4) +
CASE
WHEN mod(substr(lev, 5, 2), 13) = 0 THEN
1
ELSE
0
END
||
CASE
WHEN substr(lev, 5, 2) < 13 THEN
lpad(mod(substr(lev, 5, 2), 13), 2, 0)
ELSE
lpad(mod(substr(lev, 5, 2), 13) + 1, 2, 0)
END
h
FROM
level_aux
)
SELECT
*
FROM
level_final;
Use MIN() as a window function:
-- Keep col3 only for rows whose col2 falls within three months of the
-- earliest col2 in the same col1 partition; later rows get NULL.
-- Assumes col2 is a DATE (add_months is applied to it directly).
select t.*,
       case
           when t.col2 < add_months(min(t.col2) over (partition by t.col1), 3)
               then t.col3
       end as imputed_col3
from t;
Note: If col2 is not a date, you can convert it:
-- Same rule as the DATE version, for col2 stored as a YYYYMM value:
-- convert with to_date(..., 'YYYYMM') before comparing against the
-- partition minimum plus three months.
select t.*,
       case
           when to_date(t.col2, 'YYYYMM')
                < add_months(min(to_date(t.col2, 'YYYYMM')) over (partition by t.col1), 3)
               then t.col3
       end as imputed_col3
from t;

sort return data with SQL in SQLite order by row

I have a DB which has 8 columns, all are integers range 1~99:
Col1 Col2 Col3 Col4 Col5 Col6 Col7 Col8
1 13 24 18 35 7 50 88
13 4 33 90 78 42 26 57
22 18 30 3 57 90 71 8
...
When I perform "select Col1, Col2, Col3, Col5, Col6, Col7, Col8 from MyTable where Col4>10"
I would like the return data is sorted, e.g. the first row should return like this:
1,7,13,24,35,50,88
However, "order by" only works on columns. Is there any way to perform this in SQL, or do I need a temp table/max() to do it? Thanks.
Regds
LAM Chi-fung
Your current design is not appropriate for this requirement.
Consider changing it to something like this:
-- Normalised (one value per row) replacement for the 8-column table.
-- The composite primary key guarantees at most one value per original
-- row/column pair, which the sorted-row queries depend on.
CREATE TABLE tablename (
  id     INTEGER NOT NULL, -- corresponds to the rowid of your current table
  col_id INTEGER NOT NULL CHECK (col_id BETWEEN 1 AND 8), -- 1-8, corresponds to the number of each of the columns ColX
  value  INTEGER NOT NULL, -- corresponds to the value of each of the columns ColX
  PRIMARY KEY (id, col_id)
);
You can populate it from your current table:
-- Unpivot MyTable: one (rowid, column number, value) row per ColX of every source row.
INSERT INTO tablename (id, col_id, value)
    SELECT rowid, 1, Col1 FROM MyTable
UNION ALL
    SELECT rowid, 2, Col2 FROM MyTable
UNION ALL
    SELECT rowid, 3, Col3 FROM MyTable
UNION ALL
    SELECT rowid, 4, Col4 FROM MyTable
UNION ALL
    SELECT rowid, 5, Col5 FROM MyTable
UNION ALL
    SELECT rowid, 6, Col6 FROM MyTable
UNION ALL
    SELECT rowid, 7, Col7 FROM MyTable
UNION ALL
    SELECT rowid, 8, Col8 FROM MyTable
Now you can get the result that you want with GROUP_CONCAT() window function and aggregation:
-- One comma-separated, ascending list of values per qualifying id.
-- The explicit full frame makes every row of a partition carry the complete
-- list, so DISTINCT leaves exactly one row per id. This avoids depending on
-- SQLite's bare-column-with-MAX() behaviour to pick the longest running list.
SELECT result
FROM (
  SELECT DISTINCT
         id,
         GROUP_CONCAT(value) OVER (
           PARTITION BY id
           ORDER BY value
           ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
         ) AS result
  FROM tablename
  -- keep only original rows whose Col4 (col_id = 4) is greater than 10
  WHERE id IN (SELECT id FROM tablename WHERE col_id = 4 AND value > 10)
)
ORDER BY id;
See the demo.
Results:
result
1,7,13,18,24,35,50,88
4,13,26,33,42,57,78,90
Fix your data model! You should not be storing values in columns. You should be storing them in rows.
That is, SQL doesn't "sort columns". It deals with data in rows!
You can do what you want by unpivoting the data into rows, calculating the new order, and then reaggregating:
-- Sort the eight values of each qualifying row: unpivot to one value per row,
-- number the values within each source row, then pivot back by that number.
with t as (
      -- rowid is a stable per-row identifier; an unordered row_number() is not
      -- guaranteed to assign the same ids if this CTE is evaluated more than once.
      -- (Requires mytable to be an ordinary rowid table.)
      select m.rowid as id,
             m.*
      from mytable m
      where m.col4 > 10
     )
select max(case when seqnum = 1 then col end) as col1,
       max(case when seqnum = 2 then col end) as col2,
       max(case when seqnum = 3 then col end) as col3,
       max(case when seqnum = 4 then col end) as col4,
       max(case when seqnum = 5 then col end) as col5,
       max(case when seqnum = 6 then col end) as col6,
       max(case when seqnum = 7 then col end) as col7,
       max(case when seqnum = 8 then col end) as col8
from (select row_number() over (partition by u.id order by u.col) as seqnum,
             u.*
      from (select id, col1 as col from t union all
            select id, col2 as col from t union all
            select id, col3 as col from t union all
            select id, col4 as col from t union all
            select id, col5 as col from t union all
            select id, col6 as col from t union all
            select id, col7 as col from t union all
            select id, col8 as col from t
           ) u
     ) s
group by id
order by id;

SQL Subquery with delimiter

I need to be able to split one string by the delimiter * into separate columns without including *
The column y from table x looks like this:
column y
*1HS*AB*GXX*123*02*PA45*2013-08-10*
*1R1*B*GX*123*02*PA45*2013-08-10*
*1HS*B*GX*13*01*PA45*2013-08-01*
*1P*C*GXX*123*02*PA45*2013-08-10*
STRING_SPLIT is not available
The outcome should be this:
Column1 Column2 Column3 Column4 Column5 Column6 Column7
1HS AB GXX 123 2 PA45 10-08-2013
1R1 B GX 123 2 PA45 10-08-2013
1HS B GX 13 1 PA45 01-08-2013
1P C GXX 123 2 PA45 10-08-2013
You could use the query below.
-- Oracle-style split of column y (table x) on '*' using REGEXP_SUBSTR.
-- '[^*]+' matches one run of non-asterisk characters; the 4th argument picks
-- the n-th such run, so the leading and trailing '*' need no special handling.
-- Caveat: empty fields ('**') are skipped rather than returned as NULL.
-- REGEXP_SUBSTR is not available in SQL Server.
select regexp_substr(y, '[^*]+', 1, 1) as column1
     , regexp_substr(y, '[^*]+', 1, 2) as column2
     , regexp_substr(y, '[^*]+', 1, 3) as column3
     , regexp_substr(y, '[^*]+', 1, 4) as column4
     , regexp_substr(y, '[^*]+', 1, 5) as column5
     , regexp_substr(y, '[^*]+', 1, 6) as column6
     , regexp_substr(y, '[^*]+', 1, 7) as column7
from x
Unfortunately, string_split() does not guarantee that it preserves the ordering of the values. And, SQL Server does not offer other useful string functions.
So, I recommend using recursive CTEs for this purpose:
-- Split '*'-delimited strings into seven columns (SQL Server, no STRING_SPLIT needed).
-- cte peels one token per recursion level: val is the token, rest is the remainder.
with t as (
      select v.str
      from (values ('*1HS*AB*GXX*123*02*PA45*2013-08-10*'),
                   ('*1HS*B*GX*13*01*PA45*2013-08-01*')
           ) v(str)
     ),
     cte as (
      select convert(varchar(max), null) as val,
             0 as lev,
             -- drop the leading delimiter so level 1 is the first real token
             convert(varchar(max),
                     case when left(str, 1) = '*' then stuff(str, 1, 1, '') else str end
                    ) as rest,
             row_number() over (order by (select null)) as id
      from t
      union all
      -- the '*' sentinel keeps charindex() > 0 even when rest has no delimiter left;
      -- stuff() removes exactly the token plus its delimiter, nothing more
      select convert(varchar(max), left(rest, charindex('*', rest + '*') - 1)),
             lev + 1,
             convert(varchar(max), stuff(rest, 1, charindex('*', rest + '*'), '')),
             id
      from cte
      where rest <> '' and lev < 10
     )
select max(case when lev = 1 then val end) as col1,
       max(case when lev = 2 then val end) as col2,
       max(case when lev = 3 then val end) as col3,
       max(case when lev = 4 then val end) as col4,
       max(case when lev = 5 then val end) as col5,
       max(case when lev = 6 then val end) as col6,
       max(case when lev = 7 then val end) as col7
from cte
where lev > 0
group by cte.id;
Here is a db<>fiddle.
Assuming you can add a table valued function to your database then Jeff Moden's string split function is the best approach I've encountered. It will allow you to maintain order as well.
Find details here

SQL issues, How can i separate this row of data into multiple column. i want it to separate by the pipe | and put it in its columns

I am trying to separate a one-row data into multiple columns, and I have a pipe | between each data that I wanted to be separated.
I want this one-row data to split into a multi column
1234 |abcd | 123abc | some | more | 0922
to Be like this
col1 col2 col3 col4 col5 col6
1234 abcd 123abc some more 0922
-- Asker's attempt: slice PostData around the '|' positions located with CHARINDEX.
-- NOTE(review): every output column is aliased [Col1], and the 4th-6th expressions
-- repeat the 2nd expression verbatim (text between the first and second '|').
-- NOTE(review): the [ID] expression has unbalanced parentheses as posted.
select
[Col1] = SUBSTRING(PostData,1,CHARINDEX('|',PostData)-1) --does what it suppose to
,[Col1] = SUBSTRING(PostData,CHARINDEX('|',PostData)+1,CHARINDEX('|',PostData,CHARINDEX('|',PostData)+1)-CHARINDEX('|', PostData)-1) --does what it suppose to
,[Col1] = SUBSTRING(PostData,CHARINDEX('|',PostData,CHARINDEX('|',PostData)+1)-CHARINDEX('|', PostData)-1,CHARINDEX('|',PostData,CHARINDEX('|',PostData)+1)-CHARINDEX('|', PostData)-1) --i need help with this
,[Col1] = SUBSTRING(PostData,CHARINDEX('|',PostData)+1,CHARINDEX('|',PostData,CHARINDEX('|',PostData)+1)-CHARINDEX('|', PostData)-1)--i need help with this
,[Col1] = SUBSTRING(PostData,CHARINDEX('|',PostData)+1,CHARINDEX('|',PostData,CHARINDEX('|',PostData)+1)-CHARINDEX('|', PostData)-1)--i need help with this
,[Col1] = SUBSTRING(PostData,CHARINDEX('|',PostData)+1,CHARINDEX('|',PostData,CHARINDEX('|',PostData)+1)-CHARINDEX('|', PostData)-1) --i need help with this
,[ID] = REVERSE(SUBSTRING(reverse(PostData),0,CHARINDEX('|',REVERSE(PostData) --does what
it suppose to
from tableName
col1 col2 col3 col4 col5 col6
1234 abcd 123abc some more 0922
what I am getting is:
col1 col2 col3 col4 col5 col6
1234 abcd abcd abcd abcd 0922
One method uses a recursive CTE:
-- Split a '|'-delimited string into six trimmed columns (SQL Server).
-- cte peels one token per recursion level: val is the token, rest is the remainder.
with cte as (
      -- the '|' sentinel makes charindex() succeed even when no delimiter is left;
      -- stuff() removes exactly the token plus one delimiter, so a token that is not
      -- preceded by a space (e.g. '|abcd') keeps its first character
      select convert(varchar(max), left(row, charindex('|', row + '|') - 1)) as val,
             convert(varchar(max), stuff(row, 1, charindex('|', row + '|'), '')) as rest,
             1 as lev, row
      from (values ('1234 |abcd | 123abc | some | more | 0922')) v(row)
      union all
      select convert(varchar(max), left(rest, charindex('|', rest + '|') - 1)) as val,
             convert(varchar(max), stuff(rest, 1, charindex('|', rest + '|'), '')) as rest,
             lev + 1 as lev, row
      from cte
      -- six tokens are expected, so recurse until level 6 has been produced
      where lev < 6 and rest <> ''
     )
-- padding around the delimiters is stripped here rather than while splitting
select max(case when lev = 1 then ltrim(rtrim(val)) end) as col1,
       max(case when lev = 2 then ltrim(rtrim(val)) end) as col2,
       max(case when lev = 3 then ltrim(rtrim(val)) end) as col3,
       max(case when lev = 4 then ltrim(rtrim(val)) end) as col4,
       max(case when lev = 5 then ltrim(rtrim(val)) end) as col5,
       max(case when lev = 6 then ltrim(rtrim(val)) end) as col6
from cte
group by row;
Here is a db<>fiddle.

sql different columns compare

I have a database table with six columns(column1,column2...column6). The data are ordered and none repeat. These are the data in the database table
col1 col2 col3 col4 col5 col6
--------------------------------------------
1 3 4 6 7 8
2 5 7 9 10 14
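I want to write SQL that compares the stored rows with a set of input rows and finds the stored rows that differ from an input row by one number, even when the numbers sit in different (shifted) positions. These are the parameters passed to the SQL select statement: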
col1 col2 col3 col4 col5 col6
--------------------------------------------
2 3 4 6 7 8
3 4 6 7 8 9
1 2 4 6 7 8
I want to query out the row 1,3,4,6,7,8
More complex...two different numbers and three different numbers
Merry Christmas!!
The following sql is from my friend.
data in table: Col1=10, Col2=11, Col3=12, Col4=13, Col5=26, Col6=28
parameters: 10,11,12,18,26,28
-- Return rows of nList that share at least 5 of the 6 parameter values,
-- regardless of which column each value sits in.
-- sub counts every (column, parameter) pair that is equal, which is the same
-- total the 36 individual "case when ColN = p then 1 else 0 end" terms add up to.
select aa.*
from (
      select n.id, n.Col1, n.Col2, n.Col3, n.Col4, n.Col5, n.Col6,
             (select count(*)
              from (values (n.Col1), (n.Col2), (n.Col3),
                           (n.Col4), (n.Col5), (n.Col6)) as c(val)
              inner join (values (10), (11), (12), (18), (26), (28)) as p(val) -- parameters
                  on p.val = c.val
             ) as sub
      from [DBName1].[dbo].[nList] as n
     ) as aa
where aa.sub >= 5
Please give more comments and new answers!
Thanks for the reply !
My version of the solution (for SQL Server 2005 and above):
-- PREPARATIONS
-- #tbl1 holds the stored rows.
create table #tbl1 (
    col1 int, col2 int, col3 int, col4 int, col5 int, col6 int
);
insert into #tbl1 (col1, col2, col3, col4, col5, col6)
values (1, 3, 4, 6, 7, 8),
       (2, 5, 7, 9, 10, 14);

-- #tbl2 holds the parameter rows to compare against.
create table #tbl2 (
    col1 int, col2 int, col3 int, col4 int, col5 int, col6 int
);
insert into #tbl2 (col1, col2, col3, col4, col5, col6)
values (2, 3, 4, 6, 7, 8),
       (3, 4, 6, 7, 8, 9),
       (1, 2, 4, 6, 7, 8);
go
-- Counts the positions at which two delimited strings hold the same element.
--   @value1, @value2 : delimited lists to compare
--   @separator       : single-character delimiter
-- Returns the number of (element, position) pairs present in both lists.
-- Variables use the @ prefix; # is only valid for temporary object names.
create function [dbo].[CompareDelimitedStrings] (@value1 varchar(max), @value2 varchar(max), @separator char(1))
returns int
as
begin
    declare @result int = 0;

    -- r1 / r2 split each input recursively: [value] is the unparsed remainder,
    -- [x] the element just removed from its front, [no] that element's 1-based position.
    with r1 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no]
        from (select rtrim(cast(@value1 as varchar(max))) [value]) as j
        union all
        select right(value, len(value) - case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
             , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value]) - 1) end) [x]
             , [no] + 1 [no]
        from r1 r
        where value > ''
    )
    , r2 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no]
        from (select rtrim(cast(@value2 as varchar(max))) [value]) as j
        union all
        select right(value, len(value) - case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
             , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value]) - 1) end) [x]
             , [no] + 1 [no]
        from r2 r
        where value > ''
    )
    select @result = count(*)
    from (
        -- the anchor rows carry x = NULL and are excluded
        select x, [no] from r1 where x is not null
        intersect
        select x, [no] from r2 where x is not null
    ) as t;

    return @result;
end
go
-- SOLUTION
-- Serialise each row of both tables to a comma-separated string, then keep the
-- #tbl1 rows that match some #tbl2 row in at least 5 positions.
with [stored] as
(
    select a.*
         , replace(
               str(a.col1) + ',' + str(a.col2) + ',' + str(a.col3) + ','
             + str(a.col4) + ',' + str(a.col5) + ',' + str(a.col6)
           , ' ', '') as [str]
    from #tbl1 as a
)
, [params] as
(
    select b.*
         , replace(
               str(b.col1) + ',' + str(b.col2) + ',' + str(b.col3) + ','
             + str(b.col4) + ',' + str(b.col5) + ',' + str(b.col6)
           , ' ', '') as [str]
    from #tbl2 as b
)
select distinct
       stored.col1, stored.col2, stored.col3, stored.col4, stored.col5, stored.col6
from stored
-- number 5 in this case means 5 intersections (or 1 difference).
-- you can change this number to 4 or 3 to find rows with 2 or 3 differences
inner join params
    on [dbo].[CompareDelimitedStrings] (stored.[str], params.[str], ',') >= 5;
-- CLEANUP
drop table #tbl1;
drop table #tbl2;
drop function [dbo].[CompareDelimitedStrings];
Result:
col1 col2 col3 col4 col5 col6
--------------------------------------------
1 3 4 6 7 8
Algorithm in a few words: joining these two tables on the next condition: intersection of the numbers in the rows must be greater or equal to 5 (for the case with 1 difference)
This is the one-table-solution (shows the matched pairs and intersections count)
-- PREPARATIONS
-- Single table holding all rows that are compared with each other.
create table #tbl (
    col1 int, col2 int, col3 int, col4 int, col5 int, col6 int
);
insert into #tbl (col1, col2, col3, col4, col5, col6)
values (1, 3, 4, 6, 7, 8),
       (2, 5, 7, 9, 10, 14),
       (2, 3, 4, 6, 7, 8),
       (3, 4, 6, 7, 8, 9),
       (1, 2, 4, 6, 7, 8);
go
-- Counts the positions at which two delimited strings hold the same element.
--   @value1, @value2 : delimited lists to compare
--   @separator       : single-character delimiter
-- Returns the number of (element, position) pairs present in both lists.
-- Variables use the @ prefix; # is only valid for temporary object names.
create function [dbo].[CompareDelimitedStrings] (@value1 varchar(max), @value2 varchar(max), @separator char(1))
returns int
as
begin
    declare @result int = 0;

    -- r1 / r2 split each input recursively: [value] is the unparsed remainder,
    -- [x] the element just removed from its front, [no] that element's 1-based position.
    with r1 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no]
        from (select rtrim(cast(@value1 as varchar(max))) [value]) as j
        union all
        select right(value, len(value) - case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
             , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value]) - 1) end) [x]
             , [no] + 1 [no]
        from r1 r
        where value > ''
    )
    , r2 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no]
        from (select rtrim(cast(@value2 as varchar(max))) [value]) as j
        union all
        select right(value, len(value) - case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
             , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value]) - 1) end) [x]
             , [no] + 1 [no]
        from r2 r
        where value > ''
    )
    select @result = count(*)
    from (
        -- the anchor rows carry x = NULL and are excluded
        select x, [no] from r1 where x is not null
        intersect
        select x, [no] from r2 where x is not null
    ) as t;

    return @result;
end
go
-- SOLUTION
-- Serialise and number every row, self-join the rows on the positional match count,
-- then keep one row per matched pair.
with [numbered] as
(
    select src.*
         , replace(
               str(src.col1) + ',' + str(src.col2) + ',' + str(src.col3) + ','
             + str(src.col4) + ',' + str(src.col5) + ',' + str(src.col6)
           , ' ', '') as [str]
         , row_number() over (order by src.col1, src.col2, src.col3, src.col4, src.col5, src.col6) as [id]
    from #tbl as src
)
, ranked_pairs as
(
    select a.col1, a.col2, a.col3, a.col4, a.col5, a.col6
         , [dbo].[CompareDelimitedStrings] (a.[str], b.[str], ',') as [intersections_count]
         -- ranking the cross-doubled pairs
         , row_number() over (
               partition by case when a.id > b.id then a.id else b.id end
               order by a.id, b.id
           ) as [rank]
    from numbered as a
    -- number 5 in this case means 5 intersections (or 1 difference).
    -- you can change this number to 4 or 3 to find rows with 2 or 3 differences
    inner join numbered as b
        on [dbo].[CompareDelimitedStrings] (a.[str], b.[str], ',') >= 5
       and a.id <> b.id
)
select distinct col1, col2, col3, col4, col5, col6, [intersections_count]
from ranked_pairs
where [rank] = 1;
-- CLEANUP
drop table #tbl;
drop function [dbo].[CompareDelimitedStrings];
Result:
col1 col2 col3 col4 col5 col6 intersections_count
-------------------------------------------------------------------
1 2 4 6 7 8 5
1 3 4 6 7 8 5