sql different columns compare - sql

I have a database table with six columns(column1,column2...column6). The data are ordered and none repeat. These are the data in the database table
col1 col2 col3 col4 col5 col6
--------------------------------------------
1 3 4 6 7 8
2 5 7 9 10 14
I want to write sql to compare the data, if I have the data which one different number in varied/shift position. These are the parameters in the sql select statement.
col1 col2 col3 col4 col5 col6
--------------------------------------------
2 3 4 6 7 8
3 4 6 7 8 9
1 2 4 6 7 8
I want to query out the row 1,3,4,6,7,8
More complex...two different numbers and three different numbers
Merry Christmas!!
The following sql is from my friend.
data in table: Col1=10, Col2=11, Col3=12, Col4=13, Col5=26, Col6=28
parameters: 10,11,12,18,26,28
-- Returns every row of nList for which at least 5 of its six columns hold one
-- of the parameter values (10, 11, 12, 18, 26, 28), i.e. rows that differ from
-- the parameter set by at most one number, regardless of position.
--
-- [sub] is the number of columns whose value appears in the parameter list.
-- Because the parameter values are distinct, this equals the sum of the
-- 36 "ColX = value" comparisons it replaces.
select n.id, n.Col1, n.Col2, n.Col3, n.Col4, n.Col5, n.Col6, m.sub
from [DBName1].[dbo].[nList] as n
cross apply (
    -- Unpivot the six columns of the current row and count the hits.
    select count(*) as sub
    from (values (n.Col1), (n.Col2), (n.Col3), (n.Col4), (n.Col5), (n.Col6)) as c (val)
    where c.val in (10, 11, 12, 18, 26, 28)
) as m
-- 5 = one difference allowed; lower to 4 or 3 for two or three differences.
where m.sub >= 5
Please give more comments and new answers!
Thanks for the reply !

My version of the solution (for SQL Server 2005 and above):
-- PREPARATIONS
-- #tbl1 holds the table rows from the question, #tbl2 the parameter rows.
CREATE TABLE #tbl1
(
    col1 INT,
    col2 INT,
    col3 INT,
    col4 INT,
    col5 INT,
    col6 INT
);

CREATE TABLE #tbl2
(
    col1 INT,
    col2 INT,
    col3 INT,
    col4 INT,
    col5 INT,
    col6 INT
);

INSERT INTO #tbl1 (col1, col2, col3, col4, col5, col6)
VALUES (1, 3, 4, 6, 7, 8)
     , (2, 5, 7, 9, 10, 14);

INSERT INTO #tbl2 (col1, col2, col3, col4, col5, col6)
VALUES (2, 3, 4, 6, 7, 8)
     , (3, 4, 6, 7, 8, 9)
     , (1, 2, 4, 6, 7, 8);
go
-- Counts the tokens that two delimited strings share at the same position.
--
-- Parameters:
--   @value1, @value2  delimited lists to compare, e.g. '1,3,4,6,7,8'
--   @separator        single-character delimiter between tokens
-- Returns:
--   the number of (token, position) pairs that occur in both lists.
--
-- NOTE(review): the INTERSECT compares the token AND its ordinal position, so
-- a value present in both lists at different positions is not counted. Remove
-- [no] from both sides of the INTERSECT if position-independent matching is
-- what is wanted.
create function [dbo].[CompareDelimitedStrings] (@value1 varchar(max), @value2 varchar(max), @separator char(1))
returns int
as
begin
    declare @result int = 0;

    -- r1 / r2 split @value1 / @value2 one token per recursion step:
    --   [value] = text still to be split
    --   [x]     = token removed in this step (NULL in the anchor row)
    --   [no]    = 1-based position of that token
    with r1 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no]
        from (select rtrim(cast(@value1 as varchar(max))) [value]) as j
        union all
        -- remainder after the first separator ('' when no separator is left)
        select right(value, len(value) - case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
             -- text before the first separator (the whole text when none is left)
             , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value]) - 1) end) [x]
             , [no] + 1 [no]
        from r1 r
        where value > ''
    )
    , r2 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no]
        from (select rtrim(cast(@value2 as varchar(max))) [value]) as j
        union all
        select right(value, len(value) - case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
             , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value]) - 1) end) [x]
             , [no] + 1 [no]
        from r2 r
        where value > ''
    )
    select @result = count(*)
    from (
        -- anchor rows carry x = NULL and are excluded
        select x, [no] from r1 where x is not null
        intersect
        select x, [no] from r2 where x is not null
    ) as t;

    return @result;
end
go
-- SOLUTION
-- Each row of both tables is rendered as a comma-separated string (STR pads
-- with spaces, which REPLACE strips) and the strings are compared pairwise
-- with dbo.CompareDelimitedStrings.
WITH stored_rows AS
(
    SELECT src.*
         , REPLACE(
               STR(src.col1) + ',' + STR(src.col2) + ',' + STR(src.col3) + ','
             + STR(src.col4) + ',' + STR(src.col5) + ',' + STR(src.col6)
           , ' ', '') AS [csv]
    FROM #tbl1 AS src
)
, probe_rows AS
(
    SELECT src.*
         , REPLACE(
               STR(src.col1) + ',' + STR(src.col2) + ',' + STR(src.col3) + ','
             + STR(src.col4) + ',' + STR(src.col5) + ',' + STR(src.col6)
           , ' ', '') AS [csv]
    FROM #tbl2 AS src
)
SELECT DISTINCT
       s.col1, s.col2, s.col3, s.col4, s.col5, s.col6
FROM stored_rows AS s
-- A threshold of 5 means 5 intersections (1 difference).
-- Use 4 or 3 to find rows with 2 or 3 differences.
INNER JOIN probe_rows AS p
    ON [dbo].[CompareDelimitedStrings](s.[csv], p.[csv], ',') >= 5;

-- CLEANUP
DROP TABLE #tbl1;
DROP TABLE #tbl2;
DROP FUNCTION [dbo].[CompareDelimitedStrings];
Result:
col1 col2 col3 col4 col5 col6
--------------------------------------------
1 3 4 6 7 8
Algorithm in a few words: joining these two tables on the next condition: intersection of the numbers in the rows must be greater or equal to 5 (for the case with 1 difference)
This is the one-table-solution (shows the matched pairs and intersections count)
-- PREPARATIONS
-- A single table holding all five sample rows to be compared with each other.
CREATE TABLE #tbl
(
    col1 INT,
    col2 INT,
    col3 INT,
    col4 INT,
    col5 INT,
    col6 INT
);

INSERT INTO #tbl (col1, col2, col3, col4, col5, col6)
VALUES (1, 3, 4, 6, 7, 8)
     , (2, 5, 7, 9, 10, 14)
     , (2, 3, 4, 6, 7, 8)
     , (3, 4, 6, 7, 8, 9)
     , (1, 2, 4, 6, 7, 8);
go
-- Counts the tokens that two delimited strings share at the same position.
--
-- Parameters:
--   @value1, @value2  delimited lists to compare, e.g. '1,3,4,6,7,8'
--   @separator        single-character delimiter between tokens
-- Returns:
--   the number of (token, position) pairs that occur in both lists.
--
-- NOTE(review): the INTERSECT compares the token AND its ordinal position, so
-- a value present in both lists at different positions is not counted. Remove
-- [no] from both sides of the INTERSECT if position-independent matching is
-- what is wanted.
create function [dbo].[CompareDelimitedStrings] (@value1 varchar(max), @value2 varchar(max), @separator char(1))
returns int
as
begin
    declare @result int = 0;

    -- r1 / r2 split @value1 / @value2 one token per recursion step:
    --   [value] = text still to be split
    --   [x]     = token removed in this step (NULL in the anchor row)
    --   [no]    = 1-based position of that token
    with r1 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no]
        from (select rtrim(cast(@value1 as varchar(max))) [value]) as j
        union all
        -- remainder after the first separator ('' when no separator is left)
        select right(value, len(value) - case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
             -- text before the first separator (the whole text when none is left)
             , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value]) - 1) end) [x]
             , [no] + 1 [no]
        from r1 r
        where value > ''
    )
    , r2 as
    (
        select value, cast(null as varchar(max)) [x], 0 [no]
        from (select rtrim(cast(@value2 as varchar(max))) [value]) as j
        union all
        select right(value, len(value) - case charindex(@separator, value) when 0 then len(value) else charindex(@separator, value) end) [value]
             , left(r.[value], case charindex(@separator, r.value) when 0 then len(r.value) else abs(charindex(@separator, r.[value]) - 1) end) [x]
             , [no] + 1 [no]
        from r2 r
        where value > ''
    )
    select @result = count(*)
    from (
        -- anchor rows carry x = NULL and are excluded
        select x, [no] from r1 where x is not null
        intersect
        select x, [no] from r2 where x is not null
    ) as t;

    return @result;
end
go
-- SOLUTION
-- numbered: every row rendered as a comma-separated string plus a sequential
-- id assigned in (col1 .. col6) order.
WITH numbered AS
(
    SELECT src.*
         , REPLACE(
               STR(src.col1) + ',' + STR(src.col2) + ',' + STR(src.col3) + ','
             + STR(src.col4) + ',' + STR(src.col5) + ',' + STR(src.col6)
           , ' ', '') AS [csv]
         , ROW_NUMBER() OVER (ORDER BY src.col1, src.col2, src.col3, src.col4, src.col5, src.col6) AS [id]
    FROM #tbl AS src
)
-- matched_pairs: self-join of distinct rows whose strings agree often enough.
, matched_pairs AS
(
    SELECT a.col1, a.col2, a.col3, a.col4, a.col5, a.col6
         , [dbo].[CompareDelimitedStrings](a.[csv], b.[csv], ',') AS [intersections_count]
         -- Rank the pairs inside each group keyed by the larger of the two ids,
         -- ordered by (a.id, b.id); only the first pair of a group is kept below.
         , ROW_NUMBER() OVER (
               PARTITION BY CASE WHEN a.[id] > b.[id] THEN a.[id] ELSE b.[id] END
               ORDER BY a.[id], b.[id]
           ) AS [pair_rank]
    FROM numbered AS a
    -- A threshold of 5 means 5 intersections (1 difference).
    -- Use 4 or 3 to find rows with 2 or 3 differences.
    INNER JOIN numbered AS b
        ON [dbo].[CompareDelimitedStrings](a.[csv], b.[csv], ',') >= 5
       AND a.[id] <> b.[id]
)
SELECT DISTINCT
       col1, col2, col3, col4, col5, col6, [intersections_count]
FROM matched_pairs
WHERE [pair_rank] = 1;

-- CLEANUP
DROP TABLE #tbl;
DROP FUNCTION [dbo].[CompareDelimitedStrings];
Result:
col1 col2 col3 col4 col5 col6 intersections_count
-------------------------------------------------------------------
1 2 4 6 7 8 5
1 3 4 6 7 8 5

Related

Oracle self join starting with minimum value (yearmonths) for each partition

I have this table:
COL1 COL2 COL3
--------------------
A 202011 VAL1
A 202012 VAL2
A 202205 VAL3
B 202111 VAL4
B 202201 VAL5
B 202202 VAL6
COL1 COL2 COL3
--------------------
A 202011 VAL1
A 202012 VAL2
A 202101 NULL
B 202111 VAL4
B 202112 NULL
B 202201 VAL5
EDIT: I have the dates too if that's easier to play with than the ISO weeks.
Logic:
with the smallest COL2 value for each partition of COL1, take the following 3 yearmonths (YYYYMM) and, if the combination COL1 and COL2 present in the first table, show COL3 and NULL otherwise.
I have attempted to create this query to replicate what I would need in terms of adding up months, but not sure if it will be useful though.
-- Generates 12 consecutive yearmonths starting at 2020-07.
--   lev : raw counter 202007 .. 202018 (the month part may exceed 12)
--   h   : lev normalised to a valid YYYYMM string, e.g. 202014 -> '202102'
--
-- The year is advanced by the number of whole years contained in the month
-- part, and the month is wrapped into 1..12, so every overflowing month
-- (13 .. 18) lands in the following year.
WITH level_aux ( lev ) AS (
    SELECT
        to_number('2020' || lpad(level, 2, '0')) + 7 - 1
    FROM
        dual
    CONNECT BY
        level <= 12
), level_parts ( lev, yyyy, mm ) AS (
    -- Split the counter into its year and (possibly overflowing) month part.
    SELECT
        lev,
        to_number(substr(to_char(lev), 1, 4)),
        to_number(substr(to_char(lev), 5, 2))
    FROM
        level_aux
), level_final AS (
    SELECT
        lev,
        to_char(yyyy + trunc((mm - 1) / 12))
        || lpad(to_char(mod(mm - 1, 12) + 1), 2, '0') AS h
    FROM
        level_parts
)
SELECT
    lev,
    h
FROM
    level_final;
Use MIN() as a window function:
-- Keeps col3 only for rows whose col2 lies within 3 months of the earliest
-- col2 of the same col1 partition; imputed_col3 is NULL for all other rows.
SELECT
    t.*,
    CASE
        WHEN col2 < ADD_MONTHS(MIN(col2) OVER (PARTITION BY col1), 3) THEN col3
    END AS imputed_col3
FROM t;
Note: If col2 is not a date, you can convert it:
-- Same filter for a col2 stored as YYYYMM text or number: both the row value
-- and the partition minimum are converted to dates before comparing.
SELECT
    t.*,
    CASE
        WHEN TO_DATE(col2, 'YYYYMM')
             < ADD_MONTHS(MIN(TO_DATE(col2, 'YYYYMM')) OVER (PARTITION BY col1), 3)
        THEN col3
    END AS imputed_col3
FROM t;

How to create a pivot table where columns and rows are the same in Snowflake SQL?

I have a table like
col1 | col2 | col3 | col4 | col5
id1 | 1 0 0 1 0
id2 | 1 1 0 0 0
id3 | 0 1 0 1 0
id4 | 0 0 1 0 1
id5 | 1 0 1 0 0
id6 | 0 0 0 1 0
.
.
.
idN
How would I create a query such that I get a table like
col1 | col2 | col3 | col4 | col5
col1 | 3 1 1 1 0
col2 | 1 2 0 1 0
col3 | 1 1 2 0 1
col4 | 1 1 1 2 0
col5 | 0 0 1 0 1
where each entry in the result is the number of times that some value of 1 in one column occurred with another column that had a value of 1?
I can get the diagonal values by doing the following:
-- Sums each flag column over the first row (by date) of every id, restricted
-- to rows where at most one flag is set (total <= 1).
SELECT
    SUM(col1), SUM(col2), SUM(col3), SUM(col4), SUM(col5)
FROM (
    SELECT
        col1, col2, col3, col4, col5,
        -- number of flags set in the row; the opening parenthesis was missing
        (col1 + col2 + col3 + col4 + col5) AS total
    FROM (
        SELECT
            ROW_NUMBER() OVER (PARTITION BY id ORDER BY date) AS row_num, *
        FROM (
            SELECT DISTINCT id, date, col1, col2, col3, col4, col5
            FROM db.schema.table
        )
    )
    WHERE row_num = 1 AND total <= 1
    -- NOTE(review): this ordering cannot influence the outer SUMs.
    ORDER BY total DESC
);
I assume that I have to do some kind of pivot or various union all's but I can't seem to figure it out.
I think I would approach this by unpivoting the data and re-aggregating. The following gets the pairs and counts:
-- u: one row per (id, column name) for every column whose value is 1.
WITH u AS (
    SELECT
        t.id,
        v.col
    FROM t
    CROSS JOIN LATERAL (
        VALUES
            ('col1', col1),
            ('col2', col2),
            ('col3', col3),
            ('col4', col4),
            ('col5', col5)
    ) v (col, val)
    WHERE val = 1
)
-- Pair up the set columns of the same id and count each pair.
SELECT
    lhs.col,
    rhs.col,
    COUNT(*)
FROM u AS lhs
INNER JOIN u AS rhs
    ON lhs.id = rhs.id
GROUP BY
    lhs.col,
    rhs.col;
This seems good enough for me, but you can use conditional aggregation:
-- Pivoted form of the pair counts: one output row per column name, one output
-- column per partner column. Relies on the CTE u from the previous query.
SELECT
    lhs.col,
    SUM(CASE WHEN rhs.col = 'col1' THEN 1 ELSE 0 END) AS col1,
    SUM(CASE WHEN rhs.col = 'col2' THEN 1 ELSE 0 END) AS col2,
    SUM(CASE WHEN rhs.col = 'col3' THEN 1 ELSE 0 END) AS col3,
    SUM(CASE WHEN rhs.col = 'col4' THEN 1 ELSE 0 END) AS col4,
    SUM(CASE WHEN rhs.col = 'col5' THEN 1 ELSE 0 END) AS col5
FROM u AS lhs
INNER JOIN u AS rhs
    ON lhs.id = rhs.id
GROUP BY lhs.col;
Here is one approach that showcases one of Snowflake's powerful semi-structured functions (namely, OBJECT_CONSTRUCT(*)) and also exploits two meta-attributes (SEQ and KEY) that are returned by the FLATTEN function so that there is no need for a unique business key on the original (source) table:
-- row_objects: each source row packed into one OBJECT keyed by column name.
WITH row_objects AS (
    SELECT OBJECT_CONSTRUCT(*) AS col_dict
    FROM T
),
-- cells: one row per (source row, column); FLATTEN's SEQ identifies the source
-- row and KEY carries the column name.
cells AS (
    SELECT
        f.SEQ - 1                   AS row_offset,
        f.KEY                       AS col_name,
        ro.col_dict[f.KEY]::INTEGER AS val
    FROM row_objects AS ro,
         LATERAL FLATTEN(ro.col_dict) AS f
),
-- pair_counts: how often two columns are both 1 in the same source row.
pair_counts AS (
    SELECT
        lhs.col_name AS col_name_1,
        rhs.col_name AS col_name_2,
        COUNT(*)     AS count_val
    FROM cells AS lhs
    INNER JOIN cells AS rhs
        ON rhs.row_offset = lhs.row_offset
    WHERE lhs.val = 1
      AND rhs.val = 1
    GROUP BY
        lhs.col_name,
        rhs.col_name
)
-- Spread the partner column into five fixed output columns.
SELECT
    col_name_1                                   AS col_name,
    SUM(IFF(col_name_2 = 'COL1', count_val, 0))  AS col1,
    SUM(IFF(col_name_2 = 'COL2', count_val, 0))  AS col2,
    SUM(IFF(col_name_2 = 'COL3', count_val, 0))  AS col3,
    SUM(IFF(col_name_2 = 'COL4', count_val, 0))  AS col4,
    SUM(IFF(col_name_2 = 'COL5', count_val, 0))  AS col5
FROM pair_counts
GROUP BY col_name_1
ORDER BY col_name_1
;

Get Top N row from each set from table with 4 column in SQL Server

Assume I have a table with 4 columns:
Col1 Col2 Col3 Col4
My initial query is :
-- All rows, grouped visually by (Col1, Col2) with the highest Col3 first.
SELECT
    Col1,
    Col2,
    Col3,
    Col4
FROM myTable
ORDER BY
    Col1,
    Col2,
    Col3 DESC,
    Col4
My desired result is all 4 columns, but with this condition that Top N Col3 different row when Col1, Col2 is equal.
Example with N=2 :
Table sample data:
Col1 Col2 Col3 Col4
---------------------
1 a 2000 s
1 a 2002 c
1 a 2001 b
2 b 1998 s
2 b 2002 c
2 b 2000 b
3 c 2000 b
1 f 1998 n
1 g 1999 e
Desired result:
1 a 2002 c
1 a 2001 b
1 f 1998 n
1 g 1999 e
2 b 2002 c
2 b 2000 b
3 c 2000 b
In another description, when (col1, col2) is repeated in multiple records, just export top N rows of those records when order by Col3 descending.
Can I do this with SQL script, without hard coding?
-- Top-N rows per (Col1, Col2) group, ranked by Col3 descending.
-- @t is a table variable holding the sample data from the question.
declare @t table (Col1 int, Col2 char, Col3 int, Col4 char);
insert into @t (Col1, Col2, Col3, Col4) values
(1, 'a', 2000, 's'),
(1, 'a', 2002, 'c'),
(1, 'a', 2001, 'b'),
(2, 'b', 1998, 's'),
(2, 'b', 2002, 'c'),
(2, 'b', 2000, 'b'),
(3, 'c', 2000, 'b'),
(1, 'f', 1998, 'n'),
(1, 'g', 1999, 'e');
declare @N int = 2; -- number of rows to keep per (Col1, Col2) group
with cte as
(
    -- rn = 1 is the row with the highest Col3 inside its group
    select *,
           row_number() over(partition by Col1, Col2 order by Col3 desc) as rn
    from @t
)
select *
from cte c
where rn <= @N
-- same ordering as the question's query, so the output is deterministic
order by Col1, Col2, Col3 desc, Col4;
I think below code was as expected
-- Top-N rows per (Col1, Col2) group, ranked by Col3 descending.
-- @tab is a table variable holding the sample data from the question.
declare @tab table (Col1 int, Col2 char(1), Col3 int, Col4 char(1));
declare @N int;
set @N = 2; -- number of rows to keep per (Col1, Col2) group

insert into @tab (Col1, Col2, Col3, Col4)
select 1, 'a', 2000, 's'
union all
select 1, 'a', 2002, 'c'
union all
select 1, 'a', 2001, 'b'
union all
select 2, 'b', 1998, 's'
union all
select 2, 'b', 2002, 'c'
union all
select 2, 'b', 2000, 'b'
union all
select 3, 'c', 2000, 'b'
union all
select 1, 'f', 1998, 'n'
union all
select 1, 'g', 1999, 'e';

;with tab as
(
    -- rn = 1 is the row with the highest Col3 inside its group
    select row_number() over(partition by t.Col1, t.Col2 order by t.Col3 desc) as rn, t.*
    from @tab t
)
select Col1, Col2, Col3, Col4
from tab
where rn <= @N
-- same ordering as the question's query, so the output is deterministic
order by Col1, Col2, Col3 desc, Col4;
output
Col1 Col2 Col3 Col4
1 a 2002 c
1 a 2001 b
1 f 1998 n
1 g 1999 e
2 b 2002 c
2 b 2000 b
3 c 2000 b
METHOD 1- FOR MSSQL
http://sqlfiddle.com/#!6/4bda39/6
-- ranked: every row of myTable plus its position inside its (col1, col2)
-- group, counted from the highest col3 downwards.
WITH ranked AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY t.col1, t.col2 ORDER BY t.col3 DESC) AS [row],
        t.*
    FROM myTable AS t
)
SELECT *
FROM ranked
WHERE ranked.[row] <= 2
Replace a.row <= 2 (2 with your N)
METHOD 2- FOR MYSQL
http://sqlfiddle.com/#!9/79e81a/63
-- Top-2 rows per (Col1, Col2) group by Col3 descending, without window
-- functions: a row is kept when fewer than 2 rows of the same group have a
-- strictly higher Col3.
--
-- This replaces the filter "Col3 >= AVG(Col3) of the group", which only
-- happens to return two rows for the sample data; for a group such as
-- 1, 2, 3, 4, 100 it would return a single row.
--
-- NOTE(review): rows tied on Col3 at the cut-off are all returned, so a group
-- can yield more than 2 rows when Col3 is not unique within it.
SELECT m.Col1, m.Col2, m.Col3, m.Col4
FROM myTable AS m
WHERE (
    SELECT COUNT(*)
    FROM myTable AS higher
    WHERE higher.Col1 = m.Col1
      AND higher.Col2 = m.Col2
      AND higher.Col3 > m.Col3
) < 2   -- replace 2 with your N
ORDER BY m.Col1, m.Col2, m.Col3 DESC, m.Col4
METHOD 3- FOR MYSQL
http://sqlfiddle.com/#!9/79e81a/79
-- Top-2 rows per (Col1, Col2) group using MySQL user variables as a running
-- row counter.
--   @curRow        running position inside the current (Col1, Col2) group
--   @Col1, @Col2   group key of the previously processed row
-- The counter is incremented while the key repeats and reset to 1 when it
-- changes. `rank` is back-quoted because RANK is a reserved word from MySQL 8.
--
-- NOTE(review): this depends on the order in which MySQL evaluates the select
-- list and applies ORDER BY, which the manual does not guarantee; on MySQL 8+
-- prefer ROW_NUMBER() OVER (PARTITION BY Col1, Col2 ORDER BY Col3 DESC).
SELECT * FROM (
    SELECT CASE Col1
               WHEN @Col1 THEN
                   CASE Col2
                       WHEN @Col2 THEN @curRow := @curRow + 1
                       ELSE @curRow := 1
                   END
               ELSE @curRow := 1
           END AS `rank`,
           @Col1 := Col1 AS Col1,
           @Col2 := Col2 AS Col2,
           Col3, Col4
    FROM myTable p
    -- one-row derived table that initialises the variables
    JOIN (SELECT @curRow := 0, @Col1 := 0, @Col2 := '') r
    ORDER BY Col1, Col2, Col3 DESC) as tt
WHERE tt.`rank` <= 2
Replace tt.rank <= 2 replace 2 by your desired index

MS SQL Server: advanced substring, splitting one string column to multimle columns

I have a column in a table, I would like to separate the contents into different columns, locations of dash is not always same. Please advise more simple code.
Thanks.
COL1
AGH-WH6X-23-4534-OPDQE-QADF
xxx-xxxx-xxxx-xxxx-xxx-xxxx
xxx-xxxx-xxxxxx-xxxx-xxxxx-xx
x-xx-xxxx-xxxxxx-xxxx-xxx-xx
xxx-xx-xxxx-xxxx-xxx-xxx-x
xxx-xxxx-xxxxxx-xxxxxx-xxx-xx
x-xxx-xxxx-xxxx-xxxxxx-xxx-xx
xxx-xxxxx-xxxx-xxxxxx-xxx-xx
Expectation:
COL2 COL3 COL4 COL5 COL6 COL7
AGH WH6X 23 4534 OPDQE QADF
xxx xxxx xxxx xxxx xxx NULL
One method is a recursive CTE with aggregation:
-- Splits col1 on '-' into up to seven columns (val1 .. val7).
--
-- cte walks each string one token per recursion level:
--   val   = token found at this level
--   level = 1-based token position
--   rest  = text remaining after that token
-- A '-' sentinel is appended before every CHARINDEX so the last token (or a
-- string without any dash) is handled like the others. The anchor's [rest]
-- uses the sentinel too; without it a dash-free value would leave the whole
-- string in [rest] and be emitted a second time as val2.
with cte as (
      select col1, left(col1, charindex('-', col1 + '-') - 1) as val,
             1 as level,
             substring(col1, charindex('-', col1 + '-') + 1, len(col1)) as rest
      from t
      union all
      select col1, left(rest, charindex('-', rest + '-') - 1),
             level + 1,
             substring(rest, charindex('-', rest + '-') + 1, len(col1))
      from cte
      where rest > ''
     )
-- Pivot the tokens back into one row per source string; positions a string
-- does not have stay NULL.
select max(case when level = 1 then val end) as val1,
       max(case when level = 2 then val end) as val2,
       max(case when level = 3 then val end) as val3,
       max(case when level = 4 then val end) as val4,
       max(case when level = 5 then val end) as val5,
       max(case when level = 6 then val end) as val6,
       max(case when level = 7 then val end) as val7
from cte
group by col1;

Querying a table in SQL Server based on permutation of column2 and 3

I have a table like this:
col1 col2 col3
111 1 1
222 1 0
333 0 1
444 0 0
Here col2 = 1 means col1 is commercial, col3 = 1 means col1 is retail as well. How do I get a result like below?
ID Description
111 Commercial
111 Retail
222 Commercial
333 Retail
You can do it with a UNION ALL:
-- One output row per flag that is set: col2 = 1 yields 'Commercial',
-- col3 = 1 yields 'Retail'. The second column is named Description and the
-- result is ordered so it matches the layout requested in the question.
SELECT ID = col1, Description = 'Commercial' FROM MyTable WHERE col2 = 1
UNION ALL
SELECT ID = col1, Description = 'Retail' FROM MyTable WHERE col3 = 1
ORDER BY ID, Description
Uses almost the same as above but in a single result set
-- For every row of MyTable, CROSS APPLY produces zero, one or two description
-- rows depending on which flags are set; rows with no flag set are dropped.
-- UNION ALL is sufficient because the two descriptions can never be equal.
Select ID = m.col1, t.Description
from MyTable as m
cross apply (select Description = 'Commercial' where m.col2 = 1
             union all
             select Description = 'Retail' where m.col3 = 1) as t
Can be done with UNPIVOT also:
-- Same result via UNPIVOT: col2 and col3 are turned into rows
-- (col = source column name, r = its value) and only the set flags are kept.
-- @t is a table variable holding the sample data from the question.
DECLARE @t TABLE
    (
      col1 INT ,
      col2 INT ,
      col3 INT
    );

INSERT INTO @t ( col1, col2, col3 )
VALUES  ( 111, 1, 1 ),
        ( 222, 1, 0 ),
        ( 333, 0, 1 ),
        ( 444, 0, 0 );

SELECT  col1 ,
        -- only col2 and col3 are unpivoted, so anything that is not col2 is col3
        CASE WHEN col = 'col2' THEN 'Commercial'
             ELSE 'Retail'
        END AS Description
FROM    @t UNPIVOT( r FOR col IN ( [col2], [col3] ) ) u
WHERE   r <> 0;