Related
I have a column with different length strings which has dashes (-) that separates alphanumeric strings.
The string could look like "A1-2-3".
I need to order by first "A1" then "2" then "3"
I want to achieve the following order for the column:
A1
A1-1-1
A1-1-2
A1-1-3
A1-2-1
A1-2-2
A1-2-3
A1-7
A2-1-1
A2-1-2
A2-1-3
A2-2-1
A2-2-2
A2-2-3
A2-10-1
A2-10-2
A2-10-3
A10-1-1
A10-1-2
A10-1-3
A10-2-1
A10-2-2
A10-2-3
I can separate the string with the following code:
declare #string varchar(max) = 'A1-2-3'
declare #first varchar(max) = SUBSTRING(#string,1,charindex('-',#string)-1)
declare #second varchar(max) = substring(#string, charindex('-',#string) + 1, charindex('-',reverse(#string))-1)
declare #third varchar(max) = right(#string,charindex('-',reverse(#string))-1)
select #first, #second, #third
With the above logic I thought that I could use the following:
Note this only regards strings with 2 dashes
select barcode from tabelWithBarcodes
order by
case when len(barcode) - len(replace(barcode,'-','')) = 2 then
len(SUBSTRING(barcode,1,charindex('-',barcode)-1))
end
, case when len(barcode) - len(replace(barcode,'-','')) = 2 then
SUBSTRING(barcode,1,(charindex('-',barcode)-1))
end
, case when len(barcode) - len(replace(barcode,'-','')) = 2 then
len(substring(barcode, charindex('-',barcode) + 1, charindex('-',reverse(barcode))-1))
end
, case when len(barcode) - len(replace(barcode,'-','')) = 2 then
substring(barcode, charindex('-',barcode) + 1, charindex('-',reverse(barcode))-1)
end
, case when len(barcode) - len(replace(barcode,'-','')) = 2 then
len(right(barcode,charindex('-',reverse(barcode))-1))
end
, case when len(barcode) - len(replace(barcode,'-','')) = 2 then
right(barcode,charindex('-',reverse(barcode))-1)
end
But the sorting is not working for the second and third section of the string.
(I haven't added the code for checking if the string has only 1 or no dash in it for simplicity)
Not sure if I'm on the right path here.
Is anybody able to solve this?
This is not pretty, however...
USE Sandbox;
GO
WITH VTE AS(
SELECT V.SomeString
--Randomised order
FROM (VALUES ('A1-1-1'),
('A10-1-3'),
('A10-2-2'),
('A1-1-3'),
('A10-2-1'),
('A2-2-2'),
('A1-2-1'),
('A1-2-2'),
('A2-1-1'),
('A10-1-2'),
('B2-1-2'),
('A1'),
('A2-2-1'),
('A2-10-3'),
('A10-2-3'),
('A2-1-2'),
('B1-4'),
('A2-10-2'),
('A2-2-3'),
('A10-1-1'),
('A1-A1-3'),
('A1-7'),
('A2-10-1'),
('A2-1-3'),
('A1-1-2'),
('A1-2-3')) V(SomeString)),
Splits AS(
SELECT V.SomeString,
DS.Item,
DS.ItemNumber,
CONVERT(int,STUFF((SELECT '' + NG.token
FROM dbo.NGrams8k(DS.item,1) NG
WHERE TRY_CONVERT(int, NG.Token) IS NOT NULL
ORDER BY NG.position
FOR XML PATH('')),1,0,'')) AS NumericPortion
FROM VTE V
CROSS APPLY dbo.DelimitedSplit8K(V.SomeString,'-') DS),
Pivoted AS(
SELECT S.SomeString,
MIN(CASE V.P1 WHEN S.Itemnumber THEN REPLACE(S.Item, S.NumericPortion,'') END) AS P1Alpha,
MIN(CASE V.P1 WHEN S.Itemnumber THEN S.NumericPortion END) AS P1Numeric,
MIN(CASE V.P2 WHEN S.Itemnumber THEN REPLACE(S.Item, S.NumericPortion,'') END) AS P2Alpha,
MIN(CASE V.P2 WHEN S.Itemnumber THEN S.NumericPortion END) AS P2Numeric,
MIN(CASE V.P3 WHEN S.Itemnumber THEN REPLACE(S.Item, S.NumericPortion,'') END) AS P3Alpha,
MIN(CASE V.P3 WHEN S.Itemnumber THEN S.NumericPortion END) AS P3Numeric
FROM Splits S
CROSS APPLY (VALUES(1,2,3)) AS V(P1,P2,P3)
GROUP BY S.SomeString)
SELECT P.SomeString
FROM Pivoted P
ORDER BY P.P1Alpha,
P.P1Numeric,
P.P2Alpha,
P.P2Numeric,
P.P3Alpha,
P.P3Numeric;
This outputs:
A1
A1-1-1
A1-1-2
A1-1-3
A1-2-1
A1-2-2
A1-2-3
A1-7
A1-A1-3
A2-1-1
A2-1-2
A2-1-3
A2-2-1
A2-2-2
A2-2-3
A2-10-1
A2-10-2
A2-10-3
A10-1-1
A10-1-2
A10-1-3
A10-2-1
A10-2-2
A10-2-3
B1-4
B2-1-2
This makes use of 2 user defined functions. Firstly or DelimitedSplit8k_Lead (I used DelimitedSplit8k as I don't have the other on my sandbox at the moment). Then you also have NGrams8k.
I really should explain how this works, but yuck... (edit coming).
OK... (/sigh) What it does. Firstly, we split the data into its relevant parts using delimitedsplit8k(_lead). Then, within the SELECT we use FOR XML PATH to get (only) the nuemrical part of that string (For example, for 'A10' we get '10') and we convert it to a numerical value (an int).
Then we pivot that data out into respective parts. The alphanumerical part, and the numerical part. So, for the value 'A10-A1-12' we end up with the row:
'A', 10, 'A', 1, 12
Then, now that we've pivoted the data, we sort it by each column individually. And voila.
This will fall over if you have a value like 'A1A' or '1B1', and honestly, I'm not changing it to catter for that. This was messy, and really isn't what the RDBMS should be doing.
Up to 3 dashes can be covered by fiddling with replace & parsename & patindex:
declare #TabelWithBarcodes table (id int primary key identity(1,1), barcode varchar(20) not null, unique (barcode));
insert into #TabelWithBarcodes (barcode) values
('2-2-3'),('A2-2-2'),('A2-2-1'),('A2-10-3'),('A2-10-2'),('A2-10-1'),('A2-1-3'),('A2-1-2'),('A2-1-1'),
('A10-2-3'),('A10-2-2'),('A10-2-10'),('A10-1-3'),('AA10-A111-2'),('A10-1-1'),
('A1-7'),('A1-2-3'),('A1-2-12'),('A1-2-1'),('A1-1-3'),('B1-1-2'),('A1-1-1'),('A1'),('A10-10-1'),('A12-10-1'), ('AB1-2-E1') ;
with cte as
(
select barcode,
replace(BarCode, '-', '.')
+ replicate('.0', 3 - (len(BarCode)-len(replace(BarCode, '-', '')))) as x
from #TabelWithBarcodes
)
select *
, substring(parsename(x,4), 1, patindex('%[0-9]%',parsename(x,4))-1)
,cast(substring(parsename(x,4), patindex('%[0-9]%',parsename(x,4)), 10) as int)
,substring(parsename(x,3), 1, patindex('%[0-9]%',parsename(x,3))-1)
,cast(substring(parsename(x,3), patindex('%[0-9]%',parsename(x,3)), 10) as int)
,substring(parsename(x,2), 1, patindex('%[0-9]%',parsename(x,2))-1)
,cast(substring(parsename(x,2), patindex('%[0-9]%',parsename(x,2)), 10) as int)
,substring(parsename(x,1), 1, patindex('%[0-9]%',parsename(x,1))-1)
,cast(substring(parsename(x,1), patindex('%[0-9]%',parsename(x,1)), 10) as int)
from cte
order by
substring(parsename(x,4), 1, patindex('%[0-9]%',parsename(x,4))-1)
,cast(substring(parsename(x,4), patindex('%[0-9]%',parsename(x,4)), 10) as int)
,substring(parsename(x,3), 1, patindex('%[0-9]%',parsename(x,3))-1)
,cast(substring(parsename(x,3), patindex('%[0-9]%',parsename(x,3)), 10) as int)
,substring(parsename(x,2), 1, patindex('%[0-9]%',parsename(x,2))-1)
,cast(substring(parsename(x,2), patindex('%[0-9]%',parsename(x,2)), 10) as int)
,substring(parsename(x,1), 1, patindex('%[0-9]%',parsename(x,1))-1)
,cast(substring(parsename(x,1), patindex('%[0-9]%',parsename(x,1)), 10) as int)
extend each barcode to 4 groups by adding trailing .0 if missing
split each barcode in 4 groups
split each group in leading characters and trailing digits
sort by the leading character first
then by casting the digits as numeric
See db<>fiddle
An alterative approach would be to use your technique to split the string into its 3 component parts, then left pad those strings with leading zeros (or characters of your choice). That avoids any issues where the string may contain alphanumerics rather than just numerics. However, it does mean that strings containing different length alphabetic characters may not be sorted as you may expect... Here's the code to play with (using the definitions from #dnoeth's excellent answer):
;with cte as
(
select barcode
, case
when barcode like '%-%' then
substring(barcode,1,charindex('-',barcode)-1)
else
barcode
end part1
, case
when barcode like '%-%' then
substring(barcode, charindex('-',barcode) + 1, case
when barcode like '%-%-%' then
(charindex('-',barcode,charindex('-',barcode) + 1)) - 1
else
len(barcode)
end
- charindex('-',barcode))
else
''
end part2
, case
when barcode like '%-%-%' then
right(barcode,charindex('-',reverse(barcode))-1) --note: assumes you don't have %-%-%-%
else
''
end part3
from #TabelWithBarcodes
)
select barcode
, part1, part2, part3
, right('0000000000' + coalesce(part1,''), 10) lpad1
, right('0000000000' + coalesce(part2,''), 10) lpad2
, right('0000000000' + coalesce(part3,''), 10) lpad3
from cte
order by lpad1, lpad2, lpad3
DBFiddle Example
Inside a stored procedure, I am trying to set value of a declared varchar(100) variable 'my_path' based on a virtual dataset created via a WITH and recursive CTE statement.
First I tried "SELECT INTO", but got "missing keyword" error.
I can alternatively think of updating a table row with value of 'path' from WITH statement, but then I get "missing select keyword" error and I have understood that I cannot UPDATE with CTE in Oracle SQL server. But is there any way to access the output of recursive CTE then? I need it later in my stored procedure.
declare mypath varchar(100);
begin
with CTE (toN,path,done)
as
(
-- anchor
select
cap.toN,
concat(concat(CAST(cap.fromN as varchar(10)) , ',') , CAST(cap.toN as varchar(10))),
case when cap.toN = 10000 then 1 else 0 end
from cap
where
(fromN = 1) and (cap.cap_up > 0)
union all
-- recursive
select
cap.toN,
concat(concat(path,','), CAST(cap.toN as varchar(10)) ),
case when cap.toN=10000 then 1 else 0 end
from cap join cte on
cap.fromN = cte.toN
where
(cap.cap_up > 0) and (cte.done = 0)
)
select path into mypath from cte where done=1
);
end;
I think your code should work. It does have a lingering closing paren, which is suspicious.
Perhaps some simplification will help:
with CTE(toN, path, done) as (
-- anchor
select cap.toN, cap.fromN || ',' || cap.toN
(case when cap.toN = 10000 then 1 else 0 end)
from cap
where fromN = 1 and cap.cap_up > 0
union all
-- recursive
select cap.toN, path || ',' || cap.toN,
(case when cap.to = 10000 then 1 else 0 end)
from cap join
cte
on cap.fromN = cte.toN
where cap.cap_up > 0 and cte.done = 0
)
select path into my_path
from cte
where done = 1;
Now that you have fixed the variable names... you still need a third column for the CTE to contain the toN that is in the join condition of your recursive part of the query. There is also a trailing ) bracket a the end of the query and you can use || to concatenate strings (which you do not need to cast to varchars).
WITH CTE (path,done, toN) as (
-- anchor
SELECT fromN || ',' || toN,
CASE WHEN toN = 10000 THEN 1 ELSE 0 END,
toN
FROM cap
WHERE "FROM" = 1
AND cap_up > 0
UNION ALL
-- recursive
SELECT cte.path || ',' || cap.toN,
CASE WHEN cap.toN =10000 THEN 1 ELSE 0 END,
cap.toN
FROM cap
join cte
on ( cap.fromN = cte.toN )
WHERE cap.cap_up > 0
AND cte.done = 0)
)
select path into my_path from cte where done=1;
A simpler solution would be to use a hierarchical query.
SELECT SUBSTR( SYS_CONNECT_BY_PATH( fromN, ',' ), 2 ) || ',' || toN
INTO mypath
FROM cap
WHERE toN = 10000
START WITH fromN = 1
CONNECT BY PRIOR toN = fromN;
I am in some Confusion to find which of these will improve the query execution time.
Data base and Table information:
I am using database which is hosted in remote location might be US.
Table is having approx 2 cr. records and having only 2 columns String format.
I wants to extract the data from that table using filters and some modification.
in select statements.
DB_KEY is having unique key indexing
Select statement condition:
CASE WHEN (INSTR(DB_KEY , 'MsgOutMdlName' ) > 0) THEN
reverse(SUBSTR( reverse(DB_KEY) , INSTR (reverse(DB_KEY) , '|' , 1 ) ,LENGTH(DB_KEY) )) || 'MsgOutMdlName' ELSE
reverse(SUBSTR( reverse(DB_KEY) , INSTR (reverse(DB_KEY) , '|' , 1 ) ,LENGTH(DB_KEY) )) || 'MsgInAttName'
Or
CASE WHEN INSTR(DB_KEY , 'MsgOutMdlName' ) > 0 THEN
SUBSTR( DB_KEY , 0, INSTR(DB_KEY , '|' , 1, REGEXP_COUNT (DB_KEY, '[^|]+')-1 ) ) || 'MsgOutMdlName' ELSE
SUBSTR( DB_KEY , 0, INSTR(DB_KEY , '|' , 1, REGEXP_COUNT (DB_KEY, '[^|]+')-1 ) ) || 'MsgInAttName' end
Or
CASE WHEN DB_KEY like '%MsgOutMdlName'
then any one of above code:
and in where clause:
WHERE 1 = ( case when DB_VALUE is not null and
(DB_KEY LIKE 'TP|%MsgInAttName' or DB_KEY LIKE 'TP|%MsgOutMdlName') then 1 else null end )
Or use like operator in where clause.
Please suggest me the best approach.
DB_key are having dynamic data values.
I have already use function based indexing but didn't get better performance.
I’m fairly used to using MySQL, but not particularly familiar with SQL Server. Tough luck, the database I’m dealing with here is on SQL Server 2014.
I have a table with a column whose values are all integers with leading, separating, and trailing semicolons, like these three fictitious rows:
;905;1493;384;13387;29;933;467;28732;
;905;138;3084;1387;290;9353;4767;2732;
;9085;14493;3864;130387;289;933;4767;28732;
What I am trying to do now is to select all rows where more than one number taken from a list of numbers appears in this column. So for example, given the three rows above, if I have the group 905,467,4767, the statement I’m trying to figure out how to construct should return the first two rows: the first row contains 905 and 467; the second row contains 905 and 4767. The third row contains only 4767, so that row should not be returned.
As far as I can tell, SQL Server does not actually support regex directly (and I don’t even know what managed code is), which doesn’t help. Even with regex, I wouldn’t know where to begin. Oracle seems to have a function that would be very useful, but that’s Oracle.
Most similar questions on here deal with finding multiple instances of the same character (usually singular) and solve the problem by replacing the string to match with nothing and counting the difference in length. I suppose that would technically work here, too, but given a ‘filter’ group of 15 numbers, the SELECT statement would become ridiculously long and convoluted and utterly unreadable. Additionally, I only want to match entire numbers (so if one of the numbers to match is 29, the value 29 would match in the first row, but the value 290 in the second row should not match), which means I’d have to include the semicolons in the REPLACE clause and then discount them when calculating the length. A complete mess.
What I would ideally like to do is something like this:
SELECT * FROM table WHERE REGEXP_COUNT(column, ';(905|467|4767);') > 1
– but that will obviously not work, for all kinds of reasons (the most obvious one being the nonexistence of REGEXP_COUNT outside Oracle).
Is there some sane, manageable way of doing this?
You can do
SELECT *
FROM Mess
CROSS APPLY (SELECT COUNT(*)
FROM (VALUES (905),
(467),
(4767)) V(Num)
WHERE Col LIKE CONCAT('%;', Num, ';%')) ca(count)
WHERE count > 1
SQL Fiddle
Or alternatively
WITH Nums
AS (SELECT Num
FROM (VALUES (905),
(467),
(4767)) V(Num))
SELECT Mess.*
FROM Mess
CROSS APPLY (VALUES(CAST(CONCAT('<x>', REPLACE(Col, ';', '</x><x>'), '</x>') AS XML))) x(x)
CROSS APPLY (SELECT COUNT(*)
FROM (SELECT n.value('.', 'int')
FROM x.x.nodes('/x') n(n)
WHERE n.value('.', 'varchar') <> ''
INTERSECT
SELECT Num
FROM Nums) T(count)
HAVING COUNT(*) > 1) ca2(count)
Could you put your arguments into a table (perhaps using a table-valued function accepting a string (of comma-separated integers) as a parameter) and use something like this?
DECLARE #T table (String varchar(255))
INSERT INTO #T
VALUES
(';905;1493;384;13387;29;933;467;28732;')
, (';905;138;3084;1387;290;9353;4767;2732;')
, (';9085;14493;3864;130387;289;933;4767;28732;')
DECLARE #Arguments table (Arg int)
INSERT INTO #Arguments
VALUES
(905)
, (467)
, (4767)
SELECT String
FROM
#T
CROSS JOIN #Arguments
GROUP BY String
HAVING SUM(CASE WHEN PATINDEX('%;' + CAST(Arg AS varchar) + ';%', String) > 0 THEN 1 ELSE 0 END) > 1
And example of using this with a function to generate the arguments:
CREATE FUNCTION GenerateArguments (#Integers varchar(255))
RETURNS #Arguments table (Arg int)
AS
BEGIN
WITH cte
AS
(
SELECT
PATINDEX('%,%', #Integers) p
, LEFT(#Integers, PATINDEX('%,%', #Integers) - 1) n
UNION ALL
SELECT
CASE WHEN PATINDEX('%,%', SUBSTRING(#Integers, p + 1, LEN(#Integers))) + p = p THEN 0 ELSE PATINDEX('%,%', SUBSTRING(#Integers, p + 1, LEN(#Integers))) + p END
, CASE WHEN PATINDEX('%,%', SUBSTRING(#Integers, p + 1, LEN(#Integers))) = 0 THEN RIGHT(#Integers, PATINDEX('%,%', REVERSE(#Integers)) - 1) ELSE LEFT(SUBSTRING(#Integers, p + 1, LEN(#Integers)), PATINDEX('%,%', SUBSTRING(#Integers, p + 1, LEN(#Integers))) - 1) END
FROM cte
WHERE p <> 0
)
INSERT INTO #Arguments (Arg)
SELECT n
FROM cte
RETURN
END
GO
DECLARE #T table (String varchar(255))
INSERT INTO #T
VALUES
(';905;1493;384;13387;29;933;467;28732;')
, (';905;138;3084;1387;290;9353;4767;2732;')
, (';9085;14493;3864;130387;289;933;4767;28732;')
;
SELECT String
FROM
#T
CROSS JOIN GenerateArguments('905,467,4767')
GROUP BY String
HAVING SUM(CASE WHEN PATINDEX('%;' + CAST(Arg AS varchar) + ';%', String) > 0 THEN 1 ELSE 0 END) > 1
You can achieve this using the like function for the regex and row_number to determine the number of matches.
Here we declare the column values for testing:
DECLARE #tbl TABLE (
string NVARCHAR(MAX)
)
INSERT #tbl VALUES
(';905;1493;384;13387;29;933;467;28732;'),
(';905;138;3084;1387;290;9353;4767;2732;'),
(';9085;14493;3864;130387;289;933;4767;28732;')
Then we pass your search parameters into a table variable to be joined on:
DECLARE #search_tbl TABLE (
search_value INT
)
INSERT #search_tbl VALUES
(905),
(467),
(4767)
Finally we join the table with the column to search for onto the search table. We apply the row_number function to determine the number of times it matches. We select from this subquery where the row_number = 2 meaning that it joined at least twice.
SELECT
string
FROM (
SELECT
tbl.string,
ROW_NUMBER() OVER (PARTITION BY tbl.string ORDER BY tbl.string) AS rn
FROM #tbl tbl
JOIN #search_tbl search_tbl ON
tbl.string LIKE '%;' + CAST(search_tbl.search_value AS NVARCHAR(MAX)) + ';%'
) tbl
WHERE rn = 2
You could build a where clause like this :
WHERE
case when column like '%;905;%' then 1 else 0 end +
case when column like '%;467;%' then 1 else 0 end +
case when column like '%;4767;%' then 1 else 0 end >= 2
The advantage is that you do not need a helper table. I don't know how you build the query, but the following also works, and is useful if the numbers are in a tsql variable.
case when column like ('%;' + #n + ';%') then 1 else 0 end
ACT and CAT are anagrams
I have to Write a function in sql server that takes 2 strings and given a Boolean output that indicates whether the both of them are anagram or not.
This doesnt make sense to do it in sql server,but,it is for learning purpose only
SQL Server is not good at this kind of things, but here you are:
WITH Src AS
(
SELECT * FROM (VALUES
('CAT', 'ACT'),
('CAR', 'RAC'),
('BUZ', 'BUS'),
('FUZZY', 'MUZZY'),
('PACK', 'PACKS'),
('AA', 'AA'),
('ABCDEFG', 'GFEDCBA')) T(W1, W2)
), Numbered AS
(
SELECT *, ROW_NUMBER() OVER (ORDER BY (SELECT 1)) Num
FROM Src
), Splitted AS
(
SELECT Num, W1 Word1, W2 Word2, LEFT(W1, 1) L1, LEFT(W2, 1) L2, SUBSTRING(W1, 2, LEN(W1)) W1, SUBSTRING(W2, 2, LEN(W2)) W2
FROM Numbered
UNION ALL
SELECT Num, Word1, Word2, LEFT(W1, 1) L1, LEFT(W2, 1) L2, SUBSTRING(W1, 2, LEN(W1)) W1, SUBSTRING(W2, 2, LEN(W2)) W2
FROM Splitted
WHERE LEN(W1)>0 AND LEN(W2)>0
), SplitOrdered AS
(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY Num ORDER BY L1) LNum1,
ROW_NUMBER() OVER (PARTITION BY Num ORDER BY L2) LNum2
FROM Splitted
)
SELECT S1.Num, S1.Word1, S1.Word2, CASE WHEN COUNT(*)=LEN(S1.Word1) AND COUNT(*)=LEN(S1.Word2) THEN 1 ELSE 0 END Test
FROM SplitOrdered S1
JOIN SplitOrdered S2 ON S1.L1=S2.L2 AND S1.Num=S2.Num AND S1.LNum1=S2.LNum2
GROUP BY S1.Num, S1.Word1, S1.Word2
And results:
1 CAT ACT 1
2 CAR RAC 1
3 BUZ BUS 0
4 FUZZY MUZZY 0
5 PACK PACKS 0
6 AA AA 1
7 ABCDEFG GFEDCBA 1
First split (T-SQL Split Word into characters) both words into temporary tables. Then perform an outer join and check for nulls.
Edit thanks to George's comment:
split (T-SQL Split Word into characters) both words into temporary tables
Modify temporary tables or use CTEs to add a column with count(*) with group by letters clause
Perform a full outer join on two temporary tables using a letter and it's count in join condition
Check for nulls in the output - if there are none, you have an anagram
The first get in my mind:
DECLARE #word1 nvarchar(max) = NULL,
#word2 nvarchar(max) = 'Test 1',
#i int = 0, #n int
DECLARE #table TABLE (
id int,
letter int
)
SELECT #word1 = ISNULL(LOWER(#word1),''), #word2 = ISNULL(LOWER(#word2),'')
SELECT #n = CASE WHEN LEN(#word1) > LEN(#word2) THEN LEN(#word1) ELSE LEN(#word2) END
WHILE #n > 0
BEGIN
INSERT INTO #table
SELECT 1, ASCII(SUBSTRING(#word1,#n,1))
UNION ALL
SELECT 2, ASCII(SUBSTRING(#word2,#n,1))
SET #n=#n-1
END
SELECT CASE WHEN COUNT(*) = 0 THEN 1 ELSE 0 END isAnagram
FROM (
SELECT id, letter, COUNT(letter) as c
FROM #table
WHERE id = 1
GROUP BY id, letter)as t
FULL OUTER JOIN (
SELECT id, letter, COUNT(letter) as c
FROM #table
WHERE id = 2
GROUP BY id, letter) as p
ON t.letter = p.letter and t.c =p.c
WHERE t.letter is NULL OR p.letter is null
Output:
isAnagram
0
You can also use loops in functions, and they can work fast. I am not able to get any of the of other answers even close to the performance of this function:
CREATE FUNCTION IsAnagram
(
#value1 VARCHAR(255)
, #value2 VARCHAR(255)
)
RETURNS BIT
BEGIN
IF(LEN(#value1) != LEN(#value2))
RETURN 0;
DECLARE #firstChar VARCHAR(3);
WHILE (LEN(#value1) > 0)
BEGIN
SET #firstChar = CONCAT('%', LEFT(#value1, 1), '%');
IF(PATINDEX(#firstChar, #value2) > 0)
SET #value2 = STUFF(#value2, PATINDEX(#firstChar, #value2), 1, '');
ELSE
RETURN 0;
SET #value1 = STUFF(#value1, 1, 1, '');
END
RETURN (SELECT IIF(#value2 = '', 1, 0));
END
GO
SELECT dbo.IsAnagram('asd', 'asd')
--1
SELECT dbo.IsAnagram('asd', 'dsa')
--1
SELECT dbo.IsAnagram('assd', 'dsa')
--0
SELECT dbo.IsAnagram('asd', 'dssa')
--0
SELECT dbo.IsAnagram('asd', 'asd')
This is something a numbers table can help with.
Code to create and populate a small numbers table is below.
CREATE TABLE dbo.Numbers
(
Number INT PRIMARY KEY
);
WITH Ten(N) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
INSERT INTO dbo.Numbers
SELECT ROW_NUMBER() OVER (ORDER BY ##SPID) AS Number
FROM Ten T10,
Ten T100,
Ten T1000
Once that is in place you can use
SELECT W1,
W2,
IsAnagram = CASE
WHEN LEN(W1) <> LEN(W2)
THEN 0
ELSE
CASE
WHEN EXISTS (SELECT SUBSTRING(W1, Number, 1),
COUNT(*)
FROM dbo.Numbers
WHERE Number <= LEN(W1)
GROUP BY SUBSTRING(W1, Number, 1)
EXCEPT
SELECT SUBSTRING(W2, Number, 1),
COUNT(*)
FROM dbo.Numbers
WHERE Number <= LEN(W2)
GROUP BY SUBSTRING(W2, Number, 1))
THEN 0
ELSE 1
END
END
FROM (VALUES
('CAT', 'ACT'),
('CAR', 'RAC'),
('BUZ', 'BUS'),
('FUZZY', 'MUZZY'),
('PACK', 'PACKS'),
('AA', 'AA'),
('ABCDEFG', 'GFEDCBA')) T(W1, W2)
Or an alternative implementation could be
IsAnagram = CASE
WHEN LEN(W1) <> LEN(W2)
THEN 0
ELSE
CASE
WHEN EXISTS (SELECT 1
FROM dbo.Numbers N
CROSS APPLY (VALUES(1,W1),
(2,W2)) V(Col, String)
WHERE N.Number <= LEN(W1)
GROUP BY SUBSTRING(String, Number, 1)
HAVING COUNT(CASE WHEN Col = 1 THEN 1 END) <>
COUNT(CASE WHEN Col = 2 THEN 1 END))
THEN 0
ELSE 1
END
END