Sql split comma separated data and check with other table - sql

I have one table with two fields: one for the tags and another for ProspectID.
-- Sample data: every prospect stores its tags as one comma-separated string.
DECLARE @Filter NVARCHAR(251) = '30,40'; -- comma-separated tag IDs to look for
DECLARE @temp TABLE
(
    TagID NVARCHAR(MAX), -- comma-separated list of tag IDs
    ProspectID INT
);
INSERT INTO @temp (TagID, ProspectID)
VALUES
    ('20,30,40', 1),
    ('30,50', 2),
    ('20,30,40', 3),
    ('60,70', 4),
    ('30', 5);
Need to return 30 contains prospectID and 40 contains as per my example
Output I need
ProspectID
1
3
5

This is a question of Relational Division With Remainder, of which there are many solutions. I will present one common one.
You can use STRING_SPLIT to break up your values:
-- Sample data: every prospect stores its tags as one comma-separated string.
DECLARE @temp TABLE
(
    TagID NVARCHAR(MAX),
    ProspectID INT
);
INSERT INTO @temp (TagID, ProspectID)
VALUES
    ('20,30,40', 1),
    ('30,50', 2),
    ('20,30,40', 3),
    ('60,70', 4),
    ('30', 5);
DECLARE @Filter NVARCHAR(251) = '30,40';

-- Return each prospect whose tag list contains every value in @Filter.
SELECT
    p.ProspectID
FROM @temp AS p
WHERE EXISTS (
    -- One row per filter value; tag.value is NULL when the prospect lacks it.
    SELECT 1
    FROM STRING_SPLIT(@Filter, ',') AS f
    LEFT JOIN STRING_SPLIT(p.TagID, ',') AS tag
        ON tag.value = f.value
    -- COUNT(tag.value) skips NULLs, so the counts are equal only when
    -- every filter value found a matching tag.
    HAVING COUNT(tag.value) = COUNT(*) -- none missing
);
db<>fiddle
However, your schema design is flawed. Do not store multiple pieces of information in one column or value. Instead store them in separate rows.
So you would have a table ProspectTag storing each combination (what you get by splitting the strings into separate rows), and #Filter should be a table variable or Table Valued Parameter also.
-- Normalized sample data: one row per (tag, prospect) combination.
DECLARE @temp TABLE
(
    TagID INT,
    ProspectID INT
);
INSERT INTO @temp (TagID, ProspectID)
VALUES
    (20, 1),
    (30, 1),
    (40, 1),
    (30, 2),
    (50, 2),
    (20, 3),
    (30, 3),
    (40, 3),
    (60, 4),
    (70, 4),
    (30, 5);

-- The tags every returned prospect must have; the primary key keeps them unique.
DECLARE @Filter TABLE (value INT PRIMARY KEY);
INSERT @Filter (value)
VALUES
    (30),
    (40);

DECLARE @totalFilters INT = (SELECT COUNT(*) FROM @Filter);

-- Keep only the rows for filtered tags, then keep the prospects that matched
-- as many rows as there are filter values.
-- NOTE(review): this assumes (TagID, ProspectID) pairs are unique in @temp;
-- a duplicated pair would inflate COUNT(*).
SELECT
    t.ProspectID
FROM @temp AS t
INNER JOIN @Filter AS f
    ON t.TagID = f.value
GROUP BY
    t.ProspectID
HAVING COUNT(*) = @totalFilters; -- none missing
db<>fiddle

Use the following query:
-- Returns prospects whose TagID string contains the whole @Filter string,
-- or whose TagID equals the second-from-last element of @Filter.
-- NOTE(review): LIKE is a plain substring match, so a filter of '30,40' would
-- also match a TagID such as '130,400'. PARSENAME only handles names of up to
-- four parts, so the second predicate stops working for longer filter lists.
SELECT ProspectID
FROM @temp
WHERE TagID LIKE '%' + @Filter + '%'
--WHERE TAGID LIKE '%' + LTRIM(RTRIM(@Filter)) + '%'
   OR TagID IN (PARSENAME(REPLACE(@Filter, ',', '.'), 2)) --for ProspectID=5

Related

Find data by multiple Lookup table clauses

-- Sample data: characters, and the link rows that attach names to characters.
DECLARE @Character TABLE (id INT, [name] VARCHAR(12));
INSERT INTO @Character (id, [name])
VALUES
    (1, 'tom'),
    (2, 'jerry'),
    (3, 'dog');

DECLARE @NameToCharacter TABLE (id INT, nameId INT, characterId INT);
INSERT INTO @NameToCharacter (id, nameId, characterId)
VALUES
    (1, 1, 1),
    (2, 1, 3),
    (3, 1, 2),
    (4, 2, 1);
The Name Table has more than just 1,2,3 and the list to parse on is dynamic
NameTable
id | name
----------
1 foo
2 bar
3 steak
CharacterTable
id | name
---------
1 tom
2 jerry
3 dog
NameToCharacterTable
id | nameId | characterId
1 1 1
2 1 3
3 1 2
4 2 1
I am looking for a query that will return a character that has two names. For example
With the above data only "tom" will be returned.
-- Attempt from the question: IN matches a row when nameId is 1 OR 2,
-- so it cannot express "has both names".
SELECT *
FROM nameToCharacterTable
WHERE nameId in (1,2)
The IN clause will return every row that has a 1 or a 2. I want to only return the rows that have both a 1 and a 2.
I am stumped. I have tried everything I know and do not want to resort to dynamic SQL. Any help would be great.
The 1,2 in this example will be a dynamic list of integers; for example, it could be 1,3,4,5,...
Filter on a count of how many times the character appears in the NameToCharacter table for the list of names you are providing (which I have assumed you can convert into a table variable or temp table), e.g.
-- Sample data: characters, and the link rows that attach names to characters.
DECLARE @Character TABLE (id INT, [name] VARCHAR(12));
INSERT INTO @Character (id, [name])
VALUES
    (1, 'tom'),
    (2, 'jerry'),
    (3, 'dog');

DECLARE @NameToCharacter TABLE (id INT, nameId INT, characterId INT);
INSERT INTO @NameToCharacter (id, nameId, characterId)
VALUES
    (1, 1, 1),
    (2, 1, 3),
    (3, 1, 2),
    (4, 2, 1);

-- The names a character must have in order to be returned.
DECLARE @RequiredNames TABLE (nameId INT);
INSERT INTO @RequiredNames (nameId)
VALUES
    (1),
    (2);

-- Return the characters linked to all required names: count the character's
-- link rows whose nameId is in the required list and compare with the list size.
SELECT c.id, c.[name]
FROM @Character AS c
WHERE (
    SELECT COUNT(*)
    FROM @NameToCharacter AS nc
    WHERE nc.characterId = c.id
      AND nc.nameId IN (SELECT rn.nameId FROM @RequiredNames AS rn)
) = 2; -- must equal the number of rows in @RequiredNames (two here)
Returns:
id
name
1
tom
Note: Providing DDL+DML as shown here makes it much easier for people to assist you.
This is classic Relational Division With Remainder.
There are a number of different solutions. @DaleK has given you an excellent one: inner-join everything, then check that each set has the right amount. This is normally the fastest solution.
If you want to ensure it works with a dynamic amount of rows, just change the last line to
) = (SELECT COUNT(*) FROM @RequiredNames); -- compare with however many names are required
Two other common solutions exist.
Left-join and check that all rows were joined
-- Return the characters for which every required name has a matching link row.
SELECT c.id, c.[name]
FROM @Character AS c
WHERE EXISTS (
    -- One row per required name; nc.nameId is NULL when this character lacks it.
    SELECT 1
    FROM @RequiredNames AS rn
    LEFT JOIN @NameToCharacter AS nc
        ON nc.nameId = rn.nameId
       AND nc.characterId = c.id
    HAVING COUNT(*) = COUNT(nc.nameId) -- all rows are joined
);
Double anti-join, in other words: there are no "required" that are "not in the set"
-- Return the characters for which no required name is missing.
SELECT c.id, c.[name]
FROM @Character AS c
WHERE NOT EXISTS (
    -- A required name ...
    SELECT 1
    FROM @RequiredNames AS rn
    WHERE NOT EXISTS (
        -- ... that this character is not linked to.
        SELECT 1
        FROM @NameToCharacter AS nc
        WHERE nc.nameId = rn.nameId
          AND nc.characterId = c.id
    )
);
A variation on the one from the other answer uses a windowed aggregate instead of a subquery. I don't think this is performant, but it may have uses in certain cases.
-- Return the characters whose number of matched required names equals the
-- total number of required names, carried on each row as cnt.
SELECT c.id, c.[name]
FROM @Character AS c
WHERE EXISTS (
    SELECT 1
    FROM (
        -- COUNT(*) OVER () attaches the size of @RequiredNames to every row.
        SELECT r.nameId, COUNT(*) OVER () AS cnt
        FROM @RequiredNames AS r
    ) AS rn
    INNER JOIN @NameToCharacter AS nc
        ON nc.nameId = rn.nameId
       AND nc.characterId = c.id
    -- With no matches MIN(rn.cnt) is NULL, so the character is filtered out.
    HAVING COUNT(*) = MIN(rn.cnt)
);
db<>fiddle

Dynamic values in where clause

I am trying to read rows one by one from a table and pass their values into a query condition, but I am unable to proceed further. Do I have to use a counter/iteration, or are there other options? Any help is much appreciated.
I want to pass values into the value like condition
-- Question setup: #temp holds the (userid, typeid) pairs to search for.
create table #temp
(
userid int,
typeid int
)
insert into #temp values (1, 101)
insert into #temp values (1, 221)
insert into #temp values (3, 401)
insert into #temp values (4, 501)
-- #target holds free-text values that embed 'userid:<n> typeid:<n>'.
create table #target
(
userid int,
roleid int,
value varchar(max)
)
insert into #target values (1, 000, 'something here userid:1 typeid:101 something here')
insert into #target values (1, 001, 'something here userid:1 typeid:221 something here')
insert into #target values (1, 001, 'something here userid:1 typeid:331 something here')
insert into #target values (3, 002, 'something here userid:3 typeid:401 something here')
-- Attempted query: join on userid and match the embedded userid/typeid text.
select t.userid, d.roleid, t.typeid
from #target d
inner join #temp t on t.userid = d.userid
-- NOTE: the pattern below is not valid as written; the string literals and the
-- column references are placed side by side without a concatenation operator.
and value like '%userid:'t.userid' typeid:'t.typeid'%'
This is the result I get:
userid
roleid
typeid
1
000
101
1
001
221
3
002
401
Seems you just need to familiarise yourself with the string concatenation operator:
-- Match each #temp row to the #target rows of the same user whose text embeds
-- 'userid:<userid> typeid:<typeid>'.
select t.userid, d.roleid, t.typeid
-- How to debug such a query
--, d.[value]
--,'%userid:' + convert(varchar(32),t.userid) + ' typeid:' + convert(varchar(32),t.typeid) + ' %'
--, case when d.[value] + ' ' like '%userid:' + convert(varchar(32),t.userid) + ' typeid:' + convert(varchar(32),t.typeid) + ' %' then 1 else 0 end
from #target d
inner join #temp t on t.userid = d.userid
    -- A trailing space is appended to both the value and the pattern so that a
    -- typeid such as 10 cannot match 'typeid:101'.
    -- NOTE(review): assumes the typeid is followed by a space or ends the value.
    and d.[value] + ' ' like '%userid:' + convert(varchar(32),t.userid) + ' typeid:' + convert(varchar(32),t.typeid) + ' %'
Returns
userid
roleid
typeid
1
0
101
1
1
221
3
2
401

Can a Pivot table be used with a unknown number of columns?

If I have a team table with an unknown number of members, is there a way to make the pivot query dynamic?
-- Sample data: one row per (team, member).
create table #t (
    team varchar(20),
    member varchar(20)
)
insert into #t (team, member) values ('ERP', 'Jack')
insert into #t (team, member) values ('ERP', 'John')
insert into #t (team, member) values ('ERP', 'Mary')
insert into #t (team, member) values ('ERP', 'Tim')
insert into #t (team, member) values ('CRM', 'Robert')
insert into #t (team, member) values ('CRM', 'Diana')

select team, member from #t

-- Static pivot: number the members inside each team, then turn the first three
-- numbers into columns. Members numbered 4 and above are not shown.
select team,
    [1] as teamMember1,
    [2] as teamMember2,
    [3] as teamMember3
from (
    -- Ordering by member makes the numbering repeatable; ordering by the
    -- partition column itself would leave it up to the engine.
    select team, member,
        row_number() over (partition by team order by member) as rownum
    from #t
) a
pivot (max(member) for rownum in ([1], [2], [3])) as pvt

drop table #t
Why yes, yes there is. Here's a script I cooked up years ago for a similar problem that was ultimately solved by giving the user Excel and washing my hands of it. I apologize it's not configured with your example data, but hopefully it's easy to follow.
Hope that helps,
John
--------------START QUERY--------------
-- Example Table
CREATE TABLE #glbTestTable
(
    ProviderID INT,
    Total INT,
    PaymentDate SMALLDATETIME
);
--So the dates insert properly
SET DATEFORMAT dmy;
-- Populate Example Table
INSERT INTO #glbTestTable (ProviderID, Total, PaymentDate) VALUES (232, 12200, '12/01/09');
INSERT INTO #glbTestTable (ProviderID, Total, PaymentDate) VALUES (456, 10200, '12/01/09');
INSERT INTO #glbTestTable (ProviderID, Total, PaymentDate) VALUES (563, 11899, '02/03/09');
INSERT INTO #glbTestTable (ProviderID, Total, PaymentDate) VALUES (221, 5239, '13/04/09');
INSERT INTO #glbTestTable (ProviderID, Total, PaymentDate) VALUES (987, 7899, '02/03/09');
INSERT INTO #glbTestTable (ProviderID, Total, PaymentDate) VALUES (1, 1234, '02/08/09');
INSERT INTO #glbTestTable (ProviderID, Total, PaymentDate) VALUES (2, 4321, '02/07/09');
INSERT INTO #glbTestTable (ProviderID, Total, PaymentDate) VALUES (3, 5555, '02/06/09');
-- Raw Output
SELECT ProviderID, Total, PaymentDate
FROM #glbTestTable;
-- Build Query for Pivot --
DECLARE @pvtColumns NVARCHAR(MAX);
-- Grab up to the first 1023 "Columns" that we want to use in Pivot Table.
-- Tables can only have 1024 columns at a maximum
-- Each distinct date becomes a bracket-quoted dd/mm/yyyy column name (style 103).
-- The names are concatenated in date order with FOR XML PATH, and STUFF removes
-- the leading ', '. The result is NULL when the table has no rows.
SET @pvtColumns = STUFF((
    SELECT TOP 1023 ', ' + QUOTENAME(CONVERT(VARCHAR(10), t_distFP.PaymentDate, 103))
    FROM (SELECT DISTINCT PaymentDate FROM #glbTestTable) t_distFP
    ORDER BY t_distFP.PaymentDate
    FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)'), 1, 2, '');
-- Without any dates there are no pivot columns, so skip the dynamic query.
IF @pvtColumns IS NOT NULL
BEGIN
    -- Create PivotTable Query
    DECLARE @myQuery NVARCHAR(MAX);
    SET @myQuery = N'
SELECT ProviderID, ' + @pvtColumns + N'
FROM (SELECT ProviderID, PaymentDate, Total
FROM #glbTestTable) AS SourceTable
PIVOT
(
SUM(Total)
FOR PaymentDate IN (' + @pvtColumns + N')
) AS PivotTable';
    -- Run the Pivot Query
    EXEC sp_executesql @myQuery;
END;
-- Cleanup
DROP TABLE #glbTestTable;
---------------END QUERY---------------

project a sparse result at some level

I don't really know what to call this but it's not that hard to explain
Basically what I have is a result like this
Similarity ColumnA ColumnB ColumnC
1 SomeValue NULL SomeValue
2 NULL SomeB NULL
3 SomeValue NULL SomeC
4 SomeA NULL NULL
This result is created by matching a set of strings against another table. Each string also contains some values for ColumnA..C, which are the values I want to aggregate in some way.
Something like min/max works very well but I can't figure out how to get it to account for the highest similarity not just the min/max value. I don't really want the min/max, I want the first non-null value with the highest similarity.
Ideally the result would look like this
ColumnA ColumnB ColumnC
SomeA SomeB SomeC
I'd like be able to efficiently join in the temporary result to compute the rest and I've been exploring different options. Something which I've been considering is creating a SQL Server CLR aggregate the yields the "first" non-null value but I'm unsure if there's even such a thing as a first or last when running an aggregate on a result.
Okay, so I figured it out. I originally had trouble with UPDATE FROM and JOIN not playing well together. I was counting on the UPDATE occurring multiple times and giving me the correct results; however, there is no such guarantee from SQL Server (it is actually undefined behavior, and although it appeared to work, we'll have none of that). Since you can run UPDATE against a CTE, I combined that with OUTER APPLY to select exactly one row to complement a missing value where possible.
Here's the whole thing with test data as well.
-- Cost rules: make/model are LIKE patterns; a NULL model applies to any model.
DECLARE @cost TABLE (
    make nvarchar(100) not null,
    model nvarchar(100),
    a numeric(18,2),
    b numeric(18,2)
);
INSERT @cost (make, model, a, b) VALUES ('a%', null, 100, 2);
INSERT @cost (make, model, a, b) VALUES ('a%', 'a%', 149, null);
INSERT @cost (make, model, a, b) VALUES ('a%', 'ab', 349, null);
INSERT @cost (make, model, a, b) VALUES ('b', null, null, 2.5);
INSERT @cost (make, model, a, b) VALUES ('b', 'b%', 249, null);
INSERT @cost (make, model, a, b) VALUES ('b', 'b', null, 3);

-- Units to be priced.
DECLARE @unit TABLE (
    id int,
    make nvarchar(100) not null,
    model nvarchar(100)
);
INSERT @unit (id, make, model) VALUES (1, 'a', null);
INSERT @unit (id, make, model) VALUES (2, 'a', 'a');
INSERT @unit (id, make, model) VALUES (3, 'a', 'ab');
INSERT @unit (id, make, model) VALUES (4, 'b', null);
INSERT @unit (id, make, model) VALUES (5, 'b', 'b');

-- Every matching cost row per unit, numbered from 1 in the order
-- make DESC, model DESC.
DECLARE @tmp TABLE (
    id int,
    specificity int,
    a numeric(18,2),
    b numeric(18,2),
    primary key(id, specificity)
);
INSERT @tmp (id, specificity, a, b)
OUTPUT inserted.* --FOR DEBUGGING
SELECT
    unit.id
    , ROW_NUMBER() OVER (
        PARTITION BY unit.id
        ORDER BY cost.make DESC, cost.model DESC
    ) AS specificity
    , cost.a
    , cost.b
FROM @unit unit
INNER JOIN @cost cost ON unit.make LIKE cost.make
    AND (cost.model IS NULL OR unit.model LIKE cost.model)
;
--fix the holes
-- Only the specificity = 1 rows are updated. Each NULL a/b is replaced by the
-- value from the lowest-numbered later row (specificity > 1) that has one.
-- Each OUTER APPLY returns at most one row, so every target row is updated
-- exactly once.
WITH tmp AS (
    SELECT id, specificity, a, b
    FROM @tmp
    WHERE specificity = 1
        AND (a IS NULL OR b IS NULL) --where necessary
)
UPDATE tmp
SET
    tmp.a = COALESCE(tmp.a, fill_a.a)
    , tmp.b = COALESCE(tmp.b, fill_b.b)
OUTPUT inserted.* --FOR DEBUGGING
FROM tmp
OUTER APPLY (
    SELECT TOP 1 src.a
    FROM @tmp src
    WHERE src.id = tmp.id
        AND src.specificity > 1
        AND src.a IS NOT NULL
    ORDER BY src.specificity
) fill_a
OUTER APPLY (
    SELECT TOP 1 src.b
    FROM @tmp src
    WHERE src.id = tmp.id
        AND src.specificity > 1
        AND src.b IS NOT NULL
    ORDER BY src.specificity
) fill_b
;

SQL "tree-like" query - most parent group

I'm having some trouble doing a "tree-like" query (what do we call this?) in SQL.
Take a look at my diagram below (table and column names are in danish - sorry about that):
DB diagram http://img197.imageshack.us/img197/8721/44060572.jpg
Using MSSQL Server 2005, the goal is to find the most parent group (Gruppe), for each customer (Kunde).
Each group can have many parent groups and many child groups.
And, I would also like to know how to display the tree like this:
Customer 1
- Parent group 1
- Child group 1
- ChildChild group n
- Child group n
- Parent group n
- ...
- ...
Customer n
- ...
Another question:
How does the query look to get ALL the groups for all the customers? Parent and child groups.
You can use CTE's to construct "the full path" column on the fly
--DROP TABLE Gruppe, Kunde, Gruppe_Gruppe, Kunde_Gruppe
-- Sample schema: groups, customers, parent/child links between groups,
-- and links between customers and groups.
CREATE TABLE Gruppe (
Id INT PRIMARY KEY
, Name VARCHAR(100)
)
CREATE TABLE Kunde (
Id INT PRIMARY KEY
, Name VARCHAR(100)
)
CREATE TABLE Gruppe_Gruppe (
ParentGruppeId INT
, ChildGruppeId INT
)
CREATE TABLE Kunde_Gruppe (
KundeId INT
, GruppeId INT
)
INSERT Gruppe
VALUES (1, 'Group 1'), (2, 'Group 2'), (3, 'Group 3')
, (4, 'Sub-group A'), (5, 'Sub-group B'), (6, 'Sub-group C'), (7, 'Sub-group D')
INSERT Kunde
VALUES (1, 'Kunde 1'), (2, 'Kunde 2'), (3, 'Kunde 3')
INSERT Gruppe_Gruppe
VALUES (1, 4), (1, 5), (1, 7)
, (2, 6), (2, 7)
, (6, 1)
INSERT Kunde_Gruppe
VALUES (1, 1), (1, 2)
, (2, 3), (2, 4)
-- TheKey is a path built from fixed-width 5-character segments.
-- CONVERT(CHAR(5), id) pads the id with spaces, and REPLACE turns the padding
-- into 'K' for a customer segment or 'G' for a group segment.
;WITH CTE
AS (
-- Anchor 1: one row per customer; the key is the customer segment only.
SELECT CONVERT(VARCHAR(1000), REPLACE(CONVERT(CHAR(5), k.Id), ' ', 'K')) AS TheKey
, k.Name AS Name
FROM Kunde k
UNION ALL
-- Anchor 2: one row per customer-to-group link; customer segment + group segment.
SELECT CONVERT(VARCHAR(1000), REPLACE(CONVERT(CHAR(5), x.KundeId), ' ', 'K')
+ REPLACE(CONVERT(CHAR(5), g.Id), ' ', 'G')) AS TheKey
, g.Name
FROM Gruppe g
JOIN Kunde_Gruppe x
ON g.Id = x.GruppeId
UNION ALL
-- Recursive step: append a child group to every path whose last segment
-- (RIGHT(p.TheKey, 5)) is that child's parent group.
SELECT CONVERT(VARCHAR(1000), p.TheKey + REPLACE(CONVERT(CHAR(5), g.Id), ' ', 'G')) AS TheKey
, g.Name
FROM Gruppe g
JOIN Gruppe_Gruppe x
ON g.Id = x.ChildGruppeId
JOIN CTE p
ON REPLACE(CONVERT(CHAR(5), x.ParentGruppeId), ' ', 'G') = RIGHT(p.TheKey, 5)
-- Only paths shorter than 32 segments are extended, which bounds the depth.
WHERE LEN(p.TheKey) < 32 * 5
)
-- Level is the number of segments in the path; sorting by TheKey lists each
-- path directly after the path it extends.
SELECT *
, LEN(TheKey) / 5 AS Level
FROM CTE c
ORDER BY c.TheKey
Performance might be sub-optimal if you have lots of reads vs rare modifications.
I just can't say it better than Joe Celko. The problem is usually that the models built don't lend themselves well to building hierarchies, and that those models have to take into consideration the characteristics of your hierarchy. Is it too deep? Is it too wide? Is it narrow and shallow?
One key to success on wide and shallow trees is to have the full path in the hierarchy in a column, like Celko mentions in the first link.
http://onlamp.com/pub/a/onlamp/2004/08/05/hierarchical_sql.html
http://www.dbmsmag.com/9603d06.html and http://www.dbmsmag.com/9604d06.html
http://www.ibase.ru/devinfo/DBMSTrees/sqltrees.html
I came up with a solution that solves the problem of listing ALL the groups for each customer. Parent and child groups.
What do you think?
-- Lists every group of every customer: the groups linked directly to the
-- customer plus all of their descendant groups.
WITH GroupTree
AS
(
    -- Anchor: each group directly linked to a customer.
    -- The former "EXISTS (...) OR NOT EXISTS (...)" test on the same subquery
    -- was always true, so it has been removed; the result is unchanged.
    SELECT kg.KundeId, g.Id AS GruppeId
    FROM ActiveDirectory.Gruppe g
    INNER JOIN ActiveDirectory.Kunde_Gruppe kg ON g.Id = kg.GruppeId
    UNION ALL
    -- Recursive step: add the children of every group found so far.
    SELECT GroupTree.KundeId, gg.ChildGruppeId
    FROM ActiveDirectory.Gruppe_Gruppe gg
    INNER JOIN GroupTree ON gg.ParentGruppeId = GroupTree.GruppeId
)
SELECT KundeId, GruppeId
FROM GroupTree
-- 32767 is the highest recursion limit that can be named explicitly.
OPTION (MAXRECURSION 32767)
How about something like this:
-- Customers.
DECLARE @Customer TABLE(
    CustomerID INT IDENTITY(1,1),
    CustomerName VARCHAR(MAX)
)
INSERT INTO @Customer (CustomerName) SELECT 'Customer1'
INSERT INTO @Customer (CustomerName) SELECT 'Customer2'
INSERT INTO @Customer (CustomerName) SELECT 'Customer3'

-- Links each customer to the tree items assigned to it.
DECLARE @CustomerTreeStructure TABLE(
    CustomerID INT,
    TreeItemID INT
)
INSERT INTO @CustomerTreeStructure (CustomerID,TreeItemID) SELECT 1, 1
INSERT INTO @CustomerTreeStructure (CustomerID,TreeItemID) SELECT 2, 12
INSERT INTO @CustomerTreeStructure (CustomerID,TreeItemID) SELECT 3, 1
INSERT INTO @CustomerTreeStructure (CustomerID,TreeItemID) SELECT 3, 12

-- Adjacency list: TreeParentID is NULL for root items.
DECLARE @TreeStructure TABLE(
    TreeItemID INT IDENTITY(1,1),
    TreeItemName VARCHAR(MAX),
    TreeParentID INT
)
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001', NULL
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.001', 1
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.001.001', 2
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.001.002', 2
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.001.003', 2
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.002', 1
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.003', 1
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.003.001', 7
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.001.002.001', 4
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.001.002.002', 4
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '001.001.002.003', 4
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '002', NULL
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '002.001', 12
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '002.001.001', 13
INSERT INTO @TreeStructure (TreeItemName,TreeParentID) SELECT '002.001.002', 13

-- Structure gives every tree item a TreePath made of the zero-padded 5-digit
-- ids of its ancestors and itself, each followed by '\\'.
;WITH Structure AS (
    -- Anchor: root items.
    SELECT ts.TreeItemID,
        ts.TreeItemName,
        ts.TreeParentID,
        REPLICATE('0',5 - LEN(CAST(ts.TreeItemID AS VARCHAR(MAX)))) + CAST(ts.TreeItemID AS VARCHAR(MAX)) + '\\' TreePath
    FROM @TreeStructure ts
    WHERE ts.TreeParentID IS NULL
    UNION ALL
    -- Recursive step: append the child's padded id to its parent's path.
    SELECT ts.TreeItemID,
        ts.TreeItemName,
        ts.TreeParentID,
        s.TreePath + REPLICATE('0',5 - LEN(CAST(ts.TreeItemID AS VARCHAR(5)))) + CAST(ts.TreeItemID AS VARCHAR(5)) + '\\' TreePath
    FROM @TreeStructure ts INNER JOIN
        Structure s ON ts.TreeParentID = s.TreeItemID
)
-- For every item assigned to a customer, list that item and all of its
-- descendants: a descendant's path starts with the assigned item's path.
SELECT c.CustomerName,
    Children.TreeItemName,
    Children.TreePath
FROM @Customer c INNER JOIN
    @CustomerTreeStructure cts ON c.CustomerID = cts.CustomerID INNER JOIN
    Structure s ON cts.TreeItemID = s.TreeItemID INNER JOIN
    Structure Children ON Children.TreePath LIKE s.TreePath +'%'
ORDER BY c.CustomerName, Children.TreePath
-- MAXRECURSION 0 removes the recursion limit.
OPTION (MAXRECURSION 0)
In T-SQL, you can write a while loop. Untested:
-- Walk upwards from a starting group until a group with no parent is reached;
-- @group then holds the top-most group.
DECLARE @group INT
SET @group = <starting group> -- placeholder: supply the id of the group to start from
WHILE (EXISTS(SELECT * FROM Gruppe_Gruppe WHERE ChildGruppeId = @group))
BEGIN
    -- A group may have several parents; MIN picks one of them repeatably.
    -- NOTE(review): a cycle in Gruppe_Gruppe would make this loop run forever.
    SELECT @group = MIN(ParentGruppeId) FROM Gruppe_Gruppe WHERE ChildGruppeId = @group
END
We use SQL Server 2000 and there is an example of expanding hierarchies using a stack in the SQL Books Online, I have written a number of variants for our ERP system
http://support.microsoft.com/kb/248915
I gather that there is a Native method using CTE within SQL 2005 but I have not used it myself