I have two tables: the first with all movements in twelve months, and the second with claims registered in the same period. When I run the following query against the first table I get 10 records. Of course, there are other records with a different number of movements (e.g. 7, 23, or 2 movements):
select t.cod_suc
,t.cod_ramo_comercial
,t.Poliza
,t.Item
,t.id_pv
from temp_portafolio_personal_accidents as t
where t.cod_suc = 2
and t.cod_ramo_comercial = 46
and t.Poliza = 50283
and t.Item = 1
and t.id_pv = 788383;
With the second query, against the second table, I get the following result (a single claim):
select c.cod_suc
,c.cod_ramo_comercial
,c.[No. Policy]
,c.Item
,c.[ID Incident]
,max(c.id_pv) as id_pv
,count(distinct [No. Incident]) as 'Conteo R12'
from #claims as c
where c.[ID Incident] = 343632
group by c.cod_suc
,c.cod_ramo_comercial
,c.[No. Policy]
,c.Item
,c.[ID Incident];
Now I need to update the first table, but only one record. I'm using the following query, but all records are being updated. When I sum the results I get 10, but there is just one claim, as the second query shows.
update p
set [No. Siniestros R12] = b.[Conteo R12]
from temp_portafolio_personal_accidents p
left join
(select c.cod_suc
,c.cod_ramo_comercial
,c.[No. Policy]
,c.Item
,c.[ID Incident]
,max(c.id_pv) as id_pv
,count(distinct [No. Incident]) as 'Conteo R12'
from
#claims as c
where c.[ID Incident] = 343632
group by c.cod_suc
,c.cod_ramo_comercial
,c.[No. Policy]
,c.Item
,c.[ID Incident]
) b
on p.id_pv = b.id_pv
and p.cod_suc = b.cod_suc
and p.cod_ramo_comercial = b.cod_ramo_comercial
and p.Poliza = b.[No. Policy]
and p.Item = b.Item
where p.id_pv = 788383;
You can use a CTE with a ROW_NUMBER() function to do this. Simple example:
DECLARE @TABLE AS TABLE (Testing INT, Testing2 VARCHAR(55), Testing3 BIT);
INSERT INTO @TABLE VALUES (1, '1', 1);
INSERT INTO @TABLE VALUES (1, '1', 1);
INSERT INTO @TABLE VALUES (1, '1', 1);
INSERT INTO @TABLE VALUES (1, '1', 1);
INSERT INTO @TABLE VALUES (1, '1', 1);
INSERT INTO @TABLE VALUES (1, '1', 1);
INSERT INTO @TABLE VALUES (1, '1', 1);
INSERT INTO @TABLE VALUES (1, '1', 1);
WITH CTE AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY Testing) AS RowID
,Testing
,Testing2
,Testing3
FROM @TABLE
)
UPDATE CTE
SET Testing = 2, Testing2 = '2', Testing3 = 0
WHERE RowID = 1
;
SELECT * FROM @TABLE
;
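The same pattern applies to the original tables. The UPDATE changed all 10 rows because the join on id_pv, cod_suc, cod_ramo_comercial, Poliza, and Item matches every one of them, so each row received the same Conteo R12 value. Numbering the matching rows and updating only row 1 restricts the write to a single record. A hedged sketch (column names are taken from the question; which of the 10 rows "wins" under this ORDER BY is arbitrary):
WITH numbered AS
(
    SELECT p.[No. Siniestros R12],
           ROW_NUMBER() OVER (ORDER BY p.id_pv) AS RowID
    FROM temp_portafolio_personal_accidents AS p
    WHERE p.cod_suc = 2
      AND p.cod_ramo_comercial = 46
      AND p.Poliza = 50283
      AND p.Item = 1
      AND p.id_pv = 788383
)
UPDATE numbered
SET [No. Siniestros R12] = (SELECT COUNT(DISTINCT [No. Incident])
                            FROM #claims
                            WHERE [ID Incident] = 343632)
WHERE RowID = 1;  -- only one of the 10 matching rows is written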
I am getting the expected results from my query; I am using GROUP BY to group the data on the basis of different IDs.
The problem I am facing is that I have to insert this grouped data into a table called gstl_calculated_daily_fee, but when I pass the grouped result to variables called @total_mada_local_switch_high_value and @mada_range_id and insert them into the table, I get only the last result of the query in the table.
Sample result:
Fee range_id
1.23 1
1.22 2
2.33 3
I get only 2.33 and 1 after I insert, but I have to insert the whole result into the table.
Please suggest how I can insert the whole query result into the table. Below is the query:
DECLARE @total_mada_local_switch_high_value decimal(32,4) = 0.00;
DECLARE @mada_range_id int = 0;
select
@total_mada_local_switch_high_value = SUM(C.settlement_fees),
@mada_range_id = C.range_id
From
(
select
*
from
(
select
rowNumber = @previous_mada_switch_fee_volume_based_count + (ROW_NUMBER() OVER(PARTITION BY DATEPART(MONTH, x_datetime) ORDER BY x_datetime)),
tt.x_datetime
from gstl_trans_temp tt where (message_type_mapping = 0220) and card_type ='GEIDP1' and response_code IN(00,10,11) and tran_amount_req >= 5000
) A
CROSS APPLY
(
select
rtt.settlement_fees,
rtt.range_id
From gstl_mada_local_switch_fee_volume_based rtt
where A.rowNumber >= rtt.range_start
AND (A.rowNumber <= rtt.range_end OR rtt.range_end IS NULL)
) B
) C
group by CAST(C.x_datetime AS DATE),C.range_id
-- Insert Daily Volume
INSERT INTO
gstl_calculated_daily_fee(business_date,fee_type,fee_total,range_id)
VALUES
(@tlf_business_date,'MADA_SWITCH_FEE_LOCAL_CARD', @total_mada_local_switch_high_value, @mada_range_id)
I see no need for variables here. You can insert the aggregated results directly.
Sample data
create table Data
(
Range int,
Fee money
);
insert into Data (Range, Fee) values
(1, 1.00),
(1, 0.50),
(2, 3.00),
(3, 0.25),
(3, 0.50);
create table DataSum
(
Range int,
FeeSum money
);
Solution
insert into DataSum (Range, FeeSum)
select d.Range, sum(d.Fee)
from Data d
group by d.Range;
Fiddle to see things in action.
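Applied to the original query, the same idea means replacing the variables and the VALUES clause with an INSERT ... SELECT over the grouped result. A hedged sketch (table and column names are taken from the question; it assumes each row's business_date is the grouped calendar date rather than @tlf_business_date):
INSERT INTO gstl_calculated_daily_fee (business_date, fee_type, fee_total, range_id)
SELECT
    CAST(C.x_datetime AS DATE),
    'MADA_SWITCH_FEE_LOCAL_CARD',
    SUM(C.settlement_fees),
    C.range_id
FROM
(
    select A.x_datetime, B.settlement_fees, B.range_id
    from
    (
        select
            rowNumber = @previous_mada_switch_fee_volume_based_count + (ROW_NUMBER() OVER(PARTITION BY DATEPART(MONTH, x_datetime) ORDER BY x_datetime)),
            tt.x_datetime
        from gstl_trans_temp tt
        where (message_type_mapping = 0220) and card_type = 'GEIDP1' and response_code IN (00, 10, 11) and tran_amount_req >= 5000
    ) A
    CROSS APPLY
    (
        select rtt.settlement_fees, rtt.range_id
        from gstl_mada_local_switch_fee_volume_based rtt
        where A.rowNumber >= rtt.range_start
          AND (A.rowNumber <= rtt.range_end OR rtt.range_end IS NULL)
    ) B
) C
GROUP BY CAST(C.x_datetime AS DATE), C.range_id;
This inserts one row per (business date, range) group instead of only the last pair of values the variables happened to hold.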
I have the following query:
Original query:
SELECT
cd1.cust_number_id, cd1.cust_number_id, cd1.First_Name, cd1.Last_Name
FROM @Customer_Data cd1
inner join @Customer_Data cd2 on
cd1.Cd_Id <> cd2.Cd_Id
and cd2.cust_number_id <> cd1.cust_number_id
and cd2.First_Name = cd1.First_Name
and cd2.Last_Name = cd1.Last_Name
inner join @Customer c1 on c1.Cust_id = cd1.cust_number_id
inner join @Customer c2 on c2.cust_id = cd2.cust_number_id
WHERE c1.cust_number <> c2.cust_number
I optimized it as follows, but there is an error in my optimization and I can't find it:
Optimized query:
SELECT cd1.cust_number_id, cd1.cust_number_id, cd1.First_Name,cd1.Last_Name
FROM (
SELECT cdResult.cust_number_id, cdResult.First_Name,cdResult.Last_Name, COUNT(*) OVER (PARTITION BY cdResult.First_Name, cdResult.Last_Name) as cnt_name_bday
FROM @Customer_Data cdResult
WHERE cdResult.First_Name IS NOT NULL
AND cdResult.Last_Name IS NOT NULL) AS cd1
WHERE cd1.cnt_name_bday > 1;
Test data:
DECLARE @Customer_Data TABLE
(
Cd_Id INT,
cust_number_id INT,
First_Name NVARCHAR(30),
Last_Name NVARCHAR(30)
)
INSERT @Customer_Data (Cd_Id,cust_number_id,First_Name,Last_Name)
VALUES (1, 22, N'Alex', N'Bor'),
(2, 22, N'Alex', N'Bor'),
(3, 23, N'Alex', N'Bor'),
(4, 24, N'Tom', N'Cruse'),
(5, 25, N'Tom', N'Cruse')
DECLARE @Customer TABLE
(
Cust_id INT,
Cust_number INT
)
INSERT @Customer (Cust_id, Cust_number)
VALUES (22, 022),
(23, 023),
(24, 024),
(25, 025)
The problem is that the original query returns 6 rows (duplicating the rows), while the optimized one returns each matching row only once. How can I make the optimized query also duplicate the rows?
I would suggest just using window functions:
SELECT cd.cud_customer_id
FROM (SELECT cd.*, COUNT(*) OVER (PARTITION BY cud_name, cud_birthday) as cnt_name_bday
      FROM dbo.customer_data cd
     ) cd
WHERE cnt_name_bday > 1;
Your query is finding duplicates for either name or birthday. You want duplicates with both at the same time.
You can use a single EXISTS:
SELECT cd.cud_customer_id
FROM dbo.customer_data AS cd
WHERE EXISTS (SELECT 1
FROM dbo.customer_data AS c
WHERE c.cud_name = cd.cud_name AND c.cud_birthday = cd.cud_birthday AND c.cust_id <> cd.cud_customer_id
);
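Mapped onto the question's actual test tables, the same EXISTS idea might read like this (a hedged sketch; it treats two rows as duplicates when both First_Name and Last_Name match on a different Cd_Id):
SELECT cd.cust_number_id, cd.First_Name, cd.Last_Name
FROM @Customer_Data AS cd
WHERE EXISTS (SELECT 1
              FROM @Customer_Data AS c
              WHERE c.First_Name = cd.First_Name
                AND c.Last_Name = cd.Last_Name
                AND c.Cd_Id <> cd.Cd_Id);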
I have a table that stores groups of related rows; the rows are related via a groupIdentifier column. Groups can be any number of rows in size.
I need to be able to pass in a new set of groups of rows and then find a mapping of new groups to existing matching groups. The complication is that the order of each row within a group is defined by a rowOrdinal value and must be taken into account. That rowOrdinal value is not always 0-based, but the rows within a group are sorted by it. Also, @existingData contains hundreds of thousands of potential groups, so the query needs to be performant.
Here is an example input dataset:
declare @existingData table (
groupIdentifier int,
rowOrdinal int,
value varchar(1))
insert into @existingData values
(100, 0, 'X'),
(100, 1, 'Y'),
(200, 0, 'A'),
(200, 1, 'B'),
(200, 2, 'C'),
(40, 0, 'X'),
(41, 0, 'Y')
declare @newData table (
groupIdentifier int,
rowOrdinal int,
value varchar(1))
insert into @newData values
(1, 55, 'X'),
(1, 59, 'Y'),
(2, 0, 'Y'),
(2, 1, 'X')
-- @newData group 1 matches @existingData group 100; @newData group 2 has no match in @existingData
The desired result is a result set with two columns, existingGroupIdentifier and newGroupIdentifier. In this case the only result row would be (100, 1): 100 being the @existingData groupIdentifier and 1 being the @newData groupIdentifier.
Edit
The following is what I have come up with so far. By assuming I will only ever have a max group size of N, I can manually copy-paste T-SQL code that uses PIVOT and intermediate table variables to do the comparison for each group size. BUT this limits the system to N, seems ugly, and I would prefer a way to do it in a single query if possible.
declare @existingData table (
groupIdentifier int,
rowOrdinal int,
value varchar(1))
insert into @existingData values
(100, 0, 'X'),
(100, 1, 'Y'),
(200, 0, 'A'),
(200, 1, 'B'),
(200, 2, 'C'),
(40, 0, 'X'),
(41, 0, 'Y')
declare @newData table (
groupIdentifier int,
rowOrdinal int,
value varchar(1))
insert into @newData values
(1, 55, 'X'),
(1, 59, 'Y'),
(2, 0, 'Y'),
(2, 1, 'X'),
(3, 99, 'Y'),
(5, 4, 'A'),
(5, 10, 'B'),
(5, 200, 'C')
-- First build a table of the size of each group, limiting @existingData to only potentially matching groups (those that have at least one member in common)
declare @potentialGroupsInExistingData table (groupIdentifier int, groupSize int)
insert into @potentialGroupsInExistingData
select
ExistingData.groupIdentifier, COUNT(ExistingData.groupIdentifier)
from
@existingData ExistingData
where
exists (select top 1 * from @newData where value = ExistingData.value)
group by ExistingData.groupIdentifier
declare @groupsInNewData table (groupIdentifier int, groupSize int)
insert into @groupsInNewData
select
NewData.groupIdentifier, COUNT(NewData.groupIdentifier)
from
@newData NewData
group by NewData.groupIdentifier
-- handle groups of size one, this is a simpler case of the pivoting used with larger groups
-----------------------------------
select
ExistingData.groupIdentifier as ExistingGroupIdentifier,
NewData.groupIdentifier as NewGroupIdentifier
from
@potentialGroupsInExistingData PotentialExistingGroup
cross join @groupsInNewData GroupsInNewData
inner join @existingData ExistingData on
ExistingData.groupIdentifier = PotentialExistingGroup.groupIdentifier
inner join @newData NewData on
NewData.groupIdentifier = GroupsInNewData.groupIdentifier
and NewData.value = ExistingData.value
where
PotentialExistingGroup.groupSize = 1
and GroupsInNewData.groupSize = 1
-- handle groups of size two
-----------------------------------
declare @existingGroupsOfSizeTwo table (groupIdentifier int, valueOne varchar(1), valueTwo varchar(1))
insert into @existingGroupsOfSizeTwo
select
*
from
(select
ExistingData.groupIdentifier,
ExistingData.value,
ROW_NUMBER() over (partition by ExistingData.groupIdentifier order by ExistingData.rowOrdinal desc) as ActualOrdinal
from
@potentialGroupsInExistingData PotentialGroup
inner join @existingData ExistingData on
ExistingData.groupIdentifier = PotentialGroup.groupIdentifier
where
PotentialGroup.groupSize = 2) as T
pivot ( min(value) for T.ActualOrdinal in ([1], [2]) ) as p
declare @newGroupsOfSizeTwo table (groupIdentifier int, valueOne varchar(1), valueTwo varchar(1))
insert into @newGroupsOfSizeTwo
select
*
from
(select
NewData.groupIdentifier,
NewData.value,
ROW_NUMBER() over (partition by NewData.groupIdentifier order by NewData.rowOrdinal desc) as ActualOrdinal
from
@groupsInNewData NewDataGroup
inner join @newData NewData on
NewData.groupIdentifier = NewDataGroup.groupIdentifier
where
NewDataGroup.groupSize = 2) as T
pivot ( min(value) for T.ActualOrdinal in ([1], [2]) ) as p
select
ExistingData.groupIdentifier as ExistingGroupIdentifier,
NewData.groupIdentifier as NewGroupIdentifier
from
@newGroupsOfSizeTwo NewData
inner join @existingGroupsOfSizeTwo ExistingData on
ExistingData.valueOne = NewData.valueOne
and ExistingData.valueTwo = NewData.valueTwo
-- handle groups of size three
-----------------------------------
declare @existingGroupsOfSizeThree table (groupIdentifier int, valueOne varchar(1), valueTwo varchar(1), valueThree varchar(1))
insert into @existingGroupsOfSizeThree
select
*
from
(select
ExistingData.groupIdentifier,
ExistingData.value,
ROW_NUMBER() over (partition by ExistingData.groupIdentifier order by ExistingData.rowOrdinal desc) as ActualOrdinal
from
@potentialGroupsInExistingData PotentialGroup
inner join @existingData ExistingData on
ExistingData.groupIdentifier = PotentialGroup.groupIdentifier
where
PotentialGroup.groupSize = 3) as T
pivot ( min(value) for T.ActualOrdinal in ([1], [2], [3]) ) as p
declare @newGroupsOfSizeThree table (groupIdentifier int, valueOne varchar(1), valueTwo varchar(1), valueThree varchar(1))
insert into @newGroupsOfSizeThree
select
*
from
(select
NewData.groupIdentifier,
NewData.value,
ROW_NUMBER() over (partition by NewData.groupIdentifier order by NewData.rowOrdinal desc) as ActualOrdinal
from
@groupsInNewData NewDataGroup
inner join @newData NewData on
NewData.groupIdentifier = NewDataGroup.groupIdentifier
where
NewDataGroup.groupSize = 3) as T
pivot ( min(value) for T.ActualOrdinal in ([1], [2], [3]) ) as p
select
ExistingData.groupIdentifier as ExistingGroupIdentifier,
NewData.groupIdentifier as NewGroupIdentifier
from
@newGroupsOfSizeThree NewData
inner join @existingGroupsOfSizeThree ExistingData on
ExistingData.valueOne = NewData.valueOne
and ExistingData.valueTwo = NewData.valueTwo
and ExistingData.valueThree = NewData.valueThree
General idea
The given tables can have several rows for the same group ID.
If we had a method to collapse the given tables in such a way that they had one row for each group ID, plus all the values of the group in one column, then it would become trivial to find all matching groups.
If we did this transformation
@existingData -> #ExistingDataGrouped (ID, DataValues)
@newData -> #NewDataGrouped (ID, DataValues)
then the final query would look like this (note that we are joining on DataValues, not ID):
SELECT
E.ID, N.ID
FROM
#ExistingDataGrouped AS E
INNER JOIN #NewDataGrouped AS N ON N.DataValues = E.DataValues
How to make the grouped tables
convert values into XML (search for "group_concat" for SQL Server, e.g. How to make a query with group_concat in sql server)
use a CLR implementation of a GroupConcat function with an extra parameter for specifying the order. I personally used http://groupconcat.codeplex.com/ and it could be a good start.
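On SQL Server 2017 and later there is a third option: STRING_AGG with WITHIN GROUP needs neither XML tricks nor CLR. A minimal sketch (it assumes the values never contain the separator character):
SELECT groupIdentifier,
       STRING_AGG(value, ',') WITHIN GROUP (ORDER BY rowOrdinal) AS DataValues
FROM @existingData
GROUP BY groupIdentifier;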
Some optimization
If the number of source rows is significant, it is possible to do some preliminary filtering by using CHECKSUM_AGG.
WITH
CTE_ExistingRN
AS
(
SELECT
GroupIdentifier
,ROW_NUMBER() OVER(PARTITION BY GroupIdentifier ORDER BY RowOrdinal) AS rn
,Value
FROM @existingData
)
,CTE_NewRN
AS
(
SELECT
GroupIdentifier
,ROW_NUMBER() OVER(PARTITION BY GroupIdentifier ORDER BY RowOrdinal) AS rn
,Value
FROM @newData
)
,CTE_ExistingAgg
AS
(
SELECT
GroupIdentifier
, CHECKSUM_AGG(CHECKSUM(rn, Value)) AS DataValues
FROM CTE_ExistingRN
GROUP BY GroupIdentifier
)
,CTE_NewAgg
AS
(
SELECT
GroupIdentifier
, CHECKSUM_AGG(CHECKSUM(rn, Value)) AS DataValues
FROM CTE_NewRN
GROUP BY GroupIdentifier
)
SELECT
CTE_ExistingAgg.GroupIdentifier AS ExistingGroupIdentifier
, CTE_NewAgg.GroupIdentifier AS NewGroupIdentifier
FROM
CTE_ExistingAgg
INNER JOIN CTE_NewAgg ON CTE_NewAgg.DataValues = CTE_ExistingAgg.DataValues
;
First, we re-number all rows so that each group starts from 1 (CTE_ExistingRN and CTE_NewRN).
CHECKSUM(rn, Value) returns an integer for each source row, taking into account the row number and its value; different values usually produce different checksums.
CHECKSUM_AGG then combines all of a group's checksums into a single value.
Result set:
ExistingGroupIdentifier NewGroupIdentifier
100 1
100 2
This result contains all groups that match exactly (100, 1), but it can also contain some groups that do not match, whose checksums just happened to collide (100, 2). That's why this step is only preliminary: to get accurate results you should compare actual values, not their checksums. Still, this step may filter out a significant number of groups that definitely don't match.
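If you prefer to stay relational for the exact comparison, another option is to join the re-numbered rows position by position and keep only the pairs of groups where every position matches. A hedged sketch reusing the re-numbering idea from above:
WITH
CTE_ExistingRN AS
(
    SELECT groupIdentifier,
           ROW_NUMBER() OVER(PARTITION BY groupIdentifier ORDER BY rowOrdinal) AS rn,
           value,
           COUNT(*) OVER(PARTITION BY groupIdentifier) AS groupSize
    FROM @existingData
),
CTE_NewRN AS
(
    SELECT groupIdentifier,
           ROW_NUMBER() OVER(PARTITION BY groupIdentifier ORDER BY rowOrdinal) AS rn,
           value,
           COUNT(*) OVER(PARTITION BY groupIdentifier) AS groupSize
    FROM @newData
)
SELECT
    e.groupIdentifier AS ExistingGroupIdentifier,
    n.groupIdentifier AS NewGroupIdentifier
FROM CTE_ExistingRN AS e
INNER JOIN CTE_NewRN AS n
    ON n.rn = e.rn
    AND n.value = e.value
    AND n.groupSize = e.groupSize  -- groups of different sizes can never match
GROUP BY e.groupIdentifier, n.groupIdentifier, e.groupSize
HAVING COUNT(*) = e.groupSize;     -- every position matched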
Solution using XML
This solution converts values of each group into XML and would provide accurate results. I personally never used FOR XML before and was curious to see how it works.
WITH
CTE_ExistingGroups
AS
(
SELECT DISTINCT GroupIdentifier
FROM @existingData
)
,CTE_NewGroups
AS
(
SELECT DISTINCT GroupIdentifier
FROM @newData
)
,CTE_ExistingAgg
AS
(
SELECT
GroupIdentifier
,CA_Data.XML_Value AS DataValues
FROM
CTE_ExistingGroups
CROSS APPLY
(
SELECT Value+','
FROM @existingData
WHERE GroupIdentifier = CTE_ExistingGroups.GroupIdentifier
ORDER BY RowOrdinal FOR XML PATH(''), TYPE
) AS CA_XML(XML_Value)
CROSS APPLY
(
SELECT CA_XML.XML_Value.value('.', 'NVARCHAR(MAX)')
) AS CA_Data(XML_Value)
)
,CTE_NewAgg
AS
(
SELECT
GroupIdentifier
,CA_Data.XML_Value AS DataValues
FROM
CTE_NewGroups
CROSS APPLY
(
SELECT Value+','
FROM @newData
WHERE GroupIdentifier = CTE_NewGroups.GroupIdentifier
ORDER BY RowOrdinal FOR XML PATH(''), TYPE
) AS CA_XML(XML_Value)
CROSS APPLY
(
SELECT CA_XML.XML_Value.value('.', 'NVARCHAR(MAX)')
) AS CA_Data(XML_Value)
)
SELECT
CTE_ExistingAgg.GroupIdentifier AS ExistingGroupIdentifier
, CTE_NewAgg.GroupIdentifier AS NewGroupIdentifier
FROM
CTE_ExistingAgg
INNER JOIN CTE_NewAgg ON CTE_NewAgg.DataValues = CTE_ExistingAgg.DataValues
;
Result set:
ExistingGroupIdentifier NewGroupIdentifier
100 1
Try this:
declare @existingData table (
groupIdentifier int,
rowOrdinal int,
value varchar(1))
insert into @existingData values
(100, 0, 'X'),
(100, 1, 'Y'),
(200, 0, 'A'),
(200, 1, 'B'),
(200, 2, 'C'),
(40, 0, 'X'),
(41, 0, 'Y')
declare @newData table (
groupIdentifier int,
rowOrdinal int,
value varchar(1))
insert into @newData values
(1, 55, 'X'),
(1, 59, 'Y'),
(2, 0, 'Y'),
(2, 1, 'X')
declare @results table (
    existingGID int,
    newGID int)

DECLARE @existingGroupID int

-- Outer cursor: visit every distinct group in the existing data
DECLARE outer_cursor CURSOR FOR
SELECT DISTINCT groupIdentifier FROM @existingData

OPEN outer_cursor
FETCH NEXT FROM outer_cursor INTO @existingGroupID

WHILE @@FETCH_STATUS = 0
BEGIN
    DECLARE @existingGroupCount int
    SELECT @existingGroupCount = COUNT(value) FROM @existingData WHERE groupIdentifier = @existingGroupID

    DECLARE @newGroupID int

    -- Inner cursor: compare the current existing group with every new group
    DECLARE inner_cursor CURSOR FOR
    SELECT DISTINCT groupIdentifier from @newData

    OPEN inner_cursor
    FETCH NEXT FROM inner_cursor INTO @newGroupID

    WHILE @@FETCH_STATUS = 0
    BEGIN
        DECLARE @newGroupCount int
        SELECT @newGroupCount = COUNT(value) FROM @newData WHERE groupIdentifier = @newGroupID

        -- if groups are different sizes, skip
        IF @newGroupCount = @existingGroupCount
        BEGIN
            DECLARE @newStart int = -1
            DECLARE @currentValue varchar(1)
            DECLARE @validGroup bit = 1

            -- Walk the existing group's values in rowOrdinal order
            DECLARE equality_cursor CURSOR FOR
            SELECT value FROM @existingData WHERE groupIdentifier = @existingGroupID ORDER BY rowOrdinal

            OPEN equality_cursor
            FETCH NEXT FROM equality_cursor INTO @currentValue

            WHILE @@FETCH_STATUS = 0
            BEGIN
                -- Advance to the next value of the new group (rowOrdinal need not be 0-based)
                DECLARE @newValue varchar(1)
                SELECT TOP 1 @newValue = value, @newStart = rowOrdinal FROM @newData WHERE groupIdentifier = @newGroupID AND @newStart < rowOrdinal ORDER BY rowOrdinal

                IF(@newValue <> @currentValue)
                BEGIN
                    -- A position mismatch disqualifies this pair of groups
                    SET @validGroup = 0
                    BREAK
                END
                FETCH NEXT FROM equality_cursor INTO @currentValue
            END

            CLOSE equality_cursor
            DEALLOCATE equality_cursor

            IF @validGroup = 1
            BEGIN
                INSERT INTO @results (existingGID, newGID) VALUES (@existingGroupID, @newGroupID)
            END
        END

        FETCH NEXT FROM inner_cursor INTO @newGroupID
    END

    CLOSE inner_cursor
    DEALLOCATE inner_cursor

    FETCH NEXT FROM outer_cursor INTO @existingGroupID
END

CLOSE outer_cursor
DEALLOCATE outer_cursor

SELECT * FROM @results
I need to get going, but I'll edit this later with better comments to explain what the code does.
I've inherited some fun SQL and am trying to figure out how to eliminate rows with duplicate IDs. Our indexes are stored in a somewhat columnar format and then we pivot all the rows into one, with the values as different columns.
The sample below returns three rows of unique data, but the IDs are duplicated. I need just two rows with unique IDs (and the other columns that go along with them). I know I'll be losing some data, but I just need one matching row per ID for the query (first, top, oldest, newest, whatever).
I've tried using DISTINCT, GROUP BY, and ROW_NUMBER, but I keep getting the syntax wrong, or using them in the wrong place.
I'm also open to rewriting the query completely in a way that is reusable, as I currently have to generate this on the fly (cardtypes and cardindexes are user defined) and would love to be able to create a stored procedure. Thanks in advance!
declare @cardtypes table ([ID] int, [Name] nvarchar(50))
declare @cards table ([ID] int, [CardTypeID] int, [Name] nvarchar(50))
declare @cardindexes table ([ID] int, [CardID] int, [IndexType] int, [StringVal] nvarchar(255), [DateVal] datetime)
INSERT INTO @cardtypes VALUES (1, 'Funny Cards')
INSERT INTO @cardtypes VALUES (2, 'Sad Cards')
INSERT INTO @cards VALUES (1, 1, 'Bunnies')
INSERT INTO @cards VALUES (2, 1, 'Dogs')
INSERT INTO @cards VALUES (3, 1, 'Cat')
INSERT INTO @cards VALUES (4, 1, 'Cat2')
INSERT INTO @cardindexes VALUES (1, 1, 1, 'Bunnies', null)
INSERT INTO @cardindexes VALUES (2, 1, 1, 'playing', null)
INSERT INTO @cardindexes VALUES (3, 1, 2, null, '2014-09-21')
INSERT INTO @cardindexes VALUES (4, 2, 1, 'Dogs', null)
INSERT INTO @cardindexes VALUES (5, 2, 1, 'playing', null)
INSERT INTO @cardindexes VALUES (6, 2, 1, 'poker', null)
INSERT INTO @cardindexes VALUES (7, 2, 2, null, '2014-09-22')
SELECT TOP(100)
[ID] = c.[ID],
[Name] = c.[Name],
[Keyword] = [colKeyword].[StringVal],
[DateAdded] = [colDateAdded].[DateVal]
FROM @cards AS c
LEFT JOIN @cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN @cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
ORDER BY [DateAdded]
Edit:
While both solutions are valid, I ended up using the MAX() solution from @popovitsj as it was easier to implement. The issue of data coming from multiple rows doesn't really factor in for me, as all rows are essentially part of the same record. I will most likely use both solutions depending on my needs.
Here's my updated query (as it didn't quite match the answer):
SELECT TOP(100)
[ID] = c.[ID],
[Name] = MAX(c.[Name]),
[Keyword] = MAX([colKeyword].[StringVal]),
[DateAdded] = MAX([colDateAdded].[DateVal])
FROM @cards AS c
LEFT JOIN @cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN @cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
GROUP BY c.ID
ORDER BY [DateAdded]
You could use MAX or MIN to 'decide' on what to display for the other columns in the rows that are duplicate.
SELECT ID, MAX(Name), MAX(Keyword), MAX(DateAdded)
(...)
GROUP BY ID;
Using the ROW_NUMBER windowed function along with a CTE will do this pretty well. For example:
;With preResult AS (
SELECT TOP(100)
[ID] = c.[ID],
[Name] = c.[Name],
[Keyword] = [colKeyword].[StringVal],
[DateAdded] = [colDateAdded].[DateVal],
ROW_NUMBER()OVER(PARTITION BY c.ID ORDER BY [colDateAdded].[DateVal]) rn
FROM @cards AS c
LEFT JOIN @cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN @cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
ORDER BY [DateAdded]
)
SELECT * from preResult WHERE rn = 1
I have 2 tables: sets and groups. Both are joined using a 3rd table, set_has_group.
I would like to get sets that have ALL the groups that I specify.
One way of doing it would be:
SELECT column1, column2 FROM sets WHERE
id IN(SELECT set_id FROM set_has_group WHERE group_id = 1)
AND id IN(SELECT set_id FROM set_has_group WHERE group_id = 2)
AND id IN(SELECT set_id FROM set_has_group WHERE group_id = 3)
Obviously this is not the most beautiful solution.
I've also tried this:
SELECT column1, column2 FROM sets WHERE
id IN(SELECT set_id FROM set_has_group WHERE group_id IN(1,2,3) GROUP BY group_id
HAVING COUNT(*) = 3)
This looks prettier, but the problem is that it takes forever to execute.
While the first query runs in about 200 ms, the second one takes more than 1 minute.
Any idea why that is?
Update:
I've played with this some more and I modified the 2nd query like this
SELECT columns FROM `set` WHERE id IN(
select set_id FROM
(
SELECT set_id FROM set_has_group
WHERE group_id IN(1,2,3)
GROUP BY set_id HAVING COUNT(*) = 3
) as temp
)
that is really fast.
It's the same as the 2nd query from before, just wrapped in another derived table.
Pretty strange.
I suspect a small mistyping in the second query.
Really, I am not sure, but probably the second query is executed via a full table scan, while in the first one the "IN" is really transformed into an "EXISTS". So you can try to use "exists". For example:
...
where 3 = (select count(*) from set_has_group
where group_id in (1, 2, 3) and set_id = id
group by set_id)
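A complete version of that idea, as a hedged sketch with the table and column names from the question (the correlated form needs no GROUP BY, and it assumes (set_id, group_id) pairs are unique in set_has_group):
SELECT s.column1, s.column2
FROM sets AS s
WHERE 3 = (SELECT COUNT(*)
           FROM set_has_group AS shg
           WHERE shg.group_id IN (1, 2, 3)
             AND shg.set_id = s.id);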
Assuming SQL Server, here is a working example using a correlated count that should perform better than the IN clauses you are using, as long as you have your primary and foreign keys set correctly. I have joined 5 sets to 3 groups, but sets 4 and 5 are not part of group 3 and will not show in the answer. However, this query is not scalable (for example, finding groups 4, 5, 7, 8, and 13 would require code modifications unless you parse the input params into a table variable).
set nocount on
declare @sets table
(
Id INT Identity (1, 1),
Column1 VarChar (50),
Column2 VarChar (50)
)
declare @Set_Has_Group table
(
Set_Id Int,
Group_Id Int
)
insert into @sets values (newid(), newid())
insert into @sets values (newid(), newid())
insert into @sets values (newid(), newid())
insert into @sets values (newid(), newid())
insert into @sets values (newid(), newid())
update @sets set column1 = 'Column1 at Row ' + Convert (varchar, id)
update @sets set column2 = 'Column2 at Row ' + Convert (varchar, id)
insert into @Set_Has_Group values (1, 1)
insert into @Set_Has_Group values (1, 2)
insert into @Set_Has_Group values (1, 3)
insert into @Set_Has_Group values (2, 1)
insert into @Set_Has_Group values (2, 2)
insert into @Set_Has_Group values (2, 3)
insert into @Set_Has_Group values (3, 1)
insert into @Set_Has_Group values (3, 2)
insert into @Set_Has_Group values (3, 3)
insert into @Set_Has_Group values (4, 1)
insert into @Set_Has_Group values (4, 2)
insert into @Set_Has_Group values (5, 1)
insert into @Set_Has_Group values (5, 2)
/* your query with IN */
SELECT column1, column2 FROM @sets WHERE
id IN(SELECT set_id FROM @Set_Has_Group WHERE group_id = 1)
AND id IN(SELECT set_id FROM @Set_Has_Group WHERE group_id = 2)
AND id IN(SELECT set_id FROM @Set_Has_Group WHERE group_id = 3)
/* my query with a correlated count */
SELECT * -- Column1, Column2
FROM @sets sets
WHERE 3 = (
SELECT Count (1)
FROM @Set_Has_Group Set_Has_Group
WHERE 1=1
AND sets.Id = Set_Has_Group.Set_Id
AND Set_Has_Group.Group_Id IN (1, 2, 3)
Group by Set_Id
)
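To lift the hard-coded (1, 2, 3), the requested group IDs can be parsed into a table variable first, as noted above. A hedged sketch (@wantedGroups is a hypothetical name; it assumes (Set_Id, Group_Id) pairs are unique):
declare @wantedGroups table (Group_Id int primary key)
insert into @wantedGroups values (1), (2), (3)

SELECT sets.Column1, sets.Column2
FROM @sets sets
WHERE (SELECT COUNT(*) FROM @wantedGroups) =
(
    SELECT Count (1)
    FROM @Set_Has_Group Set_Has_Group
    WHERE sets.Id = Set_Has_Group.Set_Id
    AND Set_Has_Group.Group_Id IN (SELECT Group_Id FROM @wantedGroups)
)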
Here's a solution that uses a non-correlated subquery and no GROUP BY:
SELECT column1, column2
FROM sets
WHERE id IN (
SELECT g1.set_id FROM set_has_group g1
JOIN set_has_group g2 ON (g1.set_id = g2.set_id)
JOIN set_has_group g3 ON (g1.set_id = g3.set_id)
WHERE g1.group_id = 1 AND g2.group_id = 2 AND g3.group_id = 3);