I have following data:
ID --- GRP_ID --- REC_VAL
1 --- 1 --- A
2 --- 2 --- A
3 --- 2 --- B
4 --- 3 --- A
5 --- 3 --- B
6 --- 3 --- C
7 --- 4 --- A
8 --- 4 --- B
9 --- 4 --- C
10 --- 5 --- A
11 --- 5 --- B
12 --- 5 --- E
Is there a way to find the IDs of the record groups that have the same set of values? (In this case only grp_id 3 and 4 have the same values.)
Second question:
Is there an efficient way to find the exact grp_id when I have a set of values? My solution is not very quick because the table with groups has over 6 million records:
-- Demo fixture for the large group table (the real one holds up to 6m records).
-- Each row attaches one value (rec_val) to a group (grp_id).
CREATE TABLE tmp_grp (
    id      NUMBER,
    grp_id  NUMBER,
    rec_val VARCHAR2(10)
);
-- Load the twelve sample rows with a single multi-table insert.
INSERT ALL
    INTO tmp_grp (id, grp_id, rec_val) VALUES (1, 1, 'A')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (2, 2, 'A')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (3, 2, 'B')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (4, 3, 'A')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (5, 3, 'B')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (6, 3, 'C')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (7, 4, 'A')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (8, 4, 'B')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (9, 4, 'C')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (10, 5, 'A')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (11, 5, 'B')
    INTO tmp_grp (id, grp_id, rec_val) VALUES (12, 5, 'E')
SELECT * FROM dual;
COMMIT;
--
-- Return the grp_id of every group whose values are exactly the set in datrec.
-- Comparing row counts only works when values are not repeated inside a
-- group or inside datrec -- TODO confirm for the real data.
WITH datrec AS (
    -- the set of values being asked for
    SELECT 'A' rec FROM dual
    UNION ALL
    SELECT 'B' rec FROM dual
),
matched AS (
    -- per group: how many of its rows carry a value from datrec
    SELECT   t.grp_id,
             COUNT(1) cnt
    FROM     tmp_grp t
             JOIN datrec d
               ON d.rec = t.rec_val
    GROUP BY t.grp_id
)
SELECT m.grp_id
FROM   matched m
WHERE  -- every requested value was found in the group ...
       m.cnt = (SELECT COUNT(1) FROM datrec)
       -- ... and the group holds no rows beyond those
  AND  m.cnt = (SELECT COUNT(1) FROM tmp_grp g WHERE g.grp_id = m.grp_id);
--
This question is similar to "Find group of records that match multiple values", but that topic only covers a given set of values (in my case the number of values and the values in column rec of datrec will be provided by another query), and its query returns every group that contains this set. I need to return only the groups that match the set exactly.
UPDATE
- added data in table for better clarification
Also related to How to compare groups of tuples in sql
Here is a way that avoids joining the base table to itself. It will be more efficient especially if there are several (many?) possible values of rec_val for each grp_id. It can be made faster still if the distinct grp_id already exist somewhere in your data; I create them on the fly.
-- List every pair of groups (grp_id_1 < grp_id_2) that hold the same values,
-- without joining tmp_grp to itself.
-- Assumes a value appears at most once per group -- TODO confirm.
with group_ids ( grp_id ) as (
    -- one row per group
    select distinct grp_id
    from   tmp_grp
),
pair_values ( grp_id_1, grp_id_2, rec_val ) as (
    -- values contributed by the lower-numbered group of each pair
    select lo.grp_id, hi.grp_id, lo.rec_val
    from   tmp_grp lo
           join group_ids hi on lo.grp_id < hi.grp_id
    union all
    -- values contributed by the higher-numbered group of each pair
    select lo.grp_id, hi.grp_id, hi.rec_val
    from   group_ids lo
           join tmp_grp hi on lo.grp_id < hi.grp_id
),
value_hits ( grp_id_1, grp_id_2, cnt ) as (
    -- per pair and value: number of rows contributed by the two branches above
    select   grp_id_1, grp_id_2, count(*)
    from     pair_values
    group by grp_id_1, grp_id_2, rec_val
)
select   grp_id_1, grp_id_2
from     value_hits
group by grp_id_1, grp_id_2
-- keep the pair only when its lowest per-value count is 2
having   min(cnt) = 2
;
Related
I have a matter which seemed simple to solve but now I find it troublesome.
In simplification - I need to find a way to identify unique sets of rows within groups defined by another column. In basic example the source table contains only two columns:
routeID nodeID nodeName
1 1 a
1 2 b
2 1 a
2 2 b
3 1 a
3 2 b
4 1 a
4 2 c
5 1 a
5 2 c
6 1 a
6 2 b
6 3 d
7 1 a
7 2 b
7 3 d
So, the routeID column refers to set of nodes which define a route.
What I need to do is to somehow group the routes, so that there will be only one unique sequence of nodes for one routeID.
In my actual case I tried to use window function to add columns which help to identify nodes sequence, but I still have no idea how to get those unique sequences and group routes.
As a final effect I want to get only unique routes - for example routes 1,2 and 3 aggregated to one route.
Do you have any idea how to help me ?
EDIT:
The other table which I would like to join with the one from the example may look like that:
journeyID nodeID nodeName routeID
1 1 a 1
1 2 b 1
2 1 a 1
2 2 b 1
3 1 a 4
3 2 c 4
...........................
...........................
You can try this idea:
-- Sample data: one row per (route, node).
-- `DECLARE #DataSource TABLE` is not valid T-SQL (a table variable name must
-- start with @). The object is created as a local temporary table instead,
-- so the #DataSource references in the queries below resolve as written.
IF OBJECT_ID('tempdb..#DataSource') IS NOT NULL
    DROP TABLE #DataSource;
CREATE TABLE #DataSource
(
    [routeID]  TINYINT
   ,[nodeID]   TINYINT
   ,[nodeName] CHAR(1)
);
-- Numeric literals are passed as numbers rather than quoted strings, which
-- avoids an implicit varchar -> tinyint conversion.
INSERT INTO #DataSource ([routeID], [nodeID], [nodeName])
VALUES (1, 1, 'a')
      ,(1, 2, 'b')
      ,(2, 1, 'a')
      ,(2, 2, 'b')
      ,(3, 1, 'a')
      ,(3, 2, 'b')
      ,(4, 1, 'a')
      ,(4, 2, 'c')
      ,(5, 1, 'a')
      ,(5, 2, 'c')
      ,(6, 1, 'a')
      ,(6, 2, 'b')
      ,(6, 3, 'd')
      ,(7, 1, 'a')
      ,(7, 2, 'b')
      ,(7, 3, 'd');
-- One row per route: its node names as a comma-separated list (ordered by
-- nodeID) and a running number among the routes sharing that same list.
SELECT
    routes.[routeID],
    nodes.[value],
    ROW_NUMBER() OVER (PARTITION BY nodes.[value] ORDER BY routes.[routeID]) AS [rowID]
FROM
(
    -- distinct route ids
    SELECT DISTINCT src.[routeID]
    FROM #DataSource AS src
) AS routes ([routeID])
CROSS APPLY
(
    -- build ",a,b,..." for the current route, then STUFF removes the leading comma
    SELECT STUFF
    (
        (
            SELECT ',' + item.[nodeName]
            FROM #DataSource AS item
            WHERE item.[routeID] = routes.[routeID]
            ORDER BY item.[nodeID]
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'),
        1,
        1,
        ''
    )
) AS nodes ([value]);
The code will give you this output:
So, you simply need to filter by rowID = 1. Of course, you can change the code as you like to satisfy your business criteria (for example, showing not the first route ID with the same nodes, but the last).
Also, ROW_NUMBER function cannot be used directly in the WHERE clause, so you need to wrap the code before filtering:
-- Return the node rows of the first route (lowest routeID) for each distinct
-- node-name list; routes repeating an already seen list are left out.
WITH RankedRoutes AS
(
    SELECT
        routes.[routeID],
        nodes.[value],
        ROW_NUMBER() OVER (PARTITION BY nodes.[value] ORDER BY routes.[routeID]) AS [rowID]
    FROM
    (
        -- distinct route ids
        SELECT DISTINCT src.[routeID]
        FROM #DataSource AS src
    ) AS routes ([routeID])
    CROSS APPLY
    (
        -- comma-separated node names of the current route, ordered by nodeID
        SELECT STUFF
        (
            (
                SELECT ',' + item.[nodeName]
                FROM #DataSource AS item
                WHERE item.[routeID] = routes.[routeID]
                ORDER BY item.[nodeID]
                FOR XML PATH(''), TYPE
            ).value('.', 'VARCHAR(MAX)'),
            1,
            1,
            ''
        )
    ) AS nodes ([value])
)
SELECT detail.*
FROM RankedRoutes AS firstRoute
INNER JOIN #DataSource AS detail
    ON firstRoute.[routeID] = detail.[routeID]
-- rowID = 1 marks the first route per node-name list
WHERE firstRoute.[rowID] = 1;
ok, let's use some recursion to create a complete node list for each routeID
First of all, let's populate the source table and the journeys table:
-- `declare #x as table` is not valid T-SQL (table variables are named @x).
-- The three objects are created as local temporary tables so the later
-- statements that reference #r, #j and #routes work as written.
IF OBJECT_ID('tempdb..#r') IS NOT NULL DROP TABLE #r;
IF OBJECT_ID('tempdb..#j') IS NOT NULL DROP TABLE #j;
IF OBJECT_ID('tempdb..#routes') IS NOT NULL DROP TABLE #routes;
-- your source
create table #r (routeID int, nodeID int, nodeName char(1));
-- your other table
create table #j (journeyID int, nodeID int, nodeName char(1), routeID int);
-- temp results table: one row per route with its comma-separated node names
create table #routes (routeID int primary key, nodeNames varchar(1000));
-- Load the sample route definitions into #r.
insert into #r (routeID, nodeID, nodeName)
values
    (1, 1, 'a'),
    (1, 2, 'b'),
    (2, 1, 'a'),
    (2, 2, 'b'),
    (3, 1, 'a'),
    (3, 2, 'b'),
    (4, 1, 'a'),
    (4, 2, 'c'),
    (5, 1, 'a'),
    (5, 2, 'c'),
    (6, 1, 'a'),
    (6, 2, 'b'),
    (6, 3, 'd'),
    (7, 1, 'a'),
    (7, 2, 'b'),
    (7, 3, 'd');
-- Load the sample journeys into #j.
-- The original derived-table aliases (journeyID, routeID, nodeID, nodeName)
-- did not match the value order; the insert only worked because it was
-- positional. Source aliases and target columns are now named explicitly
-- and agree with the data: (journeyID, nodeID, nodeName, routeID).
;with
s as (
    select v.journeyID, v.nodeID, v.nodeName, v.routeID
    from (
        values
            (1, 1, 'a', 1),
            (1, 2, 'b', 1),
            (2, 1, 'a', 1),
            (2, 2, 'b', 1),
            (3, 1, 'a', 4),
            (3, 2, 'c', 4)
    ) v (journeyID, nodeID, nodeName, routeID)
)
insert into #j (journeyID, nodeID, nodeName, routeID)
select s.journeyID, s.nodeID, s.nodeName, s.routeID
from s;
Now let's extract the routes:
-- Build the comma-separated node list of every route and store it in #routes.
;with
numbered as (
    -- n2 counts a route's nodes from the end, so n2 = 1 is its last node
    select src.routeID,
           src.nodeID,
           src.nodeName,
           row_number() over (partition by src.routeID order by src.nodeID desc) as n2
    from #r as src
),
walk as (
    -- anchor: start at node 1 of every route
    select n.routeID,
           n.nodeID,
           n.nodeName,
           n.n2,
           cast(n.nodeName as varchar(1000)) as Names,
           cast(0 as bigint) as i2
    from numbered as n
    where n.nodeID = 1
    union all
    -- step: append the node whose nodeID is one higher than the previous one
    select n.routeID,
           n.nodeID,
           n.nodeName,
           n.n2,
           cast(prev.Names + ',' + n.nodeName as varchar(1000)),
           prev.n2
    from numbered as n
    inner join walk as prev
        on prev.routeID = n.routeID
       and prev.nodeID = n.nodeID - 1
)
-- keep only the row reached at the last node, which carries the full list
insert into #routes
select w.routeID, w.Names
from walk as w
where w.n2 = 1
table #routes will be like this:
routeID nodeNames
1 'a,b'
2 'a,b'
3 'a,b'
4 'a,c'
5 'a,c'
6 'a,b,d'
7 'a,b,d'
And now the final output:
-- the unique routes: lowest routeID for each distinct node list
select min(rt.routeID) as routeID,
       rt.nodeNames
from #routes as rt
group by rt.nodeNames;
-- the unique journeys: lowest journeyID for each distinct node list
select min(jr.journeyID) as journeyID,
       rt.nodeNames
from #j as jr
inner join #routes as rt
    on jr.routeID = rt.routeID
group by rt.nodeNames;
output:
routeID nodeNames
1 'a,b'
4 'a,c'
6 'a,b,d'
and
journeyID nodeNames
1 'a,b'
3 'a,c'
I have a simple tree that has 4 level of deep data. Here is the table DDL
-- Adjacency-list tree: every row points at its parent through PARENT_ID.
CREATE TABLE HIER_DEMO
(
    ID        NUMBER,
    LABEL     VARCHAR2(100),
    PARENT_ID NUMBER
)
Hierarchy starts WITH ID = PARENT_ID. Number of levels are fixed. It is always 4. We have leafs to all branches at 4th level. So we can also add 3 more columns that represent LABEL of ancestors if necessary.
I need to build a query that
Searches for particular phrase in LABEL on any level of hierarchy. For example LABEL LIKE '%MAGIC_WORD%'.
Returns all the nodes till leaf level under the hierarchy node that satisfies condition 1.
In addition we need to return all the ancestors of the hierarchy node that satisfies condition 1.
Here is an example
-- Sample hierarchy. Rows 1 and 2 have ID = PARENT_ID.
-- Column lists are spelled out so the inserts do not depend on the table's
-- column order.
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (1, 'Mike', 1);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (2, 'Arthur', 2);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (3, 'Alex', 1);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (4, 'Suzanne', 1);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (5, 'Brian', 3);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (6, 'Rick', 3);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (7, 'Patrick', 4);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (8, 'Simone', 4);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (9, 'Tim', 5);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (10, 'Andrew', 5);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (11, 'Sandy', 6);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (12, 'Brian', 6);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (13, 'Chris', 7);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (14, 'Laure', 7);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (15, 'Maureen', 8);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (16, 'Andy', 8);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (17, 'Al', 2);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (18, 'John', 17);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (19, 'Frank', 18);
INSERT INTO HIER_DEMO (ID, LABEL, PARENT_ID) VALUES (20, 'Tim', 19);
I am looking for the query that searches the tree for word 'Brian' in the LABEL column
The query should return these data
ID LABEL PARENT_ID
1 Mike 1
3 Alex 1
5 Brian 3
6 Rick 3
9 Tim 5
10 Andrew 5
12 Brian 6
Could somebody help with the Oracle query? We are using 11.2 version of Oracle database.
-- Matching nodes plus everything below them (first branch) and everything
-- above them (second branch); UNION removes rows found by both branches.
-- Roots are stored with ID = PARENT_ID, so each branch must refuse to follow
-- that self-reference. The downward branch originally did not and raised
-- ORA-01436 (CONNECT BY loop) whenever the search matched a root label.
select id, label, parent_id
from   HIER_DEMO
start with label like '%Brian%'
connect by prior id = parent_id
       and id != parent_id
union
select id, label, parent_id
from   HIER_DEMO
start with label like '%Brian%'
connect by prior parent_id = id
       and prior parent_id != prior id
/
ID LABEL PARENT_ID
---- -------------------- ---------
1 Mike 1
3 Alex 1
5 Brian 3
6 Rick 3
9 Tim 5
10 Andrew 5
12 Brian 6
We can use recursive CTE to accomplish this
-- CTE1 climbs from every matching node up to its root; CTE2 descends from
-- every matching node to its leaves. The final UNION merges both sets and
-- removes duplicates.
-- Roots are stored with ID = PARENT_ID. The original stopped CTE1 before the
-- root and then appended the row with ID = 1, which returns the wrong root
-- for any node that belongs to another tree (e.g. under root 2). CTE1 now
-- includes the real root and simply does not climb further from it; CTE2
-- skips the root's self-reference so a match on a root cannot cycle.
WITH CTE1 (ID, LABEL, PARENT_ID) AS
(
    SELECT h.ID, h.LABEL, h.PARENT_ID
    FROM Hier_Demo h
    WHERE h.LABEL LIKE '%Brian%'
    UNION ALL
    SELECT h.ID, h.LABEL, h.PARENT_ID
    FROM Hier_Demo h
    INNER JOIN CTE1 c
        ON h.ID = c.PARENT_ID
    -- a root is its own parent: do not step "up" from it again
    WHERE c.ID <> c.PARENT_ID
),
CTE2 (ID, LABEL, PARENT_ID) AS
(
    SELECT h.ID, h.LABEL, h.PARENT_ID
    FROM Hier_Demo h
    WHERE h.LABEL LIKE '%Brian%'
    UNION ALL
    SELECT h.ID, h.LABEL, h.PARENT_ID
    FROM Hier_Demo h
    INNER JOIN CTE2 c
        ON h.PARENT_ID = c.ID
    -- never treat a root as a child of itself
    WHERE h.ID <> h.PARENT_ID
)
SELECT ID, LABEL, PARENT_ID FROM CTE2
UNION
SELECT ID, LABEL, PARENT_ID FROM CTE1
In the above code CTE1 gets records up in the hierarchy and CTE2 gets records down in the hierarchy of Brian, after that we just UNION the records returned by these CTEs
see the code working at SQLFiddle: http://sqlfiddle.com/#!4/0c99d/39
You can try this :
-- Direct children of every node labelled 'Brian'.
-- IN replaces "=": the sample data holds two 'Brian' rows (IDs 5 and 12), so
-- a scalar comparison fails with ORA-01427 (single-row subquery returns more
-- than one row).
SELECT ID, LABEL, PARENT_ID
FROM HIER_DEMO
WHERE PARENT_ID IN
    (SELECT id FROM HIER_DEMO WHERE LABEL = 'Brian');
I'm trying to query a hierarchy of data in a single database table from the bottom up (I don't want to include parents that don't have a particular type of child due to authorities). The schema and sample data are as follows:
-- Fixture for the bottom-up hierarchy question.
-- Every INSERT names its columns so it does not depend on column order;
-- single-row VALUES are kept because the target includes SQL Server 2005.
create table Users(
    id int,
    name varchar(100));
insert into Users (id, name) values (1, 'Jill');
-- nodes: parent holds the id of the parent node (0 for nodes A and B)
create table nodes(
    id int,
    name varchar(100),
    parent int,
    nodetype int);
insert into nodes (id, name, parent, nodetype) values (1, 'A', 0, 1);
insert into nodes (id, name, parent, nodetype) values (2, 'B', 0, 1);
insert into nodes (id, name, parent, nodetype) values (3, 'C', 1, 1);
insert into nodes (id, name, parent, nodetype) values (4, 'D', 3, 2);
insert into nodes (id, name, parent, nodetype) values (5, 'E', 1, 1);
insert into nodes (id, name, parent, nodetype) values (6, 'F', 5, 2);
insert into nodes (id, name, parent, nodetype) values (7, 'G', 5, 2);
-- nodeAccess: the query below treats access = 1 as "allowed"
create table nodeAccess(
    userid int,
    nodeid int,
    access int);
insert into nodeAccess (userid, nodeid, access) values (1, 1, 1);
insert into nodeAccess (userid, nodeid, access) values (1, 2, 1);
insert into nodeAccess (userid, nodeid, access) values (1, 3, 1);
insert into nodeAccess (userid, nodeid, access) values (1, 4, 1);
insert into nodeAccess (userid, nodeid, access) values (1, 5, 1);
insert into nodeAccess (userid, nodeid, access) values (1, 6, 0);
insert into nodeAccess (userid, nodeid, access) values (1, 7, 1);
-- Start from the type-2 nodes user 1 may access and walk up through the
-- accessible type-1 parents. A parent reached from several children is
-- returned once per path.
with Tree (id, name, nodetype, parent)
as
(
    -- anchor: accessible nodes of type 2
    select leaf.id, leaf.name, leaf.nodetype, leaf.parent
    from nodes as leaf
    inner join nodeAccess as leafAccess
        on leafAccess.nodeid = leaf.id
    where leafAccess.userid = 1
      and leafAccess.access = 1
      and leaf.nodetype = 2
    union all
    -- step: the accessible type-1 parent of a row already in Tree
    select up.id, up.name, up.nodetype, up.parent
    from nodes as up
    inner join Tree as child
        on child.parent = up.id
    inner join nodeAccess as upAccess
        on upAccess.nodeid = up.id
    where upAccess.userid = 1
      and upAccess.access = 1
      and up.nodetype = 1
)
select id, name, nodetype, parent
from Tree
Yields:
id name nodetype parent
4 D 2 3
7 G 2 5
5 E 1 1
1 A 1 0
3 C 1 1
1 A 1 0
How can I not include the duplicates in the result set? The queries against the real tables have many more nodes at the lowest levels and hence many more duplicates of the parent nodes. The solution needs to work with at least SQL Server 2005.
Thanks in advance!
The simplest (not necessarily the most efficient) solution:
...
)
SELECT DISTINCT id,name,nodetype,parent FROM Tree;
This changes the order from your sample output because the DISTINCT operator implements a sort. If there is some intentional ordering there I cannot detect it but you can add an ORDER BY if you know the order you want.
My tables:
suggestions:
suggestion_id|title|description|user_id|status|created_time
suggestion_comments:
scomment_id|text|user_id|suggestion_id
suggestion_votes:
user_id|suggestion_id|value
Where value is the number of points assigned to a vote.
I'd like to be able to SELECT:
suggestion_id, title, the number of comments and the SUM of values for that suggestion.
sorted by SUM of values. LIMIT 30
Any ideas?
You may want to try using sub queries, as follows:
-- Per suggestion: number of comments and total vote value, best first.
-- Changes from the original:
--   * COALESCE turns the NULL that SUM yields for a suggestion without
--     votes into 0, so such rows sort and display predictably;
--   * ORDER BY / LIMIT 30 implement the sorting and cap asked for.
-- s.title can be added to the select list once the real table (which has a
-- title column) is used; the test-case table below does not have it.
SELECT   s.suggestion_id,
         (
           SELECT COUNT(*)
           FROM   suggestion_comments sc
           WHERE  sc.suggestion_id = s.suggestion_id
         ) AS num_of_comments,
         COALESCE(
           (
             SELECT SUM(sv.value)
             FROM   suggestion_votes sv
             WHERE  sv.suggestion_id = s.suggestion_id
           ), 0) AS sum_of_values
FROM     suggestions s
ORDER BY sum_of_values DESC
LIMIT    30;
Test case:
-- Minimal test schema: only the columns the counting query needs.
CREATE TABLE suggestions (suggestion_id int);
CREATE TABLE suggestion_comments (scomment_id int, suggestion_id int);
CREATE TABLE suggestion_votes (user_id int, suggestion_id int, value int);
-- Column lists are explicit so the inserts do not depend on column order.
INSERT INTO suggestions (suggestion_id)
VALUES (1), (2), (3);
-- 2 comments on suggestion 1, 3 on suggestion 2, 1 on suggestion 3
INSERT INTO suggestion_comments (scomment_id, suggestion_id)
VALUES (1, 1),
       (2, 1),
       (3, 2),
       (4, 2),
       (5, 2),
       (6, 3);
-- vote totals: suggestion 1 = 9, suggestion 2 = 6, suggestion 3 = 5
INSERT INTO suggestion_votes (user_id, suggestion_id, value)
VALUES (1, 1, 3),
       (2, 1, 5),
       (3, 1, 1),
       (1, 2, 4),
       (2, 2, 2),
       (1, 3, 5);
Result:
+---------------+-----------------+---------------+
| suggestion_id | num_of_comments | sum_of_values |
+---------------+-----------------+---------------+
| 1 | 2 | 9 |
| 2 | 3 | 6 |
| 3 | 1 | 5 |
+---------------+-----------------+---------------+
3 rows in set (0.00 sec)
UPDATE: @Naktibalda's solution is an alternative that avoids subqueries.
I was typing the same query as potatopeelings.
But there is an issue:
Resultset after joins contains M*N rows (M-number of comments, N-number of votes, not less than 1) for each suggestion.
To avoid that you have to count distinct comment ids and divide a sum of votes by number of comments.
-- Join-based variant. Joining comments and votes to the same suggestion
-- yields M * N rows (M comments, N votes), so:
--   * comments are counted with COUNT(DISTINCT ...);
--   * the vote sum, inflated M times, is divided by the comment count
--     (GREATEST(..., 1) covers suggestions without comments).
-- Fixes: the comment key is scomment_id (the original referenced a
-- non-existent column scommentid), and COALESCE makes a suggestion without
-- votes total 0 instead of NULL.
SELECT
    s.*,
    COUNT(DISTINCT c.scomment_id) AS comment_count,
    COALESCE(SUM(v.value), 0) / GREATEST(COUNT(DISTINCT c.scomment_id), 1) AS total_votes
FROM suggestions AS s
LEFT JOIN suggestion_comments AS c ON s.suggestion_id = c.suggestion_id
LEFT JOIN suggestion_votes AS v ON s.suggestion_id = v.suggestion_id
GROUP BY s.suggestion_id
ORDER BY total_votes DESC
LIMIT 30;
Based on the following table
ID Effort Name
-------------------------
1 1 A
2 1 A
3 8 A
4 10 B
5 4 B
6 1 B
7 10 C
8 3 C
9 30 C
I want to check whether the total effort for a name is less than 40 and, if so, add a row with effort = 40 - (Total Effort) for that name. The ID of the new row can be anything. If the total effort is greater than 40, then truncate the effort of one of the rows so that the total becomes 40.
So after applying the logic above table will be
ID Effort Name
-------------------------
1 1 A
2 1 A
3 8 A
10 30 A
4 10 B
5 4 B
6 1 B
11 25 B
7 10 C
8 3 C
9 27 C
I was thinking of opening a cursor, keeping a counter of the total effort, and based on the logic insert existing and new rows in another temporary table.
I am not sure if this is an efficient way to deal with this. I would like to learn if there is a better way.
I think the first part could be done this way:
-- For every name whose total effort is below 40, add one row holding the
-- missing amount. (The stray closing parenthesis after the HAVING clause,
-- a syntax error, has been removed.)
INSERT INTO tbl (Effort, Name)
SELECT 40 - SUM(Effort), Name
FROM tbl
GROUP BY Name
HAVING SUM(Effort) < 40;
The second part is harder. Perhaps you could do something like this instead?
-- For every name whose total effort differs from 40, add one balancing row:
-- positive when the total is below 40, negative when it is above.
-- (The stray closing parenthesis after the HAVING clause has been removed.)
INSERT INTO tbl (Effort, Name)
SELECT 40 - SUM(Effort), Name
FROM tbl
GROUP BY Name
HAVING SUM(Effort) <> 40;
What this does is, rather than making changes to your actual data, adds a row with a negative number for the Name if the total effort is > 40 hours, or a positive value if it is < 40 hours. This seems much safer for your data integrity than messing with the original values.
In SQL Server 2008, this may be done with a single MERGE statement:
-- `DECLARE #efforts TABLE` is not valid T-SQL (table variables are named
-- @efforts). A local temporary table is created instead so every later
-- #efforts reference works as written.
IF OBJECT_ID('tempdb..#efforts') IS NOT NULL
    DROP TABLE #efforts;
CREATE TABLE #efforts (id INT NOT NULL PRIMARY KEY, effort INT NOT NULL, name CHAR(1));
-- Sample rows; row 10 pushes the total for C well past 40.
INSERT
INTO    #efforts (id, effort, name)
VALUES  (1, 1, 'A'),
        (2, 1, 'A'),
        (3, 8, 'A'),
        (4, 10, 'B'),
        (5, 4, 'B'),
        (6, 1, 'B'),
        (7, 10, 'C'),
        (8, 3, 'C'),
        (9, 30, 'C'),
        (10, 60, 'C');
-- Show the data before it is adjusted.
SELECT  id, effort, name
FROM    #efforts
ORDER BY
        name, id;
-- Bring every name to a total effort of exactly 40 with one MERGE:
--   * names below 40 get one extra row with the missing amount;
--   * for names above 40, the row that crosses 40 is cut down and all
--     rows after it are deleted.
;WITH    total AS
        (
        -- existing rows ...
        SELECT  e.id, e.effort, e.name
        FROM    #efforts e
        UNION ALL
        -- ... plus one top-up row per name below 40, numbered after MAX(id)
        SELECT  ROW_NUMBER() OVER (ORDER BY name) +
                (
                SELECT  MAX(id)
                FROM    #efforts
                ),
                40 - SUM(effort),
                name
        FROM    #efforts
        GROUP BY
                name
        HAVING  SUM(effort) < 40
        ),
        source AS
        (
        -- ce: cumulative effort per name up to and including this row (by id)
        -- cp: cumulative effort per name before this row
        SELECT  e.id, e.effort, e.name,
                (
                SELECT  SUM(ep.effort)
                FROM    total ep
                WHERE   ep.name = e.name
                        AND ep.id <= e.id
                ) AS ce,
                COALESCE(
                (
                SELECT  SUM(ep.effort)
                FROM    total ep
                WHERE   ep.name = e.name
                        AND ep.id < e.id
                ), 0) AS cp
        FROM    total e
        )
MERGE
INTO    #efforts e
USING   source s
ON      e.id = s.id
-- The row that crosses 40 keeps only what is still missing. Since
-- ce = cp + effort, 40 - cp equals the original effort + 40 - ce.
WHEN MATCHED AND s.cp < 40 AND s.ce > 40 THEN
UPDATE
SET     e.effort = 40 - s.cp
-- Rows that start at or beyond 40 contribute nothing and are removed. The
-- original tested cp > 40, so a row starting exactly at 40 fell into the
-- UPDATE branch and was left behind with effort 0.
WHEN MATCHED AND s.cp >= 40 THEN
DELETE
WHEN NOT MATCHED BY TARGET THEN
INSERT  (id, effort, name)
VALUES  (s.id, s.effort, s.name);
-- Show the adjusted data.
SELECT  id, effort, name
FROM    #efforts
ORDER BY
        name, id;
In SQL Server 2005, you'll need two statements (in one transaction):
-- `DECLARE #efforts TABLE` is not valid T-SQL (table variables are named
-- @efforts). A local temporary table is created instead so every later
-- #efforts reference works as written.
IF OBJECT_ID('tempdb..#efforts') IS NOT NULL
    DROP TABLE #efforts;
CREATE TABLE #efforts (id INT NOT NULL PRIMARY KEY, effort INT NOT NULL, name CHAR(1));
-- One INSERT per row: SQL Server 2005 has no multi-row VALUES clause.
INSERT INTO #efforts (id, effort, name) VALUES (1, 1, 'A');
INSERT INTO #efforts (id, effort, name) VALUES (2, 1, 'A');
INSERT INTO #efforts (id, effort, name) VALUES (3, 8, 'A');
INSERT INTO #efforts (id, effort, name) VALUES (4, 10, 'B');
INSERT INTO #efforts (id, effort, name) VALUES (5, 4, 'B');
INSERT INTO #efforts (id, effort, name) VALUES (6, 1, 'B');
INSERT INTO #efforts (id, effort, name) VALUES (7, 10, 'C');
INSERT INTO #efforts (id, effort, name) VALUES (8, 3, 'C');
INSERT INTO #efforts (id, effort, name) VALUES (9, 30, 'C');
INSERT INTO #efforts (id, effort, name) VALUES (10, 60, 'C');
-- SQL Server 2005 variant: two statements that must succeed or fail
-- together, so they are wrapped in an explicit transaction. XACT_ABORT makes
-- a run-time error roll the whole transaction back.
SET XACT_ABORT ON;
BEGIN TRANSACTION;
-- Step 1: delete every row whose running total per name (ordered by id,
-- including the row itself) exceeds 40. The row that crosses 40 is deleted
-- too; step 2 adds the missing amount back as a new row.
;WITH    total AS
        (
        SELECT  e.id, e.effort, e.name,
                COALESCE(
                (
                SELECT  SUM(ep.effort)
                FROM    #efforts ep
                WHERE   ep.name = e.name
                        AND ep.id <= e.id
                ), 0) AS running_effort
        FROM    #efforts e
        )
DELETE
FROM    total
WHERE   running_effort > 40;
-- Step 2: for every name now below 40, add one row with the missing amount,
-- numbered after the current MAX(id).
INSERT
INTO    #efforts (id, effort, name)
SELECT  (
        SELECT  MAX(id)
        FROM    #efforts
        ) +
        ROW_NUMBER() OVER (ORDER BY name),
        40 - SUM(effort),
        name
FROM    #efforts
GROUP BY
        name
HAVING  SUM(effort) < 40;
COMMIT TRANSACTION;
-- Show the adjusted data.
SELECT  id, effort, name
FROM    #efforts
ORDER BY
        name, id;
This will give you the names that need modify:
-- Names whose total effort is below 40, with that total.
-- TABLE is a reserved word, so the placeholder table name is bracket-quoted;
-- the aggregate gets an alias so the result column has a name.
SELECT Name, SUM(Effort) AS TotalEffort
FROM [Table]
GROUP BY Name
HAVING SUM(Effort) < 40;
Select this into a temp table, Add a column for 40 - SUM, then create an insert statement from that. Much better than a cursor.
This will do the first part:
-- Add one row per name whose total effort is below 40, holding the amount
-- still missing to reach 40.
INSERT INTO dbo.Test (Name, Effort)
SELECT
    src.Name,
    40 - SUM(src.Effort)
FROM dbo.Test AS src
GROUP BY src.Name
HAVING SUM(src.Effort) < 40
And this will do the second part:
-- For every name whose total effort exceeds 40, take the excess off the row
-- with the highest ID for that name.
-- Fix: the excess is SUM(Effort) - 40. The original computed 40 - SUM(Effort),
-- a negative number, so subtracting it increased the effort instead of
-- reducing it.
-- NOTE(review): if the excess is larger than that last row's effort, the row
-- ends up negative; spreading the deduction over several rows is not handled.
UPDATE a
SET a.Effort = a.Effort - b.AmountToDeduct
FROM dbo.Test a
INNER JOIN (
    SELECT t.Name, SUM(t.Effort) - 40 AS AmountToDeduct
    FROM dbo.Test t
    GROUP BY t.Name
    HAVING SUM(t.Effort) > 40
) b ON a.Name = b.Name
WHERE a.ID = (
    SELECT MAX(c.ID)
    FROM dbo.Test c
    WHERE c.Name = a.Name
);