Displaying whole table after stripping characters in SQL Server - sql

This question has 2 parts.
Part 1
I have a table "Groups":
group_ID person
-----------------------
1 Person 10
2 Person 11
3 Jack
4 Person 12
Note that not all data in the "person" column have the same format.
In SQL Server, I have used the following query to strip the "Person " characters out of the person column:
SELECT
REPLACE([person],'Person ','')
AS [person]
FROM Groups
I did not use UPDATE in the query above as I do not want to alter the data in the table.
The query returned this result:
person
------
10
11
12
However, I would like this result instead:
group_ID person
-------------------
1 10
2 11
3 Jack
4 12
What should be my query to achieve this result?
Part 2
I have another table "Details":
detail_ID group1 group2
-------------------------------
100 1 2
101 3 4
From the intended result in Part 1, where the numbers in the "person" column correspond to those in "group1" and "group2" of table "Details", how do I selectively convert the numbers in "person" to integers and join them with "Details"?
Note that all data under "person" in Part 1 are strings (nvarchar(100)).
Here is the intended query output:
detail_ID group1 group2
-------------------------------
100 10 11
101 Jack 12
Note that I do not wish to permanently alter anything in both tables and the intended output above is just a result of a SELECT query.

I don't think first part will be a problem here. Your query is working fine with your expected result.
Schema:
CREATE TABLE #Groups (group_ID INT, person VARCHAR(50));
INSERT INTO #Groups
SELECT 1,'Person 10'
UNION ALL
SELECT 2,'Person 11'
UNION ALL
SELECT 3,'Jack'
UNION ALL
SELECT 4,'Person 12';
CREATE TABLE #Details(detail_ID INT,group1 INT, group2 INT);
INSERT INTO #Details
SELECT 100, 1, 2
UNION ALL
SELECT 101, 3, 4 ;
Part 1:
For me your query is giving exactly what you are expecting
SELECT group_ID,REPLACE([person],'Person ','') AS person
FROM #Groups
+----------+--------+
| group_ID | person |
+----------+--------+
| 1 | 10 |
| 2 | 11 |
| 3 | Jack |
| 4 | 12 |
+----------+--------+
Part 2:
;WITH CTE AS(
SELECT group_ID
,REPLACE([person],'Person ','') AS person
FROM #Groups
)
SELECT D.detail_ID, G1.person, G2.person
FROM #Details D
INNER JOIN CTE G1 ON D.group1 = G1.group_ID
INNER JOIN CTE G2 ON D.group1 = G2.group_ID
Result:
+-----------+--------+--------+
| detail_ID | person | person |
+-----------+--------+--------+
| 100 | 10 | 10 |
| 101 | Jack | Jack |
+-----------+--------+--------+

Try following query, it should give you the desired output.
;WITH MT AS
(
SELECT
GroupId, REPLACE([person],'Person ','') Person
AS [person]
FROM Groups
)
SELECT Detail_Id , MT1.Person AS group1 , MT2.Person AS AS group2
FROM
Details D
INNER JOIN MT MT1 ON MT1.GroupId = D.group1
INNER JOIN MT MT2 ON MT2.GroupId= D.group2

The first query works
declare #T table (id int primary key, name varchar(10));
insert into #T values
(1, 'Person 10')
, (2, 'Person 11')
, (3, 'Jack')
, (4, 'Person 12');
declare #G table (id int primary key, grp1 int, grp2 int);
insert into #G values
(100, 1, 2)
, (101, 3, 4);
with cte as
( select t.id, t.name, ltrim(rtrim(replace(t.name, 'person', ''))) as sp
from #T t
)
-- select * from cte order by cte.id;
select g.id, c1.sp as grp1, c2.sp as grp2
from #G g
join cte c1
on c1.id = g.grp1
join cte c2
on c2.id = g.grp2
order
by g.id;
id grp1 grp2
----------- ----------- -----------
100 10 11
101 Jack 12

Related

Select all records of one table that contain two records in another with certain id

I have two tables of 1:m relation. Need to select which People records have both records in Actions table whit id 1 and 2
People
+----+------+--------------+
| id | name | phone_number |
+----+------+--------------+
| 1 | John | 111111111111 |
+----+------+--------------+
| 3 | Jane | 222222222222 |
+----+------+--------------+
| 4 | Jack | 333333333333 |
+----+------+--------------+
Action
+----+------+------------+
| id | PplId| ActionId |
+----+------+------------+
| 1 | 1 | 1 |
+----+------+------------+
| 2 | 1 | 2 |
+----+------+------------+
| 3 | 2 | 1 |
+----+------+------------+
| 4 | 4 | 2 |
+----+------+------------+
Output
+----+------+--------------+----------
|PplId| name | Phone |ActionId |
+-----+------+-------------+----+-----
| 1 | John | 111111111111| 1 |
+-----+------+-------------+----+-----
| 1 | John | 111111111111| 2 |
+-----+------+-------------+----+-----
Return records of People that have both Have Actionid 1 and Action id 2(Have records in Actions).
Window functions are one method. Assuming actions are not duplicated for a person:
select pa.*
from (select p.*, a.action, count(*) over (partition by p.id) as num_actions
from people p join
action a
on p.id = a.pplid
where a.action in (1, 2)
) pa
where num_actions = 2;
In my opinion, getting two rows with the action detail seems superfluous -- you already know the actions. If you only want the people, then exists comes to mind:
select p.*
from people p
where exists (select 1 from actions where a.pplid = p.id and a.action = 1) and
exists (select 1 from actions where a.pplid = p.id and a.action = 2);
With the right index (actions(pplid, action)), I would expect two exists to be faster than group by.
Try this below query using subquery and join
select a.Pplid, name, phone, actionid from (
select a.pplid as Pplid, name, phone_number as phone
from People P
join Action A on a.pplid= p.id
group by a.pplid, name, phone_number
having count(*)>1 )P
join Action A on a.Pplid= p.Pplid
Try something like this
IF OBJECT_ID('tempdb..#People') IS NOT NULL DROP TABLE #People
CREATE TABLE #People (id INT, name VARCHAR(255), phone_number VARCHAR(50))
INSERT #People
SELECT 1, 'John', '111111111111' UNION ALL
SELECT 3, 'Jane', '222222222222' UNION ALL
SELECT 4, 'Jack', '333333333333'
IF OBJECT_ID('tempdb..#Action') IS NOT NULL DROP TABLE #Action
CREATE TABLE #Action (id INT, PplId INT, ActionId INT)
INSERT #Action
SELECT 1, 1, 1 UNION ALL
SELECT 2, 1, 2 UNION ALL
SELECT 3, 2, 1 UNION ALL
SELECT 4, 4, 2
GO
SELECT p.ID AS PplId
, p.name
, p.phone_number AS Phone
, a.ActionId
FROM #People p
JOIN #Action a
ON p.ID = a.PplId
WHERE p.ID IN ( SELECT PplId
FROM #Action
WHERE ActionId IN (1, 2)
GROUP BY PplId
HAVING COUNT(*) = 2 )
AND a.ActionId IN (1, 2)
GO

Find exactly matched groups in a many to many relationship table

Table shown below maps the many-to-many relationship between courses and students.
CREATE Table CourseStudents
(
CourseId INT NOT NULL,
StudentId INT NOT NULL,
PRIMARY KEY (CourseId, StudentId)
);
INSERT INTO CourseStudents VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 3), (3, 2),
(4, 3), (4, 2), (5, 1)
Example data
| CourseId | StudentId |
|----------|-----------|
| 1 | 1 |
| 1 | 2 |
| 2 | 1 |
| 2 | 2 |
| 3 | 2 |
| 3 | 3 |
| 4 | 2 |
| 4 | 3 |
| 5 | 1 |
I'm looking for a query that returns all courses that have the exact same students. I was able to come up with the query shown below.
WITH CourseGroups AS
(
SELECT c.CourseId,
STUFF ((
SELECT ',' + CAST(c2.StudentId AS VARCHAR)
FROM CourseStudents c2
WHERE c2.CourseId = c.CourseId
ORDER BY c2.StudentId
FOR XML PATH ('')), 1, 1, '') AS StudentList
FROM CourseStudents c
GROUP BY c.CourseId)
SELECT cg.StudentList,
STUFF ((
SELECT ',' + CAST(cg2.CourseId AS VARCHAR(10))
FROM CourseGroups cg2
WHERE cg2.StudentList = cg.StudentList
FOR XML PATH ('')), 1, 1, '') AS ExactMatchCourseList
FROM CourseGroups cg
GROUP BY cg.StudentList
HAVING COUNT(*) > 1
Query returns
| StudentList | ExactMatchCourseList |
|-------------|----------------------|
| 1,2 | 1,2 |
| 2,3 | 3,4 |
Above result is fine. But I only need the ExactMatchCourseList.
The table I'm dealing has more than a billion rows so I need an efficient query that can find any matched courses within a few minutes of run time. Appreciate any help.
SqlFiddle
This only does 2 runs over your CourseStudents table, instead of the 4 you're currently doing. And if you add an index on CourseId on the CourseStudents table, the first run will only be an index scan. It also only runs the original STUFF once for each course, instead of once for each student, then grouping by course. I left out the final STUFF that I wasn't sure if you wanted or if it was just a byproduct of how you were calculating it.
CREATE TABLE #Course
(
CourseId INT NOT NULL PRIMARY KEY
);
INSERT INTO #Course
SELECT CourseId
FROM
CourseStudents s
GROUP BY
CourseId
ORDER BY
CourseId;
CREATE TABLE #CourseStudentList
(
CourseId INT NOT NULL PRIMARY KEY,
StudentList VARCHAR(MAX) NOT NULL
);
INSERT INTO #CourseStudentList
SELECT
c.CourseId,
STUFF ((
SELECT ',' + CAST(c2.StudentId AS VARCHAR)
FROM CourseStudents c2
WHERE c2.CourseId = c.CourseId
ORDER BY c2.StudentId
FOR XML PATH ('')), 1, 1, '') AS StudentList
FROM
#Course c
ORDER BY
c.CourseId;
SELECT *
FROM
(
SELECT
l.CourseId,
l.StudentList,
COUNT(*) OVER (PARTITION BY l.StudentList) AS [Count]
FROM
#CourseStudentList l
) l
WHERE
l.[Count] > 1
ORDER BY
l.StudentList;
This will give you a list of course pairs, although if you are going to get triplicates (or more) then you'll end up with some extra results. I don't have time to toy with this further to correct that issue, but maybe it points you in the right direction:
WITH CTE_CourseMatches AS (
SELECT
CS1.CourseId AS CourseId_1,
CS2.CourseId AS CourseId_2,
COUNT(*) AS cnt
FROM
CourseStudents CS1
INNER JOIN CourseStudents CS2 ON CS2.StudentId = CS1.StudentId AND CS2.CourseId > CS1.CourseId
GROUP BY
CS1.CourseId,
CS2.CourseId
),
CTE_CourseCounts AS (SELECT CourseId, COUNT(*) AS cnt FROM CourseStudents GROUP BY CourseID)
SELECT
CM.CourseId_1,
CM.CourseId_2
FROM
CTE_CourseMatches CM
INNER JOIN CTE_CourseCounts CC1 ON CC1.CourseId = CM.CourseId_1 AND CC1.cnt = CM.cnt
INNER JOIN CTE_CourseCounts CC2 ON CC2.CourseId = CM.CourseId_2 AND CC2.cnt = CM.cnt

Oracle duplicate row N times where N is a column

I'm new to Oracle and I'm trying to do something a little unusual. Given this table and data I need to select each row, and duplicate ones where DupCount is greater than 1.
create table TestTable
(
Name VARCHAR(10),
DupCount NUMBER
)
INSERT INTO TestTable VALUES ('Jane', 1);
INSERT INTO TestTable VALUES ('Mark', 2);
INSERT INTO TestTable VALUES ('Steve', 1);
INSERT INTO TestTable VALUES ('Jeff', 3);
Desired Results:
Name DupCount
--------- -----------
Jane 1
Mark 2
Mark 2
Steve 1
Jeff 3
Jeff 3
Jeff 3
If this isn't possible via a single select statement any help with a stored procedure would be greatly appreciated.
You can do it with a hierarchical query:
SQL Fiddle
Query 1:
WITH levels AS (
SELECT LEVEL AS lvl
FROM DUAL
CONNECT BY LEVEL <= ( SELECT MAX( DupCount ) FROM TestTable )
)
SELECT Name,
DupCount
FROM TestTable
INNER JOIN
levels
ON ( lvl <= DupCount )
ORDER BY Name
Results:
| NAME | DUPCOUNT |
|-------|----------|
| Jane | 1 |
| Jeff | 3 |
| Jeff | 3 |
| Jeff | 3 |
| Mark | 2 |
| Mark | 2 |
| Steve | 1 |
You can do this with a recursive cte. It would look like this
with cte as (name, dupcount, temp)
(
select name,
dupcount,
dupcount as temp
from testtable
union all
select name,
dupcount,
temp-1 as temp
from cte
where temp > 1
)
select name,
dupcount
from cte
order by name

Data Matching with SQL and assigning Identity ID's

How to write a query that will match data and produce and identity for it.
For Example:
RecordID | Name
1 | John
2 | John
3 | Smith
4 | Smith
5 | Smith
6 | Carl
I want a query which will assign an identity after matching exactly on Name.
Expected Output:
RecordID | Name | ID
1 | John | 1X
2 | John | 1X
3 | Smith | 1Y
4 | Smith | 1Y
5 | Smith | 1Y
6 | Carl | 1Z
Note: The ID should be unique for every match. Also, it can be numbers or varchar.
Can somebody help me with this? The main thing is to assign the ID's.
Thanks.
How about this:
with temp as
(
select 1 as id,'John' as name
union
select 2,'John'
union
select 3,'Smith'
union
select 4,'Smith'
union
select 5,'Smith'
union
select 6,'Carl'
)
SELECT *, DENSE_RANK() OVER
(ORDER BY Name) as NewId
FROM TEMP
Order by id
The first part is for testing purposes only.
Please try:
SELECT *,
Rank() over (order by Name ASC)
FROM table
This structure seems to work:
CREATE TABLE #Table
(
Department VARCHAR(100),
Name VARCHAR(100)
);
INSERT INTO #Table VALUES
('Sales','michaeljackson'),
('Sales','michaeljackson'),
('Sales','jim'),
('Sales','jim'),
('Sales','jill'),
('Sales','jill'),
('Sales','jill'),
('Sales','j');
WITH Cte_Rank AS
(
SELECT [Name],
rw = ROW_NUMBER() OVER (ORDER BY [Name])
FROM #Table
GROUP BY [Name]
)
SELECT a.Department,
a.Name,
b.rw
FROM #Table a
INNER JOIN Cte_Rank b
ON a.Name = b.Name;

Query for missing elements

I have a table with the following structure:
timestamp | name | value
0 | john | 5
1 | NULL | 3
8 | NULL | 12
12 | john | 3
33 | NULL | 4
54 | pete | 1
180 | NULL | 4
400 | john | 3
401 | NULL | 4
592 | anna | 2
Now what I am looking for is a query that will give me the sum of the values for each name, and treats the nulls in between (orderd by the timestamp) as the first non-null name down the list, as if the table were as follows:
timestamp | name | value
0 | john | 5
1 | john | 3
8 | john | 12
12 | john | 3
33 | pete | 4
54 | pete | 1
180 | john | 4
400 | john | 3
401 | anna | 4
592 | anna | 2
and I would query SUM(value), name from this table group by name. I have thought and tried, but I can't come up with a proper solution. I have looked at recursive common table expressions, and think the answer may lie in there, but I haven't been able to properly understand those.
These tables are just examples, and I don't know the timestamp values in advance.
Could someone give me a hand? Help would be very much appreciated.
With Inputs As
(
Select 0 As [timestamp], 'john' As Name, 5 As value
Union All Select 1, NULL, 3
Union All Select 8, NULL, 12
Union All Select 12, 'john', 3
Union All Select 33, NULL, 4
Union All Select 54, 'pete', 1
Union All Select 180, NULL, 4
Union All Select 400, 'john', 3
Union All Select 401, NULL, 4
Union All Select 592, 'anna', 2
)
, NamedInputs As
(
Select I.timestamp
, Coalesce (I.Name
, (
Select I3.Name
From Inputs As I3
Where I3.timestamp = (
Select Max(I2.timestamp)
From Inputs As I2
Where I2.timestamp < I.timestamp
And I2.Name Is not Null
)
)) As name
, I.value
From Inputs As I
)
Select NI.name, Sum(NI.Value) As Total
From NamedInputs As NI
Group By NI.name
Btw, what would be orders of magnitude faster than any query would be to first correct the data. I.e., update the name column to have the proper value, make it non-nullable and then run a simple Group By to get your totals.
Additional Solution
Select Coalesce(I.Name, I2.Name), Sum(I.value) As Total
From Inputs As I
Left Join (
Select I1.timestamp, MAX(I2.Timestamp) As LastNameTimestamp
From Inputs As I1
Left Join Inputs As I2
On I2.timestamp < I1.timestamp
And I2.Name Is Not Null
Group By I1.timestamp
) As Z
On Z.timestamp = I.timestamp
Left Join Inputs As I2
On I2.timestamp = Z.LastNameTimestamp
Group By Coalesce(I.Name, I2.Name)
You don't need CTE, just a simple subquery.
select t.timestamp, ISNULL(t.name, (
select top(1) i.name
from inputs i
where i.timestamp < t.timestamp
and i.name is not null
order by i.timestamp desc
)), t.value
from inputs t
And summing from here
select name, SUM(value) as totalValue
from
(
select t.timestamp, ISNULL(t.name, (
select top(1) i.name
from inputs i
where i.timestamp < t.timestamp
and i.name is not null
order by i.timestamp desc
)) as name, t.value
from inputs t
) N
group by name
I hope I'm not going to be embarassed by offering you this little recursive CTE query of mine as a solution to your problem.
;WITH
numbered_table AS (
SELECT
timestamp, name, value,
rownum = ROW_NUMBER() OVER (ORDER BY timestamp)
FROM your_table
),
filled_table AS (
SELECT
timestamp,
name,
value
FROM numbered_table
WHERE rownum = 1
UNION ALL
SELECT
nt.timestamp,
name = ISNULL(nt.name, ft.name),
nt.value
FROM numbered_table nt
INNER JOIN filled_table ft ON nt.rownum = ft.rownum + 1
)
SELECT *
FROM filled_table
/* or go ahead aggregating instead */