Improve SQL Query to find redundant data - sql

the following shows my sample dataset
PatientID PatientName
XXX-037070002 Riger, Jens^Wicki
XXX-037070002 Riger^Wicki
XXX-10052 Weier,Nicole^Peggy
XXX-10052 Weier,Nicole^Peppy
XXX-23310 Rodem^Sieglinde
XXX-23310 Sauberger, Birgit^Finja
XXX-23343 Je, Ronny^Wilma
XXX-23343 Jer, Ronny^Wilma
XXX-2349 Kel,Andy^Juka
XXX-2349 Kel^Juka
XXX-2998 Hel, Frank
XXX-2998 Hel,Frank^Fenris
XXX-3188 Mey, Marion
XXX-3188 Mey, Marion^Paula
XXX-3188 Schulz^Roma
XXX-3218 Böntgen-Simnet,Dr. Regine^Cara
XXX-3218 Simnet,Dr. Regine^Cara
XXX-3826 Mertes, Bernd Uwe^Ellie
XXX-3826 Mertes,Bernd^Ellie
XXX-3826 Mertes^Ellie
This is the query I got from my last request:
-- Lists every distinct (patid, patname) pair whose patid occurs with more
-- than one distinct patname.
WITH distinct_patients AS (
    SELECT DISTINCT
        patid,
        patname
    FROM dicomstudys
)
SELECT *
FROM distinct_patients AS dp
WHERE dp.patid IN (
    -- ids that still have several rows after de-duplication
    SELECT inner_dp.patid
    FROM distinct_patients AS inner_dp
    GROUP BY inner_dp.patid
    HAVING COUNT(*) > 1
)
Now I want to adjust the query so that only the following data is output:
PatientID PatientName
XXX-23310 Rodem^Sieglinde
XXX-23310 Sauberger, Birgit^Finja
XXX-23343 Je, Ronny^Wilma
XXX-23343 Jer, Ronny^Wilma
XXX-3188 Mey, Marion
XXX-3188 Mey, Marion^Paula
XXX-3188 Schulz^Roma
XXX-3218 Böntgen-Simnet,Dr. Regine^Cara
XXX-3218 Simnet,Dr. Regine^Cara
Last names are separated from the rest of the name by either a ',' or a '^'. If the last names are the same for the same PatientID, then I don't want them displayed. I tried fiddling with a sub-select statement featuring a combination of CHARINDEX commands and others, but my knowledge of SQL syntax is too limited for the complexity of the request.
Please also note that XXX-3188 has two rows with the same last name but also another row with a completely different PatientName, and thus it needs to be in the output.

Try this:
-- Sample data in a table variable. Table variables take the @ prefix;
-- a #-prefixed name is a temp table and cannot be used with DECLARE ... TABLE.
DECLARE @DataSource TABLE
(
    [ID]   VARCHAR(32)
   ,[Name] VARCHAR(256)
);

INSERT INTO @DataSource ([ID], [Name])
VALUES ('XXX-037070002', 'Riger, Jens^Wicki')
      ,('XXX-037070002', 'Riger^Wicki')
      ,('XXX-10052', 'Weier,Nicole^Peggy')
      ,('XXX-10052', 'Weier,Nicole^Peppy')
      ,('XXX-23310', 'Rodem^Sieglinde')
      ,('XXX-23310', 'Sauberger, Birgit^Finja')
      ,('XXX-23343', 'Je, Ronny^Wilma')
      ,('XXX-23343', 'Jer, Ronny^Wilma')
      ,('XXX-2349', 'Kel,Andy^Juka')
      ,('XXX-2349', 'Kel^Juka')
      ,('XXX-2998', 'Hel, Frank')
      ,('XXX-2998', 'Hel,Frank^Fenris')
      ,('XXX-3188', 'Mey, Marion')
      ,('XXX-3188', 'Mey, Marion^Paula')
      ,('XXX-3188', 'Schulz^Roma')
      ,('XXX-3218', 'Böntgen-Simnet,Dr. Regine^Cara')
      ,('XXX-3218', 'Simnet,Dr. Regine^Cara')
      ,('XXX-3826', 'Mertes, Bernd Uwe^Ellie')
      ,('XXX-3826', 'Mertes,Bernd^Ellie')
      ,('XXX-3826', 'Mertes^Ellie');

WITH FamilyNames AS
(
    -- Family name = everything before the first ',' or '^'.
    -- '^' is mapped to ',' so one CHARINDEX finds either delimiter (REPLACE
    -- keeps character positions, so LEFT can cut the original [Name]).
    -- A ',' is appended before searching so a name without any delimiter
    -- yields the whole name instead of an empty string.
    SELECT [ID]
          ,[Name]
          ,LTRIM(RTRIM(LEFT([Name], CHARINDEX(',', REPLACE([Name], '^', ',') + ',') - 1))) AS [FamilyName]
    FROM @DataSource
),
DataSource AS
(
    SELECT [ID]
          ,[Name]
          ,COUNT(*) OVER (PARTITION BY [ID], [FamilyName]) AS [ID_Name_Count] -- rows sharing this ID and family name
          ,COUNT(*) OVER (PARTITION BY [ID]) AS [ID_Count]                    -- rows sharing this ID
          ,[FamilyName]
    FROM FamilyNames
)
SELECT [ID]
      ,[Name]
FROM DataSource
WHERE
    -- Parentheses spell out the AND-before-OR precedence the predicate relies on.
    ([ID_Name_Count] = 1 AND [ID_Count] = 2)
    OR [ID] IN
    (
        -- IDs that carry more than one distinct family name
        SELECT [ID]
        FROM DataSource
        GROUP BY [ID]
        HAVING COUNT(DISTINCT [FamilyName]) > 1
    );
The solution is pretty easy. Here are the interesting parts:
replace the ^ with , in order to simplify the last name extraction
extract the last name and calculation count based on ID and last name
in the final select check for unique id-last name pairs with id count equal to 2 and add ids with more than one unique family name (your special case)

You can try something like that:
Test data
-- Test fixture: rebuild the #Patient temp table and load the sample rows.
DROP TABLE IF EXISTS #Patient;

CREATE TABLE #Patient
(
    PatientID   varchar(20),
    PatientName varchar(50)
);

INSERT INTO #Patient (PatientID, PatientName)
VALUES
    ('XXX-037070002', 'Riger, Jens^Wicki'),
    ('XXX-037070002', 'Riger^Wicki'),
    ('XXX-10052', 'Weier,Nicole^Peggy'),
    ('XXX-10052', 'Weier,Nicole^Peppy'),
    ('XXX-23310', 'Rodem^Sieglinde'),
    ('XXX-23310', 'Sauberger, Birgit^Finja'),
    ('XXX-23343', 'Je, Ronny^Wilma'),
    ('XXX-23343', 'Jer, Ronny^Wilma'),
    ('XXX-2349', 'Kel,Andy^Juka'),
    ('XXX-2349', 'Kel^Juka'),
    ('XXX-2998', 'Hel, Frank'),
    ('XXX-2998', 'Hel,Frank^Fenris'),
    ('XXX-3188', 'Mey, Marion'),
    ('XXX-3188', 'Mey, Marion^Paula'),
    ('XXX-3188', 'Schulz^Roma'),
    ('XXX-3218', 'Böntgen-Simnet,Dr. Regine^Cara'),
    ('XXX-3218', 'Simnet,Dr. Regine^Cara'),
    ('XXX-3826', 'Mertes, Bernd Uwe^Ellie'),
    ('XXX-3826', 'Mertes,Bernd^Ellie'),
    ('XXX-3826', 'Mertes^Ellie');
My solution
-- Returns every #Patient row whose PatientID is associated with more than
-- one distinct leading name part (the text before the first ',' or '^').
WITH q1 AS (
    -- '^' is mapped to ',' so a single CHARINDEX locates either delimiter;
    -- rows without a delimiter keep the whole PatientName.
    SELECT
        src.PatientID,
        src.PatientName,
        CASE
            WHEN CHARINDEX(',', REPLACE(src.PatientName, '^', ',')) > 0
                THEN LEFT(src.PatientName, CHARINDEX(',', REPLACE(src.PatientName, '^', ',')) - 1)
            ELSE src.PatientName
        END AS FullName
    FROM #Patient AS src
),
q2 AS (
    -- IDs with several rows AND several distinct leading name parts
    SELECT q1.PatientID
    FROM q1
    GROUP BY q1.PatientID
    HAVING COUNT(*) > 1
       AND COUNT(DISTINCT q1.FullName) > 1
)
SELECT
    t.PatientID,
    t.PatientName
FROM #Patient AS t
INNER JOIN q2
    ON q2.PatientID = t.PatientID;

Related

Sample 3 random rows then order them alphabetically by another property

I was just wondering if I can get three random results and then order them by another column alphabetically.
Example:
SELECT TOP(3)[Name]
FROM Table
ORDER BY NEWID()
¿ORDER BY [Surname]?
Thanks in advance.
Not sure I understand the problem 100%, but with a second ORDER BY it will sort all of the different IDs.
I did a CTE for achieving this result.
-- Pick three random rows, then present them sorted by surname.
-- The CTE must project [Surname] as well: the outer ORDER BY can only see
-- columns the CTE returns.
-- [Table] is the placeholder table name from the question; it is bracketed
-- because TABLE is a reserved keyword.
WITH CTE AS
(
    SELECT TOP (3)
        [Name],
        [Surname]
    FROM [Table]
    ORDER BY NEWID()   -- random order, so TOP (3) keeps three random rows
)
SELECT [Name]
FROM CTE
ORDER BY [Surname];
Have a derived table (sub-query) where you select 3 random rows. ORDER BY its result.
-- Derived table picks three random rows; the outer query sorts them by surname.
-- [Table] is the placeholder table name from the question; it is bracketed
-- because TABLE is a reserved keyword.
SELECT dt.[Name]
FROM
(
    SELECT TOP (3)
        [Name],
        [Surname]      -- carried along only so the outer query can sort on it
    FROM [Table]
    ORDER BY NEWID()
) AS dt
ORDER BY dt.[Surname];
You can try this.
-- Three random rows, returned with both name columns and sorted by surname.
-- [Table] is the placeholder table name from the question; it is bracketed
-- because TABLE is a reserved keyword.
SELECT
    T.[Name],
    T.[Surname]
FROM
(
    SELECT TOP (3)
        [Name],
        [Surname]
    FROM [Table]
    ORDER BY NEWID()
) AS T
ORDER BY T.[Surname];
you can try this -
-- Self-contained demo: load five people into a table variable, pick three
-- at random, and return their names sorted by surname.
-- Table variables take the @ prefix (a # prefix denotes a temp table and is
-- not valid with DECLARE ... TABLE).
DECLARE @name TABLE
(
    Name    varchar(20),
    SurName varchar(20)
);

INSERT INTO @name (Name, SurName)
VALUES
    ('mukesh', 'arora'),
    ('amit', 'kumar'),
    ('Vijay', 'gupta'),
    ('jai', 'poddar'),
    ('vishal', 'sharma');

SELECT a.Name
FROM
(
    SELECT TOP (3)
        [Name],
        [Surname]      -- needed by the outer ORDER BY
    FROM @name
    ORDER BY NEWID()
) AS a
ORDER BY a.[Surname];

How to transform select query

I have DDL:
-- Rebuild the two sample tables. The drops are guarded so the script also
-- runs when the tables do not exist yet.
IF OBJECT_ID('names', 'U') IS NOT NULL
    DROP TABLE names;
IF OBJECT_ID('salary', 'U') IS NOT NULL
    DROP TABLE salary;

CREATE TABLE names
(
    id    int,
    name1 varchar(50),
    char1 varchar(50),
    char2 varchar(50)
);

-- Explicit column lists keep the inserts valid if columns are added later.
INSERT INTO names (id, name1, char1, char2)
VALUES
    (1, 'name1', 'char1', 'chara'),
    (2, 'name2', 'char2', 'charb'),
    (3, 'name3', 'char3', 'charc'),
    (4, 'name4', 'char4', 'chard'),
    (5, 'name5', 'char5', 'charf');

CREATE TABLE salary
(
    id        int,
    salary    int,
    bonus     int,
    oldsalary int
);

INSERT INTO salary (id, salary, bonus, oldsalary)
VALUES
    (1, 500, 245, 354),
    (2, 600, 345, 246),
    (3, 60, 365, 334),
    (4, 55, 545, 364),
    (5, 25, 345, 374);
And I have many SQL queries (this is one of them):
-- One row per id present in both tables: the name, salary, bonus and their sum.
select
    n.name1,
    s.salary,
    s.bonus,
    (s.bonus + s.salary) as Sumsalary
from names as n
inner join salary as s
    on s.id = n.id
I want to write a select query that retrieves all field names from the tables used in the SQL query, divided by their usage in this select query, with some additional info mentioned in the screenshot.
Format of data that I want retrieve from this query:
You could get the list of tables & columns for queries if you have access to the system views:
-- Lists the table/column pairs referenced by cached ad-hoc plans whose text
-- contains 's.bonus', by shredding the showplan XML of each cached plan.
-- Requires access to the sys.dm_exec_* dynamic management views.
;WITH XMLNAMESPACES ('http://schemas.microsoft.com/sqlserver/2004/07/showplan' AS ns)
SELECT DISTINCT
    st.text AS QueryText,
    -- Table and Column are XML attributes of ColumnReference, hence the @ prefix
    C.value('./@Table', 'varchar(50)') AS Tab,
    C.value('./@Column', 'varchar(50)') AS Col
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_query_plan(cp.plan_handle) AS qp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) AS st
CROSS APPLY qp.query_plan.nodes('//ns:ColumnReference') AS T(C)
WHERE cp.objtype = 'Adhoc'
  AND st.text LIKE '%s.bonus%'
  -- keep this diagnostic query itself out of the results
  AND st.text NOT LIKE '%WITH XMLNAMESPACES%'
  -- skip column references that carry no Table attribute
  AND C.value('./@Table', 'varchar(50)') IS NOT NULL
This gives the following results:
QueryText Tab Col
SELECT n.name1, ... [Names] id
SELECT n.name1, ... [Names] name1
SELECT n.name1, ... [Salary] bonus
SELECT n.name1, ... [Salary] id
SELECT n.name1, ... [Salary] salary
You could then use these results as a means of linking to sys.columns to determine which columns are missing etc.

sql query serial number

I have written a stored procedure in SQL Server 2000. I want a serial number for output table.
So when I run this stored proc I get this error:
An explicit value for the identity column in table
'#tmpSearchResults1' can only be specified when a column list is used
and IDENTITY_INSERT is ON.
I have tried with set IDENTITY_INSERT #tmpSearchResults1 on
-- Keyword search over auction properties, returned with a serial number (i1).
--
-- Parameter:
--   @searchkey  text compared for equality with state_abbreviation and as a
--               substring of state name, city, address1 and address2.
--
-- Result set: i1, property_id, property_number, auction_date_reason,
--             ordered by i1.
--
-- Changes from the earlier two-step version:
--   * the parameter uses the @ prefix required for T-SQL variables;
--   * the INSERT names its target columns, so no explicit value is supplied
--     for the identity column i1;
--   * the ranking ORDER BY runs against the joined source tables, because the
--     intermediate temp table held only three columns and none of the ones
--     the ORDER BY referred to.
CREATE PROCEDURE dbo.usp_mobile_All_KeyWord (@searchkey varchar(30))
AS
-- Converting through style 103 (dd/mm/yyyy) drops the time-of-day portion.
DECLARE @today datetime
SET @today = CONVERT(datetime, CONVERT(varchar, GETDATE(), 103), 103)

CREATE TABLE #tmpSearchResults1
(
    i1 int IDENTITY,
    property_id varchar(255),
    property_number varchar(255),
    auction_date_reason varchar(255)
)

INSERT INTO #tmpSearchResults1
    (property_id, property_number, auction_date_reason)
SELECT
    p.property_id,
    p.property_number,
    p.auction_date_reason
FROM Pr p
INNER JOIN Au a
    ON p.auction_id = a.auction_id
INNER JOIN PrAdd pa
    ON p.property_id = pa.property_id
INNER JOIN state AS s
    ON s.state_id = pa.state
WHERE p.archive = 'N'
  AND a.show_on_site = 'Y'
  AND pa.property_address_type_id = 1
  AND (
          -- auction today or later, with no reason recorded
          (p.auction_date >= @today
           AND (p.auction_date_reason IS NULL OR p.auction_date_reason = ''))
          -- or auction today or earlier, still accepting offers
       OR (p.auction_date <= @today
           AND p.auction_date_reason = 'Accepting Offers')
      )
  AND (
          state_abbreviation = @searchkey
       OR s.state_name LIKE '%' + @searchkey + '%'
       OR city LIKE '%' + @searchkey + '%'
       OR pa.address1 LIKE '%' + @searchkey + '%'
       OR pa.address2 LIKE '%' + @searchkey + '%'
      )
-- Rows where the key is found sort first, field by field in this priority.
-- NOTE(review): the previous ORDER BY named "state" and "statename"; they are
-- mapped here to state_abbreviation and s.state_name, the columns the WHERE
-- clause searches -- confirm this is the intended mapping.
-- NOTE(review): short_description is not qualified because its owning table
-- is not visible in this procedure -- confirm it exists on exactly one of the
-- joined tables.
ORDER BY
    CASE WHEN CHARINDEX(@searchkey, state_abbreviation) > 0 THEN 1000 ELSE 0 END DESC,
    CASE WHEN CHARINDEX(@searchkey, s.state_name) > 0 THEN 1000 ELSE 0 END DESC,
    CASE WHEN CHARINDEX(@searchkey, city) > 0 THEN 1000 ELSE 0 END DESC,
    CASE WHEN CHARINDEX(@searchkey, pa.address2) > 0 THEN 1000 ELSE 0 END DESC,
    CASE WHEN CHARINDEX(@searchkey, pa.address1) > 0 THEN 1000 ELSE 0 END DESC,
    CASE WHEN CHARINDEX(@searchkey, short_description) > 0 THEN 1000 ELSE 0 END DESC

-- Without an ORDER BY the rows could come back in any order.
SELECT
    i1,
    property_id,
    property_number,
    auction_date_reason
FROM #tmpSearchResults1
ORDER BY i1
Plz do help me
The error code is very very very clear.
The relevant portion is ...when a column list is used....
You need to specify your column list in the INSERT statement.
-- List only the non-identity columns of #tmpSearchResults1: i1 is generated
-- automatically, and the column list must match the three selected values.
INSERT INTO #tmpSearchResults1
(property_id,
property_number,
auction_date_reason)
SELECT
property_id, property_number, auction_date_reason
FROM...
First, there is one comma too many in the SELECT part of your second statement:
insert into #tmpSearchResults1
select
property_id ,
property_number,
auction_date_reason , <-- THIS ONE!!
from #tmpSearchResults
The last column of a SELECT statement must be without a comma.
So this would be correct:
-- Same statement with the stray comma after the last select column removed.
INSERT INTO #tmpSearchResults1
SELECT
    property_id,
    property_number,
    auction_date_reason
FROM #tmpSearchResults
Second, did you read this part of the error message?
An explicit value [...] can only be specified when a column list is used
The "column list" part means that you have to specify the columns in the INSERT part:
-- The target columns are named, so the three selected values map onto them
-- and the identity column is left out.
INSERT INTO #tmpSearchResults1
(
    property_id,
    property_number,
    auction_date_reason
)
SELECT
    property_id,
    property_number,
    auction_date_reason
FROM #tmpSearchResults
You can get away with not specifying the columns when the number of columns in the SELECT statement is the same as in the table in which they should be inserted (and if the data types match).
If one of these conditions is not met, you need to specify the columns because otherwise SQL Server doesn't know which value to insert into which column.

Create Distinct Column Values as Extra Rows

Any tricks anyone can share on how to manipulate the following table
ID TYPE Name Description
1 X A DESC_A
2 X B DESC_B
3 Z C DESC_C
to this view?
NAME_X DESCRIPTION_X
A DESC_A
B DESC_B
NAME_Z DESCRIPTION_Z
C DESC_C
For every distinct column, I would like to create a custom row for every distinct value in the 'TYPE' column. In this example, the custom row is created by appending the TYPE value to 'NAME_' and 'DESCRIPTION_'.
Thanks!
Try this:
-- Data rows (Name, Description) plus one generated header row
-- ('NAME_<type>', 'DESCRIPTION_<type>') per distinct type.
-- The first branch selects [Name] rather than [type], because the requested
-- output lists A/B/C next to their descriptions.
-- This view exposes no column to sort on, so a caller cannot place each
-- header row directly above its own data rows.
create view vwTestDistinctData
as
select [Name], [Description]
from testdistinctdata
union all
select
'NAME_' + [type] as [Name],
'DESCRIPTION_' + [type] as [Description]
from testdistinctdata
group by [type]   -- one header row per distinct type
go
Edit: Return some meta data from the view:
-- Data rows (Sort = 0) plus one generated header row per distinct Type
-- (Sort = 1), so callers can order each header next to its data rows.
ALTER VIEW dbo.yourView
AS
WITH c_Distinct ([type]) AS
(
    -- every Type value exactly once
    SELECT DISTINCT src.[Type]
    FROM dbo.yourTable AS src
)
SELECT
    0 AS [Sort],
    [Type],
    Name,
    [Description]
FROM dbo.yourTable
UNION ALL
SELECT
    1 AS [Sort],
    [Type],
    'NAME_' + [Type],
    'DESCRIPTION_' + [Type]
FROM c_Distinct
And then perform the ordering when selecting from the View:
-- Within each Type, the header row (Sort = 1) sorts before the data rows (Sort = 0).
SELECT *
FROM yourView AS v
ORDER BY
    v.[Type] ASC,
    v.[Sort] DESC

Pivot String SQL

I am trying to Pivot this table whose name is #salida
IDJOB NAME DATE
1 Michael NULL
1 Aaron NULl
The result which I want to obtain is
IDJOB DATE NAME1 NAME2
1 NULL Michael Aaron
My code is this
SELECT *
FROM #salida
PIVOT
(
MAX([Name]) FOR [Name] IN ([Name1],[Name2])
) PVT GROUP BY IdJob,Date,Name1,Name2 ;
SELECT * FROM #salida
The result which I obtain is
IDJOB DATE NAME1 NAME2
1 NULL NULL NULL
@XabiIparra, see a mock-up. You need to partition by the IdJob and then add the columns needed.
-- Mock-up: turn the names of each idjob into columns NAME1..NAME5.
-- Table variables take the @ prefix (a # prefix denotes a temp table and is
-- not valid with DECLARE ... TABLE).
DECLARE @salida TABLE
(
    idjob  VARCHAR(100),
    [Name] VARCHAR(100),
    [DATE] DATE
);

INSERT INTO @salida (idjob, [Name], [DATE])
VALUES
     (1, 'Michael', NULL)
    ,(1, 'Aaron', NULL)
    ,(2, 'Banabas', NULL);

SELECT p.*
FROM
(
    -- Number the names within each idjob and turn that number into the
    -- pivot column label: 'NAME1', 'NAME2', ...
    -- The numbering is alphabetical by name, since the table has no other
    -- column that defines an order.
    SELECT
         idjob
        ,[Name]
        ,[DATE]
        ,'NAME' + CAST(ROW_NUMBER() OVER (PARTITION BY [idjob] ORDER BY [Name]) AS varchar(100)) AS ColumnName
    FROM @salida
) AS t
PIVOT
(
    MAX([Name]) FOR ColumnName IN (NAME1, NAME2, NAME3, NAME4, NAME5 /*add as many as you need*/)
) AS p;
How about just using aggregation and min() and max()?
-- One row per (idjob, date) with the alphabetically first and last name.
-- The aggregates are aliased so the result columns are named.
select
    idjob,
    date,
    min(name) as name1,
    max(name) as name2
from #salida
group by idjob, date;
SQL tables represent unordered sets, so there is no ordering to the values (unless another column specifies the ordering). So, this is probably the simplest way to get two different values in the same row.