Related
I have a table that has 3 columns.
-- Self-referencing table: Detail_ID, when set, points at the ID of another row.
CREATE TABLE myTable
(
    ID           int         PRIMARY KEY,
    Detail_ID    int         NULL REFERENCES myTable (ID), -- reference to self
    Master_Value varchar(50)                               -- references to master table
)
this table has the follow records:
-- Sample data: one chain per Master_Value, linked through Detail_ID.
INSERT INTO myTable (ID, Detail_ID, Master_Value)
VALUES
    (100, NULL, 'aaaa'),
    (101, 100,  'aaaa'),
    (102, 101,  'aaaa'),
    (103, 102,  'aaaa'), ---> last record
    (200, NULL, 'bbbb'),
    (201, 200,  'bbbb'),
    (202, 201,  'bbbb')  ---> last record
The records are linked to each other through the ID and Detail_ID columns.
I need to select the last record for each Master_Value. The expected output is:
lastRecordID Master_Value Path
202 bbbb 200=>201=>202
103 aaaa 100=>101=>102=>103
tips:
The records are not stored in order in the table.
I cannot use max(ID), because the data is not sorted (the ID column
may have been updated manually).
attempts:
I was able to Prepare follow query and is working well:
-- Walk every chain from its root (Detail_ID is null) downwards, recording the
-- depth (RowOrder) and the accumulated ID path of each visited row.
WITH Q AS
(
    SELECT
        root.ID,
        root.Detail_ID,
        root.Master_Value,
        1 AS RowOrder,
        CAST(root.ID AS varchar(max)) AS [Path]
    FROM myTable AS root
    WHERE root.Detail_ID IS NULL

    UNION ALL

    SELECT
        child.ID,
        child.Detail_ID,
        child.Master_Value,
        parent.RowOrder + 1 AS RowOrder,
        parent.[Path] + '=>' + CAST(child.ID AS varchar(max)) AS [Path]
    FROM myTable AS child
    INNER JOIN Q AS parent
        ON parent.ID = child.Detail_ID
)
SELECT ID, Detail_ID, Master_Value, RowOrder, [Path]
INTO #q
FROM Q;

-- Deepest level reached for each Master_Value.
SELECT Master_Value, MAX(RowOrder) AS lastRecord
INTO #temp
FROM #q
GROUP BY Master_Value;

-- Keep only the rows sitting at that deepest level.
SELECT
    walked.ID AS lastRecordID,
    walked.Master_Value,
    walked.[Path]
FROM #temp AS deepest
INNER JOIN #q AS walked
    ON  walked.RowOrder = deepest.lastRecord
    AND walked.Master_Value = deepest.Master_Value
But I need a simpler way (a single select) that is also efficient.
Can anyone help me?
One method uses a correlated subquery to get the last value (which is how I interpreted your question):
-- Return the last row of every chain: a row is "last" when no other row with
-- the same master_value names it as its parent through detail_id.
-- (Correlating on t2.id = t.detail_id instead would keep the rows that have no
-- parent, i.e. the first row of each chain.)
select t.*
from mytable t
where not exists (select 1
                  from mytable t2
                  where t2.master_value = t.master_value and
                        t2.detail_id = t.id
                 );
This returns rows that are not referred to by another row.
For the path, you need a recursive CTE:
-- Build the path of every chain with a recursive CTE, then keep the deepest
-- row (highest lev) for each master_value.
with cte as (
      -- anchor: chain roots, i.e. rows without a parent
      select master_value, id as first_id, id as child_id,
             convert(varchar(max), id) as path, 1 as lev
      from mytable t
      where detail_id is null
      union all
      -- recursive step: append each child to its parent's path,
      -- using the '=>' separator shown in the requested output
      select cte.master_value, cte.first_id, t.id,
             concat(cte.path, '=>', t.id), cte.lev + 1
      from cte join
           mytable t
           on t.detail_id = cte.child_id and t.master_value = cte.master_value
     )
select cte.*
from (select cte.*, max(lev) over (partition by master_value) as max_lev
      from cte
     ) cte
where max_lev = lev
Here is a db<>fiddle.
I want to show only one row per column (PN) from result of calculations base on 2 indicators (WK) and (Prio), and I think that doing this in way below using double grouping is... stupid, but I see no other solution. Is there other way to reach same result as query below?
-- Sample data: several (WK, Prio) rows per PN; WK may be NULL.
CREATE TABLE #table
(
    [PN]   varchar(3) NULL,
    [WK]   int        NULL,
    [Prio] int        NULL
);

INSERT INTO #table ([PN], [WK], [Prio])
VALUES
    ('AAA', 37,   1),
    ('AAA', 37,   2),
    ('AAA', 38,   3),
    ('BBB', 39,   1),
    ('BBB', 39,   2),
    ('BBB', 37,   3),
    ('BBB', 38,   4),
    ('CCC', NULL, 1),
    ('CCC', NULL, 2),
    ('CCC', 37,   3),
    ('CCC', 38,   4);

-- First grouping finds the lowest WK per PN; the second finds the lowest Prio
-- among the rows of that PN having that WK.
SELECT
    GTG.[PN],
    GTG.[WK],
    MIN(TMP.[Prio]) AS [Prio]
FROM
(
    SELECT
        [PN],
        MIN([WK]) AS [WK]
    FROM #table
    GROUP BY [PN]
) AS GTG
LEFT JOIN #table AS TMP
    ON  GTG.[PN] = TMP.[PN]
    AND GTG.[WK] = TMP.[WK]
GROUP BY
    GTG.[PN],
    GTG.[WK];

DROP TABLE #table;
Try using a Common Table Expression (CTE) with ROW_NUMBER():
SQL Fiddle
MS SQL Server 2017 Schema Setup:
-- Fiddle schema: same shape and rows as the #table sample in the question.
CREATE TABLE MyTable
(
    [PN]   varchar(3) NULL,
    [WK]   int        NULL,
    [Prio] int        NULL
);

INSERT INTO MyTable ([PN], [WK], [Prio])
VALUES
    ('AAA', 37,   1),
    ('AAA', 37,   2),
    ('AAA', 38,   3),
    ('BBB', 39,   1),
    ('BBB', 39,   2),
    ('BBB', 37,   3),
    ('BBB', 38,   4),
    ('CCC', NULL, 1),
    ('CCC', NULL, 2),
    ('CCC', 37,   3),
    ('CCC', 38,   4);
Query 1:
-- Number the rows of each PN by lowest WK, then lowest Prio, and keep row 1.
-- COALESCE([WK], 999) makes NULL weeks sort after every real week.
;WITH CTE AS
(
    SELECT
        src.[PN],
        src.[WK],
        src.[Prio],
        ROW_NUMBER() OVER (
            PARTITION BY src.[PN]
            ORDER BY COALESCE(src.[WK], 999), src.[Prio]
        ) AS RN
    FROM MyTable AS src
)
SELECT
    CTE.[PN],
    CTE.[WK],
    CTE.[Prio]
FROM CTE
WHERE CTE.RN = 1
Results:
| PN | WK | Prio |
|-----|----|------|
| AAA | 37 | 1 |
| BBB | 37 | 3 |
| CCC | 37 | 3 |
Are you looking for something like the query below?
-- One row per PN using a single GROUP BY.
-- The NOT EXISTS filter discards every row for which the same PN has a lower WK,
-- so only the rows at the minimum WK (plus any NULL-WK rows) reach the aggregate.
-- Without it, MIN([Prio]) is taken over all weeks of the PN and can come from a
-- different week than MIN([WK]) (for the sample data: BBB 37/1 instead of 37/3).
SELECT GTG.[PN]
      ,MIN(GTG.[WK]) AS [WK]
       -- ignore the Prio of NULL-WK rows; they do not belong to the minimum week
      ,MIN(CASE WHEN GTG.[WK] IS NOT NULL THEN GTG.[Prio] END) AS [Prio]
FROM
#table GTG
WHERE NOT EXISTS (SELECT 1
                  FROM #table TMP
                  WHERE TMP.[PN] = GTG.[PN]
                    AND TMP.[WK] < GTG.[WK])
GROUP BY GTG.[PN]
You seem to want window functions. This should do what you want:
-- Rank the rows of each pn by week (NULL weeks last) and then prio; keep the first.
with ranked as (
    select src.*,
           row_number() over (
               partition by src.pn
               order by coalesce(src.wk, 999), src.prio
           ) as seqnum
    from #table src
)
select ranked.*
from ranked
where ranked.seqnum = 1;
Here is a db<>fiddle.
Note: wk possibly represents a week value, so I replaced it with 999 when it is NULL for the sorting to be correct. You could filter out NULL values or use a CASE expression if COALESCE() does not quite meet your needs.
I think, we can use Row_Number window function to sort result first and then apply where clause. Please try this-
-- Same idea without a CTE: the CASE key pushes NULL weeks behind real ones,
-- then rows are ordered by week and priority within each pn.
select ranked.pn,
       ranked.wk,
       ranked.prio
from (
    select src.pn,
           src.wk,
           src.prio,
           row_number() over (
               partition by src.pn
               order by (case when src.wk is null then 1 else 0 end), src.wk, src.prio
           ) as rankid
    from #table src
) ranked
where ranked.rankid = 1;
I have a patients table with details such as conditions that the patient has. from the below table I want to select Patients, Claims which have ONLY a single condition - 'Hypertension'. Example Patient B is the expected output. Patient A will not be selected because he claimed for multiple conditions.
+----+---------+--------------+
| ID | ClaimID | Condition |
+----+---------+--------------+
| A | 14234 | Hypertension |
| A | 14234 | Diabetes |
| A | 63947 | Diabetes |
| B | 23853 | Hypertension |
+----+---------+--------------+
I tried using the NOT IN condition as below but doesn't seem to help
-- Attempt from the question. Both predicates are evaluated against the same
-- single row, so the second one can never reject a row that passed the first;
-- the other rows of the same patient are not looked at.
SELECT
    ID,
    ClaimID,
    Condition
FROM myTable
WHERE Condition = 'Hypertension'
  AND Condition <> 'Diabetes'
One method uses not exists:
-- Hypertension rows of patients who have no row with any other condition.
select t.*
from mytable t
where t.condition = 'Hypertension'
  and not exists (
        select 1
        from mytable other
        where other.id = t.id
          and other.condition <> 'Hypertension'
      );
Or you can do it like this:
-- Rows of patients whose only distinct condition is 'Hypertension'.
select
    id,
    claim_id,
    condition
from
    patient
where
    id in
    (
        select
            id
        from
            patient
        group by
            id
        -- exactly one distinct condition, and that condition must be Hypertension;
        -- counting alone would also accept a patient who only has e.g. Diabetes
        having count(distinct condition) = 1
           and min(condition) = 'Hypertension'
    );
Result:
id claim_id condition
-- ----------- ----------------
B 23853 Hypertension
(1 rows affected)
Setup:
-- Sample data: patient A has two conditions across two claims, patient B has one.
CREATE TABLE patient
(
    id        varchar(1),
    claim_id  int,
    condition varchar(16)
);

INSERT INTO patient (id, claim_id, condition)
VALUES
    ('A', 14234, 'Hypertension'),
    ('A', 14234, 'Diabetes'),
    ('A', 63947, 'Diabetes'),
    ('B', 23853, 'Hypertension');
You can do this with a CTE.
I set up this CTE with two parameters, one being the Condition you seek, and the other being the max number of combined conditions to find (in your case 1).
-- Table variables and scalar variables are written with '@' in T-SQL
-- ('#' names temporary tables and cannot be used with DECLARE).
DECLARE @myTable TABLE (Id VARCHAR(1), ClaimID INT, Condition VARCHAR(100))
INSERT INTO @myTable (Id, ClaimID, Condition)
SELECT 'A',14234,'Hypertension' UNION ALL
SELECT 'A',14234,'Diabetes' UNION ALL
SELECT 'A',63947,'Diabetes' UNION ALL
SELECT 'B',23853,'Hypertension'
-- @Condition: the condition a claim must contain.
-- @MaxConditions: the highest number of rows a claim may have to be returned.
DECLARE @Condition VARCHAR(100)
DECLARE @MaxConditions TINYINT
SET @Condition='Hypertension'
SET @MaxConditions=1
; WITH CTE AS
(
-- CN = number of rows that share the ClaimID, among claims containing @Condition
SELECT *, COUNT(*) OVER(PARTITION BY ClaimID) AS CN
FROM @myTable T1
WHERE EXISTS (SELECT 1 FROM @myTable T2 WHERE T1.ClaimID=T2.ClaimID AND T2.Condition=@Condition)
)
SELECT *
FROM CTE
WHERE CN<=@MaxConditions
If you don't care about the fluff, and just want all ClaimID's with just ONE condition regardless of which condition it is use this.
-- Table variables and scalar variables are written with '@' in T-SQL
-- ('#' names temporary tables and cannot be used with DECLARE).
DECLARE @myTable TABLE (Id VARCHAR(1), ClaimID INT, Condition VARCHAR(100))
INSERT INTO @myTable (Id, ClaimID, Condition)
SELECT 'A',14234,'Hypertension' UNION ALL
SELECT 'A',14234,'Diabetes' UNION ALL
SELECT 'A',63947,'Diabetes' UNION ALL
SELECT 'B',23853,'Hypertension'
-- @MaxConditions: the highest number of rows a claim may have to be returned.
DECLARE @MaxConditions TINYINT
SET @MaxConditions=1
; WITH CTE AS
(
-- CN = number of rows that share the ClaimID
SELECT *, COUNT(*) OVER(PARTITION BY ClaimID) AS CN
FROM @myTable T1
)
SELECT *
FROM CTE
WHERE CN<=@MaxConditions
Here is one method using Having clause
-- HAVING without GROUP BY aggregates over all rows of the same patient:
-- keep the patient when at least one row is Hypertension and none is anything else.
SELECT t.*
FROM mytable AS t
WHERE EXISTS (
    SELECT 1
    FROM mytable AS same_patient
    WHERE same_patient.id = t.id
    HAVING COUNT(CASE WHEN same_patient.condition = 'Hypertension' THEN 1 END) > 0
       AND COUNT(CASE WHEN same_patient.condition <> 'Hypertension' THEN 1 END) = 0
)
And yet a couple of other ways to do this:
-- Table variables are written with '@' in T-SQL ('#' names temporary tables and
-- cannot be used with DECLARE). Both queries below must run in the same batch
-- as this declaration.
declare @TableA table(Id char,
                      ClaimId int,
                      Condition varchar(250));
insert into @TableA (id, claimid, condition)
values ('A', 14234, 'Hypertension'),
       ('A', 14234, 'Diabetes'),
       ('A', 63947, 'Diabetes'),
       ('B', 23853, 'Hypertension')

-- Variant 1: keep rows whose patient id does not occur more than once.
-- NOTE(review): neither variant checks that the single row is 'Hypertension';
-- add a filter on condition if that is required.
select id, claimid, condition
from @TableA a
where not exists(select id
                 from @TableA b
                 where a.id = b.id
                 group by b.id
                 having count(b.id) > 1)

-- OR

-- Variant 2: the same rule expressed with CTEs; cte2 lists the ids that occur once.
;with cte as
(
    select id, claimid, condition
    from @TableA
)
,
cte2 as
(
    Select id, count(Id) as counts
    from cte
    group by id
    having count(id) < 2
)
Select cte.id, claimid, condition
From cte
inner join
cte2
on cte.id = cte2.id
I decided to revise my answer into an appropriate one.
A simple solution to your question is to count the rows instead of the ID values (since it's not an integer).
Here is a simple introduction:
-- IDs that occur on exactly one row. The original "ID = ID" test is only false
-- for a NULL ID, so it is written here as an explicit NOT NULL filter.
SELECT ID
FROM #PatientTable
WHERE ID IS NOT NULL
GROUP BY ID
HAVING COUNT(*) = 1
This will Return the ID B
+----+
| ID |
+----+
| B |
+----+
Surely, this is not enough, as you may work with a large data and need more filtering.
So, we will go and use it as a sub-query.
Using it as a sub-query it's simple :
-- Rows of the patient whose ID occurs exactly once.
-- The scalar comparison (=) expects the subquery to return a single ID.
SELECT
    ID,
    ClaimID,
    Condition
FROM #PatientTable
WHERE ID = (
    SELECT ID
    FROM #PatientTable
    WHERE ID IS NOT NULL
    GROUP BY ID
    HAVING COUNT(*) = 1
)
This will return
+----+---------+--------------+
| ID | ClaimID | Condition |
+----+---------+--------------+
| B | 23853 | Hypertension |
+----+---------+--------------+
So far so good, but there is another issue we may face. Let's say you have a multiple Claims from a multiple patients, using this query as is will only show one patient. To show all patients we need to use IN rather than = under the WHERE clause
WHERE
    -- IN accepts any number of IDs from the subquery
    ID IN (
        SELECT ID
        FROM #PatientTable
        WHERE ID IS NOT NULL
        GROUP BY ID
        HAVING COUNT(*) = 1
    )
This will list all patients that falls under this condition.
If you need more conditions to filter, you just add them to the WHERE clause and you'll be good to go.
-- ct is 0 for category 'X' rows and 1 for every other row, so a sum of 0 means
-- the customer has only category 'X' rows.
SELECT id, sum(ct)
FROM (SELECT customer_id AS id, -- expose the key under the name the outer query uses
             CASE WHEN category = 'X' THEN 0 else 1
             end ct
      FROM MASTER_TABLE
     ) AS t1
GROUP BY id
HAVING sum(ct) = 0
An id whose sum(ct) is greater than 0 has at least one row in a category other than 'X', i.e. it has other conditions.
You can also use a join instead of a subquery. Joins often perform as well as or better than subqueries, but compare the execution plans to be sure. You can use the query below.
-- Join every row to the set of patients whose only distinct condition is
-- 'Hypertension'.
SELECT T1.id, T1.claimid, T1.Condition
FROM mytable T1
INNER JOIN
(
    select id
    from mytable
    group by id
    -- one distinct condition, and it must be Hypertension; a row-count test is
    -- deliberately not used, so a patient with several Hypertension claims qualifies
    HAVING COUNT(DISTINCT Condition) = 1
       AND MIN(Condition) = 'Hypertension'
) T2 ON T1.ID=T2.ID
Class| Value
-------------
A | 1
A | 2
A | 3
A | 10
B | 1
I am not sure whether it is practical to achieve this using SQL.
If the difference of values are less than 5 (or x), then group the rows (of course with the same Class)
Expected result
Class| ValueMin | ValueMax
---------------------------
A | 1 | 3
A | 10 | 10
B | 1 | 1
For fixed intervals, we can easily use "GROUP BY". But now the grouping is based on nearby row's value. So if the values are consecutive or very close, they will be "chained together".
Thank you very much
Assuming MSSQL
You are trying to group things by gaps between values. The easiest way to do this is to use the lag() function to find the gaps:
-- Step 1: flag a row as the start of a new group unless the previous value of
--         the same class is less than 5 below it (the first row has no previous
--         value, so it always starts a group).
-- Step 2: a running sum of the flags gives each row its group number.
-- Step 3: collapse each group to its minimum and maximum.
with flagged as (
    select class,
           value,
           case
               when lag(value) over (partition by class order by value) > value - 5 then 0
               else 1
           end as IsNewGroup
    from t
),
numbered as (
    select class,
           value,
           sum(IsNewGroup) over (partition by class order by value) as GroupId
    from flagged
)
select class,
       min(value) as minvalue,
       max(value) as maxvalue
from numbered
group by class, GroupId;
Note that this assumes SQL Server 2012 for the use of lag() and cumulative sum.
Update:
*This answer is incorrect*
Assuming the table you gave is called sd_test, the following query will give you the output you are expecting
In short, we need a way to find what was the value on the previous row. This is determined using a join on row ids. Then create a group to see if the difference is less than 5. and then it is just regular 'Group By'.
If your version of SQL Server supports windowing functions with partitioning the code would be much more readable.
-- Pair every row with the previous row of its class (by row number) and group
-- on whether the two values are less than 5 apart.
-- Because the grouping key is only a 0/1 flag, each class yields at most two groups.
WITH NUMBERED AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY CLASS ORDER BY VALUE) AS ROW_ID,
        CLASS,
        VALUE
    FROM SD_TEST
)
SELECT
    CUR.CLASS,
    MIN(CUR.VALUE) AS MIN_VALUE,
    MAX(CUR.VALUE) AS MAX_VALUE
FROM NUMBERED AS CUR
LEFT JOIN NUMBERED AS PREV
    ON  CUR.CLASS = PREV.CLASS
    AND CUR.ROW_ID = PREV.ROW_ID + 1
GROUP BY
    CUR.CLASS,
    CASE WHEN ABS(COALESCE(PREV.VALUE, 0) - CUR.VALUE) < 5 THEN 1 ELSE 0 END
ORDER BY
    CUR.CLASS,
    CASE WHEN ABS(COALESCE(PREV.VALUE, 0) - CUR.VALUE) < 5 THEN 1 ELSE 0 END DESC
ps: I think the above is ANSI compliant. So should run in most SQL variants. Someone can correct me if it is not.
These give the correct result, using the fact that you must have the same number of group starts as ends and that they will both be in ascending order.
-- Recreate the sample table used by both methods below.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp

CREATE TABLE #temp (class char(1), Value int);

INSERT INTO #temp (class, Value)
VALUES
    ('A', 1),
    ('A', 2),
    ('A', 3),
    ('A', 10),
    ('A', 13),
    ('A', 14),
    ('b', 7),
    ('b', 8),
    ('b', 9),
    ('b', 12),
    ('b', 22),
    ('b', 26),
    ('b', 67);
Method 1 Using CTE and row offsets
-- Method 1: number the distinct values of each class, flag each value that has
-- no close predecessor (a group start) or no close successor (a group end),
-- then pair the n-th start with the n-th end of the same class.
with distinct_values as
(
    -- De-duplicate BEFORE numbering: a DISTINCT placed next to ROW_NUMBER()
    -- removes nothing, because the row number makes every row unique.
    select distinct class, value
    from #temp
),
cte as
(
    select class, value,
           row_number() over (partition by class order by value) as R
    from distinct_values
),
cte2 as
(
    select
         c1.class
        ,c1.value
        ,c2.R as PreviousRec  -- null when the previous value is 5 or more below
        ,c3.R as NextRec      -- null when the next value is 5 or more above
    from
        cte c1
        left join cte c2 on (c1.class = c2.class and c1.R = c2.R + 1 and c1.value < c2.value + 5)
        left join cte c3 on (c1.class = c3.class and c1.R = c3.R - 1 and c1.value > c3.value - 5)
)
select
     Starts.class
    ,Starts.value as StartValue
    ,Ends.value as EndValue
from
(
    select
         class
        ,value
        ,row_number() over (partition by class order by value) as GroupNumber
    from cte2
    where PreviousRec is null) as Starts join
(
    select
         class
        ,value
        ,row_number() over (partition by class order by value) as GroupNumber
    from cte2
    where NextRec is null) as Ends on Starts.class = Ends.class and Starts.GroupNumber = Ends.GroupNumber
** Method 2 Inline views using not exists **
-- Method 2: a value starts a group when no value of its class lies less than 5
-- below it, and ends a group when none lies less than 5 above it.
-- The n-th start of a class is paired with its n-th end.
with distinct_values as
(
    select distinct class, value
    from #temp
),
group_starts as
(
    select d.class,
           d.value,
           row_number() over (partition by d.class order by d.value) as GroupNumber
    from distinct_values d
    where not exists (select 1
                      from #temp lower
                      where lower.class = d.class
                        and lower.Value < d.value
                        and lower.Value > d.value - 5)
),
group_ends as
(
    select d.class,
           d.value,
           row_number() over (partition by d.class order by d.value) as GroupNumber
    from distinct_values d
    where not exists (select 1
                      from #temp higher
                      where higher.class = d.class
                        and higher.Value > d.value
                        and higher.Value < d.value + 5)
)
select
    s.class as Class,
    s.value as StartValue,
    e.value as EndValue
from group_starts s
join group_ends e
    on  s.class = e.class
    and s.GroupNumber = e.GroupNumber
In both methods I use a select distinct to begin with, because if you have a duplicate entry at a group start or end, things go awry without it.
Here is one way of getting the information you are after:
-- Minimum and maximum value per class, computed separately for values below 5
-- and for values of 5 and above (a fixed threshold, not a gap between rows).
-- A plain GROUP BY replaces the per-class correlated subqueries, and UNION ALL
-- is sufficient because the two branches can never produce the same row
-- (ValueMin is < 5 in the first branch and >= 5 in the second).
SELECT m.Class,
       MIN(m.Value) AS ValueMin,
       MAX(m.Value) AS ValueMax
FROM MyTable AS m
WHERE m.Value < 5
GROUP BY m.Class
UNION ALL
SELECT m.Class,
       MIN(m.Value) AS ValueMin,
       MAX(m.Value) AS ValueMax
FROM MyTable AS m
WHERE m.Value >= 5
GROUP BY m.Class
This question already has answers here:
Select top 10 records for each category
(14 answers)
Closed 5 years ago.
Assume we have a table which has two columns, one column contains the names of some people and the other column contains some values related to each person. One person can have more than one value. Each value has a numeric type. The question is we want to select the top 3 values for each person from the table. If one person has less than 3 values, we select all the values for that person.
The issue can be solved if there are no duplicates in the table by the query provided in this article Select top 3 values from each group in a table with SQL . But if there are duplicates, what is the solution?
For example, if for one name John, he has 5 values related to him. They are 20,7,7,7,4. I need to return the name/value pairs as below order by value descending for each name:
-----------+-------+
| name | value |
-----------+-------+
| John | 20 |
| John | 7 |
| John | 7 |
-----------+-------+
Only 3 rows should be returned for John even though there are three 7s for John.
In many modern DBMS (e.g. Postgres, Oracle, SQL-Server, DB2 and many others), the following will work just fine. It uses CTEs and ranking function ROW_NUMBER() which is part of the latest SQL standard:
-- Number each name's rows from the highest value down and keep the first three.
-- ROW_NUMBER() gives tied values distinct numbers, so at most three rows per name.
WITH cte AS (
    SELECT
        src.name,
        src.value,
        ROW_NUMBER() OVER (PARTITION BY src.name ORDER BY src.value DESC) AS rn
    FROM t AS src
)
SELECT
    cte.name,
    cte.value,
    cte.rn
FROM cte
WHERE cte.rn <= 3
ORDER BY
    cte.name,
    cte.rn;
Without CTE, only ROW_NUMBER():
-- Same ranking as a derived table instead of a CTE.
SELECT
    tmp.name,
    tmp.value,
    tmp.rn
FROM (
    SELECT
        src.name,
        src.value,
        ROW_NUMBER() OVER (PARTITION BY src.name ORDER BY src.value DESC) AS rn
    FROM t AS src
) tmp
WHERE tmp.rn <= 3
ORDER BY
    tmp.name,
    tmp.rn;
Tested in:
Postgres
Oracle
SQL-Server
In MySQL and other DBMS that do not have ranking functions, one has to use either derived tables, correlated subqueries or self-joins with GROUP BY.
The (tid) is assumed to be the primary key of the table:
-- Self join and GROUP BY: a row's rank is the number of rows of the same name
-- that sort at or before it (higher value, or equal value and tid not greater).
SELECT
    t.tid,
    t.name,
    t.value,
    COUNT(*) AS rn
FROM t
INNER JOIN t AS ahead
    ON  ahead.name = t.name
    AND (
            ahead.value > t.value
         OR (ahead.value = t.value AND ahead.tid <= t.tid)
        )
GROUP BY
    t.tid,
    t.name,
    t.value
HAVING COUNT(*) <= 3
ORDER BY
    name,
    rn ;
-- Inline, correlated subquery: rn counts the rows of the same name that sort at
-- or before the current row (higher value, or equal value and tid not greater).
SELECT
    ranked.tid,
    ranked.name,
    ranked.value,
    ranked.rn
FROM (
    SELECT
        cur.tid,
        cur.name,
        cur.value,
        (
            SELECT COUNT(*)
            FROM t AS ahead
            WHERE ahead.name = cur.name
              AND (
                      ahead.value > cur.value
                   OR (ahead.value = cur.value AND ahead.tid <= cur.tid)
                  )
        ) AS rn
    FROM t AS cur
) AS ranked
WHERE ranked.rn <= 3
ORDER BY
    ranked.name,
    ranked.rn ;
Tested in MySQL
I was going to downvote the question. However, I realized that it might really be asking for a cross-database solution.
Assuming you are looking for a database independent way to do this, the only way I can think of uses correlated subqueries (or non-equijoins). Here is an example:
-- rank = number of distinct values of the same person that are >= the row's value,
-- so equal values share a rank; DISTINCT then collapses the duplicates.
select distinct ranked.personid,
                ranked.val,
                ranked.rank
from (
    select cur.*,
           (
               select count(distinct higher.val)
               from t higher
               where higher.personid = cur.personid
                 and higher.val >= cur.val
           ) as rank
    from t cur
) ranked
where ranked.rank between 1 and 3
However, each database that you mention (and I note, Hadoop is not a database) has a better way of doing this. Unfortunately, none of them are standard SQL.
Here is an example of it working in SQL Server:
-- Self-contained SQL Server example: t is an inline sample set for one person.
with t as (
    select 1 as personid, 5 as val union all
    select 1 as personid, 6 as val union all
    select 1 as personid, 6 as val union all
    select 1 as personid, 7 as val union all
    select 1 as personid, 8 as val
)
-- rank = number of distinct values of the same person that are >= the row's value
select distinct ranked.personid,
                ranked.val,
                ranked.rank
from (
    select cur.*,
           (
               select count(distinct higher.val)
               from t higher
               where higher.personid = cur.personid
                 and higher.val >= cur.val
           ) as rank
    from t cur
) ranked
where ranked.rank between 1 and 3;
Using GROUP_CONCAT and FIND_IN_SET you can do that.Check SQLFIDDLE.
-- For each name, build the comma-separated list of its three largest values,
-- then keep the rows whose value appears in that list.
-- NOTE(review): membership is tested by value, so it looks like every row tied
-- with a listed value is returned (20,7,7,7,4 would give four rows) -- confirm.
SELECT t.*
FROM tbl AS t
WHERE FIND_IN_SET(
          t.value,
          (
              SELECT SUBSTRING_INDEX(
                         GROUP_CONCAT(top3.value ORDER BY top3.value DESC),
                         ',',
                         3
                     )
              FROM tbl AS top3
              WHERE top3.name = t.name
              GROUP BY top3.name
          )
      ) > 0
ORDER BY
    t.name,
    t.value DESC
If your result set is not too large, you can write a stored procedure (or an anonymous PL/SQL block) that iterates over the result set and finds the biggest three values with a simple comparison algorithm.
Try this -
-- Sample data: five values for one name, including a three-way tie on 7.
CREATE TABLE #list
(
    [name]  varchar(100) NOT NULL,
    [value] int          NOT NULL
)

INSERT INTO #list ([name], [value])
VALUES
    ('John', 20),
    ('John', 7),
    ('John', 7),
    ('John', 7),
    ('John', 4);

-- Number each name's rows from the highest value down and keep the first three.
SELECT
    ranked.NAME,
    ranked.value
FROM (
    SELECT
        src.NAME,
        src.value,
        ROW_NUMBER() OVER (PARTITION BY src.NAME ORDER BY src.value DESC) AS RN
    FROM #list AS src
) AS ranked
WHERE ranked.RN <= 3
ORDER BY ranked.value DESC
This works for MS SQL. It should be workable in any other SQL dialect that can assign row numbers within a group, via an OVER clause or an equivalent.
-- Recreate the sample table.
IF OBJECT_ID('tempdb..#Data') IS NOT NULL
    DROP TABLE #Data;
GO
CREATE TABLE #data (name varchar(25), value integer);
GO
SET NOCOUNT ON;
INSERT INTO #data (name, value)
VALUES
    ('John', 20),
    ('John', 7),
    ('John', 7),
    ('John', 7),
    ('John', 5),
    ('Jack', 5),
    ('Jane', 30),
    ('Jane', 21),
    ('John', 5),
    ('John', -1),
    ('John', -1),
    ('Jane', 18);
SET NOCOUNT OFF;
GO
-- Number each name's rows from the highest value down and keep the first three.
SELECT
    ranked.Name,
    ranked.Value
FROM (
    SELECT
        src.name AS Name,
        src.value AS Value,
        ROW_NUMBER() OVER (PARTITION BY src.name ORDER BY src.value DESC) AS rn
    FROM #Data AS src
) AS ranked
WHERE ranked.rn <= 3
ORDER BY
    ranked.Name,
    ranked.Value DESC
Name Value
Jack 5
Jane 30
Jane 21
Jane 18
John 20
John 7
John 7