left join without duplicate values using MIN() - sql

I have a table_1:
id custno
1 1
2 2
3 3
and a table_2:
id custno qty descr
1 1 10 a
2 1 7 b
3 2 4 c
4 3 7 d
5 1 5 e
6 1 5 f
When I run this query to show the minimum order quantities from every customer:
SELECT DISTINCT table_1.custno,table_2.qty,table_2.descr
FROM table_1
LEFT OUTER JOIN table_2
ON table_1.custno = table_2.custno AND qty = (SELECT MIN(qty) FROM table_2
WHERE table_2.custno = table_1.custno )
Then I get this result:
custno qty descr
1 5 e
1 5 f
2 4 c
3 7 d
Customer 1 appears twice each time with the same minimum qty (& a different description) but I only want to see customer 1 appear once. I don't care if that is the record with 'e' as a description or 'f' as a description.

First of all... I'm not sure why you need to include table_1 in the queries to begin with:
select custno, min(qty) as min_qty
from table_2
group by custno;
But just in case there is other information that you need that wasn't included in the question:
select table_1.custno, ifnull(min(qty),0) as min_qty
from table_1
left outer join table_2
on table_1.custno = table_2.custno
group by table_1.custno;

"Generic" SQL way:
SELECT table_1.custno,table_2.qty,table_2.descr
FROM table_1, table_2
WHERE table_2.id = (SELECT TOP 1 id
FROM table_2
WHERE custno = table_1.custno
ORDER BY qty )
SQL 2008 way (probably faster):
SELECT custno, qty, descr
FROM
(SELECT
custno,
qty,
descr,
ROW_NUMBER() OVER (PARTITION BY custno ORDER BY qty) RowNum
FROM table_2
) A
WHERE RowNum = 1

If you use SQL-Server you could use ROW_NUMBER and a CTE:
WITH CTE AS
(
SELECT table_1.custno,table_2.qty,table_2.descr,
RN = ROW_NUMBER() OVER ( PARTITION BY table_1.custno
Order By table_2.qty ASC)
FROM table_1
LEFT OUTER JOIN table_2
ON table_1.custno = table_2.custno
)
SELECT custno, qty,descr
FROM CTE
WHERE RN = 1
Demolink

Related

How do i select all columns, plus the result of the sum

I have this select:
"Select * from table" that return:
Id
Value
1
1
1
1
2
10
2
10
My goal is create a sum from each Value group by id like this:
Id
Value
Sum
1
1
2
1
1
2
2
10
20
2
10
20
I Have tried ways like:
SELECT Id,Value, (SELECT SUM(Value) FROM Table V2 WHERE V2.Id= V.Id GROUP BY IDRNC ) FROM Table v;
But the is not grouping by id.
Id
Value
Sum
1
1
1
1
1
1
2
10
10
2
10
10
Aggregation aggregates rows, reducing the number of records in the output. In this case you want to apply the result of a computation to each of your records, task carried out by the corresponding window function.
SELECT table.*, SUM(Value) OVER(PARTITION BY Id) AS sum_
FROM table
Check the demo here.
Your attempt looks correct.
Can you try the below query :
It works for me :
SELECT Id, Value,
(SELECT SUM(Value) FROM Table V2 WHERE V2.Id= V.Id GROUP BY ID) as sum
FROM Table v;
You can do it using inner join to join with selection grouped by id :
select t.*, sum
from _table t
inner join (
select id, sum(Value) as sum
from _table
group by id
) as s on s.id = t.id
You can check it here
Your select is ok if you adjust it just a little:
SELECT Id,Value, (SELECT SUM(Value) FROM Table V2 WHERE V2.Id= V.Id GROUP BY IDRNC ) FROM Table v;
GROUP BY IDRNC is a mistake and should be GROUP BY ID
you should give an alias to a sum column ...
subquery selecting the sum does not have to have self table alias to be compared with outer query that has one (this is not a mistake - works either way)
Test:
WITH
a_table (ID, VALUE) AS
(
Select 1, 1 From Dual Union All
Select 1, 1 From Dual Union All
Select 2, 10 From Dual Union All
Select 2, 10 From Dual
)
SELECT ID, VALUE, (SELECT SUM(VALUE) FROM a_table WHERE ID = v.ID GROUP BY ID) "ID_SUM" FROM a_table v;
ID VALUE ID_SUM
---------- ---------- ----------
1 1 2
1 1 2
2 10 20
2 10 20

Reorder the rows of a table according to the numbers of similar cells in a specific column using SQL

I have a table like this:
D
S
2
1
2
3
4
2
4
3
4
5
6
1
in which the code of symptoms(S) of three diseases(D) are shown. I want to rearrange this table (D-S) such that the diseases with more symptoms come up i.e. order it by decreasing the numbers of symptoms as below:
D
S
4
2
4
3
4
5
2
1
2
3
6
1
Can anyone help me to write a SQL code for it in SQL server?
I had tried to do this as the following but this doesn't work:
SELECT *
FROM (
select D, Count(S) cnt
from [D-S]
group by D
) Q
order by Q.cnt desc
select
D,
S
from
D-S
order by
count(*) over(partition by D) desc,
D,
S;
Two easy ways to approach this:
--==== Sample Data
DECLARE #t TABLE (D INT, S INT);
INSERT #t VALUES(2,1),(2,3),(4,2),(4,3),(4,5),(6,1);
--==== Using Window Function
SELECT t.D, t.S
FROM (SELECT t.*, Rnk = COUNT(*) OVER (PARTITION BY t.D) FROM #t AS t) AS t
ORDER BY t.Rnk DESC;
--==== Using standard GROUP BY
SELECT t.*
FROM #t AS t
JOIN
(
SELECT t2.D, Cnt = COUNT(*)
FROM #t AS t2
GROUP BY t2.D
) AS t2 ON t.D = t2.D
ORDER BY t2.Cnt DESC;
Results:
D S
----------- -----------
4 2
4 3
4 5
2 1
2 3
6 1

SQL get the closest two rows within duplicate rows

I have following table
ID Name Stage
1 A 1
1 B 2
1 C 3
1 A 4
1 N 5
1 B 6
1 J 7
1 C 8
1 D 9
1 E 10
I need output as below with parameters A and N need to select closest rows where difference between stage is smallest
ID Name Stage
1 A 4
1 N 5
I need to select rows where difference between stage is smallest
This query can make use of an index on (name, stage) efficiently:
WITH cte AS (
SELECT TOP 1
a.id AS a_id, a.name AS a_name, a.stage AS a_stage
, n.id AS n_id, n.name AS n_name, n.stage AS n_stage
FROM tbl a
CROSS APPLY (
SELECT TOP 1 *, stage - a.stage AS diff
FROM tbl
WHERE name = 'N'
AND stage >= a.stage
ORDER BY stage
UNION ALL
SELECT TOP 1 *, a.stage - stage AS diff
FROM tbl
WHERE name = 'N'
AND stage < a.stage
ORDER BY stage DESC
) n
WHERE a.name = 'A'
ORDER BY diff
)
SELECT a_id AS id, a_name AS name, a_stage AS stage FROM cte
UNION ALL
SELECT n_id, n_name, n_stage FROM cte;
SQL Server uses CROSS APPLY in place of standard-SQL LATERAL.
In case of ties (equal difference) the winner is arbitrary, unless you add more ORDER BY expressions as tiebreaker.
dbfiddle here
This solution works, if u know the minimum difference is always 1
SELECT *
FROM myTable as a
CROSS JOIN myTable as b
where a.stage-b.stage=1;
a.ID a.Name a.Stage b.ID b.Name b.Stage
1 A 4 1 N 5
Or simpler if u don't know the minimum
SELECT *
FROM myTable as a
CROSS JOIN myTable as b
where a.stage-b.stage in (SELECT min (a.stage-b.stage)
FROM myTable as a
CROSS JOIN myTable as b)

SQL select top if columns are same

If I have a table like this:
Id StateId Name
1 1 a
2 2 b
3 1 c
4 1 d
5 3 e
6 2 f
I want to select like below:
Id StateId Name
4 1 d
5 3 e
6 2 f
For example, Ids 1,3,4 have stateid 1. So select row with max Id, i.e, 4.
; WITH CTE AS
(
SELECT *, ROW_NUMBER() OVER(PARTITION BY STATEID ORDER BY ID DESC) AS RN
)SELECT ID, STATEID, NAME FROM CTE WHERE RN = 1
You can use ROW_NUMBER() + TOP 1 WITH TIES:
SELECT TOP 1 WITH TIES
Id,
StateId,
[Name]
FROM YourTable
ORDER BY ROW_NUMBER() OVER (PARTITION BY StateId ORDER BY Id DESC)
Output:
Id StateId Name
4 1 d
6 2 f
5 3 e
Disclaimer: I gave this answer before the OP had specified an actual database, and hence avoided using window functions. For a possibly more appropriate answer, see the reply by #Tanjim above.
Here is an option using joins which should work across most RDBMS.
SELECT t1.*
FROM yourTable t1
INNER JOIN
(
SELECT StateId, MAX(Id) AS Id
FROM yourTable
GROUP BY StateId
) t2
ON t1.StateId = t2.StateId AND
t1.Id = t2.Id
The following using a subquery, to find the maximum Id for each of the states. The WHERE clause then only includes rows with ids from that subquery.
SELECT
[Id], [StateID], [Name]
FROM
TABLENAME S1
WHERE
Id IN (SELECT MAX(Id) FROM TABLENAME S2 WHERE S2.StateID = S1.StateID)

Any other alternative to write this SQL query

I need to select data base upon three conditions
Find the latest date (StorageDate Column) from the table for each record
See if there is more then one entry for date (StorageDate Column) found in first step for same ID (ID Column)
and then see if DuplicateID is = 2
So if table has following data:
ID |StorageDate | DuplicateTypeID
1 |2014-10-22 | 1
1 |2014-10-22 | 2
1 |2014-10-18 | 1
2 |2014-10-12 | 1
3 |2014-10-11 | 1
4 |2014-09-02 | 1
4 |2014-09-02 | 2
Then I should get following results
ID
1
4
I have written following query but it is really slow, I was wondering if anyone has better way to write it.
SELECT DISTINCT(TD.RecordID)
FROM dbo.MyTable TD
JOIN (
SELECT T1.RecordID, T2.MaxDate,COUNT(*) AS RecordCount
FROM MyTable T1 WITH (nolock)
JOIN (
SELECT RecordID, MAX(StorageDate) AS MaxDate
FROM MyTable WITH (nolock)
GROUP BY RecordID)T2
ON T1.RecordID = T2.RecordID AND T1.StorageDate = T2.MaxDate
GROUP BY T1.RecordID, T2.MaxDate
HAVING COUNT(*) > 1
)PT ON TD.RecordID = PT.RecordID AND TD.StorageDate = PT.MaxDate
WHERE TD.DuplicateTypeID = 2
Try this and see how the performance goes:
;WITH
tmp AS
(
SELECT *,
RANK() OVER (PARTITION BY ID ORDER BY StorageDate DESC) AS StorageDateRank,
COUNT(ID) OVER (PARTITION BY ID, StorageDate) AS StorageDateCount
FROM MyTable
)
SELECT DISTINCT ID
FROM tmp
WHERE StorageDateRank = 1 -- latest date for each ID
AND StorageDateCount > 1 -- more than 1 entry for date
AND DuplicateTypeID = 2 -- DuplicateTypeID = 2
You can use analytic function rank , can you try this query ?
Select recordId from
(
select *, rank() over ( partition by recordId order by [StorageDate] desc) as rn
from mytable
) T
where rn =1
group by recordId
having count(*) >1
and sum( case when duplicatetypeid =2 then 1 else 0 end) >=1