How to get minimum 3 records per a group? - sql

I have 3 columns in SalesCart table as follows,
I need to get minimum 3 records per Item as follows,
How to do that?

I guess we can use simply Row_Number() -
-- Sample data: one row per (item, customer) sale amount.
-- A table variable needs the @ prefix; "DECLARE #name TABLE" is not valid T-SQL.
DECLARE @testtable TABLE
(
    ItemCode NVARCHAR(30),
    Customer VARCHAR(10),
    Amount   INT
);

INSERT INTO @testtable (ItemCode, Customer, Amount)
VALUES
    (N'A-001', 'A', 25000),
    (N'A-001', 'B', 15000),
    (N'A-001', 'C', 12000),
    (N'A-001', 'D', 12500),
    (N'A-001', 'E', 20000),
    (N'A-002', 'C', 3000),
    (N'A-002', 'X', 2250),
    (N'A-002', 'Y', 3750),
    (N'A-002', 'D', 3100);

-- Full data set, for reference.
SELECT ItemCode, Customer, Amount
FROM @testtable;

-- Three lowest amounts per item.
-- The ranking is ordered by Amount (Customer breaks ties) so the result is
-- deterministic; ordering by the partition key itself would return three
-- arbitrary rows per item.
-- NOTE(review): assumes "minimum 3 records" means the 3 smallest amounts -
-- change the ORDER BY if a different ranking is intended.
SELECT t.ItemCode, t.Customer, t.Amount, t.Number
FROM
(
    SELECT
        ItemCode,
        Customer,
        Amount,
        ROW_NUMBER() OVER (PARTITION BY ItemCode ORDER BY Amount, Customer) AS Number
    FROM @testtable
) AS t
WHERE t.Number < 4;

You can also try this and you can increase or decrease number based on your requirement dynamically.
-- Number of rows to keep per item; change this value to return more or fewer.
-- Scalar variables need the @ prefix in T-SQL.
DECLARE @top INT;
SET @top = 3;

-- Rank rows inside each item by Amount (Customer breaks ties) so that the rows
-- kept are well defined; ordering by the partition key alone is arbitrary.
-- NOTE(review): assumes the lowest amounts are wanted - confirm.
WITH grp AS
(
    SELECT
        ItemCode,
        Customer,
        Amount,
        ROW_NUMBER() OVER (PARTITION BY ItemCode ORDER BY Amount, Customer) AS rn
    FROM itemTable
)
SELECT ItemCode, Customer, Amount
FROM grp
WHERE rn <= @top
ORDER BY ItemCode DESC, rn;

Related

Insert last not null value in temp table by date

I have this table for testing:
-- Test fixture: exchange rates for a single currency, one of them unknown (NULL).
CREATE TABLE #ExchRates
(
    [TimeId]     INT,
    [CurrencyId] INT,
    [ExchRate]   DECIMAL(30,6)
);

INSERT INTO #ExchRates ([TimeId], [CurrencyId], [ExchRate])
VALUES
    (2017030500, 3, 6.142911),
    (2017030600, 3, 6.152911),
    (2017030700, 3, NULL),      -- rate not set for this TimeId
    (2017030800, 3, 5.5);
I want to insert values from this table into another table for one particular day (TimeId BETWEEN GETUTCDATE()-1 AND GETUTCDATE()). The problem arises when ExchRate is not set (NULL in table #ExchRates). In that case I want to use the last known ExchRate for that currency. How can I solve this problem?
Try this-
-- Most recent row (highest TimeID) that has a known exchange rate.
;WITH ranked AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY TimeID DESC) AS RN
    FROM #ExchRates
    WHERE ExchRate IS NOT NULL
)
SELECT *
FROM ranked
WHERE RN = 1;
If you have more than one currency in the table, you can do this following -
-- Most recent row with a known exchange rate, one per currency.
;WITH ranked AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY CurrencyId ORDER BY TimeID DESC) AS RN
    FROM #ExchRates
    WHERE ExchRate IS NOT NULL
)
SELECT *
FROM ranked
WHERE RN = 1;
for the case of null you can use row_number() for getting the last value
-- Last known rate per currency.
-- Rows with a NULL rate are excluded before ranking; without that filter the
-- newest row wins even when its ExchRate is NULL.
SELECT *
FROM
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY CurrencyId ORDER BY TimeId DESC) AS rn
    FROM #ExchRates
    WHERE ExchRate IS NOT NULL
) AS a
WHERE a.rn = 1;
Here's your query.
-- Copy every rate into Table2, replacing a missing (NULL) rate with the last
-- known rate of the same currency.
-- The fallback is limited to the same CurrencyId, to earlier TimeIds and to
-- non-NULL rates; an unrestricted "TOP 1 ... ORDER BY TimeId DESC" could pick
-- another currency's rate, a later rate, or NULL again.
-- If no earlier rate exists for the currency, ExchRate stays NULL.
INSERT INTO Table2 ([TimeId], [CurrencyId], [ExchRate])
SELECT
    cur.[TimeId],
    cur.[CurrencyId],
    COALESCE(cur.[ExchRate], prev.[ExchRate]) AS [ExchRate]
FROM #ExchRates AS cur
OUTER APPLY
(
    SELECT TOP (1) p.[ExchRate]
    FROM #ExchRates AS p
    WHERE p.[CurrencyId] = cur.[CurrencyId]
      AND p.[TimeId] < cur.[TimeId]
      AND p.[ExchRate] IS NOT NULL
    ORDER BY p.[TimeId] DESC
) AS prev;
Use ROW_NUMBER() to get the last record you want :
-- Latest row per currency that carries a known rate.
-- NULL rates are filtered out first so that the "last record" is the last
-- known rate rather than simply the newest row.
;WITH CTE AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY CurrencyId ORDER BY TimeId DESC) AS rn
    FROM #ExchRates
    WHERE ExchRate IS NOT NULL
)
SELECT *
FROM CTE
WHERE rn = 1;

aggregation according to different conditions on same column

I have a table #tbl like the one below. I need to write a query such that, if there are more than 3 records available
for a particular cid, then avg(val) of that cid is displayed against each id, and if there are fewer than
3 records available for a particular cid, then avg(val) of all available records is displayed instead.
Please suggest.
-- Test fixture: cid 100 has five rows, cid 200 has two, cid 300 has one.
-- "DECLARE #tbl TABLE" is not valid T-SQL; a temp table is used here so that
-- queries reading "#tbl" can run against it in the same session.
IF OBJECT_ID('tempdb..#tbl') IS NOT NULL
    DROP TABLE #tbl;

CREATE TABLE #tbl (id INT, cid INT, val FLOAT);

INSERT INTO #tbl (id, cid, val)
VALUES
    (1, 100, 20), (2, 100, 30), (3, 100, 25), (4, 100, 31), (5, 100, 50),
    (6, 200, 30), (7, 200, 30),
    (8, 300, 90);
Your description is not clear, but I believe you need windowed functions:
-- Tag every row with the size of its cid group, then choose the average:
--   groups with more than 3 rows -> average of that cid
--   groups with 3 rows or fewer  -> average over the whole table
;WITH counted AS
(
    SELECT
        *,
        COUNT(*) OVER (PARTITION BY cid) AS cnt
    FROM #tbl
)
SELECT
    id,
    AVG(val) OVER (PARTITION BY cid) AS Av
FROM counted
WHERE cnt > 3
UNION ALL
SELECT
    id,
    (SELECT AVG(val) FROM counted) AS Av
FROM counted
WHERE cnt <= 3
ORDER BY id;
DBFiddle Demo
EDIT:
-- Single-pass version: per-row choice between the table-wide average
-- (cid groups of 3 rows or fewer) and the per-cid average (larger groups).
-- The computed column is named Av so consumers can reference it by name.
SELECT
    id,
    CASE
        WHEN COUNT(*) OVER (PARTITION BY cid) <= 3 THEN AVG(val) OVER ()
        ELSE AVG(val) OVER (PARTITION BY cid)
    END AS Av
FROM #tbl
ORDER BY id;
DBFiddle Demo2
You can try the following. First calculate the average for each cid depending on its number of occurrences, then join each cid back to the ids to display the whole table.
-- One average per cid: the cid's own average when it has at least 3 rows,
-- otherwise the average over the whole table. The result is joined back so
-- that every original row is shown with its group's figure.
;WITH CidAverages AS
(
    SELECT
        src.cid,
        CASE
            WHEN COUNT(*) >= 3 THEN AVG(src.val)
            ELSE (SELECT AVG(Val) FROM #tbl)
        END AS Average
    FROM #tbl AS src
    GROUP BY src.cid
)
SELECT
    r.*,
    a.Average
FROM #tbl AS r
INNER JOIN CidAverages AS a
    ON a.cid = r.cid
Given the clarifications in comments, I am thinking this is the intention
-- Self-contained demo: one row per cid with the average that applies to it.
-- A table variable needs the @ prefix; "DECLARE #tbl TABLE" is not valid T-SQL.
DECLARE @tbl TABLE (id INT, cid INT, val FLOAT);

INSERT INTO @tbl (id, cid, val)
VALUES
    (1, 100, 20), (2, 100, 30), (3, 100, 25), (4, 100, 31), (5, 100, 50),
    (6, 200, 30), (7, 200, 30),
    (8, 300, 90);

-- More than 3 rows for the cid -> that cid's average; otherwise the average
-- over all rows. OVER () states "whole result set" directly instead of
-- partitioning by a constant.
SELECT DISTINCT
    cid,
    CASE
        WHEN COUNT(*) OVER (PARTITION BY cid) > 3 THEN AVG(val) OVER (PARTITION BY cid)
        ELSE AVG(val) OVER ()
    END AS [avg]
FROM @tbl;
http://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=fdf4c4457220ec64132de7452a034976
cid avg
100 31.2
200 38.25
300 38.25
There are a number of aspects of a query like this that when run at scale though are going to be pretty bad on the query plan, I'd want to test this at a larger scale and tune before using.
The description was not clear on what happened if it was exactly 3, it mentions 'more than 3' and 'less than 3' - within this code the 'more than' was used to determine which category it was in, and less than interpreted to mean 'less than or equal to 3'

Get Second duplicate Record

After doing the joins, I am getting the following result:
CompanyID EmpID Emp_no Location
-------------------- -------------------- ------------- -------------
1 24 100543 First.png
1 24 100543 Second.png
I want to select second Record i.e. Second.png by using CASE WHEN in select query.
Check this out.
-- Demo data: two rows per employee, differing only in location.
-- A table variable needs the @ prefix; "DECLARE #t TABLE" is not valid T-SQL.
DECLARE @t TABLE (CompanyID INT, empid INT, emp_no VARCHAR(50), location VARCHAR(100));

INSERT INTO @t (CompanyID, empid, emp_no, location)
VALUES
    (1, 24, '100543', 'First.png'),
    (1, 24, '100543', 'Second.png'),
    (1, 25, '100544', 'Second.png'),
    (1, 25, '100544', 'First.png');

-- Second row of every (company, employee) pair.
-- Rows are numbered by location because it is the only column that tells the
-- duplicates apart; ordering by the partition keys themselves would make
-- "row 2" arbitrary.
-- NOTE(review): if the real data has a sequence/date column, order by that instead.
SELECT a.rowno, a.CompanyID, a.empid, a.emp_no, a.location
FROM
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY CompanyID, empid ORDER BY location) AS rowno,
        CompanyID,
        empid,
        emp_no,
        location
    FROM @t
) AS a
WHERE a.rowno = 2; --and empid = 24 --here you can give empid to get particular employee detail
In case you want to get multiple empid's second entry in single select statement.
-- Demo data: employees 24 and 25 have two rows each, employee 26 only one.
-- A table variable needs the @ prefix; "DECLARE #t TABLE" is not valid T-SQL.
DECLARE @t TABLE (CompanyID INT, empid INT, emp_no VARCHAR(50), location VARCHAR(100));

INSERT INTO @t (CompanyID, empid, emp_no, location)
VALUES
    (1, 24, '100543', 'First.png'),
    (1, 24, '100543', 'Second.png'),
    (1, 25, '100544', 'Second.png'),
    (1, 25, '100544', 'First.png'),
    (1, 26, '100545', 'First.png');

-- cte : number each employee's rows (ordered by location so the numbering is
--       deterministic; ordering by empid inside an empid partition is arbitrary).
-- cte1: rank those rows from the highest rowno down, so RN = 1 is the last row
--       of each employee - the second row when there are two, and the only row
--       when there is just one (employee 26).
WITH cte AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY empid ORDER BY location) AS rowno,
        CompanyID,
        empid,
        emp_no,
        location
    FROM @t
),
cte1 AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY empid ORDER BY rowno DESC) AS RN
    FROM cte
)
SELECT *
FROM cte1
WHERE RN = 1;
You can write as:
-- Numbered  : number the rows of each (CompanyID, EmpID, Emp_no) combination;
--             ORDER BY (SELECT 1) means no particular order is requested.
-- LastPerKey: highest row number reached by each combination.
-- The final join keeps only the row carrying that highest number.
;WITH Numbered AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY CompanyID, EmpID, Emp_no ORDER BY (SELECT 1)) AS rownum,
        CompanyID,
        EmpID,
        Emp_no,
        Location
    FROM #Test
),
LastPerKey AS
(
    SELECT
        MAX(rownum) AS maxrownum,
        CompanyID,
        EmpID,
        Emp_no
    FROM Numbered
    GROUP BY CompanyID, EmpID, Emp_no
)
SELECT
    n.CompanyID,
    n.EmpID,
    n.Emp_no,
    n.Location
FROM Numbered AS n
INNER JOIN LastPerKey AS k
    ON  n.CompanyID = k.CompanyID
    AND n.EmpID     = k.EmpID
    AND n.Emp_no    = k.Emp_no
    AND n.rownum    = k.maxrownum
Explanation:
As there's no column like primary key through which we can identify
which row comes first you can write SELECT 1 in partition window.
Once you get rownumber for each combination of CompanyID,EmpID and Emp_no you can use second CTE to get the maxrow for each
combination
Just collect the data from the table for all rows with maxrownumbers
Hope this helps:)

SQL Server - How to filter rows based on matching rows?

I have a complex query that feeds into a simple temp table named #tempTBRB.
select * from #tempTBRB ORDER BY AccountID yields this result set:
In all cases, when there is only 1 row for a given AccountID, the row should remain, no problem. But whenever there are 2 rows (there will never be more than 2), I want to keep the row with SDIStatus of 1, and filter out SDIStatus of 2.
Obviously if I used a simple where clause like "WHERE SDIStatus = 1", that wouldn't work, because it would filter out a lot of valid rows in which there is only 1 row for an AccountID, and the SDIStatus is 2.
Another way of saying it is that I want to filter out all rows with an SDIStatus of 2 ONLY WHEN there is another row for the same AccountID. And when there are 2 rows for the same AccountID, there will always be exactly 1 row with SDIStatus of 1 and 1 row with SDIStatus of 2.
I am using SQL Server 2012. How is it done?
-- Step 1: lowest SDIStatus seen for each account, stored in #MinTable.
SELECT
    src.AccountID,
    MIN(src.SDIStatus) AS MinSDIStatus
INTO #MinTable
FROM #tempTBRB AS src
GROUP BY src.AccountID;

-- Step 2: keep only the rows whose status equals their account's lowest status.
SELECT *
FROM #tempTBRB AS acct
INNER JOIN #MinTable AS lowest
    ON  acct.AccountID = lowest.AccountID
    AND acct.SDIStatus = lowest.MinSDIStatus;

-- Step 3: remove the helper table.
DROP TABLE #MinTable;
Here is a little test that worked for me. If you just add the extra columns in your SELECT statements, all should be well:
-- Test fixture: account 4137728 has two rows (SDIStatus 1 and 2); the other
-- accounts have a single row with SDIStatus 2.
CREATE TABLE #Temp ( ID int, AccountID int, Balance money, SDIStatus int );
INSERT INTO #Temp ( ID, AccountID, Balance, SDIStatus ) VALUES ( 1, 4100923, -31.41, 2 );
INSERT INTO #Temp ( ID, AccountID, Balance, SDIStatus ) VALUES ( 2, 4132170, 0, 2 );
INSERT INTO #Temp ( ID, AccountID, Balance, SDIStatus ) VALUES ( 3, 4137728, 193.10, 1 );
INSERT INTO #Temp ( ID, AccountID, Balance, SDIStatus ) VALUES ( 4, 4137728, 0, 2 );

-- One row per account. Ordering by SDIStatus ascending puts status 1 first, so
-- it wins whenever an account has both statuses; a lone status-2 row is still
-- kept. (Descending order would keep the status-2 row, the opposite of the
-- requirement.)
SELECT ID, AccountID, Balance, SDIStatus
FROM
(
    SELECT ID, AccountID, Balance, SDIStatus,
        ROW_NUMBER() OVER (PARTITION BY AccountID ORDER BY SDIStatus) AS rn
    FROM #Temp
) x
WHERE x.rn = 1
ORDER BY ID;

DROP TABLE #Temp;
Yields the following:
ID AccountID Balance SDIStatus
1 4100923 -31.41 2
2 4132170 0.00 2
3 4137728 193.10 1
I guess you need a similar code, make the necessary changes according to your table structure
-- Demo data. "DECLARE #tab TABLE" is not valid T-SQL; a temp table is used so
-- that queries reading "#tab" can run against it in the same session.
IF OBJECT_ID('tempdb..#tab') IS NOT NULL
    DROP TABLE #tab;

CREATE TABLE #tab (ID INT IDENTITY (1,1), AccountID INT, SDISTATUS INT);

INSERT INTO #tab (AccountID, SDISTATUS)
VALUES (4137728, 1), (4137728, 2), (41377, 1), (41328, 2);

-- One row per account: the row with the lowest SDISTATUS.
SELECT T.ID, T.AccountID, T.SDISTATUS, T.RN
FROM
(
    SELECT
        ID,
        AccountID,
        SDISTATUS,
        ROW_NUMBER() OVER (PARTITION BY AccountID ORDER BY SDISTATUS) AS RN
    FROM #tab
) AS T
WHERE T.RN = 1;
Or
-- CTE form of the same query: lowest SDISTATUS row per account.
-- The outer filter refers to RN directly; no alias "t" exists in this statement.
;WITH CTE AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY AccountID ORDER BY SDISTATUS) AS RN
    FROM #tab
)
SELECT *
FROM CTE
WHERE RN = 1;

SQL query - strange behaviour

-- Question exhibit: paging over order ids that returns more rows than expected.
-- Only syntax is repaired here (valid temp tables, @ scalar variables, the
-- missing "ord" alias). The DISTINCT + ROW_NUMBER() combination is kept as
-- written because it is the behaviour being asked about.
-- NOTE(review): @RowsCount, @PageIndex and @PageSize are not declared in this
-- snippet - presumably they come from the enclosing procedure; confirm.
-- NOTE(review): the real query presumably joins Orders to other tables (which
-- is where duplicate ids would come from) - confirm.
IF OBJECT_ID('tempdb..#OrdersTemp') IS NOT NULL
    DROP TABLE #OrdersTemp;
CREATE TABLE #OrdersTemp
(
    OrderId UNIQUEIDENTIFIER
);

INSERT INTO #OrdersTemp (OrderId)
SELECT ord.Id
FROM Orders AS ord;

--all rows count
SELECT
    @RowsCount = COUNT(DISTINCT ord.Id)
FROM Orders AS ord;
--@RowsCount = 5. It's right!

--second table with paging
IF OBJECT_ID('tempdb..#OrdersTempWithPaging') IS NOT NULL
    DROP TABLE #OrdersTempWithPaging;
CREATE TABLE #OrdersTempWithPaging
(
    OrderId UNIQUEIDENTIFIER
);

INSERT INTO #OrdersTempWithPaging (OrderId)
SELECT OrderId
FROM (SELECT DISTINCT OrderId,
             ROW_NUMBER() OVER (ORDER BY OrderId) AS RowNum
      FROM #OrdersTemp) AS alias
WHERE
    RowNum BETWEEN (@PageIndex - 1) * @PageSize + 1
               AND @PageIndex * @PageSize;

SELECT * FROM #OrdersTempWithPaging;
--10 or more rows. It's wrong.
--10 or more rows. It's wrong.
Why does #OrdersTempWithPaging return wrong amount of rows? How do I avoid it?
UPDATE:
The statement below returns 25 = 5*5 rows (instead of 5)
-- Follow-up exhibit: the same insert with the paging filter commented out, so
-- every numbered row of #OrdersTemp is copied into #OrdersTempWithPaging and
-- then listed.
INSERT INTO #OrdersTempWithPaging
SELECT OrderId
FROM (
SELECT OrderId,
ROW_NUMBER() OVER (ORDER BY OrderId ) AS RowNum
FROM #OrdersTemp ) AS alias
-- Paging filter, disabled for this test:
--WHERE RowNum BETWEEN ( @PageIndex - 1 ) * @PageSize + 1
-- AND @PageIndex * @PageSize
SELECT * FROM #OrdersTempWithPaging
It's because your ordering inside the select,
SELECT DISTINCT OrderId,
ROW_NUMBER() OVER (ORDER BY OrderId ) AS RowNumber
You have to choose an ordering over a column for which you don't need DISTINCT in the selection.
SELECT OrderId,
ROW_NUMBER() OVER (ORDER BY OrderId ) AS RowNumber
Try it, without DISTINCT
Try this (reversing the DISTINCT use):
-- Number all rows first, cut out the requested page, then remove duplicate
-- ids from that page with DISTINCT in the outer query.
-- Scalar variables need the @ prefix in T-SQL.
-- NOTE(review): @PageIndex and @PageSize are not declared in this snippet -
-- presumably supplied by the caller; confirm.
INSERT INTO #OrdersTempWithPaging (OrderId)
SELECT DISTINCT numbered.OrderId
FROM
(
    SELECT
        OrderId,
        ROW_NUMBER() OVER (ORDER BY OrderId) AS RowNum
    FROM #OrdersTemp
) AS numbered
WHERE numbered.RowNum BETWEEN (@PageIndex - 1) * @PageSize + 1
                          AND @PageIndex * @PageSize;
If you need only distinct order-ids, you could have:
-- Load each order id once, so later row numbering sees no duplicates.
-- The alias "ord" must be declared on the table for "ord.Id" to resolve.
INSERT INTO #OrdersTemp (OrderId)
SELECT DISTINCT ord.Id
FROM Orders AS ord;
and then:
-- Page through #OrdersTemp by row number.
-- Scalar variables need the @ prefix in T-SQL.
-- NOTE(review): @PageIndex and @PageSize are not declared in this snippet -
-- presumably supplied by the caller; confirm.
INSERT INTO #OrdersTempWithPaging (OrderId)
SELECT numbered.OrderId
FROM
(
    SELECT
        OrderId,
        ROW_NUMBER() OVER (ORDER BY OrderId) AS RowNum
    FROM #OrdersTemp
) AS numbered
WHERE numbered.RowNum BETWEEN (@PageIndex - 1) * @PageSize + 1
                          AND @PageIndex * @PageSize;
Instead of
-- Form to avoid: DISTINCT is applied to the (OrderId, RowNum) pairs, and since
-- RowNum differs on every row, duplicate OrderId values are not removed.
SELECT DISTINCT OrderId,
ROW_NUMBER() OVER (ORDER BY OrderId) AS RowNum
FROM #OrdersTemp
use
-- Preferred form: GROUP BY collapses the rows to one per OrderId, and
-- ROW_NUMBER() then numbers those grouped rows.
SELECT OrderId,
ROW_NUMBER() OVER (ORDER BY OrderId) AS RowNum
FROM #OrdersTemp
GROUP BY OrderId
This is an interesting case of another difference between SELECT DISTINCT and SELECT GROUP BY, which manifests itself when the select list includes a ranking function.
In the first query the output includes duplicate OrderId values from #OrdersTemp because the ranking function is evaluated before DISTINCT is applied . In contrast, the second query first groups the rows by OrderId (i.e. effectively selects distinct OrderId values first) and then applies ranking.