T-SQL using SUM for a running total - sql

I have a simple table with some dummy data setup like:
|id|user|value|
---------------
1 John 2
2 Ted 1
3 John 4
4 Ted 2
I can select a running total by executing the following sql(MSSQL 2008) statement:
SELECT a.id, a.user, a.value, SUM(b.value) AS total
FROM table a INNER JOIN table b
ON a.id >= b.id
AND a.user = b.user
GROUP BY a.id, a.user, a.value
ORDER BY a.id
This will give me results like:
|id|user|value|total|
---------------------
1 John 2 2
3 John 4 6
2 Ted 1 1
4 Ted 2 3
Now is it possible to only retrieve the most recent rows for each user? So the result would be:
|id|user|value|total|
---------------------
3 John 4 6
4 Ted 2 3
Am I going about this the right way? any suggestions or a new path to follow would be great!

No join is needed, you can speed up the query this way:
select id, [user], value, total
from
(
select id, [user], value,
row_number() over (partition by [user] order by id desc) rn,
sum(value) over (partition by [user]) total
from users
) a
where rn = 1

try this:
;with cte as
(SELECT a.id, a.[user], a.value, SUM(b.value) AS total
FROM users a INNER JOIN users b
ON a.id >= b.id
AND a.[user] = b.[user]
GROUP BY a.id, a.[user], a.value
),
cte1 as (select *,ROW_NUMBER() over (partition by [user]
order by total desc) as row_num
from cte)
select id,[user],value,total from cte1 where row_num=1
SQL Fiddle Demo

add where statement:
select * from
(
your select statement
) t
where t.id in (select max(id) from table group by user)
also you can use this query:
SELECT a.id, a.user, a.value,
(select max(b.value) from table b where b.user=a.user) AS total
FROM table a
where a.id in (select max(id) from table group by user)
ORDER BY a.id

Adding a right join would perform better than nested select.
Or even simpler:
SELECT MAX(id), [user], MAX(value), SUM(value)
FROM table
GROUP BY [user]

Compatible with SQL Server 2008 or later
DECLARE #AnotherTbl TABLE
(
id INT
, somedate DATE
, somevalue DECIMAL(18, 4)
, runningtotal DECIMAL(18, 4)
)
INSERT INTO #AnotherTbl
(
id
, somedate
, somevalue
, runningtotal
)
SELECT LEDGER_ID
, LL.LEDGER_DocDate
, LL.LEDGER_Amount
, NULL
FROM ACC_Ledger LL
ORDER BY LL.LEDGER_DocDate
DECLARE #RunningTotal DECIMAL(18, 4)
SET #RunningTotal = 0
UPDATE #AnotherTbl
SET #RunningTotal=runningtotal = #RunningTotal + somevalue
FROM #AnotherTbl
SELECT *
FROM #AnotherTbl

Related

Randomly join tables and return columns

I have these 3 tables:
CREATE TABLE DimEmployee(EmployeeID INT)
CREATE TABLE DimDepartment(DepartmentID INT)
CREATE TABLE DimDocteur(PositionID INT)
INSERT INTO DimEmployee(EmployeeID) VALUES (1),(2),(3)
INSERT INTO DimDepartment(DepartmentID) VALUES (1),(5),(6)
INSERT INTO DimPosition(PositionID) VALUES (7),(8),(9)
I want to randomly join the 3 tables and get output like below : (example)
First execute:
EmployeeID DepartmentID PositionID RandomDate
1 4 7 2020-07-24 00:00:00.000
2 5 9 2020-11-25 00:00:00.000
Second execute:
EmployeeID DepartmentID PositionID RandomDate
1 4 7 2020-05-04 00:00:00.000
2 5 9 2020-10-30 00:00:00.000
If you want a random join :
SELECT DP.EmployeeID, Q.Department INTO #T1
FROM DimEmployee AS DP
CROSS APPLY (SELECT TOP 1 Dd.DepartmentID FROM DimDepartment AS DD
ORDER BY NEWID() ) AS Q
SELECT *
INTO #T2
FROM #T1 AS T
CROSS APPLY (SELECT TOP 1 DP.PositionID FROM DimPosition AS DP
ORDER BY NEWID() ) AS Q
Or if you want all possibilities :
SELECT
a.EmployeeID, b.DepartmentID, c.PositionID
FROM
DimEmployee AS a
CROSS JOIN
DimDepartment AS b
CROSS JOIN
DimPosition AS c
You need to row-number each table and join on row-number:
CREATE TABLE DimEmployee(EmployeeID INT)
CREATE TABLE DimDepartment(DepartmentID INT)
CREATE TABLE DimDocteur(PositionID INT)
SELECT
emp.EmployeeID,
dep.DepartmentID,
doc.PositionID,
DATEADD(day, (ABS(CHECKSUM(NEWID())) % 65530), 0) RandomDate
FROM (
SELECT *, ROW_NUMBER() OVER(ORDER BY (SELECT 1)) rn
FROM DimEmployee
) emp
JOIN (
SELECT *, ROW_NUMBER() OVER(ORDER BY (SELECT 1)) rn
FROM DimDepartment
) dep ON dep.rn = emp.rn
JOIN (
SELECT *, ROW_NUMBER() OVER(ORDER BY (SELECT 1)) rn
FROM DimDocteur
) doc ON doc.rn = emp.rn
You can also change the ORDER BY to ORDER BY NEWID() to get a more random ordering.

Query to get the list of Addresses that have AddressType one else AddressType two?

Consider the folowing table
Id PersonId Address AddressTypeId
--------------------------------------------------------------------
1 1 AI1P1T1 1
2 1 AI2P1T2 2
3 2 AI3P2T2 2
I want to write a query to print the list of Addresses of Persons who have AddressType =1 or AddressTypeId=2 and
When person has AddressType =1 then select it,
else select person with AddressType =2
Expected result:
Address
--------------
AI1P1T1
AI3P2T2
Good day,
Please check if this solve your needs:
/***************************** DDL+DML */
drop table if exists T;
create table T(Id int,PersonId int, [Address] nvarchar(10), AddressTypeId int)
INSERT T(Id,PersonId, [Address], AddressTypeId)
values
(1,1,'AI1P1T1',1),
(2,1,'AI2P1T2',2),
(3,2,'AI3P2T2',2)
GO
select * from T
GO
/***************************** Solution */
With MyCTE as (
select *, ROW_NUMBER() OVER (partition by PersonId order by AddressTypeId) as RN
from T
)
select [Address]
from MyCTE
where
AddressTypeId in (1,2) -- if there can be only positive numbers then you can use "< 3"
and RN = 1
GO
You can try this also using joins:
select t1.PersonId,t1.Address from #T t1
inner join (select personid,min(AddressTypeId)atype from #T
group by PersonId )x
on x.atype=t1.AddressTypeId and x.PersonId=t1.PersonId
I would write a subquery to make ROW_NUMBER by window function, then use MAX in the main query.
SELECT
PersonId, MAX(Address) Address
FROM
(SELECT
PersonId,
(CASE
WHEN ROW_NUMBER() OVER (PARTITION BY PersonId ORDER BY PersonId) = 1
THEN Address
END) Address
FROM
T
WHERE
AddressTypeId IN (1,2)
) t1
GROUP BY
PersonId
sqlfiddle
[Results]:
| PersonId | Address |
+----------+---------+
| 1 | AI1P1T1 |
| 2 | AI3P2T2 |
Here's the top 1 with ties trick:
select top 1 * with ties
from yourtable
order by row_number() over (partition by PersonId order by AddressTypeId)
This will also work for versions <2012, and can return every field
You could use an union between the result for the result for only 1, only 2 and 1 when 1 and 2
select Address
from my_table m
Inner join (
select PersonId , count(distinct distinct AddressTypeId)
from my_table
where AddressTypeId in (1, 2)
group by PersonId
having count(distinct AddressTypeId) = 2
) t on t.personId = m.personId andm.AddressTypeId = 1
UNION
select Address
from my_table m
Inner join (
select PersonId , count(distinct distinct AddressTypeId)
from my_table
where AddressTypeId in ( 2)
group by PersonId
having count(distinct AddressTypeId) = 1
) t on t.personId = m.personId andm.AddressTypeId = 2
UNION
select Address
from my_table m
Inner join (
select PersonId , count(distinct distinct AddressTypeId)
from my_table
where AddressTypeId in ( 1)
group by PersonId
having count(distinct AddressTypeId) = 1
) t on t.personId = m.personId andm.AddressTypeId = 1
Try this one
select personId, last_value(Address) over(partition by personId order by AddressTypeId) as Address
from table
--use the where statement optionally
--where AddressTypeId in (1,2);

Microsoft SQL server to select Top N group

There are a lot of answers about how to select n rows from each group.
But what I am looking for is to select every row from top N group, for example I have the data below:
id group
1 a
2 a
3 b
4 c
5 c
6 d
7 d
.......
If I want to select Top 3 Group, my intended results as below:
1 a
2 a
3 b
4 c
5 c
How can I achieve this with Microsoft SQL server 2008?
One option is to use a subquery which selects the top N groups:
SELECT t1.id, t1.group
FROM yourTable t1
INNER JOIN
(
SELECT DISTINCT TOP(N) group
FROM yourTable
ORDER BY group
) t2
ON t1.group = t2.group
You could rank your rows by the group and then take only the top three:
SELECT [id], [group]
FROM (SELECT [id], [group], RANK() OVER (ORDER BY [group] ASC) rk
FROM mytable) t
WHERE rk <= 3
#Tim: I just modified your query.
SELECT t1.id, t1.group
FROM yourTable t1
INNER JOIN
(
SELECT TOP N group
FROM yourTable
GROUP BY group
--ORDER BY group USE IT IF YOU WANT
) t2
ON t1.group = t2.group

How to filter out the first and last entry from a table using RANK?

I've this data:
Id Date Value
'a' 2000 55
'a' 2001 3
'a' 2012 2
'a' 2014 5
'b' 1999 10
'b' 2014 110
'b' 2015 8
'c' 2011 4
'c' 2012 33
I want to filter out the first and the last value (when the table is sorted on the Date column), and only keep the other values. In case there are only two entries, nothing is returned. (Example for Id = 'c')
ID Date Value
'a' 2001 3
'a' 2012 2
'b' 2014 110
I tried to use order by (RANK() OVER (PARTITION BY [Id] ORDER BY Date ...)) in combination with this article (http://blog.sqlauthority.com/2008/03/02/sql-server-how-to-retrieve-top-and-bottom-rows-together-using-t-sql/) but I can't get it to work.
[UPDATE]
All the 3 answers seem fine. But I'm not a SQL expert, so my question is which one has the fastest performance if the table has around 800000 rows and there a no indexes on any column.
You can use row_number twice to determine the min and max dates and then filter accordingly:
with cte as (
select id, [date], value,
row_number() over (partition by id order by [date]) minrn,
row_number() over (partition by id order by [date] desc) maxrn
from data
)
select id, [date], value
from cte
where minrn != 1 and maxrn != 1
SQL Fiddle Demo
Here's another approach using min and max for this without needing to use a ranking function:
with cte as (
select id, min([date]) mindate, max([date]) maxdate
from data
group by id
)
select *
from data d
where not exists (
select 1
from cte c
where d.id = c.id and d.[date] in (c.mindate, c.maxdate))
More Fiddle
Here is a similar solution with row_number and count :
SELECT id,
dat,
value
FROM (SELECT *,
ROW_NUMBER()
OVER(
partition BY id
ORDER BY dat) rnk,
COUNT(*)
OVER (
partition BY id) cnt
FROM #table) t
WHERE rnk NOT IN( 1, cnt )
You can do this with EXISTS:
SELECT *
FROM Table1 a
WHERE EXISTS (SELECT 1
FROM Table1 b
WHERE a.ID = b.ID
AND b.Date < a.Date
)
AND EXISTS (SELECT 1
FROM Table1 b
WHERE a.ID = b.ID
AND b.Date > a.Date
)
Demo: SQL Fiddle

left join without duplicate values using MIN()

I have a table_1:
id custno
1 1
2 2
3 3
and a table_2:
id custno qty descr
1 1 10 a
2 1 7 b
3 2 4 c
4 3 7 d
5 1 5 e
6 1 5 f
When I run this query to show the minimum order quantities from every customer:
SELECT DISTINCT table_1.custno,table_2.qty,table_2.descr
FROM table_1
LEFT OUTER JOIN table_2
ON table_1.custno = table_2.custno AND qty = (SELECT MIN(qty) FROM table_2
WHERE table_2.custno = table_1.custno )
Then I get this result:
custno qty descr
1 5 e
1 5 f
2 4 c
3 7 d
Customer 1 appears twice each time with the same minimum qty (& a different description) but I only want to see customer 1 appear once. I don't care if that is the record with 'e' as a description or 'f' as a description.
First of all... I'm not sure why you need to include table_1 in the queries to begin with:
select custno, min(qty) as min_qty
from table_2
group by custno;
But just in case there is other information that you need that wasn't included in the question:
select table_1.custno, ifnull(min(qty),0) as min_qty
from table_1
left outer join table_2
on table_1.custno = table_2.custno
group by table_1.custno;
"Generic" SQL way:
SELECT table_1.custno,table_2.qty,table_2.descr
FROM table_1, table_2
WHERE table_2.id = (SELECT TOP 1 id
FROM table_2
WHERE custno = table_1.custno
ORDER BY qty )
SQL 2008 way (probably faster):
SELECT custno, qty, descr
FROM
(SELECT
custno,
qty,
descr,
ROW_NUMBER() OVER (PARTITION BY custno ORDER BY qty) RowNum
FROM table_2
) A
WHERE RowNum = 1
If you use SQL-Server you could use ROW_NUMBER and a CTE:
WITH CTE AS
(
SELECT table_1.custno,table_2.qty,table_2.descr,
RN = ROW_NUMBER() OVER ( PARTITION BY table_1.custno
Order By table_2.qty ASC)
FROM table_1
LEFT OUTER JOIN table_2
ON table_1.custno = table_2.custno
)
SELECT custno, qty,descr
FROM CTE
WHERE RN = 1
Demolink