Multiple SQL MAX when items are not in order - sql

I have some data as below:
DECLARE #MyTable AS TABLE
(productName varchar(13), test1 int,test2 int)
INSERT INTO #MyTable
(productName, test1,test2)
VALUES
('a', 1,1),
('a', 2,2),
('a', 3,3),
('b', 1,4),
('b', 2,5),
('b', 3,6),
('a', 1,7),
('a', 4,8),
('a', 5,9)
;
SELECT productname,MAX(test1) from #MyTable group BY productname
a MAX query on test1 column gives
a,5
b,3
but I need to have result as
a,3
b,3
a,5
when I have order by test2

You can solve this by using a trick with row_numbers, so that you assign 2 different row numbers, one for the whole data and one that is partitioned by productname. If you compare the difference between these numbers, you can figure out when product name has changed, and use that to determine the max values for each group.
select productname, max(test1) from (
SELECT *,
row_number() over (order by test2 asc) -
row_number() over (partition by productname order by test2 asc) as GRP
from #MyTable
) X
group by productname, GRP
You can test this in SQL Fiddle
If the test2 column is always a row number without gaps, you can use that too instead of the first row number column. If you need ordering in the data, you'll have to for example to use the max of test1 to do that.

Please check the following SQL Select statement
DECLARE #MyTable AS TABLE (productName varchar(13), test1 int,test2 int)
INSERT INTO #MyTable
(productName, test1,test2)
VALUES
('a', 1,1),
('a', 2,2),
('a', 3,3),
('b', 1,4),
('b', 2,5),
('b', 3,6),
('a', 1,7),
('a', 4,8),
('a', 5,9)
DECLARE #MyTableNew AS TABLE (id int identity(1,1), productName varchar(13), test1 int,test2 int)
insert into #MyTableNew select * from #MyTable
--select * from #MyTableNew
;with cte as (
SELECT
id, productName, test1, test2,
case when (lag(productName,1,'') over (order by id)) = productName then 0 else 1 end ischange
from #MyTableNew
), cte2 as (
select t.*,(select sum(ischange) from cte where id <= t.id) grp from cte t
)
select distinct grp, productName, max(test1) over (partition by grp) from cte2
This is implemented according to the following SQL Server Lag() function tutorial
The Lag() function is used to identify and order the groups in table data

Please try this query
DECLARE #MyTable AS TABLE
(productName varchar(13), test1 int,test2 int)
INSERT INTO #MyTable
(productName, test1,test2)
VALUES
('a', 1,1),
('a', 2,2),
('a', 3,3),
('b', 1,4),
('b', 2,5),
('b', 3,6),
('a', 1,7),
('a', 4,8),
('a', 5,9)
;
SELECT productname,MAX(test1)
from #MyTable
where test1 = test2
group BY productname
union all
SELECT productname,MAX(test1)
from #MyTable
where test1 != test2
group BY productname

Related

Update same data from the same table depending on a column

I have a similar case of a table, Like the case of this link Update same data from the same table
but in my case it must update depending on the column "dependency". In other words, it updates the repetitions in the tables always leaving the most recent line and for table that only have one line it does not update. My data is like this:
I want it to be updated like this:
I tryed this code:
create table dbo.test( id int, CAR varchar(30), ACTIVE int, dependency int)
insert into dbo.test(id, CAR, ACTIVE, dependency)
values
(1, 'AAA-25-35', 0,1),
(2, 'LDB-25-35', 0,2),
(3, 'LDB-00-35', 0,2),
(4, 'LDB-25-35', 0,2),
(5, 'LDB-00-35', 0,2),
(6, 'LDC-10-10', 0,2),
(7, 'LDC-10-10', 0,2),
(8, 'LDB-00-35', 0,2),
(9, 'AAA-25-35', 0,1),
(10, 'AAA-25-35', 0,3),
(11, 'AAA-25-35', 0,3),
(12, 'BBB-25-35', 0,2),
(13, 'BBB-25-35', 0,3),
(14, 'BBB-25-35', 0,3)
GO
SELECT * FROM TEST
WITH CTE AS
(
SELECT ROW_NUMBER() OVER(PARTITION BY CAR ORDER BY ID) AS t,
CAR,
ACTIVE
FROM Test
)
UPDATE CTE
SET ACTIVE = 1
WHERE t=1
AND EXISTS (SELECT 1 FROM CTE c WHERE c.CAR = CTE.CAR GROUP BY CAR HAVING COUNT(*) > 1)
go
SELECT * FROM test
Try changing the SELECT and WHERE clauses:
WITH CTE AS (
SELECT ROW_NUMBER() OVER(PARTITION BY CAR, dependency ORDER BY ID) AS t,
LEAD(id) OVER (PARTITION BY CAR, dependency ORDER BY ID) as next_id,
CAR,
ACTIVE
FROM Test
)
UPDATE CTE
SET ACTIVE = 1
WHERE t = 1 AND next_id IS NOT NULL

SQL Server : create a column based on the first occurrence of a value in another column

Consider the following table in SQL Server:
I want to write a SQL query to generate the column Indicator. This column should be set to 1 on the first occurrence of Flag = 1 for each category.
For instance, for Category A, column Flag is set to 1 for the dates 1/3/2019, 1/4/2019, 1/5/2019, 1/6/2019. Since 1/3/2019 is the earliest date when Flag was set to 1, the Indicator column for that record should also be set to 1.
What SQL Server query should I write for this?
PS: The figure already shows the desired output for the Indicator column.
Below is the code to generate the table in SQL Server:
CREATE TABLE myTable
(
Category CHAR(1),
Date DATE,
Flag INT
)
INSERT INTO myTable (Category, Date, Flag)
VALUES ('A', '2019-01-01', 0), ('A', '2019-02-01', 0),
('A', '2019-03-01', 1), ('A', '2019-04-01', 1),
('A', '2019-05-01', 1), ('A', '2019-06-01', 1),
('B', '2019-01-01', 0), ('B', '2019-02-01', 0),
('B', '2019-03-01', 0), ('B', '2019-04-01', 0),
('B', '2019-05-01', 1), ('B', '2019-06-01', 1),
('C', '2019-01-01', 0), ('C', '2019-02-01', 0),
('C', '2019-03-01', 0), ('C', '2019-04-01', 1),
('C', '2019-05-01', 1), ('C', '2019-06-01', 1),
('C', '2019-07-01', 1)
One way using a derived table and MIN() to figure out which is the first date for a category that has the flag. Join that back to the original table.
DEMO
SELECT
yt.*
, ISNULL(b.Indicator, 0) AS Indicator
FROM YourTable yt
LEFT JOIN
(SELECT category, MIN(date) AS date, 1 AS Indicator
FROM dbo.YourTable
WHERE Flag = 1
GROUP BY Category) b ON b.Category = yt.Category AND b.date = yt.date
I am thinking of using the min() function as a window function:
select t.*,
(case then t.flag = 1 and
t.date = min(t.date) over (partition by t.category, t.flag)
then 1 else 0
end) as indicator
from myTable t
order by t.Category, t.date
Another way
DEMO
CREATE TABLE myTable
(
Category char(1),
Date date,
Flag int
)
INSERT INTO myTable (Category, Date, Flag) VALUES
('A','2019-01-01',0),
('A','2019-02-01',0),
('A','2019-03-01',1),
('A','2019-04-01',1),
('A','2019-05-01',1),
('A','2019-06-01',1),
('B','2019-01-01',0),
('B','2019-02-01',0),
('B','2019-03-01',0),
('B','2019-04-01',0),
('B','2019-05-01',1),
('B','2019-06-01',1),
('C','2019-01-01',0),
('C','2019-02-01',0),
('C','2019-03-01',0),
('C','2019-04-01',1),
('C','2019-05-01',1),
('C','2019-06-01',1),
('C','2019-07-01',1);
select t.* ,
CASE WHEN T.FLAG=1 AND FIRST_VALUE(T.DATE) OVER (PARTITION BY T.Category ORDER BY t.FLAG desc, t.Date asc)=T.DATE THEN 1
ELSE 0 END Indicator
from myTable t
order by t.Category, t.date

Elegant way to aggregate and compute percentages in sql?

I have a table of user, product, count that tells what the user purchased and how many times ("count").
I would like to know what is the "average basket" of a user, that is for each product what percentage it represents for the user.
e.g
user1,fruits,4
user1,water,2
user2,fruits,3
user2,food,9
so I would get
user1,fruits,0.6666 // = 4 / 4+2
user1,water,0.3333 // = 2 / 4+2
user2,fruits,0.25 // = 3 / 3+9
user2,food,0.75 // = 9 / 3+9
and later
fruits,0.45 // = 0.666+0.25 / 2
water,0.16 // = 0.33/2
food,0.38 // = 0.75/2
I have used
select t1.user as user, t1.product as product, max(t1.c) / max(t2.c) as ratio
from (
select user, product, count(*) as c
from table
group by user, product
) t1
join (
select user, count(*) as c
from table
group by user
) t2
on t1.user=t2.user
group by user, product
to get the first table, and then a select product, avg(ratio) ... group by product on that table.
Things work but I wonder if there was a more efficient / better way to do it?
I always use window functions to calculate percentages:
Reference: http://www.mysqltutorial.org/mysql-window-functions/
Example: http://sqlfiddle.com/#!17/66373/6
SELECT
user,
product,
c,
sum(c) over(partition by usr) sc,
c / sum(c) over(partition by usr) per
FROM (
SELECT usr, product, count(*) c
FROM tablex
GROUP BY usr, product
) t
CREATE TABLE tablex (
usr varchar(32),
product varchar(32)
);
INSERT INTO tablex VALUES ('a', 'x');
INSERT INTO tablex VALUES ('a', 'y');
INSERT INTO tablex VALUES ('a', 'y');
INSERT INTO tablex VALUES ('a', 'y');
INSERT INTO tablex VALUES ('a', 'z');
INSERT INTO tablex VALUES ('a', 'z');
INSERT INTO tablex VALUES ('a', 'z');
INSERT INTO tablex VALUES ('a', 'z');
INSERT INTO tablex VALUES ('a', 'z');
INSERT INTO tablex VALUES ('b', 'x');
INSERT INTO tablex VALUES ('b', 'x');
INSERT INTO tablex VALUES ('b', 'x');
INSERT INTO tablex VALUES ('b', 'y');
INSERT INTO tablex VALUES ('b', 'y');
INSERT INTO tablex VALUES ('b', 'y');
INSERT INTO tablex VALUES ('b', 'y');
INSERT INTO tablex VALUES ('b', 'y');
INSERT INTO tablex VALUES ('b', 'y');
INSERT INTO tablex VALUES ('b', 'z');
INSERT INTO tablex VALUES ('b', 'z');
INSERT INTO tablex VALUES ('b', 'z');
INSERT INTO tablex VALUES ('b', 'z');
INSERT INTO tablex VALUES ('b', 'z');
I would write this as:
select user, product, count(*) as c,
count(*) * 1.0 / sum(count(*)) over (partition by user) as ratio
from table
group by user, product;
You can use this code and check the execution plan, I am sure the performance has been improved.
select user, product, CAST(count(*) AS decimal(18,4)) / (select count(*)
from table t2 where t2.user = t1.user)
from table t1
group by user, product

Remove duplicates by multiple column criteria

I have following table
CREATE TABLE Test (
ID INT NOT NULL IDENTITY(1,1) PRIMARY KEY,
FIRST VARCHAR(10) NOT NULL,
SECOND VARCHAR(10) NOT NULL
)
Table filled with some duplicate data. TestTarget table have same structure and it filled using following procedural algorithm:
DECLARE #first varchar(10), #second varchar(10)
DECLARE c CURSOR FAST_FORWARD
FOR
SELECT first, second FROM Test ORDER BY id
OPEN c
FETCH NEXT FROM c INTO #first, #second
WHILE ##fetch_status = 0
BEGIN
IF NOT EXISTS(SELECT 1 FROM TestTarget WHERE first=#first OR second=#second)
INSERT INTO TestTarget (first, second) VALUES(#first, #second)
FETCH NEXT FROM c INTO #first, #second
END
CLOSE c
DEALLOCATE c
Briefly here we checking target table before insert if it already contains such 'first' OR 'second' value.
Example:
Source table
ID FIRST SECOND
1 A 2
2 A 1
3 A 3
4 B 2
5 B 1
6 B 3
7 B 2
8 B 4
9 C 2
10 C 3
INSERT INTO Test (first, second)
VALUES ('A', '2'),
('A', '1'),
('A', '3'),
('B', '2'),
('B', '1'),
('B', '3'),
('B', '2'),
('B', '4'),
('C', '2'),
('C', '3')
Target table
ID FIRST SECOND
1 A 2
5 B 1
10 C 3
Real source table have x*100k rows and at least 2 rows for same 'first' or 'second' column.
I'm looking for set based solution if it ever possible or please at least something faster than such loop because it takes hours for my real case.
NOTE Classic duplicate removals via partition/join/etc. is not the case here because it will produce different results even with different final number of rows.
INSERT INTO TestTarget (first, second)
SELECT first,second
FROM Test t
WHERE NOT EXISTS
(
SELECT 1
FROM Test t2
WHERE t2.id>t.id and (t2.first=t.first or t2.second=t.second)
)
I cannot think of any simple set based solution to your problem, I am afraid, but I would hope that something along the following lines would be much faster than your existing cursor:
declare #test table
(id int,
first varchar(1),
second varchar(1))
declare #target table
(id int,
first varchar(1),
second varchar(1))
declare #temp table
(id int,
first varchar(1),
second varchar(1))
INSERT INTO #Test (id, first, second)
VALUES (1, 'A', '2'),
(2, 'A', '1'),
(3, 'A', '3'),
(4, 'B', '2'),
(5, 'B', '1'),
(6, 'B', '3'),
(7, 'B', '2'),
(8, 'B', '4'),
(9, 'C', '2'),
(10, 'C', '3')
declare #firsts table
(first varchar(1))
declare #seconds table
(second varchar(1))
INSERT INTO #firsts
SELECT DISTINCT first FROM #test
INSERT INTO #seconds
SELECT DISTINCT second FROM #test
declare #firstcnt int = (SELECT count(*) FROM #firsts)
declare #secondcnt int = (SELECT count(*) FROM #firsts)
WHILE (#firstcnt > 0 AND #secondcnt > 0)
BEGIN
DELETE FROM #temp
INSERT INTO #temp
SELECT TOP 1 t.id, t.first, t.second FROM #test t
INNER JOIN #firsts f On t.first = f.first
INNER JOIN #seconds s On t.second = s.second
ORDER BY id
INSERT INTO #target
SELECT * FROM #temp
DELETE FROM #firsts WHERE first = (SELECT first FROM #temp)
SET #firstcnt = #firstcnt - 1
DELETE FROM #seconds WHERE second = (SELECT second FROM #temp)
SET #secondcnt = #secondcnt - 1
END
SELECT * FROM #target
This does produce the desired values and I would expect it to be faster because the while loop only needs to run for the total number of unique value pairs, rather than having to step through the entire table.
It also gives 10 C 3 as the last row, which I take to be correct, despite #Gordon's comment. If I understand the question correctly, the ID order takes precedence: that is to say, although 'A' and 'B' have entries with '3' as the second value, these entries have a greater id, than another second value that can legitimately be inserted.
HTH
using Recursive CTE,
declare #Target table(col1 varchar(20),col2 int)
declare #Test table(col1 varchar(20),col2 int)
INSERT INTO #Test (col1, col2
VALUES ('A', '2')
('A', '1')
('A', '3'),
('B', '1')
('B', '2'),
('B', '3'),
('B', '2'),
('B', '4'),
('C', '2'),
('C', '3')
 
;With CTE as
(
select col1 ,col2
,DENSE_RANK()over( ORDER by col1)rn1
from #Test
)
,cte1 AS(
select top 1 c.col1,c.col2,rn1 from cte c where rn1=1
union ALL
select c.col1,c.col2,c.rn1 from cte c
inner join cte1 c1
on c.rn1>c1.rn
where c.col2!=c1.col2
)
insert into #Target
select col1,col2 FROM(
select *,ROW_NUMBER()over(partition by col1 order by (select null)) rn2 from cte1
)t4
where rn2=1
select * from #Target

How to use DateDiff into only one SELECT statement?

I want to make a short version on my DATEDIFF function on my SQL Query. In my code, I created two temporary tables then there, I select and use the DATEDIFF funtion.
I would want this code to be simplified and only use ONE SELECT statement that will provide the same results. Is it possible?
Here is my result:
This is my SQL Query
DECLARE #Temp TABLE (ID int, Stamp datetime)
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3)
SELECT ROW_NUMBER() OVER (ORDER BY ID) as c, ID, Stamp INTO #Temp2
FROM #Temp
SELECT ROW_NUMBER() OVER (ORDER BY ID) as d, ID, Stamp INTO #Temp3
FROM #Temp
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM #Temp2 as temp2
LEFT JOIN #Temp3 as temp3 on temp2.ID = temp3.ID and temp2.c = temp3.d + 1
Thanks!
If you are using SQL Server 2012:
select * ,isnull(datediff(day,lag(stamp) over(partition by id order by stamp),stamp) ,0)
from #temp t1
Else use this..
;with cte
as
(select * ,row_number() over (partition by id order by stamp ) as rownum
from #temp t1
)
select c1.id,c1.stamp,isnull(datediff(day,c2.stamp,c1.stamp),0) as datee
from cte c1
left join
cte c2
on c1.id=c2.id and c1.rownum=c2.rownum+1
You could remove insert into the temp-tables and use subselects within the final query:
DECLARE #Temp TABLE (ID int, Stamp datetime)
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3)
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM (SELECT ROW_NUMBER() OVER (ORDER BY ID) as c, ID, Stamp FROM #Temp) as temp2
LEFT JOIN (SELECT ROW_NUMBER() OVER (ORDER BY ID) as d, ID, Stamp FROM #Temp) as temp3
on temp2.ID = temp3.ID and temp2.c = temp3.d + 1
In SQL Server 2012+, you would just use lag():
select t.*
isnull(datediff(day, lag(stamp) over (partition by id order by stamp), stamp), 0)
from #temp t;
In earlier versions, I would use outer apply:
select t.*,
isnull(datediff(day, t2.stamp, t.stamp), 0)
from #temp t outer apply
(select top 1 t2.*
from #temp t2
where t2.id = t.id and t2.stamp < t.stamp
order by t2.stamp desc
) t2;
try a cte,
DECLARE #Temp TABLE (ID int, Stamp datetime)
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3)
;WITH CTE AS
(
SELECT ROW_NUMBER() OVER (ORDER BY ID) as RowNo, ID, Stamp
FROM #Temp
)
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM CTE as temp2
LEFT JOIN CTE as temp3 on temp2.ID = temp3.ID
AND temp2.RowNo = temp3.RowNo + 1