Update Query with Condition in SQL - sql

I have a table that has 2 columns and i am trying to update another table based on these criteria:
Set the flag to 'Good' for the most duplicate keys in the Main_Key column for the same GROUP_KEY (Note we can have different Main_Keys for any GROUP_KEY)
Set the flag to 'Bad' for the least duplicate keys in the Main_Key column for the same GROUP_KEY
Set the flag to 'Don't Use' if the different Main_Keys are equal for the same GROUP_KEY
HERE IS MY TABLE
GROUP_KEY MAIN_KEY
22 4
22 4
22 55
22 55
22 55
22 55
10 10
10 10
18 87
18 22
18 22
HERE IS THE DESIRED RESULT AFTER THE UPDATE
GROUP_KEY MAIN_KEY FLAG
22 4 Bad
22 4 bad
22 55 Good
22 55 Good
22 55 Good
22 55 Good
10 10 Don't Use
10 10 Don't Use
18 87 Bad
18 22 Good
18 22 Good
I only know how to do just normal update query but not where even to start this logic. thnx for the help

Use:
declare #t table(GROUP_KEY int, MAIN_KEY int)
insert #t values
(22, 4),
(22, 4),
(22, 55),
(22, 55),
(22, 55),
(22, 55),
(10, 10),
(10, 10),
(18, 87),
(18, 22),
(18, 22)
select t.*, b.flag
from #t t
join
(
select a.GROUP_KEY, a.MAIN_KEY
,
case
when a.GROUP_KEY = a.MAIN_KEY
then 'Don''t Use'
when a.count = MAX(a.count) over(partition by a.GROUP_KEY)
then 'Good'
else 'Bad'
end [flag]
from
(
select t.GROUP_KEY, t.MAIN_KEY, COUNT(*) [count]
from #t t
group by t.GROUP_KEY, t.MAIN_KEY
)a
)b
on b.GROUP_KEY = t.GROUP_KEY and b.MAIN_KEY = t.MAIN_KEY
Output:
GROUP_KEY MAIN_KEY flag
----------- ----------- ---------
10 10 Don't Use
10 10 Don't Use
18 22 Good
18 22 Good
18 87 Bad
22 4 Bad
22 4 Bad
22 55 Good
22 55 Good
22 55 Good
22 55 Good
Update:
Assuming you have flag column in your table:
update #t
set flag = b.flag
from #t t
join
(
select a.GROUP_KEY, a.MAIN_KEY
,
case
when a.GROUP_KEY = a.MAIN_KEY
then 'Don''t Use'
when a.count = MAX(a.count) over(partition by a.GROUP_KEY)
then 'Good'
else 'Bad'
end [flag]
from
(
select t.GROUP_KEY, t.MAIN_KEY, COUNT(*) [count]
from #t t
group by t.GROUP_KEY, t.MAIN_KEY
)a
)b
on b.GROUP_KEY = t.GROUP_KEY and b.MAIN_KEY = t.MAIN_KEY

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects
DECLARE #maxGroupSize INT
SET #maxGroupSize = 100
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/#maxGroupSize AS GroupID
FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.
A recusive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
create table data
(
id int,
description nvarchar(20)
);
insert into data (id, description) values
( 1, 'qmlsdkjfqmsldk'),
( 2, 'mldskjf'),
( 3, 'qmsdlfkqjsdm'),
( 4, 'fmqlsdkfq'),
( 5, 'qdsfqsdfqq'),
( 6, 'mds'),
( 7, 'qmsldfkqsjdmfqlkj'),
( 8, 'qdmsl'),
( 9, 'mqlskfjqmlkd'),
(10, 'qsdqfdddffd');
Solution
For every recursion step evaluate (r.group_running_length + len(d.description) <= #group_max_length) if the previous group must be extended or a new group must be started in a case expression.
Set group target size to 40 to better fit the sample data.
declare #group_max_length int = 40;
with rcte as
(
select d.id,
d.description,
len(d.description) as description_length,
len(d.description) as running_length,
1 as group_id,
len(d.description) as group_running_length
from data d
where d.id = 1
union all
select d.id,
d.description,
len(d.description),
r.running_length + len(d.description),
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_id
else r.group_id + 1
end,
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_running_length + len(d.description)
else len(d.description)
end
from rcte r
join data d
on d.id = r.id + 1
)
select r.id,
r.description,
r.description_length,
r.running_length,
r.group_id,
r.group_running_length,
gs.group_sum
from rcte r
cross apply ( select max(r2.group_running_length) as group_sum
from rcte r2
where r2.group_id = r.group_id ) gs -- group sum
order by r.id;
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

Total Sum and Partial Sum

I am currently using SSMS. I am pulling data, and trying to get two different columns that sum prices. The two columns 'ChangeSpend' and 'TotalSpend' both reference the same column and this is where I am running into problems.
I want ChangeSpend to return the sum of all the codes per receipt that start with V.Ch% (so they exclude all the others) and the TotalSpend to sum all of the codes for each receipt.
Here is my current code:
SELECT
Receipt
,ReceiptCode
,ReceiptAmount
,sum(ReceiptAmount) over (Partition by Receipt) as TotalSpend
,(CASE WHEN ReceiptCode = 'V.Ch%' then sum(ReceiptAmount)
over (Partition by Receipt)
ELSE 0
END) as ChangeSpend
FROM tableA
LEFT OUTER JOIN tableB
on A.Receipt = B.Receipt
WHERE ReceiptCode LIKE 'V.%'
ORDER BY Receipt
However, my query currently prints this:
Receipt ReceiptCode ReceiptAmount TotalSpend ChangeSpend
1 v.cha 5 20 0
1 v.rt 2 20 0
1 v.chb 6 20 0
1 v.abc 7 20 0
2 v.cha 20 21 0
2 v.abc 1 21 0
3 v.cha 4 14 0
3 v.chb 1 14 0
3 v.tye 7 14 0
3 v.chs 2 14 0
And I would like it to print this:
Receipt ReceiptCode ReceiptAmount TotalSpend ChangeSpend
1 v.cha 5 20 11
1 v.rt 2 20 11
1 v.chb 6 20 11
1 v.abc 7 20 11
2 v.cha 20 21 20
2 v.abc 1 21 20
3 v.cha 4 14 7
3 v.chb 1 14 7
3 v.tye 7 14 7
3 v.chs 2 14 7
Thanks for any help
Try
,SUM(CASE WHEN ReceiptCode LIKE 'V.Ch%' THEN ReceiptAmount ELSE 0 END)
OVER (Partition by Receipt)
AS ChangeSpend
You have to put the SUM outside the CASE, not the other way around:
SUM(CASE WHEN SomeCondition=true THEN MyColumn ELSE 0 END)
This may help:
Create Table Payment(
Receipt Int,
ReceiptCode VARCHAR(10),
ReceiptAmount decimal)
Insert Into Payment
Values
(1, 'v.cha', 5),
(1, 'v.rt', 2),
(1, 'v.chb', 6),
(1, 'v.abc', 7),
(2, 'v.cha', 20),
(2, 'v.abc', 1),
(3, 'v.cha', 4),
(3, 'v.chb', 1),
(3, 'v.the', 7),
(3, 'v.chs', 2);
SELECT * ,
SUM(ReceiptAmount) OVER ( PARTITION BY Receipt ) AS TotalSpend ,
SUM(IIF(ReceiptCode LIKE 'v.ch%',ReceiptAmount,0)) OVER ( PARTITION
BY Receipt ) AS ChangeSpend
FROM payment;
Result:
SUM(
CASE WHEN ReceiptCode like 'V.Ch%' then ReceiptAmount ELSE 0 END) as ChangeSpend

Find the most recent record from the table for the given criteria

It may seem like a duplicate question, but all the answers I found on SO didn't help me solve this.
So, I have this database that stores every update on an item. Essentially, when the item is first created, the statusId is 1 and the date it's created. If someone updated the item and changed the status of the item, the statusId for that item is added. For eg. a new row with statusId 2 is added with the current date. And so on and so forth. One example of the table is shown below:
id statusId updatedDate userId authId
1 1 2016-12-20 15:43:17.703 14 14
2 1 2016-12-20 15:54:01.523 14 15
3 2 2016-12-21 16:05:48.157 14 14
4 3 2016-12-21 16:27:58.610 14 15
5 1 2016-12-20 17:16:47.627 14 18
6 1 2016-12-20 17:27:58.930 14 19
7 1 2017-01-18 14:13:35.800 18 20
So, what I want to do next is query the table where the most recent statusId is given. For the table above, the query for statusid = 1 should show the following result:
id statusId updatedDate userId authId
5 1 2016-12-20 17:16:47.627 14 18
6 1 2016-12-20 17:27:58.930 14 19
7 1 2017-01-18 14:13:35.800 18 20
Notice how the list doesn't show for authIds 14 and 15 even though it has status 1 but have different statusId in the later date.
One way I tried doing is the following:
select A1.id, A1.statusId, A1.updatedDate, A1.userId, A1.authId from AuthProgressTracker A1
left join AuthProgressTracker A2
on (A1.authId = A2.authId and A1.updatedDate > A2.updatedDate)
where A2.authId is not null
That didn't show the result I was looking for. I tried another one
SELECT *
FROM AuthProgressTracker T
INNER JOIN (
SELECT id, authId, statusId, MAX(updatedDate) as maxDate FROM AuthProgressTracker GROUP BY authId, statusId, id
) AP
ON AP.id = T.id AND T.updatedDate = AP.maxDate
order by T.id
This didn't produce the desired result either.
What am I missing?
And how can I break down the problems in SQL Server 2012 so that I can learn to figure out the problems like this in the future?
Your problem statement may have lead you a bit astray, because while you want the most recent records, the timestamp may not be how you arrive at your result set. In the query below, I use a subquery which identifies all authId which do not have a statusId other than 1. This then filters the original table to leave you with the results you want.
SELECT t1.*
FROM AuthProgressTracker t1
INNER JOIN
(
SELECT authId
FROM AuthProgressTracker
GROUP BY authId
HAVING SUM(CASE WHEN statusId <> 1 THEN 1 ELSE 0 END) = 0
) t2
ON t1.authId = t2.authId
(You haven't stated what RDBMS you're using, so you'll need to adjust your queries accordingly. E.g. If using mysql, use LIMIT syntax instead of TOP.)
declare #AuthProgressTracker table (
id int,
statusId int,
updatedDate datetime,
userId int,
authId int
)
insert into #AuthProgressTracker
values
(1, 1, '2016-12-20 15:43:17.703', 14, 14),
(2, 1, '2016-12-20 15:54:01.523', 14, 15),
(3, 2, '2016-12-21 16:05:48.157', 14, 14),
(4, 3, '2016-12-21 16:27:58.610', 14, 15),
(5, 1, '2016-12-20 17:16:47.627', 14, 18),
(6, 1, '2016-12-20 17:27:58.930', 14, 19),
(7, 1, '2017-01-18 14:13:35.800', 18, 20)
/* Determine id's of latest status updates per authId */
SELECT MAX(id) as LatestSatus
FROM #AuthProgressTracker
GROUP BY authId
/* Note the above includes row id's you've chosen to exclude, so... */
/* Determine most recent statusId */
SELECT TOP 1 statusId
FROM #AuthProgressTracker
ORDER BY updatedDate DESC
/* Putting it all together */
SELECT i.*
FROM #AuthProgressTrackeri
INNER JOIN (
SELECT MAX(id) as LatestSatus
FROM #AuthProgressTracker
GROUP BY authId
) ls ON
ls.LatestSatus = i.id
WHERE i.statusId = (
SELECT TOP 1 statusId
FROM #AuthProgressTracker
ORDER BY updatedDate DESC
)

Using a loop in SQL to populate the table (SQL Server)

I am quite new with SQL and loops especially and need some help with the following problem.
I have a table like this:
SpotID EventID MaxTemp
123 1 45
236 1 109
69 1 18
123 2 216
236 2 29
69 2 84
123 3 91
236 3 457
69 3 280
I would like to generate a new table with the following output:
SpotID Over30 Over70 Over100
123 3 2 1
236 2 2 2
69 2 2 1
So what i am after is the count of how many times did the temperature exceed the limits of 30, 70 and 100 per SpotID for different EventIDs.
Is there a way to do this with a loop? My data set is obviously bigger and I am curious if how could i use something efficient.
Thank you very much.
Mike
You just need conditional aggregation:
select spotid,
sum(case when maxtemp > 30 then 1 else 0 end) as over_30,
sum(case when maxtemp > 70 then 1 else 0 end) as over_70
sum(case when maxtemp > 100 then 1 else 0 end) as over_100
from likethis
group by spotid;
If you just want to learn how to use loops....
DECLARE #DATA TABLE (
SpotID INT,
EventID INT,
MaxTemp INT
);
DECLARE #NEWDATA TABLE (
SpotID INT,
T30 INT,
T90 INT,
T100 INT
);
DECLARE
#SPOT AS INT,
#T30 AS INT,
#T90 AS INT,
#T100 AS INT;
INSERT INTO #DATA VALUES
(123, 1, 45 ),
(236, 1, 109),
(69 , 1, 18 ),
(123, 2, 216),
(236, 2, 29 ),
(69 , 2, 84 ),
(123, 3, 91 ),
(236, 3, 457),
(69 , 3, 280);
DECLARE STATION CURSOR FOR SELECT SpotID FROM #DATA GROUP BY SpotID;
OPEN STATION;
FETCH NEXT FROM STATION INTO #SPOT;
WHILE ##FETCH_STATUS = 0
BEGIN
SET #T30 = 0;
SET #T90 = 0;
SET #T100 = 0;
SELECT
#T30 = SUM(CASE WHEN MaxTemp > 30 AND MaxTemp < 70 THEN 1 ELSE 0 END),
#T90 = SUM(CASE WHEN MaxTemp >= 70 AND MaxTemp < 100 THEN 1 ELSE 0 END),
#T100 = SUM(CASE WHEN MaxTemp >= 100 THEN 1 ELSE 0 END)
FROM #DATA WHERE SpotID = #SPOT
INSERT INTO #NEWDATA VALUES (#SPOT,#T30,#T90,#T100)
FETCH NEXT FROM STATION INTO #SPOT;
END;
CLOSE STATION;
DEALLOCATE STATION;
SELECT * FROM #NEWDATA
Not anyway I would write the code requested, but this example shows how to create table variables, a simple cursor for looping, and writing to answers to variables that are loaded into a new table.
A lot of moving parts but it can give you insight to doing loops.
One minor change to the previous post, my version only counts within each temp range, else the lower temps will count most temps not temps in that range.
DECLARE #DATA TABLE (
SpotID INT,
EventID INT,
MaxTemp INT
)
INSERT INTO #DATA VALUES
(123, 1, 45 ),
(236, 1, 109),
(69 , 1, 18 ),
(123, 2, 216),
(236, 2, 29 ),
(69 , 2, 84 ),
(123, 3, 91 ),
(236, 3, 457),
(69 , 3, 280)
SELECT
SpotID,
SUM(CASE WHEN MaxTemp > 30 AND MaxTemp < 70 THEN 1 ELSE 0 END) AS OVER_30,
SUM(CASE WHEN MaxTemp >= 70 AND MaxTemp < 100 THEN 1 ELSE 0 END) AS OVER_70,
SUM(CASE WHEN MaxTemp >= 100 THEN 1 ELSE 0 END) AS OVER_100
FROM
#DATA
GROUP BY
SpotID

Counting number of positive value in a query

I'm working on the following query and table
SELECT dd.actual_date, dd.week_number_overall, sf.branch_id, AVG(sf.overtarget_qnt) AS targetreach
FROM sales_fact sf, date_dim dd
WHERE dd.date_id = sf.date_id
AND dd.week_number_overall BETWEEN 88-2 AND 88
AND sf.branch_id = 1
GROUP BY dd.actual_date, branch_id, dd.week_number_overall
ORDER BY dd.actual_date ASC;
ACTUAL_DATE WEEK_NUMBER_OVERALL BRANCH_ID TARGETREACH
----------- ------------------- ---------- -----------
13/08/14 86 1 -11
14/08/14 86 1 12
15/08/14 86 1 11.8
16/08/14 86 1 1.4
17/08/14 86 1 -0.2
19/08/14 86 1 7.2
20/08/14 87 1 16.6
21/08/14 87 1 -1.4
22/08/14 87 1 14.4
23/08/14 87 1 2.8
24/08/14 87 1 18
26/08/14 87 1 13.4
27/08/14 88 1 -1.8
28/08/14 88 1 10.6
29/08/14 88 1 7.2
30/08/14 88 1 14
31/08/14 88 1 9.6
02/09/14 88 1 -3.2
the "TargetReach" column shows whether target has been reach or not.
A negative value means target wasn't reached on that day.
How can I get calculate the number of ROW with positive value for this query?
that will show something like:
TOTAL_POSITIVE_TARGET_REACH WEEK_NUMBER_OVERALL
--------------------------- ------------------
13 88
I have tried to use CASE but still not working right.
Thanks a lot.
You want to use conditional aggregation:
with t as (
<your query here>
)
select week_number_overall, sum(case when targetreach > 0 then 1 else 0 end)
from t
group by week_number_overall;
However, I would rewrite your original query to use proper join syntax. Then the query would look like:
SELECT week_number_overall,
SUM(CASE WHEN targetreach > 0 THEN 1 ELSE 0 END)
FROM (SELECT dd.actual_date, dd.week_number_overall, sf.branch_id, AVG(sf.overtarget_qnt) AS targetreach
FROM sales_fact sf JOIN
date_dim dd
ON dd.date_id = sf.date_id
WHERE dd.week_number_overall BETWEEN 88-2 AND 88 AND sf.branch_id = 1
GROUP BY dd.actual_date, branch_id, dd.week_number_overall
) t
GROUP BY week_number_overall
ORDER BY week_number_overall;
THe difference between a CTE (the first solution) and a subquery is (in this case) just a matter of preference.
SELECT WEEK_NUMBER_OVERALL, COUNT(*) TOTAL_POSITIVE_TARGET_REACH
FROM (your original query)
WHERE TARGETREACH >= 0
GROUP BY WEEK_NUMBER_OVERALL
select sum( decode( sign( TARGETREACH ) , -1 , 0 , 0 , 0 , 1 , 1 ) )
from ( "your query here" );
Use HAVING Clause
SELECT dd.actual_date, dd.week_number_overall, sf.branch_id, AVG(sf.overtarget_qnt) AS targetreach
FROM sales_fact sf, date_dim dd
WHERE dd.date_id = sf.date_id
AND dd.week_number_overall BETWEEN 88-2 AND 88
AND sf.branch_id = 1
GROUP BY dd.actual_date, branch_id, dd.week_number_overall
HAVING AVG(sf.overtarget_qnt)>0
ORDER BY dd.actual_date ASC;
Using decode(), sign() get both positive count & negative count.
drop table test;
create table test (
key number(5),
value number(5));
insert into test values ( 1, -9 );
insert into test values ( 2, -8 );
insert into test values ( 3, 10 );
insert into test values ( 4, 12 );
insert into test values ( 5, -9 );
insert into test values ( 6, 8 );
insert into test values ( 7, 51 );
commit;
select sig , count ( sig ) from
(
select key, ( (decode( sign( value ) , -1 , '-ve' , 0 , 'zero' , 1 , '+ve' ) ) ) sig
from test
)
group by sig
SIG COUNT(SIG)
---- ----------------------
+ve 4
-ve 3