T-SQL to stamp individual rows - sql

I'm looking for some T-SQL code that will add a column called tag which will tag each row with the same number until there is a change in value within any of the columns "team", "id", "kmvid", "name", "cid" and "pid". If there is a change, use the next sequence of numbering for that row. See the expected results below as image.

You can do this with cumulative sums and lag():
select t.*,
sum(case when prev_team = team and
id = id and
kmvid = kmvid and
name = name and
prev_id = id and
prev_cid = pid and
prev_oid = cid
then 1 else 0
end) over (order by date) as Tag
from (select t.*,
lag(team) over (order by date) as prev_team,
lag(id) over (order by date) as prev_id,
lag(kmvid) over (order by date) as prev_kmvid,
lag(name) over (order by date) as prev_name,
lag(cid) over (order by date) as prev_cid,
lag(pid) over (order by date) as prev_pid
from t
) t;
You should not use a cursor for things that are better done using set-based operations.

The following will give you the desired results based on the data you took a picture of...
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
DROP TABLE #TestData;
CREATE TABLE #TestData (
CountryId CHAR(2) NOT NULL,
[Date] DATETIME NOT NULL
);
INSERT #TestData (CountryId, Date)
SELECT '99', '2004-04-30' UNION ALL
SELECT '99', '2004-07-31' UNION ALL
SELECT '99', '2004-10-31' UNION ALL
SELECT '99', '2005-01-31' UNION ALL
SELECT '99', '2005-04-30' UNION ALL
SELECT '99', '2005-07-31' UNION ALL
SELECT '99', '2005-10-31' UNION ALL
SELECT '99', '2006-01-31' UNION ALL
SELECT '99', '2006-04-30' UNION ALL
SELECT '99', '2006-07-31' UNION ALL
SELECT '99', '2006-10-31' UNION ALL
SELECT '99', '2007-01-31' UNION ALL
SELECT 'HK', '2007-04-30' UNION ALL
SELECT 'CA', '2007-07-31' UNION ALL
SELECT 'HK', '2007-10-31';
-- SELECT * FROM #TestData td;
--=======================================
WITH
cte_TagGroup AS (
SELECT
td.CountryId,
td.[Date],
TagGroup = ROW_NUMBER() OVER (ORDER BY td.Date)
- ROW_NUMBER() OVER (PARTITION BY td.CountryId ORDER BY td.Date)
- CASE WHEN td.CountryId = LAG(td.CountryId, 1, td.CountryId) OVER (ORDER BY td.CountryId, td.Date) THEN 0 ELSE 1 END
FROM
#TestData td
)
SELECT
tg.CountryId,
tg.Date,
Tag = DENSE_RANK() OVER (ORDER BY tg.TagGroup)
FROM
cte_TagGroup tg;
... That said, I suspect that the 1st 3 columns of data aren't all the same values, so you may need to add columns to the "PARTITION BY" clauses to make it fit your actual data.
HTH,
Jason

Related

How can i find rows before a specific value?

I have the next row and what I want to do is to select all the rows before the type "shop". I tried using case in the "where clause" but I didn't get any result. How can I do it?
|id|visitnumber|type |
|01| 1|register|
|01| 2|visit |
|01| 3|visit |
|01| 4|shop |
|01| 5|visit |
For example, what I want to get is the visitnumber before type = "shop".
it would be very helpful because what I'm trying to do is to get all the actions that happened before an specific event on big query.
|id|numberofvisits|
|01| 3|
One method uses correlated subqueries:
select id, count(*)
from t
where visitnumber < (select min(t2.visitnumber) from t t2 where t2.id = t.id and type = 'shop')
group by id;
However, in BigQuery, I prefer an approach using window functions:
select id, countif(visitnumber < visitnumber_shop)
from (select t.*,
min(case when type = 'shop' then visitnumber end) over (partition by id) as visitnumber_shop
from t
) t
group by id;
This has the advantage of keeping all ids even those that don't have a "shop" type.
One option uses a subquery for filtering:
select id, count(*) number_of_visits
from mytable t
where t.visit_number < (
select min(t1.visit_number)
from mytable t
where t1.id = t.id and t1.type = 'shop'
)
group by id
You can also use window functions:
select id, count(*) number_of_visits
from (
select
t.*,
countif(type = 'shop') over(partition by id order by visit_number) has_shop
from mytable t
) t
where has_shop = 0
group by id
Below option is for BigQuery Standard SQL
#standardSQL
SELECT id,
ARRAY_LENGTH(SPLIT(REGEXP_EXTRACT(',' || STRING_AGG(type ORDER BY visitnumber), r'(.*?),shop'))) - 1 AS number_of_visits_before_first_shop
FROM `project.dataset.table`
GROUP BY id
You can test, play with above using dummy data as in below example
#standardSQL
WITH `project.dataset.table` AS (
SELECT '01' id, 1 visitnumber, 'register' type UNION ALL
SELECT '01', 2, 'visit' UNION ALL
SELECT '01', 3, 'visit' UNION ALL
SELECT '01', 4, 'shop' UNION ALL
SELECT '01', 5, 'visit' UNION ALL
SELECT '02', 1, 'register' UNION ALL
SELECT '02', 2, 'visit' UNION ALL
SELECT '02', 3, 'visit' UNION ALL
SELECT '03', 1, 'shop' UNION ALL
SELECT '03', 2, 'shop' UNION ALL
SELECT '03', 3, 'visit'
)
SELECT id,
ARRAY_LENGTH(SPLIT(REGEXP_EXTRACT(',' || STRING_AGG(type ORDER BY visitnumber), r'(.*?),shop'))) - 1 AS number_of_visits_before_first_shop
FROM `project.dataset.table`
GROUP BY id
with result
Row id number_of_visits_before_first_shop
1 01 3
2 02 null
3 03 0
This is the query i run on Big Query with an Analytics 360 test dataset:
select
id,
visitnumber,
countif(hit_number < hitnumber_quickviewclick) as hitsprev_quickviewclick
from (
select
a.fullVisitorID as id,
a.visitnumber as visitnumber,
h.hitNumber as hit_number,
MIN (case when h.eventInfo.eventAction = 'Quickview Click' then h.hitNumber end) over (partition by a.fullVisitorID) as hitnumber_quickviewclick
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20170725` as a
CROSS JOIN UNNEST(hits) as h
) as T
group by 1,2;
I wanted to make a query where i could find the total number of hits before the event action 'quickview click' hitted. If this is wrong or can be improved let me know!
Thanks a lot, guys!
This is how I would approach in SQL in general:
select count(*)
from yourtable yt
where type = 'visit' and not exists (
select 1
from yourtable yt2
where yt.id > yt2.id and yt2.type = 'shop'
)
However, I would very much think about situations when we want to find visits before the next shop... And the next shop... And the next shop. For that purpose you could find out the ids of shop and group by intervals.

SQL Query for finding longest streak of wins

I have data like below -
Year,winning_country
2001,IND
2002,IND
2003,IND
2004,AUS
2005,AUS
2006,SA
2007,SA
2008,SA
2009,IND
2010,IND
2011,IND
2012,IND
2013,AUS
2014,AUS
2015,SA
2016,NZ
2017,SL
2018,IND
The question here is to find out the longest streak of wins for each country and desired output will be like below -
Country,no_of_wins
IND,4
AUS,2
SA,3
SL,1
NZ,1
Can someone help here.
This is a gaps and islands problem, but the simplest method is to subtract a sequence from the year. So, to get all the sequences:
select country, count(*) as streak,
min(year) as from_year, max(year) as to_year
from (select year, country,
row_number() over (partition by country order by year) as seqnum
from t
) t
group by country, (year - seqnum);
To get the longest per country, aggregate again or use window functions:
select country, streak
from (select country, count(*) as streak,
min(year) as from_year, max(year) as to_year,
row_number() over (partition by country order by count(*) desc) as seqnum_2
from (select year, country,
row_number() over (partition by country order by year) as seqnum
from t
) t
group by country, (year - seqnum)
) cy
where seqnum_2 = 1;
I prefer using row_number() to get the longest streak because it allows you to also get the years when it occurred.
Looks like an gaps-and-islands problem.
The SQL below calculates some ranking based on 2 row_number.
Then it's just a matter of grouping.
SELECT q2.Country, MAX(q2.no_of_wins) AS no_of_wins
FROM
(
SELECT q1.winning_country as Country,
COUNT(*) AS no_of_wins
FROM
(
SELECT t.Year, t.winning_country,
(ROW_NUMBER() OVER (ORDER BY t.Year ASC) -
ROW_NUMBER() OVER (PARTITION BY t.winning_country ORDER BY t.Year)) AS rnk
FROM yourtable t
) q1
GROUP BY q1.winning_country, q1.rnk
) q2
GROUP BY q2.Country
ORDER BY MAX(q2.no_of_wins) DESC
If Redshift supports analytic function, below would be the query.
with t1 as
(
select 2001 as year,'IND' as cntry from dual union
select 2002,'IND' from dual union
select 2003,'IND' from dual union
select 2004,'AUS' from dual union
select 2005,'AUS' from dual union
select 2006,'SA' from dual union
select 2007,'SA' from dual union
select 2008,'SA' from dual union
select 2009,'IND' from dual union
select 2010,'IND' from dual union
select 2011,'IND' from dual union
select 2012,'IND' from dual union
select 2013,'AUS' from dual union
select 2014,'AUS' from dual union
select 2015,'SA' from dual union
select 2016,'NZ' from dual union
select 2017,'SL' from dual union
select 2018,'IND' from dual) ,
t2 as (select year, cntry, year - row_number() over (partition by cntry order by year) as grpBy from t1 order by cntry),
t3 as (select cntry, count(grpBy) as consWins from t2 group by cntry, grpBy),
res as (select cntry, consWins, row_number() over (partition by cntry order by consWins desc) as rnk from t3)
select cntry, consWins from res where rnk=1;
Hope this helps.
Here is a solution that leverages the use of Redshift Python UDF's
There may be simpler ways to achieve the same but this is a good example of how to create a simple UDF.
create table temp_c (competition_year int ,winning_country varchar(4));
insert into temp_c (competition_year, winning_country)
values
(2001,'IND'),
(2002,'IND'),
(2003,'IND'),
(2004,'AUS'),
(2005,'AUS'),
(2006,'SA'),
(2007,'SA'),
(2008,'SA'),
(2009,'IND'),
(2010,'IND'),
(2011,'IND'),
(2012,'IND'),
(2013,'AUS'),
(2014,'AUS'),
(2015,'SA'),
(2016,'NZ'),
(2017,'SL'),
(2018,'IND')
;
create or replace function find_longest_streak(InputStr varChar)
returns integer
stable
as $$
MaxStreak=0
ThisStreak=0
ThisYearStr=''
LastYear=0
for ThisYearStr in InputStr.split(','):
if int(ThisYearStr) == LastYear + 1:
ThisStreak+=1
else:
if ThisStreak > MaxStreak:
MaxStreak=ThisStreak
ThisStreak=1
LastYear=int(ThisYearStr)
return max(MaxStreak,1)
$$ language plpythonu;
select winning_country,
find_longest_streak(listagg(competition_year,',') within group (order by competition_year))
from temp_c
group by winning_country
order by 2 desc
;
How about something like...
SELECT
winning_country,
COUNT(*)
GROUP BY winning_country
HAVING MAX(year) - MIN(year) = COUNT(year) - 1
This assumes no duplicate entries.
Creating a session abstraction do the trick:
WITH winning_changes AS (
SELECT *,
CASE WHEN LAG(winning_country) OVER (ORDER BY year) <> winning_country THEN 1 ELSE 0 END AS same_winner
FROM winners
),
sequences AS (
SELECT *,
SUM(same_winner) OVER (ORDER BY year) AS winning_session
FROM winning_changes
),
streaks AS (
SELECT winning_country AS country,
winning_session,
COUNT(*) streak
FROM sequences
GROUP BY 1,2
)
SELECT country,
MAX(streak) AS no_of_wins
FROM streaks
GROUP BY 1;

Finding sequence in data and grouping by it

Data in Phone_number column of my Temp_table looks like this
1234560200
1234560201
1234560202
2264540300
2264540301
2264540302
2264540303
2264540304
2264540305
2264540306
I want it to find sequence of last 4 digits and and find First and Last number of sequence of it. For eg.
There is sequence of first 3 rows as 0200, 0201, 0202, so First = 0200 and Last = 0202
Final Output of this query should be
First Last
0200 0202
0300 0306
I tried below query, but not sure about this approach.
WITH get_nxt_range AS
(
select substr(a.PHONE_NUMBER,7,4) range1,
LEAD(substr(a.PHONE_NUMBER,7,4)) OVER (ORDER BY a.PHONE_NUMBER ) nxt_range
from Temp_table a
)
SELECT range1,nxt_range FROM get_nxt_range
WHERE nxt_range = range1 +1
ORDER BY range1
One method to get sequences is to use the difference of row numbers approach. This works in your case as well:
select substr(phone_number, 1, 6),
min(substr(phone_number, 7, 4)), max(substr(phone_number, 7, 4))
from (select t.*,
(row_number() over (order by phone_number) -
row_number() over (partition by substr(phone_number, 1, 6) order by phone_number)
) as grp
from temp_table t
) t
group by substr(phone_number, 1, 6), grp;
I think something like this might work:
select
min (substr (phone_number, -4, 4)) as first,
max (substr (phone_number, -4, 4)) as last
from temp_table
group by
substr (phone_number, -4, 2)
SELECT DISTINCT
COALESCE(
first_in_sequence,
LAG( first_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS first_in_sequence,
COALESCE(
last_in_sequence,
LAG( last_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS last_in_sequence
FROM (
SELECT phone_number,
CASE phone_number
WHEN LAG( phone_number ) OVER ( ORDER BY phone_number ) + 1
THEN NULL
ELSE phone_number
END AS first_in_sequence,
CASE phone_number
WHEN LEAD( phone_number ) OVER ( ORDER BY phone_number ) - 1
THEN NULL
ELSE phone_number
END AS last_in_sequence
FROM temp_table
);
Update:
CREATE TABLE phone_numbers ( phone_number ) AS
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual;
SELECT MIN( phone_number ) AS first_in_sequence,
MAX( phone_number ) AS last_in_sequence
FROM (
SELECT phone_number,
phone_number - ROW_NUMBER() OVER ( ORDER BY phone_number ) AS grp
FROM phone_numbers
)
GROUP BY grp;
Output:
FIRST_IN_SEQUENCE LAST_IN_SEQUENCE
----------------- ----------------
2264540300 2264540306
1234560200 1234560202
If 1234560201 1234560203 1234560204 are two instances then this should work:
with tt as (
select substr(PHONE_NUMBER,7,4) id from Temp_table
),
t as (
select
t1.id,
case when t3.id is null then 1 else 0 end start,
case when t2.id is null then 1 else 0 end "end"
from tt t1
-- no next adjacent element - we have an end of interval
left outer join tt t2 on t2.id - 1 = t1.id
-- not previous adjacent element - we have a start of interval
left outer join tt t3 on t3.id + 1 = t1.id
-- select starts and ends only
where t2.id is null or t3.id is null
)
-- find nearest end record for each start record (it may be the same record)
select t1.id, (select min(id) from t where id >= t1.id and "end" = 1)
from t t1
where t1.start = 1
I see guys already have answered for your question.
I just want to propose my variant how resolve this task:
with list_num (phone_number) as (
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual)
select root as from_value,
max(phone_number) keep (dense_rank last order by lvl) as to_value
from
(select phone_number, level as lvl, CONNECT_BY_ROOT phone_number as root
from
(select phone_number,
decode(phone_number-lag (phone_number) over(order by phone_number),1,1,0) as start_value
from list_num) b
connect by nocycle phone_number = prior phone_number + 1
start with start_value = 0)
group by root
having count(1) > 1
If you need only last 4 numbers just substr it.
substr(root,7,4) as from_value,
substr(max(phone_number) keep (dense_rank last order by lvl),7,4) as to_value
Thanks.

Find Top Most AND Lowest In a Table's Group Column

I have a table and there are 4 fields in it, ID, Price, QTY, Ratting and Optional [Position].
I have all the records Grouped By Columns [Qty,Ratting]
I have to define the position of groupwise and store that Position into Optional column.
For better understanding I have added an image with data in table:
On the basis of QTY in Each Rating I have to Mark Top3, Bottom3 and Rest of them as remaining.
I am not getting how to do it.
Can anybody suggest me how to do it?
So far what I've tried is:
Declare #RankTable TABLE
(
ID INT,
Price Decimal (10,2),
Qty INT,
Ratting INT
)
INSERT INTO #RankTable
SELECT 1,10,15,1
UNION ALL
SELECT 2,11,11,1
UNION ALL
SELECT 3,96,10,1
UNION ALL
SELECT 4,96,8,1
UNION ALL
SELECT 5,56,7,1
UNION ALL
SELECT 6,74,5,1
UNION ALL
SELECT 7,93,4,1
UNION ALL
SELECT 8,98,2,1
UNION ALL
SELECT 9,12,1,1
UNION ALL
SELECT 10,32,80,2
UNION ALL
SELECT 11,74,68,2
UNION ALL
SELECT 12,58,57,2
UNION ALL
SELECT 13,37,43,2
UNION ALL
SELECT 14,79,32,2
UNION ALL
SELECT 15,29,28,2
UNION ALL
SELECT 16,46,17,2
UNION ALL
SELECT 17,86,13,2
UNION ALL
SELECT 19,75,110,3
UNION ALL
SELECT 20,27,108,3
UNION ALL
SELECT 21,38,104,3
UNION ALL
SELECT 22,87,100,3
UNION ALL
SELECT 23,47,89,3
DECLARE #PositionGroup VARCHAR(1)
SELECT *,ISNULL(#PositionGroup,'') AS Position FROM #RankTable
You can try this:
SELECT ID
,Price
,Qty
,Ratting
,CASE WHEN RowID >= 1 AND RowID <= 3
THEN 0
ELSE CASE WHEN RowID > Total - 3 THEN 1 ELSE 2 END END AS Position
FROM (SELECT ID
,Price
,Qty
,Ratting
,COUNT(*) OVER(PARTITION BY Ratting) AS Total
,ROW_NUMBER() OVER(PARTITION BY Ratting ORDER BY Qty DESC) AS RowID
,ISNULL(#PositionGroup,'') AS Position
FROM #RankTable) AS T
Use Window Function. Try this.
;WITH cte
AS (SELECT *,
Row_number()OVER(partition BY rating ORDER BY id) rn,
count(id)OVER(partition BY rating) mx
FROM #RankTable)
SELECT ID,
Price,
Qty,
Rating,
mx - rn,
CASE WHEN rn IN ( 1, 2, 3 ) THEN 0
WHEN mx - rn IN( 0, 1, 2 ) THEN 1
ELSE 2
END position
FROM cte
try this as well.
;WITH cte AS
(
SELECT MAX(Row) [Max],
MIN(Row) [Min],
LU.Ratting
FROM (
SELECT *,
ROW_NUMBER() OVER(PARTITION BY Ratting ORDER BY Qty DESC) Row
FROM #RankTable)LU
GROUP BY LU.Ratting
)
SELECT ID,
R.Price,
R.Qty,
cte.Ratting,
CASE WHEN (Row - Min) <= 2 THEN 0 WHEN (Max - Row) <= 2 THEN 1 ELSE 2 END Position
FROM cte
JOIN (
SELECT Ratting,
ID,
Price,
Qty,
ROW_NUMBER() OVER(PARTITION BY Ratting ORDER BY Qty DESC) [Row]
FROM #RankTable
) R ON R.Ratting = cte.Ratting
Result:

Select Rows with Maximum Column Value group by Another Column

This should be a simple question, but I can't get it to work :(
How to select rows that have the maximum column value,as group by another column?
For example,
I have the following table definition:
ID
Del_Index
docgroupviewid
The issue now is that I want to group by results by docgroupviewid first, and then choose one row from each docgroupviewid group, depending on which one has the highest del_index.
I tried
SELECT docgroupviewid, max(del_index),id FROM table
group by docgroupviewid
But instead of return me with the correct id, it returns me with the earliest id from the group with the same docgroupviewid.
Any ideas?
I've struggled with this many times myself and the solution is to think about your query differently.
I want each DocGroupViewID row where the Del_Index is the highest(max) for all rows with that DocGroupViewID:
SELECT
T.DocGroupViewID,
T.Del_Index,
T.ID
FROM MyTable T
WHERE T.Del_Index = (
SELECT MAX( T1.Del_Index ) FROM MyTable T1
WHERE T1.DocGroupViewID = T.DocGroupViewID
)
It gets more complex when more than one row can have the same Del_Index, since then you need some way to choose which one to show.
EDIT: wanted to follow up with another option
You can use the RANK() or ROW_NUMBER() functions with a CTE to get more control over the results, as follows:
-- fake a source table
DECLARE #t TABLE (
ID int IDENTITY(1,1) PRIMARY KEY,
Del_Index int,
DocGroupViewID int
)
INSERT INTO #t
SELECT 1, 1 UNION ALL
SELECT 2, 1 UNION ALL
SELECT 3, 1 UNION ALL
SELECT 1, 2 UNION ALL
SELECT 2, 2 UNION ALL
SELECT 2, 2 UNION ALL
SELECT 1, 3 UNION ALL
SELECT 2, 3 UNION ALL
SELECT 3, 3 UNION ALL
SELECT 4, 3
-- show our source
SELECT * FROM #t
-- select using RANK (can have duplicates)
;WITH cteRank AS
(
SELECT
DocGroupViewID,
Del_Index,
ID,
RANK() OVER
(PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
AS RowRank,
ROW_NUMBER() OVER
(PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
AS RowNumber
FROM #t
)
SELECT *
FROM cteRank
WHERE RowRank = 1
-- select using ROW_NUMBER
;WITH cteRowNumber AS
(
SELECT
DocGroupViewID,
Del_Index,
ID,
RANK() OVER
(PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
AS RowRank,
ROW_NUMBER() OVER
(PARTITION BY DocGroupViewID ORDER BY Del_Index DESC)
AS RowNumber
FROM #t
)
SELECT *
FROM cteRowNumber
WHERE RowNumber = 1
If you have ways to sort out ties, just add it to the ORDER BY.
You will have to complicate your query a little bit:
select a.docgroupviewid, a.del_index, a.id from table a
where a.del_index = (select max(b.del_index) from table
where b.docgroupviewid = a.docgroupviewid)