SQL Server : find break in dates to show unique rows

SQL Server : find break in dates to show unique rows - sql

I have developed a solution to a problem (I think), and I am keen to see if there is a better way around this, as I can't help but feel there is a better way.
The problem: a company name, and a move in date are shown. The company could leave, another company come in and then the original company could come back. To make this problem a bit tricky, there may be rogue dates for a company in there. Best way to explain it is via the table:
Table example
What I need to extract, is only the first time a company moved in, until it is broken by a different company and so on.
The code I have is:
-- Demo script: label every run of consecutive rows that belong to the same
-- company with a UNIQUE_ID, then report the first DATE_MOVED_IN of each run.
IF OBJECT_ID('tempdb..#tmpData') IS NOT NULL
    DROP TABLE #tmpData
GO

CREATE TABLE #tmpData
(
    COMPANY_NAME  NVARCHAR(30),
    DATE_MOVED_IN DATETIME,
    ID            INT IDENTITY(1,1),  -- row order the loop walks in
    UNIQUE_ID     INT                 -- run label, filled in by the loop below
);

INSERT INTO #tmpData (COMPANY_NAME, DATE_MOVED_IN)
SELECT 'ABC LTD','01/01/2017' UNION ALL
SELECT 'ABC LTD','01/04/2017' UNION ALL
SELECT 'XYZ LTD','01/10/2017' UNION ALL
SELECT 'ABC LTD','01/12/2017';

-- T-SQL variables take the @ sigil (# denotes a temporary table).
DECLARE @intMinID        INT,
        @intMaxID        INT,
        @strCurrentComp  NVARCHAR(50),
        @strPreviousComp NVARCHAR(50),
        @intMaxUID       INT;

SELECT
    @intMinID = MIN(TD.ID),
    @intMaxID = MAX(TD.ID)
FROM
    #tmpData AS TD;

-- The first row always opens run 1.
UPDATE TD
SET TD.UNIQUE_ID = 1
FROM #tmpData AS TD
WHERE TD.ID = @intMinID;

-- @intMinID doubles as the loop cursor from here on.
WHILE @intMinID <= @intMaxID
BEGIN
    -- A SELECT that matches no row leaves its variable untouched, so clear
    -- both names first to avoid carrying a value over from the last pass.
    SELECT @strCurrentComp = NULL, @strPreviousComp = NULL;

    SELECT
        @strCurrentComp = TD.COMPANY_NAME
    FROM
        #tmpData AS TD
    WHERE
        TD.ID = @intMinID;

    SELECT
        @strPreviousComp = TD.COMPANY_NAME
    FROM
        #tmpData AS TD
    WHERE
        TD.ID = (@intMinID - 1);

    -- Rows are labelled in ID order, so the current maximum is the label of
    -- the most recently labelled row.
    SELECT
        @intMaxUID = MAX(TD.UNIQUE_ID)
    FROM
        #tmpData AS TD;

    IF (@strPreviousComp IS NULL)
        PRINT 'Nothing to do'  -- first row: already labelled before the loop
    ELSE
    BEGIN
        -- Same company as the previous row: stay in the current run.
        -- Different company: open a new run. Looking at the NEXT row as well
        -- would split a run of three or more consecutive rows.
        UPDATE TD
        SET TD.UNIQUE_ID = CASE
                               WHEN @strCurrentComp = @strPreviousComp THEN @intMaxUID
                               ELSE @intMaxUID + 1
                           END
        FROM #tmpData AS TD
        WHERE TD.ID = @intMinID;
    END

    SET @intMinID = @intMinID + 1;
END

-- One output row per run: the company and the earliest move-in date of the run.
SELECT
    COMPANY_NAME, MIN(DATE_MOVED_IN) AS DATE_MOVED_IN
FROM
    #tmpData
GROUP BY
    COMPANY_NAME, UNIQUE_ID
ORDER BY
    UNIQUE_ID ASC
Any suggestions on how to do this in a more efficient way, or if any errors are spotted, feedback is very much appreciated.
Thanks,
Leo

Lag() should do it...
-- Keep a row only when its company differs from the company on the previous
-- row (ordered by move-in date). The very first row has no predecessor, so
-- LAG returns NULL there and the row is kept as well.
WITH company_moves AS
(
    SELECT
        Company_Name,
        Date_Moved_In,
        LAG(Company_Name) OVER (ORDER BY Date_Moved_In) AS PrevComp
    FROM #tmpData  -- the temp table created in the question
)
SELECT
    Company_Name,
    Date_Moved_In
FROM company_moves
WHERE PrevComp <> Company_Name
   OR PrevComp IS NULL
ORDER BY Date_Moved_In;

You can use the difference in row number logic to classify continuous dates by company into one group. Run the inner query alone to see how groups are assigned.
Thereafter, just group by the company and previously classified group to get the first date moved in.
-- Gaps-and-islands: the overall row number minus the per-company row number
-- is constant within a run of consecutive rows for one company, so
-- (company_name, grp) identifies a run. MIN then picks the run's first date.
SELECT
    x.company_name,
    MIN(x.date_moved_in) AS date_moved_in
FROM (
    SELECT
        t.company_name,
        t.date_moved_in,
        ROW_NUMBER() OVER (ORDER BY t.date_moved_in)
      - ROW_NUMBER() OVER (PARTITION BY t.company_name ORDER BY t.date_moved_in) AS grp
    FROM #tmpData AS t
) AS x
GROUP BY
    x.company_name,
    x.grp
ORDER BY
    MIN(x.date_moved_in);  -- runs in chronological order

Related

Incorrect Syntax near With

No matter where I place my With statement inside the SQL query, the keyword in the next line always shows an error, 'Incorrect syntax near keyword'. I also tried putting semi-colon.
; WITH Commercial_subset AS
(
SELECT DISTINCT
PRDID_Clean, Value, [Year]
FROM
Reporting_db_SPKPI.DBO.[tbl_RCCP_commercial]
WHERE
MEASURE = 'Unit Growth Rate'
)
--error appears at truncate
TRUNCATE TABLE Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_dup]
Example 1:
[Example 1][1]
Example 2:
[Example 2][2]
What am I missing?
[1]: https://i.stack.imgur.com/lkfVd.png
[2]: https://i.stack.imgur.com/tZRnG.png
My Final code after getting suggestions in the comments,
--Ensure the correct database is selected for creating the views
USE Reporting_db_SPKPI
--Create the table where new values will be appended
Insert into Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_dup]
Select *, Replace(productID,'-','') as ProductID_clean from Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR]
GO
--Create a subset as view which will be used for join later
Create or Alter View QRY_Commerical_Subset AS
Select Distinct PRDID_Clean, Value, [Year] From Reporting_db_SPKPI.DBO.[tbl_RCCP_commercial] where MEASURE = 'Unit Growth Rate'
Go
--Create a view with distinct list of all SKUs
CREATE OR ALTER VIEW QRY_RCCP_TEMP AS
SELECT
PRODUCTID, ROW_NUMBER() Over (ORDER BY ProductID) AS ID
FROM (
SELECT
DISTINCT A.ProductID_clean ProductID
FROM
Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_dup] A
LEFT JOIN
Reporting_db_SPKPI.DBO.QRY_Commerical_Subset B ON A.ProductID_clean = B.PRDID_Clean
WHERE
B.PRDID_Clean IS NOT NULL --and A.filename = 'Capacity Planning_INS_Springhill' --DYNAMIC VARIABLE HERE
and Cast(A.SnapshotDate as date) =
(SELECT Max(Cast(SnapshotDate as date)) FROM reporting_db_spkpi.dbo.tbl_RCCP_3_NR)
) T
GO
SET NOCOUNT ON
-- For every product id from the distinct list iterate the following the code
DECLARE #I INT = 1
WHILE #I <= (SELECT MAX(ID) FROM QRY_RCCP_TEMP)
BEGIN
DECLARE #PRODUCT NVARCHAR(50) = (SELECT PRODUCTID FROM QRY_RCCP_TEMP WHERE ID = #I)
DROP TABLE Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_temp]
--Retrieve last 12 months of value from NR and add it to a temp table in increasing order of their months. These 12 data points will be baseline
SELECT
Top 12 A.*,
Case When B.[Value] is Null then 0 else CAST(B.[Value] as float) End GROWTH
INTO
Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_temp]
FROM
Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_dup] A
LEFT JOIN
--using the view here
QRY_Commerical_Subset B ON B.PRDID_Clean = A.ProductID_clean AND B.[YEAR] = YEAR(A.[MONTH])+1
WHERE
A.PRODUCTID= #PRODUCT
AND Cast(A.SnapshotDate AS DATE) = (SELECT Max(Cast(SnapshotDate AS DATE)) FROM reporting_db_spkpi.dbo.[tbl_RCCP_3_NR_dup])
Order by
[Month] desc
-- Generate 3 years of data
DECLARE #J INT = 1
WHILE #J<=3
BEGIN
--Calculate next year's value
UPDATE Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_temp]
SET
[Value] = [Value]*(1+ GROWTH),
[MONTH] = DATEADD(YEAR,1,[Month]),
MonthCode= 'F' + CAST(CAST(SUBSTRING(MonthCode,2,LEN(MonthCode)) AS INT) + 12 AS NVARCHAR(10))
--Add it to the NR table.
Insert into Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_dup]
(ProductID, MonthCode, Value, Month, FileName,
LastModifiedDate, SnapshotDate, Quarter, IsError, ErrorDescription)
Select
ProductID, MonthCode, Value, Month, FileName,
LastModifiedDate, SnapshotDate, Quarter, IsError, ErrorDescription
from
Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_temp]
--Update growth rate for next year
UPDATE Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_temp]
SET GROWTH = Case When B.[Value] is Null then 0 else CAST(B.[Value] as float) End
FROM Reporting_db_SPKPI.DBO.QRY_Commerical_Subset B
WHERE B.PRDID_Clean = ProductID_clean AND [YEAR] = YEAR([MONTH])+1
SET #J=#J+1
END
SET #I=#I+1
END
DROP VIEW QRY_RCCP_TEMP
DROP VIEW QRY_Commerical_Subset

The WITH is a Common Table Expression, aka CTE.
And a CTE is like a template for a sub-query.
For example this join of the same sub-query:
SELECT *
FROM (
select distinct bar
from table1
where foo = 'baz'
) AS foo1
JOIN (
select distinct bar
from table1
where foo = 'baz'
) AS foo2
ON foo1.bar > foo2.bar
Can be written as
WITH CTE_FOO AS (
select distinct bar
from table1
where foo = 'baz'
)
SELECT *
FROM CTE_FOO AS foo1
JOIN CTE_FOO AS foo2
ON foo1.bar > foo2.bar
It's meant to be used with a SELECT.
Not with a TRUNCATE TABLE or DROP TABLE.
(It can be used with an UPDATE though)
As such, treat the TRUNCATE as a separate statement.
TRUNCATE TABLE Reporting_db_SPKPI.DBO.[tbl_RCCP_3_NR_dup];
WITH Commercial_subset AS
(
SELECT DISTINCT
PRDID_Clean, Value, [Year]
FROM
Reporting_db_SPKPI.DBO.[tbl_RCCP_commercial]
WHERE
MEASURE = 'Unit Growth Rate'
)
SELECT *
FROM Commercial_subset;
Btw, the reason why many write a CTE with a leading ; is because the WITH clause raises an error if the previous statement wasn't ended with a ;. It's just a small trick to avoid that error.

Find the specific number of specific value in SQL

I have to show a specific value from a table, based on input that I get from the user. For example, if the user enters 2 and 'toilet paper', then I have to show the second date of toilet paper in my table:
I wrote this, but I know it doesn't work:
CREATE PROC BuyCount(#NInput INT,
#TitleInput nvarchar(50))
AS
BEGIN
DECLARE #RecordItemCount INT
SET #RecordItemCount = (SELECT COUNT(FID) FROM Buy_tbl WHERE Bname = #TitleInput)
IF (#Input <= #RecordItemCount)
BEGIN
SELECT BuyDate , #Input
FROM Buy_tbl
WHERE Bname = #TitleInput AND ...
END
ELSE
BEGIN
PRINT 'Out of range'
SELECT BuyDate , #Input
FROM Buy_tbl
WHERE Bname = #TitleInput
END
END
PS: Also I should mention, If the number was out of range it returns the last value of the buy items

Following query should work in both the scenarios
;WITH cte
AS (
SELECT BuyDate
,row_number() OVER (ORDER BY BuyDate) rn
,count(*) OVER () ct
FROM Buy_tbl
WHERE Bname = #TitleInput
)
SELECT TOP 1 BuyDate
,#Input
FROM cte
WHERE rn = #Input
OR rn = ct
ORDER BY BuyDate
Example Demo

Selecting data from table where sum of values in a column equal to the value in another column

Sample data:
create table #temp (id int, qty int, checkvalue int)
insert into #temp values (1,1,3)
insert into #temp values (2,2,3)
insert into #temp values (3,1,3)
insert into #temp values (4,1,3)
According to data above, I would like to show exact number of lines from top to bottom where sum(qty) = checkvalue. Note that checkvalue is same for all the records all the time. Regarding the sample data above, the desired output is:
Id Qty checkValue
1 1 3
2 2 3
Because 1+2=3 and no more data is needed to show. If checkvalue was 4, we would show the third record: Id:3 Qty:1 checkValue:4 as well.
This is the code I am handling this problem. The code is working very well.
declare #checkValue int = (select top 1 checkvalue from #temp);
declare #counter int = 0, #sumValue int = 0;
while #sumValue < #checkValue
begin
set #counter = #counter + 1;
set #sumValue = #sumValue + (
select t.qty from
(
SELECT * FROM (
SELECT
ROW_NUMBER() OVER (ORDER BY id ASC) AS rownumber,
id,qty,checkvalue
FROM #temp
) AS foo
WHERE rownumber = #counter
) t
)
end
declare #sql nvarchar(255) = 'select top '+cast(#counter as varchar(5))+' * from #temp'
EXECUTE sp_executesql #sql, N'#counter int', #counter = #counter;
However, I am not sure if this is the best way to deal with it and wonder if there is a better approach. There are many professionals here and I'd like to hear from them about what they think about my approach and how we can improve it. Any advice would be appreciated!

Try this:
select id, qty, checkvalue from (
select t1.*,
sum(t1.qty) over (partition by t2.id) [sum]
from #temp [t1] join #temp [t2] on t1.id <= t2.id
) a where checkvalue = [sum]
Smart self-join is all you need :)

For SQL Server 2012, and onwards, you can easily achieve this using ROWS BETWEEN in your OVER clause and the use of a CTE:
-- Running total of qty in id order; keep the rows whose cumulative quantity
-- has not yet passed checkvalue.
WITH Running AS (
    SELECT
        t.id,
        t.qty,
        t.checkvalue,
        SUM(t.qty) OVER (ORDER BY t.id ROWS UNBOUNDED PRECEDING) AS RunningQty
    FROM #temp AS t
)
SELECT
    r.id,
    r.qty,
    r.checkvalue
FROM Running AS r
WHERE r.checkvalue >= r.RunningQty;

One basic improvement is to try & reduce the no. of iterations. You're incrementing by 1, but if you repurpose the logic behind binary searching, you'd get something close to this:
DECLARE #RoughAverage int = 1 -- Some arbitrary value. The closer it is to the real average, the faster things should be.
DECLARE #CheckValue int = (SELECT TOP 1 checkvalue FROM #temp)
DECLARE #Sum int = 0
WHILE 1 = 1 -- Refer to BREAK below.
BEGIN
SELECT TOP (#RoughAverage) #Sum = SUM(qty) OVER(ORDER BY id)
FROM #temp
ORDER BY id
IF #Sum = #CheckValue
BREAK -- Indicating you reached your objective.
ELSE
SET #RoughAverage = #CheckValue - #Sum -- Most likely incomplete like this.
END

For SQL 2008 you can use recursive cte. Top 1 with ties limits result with first combination. Remove it to see all combinations
with cte as (
select
*, rn = row_number() over (order by id)
from
#temp
)
, rcte as (
select
i = id, id, qty, sumV = qty, checkvalue, rn
from
cte
union all
select
a.id, b.id, b.qty, a.sumV + b.qty, a.checkvalue, b.rn
from
rcte a
join cte b on a.rn + 1 = b.rn
where
a.sumV < b.checkvalue
)
select
top 1 with ties id, qty, checkvalue
from (
select
*, needed = max(case when sumV = checkvalue then 1 else 0 end) over (partition by i)
from
rcte
) t
where
needed = 1
order by dense_rank() over (order by i)

Show 0 in count SQL

This is my result :
Year matches
2005 1
2008 2
and this is my expected result:
Year matches
2005 1
2006 0
2007 0
2008 2
This is what I have tried:
SELECT DATEPART(yy,A.match_date) AS [Year], COUNT(A.match_id) AS "matches"
FROM match_record A
INNER JOIN match_record B ON A.match_id = B.match_id
WHERE (score) IS NULL OR (score) = 0
GROUP BY DATEPART(yy,A.match_date);
I want to get zero as the count for the years where score has a value (not NULL and not zero, i.e. anything greater than 0). Can someone help me?

This might do what you're looking for:
SELECT DATEPART(yy,A.match_date) AS [Year],
SUM(CASE WHEN score=0 or score is null THEN 1 ELSE 0 END) AS "matches"
FROM match_record A
INNER JOIN match_record B ON A.match_id = B.match_id
GROUP BY DATEPART(yy,A.match_date);
Assuming you have any data in the missing years, this should now produce your expected results.
If, instead, you need 0s for years where you have no data, you'll need to provide the list of years separately (say, via a numbers table) and then LEFT JOIN that source to your existing query.

Consider following is your table
SELECT * INTO #TEMP FROM
(
SELECT 2005 [YEARS],1 [MATCHES]
UNION ALL
SELECT 2008,2
)T
Declare two variables to get min and max date in your table
-- First and last year present in #TEMP (T-SQL variables take the @ sigil).
DECLARE @MINYEAR int;
DECLARE @MAXYEAR int;
SELECT
    @MINYEAR = MIN(YEARS),
    @MAXYEAR = MAX(YEARS)
FROM #TEMP;

-- Do the following recursion to get years between the period in your table and LEFT JOIN with your table.
-- The anchor emits exactly one seed row; selecting it FROM #TEMP would seed
-- the recursion once per table row and duplicate every generated year.
-- The WHERE on the anchor keeps the result empty when #TEMP is empty.
-- NOTE(review): assumes #TEMP holds one row per year - confirm.
;WITH CTE AS
(
    SELECT @MINYEAR AS yr
    WHERE @MINYEAR IS NOT NULL
    UNION ALL
    SELECT yr + 1
    FROM CTE
    WHERE yr < @MAXYEAR
)
SELECT
    C.yr,
    COALESCE(T.MATCHES, 0) AS MATCHES  -- years without a row count as 0
FROM CTE AS C
LEFT JOIN #TEMP AS T
    ON C.yr = T.YEARS
ORDER BY C.yr;

-- Walk the years 2005..2008 and collect one result row per year: the rows
-- stored in tbl for that year when there are any, otherwise a zero count.
DECLARE @t TABLE ([Year] int, matches int);
DECLARE @i int = 2005;

WHILE @i <= 2008
BEGIN
    IF NOT EXISTS (SELECT 1 FROM tbl WHERE [year] = @i)
    BEGIN
        -- No data for this year: record an explicit 0 (an int, not '0').
        INSERT INTO @t ([Year], matches)
        VALUES (@i, 0);
    END
    ELSE
    BEGIN
        -- Copy only the current year's rows; without this filter every
        -- iteration would re-insert the whole of tbl.
        INSERT INTO @t ([Year], matches)
        SELECT [year], [matches]
        FROM tbl
        WHERE [year] = @i;
    END

    SET @i = @i + 1;
END

SELECT DISTINCT
    [Year],
    matches
FROM @t
ORDER BY [Year];

how about,
SELECT
[year],
COUNT(*) [matches]
FROM (
SELECT
DATEPART(yy, [A].[match_date]) [year]
FROM
[match_record] [A]
LEFT JOIN
[match_record] [B]
ON [A].[match_id] = [B].[match_id]
WHERE
COALESCE([B].[score], 0) = 0) [Nils]
GROUP BY
[Year];

find circular transactions in database table

I have a table in sql server database in which records of transactions are stored. Table consists of user id of buyer and user id of seller of product. I have to find the circles in the table for example-
I have to get the records of type- A sells to B, B sells to C, C sells to D AND D sells to A.
Please help.

Use following function:
-- Returns 1 when a chain of sales that starts at @SellerId leads back to
-- @SellerId (A sells to B, B sells to C, ..., X sells to A); otherwise 0.
-- Reads SellerId/BuyerId pairs from YourTable.
--
-- The search is breadth-first: @Frontier holds the users reached in the
-- current step, @Visited every user already expanded. Each user is expanded
-- at most once, so the loop ends even when the data contains other cycles.
CREATE FUNCTION dbo.CheckIsCircular(@SellerId INT)
RETURNS BIT
AS
BEGIN
    DECLARE @IsCircular BIT = 0;
    DECLARE @Visited  TABLE (Id INT PRIMARY KEY);
    DECLARE @Frontier TABLE (Id INT PRIMARY KEY);
    DECLARE @Next     TABLE (Id INT PRIMARY KEY);

    INSERT INTO @Frontier (Id) VALUES (@SellerId);

    WHILE EXISTS (SELECT 1 FROM @Frontier)
    BEGIN
        -- Buyers of the current frontier only (not every buyer in the table).
        DELETE FROM @Next;

        INSERT INTO @Next (Id)
        SELECT DISTINCT yt.BuyerId
        FROM YourTable AS yt
        INNER JOIN @Frontier AS f
            ON f.Id = yt.SellerId
        WHERE yt.BuyerId IS NOT NULL;

        -- The chain has come back to where it started: a circle exists.
        IF EXISTS (SELECT 1 FROM @Next WHERE Id = @SellerId)
        BEGIN
            SET @IsCircular = 1;
            BREAK;
        END

        -- Mark the frontier as expanded, then advance to the buyers that
        -- have not been expanded yet.
        INSERT INTO @Visited (Id)
        SELECT Id FROM @Frontier;

        DELETE FROM @Frontier;

        INSERT INTO @Frontier (Id)
        SELECT n.Id
        FROM @Next AS n
        WHERE NOT EXISTS (SELECT 1 FROM @Visited AS v WHERE v.Id = n.Id);
    END

    RETURN @IsCircular;
END

Your problem is a graph traversal challenge; this is not natively supported in TSQL, but you can simulate it.

This is a skeleton how I do it in Teradata, so syntax must be slightly modified for SQL Server:
WITH RECURSIVE cte (..., Path, isCycle) AS
(
SELECT
...
,',' || CAST(seller AS VARCHAR(1000)) || ',' AS path
,0 AS isCycle
FROM tab
UNION ALL
SELECT
...
,cte.Path || cte.buyer || ',',
,case when cte.Path LIKE '%,' || TRIM(tab.buyer) || ',%' then 1 else 0 end
FROM cte, tab
WHERE cte.buyer = tab.seller
AND cte.isCycle <> 1
)
SELECT ...
,Path || Destination
,isCycle
FROM cte
WHERE isCycle = 1
Build a materialized path of the graph while traversing and check if the next buyer is already in this path.

With a recursive cte
declare #trans table (seller int, buyer int)
insert #trans
values (1,2),(2,3),(3,4),(4,1),(1,5),(2,6),(3,5)
begin try
;with cte as
(
select *, convert(varchar(500),'') as route from #trans
union all
select cte.seller, t1.buyer, convert(varchar(500),route + CONVERT(varchar(5),t1.seller)) from cte
inner join #trans t1 on cte.buyer = t1.seller
)
select * from cte
where seller=buyer
option (maxrecursion 50)
end try
begin catch
print 'loops'
end catch

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL Server : find break in dates to show unique rows - sql

Lag() should do it... with CTE as ( select Company_Name, Date_Moved_in, lag(Company_Name) over (order by Date_Moved_In) as PrevComp from #TempTable ) select Company_Name, Date_Moved_In from CTE where PrevComp <> Company_Name or PrevComp is null

Related

Incorrect Syntax near With

Find the specific number of specific value in SQL

Selecting data from table where sum of values in a column equal to the value in another column

Show 0 in count SQL

find circular transactions in database table

Categories

Resources