Grouping file paths in a Microsoft SQL database - sql

I have a table with a list of folders that looks like this:
Path Size
C:\ParentFolder\A 123
C:\ParentFolder\A\B 442434
C:\ParentFolder\A\B\C 13413412
C:\ParentFolder\D 2422341234
C:\ParentFolder\D\E 3342
C:\ParentFolder\D\E\F 2
C:\ParentFolder\D\E\G 2
...
I'm looking for some combination of SUM, GROUP BY, and PATINDEX/LTRIM/SUBSTRING/etc. which would give me back this:
Path SumSize
C:\ParentFolder\A 13855969
C:\ParentFolder\D 2422344580
...
C:\ParentFolder is a known prefix, but A,D,etc. are variable folder names. Do I need to write a function to accomplish that or can I use some combination of string functions?

-- Total Size per first-level folder under the known prefix C:\ParentFolder\.
-- r holds the first-level folders themselves: rows that start with the prefix and
-- contain no further '\' after it. DISTINCT keeps a duplicated folder row from
-- doubling every total.
-- A row m is counted for r.Path when it is that folder or lies beneath it. Both
-- sides are compared with a trailing '\' so that C:\ParentFolder\A does not also
-- collect C:\ParentFolder\AB.
select r.Path, sum(m.Size) as SumSize
from MyTable m
inner join (
    select distinct Path
    from MyTable
    where left(Path, len('C:\ParentFolder\')) = 'C:\ParentFolder\'
      and charindex('\', Path, len('C:\ParentFolder\') + 1) = 0
) r on left(m.Path + '\', len(r.Path) + 1) = r.Path + '\'
group by r.Path
SQL Fiddle example here

Starting with your testing set,
-- Test data: one row per folder together with the size recorded for it.
create table #MyTable
(
    Folder varchar(100) not null,
    Size   bigint       not null
);

insert into #MyTable (Folder, Size)
values
    ('C:\ParentFolder\A',     123),
    ('C:\ParentFolder\A\B',   442434),
    ('C:\ParentFolder\A\B\C', 13413412),
    ('C:\ParentFolder\D',     2422341234),
    ('C:\ParentFolder\D\E',   3342),
    ('C:\ParentFolder\D\E\F', 2),
    ('C:\ParentFolder\D\E\G', 2);
you'd first determine what folders you want to summarize. I do so here by loading them into a temp table:
-- Folders to summarize, held in a temp table named #Targets.
-- (DECLARE ... table is only valid for table variables, whose names start with @;
-- a #-prefixed name must be created with CREATE TABLE.)
CREATE TABLE #Targets (Folder varchar(100) not null)
INSERT #Targets (Folder) values
('C:\ParentFolder\A')
,('C:\ParentFolder\D')
From here it's easy, using the like clause:
-- Total size per target folder. A row is counted when it is the target itself or
-- sits below it; the '\' before the wildcard stops C:\ParentFolder\A from also
-- matching a sibling such as C:\ParentFolder\AB.
-- The LEFT JOIN keeps targets that have no rows at all (their TotalSize is NULL).
SELECT ta.Folder, sum(mt.Size) TotalSize
from #Targets ta
left outer join #MyTable mt
on mt.Folder = ta.Folder
or mt.Folder like ta.Folder + '\%'
group by ta.Folder
Complications may ensue if your folders contain reserved characters used by the like clause: % _ ] [ and a few others.

Assuming that there is always an entry for the highest-level directory (i.e. if there is a c:\xxx\yyy\zzz there will always be a c:\xxx\yyy), how about:
-- roots: every path that contains exactly two '\' characters, with a '\' appended.
-- Each row of thetable is then attributed to the root that its own
-- '\'-terminated path starts with, and sizes are summed per root.
;with roots (root) as (
    select distinct t.path + '\'
    from thetable as t
    where len(replace(t.path, '\', '')) = len(t.path) - 2
)
select r.root,
       sum(f.size)
from thetable as f
inner join roots as r
    on r.root = left(f.path + '\', len(r.root))
group by r.root

--If the folder name is always one character
-- The group key is the folder text up to and including the second character after
-- the first 'r\' found in it; it is computed once in the APPLY and reused.
select g.folder_group
      ,sum(t.size) as sumsize
from #mytable as t
cross apply (
    select left(t.folder, charindex('r\', t.folder) + 2) as folder_group
) as g
group by g.folder_group
--If the folder name has a variable length
-- Locate the first, second and third '\' step by step. The group key is the text
-- before the third '\', or the whole folder when there is no third '\'.
select g.folder_group
      ,sum(t.size) as sumsize
from #mytable as t
cross apply (select charindex('\', t.folder) as slash1) as s1
cross apply (select charindex('\', t.folder, s1.slash1 + 1) as slash2) as s2
cross apply (select charindex('\', t.folder, s2.slash2 + 1) as slash3) as s3
cross apply (
    select case
               when s3.slash3 = 0 then t.folder
               else left(t.folder, s3.slash3 - 1)
           end as folder_group
) as g
group by g.folder_group

-- One row per first-level folder (no further '\' after the known prefix), with the
-- total size of that folder and everything beneath it.
-- The match is "same path, or path followed by '\' and anything", so that
-- C:\ParentFolder\A does not also sum C:\ParentFolder\AB.
select p1.Path, (select sum(p2.Size)
from Paths p2
where p2.Path = p1.Path
or p2.Path like p1.Path + '\%') as total
from Paths p1
where charIndex('\', p1.Path, len('C:\ParentFolder\') + 1) = 0

Related

Alphanumeric sort on nvarchar(50) column

I am trying to write a query that will return data sorted by an alphanumeric column, Code.
Below is my query:
-- Asker's attempt: derive two sort keys from [Code] via CROSS APPLY.
--   ca : PatPos   = PATINDEX position of "one character from the set [A-Z, a-z] followed by a digit"
--                   (the set as written also contains a comma and a space);
--        SpacePos = CHARINDEX of an empty string in [Code].
--        NOTE(review): the alias suggests a single space ' ' was intended - confirm.
--   ca2: OrderBy2 = integer converted from the characters after position 1 (only when PatPos = 1);
--        OrderBy1 = leading part of [Code] (or the whole [Code] when PatPos <> 1).
-- The statement has no ORDER BY clause, so OrderBy1/OrderBy2 are computed but never used for sorting.
-- <<TableName>> is a placeholder for the real table name.
SELECT *
FROM <<TableName>>
CROSS APPLY (SELECT PATINDEX('[A-Z, a-z][0-9]%', [Code]),
CHARINDEX('', [Code]) ) ca(PatPos, SpacePos)
CROSS APPLY (SELECT CONVERT(INTEGER, CASE WHEN ca.PatPos = 1 THEN
SUBSTRING([Code], 2,ISNULL(NULLIF(ca.SpacePos,0)-2, 8000)) ELSE NULL END),
CASE WHEN ca.PatPos = 1 THEN LEFT([Code],
ISNULL(NULLIF(ca.SpacePos,0)-0,1)) ELSE [Code] END) ca2(OrderBy2, OrderBy1)
WHERE [TypeID] = '1'
OUTPUT:
FFS1
FFS2
...
FFS12
FFS1.1
FFS1.2
...
FFS1.1E
FFS1.1R
...
FFS12.1
FFS12.2
FFS.12.1E
FFS12.1R
FFS12.2E
FFS12.2R
DESIRED OUTPUT:
FFS1
FFS1.1
FFS1.1E
FFS1.1R
....
FFS12
FFS12.1
FFS12.1E
FFS12.1R
What am I missing or overlooking?
EDIT:
Let me try to detail the table contents a little better. There are records for FFS1 - FFS12. Those are broken into X subs, i.e., FFS1.1 - FFS1.X to FFS12.1 - FFS12.X. The E and the R was not a typo, each sub record has two codes associated with it: FFS1.1E & FFS1.1R.
Additionally I tried using ORDER BY but it sorted as
FFS1
...
FFS10
FFS2
This will work for any count of parts separated by dots. The sorting is alphanumerical for each part separately.
-- Sample data. A table variable has to be named with a leading @.
DECLARE @YourValues TABLE(ID INT IDENTITY, SomeVal VARCHAR(100));
INSERT INTO @YourValues (SomeVal) VALUES
 ('FFS1')
,('FFS2')
,('FFS12')
,('FFS1.1')
,('FFS1.2')
,('FFS1.1E')
,('FFS1.1R')
,('FFS12.1')
,('FFS12.2')
,('FFS.12.1E')
,('FFS12.1R')
,('FFS12.2E')
,('FFS12.2R');
--The query
WITH Splittable AS
(
    -- 'a.b.c' becomes <x>a</x><x>b</x><x>c</x> so that each dot-separated part is
    -- addressable as its own node. Values containing XML-special characters
    -- (&, <) would make this CAST fail.
    SELECT ID
          ,SomeVal
          ,CAST(N'<x>' + REPLACE(SomeVal,'.','</x><x>') + N'</x>' AS XML) AS Casted
    FROM @YourValues
)
,Parted AS
(
    -- One row per part. PartNmbr restarts for every ID, so it is the position of
    -- the part within its own value rather than a counter across all rows.
    -- NOTE(review): ORDER BY (SELECT NULL) relies on nodes() returning the parts
    -- in document order; that is the usual behaviour but is not guaranteed.
    SELECT Splittable.*
          ,ROW_NUMBER() OVER(PARTITION BY Splittable.ID ORDER BY (SELECT NULL)) AS PartNmbr
          ,A.part.value(N'text()[1]','nvarchar(max)') AS Part
    FROM Splittable
    CROSS APPLY Splittable.Casted.nodes(N'/x') AS A(part)
)
,AddSortCrit AS
(
    -- Re-concatenate the parts of each value, every part padded with spaces to a
    -- fixed width of 10, so a plain string comparison compares part by part.
    SELECT ID
          ,SomeVal
          ,(SELECT LEFT(x.Part + REPLICATE(' ',10),10) AS [*]
            FROM Parted AS x
            WHERE x.ID=Parted.ID
            ORDER BY x.PartNmbr
            FOR XML PATH('')
           ) AS SortColumn
    FROM Parted
    GROUP BY ID,SomeVal
)
SELECT ID
      ,SomeVal
FROM AddSortCrit
ORDER BY SortColumn;
The result
ID SomeVal
10 FFS.12.1E
1 FFS1
4 FFS1.1
6 FFS1.1E
7 FFS1.1R
5 FFS1.2
3 FFS12
8 FFS12.1
11 FFS12.1R
9 FFS12.2
12 FFS12.2E
13 FFS12.2R
2 FFS2
Some explanation:
The first CTE will transform your codes to XML, which allows to address each part separately.
The second CTE returns each part together with a number.
The third CTE re-concatenates your code, but each part is padded to a length of 10 characters.
The final SELECT uses this new single-string-per-row in the ORDER BY.
Final hint:
This design is bad! You should not store these values in concatenated strings... Store them in separate columns and fiddle them together just for the output/presentation layer. Doing so avoids this rather ugly fiddle...

Replace Matching Row String From Another Column

I have this table:
ID NewName OldName Link
1 NewName1 OldName1 OldName2|OldName3
2 NewName2 OldName2 OldName1|OldName3
3 NewName3 OldName3 OldName1|OldName2
What I want to happen is to change all the OldName on the Link column to the NewName. Like this:
ID NewName OldName Link
1 NewName1 OldName1 NewName2|NewName3
2 NewName2 OldName2 NewName1|NewName3
3 NewName3 OldName3 NewName1|NewName2
Can anyone suggest what's the best way to do this?
You want to replace each old name in the Link column with the corresponding new name.
First split the Link data (delimited by |) into rows, then join the result back to your table:
-- Rewrite each row's Link by translating every '|'-separated old name to its new name.
;WITH link_xml AS
(
    -- Link 'a|b' becomes <M>a</M><M>b</M>
    SELECT ID,
           NewName,
           OldName,
           CAST('<M>'+REPLACE(Link, '|', '</M><M>')+'</M>' AS XML) AS String
    FROM <table_name>
),
link_part AS
(
    -- one row per name listed in Link
    SELECT a.ID,
           a.NewName,
           a.OldName,
           split.x.value('.', 'NVARCHAR(MAX)') AS DATA
    FROM link_xml AS a
    CROSS APPLY a.String.nodes('/M') AS split(x)
),
renamed AS
(
    -- look up the new name for each listed old name
    SELECT AA.ID,
           AA.[NewName],
           AA.OldName,
           T.[NewName] AS [Link]
    FROM link_part AS AA
    INNER JOIN <table_name> AS T
        ON T.OldName = AA.DATA
)
SELECT TTT.ID,
       TTT.[NewName],
       TTT.OldName,
       -- glue the translated names back together and drop the leading '|'
       [Link] = STUFF(
                    (SELECT '|' + TT.[Link]
                     FROM renamed AS TT
                     WHERE TT.ID = TTT.ID
                     FOR XML PATH('')),
                    1, 1, '')
FROM <table_name> AS TTT;
Result :
ID NewName OldName Link
1 NewName1 OldName1 NewName2|NewName3
2 NewName2 OldName2 NewName1|NewName3
3 NewName3 OldName3 NewName1|NewName2
MSSQL:
To store delimited-values from other column/other sources to a specific row, you may use FOR XML PATH, look at this SO thread:
-- Rebuild every row's Link from the NewName of each row whose OldName is listed in
-- that Link. The LIKE test therefore searches the *updated row's* Link for T2's
-- OldName; wrapping both sides in '|' makes it match whole names only.
-- STUFF removes the leading '|' without capping the length, and TYPE/.value()
-- returns the text undecorated (no &amp; etc.).
-- Intentionally no WHERE clause: all rows are rewritten. A row whose Link matches
-- no OldName ends up with Link = NULL.
-- NOTE(review): the order of the concatenated names is not guaranteed to follow
-- the order in which they appear in Link.
UPDATE YourTable
SET Link = STUFF(
(
SELECT '|' + T2.NewName
FROM YourTable T2
WHERE '|'+YourTable.Link+'|' LIKE '%|'+T2.OldName+'|%'
FOR XML PATH (''), TYPE
).value('.', 'NVARCHAR(MAX)'), 1, 1, '');
MYSQL:
If you want to store delimited-values from other column/other sources to a specific row, you may use mysql GROUP_CONCAT function:
-- MySQL: rebuild Link from the NewName of every row whose OldName is listed in it.
-- FIND_IN_SET needs a comma-separated list, hence the REPLACE of '|' by ','.
-- Its return value (the 1-based position in the list) is also used to ORDER the
-- concatenation, so the new names come out in the same order as the old ones.
-- `table` stands for the real table name.
-- NOTE(review): some MySQL versions reject a subquery on the table being updated
-- (error 1093); confirm on the target server.
UPDATE table t1
SET Link = (
SELECT GROUP_CONCAT(t2.NewName
ORDER BY FIND_IN_SET(t2.OldName, REPLACE(t1.Link, '|', ','))
SEPARATOR '|')
FROM table t2 WHERE FIND_IN_SET(t2.OldName, REPLACE(t1.Link, '|', ','))
)
I assumed that you want to replace any old values in the Link column with its new value.
See the results in action on dbfiddle.uk
If the Link column always holds exactly two names, we can self-join twice to find the new name for each side of the link. The join condition is ugly, but this is the price paid for storing denormalized data in your table.
-- For links of the form 'left|right': look up the new name for each side and
-- rewrite Link only when both sides were found.
WITH cte AS
(
    SELECT t1.Link, t2.NewName AS NewNameLeft, t3.NewName AS NewNameRight
    FROM yourTable t1
    LEFT JOIN yourTable t2
        -- NULLIF turns "no '|' found" (0) into NULL, so SUBSTRING yields NULL
        -- instead of failing on a length of -1; such rows simply find no left
        -- name and are skipped by the WHERE clause below.
        ON SUBSTRING(t1.Link, 1, NULLIF(CHARINDEX('|', t1.Link), 0) - 1) = t2.OldName
    LEFT JOIN yourTable t3
        ON SUBSTRING(t1.Link,
                     CHARINDEX('|', t1.Link) + 1,
                     LEN(t1.Link) - CHARINDEX('|', t1.Link)) = t3.OldName
)
UPDATE cte
SET Link = NewNameLeft + '|' + NewNameRight
WHERE NewNameLeft IS NOT NULL AND NewNameRight IS NOT NULL;
Note that this answer assumes that each old name appears only once in the table. I default to not doing an update unless both left and right new names are found.
I guess you just need REPLACE
-- Swaps the literal text 'OldName' for 'NewName' inside link, turning e.g.
-- OldName2 into NewName2.
SELECT
    id,
    NewName,
    OldName,
    REPLACE(link, 'OldName', 'NewName') AS Link
FROM your_data
and if you need to do it directly in table then use
-- Same literal-text swap, written back to every row of the table.
UPDATE your_data
SET
    link = REPLACE(link, 'OldName', 'NewName')

How to merge two columns from CASE STATEMENT of DIFFERENT CONDITION

My expected result should be like
----invoiceNo----
T17080003,INV14080011
But right now, I've come up with following query.
-- Asker's query: one row per non-cancelled AccountDoc of the given job/shipment/principal.
--   docType 'M' -> invoiceNo comes from the joined JobInvoice row
--   docType 'I' -> invoiceNo is the comma-separated list of docNo values of the
--                  job's 'I' documents (built with FOR XML PATH + STUFF)
-- @jobCode, @shipmentSyskey and @principalCode are parameters supplied by the caller.
SELECT AccountDoc.jobCode,AccountDoc.shipmentSyskey,AccountDoc.docType,
CASE AccountDoc.docType
WHEN 'M' THEN
JobInvoice.invoiceNo
WHEN 'I' THEN
(STUFF((SELECT ', ' + RTRIM(CAST(AccountDoc.docNo AS VARCHAR(20)))
FROM AccountDoc LEFT OUTER JOIN JobInvoice
ON AccountDoc.principalCode = JobInvoice.principalCode AND
AccountDoc.jobCode = JobInvoice.jobCode
WHERE (AccountDoc.isCancelledByCN = 0)
AND (AccountDoc.docType = 'I')
AND (AccountDoc.jobCode = @jobCode)
AND (AccountDoc.shipmentSyskey = @shipmentSyskey)
AND (AccountDoc.principalCode = @principalCode) FOR XML
PATH(''), TYPE).value('.','NVARCHAR(MAX)'),1,2,' '))
END AS invoiceNo
FROM AccountDoc LEFT OUTER JOIN JobInvoice
ON JobInvoice.principalCode = AccountDoc.principalCode AND
JobInvoice.jobCode = AccountDoc.jobCode
WHERE (AccountDoc.jobCode = @jobCode)
AND (AccountDoc.isCancelledByCN = 0)
AND (AccountDoc.shipmentSyskey = @shipmentSyskey)
AND (AccountDoc.principalCode = @principalCode)
OUTPUT:
----invoiceNo----
T17080003
INV14080011
Explanation:
I want to select docNo from table AccountDoc if AccountDoc.docType = I.
Or select invoiceNo from table JobInvoice if AccountDoc.docType = M.
The problem is what if under same jobCode there have 2 docType which are M and I, how I gonna display these 2 invoices?
You can achieve this by using CTE and FOR XML. below is the sample code i created using similar tables you have -
-- Sample tables standing in for AccountDoc and JobInvoice.
Create table #AccountDoc (
id int ,
docType char(1),
docNo varchar(10)
);
Create table #JobInvoice (
id int ,
invoiceNo varchar(10)
);
insert into #AccountDoc (id, docType, docNo)
select 1 , 'M' ,'M1234'
union all select 2 , 'M' ,'M2345'
union all select 3 , 'M' ,'M3456'
union all select 4 , 'I' ,'I1234'
union all select 5 , 'I' ,'I2345'
union all select 6 , 'I' ,'I3456';
insert into #JobInvoice (id, invoiceNo)
select 1 , 'INV1234'
union all select 2 , 'INV2345'
union all select 3 , 'INV3456';
-- Raw joined data, for inspection.
select *
from #AccountDoc t1 left join #JobInvoice t2
on t1.id = t2.id;
-- A statement that starts with WITH must follow a terminated statement, hence the
-- semicolons above.
-- cte: per document, the invoice number to show - JobInvoice.invoiceNo for type 'M',
-- the document's own docNo for type 'I', '' when neither applies or nothing matched.
with cte as
(
select isnull( case t1.docType WHEN 'M' THEN t2.invoiceNo WHEN 'I' then
t1.docNo end ,'') invoiceNo
from #AccountDoc t1 left join #JobInvoice t2
on t1.id = t2.id )
-- Concatenate into one comma-separated value; the separator is put in front of
-- each item and the first one is removed with STUFF, so no trailing comma is left.
select stuff((select ',' + invoiceNo from cte For XML PATH ('')), 1, 1, '') as invoiceNo;
You need to pivot your data if you have situations where there are two rows, and you want two columns. Your sql is a bit messy, particularly the bit where you put an entire select statement inside a case when in the select part of another query. These two queries are virtually the same, you should look for a more optimal way of writing them. However, you can wrap your entire sql in the following:
-- Wrapper that pivots the docType rows of the original query into one [M] and one
-- [I] column per Jobcode/shipmentsyskey.
SELECT
    Jobcode,
    shipmentsyskey,
    [M],
    [I]
FROM (
    -- Paste the complete original query here, unchanged, between these parentheses.
) AS yoursql
PIVOT (
    MAX(invoiceno)
    FOR docType IN ([M], [I])
) AS pvt

Add rows based on a column value

I am having issues with creating additional rows based on a column value.
If my PageCount = 3 then I would need to have 2 additional rows where PONo is repeated but the ImagePath is incremented by 1 for each new row.
I am able to get the first row but, creating the additional rows with the ImagePath incremented by 1 is where I am stuck.
My result:
Expected result:
Finished: finished values
Current Select statement:
-- Documents whose STATUS_DATE falls on today's date, read through the linked server.
-- A path on drive M:\ is rewritten to its \\ServerName\ UNC form; other paths pass
-- through unchanged.
SELECT
    PO,
    CASE
        WHEN LEFT(u.Path,3) = 'M:\' THEN '\\ServerName\' + RIGHT(u.Path, LEN(u.Path) - 3)
        ELSE u.Path
    END AS [Imagepath],
    PAGECOUNT
FROM OPENQUERY([LinkedServer],'select * from data.vw_purchasing_docs_unc') AS u
INNER JOIN OPENQUERY([LinkedServer],'select * from data.purchasing_docs') AS d
    ON d.docid = u.docid
WHERE CONVERT(VARCHAR(10), GETDATE(), 120) = CONVERT(VARCHAR(10), d.STATUS_DATE, 120)
batch file:
REM Runs the query through bcp (queryout) and writes the result to
REM E:\Data\PO Trigger CSV\PO_Trigger_Doc.csv in character mode (-c) with ',' as
REM field terminator (-t) and \n as row terminator (-r).
REM NOTE(review): -T (trusted connection) is passed together with -U/-P; confirm
REM which authentication is intended.
REM NOTE(review): the query reads DWPath inside RIGHT() but Path everywhere else,
REM and dwdata.purchasing_docs in the second OPENQUERY; verify both names.
bcp "select d.DOCID,DOC_TYPE,PO,d.STATUS, CASE WHEN LEFT(Path,3)= 'M:\' THEN '\\ServerName'+RIGHT(DWPath,LEN(Path)-3) ELSE Path END AS ImagePath, STATUS_DATE,'No' AS dwimport from openquery([LinkedServer],'select * from data.vw_purchasing_docs_unc') as u INNER JOIN openquery([LinkedServer],'select * from dwdata.purchasing_docs') as d ON u.docid=d.docid WHERE (CONVERT(varchar(10),STATUS_DATE,120)=CONVERT(varchar(10),GETDATE(),120)) AND d.STATUS IN ('FILED - Processing Complete','FILED - Partial Payment','FILED - Confirming') AND DOC_TYPE IN ('CO = Change Order','Purchase Order','CP = Capital Projects','Change Order','PO = Purchase Order','PO','PR = General Operating')" queryout "E:\Data\PO Trigger CSV\PO_Trigger_Doc.csv" -r \n -T -c -t"," -Umv -Smtvwrtst -Pm -q -k
-- Produces pagecount rows per source row by joining to a numbers table
-- (one row for every n from 1 to pagecount).
-- CAST takes the whole "expression AS type" inside its parentheses.
-- `table` stands for the real source table.
-- NOTE(review): REPLACE(text, search, replacement) - as written this searches for
-- 'f<n-1>' and replaces it with 'f0'; confirm this is the intended direction.
Select t.ponumber, b.rplc, t.pagecount
from table t
cross apply
(select replace(t.imagepath, 'f' + cast((n.n - 1) as varchar(100)), 'f0') as rplc
 from numbers n
 where n.n <= t.pagecount) b
To create the numbers table (if you are wondering why you need it, look here):
-- Helper table of consecutive integers 1..10000, named Numbers to match the
-- queries that read from it.
CREATE TABLE Numbers (N INT IDENTITY(1,1) PRIMARY KEY NOT NULL);
GO
-- Each execution inserts one row; the IDENTITY column supplies the next integer.
INSERT INTO Numbers DEFAULT VALUES;
-- "GO 10000" is a client command (SSMS / sqlcmd) that runs the preceding batch
-- 10000 times.
GO 10000
Using your select statement after update:
-- cte: today's purchasing documents with the M:\ drive path rewritten to its UNC form.
-- The CTE body must be enclosed in parentheses after AS.
;With cte(ponumber,imagepath,pagecount)
as
(
SELECT PO, CASE WHEN LEFT(u.Path,3)= 'M:\' THEN '\\ServerName\'+RIGHT(u.Path,LEN(u.Path)-3) ELSE u.Path END AS [Imagepath],PAGECOUNT
FROM OPENQUERY([LinkedServer],'select * from data.vw_purchasing_docs_unc') AS u INNER JOIN
OPENQUERY([LinkedServer],'select * from data.purchasing_docs') AS d ON u.docid=d.docid
WHERE (CONVERT(VARCHAR(10),d.STATUS_DATE,120)=CONVERT(VARCHAR(10),GETDATE(),120))
)
-- One output row per page: join each document to the numbers 1..pagecount.
-- NOTE(review): REPLACE(text, search, replacement) - as written this searches for
-- 'f<n-1>' and replaces it with 'f0'; confirm this is the intended direction.
select c.ponumber, b.rplc, c.pagecount
from cte c
cross apply
(select replace(c.imagepath, 'f' + cast((n.n - 1) as varchar(100)), 'f0') as rplc
 from numbers n
 where n.n <= c.pagecount) b
If you would like to avoid additional table, you can use CTE:
-- Images: stand-in for the real source (path plus number of rows wanted).
-- Numbers: recursive CTE seeded with 1..4; every value N already produced yields
--          N*4+1 .. N*4+4, capped at 100.
-- Each image is paired with Val rows from Numbers; only the row count matters,
-- the suffix itself comes from ROW_NUMBER per ImagePath.
WITH Images AS
(
    SELECT src.ImagePath, src.Val
    FROM (VALUES
            ('C:\Folder', 2),
            ('D:\Folder', 3)
         ) AS src(ImagePath, Val)
),
Numbers AS
(
    SELECT seed.N
    FROM (VALUES (1),(2),(3),(4)) AS seed(N)
    UNION ALL
    SELECT prev.N * 4 + step.N
    FROM (VALUES (1),(2),(3),(4)) AS step(N)
    CROSS JOIN Numbers AS prev
    WHERE prev.N * 4 + step.N <= 100
)
SELECT img.ImagePath + '\f'
       + CAST(ROW_NUMBER() OVER (PARTITION BY img.ImagePath ORDER BY (SELECT 1)) AS nvarchar(10)) AS NewPath
FROM Images AS img
CROSS APPLY (SELECT TOP (img.Val) nums.N FROM Numbers AS nums) AS pages(N)
Images is your source table. It can be anything, i.e. OPENQUERY. It produces:
NewPath
-------
C:\Folder\f1
C:\Folder\f2
D:\Folder\f1
D:\Folder\f2
D:\Folder\f3

SQL query for finding first missing sequence string (prefix+no)

T-SQL query for finding first missing sequence string (prefix+no)
Sequence can have a prefix + a continuing no.
ex sequence will be
ID
-------
AUTO_500
AUTO_501
AUTO_502
AUTO_504
AUTO_505
AUTO_506
AUTO_507
AUTO_508
So above the missing sequence is AUTO_503 or if there is no missing sequence then it must return next sequence.
Also, the starting number is to be specified (e.g. 500 in this case), and the prefix can be null, i.e. the sequence may consist of numbers only, with no prefix.
You could LEFT JOIN the id numbers on shifted(+1) values to find gaps in sequential order:
-- existing:   the numeric part of every id.
-- candidates: the start value 500 plus the successor of every existing number.
-- The answer is the smallest candidate >= 500 that is not itself an existing number.
WITH existing AS
(
    SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) AS idnum
    FROM tbl
),
candidates AS
(
    SELECT 500 AS offsetnum
    UNION
    SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) + 1
    FROM tbl
)
SELECT MIN(c.offsetnum) AS first_missing_num
FROM candidates AS c
WHERE c.offsetnum >= 500
  AND NOT EXISTS (SELECT 1 FROM existing AS e WHERE e.idnum = c.offsetnum)
SQLFiddle Demo
Using a recursive CTE to dynamically generate the sequence between the min and max of the ID Numbers maybe over complicated things a bit but it seems to work -
LIVE ON FIDDLE
-- Sample sequence values; 503 is missing and the last entry carries no prefix.
create table tbl
(
    id varchar(55)
);

insert into tbl (id)
values
    ('AUTO_500'),
    ('AUTO_501'),
    ('AUTO_502'),
    ('AUTO_504'),
    ('AUTO_505'),
    ('AUTO_506'),
    ('AUTO_507'),
    ('AUTO_508'),
    ('509');
-- Lists every number between the smallest and largest id number that has no row.
--   data_cte      : numeric part of each id
--   maxmin_cte    : smallest and largest of those numbers
--   recursive_cte : every integer from minId to maxId, one recursion step each
;WITH
data_cte(id)AS
(SELECT [id] = CAST(REPLACE(id, 'AUTO_', '') AS INT) FROM tbl)
,maxmin_cte(minId, maxId)AS
(SELECT [minId] = min(id),[maxId] = max(id) FROM data_cte)
,recursive_cte(n) AS
(
SELECT [minId] n from maxmin_cte
UNION ALL
SELECT (1 + n) n FROM recursive_cte WHERE n < (SELECT [maxId] from maxmin_cte)
)
SELECT x.n
FROM
recursive_cte x
LEFT OUTER JOIN data_cte y ON
x.n = y.id
WHERE y.id IS NULL
ORDER BY x.n
-- The default limit of 100 recursion levels would abort the query as soon as the
-- id range spans more than 100 numbers; 0 lifts the limit.
OPTION (MAXRECURSION 0)
Check this solution. Here you just need to add an identity column.
-- idn counts the rows from 0 in insertion order, so idn + 500 is the number each
-- row would carry in a gap-free sequence starting at 500.
create table tbl
(
    id  varchar(55),
    idn int identity(0,1)
);

insert into tbl (id)
values
    ('AUTO_500'),
    ('AUTO_501'),
    ('AUTO_502'),
    ('AUTO_504'),
    ('AUTO_505'),
    ('AUTO_506'),
    ('AUTO_507'),
    ('AUTO_508'),
    ('509');

-- Smallest expected number whose expected id text differs from the stored id.
select min(t.idn + 500)
from tbl as t
where t.id <> 'AUTO_' + cast((t.idn + 500) as varchar)
try this:
-- diff = 1-based position the id number would have in a gap-free sequence from 500;
-- rnk  = its actual position when ordered by number.
-- The highest row where both agree is the last one before the first gap, so
-- 500 + rnk is the first missing number.
with cte as
(
    select
        n.num - 500 + 1                    as [diff],
        row_number() over (order by n.num) as [rnk]
    from tbl
    cross apply (select cast(replace(id, 'AUTO_', '') as int) as num) as n
)
select top 1
    'AUTO_' + cast(500 + rnk as varchar(50)) as [ID]
from cte
where [rnk] = [diff]
order by rnk desc
SQL FIddle Demo
Had a similar situation, where we have R_Cds that were like this R01005
-- Active_R_CD: distinct numeric values of the codes with stat = 1 ('R' replaced by
-- a space before the cast to int).
-- Reports value + 1 for every value whose successor is not present.
-- `table` stands for the real table name.
;with Active_R_CD (R_CD) as
(
    select distinct cast(replace(R_CD, 'R', ' ') as int)
    from table
    where stat = 1
)
select cur.R_CD + 1 as [Gaps in R Code]
from Active_R_CD as cur
where not exists (
    select 1
    from Active_R_CD as nxt
    where nxt.R_CD = cur.R_CD + 1
)
order by cur.R_CD + 1