Compare contents of a text file based on line number VB2008 - vba

Let's say I have two text file, I will compare (based on the line numbering of text ->see below) because this is where the unique key is generated.
sample1.txt:
5th line -> _000_000F_01CE2577.B840E640
sample2.txt
5th line -> _000_000F_01CE2577.B840E640
Now here is my code:
Dim FILE_NAME As String = "C:\myfiles"
'This is to determine the number of lines in the text file
Dim count As Integer
count = 0
Dim obj As StreamReader
obj = New StreamReader(FILE_NAME)
Do Until obj.ReadLine Is Nothing
count = count + 1
Loop
'------------------------------
'this is my computation to get the number of line -->disregard this
Dim temp3 As Integer
temp3 = count - 3
'------------------------------
obj.Close()
'This is to read all the text in the text file
Dim fileReader(fs) As String
fileReader(fs) = My.Computer.FileSystem.ReadAllText(FILE_NAME, _
System.Text.Encoding.ASCII)
I have stored each file in an array
Example:
file[0]
file[1]
Then I have to read each file and its contents, now how will i compare the line of text to each other. i believe i have to use regex.
Please give me some pointers on how to compare the line of text...
e.g. 5th line in sample1.txt == 5th line of sample2.txt
I have to know if they are the same.

this should do the job for you
it will read each line in txt file , save it to an array then compare
note: set paths do your 2 txt files
it will go out of bounds if there is less lines in file 2 than file 1. You can add a little bit of code to handle that case though.
Option Explicit
Sub Read_text_File()
Dim firstFile() As String, secondFile() As String
Dim path1 As String, path2 As String
Dim i As Long
path1 = "C:\ ... .txt"
path2 = "C:\ ... .txt"
Call fill_array(firstFile, path1)
Call fill_array(secondFile, path2)
For i = LBound(firstFile) To UBound(firstFile) - 1
Debug.Print (firstFile(i) & vbTab & vbTab & vbTab & vbTab & secondFile(i))
If StrComp(firstFile(i), secondFile(i), vbTextCompare) = 0 Then
MsgBox "Line: " & i + 1 & " matches "
End If
Next i
End Sub
Sub fill_array(ByRef arr() As String, pathToFile As String)
Dim oFSO As New FileSystemObject
Dim oFS As TextStream
Dim cnt As Long
cnt = 0
Set oFS = oFSO.OpenTextFile(pathToFile)
Do Until oFS.AtEndOfStream
oFS.ReadLine
cnt = cnt + 1
Loop
ReDim arr(cnt)
Set oFS = oFSO.OpenTextFile(pathToFile)
cnt = 0
Do Until oFS.AtEndOfStream
arr(cnt) = oFS.ReadLine
cnt = cnt + 1
Loop
oFS.Close
Set oFS = Nothing
End Sub

Related

Itext 7 IPdfTextLocation.GetPageNumber turning up 0 all the time

I'm writing a (VB/Net) procedure that searches a PDF document for a Regex pattern and writes a text file with the matched locations:
Public Sub ReadAndMatch(ByVal InputFileName As String, OutputFileName As String, RegexPattern As String)
Dim pdfIn As New iText.Kernel.Pdf.PdfReader(InputFileName) ' A Pdfreader object associated with the input file name
Dim pdfDoc As New iText.Kernel.Pdf.PdfDocument(pdfIn) 'This object holds the actual document being analyzed
Dim strategy As New iText.Kernel.Pdf.Canvas.Parser.Listener.RegexBasedLocationExtractionStrategy(RegexPattern) 'extraction strategy
Dim Parser As iText.Kernel.Pdf.Canvas.Parser.PdfCanvasProcessor = New Kernel.Pdf.Canvas.Parser.PdfCanvasProcessor(strategy)
Dim Loclist As System.Collections.ICollection 'all the matches
Dim Location As iText.Kernel.Pdf.Canvas.Parser.Listener.IPdfTextLocation 'one match
Dim CoordFile As New System.IO.StreamWriter(OutputFileName) 'initiate output stream
Dim TextString As String
Dim L, B, W, H As Single ' Left, bottom, width & height of the rectangle containing the extracted text
Dim pg As Integer = 0 'number of current page, number of matches in page, total number of matches
Do While pg < pdfDoc.GetNumberOfPages 'loop thru document pages
pg += 1
Parser.ProcessPageContent(pdfDoc.GetPage(pg)) 'parse page
Loop
Loclist = strategy.GetResultantLocations
If Loclist.Count = 0 Then Exit Sub
For Each Location In Loclist
L = Location.GetRectangle.GetLeft
B = Location.GetRectangle.GetBottom
W = Location.GetRectangle.GetWidth
H = Location.GetRectangle.GetHeight
TextString = Location.GetText
pg = Location.GetPageNumber
CoordFile.WriteLine(TextString & Chr(9) & L & Chr(9) & B & Chr(9) & W & Chr(9) & H & Chr(9) & pg & Chr(9) & InputFileName)
Next Location
'Finished processing
pdfDoc.Close() 'close pdf
CoordFile.Close() 'close output file
End Sub
I am getting the rectangle coordinates OK, and the matched text string, but location.GetPageNumber is always 0
What am I doing wrong?
Here is a workaround I used.
I re-initiate the strategy object and the parser object every page, and also output the locations matched by the Regex pattern per page.
That way I can use the pg counter of my own While loop instead of the one [not] given by the .PageNumber method:
Private Sub Main() 'the main processing routine
'Sub assumes that the PDF document (pdfDoc) and the output file (CoordFile) --both module level object-- are open and available
Dim Location As iText.Kernel.Pdf.Canvas.Parser.Listener.IPdfTextLocation
Dim TextString As String
Dim L, B, W, H As Single ' Left, bottom, width & height of the rectangle containing the extracted text
Dim pg As Integer = 0 'number of current page, number of matches
Dim N As Integer = 0
Do While pg < pdfDoc.GetNumberOfPages 'loop thru document pages
pg += 1
Dim strategy As New iText.Kernel.Pdf.Canvas.Parser.Listener.RegexBasedLocationExtractionStrategy(RegexPattern) 'extraction strategy (RegexPattern is a module variable)
Dim Parser As iText.Kernel.Pdf.Canvas.Parser.PdfCanvasProcessor = New Kernel.Pdf.Canvas.Parser.PdfCanvasProcessor(strategy)
Parser.ProcessPageContent(pdfDoc.GetPage(pg)) 'parse page
If strategy.GetResultantLocations.Count > 0 Then
For Each Location In strategy.GetResultantLocations
TextString = Location.GetText
L = Location.GetRectangle.GetLeft
B = Location.GetRectangle.GetBottom
W = Location.GetRectangle.GetWidth
H = Location.GetRectangle.GetHeight
CoordFile.WriteLine(TextString & Chr(9) & L & Chr(9) & B & Chr(9) & W & Chr(9) & H & Chr(9) & pg)
N += 1
Next Location
End If
strategy.GetResultantLocations.Clear() 'dispose loclist
Loop
'Finished processing
pdfDoc.Close()
CoordFile.Close() 'close output file
End Sub
... not elegant but it works...

How to read binary content of an embeded word object

I have an embedded OLE object in word as "InlineShape". I would like to access this object as a data stream / string. at the moment, I can see some ideas for Excel via OLEObject, but it seems that there is no solution for Word that I can see.
The following code achieves what I want:
' from here: https://stackoverflow.com/questions/1356118/vba-ws-toolkit-how-to-get-current-file-as-byte-array
Public Function GetFileBytes(ByVal path As String) As Byte()
Dim lngFileNum As Long
Dim bytRtnVal() As Byte
lngFileNum = FreeFile
If LenB(Dir(path)) Then ''// Does file exist?
Open path For Binary Access Read As lngFileNum
ReDim bytRtnVal(LOF(lngFileNum) - 1&) As Byte
Get lngFileNum, , bytRtnVal
Close lngFileNum
Else
Err.Raise 53
End If
GetFileBytes = bytRtnVal
Erase bytRtnVal
End Function
Sub TestMe()
Dim shapeIndex As Integer: shapeIndex = 1
Dim ns As Object
Dim folderItem
Const namePrefix = "site-visit-v2.5"
Const nameSuffix = ".dat"
Dim fileBytes() As Byte
Dim tempDir As String: tempDir = Environ("TEMP")
' first embedded Item - you may need adjust if you have more shapes
ActiveDocument.InlineShapes.Item(shapeIndex).Range.Copy
' paste it to temp dir
Set ns = CreateObject("Shell.Application").namespace((tempDir))
ns.Self.InvokeVerb ("Paste")
' find the file now
Dim Item As Object
Dim rightItem As Object
Set rightItem = Nothing
' find the file that was pasted
' because when files are pasted and name exists, you could get a name such as "site-visit-v2.5 (10).dat"
' we pick the most recent that matches
For Each Item In ns.Items
If Item.Name Like namePrefix & "*" & nameSuffix Then
If rightItem Is Nothing Then
Set rightItem = Item
Else
If Item.modifyDate > rightItem.modifyDate Then 'a more recent date is found
Set rightItem = Item
End If
End If
End If
Next
fileBytes = GetFileBytes(tempDir & "\" & rightItem.Name)
MsgBox "Read " & UBound(fileBytes) + 1 & " bytes"
End Sub

Writing a string to a new .csv in VB.net

I am trying to write a string to a .csv file, but unable to get it to display.
I have managed to do it in VBA, but when writing in VB.net it's not working.
I first create the file and set the headers for each column. After this I am getting information on each required attribute and writing it to a string s.
All i want to do now is write the string to the .csv file so that each attribute is in the right column under the right header.
Each time the string s simply needs to be on a new row.
This is what I have so far (I have cut out some bits of code so some syntax may look incorrect). What am i doing wrong or missing?
Sub Main()
Dim sOutput As String
' Create a header for the output file
sOutput = ("Level,Occurrence Name,Reference Name,Object type, Visibility, Path" & vbLf)
If Occs.Count > 0 Then
For i = 1 To Occs.Count
iLevel = 0
curOcc = Occs.Item(i)
GetOccurrenceData(curOcc, sOutput, oSel, False, iLevel)
Next
End If
' Write the output string to a file
Dim sPath As String
Dim bWrite As Boolean
sPath = ("C:\temp\data3.csv")
bWrite = WriteFile(sPath, sOutput)
End Sub
Sub GetOccurrenceData(curOcc As VPMOccurrence, s As String, sel As Selection, ByVal bParentHidden As Boolean, ByVal iParentLevel As Integer)
'CODE TO GET DATA REMOVED AS IRRELEVANT
' Append the output string with the data for the current occurrence.
s = (s & curLevel & "," & sName & "," & sRefName & "," & sType & "," & sVisibility & vbLf)
' Repeat this data gathering procedure on any children the current occurrence may have.
Occs = curOcc.Occurrences
If Occs.Count > 0 Then
For i = 1 To Occs.Count
GetOccurrenceData(Occs.Item(i), s, sel, bChildrenInheritNoShow, curLevel)
Next
End If
In GetOccurrenceData you pass in a string and change it in the method, but you did not pass it in as a ByRef so anything done to the string in the method stays in the method.
Change the header of your method to read
Sub GetOccurrenceData(curOcc As VPMOccurrence,ByRef s As String, sel As Selection, ByVal bParentHidden As Boolean, ByVal iParentLevel As Integer)
I would however recommend using a StringBuilder to accomplish what you are doing.
Like This:
Sub Main()
Dim sb As New Text.StringBuilder()
sb.AppendLine("Level,Occurrence Name,Reference Name,Object type, Visibility, Path")
If Occs.Count > 0 Then
For i = 1 To Occs.Count
iLevel = 0
curOcc = Occs.Item(i)
GetOccurrenceData(curOcc, sb, oSel, False, iLevel)
Next
End If
' Write the output string to a file
Dim sPath As String
Dim bWrite As Boolean
sPath = ("C:\temp\data3.csv")
bWrite = WriteFile(sPath, sb.ToString())
End Sub
Sub GetOccurrenceData(curOcc As VPMOccurrence, sb As Text.StringBuilder, sel As Selection, ByVal bParentHidden As Boolean, ByVal iParentLevel As Integer)
'CODE TO GET DATA REMOVED AS IRRELEVANT
' Append the output string with the data for the current occurrence.
sb.Append(curLevel).Append(",").Append(sName).Append(",").Append(sRefName).Append(",").Append(sType).Append(",").AppendLine(sVisibility)
' Repeat this data gathering procedure on any children the current occurrence may have.
Occs = curOcc.Occurrences
If Occs.Count > 0 Then
For i = 1 To Occs.Count
GetOccurrenceData(Occs.Item(i), sb, sel, bChildrenInheritNoShow, curLevel)
Next
End If
End Sub

VB.Net read file content

I'm trying to make an simple program to message box the executable that runs in "autorun.inf"
example:
[Autorun]
open=setup.exe
icon=setup.exe,0
how do i define the line which says
even if it's not the second line, it could be the 5th, i mean after some text..
open=setup.exe
and trim the file name and it's extension?
Dim FilePath As String = "E:\run.inf"
Dim INF As String = My.Computer.FileSystem.ReadAllText(FilePath)
INF = INF.ToLower
Dim StartIndex As Integer = INF.IndexOf("open=") + 5
Dim EndIndex As Integer = INF.IndexOf(vbCrLf, StartIndex)
Dim ExecutableName As String = ""
If StartIndex = 4 Then
MsgBox("the executable file name could not found at file :" & vbCrLf & FilePath)
Else
If EndIndex = -1 Then
ExecutableName = INF.Substring(StartIndex)
Else
ExecutableName = INF.Substring(StartIndex, EndIndex - StartIndex)
End If
MsgBox("The Executable Name is " & Chr(34) & ExecutableName & Chr(34))
End If
first we read all the text of the INF file and change its case to lower so that we can search through it for lower "open=".
and then we define the start index and the end index of the executable name as the start after string "open=" and the end before the newline vbCrlf.
and finally get Executable Name and show it in a MsgBox.

VBA to load very large file in one go (no buffering)

I am experiencing an unexpected vb limitation on the string max size, as explained in this post:
VBA unexpected reach of string size limit
While I was expecting to be able to load files up to 2GB (2^31 char) using open path for binary and get function, I get an out of string space error when I try to load a string larger than 255,918,061 characters.
I managed to work around this issue buffering the input stream of get. The problem is that I need to load the file as an array of string by splitting the buffer on vbCrLf characters.
This requires then to build the array line by line. Moreover, since I cannot be sure whether the buffer is ending on a break line or not I need additional operations. This solution is Time and Memory consuming. Loading a file of 300MB with this code costs 900MB (!) use of memory by excel. Is there a better solution ?
Here bellow is my code:
Function Load_File(path As String) As Variant
Dim MyData As String, FNum As Integer
Dim LenRemainingBytes As Long
Dim BufferSizeCurrent As Long
Dim FileByLines() As String
Dim CuttedLine As Boolean
Dim tmpSplit() As String
Dim FinalSplit() As String
Dim NbOfLines As Long
Dim LastLine As String
Dim count As Long, i As Long
Const BufferSizeMax As Long = 100000
FNum = FreeFile()
Open path For Binary As #FNum
LenRemainingBytes = LOF(FNum)
NbOfLines = FileNbOfLines(path)
ReDim FinalSplit(NbOfLines)
CuttedLine = False
Do While LenRemainingBytes > 0
MyData = ""
If LenRemainingBytes > BufferSizeMax Then
BufferSizeCurrent = BufferSizeMax
Else
BufferSizeCurrent = LenRemainingBytes
End If
MyData = Space$(BufferSizeCurrent)
Get #FNum, , MyData
tmpSplit = Split(MyData, vbCrLf)
If CuttedLine Then
count = count - 1
tmpSplit(0) = LastLine & tmpSplit(0)
For i = 0 To UBound(tmpSplit)
If count > NbOfLines Then Exit For
FinalSplit(count) = tmpSplit(i)
count = count + 1
Next i
Else
For i = 0 To UBound(tmpSplit)
If count > NbOfLines Then Exit For
FinalSplit(count) = tmpSplit(i)
count = count + 1
Next i
End If
Erase tmpSplit
LastLine = Right(MyData, Len(MyData) - InStrRev(MyData, vbCrLf) - 1)
CuttedLine = Len(LastLine) > 1
LenRemainingBytes = LenRemainingBytes - BufferSizeCurrent
Loop
Close FNum
Load_File = FinalSplit
Erase FinalSplit
End Function
Where the function FileNbOfLines is efficiently returning the number of line break characters.
Edit:
My Needs are:
To look for a specific string within the file
To get a specific number of lines coming after this string
Here you go, not pretty but should give you the general concept:
Sub GetLines()
Const fileName As String = "C:\Users\bloggsj\desktop\testfile.txt"
Const wordToFind As String = "FindMe"
Dim lineStart As String
Dim lineCount As String
Dim linesAfterWord As Long
With CreateObject("WScript.Shell")
lineCount = .Exec("CMD /C FIND /V /C """" """ & fileName & """").StdOut.ReadAll
lineStart = Split(.Exec("CMD /C FIND /N """ & wordToFind & """ """ & fileName & """").StdOut.ReadAll, vbCrLf)(2)
End With
linesAfterWord = CLng(Trim(Mid(lineCount, InStrRev(lineCount, ":") + 1))) - CLng(Trim(Mid(lineStart, 2, InStr(lineStart, "]") - 2)))
Debug.Print linesAfterWord
End Sub
Uses CMD to count the number of lines, then find the line at which the word appears, then subtract one from the other to give you the amount of lines after the word has been found.
Answer: Yes, using ReadAll from FSO should do the job.
Best answer: Just avoid it !
My needs were:
Identify a specific string within the file
Extract a certain number of lines after this string
As far as you precisely know the exact amout of data you want to extract, and assuming this amount of data is below vba string size limit (!), here is what it does the job the faster.
Decrease of computation time is improved using binary comparison of strings. My code is as follows:
Function GetFileLines(path As String, str As String, NbOfLines As Long) As String()
Const BUFSIZE As Long = 100000
Dim StringFound As Boolean
Dim lfAnsi As String
Dim strAnsi As String
Dim F As Integer
Dim BytesLeft As Long
Dim Buffer() As Byte
Dim strBuffer As String
Dim BufferOverlap As String
Dim PrevPos As Long
Dim NextPos As Long
Dim LineCount As Long
Dim data As String
F = FreeFile(0)
strAnsi = StrConv(str, vbFromUnicode) 'Looked String
lfAnsi = StrConv(vbLf, vbFromUnicode) 'LineBreak character
Open path For Binary Access Read As #F
BytesLeft = LOF(F)
ReDim Buffer(BUFSIZE - 1)
'Overlapping buffer is 3/2 times the size of strBuffer
'(two bytes per character)
BufferOverlap = Space$(Int(3 * BUFSIZE / 4))
StringFound = False
Do Until BytesLeft = 0
If BytesLeft < BUFSIZE Then ReDim Buffer(BytesLeft - 1)
Get #F, , Buffer
strBuffer = Buffer 'Binary copy of bytes.
BytesLeft = BytesLeft - LenB(strBuffer)
Mid$(BufferOverlap, Int(BUFSIZE / 4) + 1) = strBuffer 'Overlapping Buffer
If Not StringFound Then 'Looking for the the string
PrevPos = InStrB(BufferOverlap, strAnsi) 'Position of the looked string within the buffer
StringFound = PrevPos <> 0
If StringFound Then strBuffer = BufferOverlap
End If
If StringFound Then 'When string is found, loop until NbOfLines
Do Until LineCount = NbOfLines
NextPos = InStrB(PrevPos, strBuffer, lfAnsi)
If NextPos = 0 And LineCount < NbOfLines Then 'Buffer end reached, NbOfLines not reached
'Adding end of buffer to data
data = data & Mid$(StrConv(strBuffer, vbUnicode), PrevPos)
PrevPos = 1
Exit Do
Else
'Adding New Line to data
data = data & Mid$(StrConv(strBuffer, vbUnicode), PrevPos, NextPos - PrevPos + 1)
End If
PrevPos = NextPos + 1
LineCount = LineCount + 1
If LineCount = NbOfLines Then Exit Do
Loop
End If
If LineCount = NbOfLines then Exit Do
Mid$(BufferOverlap, 1, Int(BUFSIZE / 4)) = Mid$(strBuffer, Int(BUFSIZE / 4))
Loop
Close F
GetFileLines = Split(data, vbCrLf)
End Function
To crunch even more computation time, it is highly advised to use fast string concatenation as explained here.
For instance the following function can be used:
Sub FastConcat(ByRef Dest As String, ByVal Source As String, ByRef ccOffset)
Dim L As Long, Buffer As Long
Buffer = 50000
L = Len(Source)
If (ccOffset + L) >= Len(Dest) Then
If L > Buffer Then
Dest = Dest & Space$(L)
Else
Dest = Dest & Space$(Buffer)
End If
End If
Mid$(Dest, ccOffset + 1, L) = Source
ccOffset = ccOffset + L
End Sub
And then use the function as follows:
NbOfChars = 0
Do until...
FastConcat MyString, AddedString, NbOfChars
Loop
MyString = Left$(MyString,NbOfChars)