I have a csv file with several values: strings, dates, numbers...
A macro (Excel) imports the csv data into an Excel sheet. All goes well, but...
In the csv file I have a column for dates like: "8/1/2018" (where 8 is the day and 1 is the month; format d/M/Y), but when I look at the imported data, I see the date as "1/8/2018" (note that day and month are swapped). That's annoying because my Windows regional settings specify the format d/M/Y.
Here you can see the line where all happens: bad date conversion:
How could I import date data with correct format? Perhaps some dateFormat for the Sheet?
EDIT
Here is the code where the file get opened:
Sub ImportFile()
    ' Entry point: imports one delimited text file into a worksheet of this workbook.
    '
    ' The file is expected in the same folder as the workbook. For a more flexible
    ' version, Application.GetOpenFilename can supply the path through a file-open
    ' dialogue instead of the fixed name used here.
    Const CSV_FILE_NAME As String = "\2018w02_wbt_exito.csv"
    Const FIELD_SEPARATOR As String = ";"      ' fields are separated by semicolons
    Const QUOTE_CHARACTER As String = """"     ' quotation marks are stripped from fields
    Const TARGET_SHEET As String = "Hoja1"     ' worksheet that receives the data

    Dim csvPath As String
    csvPath = ThisWorkbook.Path & CSV_FILE_NAME

    ' Separator and sheet name could equally be read from the worksheet or an input box.
    copyDataFromCsvFileToSheet csvPath, FIELD_SEPARATOR, QUOTE_CHARACTER, TARGET_SHEET
End Sub
'**************************************************************
Private Sub copyDataFromCsvFileToSheet(parFileName As String, _
        parDelimiter As String, parExcludeCharacter As String, parSheetName As String)
    ' Reads a delimited file into an array and writes that array to a worksheet.
    '
    ' parFileName:         path of the delimited file
    ' parDelimiter:        field separator passed on to getDataFromFile
    ' parExcludeCharacter: character passed on to getDataFromFile for stripping
    ' parSheetName:        name of the sheet that receives the values
    '
    ' Nothing is written when getDataFromFile returns no data. Existing sheet
    ' content is not cleared; the block is written starting at row 2, column 1.
    Dim fileValues As Variant
    Dim rowCount As Long
    Dim colCount As Long
    Dim topLeft As Range

    fileValues = getDataFromFile(parFileName, parDelimiter, parExcludeCharacter)
    If isArrayEmpty(fileValues) Then Exit Sub

    ' The target range gets the same dimensions as the array so that all values
    ' are transferred in a single assignment.
    rowCount = UBound(fileValues, 1)
    colCount = UBound(fileValues, 2)
    Set topLeft = Sheets(parSheetName).Cells(2, 1)
    topLeft.Resize(rowCount, colCount).Value = fileValues
End Sub
'**************************************************************
Public Function isArrayEmpty(parArray As Variant) As Boolean
    ' Returns True when parArray holds no usable elements, i.e. when it is
    '   - not an array at all,
    '   - a dynamic array that has not been dimensioned (ReDim) or was erased, or
    '   - an array whose upper bound is below its lower bound (e.g. Array()).
    ' Returns False for any array with at least one element.
    Dim lowerBound As Long
    Dim upperBound As Long

    isArrayEmpty = True
    If Not IsArray(parArray) Then Exit Function

    ' LBound/UBound raise an error on an undimensioned dynamic array; test for
    ' that explicitly instead of relying on where Resume Next continues.
    On Error Resume Next
    lowerBound = LBound(parArray)
    upperBound = UBound(parArray)
    If Err.Number <> 0 Then
        Err.Clear
        On Error GoTo 0
        Exit Function
    End If
    On Error GoTo 0

    isArrayEmpty = (upperBound < lowerBound)
End Function
'**************************************************************
Private Function getDataFromFile(parFileName As String, _
        parDelimiter As String, _
        Optional parExcludeCharacter As String = "") As Variant
    ' Reads a delimited text file (csv, txt ...) into a 2D Variant array.
    '
    ' parFileName:         path of the file to read
    ' parDelimiter:        field separator, e.g. semicolon
    ' parExcludeCharacter: optional single character (typically """") that is
    '                      removed from the start and/or end of every field
    '
    ' Returns a 1-based array (rows, columns). The column count is taken from
    ' the line with the most fields; shorter lines leave trailing cells Empty.
    ' Returns Empty (not an array) when the file is empty, cannot be opened, or
    ' an error occurs while reading.
    '
    ' Lines are split with a plain Split, so a delimiter inside a quoted field
    ' is still treated as a separator.
    Const REDIM_STEP As Long = 10000    'Growth step for the line buffer

    Dim locLinesList() As Variant       'One Split() result per line
    Dim locData As Variant              'Result array
    Dim locFields As Variant            'Fields of the current line
    Dim locValue As String              'Current field
    Dim i As Long
    Dim j As Long
    Dim locNumRows As Long              'Nb of rows read
    Dim locNumCols As Long              'Highest zero-based field index seen
    Dim fso As Object                   'File system object
    Dim ts As Object                    'Text stream

    'Late binding: no reference to Microsoft Scripting Runtime is required.
    Set fso = CreateObject("Scripting.FileSystemObject")

    On Error GoTo error_open_file
    Set ts = fso.OpenTextFile(parFileName)
    On Error GoTo unhandled_error

    'Read every line, split it, and track the widest line.
    ReDim locLinesList(1 To REDIM_STEP) As Variant
    Do While Not ts.AtEndOfStream
        If locNumRows = UBound(locLinesList) Then
            ReDim Preserve locLinesList(1 To locNumRows + REDIM_STEP) As Variant
        End If
        locNumRows = locNumRows + 1
        locLinesList(locNumRows) = Split(ts.ReadLine, parDelimiter)
        j = UBound(locLinesList(locNumRows))
        If locNumCols < j Then locNumCols = j
    Loop
    ts.Close
    Set ts = Nothing

    If locNumRows = 0 Then Exit Function

    ReDim locData(1 To locNumRows, 1 To locNumCols + 1) As Variant

    'Copy the fields into the result, stripping the exclude character.
    For i = 1 To locNumRows
        locFields = locLinesList(i)
        For j = 0 To UBound(locFields)
            locValue = locFields(j)
            If parExcludeCharacter <> "" Then
                'Strip leading and trailing character independently, so a field
                'that consists of just that character becomes "" instead of
                'raising an error.
                If Left$(locValue, 1) = parExcludeCharacter Then
                    locValue = Mid$(locValue, 2)
                End If
                If Len(locValue) > 0 Then
                    If Right$(locValue, 1) = parExcludeCharacter Then
                        locValue = Left$(locValue, Len(locValue) - 1)
                    End If
                End If
            End If
            locData(i, j + 1) = locValue
        Next j
    Next i

    getDataFromFile = locData
    Exit Function

error_open_file: 'Returns empty Variant
    Exit Function

unhandled_error: 'Returns empty Variant
    'Leave the error handler first so that the cleanup below can trap errors.
    Resume release_file

release_file:
    On Error Resume Next
    If Not ts Is Nothing Then ts.Close
    On Error GoTo 0
End Function
If you want to use the FileSystemObject to obtain your data, you will need to convert each date item in your Data variable into a "real date" before writing it to the worksheet.
As it is in your screenshot, it is a string and, as you have discovered, the conversion when writing to the worksheet is unreliable.
One way to convert it.
' Turns the d/M/Y text in column 3 of Data into real Date values so that the
' worksheet does not have to interpret the string itself.
Dim dateParts As Variant, rowIdx As Long
For rowIdx = LBound(Data) To UBound(Data)
    dateParts = Split(Data(rowIdx, 3), "/")
    ' Only values made of exactly three "/"-separated parts are converted.
    If UBound(dateParts) = 2 Then
        ' DateSerial(year, month, day): the text is ordered day/month/year.
        Data(rowIdx, 3) = DateSerial(dateParts(2), dateParts(1), dateParts(0))
    End If
Next rowIdx
Might be easier to IMPORT the data rather than using the FSO
Related
I want to copy some texts from a sheet to another. For example: 01/02/2021 .
However VBA automatically convert it to 2020/01/02. How can I stop it?
The following codes didn't work.
Example1:
' Asker's attempt 1 (reported as not working): copies column A of sheet_1 and
' pastes first the values, then the formats, at the range named "start" on ws.
' NOTE(review): the copied range is on sheet_1 but the last row is computed
' from sheet1 - confirm both names are meant to be the same worksheet.
sheet_1.Range("A1:A" & sheet1.Cells(1, 1).CurrentRegion.End(xlDown).row).Copy
ws.Range("start").PasteSpecial xlPasteValues
ws.Range("start").PasteSpecial xlPasteFormats
Example2:
' Asker's attempt 2 (reported as not working): same copy as attempt 1, pasted
' in one step as formulas plus number formats.
sheet_1.Range("A1:A" & sheet1.Cells(1, 1).CurrentRegion.End(xlDown).row).Copy
ws.Range("start").PasteSpecial xlPasteFormulasAndNumberFormats
Example3:
' Asker's attempt 3: same copy as attempt 1. As the trailing comment states,
' the paste line fails; it is kept verbatim as the non-working example.
sheet_1.Range("A1:A" & sheet1.Cells(1, 1).CurrentRegion.End(xlDown).row).Copy
ws.Range("start").Paste xlPaste Format:="Text" 'This causes an error
Please, try the next code. It will extract the date from the (pseudo) xls file and place it in the first column of the active sheet. Correctly formatted as date:
Sub openXLSAsTextExtractDate()
    ' Reads a tab-separated text file (saved with an .xls extension), converts
    ' the d/m/y text in the first field of each data line into a real Date and
    ' writes the dates to column A of the active sheet, formatted dd/mm/yyyy.
    '
    ' Lines whose first field is not made of three "/"-separated parts are
    ' skipped. Nothing is written when no date is found.
    Const FIRST_DATA_LINE As Long = 39   'zero-based index of the first line holding a date

    Dim sh As Worksheet, arrTXT, arrLine, arrD, arrDate(), fileToOpen As String
    Dim i As Long, k As Long

    Set sh = ActiveSheet                 'use here the sheet you need
    fileToOpen = "xls file full name"    'use here the full name of the saved xls file

    'put the file content in an array splitting the read text by end of line (vbCrLf):
    arrTXT = Split(CreateObject("Scripting.FileSystemObject").OpenTextFile(fileToOpen, 1).ReadAll, vbCrLf)

    'the last element is not processed (as before), so at least one data line must precede it
    If UBound(arrTXT) - 1 < FIRST_DATA_LINE Then Exit Sub

    'rows x 1 column, so the array can be written to the sheet without transposing
    ReDim arrDate(1 To UBound(arrTXT), 1 To 1)

    For i = FIRST_DATA_LINE To UBound(arrTXT) - 1
        arrLine = Split(arrTXT(i), vbTab)            'split the line by vbTab
        If UBound(arrLine) >= 0 Then                 'blank lines yield an empty array
            arrD = Split(arrLine(0), "/")            'split the first field (the date) by "/"
            If UBound(arrD) = 2 Then
                k = k + 1
                'DateSerial(year, month, day): the text is ordered day/month/year
                arrDate(k, 1) = DateSerial(arrD(2), arrD(1), arrD(0))
            End If
        End If
    Next i

    If k = 0 Then Exit Sub

    'only the first k rows of arrDate are filled; the range is sized to match
    With sh.Range("A1").Resize(k, 1)
        .Value = arrDate
        .NumberFormat = "dd/mm/yyyy"     'format the column where the dates have been dropped
    End With
End Sub
Edited:
You did not say anything...
So, I made a code returning the whole table (in the active sheet). Please, test it. It will take only some seconds:
Sub openXLSAsText()
    ' Reads a tab-separated text file (saved with an .xls extension) and writes
    ' its table - header line plus data lines, first 10 fields of each - to the
    ' active sheet starting at A1.
    '
    ' Per data row: field 1 (d/m/y text) becomes a real Date; in fields 3 and 4
    ' commas are replaced by dots; all other fields are copied unchanged.
    ' Lines with fewer than 10 fields leave the missing cells empty.
    Const HEADER_LINE As Long = 38       'zero-based index of the table header line
    Const COL_COUNT As Long = 10         'number of fields taken from each line

    Dim sh As Worksheet, arrTXT, arrLine, arrD, arrData(), fileToOpen As String
    Dim i As Long, j As Long, k As Long

    Set sh = ActiveSheet                 'use here the sheet you need
    fileToOpen = "xls file full name"    'use here the full name of the saved xls file

    'put the file content in an array splitting the read text by end of line (vbCrLf):
    arrTXT = Split(CreateObject("Scripting.FileSystemObject").OpenTextFile(fileToOpen, 1).ReadAll, vbCrLf)

    'the last element is not processed (as before), so the header must precede it
    If UBound(arrTXT) - 1 < HEADER_LINE Then Exit Sub

    'rows x columns, sized exactly, so no Transpose / ReDim Preserve is needed
    ReDim arrData(1 To UBound(arrTXT) - HEADER_LINE, 1 To COL_COUNT)

    For i = HEADER_LINE To UBound(arrTXT) - 1
        arrLine = Split(arrTXT(i), vbTab)            'split the line by vbTab
        k = k + 1                                    'k is the table row (1 = header)
        For j = 0 To COL_COUNT - 1
            If j <= UBound(arrLine) Then             'short lines: leave the cell empty
                If j = 0 And k > 1 Then
                    arrD = Split(arrLine(j), "/")    'split the date text by "/"
                    If UBound(arrD) = 2 Then
                        'DateSerial(year, month, day): the text is ordered day/month/year
                        arrData(k, j + 1) = DateSerial(arrD(2), arrD(1), arrD(0))
                    Else
                        arrData(k, j + 1) = arrLine(j)   'not a d/m/y value: keep the text
                    End If
                ElseIf j = 2 Or j = 3 Then
                    arrData(k, j + 1) = Replace(arrLine(j), ",", ".")   'columns 3 and 4: comma -> dot
                Else
                    arrData(k, j + 1) = arrLine(j)   'remaining columns are copied unchanged
                End If
            End If
        Next j
    Next i

    With sh.Range("A1").Resize(k, COL_COUNT)
        .Value = arrData                             'drop the array content
        .EntireColumn.AutoFit                        'autofit columns
        .Columns(1).NumberFormat = "dd/mm/yyyy"      'format the first column
    End With
    MsgBox "Ready..."
End Sub
So i have a text file which has a number of lines and each line has entries separated by delimiters. I have managed to break the data down and put them into a multi-dimensional array, please see code below.
Each row has a different number of entries separated by delimiters.
' Asker's original attempt (does not run as posted). Intent: read n lines from
' a text file, split the text into rows, split each row on "|", and copy the
' pieces into a two-dimensional array.
Public Sub testarr()
Dim i As Integer
Dim j As Integer
Dim iFile As Integer
Dim TotalRows() As String
Dim TotalColumns() As String
Dim sData As String
Dim MyArray() As String
Dim fso As Object
Set fso = CreateObject("Scripting.FileSystemObject")
Const forReading = 1
' filepath, F, y, n, x, strContents and strconts are used below without a Dim
' in this procedure.
filepath = "C:\test1"
Set F = fso.OpenTextFile(filepath, forReading) 'open file for reading
y = 1
n = 5 'the asker obtains the number of lines from a function; hard-coded here
'breaking into separate lines
' Each line is appended after a vbCrLf, so strconts starts with a line break
' and element 0 of the later Split is an empty string.
For i = 1 To n
strContents = F.readline
strconts = strconts & vbCrLf & strContents
Debug.Print strconts
Next
Debug.Print strconts
TotalRows() = Split(strconts, vbNewLine)
'ReDim Preserve TotalRows(y)
'TotalRows = Split(sData, vbCrLf)
For y = 1 To 5
Debug.Print TotalRows(y)
Next y
'trying to separate each row by the delimiters
' TotalColumns is a dynamic String array that is never dimensioned, and each
' element is assigned the array returned by Split. The inner loop also
' overwrites TotalColumns(x) on every pass, so only the split of TotalRows(5)
' would remain for each x.
For x = 1 To n
For y = 1 To 5
TotalColumns(x) = Split(TotalRows(y), "|")
Next y
Next x
above splits the rows but each line is the same
' MyArray is likewise never dimensioned before its elements are assigned.
For i = LBound(TotalRows) To UBound(TotalRows)
For j = LBound(TotalColumns) To UBound(TotalColumns)
MyArray(i, j) = TotalColumns(j)
Debug.Print MyArray(i, j)
Next
Next
End Sub
maybe there is another suggestion to do this. I just want to be able to retrieve entries of a particular line and column array( line 2, column 3). But each line will have a different number of entries so i do not want to define the number of columns but will define the number of lines.
You can use a "jagged" array:
'...
' Split returns a String array, and a String element cannot hold another
' array. The per-row arrays therefore go into a separate array: TotalRows must
' be a Variant array here (not the String array declared in the question).
Dim rawLines() As String
rawLines = Split(strconts, vbNewLine)
ReDim TotalRows(LBound(rawLines) To UBound(rawLines))
For x = LBound(rawLines) To UBound(rawLines)
    TotalRows(x) = Split(rawLines(x), "|")   ' each "row" becomes its own zero-based array
Next x
'....
Each "row" is now an array, so it would be something like:
blah = TotalRows(2)(2)
to get the third item on the third row. Obviously you'd need some bounds checking when accessing each sub-array.
edited after OP's clarification
Maybe this is the code you are after:
Option Explicit
Public Sub testarr()
    ' Reads the text file at filepath, splits it into lines and each line into
    ' "|"-separated entries, and copies the entries into the rectangular String
    ' array MyArray(row, column). Rows with fewer entries leave the remaining
    ' columns as empty strings. Every stored entry is printed to the Immediate
    ' window.
    Const forReading = 1

    Dim filepath As String
    Dim strconts As String
    Dim TotalRows As Variant            ' one string per line
    Dim TotalColumns() As Variant       ' jagged: one array of entries per line
    Dim MyArray() As String             ' rectangular copy of TotalColumns
    Dim i As Long, j As Long            ' Long: Integer overflows above 32,767 lines
    Dim nCols As Long, nColsMax As Long

    filepath = "C:\test1"

    With CreateObject("Scripting.FileSystemObject") 'create and reference FileSystemObject object
        With .OpenTextFile(filepath, forReading) 'open file for reading and reference it
            Do While Not .AtEndOfStream 'read the file till its last line
                strconts = strconts & .ReadLine & vbCrLf
            Loop
            .Close 'close referenced file
        End With
    End With

    ' An empty file would make Split return an empty array and the ReDim below fail.
    If Len(strconts) = 0 Then Exit Sub

    ' strconts ends with a line break, so the last element is an empty string;
    ' it produces a row without entries.
    TotalRows = Split(strconts, vbNewLine)

    ReDim TotalColumns(LBound(TotalRows) To UBound(TotalRows))
    For i = LBound(TotalRows) To UBound(TotalRows)
        TotalColumns(i) = Split(TotalRows(i), "|") 'have each TotalColumns element store an array
        ' Highest column index of THIS row (Split arrays are zero-based).
        nCols = UBound(TotalColumns(i))
        If nCols > nColsMax Then nColsMax = nCols 'store maximum column index across all rows
    Next

    'size MyArray to the same number of rows and the widest row's columns
    ReDim MyArray(LBound(TotalColumns) To UBound(TotalColumns), 0 To nColsMax)

    For i = LBound(TotalColumns) To UBound(TotalColumns)
        For j = LBound(TotalColumns(i)) To UBound(TotalColumns(i))
            MyArray(i, j) = TotalColumns(i)(j)
            Debug.Print MyArray(i, j)
        Next
    Next
End Sub
of course this code can be collapsed further, but that is something you can do afterwards
Quick background:
I am in the process of creating a search tool in Visual Basic that will allow me to search for inconsistently named materials in my database, which were entered as free text. While I have developed (with the help of Stack Overflow users) a tool which can search for hundreds of items at once, I need to improve it further.
My Question:
I need to be able to pull out item codes from these material descriptions. These items are general numbers such as: 20405-002 or alternatively: A445 or even B463-563. These are the main types of code I would be searching for, and these would be unique identifiers.
Some examples:
In a plant in Italy, I have a material named:
Siemens;Motor;A4002
In a plant in Germany, it is called:
Motor;FP4742;Siemens;TurnFast;A4002
I would search for the terms Siemens, Motor
My current search would return Siemens, Motor next to the first one, and Motor, Siemens next to the second. I would then want the Visual Basic code to, in essence, say 'these could be the same part', and then look within both for a matching code. When it finds the matching code, I would want it to return some sort of indicator in an Excel cell.
The overall aim: To have a tool I can use to find if two materials are actually the same, with minimum human input. There could be up to 50,000 materials from each of two plants. I also have the price and supplier of these parts. While the supplier is the same 75% of the time, the price is usually within 20% difference of the same material in a different country. If you have any other ideas as to how to see if two free text materials are actually the same, I'd be happy to hear.
My search code:
Function MultiSplitX(ByVal SourceText As String, RemoveBlankItems As Boolean, ParamArray Delimiters()) As String()
    ' Splits SourceText on any of several delimiters.
    '
    ' SourceText:       text to split (passed ByVal; the caller's string is untouched)
    ' RemoveBlankItems: True drops empty items from the result
    ' Delimiters:       either one array of delimiters, or the delimiters as
    '                   separate arguments: MultiSplitX(s, True, ";", ",")
    '
    ' Returns a zero-based String array. When nothing remains (empty text, or
    ' only blank items with RemoveBlankItems = True) an empty array with
    ' UBound = -1 is returned.
    '
    ' Method: every delimiter is replaced by one marker character that occurs
    ' neither in the text nor in a delimiter; the text is then split on it.
    Dim a As Long, b As Long, n As Long, idx As Long
    Dim delimCount As Long
    Dim markerCode As Long
    Dim marker As String
    Dim probeText As String
    Dim swapValue As String
    Dim tempArr() As String, finalArr() As String, fDelimiters() As String

    ' Normalise the delimiters into fDelimiters.
    If UBound(Delimiters) >= LBound(Delimiters) Then
        If InStr(TypeName(Delimiters(0)), "()") <> 0 And LBound(Delimiters) = UBound(Delimiters) Then
            ' A single argument that is itself an array of delimiters.
            If UBound(Delimiters(0)) >= LBound(Delimiters(0)) Then
                ReDim fDelimiters(LBound(Delimiters(0)) To UBound(Delimiters(0)))
                For a = LBound(Delimiters(0)) To UBound(Delimiters(0))
                    fDelimiters(a) = Delimiters(0)(a)
                Next a
                delimCount = UBound(fDelimiters) - LBound(fDelimiters) + 1
            End If
        Else
            ' Delimiters passed as separate arguments: copy each one.
            ReDim fDelimiters(LBound(Delimiters) To UBound(Delimiters))
            For a = LBound(Delimiters) To UBound(Delimiters)
                fDelimiters(a) = Delimiters(a)
            Next a
            delimCount = UBound(fDelimiters) - LBound(fDelimiters) + 1
        End If
    End If

    ' Find a marker character used neither by the text nor by any delimiter.
    probeText = SourceText
    If delimCount > 0 Then probeText = probeText & Join(fDelimiters, vbNullString)
    markerCode = 33
    Do While InStr(probeText, Chr(markerCode)) <> 0
        markerCode = markerCode + 1
        If markerCode > 255 Then
            Err.Raise 5, "MultiSplitX", "No unused single-byte character is available as a split marker."
        End If
    Loop
    marker = Chr(markerCode)

    If delimCount > 0 Then
        ' Longest delimiters first, so that e.g. ";;" is replaced before ";".
        For a = LBound(fDelimiters) To UBound(fDelimiters)
            For b = a + 1 To UBound(fDelimiters)
                If Len(fDelimiters(a)) < Len(fDelimiters(b)) Then
                    swapValue = fDelimiters(b)
                    fDelimiters(b) = fDelimiters(a)
                    fDelimiters(a) = swapValue
                End If
            Next b
        Next a

        ' Replace every delimiter with the common marker.
        For a = LBound(fDelimiters) To UBound(fDelimiters)
            If Len(fDelimiters(a)) > 0 Then
                SourceText = Replace(SourceText, fDelimiters(a), marker)
            End If
        Next a
    End If

    tempArr = Split(SourceText, marker)

    If RemoveBlankItems And UBound(tempArr) >= LBound(tempArr) Then
        ' Compact the non-blank items to the front of finalArr.
        ReDim finalArr(LBound(tempArr) To UBound(tempArr))
        n = LBound(tempArr)
        For idx = LBound(tempArr) To UBound(tempArr)
            If tempArr(idx) <> "" Then
                finalArr(n) = tempArr(idx)
                n = n + 1
            End If
        Next idx
        If n > LBound(tempArr) Then
            ReDim Preserve finalArr(LBound(tempArr) To n - 1)
            MultiSplitX = finalArr
        Else
            ' Every item was blank: return an empty array rather than failing.
            MultiSplitX = Split(vbNullString)
        End If
    Else
        MultiSplitX = tempArr
    End If
End Function
Thanks for your help everyone :)
This response is written in VBA for Excel, but it uses arrays to get and put the data, so you should be able to adapt it to a database easily. VB is very similar. If I were to do this work, I would do it in MS Access, in which case you can adapt this code even more easily. Of course, straight VB is always an option, although VB is not a great tool for this.
If you work with data much, I strongly recommend you learn the free and open source Python language. You can find a great Python video series for the noob on Youtube from Sentdex. His videos are nice and slow. You will quickly exceed what you can accomplish with VB.
It is difficult to answer the question comprehensively due to the lack of detail and small set of sample data.
There are many ways to approach this, depending on your desired output. I am making the following assumptions.
You are new to coding and want output that is easy to read. As
such, my solution defaults to a single 2D array of results. You
can change this to 3+ dimensions by setting DeepArr = True.
You want the results pasted in the same worksheet.
You have a separate list of supplier/vendor codes that can be found in the part codes. The GuessSupplier function depends on this assumption. If need be, update the function based on actual requirements.
I call your raw inputs (like Siemens;Motor;A4002) part codes.
I assume the text after the last semi-colon will always be the part
number. If not, you can easily replace that assumption in the
GuessPartNum function.
The following describes the spreadsheet I used for simple testing.
Sheet "PartCodes" contains the part codes in a single column with sample values in cells B3:B6 (header in B2). Columns G-H are reserved for the results.
Sheet "Suppliers" contains the unique supplier list in a single column (B3:B6).
You can specify sheet names and ranges for inputs and output in the RunMain() sub.
For convenience, I hard coded the sheet names in some places. You should bring these to the surface as arguments.
The code is somewhat verbose to make it easy to understand.
I did not test performance, as I do not have the data set and expect you will run this infrequently.
I added only a trivial amount of error handling.
My complete set of code is below. You will find the RunMain() sub near the bottom. This kicks off the Main() sub that controls the workflow.
Option Base 0
Option Explicit
' 1) Manually eliminate duplicates in your parts list using Excel built-in feature.
' a) highlight the range
' b) Data ribbon > Remove Duplicates
' 2) Create a supplier list on a separate sheet in the same workbook
' 3) Edit the RunMain() procedure per your data. I assume: your part code list
' - part code list is in cells B3:B10 of the PartCodes sheet.
' - supplier list in cells b4:b6 of Suppliers sheet.
' - output goes to D2 in PartCodes sheet.
' 4) Run the RunMain() procedure simply kicks off Main.
' Main() sub does the following:
' a)Run ProcessPartCodes:
' i. load the parts codes from the worksheet into an array
' ii. run GuessPartNum and GuessSupplier and place results in the parts code array.
' b) Run FindMatches to add more to the array. Finds other part codes that may be for the same part.
' Logic is described in the function.
' c) Run ArrayToRange to paste part of the result set to the worksheet. Note that
' the output array is more than 2 dimensions, so not all data is pasted neatly.
' I leave it to you to determine how you want to format the data for output.
'
Function RangeToArray(inputRange As Range)
    ' Copies the values of a rectangular range into a 2D Variant array.
    '
    ' inputRange: a rectangular range
    '
    ' Returns a 1-based array (1 To rows, 1 To columns); it is 2D even when the
    ' range is a single row, column or cell. Every value is passed through
    ' Trim, so the elements are strings without leading/trailing spaces.
    Dim i As Long, j As Long            ' Long: Integer overflows above 32,767 rows
    Dim rowCnt As Long
    Dim colCnt As Long
    Dim retArr() As Variant

    ' Size output array
    rowCnt = inputRange.Rows.Count
    colCnt = inputRange.Columns.Count
    ReDim retArr(1 To rowCnt, 1 To colCnt) As Variant

    ' Load range values into array
    For i = 1 To rowCnt
        For j = 1 To colCnt
            retArr(i, j) = Trim(inputRange.Cells(i, j))
        Next j
    Next i

    ' Return array
    RangeToArray = retArr
End Function
Sub ArrayToRange(myArr As Variant, Target As Range)
    ' Copies the content of a 2D array to a worksheet.
    '
    ' myArr:  array with exactly 2 dimensions; any lower bounds are accepted
    ' Target: destination. Only its top left cell is used, so its size need
    '         not match the array.
    '
    ' The first array element (lowest row and column index) lands on the top
    ' left cell of Target, on Target's own worksheet. Values are written one
    ' cell at a time and a value that cannot be written is skipped, so one bad
    ' value does not abort the whole operation.
    Dim r As Long, c As Long
    Dim rowBase As Long, colBase As Long
    Dim topLeft As Range

    Set topLeft = Target.Cells(1, 1)
    rowBase = LBound(myArr, 1)
    colBase = LBound(myArr, 2)

    ' A single block assignment (range = myArr) would usually work, but fails
    ' as a whole on funky data, hence the loop.
    For r = rowBase To UBound(myArr, 1)
        For c = colBase To UBound(myArr, 2)
            On Error Resume Next ' Prevent one bad value from killing the entire operation.
            ' Offsets are relative to the array's own lower bounds.
            topLeft.Offset(r - rowBase, c - colBase).Value = myArr(r, c)
            On Error GoTo 0
        Next c
    Next r
End Sub
' Not used, this is just an example
'Public Function RangeCorners(Optional MyRange As Range = Range("c2:c10"))
' TopLeft = MyRange.Cells(1)
' BottomLeft = MyRange.Cells(.Rows.Count, 1)
' TopRight = MyRange.Cells(1, .Columns.Count)
' BottomRigt = MyRange.Cells(.Cells.Count)
' RangeCorners = Array(TopLeft, TopRight, BottomLeft, BottomRight)
'End Function
Function IsInArray(stringToBeFound As String, arr As Variant) As Boolean
    ' Returns True if stringToBeFound is an element of the one-dimensional
    ' array arr; else False.
    '
    ' The comparison is exact and case-sensitive on whole elements. Filter is
    ' not used because it matches substrings, so it would report "A4" as being
    ' in an array that only contains "A4002".
    Dim idx As Long

    For idx = LBound(arr) To UBound(arr)
        If StrComp(CStr(arr(idx)), stringToBeFound, vbBinaryCompare) = 0 Then
            IsInArray = True
            Exit Function
        End If
    Next idx
End Function
Function GuessPartNum(splitPartCode As Variant, Optional delim As String = ";")
    ' Guesses which piece of a parsed part code is the part number.
    '
    ' splitPartCode: array of the pieces of one part code (result of Split)
    ' delim:         not used by the current logic; kept so the rule can be
    '                changed without touching callers
    '
    ' Current rule: the last piece is the part number. Replace this with
    ' whatever fits the data set (e.g. "the piece with at least 3 digits").
    ' Returns "" when splitPartCode is not an array or has no elements.
    GuessPartNum = ""
    If Not IsArray(splitPartCode) Then Exit Function
    If UBound(splitPartCode) < LBound(splitPartCode) Then Exit Function

    GuessPartNum = splitPartCode(UBound(splitPartCode))
End Function
Function GuessSupplier(splitPartCode As Variant, supplierList As Variant, Optional delim As String = ";")
    ' Determines the supplier of a part from its parsed part code.
    '
    ' splitPartCode: array of the pieces of one part code (result of Split)
    ' supplierList:  2D array of supplier names; names are read from (i, 1)
    ' delim:         not used by the current logic
    '
    ' Returns the first supplier name that occurs inside any piece of the part
    ' code (substring match via Filter). Returns Empty when none matches.
    Dim i As Long
    Dim supplierName As String

    For i = LBound(supplierList, 1) To UBound(supplierList, 1)
        supplierName = CStr(supplierList(i, 1))
        ' Skip blank rows: an empty search string is contained in every piece,
        ' so a blank cell would otherwise win as the "first match".
        If Len(supplierName) > 0 Then
            If UBound(Filter(splitPartCode, supplierName)) > -1 Then
                GuessSupplier = supplierList(i, 1)
                Exit Function
            End If
        End If
    Next i
End Function
Function ProcessPartCodes(partCodeRange As Range, supplierListRange As Range, Optional delim As String = ";")
    ' Loads the part codes from a worksheet range and derives part number and
    ' supplier for each of them.
    '
    ' partCodeRange:     range holding the part code list; single column
    ' supplierListRange: range holding the supplier names; single column
    ' delim:             separator between the pieces of a part code
    '
    ' Returns a 2D array with one row per cell of partCodeRange (row indexes as
    ' returned by RangeToArray) and columns 0 To 4:
    '   0 = part code as read (not parsed)
    '   1 = part number (GuessPartNum)
    '   2 = supplier (GuessSupplier)
    '   3 = part code split by delim (array); Array() for a blank part code
    '   4 = reserved for match information
    Dim rawCodes As Variant         ' part codes as loaded from the sheet
    Dim resultArr() As Variant      ' result set
    Dim supplierList As Variant
    Dim splitCode As Variant
    Dim firstCol As Long
    Dim i As Long

    rawCodes = RangeToArray(partCodeRange)
    supplierList = RangeToArray(supplierListRange)
    firstCol = LBound(rawCodes, 2)

    ' A new array is built instead of ReDim Preserve on the loaded one:
    ' Preserve cannot change a lower bound, and the result columns start at 0.
    ReDim resultArr(LBound(rawCodes, 1) To UBound(rawCodes, 1), 0 To 4)

    ' Get supplier and part num from each part code
    For i = LBound(resultArr, 1) To UBound(resultArr, 1)
        resultArr(i, 0) = rawCodes(i, firstCol)                         ' Part Code (not parsed)
        If Len(resultArr(i, 0)) > 0 Then
            splitCode = Split(resultArr(i, 0), delim)                   ' Split the part code by the delimiter
            resultArr(i, 1) = GuessPartNum(splitCode, delim)            ' Part Number
            resultArr(i, 2) = GuessSupplier(splitCode, supplierList, delim) ' Supplier
            resultArr(i, 3) = splitCode                                 ' Part Code (parsed)
            'resultArr(i, 4) is reserved for match information
        Else
            ' Blank part code: no pieces.
            resultArr(i, 3) = Array()
        End If
    Next i

    ProcessPartCodes = resultArr
End Function
Function CompareParts(splitPartCode1 As Variant, splitPartCode2 As Variant)
    ' Finds the pieces of one parsed part code that also occur in another.
    '
    ' splitPartCode1: array of the pieces of the first part code
    ' splitPartCode2: array of the pieces of the second part code
    '
    ' Returns Array(matchCnt, matches):
    '   matchCnt = number of non-blank pieces of splitPartCode1 found in
    '              splitPartCode2 (0 when there is none)
    '   matches  = zero-based String array of those pieces, in the order of
    '              splitPartCode1; empty (UBound = -1) when matchCnt = 0
    '
    ' A piece counts as found when it is contained in any piece of
    ' splitPartCode2 (substring match via Filter).
    Dim matches() As String
    Dim matchCnt As Long
    Dim token As String
    Dim i As Long

    matches = Split(vbNullString)   ' start with a really empty array (no placeholder element)

    For i = LBound(splitPartCode1) To UBound(splitPartCode1)
        token = CStr(splitPartCode1(i))
        ' Blank pieces are skipped: an empty search string is contained in
        ' every piece and would always count as a match.
        If Len(token) > 0 Then
            If UBound(Filter(splitPartCode2, token)) > -1 Then
                ReDim Preserve matches(0 To matchCnt)   ' grow the matches array by one
                matches(matchCnt) = token
                matchCnt = matchCnt + 1
            End If
        End If
    Next i

    CompareParts = Array(matchCnt, matches)
End Function
Function FindMatches(partCodeArr As Variant, Optional DeepArr As Boolean = False)
    ' Compares every part code with all part codes below it in the list and
    ' records which of them may describe the same part.
    '
    ' partCodeArr: array as produced by ProcessPartCodes; per row
    '              0 = part code, 1 = part number, 2 = supplier,
    '              3 = part code split into pieces, 4 = receives the matches.
    '              The array is modified in place and also returned.
    ' DeepArr:     False (default): column 4 receives one string of the form
    '                  "[ [part code,score,description], [...]] " ("" if no
    '                  match) and column 3 is flattened back to the part code,
    '                  so the result pastes easily into a worksheet.
    '              True: column 4 receives a String array (0 To n-1, 0 To 2)
    '                  with part code, score and description per match, or
    '                  Array() when there is no match; column 3 stays parsed.
    '
    ' Scores (first rule that applies; matchCnt is the count from CompareParts):
    '   "0" identical part codes
    '   "1" same supplier and same part number
    '   "2" same supplier and matchCnt > 2
    '   "3" different supplier and matchCnt > 2
    '   "4" different supplier and matchCnt > 1
    ' Rows with a blank part code are neither compared nor changed.
    Dim i As Long, j As Long, k As Long     ' Long: Integer overflows above 32,767 rows
    Dim partCodei As String, partCodej As String
    Dim partNumi As String, partSupplieri As String
    Dim numMatch As Boolean, supplierMatch As Boolean
    Dim splitCodei() As String, splitCodej() As String
    Dim splitCompare As Variant
    Dim matchCnt As Long
    Dim score As String, reason As String
    Dim hitCount As Long
    Dim hits() As String        ' (field 0 To 2, match index): the last dimension can grow with ReDim Preserve
    Dim matches() As String     ' (match index, field 0 To 2): shape handed out when DeepArr = True
    Dim matchstr As String

    For i = LBound(partCodeArr) To UBound(partCodeArr)
        If i = 1 Or i = UBound(partCodeArr) Or i Mod 100 = 0 Then
            Debug.Print "Starting record " & i & ": " & Now()
        End If

        If partCodeArr(i, 0) <> "" Then
            ' Values of record i do not change while scanning the records below it.
            partCodei = partCodeArr(i, 0)
            partNumi = partCodeArr(i, 1)
            partSupplieri = partCodeArr(i, 2)
            splitCodei = partCodeArr(i, 3)

            hitCount = 0
            Erase hits
            matchstr = ""

            For j = i + 1 To UBound(partCodeArr)
                If Len(partCodeArr(j, 0)) > 0 Then
                    partCodej = partCodeArr(j, 0)
                    numMatch = (partNumi = CStr(partCodeArr(j, 1)))
                    supplierMatch = (partSupplieri = CStr(partCodeArr(j, 2)))
                    splitCodej = partCodeArr(j, 3)
                    splitCompare = CompareParts(splitCodei, splitCodej)
                    matchCnt = splitCompare(0)

                    ' Classify the pair; score stays "" when no rule applies.
                    score = ""
                    If partCodei = partCodej Then
                        ' Duplicate entry; duplicates should have been removed before starting.
                        score = "0"
                        reason = "Duplicate Entry. Part codes are identical."
                    ElseIf supplierMatch And numMatch Then
                        score = "1"
                        reason = "Probably same part with differnt part code. Same supplier and part number."
                    ElseIf supplierMatch And matchCnt > 2 Then
                        score = "2"
                        reason = "Possible duplicate. More likely a similar part from same supplier"
                    ElseIf supplierMatch = False And matchCnt > 2 Then
                        score = "3"
                        reason = "Possible part match from different supplier"
                    ElseIf supplierMatch = False And matchCnt > 1 Then
                        score = "4"
                        reason = "Low probability part match from different supplier"
                    End If

                    If Len(score) > 0 Then
                        ' Keep every match of record i (grown along the last dimension).
                        ReDim Preserve hits(0 To 2, 0 To hitCount)
                        hits(0, hitCount) = partCodej
                        hits(1, hitCount) = score
                        hits(2, hitCount) = reason
                        hitCount = hitCount + 1

                        If Not DeepArr Then
                            matchstr = matchstr & "[" & partCodej & "," & score & "," & reason & "], "
                        End If
                    End If
                End If
            Next j

            If DeepArr Then
                ' return 3+ dimensional array
                If hitCount > 0 Then
                    ReDim matches(0 To hitCount - 1, 0 To 2)
                    For k = 0 To hitCount - 1
                        matches(k, 0) = hits(0, k)
                        matches(k, 1) = hits(1, k)
                        matches(k, 2) = hits(2, k)
                    Next k
                    partCodeArr(i, 4) = matches
                Else
                    partCodeArr(i, 4) = Array()
                End If
            Else
                ' return 2D array for easier pasting to worksheet:
                ' drop the trailing ", " and wrap the list in brackets.
                If Len(matchstr) > 0 Then
                    matchstr = "[ " & Left(matchstr, Len(matchstr) - 2) & "] "
                End If
                partCodeArr(i, 4) = matchstr
                ' Flatten the parsed part code back to original string format.
                partCodeArr(i, 3) = partCodeArr(i, 0)
            End If
        End If
    Next i

    FindMatches = partCodeArr
End Function
Sub RunMain()
    ' Example launcher for Main: resolves three hard-coded worksheet ranges
    ' and hands them over as Range objects (not as range-name strings).
    '
    ' Ranges used:
    '   rawPartCodes  = sheet "PartCodes", cells B3:B10 - the raw part code list
    '   supplierCodes = sheet "Suppliers", cells B4:B6  - the unique list of
    '                   supplier codes found in the part codes
    '   outputAnchor  = "PartCodes!D2" - passed to Main as its destination
    '
    Dim rawPartCodes As Range
    Dim supplierCodes As Range
    Dim outputAnchor As Range

    Set rawPartCodes = Sheets("PartCodes").Range("B3:B10")
    Set supplierCodes = Sheets("Suppliers").Range("B4:B6")
    Set outputAnchor = Range("PartCodes!D2")

    Main rawPartCodes, supplierCodes, outputAnchor
End Sub
Sub Main(partCodeRange As Range, supplierListRange As Range, destination As Range)
    ' Runs the whole "find equivalent part codes" process and writes the
    ' findings to a worksheet. RunMain() shows an example call.
    '
    ' Arguments:
    '   partCodeRange     = Excel Range (not string name of range)
    '                       holding the raw part code list
    '   supplierListRange = Excel Range (not string name of range)
    '                       holding a unique list of supplier codes
    '                       found in the part codes
    '   destination       = Excel Range where the header row is written;
    '                       the result rows start one row below it
    '
    ' Start time, finish time and elapsed time are logged to the Immediate window.
    '
    Dim parsedCodes As Variant
    Dim matchResults As Variant
    Dim startedAt As Date
    Dim finishedAt As Date
    Dim headerText As Variant
    Dim headerIdx As Long

    startedAt = Now()
    Debug.Print
    Debug.Print String(70, "*")
    Debug.Print
    Debug.Print "Starting: " & startedAt
    Debug.Print

    parsedCodes = ProcessPartCodes(partCodeRange, supplierListRange)
    ' Use FindMatches(parsedCodes, True) for 3+ dimensional results.
    matchResults = FindMatches(parsedCodes)

    Sheets("PartCodes").Activate

    ' Column headers, written left to right starting at the destination cell.
    headerText = Array("Part Code", "Part Num", "Part Supplier", "Part Code", _
                       "Potential equivalent part numbers")
    For headerIdx = LBound(headerText) To UBound(headerText)
        destination.Offset(0, headerIdx - LBound(headerText)) = headerText(headerIdx)
    Next headerIdx

    ArrayToRange matchResults, destination.Offset(1, 0)

    finishedAt = Now()
    Debug.Print
    Debug.Print "Finished: " & finishedAt
    Debug.Print
    Debug.Print "Run time: " & (finishedAt - startedAt)
    Debug.Print
    Debug.Print String(70, "*")
    Debug.Print
End Sub
I would really appreciate it if someone could give me some help with this.
I am quite familiar with VBA: I can write simple code and also customise code from others. I have written, customised, or copied several pieces of VBA code (copied sources are acknowledged) to do the following:
Select two different CSV files, each representing a matrix; both matrices have the same number of rows and the same number of columns.
Multiply the corresponding cells of the two matrices.
Return results.
Unfortunately I cannot seem to be able to get this to run.
Any idea what I have not done correctly?
Please see the code below. Thanks so much.
Code changed from previous version
Public Sub doIt()
    ' Reads two comma-delimited files as matrices, multiplies them with
    ' MMULT2_FUNC and writes the product to a third file.
    '
    ' Both inputs are validated before multiplying, and a failed
    ' multiplication is reported to the user. MMULT2_FUNC returns an error
    ' number instead of an array when it fails, and writeToCsv silently
    ' ignores anything that is not a 2-D array, so without this check a
    ' failure would produce no output file and no message at all.
    Dim sourceFile As String
    Dim sourceFile2 As String
    Dim destinationFile As String
    Dim data As Variant
    Dim datarain As Variant
    Dim result As Variant

    sourceFile = "C:\file1.csv"
    sourceFile2 = "C:\file2.csv"
    destinationFile = "C:\file3.txt"

    data = getDataFromFile(sourceFile, ",")
    datarain = getDataFromFile(sourceFile2, ",")

    ' Either file being empty/unreadable makes the multiplication meaningless.
    If isArrayEmpty(data) Or isArrayEmpty(datarain) Then
        MsgBox ("Empty file")
        Exit Sub
    End If

    result = MMULT2_FUNC(data, datarain)

    ' A non-array result is an error number (e.g. incompatible dimensions
    ' or non-numeric cell content).
    If Not IsArray(result) Then
        MsgBox "Matrix multiplication failed (error " & result & "). " & _
               "Check that the number of columns in " & sourceFile & _
               " equals the number of rows in " & sourceFile2 & _
               " and that all cells are numeric."
        Exit Sub
    End If

    writeToCsv result, destinationFile, ","
End Sub
Function MMULT2_FUNC(ByRef ADATA_RNG As Variant, _
                     ByRef BDATA_RNG As Variant) As Variant
    ' Matrix product A x B of two 2-D arrays.
    '
    ' Arguments:
    '   ADATA_RNG = left matrix  (rows x inner), any lower bounds
    '   BDATA_RNG = right matrix (inner x columns), any lower bounds
    '
    ' Returns:
    '   On success, a 2-D Variant array (1 To rows of A, 1 To columns of B).
    '   On failure, a non-array Long error number:
    '     - ERR_SHAPE_MISMATCH (5) when the column count of A differs from
    '       the row count of B. Err.Number is 0 at that point, so returning
    '       it would look like "no error".
    '     - the VBA run-time error number for anything else (e.g. 13 when
    '       a cell is not numeric, or an input is not a 2-D array).
    '   Callers can therefore test the outcome with IsArray().
    Const ERR_SHAPE_MISMATCH As Long = 5

    Dim i As Long
    Dim j As Long
    Dim k As Long
    Dim ANROWS As Long
    Dim BNROWS As Long
    Dim ANCOLUMNS As Long
    Dim BNCOLUMNS As Long
    Dim aRow0 As Long
    Dim aCol0 As Long
    Dim bRow0 As Long
    Dim bCol0 As Long
    Dim ADATA_MATRIX As Variant
    Dim BDATA_MATRIX As Variant
    Dim TEMP_MATRIX As Variant
    Dim cellSum As Variant

    On Error GoTo ERROR_LABEL

    ADATA_MATRIX = ADATA_RNG
    BDATA_MATRIX = BDATA_RNG

    ' Sizes are computed from both bounds so that arrays which are not
    ' 1-based are multiplied correctly too.
    aRow0 = LBound(ADATA_MATRIX, 1)
    aCol0 = LBound(ADATA_MATRIX, 2)
    bRow0 = LBound(BDATA_MATRIX, 1)
    bCol0 = LBound(BDATA_MATRIX, 2)

    ANROWS = UBound(ADATA_MATRIX, 1) - aRow0 + 1
    ANCOLUMNS = UBound(ADATA_MATRIX, 2) - aCol0 + 1
    BNROWS = UBound(BDATA_MATRIX, 1) - bRow0 + 1
    BNCOLUMNS = UBound(BDATA_MATRIX, 2) - bCol0 + 1

    If ANCOLUMNS <> BNROWS Then
        MMULT2_FUNC = ERR_SHAPE_MISMATCH
        Exit Function
    End If

    ReDim TEMP_MATRIX(1 To ANROWS, 1 To BNCOLUMNS)

    For i = 1 To ANROWS
        For j = 1 To BNCOLUMNS
            cellSum = 0
            For k = 1 To ANCOLUMNS
                cellSum = cellSum + ADATA_MATRIX(aRow0 + i - 1, aCol0 + k - 1) * _
                                    BDATA_MATRIX(bRow0 + k - 1, bCol0 + j - 1)
            Next k
            TEMP_MATRIX(i, j) = cellSum
        Next j
    Next i

    MMULT2_FUNC = TEMP_MATRIX
    Exit Function

ERROR_LABEL:
    MMULT2_FUNC = Err.Number
End Function
Public Sub writeToCsv(parData As Variant, parFileName As String, parDelimiter As String)
    ' Writes a 2-D array to a delimited text file, one array row per line.
    '
    ' Arguments:
    '   parData      = 2-D array to write; anything else is ignored silently
    '   parFileName  = target file; an existing file is deleted first
    '   parDelimiter = separator placed between fields (any length, may be "")
    '
    ' Error cells are written as "#N/A". Lines are separated by vbCrLf and
    ' the last line has no trailing line break.
    '
    ' The delimiter is inserted only between fields. The earlier approach of
    ' appending it after every field and chopping one character off the end
    ' corrupted the output for delimiters that are not exactly 1 character.
    '
    ' Errors raised while writing are re-raised to the caller after the
    ' file handle has been closed.
    If getArrayNumberOfDimensions(parData) <> 2 Then Exit Sub

    Dim i As Long
    Dim j As Long
    Dim FileNum As Long
    Dim locLine As String
    Dim locFirstCol As Long
    Dim locErrNumber As Long
    Dim locErrSource As String
    Dim locErrDescription As String

    FileNum = FreeFile
    If Dir(parFileName) <> "" Then Kill (parFileName)
    Open parFileName For Binary Lock Read Write As #FileNum

    ' Armed only after Open succeeded, so the handler never closes a file
    ' number that was not opened here.
    On Error GoTo error_handler

    locFirstCol = LBound(parData, 2)
    For i = LBound(parData, 1) To UBound(parData, 1)
        locLine = ""
        For j = locFirstCol To UBound(parData, 2)
            If j > locFirstCol Then locLine = locLine & parDelimiter
            If IsError(parData(i, j)) Then
                locLine = locLine & "#N/A"
            Else
                locLine = locLine & parData(i, j)
            End If
        Next j
        If i <> UBound(parData, 1) Then locLine = locLine & vbCrLf
        Put #FileNum, , locLine
    Next i

error_handler:
    ' Reached both on success (Err.Number = 0) and on failure.
    locErrNumber = Err.Number
    locErrSource = Err.Source
    locErrDescription = Err.Description
    Close #FileNum
    If locErrNumber <> 0 Then Err.Raise locErrNumber, locErrSource, locErrDescription
End Sub
Public Function isArrayEmpty(parArray As Variant) As Boolean
    ' Returns True when parArray holds no elements, i.e. when it is
    '   - not an array at all,
    '   - a dynamic array that has not been initialised (ReDim) or has been
    '     erased (Erase), or
    '   - an array whose first dimension has UBound < LBound (e.g. Array()).
    ' Returns False for any array with at least one element.
    '
    ' The bounds are read under an explicit error handler: asking for the
    ' bounds of an unallocated array raises a run-time error, which is
    ' treated as "empty".
    Dim locLower As Long
    Dim locUpper As Long

    isArrayEmpty = True
    If Not IsArray(parArray) Then Exit Function

    On Error GoTo bounds_unavailable
    locLower = LBound(parArray)
    locUpper = UBound(parArray)
    isArrayEmpty = (locUpper < locLower)
    Exit Function

bounds_unavailable:
    ' Unallocated array: keep the True result set above.
End Function
Public Function getArrayNumberOfDimensions(parArray As Variant) As Long
    ' Returns the number of dimensions of an array - 0 for an empty array
    ' (as judged by isArrayEmpty).
    '
    ' The count is found by probing LBound for dimension 1, 2, 3, ... until
    ' VBA raises an error; the last dimension that did not fail is the answer.
    '
    ' VBA arrays can have at most 60 dimensions, so probing one past that
    ' limit is guaranteed to fail.
    Const MAX_VBA_DIMENSIONS As Long = 60

    Dim i As Long
    Dim errorCheck As Long

    If isArrayEmpty(parArray) Then Exit Function 'returns 0

    On Error GoTo FinalDimension
    For i = 1 To MAX_VBA_DIMENSIONS + 1
        errorCheck = LBound(parArray, i)
    Next i

    'Not supposed to happen: the probe one past the limit must fail.
    getArrayNumberOfDimensions = 0
    Exit Function

FinalDimension:
    getArrayNumberOfDimensions = i - 1
End Function
Private Function getDataFromFile(parFileName As String, parDelimiter As String, Optional parExcludeCharacter As String = "") As Variant
    ' Reads a delimited text file (csv...) into a 2-D Variant array.
    '
    ' Arguments:
    '   parFileName         = path of the delimited file
    '   parDelimiter        = field delimiter, "," for a comma delimited file
    '   parExcludeCharacter = optional single character stripped from the
    '                         start and/or end of every field; pass """" to
    '                         remove the quotes around "XXX" style strings
    '
    ' Returns:
    '   Array (1 To rows, 1 To columns). The column count is taken from the
    '   line with the most fields, not from the first line; cells missing on
    '   shorter lines stay Empty.
    '   An Empty variant if the file is empty, can't be opened, or an
    '   unexpected error occurs while reading.
    '
    ' Uses a late-bound Scripting.FileSystemObject, so no project reference
    ' is needed.
    Const REDIM_STEP = 10000

    Dim locLinesList() As Variant
    Dim locData As Variant
    Dim locField As String
    Dim i As Long
    Dim j As Long
    Dim locNumRows As Long
    Dim locNumCols As Long
    Dim fso As Variant
    Dim ts As Variant
    Dim locStreamOpen As Boolean

    Set fso = CreateObject("Scripting.FileSystemObject")

    On Error GoTo error_open_file
    Set ts = fso.OpenTextFile(parFileName)
    locStreamOpen = True
    On Error GoTo unhandled_error

    'Reads every line, split into fields, and tracks the largest field count.
    'The line buffer grows in REDIM_STEP chunks to avoid a ReDim per line.
    ReDim locLinesList(1 To 1) As Variant
    i = 0
    Do While Not ts.AtEndOfStream
        If i Mod REDIM_STEP = 0 Then
            ReDim Preserve locLinesList(1 To UBound(locLinesList, 1) + REDIM_STEP) As Variant
        End If
        locLinesList(i + 1) = Split(ts.ReadLine, parDelimiter)
        j = UBound(locLinesList(i + 1), 1) 'zero-based index of the last field
        If locNumCols < j Then locNumCols = j
        i = i + 1
    Loop

    ts.Close
    locStreamOpen = False

    locNumRows = i
    If locNumRows = 0 Then Exit Function 'Empty file

    ReDim locData(1 To locNumRows, 1 To locNumCols + 1) As Variant

    'Copies the fields into the result array, stripping the exclude
    'character where requested.
    For i = 1 To locNumRows
        For j = 0 To UBound(locLinesList(i), 1)
            locField = locLinesList(i)(j)
            If parExcludeCharacter <> "" Then
                locField = trimExcludeCharacter(locField, parExcludeCharacter)
            End If
            locData(i, j + 1) = locField
        Next j
    Next i

    getDataFromFile = locData
    Exit Function

error_open_file:  'returns empty variant
unhandled_error:  'returns empty variant
    'Do not leave the file open when reading failed half-way.
    If locStreamOpen Then ts.Close
End Function

Private Function trimExcludeCharacter(parValue As String, parExcludeCharacter As String) As String
    ' Removes one leading and/or one trailing parExcludeCharacter from parValue.
    ' The two ends are handled one after the other, so a field consisting of
    ' the exclude character alone becomes "" instead of raising an error.
    Dim locResult As String

    locResult = parValue
    If Left$(locResult, 1) = parExcludeCharacter Then locResult = Mid$(locResult, 2)
    If Right$(locResult, 1) = parExcludeCharacter Then locResult = Left$(locResult, Len(locResult) - 1)
    trimExcludeCharacter = locResult
End Function
Despite my personal impression that your code can be improved in some instances, it syntactically executes here with no problem (on small matrices).
My test data
1,2,3 2,3,4 20,26,32
2,3,4 X 3,4,5 = 29,38,47
3,4,5 4,5,6 38,50,62
The result is neatly written to a CSV.
The only obvious problem (here on Win 7!) is that Sub writeToCsv -> Open parFileName ... fails due to a lack of write permissions in the root directory. This might not be a problem on XP.
On a different note, I have the impression that the code can be improved, but I may not understand the rationale behind some parts of it.
examples
Function MMULT2_FUNC(ByRef ADATA_RNG As Variant, ByRef BDATA_RNG As Variant) ' missing type of result
Private Function getDataFromFile(...)
...
If j = 13 Then
j = j
End If ' whow ... if j <> 13 then j again equals j ;-)
Finding the upper and lower bounds of the matrices, on input as well as on output, could be greatly simplified ...
Thank you all for your help. The reason my code was not printing results was that I had this: If ANCOLUMNS <> BNROWS Then: GoTo ERROR_LABEL. At the same time, I was using two matrices of 70*120, so it constantly exited the function, exactly as I had programmed it to do! I corrected it all and it worked fine. Thanks a lot for your help.
I'm having a problem reading in mixed datatypes from a .csv datasource: strings are returned as Nulls when I have a column with mixed String/Numeric values. I have set IMEX=1 and changed the Registry entry TypeGuessRows from 8 to 0 (but even if I have mixed datatypes within the first 8 rows, strings are still coming through as Null). I have also set ImportMixedTypes=Text in the registry.
What am I missing?? Any ideas much appreciated.
Here's my connection string:
ConnString = "Provider=Microsoft.Jet.OLEDB.4.0;" _
& "Data Source=" & Folder & ";" _
& "Extended Properties='text;HDR=YES;FMT=CSVDelimited;IMEX=1';" _
& "Persist Security Info=False;"
Here is another code sample that does not use ADO, similar to what Fink posted, with a little more flexibility and error handling. Performance is not too bad (reads and parses a 20 MB csv file in less than 3 seconds on my machine).
Public Function getDataFromFile(parFileName As String, parDelimiter As String, Optional parExcludeCharacter As String = "") As Variant
    ' Reads a delimited text file (csv...) into a 2-D Variant array.
    '
    ' Arguments:
    '   parFileName         = path of the delimited file
    '   parDelimiter        = field delimiter, "," for a comma delimited file
    '   parExcludeCharacter = optional single character stripped from the
    '                         start and/or end of every field; pass """" to
    '                         remove the quotes around "XXX" style strings
    '
    ' Returns:
    '   Array (1 To rows, 1 To columns). The column count is taken from the
    '   line with the most fields, not from the first line; cells missing on
    '   shorter lines stay Empty.
    '   An Empty variant if the file is empty, can't be opened, or an
    '   unexpected error occurs while reading.
    '
    ' Requires a reference to 'Microsoft Scripting Runtime' (early-bound
    ' FileSystemObject / TextStream).
    Const REDIM_STEP = 10000

    Dim locLinesList() As Variant
    Dim locData As Variant
    Dim locField As String
    Dim i As Long
    Dim j As Long
    Dim locNumRows As Long
    Dim locNumCols As Long
    Dim fso As New FileSystemObject
    Dim ts As TextStream
    Dim locStreamOpen As Boolean

    On Error GoTo error_open_file
    Set ts = fso.OpenTextFile(parFileName)
    locStreamOpen = True
    On Error GoTo unhandled_error

    'Reads every line, split into fields, and tracks the largest field count.
    'The line buffer grows in REDIM_STEP chunks to avoid a ReDim per line.
    ReDim locLinesList(1 To 1) As Variant
    i = 0
    Do While Not ts.AtEndOfStream
        If i Mod REDIM_STEP = 0 Then
            ReDim Preserve locLinesList(1 To UBound(locLinesList, 1) + REDIM_STEP) As Variant
        End If
        locLinesList(i + 1) = Split(ts.ReadLine, parDelimiter)
        j = UBound(locLinesList(i + 1), 1) 'zero-based index of the last field
        If locNumCols < j Then locNumCols = j
        i = i + 1
    Loop

    ts.Close
    locStreamOpen = False

    locNumRows = i
    If locNumRows = 0 Then Exit Function 'Empty file

    ReDim locData(1 To locNumRows, 1 To locNumCols + 1) As Variant

    'Copies the fields into the result array. The copy happens for every
    'field whether or not an exclude character was supplied.
    For i = 1 To locNumRows
        For j = 0 To UBound(locLinesList(i), 1)
            locField = locLinesList(i)(j)
            If parExcludeCharacter <> "" Then
                'The two ends are stripped one after the other, so a field
                'consisting of the exclude character alone becomes ""
                'instead of raising an error.
                If Left$(locField, 1) = parExcludeCharacter Then locField = Mid$(locField, 2)
                If Right$(locField, 1) = parExcludeCharacter Then locField = Left$(locField, Len(locField) - 1)
            End If
            locData(i, j + 1) = locField
        Next j
    Next i

    getDataFromFile = locData
    Exit Function

error_open_file:  'returns empty variant
unhandled_error:  'returns empty variant
    'Do not leave the file open when reading failed half-way.
    If locStreamOpen Then ts.Close
End Function
Are you locked into reading the CSV with ADO? I always seem to run into problems trying to read textfiles with ADO like you are experiencing. I usually just give up on the ADO side and read the file directly with a text reader to get more control.
Public Sub TestIt()
    ' Example driver: runs ReadText against a fixed sample file.
    Dim csvPath As String

    csvPath = "C:\test.csv"
    Call ReadText(csvPath)
End Sub
Public Sub ReadText(path As String)
    ' Reads the text file at `path` line by line and splits every line on
    ' commas. The split values are not used yet - the loop body is the place
    ' to process them.
    '
    ' Requires a reference to 'Microsoft Scripting Runtime' (scrrun.dll),
    ' or a switch to late binding.
    Const DELIM As String = ","

    Dim fileSystem As Scripting.FileSystemObject
    Dim reader As Scripting.TextStream
    Dim currentLine As String
    Dim fields() As String

    Set fileSystem = New Scripting.FileSystemObject
    Set reader = fileSystem.OpenTextFile(path, ForReading)

    Do Until reader.AtEndOfStream
        currentLine = reader.ReadLine
        fields = Split(currentLine, DELIM)
        'do something with the values
    Loop

    reader.Close
End Sub