convert csv data to DataTable in VB.net, capturing column names from row 0 - vb.net

I've adapted the code from the #tim-schmelter answer to question convert csv data to DataTable in VB.net (see below)
I would like to parse in the column titles from row 0 of the csv file
DT|Meter Number|Customer Account Number|Serial Number|Port...
but I'm not having any luck trying to figure out how to do this. any suggestions would be very appreciated.
Public Function csvToDatatable_2(ByVal filename As String, ByVal separator As String)
'////////////////////////////////////////
'Reads a selected txt or csv file into a datatable
'based on code from http://stackoverflow.com/questions/11118678/convert-csv-data-to-datatable-in-vb-net
'////////////////////////////////////////
Dim dt As System.Data.DataTable
Try
dt = New System.Data.DataTable
Dim lines = IO.File.ReadAllLines(filename)
Dim colCount = lines.First.Split(separator).Length
For i As Int32 = 1 To colCount
dt.Columns.Add(New DataColumn("Column_" & i, GetType(String)))
Next
For Each line In lines
Dim objFields = From field In line.Split(separator)
Dim newRow = dt.Rows.Add()
newRow.ItemArray = objFields.ToArray()
Next
Catch ex As Exception
Main.Msg2User(ex.Message.ToString)
Return Nothing
End Try
Return dt
End Function

Just loop thru all the line of the file. Use a boolean to check for the first row.
Public Function csvToDatatable_2(ByVal filename As String, ByVal separator As String)
Dim dt As New System.Data.DataTable
Dim firstLine As Boolean = True
If IO.File.Exists(filename) Then
Using sr As New StreamReader(filename)
While Not sr.EndOfStream
If firstLine Then
firstLine = False
Dim cols = sr.ReadLine.Split(separator)
For Each col In cols
dt.Columns.Add(New DataColumn(col, GetType(String)))
Next
Else
Dim data() As String = sr.Readline.Split(separator)
dt.Rows.Add(data.ToArray)
End If
End While
End Using
End If
Return dt
End Function

Here is a hybrid of the two solutions above, with a few other changes:
Public Shared Function FileToTable(ByVal fileName As String, ByVal separator As String, isFirstRowHeader As Boolean) As DataTable
Dim result As DataTable = Nothing
Try
If Not System.IO.File.Exists(fileName) Then Throw New ArgumentException("fileName", String.Format("The file does not exist : {0}", fileName))
Dim dt As New System.Data.DataTable
Dim isFirstLine As Boolean = True
Using sr As New System.IO.StreamReader(fileName)
While Not sr.EndOfStream
Dim data() As String = sr.ReadLine.Split(separator, StringSplitOptions.None)
If isFirstLine Then
If isFirstRowHeader Then
For Each columnName As String In data
dt.Columns.Add(New DataColumn(columnName, GetType(String)))
Next
isFirstLine = True ' Signal that this row is NOT to be considered as data.
Else
For i As Integer = 1 To data.Length
dt.Columns.Add(New DataColumn(String.Format("Column_{0}", i), GetType(String)))
Next
isFirstLine = False ' Signal that this row IS to be considered as data.
End If
End If
If Not isFirstLine Then
dt.Rows.Add(data.ToArray)
End If
isFirstLine = False ' All subsequent lines shall be considered as data.
End While
End Using
Catch ex As Exception
Throw New Exception(String.Format("{0}.CSVToDatatable Error", GetType(Table).FullName), ex)
End Try
Return result
End Function

Related

Opening a CSV file as a OLEDB Connection converts file text to double

I'm opening up a couple of CSV files and reading them in as a DataTable per the example I found here. The issue I am running into it the basic query I'm using to import the data is converting the column of IP addresses into Doubles. So I want to read in 10.0.0.1 and it shows up as 10.001. How can I get this column to read in as a string? I would like to not double process the file if I can.
Query I'm using is basic and is as follows:
SELECT * FROM [ComputerList.csv]
Here is my function to open and read the CSV file into a DataTable
Public Function OpenFile(ByVal strFolderPath as String, ByVal strQuery as String) as DataTable
Dim strConn as String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & strFolderPath & ";Extended Propteries=""text; HDR=Yes;FMT=Delimited"""
Dim conn as OleDb.OleDbConnection = New OleDb.OleDbConnection(strConn)
Try
conn.Open()
Dim cmd as OleDb.OleDbCommand = New OleDb.OleDbCommand(strQuery, conn)
Dim da as OleDb.OleDbDataAdapter = New OleDb.OleDbDataAdapter()
da.SelectCommand = cmd
Dim ds as DataSet = New DataSet()
da.Fill(ds)
da.Dispose()
return ds.Tables(0)
Catch
return Nothing
Finally
conn.Close()
End Try
End Function
i use this to read csv's and force all to string.
Public Function convert_csv_to_data_table(ByVal File As String, ByVal separator As String) As DataTable
Dim dt As New DataTable
Dim firstLine As Boolean = True
If IO.File.Exists(File) Then
Using sr As New StreamReader(File)
While Not sr.EndOfStream
If firstLine Then
firstLine = False
Dim cols = sr.ReadLine.Split(separator)
For Each col In cols
dt.Columns.Add(New DataColumn(col, GetType(String)))
Next
Else
Dim data() As String = sr.ReadLine.Split(separator)
dt.Rows.Add(data.ToArray)
End If
End While
End Using
End If
Return dt
End Function
EDIT:- this will only work with a separator btw
ok I tried variations of everyone's suggested and have settled on a hybrid between all of them. My goal was to read in a CSV file manipulate it in a DataTable form and them write it back out. Some of my CSV files had multiple lines with in a cell and some had deliminators within a cell. Below you can find my hybrid solution that utilized TextFieldParser to read in the file and break it up.
Public Function OpenFile(ByVal File as String, NyVal delim as String) as DataTable
Dim dt as New DataTable()
Dim firstline as Boolean = True
Using MyReader as New Microsoft.VisualBasic.FileIO.TextFieldParser(File)
MyReader.TextFieldType = FileIO.FieldType.Delimited
MyReader.SetDelimiters(delim)
Dim currentRow as String()
While Not MyReader.EndOfData
Try
currentRow = MyReader.ReadFields()
If firstline
firstline = false
For Each col in currentRow
dt.Columns.Add(New DataColumn(col.ToString(), System.Type.GetType("System.String")))
Next
Else
dt.Rows.Add(currentRow.ToArray())
End If
Catch ex as Microsoft.VisualBasic.FileIO.MalformedLineException
Console.WriteLIne("Line " + ex.Message + " is not valid and will be skipped")
End Try
End While
End Using
return dt
End Function

oracle bulkcopy from vb.net databale

I am loading the CSV file to vb.net datatable .
then I am using Bulk copy command to write to database.
The problem is if one date cell in the data table is empty, i am receiving
ORA:-01840 input value not long enough for date format.
How to resolve this.
Public Class Form1
Dim cn As New OracleConnection("Data Source =(DESCRIPTION =(ADDRESS = (PROTOCOL = TCP)(HOST = ipaddressofserver)(PORT = portofserver))(CONNECT_DATA =(SERVER = DEDICATED)(SERVICE_NAME = oracleservicename)) ) ; User Id=useridofdb;Password=passwordofdb")
cn.Open()
MsgBox("connection opened")
Dim dt As DataTable = ReadCSV("D:\test.csv")
DataGridView1.DataSource = dt
Try
Dim _bulkCopy As New OracleBulkCopy(cn)
_bulkCopy.DestinationTableName = "TEST_TABLE"
_bulkCopy.BulkCopyTimeout = 10800
_bulkCopy.WriteToServer(dt)
cn.Close()
cn.Dispose()
cn = Nothing
MsgBox("Finished")
Catch ex As Exception
MsgBox(ex.Message)
End Try
End Sub
Function ReadCSV(ByVal path As String) As System.Data.DataTable
Try
Dim sr As New StreamReader(path)
Dim fullFileStr As String = sr.ReadToEnd()
sr.Close()
sr.Dispose()
Dim lines As String() = fullFileStr.Split(ControlChars.Lf)
Dim recs As New DataTable()
Dim sArr As String() = lines(0).Split(","c)
For Each s As String In sArr
recs.Columns.Add(New DataColumn())
Next
Dim row As DataRow
Dim finalLine As String = ""
For Each line As String In lines
row = recs.NewRow()
finalLine = line.Replace(Convert.ToString(ControlChars.Cr), "")
row.ItemArray = finalLine.Split(","c)
recs.Rows.Add(row)
Next
Return recs
Catch ex As Exception
Throw ex
End Try
End Function
End Class

VB: Count number of columns in csv

So, quite simple.
I am importing CSVs into a datagrid, though the csv always has to have a variable amount of columns.
For 3 Columns, I use this code:
Dim sr As New IO.StreamReader("E:\test.txt")
Dim dt As New DataTable
Dim newline() As String = sr.ReadLine.Split(";"c)
dt.Columns.AddRange({New DataColumn(newline(0)), _
New DataColumn(newline(1)), _
New DataColumn(newline(2))})
While (Not sr.EndOfStream)
newline = sr.ReadLine.Split(";"c)
Dim newrow As DataRow = dt.NewRow
newrow.ItemArray = {newline(0), newline(1), newline(2)}
dt.Rows.Add(newrow)
End While
DG1.DataSource = dt
This works perfectly. But how do I count the number of "newline"s ?
Can I issue a count on the number of newlines somehow? Any other example code doesn't issue column heads.
If my csv file has 5 columns, I would need an Addrange of 5 instead of 3 and so on..
Thanks in advance
Dim sr As New IO.StreamReader(path)
Dim dt As New DataTable
Dim newline() As String = sr.ReadLine.Split(","c)
' MsgBox(newline.Count)
' dt.Columns.AddRange({New DataColumn(newline(0)),
' New DataColumn(newline(1)),
' New DataColumn(newline(2))})
Dim i As Integer
For i = 0 To newline.Count - 1
dt.Columns.AddRange({New DataColumn(newline(i))})
Next
While (Not sr.EndOfStream)
newline = sr.ReadLine.Split(","c)
Dim newrow As DataRow = dt.NewRow
newrow.ItemArray = {newline(0), newline(1)}
dt.Rows.Add(newrow)
End While
dgv.DataSource = dt
End Sub
Columns and item values can be added to a DataTable individually, using dt.Columns.Add and newrow.Item, so that these can be done in a loop instead of hard-coding for a specific number of columns. e.g. (this code assumes Option Infer On, so adjust as needed):
Public Function CsvToDataTable(csvName As String, Optional delimiter As Char = ","c) As DataTable
Dim dt = New DataTable()
For Each line In File.ReadLines(csvName)
If dt.Columns.Count = 0 Then
For Each part In line.Split({delimiter})
dt.Columns.Add(New DataColumn(part))
Next
Else
Dim row = dt.NewRow()
Dim parts = line.Split({delimiter})
For i = 0 To parts.Length - 1
row(i) = parts(i)
Next
dt.Rows.Add(row)
End If
Next
Return dt
End Function
You could then use it like:
Dim dt = CsvToDataTable("E:\test.txt", ";"c)
DG1.DataSource = dt

Trimming a datagridview duplicat rows except the recent one

i'm in VS2008 Studio, i have this datagridview with multiple columns which the last column contains a date and time value.
lot's of rows are pretty the same except by they're date column.
what i wanted to do is to trim the whole datagridview duplicate rows except they're most recent ones based on they're date column.
i have sth like this:
Administrator,192.168.137.221,2,file://C:\WMPub\WMRoot\industrial.wmv , 07.Jul.2014 - 23:11:59
Administrator,192.168.137.221,2,file://C:\WMPub\WMRoot\industrial.wmv , 07.Jul.2014 -
21:11:59
Administrator,192.168.137.221,2,file://C:\WMPub\WMRoot\industrial.wmv , 07.Jul.2014 - 22:11:59
Administrator,192.168.137.221,2,file://C:\WMPub\WMRoot\industrial.wmv , 07.Jul.2014 - 20:11:59
Administrator,192.168.137.221,2,file://C:\WMPub\WMRoot\industrial.wmv , 07.Jul.2014 - 11:11:59
Everyone ,192.168.137.221,2,file://C:\WMPub\WMRoot\industrial.wmv , 07.Jul.2014 - 17:11:59
Everyone ,192.168.137.221,2,file://C:\WMPub\WMRoot\industrial.wmv , 07.Jul.2014 - 14:11:59
the output i want should be like this:
Administrator 192.168.137.221 2 file://C:\WMPub\WMRoot\industrial.wmv 07.Jul.2014 - 23:11:59
Everyone 192.168.137.201 2 file://C:\WMPub\WMRoot\industrial.wmv 07.Jul.2014 - 17:11:59
....
please consider "," as column seprators! (i dont know how to draw a table here, sorry again)!
i have this snippet that trim the duplicate lines in a datagridview but it lacks preserving the latest entry:
Public Function RemoveDuplicateRows(ByVal dTable As DataTable, ByVal colName As String) As DataTable
Dim hTable As New Hashtable()
Dim duplicateList As New ArrayList()
For Each dtRow As DataRow In dTable.Rows
If hTable.Contains(dtRow(colName)) Then
duplicateList.Add(dtRow)
Else
hTable.Add(dtRow(colName), String.Empty)
End If
Next
For Each dtRow As DataRow In duplicateList
dTable.Rows.Remove(dtRow)
Next
Return dTable
End Function
what should i do?
thanks in advance
Here is some code that illustrates the approach:
Dim dict As New dictionary(Of String, DataRow)
For Each dtRow As DataRow In dTable.Rows
Dim key As String = dtRow("column1") + "," + dtRow("column2") ' + etc.
Dim dictRow As DataRow = Nothing
If dict.TryGetValue(key, dictRow) Then
'check and update date
'you can skip this part, if your data is sorted
If dtRow("dateColumn") > dictRow("dateColumn") Then
dictRow("dateColumn") = dtRow("dateColumn")
End If
Else
dict.Add(key, dtRow)
End If
Next
In the end dict contains the rows you need, you can get them via dict.Values.ToArray()
EDIT: I found the error - dictRow should be dtRow in the above code (now fixed). Then it should work. Here is a full version of self contained example (console app), since I wrote it anyway - focus on RemoveDuplicates, the rest is just prepwork:
Sub Main()
Dim dt As New DataTable
With dt.Columns
.Add("PublishingPoint")
.Add("Username")
.Add("IP")
.Add("Status")
.Add("Req URL")
.Add("Last seen", GetType(Date))
End With
'this populates the initial data table, use your method
Dim _assembly As Assembly = Assembly.GetExecutingAssembly()
Dim _textStreamReader As New StreamReader(_assembly.GetManifestResourceStream("ConsoleApplication16.data.csv"))
While Not _textStreamReader.EndOfStream
Dim sLine As String = _textStreamReader.ReadLine().TrimEnd
If String.IsNullOrEmpty(sLine) Then Exit While
Dim values() As String = sLine.Split(",")
Dim newRow As DataRow = dt.NewRow
For iColumnIndex As Integer = 0 To dt.Columns.Count - 1
Dim columnName As String = dt.Columns(iColumnIndex).ColumnName
newRow.Item(columnName) = values(iColumnIndex)
Next
dt.Rows.Add(newRow)
End While
Console.WriteLine("Old count: " & dt.Rows.Count)
Dim newDt As DataTable = RemoveDuplicates(dt, "Last seen")
Console.WriteLine("New count: " & newDt.Rows.Count)
Console.ReadLine()
End Sub
Private Function RemoveDuplicates(dt As DataTable, colName As String) As DataTable
Dim keyColumnNames As New List(Of String)
Dim exceptColumnsHash As New HashSet(Of String)({colName})
For Each col As DataColumn In dt.Columns
Dim columnName As String = col.ColumnName
If Not exceptColumnsHash.Contains(col.ColumnName) Then
keyColumnNames.Add(columnName)
End If
Next
Dim dict As New Dictionary(Of String, DataRow)
For Each dtRow As DataRow In dt.Rows
Dim keyColumnValues As New List(Of String)
For Each keyColumnName In keyColumnNames
keyColumnValues.Add(dtRow.Item(keyColumnName))
Next
Dim key As String = String.Join(",", keyColumnValues)
Dim dictRow As DataRow = Nothing
If dict.TryGetValue(key, dictRow) Then
If dtRow(colName) > dictRow(colName) Then
dictRow(colName) = dtRow(colName)
End If
Else
dict.Add(key, dtRow)
End If
Next
Dim dtReturn As DataTable = dt.Clone
For Each dtRow As DataRow In dict.Values
dtReturn.ImportRow(dtRow)
Next
Return dtReturn
End Function
To make this code run, you need to manually add a file to the project and set build action to "Embedded resource".

Placement of constructor

For Some reason in the following segment in my for each statment its declaring the that the csv data cant be null even though as you see their when i create the csvData Object is its created within the loop is that the correct placement.
For Each thisDocument As String In documentList
filename = Path.GetFileName(thisDocument)
SiteId = filename.Substring(0, filename.IndexOf("_"))
Dim orderNumbers As String()
Dim csvData As New DataTable
Dim importUtils As New ImportController
filename = Path.GetFileName(thisDocument)
orderNumbers = filename.Split("_")
location = filename.Substring(0, 3)
importUtils = New ImportController(cfb.TargetFolder & filename)
csvData = importUtils.ConvertCsvToDatatable(True)
ImportDataTableToSql(cfb.TargetFolder & filename, csvData, cfb.StoreCompany)
Next
edits to show convertcsvtodatatable()
Public Function ConvertCsvToDatatable(ByVal ColumnNames As Boolean) As DataTable
Try
Dim dt As New DataTable
For Each columnName In GetColumnsfromCsv(ColumnNames, ",")
dt.Columns.Add(columnName)
Next
Dim fileReader As New StreamReader(FileName)
If ColumnNames Then
fileReader.ReadLine()
End If
Dim line As String = fileReader.ReadLine
While Not IsNothing(line)
line = line.Replace(Chr(34), "")
dt.Rows.Add(line.Split(","))
line = fileReader.ReadLine
End While
fileReader.Close()
Return dt
Catch ex As Exception
'log to file
End Try
Return Nothing
End Function