hello this is my firs thread ,
i'm trying to convert description of this page (https://www.tokopedia.com/indoislamicstore/cream-zaitun-arofah)
with regex and replace <br/> tag with new line and convert it to csv .
the datagridview it's alright but the csv got screwed
this is my code :
Dim dskrip As New System.Text.RegularExpressions.Regex("<p itemprop=""description"" class=""mt-20"">(.*?)\<\/p>\<\/div>")
Dim dskripm As MatchCollection = dskrip.Matches(rssourcecode0)
For Each itemdskrm As Match In dskripm
getdeskripsinew = itemdskrm.Groups(1).Value
Next
Dim deskripsinew As String = Replace(getdeskripsinew, ",", ";")
Dim deskripsitotal As String = Replace(deskripsinew, "<br/>", Environment.NewLine)
' ListView1.s = Environment.NewLine & deskripsinew
txtDeskripsi.Text = deskripsitotal
datascrapes.ColumnCount = 5
datascrapes.Columns(0).Name = "Title"
datascrapes.Columns(1).Name = "Price"
datascrapes.Columns(2).Name = "Deskripsi"
datascrapes.Columns(3).Name = "Gambar"
datascrapes.Columns(4).Name = "Total Produk"
Dim row As String() = New String() {getname, totalprice, deskripsitotal, directoryme + getfilename, "10"}
datascrapes.Rows.Add(row)
Dim filePath As String = Environment.GetFolderPath(Environment.SpecialFolder.Desktop) & "\" & "Tokopedia_Upload.csv"
Dim delimeter As String = ","
Dim sb As New StringBuilder
For i As Integer = 0 To datascrapes.Rows.Count - 1
Dim array As String() = New String(datascrapes.Columns.Count - 1) {}
If i.Equals(0) Then
For j As Integer = 0 To datascrapes.Columns.Count - 1
array(j) = datascrapes.Columns(j).HeaderText
Next
sb.AppendLine(String.Join(delimeter, array))
End If
For j As Integer = 0 To datascrapes.Columns.Count - 1
If Not datascrapes.Rows(i).IsNewRow Then
array(j) = datascrapes(j, i).Value.ToString
End If
Next
If Not datascrapes.Rows(i).IsNewRow Then
sb.AppendLine(String.Join(delimeter, array))
End If
Next
File.WriteAllText(filePath, sb.ToString)
this is the csv file
I'm not sure where your problem is looking at the CSV file, but there are certain cases where you'll want to quote the values for a CSV. There's no official spec but RFC 4180 is often used as an unofficial standard. I would recommend using a library like CSV Helper
Related
I tried to build a combination algorithm between 2 strings, unfortunately it has some errors.
Dim strWordsA() As String = TextBox1.Text.Split(",")
Dim strWordsB() As String = TextBox2.Text.Split(",")
Dim str As String = TextBox1.Text
Dim arr As String() = TextBox1.Text.Split(","c)
For i As Integer = 0 To TextBox1.Text.Split(",").Length - 1
Dim index As Integer = str.IndexOf(strWordsA(i))
TextBox1.Text = str.Insert(index + 2, "," & strWordsB(i))
str = TextBox1.Text
Next
so if we have Textbox1.Text = 1,2,3,4,5,6,7,8,9 and Textbox2.Text = a,b,c,f,d,b,i,h, and so on... I need to display this in a 3rd textbox
Textbox3.Text = 1,a,2,b,3,c,4,f and so on
so do I combine these 2 strings?
the first element in the index displays it incorrectly, otherwise it seems to work ok.
Try this:
Private Function MergeStrings(s1 As String, s2 As String) As String
Dim strWordsA() As String = s1.Split(","c)
Dim strWordsB() As String = s2.Split(","c)
Dim i As Integer = 0
Dim OutputString As String = String.Empty
While i < strWordsA.Length OrElse i < strWordsB.Length
If i < strWordsA.Length Then OutputString &= "," & strWordsA(i)
If i < strWordsB.Length Then OutputString &= "," & strWordsB(i)
i += 1
End While
If Not OutputString = String.Empty Then Return OutputString.Substring(1)
Return OutputString
End Function
Usage:
Dim s As String = MergeStrings("1,2,3,4,5,6,7,8,9", "a,b,c,f,d,b,i,h")
You will need to add your own validation to allow for trailing commas or no commas etc but it should work with different length input strings
EDIT: amended as per Mary's comment
I'm trying to make a program that downloads a bunch of domains and adds them windows hosts file but I'm having a bit of trouble. I keep getting an error when I try storing them in a list. I don't get why it doesn't work.
Sub Main()
Console.Title = "NoTrack blocklist to Windows Hosts File Converter"
Console.WriteLine("Downloading . . . ")
Dim FileDelete As String = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) & "/Downloads" & "/notracktemp.txt"
If System.IO.File.Exists(FileDelete) = True Then
System.IO.File.Delete(FileDelete)
End If
download()
Threading.Thread.Sleep(1000)
Dim s As New IO.StreamReader(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) & "/Downloads" & "/notracktemp.txt", True)
Dim tempRead As String ' = s.ReadLine
Dim tempSplit As String() ' = tempRead.Split(New Char() {" "})
Dim i As Integer = 0
Dim tempStore As String()
s.ReadLine()
s.ReadLine()
Do Until s.EndOfStream = True
tempRead = s.ReadLine
tempSplit = tempRead.Split(New Char() {" "})
Console.WriteLine(tempSplit(0))
tempStore(i) = tempSplit(0)'The part that gives me the error
i = i + 1
Loop
Console.ReadKey()
End Sub
Sub download()
Dim localDir As String = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile)
'"Enter file URL"
Dim url As String = "https://quidsup.net/notrack/blocklist.php?download"
'"Enter directory"
Dim dirr As String = localDir & "/Downloads" & "/notracktemp.txt"
My.Computer.Network.DownloadFile(url, dirr)
'System.IO.File.Delete(localDir & "/notracktemp.txt")
End Sub
tempStore() has to have a size
count number of lines in file with loop, then declare it as tempStore(i) where i is the amount of lines. Here is a function that counts the lines.
Function countlines()
Dim count As Integer
Dim s As New IO.StreamReader(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) & "/Downloads" & "/notracktemp.txt", True)
s.ReadLine()
s.ReadLine()
count = 0
Do Until s.EndOfStream = True
s.ReadLine()
count = count + 1
Loop
Console.WriteLine(count)
Return count
Console.ReadKey()
End Function
Then what you do is:
Dim count As Integer
count = countlines()
Dim tempStore(count) As String
I'm writing the values of two (1 column) DGVs and a text box to a text file. The text box value is the heading, DGV1 is a column of strings (such as SI CA S C...etc) and DGV2 is a column of numbers of the format 11.23 or 0.01. The formats are exactly how I want them as displayed in the DGVs, but that formatting doesn't carry over to the text file these values are output to. Here's what I want the textfile to look like:
A012345 101 1.03
SI 32.13
C 1.45
CA 0.03
Here's what I'm getting instead:
A012345 101 1.03
SI32.13015
C1.452359
CA0.032568
Here's the code:
Try
Dim da, da2 As OleDbDataAdapter
Dim ds, ds2 As DataSet
'open connection to nit2.xlsm'
Dim cn As New OleDb.OleDbConnection("Provider=Microsoft.ACE.OLEDB.12.0;Data Source=""" & TextBox2.Text & """;Extended Properties=""Excel 12.0;HDR=YES;""")
'Fill dataviewgrid1 with element symbols, string, once'
da = New OleDbDataAdapter("select * from [Sheet1$A:A" & lastrow & "]", cn)
ds = New System.Data.DataSet
da.Fill(ds)
DataGridView1.DataSource = ds.Tables(0)
'''''''''''''loop through each sample and write to text file'''''''''''''''''''''''
da2 = New OleDbDataAdapter("select * from [Sheet1$B:B" & lastrow & "]", cn)
ds2 = New System.Data.DataSet
da2.Fill(ds2)
DataGridView2.DataSource = ds2.Tables(0)
'write sample heading to textbox1, looped'
TextBox1.Text = "Q0" & xlWsheet1.Cells(1, 2).Value & " " & xlWsheet2.Cells(1, 2).Value
'write to text file each data analysis point with formatted heading, looped'
Dim title As String = TextBox1.Text
Dim sub_title As String = title.Substring(0, 16)
Using writer As New System.IO.StreamWriter(fName, True)
writer.WriteLine(sub_title) 'heading'
Dim symbol, SymbolT As String
Dim compo As String
For cell1 As Integer = 0 To (DataGridView1.Rows.Count - 2)
symbol = Me.DataGridView1(0, cell1).Value 'element symbol'
symbolT = symbol.Trim
compo = Me.DataGridView2(0, cell1).Value 'composition'
writer.WriteLine(symbolT & compo)
Next
writer.Close()
End Using
cn.Close()
How can I format the numbers from DGV2 so that they only display to 2 decimals?
I also need to set the spacing between SI and 32.13 (for example) so that the decimal points in each line line up.
Formatting...anyone?
Something like this ?
Dim symbol As String
Dim compo As String
Dim symbolSpan As Integer = 0 ' stores the MaxLength of symbol
Dim compoSpan As Integer = 0 ' stores the Max Integer Part Length of compo
Dim symbolList As New List(Of String) ' all trimmed symbol
Dim compoList As New List(Of String) ' all trimmed compo
Dim compoIntLengthList As New List(Of Integer)
Dim doubleValue As Double
Dim i As Integer
Dim j As Integer
For i = 0 To (DataGridView1.Rows.Count - 2)
' element symbol...
symbol = Me.DataGridView1(0, i).Value.ToString().Trim()
symbolList.Add(symbol)
If symbol.Length > symbolSpan Then
symbolSpan = symbol.Length
End If
' composition...
If Double.TryParse(Me.DataGridView2(0, i).Value.ToString().Trim(), doubleValue) Then
compoList.Add(doubleValue.ToString("R"))
' well, the ToString("R") is not really the displayed value.
' if your DGV allows scientific display for Double then
' you'll have to handle that here with another code...
compo = doubleValue.ToString("F0") ' gets the Integer part length...
compoIntLengthList.add(compo.Length)
If compo.Length > compoSpan Then
compoSpan = compo.Length
End If
Else
' ohhhh ! the Cell doesn't contain a valid number... :/
compo = Me.DataGridView2(0, i).Value.ToString().Trim()
compoList.Add(compo)
If compo.Contains(".") Then ' Watch out culture about dots and commas...
If compoSpan < compo.IndexOf("."c) Then
compoSpan = compo.IndexOf("."c)
End If
compoIntLengthList.add(compo.IndexOf("."c))
Else
If compoSpan < compo.Length Then
compoSpan = compo.Length
End If
compoIntLengthList.add(compo.Length)
End If
End If
Next
symbolSpan = symbolSpan + 1 ' space between symbol and compo.
Using writer As New System.IO.StreamWriter(fName, True)
writer.WriteLine(sub_title) 'heading'
For i = 0 To symbolList.Count - 1
symbol = symbolList.Item(i)
While symbol.Length < symbolSpan
symbol = symbol + " "
End While
compo = compoList.Item(i)
j = compoIntLengthList.Item(i)
While j < compoSpan
compo = " " + compo
j = j + 1 ' this is what was causing the endless loop.
End While
writer.WriteLine(symbol & compo)
Next
symbolList.Clear()
symbolList= Nothing
compoList.Clear()
compoList = Nothing
compoIntLengthList.Clear()
compoIntLengthList = Nothing
writer.Close()
End Using
I haven't tested the code. Just written it on the fly.. The approach looks ugly (and I agree) but that's one way I remember so far. Could be better with String.Format or StringBuilder I guess but don't remember well how those ones works, sorry.
EDIT : Missed the two decimals part...
Oh, sorry ! You want only two decimals.. Replace this :
compoList.Add(doubleValue.ToString("R"))
by this :
compoList.Add(doubleValue.ToString("F2"))
' or this :
compoList.Add(doubleValue.ToString("#.##")) ' to allow 0 or 1 decimal aswell
and replace this part :
' ohhhh ! the Cell doesn't contain a valid number... :/
compo = Me.DataGridView2(0, i).Value.ToString().Trim()
compoList.Add(compo)
If compo.Contains(".") Then ' Watch out culture about dots and commas...
If compoSpan < compo.IndexOf("."c) Then
compoSpan = compo.IndexOf("."c)
End If
compoIntLengthList.add(compo.IndexOf("."c))
Else
If compoSpan < compo.Length Then
compoSpan = compo.Length
End If
compoIntLengthList.add(compo.Length)
End If
by this :
' ohhhh ! the Cell doesn't contain a valid number... :/
compo = Me.DataGridView2(0, i).Value.ToString().Trim()
If compo.Contains(".") Then ' Watch out culture about dots and commas...
If compoSpan < compo.IndexOf("."c) Then
compoSpan = compo.IndexOf("."c)
End If
If compo.Length > (compo.IndexOf("."c) + 3) Then
compo = compo.SubString(0, compo.IndexOf("."c) + 3)
End If
compoIntLengthList.add(compo.IndexOf("."c))
Else
If compoSpan < compo.Length Then
compoSpan = compo.Length
End If
compoIntLengthList.add(compo.Length)
End If
compoList.Add(compo)
I am trying to create CSV file for below code. When i run the code initially it usually create the csv file. For same code it not creating CSV file. Let me Know What issue is
If counter = 1 Then
counter = 0
Dim headerText = ""
Dim csvFile As String = IO.Path.Combine(My.Application.Info.DirectoryPath, "test.csv")
If Not IO.File.Exists((csvFile)) Then
headerText = "Date,TIME ,Current, "
End If
Using outFile = My.Computer.FileSystem.OpenTextFileWriter(csvFile, True)
If headerText.Length > 0 Then
outFile.WriteLine(headerText)
End If
Dim date1 As String = "24-10-2014"
Dim time1 As String = CStr(TimeOfDay())
Dim Current As String = CStr(distance)
'Dim x As String = CStr(CDbl(date1 + "," + time1 + ",") + distance)
Dim x As String = date1
outFile.Write(x)
End Using
End If
I have a xls file, or a csv without quotes, and using vb.net need to turn it into a csv with quotes around every cell. If I open the xls/csv without quotes in MS Access, set every column to text and then export it, its in the format I need. Is there an easier way? If not, how do I do replicate this in vb.net? Thanks.
If you use the .Net OLE DB provider, you can specify the .csv formatting details in a schema.ini file in the folder your data files live in. For the 'unquoted' .csv the specs
should look like
[noquotes.csv] <-- file name
ColNameHeader=True <-- or False
CharacterSet=1252 <-- your encoding
Format=Delimited(,) <--
TextDelimiter= <-- important: no " in source file
Col1=VendorID Integer <-- your columns, of course
Col2=AccountNumber Char Width 15
for the 'quoted' .csv, just change the name and delete the TextDelimiter= line (put quotes around text fields is the default).
Then connect to the Text Database and execute the statement
SELECT * INTO [quotes.csv] FROM [noquotes.csv]
(as this creates quotes.csv, you may want to delete the file before each experimental run)
Added to deal with "Empty fields must be quoted"
This is a VBScript demo, but as the important things are the parameters for .GetString(), you'll can port it to VB easily:
Dim sDir : sDir = resolvePath( "§LibDir§testdata\txt" )
Dim sSrc : sSrc = "noquotes.csv"
Dim sSQL : sSQL = "SELECT * FROM [" & sSrc & "]"
Dim oTxtDb : Set oTxtDb = New cADBC.openDb( Array( "jettxt", sDir ) )
WScript.Echo goFS.OpenTextFile( goFS.BuildPath( sDir, sSrc ) ).ReadAll()
Dim sAll : sAll = oTxtDb.GetSelectFRO( sSQL ).GetString( _
adClipString, , """,""", """" & vbCrlf & """", "" _
)
WScript.Echo """" & Left( sAll, Len( sAll ) - 1 )
and output:
VendorID;AccountNumber;SomethingElse
1;ABC 123 QQQ;1,2
2;IJK 654 ZZZ;2,3
3;;3,4
"1","ABC 123 QQQ","1,2"
"2","IJK 654 ZZZ","2,3"
"3","","3,4"
(german locale, therefore field separator ; and decimal symbol ,)
Same output from this VB.Net code:
Imports ADODB
...
Sub useGetString()
Console.WriteLine("useGetString")
Const adClipString As Integer = 2
Dim cn As New ADODB.Connection
Dim rs As ADODB.Recordset
Dim sAll As String
cn.ConnectionString = _
"Provider=Microsoft.Jet.OLEDB.4.0;" _
& "Data Source=M:\lib\kurs0705\testdata\txt\;" _
& "Extended Properties=""text;"""
cn.Open()
rs = cn.Execute("SELECT * FROM [noquotes.csv]")
sAll = rs.GetString( adClipString, , """,""", """" & vbCrLf & """", "" )
cn.Close()
sAll = """" & Left( sAll, Len( sAll ) - 1 )
Console.WriteLine( sAll )
End Sub
Check out the method at this link.
What you can do to make sure quotes go around is append quotes to the beginning and end of each column data in the loop that is putting the column data in the file.
for example make the loop like this:
For InnerCount = 0 To ColumnCount - 1
Str &= """" & DS.Tables(0).Rows(OuterCount).Item(InnerCount) & ""","
Next
Public Class clsTest
Public Sub Test
Dim s as string = "C:\!Data\Test1.csv"
Dim Contents As String = System.IO.File.ReadAllText(s)
Dim aryLines As String() = Contents.Split(New String() { Environment.Newline }, StringSplitOptions.None)
Dim aryParts() As String
Dim aryHeader() As String
Dim dt As System.Data.DataTable
For i As Integer = 0 To aryLines.Length - 1
aryParts = SplitCSVLine(aryLines(i))
If dt Is Nothing And aryHeader Is Nothing Then
aryHeader = CType(aryParts.Clone, String())
ElseIf dt Is Nothing And aryHeader IsNot Nothing Then
dt = DTFromStringArray(aryParts, 1000, "", aryHeader)
Else
DTAddStringArray(dt, aryParts)
End If
Next
dt.dump
End Sub
Public Shared Function SplitCSVLine(strCSVQuotedLine As String) As String()
Dim aryLines As String() = strCSVQuotedLine.Split(New String() {Environment.NewLine}, StringSplitOptions.None)
Dim aryParts As String() = Nothing
For i As Integer = 0 To aryLines.Length - 1
Dim regx As New Text.RegularExpressions.Regex(",(?=(?:[^\""]*\""[^\""]*\"")*(?![^\""]*\""))")
aryParts = regx.Split(aryLines(i))
For p As Integer = 0 To aryParts.Length - 1
aryParts(p) = aryParts(p).Trim(" "c, """"c)
Next
Next
Return aryParts
End Function
Public Shared Function DTFromStringArray(ByVal aryValues() As String, Optional ByVal intDefaultColumnWidth As Integer = 255, Optional ByVal strTableName As String = "tblArray", Optional ByVal aryColumnNames() As String = Nothing) As DataTable
If String.IsNullOrWhiteSpace(strTableName) Then strTableName = "tblArray"
Dim dt As DataTable = New DataTable(strTableName)
Dim colNew(aryValues.GetUpperBound(0)) As DataColumn
If aryColumnNames Is Nothing Then
ReDim aryColumnNames(aryValues.Length)
Else
If aryColumnNames.GetUpperBound(0) < aryValues.GetUpperBound(0) Then
ReDim Preserve aryColumnNames(aryValues.Length)
End If
End If
For x As Integer = aryColumnNames.GetLowerBound(0) To aryColumnNames.GetUpperBound(0)
If String.IsNullOrWhiteSpace(aryColumnNames(x)) Then
aryColumnNames(x) = "Field" & x.ToString
Else
aryColumnNames(x) = aryColumnNames(x)
End If
Next
For i As Integer = 0 To aryValues.GetUpperBound(0)
colNew(i) = New DataColumn
With colNew(i)
.ColumnName = aryColumnNames(i) '"Value " & i
.DataType = GetType(String)
.AllowDBNull = False
.DefaultValue = ""
.MaxLength = intDefaultColumnWidth
.Unique = False
End With
Next
dt.Columns.AddRange(colNew)
Dim pRow As DataRow = dt.NewRow
For i As Integer = aryValues.GetLowerBound(0) To aryValues.GetUpperBound(0)
pRow.Item(i) = aryValues(i)
Next
dt.Rows.Add(pRow)
Return dt
End Function
Public Shared Sub DTAddStringArray(ByRef dt As DataTable, ByVal aryRowValues() As String)
Dim pRow As DataRow
pRow = dt.NewRow
For i As Integer = aryRowValues.GetLowerBound(0) To aryRowValues.GetUpperBound(0)
pRow.Item(i) = aryRowValues(i)
Next
dt.Rows.Add(pRow)
End Sub
End Class