I used the code from http://arranmaclean.wordpress.com/2010/07/20/net-mvc-upload-a-csv-file-to-database-with-bulk-upload/#comment-188 to upload a CSV file, read it, and insert its contents into the database. My problem now is how to pass the values from the CSV file to specific database table columns.
string line = string.Empty;
string[] strArray;
DataTable dt = new DataTable();
DataRow row;
// Splits on commas that are not inside double-quoted fields.
Regex r = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");
using (StreamReader sr = new StreamReader(fileName))
{
    // NOTE: this consumes the first line just to size the columns.
    line = sr.ReadLine();
    strArray = r.Split(line);
    Array.ForEach(strArray, s => dt.Columns.Add(new DataColumn()));
    while ((line = sr.ReadLine()) != null)
    {
        row = dt.NewRow();
        row.ItemArray = r.Split(line);
        dt.Rows.Add(row);
    }
}
and ...
private static String ProcessBulkCopy(DataTable dt)
{
    string Feedback = string.Empty;
    string connString = ConfigurationManager.ConnectionStrings["DataBaseConnectionString"].ConnectionString;
    using (SqlConnection conn = new SqlConnection(connString))
    {
        using (var copy = new SqlBulkCopy(conn))
        {
            conn.Open();
            copy.DestinationTableName = "BulkImportDetails";
            copy.BatchSize = dt.Rows.Count;
            try
            {
                copy.WriteToServer(dt);
                Feedback = "Upload complete";
            }
            catch (Exception ex)
            {
                Feedback = ex.Message;
            }
        }
    }
    return Feedback;
}
Below are my sample contents:
08/01/12,05:20:12 AM,243752,,South Lobby3,522557,IN
08/01/12,05:26:03 AM,188816,,North Lobby1,358711,IN
My DB table columns:
empno | date | time
I only need to insert the first three fields (e.g. 08/01/12, 05:20:12 AM, 243752) into their corresponding columns, then move on to the next row and insert it the same way. My CSV file doesn't have headers. I saw some code about passing the array values, but it requires headers. How can I pass the values even without a header row in my CSV file? Please help me, guys. Thank you.
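One approach (a minimal sketch, not tested against your exact file; it assumes the destination columns are named empno, date and time as shown above) is to give the DataTable those three column names up front, keep only the first three fields of each CSV row, and add explicit column mappings to the SqlBulkCopy. No header row is needed, because the mapping comes from the DataTable, not from the file:

using System.Data;
using System.Data.SqlClient;
using System.IO;
using System.Text.RegularExpressions;

// Sketch: read a header-less CSV, keep only the first three fields of each row,
// and map them by name onto the empno/date/time columns of the target table.
private static void BulkImport(string fileName, string connString)
{
    // Same quote-aware splitter as in the code above.
    Regex r = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");
    DataTable dt = new DataTable();
    dt.Columns.Add("date");   // field 0: 08/01/12
    dt.Columns.Add("time");   // field 1: 05:20:12 AM
    dt.Columns.Add("empno");  // field 2: 243752

    using (StreamReader sr = new StreamReader(fileName))
    {
        string line;
        while ((line = sr.ReadLine()) != null)            // no header line to skip
        {
            string[] fields = r.Split(line);
            dt.Rows.Add(fields[0], fields[1], fields[2]); // ignore the remaining fields
        }
    }

    using (SqlConnection conn = new SqlConnection(connString))
    using (SqlBulkCopy copy = new SqlBulkCopy(conn))
    {
        conn.Open();
        copy.DestinationTableName = "BulkImportDetails";
        // Name-based mappings, so the CSV field order (date, time, empno) does not
        // have to match the column order of the table (empno | date | time).
        copy.ColumnMappings.Add("empno", "empno");
        copy.ColumnMappings.Add("date", "date");
        copy.ColumnMappings.Add("time", "time");
        copy.WriteToServer(dt);
    }
}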
Related
I am a bit new to this, but my goal is to import the data from a CSV file into a SQL table, including two additional values for each row: the file name and a date. I was able to accomplish this using Entity Framework and iterating through each row of the file, but with the size of the files it takes too long to actually complete.
I am looking for a way to accomplish this import faster. I was looking into using CsvHelper with SqlBulkCopy, but was not sure whether there is a way to pass in the additional values needed for each row.
public void Process(string filePath)
{
    InputFilePath = filePath;
    DateTime fileDate = DateTime.Today;
    string[] fPath = Directory.GetFiles(InputFilePath);
    foreach (var file in fPath)
    {
        string fileName = Path.GetFileName(file);
        char[] delimiter = new char[] { '\t' };
        try
        {
            using (var db = new DatabaseName())
            {
                using (var reader = new StreamReader(file))
                {
                    string line;
                    int count = 0;
                    reader.ReadLine(); // skip the two header lines
                    reader.ReadLine();
                    while ((line = reader.ReadLine()) != null)
                    {
                        count++;
                        string[] row = line.Split(delimiter);
                        var rowload = new ImportDestinationTable()
                        {
                            ImportCol0 = row[0],
                            ImportCol1 = row[1],
                            ImportCol2 = TryParseNullable(row[2]),
                            ImportCol3 = row[3],
                            ImportCol4 = row[4],
                            ImportCol5 = row[5],
                            IMPORT_FILE_NM = fileName,
                            IMPORT_DT = fileDate
                        };
                        db.ImportDestinationTable.Add(rowload);
                        if (count > 100)
                        {
                            db.SaveChanges();
                            count = 0;
                        }
                    }
                    db.SaveChanges();
                }
            }
        }
        catch (Exception ex)
        {
            // log and continue so one bad file does not stop the batch
            Console.WriteLine(ex.Message);
        }
    }
}

static int? TryParseNullable(string val)
{
    int outValue;
    return int.TryParse(val, out outValue) ? (int?)outValue : null;
}
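If the bottleneck is EF change tracking, one option is to stage each file in a DataTable, stamp the two extra values (file name and date) on every row, and push the whole table with SqlBulkCopy. This is only a sketch under assumptions: the ImportCol0..5 / IMPORT_FILE_NM / IMPORT_DT column names are taken from the code above, and the destination table name and connection string are placeholders. CsvHelper could replace the manual Split if the files need real CSV parsing:

using System;
using System.Data;
using System.Data.SqlClient;
using System.IO;

// Sketch: stage a tab-delimited file in a DataTable, stamping the file name and
// file date on every row, then bulk-insert the whole table in one shot.
public static void BulkLoadFile(string file, string connString)
{
    var dt = new DataTable();
    dt.Columns.Add("ImportCol0");
    dt.Columns.Add("ImportCol1");
    dt.Columns.Add("ImportCol2", typeof(int)); // nullable int in the EF model
    dt.Columns.Add("ImportCol3");
    dt.Columns.Add("ImportCol4");
    dt.Columns.Add("ImportCol5");
    dt.Columns.Add("IMPORT_FILE_NM");
    dt.Columns.Add("IMPORT_DT", typeof(DateTime));

    string fileName = Path.GetFileName(file);
    DateTime fileDate = DateTime.Today;

    using (var reader = new StreamReader(file))
    {
        reader.ReadLine(); // skip the two header lines,
        reader.ReadLine(); // as in the original loop
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] fields = line.Split('\t');
            DataRow row = dt.NewRow();
            row[0] = fields[0];
            row[1] = fields[1];
            int col2;
            row[2] = int.TryParse(fields[2], out col2) ? (object)col2 : DBNull.Value;
            row[3] = fields[3];
            row[4] = fields[4];
            row[5] = fields[5];
            row["IMPORT_FILE_NM"] = fileName; // the two additional per-row values
            row["IMPORT_DT"] = fileDate;
            dt.Rows.Add(row);
        }
    }

    using (var conn = new SqlConnection(connString))
    using (var bulk = new SqlBulkCopy(conn))
    {
        conn.Open();
        bulk.DestinationTableName = "ImportDestinationTable"; // assumed name
        foreach (DataColumn col in dt.Columns)
            bulk.ColumnMappings.Add(col.ColumnName, col.ColumnName);
        bulk.WriteToServer(dt);
    }
}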
I am trying to execute a query (a basic SELECT statement with 10 fields). My table contains more than 500k rows. The C# application returns a response with only 4,260 rows, whereas the web UI returns all the records.
Why does my code return only partial data, and what is the best way to select all the records and load them into a C# DataTable? A code snippet would be very helpful.
using Google.Apis.Auth.OAuth2;
using System.IO;
using System.Threading;
using Google.Apis.Bigquery.v2;
using Google.Apis.Bigquery.v2.Data;
using System.Data;
using Google.Apis.Services;
using System;
using System.Security.Cryptography.X509Certificates;
namespace GoogleBigQuery
{
    public class Class1
    {
        private static void Main()
        {
            try
            {
                Console.WriteLine("Start Time: {0}", DateTime.Now.ToString());
                String serviceAccountEmail = "SERVICE ACCOUNT EMAIL";
                var certificate = new X509Certificate2(@"KeyFile.p12", "notasecret", X509KeyStorageFlags.Exportable);
                ServiceAccountCredential credential = new ServiceAccountCredential(
                    new ServiceAccountCredential.Initializer(serviceAccountEmail)
                    {
                        Scopes = new[] { BigqueryService.Scope.Bigquery, BigqueryService.Scope.BigqueryInsertdata, BigqueryService.Scope.CloudPlatform, BigqueryService.Scope.DevstorageFullControl }
                    }.FromCertificate(certificate));
                BigqueryService Service = new BigqueryService(new BaseClientService.Initializer()
                {
                    HttpClientInitializer = credential,
                    ApplicationName = "PROJECT NAME"
                });
                string query = "SELECT * FROM [publicdata:samples.shakespeare]";
                JobsResource j = Service.Jobs;
                QueryRequest qr = new QueryRequest();
                string ProjectID = "PROJECT ID";
                qr.Query = query;
                qr.MaxResults = Int32.MaxValue; // the service may cap these values
                qr.TimeoutMs = Int32.MaxValue;
                DataTable DT = new DataTable();
                int i = 0;
                QueryResponse response = j.Query(qr, ProjectID).Execute();
                string pageToken = null;
                // Check for null before touching the response.
                if (response != null)
                {
                    if (response.JobComplete == true)
                    {
                        int colCount = response.Schema.Fields.Count;
                        if (DT.Columns.Count == 0)
                        {
                            foreach (var Column in response.Schema.Fields)
                            {
                                DT.Columns.Add(Column.Name);
                            }
                        }
                        pageToken = response.PageToken;
                        if (response.Rows != null)
                        {
                            foreach (TableRow row in response.Rows)
                            {
                                DataRow dr = DT.NewRow();
                                for (i = 0; i < colCount; i++)
                                {
                                    dr[i] = row.F[i].V;
                                }
                                DT.Rows.Add(dr);
                            }
                        }
                        Console.WriteLine("No. of records read: {0} @ {1}", DT.Rows.Count.ToString(), DateTime.Now.ToString());
                        while (true)
                        {
                            int StartIndexForQuery = DT.Rows.Count;
                            Google.Apis.Bigquery.v2.JobsResource.GetQueryResultsRequest SubQR = Service.Jobs.GetQueryResults(response.JobReference.ProjectId, response.JobReference.JobId);
                            SubQR.StartIndex = (ulong)StartIndexForQuery;
                            //SubQR.MaxResults = Int32.MaxValue;
                            GetQueryResultsResponse QueryResultResponse = SubQR.Execute();
                            if (QueryResultResponse != null)
                            {
                                if (QueryResultResponse.Rows != null)
                                {
                                    foreach (TableRow row in QueryResultResponse.Rows)
                                    {
                                        DataRow dr = DT.NewRow();
                                        for (i = 0; i < colCount; i++)
                                        {
                                            dr[i] = row.F[i].V;
                                        }
                                        DT.Rows.Add(dr);
                                    }
                                }
                                Console.WriteLine("No. of records read: {0} @ {1}", DT.Rows.Count.ToString(), DateTime.Now.ToString());
                                if (null == QueryResultResponse.PageToken)
                                {
                                    break;
                                }
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine("Job is not complete yet");
                    }
                }
                else
                {
                    Console.WriteLine("Response is null");
                }
                int TotalCount = 0;
                if (DT != null && DT.Rows.Count > 0)
                {
                    TotalCount = DT.Rows.Count;
                }
                Console.WriteLine("End Time: {0}", DateTime.Now.ToString());
                Console.WriteLine("No. of records read from the Google BigQuery service: " + TotalCount.ToString());
            }
            catch (Exception e)
            {
                Console.WriteLine("Error Occurred: " + e.Message);
            }
            Console.ReadLine();
        }
    }
}
This sample query reads from a public dataset. The table contains 164,656 rows, but the response returns only 85,000 rows the first time, so I have to query again to get the next set of results. (I don't know whether this is the only way to get all the results.)
This sample has only 4 fields and still does not return all rows in one response. In my case the table contains more than 15 fields, and I get a response of ~4,000 rows out of ~10k, so I need to query again and again to get the remaining results. Selecting 1,000 rows takes up to 2 minutes with my current approach, so I am looking for the best way to select all the records in a single response.
Answer from user Pentium10:
There is no way to run a query and select a large response in a single shot. You can either paginate the results, or if you can create a job to export to files, then use the files generated in your app. Exporting is free.
Step to run a large query and export results to files stored on GCS:
1) Set allowLargeResults to true in your job configuration. You must also specify a destination table with the allowLargeResults flag.
Example:
"configuration":
{
"query":
{
"allowLargeResults": true,
"query": "select uid from [project:dataset.table]"
"destinationTable": [project:dataset.table]
}
}
2) Now your data is in the destination table you set. You need to create a new job and set the export property to export the table to file(s). Exporting is free, but you need to have Google Cloud Storage activated to put the resulting files there.
3) In the end you download your large files from GCS.
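For reference, the same job configuration can be expressed with the Google.Apis.Bigquery.v2 client used in the question. This is a sketch only; the project/dataset/table identifiers are placeholders, and Service is the authenticated BigqueryService from the code above:

using Google.Apis.Bigquery.v2.Data;

// Sketch: insert a query job with allowLargeResults and a destination table.
Job job = new Job
{
    Configuration = new JobConfiguration
    {
        Query = new JobConfigurationQuery
        {
            Query = "select uid from [project:dataset.table]",
            AllowLargeResults = true,
            DestinationTable = new TableReference
            {
                ProjectId = "project",  // placeholder
                DatasetId = "dataset",  // placeholder
                TableId = "table_out"   // placeholder
            }
        }
    }
};
Job started = Service.Jobs.Insert(job, "project").Execute();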
Then it was my turn to design the solution for better results.
Hoping this might help someone: you can retrieve the next set of paginated results using PageToken. Here is sample code showing how to use PageToken. I did like the idea of exporting for free, though. Here I write rows to a flat file, but you could add them to your DataTable instead. Obviously, it is a bad idea to keep a large DataTable in memory, though.
public void ExecuteSQL(BigqueryService bqservice, String ProjectID)
{
    string sSql = "SELECT r.Dealname, r.poolnumber, r.loanid FROM [MBS_Dataset.tblRemitData] R left join each [MBS_Dataset.tblOrigData] o on R.Dealname = o.Dealname and R.Poolnumber = o.Poolnumber and R.LoanID = o.LoanID Order by o.Dealname, o.poolnumber, o.loanid limit 100000";
    QueryRequest _r = new QueryRequest();
    _r.Query = sSql;
    QueryResponse _qr = bqservice.Jobs.Query(_r, ProjectID).Execute();
    string pageToken = null;
    if (_qr.JobComplete != true)
    {
        // job not finished yet! expecting more data
        while (true)
        {
            var resultReq = bqservice.Jobs.GetQueryResults(_qr.JobReference.ProjectId, _qr.JobReference.JobId);
            resultReq.PageToken = pageToken;
            var result = resultReq.Execute();
            if (result.JobComplete == true)
            {
                WriteRows(result.Rows, result.Schema.Fields);
                pageToken = result.PageToken;
                if (pageToken == null)
                    break;
            }
        }
    }
    else
    {
        // The first page is already on the response; if _qr.PageToken is set,
        // the remaining pages would still have to be fetched with GetQueryResults.
        WriteRows(_qr.Rows, _qr.Schema.Fields);
    }
}
The Web UI automatically flattens the data. This means that you see multiple rows for each nested field.
When you run the same query via the API, it won't be flattened, and you get fewer rows because the nested fields are returned as objects. You should check whether this is the case for you.
The other possibility is that you do indeed need to paginate through the results. Paging through list results explains this.
If you want to run only one job, then you should write your query output to a table, then export the table as JSON, and download the export from GCS.
Can anyone tell me how to handle this? I am uploading an Excel file, and it contains unnecessary tables like "_xlnm#Print_Titles" that I need to remove. This is my method, but it does not remove or delete that entry.
static string[] GetExcelSheetNames(string connectionString)
{
    OleDbConnection con = null;
    DataTable dt = null;
    con = new OleDbConnection(connectionString);
    con.Open();
    dt = con.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
    if (dt == null)
    {
        return null;
    }
    String[] excelSheetNames = new String[dt.Rows.Count];
    int i = 0;
    foreach (DataRow row in dt.Rows)
    {
        excelSheetNames[i] = row["TABLE_NAME"].ToString();
        if (excelSheetNames[i].Contains("_xlnm#Print_Titles") || excelSheetNames[i].Contains("Print_Titles"))
        {
            // this is the failing part: removing rows while enumerating dt.Rows
            row.Table.Rows.Remove(row);
            dt.AcceptChanges();
        }
        i++;
    }
    return excelSheetNames;
}
Instead of removing items in the foreach loop, we'll find them and add them to a list, then we'll go through that list and remove them from your data table.
static string[] GetExcelSheetNames(string connectionString)
{
    OleDbConnection con = null;
    DataTable dt = null;
    con = new OleDbConnection(connectionString);
    con.Open();
    dt = con.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
    if (dt == null)
    {
        return null;
    }
    String[] excelSheetNames = new String[dt.Rows.Count];
    var rowsToRemove = new List<DataRow>();
    for (int i = 0; i < dt.Rows.Count; i++)
    {
        var row = dt.Rows[i];
        excelSheetNames[i] = row["TABLE_NAME"].ToString();
        if (excelSheetNames[i].Contains("_xlnm#Print_Titles") || excelSheetNames[i].Contains("Print_Titles"))
        {
            rowsToRemove.Add(dt.Rows[i]);
        }
        // note: no extra i++ here -- the for loop already increments i
    }
    foreach (var dataRow in rowsToRemove)
    {
        dt.Rows.Remove(dataRow);
    }
    return excelSheetNames;
}
Those _xlnm and "$" entries are sheets that, as it turns out, aren't normally meant to be accessed by users.
You can solve this in 2 ways.
Ignore them
Drop them
The former is highly recommended.
To do this you need to use the following code:
if (!dt.Rows[i]["Table_Name"].ToString().Contains("FilterDatabase") && !dt.Rows[i]["Table_Name"].ToString().EndsWith("$'"))
{
    // only real, user-visible worksheets reach this point
}
You can use .Contains() and/or .EndsWith() to filter out those sheets, as in the sketch below.
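Putting that together, a minimal sketch (same schema-table approach as above, with the filters applied while building the result) could look like this:

using System.Data;
using System.Data.OleDb;
using System.Linq;

// Sketch: return only real, user-visible worksheet names.
static string[] GetRealExcelSheetNames(string connectionString)
{
    using (var con = new OleDbConnection(connectionString))
    {
        con.Open();
        DataTable dt = con.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        if (dt == null)
            return new string[0];

        return dt.Rows.Cast<DataRow>()
                 .Select(r => r["TABLE_NAME"].ToString())
                 .Where(n => !n.Contains("_xlnm") && !n.Contains("FilterDatabase"))
                 .ToArray();
    }
}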
I am trying to update a few columns in an Oracle table from my C# code.
Here is my method:
private static bool UpdateOracleTable(OracleTable table, string whereClause, List<int> entIDs)
{
    try
    {
        var tableName = table.ToString();
        using (OracleConnection conn = new OracleConnection(_oracleConnection))
        {
            conn.Open();
            foreach (var id in entIDs)
            {
                whereClause = String.Format(whereClause, id);
                var query = Resources.UpdateOracle;
                query = String.Format(query, tableName, "20", DateTime.Now.ToString("yyyy/MM/dd"), whereClause);
                using (OracleCommand cmd = new OracleCommand(query, conn))
                {
                    cmd.ExecuteNonQuery();
                }
            }
        }
        return true;
    }
    catch (Exception ex)
    {
        Log.Debug(LogType.Error, ex);
        return false;
    }
}
Here is the Query:
UPDATE
{0}
SET
SYNC_STATUS = '{1}'
,SYNC_DATE = TO_DATE('{2}', 'yyyy/mm/dd')
{3}
And the where clause will look something like:
WHERE ID = {0}
This method updates about 10 records, and the rest stay null. The method does return true, and I have debugged it; no exception is thrown.
Why does it not update all records?
This isn't an answer but might help debug the problem.
Instead of the line:
cmd.ExecuteNonQuery();
put in this:
int count = cmd.ExecuteNonQuery();
if (count == 0)
{
    Console.WriteLine("");
}
Put a breakpoint on the Console.WriteLine("") and run it. The debugger will stop if no rows were updated. You can then check the query, and whether or not that ID actually exists.
The problem was with the WHERE clause. Since it contains a placeholder {0}, once I had formatted the WHERE clause the first time, the ID always stayed at the value it was first formatted with.
This is what my new method looks like.
private static bool UpdateOracleTable(OracleTable table, string whereClause, List<int> entIDs)
{
    try
    {
        var tableName = table.ToString();
        using (OracleConnection conn = new OracleConnection(_oracleConnection))
        {
            conn.Open();
            foreach (var id in entIDs)
            {
                string originalWhere = whereClause;
                originalWhere = String.Format(originalWhere, id);
                var query = Resources.UpdateOracle;
                query = String.Format(query, tableName, "20", DateTime.Now.ToString("yyyy/MM/dd"), originalWhere);
                using (OracleCommand cmd = new OracleCommand(query, conn))
                {
                    bool success = cmd.ExecuteNonQuery() > 0;
                }
            }
        }
        return true;
    }
    catch (Exception ex)
    {
        Log.Debug(LogType.Error, ex);
        return false;
    }
}
As can be seen, I added a variable 'originalWhere' that gets formatted but, most importantly, is set from the original WHERE clause parameter on every iteration, so it always contains the placeholder.
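A related improvement, named plainly since it is not what the original Resources.UpdateOracle template does: parameterize the command with bind variables, so the SQL text never changes between iterations and the ID cannot be injected. A minimal sketch under the same table/columns; the parameter names are illustrative:

// Sketch: the same update with bind variables; only the bound ID changes per row.
// Parameters are added in the same order they appear in the SQL, so this works
// whether the provider binds by name or by position.
private static bool UpdateOracleTableParameterized(OracleTable table, List<int> entIDs)
{
    string sql = String.Format(
        "UPDATE {0} SET SYNC_STATUS = :status, SYNC_DATE = :syncDate WHERE ID = :id",
        table.ToString()); // table names cannot be bound, so this is still interpolated

    using (OracleConnection conn = new OracleConnection(_oracleConnection))
    {
        conn.Open();
        using (OracleCommand cmd = new OracleCommand(sql, conn))
        {
            cmd.Parameters.Add(new OracleParameter("status", "20"));
            cmd.Parameters.Add(new OracleParameter("syncDate", DateTime.Now.Date));
            OracleParameter idParam = new OracleParameter("id", 0);
            cmd.Parameters.Add(idParam);

            foreach (var id in entIDs)
            {
                idParam.Value = id; // only the bound value changes per row
                cmd.ExecuteNonQuery();
            }
        }
    }
    return true;
}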
I am inserting records into a MySQL DB by reading a PDF file. There are 14,000 records that are supposed to be inserted, but after some 700-800 records I get the exception "An invalid or incomplete configuration was used while creating session factory". I am using Fluent NHibernate and my code is in ASP.NET. Can anyone please help me with this issue?
for (int i = 1; i <= iPages; i++) // iPages value is 2213
{
    string strPageText = PdfTextExtractor.GetTextFromPage(pdfRdr, i);
    strPageText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(strPageText)));
    string[] strRows = strPageText.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);
    string strName = string.Empty;
    string[] strNames;
    string strStatus = string.Empty;
    string EmailAddress = string.Empty;
    int rowCount = 1;
    int iPtRowCnt = 0;
    bool caseFiveFlag = false;
    Patient objPt = new Patient();
    objPt.PatientContact = new PatientContact();
    foreach (string strRowText in strRows)
    {
        // here I am inserting records by parsing the .pdf file
        // my pdf file is 5 MB
    }
}