How to bulk load several CSV files into SQL Server? - sql

I cobbled together the code below. I can write file paths into a table but I can't use the paths in the table to bulk load CSV files in a folder. Can some expert here take a look and let me know what's wrong? TIA.
-- List the contents of the CSV folder into a temp table.
-- NOTE(review): xp_dirtree is undocumented; the arguments are presumably
-- path, depth (1 = top level only) and an include-files flag -- confirm.
IF OBJECT_ID('tempdb..#DirectoryTree') IS NOT NULL
    DROP TABLE #DirectoryTree;

CREATE TABLE #DirectoryTree (
    id int IDENTITY(1,1)
    ,subdirectory nvarchar(512)
    ,depth int
    ,isfile bit
);

INSERT #DirectoryTree (subdirectory, depth, isfile)
EXEC master.sys.xp_dirtree 'C:\my_path\CSV Files\', 1, 1;

-- Preview only the entries that are files ending in .csv.
SELECT id, subdirectory, depth, isfile
FROM #DirectoryTree
WHERE isfile = 1 AND RIGHT(subdirectory, 4) = '.csv'
ORDER BY id;
GO

-- Rebuild the permanent copy of the listing. The existence check keeps the
-- script from failing on its first run, when ALLFILENAMES does not exist yet.
IF OBJECT_ID('ALLFILENAMES', 'U') IS NOT NULL
    DROP TABLE ALLFILENAMES;

-- Every listed entry is copied (not just the CSV files previewed above).
SELECT id, subdirectory, depth, isfile
INTO ALLFILENAMES
FROM #DirectoryTree;
--code above is fine; problems start here
--cursor loop
--bulk insert won't take a variable name, so make a sql and execute it instead:
-- NOTE(review): no cursor or loop follows; exactly one statement is built and run.
Declare #sql varchar(8000)
-- The quoted text 'ALLFILENAMES.subdirectory' is concatenated as a literal, so the
-- generated statement names a file called ALLFILENAMES.subdirectory; nothing is
-- read from the ALLFILENAMES table.
set #sql = 'BULK INSERT BULKACT FROM ''' + 'ALLFILENAMES.subdirectory' + ''' '
+ ' WITH (
DATAFILETYPE = ''char'',
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\n'',
FIRSTROW = 2
) '
-- Show the generated statement, then execute it.
print #sql
exec (#sql)
The problem is with the Bulk Insert. Here is the error message that I get: Msg 4860, Level 16, State 1, Line 28
Cannot bulk load. The file "ALLFILENAMES.subdirectory" does not exist.
So, 'ALLFILENAMES' is the name of the table and 'subdirectory' is the name of the field that contains all paths to all CSV files.

You need to SELECT from the ALLFILENAMES table. You can't just specify the table name + column name inside a string like that and expect it to work.
You also need to specify the full path in the FROM file name,
and you may use a temp table for ALLFILENAMES instead of a permanent table:
-- Build one BULK INSERT statement per CSV file recorded in ALLFILENAMES and
-- print the resulting batch. BULK INSERT cannot take its file name from a
-- variable or a column, so the full path is spliced into generated SQL text.
DECLARE @sql nvarchar(max);  -- nvarchar because subdirectory is nvarchar(512)

SELECT @sql = ISNULL(@sql, N'')
    + N'BULK INSERT BULKACT FROM ''C:\my_path\CSV Files\'
    -- Double any single quote in the file name so it cannot end the literal early.
    + REPLACE(ALLFILENAMES.subdirectory, N'''', N'''''')
    + N''' '
    + N' WITH (
DATAFILETYPE = ''char'',
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\n'',
FIRSTROW = 2
); ' + CHAR(13)
FROM ALLFILENAMES
-- The listing also holds folders and non-CSV files; generate nothing for those.
WHERE isfile = 1 AND RIGHT(subdirectory, 4) = N'.csv';

PRINT @sql;
There is also a WITH (FORMAT = 'CSV') option for BULK INSERT (SQL Server 2017 and later) for loading CSV files.

Thanks for the help, Sqiurrel. I got the code below to work, and add some comments. It's really ugly though. I guess SQL Server really isn't designed for these kinds of things...
----------------------------------------------------------
-- Create 5 tables (dbo.CSV1 .. dbo.CSV5) and all fields in tables.
-- A table name cannot be supplied through a variable, so each CREATE TABLE
-- statement is assembled as text and run with EXEC.
DECLARE @intFlag INT;
DECLARE @cmd nvarchar(1000);
DECLARE @MyTableName nvarchar(100);

SET @intFlag = 1;
WHILE (@intFlag <= 5)
BEGIN
    PRINT STR(@intFlag);

    -- STR() left-pads the number with spaces; REPLACE strips the padding.
    SET @MyTableName = 'CSV' + REPLACE(STR(@intFlag), ' ', '');
    PRINT @MyTableName;

    SET @cmd = 'CREATE TABLE dbo.' + QUOTENAME(@MyTableName, '[')
        + '(Name varchar(255), Address varchar(255), Age varchar(255), Work varchar(255));';
    PRINT @cmd;

    -- Skip tables that already exist so the script can be run more than once.
    IF OBJECT_ID('dbo.' + QUOTENAME(@MyTableName, '['), 'U') IS NULL
        EXEC (@cmd);

    SET @intFlag = @intFlag + 1;
END
GO
----------------------------------------------------------
-- List the contents of the source folder into a temp table.
-- NOTE(review): xp_dirtree is undocumented; the arguments are presumably
-- path, depth (1 = top level only) and an include-files flag -- confirm.
IF OBJECT_ID('tempdb..#DirectoryTree') IS NOT NULL
    DROP TABLE #DirectoryTree;

CREATE TABLE #DirectoryTree (
    id int IDENTITY(1,1)
    ,subdirectory nvarchar(512)
    ,depth int
    ,isfile bit
);

INSERT #DirectoryTree (subdirectory, depth, isfile)
EXEC master.sys.xp_dirtree 'C:\your_path_here\', 1, 1;

-- Preview only the entries that are files ending in .csv.
SELECT id, subdirectory, depth, isfile
FROM #DirectoryTree
WHERE isfile = 1 AND RIGHT(subdirectory, 4) = '.csv'
ORDER BY id;
GO

-- Rebuild the permanent copy of the listing. The existence check keeps the
-- script from failing on its first run, when ALLFILENAMES does not exist yet.
IF OBJECT_ID('ALLFILENAMES', 'U') IS NOT NULL
    DROP TABLE ALLFILENAMES;

-- Every listed entry is copied (not just the CSV files previewed above).
SELECT id, subdirectory, depth, isfile
INTO ALLFILENAMES
FROM #DirectoryTree;
----------------------------------------------------------
-- Build one BULK INSERT statement per file listed in ALLFILENAMES, then run the
-- whole batch. BULK INSERT won't take a variable for the file name, so the
-- statements are generated as text and executed instead.
-- NOTE(review): the target table name is taken verbatim from the file name
-- (subdirectory); verify that it matches the tables being loaded.
DECLARE @sql nvarchar(max);  -- nvarchar because subdirectory is nvarchar(512)

SELECT @sql = ISNULL(@sql, N'')
    + N'BULK INSERT ' + ALLFILENAMES.subdirectory
    -- Double any single quote in the file name so it cannot end the literal early.
    + N' FROM ''C:\your_path_here\' + REPLACE(ALLFILENAMES.subdirectory, N'''', N'''''') + N''' '
    + N' WITH (
DATAFILETYPE = ''char'',
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\n'',
FIRSTROW = 2
); ' + CHAR(13)
FROM ALLFILENAMES
WHERE isfile = 1;  -- folders are listed too and cannot be bulk loaded

PRINT @sql;
EXEC (@sql);
----------------------------------------

Related

MSSMS - CSV file contains data but returns NULL with stored procedures

I have been trying my darndest to get the code described below to work. I am very inexpert at MSSMS and SQL. That said, I love the efficiency of SQL databases and would really love to make this code work.
I have tested my CSV files with this code:
-- Load one test file into BCPData, starting at the second row of the file.
BULK INSERT BCPData
FROM 'D:\cheese\bcp_test.csv'
WITH (
    FIELDTERMINATOR = ',',
    ROWTERMINATOR = '0x0a',
    FIRSTROW = 2
);
GO
They import easily and the data appears.
However, if I try to use the code shown below (I need code that automatically imports multiple CSV files into my table), I only get "NULL" results in the columns.
My query is as follows:
exec ImportFiles 'd:\cheese\' , 'd:\cheese\Archive' , 'bcp*.csv' , 'MergeBCPData'
I run this query after using the following code to create the necessary stored procedures:
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ImportFiles]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
    drop procedure [dbo].[ImportFiles]
GO
-- ImportFiles
-- Imports every file in @FilePath that matches @FileNameMask, one at a time:
--   1. bulk loads the file into the global temp table ##Import (one column, s),
--   2. executes @MergeProc,
--   3. moves the file into @ArchivePath.
-- Parameters:
--   @FilePath      folder holding the files to import
--   @ArchivePath   folder the imported files are moved to
--   @FileNameMask  wildcard handed to "dir /B" to select the files
--   @MergeProc     name of the procedure executed after each file is loaded
-- Uses master..xp_cmdshell for the directory listing and for the move.
create procedure ImportFiles
    @FilePath varchar(1000) = 'd:\cheese\' ,
    @ArchivePath varchar(1000) = 'd:\cheese\Archive\' ,
    @FileNameMask varchar(1000) = 'bcp*.csv' ,
    @MergeProc varchar(128) = 'MergeBCPData'
AS
set nocount on

declare @FileName varchar(1000) ,
        @File varchar(1000)
-- two paths plus two file names are concatenated for the move command
declare @cmd varchar(8000)

-- Both folders are used as plain string prefixes, so make sure they end in a
-- backslash; otherwise 'd:\cheese\Archive' + 'x.csv' becomes 'd:\cheese\Archivex.csv'.
if len(@FilePath) > 0 and right(@FilePath, 1) <> '\'
    select @FilePath = @FilePath + '\'
if len(@ArchivePath) > 0 and right(@ArchivePath, 1) <> '\'
    select @ArchivePath = @ArchivePath + '\'

-- ##Import is a global temp table; a run that failed part-way may have left it behind.
if object_id('tempdb..##Import') is not null
    drop table ##Import
create table ##Import (s varchar(8000))
create table #Dir (s varchar(8000))

/*****************************************************************/
-- Import file
/*****************************************************************/
select @cmd = 'dir /B ' + @FilePath + @FileNameMask
insert #Dir exec master..xp_cmdshell @cmd
-- drop the blank line and the "not found" message that dir can emit
delete #Dir where s is null or s like '%not found%'

while exists (select * from #Dir)
begin
    select @FileName = min(s) from #Dir
    select @File = @FilePath + @FileName

    -- Strip double quotes and double up single quotes so the path stays
    -- inside its string literal in the generated statement.
    select @cmd = 'bulk insert ##Import from '''
        + replace(replace(@File, '"', ''), '''', '''''')
        + ''' with (FIELDTERMINATOR = '','',ROWTERMINATOR = ''0x0a'')'

    truncate table ##Import

    -- import the data
    exec (@cmd)

    -- remove filename just imported
    delete #Dir where s = @FileName

    exec @MergeProc

    -- Archive the file
    select @cmd = 'move ' + @FilePath + @FileName + ' ' + @ArchivePath + @FileName
    exec master..xp_cmdshell @cmd
end

drop table ##Import
drop table #Dir
go
-- Drop any existing dbo.MergeBCPData so the CREATE below can run again.
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[MergeBCPData]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[MergeBCPData]
GO
-- MergeBCPData: inserts one BCPData row for every row currently in ##Import.
-- Takes no parameters.
create procedure MergeBCPData
AS
set nocount on
-- insert data to production table
insert BCPData
(
City ,
Visit_Duration_Seconds ,
Timezone ,
Most_Likely_Company
)
-- NOTE(review): every SUBSTRING argument below is a quoted string literal, not a
-- column of ##Import, so each inserted row receives the same constant values
-- ('City', 'Visit_Durati', 'Tim', 'Most_Likely_Company') whatever the file holds.
-- Presumably these were meant to slice the imported line -- confirm the layout.
select
SUBSTRING('City', 1, 5),
SUBSTRING('Visit_Duration_Seconds', 1, 12),
SUBSTRING('Timezone', 1, 3),
SUBSTRING('Most_Likely_Company',1, 30)
from ##Import
go
Any help would be very appreciated. I'm hopeful it is just an error that my inexperienced eyes are too novel to catch. THANK YOU!
You stated 'I need an code that automatically imports multiple CSV files into my table'. Is there a pattern in the file names that you can exploit? Do the file names have dates in them, per chance? If there is some repeating pattern that you can exploit, like a series of dates, you can loop through all the files in your folder, and append all files to one table, in one go. Check out the code below, and post back if you have questions.
-- Bulk load one dated file per day, walking back from yesterday:
-- \\your_path_here\FTP\<yyyymmdd>_Daily.csv into [dbo].[Daily].
DECLARE @intFlag INT;
DECLARE @fullpath1 varchar(1000);
DECLARE @cmd1 nvarchar(1000);

SET @intFlag = 1;
WHILE (@intFlag <= 50) -- we are running 50 loops...change this as needed
BEGIN
    PRINT @intFlag;

    -- Style 112 renders the date as yyyymmdd. The surrounding quotes are part
    -- of the value so it can be dropped straight into the statement below.
    SELECT @fullpath1 = '''\\your_path_here\FTP\'
        + CONVERT(varchar(8), DATEADD(day, -@intFlag, GETDATE()), 112)
        + '_Daily.csv''';

    SELECT @cmd1 = 'bulk insert [dbo].[Daily] from ' + @fullpath1
        + ' with (FIELDTERMINATOR = ''\t'', FIRSTROW = 5, ROWTERMINATOR=''0x0a'')';
    EXEC (@cmd1);

    SET @intFlag = @intFlag + 1;
END
Here are some common date formats.
http://www.sql-server-helper.com/tips/date-formats.aspx
Again, I'm assuming you have dates in your file names.

Bulk insert with list of files

I created a SQL procedure to import data from a text file. However, there are a lot of files (about 80) and I cannot list their names by hand. The names are formatted like 'DATA_XXXXXXX.TXT'.
-- Load a single file into dbo.Student. The path is embedded in the statement
-- text, which is then run through sp_executesql.
DECLARE @sql1 nvarchar(max) = N'BULK INSERT dbo.Student FROM '''
    + N'D:\NEW_FOLDER\DATA_20190222'
    + N'.TXT'
    + N''' WITH
(
FIELDTERMINATOR = ''|'',
MAXERRORS = 10000
);';
EXEC sys.sp_executesql @sql1;
I want the data from all of these files to be loaded into the table. How can I do that?
You could do it with a while loop, insert the values into a temptable and increment the file name variable with dynamic SQL:
-- Work queue of file-name suffixes: each row stands for D:\NEW_FOLDER\DATA_<suffix>.TXT.
CREATE TABLE #TEMP_FILENAMES
(
    FILENAME VARCHAR(50)
);

INSERT INTO #TEMP_FILENAMES (FILENAME)
VALUES ('20190222');
INSERT INTO #TEMP_FILENAMES (FILENAME)
VALUES ('20190223');

-- Character type on purpose: the value is concatenated into the file path,
-- and an INT operand would force the path text to be converted to a number.
DECLARE @YEARMMDD VARCHAR(50);
DECLARE @sql1 nvarchar(max);

-- Take one suffix at a time, load its file, then remove it from the queue.
WHILE EXISTS (SELECT * FROM #TEMP_FILENAMES)
BEGIN
    SET @YEARMMDD = (SELECT TOP 1 FILENAME FROM #TEMP_FILENAMES ORDER BY FILENAME);

    SET @sql1 = N'BULK INSERT dbo.Student FROM '''
        + N'D:\NEW_FOLDER\DATA_' + @YEARMMDD
        + N'.TXT'
        + N''' WITH
(
FIELDTERMINATOR = ''|'',
MAXERRORS = 10000
);';
    EXEC sys.sp_executesql @sql1;

    DELETE FROM #TEMP_FILENAMES WHERE FILENAME = @YEARMMDD;
END

SQL Bulk Insert in a loop

I'm trying to run BULK INSERT in a loop, going through the files in a directory whose names end with a sequence number. Below is my solution:
-- Attempt to load filename_1.csv through filename_10.csv into dbo.Intraday,
-- one BULK INSERT per loop iteration.
DECLARE #startFlag INT
DECLARE #endFlag INT
DECLARE #fileName varchar(50)
SET #startFlag = 1
SET #endFlag = 10
WHILE (#startFlag <= #endFlag)
BEGIN
-- Build the path for the current sequence number.
SET #fileName = 'c:\path to file\filename_' + cast(#startFlag as varchar) + '.csv'
-- NOTE(review): FROM is given a variable here rather than a quoted file name literal.
BULK
INSERT dbo.Intraday
FROM #fileName
WITH
(
FIELDTERMINATOR = '|',
ROWTERMINATOR = '\n'
)
SET #startFlag = #startFlag + 1
END
GO
but it doesn't seem to work. Is there anything I've overlooked, or something missing that I need to fix?
You can't use variables or expressions all the places you might like in TSQL. You'll have to use dynamic SQL:
-- BULK INSERT needs its file name as a literal, so the statement is built as
-- text around the variable's value and then executed.
DECLARE @fileName nvarchar(2000) = N'foo.csv';

-- Single quotes in the name are doubled so they stay inside the literal.
DECLARE @sql nvarchar(max) = N'
BULK
INSERT dbo.Intraday
FROM ''' + REPLACE(@fileName, N'''', N'''''') + N'''
WITH
(
FIELDTERMINATOR = ''|'',
ROWTERMINATOR = ''\n''
)';

EXEC (@sql);
You cannot use a variable name after FROM; you have to provide the file name itself there, not a variable. So you need to build the complete BULK INSERT statement dynamically. Please refer to the sample code below:
-- Build the complete BULK INSERT statement as text, with the file name spliced
-- in as a literal, and execute it.
DECLARE @sql nvarchar(max);
DECLARE @fileName varchar(50);

SET @fileName = 'C:\Input.txt';

-- Single quotes in the name are doubled so they stay inside the literal.
SET @sql = N'BULK
INSERT dbo.Intraday
FROM ''' + REPLACE(@fileName, '''', '''''') + N'''
WITH
(
FIELDTERMINATOR = ''|'',
ROWTERMINATOR = ''\n''
)';

EXEC (@sql);

T-SQL Dynamic table create

So this is kind of a follow on from my last question
I have a string that looks like this:
Acc_id,Field label,Data point
I'd like to create a table from the above string using something like
CREATE TABLE #temp
(Acc_id NVARCHAR(MAX),
Field label REAL,
Data point REAL)
The commas separate the columns - it needs to be dynamic so if more columns appear they get created in the table.
UPDATE:
this is what I have thus far from the net but i need to make the first column NVARCHAR and the rest REAL rather than the other way round.
-- Build a global temp table whose columns are named after the first line of a
-- delimited text file, then load the remaining lines into it.
-- As written, every column except the last is varchar(100) and the last is real.
DECLARE @path nvarchar(max);
DECLARE @execSQL nvarchar(max);
DECLARE @tempstr varchar(1000);
DECLARE @col varchar(1000);
DECLARE @table nvarchar(1000);

SET @path = 'c:\temp\Book2.txt';

-- Create a temp table with one column to hold the first row of the csv file
CREATE TABLE #tbl (line VARCHAR(1000));

SET @execSQL =
'BULK INSERT #tbl
FROM ''' + @path + '''
WITH (
FIELDTERMINATOR ='','',
FIRSTROW = 1,
ROWTERMINATOR = ''\n'',
LASTROW = 1
)
';
EXEC sp_executesql @stmt = @execSQL;

-- The names are used unquoted below, so spaces are swapped for underscores.
UPDATE #tbl SET line = REPLACE(line, ' ', '_') WHERE line LIKE '% %';

SET @col = '';
-- Tabs are turned into commas so either delimiter splits the header.
SET @tempstr = (SELECT TOP 1 RTRIM(REPLACE(line, CHAR(9), ',')) FROM #tbl);
DROP TABLE #tbl;

-- Peel one name off the front of the list per iteration.
WHILE CHARINDEX(',', @tempstr) > 0
BEGIN
    SET @col = @col + LTRIM(RTRIM(SUBSTRING(@tempstr, 1, CHARINDEX(',', @tempstr) - 1))) + ' varchar(100),';
    SET @tempstr = SUBSTRING(@tempstr, CHARINDEX(',', @tempstr) + 1, LEN(@tempstr));
END
SET @col = @col + @tempstr + ' real';

-- Drop the same global table the existence check looks for.
IF OBJECT_ID('tempdb..##temptable') IS NOT NULL
    DROP TABLE ##temptable;

SET @table = 'create table ##temptable (' + @col + ')';
EXEC sp_executesql @stmt = @table;

-- Load data from csv
SET @execSQL =
'BULK INSERT ##temptable
FROM ''' + @path + '''
WITH (
FIELDTERMINATOR ='','',
FIRSTROW = 2,
ROWTERMINATOR = ''\n''
)
';
EXEC sp_executesql @stmt = @execSQL;
Thank you
Rob
The Following works perfectly for what I want to do
-- Build the global temp table ##temptable from the header line of a delimited
-- text file and load the rest of the file into it.
-- Column typing: the first column is nvarchar(max); every later column is real.
DECLARE @path nvarchar(max);
DECLARE @safePath nvarchar(max);
DECLARE @execSQL nvarchar(max);
DECLARE @tempstr varchar(max);
DECLARE @col nvarchar(max);
DECLARE @coltype varchar(20);
DECLARE @table nvarchar(max);

SET @path = 'c:\temp\Book2.txt';
-- Single quotes in the path are doubled so they stay inside the literal.
SET @safePath = REPLACE(@path, '''', '''''');

-- Create a temp table with one column to hold the first row of the csv file
IF OBJECT_ID('tempdb..#tbl') IS NOT NULL
    DROP TABLE #tbl;
CREATE TABLE #tbl (line VARCHAR(1000));

SET @execSQL =
'BULK INSERT #tbl
FROM ''' + @safePath + '''
WITH (
FIELDTERMINATOR ='','',
FIRSTROW = 1,
ROWTERMINATOR = ''\n'',
LASTROW = 1
)
';
EXEC sp_executesql @stmt = @execSQL;

UPDATE #tbl SET line = REPLACE(line, ' ', '_') WHERE line LIKE '% %';

-- Tabs are turned into commas so either delimiter splits the header.
SET @tempstr = (SELECT TOP 1 RTRIM(REPLACE(line, CHAR(9), ',')) FROM #tbl);
DROP TABLE #tbl;

-- A trailing comma lets one loop handle every name, including the last one
-- and the single-column case (which has no comma to split on).
SET @tempstr = @tempstr + ',';
SET @col = N'';
SET @coltype = ' nvarchar(max)';
WHILE CHARINDEX(',', @tempstr) > 0
BEGIN
    SET @col = @col
        + QUOTENAME(LTRIM(RTRIM(SUBSTRING(@tempstr, 1, CHARINDEX(',', @tempstr) - 1))))
        + @coltype + ',';
    SET @tempstr = SUBSTRING(@tempstr, CHARINDEX(',', @tempstr) + 1, LEN(@tempstr));
    SET @coltype = ' real';  -- only the first column is text
END
SET @col = LEFT(@col, LEN(@col) - 1);  -- drop the final comma

IF OBJECT_ID('tempdb..##temptable') IS NOT NULL
    DROP TABLE ##temptable;

SET @table = N'create table ##temptable (' + @col + N')';
EXEC sp_executesql @stmt = @table;

-- Load data from csv
SET @execSQL =
'BULK INSERT ##temptable
FROM ''' + @safePath + '''
WITH (
FIELDTERMINATOR ='','',
FIRSTROW = 2,
ROWTERMINATOR = ''\n''
)
';
EXEC sp_executesql @stmt = @execSQL;

SELECT * FROM ##temptable;
I would suggest doing a while loop and concatenating a string together for your table creation. This can be useful for parsing through delimiter-separated lists. Something similar to the below should get you started.
-- Walk a comma-terminated list and append one ",sum(<name>) as Column<n>"
-- fragment to @SQLConstructor for each element.
DECLARE @IDList varchar(max);
DECLARE @CurrentID varchar(128);
DECLARE @SQLConstructor varchar(max);
DECLARE @i int;
DECLARE @pos int;

SET @IDList = 'Field1,Field2,Field3,';
SET @SQLConstructor = '';  -- must start non-NULL: NULL + text stays NULL
SET @i = 1;
SET @pos = CHARINDEX(',', @IDList);

WHILE @pos <> 0
BEGIN
    -- Loop through elements: everything before the first comma is the current name
    SET @CurrentID = LEFT(@IDList, @pos - 1);
    SET @SQLConstructor = @SQLConstructor
        + ',sum(' + @CurrentID + ') as Column' + CAST(@i AS varchar(10));

    -- Reset loop: remove the consumed element and its comma
    SET @IDList = STUFF(@IDList, 1, @pos, '');
    SET @pos = CHARINDEX(',', @IDList);
    SET @i = @i + 1;
END

Creating table with the same columns as in a csv

I am writing a stored procedure which is supposed to take data from a csv file and insert into a table. My problem is that the number of columns in the csv file are not fixed(ie number of columns is variable). So I need some way to create a temporary table with exactly the same number of columns as in the csv file. So that I can use bulk insert.
Well I tried solving the issue by writing a sp which will take the csv file path as parameter and create a table names as temptable with the same format as that of the number of columns in the csv. CSV file looks like
eid,ename,esalary,etemp
1,Mark,1000,
2,Peter,1000,
Stored Proc script
-- createtable
-- Recreates the permanent table "temptable" with one varchar(100) column per
-- name found on the first line of the file at @path, then bulk loads the rest
-- of the file (from line 2) into it using ',' as the field terminator.
-- Parameters:
--   @path  path of the delimited text file to import
create proc createtable
    @path nvarchar(50)
as
begin
    declare @execSQL nvarchar(1000)
    declare @tempstr varchar(1000)
    declare @col nvarchar(max)
    declare @table nvarchar(max)
    declare @safePath nvarchar(100)

    -- Single quotes in the path are doubled so they stay inside the literal.
    set @safePath = replace(@path, '''', '''''')

    -- Create a temp table with one column to hold the first row of the csv file.
    -- The field terminator equals the row terminator so the whole line lands
    -- in the single column.
    create table #tbl (line varchar(1000))
    set @execSQL =
'BULK INSERT #tbl
FROM ''' + @safePath + '''
WITH (
FIELDTERMINATOR =''\n'',
FIRSTROW = 1,
ROWTERMINATOR = ''\n'',
LASTROW = 1
)
'
    exec sp_executesql @stmt = @execSQL

    set @col = ''
    -- Tabs are turned into commas so either delimiter splits the header.
    set @tempstr = (select top 1 rtrim(replace(line, char(9), ',')) from #tbl)
    drop table #tbl

    -- Without a header there are no columns to create; fail loudly instead of
    -- handing a NULL statement to sp_executesql.
    if @tempstr is null
    begin
        raiserror('No header row could be read from %s', 16, 1, @path)
        return
    end

    -- Peel one name off the front of the list per iteration.
    while charindex(',', @tempstr) > 0
    begin
        set @col = @col + quotename(ltrim(rtrim(substring(@tempstr, 1, charindex(',', @tempstr) - 1)))) + ' varchar(100),'
        set @tempstr = substring(@tempstr, charindex(',', @tempstr) + 1, len(@tempstr))
    end
    set @col = @col + quotename(ltrim(rtrim(@tempstr))) + ' varchar(100)'

    if object_id('temptable') is not null
        drop table temptable

    set @table = 'create table temptable (' + @col + ')'
    exec sp_executesql @stmt = @table

    -- Load data from csv
    set @execSQL =
'BULK INSERT temptable
FROM ''' + @safePath + '''
WITH (
FIELDTERMINATOR ='','',
FIRSTROW = 2,
ROWTERMINATOR = ''\n''
)
'
    exec sp_executesql @stmt = @execSQL
end
Improved Nadeem's script... a little bit more robust.
This code is excellent for loading multiple CSV files without using the default wizards.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- importeer_csv_as_table
-- (Re)creates the table @new_table_name with one varchar(max) column per name
-- found on the first line of the file at @path, then bulk loads the remaining
-- lines into it.
-- Parameters:
--   @path              path of the delimited text file
--   @new_table_name    name of the table to drop (if present) and create
--   @field_terminator  field terminator passed to BULK INSERT
--   @row_terminator    row terminator passed to BULK INSERT
CREATE proc [dbo].[importeer_csv_as_table]
    @path nvarchar(255),
    @new_table_name varchar(255),
    @field_terminator varchar(255),
    @row_terminator varchar(255)
as
begin
    declare @execsql nvarchar(max)
    declare @tempstr varchar(max)
    declare @col nvarchar(max)
    declare @table nvarchar(max)
    -- nvarchar: sp_executesql does not accept a varchar statement variable
    declare @drop_table nvarchar(max)
    declare @quoted_table nvarchar(258)
    declare @safe_path nvarchar(max)
    declare @safe_field nvarchar(max)
    declare @safe_row nvarchar(max)

    -- QUOTENAME escapes any ] in the name; it yields NULL for names over 128 characters.
    set @quoted_table = quotename(@new_table_name)
    if @quoted_table is null
    begin
        raiserror('Table name is missing or longer than 128 characters.', 16, 1)
        return
    end

    -- Values spliced into string literals get their single quotes doubled.
    set @safe_path  = replace(@path, '''', '''''')
    set @safe_field = replace(@field_terminator, '''', '''''')
    set @safe_row   = replace(@row_terminator, '''', '''''')

    -- Create a temp table with one column to hold the first row of the csv file.
    -- The row terminator doubles as field terminator so the whole header line
    -- lands in the single column.
    create table #tbl (line varchar(1000))
    set @execsql =
'bulk insert #tbl
from ''' + @safe_path + '''
with (
fieldterminator =''' + @safe_row + ''',
firstrow = 1,
rowterminator = ''' + @safe_row + ''',
lastrow = 1
)
'
    exec sp_executesql @stmt = @execsql

    -- replace field terminator with comma
    update #tbl set line = replace(line, @field_terminator, ',')

    set @col = N''
    set @tempstr = (select top 1 rtrim(replace(line, char(9), ',')) from #tbl)
    drop table #tbl

    -- Peel one column name off the front of the list per iteration.
    while charindex(',', @tempstr) > 0
    begin
        set @col = @col + quotename(ltrim(rtrim(substring(@tempstr, 1, charindex(',', @tempstr) - 1)))) + ' varchar(max),'
        set @tempstr = substring(@tempstr, charindex(',', @tempstr) + 1, len(@tempstr))
    end
    set @col = @col + quotename(@tempstr) + ' varchar(max)'

    -- Check for the same quoted name that is dropped and created below.
    if object_id(@quoted_table) is not null
    begin
        set @drop_table = N'drop table ' + @quoted_table
        exec sp_executesql @stmt = @drop_table
    end

    set @table = N'create table ' + @quoted_table + N' (' + @col + N')'
    exec sp_executesql @stmt = @table

    -- Load data from csv
    set @execsql =
'bulk insert ' + @quoted_table + '
from ''' + @safe_path + '''
with (
fieldterminator =''' + @safe_field + ''',
firstrow = 2,
rowterminator = ''' + @safe_row + '''
)
'
    exec sp_executesql @stmt = @execsql
end
GO
You could use Powershell to process the CSV file, there is an example here which you could probably adapt to take account of the variable number of fields. You can build the SQL to create a table and then issue a bulk load.