Pandas HDFStore strange behaviour on shape

I am facing strange behaviour: I have an HDFStore containing DataFrames. For two keys in the store, the shape information differs depending on how they are queried.
Example:
In [1]: mystore = pandas.HDFStore('/store')
In [2]: mystore
Out[2]:
<class 'pandas.io.pytables.HDFStore'>
File path: /store
/chunk_data frame (shape->[1,1])
/enrich_data_kb frame (shape->[1,11])
/inputs frame (shape->[105,4])
/prepare_data frame (shape->[105,7])
/reduce_data frame (shape->[18,4])
In [3]: mystore['chunk_data'].shape
Out[3]: (0, 1)
In [4]: mystore['enrich_data_kb'].shape
Out[4]: (18, 11)
In [5]: mystore['inputs'].shape
Out[5]: (105, 4)
Any idea?
As Jeff suggested, here is the result of ptdump (restricted to the enrich_data_kb key):
/enrich_data_kb (Group) ''
/enrich_data_kb._v_attrs (AttributeSet), 13 attributes:
[CLASS := 'GROUP',
TITLE := '',
VERSION := '1.0',
axis0_variety := 'regular',
axis1_variety := 'regular',
block0_items_variety := 'regular',
block1_items_variety := 'regular',
block2_items_variety := 'regular',
encoding := None,
nblocks := 3,
ndim := 2,
pandas_type := 'frame',
pandas_version := '0.15.2']
/enrich_data_kb/axis0 (Array(11,)) ''
atom := StringAtom(itemsize=10, shape=(), dflt='')
maindim := 0
flavor := 'numpy'
byteorder := 'irrelevant'
chunkshape := None
/enrich_data_kb/axis0._v_attrs (AttributeSet), 7 attributes:
[CLASS := 'ARRAY',
FLAVOR := 'numpy',
TITLE := '',
VERSION := '2.4',
kind := 'string',
name := None,
transposed := True]
/enrich_data_kb/axis1 (Array(18,)) ''
atom := Int64Atom(shape=(), dflt=0)
maindim := 0
flavor := 'numpy'
byteorder := 'little'
chunkshape := None
/enrich_data_kb/axis1._v_attrs (AttributeSet), 7 attributes:
[CLASS := 'ARRAY',
FLAVOR := 'numpy',
TITLE := '',
VERSION := '2.4',
kind := 'integer',
name := None,
transposed := True]
/enrich_data_kb/block0_items (Array(8,)) ''
atom := StringAtom(itemsize=10, shape=(), dflt='')
maindim := 0
flavor := 'numpy'
byteorder := 'irrelevant'
chunkshape := None
/enrich_data_kb/block0_items._v_attrs (AttributeSet), 8 attributes:
[CLASS := 'ARRAY',
FLAVOR := 'numpy',
TITLE := '',
VERSION := '2.4',
freq := None,
kind := 'string',
name := None,
transposed := True]
/enrich_data_kb/block0_values (VLArray(1,)) ''
atom = ObjectAtom()
byteorder = 'irrelevant'
nrows = 1
flavor = 'numpy'
/enrich_data_kb/block0_values._v_attrs (AttributeSet), 5 attributes:
[CLASS := 'VLARRAY',
PSEUDOATOM := 'object',
TITLE := '',
VERSION := '1.4',
transposed := True]
/enrich_data_kb/block1_items (Array(2,)) ''
atom := StringAtom(itemsize=10, shape=(), dflt='')
maindim := 0
flavor := 'numpy'
byteorder := 'irrelevant'
chunkshape := None
/enrich_data_kb/block1_items._v_attrs (AttributeSet), 8 attributes:
[CLASS := 'ARRAY',
FLAVOR := 'numpy',
TITLE := '',
VERSION := '2.4',
freq := None,
kind := 'string',
name := None,
transposed := True]
/enrich_data_kb/block1_values (Array(18, 2)) ''
atom := Float64Atom(shape=(), dflt=0.0)
maindim := 0
flavor := 'numpy'
byteorder := 'little'
chunkshape := None
/enrich_data_kb/block1_values._v_attrs (AttributeSet), 5 attributes:
[CLASS := 'ARRAY',
FLAVOR := 'numpy',
TITLE := '',
VERSION := '2.4',
transposed := True]
/enrich_data_kb/block2_items (Array(1,)) ''
atom := StringAtom(itemsize=8, shape=(), dflt='')
maindim := 0
flavor := 'numpy'
byteorder := 'irrelevant'
chunkshape := None
/enrich_data_kb/block2_items._v_attrs (AttributeSet), 8 attributes:
[CLASS := 'ARRAY',
FLAVOR := 'numpy',
TITLE := '',
VERSION := '2.4',
freq := None,
kind := 'string',
name := None,
transposed := True]
/enrich_data_kb/block2_values (Array(18, 1)) ''
atom := Int64Atom(shape=(), dflt=0)
maindim := 0
flavor := 'numpy'
byteorder := 'little'
chunkshape := None
/enrich_data_kb/block2_values._v_attrs (AttributeSet), 5 attributes:
[CLASS := 'ARRAY',
FLAVOR := 'numpy',
TITLE := '',
VERSION := '2.4',
transposed := True]
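For reference, here is a minimal sketch (nothing assumed beyond the store shown above) that loads every key and prints the actual shape, so it can be compared against the shape hints in the store's repr:
import pandas

mystore = pandas.HDFStore('/store')
for key in mystore.keys():
    # The shape of the loaded DataFrame is authoritative; as the session
    # above shows, it can differ from the shape printed in the repr.
    print(key, mystore[key].shape)
mystore.close()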

Related

While loop doesn't exit when eof is detected

I'm having trouble with the eof check in my while loop. Basically I have to read a text file (a sequence), and each character maps to a different character that is printed to an output file (salida.txt). But my while loop doesn't recognize the eof. Here's my code.
program LaboratorioPascal;

uses crt;

var
  sec, sal: Textfile;
  v: char;
  por_especial, cont_palabra, cont_caracter, cont_especial: integer;
  vocales2: set of char;
  pares: set of char;
  impares: set of char;
  consonantes: set of char;
  consonantes2: set of char;

procedure numeros(var x: char);
begin
  case x of
    '0': Write(sal, '0');
    '1': Write(sal, '1');
    '2': Write(sal, '4');
    '3': begin
           Write(sal, '2');
           Write(sal, '7');
         end;
    '4': Write(sal, '8');
    '5': begin
           Write(sal, '1');
           Write(sal, '2');
           Write(sal, '5');
         end;
    '6': begin
           Write(sal, '1');
           Write(sal, '2');
         end;
    '7': begin
           Write(sal, '3');
           Write(sal, '4');
           Write(sal, '3');
         end;
    '8': begin
           Write(sal, '1');
           Write(sal, '6');
         end;
    '9': begin
           Write(sal, '7');
           Write(sal, '2');
           Write(sal, '9');
         end;
    else Exit;
  end;
end;

function vocales(var s: char): char;
begin
  case s of
    'e': vocales := 'u';
    'a': vocales := 'o';
    'i': vocales := 'a';
    'o': vocales := 'e';
    else vocales := 'i';
  end;
end;

begin
  assign(sec, 'input.txt'); // assign the file to read from
  reset(sec);               // start the sequence
  read(sec, v);             // read the sequence. avz(sec, v)
  assign(sal, 'salida.txt');
  rewrite(sal);
  vocales2 := ['a', 'e', 'i', 'o', 'u'];
  pares := ['0', '2', '4', '6', '8'];
  impares := ['1', '3', '5', '7', '9'];
  consonantes := ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n'];
  consonantes2 := ['p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'y', 'z'];
  por_especial := 0;
  cont_palabra := 0;
  cont_caracter := 0;
  cont_especial := 0;
  writeln('El objetivo de este programa es cifrar un mensaje para favorecer a la inteligencia Rusa.');
  while not eof(sec) do
  begin
    while v = ' ' do
    begin
      write(sal, ' ');
      read(sec, v);
    end;
    cont_palabra := cont_palabra + 1;
    while v <> ' ' do
    begin
      if (v in consonantes) or (v in consonantes2) then
      begin
        write(sal, '1');
      end
      else
      begin
        if v in vocales2 then
        begin
          Write(sal, vocales(v));
        end
        else
        begin
          if v in pares then;
          begin
            numeros(v);
          end;
          begin
            if v in impares then
            begin
              numeros(v);
            end
            else
            begin
              cont_especial := cont_especial + 1;
              Write(sal, '#');
            end;
          end;
        end;
      end;
      read(sec, v);
    end;
  end;
  write(cont_palabra, ' se crifraon con [Exito]');
  close(sec);
  close(sal);
end.
But the result I have in the output file (salida.txt) is
1o1ao i1o 1u1 i1 1e1111ie 1iu 1u 1e1ae o i1o 11a11u1o### 1a1########################################################################################################################################################################################################
I've done my research on the eof topic, but I can't find anything about Pascal. And if I try to put an
if eof then
Exit;
end;
inside the while loop, it just reads one character from the input.txt file.
The problem is that you are in the inner loop ("while v <> ' ' do") when you come to the end of your input file.
If the last character in the input file is a space, you jump out of the inner loop and out of the outer loop, because you reached eof.
But if it isn't, you stay in the inner loop, and keep reading beyond the eof, until you encounter a space or a problem.
You can change the inner loop's
"while v <> ' ' do"
to
"while (v <> ' ') and (not eof(sec)) do".
Or make it one loop and handle the space in an if statement.

Get values after and before specific character in SQL/PL SQL?

I have a string value as a parameter and I need to parse it. My value is:
param := ('1234#5432#4567#8763');
I have to get the values 1234, 5432, 4567 and 8763 separately. I will assign these values to different parameters.
How can I solve this with SQL?
Thanks,
select level, regexp_substr(a, '\d+', 1, level)
from (select '1234#5432#4567#8763' a from dual)
connect by level <= regexp_count(a, '#') + 1
Assuming that you are in PL/SQL and you need to split a value of a parameter or a variable into four variables, this could be a way:
declare
  param  varchar2(100);
  param1 varchar2(100);
  param2 varchar2(100);
  param3 varchar2(100);
  param4 varchar2(100);
begin
  param := '1234#5432#4567#8763';
  --
  param1 := substr(param, 1, instr(param, '#', 1, 1) - 1);
  param2 := substr(param, instr(param, '#', 1, 1) + 1, instr(param, '#', 1, 2) - instr(param, '#', 1, 1) - 1);
  param3 := substr(param, instr(param, '#', 1, 2) + 1, instr(param, '#', 1, 3) - instr(param, '#', 1, 2) - 1);
  param4 := substr(param, instr(param, '#', 1, 3) + 1);
  --
  dbms_output.put_line('Param1: ' || param1);
  dbms_output.put_line('Param2: ' || param2);
  dbms_output.put_line('Param3: ' || param3);
  dbms_output.put_line('Param4: ' || param4);
end;
With regular expressions, you can get the same result by searching for the 1st, 2nd, ... occurrence of a string that is followed by a # or by the end of the line ('$'); a better explanation of this approach is described in the link given by Gary_W in his comment:
...
param1 := regexp_substr(param, '(.*?)(#|$)', 1, 1, '', 1 );
param2 := regexp_substr(param, '(.*?)(#|$)', 1, 2, '', 1 );
param3 := regexp_substr(param, '(.*?)(#|$)', 1, 3, '', 1 );
param4 := regexp_substr(param, '(.*?)(#|$)', 1, 4, '', 1 );
...

Pandas HDFStore: slow on query for non-matching string

My issue is that when I try to look for a string that is NOT contained in the DataFrame (which is stored in an hdf5 file), it takes a very long time to complete the query. For example:
I have a df that contains 2*10^9 rows, stored in an HDF5 file. It has a string column named "code" that was marked as a "data_column" (therefore it is indexed).
When I search for a code that exists in the dataset ( store.select('df', 'code=valid_code') ) it takes around 10 seconds to get 70K rows.
However, when I search for a code that does NOT exist in the dataset ( store.select('df', 'code=not_valid_code') ) it takes around 980 seconds to get the result of the query (0 rows).
I create the store like:
store = pd.HDFStore('data.h5', complevel=1, complib='zlib')
And the first append is like:
store.append('df', chunk, data_columns=['code'], expectedrows=2318185498)
Is this behavior normal or is there something wrong going on?
Thanks!
PS: this question is probably related to this other question
UPDATE:
Following Jeff's advice, I replicated his experiment, and I got the following results on a Mac. This is the table that was generated:
!ptdump -av test.h5
/ (RootGroup) ''
/._v_attrs (AttributeSet), 4 attributes:
[CLASS := 'GROUP',
PYTABLES_FORMAT_VERSION := '2.1',
TITLE := '',
VERSION := '1.0']
/df (Group) ''
/df._v_attrs (AttributeSet), 14 attributes:
[CLASS := 'GROUP',
TITLE := '',
VERSION := '1.0',
data_columns := ['A'],
encoding := None,
index_cols := [(0, 'index')],
info := {1: {'type': 'Index', 'names': [None]}, 'index': {}},
levels := 1,
nan_rep := 'nan',
non_index_axes := [(1, ['A'])],
pandas_type := 'frame_table',
pandas_version := '0.10.1',
table_type := 'appendable_frame',
values_cols := ['A']]
/df/table (Table(50000000,)) ''
description := {
"index": Int64Col(shape=(), dflt=0, pos=0),
"A": StringCol(itemsize=8, shape=(), dflt='', pos=1)}
byteorder := 'little'
chunkshape := (8192,)
autoindex := True
colindexes := {
"A": Index(6, medium, shuffle, zlib(1)).is_csi=False,
"index": Index(6, medium, shuffle, zlib(1)).is_csi=False}
/df/table._v_attrs (AttributeSet), 11 attributes:
[A_dtype := 'string64',
A_kind := ['A'],
CLASS := 'TABLE',
FIELD_0_FILL := 0,
FIELD_0_NAME := 'index',
FIELD_1_FILL := '',
FIELD_1_NAME := 'A',
NROWS := 50000000,
TITLE := '',
VERSION := '2.7',
index_kind := 'integer']
And these are the results:
In [8]: %timeit pd.read_hdf('test.h5','df',where='A = "foo00002"')
1 loops, best of 3: 277 ms per loop
In [9]: %timeit pd.read_hdf('test_zlib.h5','df',where='A = "foo00002"')
1 loops, best of 3: 391 ms per loop
In [10]: %timeit pd.read_hdf('test.h5','df',where='A = "bar"')
1 loops, best of 3: 533 ms per loop
In [11]: %timeit pd.read_hdf('test_zlib2.h5','df',where='A = "bar"')
1 loops, best of 3: 504 ms per loop
Since the differences were maybe not big enough, I tried the same experiment with a bigger dataframe. Also, I ran this experiment on a different machine, one running Linux.
This is the code (I just multiplied the original dataset by 10):
import pandas as pd
df = pd.DataFrame({'A' : [ 'foo%05d' % i for i in range(500000) ]})
df = pd.concat([ df ] * 20)
store = pd.HDFStore('test.h5', mode='w')
for i in range(50):
    print "%s" % i
    store.append('df', df, data_columns=['A'])
This is the table:
!ptdump -av test.h5
/ (RootGroup) ''
/._v_attrs (AttributeSet), 4 attributes:
[CLASS := 'GROUP',
PYTABLES_FORMAT_VERSION := '2.1',
TITLE := '',
VERSION := '1.0']
/df (Group) ''
/df._v_attrs (AttributeSet), 14 attributes:
[CLASS := 'GROUP',
TITLE := '',
VERSION := '1.0',
data_columns := ['A'],
encoding := None,
index_cols := [(0, 'index')],
info := {1: {'type': 'Index', 'names': [None]}, 'index': {}},
levels := 1,
nan_rep := 'nan',
non_index_axes := [(1, ['A'])],
pandas_type := 'frame_table',
pandas_version := '0.10.1',
table_type := 'appendable_frame',
values_cols := ['A']]
/df/table (Table(500000000,)) ''
description := {
"index": Int64Col(shape=(), dflt=0, pos=0),
"A": StringCol(itemsize=9, shape=(), dflt='', pos=1)}
byteorder := 'little'
chunkshape := (15420,)
autoindex := True
colindexes := {
"A": Index(6, medium, shuffle, zlib(1)).is_csi=False,
"index": Index(6, medium, shuffle, zlib(1)).is_csi=False}
/df/table._v_attrs (AttributeSet), 11 attributes:
[A_dtype := 'string72',
A_kind := ['A'],
CLASS := 'TABLE',
FIELD_0_FILL := 0,
FIELD_0_NAME := 'index',
FIELD_1_FILL := '',
FIELD_1_NAME := 'A',
NROWS := 500000000,
TITLE := '',
VERSION := '2.7',
index_kind := 'integer']
These are the files:
-rw-rw-r-- 1 user user 8.2G Oct 5 14:00 test.h5
-rw-rw-r-- 1 user user 9.9G Oct 5 14:30 test_zlib.h5
And these are the results:
In [9]: %timeit pd.read_hdf('test.h5','df',where='A = "foo00002"')
1 loops, best of 3: 1.02 s per loop
In [10]: %timeit pd.read_hdf('test_zlib.h5','df',where='A = "foo00002"')
1 loops, best of 3: 980 ms per loop
In [11]: %timeit pd.read_hdf('test.h5','df',where='A = "bar"')
1 loops, best of 3: 7.02 s per loop
In [12]: %timeit pd.read_hdf('test_zlib.h5','df',where='A = "bar"')
1 loops, best of 3: 7.27 s per loop
These are my versions of Pandas and Pytables:
user@host:~/$ pip show tables
---
Name: tables
Version: 3.1.1
Location: /usr/local/lib/python2.7/dist-packages
Requires:
user@host:~/$ pip show pandas
---
Name: pandas
Version: 0.14.1
Location: /usr/local/lib/python2.7/dist-packages
Requires: python-dateutil, pytz, numpy
I am quite sure, though, that the issue is not related to Pandas, since I have observed similar behavior when using PyTables directly, without Pandas.
UPDATE 2:
I switched to PyTables 3.0.0 and the problem was fixed. This is using the same files that were generated with PyTables 3.1.1.
In [4]:%timeit pd.read_hdf('test.h5','df',where='A = "bar"')
1 loops, best of 3: 205 ms per loop
In [4]:%timeit pd.read_hdf('test_zlib.h5','df',where='A = "bar"')
10 loops, best of 3: 101 ms per loop
I think your issue is one we filed a bug about a while ago here with the PyTables guys. Essentially, using a compressed store AND specifying expectedrows AND using an indexed column causes mis-indexing.
The solution is simply NOT to use expectedrows, and instead to ptrepack the file with a specified chunkshape (or AUTO). This is good practice anyhow. Further, I am not sure if you are specifying compression up-front, but it is IMHO better to do this via ptrepack; see the docs here. There is also an issue on SO about this (can't find it right now; essentially, if you are creating the file, don't index up-front, but rather when you are done appending, if you can).
In any event, creating a test store:
In [1]: df = DataFrame({'A' : [ 'foo%05d' % i for i in range(50000) ]})
In [2]: df = pd.concat([ df ] * 20)
Append 50M rows.
In [4]: store = pd.HDFStore('test.h5',mode='w')
In [6]: for i in range(50):
   ...:     print "%s" % i
   ...:     store.append('df',df,data_columns=['A'])
   ...:
Here is the table:
In [9]: !ptdump -av test.h5
/ (RootGroup) ''
/._v_attrs (AttributeSet), 4 attributes:
[CLASS := 'GROUP',
PYTABLES_FORMAT_VERSION := '2.1',
TITLE := '',
VERSION := '1.0']
/df (Group) ''
/df._v_attrs (AttributeSet), 14 attributes:
[CLASS := 'GROUP',
TITLE := '',
VERSION := '1.0',
data_columns := ['A'],
encoding := None,
index_cols := [(0, 'index')],
info := {1: {'type': 'Index', 'names': [None]}, 'index': {}},
levels := 1,
nan_rep := 'nan',
non_index_axes := [(1, ['A'])],
pandas_type := 'frame_table',
pandas_version := '0.10.1',
table_type := 'appendable_frame',
values_cols := ['A']]
/df/table (Table(50000000,)) ''
description := {
"index": Int64Col(shape=(), dflt=0, pos=0),
"A": StringCol(itemsize=8, shape=(), dflt='', pos=1)}
byteorder := 'little'
chunkshape := (8192,)
autoindex := True
colindexes := {
"A": Index(6, medium, shuffle, zlib(1)).is_csi=False,
"index": Index(6, medium, shuffle, zlib(1)).is_csi=False}
/df/table._v_attrs (AttributeSet), 11 attributes:
[A_dtype := 'string64',
A_kind := ['A'],
CLASS := 'TABLE',
FIELD_0_FILL := 0,
FIELD_0_NAME := 'index',
FIELD_1_FILL := '',
FIELD_1_NAME := 'A',
NROWS := 50000000,
TITLE := '',
VERSION := '2.7',
index_kind := 'integer']
Create blosc and zlib versions.
In [12]: !ptrepack --complib blosc --chunkshape auto --propindexes test.h5 test_blosc.h5
In [13]: !ptrepack --complib zlib --chunkshape auto --propindexes test.h5 test_zlib.h5
In [14]: !ls -ltr *.h5
-rw-rw-r-- 1 jreback users 866182540 Oct 4 20:31 test.h5
-rw-rw-r-- 1 jreback users 976674013 Oct 4 20:36 test_blosc.h5
-rw-rw-r-- 1 jreback users 976674013 Oct 4 2014 test_zlib.h5
Perf is pretty similar (for the found rows):
In [10]: %timeit pd.read_hdf('test.h5','df',where='A = "foo00002"')
1 loops, best of 3: 337 ms per loop
In [15]: %timeit pd.read_hdf('test_blosc.h5','df',where='A = "foo00002"')
1 loops, best of 3: 345 ms per loop
In [16]: %timeit pd.read_hdf('test_zlib.h5','df',where='A = "foo00002"')
1 loops, best of 3: 347 ms per loop
And for missing rows (though the compressed stores do perform better here):
In [11]: %timeit pd.read_hdf('test.h5','df',where='A = "bar"')
10 loops, best of 3: 82.4 ms per loop
In [17]: %timeit pd.read_hdf('test_blosc.h5','df',where='A = "bar"')
10 loops, best of 3: 32.2 ms per loop
In [18]: %timeit pd.read_hdf('test_zlib.h5','df',where='A = "bar"')
10 loops, best of 3: 32.3 ms per loop
So: try without the expectedrows specifier, and use ptrepack.
Another possibility, if you are expecting a relatively low density of entries for this column (e.g. a small number of unique entries), is to select the entire column, store.select_column('df','A').unique() in this case, and use that as a quick lookup mechanism (so you don't search at all).
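A minimal sketch of that lookup pattern, using the test store built above (the helper name select_code and the empty-frame fallback are my own choices):
import pandas as pd

store = pd.HDFStore('test.h5')

# Read just the indexed column once and build an in-memory lookup set;
# this is cheap when the column has relatively few unique values.
valid_codes = set(store.select_column('df', 'A').unique())

def select_code(code):
    # For a value that cannot match, skip the on-disk search entirely.
    if code not in valid_codes:
        return pd.DataFrame()
    return store.select('df', 'A = "%s"' % code)
With this, select_code('bar') returns an empty frame immediately instead of scanning the table.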
Thanks to Jeff's help, I fixed the issue by downgrading PyTables to version 3.0.0. The issue has been reported to the PyTables devs.

Check whether a PyTables node in a pandas HDFStore is tabular

Is there a preferred way to check whether a PyTables node in a pandas HDFStore is tabular? This works, but NoSuchNodeError doesn't seem to be part of the API, so maybe I should not rely on it.
In [34]: from tables.table import NoSuchNodeError
In [35]: def is_tabular(store, key):
   ....:     try:
   ....:         store.get_node(key).table
   ....:     except NoSuchNodeError:
   ....:         return False
   ....:     return True
   ....:
In [36]: is_tabular(store, 'first_600')
Out[36]: False
In [37]: is_tabular(store, 'features')
Out[37]: True
You could do something like this. The pandas_type / table_type metadata will be present in the PyTables attribute _v_attrs at the top level of the node.
In [28]: store = pd.HDFStore('test.h5',mode='w')
In [29]: store.append('df',DataFrame(np.random.randn(10,2),columns=list('AB')))
In [30]: store
Out[30]:
<class 'pandas.io.pytables.HDFStore'>
File path: test.h5
/df frame_table (typ->appendable,nrows->10,ncols->2,indexers->[index])
In [31]: store._handle.root.df._v_attrs
Out[31]:
/df._v_attrs (AttributeSet), 14 attributes:
[CLASS := 'GROUP',
TITLE := u'',
VERSION := '1.0',
data_columns := [],
encoding := None,
index_cols := [(0, 'index')],
info := {1: {'type': 'Index', 'names': [None]}, 'index': {}},
levels := 1,
nan_rep := 'nan',
non_index_axes := [(1, ['A', 'B'])],
pandas_type := 'frame_table',
pandas_version := '0.10.1',
table_type := 'appendable_frame',
values_cols := ['values_block_0']]
In [33]: getattr(getattr(getattr(store._handle.root,'df',None),'_v_attrs',None),'pandas_type',None)
Out[33]: 'frame_table'
In [34]: store.close()
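Distilled into the shape of the original helper, a sketch (the function name and the '_table' suffix check are mine, based on the metadata shown above):
def is_table_format(store, key):
    # True when the node exists and its pandas_type metadata marks it
    # as a table (query-able) format such as 'frame_table'.
    node = store.get_node(key)
    if node is None:
        return False
    pandas_type = getattr(getattr(node, '_v_attrs', None), 'pandas_type', None)
    return pandas_type is not None and pandas_type.endswith('_table')
This avoids relying on NoSuchNodeError: get_node simply returns None for a missing key, and a fixed-format frame (pandas_type 'frame') yields False.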

Oracle PLSQL - Error handling in UTL_FILE

My script is below. It loads a csv file into PRODUCT_TBL, and if any error happens during the process, the script rolls back the transaction and outputs an error message. However, it does not print out the message when it hits a UTL_FILE error, for example an invalid file operation. Any help is appreciated. Thanks
DECLARE
  V_error_code         NUMBER;
  V_error_message      VARCHAR2(255);
  V_ignore_headerlines NUMBER := 1;
  V_eof                BOOLEAN := FALSE;
  F                    UTL_FILE.FILE_TYPE;
  V_LINE               VARCHAR2 (32767);
  V_PRD_ID             PRODUCT_TBL.PRD_ID%TYPE;
  V_PATTERN            PRODUCT_TBL.PATTERN%TYPE;
  V_REMARK             PRODUCT_TBL.REMARK%TYPE;
  V_CREATED_BY         PRODUCT_TBL.CREATED_BY%TYPE;
  V_CREATED_DATE       PRODUCT_TBL.CREATED_DATE%TYPE;
  V_MODIFIED_BY        PRODUCT_TBL.MODIFIED_BY%TYPE;
  V_MODIFIED_DATE      PRODUCT_TBL.MODIFIED_DATE%TYPE;
BEGIN
  F := UTL_FILE.FOPEN ('DATA_DIR', 'PRODUCT_TBLv51.csv', 'R');
  IF V_ignore_headerlines > 0
  THEN
    BEGIN
      FOR i IN 1 .. V_ignore_headerlines
      LOOP
        UTL_FILE.get_line(F, V_LINE);
      END LOOP;
    EXCEPTION
      WHEN NO_DATA_FOUND
      THEN
        V_eof := TRUE;
    END;
  END IF;
  WHILE NOT V_eof
  LOOP
    BEGIN
      UTL_FILE.GET_LINE(F, V_LINE, 32767);
      IF V_LINE IS NULL THEN
        EXIT;
      END IF;
      V_PRD_ID        := REGEXP_SUBSTR(V_LINE, '([^,\(]*(\([^\)]*\)[^,\(]*)*)(,|$)', 1, 1, 'i', 1);
      V_PATTERN       := REGEXP_SUBSTR(V_LINE, '([^,\(]*(\([^\)]*\)[^,\(]*)*)(,|$)', 1, 2, 'i', 1);
      V_REMARK        := REGEXP_SUBSTR(V_LINE, '([^,\(]*(\([^\)]*\)[^,\(]*)*)(,|$)', 1, 12, 'i', 1);
      V_CREATED_BY    := REGEXP_SUBSTR(V_LINE, '([^,\(]*(\([^\)]*\)[^,\(]*)*)(,|$)', 1, 13, 'i', 1);
      V_CREATED_DATE  := REGEXP_SUBSTR(V_LINE, '([^,\(]*(\([^\)]*\)[^,\(]*)*)(,|$)', 1, 14, 'i', 1);
      V_MODIFIED_BY   := REGEXP_SUBSTR(V_LINE, '([^,\(]*(\([^\)]*\)[^,\(]*)*)(,|$)', 1, 15, 'i', 1);
      V_MODIFIED_DATE := REGEXP_SUBSTR(V_LINE, '([^,\(]*(\([^\)]*\)[^,\(]*)*)(,|$)', 1, 16, 'i', 1);
      INSERT INTO PRODUCT_TBL (PRD_ID, PATTERN, REMARK, CREATED_BY, CREATED_DATE, MODIFIED_BY, MODIFIED_DATE)
      VALUES (V_PRD_ID, V_PATTERN, V_REMARK, V_CREATED_BY, V_CREATED_DATE, V_MODIFIED_BY, V_MODIFIED_DATE);
    EXCEPTION
      WHEN OTHERS THEN
        ROLLBACK;
        v_error_code := SQLCODE;
        v_error_message := SQLERRM;
        dbms_output.put_line(v_error_code || SQLERRM);
        EXIT;
    END;
  END LOOP;
  COMMIT;
  UTL_FILE.FCLOSE(F);
EXCEPTION
  WHEN UTL_FILE.INVALID_OPERATION THEN
    UTL_FILE.FCLOSE(F);
    dbms_output.put_line('File could not be opened or operated on as requested.');
END;
/
Add an EXCEPTION ... WHEN OTHERS block after the UTL_FILE part and see what kind of exceptions actually go through, so you can catch them.
EXCEPTION
  WHEN UTL_FILE.INVALID_OPERATION THEN
    UTL_FILE.FCLOSE(F);
    dbms_output.put_line('File could not be opened or operated on as requested.');
  WHEN OTHERS THEN
    dbms_output.put_line('other trouble' || SQLCODE || SQLERRM);
When you know which one happened you will know how to catch it.