Using LSTM/GRU with Flatten throws a dimensional incompatibility error - TensorFlow
I want to make use of a promising NN I found on Towards Data Science for my case study.
The data shapes I have are:
X_train: (1200, 18, 15)
y_train: (1200, 18, 1)
Here is the NN, which contains, among other layers, GRU, Flatten, and Dense:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (Bidirectional, GRU, AveragePooling1D,
                                     Conv1D, Flatten, Dense, Dropout)

def twds_model(layer1=32, layer2=32, layer3=16, dropout_rate=0.5,
               optimizer='Adam', learning_rate=0.001, activation='relu',
               loss='mse'):
    model = Sequential()
    model.add(Bidirectional(GRU(layer1, return_sequences=True),
                            input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(AveragePooling1D(2))
    model.add(Conv1D(layer2, 3, activation=activation, padding='same',
                     name='extractor'))
    model.add(Flatten())
    model.add(Dense(layer3, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(optimizer=optimizer, loss=loss)
    return model

twds_model = twds_model()
print(twds_model.summary())
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
bidirectional_4 (Bidirection (None, 18, 64) 9216
_________________________________________________________________
average_pooling1d_1 (Average (None, 9, 64) 0
_________________________________________________________________
extractor (Conv1D) (None, 9, 32) 6176
_________________________________________________________________
flatten_1 (Flatten) (None, 288) 0
_________________________________________________________________
dense_3 (Dense) (None, 16) 4624
_________________________________________________________________
dropout_4 (Dropout) (None, 16) 0
_________________________________________________________________
dense_4 (Dense) (None, 1) 17
=================================================================
Total params: 20,033
Trainable params: 20,033
Non-trainable params: 0
_________________________________________________________________
None
Unfortunately, I run into a seemingly contradictory error trap, where input and output shapes do not match. Here is the error under the circumstances described above:
InvalidArgumentError: Incompatible shapes: [144,1] vs. [144,18,1]
[[{{node loss_2/dense_4_loss/sub}}]]
[[{{node loss_2/mul}}]]
Train on 10420 samples, validate on 1697 samples
Epoch 1/8
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-30-3f5256ff03ec> in <module>
----> 1 Test_tdws=twds_model.fit(X_train, y_train, epochs=8, batch_size=144, verbose=2, validation_split=(0.14), shuffle=False) #callbacks=[tensorboard])
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, max_queue_size, workers, use_multiprocessing, **kwargs)
878 initial_epoch=initial_epoch,
879 steps_per_epoch=steps_per_epoch,
--> 880 validation_steps=validation_steps)
881
882 def evaluate(self,
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, mode, validation_in_fit, **kwargs)
327
328 # Get outputs.
--> 329 batch_outs = f(ins_batch)
330 if not isinstance(batch_outs, list):
331 batch_outs = [batch_outs]
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\keras\backend.py in __call__(self, inputs)
3074
3075 fetched = self._callable_fn(*array_vals,
-> 3076 run_metadata=self.run_metadata)
3077 self._call_fetch_callbacks(fetched[-len(self._fetches):])
3078 return nest.pack_sequence_as(self._outputs_structure,
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\client\session.py in __call__(self, *args, **kwargs)
1437 ret = tf_session.TF_SessionRunCallable(
1438 self._session._session, self._handle, args, status,
-> 1439 run_metadata_ptr)
1440 if run_metadata:
1441 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it stays alive
530 # as there is a reference to status from this from the traceback due to
InvalidArgumentError: Incompatible shapes: [144,1] vs. [144,18,1]
[[{{node loss_2/dense_4_loss/sub}}]]
[[{{node loss_2/mul}}]]
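To make the mismatch explicit: the final Dense(1) after the Flatten layer emits a single value per sample, while my targets still carry the time axis. A quick check of the shapes involved (model and arrays as defined above):
print(twds_model.output_shape)  # (None, 1) -- one prediction per sample
print(y_train.shape)            # (1200, 18, 1) -- one target per time step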
And for completeness, the expected error when y_train is reshaped to (1200*18, 1):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-47-2a6d0761b794> in <module>
----> 1 Test_tdws=twds_model.fit(X_train, y_train_flat, epochs=8, batch_size=144, verbose=2, validation_split=(0.14), shuffle=False) #callbacks=[tensorboard])
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, max_queue_size, workers, use_multiprocessing, **kwargs)
774 steps=steps_per_epoch,
775 validation_split=validation_split,
--> 776 shuffle=shuffle)
777
778 # Prepare validation data.
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle)
2434 # Check that all arrays have the same length.
2435 if not self._distribution_strategy:
-> 2436 training_utils.check_array_lengths(x, y, sample_weights)
2437 if self._is_graph_network and not self.run_eagerly:
2438 # Additional checks to avoid users mistakenly using improper loss fns.
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\keras\engine\training_utils.py in check_array_lengths(inputs, targets, weights)
454 'the same number of samples as target arrays. '
455 'Found ' + str(list(set_x)[0]) + ' input samples '
--> 456 'and ' + str(list(set_y)[0]) + ' target samples.')
457 if len(set_w) > 1:
458 raise ValueError('All sample_weight arrays should have '
ValueError: Input arrays should have the same number of samples as target arrays. Found 12117 input samples and 218106 target samples
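Just to illustrate the output shape I would expect, here is a minimal sequence-to-sequence sketch that fits these shapes, assuming one target per time step. It is not the architecture from the article: AveragePooling1D and Flatten are dropped, since both halve or collapse the time axis that the targets still carry.
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Bidirectional, GRU, Conv1D, Dense

def seq2seq_sketch(n_steps=18, n_features=15):
    model = Sequential()
    model.add(Bidirectional(GRU(32, return_sequences=True),
                            input_shape=(n_steps, n_features)))
    model.add(Conv1D(32, 3, activation='relu', padding='same'))
    # Dense applied to a 3D tensor acts per time step -> (None, 18, 1)
    model.add(Dense(1))
    model.compile(optimizer='Adam', loss='mse')
    return model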
The versions in use are:
Package Version
---------------------- --------------------
- nsorflow-gpu
-ensorflow-gpu 1.13.1
-rotobuf 3.11.3
-umpy 1.18.1
absl-py 0.9.0
antlr4-python3-runtime 4.8
asn1crypto 1.3.0
astor 0.7.1
astropy 3.2.1
astunparse 1.6.3
attrs 19.3.0
audioread 2.1.8
autopep8 1.5.3
backcall 0.1.0
beautifulsoup4 4.9.0
bezier 0.8.0
bkcharts 0.2
bleach 3.1.4
blis 0.2.4
bokeh 1.1.0
boto3 1.9.253
botocore 1.12.253
Bottleneck 1.3.2
cachetools 4.1.0
certifi 2020.4.5.1
cffi 1.14.0
chardet 3.0.4
click 6.7
cloudpickle 0.5.3
cmdstanpy 0.4.0
color 0.1
colorama 0.4.3
colorcet 0.9.1
convertdate 2.2.1
copulas 0.2.5
cryptography 2.8
ctgan 0.2.1
cycler 0.10.0
cymem 2.0.2
Cython 0.29.17
dash 0.26.0
dash-core-components 0.27.2
dash-html-components 0.11.0
dash-renderer 0.13.2
dask 0.18.1
dataclasses 0.6
datashader 0.7.0
datashape 0.5.2
datawig 0.1.10
deap 1.3.0
decorator 4.4.2
defusedxml 0.6.0
deltapy 0.1.1
dill 0.2.9
distributed 1.22.1
docutils 0.14
entrypoints 0.3
ephem 3.7.7.1
et-xmlfile 1.0.1
exrex 0.10.5
Faker 4.0.3
fastai 1.0.60
fastprogress 0.2.2
fbprophet 0.6
fire 0.3.1
Flask 1.0.2
Flask-Compress 1.4.0
future 0.17.1
gast 0.3.3
geojson 2.4.1
geomet 0.2.0.post2
google-auth 1.14.0
google-auth-oauthlib 0.4.1
google-pasta 0.2.0
gplearn 0.4.1
graphviz 0.13.2
grpcio 1.29.0
h5py 2.10.0
HeapDict 1.0.0
holidays 0.10.2
holoviews 1.12.1
html2text 2018.1.9
hyperas 0.4.1
hyperopt 0.1.2
idna 2.6
imageio 2.5.0
imbalanced-learn 0.3.3
imblearn 0.0
importlib-metadata 1.5.0
impyute 0.0.8
ipykernel 5.1.4
ipython 7.13.0
ipython-genutils 0.2.0
ipywidgets 7.5.1
itsdangerous 0.24
jdcal 1.4
jedi 0.16.0
Jinja2 2.11.1
jmespath 0.9.5
joblib 0.13.2
jsonschema 3.2.0
jupyter 1.0.0
jupyter-client 6.1.2
jupyter-console 6.0.0
jupyter-core 4.6.3
Keras 2.2.5
Keras-Applications 1.0.8
Keras-Preprocessing 1.1.2
keras-rectified-adam 0.17.0
kiwisolver 1.2.0
korean-lunar-calendar 0.2.1
librosa 0.7.2
llvmlite 0.32.1
lml 0.0.1
locket 0.2.0
LunarCalendar 0.0.9
Markdown 2.6.11
MarkupSafe 1.1.1
matplotlib 3.2.1
missingpy 0.2.0
mistune 0.8.4
mkl-fft 1.0.15
mkl-random 1.1.0
mkl-service 2.3.0
mock 4.0.2
msgpack 0.5.6
multipledispatch 0.6.0
murmurhash 1.0.2
mxnet 1.4.1
nb-conda 2.2.1
nb-conda-kernels 2.2.3
nbconvert 5.6.1
nbformat 5.0.4
nbstripout 0.3.7
networkx 2.1
notebook 6.0.3
numba 0.49.1
numexpr 2.7.1
numpy 1.19.0
oauthlib 3.1.0
olefile 0.46
opencv-python 4.2.0.34
openpyxl 2.5.5
opt-einsum 3.2.1
packaging 20.3
pandas 1.0.3
pandasvault 0.0.3
pandocfilters 1.4.2
param 1.9.0
parso 0.6.2
partd 0.3.8
patsy 0.5.1
pbr 5.1.3
pickleshare 0.7.5
Pillow 7.0.0
pip 20.0.2
plac 0.9.6
plotly 4.7.1
plotly-express 0.4.1
preshed 2.0.1
prometheus-client 0.7.1
prompt-toolkit 3.0.4
protobuf 3.11.3
psutil 5.4.7
py 1.8.0
pyasn1 0.4.8
pyasn1-modules 0.2.8
pycodestyle 2.6.0
pycparser 2.20
pyct 0.4.5
pyensae 1.3.839
pyexcel 0.5.8
pyexcel-io 0.5.7
Pygments 2.6.1
pykalman 0.9.5
PyMeeus 0.3.7
pymongo 3.8.0
pyOpenSSL 19.1.0
pyparsing 2.4.7
pypi 2.1
pyquickhelper 1.9.3418
pyrsistent 0.16.0
PySocks 1.7.1
pystan 2.19.1.1
python-dateutil 2.8.1
pytz 2019.3
pyviz-comms 0.7.2
PyWavelets 0.5.2
pywin32 227
pywinpty 0.5.7
PyYAML 5.3.1
pyzmq 18.1.1
qtconsole 4.4.4
rdt 0.2.1
RegscorePy 1.1
requests 2.23.0
requests-oauthlib 1.3.0
resampy 0.2.2
retrying 1.3.3
rsa 4.0
s3transfer 0.2.1
scikit-image 0.15.0
scikit-learn 0.23.2
scipy 1.4.1
sdv 0.3.2
seaborn 0.9.0
seasonal 0.3.1
Send2Trash 1.5.0
sentinelsat 0.12.2
setuptools 46.3.0
setuptools-git 1.2
six 1.14.0
sklearn 0.0
sortedcontainers 2.0.4
SoundFile 0.10.3.post1
soupsieve 2.0
spacy 2.1.8
srsly 0.1.0
statsmodels 0.9.0
stopit 1.1.2
sugartensor 1.0.0.2
ta 0.5.25
tb-nightly 1.14.0a20190603
tblib 1.3.2
tensorboard 1.13.1
tensorboard-plugin-wit 1.6.0.post3
tensorflow-estimator 1.13.0
tensorflow-gpu 1.13.1
termcolor 1.1.0
terminado 0.8.3
testpath 0.4.4
text-unidecode 1.3
texttable 1.4.0
tf-estimator-nightly 1.14.0.dev2019060501
Theano 1.0.4
thinc 7.0.8
threadpoolctl 2.1.0
toml 0.10.1
toolz 0.10.0
torch 1.4.0
torchvision 0.5.0
tornado 6.0.4
TPOT 0.10.2
tqdm 4.45.0
traitlets 4.3.3
transforms3d 0.3.1
tsaug 0.2.1
typeguard 2.7.1
typing 3.6.6
update-checker 0.16
urllib3 1.22
utm 0.4.2
wasabi 0.2.2
wcwidth 0.1.9
webencodings 0.5.1
Werkzeug 1.0.1
wheel 0.34.2
widgetsnbextension 3.5.1
win-inet-pton 1.1.0
wincertstore 0.2
wrapt 1.11.2
xarray 0.10.8
xlrd 1.1.0
yahoo-historical 0.3.2
zict 0.1.3
zipp 2.2.0
Many thanks in advance for every hint that points towards running code ;-)!
EDIT
After updating TensorFlow and Keras to the latest versions, I received the error below. The error persisted even though TensorFlow, CUDA 10.1, and cuDNN 8.0.2 were completely deleted and reinstalled. The error was produced both with my original code and with Fallen Apart's example code.
UnknownError: Fail to find the dnn implementation.
[[{{node CudnnRNN}}]]
[[sequential/bidirectional/forward_gru/PartitionedCall]] [Op:__inference_train_function_5731]
Function call stack:
train_function -> train_function -> train_function
None
Epoch 1/4
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-1-64eb8afffe02> in <module>
27 print(twds_model.summary())
28
---> 29 twds_model.fit(X_train, y_train, epochs=4)
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
838 # Lifting succeeded, so variables are initialized and we can run the
839 # stateless function.
--> 840 return self._stateless_fn(*args, **kwds)
841 else:
842 canon_args, canon_kwds = \
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
2827 with self._lock:
2828 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2829 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2830
2831 #property
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\eager\function.py in _filtered_call(self, args, kwargs, cancellation_manager)
1846 resource_variable_ops.BaseResourceVariable))],
1847 captured_inputs=self.captured_inputs,
-> 1848 cancellation_manager=cancellation_manager)
1849
1850 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1922 # No tape is watching; skip to running the function.
1923 return self._build_call_outputs(self._inference_function.call(
-> 1924 ctx, args, cancellation_manager=cancellation_manager))
1925 forward_backward = self._select_forward_and_backward_functions(
1926 args,
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
548 inputs=args,
549 attrs=attrs,
--> 550 ctx=ctx)
551 else:
552 outputs = execute.execute_with_cancellation(
~\Anaconda3\envs\Tensorflow\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
UnknownError: Fail to find the dnn implementation.
[[{{node CudnnRNN}}]]
[[sequential/bidirectional/forward_gru/PartitionedCall]] [Op:__inference_train_function_5731]
Function call stack:
train_function -> train_function -> train_function
The respective version list:
Package Version
------------------------ ---------------
- nsorflow-gpu
-ensorflow-gpu 2.3.0
-rotobuf 3.11.3
absl-py 0.9.0
antlr4-python3-runtime 4.8
asn1crypto 1.3.0
astor 0.7.1
astropy 3.2.1
astunparse 1.6.3
attrs 19.3.0
audioread 2.1.8
autopep8 1.5.3
backcall 0.1.0
beautifulsoup4 4.9.0
bezier 0.8.0
bkcharts 0.2
bleach 3.1.4
blis 0.2.4
bokeh 1.1.0
boto3 1.9.253
botocore 1.12.253
Bottleneck 1.3.2
cachetools 4.1.0
certifi 2020.4.5.1
cffi 1.14.0
chardet 3.0.4
click 6.7
cloudpickle 0.5.3
cmdstanpy 0.4.0
color 0.1
colorama 0.4.3
colorcet 0.9.1
convertdate 2.2.1
copulas 0.2.5
cryptography 2.8
ctgan 0.2.1
cycler 0.10.0
cymem 2.0.2
Cython 0.29.17
dash 0.26.0
dash-core-components 0.27.2
dash-html-components 0.11.0
dash-renderer 0.13.2
dask 0.18.1
dataclasses 0.6
datashader 0.7.0
datashape 0.5.2
datawig 0.1.10
deap 1.3.0
decorator 4.4.2
defusedxml 0.6.0
deltapy 0.1.1
dill 0.2.9
distributed 1.22.1
docutils 0.14
entrypoints 0.3
ephem 3.7.7.1
et-xmlfile 1.0.1
exrex 0.10.5
Faker 4.0.3
fastai 1.0.60
fastprogress 0.2.2
fbprophet 0.6
fire 0.3.1
Flask 1.0.2
Flask-Compress 1.4.0
future 0.17.1
gast 0.3.3
geojson 2.4.1
geomet 0.2.0.post2
google-auth 1.14.0
google-auth-oauthlib 0.4.1
google-pasta 0.2.0
gplearn 0.4.1
graphviz 0.13.2
grpcio 1.29.0
h5py 2.10.0
HeapDict 1.0.0
holidays 0.10.2
holoviews 1.12.1
html2text 2018.1.9
hyperas 0.4.1
hyperopt 0.1.2
idna 2.6
imageio 2.5.0
imbalanced-learn 0.3.3
imblearn 0.0
importlib-metadata 1.5.0
impyute 0.0.8
ipykernel 5.1.4
ipython 7.13.0
ipython-genutils 0.2.0
ipywidgets 7.5.1
itsdangerous 0.24
jdcal 1.4
jedi 0.16.0
Jinja2 2.11.1
jmespath 0.9.5
joblib 0.13.2
jsonschema 3.2.0
jupyter 1.0.0
jupyter-client 6.1.2
jupyter-console 6.0.0
jupyter-core 4.6.3
Keras 2.4.3
Keras-Applications 1.0.8
Keras-Preprocessing 1.1.2
keras-rectified-adam 0.17.0
kiwisolver 1.2.0
korean-lunar-calendar 0.2.1
librosa 0.7.2
llvmlite 0.32.1
lml 0.0.1
locket 0.2.0
LunarCalendar 0.0.9
Markdown 2.6.11
MarkupSafe 1.1.1
matplotlib 3.2.1
missingpy 0.2.0
mistune 0.8.4
mkl-fft 1.0.15
mkl-random 1.1.0
mkl-service 2.3.0
mock 4.0.2
msgpack 0.5.6
multipledispatch 0.6.0
murmurhash 1.0.2
mxnet 1.4.1
nb-conda 2.2.1
nb-conda-kernels 2.2.3
nbconvert 5.6.1
nbformat 5.0.4
nbstripout 0.3.7
networkx 2.1
notebook 6.0.3
numba 0.49.1
numexpr 2.7.1
numpy 1.18.5
oauthlib 3.1.0
olefile 0.46
opencv-python 4.2.0.34
openpyxl 2.5.5
opt-einsum 3.2.1
packaging 20.3
pandas 1.0.3
pandasvault 0.0.3
pandocfilters 1.4.2
param 1.9.0
parso 0.6.2
partd 0.3.8
patsy 0.5.1
pbr 5.1.3
pickleshare 0.7.5
Pillow 7.0.0
pip 20.2.2
plac 0.9.6
plotly 4.7.1
plotly-express 0.4.1
preshed 2.0.1
prometheus-client 0.7.1
prompt-toolkit 3.0.4
protobuf 3.11.3
psutil 5.4.7
py 1.8.0
pyasn1 0.4.8
pyasn1-modules 0.2.8
pycodestyle 2.6.0
pycparser 2.20
pyct 0.4.5
pyensae 1.3.839
pyexcel 0.5.8
pyexcel-io 0.5.7
Pygments 2.6.1
pykalman 0.9.5
PyMeeus 0.3.7
pymongo 3.8.0
pyOpenSSL 19.1.0
pyparsing 2.4.7
pypi 2.1
pyquickhelper 1.9.3418
pyrsistent 0.16.0
PySocks 1.7.1
pystan 2.19.1.1
python-dateutil 2.8.1
pytz 2019.3
pyviz-comms 0.7.2
PyWavelets 0.5.2
pywin32 227
pywinpty 0.5.7
PyYAML 5.3.1
pyzmq 18.1.1
qtconsole 4.4.4
rdt 0.2.1
RegscorePy 1.1
requests 2.23.0
requests-oauthlib 1.3.0
resampy 0.2.2
retrying 1.3.3
rsa 4.0
s3transfer 0.2.1
scikit-image 0.15.0
scikit-learn 0.23.2
scipy 1.4.1
sdv 0.3.2
seaborn 0.9.0
seasonal 0.3.1
Send2Trash 1.5.0
sentinelsat 0.12.2
setuptools 46.3.0
setuptools-git 1.2
six 1.14.0
sklearn 0.0
sortedcontainers 2.0.4
SoundFile 0.10.3.post1
soupsieve 2.0
spacy 2.1.8
srsly 0.1.0
statsmodels 0.9.0
stopit 1.1.2
sugartensor 1.0.0.2
ta 0.5.25
tb-nightly 1.14.0a20190603
tblib 1.3.2
tensorboard 2.3.0
tensorboard-plugin-wit 1.7.0
tensorflow-gpu 2.3.0
tensorflow-gpu-estimator 2.3.0
termcolor 1.1.0
terminado 0.8.3
testpath 0.4.4
text-unidecode 1.3
texttable 1.4.0
Theano 1.0.4
thinc 7.0.8
threadpoolctl 2.1.0
toml 0.10.1
toolz 0.10.0
torch 1.4.0
torchvision 0.5.0
tornado 6.0.4
TPOT 0.10.2
tqdm 4.45.0
traitlets 4.3.3
transforms3d 0.3.1
tsaug 0.2.1
typeguard 2.7.1
typing 3.6.6
update-checker 0.16
urllib3 1.22
utm 0.4.2
wasabi 0.2.2
wcwidth 0.1.9
webencodings 0.5.1
Werkzeug 1.0.1
wheel 0.34.2
widgetsnbextension 3.5.1
win-inet-pton 1.1.0
wincertstore 0.2
wrapt 1.11.2
xarray 0.10.8
xlrd 1.1.0
yahoo-historical 0.3.2
zict 0.1.3
zipp 2.2.0
I cannot reproduce your error; check whether the following code works for you:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, GRU, Bidirectional, AveragePooling1D, Dense, Flatten, Dropout
import numpy as np

def twds_model(layer1=32, layer2=32, layer3=16, dropout_rate=0.5, optimizer='Adam',
               learning_rate=0.001, activation='relu', loss='mse'):
    model = Sequential()
    model.add(Bidirectional(GRU(layer1, return_sequences=True),
                            input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(AveragePooling1D(2))
    model.add(Conv1D(layer2, 3, activation=activation, padding='same',
                     name='extractor'))
    model.add(Flatten())
    model.add(Dense(layer3, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(optimizer=optimizer, loss=loss)
    return model

if __name__ == '__main__':
    X_train = np.random.rand(1200, 18, 15)
    y_train = np.random.rand(1200, 18, 1)

    twds_model = twds_model()
    print(twds_model.summary())
    twds_model.fit(X_train, y_train, epochs=20)
Okay, here is what worked for me:
Tensorflow 2.3.0
Keras 2.4.2
CUDA 10.1
cuDNN 7.6.5
along with this code snippet, retrieved from this GitHub issue:
import tensorflow as tf
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # GPU = 0; set to -1 if the CPU should be used

gpus = tf.config.experimental.list_physical_devices('GPU')
cpus = tf.config.experimental.list_physical_devices('CPU')

if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
elif cpus:
    try:
        logical_cpus = tf.config.experimental.list_logical_devices('CPU')
        print(len(cpus), "Physical CPUs,", len(logical_cpus), "Logical CPUs")
    except RuntimeError as e:
        print(e)
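Note that this snippet has to run before the model is built or any tensor is placed on the GPU: memory growth can only be set while the GPUs are still uninitialized, otherwise the try/except above merely prints the resulting RuntimeError.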
Many thanks go to @Fallen Apart, who stayed with me. If you are curious, you might also want to take a brief look at my follow-up question here ;-).