Skopt gp_minimize eroor while looking for optimal point - tensorflow

I have a problem with gp_minimize from skopt. I'm doing hyperparameter tunning for transfer learning (base model vgg19). Everything works fine until gp_minimize starts to evaluate optimal point (evaluate random point works fine). I have this error, i spend few days on this and i still don't know what to do:
Traceback (most recent call last):
File "C:/Users/mea/Train_models/04_VGG19_train_model.py", line 144, in <module>
search_result = gp_minimize(func=fitness,
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\optimizer\gp.py", line 259, in gp_minimize
return base_minimize(
File "C:\Users\Wme\anaconda3\envs\Train_models\lib\site-packages\skopt\optimizer\base.py", line 300, in base_minimize
result = optimizer.tell(next_x, next_y)
File "C:\Users\mea\anaconda3\envs\Train_models\lib\site-packages\skopt\optimizer\optimizer.py", line 493, in tell
return self._tell(x, y, fit=fit)
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\optimizer\optimizer.py", line 552, in _tell
X = self.space.transform(self.space.rvs(
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\space.py", line 900, in rvs
columns.append(dim.rvs(n_samples=n_samples, random_state=rng))
File "C:\Users\Weronika Gramacka\anaconda3\envs\Train_models\lib\site-packages\skopt\space\space.py", line 698, in rvs
return self.inverse_transform(list(choices))
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\space.py", line 685, in inverse_transform
inv_transform = super(Categorical, self).inverse_transform(Xt)
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\space.py", line 168, in inverse_transform
return self.transformer.inverse_transform(Xt)
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\transformers.py", line 309, in inverse_transform
X = transformer.inverse_transform(X)
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\transformers.py", line 216, in inverse_transform
return [
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\transformers.py", line 217, in <listcomp>
self.inverse_mapping_[int(np.round(i))] for i in Xt
KeyError: 6
Process finished with exit code 1
Code is from tutorial and looks like this (only important parts):
dim_num_dense_layers = Integer(low=1, high=3, name='num_dense_layers')
dim_num_dense_nodes = Integer(low=60, high=1500, name='num_dense_nodes')
dim_activation = Categorical(categories=[ 'sigmoid', 'tanh', 'relu', 'softmax'],
name='activation')
dim_dropout = Real(low = 0.01, high = 0.4, name = 'dropout')
dim_init = Categorical(categories = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'], name = 'kernel_initializer')
dim_loss = Categorical(categories = ['categorical_crossentropy', 'categorical_hinge', 'mean_squared_error', 'huber_loss'], name = 'loss')
dimensions = [dim_num_dense_layers,
dim_num_dense_nodes,
dim_activation,
dim_dropout,
dim_init,
dim_loss]
default_parameters = [2, 600, 'relu', 0.2, 'uniform', 'huber_loss']
#use_named_args(dimensions=dimensions)
def fitness(num_dense_layers, num_dense_nodes, activation, dropout, kernel_initializer, loss):
# Print the hyper-parameters.
keras.backend.clear_session()
model = create_model(num_dense_layers, num_dense_nodes, activation, dropout, kernel_initializer, loss)
log_dir = "Tensor_board/04/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(
log_dir='log_dir',
histogram_freq=1,
write_graph=True,
write_grads=False,
write_images=False)
history = model.fit(train_data,
epochs=1,
batch_size=32,
validation_data=val_data,
callbacks=[tensorboard_callback])
accuracy = max(history.history['val_accuracy'])
global best_accuracy
if accuracy > best_accuracy:
model.save("Models/vgg19_flat.h5")
best_accuracy = accuracy
del model
gc.collect()
keras.backend.clear_session()
return -accuracy
checkpoint_saver = CheckpointSaver("./checkpoint.pkl", compress=9)
search_result = gp_minimize(func=fitness,
dimensions=dimensions,
acq_func='EI',
n_calls=30,
n_initial_points=1,
x0=default_parameters, verbose=True, callback=[checkpoint_saver])

Related

tf.dataset, multiple path inputs, and mapping per batch to load images

I'm loading a dataset with multiple input images. The input image paths should only be decoded at batch time, in order to handle a large dataset.
The data set is N image path inputs and M float outputs. The images for each input have different resolutions.
Data is ([img_input_1.png, img_input_2.png, ...], [0.65, 0.7, 0.8])
The model is using the Keras functional api in symbolic mode.
Here is the most recently EDITED code
from itertools import zip_longest
def read_image(path, shape):
try:
image = tf.io.read_file(path)
image = tf.image.decode_png(image)
image = tf.image.resize(image, [shape[1],shape[2]])
image /= 255.0
return image
except:
print('ERROR: preprocess_image: bad path', path)
def load_image(x, y, shp):
pout = [(k, x[k]) for k in x.keys()]
l1 = tf.convert_to_tensor(list(x))
l2 = tf.convert_to_tensor(list(x.values()))
pl = tf.map_fn(
lambda args: (read_image(args[0], shp), args[1]), [l1, l2], dtype=(tf.float32, tf.float32)
)
pl = {path: (pl[0][i], pl[1][i]) for i, path in enumerate(x)}
return (pl,y)
def dataset_prep(json_data, seq, batch_size):
# LOAD DATA FROM JSON
x,y = json_parse_x_y(json_data[seq])
xx = [*zip_longest(*x)] # NOTE: goes from variable sized input to {'input_N':...}
yy = [*zip_longest(*y)]
# GET SHAPES (hard coded atm)
lns = [[len(xxx)] for xxx in xx]
rzs = [[24,512,1],[96,512,1]] # TEMP TODO! grab grom [(v['h'],v['w'],v['c']) for v in xx]
shp = [*zip_longest(*[lns,rzs])]
shp = [list(s) for s in shp]
shp = [[*itertools.chain.from_iterable(s)] for s in shp]
xd = dict([[ "input_{}".format(i+1),np.array(y)] for i,y in [*enumerate(xx)]])
yd = dict([["output_{}".format(i+1),np.array(y)] for i,y in [*enumerate(yy)]])
ds = tf.data.Dataset.from_tensor_slices((xd, yd))
ds = ds.shuffle(10000)
ds = ds.repeat()
ds = ds.map(map_func=lambda x,y: load_image(x, y, shp), num_parallel_calls=AUTOTUNE)
ds = ds.batch(batch_size) if batch_size else ds
ds = ds.prefetch(AUTOTUNE)
return ds
This is the error I'm getting:
Traceback (most recent call last):
File "/home/me/.local/bin/wavfeat", line 11, in <module>
load_entry_point('wavfeat==0.1.0', 'console_scripts', 'wavfeat')()
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/__main__.py", line 91, in main
analysis_batch_sql(obj)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 50, in analysis_batch_sql
qy = [*map(lambda c: run_elm(c[0], c[1]), ch)]
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 50, in <lambda>
qy = [*map(lambda c: run_elm(c[0], c[1]), ch)]
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 23, in run_elm
out = fn(input, elm)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 196, in one_sec_onset_train
return train(input, elm)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 182, in train
ts = dataset_prep(jd, 'train', bc)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 123, in dataset_prep
ds = ds.map(map_func=lambda x,y: load_image(x, y, shp), num_parallel_calls=AUTOTUNE)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1146, in map
self, map_func, num_parallel_calls, preserve_cardinality=True)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3264, in __init__
use_legacy_function=use_legacy_function)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2591, in __init__
self._function = wrapper_fn._get_concrete_function_internal()
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1366, in _get_concrete_function_internal
*args, **kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1360, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1648, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1541, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 716, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2585, in wrapper_fn
ret = _wrapper_helper(*args)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2530, in _wrapper_helper
ret = func(*nested_args)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 123, in <lambda>
ds = ds.map(map_func=lambda x,y: load_image(x, y, shp), num_parallel_calls=AUTOTUNE)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_data_loader.py", line 91, in load_image
print("x['input_1'].values(): ", x['input_1'].values())
AttributeError: 'Tensor' object has no attribute 'values'
What am I doing that is preventing the paths from being loaded?
EDIT:
Attempting pandrey's fix, I'm getting input errors. Here is the data before from_tensor_slices and ds.map and then after:
pre_from_tensor_slices x: {'input_1': array(['/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/7388_39216_30--id=7388__sql_table=oac_1__sql_idx=405167__pitch=30__onset=39216.png',
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/2447_864_27--id=2447__sql_table=oac_1__sql_idx=415458__pitch=27__onset=864.png',
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/2386_20208_38--id=2386__sql_table=oac_1__sql_idx=433248__pitch=38__onset=20208.png',
...,
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/6261_24528_57--id=6261__sql_table=oac_1__sql_idx=449753__pitch=57__onset=24528.png',
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/3727_22944_31--id=3727__sql_table=oac_1__sql_idx=407620__pitch=31__onset=22944.png',
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/1668_7056_60--id=1668__sql_table=oac_1__sql_idx=381152__pitch=60__onset=7056.png'],
dtype='<U162'), 'input_2': array(['/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/7388_39216_30--id=7388__sql_table=oac_1__sql_idx=405167__pitch=30__onset=39216.png',
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/2447_864_27--id=2447__sql_table=oac_1__sql_idx=415458__pitch=27__onset=864.png',
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/2386_20208_38--id=2386__sql_table=oac_1__sql_idx=433248__pitch=38__onset=20208.png',
...,
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/6261_24528_57--id=6261__sql_table=oac_1__sql_idx=449753__pitch=57__onset=24528.png',
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/3727_22944_31--id=3727__sql_table=oac_1__sql_idx=407620__pitch=31__onset=22944.png',
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/1668_7056_60--id=1668__sql_table=oac_1__sql_idx=381152__pitch=60__onset=7056.png'],
dtype='<U162')}
pre_from_tensor_slices y: {'output_1': array([0.817, 0.018, 0.421, ..., 0.511, 0.478, 0.147])}
_________________________
y: {'output_1': <tf.Tensor 'args_2:0' shape=() dtype=float64>}
x: {'input_1': <tf.Tensor 'args_0:0' shape=() dtype=string>, 'input_2': <tf.Tensor 'args_1:0' shape=() dtype=string>}
x.values(): dict_values([<tf.Tensor 'args_0:0' shape=() dtype=string>, <tf.Tensor 'args_1:0' shape=() dtype=string>])
x['input_1']: Tensor("args_0:0", shape=(), dtype=string)
Running x['input_1'].values() throws an error: 'Tensor' object has no attribute 'values'
I get an error situated around map_fn
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/framework/constant_op.py", line 284, in _constant_impl
allow_broadcast=allow_broadcast))
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/framework/tensor_util.py", line 455, in make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
EDIT 2
Attempting the latest I get the following error
Traceback (most recent call last):
File "/home/me/.local/bin/wavfeat", line 11, in <module>
load_entry_point('wavfeat==0.1.0', 'console_scripts', 'wavfeat')()
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/__main__.py", line 91, in main
analysis_batch_sql(obj)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 50, in analysis_batch_sql
qy = [*map(lambda c: run_elm(c[0], c[1]), ch)]
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 50, in <lambda>
qy = [*map(lambda c: run_elm(c[0], c[1]), ch)]
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 23, in run_elm
out = fn(input, elm)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 216, in one_sec_onset_train
return train(input, elm)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 203, in train
vs = validation_prep(jd, 'validation', bc)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 176, in validation_prep
ds = ds.map(map_func=load_and_preprocess_from_path_label, num_parallel_calls=AUTOTUNE)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1146, in map
self, map_func, num_parallel_calls, preserve_cardinality=True)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3264, in __init__
use_legacy_function=use_legacy_function)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2591, in __init__
self._function = wrapper_fn._get_concrete_function_internal()
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1366, in _get_concrete_function_internal
*args, **kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1360, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1648, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1541, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 716, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2585, in wrapper_fn
ret = _wrapper_helper(*args)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2530, in _wrapper_helper
ret = func(*nested_args)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_data_loader.py", line 47, in load_and_preprocess_from_path_label
pl = dict([(pk, tf.map_fn(load_and_preprocess_image, po, dtype=tf.float32)) for pk,po in pout])
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_data_loader.py", line 47, in <listcomp>
pl = dict([(pk, tf.map_fn(load_and_preprocess_image, po, dtype=tf.float32)) for pk,po in pout])
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/ops/map_fn.py", line 214, in map_fn
raise ValueError("elems must be a 1+ dimensional Tensor, not a scalar")
ValueError: elems must be a 1+ dimensional Tensor, not a scalar
Add-on: not using dict structures
This is a full code (save for defining json_parse_x_y and declaring AUTOTUNE) to achieve what you are attempting without using dict structures.
I tested that make_dataset works (see example below), so if you encounter an issue it should be due to a specification error regarding load_tensors.
from itertools import zip_longest
import tensorflow as tf
# additionnally, `json_parse_x_y` must be defined
# and `AUTOTUNE` must be declared (in my example, I set it to 2)
def read_image(path, shape):
"""Read an image of givent filepath and tensor shape.
Return a float tensor of given shape.
"""
try:
image = tf.io.read_file(path)
image = tf.image.decode_png(image)
image = tf.image.resize(image, [shape[1], shape[2]])
image /= 255.0
return image
except:
raise FileNotFoundError("preprocess_image: bad path '%s'" % path)
def load_images(paths, shapes):
"""Load an ensemble of images (associated with a single sample).
paths : rank-1 string Tensor
shapes : list of images' shapes (same length as `paths`)
Return a tuple of float tensors containing the loaded images.
"""
return tuple((
read_image(paths[i], shapes[i])
for i in range(len(shapes))
))
def load_tensors(json_data, seq):
"""Load images descriptors from a json dump.
Return a tuple containing:
* a rank-2 tensor containing lists of image paths (str)
* a rank-2 tensor containing resolution values (float)
* a list of image shapes, of same length as the rank-2
tensor's second axis
"""
x,y = json_parse_x_y(json_data[seq])
xx = [*zip_longest(*x)] # NOTE: goes from variable sized input to {'input_N':...}
yy = [*zip_longest(*y)]
# GET SHAPES (hard coded atm)
lns = [[len(xxx)] for xxx in xx]
rzs = [[24,512,1],[96,512,1]] # TEMP TODO! grab grom [(v['h'],v['w'],v['c']) for v in xx]
shp = [*zip_longest(*[lns,rzs])]
shp = [list(s) for s in shp]
shp = [[*itertools.chain.from_iterable(s)] for s in shp]
return (xx, yy, shp)
def make_dataset(xx, yy, shp, batch_size):
"""Build a Dataset instance containing loaded images.
xx, yy, shp : see the specification of `load_tensors`'s outputs
batch_size : batch size to set on the Dataset
Return a Dataset instance where each batched sample is a tuple
containing two elements: first, a tuple containing N loaded images'
rank-3 tensors; second, a rank-1 tensor containing M float values.
(to be clear: batching adds a dimension to all those tensors)
"""
data = tf.data.Dataset.from_tensor_slices((xx, yy))
data = data.shuffle(10000)
data = data.map(lambda x, y: (load_images(x, shapes), y))
data = data.repeat()
data = data.batch(batch_size) if batch_size else data
data = data.prefetch(AUTOTUNE)
return data
def dataset_prep(json_data, seq, batch_size):
"""Full pipeline to making a Dataset from json."""
xx, yy, shapes = load_tensors(json_data, seq)
return make_dataset(xx, yy, shapes)
Example, using "hand-made' values ; all images are actually
this classic image, of shape [512, 512, 3].
import numpy as np
import tensorflow as tf
# import previous code
# Here, N = 2, and I make 2 samples.
x = tf.convert_to_tensor(np.array([
['image_1a.png', 'image_1b.png'],
['image_2a.png', 'image_2b.png']
]))
shapes = [[1, 512, 512], [1, 512, 512]] # images are initially [512, 512, 3]
# Here, M = 3, and I make 2 samples. Values are purely random.
y = tf.convert_to_tensor(np.array([
[.087, .92, .276],
[.242, .37, .205]
]))
# This should work.
data = make_dataset(x, y, shapes, batch_size=1)
# Output signature is <PrefetchDataset shapes:
# (((None, 512, 512, None), (None, 512, 512, None)), (None, 3)),
# types: ((tf.float32, tf.float32), tf.float64)
# >
# Where the first None is actually `batch_size`
# and the second is, in this case, 3.
Answer to the current question:
Okay, the problem you are now encountering is that the revised load_image function does not fit the specifications of the Dataset, hence the exception raising. Please find below a full edited code that seems to work (I ran a test using custom images on my computer, with xd / yd dict initialized to look like your reported x and y in-dataset tensors). It is not pretty, and I would personally advise to drop the dict structures, but it works:
from itertools import zip_longest
def read_image(path, shape):
try:
image = tf.io.read_file(path)
image = tf.image.decode_png(image)
image = tf.image.resize(image, [shape[1],shape[2]])
image /= 255.0
return image
except:
raise FileNotFoundError("preprocess_image: bad path '%s'" % path)
# CHANGED: load_image is actually useless
def dataset_prep(json_data, seq, batch_size):
# LOAD DATA FROM JSON
x,y = json_parse_x_y(json_data[seq])
xx = [*zip_longest(*x)] # NOTE: goes from variable sized input to {'input_N':...}
yy = [*zip_longest(*y)]
# GET SHAPES (hard coded atm)
lns = [[len(xxx)] for xxx in xx]
rzs = [[24,512,1],[96,512,1]] # TEMP TODO! grab grom [(v['h'],v['w'],v['c']) for v in xx]
shp = [*zip_longest(*[lns,rzs])]
shp = [list(s) for s in shp]
shp = [[*itertools.chain.from_iterable(s)] for s in shp]
xd = dict([[ "input_{}".format(i+1),np.array(y)] for i,y in [*enumerate(xx)]])
yd = dict([["output_{}".format(i+1),np.array(y)] for i,y in [*enumerate(yy)]])
ds = tf.data.Dataset.from_tensor_slices((xd, yd))
ds = ds.shuffle(10000)
# CHANGED: the following line, to run images import (also moved epeat instruction later)
ds = ds.map(
lambda x, y: (
{key: read_image(path, shp[i]) for i, (key, path) in enumerate(x.items())},
y
),
num_parallel_calls=AUTOTUNE
)
ds = ds.repeat()
ds = ds.batch(batch_size) if batch_size else ds
ds = ds.prefetch(AUTOTUNE)
return ds
Initial answer (before question edit):
I will only deal with the exception raised by load_image in this answer, but there might be additional work to perform on the rest - I did not test for that, not having a convenient dataset at hand.
The exception message is actually quite explicit: you are passing a scalar element (e.g. n in [(k, tf.map_fn(lambda x: read_image(x, shp), n, dtype=tf.float32)) for k,n in pout]) as elems argument to tf.map_fn, when it expects a tensor (or (possibly nested) list or tuple of tensors), as clearly specified in its documentation.
You are also using tf.map_fn the wrong way in the quoted line of code, because basically you are mixing it up with a python intention list, when you should use either one or the other.
With intention list (also replacing the useless previous lines of the load_image function):
pl = {path: (load_image(path, shp), res) for path, res in x.items()}
With tf.map_fn:
# Read all images, return two tensors, one with images, the other with resolutions.
# (so, resolutions inclusion in this is actually useless and should be redesigned)
pl = tf.map_fn(
lambda args: (read_image(args[0], shp), args[1]),
[tf.convert_to_tensor(list(x)), tf.convert_to_tensor(list(x.values()))],
dtype=(tf.float32, tf.float32)
)
# If you really, really want to return a dict, but is it an optimal design?
pl = {path: (pl[0][i], pl[1][i]) for i, path in enumerate(x)}
I do not know whether returning a dict specified in this way is optimal (or even compatible) with Dataset instantiation, however if the rest of your code is working, this should do the trick.
At any rate, if you want to iterate over a dict, go ahead and use either the first version or a modified version of the second one (which may have the advantage of parallelizing images reading).
I hope this helps :-)

Pytorch detach() function failed to be excuated on different GPU severs

Recently, our lab bought a new server with 9 GPUs and I want to run my programming on this machine. However, I do not change my right code and I got an unexpected error like the following.
THCudaCheck FAIL file=/opt/conda/conda-bld/pytorch_1535491974311/work/aten/src/THC/THCGeneral.cpp line=663 error=11 : invalid argument
Traceback (most recent call last):
File "main.py", line 166, in <module>
p_img.copy_(netG(p_z).detach())
File "/usr/local/anaconda3/envs/tensorflow/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
result = self.forward(*input, **kwargs)
File "/home/szhangcj/python/GBGAN/celebA_attention/sagan_models.py", line 100, in forward
out,p1 = self.attn1(out)
File "/usr/local/anaconda3/envs/tensorflow/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
result = self.forward(*input, **kwargs)
File "/home/szhangcj/python/GBGAN/celebA_attention/sagan_models.py", line 32, in forward
energy = torch.bmm(proj_query,proj_key) # transpose check
RuntimeError: cublas runtime error : the GPU program failed to execute at /opt/conda/conda-bld/pytorch_1535491974311/work/aten/src/THC/THCBlas.cu:411
However, I can run my programming successfully on the old machine with 4 GPUs. I am not sure what the problem is and it seems that the error is caused by the detach() function. My code is as the following.
z_b = torch.FloatTensor(opt.batch_size, opt.z_dim).to(device)
img_b = torch.FloatTensor(opt.batch_size, 3, 64, 64).to(device)
img_a = torch.FloatTensor(opt.batch_size, 3, 64, 64).to(device)
p_z = torch.FloatTensor(pool_size, opt.z_dim).to(device)
p_img = torch.FloatTensor(pool_size, 3, 64, 64).to(device)
## evaluation placeholder
show_z_b = torch.FloatTensor(100, opt.z_dim).to(device)
eval_z_b = torch.FloatTensor(250, opt.z_dim).to(device) # 250/batch * 120 --> 300000
optim_D = optim.Adam(netD.parameters(), lr=opt.lr_d) # other param?
optim_G = optim.Adam(netG.parameters(), lr=opt.lr_g) #?suitable
criterion_G = nn.MSELoss()
eta = 1
loss_GD = []
pre_loss = 0
cur_loss = 0
G_epoch = 1
for epoch in range(start_epoch, start_epoch + opt.num_epoch):
print('Start epoch: %d' % epoch)
## input_pool: [pool_size, opt.z_dim] -> [pool_size, 32, 32]
netD.train()
netG.eval()
p_z.normal_()
# print(netG(p_z).detach().size())
p_img.copy_(netG(p_z).detach())
for t in range(opt.period):
for _ in range(opt.dsteps):
t = time.time()
### Update D
netD.zero_grad()
## real
real_img, _ = next(iter(dataloader)) # [batch_size, 1, 32, 32]
img_b.copy_(real_img.squeeze().to(device))
real_D_err = torch.log(1 + torch.exp(-netD(img_b))).mean()
print("D real loss", netD(img_b).mean())
# real_D_err.backward()
## fake
z_b_idx = random.sample(range(pool_size), opt.batch_size)
img_a.copy_(p_img[z_b_idx])
fake_D_err = torch.log(1 + torch.exp(netD(img_a))).mean() # torch scalar[]
loss_gp = calc_gradient_penalty(netD, img_b, img_a)
total_loss = real_D_err + fake_D_err + loss_gp
print("D fake loss", netD(img_a).mean())
total_loss.backward()
optim_D.step()
## update input pool
p_img_t = p_img.clone().to(device)
p_img_t.requires_grad_(True)
if p_img_t.grad is not None:
p_img_t.grad.zero_()
fake_D_score = netD(p_img_t)
fake_D_score.backward(torch.ones(len(p_img_t)).to(device))
p_img = img_truncate(p_img + eta * p_img_t.grad)
print("The mean of gradient", torch.mean(p_img_t.grad))
The error is caused by the version mismatching between the RTX GPU cards and the CUDA driver.

DecodeError: Truncated message.while running Graph.ParseFromString()

I downloaded the code from tensorflow/tensorflow/image_retraining/retrain.py and modified a little to retrain the model based on my need(like path to find training images folder and location to save the model and labels etc). While running the retrain.py file I get the following message towards the end of the execution
An exception has occurred, use %tb to see the full traceback.
SystemExit
On viewing the stack trace using %tb I get
Traceback (most recent call last):
File "<ipython-input-11-06ad74d82e7c>", line 1, in <module>
runfile('C:/Users/Srikanth1.R/Desktop/Desktop/My_Folder/Inage analytics/hub-master/examples/image_retraining/retrain.py', wdir='C:/Users/Srikanth1.R/Desktop/Desktop/My_Folder/Inage analytics/hub-master/examples/image_retraining')
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Srikanth1.R/Desktop/Desktop/My_Folder/Inage analytics/hub-master/examples/image_retraining/retrain.py", line 2424, in <module>
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\platform\app.py", line 134, in run
SystemExit
But although I get a model.pb file.
I used this file in my prediction of new images. But while Parsing the model.pb file the console is throwing the following error
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Srikanth1.R/Desktop/Desktop/My_Folder/Car_damage_prediction/tensorflow-master/tensorflow-master/tensorflow/examples/label_image/label_image.py", line 117, in <module>
graph = load_graph(model_file)
File "C:/Users/Srikanth1.R/Desktop/Desktop/My_Folder/Car_damage_prediction/tensorflow-master/tensorflow-master/tensorflow/examples/label_image/label_image.py", line 31, in load_graph
graph_def.ParseFromString(f.read())
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\message.py", line 185, in ParseFromString
self.MergeFromString(serialized)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\python_message.py", line 1083, in MergeFromString
if self._InternalParse(serialized, 0, length) != length:
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\python_message.py", line 1120, in InternalParse
pos = field_decoder(buffer, new_pos, end, self, field_dict)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\decoder.py", line 633, in DecodeField
if value._InternalParse(buffer, pos, new_pos) != new_pos:
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\python_message.py", line 1120, in InternalParse
pos = field_decoder(buffer, new_pos, end, self, field_dict)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\decoder.py", line 612, in DecodeRepeatedField
if value.add()._InternalParse(buffer, pos, new_pos) != new_pos:
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\python_message.py", line 1120, in InternalParse
pos = field_decoder(buffer, new_pos, end, self, field_dict)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\decoder.py", line 743, in DecodeMap
if submsg._InternalParse(buffer, pos, new_pos) != new_pos:
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\python_message.py", line 1109, in InternalParse
new_pos = local_SkipField(buffer, new_pos, end, tag_bytes)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\decoder.py", line 850, in SkipField
return WIRETYPE_TO_SKIPPER[wire_type](buffer, pos, end)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\decoder.py", line 799, in _SkipGroup
new_pos = SkipField(buffer, pos, end, tag_bytes)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\decoder.py", line 850, in SkipField
return WIRETYPE_TO_SKIPPER[wire_type](buffer, pos, end)
File "C:\Users\Srikanth1.R\AppData\Local\Continuum\anaconda3\lib\site-packages\google\protobuf\internal\decoder.py", line 814, in _SkipFixed32
raise _DecodeError('Truncated message.')
DecodeError: Truncated message.
This is my code for prediction on test images
def load_graph(model_file):
graph = tf.Graph()
graph_def = tf.GraphDef()
with open(model_file, "rb") as f:
graph_def.ParseFromString(f.read())
with graph.as_default():
tf.import_graph_def(graph_def)
return graph
def read_tensor_from_image_file(file_name,
input_height=299,
input_width=299,
input_mean=0,
input_std=255):
input_name = "file_reader"
output_name = "normalized"
file_reader = tf.read_file(file_name, input_name)
if file_name.endswith(".png"):
image_reader = tf.image.decode_png(
file_reader, channels=3, name="png_reader")
elif file_name.endswith(".gif"):
image_reader = tf.squeeze(
tf.image.decode_gif(file_reader, name="gif_reader"))
elif file_name.endswith(".bmp"):
image_reader = tf.image.decode_bmp(file_reader, name="bmp_reader")
else:
image_reader = tf.image.decode_jpeg(
file_reader, channels=3, name="jpeg_reader")
float_caster = tf.cast(image_reader, tf.float32)
dims_expander = tf.expand_dims(float_caster, 0)
resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
sess = tf.Session()
result = sess.run(normalized)
return result
def load_labels(label_file):
label = []
proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()
for l in proto_as_ascii_lines:
label.append(l.rstrip())
return label
if __name__ == "__main__":
file_name = "C:\\Users\\Srikanth1.R\\Desktop\\Car Images\\car.jpg"
model_file = "C:\\Users\\Srikanth1.R\\Desktop\\Desktop\\My_Folder\\Inage analytics\\hub-master\\examples\\image_retraining\\tmp\\saved_model\\saved_model.pb"
label_file = "C:\\Users\\Srikanth1.R\\Desktop\\Desktop\\My_Folder\\Inage analytics\\hub-master\\examples\\image_retraining\\tmp\\output_labels.txt"
input_height = 299
input_width = 299
input_mean = 0
input_std = 255
input_layer = "input"
output_layer = "InceptionV3/Predictions/Reshape_1"
parser = argparse.ArgumentParser()
parser.add_argument("--image", help="image to be processed")
parser.add_argument("--graph", help="graph/model to be executed")
parser.add_argument("--labels", help="name of file containing labels")
parser.add_argument("--input_height", type=int, help="input height")
parser.add_argument("--input_width", type=int, help="input width")
parser.add_argument("--input_mean", type=int, help="input mean")
parser.add_argument("--input_std", type=int, help="input std")
parser.add_argument("--input_layer", help="name of input layer")
parser.add_argument("--output_layer", help="name of output layer")
args = parser.parse_args()
if args.graph:
model_file = args.graph
if args.image:
file_name = args.image
if args.labels:
label_file = args.labels
if args.input_height:
input_height = args.input_height
if args.input_width:
input_width = args.input_width
if args.input_mean:
input_mean = args.input_mean
if args.input_std:
input_std = args.input_std
if args.input_layer:
input_layer = args.input_layer
if args.output_layer:
output_layer = args.output_layer
graph = load_graph(model_file)
t = read_tensor_from_image_file(
file_name,
input_height=input_height,
input_width=input_width,
input_mean=input_mean,
input_std=input_std)
input_name = "import/" + input_layer
output_name = "import/" + output_layer
input_operation = graph.get_operation_by_name(input_name)
output_operation = graph.get_operation_by_name(output_name)
with tf.Session(graph=graph) as sess:
results = sess.run(output_operation.outputs[0], {
input_operation.outputs[0]: t
})
results = np.squeeze(results)
top_k = results.argsort()[-5:][::-1]
labels = load_labels(label_file)
for i in top_k:
print(labels[i], results[i])
Is the above mentioned DecodeError in some way related to the error I get while running retrain.py ?
Or are both the errors independent ?
Can anyone please tell me how to solve the above errors?
Thank you in advance ?
You can also use below mentioned approach for prediction,
with tf.Session(graph=tf.Graph()) as sess:
tf.saved_model.loader.load(
sess, [tf.saved_model.tag_constants.SERVING], <path for .pb file>)
sess.run(...)
If you still want to use,
def load_graph(model_file):
graph = tf.Graph()
graph_def = tf.GraphDef()
with open(model_file, "rb") as f:
graph_def.ParseFromString(f.read())
with graph.as_default():
tf.import_graph_def(graph_def)
return graph
make sure model_file, should be a frozen graph.
Please refer link, Frozen Graph for more details.

No op named GatherTree when using BeamSearchDecoder

I'm implementing a Seq2Seq model with TensorFlow. My code works using the Greedy Decoder, but when I was using BeamSearchDecoder to improve the performance, I encountered this error:
Traceback (most recent call last):
File "/Users/MichaelChen/Projects/CN-isA-Relation-Extraction/isa_seq2seq/predict.py", line 83, in <module>
out_file='result/result_wc_4.out', checkpoint=checkpoint)
File "/Users/MichaelChen/Projects/CN-isA-Relation-Extraction/isa_seq2seq/predict.py", line 48, in predict
loader = tf.train.import_meta_graph(checkpoint + '.meta')
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1686, in import_meta_graph
**kwargs)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/meta_graph.py", line 504, in import_scoped_meta_graph
producer_op_list=producer_op_list)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/importer.py", line 283, in import_graph_def
raise ValueError('No op named %s in defined operations.' % node.op)
ValueError: No op named GatherTree in defined operations.
This error occurred when I used the infer module to generate
outputs:
with tf.Session(graph=loaded_graph) as sess:
loader = tf.train.import_meta_graph(checkpoint + '.meta')
loader.restore(sess, checkpoint)
input_data = loaded_graph.get_tensor_by_name('inputs:0')
logits = loaded_graph.get_tensor_by_name('predictions:0')
src_seq_len = loaded_graph.get_tensor_by_name('source_sequence_length:0')
tgt_seq_len = loaded_graph.get_tensor_by_name('target_sequence_length:0')
for i in range(len(text)):
if len(text[i].strip()) < 1:
continue
text_seq = src2seq_word(text[i], True)
answer_logits = sess.run(logits, {input_data: [text_seq] * batch_size,
tgt_seq_len: [len(text_seq)] * batch_size,
src_seq_len: [len(text_seq)] * batch_size}
)[0]
pred_res = "".join([pp.id2c[i] for i in answer_logits if i != pad and i != eos])
Program failed at loader = tf.train.import_meta_graph(checkpoint + '.meta')
I don't know if I handle the outputs of the decoder right, so here is the corresponding code:
# 5. Predicting decoder
# Share params with Training Deocder
tiled_dec_start_state = tf.contrib.seq2seq.tile_batch(encoder_state, beam_width)
tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(encoder_outputs, beam_width)
tiled_src_seq_len = tf.contrib.seq2seq.tile_batch(src_seq_len, beam_width)
with tf.variable_scope('decode', reuse=True):
batch_size_tensor = tf.constant(batch_size)
beam_decoder_cell = get_decoder_cell(tiled_encoder_outputs, tiled_src_seq_len, 2 * num_units)
beam_initial_state = beam_decoder_cell.zero_state(batch_size_tensor * beam_width, tf.float32)
beam_initial_state = beam_initial_state.clone(cell_state=tiled_dec_start_state)
start_tokens = tf.tile(tf.constant([c2id['<GO>']], dtype=tf.int32), [batch_size], name='start_tokens')
predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
cell=beam_decoder_cell,
embedding=decoder_embeddings,
start_tokens=start_tokens,
end_token=c2id['<EOS>'],
initial_state=beam_initial_state,
beam_width=beam_width,
output_layer=output_layer
)
predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=predicting_decoder, maximum_iterations=max_tgt_seq_len)
Handling outputs:
training_decoder_output, predicting_decoder_output = seq2seq_model(params...)
training_logits = tf.identity(training_decoder_output.rnn_output, name='logits')
predicting_logits = tf.identity(predicting_decoder_output.predicted_ids[:,:,0], name='predictions')
Also, I found something in the nmt model in
https://github.com/tensorflow/nmt/blob/77e6c55052ba31a8d733c94bb820d091c8156d35/nmt/model.py (line 391)
if beam_width > 0:
logits = tf.no_op()
sample_id = outputs.predicted_ids
else:
logits = outputs.rnn_output
sample_id = outputs.sample_id
Is this has something to do with my error?
Thanks in advance!

Export Model For Serving But Tensor Type Dismatch

What related GitHub issues or StackOverflow threads have you found by searching the web for your problem?
I am trying to export the model for serving , but it's report type error about inputs tensor.
but in the export and predict part , the inputs are the same type.
If possible, provide a minimal reproducible example (We usually don't have time to read hundreds of lines of your code)
here is a sample code for my exporting
```
named_graph_signature = {
'inputs': exporter.generic_signature({
'sparse_index': tf.placeholder(tf.int64, name="feature_index")
'sparse_ids': tf.placeholder(tf.int64,name = "feature_ids"),
'sparse_values':tf.placeholder(tf.int64, name ="feature_values"),
'sparse_shape':tf.placeholder(tf.int64, name="feature_shape")
}),
'outputs': exporter.generic_signature({
'prob': inference_softmax
})}
model_exporter.init(
sess.graph.as_graph_def(),
#default_graph_signature=named_graph_signature,
named_graph_signatures=named_graph_signature,
init_op=init_op)
model_exporter.export(export_path, tf.constant(export_version), sess)
print('Done exporting!')
```
here is my code for predicting
```
ins = "0 142635:1 250810:1 335229:1 375278:1 392970:1 506983:1 554566:1 631968:1 647823:1 658803:1 733446:1 856305:1 868202:1"
FEATURE_SIZE = 1000000
tokens = ins.split(" ")
feature_num = 0
feature_ids = []
feature_values = []
feature_index = []
for feature in tokens[1:]:
feature_id, feature_value = feature.split(":")
feature_ids.append(int(feature_id))
feature_values.append(float(feature_value))
feature_index.append([1, feature_num])
feature_num += 1
feature_shape = [1, FEATURE_SIZE]
sparse_index = tf.contrib.util.make_tensor_proto(numpy.asarray(feature_index), dtype=tf.int64)
sparse_ids = tf.contrib.util.make_tensor_proto(numpy.asarray(feature_ids), dtype=tf.int64)
sparse_values = tf.contrib.util.make_tensor_proto(numpy.asarray(feature_values), dtype=tf.float32)
sparse_shape= tf.contrib.util.make_tensor_proto(numpy.asarray(feature_shape), dtype=tf.int64)
channel = implementations.insecure_channel(host, port)
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
request = predict_pb2.PredictRequest()
request.model_spec.name = model_name
request.model_spec.version.value = model_version
print model_name,model_version
request.inputs['sparse_index'].CopyFrom(sparse_index)
request.inputs['sparse_ids'].CopyFrom(sparse_ids)
request.inputs['sparse_values'].CopyFrom(sparse_values)
request.inputs['sparse_shape'].CopyFrom(sparse_shape)
# Send request
result = stub.Predict(request, request_timeout)
```
Logs or other output that would be helpful
(If logs are large, please upload as attachment or provide link).
```
Traceback (most recent call last):
File "run.py", line 63, in <module>
main()
File "run.py", line 59, in main
result = stub.Predict(request, request_timeout)
File "/home/serving/.local/lib/python2.7/site-packages/grpc/beta/_client_adaptations.py", line 305, in __call__
self._request_serializer, self._response_deserializer)
File "/home/serving/.local/lib/python2.7/site-packages/grpc/beta/_client_adaptations.py", line 203, in _blocking_unary_unary
raise _abortion_error(rpc_error_call)
grpc.framework.interfaces.face.face.AbortionError: AbortionError(code=StatusCode.INVALID_ARGUMENT, details="input size does not match signature")
```