tf.dataset, multiple path inputs, and mapping per batch to load images - tensorflow

I'm loading a dataset with multiple input images. The input image paths should only be decoded at batch time, in order to handle a large dataset.
The data set is N image path inputs and M float outputs. The images for each input have different resolutions.
Data is ([img_input_1.png, img_input_2.png, ...], [0.65, 0.7, 0.8])
The model is using the Keras functional api in symbolic mode.
Here is the most recently EDITED code
from itertools import zip_longest
def read_image(path, shape):
try:
image = tf.io.read_file(path)
image = tf.image.decode_png(image)
image = tf.image.resize(image, [shape[1],shape[2]])
image /= 255.0
return image
except:
print('ERROR: preprocess_image: bad path', path)
def load_image(x, y, shp):
pout = [(k, x[k]) for k in x.keys()]
l1 = tf.convert_to_tensor(list(x))
l2 = tf.convert_to_tensor(list(x.values()))
pl = tf.map_fn(
lambda args: (read_image(args[0], shp), args[1]), [l1, l2], dtype=(tf.float32, tf.float32)
)
pl = {path: (pl[0][i], pl[1][i]) for i, path in enumerate(x)}
return (pl,y)
def dataset_prep(json_data, seq, batch_size):
# LOAD DATA FROM JSON
x,y = json_parse_x_y(json_data[seq])
xx = [*zip_longest(*x)] # NOTE: goes from variable sized input to {'input_N':...}
yy = [*zip_longest(*y)]
# GET SHAPES (hard coded atm)
lns = [[len(xxx)] for xxx in xx]
rzs = [[24,512,1],[96,512,1]] # TEMP TODO! grab grom [(v['h'],v['w'],v['c']) for v in xx]
shp = [*zip_longest(*[lns,rzs])]
shp = [list(s) for s in shp]
shp = [[*itertools.chain.from_iterable(s)] for s in shp]
xd = dict([[ "input_{}".format(i+1),np.array(y)] for i,y in [*enumerate(xx)]])
yd = dict([["output_{}".format(i+1),np.array(y)] for i,y in [*enumerate(yy)]])
ds = tf.data.Dataset.from_tensor_slices((xd, yd))
ds = ds.shuffle(10000)
ds = ds.repeat()
ds = ds.map(map_func=lambda x,y: load_image(x, y, shp), num_parallel_calls=AUTOTUNE)
ds = ds.batch(batch_size) if batch_size else ds
ds = ds.prefetch(AUTOTUNE)
return ds
This is the error I'm getting:
Traceback (most recent call last):
File "/home/me/.local/bin/wavfeat", line 11, in <module>
load_entry_point('wavfeat==0.1.0', 'console_scripts', 'wavfeat')()
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/__main__.py", line 91, in main
analysis_batch_sql(obj)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 50, in analysis_batch_sql
qy = [*map(lambda c: run_elm(c[0], c[1]), ch)]
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 50, in <lambda>
qy = [*map(lambda c: run_elm(c[0], c[1]), ch)]
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 23, in run_elm
out = fn(input, elm)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 196, in one_sec_onset_train
return train(input, elm)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 182, in train
ts = dataset_prep(jd, 'train', bc)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 123, in dataset_prep
ds = ds.map(map_func=lambda x,y: load_image(x, y, shp), num_parallel_calls=AUTOTUNE)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1146, in map
self, map_func, num_parallel_calls, preserve_cardinality=True)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3264, in __init__
use_legacy_function=use_legacy_function)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2591, in __init__
self._function = wrapper_fn._get_concrete_function_internal()
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1366, in _get_concrete_function_internal
*args, **kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1360, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1648, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1541, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 716, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2585, in wrapper_fn
ret = _wrapper_helper(*args)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2530, in _wrapper_helper
ret = func(*nested_args)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 123, in <lambda>
ds = ds.map(map_func=lambda x,y: load_image(x, y, shp), num_parallel_calls=AUTOTUNE)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_data_loader.py", line 91, in load_image
print("x['input_1'].values(): ", x['input_1'].values())
AttributeError: 'Tensor' object has no attribute 'values'
What am I doing that is preventing the paths from being loaded?
EDIT:
Attempting pandrey's fix, I'm getting input errors. Here is the data before from_tensor_slices and ds.map and then after:
pre_from_tensor_slices x: {'input_1': array(['/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/7388_39216_30--id=7388__sql_table=oac_1__sql_idx=405167__pitch=30__onset=39216.png',
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/2447_864_27--id=2447__sql_table=oac_1__sql_idx=415458__pitch=27__onset=864.png',
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/2386_20208_38--id=2386__sql_table=oac_1__sql_idx=433248__pitch=38__onset=20208.png',
...,
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/6261_24528_57--id=6261__sql_table=oac_1__sql_idx=449753__pitch=57__onset=24528.png',
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/3727_22944_31--id=3727__sql_table=oac_1__sql_idx=407620__pitch=31__onset=22944.png',
'/media/me/sp_data/sp_data/datasets/chr_01/one_sec_onset_11_oac-leg/1668_7056_60--id=1668__sql_table=oac_1__sql_idx=381152__pitch=60__onset=7056.png'],
dtype='<U162'), 'input_2': array(['/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/7388_39216_30--id=7388__sql_table=oac_1__sql_idx=405167__pitch=30__onset=39216.png',
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/2447_864_27--id=2447__sql_table=oac_1__sql_idx=415458__pitch=27__onset=864.png',
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/2386_20208_38--id=2386__sql_table=oac_1__sql_idx=433248__pitch=38__onset=20208.png',
...,
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/6261_24528_57--id=6261__sql_table=oac_1__sql_idx=449753__pitch=57__onset=24528.png',
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/3727_22944_31--id=3727__sql_table=oac_1__sql_idx=407620__pitch=31__onset=22944.png',
'/media/me/sp_data/sp_data/datasets/mel_01/one_sec_onset_11_oac-leg/1668_7056_60--id=1668__sql_table=oac_1__sql_idx=381152__pitch=60__onset=7056.png'],
dtype='<U162')}
pre_from_tensor_slices y: {'output_1': array([0.817, 0.018, 0.421, ..., 0.511, 0.478, 0.147])}
_________________________
y: {'output_1': <tf.Tensor 'args_2:0' shape=() dtype=float64>}
x: {'input_1': <tf.Tensor 'args_0:0' shape=() dtype=string>, 'input_2': <tf.Tensor 'args_1:0' shape=() dtype=string>}
x.values(): dict_values([<tf.Tensor 'args_0:0' shape=() dtype=string>, <tf.Tensor 'args_1:0' shape=() dtype=string>])
x['input_1']: Tensor("args_0:0", shape=(), dtype=string)
Running x['input_1'].values() throws an error: 'Tensor' object has no attribute 'values'
I get an error situated around map_fn
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/framework/constant_op.py", line 284, in _constant_impl
allow_broadcast=allow_broadcast))
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/framework/tensor_util.py", line 455, in make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
EDIT 2
Attempting the latest I get the following error
Traceback (most recent call last):
File "/home/me/.local/bin/wavfeat", line 11, in <module>
load_entry_point('wavfeat==0.1.0', 'console_scripts', 'wavfeat')()
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/__main__.py", line 91, in main
analysis_batch_sql(obj)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 50, in analysis_batch_sql
qy = [*map(lambda c: run_elm(c[0], c[1]), ch)]
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 50, in <lambda>
qy = [*map(lambda c: run_elm(c[0], c[1]), ch)]
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/analysis_run_csv.py", line 23, in run_elm
out = fn(input, elm)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 216, in one_sec_onset_train
return train(input, elm)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 203, in train
vs = validation_prep(jd, 'validation', bc)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_onset.py", line 176, in validation_prep
ds = ds.map(map_func=load_and_preprocess_from_path_label, num_parallel_calls=AUTOTUNE)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1146, in map
self, map_func, num_parallel_calls, preserve_cardinality=True)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3264, in __init__
use_legacy_function=use_legacy_function)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2591, in __init__
self._function = wrapper_fn._get_concrete_function_internal()
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1366, in _get_concrete_function_internal
*args, **kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1360, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1648, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1541, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 716, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2585, in wrapper_fn
ret = _wrapper_helper(*args)
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2530, in _wrapper_helper
ret = func(*nested_args)
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_data_loader.py", line 47, in load_and_preprocess_from_path_label
pl = dict([(pk, tf.map_fn(load_and_preprocess_image, po, dtype=tf.float32)) for pk,po in pout])
File "/home/me/.local/lib/python3.6/site-packages/wavfeat/one_sec_data_loader.py", line 47, in <listcomp>
pl = dict([(pk, tf.map_fn(load_and_preprocess_image, po, dtype=tf.float32)) for pk,po in pout])
File "/home/me/.local/lib/python3.6/site-packages/tensorflow/python/ops/map_fn.py", line 214, in map_fn
raise ValueError("elems must be a 1+ dimensional Tensor, not a scalar")
ValueError: elems must be a 1+ dimensional Tensor, not a scalar

Add-on: not using dict structures
This is a full code (save for defining json_parse_x_y and declaring AUTOTUNE) to achieve what you are attempting without using dict structures.
I tested that make_dataset works (see example below), so if you encounter an issue it should be due to a specification error regarding load_tensors.
from itertools import zip_longest
import tensorflow as tf
# additionnally, `json_parse_x_y` must be defined
# and `AUTOTUNE` must be declared (in my example, I set it to 2)
def read_image(path, shape):
"""Read an image of givent filepath and tensor shape.
Return a float tensor of given shape.
"""
try:
image = tf.io.read_file(path)
image = tf.image.decode_png(image)
image = tf.image.resize(image, [shape[1], shape[2]])
image /= 255.0
return image
except:
raise FileNotFoundError("preprocess_image: bad path '%s'" % path)
def load_images(paths, shapes):
"""Load an ensemble of images (associated with a single sample).
paths : rank-1 string Tensor
shapes : list of images' shapes (same length as `paths`)
Return a tuple of float tensors containing the loaded images.
"""
return tuple((
read_image(paths[i], shapes[i])
for i in range(len(shapes))
))
def load_tensors(json_data, seq):
"""Load images descriptors from a json dump.
Return a tuple containing:
* a rank-2 tensor containing lists of image paths (str)
* a rank-2 tensor containing resolution values (float)
* a list of image shapes, of same length as the rank-2
tensor's second axis
"""
x,y = json_parse_x_y(json_data[seq])
xx = [*zip_longest(*x)] # NOTE: goes from variable sized input to {'input_N':...}
yy = [*zip_longest(*y)]
# GET SHAPES (hard coded atm)
lns = [[len(xxx)] for xxx in xx]
rzs = [[24,512,1],[96,512,1]] # TEMP TODO! grab grom [(v['h'],v['w'],v['c']) for v in xx]
shp = [*zip_longest(*[lns,rzs])]
shp = [list(s) for s in shp]
shp = [[*itertools.chain.from_iterable(s)] for s in shp]
return (xx, yy, shp)
def make_dataset(xx, yy, shp, batch_size):
"""Build a Dataset instance containing loaded images.
xx, yy, shp : see the specification of `load_tensors`'s outputs
batch_size : batch size to set on the Dataset
Return a Dataset instance where each batched sample is a tuple
containing two elements: first, a tuple containing N loaded images'
rank-3 tensors; second, a rank-1 tensor containing M float values.
(to be clear: batching adds a dimension to all those tensors)
"""
data = tf.data.Dataset.from_tensor_slices((xx, yy))
data = data.shuffle(10000)
data = data.map(lambda x, y: (load_images(x, shapes), y))
data = data.repeat()
data = data.batch(batch_size) if batch_size else data
data = data.prefetch(AUTOTUNE)
return data
def dataset_prep(json_data, seq, batch_size):
"""Full pipeline to making a Dataset from json."""
xx, yy, shapes = load_tensors(json_data, seq)
return make_dataset(xx, yy, shapes)
Example, using "hand-made' values ; all images are actually
this classic image, of shape [512, 512, 3].
import numpy as np
import tensorflow as tf
# import previous code
# Here, N = 2, and I make 2 samples.
x = tf.convert_to_tensor(np.array([
['image_1a.png', 'image_1b.png'],
['image_2a.png', 'image_2b.png']
]))
shapes = [[1, 512, 512], [1, 512, 512]] # images are initially [512, 512, 3]
# Here, M = 3, and I make 2 samples. Values are purely random.
y = tf.convert_to_tensor(np.array([
[.087, .92, .276],
[.242, .37, .205]
]))
# This should work.
data = make_dataset(x, y, shapes, batch_size=1)
# Output signature is <PrefetchDataset shapes:
# (((None, 512, 512, None), (None, 512, 512, None)), (None, 3)),
# types: ((tf.float32, tf.float32), tf.float64)
# >
# Where the first None is actually `batch_size`
# and the second is, in this case, 3.
Answer to the current question:
Okay, the problem you are now encountering is that the revised load_image function does not fit the specifications of the Dataset, hence the exception raising. Please find below a full edited code that seems to work (I ran a test using custom images on my computer, with xd / yd dict initialized to look like your reported x and y in-dataset tensors). It is not pretty, and I would personally advise to drop the dict structures, but it works:
from itertools import zip_longest
def read_image(path, shape):
try:
image = tf.io.read_file(path)
image = tf.image.decode_png(image)
image = tf.image.resize(image, [shape[1],shape[2]])
image /= 255.0
return image
except:
raise FileNotFoundError("preprocess_image: bad path '%s'" % path)
# CHANGED: load_image is actually useless
def dataset_prep(json_data, seq, batch_size):
# LOAD DATA FROM JSON
x,y = json_parse_x_y(json_data[seq])
xx = [*zip_longest(*x)] # NOTE: goes from variable sized input to {'input_N':...}
yy = [*zip_longest(*y)]
# GET SHAPES (hard coded atm)
lns = [[len(xxx)] for xxx in xx]
rzs = [[24,512,1],[96,512,1]] # TEMP TODO! grab grom [(v['h'],v['w'],v['c']) for v in xx]
shp = [*zip_longest(*[lns,rzs])]
shp = [list(s) for s in shp]
shp = [[*itertools.chain.from_iterable(s)] for s in shp]
xd = dict([[ "input_{}".format(i+1),np.array(y)] for i,y in [*enumerate(xx)]])
yd = dict([["output_{}".format(i+1),np.array(y)] for i,y in [*enumerate(yy)]])
ds = tf.data.Dataset.from_tensor_slices((xd, yd))
ds = ds.shuffle(10000)
# CHANGED: the following line, to run images import (also moved epeat instruction later)
ds = ds.map(
lambda x, y: (
{key: read_image(path, shp[i]) for i, (key, path) in enumerate(x.items())},
y
),
num_parallel_calls=AUTOTUNE
)
ds = ds.repeat()
ds = ds.batch(batch_size) if batch_size else ds
ds = ds.prefetch(AUTOTUNE)
return ds
Initial answer (before question edit):
I will only deal with the exception raised by load_image in this answer, but there might be additional work to perform on the rest - I did not test for that, not having a convenient dataset at hand.
The exception message is actually quite explicit: you are passing a scalar element (e.g. n in [(k, tf.map_fn(lambda x: read_image(x, shp), n, dtype=tf.float32)) for k,n in pout]) as elems argument to tf.map_fn, when it expects a tensor (or (possibly nested) list or tuple of tensors), as clearly specified in its documentation.
You are also using tf.map_fn the wrong way in the quoted line of code, because basically you are mixing it up with a python intention list, when you should use either one or the other.
With intention list (also replacing the useless previous lines of the load_image function):
pl = {path: (load_image(path, shp), res) for path, res in x.items()}
With tf.map_fn:
# Read all images, return two tensors, one with images, the other with resolutions.
# (so, resolutions inclusion in this is actually useless and should be redesigned)
pl = tf.map_fn(
lambda args: (read_image(args[0], shp), args[1]),
[tf.convert_to_tensor(list(x)), tf.convert_to_tensor(list(x.values()))],
dtype=(tf.float32, tf.float32)
)
# If you really, really want to return a dict, but is it an optimal design?
pl = {path: (pl[0][i], pl[1][i]) for i, path in enumerate(x)}
I do not know whether returning a dict specified in this way is optimal (or even compatible) with Dataset instantiation, however if the rest of your code is working, this should do the trick.
At any rate, if you want to iterate over a dict, go ahead and use either the first version or a modified version of the second one (which may have the advantage of parallelizing images reading).
I hope this helps :-)

Related

Skopt gp_minimize eroor while looking for optimal point

I have a problem with gp_minimize from skopt. I'm doing hyperparameter tunning for transfer learning (base model vgg19). Everything works fine until gp_minimize starts to evaluate optimal point (evaluate random point works fine). I have this error, i spend few days on this and i still don't know what to do:
Traceback (most recent call last):
File "C:/Users/mea/Train_models/04_VGG19_train_model.py", line 144, in <module>
search_result = gp_minimize(func=fitness,
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\optimizer\gp.py", line 259, in gp_minimize
return base_minimize(
File "C:\Users\Wme\anaconda3\envs\Train_models\lib\site-packages\skopt\optimizer\base.py", line 300, in base_minimize
result = optimizer.tell(next_x, next_y)
File "C:\Users\mea\anaconda3\envs\Train_models\lib\site-packages\skopt\optimizer\optimizer.py", line 493, in tell
return self._tell(x, y, fit=fit)
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\optimizer\optimizer.py", line 552, in _tell
X = self.space.transform(self.space.rvs(
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\space.py", line 900, in rvs
columns.append(dim.rvs(n_samples=n_samples, random_state=rng))
File "C:\Users\Weronika Gramacka\anaconda3\envs\Train_models\lib\site-packages\skopt\space\space.py", line 698, in rvs
return self.inverse_transform(list(choices))
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\space.py", line 685, in inverse_transform
inv_transform = super(Categorical, self).inverse_transform(Xt)
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\space.py", line 168, in inverse_transform
return self.transformer.inverse_transform(Xt)
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\transformers.py", line 309, in inverse_transform
X = transformer.inverse_transform(X)
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\transformers.py", line 216, in inverse_transform
return [
File "C:\Users\me\anaconda3\envs\Train_models\lib\site-packages\skopt\space\transformers.py", line 217, in <listcomp>
self.inverse_mapping_[int(np.round(i))] for i in Xt
KeyError: 6
Process finished with exit code 1
Code is from tutorial and looks like this (only important parts):
dim_num_dense_layers = Integer(low=1, high=3, name='num_dense_layers')
dim_num_dense_nodes = Integer(low=60, high=1500, name='num_dense_nodes')
dim_activation = Categorical(categories=[ 'sigmoid', 'tanh', 'relu', 'softmax'],
name='activation')
dim_dropout = Real(low = 0.01, high = 0.4, name = 'dropout')
dim_init = Categorical(categories = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'], name = 'kernel_initializer')
dim_loss = Categorical(categories = ['categorical_crossentropy', 'categorical_hinge', 'mean_squared_error', 'huber_loss'], name = 'loss')
dimensions = [dim_num_dense_layers,
dim_num_dense_nodes,
dim_activation,
dim_dropout,
dim_init,
dim_loss]
default_parameters = [2, 600, 'relu', 0.2, 'uniform', 'huber_loss']
#use_named_args(dimensions=dimensions)
def fitness(num_dense_layers, num_dense_nodes, activation, dropout, kernel_initializer, loss):
# Print the hyper-parameters.
keras.backend.clear_session()
model = create_model(num_dense_layers, num_dense_nodes, activation, dropout, kernel_initializer, loss)
log_dir = "Tensor_board/04/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(
log_dir='log_dir',
histogram_freq=1,
write_graph=True,
write_grads=False,
write_images=False)
history = model.fit(train_data,
epochs=1,
batch_size=32,
validation_data=val_data,
callbacks=[tensorboard_callback])
accuracy = max(history.history['val_accuracy'])
global best_accuracy
if accuracy > best_accuracy:
model.save("Models/vgg19_flat.h5")
best_accuracy = accuracy
del model
gc.collect()
keras.backend.clear_session()
return -accuracy
checkpoint_saver = CheckpointSaver("./checkpoint.pkl", compress=9)
search_result = gp_minimize(func=fitness,
dimensions=dimensions,
acq_func='EI',
n_calls=30,
n_initial_points=1,
x0=default_parameters, verbose=True, callback=[checkpoint_saver])

Attempting to capture an EagerTensor without building a function tf.keras.models.save_model

I have the same error.
I'm adding DenseHashTable in DNNModel, intend to save embeddings in it, this is code:
class DNNModel(tf.keras.Model):
"""A DNN Model."""
def __init__(self, ...):
super(DNNModel, self).__init__(name, **kwargs)
vocabulary_list, embeddings = run_data.get_pretrain_vocabs_embeddings()
self.table = self._create_embedding_table(vocabulary_list, embeddings)
def _create_embedding_table(self, vocab_list, embeddings):
dimension = embeddings.shape[1]
table = tf.lookup.experimental.DenseHashTable(tf.string, tf.float32, [2.0]*dimension, 'empty_key', 'deleted_key')
# 对比,测试到底注释前后有没有效果
keys = tf.constant([i for i in vocab_list], tf.string)
values = tf.convert_to_tensor(embeddings, tf.float64)
values = tf.cast(values, tf.float32)
table.insert(keys, values)
return table
def call(self, inputs, training=None, mask=None):
fc_embeddings = self._input_layer(inputs)
bert_embeddings = self._look_up(inputs)
net = tf.concat([fc_embeddings, bert_embeddings], axis=1)
....
all is ok when runn it, but when i export model with the code tf.keras.models.save_model(model, FLAGS.servable_model_dir), it will raise error:RuntimeError: Attempting to capture an EagerTensor without building a function.
I debug the code, it likes serialize the DenseHashTable in dnnModel will raise Exceptions:
File "/Users/jiananliu/work/neirongrecom/model/ctr_model/wide_n_deep/wide_n_deep_keras_main.py", line 1021, in run
tf.keras.models.save_model(model, FLAGS.servable_model_dir, include_optimizer=False)
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/keras/saving/save.py", line 138, in save_model
signatures, options)
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/keras/saving/saved_model/save.py", line 78, in save
save_lib.save(model, filepath, signatures, options)
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/saved_model/save.py", line 951, in save
obj, export_dir, signatures, options, meta_graph_def)
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/saved_model/save.py", line 1027, in _build_meta_graph
options.namespace_whitelist)
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/saved_model/save.py", line 595, in _fill_meta_graph_def
object_map, resource_map, asset_info = saveable_view.map_resources()
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/saved_model/save.py", line 270, in map_resources
new_resource = new_obj._create_resource()
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/ops/lookup_ops.py", line 1945, in _create_resource
name=self._name)
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/ops/gen_lookup_ops.py", line 1113, in mutable_dense_hash_table_v2
max_load_factor=max_load_factor, name=name)
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 470, in _apply_op_helper
preferred_dtype=default_dtype)
File "/Users/jiananliu/anaconda3/envs/transformer/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1307, in convert_to_tensor
raise RuntimeError("Attempting to capture an EagerTensor without "
RuntimeError: Attempting to capture an EagerTensor without building a function.
please help me!!!
I found to replace tensorflow.python.ops.lookup_ops.MutableHashTable with tf.lookup.experimental.DenseHashTable will solve the error.
def create_table():
table= MutableHashTable(key_dtype=tf.int32, value_dtype=tf.int32, default_value=[0])
# table = tf.lookup.experimental.DenseHashTable(
# key_dtype=tf.int32,
# value_dtype=tf.int32,
# default_value=-1,
# empty_key=0,
# deleted_key=-1)
return table

Keras repeat elements throwing ValueError List argument 'indices' to 'SparseConcat' Op with length 0 shorter than minimum length 2

I am trying to implement the code for Unsupervised Aspect Extraction from the code available here.
Link to the paper
While implementing Attention class in ml_layers.py, i am getting error in call function at line
y = K.repeat_elements(y, self.steps, axis=1)
Complete code of the function is given below:
def call(self, input_tensor, mask=None):
x = input_tensor[0]
y = input_tensor[1]
mask = mask[0]
y = K.transpose(K.dot(self.W, K.transpose(y)))
y = K.expand_dims(y, axis=-2)
y = K.repeat_elements(y, self.steps, axis=1)
eij = K.sum(x*y, axis=-1)
if self.bias:
b = K.repeat_elements(self.b, self.steps, axis=0)
eij += b
eij = K.tanh(eij)
a = K.exp(eij)
if mask is not None:
a *= K.cast(mask, K.floatx())
a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
return a
The error is as follows
Traceback (most recent call last):
File "", line 1, in
model = create_model(ortho_reg, neg_size, emb_dim, aspect_size, emb_path, maxlen, vocab)
File "/home/fractaluser/Projects/workspace/UnsupervisedAspectExtraction/code/model.py", line 32, in create_model
att_weights = Attention(name='att_weights')([e_w, y_s])
File "/home/fractaluser/anaconda3/envs/venv_keras/lib/python3.5/site-packages/keras/engine/base_layer.py", line 457, in call
output = self.call(inputs, **kwargs)
File "/home/fractaluser/Projects/workspace/UnsupervisedAspectExtraction/code/my_layers.py", line 58, in call
y = K.repeat_elements(y, self.steps, axis=1)
File "/home/fractaluser/anaconda3/envs/venv_keras/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 2093, in repeat_elements
return concatenate(x_rep, axis)
File "/home/fractaluser/anaconda3/envs/venv_keras/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 1954, in concatenate
return tf.sparse_concat(axis, tensors)
File "/home/fractaluser/.local/lib/python3.5/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/home/fractaluser/.local/lib/python3.5/site-packages/tensorflow/python/ops/sparse_ops.py", line 316, in sparse_concat
gen_sparse_ops.sparse_concat(inds, vals, shapes, axis, name=name))
File "/home/fractaluser/.local/lib/python3.5/site-packages/tensorflow/python/ops/gen_sparse_ops.py", line 911, in sparse_concat
concat_dim=concat_dim, name=name)
File "/home/fractaluser/.local/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 570, in _apply_op_helper
(input_name, op_type_name, len(values), num_attr.minimum))
ValueError: List argument 'indices' to 'SparseConcat' Op with length 0 shorter than minimum length 2.
Could not find any solution on internet. Please help
I used to have this problem
AttributeError: module 'keras.backend' has no attribute 'image_dim_ordering',
So I have to
modify the
K.image_dim_ordering() == 'th'('tf') ==> K.image_data_format() == 'channels_first'(channels_last)
after that, I met the same problem as you. But My problem is there still someplace haven't been correct. After I modify all the places. The problem is gone.
I hope this can help you.

Feed a Tensor of SparseTensors to estimators

To get started with TF, I wanted to learn a predictor of match outcomes for a game. There are three features: the 5 heros on team 0, the 5 heroes on team 1, and the map. The winner is the label, 0 or 1. I want to represent the teams and the maps as SparseTensors. Out of a possible 71 heroes, five will be selected. Likewise for maps, out of a possible 13, one will be selected.
import tensorflow as tf
import packunpack as source
import tempfile
from collections import namedtuple
GameRecord = namedtuple('GameRecord', 'team_0 team_1 game_map winner')
def parse(line):
parts = line.rstrip().split("\t")
return GameRecord(
game_map = parts[1],
team_0 = parts[2].split(","),
team_1 = parts[3].split(","),
winner = int(parts[4]))
def conjugate(record):
return GameRecord(
team_0 = record.team_1,
team_1 = record.team_0,
game_map = record.game_map,
winner = 0 if record.winner == 1 else 1)
def sparse_team(team):
indices = list(map(lambda x: [x], map(source.encode_hero, team)))
return tf.SparseTensor(indices=indices, values = [1] * len(indices), dense_shape=[len(source.heroes_array)])
def sparse_map(map_name):
return tf.SparseTensor(indices=[[source.encode_hero(map_name)]], values = [1], dense_shape=[len(source.maps_array)])
def make_input_fn(filename, shuffle = True, add_conjugate_games = True):
def _fn():
records = []
with open(filename, "r") as raw:
i = 0
for line in raw:
record = parse(line)
records.append(record)
if add_conjugate_games:
# since 0 and 1 are arbitrary team labels, learn and test the conjugate game whenever
# learning the original inference
records.append(conjugate(record))
print("Making team 0")
team_0s = tf.constant(list(map(lambda r: sparse_team(r.team_0), records)))
print("Making team 1")
team_1s = tf.constant(list(map(lambda r: sparse_team(r.team_1), records)))
print("making maps")
maps = tf.constant(list(map(lambda r: sparse_map(r.game_map), records)))
print("Making winners")
winners = tf.constant(list(map(lambda r: tf.constant([r.winner]), records)))
return {
"team_0": team_0s,
"team_1": team_1s,
"game_map": maps,
}, winners
#Please help me finish this function?
return _fn
team_0 = tf.feature_column.embedding_column(
tf.feature_column.categorical_column_with_vocabulary_list("team_0", source.heroes_array), len(source.heroes_array))
team_1 = tf.feature_column.embedding_column(
tf.feature_column.categorical_column_with_vocabulary_list("team_1", source.heroes_array), len(source.heroes_array))
game_map = tf.feature_column.embedding_column(
tf.feature_column.categorical_column_with_vocabulary_list("game_map", source.maps_array), len(source.maps_array))
model_dir = tempfile.mkdtemp()
m = tf.estimator.DNNClassifier(
model_dir=model_dir,
hidden_units = [1024, 512, 256],
feature_columns=[team_0, team_1, game_map])
def main():
m.train(input_fn=make_input_fn("tiny.txt"), steps = 100)
if __name__ == "__main__":
main()
This fails on team_0s = tf.constant(list(map(lambda r: sparse_team(r.team_0), records)))
It's very difficult to understand what tf wants me to return in my input_fn, because all of the examples I can find in the docs ultimately call out to a pandas or numpy helper function, and I'm not familiar with those frameworks. I thought that each dictionary value should be a Tensor containing all examples of a single feature. Each of my examples is a SparseTensor, and I want to simply embed them as their dense versions for the sake of the DNNClassifier.
I'm sure my mental model is horribly broken right now, and I appreciate any help setting it straight.
Error output:
python3 estimator.py
Making team 0
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 468, in make_tensor_proto
str_values = [compat.as_bytes(x) for x in proto_values]
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 468, in <listcomp>
str_values = [compat.as_bytes(x) for x in proto_values]
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/compat.py", line 65, in as_bytes
(bytes_or_text,))
TypeError: Expected binary or unicode string, got <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7fe8
b4d7aef0>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "estimator.py", line 79, in <module>
main()
File "estimator.py", line 76, in main
m.train(input_fn=make_input_fn("tiny.txt"), steps = 100)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 302, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 709, in _train_model
input_fn, model_fn_lib.ModeKeys.TRAIN)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 577, in _get_features_and_l
abels_from_input_fn
result = self._call_input_fn(input_fn, mode)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 663, in _call_input_fn
return input_fn(**kwargs)
File "estimator.py", line 44, in _fn
team_0s = tf.constant(list(map(lambda r: sparse_team(r.team_0), records)))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/constant_op.py", line 208, in constant
value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 472, in make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'list'> to Tensor. Contents: [<tensorflow.python.framework.sparse_tenso
r.SparseTensor object at 0x7fe8b4d7aef0>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7fe8b4d7af28
>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7fe8b4d7af60>, <tensorflow.python.framework.sparse_
tensor.SparseTensor object at 0x7fe8b4d7aeb8> ... ]
Ultimately it wasn't necessary to convert my text representation into sparse vectors in my input_fn. Instead I had to tell the model to expect an input of an array of strings, which it understands how to convert into a "bag of words" or n-hot vector and how to embed as dense vectors.
import tensorflow as tf
import tempfile
import os
from collections import namedtuple
GameRecord = namedtuple('GameRecord', 'team_0 team_1 game_map winner')
def parse(line):
parts = line.rstrip().split("\t")
return GameRecord(
game_map = parts[1],
team_0 = parts[2].split(","),
team_1 = parts[3].split(","),
winner = int(parts[4]))
def conjugate(record):
return GameRecord(
team_0 = record.team_1,
team_1 = record.team_0,
game_map = record.game_map,
winner = 0 if record.winner == 1 else 1)
def make_input_fn(filename, batch_size=128, shuffle = True, add_conjugate_games = True, epochs=1):
def _fn():
records = []
with open(filename, "r") as raw:
i = 0
for line in raw:
record = parse(line)
records.append(record)
if add_conjugate_games:
records.append(conjugate(record))
team_0s = tf.constant(list(map(lambda r: r.team_0, records)))
team_1s = tf.constant(list(map(lambda r: r.team_1, records)))
maps = tf.constant(list(map(lambda r: r.game_map, records)))
winners = tf.constant(list(map(lambda r: [r.winner],
return {
"team_0": team_0s,
"team_1": team_1s,
"game_map": maps,
}, winners
return _fn
team_0 = tf.feature_column.embedding_column(
tf.feature_column.categorical_column_with_vocabulary_list("team_0", source.heroes_array), dimension=len(source.heroes_array))
team_1 = tf.feature_column.embedding_column(
tf.feature_column.categorical_column_with_vocabulary_list("team_1", source.heroes_array), dimension=len(source.heroes_array))
game_map = tf.feature_column.embedding_column(
tf.feature_column.categorical_column_with_vocabulary_list("game_map", source.maps_array), dimension=len(source.maps_array))
model_dir = "DNNClassifierModel_00"
os.mkdir(model_dir)
m = tf.estimator.DNNClassifier(
model_dir=model_dir,
hidden_units = [1024, 512, 256],
feature_columns=[team_0, team_1, game_map])
def main():
m.train(input_fn=make_input_fn("training.txt"))
results = m.evaluate(input_fn=make_input_fn("validation.txt"))
print("model directory = %s" % model_dir)
for key in sorted(results):
print("%s: %s" % (key, results[key]))
if __name__ == "__main__":
main()
Note that this code isn't perfect yet. I need to add in batching.

Computing Edit Distance (feed_dict error)

I've written some code in Tensorflow to compute the edit-distance between one string and a set of strings. I can't figure out the error.
import tensorflow as tf
sess = tf.Session()
# Create input data
test_string = ['foo']
ref_strings = ['food', 'bar']
def create_sparse_vec(word_list):
num_words = len(word_list)
indices = [[xi, 0, yi] for xi,x in enumerate(word_list) for yi,y in enumerate(x)]
chars = list(''.join(word_list))
return(tf.SparseTensor(indices, chars, [num_words,1,1]))
test_string_sparse = create_sparse_vec(test_string*len(ref_strings))
ref_string_sparse = create_sparse_vec(ref_strings)
sess.run(tf.edit_distance(test_string_sparse, ref_string_sparse, normalize=True))
This code works and when run, it produces the output:
array([[ 0.25],
[ 1. ]], dtype=float32)
But when I attempt to do this by feeding the sparse tensors in through sparse placeholders, I get an error.
test_input = tf.sparse_placeholder(dtype=tf.string)
ref_input = tf.sparse_placeholder(dtype=tf.string)
edit_distances = tf.edit_distance(test_input, ref_input, normalize=True)
feed_dict = {test_input: test_string_sparse,
ref_input: ref_string_sparse}
sess.run(edit_distances, feed_dict=feed_dict)
Here is the error traceback:
Traceback (most recent call last):
File "<ipython-input-29-4e06de0b7af3>", line 1, in <module>
sess.run(edit_distances, feed_dict=feed_dict)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 597, in _run
for subfeed, subfeed_val in _feed_fn(feed, feed_val):
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 558, in _feed_fn
return feed_fn(feed, feed_val)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 268, in <lambda>
[feed.indices, feed.values, feed.shape], feed_val)),
TypeError: zip argument #2 must support iteration
Any idea what is going on here?
TL;DR: For the return type of create_sparse_vec(), use tf.SparseTensorValue instead of tf.SparseTensor.
The problem here comes from the return type of create_sparse_vec(), which is tf.SparseTensor, and is not understood as a feed value in the call to sess.run().
When you feed a (dense) tf.Tensor, the expected value type is a NumPy array (or certain objects that can be converted to an array). When you feed a tf.SparseTensor, the expected value type is a tf.SparseTensorValue, which is similar to a tf.SparseTensor but its indices, values, and shape properties are NumPy arrays (or certain objects that can be converted to arrays, like the lists in your example.
The following code should work:
def create_sparse_vec(word_list):
num_words = len(word_list)
indices = [[xi, 0, yi] for xi,x in enumerate(word_list) for yi,y in enumerate(x)]
chars = list(''.join(word_list))
return tf.SparseTensorValue(indices, chars, [num_words,1,1])