Shape must be rank 0 but is rank 1, parse_single_sequence_example - tensorflow

For the past few days I have been having an issue with serializing data to the TFRecord format and then deserializing it with tf.parse_single_sequence_example. I am attempting to retrieve data for use with a fairly standard RNN model; however, this is my first attempt at using the TFRecord format and the input pipeline that goes with it.
Here is a toy example to reproduce the issue I am having:
import tensorflow as tf
import tempfile
from IPython import embed
# Toy data: three token sequences (lengths 3, 3 and 2) and, for each one, a
# label sequence of the same length.
sequences = [[1, 2, 3], [4, 5, 1], [1, 2]]
label_sequences = [[0, 1, 0], [1, 0, 0], [1, 1]]
def make_example(sequence, labels):
    """Build a ``tf.train.SequenceExample`` from one sequence and its labels.

    The length of ``sequence`` is stored in the context feature ``"length"``.
    Each token/label pair is appended, in order, to the ``"tokens"`` and
    ``"labels"`` feature lists as a single-value int64 feature.
    """
    example = tf.train.SequenceExample()
    example.context.feature["length"].int64_list.value.append(len(sequence))

    token_features = example.feature_lists.feature_list["tokens"]
    label_features = example.feature_lists.feature_list["labels"]
    for tok, lab in zip(sequence, labels):
        token_features.feature.add().int64_list.value.append(tok)
        label_features.feature.add().int64_list.value.append(lab)
    return example
# Serialize every (sequence, labels) pair into ./test.tfrecords.
# The `with` block closes the writer even if building or writing an example
# raises, instead of relying on an explicit close() call being reached.
with tf.python_io.TFRecordWriter('./test.tfrecords') as writer:
    for sequence, label_sequence in zip(sequences, label_sequences):
        ex = make_example(sequence, label_sequence)
        writer.write(ex.SerializeToString())
# Read the records back through a filename queue and parse one SequenceExample.
tf.reset_default_graph()
file_name_queue = tf.train.string_input_producer(['./test.tfrecords'], num_epochs=None)
reader = tf.TFRecordReader()
# Parsing spec for the context part: a single scalar int64 "length".
context_features = {
"length": tf.FixedLenFeature([], dtype=tf.int64)
}
# Parsing spec for the per-step feature lists written by make_example.
sequence_features = {
"tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
"labels": tf.FixedLenSequenceFeature([], dtype=tf.int64)
}
# NOTE(review): this line is the cause of the reported "Shape must be rank 0
# but is rank 1" error. reader.read() returns a (key, value) pair, so `ex` is
# the whole tuple rather than one serialized record; unpack it
# (`_, ex = reader.read(file_name_queue)`) before handing it to the parser.
ex = reader.read(file_name_queue)
# Parse the example (returns a pair of dictionaries of tensors: context, sequence)
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=ex,
context_features=context_features,
sequence_features=sequence_features
)
# Evaluate each parsed dictionary once and print the first result.
context = tf.contrib.learn.run_n(context_parsed, n=1, feed_dict=None)
print(context[0])
sequence = tf.contrib.learn.run_n(sequence_parsed, n=1, feed_dict=None)
print(sequence[0])
The associated stack trace is:
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/common_shapes.py", line 594, in call_cpp_shape_fn
status)
File "/usr/lib/python3.5/contextlib.py", line 66, in exit
next(self.gen)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors.py", line 463, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors.InvalidArgumentError: Shape must be rank 0 but is rank 1
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "my_test.py", line 51, in
sequence_features=sequence_features
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/parsing_ops.py", line 640, in parse_single_sequence_example
feature_list_dense_defaults, example_name, name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/parsing_ops.py", line 837, in _parse_single_sequence_example_raw
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_parsing_ops.py", line 285, in _parse_single_sequence_example
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 749, in apply_op
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2382, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1783, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/common_shapes.py", line 596, in call_cpp_shape_fn
raise ValueError(err.message)
ValueError: Shape must be rank 0 but is rank 1
I posted this as a potential issue over on github though it seems I may just be using it incorrectly: Tensorflow Github Issue
So, with the background information out of the way, I'm wondering whether I am in fact making an error here. Any help in the right direction would be greatly appreciated; it's been a few days and my poking around hasn't panned out. Thanks all!

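Got it — it was a bad assumption on my part. tf.TFRecordReader.read(queue, name=None) returns a (key, value) tuple, whereas I assumed it returned just the value, so I was passing the whole tuple directly into the example parser. Unpacking the result, e.g. `_, ex = reader.read(file_name_queue)`, and passing only the serialized value to tf.parse_single_sequence_example resolves the error.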

Related

Error when using Tensorflow bucket_by_sequence_length() and tf.py_function() together

I have the following sample code:
import glob
import random
import tensorflow as tf
import cv2
def random_blur(image):
    """Blur ``image`` with ``cv2.blur`` using a randomly sized kernel.

    Both kernel dimensions are drawn independently from 1..3 (inclusive).
    This stands in for an augmentation that cannot be done with a
    ``tf.image`` function.
    """
    kernel_x = random.randint(1, 3)
    kernel_y = random.randint(1, 3)
    return cv2.blur(image, (kernel_x, kernel_y))
def transform(image):
    """Apply random JPEG-quality, brightness and contrast changes to ``image``.

    The steps run in the order listed; each one receives the previous
    step's output.
    """
    augmentations = (
        (tf.image.random_jpeg_quality, (75, 100)),
        (tf.image.random_brightness, (0.5,)),
        (tf.image.random_contrast, (0.2, 0.5)),
    )
    for augment, params in augmentations:
        image = augment(image, *params)
    # here's the problem...
    # image = tf.py_function(func=random_blur, inp=[image], Tout=tf.uint8)
    return image
def process_path(file_path):
    """Load a PNG as a single-channel image and pair it with an augmented copy.

    Returns ``(transform(image), image)``: the augmented image first, the
    decoded original second.
    """
    png_bytes = tf.io.read_file(file_path)
    original = tf.image.decode_png(png_bytes, channels=1)
    return transform(original), original
# Collect the training PNG paths and wrap them in a dataset of file names.
train_directory = 'data/small/'
train_files = glob.glob(train_directory + '*.png')
ds_train = tf.data.Dataset.from_tensor_slices(train_files)
# Four bucket boundaries, accompanied by five batch sizes
# (presumably one per resulting bucket — confirm against the API docs).
boundaries = [100, 200, 300, 400]
batch_sizes = [16, 16, 16, 16, 16]
# Each path becomes an (augmented image, original image) pair; the second
# positional argument is presumably num_parallel_calls — confirm.
ds_train = ds_train.map(process_path, 4)
# An element's length is read from axis 1 of the augmented image's shape.
# NOTE(review): with the tf.py_function line in transform() enabled, this call
# is where the reported "Cannot iterate over a shape with unknown rank"
# ValueError is raised.
ds_train = ds_train.bucket_by_sequence_length(element_length_func=lambda x, y: tf.shape(x)[1],
bucket_boundaries=boundaries,
bucket_batch_sizes=batch_sizes)
I'm trying to create a Tensorflow Dataset from variable-width 60px-high images in a directory, using the bucket_by_sequence_length() function to ensure the images in each minibatch have the same dimensions. This all works fine until I uncomment the line beneath "here's the problem" in the code above. When you uncomment that and run it, it produces the following error:
Traceback (most recent call last):
File "test.py", line 34, in <module>
ds_train = ds_train.bucket_by_sequence_length(element_length_func=lambda x, y: tf.shape(x)[1],
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3120, in bucket_by_sequence_length
return self.group_by_window(
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2976, in group_by_window
return _GroupByWindowDataset(
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5841, in __init__
self._make_reduce_func(reduce_func, input_dataset)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5890, in _make_reduce_func
self._reduce_func = structured_function.StructuredFunctionWrapper(
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/structured_function.py", line 271, in __init__
self._function = fn_factory()
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/eager/function.py", line 2610, in get_concrete_function
graph_function = self._get_concrete_function_garbage_collected(
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/eager/function.py", line 2576, in _get_concrete_function_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/eager/function.py", line 2760, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/eager/function.py", line 2670, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/framework/func_graph.py", line 1247, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/structured_function.py", line 248, in wrapped_fn
ret = wrapper_helper(*args)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/structured_function.py", line 177, in wrapper_helper
ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/autograph/impl/api.py", line 689, in wrapper
return converted_call(f, args, kwargs, options=options)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/autograph/impl/api.py", line 377, in converted_call
return _call_unconverted(f, args, kwargs, options)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/autograph/impl/api.py", line 458, in _call_unconverted
return f(*args, **kwargs)
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3111, in batching_fn
shapes = make_padded_shapes(
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3083, in make_padded_shapes
shape = [
File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/tensorflow/python/framework/tensor_shape.py", line 882, in __iter__
raise ValueError("Cannot iterate over a shape with unknown rank.")
ValueError: Cannot iterate over a shape with unknown rank.
Likewise, my code works fine if I uncomment that line but remove the call to bucket_by_sequence_length() and limit my training data to images with identical dimensions.
It seems that bucket_by_sequence_length() and tf.py_function() don't play nice together, even with eager mode enabled. I need to do some image augmentations/transformations which the standard tf.image functions don't provide. Any ideas?

Data type mismatch in streaming F1 score calculation in Tensorflow

I was trying to use this code as it is on Tensorflow 1.13.1. However, it throws the following error:
sherlock@mybox:~/cs273/autocat/bert$ python streaming2.py
Traceback (most recent call last):
File "streaming2.py", line 233, in <module>
tf_f1 = tf_f1_score(t, p)
File "streaming2.py", line 161, in tf_f1_score
f1s[2] = tf.reduce_sum(f1 * weights)
File "/home/sherlock/.virtualenvs/autocat/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 812, in binary_op_wrapper
return func(x, y, name=name)
File "/home/sherlock/.virtualenvs/autocat/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1078, in _mul_dispatch
return gen_math_ops.mul(x, y, name=name)
File "/home/sherlock/.virtualenvs/autocat/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 5860, in mul
"Mul", x=x, y=y, name=name)
File "/home/sherlock/.virtualenvs/autocat/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 547, in _apply_op_helper
inferred_from[input_arg.type_attr]))
TypeError: Input 'y' of 'Mul' Op has type float64 that does not match type int64 of argument 'x'.
Tried fixing the casts for some time, but failed to find a minimal change that makes the code work. Can anyone please help me on this?
I could reproduce your error: it happens with Python 2 but not 3.
So either switch to Python 3, or add an explicit tf.cast so that both operands of the multiplication have the same dtype:
f1 = tf.cast(f1, tf.float64)
f1s[2] = tf.reduce_sum(f1 * weights)
The same cast may be needed in other places as well, but that is the idea.

Conditioning pixelsnail on classes

I am trying to condition a PixelCNN model that I adapted, but some changes are needed to condition the model on classes (series). I am working with time series, so I would also like to know how to condition the model on particular series. The problem appears when I try to one-hot encode my labels "Y" [batch,], which I pass in as an array with the same batch length as "X" [batch, sqrt(seq_len), sqrt(seq_len), channels]. To condition the model, I have the following code:
# Build the conditioning inputs (one-hot class vectors) when class-conditional
# mode is enabled; otherwise every conditioning input is None.
if args.class_conditional:
# raise NotImplementedError
num_labels = train_data.get_num_labels()
# Labels for the initialization batch, one-hot encoded.
y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
h_init = tf.one_hot(y_init, num_labels)
# Sampling labels cycle through arange(batch_size) modulo num_labels and are
# split into one chunk per GPU.
# NOTE(review): the printed y_sample values (0, 1, 0.3052..., 1.3052..., ...)
# are what np.mod gives for a non-integer num_labels of about 1.6948, which
# makes y_sample float64 and triggers the tf.one_hot TypeError — check what
# train_data.get_num_labels() actually returns.
y_sample = np.split(
np.mod(np.arange(args.batch_size), num_labels), args.nr_gpu)
h_sample = [tf.one_hot(tf.Variable(y_sample[i], trainable=False), num_labels)
for i in range(args.nr_gpu)]
# Per-GPU label placeholders and their one-hot encodings.
ys = [tf.placeholder(tf.int32, shape=(args.batch_size,))
for i in range(args.nr_gpu)]
hs = [tf.one_hot(ys[i], num_labels) for i in range(args.nr_gpu)]
else:
h_init = None
h_sample = [None] * args.nr_gpu
hs = h_sample
This is the current value of "y_sample", which is where the error is reported:
[array([0. , 1. , 0.30521799, 1.30521799, 0.61043598,
1.61043598, 0.91565397, 0.22087195, 1.22087195, 0.52608994,
1.52608994, 0.83130793, 0.13652592, 1.13652592, 0.44174391,
1.44174391, 0.7469619 , 0.05217988, 1.05217988, 0.35739787,
1.35739787, 0.66261586, 1.66261586, 0.96783385, 0.27305184,
1.27305184, 0.57826983, 1.57826983, 0.88348781, 0.1887058 ,
1.1887058 , 0.49392379, 1.49392379, 0.79914178, 0.10435977,
1.10435977, 0.40957776, 1.40957776, 0.71479575, 0.02001373,
1.02001373, 0.32523172, 1.32523172, 0.63044971, 1.63044971,
0.9356677 , 0.24088569, 1.24088569, 0.54610368, 1.54610368])]
The error is raised when h_sample is built, at the tf.one_hot call:
Traceback (most recent call last):
File "train.py", line 398, in <module>
main(FLAGS)
File "train.py", line 111, in main
for i in range(args.nr_gpu)]
File "train.py", line 111, in <listcomp>
for i in range(args.nr_gpu)]
File "/home/proto/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 2364, in one_hot
name)
File "/home/proto/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2831, in _one_hot
off_value=off_value, axis=axis, name=name)
File "/home/proto/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 609, in _apply_op_helper
param_name=input_name)
File "/home/proto/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 60, in _SatisfiesTypeConstraint
", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'indices' has DataType float64 not in list of allowed values: uint8, int32, int64
I tried hard-coding the `for i in range(args.nr_gpu)` loop to a single iteration (1) to see whether that was the problem, but the error persists.

Python 3.5 Trying to plot PCA with sklearn and matplotlib

Using the following code generates the error: TypeError: float() argument must be a string or a number, not 'Pred':
I am struggling to figure out what is causing this error to be thrown.
self.features is a list of arrays, each composed of three floats, e.g. [1.1, 1.2, 1.3].
an example of self.features:
[array([-1.67191985, 0.1 , 9.69981494]), array([-0.68486623, 0.05 , 9.99085024]), array([ -1.36 , 0.1 , 10.44720459]), array([-2.46918915, 0. , 3.5483372 ]), array([-0.835 , 0.1 , 4.02740479])]
This is the method where the error is being thrown.
def pca(self):
# Fit a 2-component PCA on self.features and scatter-plot the projection.
pca = PCA(n_components=2)
x_np = np.asarray(self.features)
pca.fit(x_np)
X_reduced = pca.transform(x_np)
plt.figure(figsize=(10, 8))
# NOTE(review): `y` is never assigned in this method, so `c=y` resolves to an
# outer name. The traceback shows a module-level `y.generate_pca()` call and
# the error names 'Pred', so `y` is presumably the Pred instance itself, which
# matplotlib cannot convert to float.
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, cmap='RdBu')
plt.xlabel('First component')
plt.ylabel('Second component')
The full trace back is:
Traceback (most recent call last):
File "/Users/user/PycharmProjects/Post-Translational-Modification-
Prediction/pred.py", line 244, in <module>
y.generate_pca()
File "/Users/user/PycharmProjects/Post-Translational-Modification-
Prediction/pred.py", line 222, in generate_pca
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, cmap='RdBu')
File "/usr/local/lib/python3.5/site-packages/matplotlib/pyplot.py",
line 3435, in scatter
edgecolors=edgecolors, data=data, **kwargs)
File "/usr/local/lib/python3.5/site-packages/matplotlib/__init__.py",
line 1892, in inner
return func(ax, *args, **kwargs)
File "/usr/local/lib/python3.5/site-packages/matplotlib/axes/_axes.py", line 3976, in scatter
c_array = np.asanyarray(c, dtype=float)
File "/usr/local/lib/python3.5/site-packages/numpy/core/numeric.py", line 583, in asanyarray
return array(a, dtype, copy=False, order=order, subok=True)
TypeError: float() argument must be a string or a number, not 'Pred'
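The fix suggested by @WhoIsJack is to define y inside the method as np.arange(len(self.features)), so that the colour argument `c` is a numeric array rather than the module-level Pred object.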
The functional code for those who run into similar issues is:
def generate_pca(self):
    """Project ``self.features`` onto two principal components and plot them.

    Each point is coloured by its position in ``self.features`` using the
    'RdBu' colormap, and the figure is displayed with ``plt.show()``.
    """
    point_index = np.arange(len(self.features))
    feature_matrix = np.asarray(self.features)

    reducer = PCA(n_components=2)
    reducer.fit(feature_matrix)
    projected = reducer.transform(feature_matrix)

    plt.figure(figsize=(10, 8))
    plt.scatter(projected[:, 0], projected[:, 1], c=point_index, cmap='RdBu')
    plt.xlabel('First component')
    plt.ylabel('Second component')
    plt.show()

type matching error in tensor

I am following a Tensorflow example. However, running the code gives me the following error message.
It seems to me that the following code segment causes the problem, or that the last dimension of h_conv10 is float64. However, float64 is never set explicitly anywhere in the code. Thank you for any suggestions and hints.
# Deconvolution 1
W_deconv_1 = weight_variable_devonc([max_pool_size, max_pool_size, 8*chanel_root, 16*chanel_root])
b_deconv1 = bias_variable([8*chanel_root])
# NOTE(review): the traceback points at this line (u-net.py line 193), failing
# inside deconv2d, where `tf.shape(x)[3]/2` produces a float64 "truediv"
# tensor that cannot be packed together with the int32 dimensions.
h_deconv1 = tf.nn.relu(deconv2d(h_conv10, W_deconv_1, max_pool_size) + b_deconv1)
# NOTE(review): the traceback paths show Python 3.4, where `/` is true
# division, so the two crop sizes below are floats if nx and ny are plain
# numbers — TODO confirm that crop_and_concat accepts non-integer sizes.
h_deconv1_concat = crop_and_concat(h_conv8,h_deconv1,[n_image,(((nx-180)/2+4)/2+4)/2+4,(((ny-180)/2+4)/2+4)/2+4,8*chanel_root])
python u-net.py
Traceback (most recent call last):
File "/experiment/tfw/lib/python3.4/site-
packages/tensorflow/python/ops/op_def_library.py", line 377, in apply_op
as_ref=input_arg.is_ref)
File "/experiment/tfw/lib/python3.4/site-
packages/tensorflow/python/framework/ops.py", line 608, in convert_n_to_tensor
ret.append(convert_to_tensor(value, dtype=dtype, name=n, as_ref=as_ref))
File "/experiment/tfw/lib/python3.4/site-
packages/tensorflow/python/framework/ops.py", line 566, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/experiment/tfw/lib/python3.4/site-
packages/tensorflow/python/framework/ops.py", line 510, in
_TensorTensorConversionFunction
% (dtype.name, t.dtype.name, str(t)))
ValueError: Tensor conversion requested dtype int32 for Tensor with dtype
float64: 'Tensor("truediv:0", shape=(), dtype=float64)'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "u-net.py", line 193, in <module>
h_deconv1 = tf.nn.relu(deconv2d(h_conv10, W_deconv_1, max_pool_size) + b_deconv1)
File "u-net.py", line 77, in deconv2d
output_shape = tf.pack([tf.shape(x)[0], tf.shape(x)[1]*2, tf.shape(x)[2]*2, tf.shape(x)[3]/2])
File "/experiment/tfw/lib/python3.4/site-packages/tensorflow/python/ops/array_ops.py", line 241, in pack
return gen_array_ops._pack(values, name=name)
File "/experiment/tfw/lib/python3.4/site-packages/tensorflow/python/ops/gen_array_ops.py", line 916, in _pack
return _op_def_lib.apply_op("Pack", values=values, name=name)
File "/experiment/tfw/lib/python3.4/site-packages/tensorflow/python/ops/op_def_library.py", line 396, in apply_op
raise TypeError("%s that don't all match." % prefix)
TypeError: Tensors in list passed to 'values' of 'Pack' Op have types [int32, int32, int32, float64] that don't all match.