TensorFlow's Estimator freezes with low CPU usage

I updated my TF to v1.0rc1, and Estimator.evaluate no longer works because it freezes at Restoring model.... I tried to reproduce the problem, and the following sample code makes TF freeze with about 220% CPU usage (2 CPUs) and no output at all. Any idea why this happens? Thanks!
import tensorflow as tf
from tensorflow.contrib.layers.python.layers.optimizers import optimize_loss
from tensorflow.contrib.learn.python.learn.estimators import model_fn
from tensorflow.contrib.learn.python.learn.estimators.estimator import Estimator
from tensorflow.python.framework import ops
def main(_):
    def func(features, targets, mode, params):
        idx = tf.concat([features['a'], features['b']], axis=1)
        embedding = tf.get_variable("embed", [10, 20], dtype=tf.float32)
        pred = tf.reduce_sum(tf.nn.embedding_lookup(embedding, idx))
        train_op = optimize_loss(loss=pred,
                                 global_step=tf.train.get_global_step(),
                                 learning_rate=0.001,
                                 optimizer='Adam',
                                 variables=tf.trainable_variables(),
                                 name="training_loss_optimizer")
        eval_metric_dict = dict()
        eval_metric_dict['metric'] = pred
        return model_fn.ModelFnOps(mode=mode,
                                   predictions=pred,
                                   loss=pred,
                                   train_op=train_op,
                                   eval_metric_ops=eval_metric_dict)

    model = Estimator(func, params={})

    model.fit(
        input_fn=lambda: (
            {'a': ops.convert_to_tensor([[1, 2, 3, 4, 5]]),
             'b': ops.convert_to_tensor([[2, 3, 4, 3, 5]])},
            None),
        steps=1)

    model.evaluate(
        input_fn=lambda: (
            {'a': ops.convert_to_tensor([[1, 2, 3, 4, 5]]),
             'b': ops.convert_to_tensor([[2, 3, 4, 3, 5]])},
            None))


if __name__ == "__main__":
    tf.app.run()

By default Estimator.evaluate assumes queue-based input, and will continue evaluating until the input pipeline is exhausted. When there is no queue-based input, this means it will loop forever. The fix is easy: simply provide a steps argument to evaluate.
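For the snippet above, a minimal sketch of that fix looks like this (steps=1 is only an example; pick however many evaluation batches you need):

model.evaluate(
    input_fn=lambda: (
        {'a': ops.convert_to_tensor([[1, 2, 3, 4, 5]]),
         'b': ops.convert_to_tensor([[2, 3, 4, 3, 5]])},
        None),
    steps=1)  # bound evaluation explicitly so it cannot loop forever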

Related

Converting from tensorflow frozen graph to tflite for android inference

I'm trying to convert from PyTorch to TFLite for Android inference, for an app I'm working on that uses real-time camera data of basketball to create a heatmap of made and missed shots. It's already working for iOS. Here's a demo.
I've managed to convert from PyTorch (.pth) to ONNX, and from ONNX to a TensorFlow frozen graph (.pb). Inference on that TF frozen graph checks out.
However, when I try to convert from the frozen graph to tflite, I get the following error:
RuntimeError: Inputs and outputs not all float|uint8|int16 types.Node number 2 (ADD) failed to invoke.
Input details from the interpreter [interpreter.get_input_details(), interpreter.get_output_details()] suggest the datatype is numpy.float32, which is where I'm confused. Shouldn't that qualify as float? Any suggestions/help will be much appreciated!
[{'name': 'image', 'index': 21904, 'shape': array([ 3, width, height], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0)}]
[{'name': 'action', 'index': 7204, 'shape': array([], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0)}]
From tensorflow's documentation:
import numpy as np
import tensorflow as tf
# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="converted_model.tflite")
interpreter.allocate_tensors()
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Test model on random input data.
input_shape = input_details[0]['shape']
input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)
The inference script for the TensorFlow frozen graph (1.14.0), for anyone in need:
import numpy as np
import tensorflow as tf
from PIL import Image

w = ...
h = ...


class CNN(object):

    def __init__(self, model_filepath):
        self.model_filepath = model_filepath
        self.load_graph(model_filepath=self.model_filepath)

    def load_graph(self, model_filepath):
        self.graph = tf.Graph()
        with tf.gfile.GFile(model_filepath, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        with self.graph.as_default():
            self.input = tf.placeholder(np.float32, shape=[3, h, w], name='image')
            tf.import_graph_def(graph_def, {'image': self.input})
        self.graph.finalize()
        self.sess = tf.Session(graph=self.graph)

    def test(self, data):
        output_tensor = self.graph.get_tensor_by_name('import/action:0')
        output = self.sess.run(output_tensor, feed_dict={self.input: data})
        return output


def main():
    nn = CNN(model_filepath='out_1.14.pb')
    img = np.asarray(Image.open('example.jpg')).astype(np.float32)
    img = img.transpose(-1, 0, 1)
    ans = nn.test(data=img)
    print(ans)


if __name__ == '__main__':
    main()
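For reference, the frozen-graph-to-TFLite conversion step itself is usually written roughly like this with the TF 1.x converter. This is only a sketch, not necessarily the exact call used here: the file name and the tensor names 'image' and 'action' are taken from the inference script above and may need adjusting.

import tensorflow as tf

# Sketch of converting a frozen graph (.pb) to a TFLite model with the TF 1.x API.
converter = tf.lite.TFLiteConverter.from_frozen_graph(
    graph_def_file='out_1.14.pb',
    input_arrays=['image'],
    output_arrays=['action'])
tflite_model = converter.convert()
with open('converted_model.tflite', 'wb') as f:
    f.write(tflite_model)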

What's the equivalent of this Keras code in TensorFlow?

The code is as below and runs perfectly:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
xData = np.array([[5, 3, 7], [1, 2, 6], [8, 7, 6]], dtype=np.float32)
yTrainData = np.array([[1], [0], [1]], dtype=np.float32)
model = Sequential()
model.add(Dense(64, input_dim=3, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(xData, yTrainData, epochs=10, batch_size=128, verbose=2)
xTestData = np.array([[2, 8, 1], [3, 1, 9]], dtype=np.float32)
resultAry = model.predict(xTestData)
print("Cal result: %s" % resultAry)
I can't work out the equivalent code in TensorFlow; what I've written so far is this:
import tensorflow as tf
import numpy as np
xData = np.array([[5, 3, 7], [1, 2, 6], [8, 7, 6]], dtype=np.float32)
yTrainData = np.array([[1], [0], [1]], dtype=np.float32)
x = tf.placeholder(tf.float32)
yTrain = tf.placeholder(tf.float32)
w = tf.Variable(tf.ones([64]), dtype=tf.float32)
b = tf.Variable(tf.zeros([1]), dtype=tf.float32)
y = tf.nn.relu(w * x + b)
w1 = tf.Variable(tf.ones([3]), dtype=tf.float32)
b1 = tf.Variable(0, dtype=tf.float32)
y1 = tf.reduce_mean(tf.nn.sigmoid(w1 * y + b1))
loss = tf.abs(y1 - tf.reduce_mean(yTrain))
optimizer = tf.train.AdadeltaOptimizer(0.1)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(10):
    for j in range(3):
        result = sess.run([loss, y1, yTrain, x, w, b, train],
                          feed_dict={x: xData[j], yTrain: yTrainData[j]})
        if i % 10 == 0:
            print("i: %d, j: %d, loss: %10.10f, y1: %f, yTrain: %s, x: %s" %
                  (i, j, float(result[0]), float(result[1]), yTrainData[j], xData[j]))
result = sess.run([y1, loss], feed_dict={x: [1, 6, 0], yTrain: 0})
print(result)
But I get the following error when running it:
Traceback (most recent call last):
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1327, in _do_call
return fn(*args)
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1306, in _run_fn
status, run_metadata)
File "C:\Python36\lib\contextlib.py", line 88, in __exit__
next(self.gen)
File "C:\Python36\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [64] vs. [3]
[[Node: mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable/read, _arg_Placeholder_0_0)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "testidc.py", line 36, in <module>
result = sess.run([loss, y1, yTrain, x, w, b, train], feed_dict={x: xData[j], yTrain: yTrainData[j]})
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1321, in _do_run
options, run_metadata)
File "C:\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [64] vs. [3]
[[Node: mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable/read, _arg_Placeholder_0_0)]]
Caused by op 'mul', defined at:
File "testidc.py", line 15, in <module>
y = tf.nn.relu(w * x + b)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\variables.py", line 705, in _run_op
return getattr(ops.Tensor, operator)(a._AsTensor(), *args)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\math_ops.py", line 865, in binary_op_wrapper
return func(x, y, name=name)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1088, in _mul_dispatch
return gen_math_ops._mul(x, y, name=name)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1449, in _mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "C:\Python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Incompatible shapes: [64] vs. [3]
[[Node: mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable/read, _arg_Placeholder_0_0)]]
The main reason seems to be that the shape of w must match the shape of x in TensorFlow, whereas in Keras a hidden Dense layer can have more units than the input (such as 64 in the example).
I need help writing the equivalent TensorFlow code for the Keras version. Thanks.
This is an example that uses the tf.estimator.Estimator framework:
import tensorflow as tf
import numpy as np


# The model
def model(features):
    dense = tf.layers.dense(inputs=features['x'], units=64, activation=tf.nn.relu)
    dropout = tf.layers.dropout(dense, 0.2)
    logits = tf.layers.dense(inputs=dropout, units=1, activation=tf.nn.sigmoid)
    return logits


# Stuff needed to use the tf.estimator.Estimator framework
def model_fn(features, labels, mode):
    logits = model(features)

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits)
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)

    # Configure the training op
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-4)
        train_op = optimizer.minimize(loss, tf.train.get_or_create_global_step())
    else:
        train_op = None

    accuracy = tf.metrics.accuracy(
        tf.argmax(labels, axis=1), predictions['classes'])
    metrics = {'accuracy': accuracy}

    # Create a tensor named train_accuracy for logging purposes
    tf.identity(accuracy[1], name='train_accuracy')
    tf.summary.scalar('train_accuracy', accuracy[1])

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metrics)


# Setting up input for the model
def input_fn(mode, batch_size):
    # Function that processes your input and returns two tensors, "samples" and "labels",
    # that the estimator will use to fetch input batches.
    # See https://www.tensorflow.org/get_started/input_fn for how to write this function.
    return samples, labels


# Using the model
def main():
    # Create the Estimator
    classifier = tf.estimator.Estimator(
        model_fn=model_fn, model_dir='some_dir')

    # Train the model
    # NOTE: I use this to make it compatible with your example, but you should
    # definitely set up your own input_fn above
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array([[5, 3, 7], [1, 2, 6], [8, 7, 6]], dtype=np.float32)},
        y=np.array([[1], [0], [1]]),
        num_epochs=10,
        batch_size=128,
        shuffle=False)

    classifier.train(
        input_fn=train_input_fn,
        steps=20000,  # change as needed
    )

    # Predict on new data
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array([[5, 3, 7], [1, 2, 6], [8, 7, 6]], dtype=np.float32)},
        num_epochs=1,
        batch_size=1,
        shuffle=False)

    predictions_iterator = classifier.predict(
        input_fn=predict_input_fn)

    print('Predictions results:')
    for pred in predictions_iterator:
        print(pred)
There is quite a bit going on here, so I'll try to explain the blocks one by one.
The model
The model is defined as a composition of tf.layers in a separate model function. This is done to keep the actual model_fn (which is required by the Estimator framework) independent of the model architecture.
The function takes a features parameter, which is the output of a call to input_fn (see below). In this example, since we're using tf.estimator.inputs.numpy_input_fn, features is a dictionary with item x:input_tensor. We use the input tensor as input for our model graph.
model_fn
This function is required by the framework and is used to generate a specification for your Estimator that depends on the mode the estimator is being used in. Typically, an estimator used for prediction will have fewer operations than one used for training (you don't have the loss, optimizer, etc.). This function takes care of adding everything necessary to your model graph for the three possible modes of operation (prediction, evaluation, training).
Breaking it down to logical pieces, we have:
Prediction: we only need the model graph, the predictions and the corresponding predicted labels (we could skip the labels, but having them here is handy).
Evaluation: we need everything for prediction plus: a loss function, some metric to evaluate on and optionally some summaries to visualize the metrics in Tensorboard.
Training: we need everything for evaluation plus: a training operation from an optimizer (in your sample, RMSProp)
input_fn
This is where we provide the input to our estimator.
Have a look at Building Input Functions with tf.estimator for a guide on what your custom input_fn should look like. For the example, we'll use the numpy_input_fn function from the framework.
Note that usually one input_fn handles all operation modes according to a mode parameter. Since we're using numpy_input_fn, we need two different instances of it for training and prediction to provide the data as needed.
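A sketch of that single-input_fn pattern might look like the following. The helpers load_train_arrays and load_eval_arrays are hypothetical placeholders for however you load your numpy data:

def input_fn(mode, batch_size):
    # Sketch only: branch on the mode to decide what data to serve and how.
    if mode == tf.estimator.ModeKeys.TRAIN:
        features, labels = load_train_arrays()  # hypothetical helper
        num_epochs, shuffle = None, True         # repeat and shuffle while training
    else:
        features, labels = load_eval_arrays()    # hypothetical helper
        num_epochs, shuffle = 1, False           # one pass, in order
    return tf.estimator.inputs.numpy_input_fn(
        x={'x': features}, y=labels,
        batch_size=batch_size, num_epochs=num_epochs, shuffle=shuffle)()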
main
Here we actually train and use the estimator.
Firstly, we get an Estimator instance with the model_fn we specified, then we call train() and wait for the training to be over.
Once that is done, calling predict() returns an iterable that you can use to get the prediction results for all the samples in the dataset you're predicting.
This is a couple of months old, but it's worth noting that there is absolutely no reason not to use Keras with TensorFlow. It's even part of the TensorFlow library now!
So if you want full control of your tensors but still want to use Keras layers, you can easily achieve that by using Keras as-is:
import tensorflow as tf
from tensorflow import keras  # Keras ships with TensorFlow (tf.keras)

x = tf.placeholder(tf.float32, [None, 1024])
y = keras.layers.Dense(512, activation='relu')(x)
For more on that, keras' creator made a pretty cool post about it.
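As an aside, the shape error from the question also goes away in plain TensorFlow once the weights are 2-D matrices combined with tf.matmul instead of elementwise *. A minimal sketch of the shapes involved (not a tuned training setup):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3])    # a batch of 3-feature rows
yTrain = tf.placeholder(tf.float32, [None, 1])

w1 = tf.Variable(tf.random_normal([3, 64]))  # 3 inputs -> 64 hidden units
b1 = tf.Variable(tf.zeros([64]))
hidden = tf.nn.relu(tf.matmul(x, w1) + b1)   # shape [None, 64]

w2 = tf.Variable(tf.random_normal([64, 1]))  # 64 hidden units -> 1 output
b2 = tf.Variable(tf.zeros([1]))
logits = tf.matmul(hidden, w2) + b2          # shape [None, 1]

loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=yTrain, logits=logits))
train = tf.train.RMSPropOptimizer(0.001).minimize(loss)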

Model Won't Train (Loss Doesn't Move)

TL;DR: I can't find my mistake when using the Tensorflow optimizer to train an extremely small neural net. The loss either doesn't move or moves once then gets stuck (it seems to really like the value 0.693147 which is ln(2)...).
Issue and Code: I'm trying to implement the 12-net part of the cascade classifier in Li et al (here) in Tensorflow. It's an extremely simple net, but nothing I try seems to get it training.
import tensorflow as tf
import tensorflow.contrib.slim as slim
import cv2
import numpy as np
input_tensor = tf.placeholder(tf.float32, shape=[1, 12, 12, 3])
input_label = tf.placeholder(tf.float16, shape=[1, 2])
conv_1 = slim.conv2d(input_tensor, 16, (3, 3), scope='conv1')
pool_1 = slim.max_pool2d(conv_1, (3, 3), 2, scope='pool1')
flatten = slim.flatten(pool_1)
fully_con = slim.fully_connected(flatten, 16, scope='full_con')
fully_con_2 = slim.fully_connected(fully_con, 2, scope='output')
probs = tf.nn.softmax(fully_con_2)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=input_label, logits=fully_con_2))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=.001).minimize(loss)
This defines the net. It takes in a (for now, single) 12x12 image and label, does a single 3x3 convolution with stride 1 and 16 filters, a 3x3 max pool with stride 2, then fully connects to 16 features, and finally makes a binary classification. I am able to perform a forward pass through the code, so I don't think the issue is here. This is my training loop - I have 3 12x12 images (2 faces, 1 tree) and just alternately feed them to the optimizer (clearly not best training practice, but I'm just trying to get it to work):
if __name__ == '__main__':
    im = cv2.imread('resized.jpg').reshape(1, 12, 12, 3).astype('float16')
    im2 = cv2.imread('resized2.jpg').reshape(1, 12, 12, 3).astype('float16')
    im3 = cv2.imread('resize3.jpg').reshape(1, 12, 12, 3).astype('float16')
    im_lab_1 = np.array([[0, 1]], dtype='float16')
    im_lab_2 = np.array([[0, 1]], dtype='float16')
    im_lab_3 = np.array([[1, 0]], dtype='float16')

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(loss, feed_dict={input_tensor: im3, input_label: im_lab_3}))
        for i in range(50000):
            if i % 3 == 0:
                # _, l = sess.run([optimizer, loss], feed_dict=feed1)
                # print(l)
                optimizer.run(feed_dict={input_tensor: im, input_label: im_lab_1})
            elif i % 4 == 0:
                # _, l = sess.run([optimizer, loss], feed_dict=feed2)
                # print(l)
                optimizer.run(feed_dict={input_tensor: im2, input_label: im_lab_2})
            elif i % 5 == 0:
                optimizer.run(feed_dict={input_tensor: im3, input_label: im_lab_3})
        print(sess.run(loss, feed_dict={input_tensor: im3, input_label: im_lab_3}))
I've tried both optimizer.run(...) and the commented out sess.run([optimizer, loss]...). The first sess.run(loss...) seems to spit out something correct, but after that, the loss gets stuck and never moves again. Clearly, I'm doing something wrong here, and any help would be appreciated!

Tensorflow embedding for categorical feature

In machine learning, it is common to represent a categorical (specifically: nominal) feature with one-hot-encoding. I am trying to learn how to use tensorflow's embedding layer to represent a categorical feature in a classification problem. I have got tensorflow version 1.01 installed and I am using Python 3.6.
I am aware of the tensorflow tutorial for word2vec, but it is not very instructive for my case. While building the tf.Graph, it uses NCE-specific weights and tf.nn.nce_loss.
I just want a simple feed-forward net as below, with the input layer being an embedding. My attempt is below; it fails with a shape incompatibility when I try to matrix-multiply the embedding output with the hidden-layer weights. Any ideas how I can fix this?
from __future__ import print_function

import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import LabelEncoder

if __name__ == '__main__':
    # 1 categorical input feature and a binary output
    df = pd.DataFrame({'cat2': np.array(['o', 'm', 'm', 'c', 'c', 'c', 'o', 'm', 'm', 'm']),
                       'label': np.array([0, 0, 1, 1, 0, 0, 1, 0, 1, 1])})

    encoder = LabelEncoder()
    encoder.fit(df.cat2.values)
    X = encoder.transform(df.cat2.values)
    Y = np.zeros((len(df), 2))
    Y[np.arange(len(df)), df.label.values] = 1

    # Neural net parameters
    training_epochs = 5
    learning_rate = 1e-3
    cardinality = len(np.unique(X))
    embedding_size = 2
    input_X_size = 1
    n_labels = len(np.unique(Y))
    n_hidden = 10

    # Placeholders for input, output
    x = tf.placeholder(tf.int32, [None, 1], name="input_x")
    y = tf.placeholder(tf.float32, [None, 2], name="input_y")

    # Neural network weights
    embeddings = tf.Variable(tf.random_uniform([cardinality, embedding_size], -1.0, 1.0))
    h = tf.get_variable(name='h2', shape=[embedding_size, n_hidden],
                        initializer=tf.contrib.layers.xavier_initializer())
    W_out = tf.get_variable(name='out_w', shape=[n_hidden, n_labels],
                            initializer=tf.contrib.layers.xavier_initializer())

    # Neural network operations
    embedded_chars = tf.nn.embedding_lookup(embeddings, x)
    layer_1 = tf.matmul(embedded_chars, h)
    layer_1 = tf.nn.relu(layer_1)
    out_layer = tf.matmul(layer_1, W_out)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            avg_cost = 0.
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: X, y: Y})
        print("Optimization Finished!")
EDIT:
Please see the error message below:
Traceback (most recent call last):
File "/home/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 671, in _call_cpp_shape_fn_impl
input_tensors_as_shapes, status)
File "/home/anaconda3/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/home/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shape must be rank 2 but is rank 3 for 'MatMul' (op: 'MatMul') with input shapes: [?,1,2], [2,10].
Just make your x placeholder have shape [None] instead of [None, 1].
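A minimal sketch of what that change does to the shapes (everything else stays as in the question's code):

x = tf.placeholder(tf.int32, [None], name="input_x")    # fed with X, shape (n_samples,)
embedded_chars = tf.nn.embedding_lookup(embeddings, x)  # shape [None, embedding_size] -- rank 2
layer_1 = tf.nn.relu(tf.matmul(embedded_chars, h))      # [None, n_hidden]; MatMul now gets two rank-2 inputs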

Why are some nodes ignored by `print_model_analysis` when `run_meta` is provided?

I want to compute the number of variables and the number of floating point operations of models. However, it seems that tf.contrib.tfprof.model_analyzer.print_model_analysis ignores the first node when run_meta is provided.
For example (tested with tensorflow 1.0.0):
import numpy as np
import tensorflow as tf
slim = tf.contrib.slim
x = tf.placeholder(tf.float32, [None, 7, 7, 3])
c1 = slim.conv2d(x, 22, [3, 3])
run_metadata = tf.RunMetadata()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _ = sess.run(c1, feed_dict={x: np.zeros([1, 7, 7, 3])},
                 options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                 run_metadata=run_metadata)

analysis = tf.contrib.tfprof.model_analyzer.print_model_analysis(
    tf.get_default_graph(), run_meta=run_metadata,
    tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

# 1078
print(analysis.total_float_ops)
It only contains the number of floating point operations for Conv/BiasAdd. How can I analyze the model correctly using tfprof?