Tensorflow seed not working with LSTM model - tensorflow

tf.set_random_seed() does not seem to work, and I cannot find a seed option.
For the many parameters in the LSTM, there seems to be no seed option in tf.nn.rnn_cell.BasicLSTMCell, so every run produces different results. How can I set the seed so that repeated runs produce the same results?
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
if __name__ == '__main__':
    np.random.seed(1234)
    # Fixed input: the integers 1..120 arranged as a (4, 6, 5) float array.
    X = np.array(np.array(range(1, 121)).reshape(4, 6, 5), dtype=float)
    x0 = tf.placeholder(tf.float32, [4, 6, 5])
    x = tf.reshape(x0, [-1, 5])
    x = tf.split(0, 4, x)
    with tf.variable_scope('lstm') as scope:
        lstm = tf.nn.rnn_cell.BasicLSTMCell(5, state_is_tuple=True)
        outputs, states = tf.nn.rnn(lstm, x, dtype=tf.float32)
        # Everything created after this call reuses the variables made above.
        scope.reuse_variables()
        outputs2, states2 = tf.nn.dynamic_rnn(lstm, x0, dtype=tf.float32, time_major=True)
        outputs3, states3 = tf.nn.rnn(lstm, x, dtype=tf.float32)
    print(outputs3)
    with tf.Session() as sess:
        # NOTE(review): the seed is set here, after the LSTM ops above have
        # already been created. This is the behaviour the question is about,
        # so it is intentionally left unchanged.
        tf.set_random_seed(1)
        init = tf.initialize_all_variables()
        sess.run(init)
        for var in tf.trainable_variables():
            print(var.name)
        for i in range(3):
            result1, result2, result3 = sess.run([outputs, outputs2, outputs3], feed_dict={x0: X})
            print(result1)
            print('---------------------------------------')
            print(result2)
            print('---------------------------------------')
            print(result3)
            print('---------------------------------------')

I believe this should work "as expected" in the TensorFlow nightly builds. Please try this with a TF nightly build and report back.
Also, call tf.set_random_seed before creating any ops.

Related

TypeError: Fetch argument None has invalid type <class 'NoneType'> on operation that seems to be not none

N.B. Tensorflow version less than 2.0
In the following reproducible code, wd_d_op=sess.run([wd_d_op], feed_dict={X: x}) runs successfully, but grads_and_vars=sess.run([grad_and_vars], feed_dict={X: x}) raises the NoneType error mentioned above. If grad_and_vars is None, then how come the next operation runs successfully?
import tensorflow as tf
import numpy as np
from sklearn.datasets import make_blobs
##function for creating layer with fixed weight, don't worry about this
def fc_layer(input_tensor, input_dim, output_dim, component_name,act=tf.nn.relu, input_type='dense'):
    """Build a fully connected layer whose weights are hard-coded constants.

    Args:
        input_tensor: layer input; cast to float32 before the matmul.
        input_dim: unused by the current body (only referenced in the
            commented-out random initialiser).
        output_dim: length of the bias vector.
        component_name: "weight1" selects the 2x4 weight matrix; any other
            value selects the 4x1 weight matrix.
        act: activation applied to ``input @ weight + bias``.
        input_type: 'sparse' uses tf.sparse_tensor_dense_matmul; anything
            else uses tf.matmul.

    Returns:
        The activation tensor.
    """
    # weight = tf.Variable(tf.truncated_normal([input_dim, output_dim], stddev=1. / tf.sqrt(input_dim / 2.)), name='weight')
    if component_name=="weight1":
        weight=tf.Variable([[-0.46401197, -0.02868146, -0.02945778, -0.19310321],[-0.06130088, -0.3782992 , -1.4025078 , -0.8482222 ]])
        bias=tf.Variable([0.1,0.1,0.1,0.1])
    else:
        weight=tf.Variable([[ 0.27422005],[-1.2150304 ],[-0.43404067],[-0.3352416 ]])
        bias=tf.Variable([0.1])
    # weight=tf.Print(weight,[weight],component_name,summarize=-1)
    # NOTE(review): this rebinds `bias`, so the bias variable created in the
    # branch above is never used by the returned activations. Left as-is
    # because this snippet is the reproduction of the reported error.
    bias = tf.Variable(tf.constant(0.1, shape=[output_dim]), name='bias')
    # bias=tf.Print(bias,[type(bias)],component_name+"bias",summarize=-1)
    weight=tf.cast(weight, tf.float32)
    bias=tf.cast(bias, tf.float32)
    input_tensor=tf.cast(input_tensor, tf.float32)
    if input_type == 'sparse':
        activations = act(tf.sparse_tensor_dense_matmul(input_tensor, weight) + bias)
    else:
        activations = act(tf.matmul(input_tensor, weight) + bias,name="features")
    return activations
"""fixed input"""
x=np.array([[-0.9233333412304945, -0.5148649076298134],[-0.9366679176350374, -2.086600005395918],[50.366624846708156, -9.02965996391532],[51.09416621163187, -12.101430685982692]])
lr_wd_D = 1e-3
with tf.name_scope('input'):
    X = tf.placeholder(dtype=tf.float32,name="exmaple")
with tf.name_scope('generator'):
    h1 = fc_layer(X, 2, 4,component_name="weight1",input_type='dense')
    output = fc_layer(h1, 4, 1,component_name="weight2",act=tf.identity,input_type='dense')
    # output=tf.Print(output,[output],"output",summarize=-1)
    output=tf.convert_to_tensor(output, dtype=tf.float32)
# The first two output rows and the last two output rows are compared.
critic_s = tf.slice(output, [0, 0], [2, -1])
critic_t = tf.slice(output, [2, 0], [2, -1])
wd_loss = (tf.reduce_mean(critic_s) - tf.reduce_mean(critic_t))
# wd_loss=tf.convert_to_tensor(wd_loss, dtype=tf.float32)
# Every global variable whose name contains 'generator'.
theta_C = [v for v in tf.global_variables() if 'generator' in v.name]
wd_op=tf.train.AdamOptimizer(lr_wd_D)
"""only calling this operation does not work, raised the mentioned error"""
grad_and_vars = wd_op.compute_gradients(wd_loss,var_list=theta_C)
"""But the following operation works even that use the previous variable"""
wd_d_op=wd_op.apply_gradients(grad_and_vars)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # this works (note: it rebinds wd_loss and theta_C to the fetched values)
    wd_loss,theta_C=sess.run([wd_loss,theta_C], feed_dict={X: x})
    print("wd_loss")
    print(wd_loss)
    print("theta_C")
    print(theta_C)
    # this works
    wd_d_op=sess.run([wd_d_op], feed_dict={X: x})
    # this does not work, even though grad_and_vars is used by wd_d_op
    grads_and_vars=sess.run([grad_and_vars], feed_dict={X: x})
Solution:
If you comment out the following two lines of code, it will run correctly.
# bias=tf.Variable([0.1,0.1,0.1,0.1])
# bias=tf.Variable([0.1])
Explain:
Gradients are returned as None when there is no explicit connection in the graph between wd_loss and a variable in theta_C. If you print theta_C, you will find two extra bias variables. These two bias variables do not actually participate in the calculation of wd_loss.
Below is an example of the same error: w3 does not participate in the calculation of y, yet the gradient of y with respect to w3 is requested.
import tensorflow as tf
w1 = tf.Variable([[1.,2.]])
w2 = tf.Variable([[9.],[10.]])
# w3 is deliberately not used in the computation of y.
w3 = tf.Variable([[5.,6.]])
y = tf.matmul(w1, w2)
# this works
grads = tf.gradients(y,[w1,w2])
# this does not work, TypeError: Fetch argument None has invalid type <class 'NoneType'>
# grads = tf.gradients(y,[w1,w2,w3])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    gradval = sess.run(grads)
    print(gradval)

"keras.backend.variable" is not behaving correctly in keras as opposed to tensorflow

I want to define trainable scalar in my models. In TensorFlow, this is done using tf.Variable. In Keras, keras.backend.variable is supposed to behave the same way. However, when I use model.fit, keras does not change the variable during the optimization process. Does anyone know why?
To test, please uncomment RUN_ON = "tensorflow" or RUN_ON = "keras" to run on either of engines.
import numpy as np
import keras as k
import tensorflow as tf
import matplotlib.pyplot as plt
# RUN_ON = "tensorflow"
# RUN_ON = "keras"
# Ground truth: y = w_true * x + b_true sampled at 1000 points in [0, 1].
b_true = 3.0
w_true = 5.0
x_true = np.linspace(0.0, 1.0, 1000).reshape(-1, 1)
y_true = x_true * w_true + b_true
ids = np.arange(0, x_true.shape[0])

# RUN_ON must be defined by uncommenting one of the assignments above this
# block; otherwise the comparison below raises NameError.
if RUN_ON=="keras":
    x = k.Input((1,), dtype="float32", name="x")
    Fx = k.layers.Dense(1, use_bias=False, name="Fx")(x)
    b = k.backend.variable(1.0, name="b")
    y = k.layers.Lambda(lambda x: x+b, name="Add")(Fx)
    model = k.Model(inputs=[x], outputs=[y])
    model.compile("adam", loss="mse")
    # model.summary()
    model.fit(x_true, [y_true], epochs=100000, batch_size=1000)
    y_pred = model.predict(x_true)
elif RUN_ON=="tensorflow":
    x = tf.placeholder("float32", shape=[None, 1], name="x")
    Fx = tf.layers.Dense(1, use_bias=False, name="Fx")(x)
    b = tf.Variable(1.0, name="b")
    y = Fx + b
    yp = tf.placeholder("float32", shape=[None, 1], name="y")
    loss = tf.reduce_mean(tf.square(yp - y))
    opt = tf.train.AdamOptimizer(0.001).minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(100000):
            np.random.shuffle(ids)
            opt_out, loss_val, b_val = sess.run([opt, loss, b], feed_dict={x: x_true[ids], yp: y_true[ids]})
            print("epoch={:d} loss={:e} b_val={:f}".format(i, loss_val, b_val))
            # Stop early once the fit is essentially exact.
            if loss_val < 1.0e-9:
                break
        y_pred = sess.run([y], feed_dict={x: x_true, yp: y_true})[0]
else:
    raise ValueError('`RUN_ON` should be either `keras` or `tensorflow`.')
plt.plot(x_true, y_true, '--b', linewidth=4)
plt.plot(x_true, y_pred, 'r')
plt.show()
#

Unable to use core Estimator with contrib Predictor

I'm using canned estimators and am struggling with poor predict performance, so I'm trying to use tf.contrib.predictor to improve my inference performance. I've made this minimal example to reproduce my problem:
import tensorflow as tf
from tensorflow.contrib import predictor
def serving_input_fn():
    """Return a ServingInputReceiver exposing a string placeholder as 'x'."""
    x = tf.placeholder(dtype=tf.string, shape=[1], name='x')
    inputs = {'x': x }
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)


input_feature_column = tf.feature_column.numeric_column('x', shape=[1])
estimator = tf.estimator.DNNRegressor(
    feature_columns=[input_feature_column],
    hidden_units=[10, 20, 10],
    model_dir="model_dir\\predictor-test")
estimator_predictor = predictor.from_estimator(estimator, serving_input_fn)
# NOTE(review): the feed key is "inputs" and the value is a string; this is
# the call the question reports as failing, so it is left unchanged.
estimator_predictor({"inputs": ["1.0"]})
This yields the following exception:
UnimplementedError (see above for traceback): Cast string to float is not supported
[[Node: dnn/input_from_feature_columns/input_layer/x/ToFloat = Cast[DstT=DT_FLOAT, SrcT=DT_STRING, _device="/job:localhost/replica:0/task:0/device:CPU:0"](dnn/input_from_feature_columns/input_layer/x/ExpandDims)]]
I've tried using tf.estimator.export.TensorServingInputReceiver instead of ServingInputReceiver in my serving_input_fn(), so that I can feed my model with a numerical tensor which is what I want:
def serving_input_fn():
    """Return a TensorServingInputReceiver over a single float32 placeholder."""
    x = tf.placeholder(dtype=tf.float32, shape=[1], name='x')
    return tf.estimator.export.TensorServingInputReceiver(x, x)
but then I get the following exception in my predictor.from_estimator() call:
ValueError: features should be a dictionary of Tensors. Given type: <class 'tensorflow.python.framework.ops.Tensor'>
Any ideas?
My understanding of all of this is not really solid but I got it working and given the size of the community, I'll try to share what I did.
First, I'm running tensorflow 1.5 binaries with this patch applied manually.
The exact code I'm running is this:
def serving_input_fn():
    """Return a ServingInputReceiver exposing a float32 [3500] placeholder as 'x'."""
    x = tf.placeholder(dtype=tf.float32, shape=[3500], name='x')
    inputs = {'x': x }
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)


# model_fn, script_dir and sample are defined elsewhere in the answerer's code.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="{}/model_dir_{}/model.ckpt-103712".format(script_dir, 3))
estimator_predictor = tf.contrib.predictor.from_estimator(
    estimator, serving_input_fn)
# The feed key matches the 'x' key declared in serving_input_fn.
p = estimator_predictor(
    {"x": np.array(sample.normalized.input_data)})
My case is a bit different than your example because I'm using a custom Estimator but in your case, I guess you should try something like this:
def serving_input_fn():
    """Return a ServingInputReceiver exposing a float32 [1] placeholder as 'x'."""
    x = tf.placeholder(dtype=tf.float32, shape=[1], name='x')
    inputs = {'x': x }
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)


estimator = ...
estimator_predictor = tf.contrib.predictor.from_estimator(
    estimator, serving_input_fn)
# Feed a float under the same 'x' key that serving_input_fn declares.
estimator_predictor({"x": [1.0]})
error is in following line:
estimator_predictor({"inputs": ["1.0"]})
Please take 1.0 out of the quotes; currently it is a string.
After having worked on this for a couple of days, I want to share what I have done. The following code is also available from https://github.com/dage/tensorflow-estimator-predictor-example
TL;DR: predictor works best with custom estimators and the performance increase is massive.
import tensorflow as tf
import numpy as np
import datetime
import time
FEATURES_RANK = 3 # The number of inputs
LABELS_RANK = 2 # The number of outputs


def features_to_labels(features):
    """Derive a label matrix from a 2-D feature array.

    Can be used when creating a training dataset.

    Args:
        features: numpy array of shape (num_samples, num_features).

    Returns:
        numpy array of shape (num_samples, LABELS_RANK) whose column i
        (1-based) holds i times the row sum of ``features``.
    """
    sum_column = features.sum(1).reshape(features.shape[0], 1)
    # np.hstack needs a real sequence; a generator is rejected by current NumPy.
    labels = np.hstack([sum_column * i for i in range(1, LABELS_RANK + 1)])
    return labels
def serving_input_fn():
    """Return a ServingInputReceiver exposing a float32 [None, FEATURES_RANK] placeholder as 'x'."""
    x = tf.placeholder(dtype=tf.float32, shape=[None, FEATURES_RANK], name='x') # match dtype in input_fn
    inputs = {'x': x }
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)
def model_fn(features, labels, mode):
    """Estimator model function: a small dense network over ``features["x"]``.

    Args:
        features: dict holding the input tensor under key "x".
        labels: target tensor (not used in PREDICT mode).
        mode: a tf.estimator.ModeKeys value.

    Returns:
        An EstimatorSpec carrying predictions and an export output for
        PREDICT, the mean-squared-error loss for EVAL, and the loss plus an
        Adagrad train op otherwise.
    """
    net = features["x"] # input
    # tf.layers.dropout only drops units when `training` is true; without it
    # the layer is an identity op in every mode.
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    for units in [4, 8, 4]: # hidden units
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
        net = tf.layers.dropout(net, rate=0.1, training=is_training)
    output = tf.layers.dense(net, LABELS_RANK, activation=None)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=output, export_outputs={"out": tf.estimator.export.PredictOutput(output)})
    loss = tf.losses.mean_squared_error(labels, output)
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss)
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
# expecting a numpy array of shape (1, FEATURE_RANK) for constant_feature argument
def input_fn(num_samples, constant_feature = None, is_infinite = True):
    """Build the (features, labels) tensors fed to the estimator.

    Args:
        num_samples: number of feature rows to generate.
        constant_feature: if a numpy array, every row is filled from it;
            otherwise the rows are drawn with np.random.rand.
        is_infinite: when true, the single-element dataset repeats forever.

    Returns:
        The next element of a one-shot iterator: ({"x": features}, labels).
    """
    if isinstance(constant_feature, np.ndarray):
        feature_values = np.full((num_samples, FEATURES_RANK), constant_feature)
    else:
        feature_values = np.random.rand(num_samples, FEATURES_RANK)
    feature_values = np.float32(feature_values) # match dtype in serving_input_fn
    labels = features_to_labels(feature_values)
    dataset = tf.data.Dataset.from_tensors(({"x": feature_values}, labels))
    if is_infinite:
        dataset = dataset.repeat()
    return dataset.make_one_shot_iterator().get_next()
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="model_dir\\estimator-predictor-test-{date:%Y-%m-%d %H.%M.%S}".format(date=datetime.datetime.now()))
train = estimator.train(input_fn=lambda : input_fn(50), steps=500)
evaluate = estimator.evaluate(input_fn=lambda : input_fn(20), steps=1)
predictor = tf.contrib.predictor.from_estimator(estimator, serving_input_fn)

# One random feature row, used for both prediction paths so they can be compared.
consistency_check_features = np.random.rand(1, FEATURES_RANK)
consistency_check_labels = features_to_labels(consistency_check_features)

# Time the predictor. time.clock() was removed in Python 3.8, so
# time.perf_counter() is used for the interval measurement.
num_calls_predictor = 100
predictor_input = {"x": consistency_check_features}
start_time_predictor = time.perf_counter()
for i in range(num_calls_predictor):
    predictor_prediction = predictor(predictor_input)
delta_time_predictor = 1./num_calls_predictor*(time.perf_counter() - start_time_predictor)

# Time estimator.predict() on the same input.
num_calls_estimator_predict = 10
estimator_input = lambda : input_fn(1, consistency_check_features, False)
start_time_estimator_predict = time.perf_counter()
for i in range(num_calls_estimator_predict):
    estimator_prediction = list(estimator.predict(input_fn=estimator_input))
delta_time_estimator = 1./num_calls_estimator_predict*(time.perf_counter() - start_time_estimator_predict)

print("{} --> {}\n predictor={}\n estimator={}.\n".format(consistency_check_features, consistency_check_labels, predictor_prediction, estimator_prediction))
print("Time used per estimator.predict() call: {:.5f}s, predictor(): {:.5f}s ==> predictor is {:.0f}x faster!".format(delta_time_estimator, delta_time_predictor, delta_time_estimator/delta_time_predictor))
On my laptop I get the following results:
[[0.55424854 0.98057611 0.98604857]] --> [[2.52087322 5.04174644]]
predictor={'output': array([[2.5221248, 5.049496 ]], dtype=float32)}
estimator=[array([2.5221248, 5.049496 ], dtype=float32)].
Time used per estimator.predict() call: 0.30071s, predictor(): 0.00057s ==> predictor is 530x faster!

How to traverse the result of tf.unique?

After invoking tf.unique, the shape of the resulting tensor is unknown, but I want to traverse the result of tf.unique.
Suppose tensor = tf.unique(...)
I have tried:
for i in tf.range(tf.shape(tensor)[0])
tf.unstack(tensor, num=tf.shape(tensor)[0])
tf.split(tensor, num_or_size_splits=tf.shape(tensor)[0])
None of them work, because these functions all need a static shape, or an integer for num/num_or_size_splits. So how can I traverse the tensor?
Update
Example
I have two 1-D tensor with the same shape
x=[1,3,2,1,3]
y=[3,6,5,8,9]
I want to do like this:
x_u = unique(x) # [1,3,2]
get bool_mask, and slice y
for i in x_u:
y[x == i]
when i=1, y[x==i]=y[[True,False,False,True,False]], and I can get y[0] and y[3]
when i=3, I can get y[1] and y[4]
when i=2, I can get y[2]
Solution
After some trials, that may be a solution.
Try to use tf.while_loop:
import tensorflow as tf
import numpy as np
x = tf.constant(np.array([1,3,2,1,3]), dtype='int32')
y = tf.constant(np.array([3,6,5,8,9]), dtype='int32')
x_u, _ = tf.unique(x)
n = tf.shape(x_u)[0]
for_i = tf.constant(0)
re = tf.constant([], dtype=tf.int32)
# Loop while the counter is below the number of unique values.
cond = lambda i, res: i<n


def body(i, res):
    """Append the entries of y whose x equals the i-th unique value to res."""
    x_0 = tf.slice(x_u, [i], [1])
    selected = tf.boolean_mask(y, tf.equal(x_0, x))
    return i+1, tf.concat([res, selected], axis=0)


# The accumulator grows on every iteration, so its shape invariant is [None].
op = tf.while_loop(cond, body, [for_i, re], shape_invariants=[for_i.get_shape(), tf.TensorShape([None])])
print(op[1].shape)
with tf.Session() as sess:
    print(sess.run(op[1]))
I just tried this:
import tensorflow as tf
import numpy as np
a = tf.constant(np.random.randn(200), dtype='float32')
# tf.unique returns a pair; element 0 holds the unique values.
b = tf.unique(a)
print(b[0])  # Tensor("Unique:0", shape=(?,), dtype=float32)
# Square every unique value without needing the static shape of b[0].
c = tf.map_fn(lambda x: x*x, b[0])
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
d = sess.run(c)
print(d)
And it works without knowing the shape of 'b' here. Be careful: tf.unique returns a tuple (Tensor, Tensor) with the values and their indices.
Update
This is the only way I found to do this; your result cannot have an inconsistent shape in TensorFlow.
import tensorflow as tf
import numpy as np
x = tf.constant(np.array([1,3,2,1,3]), dtype='int32')
y = tf.constant(np.array([3,6,5,8,9]), dtype='int32')
x_u = tf.unique(x)
# Compare x against each unique value (one row per unique value).
eq = tf.equal(x, tf.expand_dims(x_u[0],1))
# Multiply y by the 0/1 mask, so non-matching positions become 0.
y_masked = y*tf.cast(eq, tf.int32)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
e = sess.run(y_masked)
print(e)
tf.boolean_mask can be used as well but you are going to get a flat output.
Last Update
This is what you want, way faster than what you proposed and it was already explained the line just above.
import tensorflow as tf
import numpy as np
x = tf.constant(np.array([1,3,2,1,3]), dtype='int32')
y = tf.constant(np.array([3,6,5,8,9]), dtype='int32')
x_u, _ = tf.unique(x)
# Compare x against each unique value (one row per unique value).
eq = tf.equal(x, tf.expand_dims(x_u,1))
# Repeat y once per unique value so it lines up with the mask.
y_tiled = tf.tile(tf.expand_dims(y, 0), [tf.shape(x_u)[0], 1])
y_masked = tf.boolean_mask(y_tiled, eq)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
e = sess.run(y_masked)
print(e)

Why do the results of rnn and dynamic_rnn show a discrepancy in tensorflow?

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
if __name__ == '__main__':
    np.random.seed(1234)
    # Fixed input: the integers 1..120 arranged as a (4, 6, 5) float array.
    X = np.array(np.array(range(1, 121)).reshape(4, 6, 5), dtype=float)
    x0 = tf.placeholder(tf.float32, [4, 6, 5])
    x = tf.reshape(x0, [-1, 5])
    x = tf.split(0, 4, x)
    lstm = tf.nn.rnn_cell.BasicLSTMCell(5, state_is_tuple=True)
    # NOTE(review): the three RNNs below are built in three different
    # variable scopes. Left unchanged because this is the question's code.
    with tf.variable_scope('sen'):
        outputs, states = tf.nn.rnn(lstm, x, dtype=tf.float32)
    with tf.variable_scope('par'):
        output2, states2 = tf.nn.dynamic_rnn(lstm, x0, dtype=tf.float32, time_major=True)
    with tf.variable_scope('sen2'):
        outputs3, states3 = tf.nn.rnn(lstm, x, dtype=tf.float32)
    with tf.Session() as sess:
        for i in range(3):
            # Variables are re-initialised on every iteration.
            sess.run(tf.initialize_all_variables())
            result1, result2, result3 = sess.run([outputs[-1], output2[-1], outputs3[-1]], {x0: X})
            print(result1)
            print('---------------------------------------')
            print(result2)
            print('---------------------------------------')
            print(result3)
            print('------------------------------------------------------------------------------')
I think result1, result2 and result3 should always be the same, but they are not equal to each other. And result2 changes each time I run the function. What is the problem?
The problem is that, although you are using a single LSTM cell, you created 3 RNNs within different variable scopes, so they can't share parameters. Consider printing all of the trainable variables to see that:
# List every trainable variable by name to see which scopes own parameters.
for var in tf.trainable_variables():
    print(var.name)
For explicit use of the same parameters, try scope.reuse_variables() where scope is the same scope for different outputs.
I've come up with the following:
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
if __name__ == '__main__':
    np.random.seed(1234)
    # Fixed input: the integers 1..120 arranged as a (4, 6, 5) float array.
    X = np.array(np.array(range(1, 121)).reshape(4, 6, 5), dtype=float)
    x0 = tf.placeholder(tf.float32, [4, 6, 5])
    x = tf.reshape(x0, [-1, 5])
    x = tf.split(0, 4, x)
    # All three RNNs are built in one variable scope so they share parameters.
    with tf.variable_scope('lstm') as scope:
        lstm = tf.nn.rnn_cell.BasicLSTMCell(5, state_is_tuple=True)
        outputs, states = tf.nn.rnn(lstm, x, dtype=tf.float32)
        # Everything created after this call reuses the variables made above.
        scope.reuse_variables()
        outputs2, states2 = tf.nn.dynamic_rnn(lstm, x0, dtype=tf.float32, time_major=True)
        outputs3, states3 = tf.nn.rnn(lstm, x, dtype=tf.float32)
    print(outputs3)
    with tf.Session() as sess:
        init = tf.initialize_all_variables()
        sess.run(init)
        for var in tf.trainable_variables():
            print(var.name)
        for i in range(3):
            result1, result2, result3 = sess.run([outputs, outputs2, outputs3], feed_dict={x0: X})
            print(result1)
            print('---------------------------------------')
            print(result2)
            print('---------------------------------------')
            print(result3)
            print('---------------------------------------')
Seems to work just fine.