Initialization error occurs even after global initialization
The initialization error is this:
FailedPreconditionError: Attempting to use uninitialized value biases
[[Node: biases/read = Identity[T=DT_FLOAT, _class=["loc:@Adagrad/update_biases/ApplyAdagrad"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](biases)]]
import functools

def lazy_property(function):
    attribute = '_cache_' + function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return decorator
class Model:
    def __init__(self, data, target):
        self.data = data
        self.target = target
        self._logits = None
        self._prediction = None
        self._optimize = None
        self._error = None

    @lazy_property
    def logits(self):
        w = tf.Variable(tf.truncated_normal([784, 1]), name='weights')
        b = tf.Variable(tf.zeros([1]), name='biases')
        self._logits = tf.matmul(self.data, w) + b
        return self._logits

    @lazy_property
    def prediction(self):
        self._prediction = tf.nn.softmax(self.logits)
        return self._prediction

    @lazy_property
    def optimize(self):
        labels = tf.to_int64(self.target)
        logits = self.prediction
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='xentropy')
        loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
        self._optimize = tf.train.AdagradOptimizer(0.05).minimize(loss)
        return self._optimize

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))
batch_size = 100
num_steps = 1000

tf.reset_default_graph()
data = MNIST(data_dir="data/MNIST/")

X = tf.placeholder(tf.float32, [batch_size, 784], name='Placeholder_Input')
Y = tf.placeholder(tf.int64, [batch_size], name='Placeholder_Output')
model = Model(X, Y)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(num_steps):
        model = Model(X, Y)
        for _ in range(100):
            x_batch, y_true_batch, _ = data.random_batch(batch_size=batch_size)
            y_true_batch = np.argmax(y_true_batch, axis=1)
            error, _ = session.run(model.optimize, feed_dict={X: x_batch, Y: y_true_batch})
        if step % 100 == 0:
            print("Error rate @ iter %d : %f" % (step, error))
You should run session.run(tf.global_variables_initializer()) only once the model is fully defined. Note that you are defining a new model at each step, and its variables are only instantiated when you call model.optimize. Here is my recommendation:
model = Model(X, Y)
optimize = model.optimize

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(num_steps):
        for _ in range(100):
            x_batch, y_true_batch, _ = data.random_batch(batch_size=batch_size)
            y_true_batch = np.argmax(y_true_batch, axis=1)
            error, _ = session.run(optimize, feed_dict={X: x_batch, Y: y_true_batch})
        if step % 100 == 0:
            print("Error rate @ iter %d : %f" % (step, error))
I am training a simple model to perform a linear regression, and then I save the model:
class NN(tf.keras.Model):
    def __init__(self):
        super(NN, self).__init__()
        L = 20
        self.W1 = tf.Variable(tf.random.truncated_normal([1, L], stddev=math.sqrt(3)))
        self.B1 = tf.Variable(tf.random.truncated_normal([1, L], stddev=1.0))
        self.W2 = tf.Variable(tf.random.truncated_normal([L, 1], stddev=math.sqrt(3/L)))
        self.B2 = tf.Variable(tf.zeros([1]))

    def call(self, inputs):
        Z1 = tf.matmul(inputs, self.W1) + self.B1
        Y1 = tf.nn.tanh(Z1)
        Y = tf.matmul(Y1, self.W2) + self.B2
        return Y

# The loss function to be optimized
def loss(model, X, Y_):
    error = model(X) - Y_
    return tf.reduce_mean(tf.square(error))

model = NN()
optimizer = tf.optimizers.Adam(learning_rate=0.001)
bsize = 20

# You can call this function in a loop to train the model, bsize samples at a time
def training_step(i):
    # read data
    x_batch, y_batch = func.next_batch(bsize)
    x_batch = np.reshape(x_batch, (bsize, 1))
    y_batch = np.reshape(y_batch, (bsize, 1))
    # compute training values
    loss_fn = lambda: loss(model, x_batch, y_batch)
    optimizer.minimize(loss_fn, [model.W1, model.B1, model.W2, model.B2])
    if i % 5000 == 0:
        l = loss(model, x_batch, y_batch)
        print(str(i) + ": epoch: " + str(func._epochs_completed) + ": loss: " + str(l.numpy()))

for i in range(50001):
    training_step(i)

# save the model
tf.saved_model.save(model, "my_file")
and then I try to load the model with the following lines, following the TensorFlow documentation:
model = tf.saved_model.load("my_file")
f = model.signatures["serving_default"]
y = f(x)
However, I get the following error message:
f = model.signatures["serving_default"]
File "my_file/signature_serialization.py", line 195, in __getitem__
return self._signatures[key]
KeyError: 'serving_default'
What is wrong? Why is serving_default not defined?
I solved the problem by adding a third argument to the tf.saved_model.save function:
tf.saved_model.save(model, "my_file", signatures=model.call.get_concrete_function(tf.TensorSpec(shape=[None, 1], dtype=tf.float32, name="inp")))
and by adding the @tf.function decorator above the call method:
class NN(tf.keras.Model):
    def __init__(self):
        super(NN, self).__init__()
        L = 20
        self.W1 = tf.Variable(tf.random.truncated_normal([1, L], stddev=math.sqrt(3)))
        self.B1 = tf.Variable(tf.random.truncated_normal([1, L], stddev=1.0))
        self.W2 = tf.Variable(tf.random.truncated_normal([L, 1], stddev=math.sqrt(3/L)))
        self.B2 = tf.Variable(tf.zeros([1]))

    @tf.function
    def call(self, X):
        Z1 = tf.matmul(X, self.W1) + self.B1
        Y1 = tf.nn.tanh(Z1)
        Y = tf.matmul(Y1, self.W2) + self.B2
        return Y
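With the model saved this way, loading works as in the snippet above. A minimal sketch (assuming the same "my_file" directory and the "inp" input name declared in the TensorSpec; signature functions take keyword arguments named after their inputs and return a dict keyed by output tensor name):

import numpy as np
import tensorflow as tf

# Load the SavedModel and fetch its serving signature.
loaded = tf.saved_model.load("my_file")
f = loaded.signatures["serving_default"]

# Call the signature with a keyword argument matching the input name.
x = tf.constant(np.linspace(-1.0, 1.0, 5).reshape(5, 1), dtype=tf.float32)
y = f(inp=x)
print(y)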
I am new to TensorFlow and trying to create a simple MLP. My model runs fine but does not give the desired performance. I tried to create summaries, but am now getting this error:
FailedPreconditionError: GetNext() failed because the iterator has not been initialized. Ensure that you have run the initializer operation for this iterator before getting the next element.
My code:
def fc_layer(input, channels_in, channels_out, name="fc"):
    with tf.name_scope(name):
        W = tf.Variable(tf.zeros([channels_in, channels_out]), name="weights")
        clip_op = tf.assign(W, tf.clip_by_norm(W, 1, axes=None))
        b = tf.Variable(tf.zeros([channels_out]), name="biases")
        act = tf.matmul(input, W) + b
        tf.summary.histogram("weights", W)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

# Setup placeholders, and reshape the data
y = tf.placeholder(tf.float32, shape=[None, 128], name='y')
x = tf.placeholder(tf.float32, shape=[None, 256], name='x')

dataset = tf.data.Dataset.from_tensor_slices((y, x)).batch(batch_size).repeat()
iter = dataset.make_initializable_iterator()
input_features, output_features = iter.get_next()

fc_1 = tf.nn.relu(fc_layer(input_features, 128, 512, name="fc1"))
fc_2 = tf.nn.relu(fc_layer(fc_1, 512, 256, name="fc2"))
out_layer = fc_layer(fc_2, 256, 256, name="out")

with tf.name_scope('loss'):
    loss_op = tf.sqrt(tf.reduce_mean(tf.squared_difference(out_layer, output_features)))
    tf.summary.scalar("loss", loss_op)

with tf.name_scope('train'):
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op)

# Summary writer
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(r'C:\Users\Jaweria\Documents\Code_logs', graph=tf.get_default_graph())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # initialise iterator with train data
    sess.run(iter.initializer, feed_dict={y: train_data[0], x: train_data[1], batch_size: Batch_Size})
    print('Training...')
    for i in range(training_epochs):
        tot_loss = 0
        for _ in range(n_batches):
            _, loss_value = sess.run([train_op, loss_op])
            tot_loss += loss_value
            s = sess.run(merged_summary)
            writer.add_summary(s, i * n_batches + _)
        print("Iter: {}, Loss: {:.4f}".format(i, tot_loss / n_batches))
    # initialise iterator with test data
    sess.run(iter.initializer, feed_dict={y: test_data[0], x: test_data[1], batch_size: test_data[0].shape[0]})
    print('Test Loss: {:4f}'.format(sess.run(loss_op)))
This is my code:
def next_batch(num, data, labels, length):
    idx = np.arange(0, length)
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = []
    labels_shuffle = []
    for i in idx:
        data_shuffle.append(data[i])
        labels_shuffle.append(labels[i])
    a = np.asarray(data_shuffle)
    b = np.zeros((num, len(labels[0]), 9))
    for i in range(0, num):
        for j in range(0, len(labels[i])):
            b[i][j][labels_shuffle[i][j]] = 1
    return np.asarray(data_shuffle), b
and the next_batch function is called here:
def train_step(x_batch, y_batch):
    feed_dict = {
        input_x: x_batch,
        input_y: y_batch,
        dropout_keep_prob: dropout_keep_prob
    }
    _, step, loss, accuracy = sess.run(
        [train_op, global_step, losses, accuracys],
        feed_dict=feed_dict)

...

step = 0
while step < num_epochs:
    x_batch, y_batch = next_batch(batch_size, training_x, training_y, training_prot_num)
    v_x_batch, v_y_batch = next_batch(batch_size, validation_x, validation_y, validation_prot_num)
    train_step(x_batch, y_batch)
    current_step = tf.train.global_step(sess, global_step)
    if current_step % evaluate_every == 0:
        print("\nEvaluation:")
        dev_step(v_x_batch, v_y_batch)
        print("")
And I get an error message in the sess.run() call of train_step:
TypeError: The value of a feed cannot be a tf.Tensor object.
I don't know why this code raises an error. The next_batch function returns a NumPy array, so I thought there would be no problem.
It looks like you may have set dropout_keep_prob = tf.constant(...) instead of a float value.
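For illustration, a minimal sketch of the distinction (keep_prob and keep_prob_value are hypothetical names): values in a feed_dict must be Python or NumPy values, never tf.Tensor objects such as tf.constant(...).

import tensorflow as tf

# Hypothetical placeholder for the keep probability.
keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
keep_prob_value = 0.5  # a plain Python float

feed_dict = {
    keep_prob: keep_prob_value,     # OK: feed value is a float
    # keep_prob: tf.constant(0.5),  # TypeError: feed value is a tf.Tensor
}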
I wrote a simple program to do something like word2vec, but during training I see the cross-entropy loss increase at the beginning of each epoch and then decrease. Please help me find out if there is any mistake in my code; I have already reviewed it many times...
from tensorflow.python import debug as tf_debug
import math
import os
import time
import random
import numpy as np
import tensorflow as tf
def gen_next_block(filenames, epochs):
    for epoch in range(epochs):
        for filename in filenames:
            with open(filename) as f:
                start = time.time()
                line_cnt = 0
                data = []
                for line in f:
                    record = line.strip().split(',')
                    record = [int(record[0]), int(record[1]), float(record[2])] + [int(item) for item in record[3].split(';')]
                    record = record[:3] + [record[3 + epoch],]
                    data.append(record)
                    line_cnt += 1
                    if line_cnt % 4096000 == 0:
                        end = time.time()
                        elapsed_time = (end - start) * 1000
                        print("load block data: epoch %d, filename %s line_cnt %d, size %d, elapsed time %f ms" % (epoch, filename, line_cnt, len(data), elapsed_time))
                        random.shuffle(data)
                        yield data
                        data = []
                        start = time.time()
                if len(data) > 0:
                    end = time.time()
                    elapsed_time = (end - start) * 1000
                    print("load block data: epoch %d, filename %s line_cnt %d, size %d, elapsed time %f ms" % (epoch, filename, line_cnt, len(data), elapsed_time))
                    random.shuffle(data)
                    yield data

data = None
next_block_generator = None
data_index = 0
last_time_data_index = 0

def generate_batch(filenames, epochs, batch_size):
    global data
    global data_index
    global last_time_data_index
    global next_block_generator

    if next_block_generator is None:
        next_block_generator = gen_next_block(filenames, epochs)

    if data_index <= last_time_data_index:
        data = next(next_block_generator, None)
        data_index = 0
        last_time_data_index = 0

    if data is not None:
        last_time_data_index = data_index
        batch = np.ndarray(shape=(batch_size), dtype=np.int32)
        labels = np.ndarray(shape=(batch_size), dtype=np.int32)
        negative_labels = np.ndarray(shape=(batch_size), dtype=np.int32)
        weights = np.ndarray(shape=(batch_size), dtype=np.float32)
        negative_weights = np.ones(shape=(batch_size), dtype=np.float32)
        for i in range(batch_size):
            batch[i] = data[data_index][0]
            labels[i] = data[data_index][1]
            weights[i] = data[data_index][2]
            negative_labels[i] = data[data_index][3]
            data_index = (data_index + 1) % len(data)
        return batch, labels, negative_labels, weights, negative_weights
    else:
        raise Exception("finish load file list [%s] %d times" % (','.join(filenames), epochs))
filename = 'data/dr_xianyu_item2vec_train_with_meta_20170725_dir/dr_xianyu_item2vec_train_with_meta_20170725_dir_'
filenames = [filename + str(i) for i in range(10)]

epochs = 5
batch_size = 2048
embedding_size = 32  # Dimension of the embedding vector.
num_sampled = batch_size  # Number of negative examples to sample.
vocabulary_size = 7483025 + 1

graph = tf.Graph()
with graph.as_default():
    with tf.device('/cpu:0'):
        with tf.name_scope('input_data'):
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size], name='context_placeholder')
            positive_labels = tf.placeholder(tf.int32, shape=[batch_size], name='target_placeholder')
            negative_labels = tf.placeholder(tf.int32, shape=[num_sampled], name='negative_target_placeholder')
            positive_weights = tf.placeholder(tf.float32, shape=[batch_size], name='target_weight')
            negative_weights = tf.placeholder(tf.float32, shape=[num_sampled], name='negative_target_weight')

        with tf.name_scope('emb_layer'):
            embeddings = tf.Variable(
                tf.random_uniform([vocabulary_size, embedding_size], -0.5/embedding_size, 0.5/embedding_size), name='emb')
            embed = tf.nn.embedding_lookup(embeddings, train_inputs)

        with tf.name_scope("neg_layer"):
            nce_weights = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -0.5/embedding_size, 0.5/embedding_size), name='nce_weight')
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]), name='nce_biase')
            positive_embed = tf.nn.embedding_lookup(nce_weights, positive_labels)
            positive_bias = tf.nn.embedding_lookup(nce_biases, positive_labels)
            negative_embed = tf.nn.embedding_lookup(nce_weights, negative_labels)
            negative_bias = tf.nn.embedding_lookup(nce_biases, negative_labels)
            positive_logits = tf.reduce_sum(tf.multiply(embed, positive_embed), 1) + positive_bias
            negative_logits = tf.reduce_sum(tf.multiply(embed, negative_embed), 1) + negative_bias

        with tf.name_scope('loss_layer'):
            positive_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(positive_logits), logits=positive_logits)
            negative_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(negative_logits), logits=negative_logits)
            weighted_positive_logits = tf.multiply(positive_logits, positive_weights)
            weighted_negative_logits = tf.multiply(negative_logits, negative_weights)
            loss = (tf.reduce_sum(positive_xent) + tf.reduce_sum(negative_xent)) / (batch_size * 2)

        with tf.name_scope('train'):
            optimizer = tf.train.RMSPropOptimizer(0.001).minimize(loss)
            # global_step = tf.Variable(0, trainable=False)
            # starter_learning_rate = 0.1
            # learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 20000, 0.8, staircase=True)
            # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    init = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()

    tf.summary.scalar('loss_layer/loss', loss)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
with tf.Session(graph=graph) as sess:
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    for v in tf.global_variables():
        print(v.name, v.device, v.shape)
    for v in tf.local_variables():
        print(v.name, v.device, v.shape)

    # if os.path.exists('tmp/model.ckpt.meta'):
    #     saver = tf.train.import_meta_graph('tmp/model.ckpt.meta')
    #     saver.restore(sess, tf.train.latest_checkpoint('tmp/'))
    #     print("model restored")
    # else:
    if True:
        init.run()
        init_local.run()
        print("model init")

    summary_writer = tf.summary.FileWriter('tmp/log', sess.graph)

    average_loss = 0
    start = time.time()
    step = 1
    try:
        while True:
            batch_inputs, batch_labels, batch_negative_labels, positive_weights_np, negative_weights_np = generate_batch(filenames, epochs, batch_size)
            feed_dict = {train_inputs: batch_inputs, positive_labels: batch_labels, negative_labels: batch_negative_labels, positive_weights: positive_weights_np, negative_weights: negative_weights_np}
            if step % 1000 == 0:
                loss_val, summary_str, _ = sess.run([loss, summary_op, optimizer], feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
            else:
                loss_val, _ = sess.run([loss, optimizer], feed_dict=feed_dict)
            average_loss += loss_val

            if step % 1000 == 0:
                average_loss /= 1000
                end = time.time()
                elapsed_time = (end - start) * 1000 / 1000
                print('Average loss at step ', step, ': ', average_loss, 'time cost', elapsed_time, 'ms')
                average_loss = 0
                start = time.time()

            if step % 20000 == 0:
                print('save model...')
                save_path = saver.save(sess, 'tmp/model.ckpt')
                print("saved model in", save_path)

            step += 1
    except Exception as e:
        print(e)

    print("total batch count %d" % step)
    summary_writer.flush()
Here is my loss (plots omitted): the first plot shows SGD over 5 epochs; the second shows RMSProp over 2 epochs (still running).
I am training an RNN-based language model using TensorFlow. The model is very similar to the PTB model example in the TF tutorials section. However, when I attempt to train the model on my own data, the perplexity of the model does not go down; it remains constant throughout multiple epochs. Could anyone let me know what I might be doing wrong?
I have a feeling that I am not handling the targets properly, but the gist of my code for the targets is:
def batcher(batch_size, unroll_steps, data, pad):
    print(len(data))
    batches = len(data) // batch_size
    inp = []
    target = []
    for i in range(batches):
        #print(len(data[i*batch_size:(i+1)*batch_size]))
        x = data[i*batch_size:(i+1)*batch_size]
        y = [line[1:] + [pad] for line in x]
        yield (x, y)
That is, I just shift the data by 1 and use that as the target for the next word in a sentence.
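For instance, a quick sketch of that shift on a toy sentence (the token IDs are made up):

sentence = [12, 7, 43, 9]   # token IDs for one line
pad = 0                     # id of the '<pad>' token
x = sentence                # input:  [12, 7, 43, 9]
y = sentence[1:] + [pad]    # target: [7, 43, 9, 0]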
The training script and model (class) are shown below.
Training script (excerpt):
def train(session, model, folder, batch_size, unroll_steps, epoch):
    word_to_id, id_to_word, train, val = build_inputs(folder, unroll_steps)
    pad = word_to_id['<pad>']
    costs = 0
    iters = 0
    train_size = len(train)
    batch_size = model.batch_size
    batches = train_size // batch_size
    state = session.run(model._initial_state)
    print("Running epoch %d" % epoch)
    for i in range(batches):
        fetches = [model.cost, model._final_state, model.logits]
        feed_dict = {}
        x = train[i*batch_size:(i+1)*batch_size]
        y = [line[1:] + [pad] for line in x]
        feed_dict[model.input] = x
        feed_dict[model.targets] = y
        feed_dict[model._initial_state] = state
        #print("Cell-state complete - Running")
        cost, state, logits = session.run(fetches, feed_dict)
        #print("Single Run complete")
        costs += cost
        iters += model.unroll_steps
    print("\tEpoch %d: Perplexity is %f" % (epoch, np.exp(costs/iters)))
    return np.exp(costs/iters)
Model:
import tensorflow as tf

class LM(object):
    def __init__(self, train, max_gradient, batch_size, unroll_steps, vocab, size, layers, learning_rate, init, prob):
        self.batch_size = batch_size
        self.max_gradient = max_gradient
        self.layers = layers
        self.learning_rate = learning_rate
        self.unroll_steps = unroll_steps
        self.init = init

        # with tf.name_scope("Parameters"):

        with tf.device('/gpu:0'), tf.name_scope("Input"):
            self.input = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="input")
            self.targets = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="targets")
            #self.init = tf.placeholder(tf.float32, shape=[], name="init")

        with tf.device('/gpu:0'), tf.name_scope("Embedding"):
            embedding = tf.Variable(tf.random_uniform([vocab, size], -self.init, self.init), dtype=tf.float32, name="embedding")
            embedded_input = tf.nn.embedding_lookup(embedding, self.input, name="embedded_input")

        with tf.device('/gpu:0'), tf.name_scope("RNN"), tf.variable_scope(tf.get_variable_scope(), reuse=False) as scope:
            lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
            if train and prob < 1.0:
                lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=prob)
            cell = tf.contrib.rnn.MultiRNNCell([lstm_cell for _ in range(layers)], state_is_tuple=True)
            self._initial_state = cell.zero_state(batch_size, tf.float32)
            outputs = []
            state = self._initial_state
            for step in range(unroll_steps):
                if step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(embedded_input[:, step, :], state)
                outputs.append(cell_output)

        with tf.device('/gpu:0'), tf.name_scope("Cost"), tf.variable_scope(tf.get_variable_scope(), reuse=False) as scope:
            output = tf.reshape(tf.concat(outputs, 1), [-1, size])
            softmax_w = tf.get_variable("softmax_w", [size, vocab], dtype=tf.float32)
            softmax_b = tf.get_variable("softmax_b", [vocab], dtype=tf.float32)
            logits = tf.matmul(output, softmax_w) + softmax_b
            losses = []
            for logit, target in zip([logits], [tf.reshape(self.targets, [-1])]):
                target = tf.reshape(target, [-1])
                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=target)
                losses.append(loss)
            self.cost = tf.reduce_sum(losses) / batch_size
            self._final_state = state
            self.logits = logits
            scope.reuse_variables()

        if not train:
            return

        with tf.device('/gpu:0'), tf.name_scope("Train"), tf.variable_scope(tf.get_variable_scope(), reuse=False):
            train_variables = tf.trainable_variables()
            gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_variables), self.max_gradient)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            self.training = optimizer.apply_gradients(zip(gradients, train_variables))
            tf.get_variable_scope().reuse_variables()