I am new to tensorflow and trying to create a simple MLP. My model is running fine but is not giving the desired performance. I tried to create summaries but am now getting this error:
FailedPreconditionError: GetNext() failed because the iterator has not been initialized. Ensure that you have run the initializer operation for this iterator before getting the next element.
My code:
def fc_layer(input, channels_in,channels_out, name = "fc"):
with tf.name_scope(name):
W = tf.Variable(tf.zeros([channels_in, channels_out]), name="weights")
clip_op = tf.assign(W, tf.clip_by_norm(W, 1, axes = None))
b = tf.Variable(tf.zeros([channels_out]), name="biases")
act = tf.matmul(input, W) + b
tf.summary.histogram("weights", W)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return act
# Setup placeholders, and reshape the data
y = tf.placeholder(tf.float32, shape=[None,128], name = 'y')
x = tf.placeholder(tf.float32, shape=[None,256], name = 'x')
dataset = tf.data.Dataset.from_tensor_slices((y, x)).batch(batch_size).repeat()
iter = dataset.make_initializable_iterator()
input_features, output_features = iter.get_next()
fc_1 = tf.nn.relu(fc_layer(input_features, 128,512, name = "fc1"))
fc_2 = tf.nn.relu(fc_layer(fc_1, 512,256, name = "fc1"))
out_layer = fc_layer(fc_2, 256,256, name = "out")
with tf.name_scope('loss'):
loss_op =
tf.sqrt(tf.reduce_mean(tf.squared_difference(out_layer,output_features)))
tf.summary.scalar("loss", loss_op)
with tf.name_scope('train'):
train_op =
tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op)
#Summary writer
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(r'C:\Users\Jaweria\Documents\Code_logs',
graph=tf.get_default_graph())
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# initialise iterator with train data
sess.run(iter.initializer, feed_dict={ y: train_data[0], x: train_data[1], batch_size: Batch_Size})
print('Training...')
for i in range(training_epochs):
tot_loss = 0
for _ in range(n_batches):
_, loss_value = sess.run([train_op, loss_op])
tot_loss += loss_value
s = sess.run(merged_summary)
writer.add_summary(s,i*n_batches+ _)
print("Iter: {}, Loss: {:.4f}".format(i, tot_loss / n_batches))
# initialise iterator with test data
sess.run(iter.initializer, feed_dict={ y: test_data[0], x: test_data[1],
batch_size: test_data[0].shape[0]})
print('Test Loss: {:4f}'.format(sess.run(loss_op)))
Related
I'm trying to train a GAN using tensorflow, but during graph construction I'm getting this error:
ValueError: Input 0 of layer conv1_1 is incompatible with the layer: its rank is undefined, but the layer requires a defined rank.
I'm guessing this is because my discriminator and generator are within different functions which I'm calling. I would ideally want to avoid rolling my entire architecture out of methods, as this would lead to a tediously-long python file.
I've tried using a default value for the placeholders before the first training examples are pushed through a sess.run() call, but this led to the same example being run through the graph at each stage of training (probably because that's how tensorflow constructed the graph)
My training loop code is below. Please let me know if seeing the generator and discriminator functions themselves would help.
img1 = tf.placeholder(dtype = tf.float32)
img2 = tf.placeholder(dtype = tf.float32)
lr = tf.placeholder(dtype = tf.float32)
synthetic_imgs, synthetic_logits, mse, _ = self.gen(img1)
fake_result, fake_logits_hr, fake_feature_2 = self.disc(img1, synthetic_imgs)
ground_truth_result, ground_truth_logits_hr, truth_feature_2 = self.disc(img1, img2)
_, fake_logits_lr = self.disc_two(img1, synthetic_imgs)
_, ground_truth_logits_lr = self.disc_two(img1, img2)
a = tf.nn.sigmoid_cross_entropy_with_logits
dis_labels = tf.random.uniform((self.batch_size, 1), minval = -0.2, maxval = 0.3)
gen_labels = tf.random.uniform((self.batch_size, 1), minval = 0.75, maxval = 1.2)
dis_loss = #Discriminator Loss
gen_loss = #Generator Loss
#May want to change to -log(MSE)
d_vars = [var for var in tf.trainable_variables() if 'disc' in var.name]
g_vars = [var for var in tf.trainable_variables() if 'g_' in var.name]
dis_loss = tf.reduce_mean(dis_loss)
gen_loss = tf.reduce_mean(gen_loss)
with tf.variable_scope('optimizers', reuse = tf.AUTO_REUSE) as scope:
gen_opt = tf.train.AdamOptimizer(learning_rate = lr, name = 'gen_opt')
disc_opt = tf.train.AdamOptimizer(learning_rate = lr, name = 'dis_opt')
gen1 = gen_opt.minimize(gen_loss, var_list = g_vars)
disc1 = disc_opt.minimize(dis_loss, var_list = d_vars)
#Tensorboard code for visualizing gradients
#global_step = variable_scope.get_variable("global_step", [], trainable=False, dtype=dtypes.int64, initializer=init_ops.constant_initializer(0, dtype=dtypes.int64))
# gen_training = tf.contrib.layers.optimize_loss(gen_loss, global_step, learning_rate=lr, optimizer=gen_opt, summaries=["gradients"], variables = g_vars)
# disc_training = tf.contrib.layers.optimize_loss(dis_loss, global_step, learning_rate=lr, optimizer=disc_opt, summaries=["gradients"], variables = d_vars)
#summary = tf.summary.merge_all()
with tf.Session() as sess:
print('start session')
sess.run(tf.global_variables_initializer())
#print(tf.trainable_variables()) #Find variable corresponding to conv filter weights, which you can use for tensorboard visualization
#Code to load each training example
gen = self.pairs()
for i in range(self.num_epochs):
print(str(i+ 1) + 'th epoch')
for j in range(self.num_batches):
i_1 = None
i_2 = None
#Crektes batch
for k in range(self.batch_size):
p = next(gen)
try:
i_1 = np.concatenate((i1, self.load_img(p[0])), axis = 0)
i_2 = np.concatenate((i2, self.load_img(p[1])), axis = 0)
except Exception:
i_1 = self.load_img(p[0])
i_2 = self.load_img(p[1])
l_r = 8e-4 * (0.5)**(i//100) #Play around with this value
test, gLoss, _ = sess.run([img1, gen_loss, gen1], feed_dict = {i1 : i_1, i2 : i_2, learn_rate : l_r})
dLoss, _ = sess.run([dis_loss, disc1], feed_dict = {i1 : i_1, i2 : i_2, learn_rate : l_r})
print(test.shape)
cv2.imwrite('./saved_imgs/gan_test'+str(j)+'.png', np.squeeze(test, axis = 3)[0])
#Code to display gradients and other relevant stats on tensorboard
#Will be under histogram tab, labelled OptimizeLoss
# if j%500 == 0:
# writer = tf.summary.FileWriter(sess.graph, logdir = './tensorboard/1') #Change logdir for each run
# summary_str = sess.run(summary, feed_dict = {i1 : i_1, i2 : i_2, learn_rate : l_r})
# writer.add_summmary(summary_str, str(i)+': '+str(j))#Can change to one epoch only if necessary
# writer.flush()
if j % 12 == 0: #Prints loss statistics every 12th batch
#print('Epoch: '+str(i))
print('Generator Loss: '+str(gLoss))
print('Discriminator Loss: '+str(dLoss))
self.save_model(sess, i)
Error of initialization occurs even after global initialization
The error about initialization is this:
FailedPreconditionError: Attempting to use uninitialized value biases
[[Node: biases/read = IdentityT=DT_FLOAT, _class=["loc:#Adagrad/update_biases/ApplyAdagrad"], _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
import functools
def lazy_property(function):
attribute = '_cache_' + function.__name__
#property
#functools.wraps(function)
def decorator(self):
if not hasattr(self, attribute):
setattr(self, attribute, function(self))
return getattr(self, attribute)
return decorator
class Model:
def __init__(self, data, target):
self.data = data
self.target = target
self._logits = None
self._prediction = None
self._optimize = None
self._error = None
#lazy_property
def logits(self):
w = tf.Variable(tf.truncated_normal([784, 1]), name='weights')
b = tf.Variable(tf.zeros([1]), name='biases')
self._logits = tf.matmul(self.data, w) + b
return self._logits
#lazy_property
def prediction(self):
self._prediction = tf.nn.softmax(self.logits)
return self._prediction
#lazy_property
def optimize(self):
labels = tf.to_int64(self.target)
logits = self.prediction
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
self._optimize = tf.train.AdagradOptimizer(0.05).minimize(loss)
return self._optimize
#lazy_property
def error(self):
mistakes = tf.not_equal(tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
return tf.reduce_mean(tf.cast(mistakes, tf.float32))
batch_size = 100
num_steps = 1000
tf.reset_default_graph()
data = MNIST(data_dir="data/MNIST/")
X = tf.placeholder(tf.float32, [batch_size, 784], name='Placeholder_Input')
Y = tf.placeholder(tf.int64, [batch_size], name='Placeholder_Output')
model = Model(X, Y)
with tf.Session() as session:
session.run(tf.global_variables_initializer())
for step in range(num_steps):
model = Model(X,Y)
for _ in range(100):
x_batch, y_true_batch, _ = data.random_batch(batch_size=batch_size)
y_true_batch = np.argmax(y_true_batch, axis=1)
error,_ = session.run(model.optimize, feed_dict={X: x_batch, Y: y_true_batch})
if (step % 100 == 0):
print("Error rate # iter %d : %f" % (step, error))
You should run session.run(tf.global_variables_initializer()) once the model is fully defined. Note that you are defining a new model at each step, and the variables are only being instantiated when you call model.optimize. Here is my recommendation:
model = Model(X,Y)
optimize = model.optimize
with tf.Session() as session:
session.run(tf.global_variables_initializer())
for step in range(num_steps):
for _ in range(100):
x_batch, y_true_batch, _ = data.random_batch(batch_size=batch_size)
y_true_batch = np.argmax(y_true_batch, axis=1)
error,_ = session.run(optimize, feed_dict={X: x_batch, Y: y_true_batch})
if (step % 100 == 0):
print("Error rate # iter %d : %f" % (step, error))
Here is my code:
def conv_pooling(data, sequence_length, filter_size, embedding_size, num_filters):
filter_shape = [filter_size, embedding_size, 1, num_filters]
w = tf.Variable(tf.truncated_normal(filter_shape,stddev = 0.1),
name = "w")
b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name =
"b")
conv = tf.nn.conv2d(
item,
w,
strides = [1,1,1,1],
padding = "VALID",
name = "conv"
)
h = tf.nn.relu(tf.nn.bias_add(conv, b), name = "relu")
pooled = tf.nn.max_pool(
h,
ksize = [1,sequence_length - filter_size + 1, 1, 1],
strides = [1,1,1,1],
padding = "VALID",
name = "pool"
)
return pooled
init_op = tf.global_variables_initializer()
pooled_outputs = []
with tf.Session() as sess:
sess.run(init_op)
for i, filter_size in enumerate(filter_sizes):
pooled = sess.run(conv_pooling(data, sequence_length, filter_size, embedding_size, num_filters), feed_dict = {embedded_chars: item})
pooled_outputs.append(pooled)
This 'data' is a tf.Variable that use the global tf.placeholder 'embedded_chars', so don't worry about if it is working. The error happens because of w and b cannot be initialized.
I tried sess.run(tf.local_variables_initializer()) also, not work and return the same error. Does anyone know a way that I can initialized w and b here? As you see the size of w change in for loop.
Thank you!
See the code below. That's why #mikkola means about creating your graph before initialization.
// create your computation graph
pooled = conv_pooling(data, sequence_length, filter_size, embedding_size, num_filters)
// initialize the variables in the graph
init_op = tf.global_variables_initializer()
pooled_outputs = []
with tf.Session() as sess:
sess.run(init_op)
for i, filter_size in enumerate(filter_sizes):
// run the graph to get your output
output = sess.run([pooled], feed_dict = {embedded_chars: item})
pooled_outputs.append(output)
I have a working tensorflow model that I am trying to convert to using queues. It may not be the best function but it works.
The data comes in as a list(dict()) called 'rows' from a processing pipeline outside of TF in the form format [{'y1': 1, 'y2': 0, 'y3':1, 'y4':0, 'x1':...'x1182': 0}] (SPECIAL_FIELD_CHAR is 'y', meaning it's calculated from the 'xN' data). The features_outputs() just returns the xs and the ys as ['y1', 'y2', 'y3', 'y4'] and ['x1', ..., 'x1182']. The idea is that the xs determine the ys. There are 4 independent ys that are calculated per row of xs.
def train_rows(initial_weights, weights_filename, rows):
(features, outputs ) = features_outputs(rows[0].keys())
x_true = [ [float(row[feature]) for feature in features] for row in rows]
try:
y_true = [ [float(row[output]) for output in outputs] for row in rows ]
except Exception as e:
print [row[output] for output in outputs], e
w_true = np.random.rand(len(features), 1) # init weights
b_true = np.random.rand(1) # init bias
x_in = tf.placeholder(tf.float32, [None, len(features)], "x_in")
if initial_weights is None:
w = tf.Variable(tf.random_normal((len(features), len(outputs))), name="w")
b = tf.Variable(tf.constant(0.1, shape=[len(outputs)]), name="b")
else:
w = tf.Variable(weights['w'], name="w")
b = tf.Variable(weights['b'], name="b")
h = tf.add(tf.matmul(x_in, w), b, name="h")
y_in = tf.placeholder(tf.float32, [None, len(outputs)], "y_in")
loss_op = tf.reduce_mean(tf.square(tf.subtract(y_in, h)), name="loss")
#train_op = tf.train.AdamOptimizer(0.01).minimize(loss_op)
train_op = tf.train.GradientDescentOptimizer(0.3).minimize(loss_op)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
last_error = 1.7976931348623157e+308
this_error = 1.7976931348623157e+307
diff = 1
iteration = initial_weights['iteration'] if initial_weights is not None and 'iteration' in initial_weights else 0
while diff > 0:
iteration += 1
last_error = this_error
for step in range(1000):
sess.run(train_op, feed_dict={
x_in: x_true,
y_in: y_true
})
w_computed = sess.run(w)
b_computed = sess.run(b)
pred = tf.add(tf.matmul(x_in, w), b)
results = sess.run(pred, feed_dict={x_in: x_true})
error = tf.losses.mean_squared_error(y_true, results)
this_error = float(error.eval())
(diff, locs) = compare(y_true, results)
if locs < 50:
print "iteration:", iteration, "error:",this_error, "diff:", diff, "locs:", locs
This produces a model that converges. However with the queue based version it does not, and error increases rapidly:
def multithreaded_train_rows(initial_weights, weights_filename, rows):
(features, outputs ) = features_outputs(rows[0].keys())
x_true = np.array([ [float(row[feature]) for feature in features] for row in rows])
y_true = np.array([ [float(row[output]) for output in outputs] for row in rows ])
#queue
q = tf.FIFOQueue(capacity=len(rows), dtypes=tf.float32)
#enq_op = q.enqueue_many(x_true)
enq_op = q.enqueue_many(np.array( [ [float(row[f]) for f in sorted(row.keys())] for row in rows] ))
qr = tf.train.QueueRunner(q, [enq_op] * 1)
tf.train.add_queue_runner(qr)
keys = sorted(row.keys())
x_indices = np.array([[i] for i in range(len(keys)) if not keys[i].startswith(SPECIAL_FIELD_CHAR)])
y_indices = np.array([[i] for i in range(len(keys)) if keys[i].startswith(SPECIAL_FIELD_CHAR)])
input = q.dequeue()
x_in = tf.transpose(tf.gather(input, x_indices))
y_in = tf.gather(input, y_indices)
if initial_weights is None:
print 'Creating weights', len(x_indices), len(y_indices)
w = tf.Variable(tf.random_normal((len(x_indices), len(y_indices))), name="w")
b = tf.Variable(tf.constant(0.1, shape=[len(y_indices)]), name="b")
else:
print 'Using supplied weights', len(weights['w']), len(weights['w'][0])
w = tf.Variable(weights['w'], name="w")
b = tf.Variable(weights['b'], name="b")
y = tf.add(tf.matmul(x_in, w), b, name="y")
loss_op = tf.reduce_mean(tf.squared_difference(y_in, y), name="loss")
train_op = tf.train.GradientDescentOptimizer(0.3).minimize(loss_op)
print 'Starting session'
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
last_error = 1.7976931348623157e+308
this_error = 1.7976931348623157e+307
diff = 1
iteration = initial_weights['iteration'] if initial_weights is not None and 'iteration' in initial_weights else 0
while diff > 0:
iteration += 1
last_error = this_error
for step in range(100):
sess.run([train_op, loss_op])
w_computed = sess.run(w)
b_computed = sess.run(b)
pred = tf.add(tf.matmul(x_in, w), b)
results = sess.run(y, feed_dict={x_in: x_true})
error = tf.losses.mean_squared_error(y_true, results)
this_error = float(error.eval())
(diff, locs) = compare(y_true, results)
if locs < 50:
print "iteration:", iteration, "error:",this_error, "diff:", diff, "locs:", locs
coord.request_stop()
coord.join(threads)
They are meant to be the same, but I've had to change a few things:
1. Add a tf.transpose() to the x_in for the matmul()
2. Queue the entire row of xs and ys, then pull apart using tf.gather().
I've searched a lot for examples that match mine, and I can find no documentation on how to restart a queue and continue the training from the beginning. It'll seemly train forever(not sure why, who is replenishing the queue?) It'll also never stop.
But most of all I have no idea why given the exact same data, the first converges and the second does not?
None of the gather marshalling is needed. Queue only the inputs (x_true), then
evaluate y against y_true, not y_in
loss_op = tf.reduce_mean(tf.square(y - y_true), name="loss")
I am training an RNN-based language-model using Tensorflow. The model is very similar to the PTB model example in the TF tutorials section. However, when I attempt to train the model on my own data, the perplexity of the model does not go down; it remains constant throughout multiple epochs. Could anyone let me know what I might be doing wrong.
I have a feeling that I am not handling the targets properly, but the gist of my code for the targets is:
def batcher(batch_size,unroll_steps,data,pad):
print(len(data))
batches = len(data) / batch_size
inp = []
target = []
for i in range(batches):
#print(len(data[i*batch_size:(i+1)*batch_size]))
x = data[i*batch_size:(i+1)*batch_size]
y = [ line[1:]+[pad] for line in x ]
yield (x,y)
That is, I just shift the data by 1 and use that as the target for the next word in a sentence.
The training script and model (class) are seen below
Training script (excerpt):
def train(session, model, folder,batch_size,unroll_steps,epoch):
word_to_id, id_to_word, train, val = build_inputs(folder,unroll_steps)
pad = word_to_id['<pad>']
costs = 0
iters = 0
train_size = len(train)
batch_size = model.batch_size
batches = train_size / batch_size
state = session.run(model._initial_state)
print("Running epoch %d" % epoch)
for i in range(batches):
fetches = [model.cost, model._final_state, model.logits]
feed_dict = {}
x = train[i*batch_size:(i+1)*batch_size]
y = [ line[1:] +[pad] for line in x ]
feed_dict[model.input] = x
feed_dict[model.targets] = y
feed_dict[model._initial_state] = state
#print("Cell-state complete - Running")
cost, state, logits = session.run(fetches, feed_dict)
#print("Single Run complete")
costs += cost
iters += model.unroll_steps
print("\tEpoch %d: Perplexity is %f" % (epoch, np.exp(costs/iters)))
return np.exp(costs/iters)
Model:
import tensorflow as tf
class LM(object):
def __init__(self, train, max_gradient, batch_size, unroll_steps, vocab, size, layers, learning_rate, init, prob):
self.batch_size = batch_size
self.max_gradient = max_gradient
self.layers = layers
self.learning_rate = learning_rate
self.unroll_steps = unroll_steps
self.init = init
#with tf. name_scope("Paramters"):
with tf.device('/gpu:0'), tf.name_scope("Input"):
self.input = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="input")
self.targets = tf.placeholder(tf.int64, shape=[batch_size, unroll_steps], name="targets")
#self.init = tf.placeholder(tf.float32, shape=[], name="init")
with tf.device('/gpu:0'), tf.name_scope("Embedding"):
embedding = tf.Variable(tf.random_uniform([vocab, size], -self.init, self.init), dtype=tf.float32, name="embedding")
embedded_input = tf.nn.embedding_lookup(embedding, self.input, name="embedded_input")
with tf.device('/gpu:0'), tf.name_scope("RNN"), tf.variable_scope(tf.get_variable_scope(), reuse = False) as scope:
lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
if train and prob < 1.0:
lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=prob)
cell = tf.contrib.rnn.MultiRNNCell([lstm_cell for _ in range(layers)], state_is_tuple=True)
self._initial_state = cell.zero_state(batch_size, tf.float32)
outputs = []
state = self._initial_state
for step in range(unroll_steps):
if step > 0: tf.get_variable_scope().reuse_variables()
(cell_output, state) = cell(embedded_input[:, step, :], state)
outputs.append(cell_output)
with tf.device('/gpu:0'), tf.name_scope("Cost"), tf.variable_scope(tf.get_variable_scope(), reuse = False) as scope:
output = tf.reshape(tf.concat(outputs,1), [-1,size])
softmax_w = tf.get_variable("softmax_w", [size, vocab], dtype=tf.float32)
softmax_b = tf.get_variable("softmax_b", [vocab], dtype=tf.float32)
logits = tf.matmul(output, softmax_w) + softmax_b
losses = []
for logit, target in zip([logits], [tf.reshape(self.targets,[-1])]):
target = tf.reshape(target, [-1])
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit,labels=target)
losses.append(loss)
self.cost = tf.reduce_sum(losses) / batch_size
self._final_state = state
self.logits = logits
scope.reuse_variables()
if not train:
return
with tf.device('/gpu:0'), tf.name_scope("Train"), tf.variable_scope(tf.get_variable_scope(), reuse=False):
train_variables = tf.trainable_variables()
gradients, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_variables),self.max_gradient)
optimizer = tf.train.AdamOptimizer(self.learning_rate)
self.training = optimizer.apply_gradients(zip(gradients, train_variables))
tf.get_variable_scope().reuse_variables()