Tensorflow No gradients provided for any variable LSTM encoder-decoder - tensorflow

I'm new to Tensorflow and i'm trying to impletement LSTM encoder-decoder from scratch using tensorflow, according to this blog post:
https://medium.com/#shiyan/understanding-lstm-and-its-diagrams-37e2f46f1714
This is the code for the encoder (using tf.while_loop)
def decode_timestep(self, context, max_dec, result, C_t_d, H_t_d, current_index):
with tf.variable_scope('decode_scope', reuse=True):
W_f_d = tf.get_variable('W_f_d', (self.n_dim, self.n_dim), tf.float32)
U_f_d = tf.get_variable('U_f_d', (self.n_dim, self.n_dim), tf.float32)
# ...
# Decoder
# Forget Gate
f_t_d = tf.nn.sigmoid(
tf.add(tf.reduce_sum(tf.add(tf.matmul(context, W_f_d), tf.matmul(H_t_d, U_f_d))), b_f_d))
C_t_d_f = tf.multiply(C_t_d, f_t_d)
# Input Gate
# Part 1. New Memory Impaction
i_t_d = tf.nn.sigmoid(
tf.add(tf.reduce_sum(tf.add(tf.matmul(context, W_i_d), tf.matmul(H_t_d, U_i_d))), b_i_d))
# Part 2. Calculate New Memory
c_d_new = tf.nn.tanh(tf.add(tf.reduce_sum(tf.add(tf.matmul(context, W_c_d), tf.matmul(H_t_d, U_c_d))), b_c_d))
# Part 3. Update old Memory
C_t_d_i = tf.add(C_t_d_f, tf.multiply(i_t_d, c_d_new))
# Output Gate
o_t_d = tf.nn.sigmoid(
tf.add(tf.reduce_sum(tf.add(tf.matmul(context, W_o_d), tf.matmul(H_t_d, U_o_d))), b_o_d))
# Calculate new H_t
H_t_d_new = tf.multiply(o_t_d, tf.nn.tanh(C_t_d))
# Write the result of this timestep to the tensor array at pos current_index
result.write(tf.subtract(tf.subtract(max_dec, 1), current_index), H_t_d_new)
# Decrement the current_index by 1
index_next = tf.subtract(current_index, 1)
return context, max_dec, result, C_t_d_i, H_t_d_new, index_next
This is the code for the decoder:
def decode_timestep(self, context, max_dec, result, C_t_d, H_t_d, current_index):
# Same as above
# Calculate new H_t
H_t_d_new = tf.multiply(o_t_d, tf.nn.tanh(C_t_d))
# Write the result of this timestep to the tensor array at pos current_index
result.write(tf.subtract(tf.subtract(max_dec, 1), current_index), H_t_d_new)
# Decrement the current_index by 1
index_next = tf.subtract(current_index, 1)
return context, max_dec, result, C_t_d_i, H_t_d_new, index_next
And this is the code for the session:
with tf.variable_scope('encode_scope', reuse=True):
H_t_e = tf.get_variable('H_t_e')
C_t_e = tf.get_variable('C_t_e')
with tf.variable_scope('global_scope', reuse=True):
current_index = tf.get_variable('current_index', dtype=tf.int32)
context, _, C_t_e, H_t_e, current_index = tf.while_loop(self.encode_timestep_cond, self.encode_timestep,
[X_Sent, max_enc, C_t_e, H_t_e, current_index])
result = tf.TensorArray(tf.float32, size=max_dec)
_,_,result, C_t_e, H_t_e, current_index = tf.while_loop(self.decode_timestep_cond, self.decode_timestep,
[context, max_dec, result, C_t_e, H_t_e, current_index])
loss = tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(result.concat(), Y_Sent)), reduction_indices=1)))
train_step = tf.train.AdamOptimizer().minimize(loss)
The error is :
ValueError: No gradients provided for any variable, check your graph
for ops that do not support gradients, between variables
["", ...
Please help! This is the implementation that i think it should right after what i read and understand from that post. Thank you and sorry about my english!

Related

How to build a custom question-answering head when using hugginface transformers?

Using the TFBertForQuestionAnswering.from_pretrained() function, we get a predefined head on top of BERT together with a loss function that are suitable for this task.
My question is how to create a custom head without relying on TFAutoModelForQuestionAnswering.from_pretrained().
I want to do this because there is no place where the architecture of the head is explained clearly. By reading the code here we can see the architecture they are using, but I can't be sure I understand their code 100%.
Starting from How to Fine-tune HuggingFace BERT model for Text Classification is good. However, it covers only the classification task, which is much simpler.
'start_positions' and 'end_positions' are created following this tutorial.
So far, I've got the following:
train_dataset
# Dataset({
# features: ['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'],
# num_rows: 99205
# })
train_dataset.set_format(type='tensorflow', columns=['input_ids', 'token_type_ids', 'attention_mask'])
features = {x: train_dataset[x] for x in ['input_ids', 'token_type_ids', 'attention_mask']}
labels = [train_dataset[x] for x in ['start_positions', 'end_positions']]
labels = np.array(labels).T
tfdataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(16)
input_ids = tf.keras.layers.Input(shape=(256,), dtype=tf.int32, name='input_ids')
token_type_ids = tf.keras.layers.Input(shape=(256,), dtype=tf.int32, name='token_type_ids')
attention_mask = tf.keras.layers.Input((256,), dtype=tf.int32, name='attention_mask')
bert = TFAutoModel.from_pretrained("bert-base-multilingual-cased")
output = bert([input_ids, token_type_ids, attention_mask]).last_hidden_state
output = tf.keras.layers.Dense(2, name="qa_outputs")(output)
model = tf.keras.models.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=output)
num_train_epochs = 3
num_train_steps = len(tfdataset) * num_train_epochs
optimizer, schedule = create_optimizer(
init_lr=2e-5,
num_warmup_steps=0,
num_train_steps=num_train_steps,
weight_decay_rate=0.01
)
def qa_loss(labels, logits):
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=True, reduction=tf.keras.losses.Reduction.NONE
)
start_loss = loss_fn(labels[0], logits[0])
end_loss = loss_fn(labels[1], logits[1])
return (start_loss + end_loss) / 2.0
model.compile(
loss=loss_fn,
optimizer=optimizer
)
model.fit(tfdataset, epochs=num_train_epochs)
And I am getting the following error:
ValueError: `labels.shape` must equal `logits.shape` except for the last dimension. Received: labels.shape=(2,) and logits.shape=(256, 2)
It is complaining about the shape of the labels. This should not happen since I am using SparseCategoricalCrossentropy loss.
For future reference, I actually found a solution, which is just editing the TFBertForQuestionAnswering class itself. For example, I added an additional layer in the following code and trained the model as usual and it worked.
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
from transformers.modeling_tf_utils import TFQuestionAnsweringLoss, get_initializer, input_processing
from transformers.modeling_tf_outputs import TFQuestionAnsweringModelOutput
from transformers import BertConfig
class MY_TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss):
# names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
_keys_to_ignore_on_load_unexpected = [
r"pooler",
r"mlm___cls",
r"nsp___cls",
r"cls.predictions",
r"cls.seq_relationship",
]
def __init__(self, config: BertConfig, *inputs, **kwargs):
super().__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.bert = TFBertMainLayer(config, add_pooling_layer=False, name="bert")
# This is the dense layer I added
self.my_dense = tf.keras.layers.Dense(
units=config.hidden_size,
kernel_initializer=get_initializer(config.initializer_range),
name="my_dense",
)
self.qa_outputs = tf.keras.layers.Dense(
units=config.num_labels,
kernel_initializer=get_initializer(config.initializer_range),
name="qa_outputs",
)
def call(
self,
input_ids = None,
attention_mask = None,
token_type_ids = None,
position_ids = None,
head_mask = None,
inputs_embeds = None,
output_attentions = None,
output_hidden_states = None,
return_dict = None,
start_positions = None,
end_positions= None,
training = False,
**kwargs,
):
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
are not taken into account for computing the loss.
end_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the end of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
are not taken into account for computing the loss.
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
position_ids=position_ids,
head_mask=head_mask,
inputs_embeds=inputs_embeds,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
start_positions=start_positions,
end_positions=end_positions,
training=training,
kwargs_call=kwargs,
)
outputs = self.bert(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
token_type_ids=inputs["token_type_ids"],
position_ids=inputs["position_ids"],
head_mask=inputs["head_mask"],
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=inputs["return_dict"],
training=inputs["training"],
)
sequence_output = outputs[0]
# You also have to add it here
my_logits = self.my_dense(inputs=sequence_output)
logits = self.qa_outputs(inputs=my_logits)
start_logits, end_logits = tf.split(value=logits, num_or_size_splits=2, axis=-1)
start_logits = tf.squeeze(input=start_logits, axis=-1)
end_logits = tf.squeeze(input=end_logits, axis=-1)
loss = None
if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
labels = {"start_position": inputs["start_positions"]}
labels["end_position"] = inputs["end_positions"]
loss = self.hf_compute_loss(labels=labels, logits=(start_logits, end_logits))
if not inputs["return_dict"]:
output = (start_logits, end_logits) + outputs[2:]
return ((loss,) + output) if loss is not None else output
return TFQuestionAnsweringModelOutput(
loss=loss,
start_logits=start_logits,
end_logits=end_logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns
)

FailedPreconditionError: FailedPr...onError()

I have FailedPreconditionError when running sess.
My network has two different parts, pretrained-network and new add in Recognition network.
Pretrained model is used to extract features and the feature is used to train again for recognition.
In my code, pre-trained model is loaded first.
graph = tf.Graph()
with graph.as_default():
input_data, input_labels, input_boxes = input_train_data.input_fn()
input_boxes = tf.reshape(input_boxes,[input_boxes.shape[0]*2,-1])#convert from Nx8 to 2Nx4
# build model and loss
net = Net(input_data, is_training = False)
f_saver = tf.train.Saver(max_to_keep=1000, write_version=tf.train.SaverDef.V2, save_relative_paths=True)
sess_config = tf.ConfigProto(log_device_placement = False, allow_soft_placement = True)
if FLAGS.gpu_memory_fraction < 0:
sess_config.gpu_options.allow_growth = True
elif FLAGS.gpu_memory_fraction > 0:
sess_config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction;
session = tf.Session(graph=graph, config=sess_config)
tf.logging.info('Initialize from: ' + config.train.init_checkpoint)
f_saver.restore(session, config.train.init_checkpoint)
f_saver restores the pre-trained model.
Then feature conv5_3 is extracted and fed into Recognition network.
conv5_3 = net.end_points['conv5_3']
with tf.variable_scope("Recognition"):
global_step_rec = tf.Variable(0, name='global_step_rec', trainable=False)
#Pass through recognition net
r_net = regnet.ConstructRecNet(conv5_3)
conv7_7 = r_net.end_points['pool7']
#implement ROI Pooling
#input boxes be in x1, y1, x2, y2
h_fmap = tf.dtypes.cast(tf.shape(conv7_7)[1],tf.float32)
w_fmap = tf.dtypes.cast(tf.shape(conv7_7)[2],tf.float32)
#remap boxes at input images to feature mats
#input_boxes = input_boxes / tf.constant([config.train.input_shape[0], config.train.input_shape[0],\
# config.train.input_shape[0], config.train.input_shape[0]], dtype=tf.float32)#Normalize with image size first
remap_boxes=tf.matmul(input_boxes,tf.diag([w_fmap,h_fmap,w_fmap,h_fmap]))
#put first column with image indexes
rows = tf.expand_dims(tf.range(remap_boxes.shape[0]), 1)/2
add_index = tf.concat([tf.cast(rows,tf.float32),remap_boxes],-1)
index = tf.not_equal(tf.reduce_sum(add_index[:,4:],axis=1),0)
remap_boxes = tf.gather_nd(add_index,tf.where(index))
remap_boxes=tf.dtypes.cast(remap_boxes,tf.int32)
prob = roi_pooling(conv7_7, remap_boxes, pool_height=1, pool_width=28)
#Get features for CTC training
prob = tf.transpose(prob, (1, 0, 2)) # prepare for CTC
data_length = tf.fill([tf.shape(prob)[1]], tf.shape(prob)[0]) # input seq length, batch size
ctc = tf.py_func(CTCUtils.compute_ctc_from_labels, [input_labels], [tf.int64, tf.int64, tf.int64])
ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0], ctc[1], ctc[2]))
predictions = tf.to_int32(tf.nn.ctc_beam_search_decoder(prob, data_length, merge_repeated=False, beam_width=10)[0][0])
tf.sparse_tensor_to_dense(predictions, default_value=-1, name='d_predictions')
tf.reduce_mean(tf.edit_distance(predictions, ctc_labels, normalize=False), name='error_rate')
loss = tf.reduce_mean(tf.compat.v1.nn.ctc_loss(inputs=prob, labels=ctc_labels, sequence_length=data_length, ctc_merge_repeated=True), name='loss')
learning_rate = tf.train.piecewise_constant(global_step_rec, [150000, 200000],[config.train.learning_rate, 0.1 * config.train.learning_rate,0.01 * config.train.learning_rate])
opt_loss = tf.contrib.layers.optimize_loss(loss, global_step_rec, learning_rate, config.train.opt_type,config.train.grad_noise_scale, name='train_step')
tf.global_variables_initializer()
I can run sess till feature extraction conv5_3. But can't run those in Recognition and got error as FailedPreconditionError: FailedPr...onError(). What could be the problem?
graph.finalize()
with tf.variable_scope("Recognition"):
for i in range(config.train.steps):
input_data_, input_labels_, input_boxes_ = session.run([input_data, input_labels, input_boxes])
conv5_3_ = session.run([conv5_3]) #can run this line
global_step_rec_ = session.run([global_step_rec]) # got FailedPreconditionError: FailedPr...onError() error at this line
conv7_7_ = session.run([conv7_7])
h_fmap_ = session.run([h_fmap])
Now it works.
Since my graph has two parts, I need to initialize separately.
(1)First get all variables from pre-trained model to initialize with those from checkpoint.
Then initialize with tf.train.Saver.
(2)Then initialize the rest add-in layers using tf.global_variables_initializer()
My code is as follow.
#Initialization
#Initialize pre-trained model first
#Since we need to restore pre-trained model and initialize to respective variables in this current graph
#(1)make a variable list for checkpoint
#(2)initialize a saver for the variable list
#(3)then restore
#(1)
def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors):
varlist=[]
reader = pywrap_tensorflow.NewCheckpointReader(file_name)
if all_tensors:
var_to_shape_map = reader.get_variable_to_shape_map()
for key in sorted(var_to_shape_map):
print(key)
varlist.append(key)
return varlist
varlist=print_tensors_in_checkpoint_file(file_name=config.train.init_checkpoint,all_tensors=True,tensor_name=None)
#(2)prepare the list of variables by calling variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
#countcheckpt_vars=0
#for n in tf.get_default_graph().as_graph_def().node:
# print(n.name)
#for op in tf.get_default_graph().get_operations():
# print(str(op.name))
#for var in zip(variables):
# countcheckpt_vars=countcheckpt_vars+1
#(3)
loader = tf.train.Saver(variables[:46])#since I need to initialize only 46 variables from global variables
tf.logging.info('Initialize from: ' + config.train.init_checkpoint)
sess_config = tf.ConfigProto(log_device_placement = False, allow_soft_placement = True)
if FLAGS.gpu_memory_fraction < 0:
sess_config.gpu_options.allow_growth = True
elif FLAGS.gpu_memory_fraction > 0:
sess_config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction;
session = tf.Session(graph=graph, config=sess_config)
loader.restore(session, config.train.init_checkpoint)
Then initialize the rest of variables
init = tf.global_variables_initializer()
session.run(init)

Why am I getting shape errors when trying to pass a batch from the Tensorflow Dataset API to my session operations?

I am dealing with an issue in my conversion over to the Dataset API and I guess I just don't have enough experience yet with the API to know how to handle the below situation. We currently have image augmentation that we perform currently using queueing and batching. I was tasked with checking out the new Dataset API and converting over our existing implementation using it rather than queues.
What we would like to do is get a reference to all the paths and handle all operations from just that reference. As you see in the dataset initialization, I have mapped the parse_fn to the dataset itself which then goes about reading the file and extracting the initial values from the filenames. However when I then go about calling the iterators next_batch method and then pass those values to get_summary, I'm now getting an error around shape. I have been trying a number of things which just keeps changing the error and so I felt I should see if anyone on SO saw possibly that I was going about this all wrong and should be taking a different route. Does anything jump out as absolutely wrong in my use of the Dataset API?
Should I not be calling the ops this way any longer? I noticed the majority of the examples I saw they would get the batch, pass the variables to the op and then capture that in a variable and pass that to sess.run, however I haven't found an easy way of doing that as of yet with our setup that wasn't erroring so this was the approach I took instead (but its still erroring). I'll be continuing to try to trace down the problem and post here should I find anything, but if anyone sees something please advise. Thanks!
Current Error:
... in get_summary summary, acc = sess.run([self._summary_op,
self._accuracy], feed_dict=feed_dict) ValueError: Cannot feed value of
shape (32,) for Tensor 'ph_input_labels:0', which has shape '(?, 1)
Below is the block where the get_summary method is called and error is fired:
def perform_train():
if __name__ == '__main__':
#Get all our image paths
filenames = data_layer_train.get_image_paths()
next_batch, iterator = preproc_image_fn(filenames=filenames)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
with sess.graph.as_default():
# Set the random seed for tensorflow
tf.set_random_seed(cfg.RNG_SEED)
classifier_network = c_common.create_model(len(products_to_class_dict), is_training=True)
optimizer, global_step_var = c_common.create_optimizer(classifier_network)
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
# Init tables and dataset iterator
sess.run(tf.tables_initializer())
sess.run(iterator.initializer)
cur_epoch = 0
blobs = None
try:
epoch_size = data_layer_train.get_steps_per_epoch()
num_steps = num_epochs * epoch_size
for step in range(num_steps):
timer_summary.tic()
if blobs is None:
#Now populate from our training dataset
blobs = sess.run(next_batch)
# *************** Below is where it is erroring *****************
summary_train, acc = classifier_network.get_summary(sess, blobs["images"], blobs["labels"], blobs["weights"])
...
Believe the error is in preproc_image_fn:
def preproc_image_fn(filenames, images=None, labels=None, image_paths=None, cells=None, weights=None):
def _parse_fn(filename, label, weight):
augment_instance = False
paths=[]
selected_cells=[]
if vals.FIRST_ITER:
#Perform our check of the path to see if _data_augmentation is within it
#If so set augment_instance to true and replace the substring with an empty string
new_filename = tf.regex_replace(filename, "_data_augmentation", "")
contains = tf.equal(tf.size(tf.string_split([filename], "")), tf.size(tf.string_split([new_filename])))
filename = new_filename
if contains is True:
augment_instance = True
core_file = tf.string_split([filename], '\\').values[-1]
product_id = tf.string_split([core_file], ".").values[0]
label = search_tf_table_for_entry(product_id)
weight = data_layer_train.get_weights(product_id)
image_string = tf.read_file(filename)
img = tf.image.decode_image(image_string, channels=data_layer_train._channels)
img.set_shape([None, None, None])
img = tf.image.resize_images(img, [data_layer_train._target_height, data_layer_train._target_width])
#Previously I was returning the below, but I was getting an error from the op when assigning feed_dict stating that it didnt like the dictionary
#retval = dict(zip([filename], [img])), label, weight
retval = img, label, weight
return retval
num_files = len(filenames)
filenames = tf.constant(filenames)
#*********** Setup dataset below ************
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels, weights))
dataset=dataset.map(_parse_fn)
dataset = dataset.repeat()
dataset = dataset.batch(32)
iterator = dataset.make_initializable_iterator()
batch_features, batch_labels, batch_weights = iterator.get_next()
return {'images': batch_features, 'labels': batch_labels, 'weights': batch_weights}, iterator
def search_tf_table_for_entry(self, product_id):
'''Looks up keys in the table and outputs the values. Will return -1 if not found '''
if product_id is not None:
return self._products_to_class_table.lookup(product_id)
else:
if not self._real_eval:
logger().info("class not found in training {} ".format(product_id))
return -1
Where I create the model and have the placeholders used previously:
...
def create_model(self):
weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)
biases_regularizer = weights_regularizer
# Input data.
self._input_images = tf.placeholder(
tf.float32, shape=(None, self._image_height, self._image_width, self._num_channels), name="ph_input_images")
self._input_labels = tf.placeholder(tf.int64, shape=(None, 1), name="ph_input_labels")
self._input_weights = tf.placeholder(tf.float32, shape=(None, 1), name="ph_input_weights")
self._is_training = tf.placeholder(tf.bool, name='ph_is_training')
self._keep_prob = tf.placeholder(tf.float32, name="ph_keep_prob")
self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, tf.float32))
...
self.create_summaries()
def create_summaries(self):
val_summaries = []
with tf.device("/cpu:0"):
for var in self._act_summaries:
self._add_act_summary(var)
for var in self._train_summaries:
self._add_train_summary(var)
self._summary_op = tf.summary.merge_all()
self._summary_op_val = tf.summary.merge(val_summaries)
def get_summary(self, sess, images, labels, weights):
feed_dict = {self._input_images: images, self._input_labels: labels,
self._input_weights: weights, self._is_training: False}
summary, acc = sess.run([self._summary_op, self._accuracy], feed_dict=feed_dict)
return summary, acc
Since the error says:
Cannot feed value of shape (32,) for Tensor 'ph_input_labels:0', which has shape '(?, 1)
My guess is your labels in get_summary has the shape [32]. Can you just reshape it to (32, 1)? Or maybe reshape the label earlier in _parse_fn?

How to properly update variables in a while loop in TensorFlow?

Can someone please explain (or point me to the relevant place in the documentation that I've missed) how to properly update a tf.Variable() in a tf.while_loop? I am trying to update variables in the loop that will store some information until the next iteration of the loop using the assign() method. However, this isn't doing anything.
As the values of mu_tf and sigma_tf are being updated by the minimizer, while step_mu isn't, I am obviously doing something wrong, but I don't understand what it is. Specifically, I guess I should say that I know assign() does not do anything until it is executed when the graph is run, so I know that I can do
sess.run(step_mu.assign(mu_tf))
and that will update step_mu, but I want to do this in the loop correctly. I don't understand how to add an assign operation to the body of the loop.
A simplified working example of what I'm doing follows here:
import numpy as np
import tensorflow as tf
mu_true = 0.5
sigma_true = 1.5
n_events = 100000
# Placeholders
X = tf.placeholder(dtype=tf.float32)
# Variables
mu_tf = tf.Variable(initial_value=tf.random_normal(shape=[], mean=0., stddev=0.1,
dtype=tf.float32),
dtype=tf.float32)
sigma_tf = tf.Variable(initial_value=tf.abs(tf.random_normal(shape=[], mean=1., stddev=0.1,
dtype=tf.float32)),
dtype=tf.float32,
constraint=lambda x: tf.abs(x))
step_mu = tf.Variable(initial_value=-99999., dtype=tf.float32)
step_loss = tf.Variable(initial_value=-99999., dtype=tf.float32)
# loss function
gaussian_dist = tf.distributions.Normal(loc=mu_tf, scale=sigma_tf)
log_prob = gaussian_dist.log_prob(value=X)
negative_log_likelihood = -1.0 * tf.reduce_sum(log_prob)
# optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
# sample data
x_sample = np.random.normal(loc=mu_true, scale=sigma_true, size=n_events)
# Construct the while loop.
def cond(step):
return tf.less(step, 10)
def body(step):
# gradient step
train_op = optimizer.minimize(loss=negative_log_likelihood)
# update step parameters
with tf.control_dependencies([train_op]):
step_mu.assign(mu_tf)
return tf.add(step,1)
loop = tf.while_loop(cond, body, [tf.constant(0)])
# Execute the graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
step_loss = sess.run(fetches=negative_log_likelihood, feed_dict={X: x_sample})
print('Before loop:\n')
print('mu_tf: {}'.format(sess.run(mu_tf)))
print('sigma_tf: {}'.format(sess.run(sigma_tf)))
print('step_mu: {}'.format(sess.run(step_mu)))
print('step_loss: {}\n'.format(step_loss))
sess.run(fetches=loop, feed_dict={X: x_sample})
print('After loop:\n')
print('mu_tf: {}'.format(sess.run(mu_tf)))
print('sigma_tf: {}'.format(sess.run(sigma_tf)))
print('step_mu: {}'.format(sess.run(step_mu)))
print('step_loss: {}'.format(step_loss))

tensorflow serving uninitialized

Hello I want to initialize variable named result in the code below.
I tried to initialize with this code* when I tried to serving.
sess.run(tf.global_variables_initializer(),feed_dict=
{userLat:0,userLon:0})
I just want to initialize the variable.
The reason for using the variable is to write validate_shape = false.
The reason for using this option is to resolve error 'Outer dimension for outputs must be unknown, outer dimension of 'Variable:0' is 1' when deploying the model version to the Google Cloud ml engine.
Initialization with the following code will output a value when feed_dict is 0 when attempting a prediction.
sess.run(tf.global_variables_initializer(),feed_dict=
{userLat:0,userLon:0})
Is there a way to simply initialize the value of result?
Or is it possible to store the list of stored tensor values as a String with a comma without shape?
It's a very basic question.
I'm sorry.
I am a beginner of the tensor flow.
I need help. Thank you for reading.
import tensorflow as tf
import sys,os
#define filename queue
filenameQueue =tf.train.string_input_producer(['./data.csv'],
shuffle=False,name='filename_queue')
# define reader
reader = tf.TextLineReader()
key,value = reader.read(filenameQueue)
#define decoder
recordDefaults = [ ["null"],[0.0],[0.0]]
sId,lat, lng = tf.decode_csv(
value, record_defaults=recordDefaults,field_delim=',')
taxiData=[]
with tf.Session() as sess:
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(18):
data=sess.run([sId, lat, lng])
tmpTaxiData=[]
tmpTaxiData.append(data[0])
tmpTaxiData.append(data[1])
tmpTaxiData.append(data[2])
taxiData.append(tmpTaxiData)
coord.request_stop()
coord.join(threads)
from math import sin, cos,acos, sqrt, atan2, radians
#server input data
userLat = tf.placeholder(tf.float32, shape=[])
userLon = tf.placeholder(tf.float32, shape=[])
R = 6373.0
radian=0.017453292519943295
distanceList=[]
for i in taxiData:
taxiId=tf.constant(i[0],dtype=tf.string,shape=[])
taxiLat=tf.constant(i[1],dtype=tf.float32,shape=[])
taxiLon=tf.constant(i[2],dtype=tf.float32,shape=[])
distanceValue=6371*tf.acos(tf.cos(radian*userLat)*
tf.cos(radian*taxiLat)*tf.cos(radian*taxiLon-
radian*126.8943311)+tf.sin(radian*37.4685225)*tf.sin(radian*taxiLat))
tmpDistance=[]
tmpDistance.append(taxiId)
tmpDistance.append(distanceValue)
distanceList.append(tmpDistance)
# result sort
sId,distances=zip(*distanceList)
indices = tf.nn.top_k(distances, k=len(distances)).indices
gather=tf.gather(sId, indices[::-1])[0:5]
result=tf.Variable(gather,validate_shape=False)
print "Done training!"
# serving
import os
from tensorflow.python.util import compat
model_version = 1
path = os.path.join("Taximodel", str(model_version))
builder = tf.saved_model.builder.SavedModelBuilder(path)
with tf.Session() as sess:
builder.add_meta_graph_and_variables(
sess,
[tf.saved_model.tag_constants.SERVING],
signature_def_map= {
"serving_default":
tf.saved_model.signature_def_utils.predict_signature_def(
inputs= {"userLat": userLat, "userLon":userLon},
outputs= {"result": result})
})
builder.save()
print 'Done exporting'
You can try to define the graph so that the output tensor preserves the shape (outer dimension) of the input tensor.
For example, something like:
#server input data
userLoc = tf.placeholder(tf.float32, shape=[None, 2])
def calculate_dist(user_loc):
distanceList = []
for i in taxiData:
taxiId=tf.constant(i[0],dtype=tf.string,shape=[])
taxiLat=tf.constant(i[1],dtype=tf.float32,shape=[])
taxiLon=tf.constant(i[2],dtype=tf.float32,shape=[])
distanceValue=6371*tf.acos(tf.cos(radian*user_loc[0])*
tf.cos(radian*taxiLat)*tf.cos(radian*taxiLon-
radian*126.8943311)+tf.sin(radian*37.4685225)*tf.sin(radian*taxiLat))
tmpDistance=[]
tmpDistance.append(taxiId)
tmpDistance.append(distanceValue)
distanceList.append(tmpDistance)
# result sort
sId,distances=zip(*distanceList)
indices = tf.nn.top_k(distances, k=len(distances)).indices
return tf.gather(sId, indices[::-1])[0:5]
result = tf.map_fn(calculate_dist, userLoc)