Tensorflow apply_gradients ValueError

I get the following error on the second-to-last line of code and am not sure how to proceed. Can anyone give me some insight?
ValueError: Tensor("Variable_20:0", shape=(8, 8, 4, 32), dtype=float32_ref) must be from the same graph as Tensor("Variable_20/RMSProp_1:0", shape=(8, 8, 4, 32), dtype=float32_ref).
The code is as follows:
optimizer = tf.train.RMSPropOptimizer(0.00025, 0.95, 0.95, 0.01)
readout = tf.reduce_mean(tf.reduce_sum(tf.mul(l_readout, a), reduction_indices=1))
cost = tf.reduce_mean(tf.square(tf.sub(y, readout)))
grads = optimizer.compute_gradients(cost, localW)
grad_vals = sess.run([g for (g, v) in grads], feed_dict={a: val_a, y: val_y, s: val_s})

placeholder_gradients = []
for var in localW:
    placeholder_gradients.append((tf.placeholder('float', shape=var.get_shape()), var))

feed_dict = {}
for i in range(len(placeholder_gradients)):
    feed_dict[placeholder_gradients[i]] = grad_vals[i]

apply_gradients = optimizer.apply_gradients(placeholder_gradients)  # ERROR LINE
apply_gradients.run(feed_dict=feed_dict)

This may be related to my use of a thread, which is not shown in the example. I will withdraw this question until I look further into how to use threads with the same graph.
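For reference, a minimal sketch of the usual remedy (this is an assumption about the threaded setup, which is not shown above): create every op, including the RMSProp slot variables that apply_gradients adds, inside one explicit graph, and have worker threads reuse that graph and session instead of building ops against a fresh default graph.
import threading
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    # build the model, optimizer, compute_gradients and apply_gradients here,
    # so the slot variables (e.g. Variable_20/RMSProp_1) live in this graph
    x = tf.placeholder(tf.float32, [None, 4])      # hypothetical stand-in model
    w = tf.Variable(tf.zeros([4, 1]))
    cost = tf.reduce_mean(tf.square(tf.matmul(x, w)))
    optimizer = tf.train.RMSPropOptimizer(0.00025, 0.95, 0.95, 0.01)
    train_op = optimizer.apply_gradients(optimizer.compute_gradients(cost))

sess = tf.Session(graph=graph)
with graph.as_default():
    sess.run(tf.initialize_all_variables())

def worker():
    # worker threads reuse the same graph and session rather than creating new ops
    with graph.as_default():
        sess.run(train_op, feed_dict={x: [[1., 2., 3., 4.]]})

threading.Thread(target=worker).start()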

Hyperparameter tuning with XGBRanker

I am trying to figure out how to tune my hyperparameters through RandomizedSearchCV with an XGBRanker model.
I can split the data into groups, feed it into the model, and make predictions. However, I am not sure how to set up the search object, namely two specific things: how to inform it about the groups, and what kind of score I need to supply.
model = xg.XGBRanker(
    tree_method='exact',
    booster='gbtree',
    objective='rank:pairwise',
    random_state=42,
    learning_rate=0.06,
    max_depth=5,
    n_estimators=700,
    subsample=0.75,
    #colsample_bytree=0.9,
    #subsample=0.75
    min_child_weight=0.06
)
model.fit(x_train, y_train, group=train_groups, verbose=True)
model.fit(x_train, y_train, group=train_groups, verbose=True)
This works fine. This is where I need some help:
param_dist = {'n_estimators': stats.randint(40, 1000),
              'learning_rate': stats.uniform(0.01, 0.59),
              'subsample': stats.uniform(0.3, 0.6),
              'max_depth': [3, 4, 5, 6, 7, 8, 9],
              'colsample_bytree': stats.uniform(0.5, 0.4),
              'min_child_weight': [0.05, 0.1, 0.02]
              }
clf = RandomizedSearchCV(model,
                         param_distributions=param_dist,
                         cv=5,
                         n_iter=5,
                         scoring=???, #
                         error_score=0,
                         verbose=3,
                         n_jobs=-1)
#also what about the groups?
I had tried something similar. For scoring, however, I used the ndcg scorer from sklearn. I added:
scoring = sklearn.metrics.make_scorer(sklearn.metrics.ndcg_score, greater_is_better=True)
For the groups, you can add them to the fit_params of RandomizedSearchCV:
fit_params = {"model__groups": group}
clf = RandomizedSearchCV(model,
                         param_distributions=param_dist,
                         cv=5,
                         n_iter=5,
                         scoring=scoring,
                         error_score=0,
                         verbose=3,
                         n_jobs=-1,
                         fit_params=fit_params)
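For completeness, a hedged usage sketch (x_train, y_train and train_groups are the names from the question; the "model__" prefix only applies when the ranker is wrapped in a Pipeline step named "model", otherwise the fit_params key would simply be "group"):
# Hypothetical: run the randomized search and inspect the best configuration found.
clf.fit(x_train, y_train)
print(clf.best_params_)
print(clf.best_score_)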

dimension of tf.Variables change after some epochs

I am new to TensorFlow and I am learning.
I define some variables and start training. Everything runs smoothly for the first few epochs, but suddenly it throws the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Matrix size-incompatible: In[0]: [17952,50], In[1]: [0,20]
[[{{node gradients/Embeddings_1/MatMul_grad/MatMul_1}}]]
[[gradients/Embeddings_1/MatMul_grad/tuple/control_dependency/_1867]]
(1) Invalid argument: Matrix size-incompatible: In[0]: [17952,50], In[1]: [0,20]
[[{{node gradients/Embeddings_1/MatMul_grad/MatMul_1}}]]
My question is why it throws this error after some epochs rather than right away. Usually, these types of errors are thrown when the graph is built.
This is my code for creating the variables and embedding the trees:
def __init__(self, vocab, embedding):
    self.add_model_variables()
    with tf.variable_scope("Embeddings", reuse=True):
        with tf.device('/cpu:0'):
            w_embed = tf.get_variable('WE', [self.vocab_embedding_size, self.embed_size])
            b_embed = tf.get_variable('bE', [1, self.embed_size])
            embeddings = tf.get_variable('embeddings')
            self.embeddings = tf.add(tf.matmul(embeddings, w_embed), b_embed)

def add_model_variables(self):
    myinitilizer = tf.random_uniform_initializer(-self.calc_wt_init(), self.calc_wt_init())
    with tf.variable_scope('Embeddings'):
        with tf.device('/cpu:0'):
            w_embed = tf.get_variable('WE', [self.vocab_embedding_size, self.embed_size], initializer=myinitilizer)
            b_embed = tf.get_variable('bE', [1, self.embed_size], initializer=myinitilizer)
            embeddings = tf.get_variable('embeddings',
                                         initializer=tf.convert_to_tensor(self.pretrained_embedding),
                                         dtype=tf.float32)
    with tf.variable_scope('Composition'):
        self.W1 = tf.get_variable('W1', [2 * self.embed_size, self.embed_size], initializer=myinitilizer)
        self.b1 = tf.get_variable('b1', [1, self.embed_size], initializer=myinitilizer)
    with tf.variable_scope('Projection'):
        self.U = tf.get_variable('U', [self.embed_size, 1], initializer=myinitilizer)
        self.bu = tf.get_variable('bu', [self.max_number_nodes, 1], initializer=myinitilizer)

def embed_tree(self, batch_index):
    def combine_children(left_tensor, right_tensor):
        return tf.nn.relu(tf.matmul(tf.concat([left_tensor, right_tensor], axis=1, name='combine_children'), self.W1) + self.b1)

    def embed_word(word_index):
        with tf.device('/cpu:0'):
            return tf.expand_dims(tf.gather(self.embeddings, word_index), 0)

    def loop_body(node_tensors, i):
        node_is_leaf = tf.gather(is_leaf, i)
        word = tf.gather(words, i)
        left_child = tf.gather(left_children, i)
        right_child = tf.gather(right_children, i)
        node_tensor = tf.cond(
            node_is_leaf,
            lambda: embed_word(word),
            lambda: combine_children(
                node_tensors.read(n - right_child),
                node_tensors.read(n - left_child)))
        node_tensors = node_tensors.write(i, node_tensor)
        i = tf.add(i, 1)
        return node_tensors, i

    is_leaf = tf.gather(self.batch_is_leaf, batch_index)
    left_children = tf.gather(self.batch_left_children, batch_index)
    right_children = tf.gather(self.batch_right_children, batch_index)
    words = tf.gather(self.batch_words, batch_index)
    n = tf.reduce_sum(tf.cast(tf.not_equal(left_children, -1), tf.int32)) - 2
    #self.batch_operation = tf.print(batch_index, 'N::::::::', output_stream=sys.stdout)
    node_tensors = tf.TensorArray(tf.float32, size=self.max_number_nodes,
                                  dynamic_size=False, clear_after_read=False,
                                  element_shape=[1, self.embed_size])
    loop_cond = lambda node_tensors, i: tf.less(i, n + 2)
    #with tf.control_dependencies([self.batch_operation]):
    node_tensors, _ = tf.while_loop(loop_cond, loop_body, [node_tensors, 0], parallel_iterations=1)
    tree_embedding = tf.convert_to_tensor(node_tensors.stack())
    return tree_embedding
The other problem is that I cannot replicate the error as it happens occasionally.
Update:
When I reduce the batch_size, the chance of getting this error reduces.
Is it possible that this happens because I am working close to the GPU memory limit?
tf.gather produces zeros for invalid indices on the GPU (it works correctly on the CPU, however). In other words, TensorFlow does not range-check indices while running on the GPU.
The errors caused by the returned zeros accumulate in the gradient and finally result in confusing error messages that are not related to the original problem.
For reference:
https://github.com/tensorflow/tensorflow/issues/3638
I changed tf.gather to index-based retrieval (a[i]) and the problem was fixed. I don't know exactly why!
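For illustration, a minimal sketch of the behaviour described above (assuming a TF 1.x graph-mode session; this is not the original poster's code):
import tensorflow as tf

params = tf.constant([[1.0, 2.0], [3.0, 4.0]])
bad_indices = tf.constant([0, 5])  # index 5 is out of range

with tf.device('/cpu:0'):
    gathered_cpu = tf.gather(params, bad_indices)  # the CPU kernel range-checks

with tf.Session() as sess:
    try:
        sess.run(gathered_cpu)
    except tf.errors.InvalidArgumentError as e:
        # on the CPU the bad index is reported immediately, instead of
        # silently producing zeros that later corrupt the gradients
        print('caught:', e.message)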

rebuild torch tensor from its batchified version

This is a very nice example of how to build a 3D tensor:
import torch
y = torch.rand(100, 1)
batch_size = 10
batched_data = y.contiguous().view(batch_size, -1, y.size(-1)).transpose(0,1)
batched_data.shape
the output is:
torch.Size([10, 10, 1])
OK, now what I want to do is the other way around: starting from batched_data, I want to rebuild y.
Any suggestion for concise, streamlined PyTorch code?
==== Additional input =====
I am using this for an RNN and now I have some doubts, because if you consider the following code:
import torch
y = torch.arange(100).view(100,1)
batch_size = 10
batched_data = y.contiguous().view(batch_size, -1, y.size(-1)).transpose(0,1)
batched_data.shape
The output is:
tensor([[[ 0],
[10],
[20],
[30],
[40],
[50],
[60],
[70],
[80],
[90]],
[[ 1],
[11],
[21],
[31],
[41],
[51],
[61],
[71],
[81],
[91]],
This is not what I would expect. I would expect something like:
[[1,2,3,4,5,6,7,8,9,10],[11,12,13,14,15,16,17,18,19,20],....
Suppose you want to do something like this to rebuild y:
rebuilded_y = batched_data.transpose(0,1).view(*y.shape)
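A quick round-trip check (a sketch using the shapes from the question) confirms that transposing back and reshaping reproduces the original tensor:
import torch

y = torch.arange(100).view(100, 1)
batch_size = 10
batched_data = y.contiguous().view(batch_size, -1, y.size(-1)).transpose(0, 1)

# undo the transpose, then flatten back to the original (100, 1) shape
rebuilt_y = batched_data.transpose(0, 1).contiguous().view(*y.shape)
print(torch.equal(rebuilt_y, y))  # True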
To make the input look like you expected, you need to remove the transpose and the additional dimension in batched_data:
batched_data = y.contiguous().view(batch_size, -1)
If you want to prepare inputs for an RNN, you need to know that an RNN takes 3D tensors of shape (seq_len, batch, input_size). Here, input_size refers to the number of features, and in your scenario it is 1. So the input tensor of shape (10, 10, 1) can still be a valid input for an RNN.
Example:
import torch
import torch.nn as nn

rnn = nn.RNN(input_size=1, hidden_size=20, num_layers=1)
input = torch.randn(10, 10, 1)
output, hn = rnn(input)
print(output.size())  # torch.Size([10, 10, 20])
The RNN's output is of shape (seq_len, batch, num_directions * hidden_size).
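If you would rather keep the batch dimension first, an alternative not mentioned in the original answer is to construct the RNN with batch_first=True, so it accepts the (batch, seq_len, input_size) layout directly:
import torch
import torch.nn as nn

rnn = nn.RNN(input_size=1, hidden_size=20, num_layers=1, batch_first=True)
inp = torch.randn(10, 10, 1)  # (batch, seq_len, input_size)
output, hn = rnn(inp)
print(output.size())  # torch.Size([10, 10, 20]), i.e. (batch, seq_len, hidden_size)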

Tensorflow, ValueError: The two structures don't have the same nested structure

import tensorflow as tf

vocab_num = 4000
word_dim = 300
question_encode = None
answer_num = 1000
common_dim = 256
memory_dim = 256

question_encode = tf.placeholder(
    tf.int64, [None, None], 'question_encode')
with tf.variable_scope('embedding'):
    embedding_matrix = tf.get_variable(
        'embedding_matrix',
        [vocab_num, word_dim], regularizer=tf.nn.l2_loss)
    question_embedding = tf.nn.embedding_lookup(
        embedding_matrix, question_encode, name='word_embedding')
    print('question_embedding', question_embedding)

shape = tf.shape(question_encode)
batch_size = shape[0]
question_length = tf.constant(15)
time = tf.constant(0, name='time')
max_length = tf.constant(20)
q_cell = tf.nn.rnn_cell.LSTMCell(word_dim)
q_state = q_cell.zero_state(batch_size, tf.float32)
word_embed_W = tf.get_variable('word_embed_W', [word_dim, common_dim], regularizer=tf.nn.l2_loss)
word_embed_b = tf.get_variable('word_embed_b', [common_dim])
word_embedding = question_embedding[:, time]
out_ = tf.ones((1, 256))
time = tf.constant(0)
out = tf.zeros((max_length - question_length, 256))

def _one_step(time, q_state, word_list):
    """One time step of model."""
    word_embedding = question_embedding[:, time]
    with tf.variable_scope('lstm_q'):
        q_output, q_state = q_cell(word_embedding, q_state)
    with tf.name_scope('transform_w'):
        word = tf.nn.xw_plus_b(
            word_embedding, word_embed_W, word_embed_b)
        word = tf.nn.tanh(word)
    word_list = tf.concat([word_list, word], axis=0)
    return time + 1, q_state, word_list

# main loop
time, q_state, out_ = tf.while_loop(
    cond=lambda time, *_: time < question_length,
    body=_one_step,
    loop_vars=[time, q_state, out_],
    shape_invariants=[time.get_shape(), tf.TensorShape([None, 256])]
)

word_list = tf.concat([out_, out], axis=0)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
res = sess.run(out)
The error that arises:
ValueError: The two structures don't have the same nested structure.
First structure: type=list str=[<tf.Tensor 'Const_2:0' shape=() dtype=int32>, LSTMStateTuple(c=<tf.Tensor 'LSTMCellZeroState/zeros:0' shape=(?, 300) dtype=float32>, h=<tf.Tensor 'LSTMCellZeroState/zeros_1:0' shape=(?, 300) dtype=float32>), <tf.Tensor 'ones:0' shape=(1, 256) dtype=float32>]
Second structure: type=list str=[TensorShape([]), TensorShape([Dimension(None), Dimension(256)])]
What I was trying to achieve was a matrix with each word spliced together, but with the q_state change it turned out to be wrong.
I have tried many approaches and they all failed, so I hope to get your help.
You pass three variables in loop_vars, but only two entries in shape_invariants, so the error says the two structures don't have the same nested structure. You just need to add the shape structure of q_state:
# main loop
time, q_state, out_ = tf.while_loop(
    cond=lambda time, *_: time < question_length,
    body=_one_step,
    loop_vars=[time, q_state, out_],
    shape_invariants=[time.get_shape(),
                      tf.nn.rnn_cell.LSTMStateTuple(tf.TensorShape([None, 300]),
                                                    tf.TensorShape([None, 300])),
                      tf.TensorShape([None, 256])]
)
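The underlying rule, shown as a minimal self-contained sketch (TF 1.x API, independent of the model above): when shape_invariants is given, it must mirror the nested structure of loop_vars, one invariant per loop variable.
import tensorflow as tf

i0 = tf.constant(0)
acc0 = tf.zeros([0, 3])  # this tensor grows along its first dimension

_, acc = tf.while_loop(
    cond=lambda i, acc: i < 5,
    body=lambda i, acc: [i + 1, tf.concat([acc, tf.ones([1, 3])], axis=0)],
    loop_vars=[i0, acc0],
    # one invariant per loop variable; use None for the dimension that changes
    shape_invariants=[i0.get_shape(), tf.TensorShape([None, 3])]
)

with tf.Session() as sess:
    print(sess.run(acc).shape)  # (5, 3)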

How to see multiple images through tf.image_summary

Problem: only one image is shown in TensorBoard.
Inspired by this:
How can I visualize the weights (variables) in cnn in Tensorflow?
Here is the code:
# --- image reader ---
# - rsq: random shuffle queue with [fn l] pairs
def img_reader_jpg(rsq):
    fn, label = rsq.dequeue()
    img_b = tf.read_file(fn)
    img_u = tf.image.decode_jpeg(img_b, channels=3)
    img_f = tf.cast(img_u, tf.float32)
    img_4 = tf.expand_dims(img_f, 0)
    return img_4, label

# filenames and labels are pre-loaded
fv = tf.constant(fnames)
lv = tf.constant(ohl)
rsq = tf.RandomShuffleQueue(len(fnames), 0, [tf.string, tf.float32])
do_enq = rsq.enqueue_many([fv, lv])
# reading_op
image, label = img_reader_jpg(rsq)
# test: some op
im_t = tf.placeholder(tf.float32, shape=[None, 30, 30, 3], name='img_tensor')
lab_t = tf.placeholder(tf.float32, shape=[None, 2], name='lab_tensor')
some_op = tf.add(im_t, im_t)
ims_op = tf.image_summary("img", im_t)
# service ops
init_op = tf.initialize_all_variables()
# run it
with tf.Session() as sess:
    summary_writer = tf.train.SummaryWriter(summ_dir, graph_def=sess.graph_def)
    print 'log at:', summ_dir
    sess.run(init_op)
    sess.run(do_enq)
    print "rsq.size:", rsq.size().eval()
    for i in xrange(5):
        print "\ni:", i
        img_i, lab_i = sess.run([image, label])  # read image - right?
        print "I:", img_i.shape, " L:", lab_i
        feed_dict = {
            im_t: img_i
        }
        img2 = sess.run([some_op], feed_dict=feed_dict)
        # now summary part
        imss = sess.run(ims_op, feed_dict=feed_dict)
        #print "imss", imss
        summary_writer.add_summary(imss, i)
        print "rsq.size:", rsq.size().eval()
    summary_writer.close()
    print 'ok'
Here is the output:
log at: /mnt/code/test_00/log/2016-01-09 17:10:37
rsq.size: 1225
i: 0
I: (1, 30, 30, 3) L: [ 1. 0.]
i: 1
I: (1, 30, 30, 3) L: [ 1. 0.]
i: 2
I: (1, 30, 30, 3) L: [ 0. 1.]
i: 3
I: (1, 30, 30, 3) L: [ 0. 1.]
i: 4
I: (1, 30, 30, 3) L: [ 0. 1.]
rsq.size: 1220
ok
It looks OK:
5 [image label] pairs were delivered;
if I uncomment print "imss", imss I can see 5 different buffers, each with its own PNG image;
the op graph looks OK in TB.
However, there is only one image in TB. I suspect I have missed something important about how TF works, i.e. what causes what at graph execution time.
Second question: what do I need to do to see the result, i.e. img2 = img + img, in TB?
You are right that you will only see one image. You are calling the image summary op once in each iteration of the for loop, and each time you call it, you pass it a single image.
What you could do to see all the images you want is to compile them into a single tensor. If we refer to the TensorFlow API (the link always changes, so find the latest one):
tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
As of TF 1.0.0, it's this:
tf.summary.image(name, tensor, max_outputs=3, collections=None)
Put your "multiple image tensor" in, set max_images to the number of images you have, and you should be able to see all the images in TensorBoard.
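A hedged sketch of that suggestion, reusing im_t, sess and summary_writer from the code in the question (img_list is a hypothetical list collecting the five (1, 30, 30, 3) arrays read in the loop; on TF >= 1.0 the call would be tf.summary.image(..., max_outputs=5) instead):
import numpy as np

# stack the individually read images into one (5, 30, 30, 3) batch
all_imgs = np.concatenate(img_list, axis=0)

# a single summary op covering the whole batch; max_images must cover the batch size
ims_all_op = tf.image_summary("img_batch", im_t, max_images=5)
imss_all = sess.run(ims_all_op, feed_dict={im_t: all_imgs})
summary_writer.add_summary(imss_all, 0)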
Let me know if there are still problems.
As of r0.12, tf.image_summary has been replaced with tf.summary.image
tf.summary.image(name, tensor, max_outputs=3, collections=None)